Project

General

Profile

1
package eu.dnetlib.data.mdstore.plugins;
2

    
3
import java.io.StringReader;
4
import java.util.Map;
5
import java.util.Set;
6
import java.util.regex.Matcher;
7
import java.util.regex.Pattern;
8
import java.util.stream.Collectors;
9

    
10
import org.apache.commons.lang3.math.NumberUtils;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13
import org.dom4j.Document;
14
import org.dom4j.DocumentException;
15
import org.dom4j.Element;
16
import org.dom4j.Node;
17
import org.dom4j.QName;
18
import org.dom4j.io.SAXReader;
19
import org.springframework.beans.factory.annotation.Autowired;
20

    
21
import com.mongodb.BasicDBObject;
22
import com.mongodb.DBObject;
23
import com.mongodb.client.MongoCollection;
24

    
25
import eu.dnetlib.data.db.AffiliationsDao;
26
import eu.dnetlib.data.db.Group;
27
import eu.dnetlib.data.db.Person;
28
import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore;
29
import eu.dnetlib.rmi.data.MDStoreServiceException;
30

    
31
public class EnrichLabsPluginNEW extends AbstractIstiMDStorePlugin {
32

    
33
	private static final Log log = LogFactory.getLog(EnrichLabsPluginNEW.class);
34

    
35
	@Autowired
36
	private AffiliationsDao affilationDao;
37

    
38
	@SuppressWarnings("unchecked")
39
	@Override
40
	public void process(final MongoMDStore store, final Map<String, String> params) throws MDStoreServiceException {
41
		log.warn("********************************************");
42
		log.warn("* ENRICH LABS");
43
		log.warn("********************************************");
44

    
45
		// CODE -> year -> Lab
46
		final Map<String, Person> affiliations = affilationDao.listPersonsWithAffiliations()
47
				.stream()
48
				.collect(Collectors.toMap(Person::getId, a -> a));
49

    
50
		final MongoCollection<DBObject> pubsCollection = store.getCollection();
51

    
52
		for (final DBObject obj : pubsCollection.find()) {
53
			final String id = obj.get("id").toString();
54
			log.debug("**********************************************");
55
			log.debug("Record " + id);
56

    
57
			try {
58
				final Document doc = (new SAXReader()).read(new StringReader(obj.get("body").toString()));
59

    
60
				resetAffiliations(doc);
61

    
62
				final Integer publicationYear = NumberUtils.toInt(doc.valueOf("//*[local-name()='date' and @dateType='Accepted']"), 0);
63

    
64
				for (final Object o : doc.selectNodes("//*[local-name()='person']")) {
65
					final Element node = (Element) o;
66
					final String infoId = node.valueOf("./*[local-name()='infoId']");
67
					final Pattern pattern = Pattern.compile("info:cnr-pdr\\/author\\/(.+)\\/(.+)\\/(.+)");
68
					final Matcher matcher = pattern.matcher(infoId);
69
					if (matcher.find()) {
70
						final String code = matcher.group(1);
71

    
72
						final Person p = affiliations.get(code);
73

    
74
						if (p != null) {
75
							final Set<Group> groups = p.getAffiliations().get(publicationYear);
76
							if (groups != null) {
77
								node.selectSingleNode("./*[local-name() = 'personName']").setText(p.getName());
78
								for (final Group g : groups) {
79
									final Element affNode = node.addElement(new QName("affiliation", node.getNamespace()));
80
									affNode.setText("ISTI-CNR");
81
									affNode.addAttribute("code", code);
82
									affNode.addAttribute("groupAcronym", g.getId());
83
									affNode.addAttribute("group", g.getName());
84
									affNode.addAttribute("type", g.getType());
85
								}
86
							} else {
87
								log.info("Affilition for year " + publicationYear + " not found, person: " + infoId);
88
							}
89
						} else {
90
							log.info("Person not found: " + infoId);
91
						}
92
					} else {
93
						log.warn("Invalid infoId: " + infoId);
94
					}
95
				}
96

    
97
				pubsCollection.updateOne(new BasicDBObject("id", id), new BasicDBObject("$set", new BasicDBObject("body", doc.asXML())));
98

    
99
			} catch (final DocumentException e) {
100
				log.warn("Problem parsing a mdstore record");
101
			}
102
		}
103
	}
104

    
105
	private void resetAffiliations(final Document doc) {
106
		for (final Object n : doc.selectNodes("//*[local-name() = 'person']/*[local-name() = 'affiliation']")) {
107
			((Node) n).detach();
108
		}
109
	}
110

    
111
}
(4-4/8)