1
|
package eu.dnetlib.data.mdstore.plugins;
|
2
|
|
3
|
import java.io.StringReader;
|
4
|
import java.util.Map;
|
5
|
import java.util.Set;
|
6
|
import java.util.regex.Matcher;
|
7
|
import java.util.regex.Pattern;
|
8
|
import java.util.stream.Collectors;
|
9
|
|
10
|
import org.apache.commons.lang3.math.NumberUtils;
|
11
|
import org.apache.commons.logging.Log;
|
12
|
import org.apache.commons.logging.LogFactory;
|
13
|
import org.dom4j.Document;
|
14
|
import org.dom4j.DocumentException;
|
15
|
import org.dom4j.Element;
|
16
|
import org.dom4j.Node;
|
17
|
import org.dom4j.QName;
|
18
|
import org.dom4j.io.SAXReader;
|
19
|
import org.springframework.beans.factory.annotation.Autowired;
|
20
|
|
21
|
import com.mongodb.BasicDBObject;
|
22
|
import com.mongodb.DBObject;
|
23
|
import com.mongodb.client.MongoCollection;
|
24
|
|
25
|
import eu.dnetlib.data.db.AffiliationsDao;
|
26
|
import eu.dnetlib.data.db.Group;
|
27
|
import eu.dnetlib.data.db.Person;
|
28
|
import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore;
|
29
|
import eu.dnetlib.rmi.data.MDStoreServiceException;
|
30
|
|
31
|
public class EnrichLabsPluginNEW extends AbstractIstiMDStorePlugin {
|
32
|
|
33
|
private static final Log log = LogFactory.getLog(EnrichLabsPluginNEW.class);
|
34
|
|
35
|
@Autowired
|
36
|
private AffiliationsDao affilationDao;
|
37
|
|
38
|
@SuppressWarnings("unchecked")
|
39
|
@Override
|
40
|
public void process(final MongoMDStore store, final Map<String, String> params) throws MDStoreServiceException {
|
41
|
log.warn("********************************************");
|
42
|
log.warn("* ENRICH LABS");
|
43
|
log.warn("********************************************");
|
44
|
|
45
|
// CODE -> year -> Lab
|
46
|
final Map<String, Person> affiliations = affilationDao.listPersonsWithAffiliations()
|
47
|
.stream()
|
48
|
.collect(Collectors.toMap(Person::getId, a -> a));
|
49
|
|
50
|
final MongoCollection<DBObject> pubsCollection = store.getCollection();
|
51
|
|
52
|
for (final DBObject obj : pubsCollection.find()) {
|
53
|
final String id = obj.get("id").toString();
|
54
|
log.debug("**********************************************");
|
55
|
log.debug("Record " + id);
|
56
|
|
57
|
try {
|
58
|
final Document doc = (new SAXReader()).read(new StringReader(obj.get("body").toString()));
|
59
|
|
60
|
resetAffiliations(doc);
|
61
|
|
62
|
final Integer publicationYear = NumberUtils.toInt(doc.valueOf("//*[local-name()='date' and @dateType='Accepted']"), 0);
|
63
|
|
64
|
for (final Object o : doc.selectNodes("//*[local-name()='person']")) {
|
65
|
final Element node = (Element) o;
|
66
|
final String infoId = node.valueOf("./*[local-name()='infoId']");
|
67
|
final Pattern pattern = Pattern.compile("info:cnr-pdr\\/author\\/(.+)\\/(.+)\\/(.+)");
|
68
|
final Matcher matcher = pattern.matcher(infoId);
|
69
|
if (matcher.find()) {
|
70
|
final String code = matcher.group(1);
|
71
|
|
72
|
final Person p = affiliations.get(code);
|
73
|
|
74
|
if (p != null) {
|
75
|
final Set<Group> groups = p.getAffiliations().get(publicationYear);
|
76
|
if (groups != null) {
|
77
|
node.selectSingleNode("./*[local-name() = 'personName']").setText(p.getName());
|
78
|
for (final Group g : groups) {
|
79
|
final Element affNode = node.addElement(new QName("affiliation", node.getNamespace()));
|
80
|
affNode.setText("ISTI-CNR");
|
81
|
affNode.addAttribute("code", code);
|
82
|
affNode.addAttribute("groupAcronym", g.getId());
|
83
|
affNode.addAttribute("group", g.getName());
|
84
|
affNode.addAttribute("type", g.getType());
|
85
|
}
|
86
|
} else {
|
87
|
log.info("Affilition for year " + publicationYear + " not found, person: " + infoId);
|
88
|
}
|
89
|
} else {
|
90
|
log.info("Person not found: " + infoId);
|
91
|
}
|
92
|
} else {
|
93
|
log.warn("Invalid infoId: " + infoId);
|
94
|
}
|
95
|
}
|
96
|
|
97
|
pubsCollection.updateOne(new BasicDBObject("id", id), new BasicDBObject("$set", new BasicDBObject("body", doc.asXML())));
|
98
|
|
99
|
} catch (final DocumentException e) {
|
100
|
log.warn("Problem parsing a mdstore record");
|
101
|
}
|
102
|
}
|
103
|
}
|
104
|
|
105
|
private void resetAffiliations(final Document doc) {
|
106
|
for (final Object n : doc.selectNodes("//*[local-name() = 'person']/*[local-name() = 'affiliation']")) {
|
107
|
((Node) n).detach();
|
108
|
}
|
109
|
}
|
110
|
|
111
|
}
|