Revision 51020
Added by Michele Artini about 6 years ago
EnrichLabsPlugin.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mdstore.plugins; |
2 | 2 |
|
3 |
import java.io.StringReader;
|
|
3 |
import java.util.HashMap;
|
|
4 | 4 |
import java.util.Map; |
5 | 5 |
import java.util.Set; |
6 | 6 |
import java.util.regex.Matcher; |
7 | 7 |
import java.util.regex.Pattern; |
8 | 8 |
import java.util.stream.Collectors; |
9 | 9 |
|
10 |
import org.apache.commons.lang3.math.NumberUtils; |
|
11 | 10 |
import org.apache.commons.logging.Log; |
12 | 11 |
import org.apache.commons.logging.LogFactory; |
13 |
import org.dom4j.Document; |
|
14 |
import org.dom4j.DocumentException; |
|
15 |
import org.dom4j.Element; |
|
16 |
import org.dom4j.Node; |
|
17 |
import org.dom4j.QName; |
|
18 |
import org.dom4j.io.SAXReader; |
|
19 | 12 |
import org.springframework.beans.factory.annotation.Autowired; |
20 | 13 |
|
21 | 14 |
import com.google.common.collect.Sets; |
22 |
import com.mongodb.BasicDBObject; |
|
23 |
import com.mongodb.DBObject; |
|
24 |
import com.mongodb.client.MongoCollection; |
|
25 | 15 |
|
26 |
import eu.dnetlib.data.db.Affiliation; |
|
27 | 16 |
import eu.dnetlib.data.db.AffiliationsDao; |
28 | 17 |
import eu.dnetlib.data.db.Group; |
29 | 18 |
import eu.dnetlib.data.db.Person; |
30 |
import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore; |
|
31 |
import eu.dnetlib.rmi.data.MDStoreServiceException; |
|
32 |
import eu.dnetlib.xml.ISTIUtilityFunction; |
|
19 |
import eu.dnetlib.data.mdstore.plugins.objects.Affiliation; |
|
20 |
import eu.dnetlib.data.mdstore.plugins.objects.CnrPerson; |
|
21 |
import eu.dnetlib.data.mdstore.plugins.objects.MdRecord; |
|
22 |
import eu.dnetlib.data.utils.IstiConstants; |
|
23 |
import eu.dnetlib.data.utils.XsltFunctions; |
|
33 | 24 |
|
34 |
public class EnrichLabsPlugin extends AbstractIstiMDStorePlugin {
|
|
25 |
public class EnrichLabsPlugin extends MdRecordPlugin {
|
|
35 | 26 |
|
36 | 27 |
private static final Log log = LogFactory.getLog(EnrichLabsPlugin.class); |
37 | 28 |
|
... | ... | |
40 | 31 |
@Autowired |
41 | 32 |
private AffiliationsDao dao; |
42 | 33 |
|
34 |
private Map<String, Person> affiliations = new HashMap<>(); |
|
35 |
|
|
43 | 36 |
@Override |
44 |
public void process(final MongoMDStore store, final Map<String, String> params) throws MDStoreServiceException { |
|
45 |
log.warn("********************************************"); |
|
46 |
log.warn("* ENRICH LABS"); |
|
47 |
log.warn("********************************************"); |
|
48 |
|
|
49 |
// CODE -> year -> Lab |
|
50 |
final Map<String, Person> affiliations = dao.listPersonsWithAffiliations() |
|
37 |
protected void reconfigure() { |
|
38 |
affiliations = dao.listPersonsWithAffiliations() |
|
51 | 39 |
.stream() |
52 | 40 |
.collect(Collectors.toMap(Person::getId, a -> a)); |
41 |
} |
|
53 | 42 |
|
54 |
final MongoCollection<DBObject> pubsCollection = store.getCollection(); |
|
43 |
@Override |
|
44 |
protected boolean updateRecord(final MdRecord doc) { |
|
45 |
final int year = doc.getDate(); |
|
55 | 46 |
|
56 |
for (final DBObject obj : pubsCollection.find()) { |
|
57 |
final String id = obj.get("id").toString(); |
|
58 |
log.debug("**********************************************"); |
|
59 |
log.debug("Record " + id); |
|
47 |
for (final CnrPerson cp : doc.getCnrPersons()) { |
|
48 |
cp.getAffiliations().clear(); |
|
60 | 49 |
|
61 |
try { |
|
62 |
final Document doc = (new SAXReader()).read(new StringReader(obj.get("body").toString())); |
|
50 |
if (year >= IstiConstants.PIMPA_START_YEAR) { |
|
51 |
final String infoId = cp.getId(); |
|
52 |
final Pattern pattern = Pattern.compile("info:cnr-pdr\\/author\\/(.+)\\/(.+)\\/(.+)"); |
|
53 |
final Matcher matcher = pattern.matcher(infoId); |
|
54 |
if (matcher.find()) { |
|
55 |
final String code = matcher.group(1); |
|
56 |
final String fullname = XsltFunctions.capitalize(String.format("%s %s", matcher.group(3), matcher.group(2))); |
|
63 | 57 |
|
64 |
final int year = NumberUtils.toInt(doc.valueOf("//*[local-name()='date' and @dateType='Accepted']"), 0);
|
|
58 |
final Person p = affiliations.get(code);
|
|
65 | 59 |
|
66 |
if (year >= Affiliation.START_YEAR) { |
|
67 |
resetAffiliations(doc); |
|
68 |
|
|
69 |
for (final Object o : doc.selectNodes("//*[local-name()='person']")) { |
|
70 |
final Element node = (Element) o; |
|
71 |
final String infoId = node.valueOf("./*[local-name()='infoId']"); |
|
72 |
final Pattern pattern = Pattern.compile("info:cnr-pdr\\/author\\/(.+)\\/(.+)\\/(.+)"); |
|
73 |
final Matcher matcher = pattern.matcher(infoId); |
|
74 |
if (matcher.find()) { |
|
75 |
final String code = matcher.group(1); |
|
76 |
final String fullname = ISTIUtilityFunction.capitalize(String.format("%s %s", matcher.group(3), matcher.group(2))); |
|
77 |
|
|
78 |
final Person p = affiliations.get(code); |
|
79 |
|
|
80 |
if (p != null) { |
|
81 |
final Set<Group> groups = p.getAffiliations().get(year); |
|
82 |
if (groups != null) { |
|
83 |
node.selectSingleNode("./*[local-name() = 'personName']").setText(p.getName()); |
|
84 |
for (final Group g : groups) { |
|
85 |
if (!g.getId().equals(UNKNOWN_GROUP.getId())) { |
|
86 |
final Element affNode = node.addElement(new QName("affiliation", node.getNamespace())); |
|
87 |
affNode.addAttribute("code", code); |
|
88 |
affNode.addAttribute("groupAcronym", g.getId()); |
|
89 |
affNode.addAttribute("group", g.getName()); |
|
90 |
affNode.addAttribute("type", g.getType()); |
|
91 |
} |
|
92 |
} |
|
93 |
} else { |
|
94 |
log.info("Affiliation not found, infoId: " + infoId + ", year: " + year); |
|
95 |
dao.registerAffiliation(code, UNKNOWN_GROUP.getId(), year); |
|
96 |
p.getAffiliations().put(year, Sets.newHashSet(UNKNOWN_GROUP)); |
|
60 |
if (p != null) { |
|
61 |
final Set<Group> groups = p.getAffiliations().get(year); |
|
62 |
if (groups != null) { |
|
63 |
cp.setName(p.getName()); |
|
64 |
for (final Group g : groups) { |
|
65 |
if (!g.getId().equals(UNKNOWN_GROUP.getId())) { |
|
66 |
cp.getAffiliations().add(new Affiliation(code, g.getType(), g.getId(), g.getName())); |
|
97 | 67 |
} |
98 |
} else { |
|
99 |
log.info("Person not found, infoId: " + infoId + ", year: " + year); |
|
100 |
dao.registerPerson(code, fullname); |
|
101 |
dao.registerAffiliation(code, UNKNOWN_GROUP.getId(), year); |
|
102 |
|
|
103 |
final Person np = new Person(code, fullname); |
|
104 |
np.getAffiliations().put(year, Sets.newHashSet(UNKNOWN_GROUP)); |
|
105 |
|
|
106 |
affiliations.put(code, np); |
|
107 | 68 |
} |
108 | 69 |
} else { |
109 |
log.warn("Invalid infoId: " + infoId); |
|
70 |
log.info("Affiliation not found, infoId: " + infoId + ", year: " + year); |
|
71 |
dao.registerAffiliation(code, UNKNOWN_GROUP.getId(), year); |
|
72 |
p.getAffiliations().put(year, Sets.newHashSet(UNKNOWN_GROUP)); |
|
110 | 73 |
} |
74 |
} else { |
|
75 |
log.info("Person not found, infoId: " + infoId + ", year: " + year); |
|
76 |
dao.registerPerson(code, fullname); |
|
77 |
dao.registerAffiliation(code, UNKNOWN_GROUP.getId(), year); |
|
78 |
|
|
79 |
final Person np = new Person(code, fullname); |
|
80 |
np.getAffiliations().put(year, Sets.newHashSet(UNKNOWN_GROUP)); |
|
81 |
|
|
82 |
affiliations.put(code, np); |
|
111 | 83 |
} |
112 |
|
|
113 |
pubsCollection.updateOne(new BasicDBObject("id", id), new BasicDBObject("$set", new BasicDBObject("body", doc.asXML())));
|
|
84 |
} else { |
|
85 |
log.warn("Invalid infoId: " + infoId);
|
|
114 | 86 |
} |
115 |
} catch (final DocumentException e) { |
|
116 |
log.warn("Problem parsing a mdstore record"); |
|
117 | 87 |
} |
118 |
} |
|
119 |
} |
|
120 | 88 |
|
121 |
private void resetAffiliations(final Document doc) { |
|
122 |
for (final Object n : doc.selectNodes("//*[local-name() = 'person']/*[local-name() = 'affiliation']")) { |
|
123 |
((Node) n).detach(); |
|
124 | 89 |
} |
90 |
return true; |
|
125 | 91 |
} |
126 | 92 |
|
127 | 93 |
} |
Also available in: Unified diff
Refactoring using JAXB