Revision 51020
Added by Michele Artini about 6 years ago
EnrichProjectsPlugin.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mdstore.plugins; |
2 | 2 |
|
3 |
import java.io.StringReader; |
|
4 | 3 |
import java.net.URI; |
5 | 4 |
import java.net.URISyntaxException; |
6 |
import java.util.ArrayList; |
|
7 | 5 |
import java.util.HashMap; |
8 | 6 |
import java.util.List; |
9 | 7 |
import java.util.Map; |
8 |
import java.util.stream.Collectors; |
|
10 | 9 |
|
11 | 10 |
import org.apache.commons.lang3.StringUtils; |
12 | 11 |
import org.apache.commons.logging.Log; |
13 | 12 |
import org.apache.commons.logging.LogFactory; |
14 |
import org.dom4j.Document; |
|
15 |
import org.dom4j.DocumentException; |
|
16 |
import org.dom4j.Element; |
|
17 |
import org.dom4j.io.SAXReader; |
|
18 | 13 |
import org.springframework.beans.factory.annotation.Value; |
19 | 14 |
|
20 |
import com.mongodb.BasicDBObject; |
|
21 |
import com.mongodb.DBObject; |
|
22 |
import com.mongodb.client.MongoCollection; |
|
23 |
|
|
24 |
import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore; |
|
15 |
import eu.dnetlib.data.mdstore.plugins.objects.MdRecord; |
|
25 | 16 |
import eu.dnetlib.data.mdstore.plugins.objects.Project; |
26 |
import eu.dnetlib.rmi.data.MDStoreServiceException; |
|
27 | 17 |
|
28 |
public class EnrichProjectsPlugin extends AbstractIstiMDStorePlugin {
|
|
18 |
public class EnrichProjectsPlugin extends MdRecordPlugin {
|
|
29 | 19 |
|
30 | 20 |
private static final Log log = LogFactory.getLog(EnrichProjectsPlugin.class); |
31 | 21 |
|
32 | 22 |
@Value("${plugin.enrich.projects.openaire.url}") |
33 | 23 |
private String baseUrl; |
34 | 24 |
|
25 |
private Map<String, Project> mapProjects = new HashMap<>(); |
|
26 |
|
|
35 | 27 |
@Override |
36 |
public void process(final MongoMDStore store, final Map<String, String> params) throws MDStoreServiceException {
|
|
37 |
log.info("*****************************************");
|
|
38 |
log.info("* ENRICH PROJECTS ...");
|
|
39 |
log.info("*****************************************");
|
|
28 |
protected void reconfigure() {
|
|
29 |
log.info("Cleaning projects cache");
|
|
30 |
mapProjects.clear();
|
|
31 |
}
|
|
40 | 32 |
|
41 |
final MongoCollection<DBObject> collPubs = store.getCollection(); |
|
33 |
@Override |
|
34 |
protected boolean updateRecord(final MdRecord record) { |
|
35 |
log.debug("Found " + record.getProjects().size() + " projects"); |
|
42 | 36 |
|
43 |
final Map<String, List<Project>> pubs = resolveProjects(collPubs);
|
|
37 |
if (record.getProjects().isEmpty()) { return false; }
|
|
44 | 38 |
|
45 |
// update the publications |
|
46 |
pubs.entrySet().forEach(e -> patchRecord(collPubs, e.getKey(), e.getValue())); |
|
39 |
final List<Project> list = record.getProjects() |
|
40 |
.stream() |
|
41 |
.map(Project::getInfoId) |
|
42 |
.filter(StringUtils::isNotBlank) |
|
43 |
.map(Project::newInstance) |
|
44 |
.map(this::findEnrichedProject) |
|
45 |
.collect(Collectors.toList()); |
|
47 | 46 |
|
48 |
touch(store); |
|
49 |
} |
|
47 |
log.debug("Processed " + list.size() + " projects"); |
|
50 | 48 |
|
51 |
private Map<String, List<Project>> resolveProjects(final MongoCollection<DBObject> collPubs) { |
|
52 |
final Map<String, List<Project>> pubs = new HashMap<>(); |
|
53 |
final Map<String, Project> mapProjects = new HashMap<>(); |
|
54 |
for (final DBObject obj : collPubs.find()) { |
|
55 |
final List<Project> projects = projectsFromPublication(obj, mapProjects); |
|
56 |
pubs.put(obj.get("id").toString(), projects); |
|
57 |
} |
|
58 |
return pubs; |
|
59 |
} |
|
49 |
if (list.isEmpty()) { return false; } |
|
60 | 50 |
|
61 |
private List<Project> projectsFromPublication(final DBObject obj, final Map<String, Project> mapProjects) { |
|
62 |
final List<Project> projects = new ArrayList<>(); |
|
63 |
try { |
|
64 |
final Document doc = (new SAXReader()).read(new StringReader(obj.get("body").toString())); |
|
65 |
for (final Object o : doc.selectNodes("//*[local-name()='project']")) { |
|
66 |
final Element p = (Element) o; |
|
67 |
final String code = p.valueOf("./*[local-name() = 'code']").trim(); |
|
68 |
final String infoId = p.valueOf("./*[local-name() = 'infoId']").trim(); |
|
51 |
record.getProjects().clear(); |
|
52 |
record.getProjects().addAll(list); |
|
69 | 53 |
|
70 |
if (StringUtils.isEmpty(code) && StringUtils.isNotEmpty(infoId)) { |
|
71 |
final Project simpleProject = Project.newInstance(infoId); |
|
72 |
if (simpleProject != null) { |
|
73 |
final String s = simpleProject.getFunder() + "/" + simpleProject.getProgram() + "/" + simpleProject.getCode(); |
|
74 |
if (!mapProjects.containsKey(s)) { |
|
75 |
final Project project = resolveProject(s); |
|
76 |
mapProjects.put(s, project != null ? project : simpleProject); |
|
77 |
} |
|
78 |
projects.add(mapProjects.get(s)); |
|
79 |
} |
|
80 |
} |
|
81 |
|
|
82 |
} |
|
83 |
} catch (final DocumentException e) { |
|
84 |
log.warn("Problem parsing a mdstore record"); |
|
85 |
} |
|
86 |
|
|
87 |
return projects; |
|
54 |
return true; |
|
88 | 55 |
} |
89 | 56 |
|
90 |
private void patchRecord(final MongoCollection<DBObject> collPubs, final String id, final List<Project> projects) { |
|
91 |
log.debug("Adding to " + id + " " + projects.size() + " project(s)"); |
|
92 |
|
|
93 |
try { |
|
94 |
final SAXReader reader = new SAXReader(); |
|
95 |
final DBObject obj = collPubs.find(new BasicDBObject("id", id)).first(); |
|
96 |
|
|
97 |
final Document doc = reader.read(new StringReader(obj.get("body").toString())); |
|
98 |
|
|
99 |
for (final Object o : doc.selectNodes("//*[local-name()='project']")) { |
|
100 |
final Element n = (Element) o; |
|
101 |
final String infoId = n.valueOf("./*[local-name()='infoId']"); |
|
102 |
|
|
103 |
projects.stream() |
|
104 |
.filter(p -> (p != null) && p.match(infoId)) |
|
105 |
.forEach(p -> { |
|
106 |
n.selectSingleNode("./*[local-name()='code']").setText(p.getCode()); |
|
107 |
n.selectSingleNode("./*[local-name()='name']").setText(p.getName()); |
|
108 |
n.selectSingleNode("./*[local-name()='acronym']").setText(p.getAcronym()); |
|
109 |
n.selectSingleNode("./*[local-name()='funder']").setText(p.getFunder()); |
|
110 |
n.selectSingleNode("./*[local-name()='program']").setText(p.getProgram()); |
|
111 |
n.selectSingleNode("./*[local-name()='jurisdiction']").setText(p.getJurisdiction()); |
|
112 |
n.selectSingleNode("./*[local-name()='openaireId']").setText(p.getOpenaireId()); |
|
113 |
}); |
|
114 |
} |
|
115 |
|
|
116 |
collPubs.updateOne(new BasicDBObject("id", id), new BasicDBObject("$set", new BasicDBObject("body", doc.asXML()))); |
|
117 |
} catch (final DocumentException e) { |
|
118 |
log.warn("Error patching record: " + id); |
|
57 |
private Project findEnrichedProject(final Project simpleProject) { |
|
58 |
final String s = simpleProject.getFunder() + "/" + simpleProject.getProgram() + "/" + simpleProject.getCode(); |
|
59 |
if (!mapProjects.containsKey(s)) { |
|
60 |
final Project project = resolveProject(s); |
|
61 |
mapProjects.put(s, project != null ? project : simpleProject); |
|
119 | 62 |
} |
63 |
return mapProjects.get(s); |
|
120 | 64 |
} |
121 | 65 |
|
122 | 66 |
private Project resolveProject(final String s) { |
... | ... | |
129 | 73 |
throw new RuntimeException("Error resolving project: " + s, e); |
130 | 74 |
} |
131 | 75 |
} |
76 |
|
|
132 | 77 |
} |
Also available in: Unified diff
Refactoring using JAXB