Project

General

Profile

1
package eu.dnetlib.data.mdstore.plugins;
2

    
3
import java.io.StringReader;
4
import java.net.URI;
5
import java.net.URISyntaxException;
6
import java.util.ArrayList;
7
import java.util.HashMap;
8
import java.util.List;
9
import java.util.Map;
10

    
11
import org.apache.commons.lang3.StringUtils;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.dom4j.Document;
15
import org.dom4j.DocumentException;
16
import org.dom4j.Element;
17
import org.dom4j.io.SAXReader;
18
import org.springframework.beans.factory.annotation.Value;
19

    
20
import com.mongodb.BasicDBObject;
21
import com.mongodb.DBObject;
22
import com.mongodb.client.MongoCollection;
23

    
24
import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore;
25
import eu.dnetlib.data.mdstore.plugins.objects.Project;
26
import eu.dnetlib.rmi.data.MDStoreServiceException;
27

    
28
public class EnrichProjectsPlugin extends AbstractIstiMDStorePlugin {
29

    
30
	private static final Log log = LogFactory.getLog(EnrichProjectsPlugin.class);
31

    
32
	@Value("${plugin.enrich.projects.openaire.url}")
33
	private String baseUrl;
34

    
35
	@Override
36
	public void process(final MongoMDStore store, final Map<String, String> params) throws MDStoreServiceException {
37
		log.info("*****************************************");
38
		log.info("* ENRICH PROJECTS ...");
39
		log.info("*****************************************");
40

    
41
		final MongoCollection<DBObject> collPubs = store.getCollection();
42

    
43
		final Map<String, List<Project>> pubs = resolveProjects(collPubs);
44

    
45
		// update the publications
46
		pubs.entrySet().forEach(e -> patchRecord(collPubs, e.getKey(), e.getValue()));
47

    
48
		touch(store);
49
	}
50

    
51
	private Map<String, List<Project>> resolveProjects(final MongoCollection<DBObject> collPubs) {
52
		final Map<String, List<Project>> pubs = new HashMap<>();
53
		final Map<String, Project> mapProjects = new HashMap<>();
54
		for (final DBObject obj : collPubs.find()) {
55
			final List<Project> projects = projectsFromPublication(obj, mapProjects);
56
			pubs.put(obj.get("id").toString(), projects);
57
		}
58
		return pubs;
59
	}
60

    
61
	private List<Project> projectsFromPublication(final DBObject obj, final Map<String, Project> mapProjects) {
62
		final List<Project> projects = new ArrayList<>();
63
		try {
64
			final Document doc = (new SAXReader()).read(new StringReader(obj.get("body").toString()));
65
			for (final Object o : doc.selectNodes("//*[local-name()='project']")) {
66
				final Element p = (Element) o;
67
				final String code = p.valueOf("./*[local-name() = 'code']").trim();
68
				final String infoId = p.valueOf("./*[local-name() = 'infoId']").trim();
69

    
70
				if (StringUtils.isEmpty(code) && StringUtils.isNotEmpty(infoId)) {
71
					final Project simpleProject = Project.newInstance(infoId);
72
					if (simpleProject != null) {
73
						final String s = simpleProject.getFunder() + "/" + simpleProject.getProgram() + "/" + simpleProject.getCode();
74
						if (!mapProjects.containsKey(s)) {
75
							final Project project = resolveProject(s);
76
							mapProjects.put(s, project != null ? project : simpleProject);
77
						}
78
						projects.add(mapProjects.get(s));
79
					}
80
				}
81

    
82
			}
83
		} catch (final DocumentException e) {
84
			log.warn("Problem parsing a mdstore record");
85
		}
86

    
87
		return projects;
88
	}
89

    
90
	private void patchRecord(final MongoCollection<DBObject> collPubs, final String id, final List<Project> projects) {
91
		log.debug("Adding to " + id + " " + projects.size() + " project(s)");
92

    
93
		try {
94
			final SAXReader reader = new SAXReader();
95
			final DBObject obj = collPubs.find(new BasicDBObject("id", id)).first();
96

    
97
			final Document doc = reader.read(new StringReader(obj.get("body").toString()));
98

    
99
			for (final Object o : doc.selectNodes("//*[local-name()='project']")) {
100
				final Element n = (Element) o;
101
				final String infoId = n.valueOf("./*[local-name()='infoId']");
102

    
103
				projects.stream()
104
						.filter(p -> (p != null) && p.match(infoId))
105
						.forEach(p -> {
106
							n.selectSingleNode("./*[local-name()='code']").setText(p.getCode());
107
							n.selectSingleNode("./*[local-name()='name']").setText(p.getName());
108
							n.selectSingleNode("./*[local-name()='acronym']").setText(p.getAcronym());
109
							n.selectSingleNode("./*[local-name()='funder']").setText(p.getFunder());
110
							n.selectSingleNode("./*[local-name()='program']").setText(p.getProgram());
111
							n.selectSingleNode("./*[local-name()='jurisdiction']").setText(p.getJurisdiction());
112
							n.selectSingleNode("./*[local-name()='openaireId']").setText(p.getOpenaireId());
113
						});
114
			}
115

    
116
			collPubs.updateOne(new BasicDBObject("id", id), new BasicDBObject("$set", new BasicDBObject("body", doc.asXML())));
117
		} catch (final DocumentException e) {
118
			log.warn("Error patching record: " + id);
119
		}
120
	}
121

    
122
	private Project resolveProject(final String s) {
123
		try {
124
			final String[] arr = s.split("/");
125
			final String url = String.format(baseUrl, arr[0], arr[1], arr[2]);
126
			return Project.newInstance(new URI(url));
127
		} catch (final URISyntaxException e) {
128
			log.error("Error resolving project: " + s, e);
129
			throw new RuntimeException("Error resolving project: " + s, e);
130
		}
131
	}
132
}
(5-5/7)