Project

General

Profile

1
package eu.dnetlib.data.mdstore.plugins;
2

    
3
import java.io.StringReader;
4
import java.net.URI;
5
import java.net.URISyntaxException;
6
import java.util.ArrayList;
7
import java.util.Arrays;
8
import java.util.HashMap;
9
import java.util.List;
10
import java.util.Map;
11
import java.util.Set;
12
import java.util.stream.Collectors;
13

    
14
import org.apache.commons.lang3.StringUtils;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.dom4j.Document;
18
import org.dom4j.DocumentException;
19
import org.dom4j.Element;
20
import org.dom4j.Node;
21
import org.dom4j.io.SAXReader;
22
import org.springframework.beans.factory.annotation.Autowired;
23
import org.springframework.beans.factory.annotation.Value;
24

    
25
import com.google.common.base.Splitter;
26
import com.mongodb.MongoClient;
27
import com.mongodb.client.MongoCollection;
28

    
29
import eu.dnetlib.data.mdstore.plugins.objects.MdRecord;
30
import eu.dnetlib.data.mdstore.plugins.objects.MyURL;
31
import eu.dnetlib.data.mdstore.plugins.objects.Project;
32
import eu.dnetlib.data.utils.XsltFunctions;
33

    
34
public class EnrichOpenairePlugin extends GenericDoiMdstorePlugin {
35

    
36
	private static final Log log = LogFactory.getLog(EnrichOpenairePlugin.class);
37

    
38
	@Value("${plugin.enrich.publications.openaire.url}")
39
	private String baseUrl;
40

    
41
	@Value("${plugin.enrich.openaire.datasources.blacklist}")
42
	private String datasourceBlackList;
43

    
44
	@Autowired
45
	private MongoClient mongoClient;
46

    
47
	private Map<String, Counter> counters = new HashMap<>();
48

    
49
	@Override
50
	protected URI prepareURI(final String doi) throws URISyntaxException {
51
		return new URI(String.format(baseUrl, doi));
52
	}
53

    
54
	@Override
55
	protected MongoCollection<org.bson.Document> getCacheCollection() {
56
		return mongoClient.getDatabase("API_CACHES").getCollection("OPENAIRE_API_CACHE");
57
	}
58

    
59
	@Override
60
	protected void reconfigure(final Map<String, String> params) {
61
		counters.clear();
62
		counters.put("subjects", new Counter());
63
		counters.put("citations", new Counter());
64
		counters.put("urls", new Counter());
65
		counters.put("projects", new Counter());
66
		counters.put("dois", new Counter());
67
	}
68

    
69
	@Override
70
	protected void resetConfiguration() {
71
		log.info("***** Openaire Enrichment - subjects  : " + counters.get("subjects"));
72
		log.info("***** Openaire Enrichment - citations : " + counters.get("citations"));
73
		log.info("***** Openaire Enrichment - urls      : " + counters.get("urls"));
74
		log.info("***** Openaire Enrichment - projects  : " + counters.get("projects"));
75
		log.info("***** Openaire Enrichment - dois      : " + counters.get("dois"));
76
		counters.clear();
77
	}
78

    
79
	@Override
80
	protected boolean updateDocument(final MdRecord doc, final String response) {
81
		counters.get("subjects").incrementBefore(doc.getSubjects().size());
82
		counters.get("citations").incrementBefore(doc.getCitations().size());
83
		counters.get("urls").incrementBefore(doc.getUrls().size());
84
		counters.get("projects").incrementBefore(doc.getProjects().size());
85
		counters.get("dois").incrementBefore(doc.getDois().size());
86

    
87
		try {
88
			final Document docRes = (new SAXReader()).read(new StringReader(response));
89

    
90
			final List<?> results = docRes.selectNodes("/response/results/result");
91

    
92
			if (results.size() == 1) {
93
				final Node n = (Node) results.get(0);
94
				updateSubjects(doc, n);
95
				updateCitations(doc, n);
96
				updateUrls(doc, n);
97
				updateProjects(doc, n);
98
				updateDois(doc, n);
99
				updateBestRights(doc);
100

    
101
				return true;
102
			} else if (results.size() == 1) {
103
				log.warn("Too many responses");
104
			}
105
		} catch (final DocumentException e) {
106
			log.warn("Invalid response", e);
107
		} finally {
108
			counters.get("subjects").incrementAfter(doc.getSubjects().size());
109
			counters.get("citations").incrementAfter(doc.getCitations().size());
110
			counters.get("urls").incrementAfter(doc.getUrls().size());
111
			counters.get("projects").incrementAfter(doc.getProjects().size());
112
			counters.get("dois").incrementAfter(doc.getDois().size());
113
		}
114

    
115
		return false;
116
	}
117

    
118
	private void updateSubjects(final MdRecord doc, final Node n) {
119
		final Set<String> subjects = doc.getSubjects()
120
				.stream()
121
				.map(EnrichOpenairePlugin::cleanSubject)
122
				.flatMap(List::stream)
123
				.collect(Collectors.toSet());
124

    
125
		for (final Object o : n.selectNodes(".//subject[@classid='keyword']")) {
126
			subjects.addAll(cleanSubject(((Node) o).getText().trim()));
127
		}
128

    
129
		doc.setSubjects(subjects);
130
	}
131

    
132
	public static List<String> cleanSubject(final String s) {
133
		if (s.isEmpty()) {
134
			return new ArrayList<>();
135
		} else if (s.startsWith("info:eu-repo/classification/msc/")) {
136
			return new ArrayList<>();
137
		} else if (s.startsWith("info:eu-repo/classification/acm/")) {
138
			return Arrays.asList(s.replaceFirst("info:eu-repo/classification/acm/", ""));
139
		} else if (s.contains(";")) {
140
			return Splitter.on(";").trimResults().omitEmptyStrings().splitToList(s);
141
		} else if (s.contains(",")) {
142
			return Splitter.on(",").trimResults().omitEmptyStrings().splitToList(s);
143
		} else {
144
			return Arrays.asList(s);
145
		}
146
	}
147

    
148
	private void updateCitations(final MdRecord doc, final Node n) {
149
		doc.getCitations().clear();
150

    
151
		for (final Object o : n.selectNodes(".//citations/citation/rawText")) {
152
			doc.getCitations().add(((Node) o).getText());
153
		}
154
	}
155

    
156
	private void updateUrls(final MdRecord doc, final Node n) {
157
		doc.getUrls().addAll(doc.getUrls());
158

    
159
		final Set<String> blacklist =
160
				Arrays.stream(datasourceBlackList.split(",")).map(String::trim).filter(StringUtils::isNotBlank).collect(Collectors.toSet());
161

    
162
		for (final Object oin : n.selectNodes(".//instance")) {
163

    
164
			final String hostedByid = ((Element) oin).valueOf("./hostedby/@id").trim();
165

    
166
			if (!blacklist.contains(hostedByid)) {
167
				final String hostedByName = ((Element) oin).valueOf("./hostedby/@name").trim();
168
				final String rights = ((Element) oin).valueOf("./accessright/@classname").trim();
169

    
170
				for (final Object ourl : ((Element) oin).selectNodes("./webresource/url")) {
171
					final MyURL u = new MyURL(((Node) ourl).getText().trim(), hostedByName, rights);
172
					doc.getUrls().remove(u);
173
					doc.getUrls().add(u);
174
				}
175
			}
176
		}
177
	}
178

    
179
	private void updateProjects(final MdRecord doc, final Node n) {
180

    
181
		for (final Object op : n.selectNodes(".//rels/rel[./to/@type='project']")) {
182
			final Node p = (Node) op;
183
			final String name = p.valueOf("./title").trim();
184

    
185
			if (StringUtils.isNotBlank(name) && !name.equalsIgnoreCase("null") && !name.equalsIgnoreCase("unidentified") && !name.equalsIgnoreCase("unknown")) {
186
				final Project np = new Project();
187
				np.setOpenaireId(p.valueOf("./to"));
188
				np.setCode(p.valueOf("./code"));
189
				np.setName(name);
190
				np.setAcronym(p.valueOf("./acronym"));
191
				np.setFunder(p.valueOf(".//funder/@shortname"));
192
				np.setProgram(p.valueOf(".//funding_level_0/@name"));
193
				np.setJurisdiction(p.valueOf(".//funder/@jurisdiction"));
194
				np.setInfoId(XsltFunctions.projectLongId(np.getFunder(), np.getProgram(), np.getCode(), np.getJurisdiction(),
195
						np.getName(), np.getAcronym()));
196

    
197
				doc.getProjects().add(np);
198
			}
199
		}
200
	}
201

    
202
	private void updateDois(final MdRecord doc, final Node n) {
203
		for (final Object od : n.selectNodes(".//*[local-name()='result']/pid[@classid='doi']")) {
204
			final String doi = ((Node) od).getText().trim();
205
			if (StringUtils.isNotBlank(doi)) {
206
				doc.getDois().add(doi);
207
			}
208
		}
209
	}
210

    
211
	private void updateBestRights(final MdRecord doc) {
212
		final Set<String> availables = doc.getUrls().stream().map(MyURL::getRights).map(String::toUpperCase).collect(Collectors.toSet());
213
		if (availables.contains("OPEN ACCESS")) {
214
			doc.setBestRights("Open Access");
215
		} else if (availables.contains("EMBARGO")) {
216
			doc.setBestRights("Embargo");
217
		} else if (availables.contains("RESTRICTED")) {
218
			doc.setBestRights("Restricted");
219
		} else if (availables.contains("CLOSED ACCESS")) {
220
			doc.setBestRights("Closed Access");
221
		} else {
222
			doc.setBestRights("Unknown");
223
		}
224
	}
225

    
226
}
(7-7/11)