1
|
package eu.dnetlib.data.mdstore.plugins;
|
2
|
|
3
|
import java.io.StringReader;
|
4
|
import java.net.URI;
|
5
|
import java.net.URISyntaxException;
|
6
|
import java.util.LinkedHashMap;
|
7
|
import java.util.LinkedHashSet;
|
8
|
import java.util.List;
|
9
|
import java.util.Map;
|
10
|
import java.util.Map.Entry;
|
11
|
import java.util.Set;
|
12
|
|
13
|
import org.apache.commons.lang3.StringUtils;
|
14
|
import org.apache.commons.logging.Log;
|
15
|
import org.apache.commons.logging.LogFactory;
|
16
|
import org.dom4j.Document;
|
17
|
import org.dom4j.DocumentException;
|
18
|
import org.dom4j.Element;
|
19
|
import org.dom4j.Node;
|
20
|
import org.dom4j.QName;
|
21
|
import org.dom4j.io.SAXReader;
|
22
|
import org.springframework.beans.factory.annotation.Value;
|
23
|
|
24
|
public class EnrichOpenairePlugin extends GenericDoiMdstorePlugin {
|
25
|
|
26
|
private static final Log log = LogFactory.getLog(EnrichOpenairePlugin.class);
|
27
|
|
28
|
@Value("${plugin.enrich.publications.openaire.url}")
|
29
|
private String baseUrl;
|
30
|
|
31
|
@Override
|
32
|
protected URI prepareURI(final String doi) throws URISyntaxException {
|
33
|
return new URI(String.format(baseUrl, doi));
|
34
|
}
|
35
|
|
36
|
@Override
|
37
|
protected boolean updateDocument(final Document doc, final String response) {
|
38
|
|
39
|
try {
|
40
|
final Document docRes = (new SAXReader()).read(new StringReader(response));
|
41
|
|
42
|
final List<?> results = docRes.selectNodes("/response/results/result");
|
43
|
|
44
|
if (results.size() == 1) {
|
45
|
final Node n = (Node) results.get(0);
|
46
|
updateRights(doc, n);
|
47
|
updateSubjects(doc, n);
|
48
|
updateCitations(doc, n);
|
49
|
updateUrls(doc, n);
|
50
|
return true;
|
51
|
} else if (results.size() == 1) {
|
52
|
log.warn("Too many responses");
|
53
|
}
|
54
|
|
55
|
} catch (final DocumentException e) {
|
56
|
log.warn("Invalid response", e);
|
57
|
}
|
58
|
|
59
|
return false;
|
60
|
|
61
|
}
|
62
|
|
63
|
private void updateRights(final Document doc, final Node n) {
|
64
|
final Element rightsList = (Element) doc.selectSingleNode("//*[local-name() = 'rightsList']");
|
65
|
final Set<String> rights = new LinkedHashSet<>();
|
66
|
for (final Object o : rightsList.selectNodes("./*[local-name() = 'rights']")) {
|
67
|
rights.add(((Node) o).getText());
|
68
|
((Node) o).detach();
|
69
|
}
|
70
|
|
71
|
final String bestLicence = n.valueOf("./bestlicense/@classname");
|
72
|
if (StringUtils.isNotBlank(bestLicence)) {
|
73
|
rights.add(bestLicence);
|
74
|
}
|
75
|
|
76
|
for (final Object o : n.selectNodes(".//instance")) {
|
77
|
rights.add(((Node) o).valueOf("./licence/@classname").trim());
|
78
|
}
|
79
|
|
80
|
for (final String r : rights) {
|
81
|
rightsList.addElement(new QName("rights", rightsList.getNamespace())).setText(r);
|
82
|
}
|
83
|
}
|
84
|
|
85
|
private void updateSubjects(final Document doc, final Node n) {
|
86
|
final Element subjectsList = (Element) doc.selectSingleNode("//*[local-name() = 'subjects']");
|
87
|
final Set<String> subjects = new LinkedHashSet<>();
|
88
|
for (final Object o : subjectsList.selectNodes("./*[local-name() = 'subject']")) {
|
89
|
subjects.add(((Node) o).getText());
|
90
|
((Node) o).detach();
|
91
|
}
|
92
|
|
93
|
for (final Object o : n.selectNodes(".//subject[@classid='keyword']")) {
|
94
|
subjects.add(((Node) o).getText().trim());
|
95
|
}
|
96
|
|
97
|
for (final String r : subjects) {
|
98
|
subjectsList.addElement(new QName("subject", subjectsList.getNamespace())).setText(r);
|
99
|
}
|
100
|
}
|
101
|
|
102
|
private void updateCitations(final Document doc, final Node n) {
|
103
|
final Element citations = (Element) doc.selectSingleNode("//*[local-name() = 'citations']");
|
104
|
|
105
|
for (final Object o : n.selectNodes(".//citations/citation/rawText")) {
|
106
|
citations.addElement(new QName("citation", citations.getNamespace())).setText(((Node) o).getText());
|
107
|
}
|
108
|
}
|
109
|
|
110
|
private void updateUrls(final Document doc, final Node n) {
|
111
|
final Element urlList = (Element) doc.selectSingleNode("//*[local-name() = 'alternateIdentifiers']");
|
112
|
final Map<String, String> urls = new LinkedHashMap<>();
|
113
|
for (final Object o : urlList.selectNodes("./*[local-name() = 'alternateIdentifier' and @alternateIdentifierType='url']")) {
|
114
|
urls.put(((Node) o).getText().trim(), "unknown");
|
115
|
((Node) o).detach();
|
116
|
}
|
117
|
|
118
|
for (final Object oin : n.selectNodes(".//instance")) {
|
119
|
final String licence = ((Element) oin).valueOf("./licence/@classname").trim();
|
120
|
for (final Object ourl : ((Element) oin).selectNodes("./webresource/url")) {
|
121
|
urls.put(((Node) ourl).getText().trim(), licence);
|
122
|
}
|
123
|
}
|
124
|
|
125
|
for (final Entry<String, String> e : urls.entrySet()) {
|
126
|
final Element aid = urlList.addElement(new QName("alternateIdentifier", urlList.getNamespace()));
|
127
|
aid.addAttribute("alternateIdentifierType", "url");
|
128
|
aid.addAttribute("licence", e.getValue());
|
129
|
aid.setText(e.getKey());
|
130
|
}
|
131
|
}
|
132
|
}
|