1
|
package eu.dnetlib.data.mdstore.plugins;
|
2
|
|
3
|
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Value;

import eu.dnetlib.data.mdstore.plugins.objects.MdRecord;
import eu.dnetlib.data.mdstore.plugins.objects.MyURL;
import eu.dnetlib.data.mdstore.plugins.objects.Project;
|
23
|
|
24
|
public class EnrichOpenairePlugin extends GenericDoiMdstorePlugin {
|
25
|
|
26
|
private static final Log log = LogFactory.getLog(EnrichOpenairePlugin.class);
|
27
|
|
28
|
@Value("${plugin.enrich.publications.openaire.url}")
|
29
|
private String baseUrl;
|
30
|
|
31
|
@Override
|
32
|
protected URI prepareURI(final String doi) throws URISyntaxException {
|
33
|
return new URI(String.format(baseUrl, doi));
|
34
|
}
|
35
|
|
36
|
@Override
|
37
|
protected void reconfigure(final Map<String, String> params) {}
|
38
|
|
39
|
@Override
|
40
|
protected boolean updateDocument(final MdRecord doc, final String response) {
|
41
|
|
42
|
try {
|
43
|
final Document docRes = (new SAXReader()).read(new StringReader(response));
|
44
|
|
45
|
final List<?> results = docRes.selectNodes("/response/results/result");
|
46
|
|
47
|
if (results.size() == 1) {
|
48
|
final Node n = (Node) results.get(0);
|
49
|
updateSubjects(doc, n);
|
50
|
updateCitations(doc, n);
|
51
|
updateUrls(doc, n);
|
52
|
updateProjects(doc, n);
|
53
|
updateBestRights(doc);
|
54
|
return true;
|
55
|
} else if (results.size() == 1) {
|
56
|
log.warn("Too many responses");
|
57
|
}
|
58
|
|
59
|
} catch (final DocumentException e) {
|
60
|
log.warn("Invalid response", e);
|
61
|
}
|
62
|
|
63
|
return false;
|
64
|
}
|
65
|
|
66
|
private void updateSubjects(final MdRecord doc, final Node n) {
|
67
|
final Set<String> subjects = doc.getSubjects().stream().map(EnrichOpenairePlugin::cleanSubject).collect(Collectors.toSet());
|
68
|
for (final Object o : n.selectNodes(".//subject[@classid='keyword']")) {
|
69
|
subjects.add(cleanSubject(((Node) o).getText().trim()));
|
70
|
}
|
71
|
doc.setSubjects(subjects);
|
72
|
}
|
73
|
|
74
|
public static String cleanSubject(final String s) {
|
75
|
if (s.startsWith("info:eu-repo/classification/msc/")) { return ""; }
|
76
|
return s.replaceFirst("info:eu-repo/classification/acm/", "");
|
77
|
}
|
78
|
|
79
|
private void updateCitations(final MdRecord doc, final Node n) {
|
80
|
doc.getCitations().clear();
|
81
|
|
82
|
for (final Object o : n.selectNodes(".//citations/citation/rawText")) {
|
83
|
doc.getCitations().add(((Node) o).getText());
|
84
|
}
|
85
|
}
|
86
|
|
87
|
private void updateUrls(final MdRecord doc, final Node n) {
|
88
|
doc.getUrls().addAll(doc.getUrls());
|
89
|
|
90
|
for (final Object oin : n.selectNodes(".//instance")) {
|
91
|
final String rights = ((Element) oin).valueOf("./accessright/@classname").trim();
|
92
|
final String hostedBy = ((Element) oin).valueOf("./hostedby/@name").trim();
|
93
|
for (final Object ourl : ((Element) oin).selectNodes("./webresource/url")) {
|
94
|
final MyURL u = new MyURL(((Node) ourl).getText().trim(), hostedBy, rights);
|
95
|
doc.getUrls().remove(u);
|
96
|
doc.getUrls().add(u);
|
97
|
}
|
98
|
}
|
99
|
}
|
100
|
|
101
|
private void updateProjects(final MdRecord doc, final Node n) {
|
102
|
|
103
|
for (final Object op : n.selectNodes(".//rels/rel[./to/@type='project']")) {
|
104
|
final Node p = (Node) op;
|
105
|
final Project np = new Project();
|
106
|
np.setOpenaireId(p.valueOf("./to"));
|
107
|
np.setCode(p.valueOf("./code"));
|
108
|
np.setName(p.valueOf("./title"));
|
109
|
np.setAcronym(p.valueOf("./acronym"));
|
110
|
np.setFunder(p.valueOf(".//funder/@shortname"));
|
111
|
np.setProgram(p.valueOf(".//funding_level_0/@name"));
|
112
|
np.setJurisdiction(p.valueOf(".//funder/@jurisdiction"));
|
113
|
np.setInfoId(String.format("info:eu-repo/grantAgreement/%s/%s/%s/%s/%s/%s", np.getFunder(), np.getProgram(), np.getCode(), np.getJurisdiction(),
|
114
|
np.getName(), np.getAcronym()));
|
115
|
|
116
|
doc.getProjects().add(np);
|
117
|
}
|
118
|
}
|
119
|
|
120
|
private void updateBestRights(final MdRecord doc) {
|
121
|
final Set<String> availables = doc.getUrls().stream().map(MyURL::getRights).map(String::toUpperCase).collect(Collectors.toSet());
|
122
|
if (availables.contains("OPEN ACCESS")) {
|
123
|
doc.setBestRights("Open Access");
|
124
|
} else if (availables.contains("EMBARGO")) {
|
125
|
doc.setBestRights("Embargo");
|
126
|
} else if (availables.contains("RESTRICTED")) {
|
127
|
doc.setBestRights("Restricted");
|
128
|
} else if (availables.contains("CLOSED ACCESS")) {
|
129
|
doc.setBestRights("Closed Access");
|
130
|
} else {
|
131
|
doc.setBestRights("Unknown");
|
132
|
}
|
133
|
}
|
134
|
|
135
|
}
|