Revision 51020
Added by Michele Artini about 6 years ago
EnrichOpenairePlugin.java | ||
---|---|---|
3 | 3 |
import java.io.StringReader; |
4 | 4 |
import java.net.URI; |
5 | 5 |
import java.net.URISyntaxException; |
6 |
import java.util.HashSet; |
|
7 |
import java.util.LinkedHashSet; |
|
8 | 6 |
import java.util.List; |
9 | 7 |
import java.util.Set; |
8 |
import java.util.stream.Collectors; |
|
10 | 9 |
|
11 |
import org.apache.commons.lang3.StringUtils; |
|
12 | 10 |
import org.apache.commons.logging.Log; |
13 | 11 |
import org.apache.commons.logging.LogFactory; |
14 | 12 |
import org.dom4j.Document; |
15 | 13 |
import org.dom4j.DocumentException; |
16 | 14 |
import org.dom4j.Element; |
17 | 15 |
import org.dom4j.Node; |
18 |
import org.dom4j.QName; |
|
19 | 16 |
import org.dom4j.io.SAXReader; |
20 | 17 |
import org.springframework.beans.factory.annotation.Value; |
21 | 18 |
|
19 |
import eu.dnetlib.data.mdstore.plugins.objects.MdRecord; |
|
22 | 20 |
import eu.dnetlib.data.mdstore.plugins.objects.MyURL; |
21 |
import eu.dnetlib.data.mdstore.plugins.objects.Project; |
|
23 | 22 |
|
24 | 23 |
public class EnrichOpenairePlugin extends GenericDoiMdstorePlugin { |
25 | 24 |
|
... | ... | |
34 | 33 |
} |
35 | 34 |
|
36 | 35 |
@Override |
37 |
protected boolean updateDocument(final Document doc, final String response) {
|
|
36 |
protected void reconfigure() {}
|
|
38 | 37 |
|
38 |
@Override |
|
39 |
protected boolean updateDocument(final MdRecord doc, final String response) { |
|
40 |
|
|
39 | 41 |
try { |
40 | 42 |
final Document docRes = (new SAXReader()).read(new StringReader(response)); |
41 | 43 |
|
... | ... | |
43 | 45 |
|
44 | 46 |
if (results.size() == 1) { |
45 | 47 |
final Node n = (Node) results.get(0); |
46 |
|
|
47 | 48 |
updateSubjects(doc, n); |
48 | 49 |
updateCitations(doc, n); |
49 | 50 |
updateUrls(doc, n); |
50 | 51 |
updateProjects(doc, n); |
51 |
|
|
52 |
updateBestLicence(doc); |
|
53 |
|
|
52 |
updateBestRights(doc); |
|
54 | 53 |
return true; |
55 | 54 |
} else if (results.size() == 1) { |
56 | 55 |
log.warn("Too many responses"); |
... | ... | |
63 | 62 |
return false; |
64 | 63 |
} |
65 | 64 |
|
66 |
private void updateSubjects(final Document doc, final Node n) { |
|
67 |
final Set<String> subjects = new LinkedHashSet<>(); |
|
68 |
|
|
69 |
final Element subjectsList = (Element) doc.selectSingleNode("//*[local-name() = 'subjects']"); |
|
70 |
for (final Object o : subjectsList.selectNodes("./*[local-name() = 'subject']")) { |
|
71 |
subjects.add(cleanSubject(((Node) o).getText())); |
|
72 |
((Node) o).detach(); |
|
73 |
} |
|
74 |
|
|
65 |
private void updateSubjects(final MdRecord doc, final Node n) { |
|
66 |
final Set<String> subjects = doc.getSubjects().stream().map(EnrichOpenairePlugin::cleanSubject).collect(Collectors.toSet()); |
|
75 | 67 |
for (final Object o : n.selectNodes(".//subject[@classid='keyword']")) { |
76 | 68 |
subjects.add(cleanSubject(((Node) o).getText().trim())); |
77 | 69 |
} |
78 |
|
|
79 |
subjects.stream() |
|
80 |
.filter(StringUtils::isNotBlank) |
|
81 |
.forEach(s -> subjectsList.addElement(new QName("subject", subjectsList.getNamespace())).setText(s)); |
|
70 |
doc.setSubjects(subjects); |
|
82 | 71 |
} |
83 | 72 |
|
84 | 73 |
public static String cleanSubject(final String s) { |
... | ... | |
86 | 75 |
return s.replaceFirst("info:eu-repo/classification/acm/", ""); |
87 | 76 |
} |
88 | 77 |
|
89 |
private void updateCitations(final Document doc, final Node n) {
|
|
90 |
final Element citations = (Element) doc.selectSingleNode("//*[local-name() = 'citations']");
|
|
78 |
private void updateCitations(final MdRecord doc, final Node n) {
|
|
79 |
doc.getCitations().clear();
|
|
91 | 80 |
|
92 | 81 |
for (final Object o : n.selectNodes(".//citations/citation/rawText")) { |
93 |
citations.addElement(new QName("citation", citations.getNamespace())).setText(((Node) o).getText());
|
|
82 |
doc.getCitations().add(((Node) o).getText());
|
|
94 | 83 |
} |
95 | 84 |
} |
96 | 85 |
|
97 |
private void updateUrls(final Document doc, final Node n) { |
|
98 |
final Element urlList = (Element) doc.selectSingleNode("//*[local-name() = 'alternateIdentifiers']"); |
|
99 |
final Set<MyURL> urls = new HashSet<>(); |
|
100 |
for (final Object o : urlList.selectNodes("./*[local-name() = 'alternateIdentifier' and @alternateIdentifierType='url']")) { |
|
101 |
final String url = ((Node) o).getText().trim(); |
|
102 |
final String licence = StringUtils.defaultString(((Node) o).valueOf("@licence"), "Unknown"); |
|
103 |
final String hostedBy = StringUtils.defaultString(((Node) o).valueOf("@hostedBy"), "Unknown"); |
|
104 |
final MyURL u = new MyURL(url, hostedBy, licence); |
|
105 |
urls.remove(u); |
|
106 |
urls.add(u); |
|
107 |
((Node) o).detach(); |
|
108 |
} |
|
86 |
private void updateUrls(final MdRecord doc, final Node n) { |
|
87 |
doc.getUrls().addAll(doc.getUrls()); |
|
109 | 88 |
|
110 | 89 |
for (final Object oin : n.selectNodes(".//instance")) { |
111 |
final String licence = ((Element) oin).valueOf("./accessright/@classname").trim();
|
|
90 |
final String rights = ((Element) oin).valueOf("./accessright/@classname").trim();
|
|
112 | 91 |
final String hostedBy = ((Element) oin).valueOf("./hostedby/@name").trim(); |
113 | 92 |
for (final Object ourl : ((Element) oin).selectNodes("./webresource/url")) { |
114 |
final MyURL u = new MyURL(((Node) ourl).getText().trim(), hostedBy, licence);
|
|
115 |
urls.remove(u);
|
|
116 |
urls.add(u);
|
|
93 |
final MyURL u = new MyURL(((Node) ourl).getText().trim(), hostedBy, rights);
|
|
94 |
doc.getUrls().remove(u);
|
|
95 |
doc.getUrls().add(u);
|
|
117 | 96 |
} |
118 | 97 |
} |
119 |
|
|
120 |
urls.stream() |
|
121 |
.sorted((o1, o2) -> { |
|
122 |
final String l1 = o1.getLicence(); |
|
123 |
final String l2 = o2.getLicence(); |
|
124 |
final int n1 = l1.equalsIgnoreCase("Open Access") ? 0 : l1.equalsIgnoreCase("Unknown") ? 2 : 1; |
|
125 |
final int n2 = l2.equalsIgnoreCase("Open Access") ? 0 : l2.equalsIgnoreCase("Unknown") ? 2 : 1; |
|
126 |
return Integer.compare(n1, n2); |
|
127 |
}) |
|
128 |
// .filter(u -> !u.getUrl().contains("puma.isti.cnr.it")) |
|
129 |
.forEach(u -> addUrl(urlList, u)); |
|
130 | 98 |
} |
131 | 99 |
|
132 |
private void addUrl(final Element urlList, final MyURL u) { |
|
133 |
final Element aid = urlList.addElement(new QName("alternateIdentifier", urlList.getNamespace())); |
|
134 |
aid.addAttribute("alternateIdentifierType", "url"); |
|
135 |
aid.addAttribute("licence", u.getLicence()); |
|
136 |
aid.addAttribute("hostedBy", u.getHostedBy()); |
|
137 |
aid.setText(u.getUrl()); |
|
138 |
} |
|
100 |
private void updateProjects(final MdRecord doc, final Node n) { |
|
139 | 101 |
|
140 |
private void updateProjects(final Document doc, final Node n) { |
|
141 |
final Element projectsList = (Element) doc.selectSingleNode("//*[local-name() = 'projects']"); |
|
142 |
|
|
143 | 102 |
for (final Object op : n.selectNodes(".//rels/rel[./to/@type='project']")) { |
144 | 103 |
final Node p = (Node) op; |
145 |
final String infoId = p.valueOf("./to"); |
|
146 |
final String code = p.valueOf("./code"); |
|
147 |
final String name = p.valueOf("./title"); |
|
148 |
final String acronym = p.valueOf("./acronym"); |
|
149 |
final String funder = p.valueOf(".//funder/@shortname"); |
|
150 |
final String program = p.valueOf(".//funding_level_0/@name"); |
|
151 |
final String jurisdiction = p.valueOf(".//funder/@jurisdiction"); |
|
104 |
final Project np = new Project(); |
|
105 |
np.setOpenaireId(p.valueOf("./to")); |
|
106 |
np.setCode(p.valueOf("./code")); |
|
107 |
np.setName(p.valueOf("./title")); |
|
108 |
np.setAcronym(p.valueOf("./acronym")); |
|
109 |
np.setFunder(p.valueOf(".//funder/@shortname")); |
|
110 |
np.setProgram(p.valueOf(".//funding_level_0/@name")); |
|
111 |
np.setJurisdiction(p.valueOf(".//funder/@jurisdiction")); |
|
112 |
np.setInfoId(String.format("info:eu-repo/grantAgreement/%s/%s/%s/%s/%s/%s", np.getFunder(), np.getProgram(), np.getCode(), np.getJurisdiction(), |
|
113 |
np.getName(), np.getAcronym())); |
|
152 | 114 |
|
153 |
if (StringUtils.isNoneBlank(infoId, code, name, acronym, funder, program, jurisdiction) |
|
154 |
&& projectsList.selectNodes(".//*[local-name() = 'code' and ./text() = '" + code + "']").isEmpty()) { |
|
155 |
|
|
156 |
if (log.isDebugEnabled()) { |
|
157 |
log.debug("************************"); |
|
158 |
log.debug("Add missing project " + acronym); |
|
159 |
for (final Object o : projectsList.selectNodes(".//*[local-name() = 'infoId']")) { |
|
160 |
log.debug(" Existing project: " + ((Node) o).getText()); |
|
161 |
} |
|
162 |
log.debug("************************"); |
|
163 |
} |
|
164 |
|
|
165 |
final Element np = projectsList.addElement(new QName("project", projectsList.getNamespace())); |
|
166 |
np.addElement(new QName("infoId", np.getNamespace())) |
|
167 |
.setText(String.format("info:eu-repo/grantAgreement/%s/%s/%s/%s/%s/%s", funder, program, code, jurisdiction, name, acronym)); |
|
168 |
np.addElement(new QName("openaireId", np.getNamespace())).setText(infoId); |
|
169 |
np.addElement(new QName("code", np.getNamespace())).setText(code); |
|
170 |
np.addElement(new QName("name", np.getNamespace())).setText(name); |
|
171 |
np.addElement(new QName("acronym", np.getNamespace())).setText(acronym); |
|
172 |
np.addElement(new QName("funder", np.getNamespace())).setText(funder); |
|
173 |
np.addElement(new QName("program", np.getNamespace())).setText(program); |
|
174 |
np.addElement(new QName("jurisdiction", np.getNamespace())).setText(jurisdiction); |
|
175 |
} |
|
176 |
|
|
115 |
doc.getProjects().add(np); |
|
177 | 116 |
} |
178 | 117 |
} |
179 | 118 |
|
180 |
private void updateBestLicence(final Document doc) { |
|
181 |
final Set<String> availables = new HashSet<>(); |
|
182 |
for (final Object o : doc.selectNodes("//*[local-name()='alternateIdentifier']")) { |
|
183 |
availables.add(((Node) o).valueOf("@licence").trim().toUpperCase()); |
|
184 |
} |
|
119 |
private void updateBestRights(final MdRecord doc) { |
|
120 |
final Set<String> availables = doc.getUrls().stream().map(MyURL::getRights).map(String::toUpperCase).collect(Collectors.toSet()); |
|
185 | 121 |
if (availables.contains("OPEN ACCESS")) { |
186 |
doc.selectSingleNode("//*[local-name()='bestLicence']").setText("Open Access");
|
|
122 |
doc.setBestRights("Open Access");
|
|
187 | 123 |
} else if (availables.contains("EMBARGO")) { |
188 |
doc.selectSingleNode("//*[local-name()='bestLicence']").setText("Embargo");
|
|
124 |
doc.setBestRights("Embargo");
|
|
189 | 125 |
} else if (availables.contains("RESTRICTED")) { |
190 |
doc.selectSingleNode("//*[local-name()='bestLicence']").setText("Restricted");
|
|
126 |
doc.setBestRights("Restricted");
|
|
191 | 127 |
} else if (availables.contains("CLOSED ACCESS")) { |
192 |
doc.selectSingleNode("//*[local-name()='bestLicence']").setText("Closed Access");
|
|
128 |
doc.setBestRights("Closed Access");
|
|
193 | 129 |
} else { |
194 |
doc.selectSingleNode("//*[local-name()='bestLicence']").setText("Unknown");
|
|
130 |
doc.setBestRights("Unknown");
|
|
195 | 131 |
} |
196 | 132 |
} |
197 | 133 |
|
Also available in: Unified diff
Refactoring using JAXB