Project

General

Profile

1 44133 michele.ar
package eu.dnetlib.data.mdstore.plugins;
2
3 49368 michele.ar
import java.io.StringReader;
4 49210 michele.ar
import java.net.URI;
5
import java.net.URISyntaxException;
6 55934 michele.ar
import java.util.ArrayList;
7
import java.util.Arrays;
8 55101 michele.ar
import java.util.HashMap;
9 49368 michele.ar
import java.util.List;
10 51852 michele.ar
import java.util.Map;
11 49380 michele.ar
import java.util.Set;
12 51020 michele.ar
import java.util.stream.Collectors;
13 44133 michele.ar
14 59332 michele.ar
import org.apache.commons.lang3.StringUtils;
15 49368 michele.ar
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17 49210 michele.ar
import org.dom4j.Document;
18 49368 michele.ar
import org.dom4j.DocumentException;
19 49380 michele.ar
import org.dom4j.Element;
20 49368 michele.ar
import org.dom4j.Node;
21
import org.dom4j.io.SAXReader;
22 60646 michele.ar
import org.springframework.beans.factory.annotation.Autowired;
23 49368 michele.ar
import org.springframework.beans.factory.annotation.Value;
24 44133 michele.ar
25 55934 michele.ar
import com.google.common.base.Splitter;
26
27 51020 michele.ar
import eu.dnetlib.data.mdstore.plugins.objects.MdRecord;
28 49416 michele.ar
import eu.dnetlib.data.mdstore.plugins.objects.MyURL;
29 51020 michele.ar
import eu.dnetlib.data.mdstore.plugins.objects.Project;
30 60646 michele.ar
import eu.dnetlib.data.utils.OpenAIREAuthenticationProvider;
31 49416 michele.ar
32 49291 michele.ar
public class EnrichOpenairePlugin extends GenericDoiMdstorePlugin {
33 49210 michele.ar
34 49368 michele.ar
	private static final Log log = LogFactory.getLog(EnrichOpenairePlugin.class);
35
36
	@Value("${plugin.enrich.publications.openaire.url}")
37
	private String baseUrl;
38
39 60102 michele.ar
	@Value("${plugin.enrich.openaire.datasources.blacklist}")
40
	private String datasourceBlackList;
41
42 60646 michele.ar
	@Autowired
43
	private OpenAIREAuthenticationProvider openAIREAuthenticationProvider;
44
45 55101 michele.ar
	private Map<String, Counter> counters = new HashMap<>();
46
47 49291 michele.ar
	@Override
48
	protected URI prepareURI(final String doi) throws URISyntaxException {
49 49368 michele.ar
		return new URI(String.format(baseUrl, doi));
50 49291 michele.ar
	}
51 44133 michele.ar
52
	@Override
53 55101 michele.ar
	protected void reconfigure(final Map<String, String> params) {
54
		counters.clear();
55
		counters.put("subjects", new Counter());
56
		counters.put("citations", new Counter());
57
		counters.put("urls", new Counter());
58
		counters.put("projects", new Counter());
59 59928 michele.ar
		counters.put("dois", new Counter());
60 60646 michele.ar
61
		setAccessToken(openAIREAuthenticationProvider.obtainAccessToken());
62 55101 michele.ar
	}
63 44133 michele.ar
64 51020 michele.ar
	@Override
65 55101 michele.ar
	protected void resetConfiguration() {
66
		log.info("***** Openaire Enrichment - subjects  : " + counters.get("subjects"));
67
		log.info("***** Openaire Enrichment - citations : " + counters.get("citations"));
68
		log.info("***** Openaire Enrichment - urls      : " + counters.get("urls"));
69
		log.info("***** Openaire Enrichment - projects  : " + counters.get("projects"));
70 59928 michele.ar
		log.info("***** Openaire Enrichment - dois      : " + counters.get("dois"));
71 55101 michele.ar
		counters.clear();
72 60646 michele.ar
73
		setAccessToken(null);
74 55101 michele.ar
	}
75 54968 michele.ar
76
	@Override
77 51020 michele.ar
	protected boolean updateDocument(final MdRecord doc, final String response) {
78 55101 michele.ar
		counters.get("subjects").incrementBefore(doc.getSubjects().size());
79
		counters.get("citations").incrementBefore(doc.getCitations().size());
80
		counters.get("urls").incrementBefore(doc.getUrls().size());
81
		counters.get("projects").incrementBefore(doc.getProjects().size());
82 59928 michele.ar
		counters.get("dois").incrementBefore(doc.getDois().size());
83 51020 michele.ar
84 49368 michele.ar
		try {
85
			final Document docRes = (new SAXReader()).read(new StringReader(response));
86 49210 michele.ar
87 49368 michele.ar
			final List<?> results = docRes.selectNodes("/response/results/result");
88 44133 michele.ar
89 49368 michele.ar
			if (results.size() == 1) {
90
				final Node n = (Node) results.get(0);
91 49380 michele.ar
				updateSubjects(doc, n);
92
				updateCitations(doc, n);
93
				updateUrls(doc, n);
94 49395 michele.ar
				updateProjects(doc, n);
95 59928 michele.ar
				updateDois(doc, n);
96 51020 michele.ar
				updateBestRights(doc);
97 55101 michele.ar
98 49368 michele.ar
				return true;
99
			} else if (results.size() == 1) {
100
				log.warn("Too many responses");
101
			}
102
		} catch (final DocumentException e) {
103
			log.warn("Invalid response", e);
104 55101 michele.ar
		} finally {
105
			counters.get("subjects").incrementAfter(doc.getSubjects().size());
106
			counters.get("citations").incrementAfter(doc.getCitations().size());
107
			counters.get("urls").incrementAfter(doc.getUrls().size());
108
			counters.get("projects").incrementAfter(doc.getProjects().size());
109 59928 michele.ar
			counters.get("dois").incrementAfter(doc.getDois().size());
110 49368 michele.ar
		}
111
112
		return false;
113 49210 michele.ar
	}
114 49380 michele.ar
115 51020 michele.ar
	private void updateSubjects(final MdRecord doc, final Node n) {
116 55934 michele.ar
		final Set<String> subjects = doc.getSubjects()
117
				.stream()
118
				.map(EnrichOpenairePlugin::cleanSubject)
119
				.flatMap(List::stream)
120
				.collect(Collectors.toSet());
121
122 49380 michele.ar
		for (final Object o : n.selectNodes(".//subject[@classid='keyword']")) {
123 55934 michele.ar
			subjects.addAll(cleanSubject(((Node) o).getText().trim()));
124 49380 michele.ar
		}
125 55934 michele.ar
126 51020 michele.ar
		doc.setSubjects(subjects);
127 49380 michele.ar
	}
128
129 55934 michele.ar
	public static List<String> cleanSubject(final String s) {
130
		if (s.isEmpty()) {
131
			return new ArrayList<>();
132
		} else if (s.startsWith("info:eu-repo/classification/msc/")) {
133
			return new ArrayList<>();
134
		} else if (s.startsWith("info:eu-repo/classification/acm/")) {
135
			return Arrays.asList(s.replaceFirst("info:eu-repo/classification/acm/", ""));
136
		} else if (s.contains(";")) {
137
			return Splitter.on(";").trimResults().omitEmptyStrings().splitToList(s);
138
		} else if (s.contains(",")) {
139
			return Splitter.on(",").trimResults().omitEmptyStrings().splitToList(s);
140
		} else {
141
			return Arrays.asList(s);
142
		}
143 49804 michele.ar
	}
144
145 51020 michele.ar
	private void updateCitations(final MdRecord doc, final Node n) {
146
		doc.getCitations().clear();
147 49380 michele.ar
148
		for (final Object o : n.selectNodes(".//citations/citation/rawText")) {
149 51020 michele.ar
			doc.getCitations().add(((Node) o).getText());
150 49380 michele.ar
		}
151
	}
152
153 51020 michele.ar
	private void updateUrls(final MdRecord doc, final Node n) {
154
		doc.getUrls().addAll(doc.getUrls());
155 49380 michele.ar
156 60102 michele.ar
		final Set<String> blacklist =
157
				Arrays.stream(datasourceBlackList.split(",")).map(String::trim).filter(StringUtils::isNotBlank).collect(Collectors.toSet());
158
159 49380 michele.ar
		for (final Object oin : n.selectNodes(".//instance")) {
160 60102 michele.ar
161
			final String hostedByid = ((Element) oin).valueOf("./hostedby/@id").trim();
162
163
			if (!blacklist.contains(hostedByid)) {
164
				final String hostedByName = ((Element) oin).valueOf("./hostedby/@name").trim();
165
				final String rights = ((Element) oin).valueOf("./accessright/@classname").trim();
166
167
				for (final Object ourl : ((Element) oin).selectNodes("./webresource/url")) {
168
					final MyURL u = new MyURL(((Node) ourl).getText().trim(), hostedByName, rights);
169
					doc.getUrls().remove(u);
170
					doc.getUrls().add(u);
171
				}
172 49380 michele.ar
			}
173
		}
174
	}
175 49395 michele.ar
176 51020 michele.ar
	private void updateProjects(final MdRecord doc, final Node n) {
177 49416 michele.ar
178 49395 michele.ar
		for (final Object op : n.selectNodes(".//rels/rel[./to/@type='project']")) {
179
			final Node p = (Node) op;
180 59332 michele.ar
			final String name = p.valueOf("./title").trim();
181 49395 michele.ar
182 59332 michele.ar
			if (StringUtils.isNotBlank(name) && !name.equalsIgnoreCase("null") && !name.equalsIgnoreCase("unidentified") && !name.equalsIgnoreCase("unknown")) {
183
				final Project np = new Project();
184
				np.setOpenaireId(p.valueOf("./to"));
185
				np.setCode(p.valueOf("./code"));
186
				np.setName(name);
187
				np.setAcronym(p.valueOf("./acronym"));
188
				np.setFunder(p.valueOf(".//funder/@shortname"));
189
				np.setProgram(p.valueOf(".//funding_level_0/@name"));
190
				np.setJurisdiction(p.valueOf(".//funder/@jurisdiction"));
191
				np.setInfoId(String.format("info:eu-repo/grantAgreement/%s/%s/%s/%s/%s/%s", np.getFunder(), np.getProgram(), np.getCode(), np.getJurisdiction(),
192
						np.getName(), np.getAcronym()));
193
194
				doc.getProjects().add(np);
195
			}
196 49395 michele.ar
		}
197 50059 michele.ar
	}
198 49395 michele.ar
199 59928 michele.ar
	private void updateDois(final MdRecord doc, final Node n) {
200 59937 michele.ar
		for (final Object od : n.selectNodes(".//*[local-name()='result']/pid[@classid='doi']")) {
201 59928 michele.ar
			final String doi = ((Node) od).getText().trim();
202
			if (StringUtils.isNotBlank(doi)) {
203
				doc.getDois().add(doi);
204
			}
205
		}
206
	}
207
208 51020 michele.ar
	private void updateBestRights(final MdRecord doc) {
209
		final Set<String> availables = doc.getUrls().stream().map(MyURL::getRights).map(String::toUpperCase).collect(Collectors.toSet());
210 50059 michele.ar
		if (availables.contains("OPEN ACCESS")) {
211 51020 michele.ar
			doc.setBestRights("Open Access");
212 50059 michele.ar
		} else if (availables.contains("EMBARGO")) {
213 51020 michele.ar
			doc.setBestRights("Embargo");
214 50059 michele.ar
		} else if (availables.contains("RESTRICTED")) {
215 51020 michele.ar
			doc.setBestRights("Restricted");
216 50059 michele.ar
		} else if (availables.contains("CLOSED ACCESS")) {
217 51020 michele.ar
			doc.setBestRights("Closed Access");
218 50059 michele.ar
		} else {
219 51020 michele.ar
			doc.setBestRights("Unknown");
220 50059 michele.ar
		}
221 49395 michele.ar
	}
222
223 44133 michele.ar
}