Project

General

Profile

1
package eu.dnetlib.data.mdstore.plugins;
2

    
3
import java.io.StringReader;
4
import java.net.URI;
5
import java.net.URISyntaxException;
6
import java.util.ArrayList;
7
import java.util.Arrays;
8
import java.util.HashMap;
9
import java.util.List;
10
import java.util.Map;
11
import java.util.Set;
12
import java.util.stream.Collectors;
13

    
14
import org.apache.commons.lang3.StringUtils;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.dom4j.Document;
18
import org.dom4j.DocumentException;
19
import org.dom4j.Element;
20
import org.dom4j.Node;
21
import org.dom4j.io.SAXReader;
22
import org.springframework.beans.factory.annotation.Value;
23

    
24
import com.google.common.base.Splitter;
25

    
26
import eu.dnetlib.data.mdstore.plugins.objects.MdRecord;
27
import eu.dnetlib.data.mdstore.plugins.objects.MyURL;
28
import eu.dnetlib.data.mdstore.plugins.objects.Project;
29

    
30
public class EnrichOpenairePlugin extends GenericDoiMdstorePlugin {
31

    
32
	private static final Log log = LogFactory.getLog(EnrichOpenairePlugin.class);
33

    
34
	@Value("${plugin.enrich.publications.openaire.url}")
35
	private String baseUrl;
36

    
37
	private Map<String, Counter> counters = new HashMap<>();
38

    
39
	@Override
40
	protected URI prepareURI(final String doi) throws URISyntaxException {
41
		return new URI(String.format(baseUrl, doi));
42
	}
43

    
44
	@Override
45
	protected void reconfigure(final Map<String, String> params) {
46
		counters.clear();
47
		counters.put("subjects", new Counter());
48
		counters.put("citations", new Counter());
49
		counters.put("urls", new Counter());
50
		counters.put("projects", new Counter());
51
		counters.put("dois", new Counter());
52
	}
53

    
54
	@Override
55
	protected void resetConfiguration() {
56
		log.info("***** Openaire Enrichment - subjects  : " + counters.get("subjects"));
57
		log.info("***** Openaire Enrichment - citations : " + counters.get("citations"));
58
		log.info("***** Openaire Enrichment - urls      : " + counters.get("urls"));
59
		log.info("***** Openaire Enrichment - projects  : " + counters.get("projects"));
60
		log.info("***** Openaire Enrichment - dois      : " + counters.get("dois"));
61
		counters.clear();
62
	}
63

    
64
	@Override
65
	protected boolean updateDocument(final MdRecord doc, final String response) {
66
		counters.get("subjects").incrementBefore(doc.getSubjects().size());
67
		counters.get("citations").incrementBefore(doc.getCitations().size());
68
		counters.get("urls").incrementBefore(doc.getUrls().size());
69
		counters.get("projects").incrementBefore(doc.getProjects().size());
70
		counters.get("dois").incrementBefore(doc.getDois().size());
71

    
72
		try {
73
			final Document docRes = (new SAXReader()).read(new StringReader(response));
74

    
75
			final List<?> results = docRes.selectNodes("/response/results/result");
76

    
77
			if (results.size() == 1) {
78
				final Node n = (Node) results.get(0);
79
				updateSubjects(doc, n);
80
				updateCitations(doc, n);
81
				updateUrls(doc, n);
82
				updateProjects(doc, n);
83
				updateDois(doc, n);
84
				updateBestRights(doc);
85

    
86
				return true;
87
			} else if (results.size() == 1) {
88
				log.warn("Too many responses");
89
			}
90
		} catch (final DocumentException e) {
91
			log.warn("Invalid response", e);
92
		} finally {
93
			counters.get("subjects").incrementAfter(doc.getSubjects().size());
94
			counters.get("citations").incrementAfter(doc.getCitations().size());
95
			counters.get("urls").incrementAfter(doc.getUrls().size());
96
			counters.get("projects").incrementAfter(doc.getProjects().size());
97
			counters.get("dois").incrementAfter(doc.getDois().size());
98
		}
99

    
100
		return false;
101
	}
102

    
103
	private void updateSubjects(final MdRecord doc, final Node n) {
104
		final Set<String> subjects = doc.getSubjects()
105
				.stream()
106
				.map(EnrichOpenairePlugin::cleanSubject)
107
				.flatMap(List::stream)
108
				.collect(Collectors.toSet());
109

    
110
		for (final Object o : n.selectNodes(".//subject[@classid='keyword']")) {
111
			subjects.addAll(cleanSubject(((Node) o).getText().trim()));
112
		}
113

    
114
		doc.setSubjects(subjects);
115
	}
116

    
117
	public static List<String> cleanSubject(final String s) {
118
		if (s.isEmpty()) {
119
			return new ArrayList<>();
120
		} else if (s.startsWith("info:eu-repo/classification/msc/")) {
121
			return new ArrayList<>();
122
		} else if (s.startsWith("info:eu-repo/classification/acm/")) {
123
			return Arrays.asList(s.replaceFirst("info:eu-repo/classification/acm/", ""));
124
		} else if (s.contains(";")) {
125
			return Splitter.on(";").trimResults().omitEmptyStrings().splitToList(s);
126
		} else if (s.contains(",")) {
127
			return Splitter.on(",").trimResults().omitEmptyStrings().splitToList(s);
128
		} else {
129
			return Arrays.asList(s);
130
		}
131
	}
132

    
133
	private void updateCitations(final MdRecord doc, final Node n) {
134
		doc.getCitations().clear();
135

    
136
		for (final Object o : n.selectNodes(".//citations/citation/rawText")) {
137
			doc.getCitations().add(((Node) o).getText());
138
		}
139
	}
140

    
141
	private void updateUrls(final MdRecord doc, final Node n) {
142
		doc.getUrls().addAll(doc.getUrls());
143

    
144
		for (final Object oin : n.selectNodes(".//instance")) {
145
			final String rights = ((Element) oin).valueOf("./accessright/@classname").trim();
146
			final String hostedBy = ((Element) oin).valueOf("./hostedby/@name").trim();
147
			for (final Object ourl : ((Element) oin).selectNodes("./webresource/url")) {
148
				final MyURL u = new MyURL(((Node) ourl).getText().trim(), hostedBy, rights);
149
				doc.getUrls().remove(u);
150
				doc.getUrls().add(u);
151
			}
152
		}
153
	}
154

    
155
	private void updateProjects(final MdRecord doc, final Node n) {
156

    
157
		for (final Object op : n.selectNodes(".//rels/rel[./to/@type='project']")) {
158
			final Node p = (Node) op;
159
			final String name = p.valueOf("./title").trim();
160

    
161
			if (StringUtils.isNotBlank(name) && !name.equalsIgnoreCase("null") && !name.equalsIgnoreCase("unidentified") && !name.equalsIgnoreCase("unknown")) {
162
				final Project np = new Project();
163
				np.setOpenaireId(p.valueOf("./to"));
164
				np.setCode(p.valueOf("./code"));
165
				np.setName(name);
166
				np.setAcronym(p.valueOf("./acronym"));
167
				np.setFunder(p.valueOf(".//funder/@shortname"));
168
				np.setProgram(p.valueOf(".//funding_level_0/@name"));
169
				np.setJurisdiction(p.valueOf(".//funder/@jurisdiction"));
170
				np.setInfoId(String.format("info:eu-repo/grantAgreement/%s/%s/%s/%s/%s/%s", np.getFunder(), np.getProgram(), np.getCode(), np.getJurisdiction(),
171
						np.getName(), np.getAcronym()));
172

    
173
				doc.getProjects().add(np);
174
			}
175
		}
176
	}
177

    
178
	private void updateDois(final MdRecord doc, final Node n) {
179
		for (final Object od : n.selectNodes(".//pid[@classid='doi']")) {
180
			final String doi = ((Node) od).getText().trim();
181
			if (StringUtils.isNotBlank(doi)) {
182
				doc.getDois().add(doi);
183
			}
184
		}
185
	}
186

    
187
	private void updateBestRights(final MdRecord doc) {
188
		final Set<String> availables = doc.getUrls().stream().map(MyURL::getRights).map(String::toUpperCase).collect(Collectors.toSet());
189
		if (availables.contains("OPEN ACCESS")) {
190
			doc.setBestRights("Open Access");
191
		} else if (availables.contains("EMBARGO")) {
192
			doc.setBestRights("Embargo");
193
		} else if (availables.contains("RESTRICTED")) {
194
			doc.setBestRights("Restricted");
195
		} else if (availables.contains("CLOSED ACCESS")) {
196
			doc.setBestRights("Closed Access");
197
		} else {
198
			doc.setBestRights("Unknown");
199
		}
200
	}
201

    
202
}
(7-7/11)