Project

General

Profile

1
package eu.dnetlib.data.mdstore.plugins;
2

    
3
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Autowired;

import eu.dnetlib.data.mdstore.plugins.objects.CnrCollection;
import eu.dnetlib.data.mdstore.plugins.objects.MdRecord;
import eu.dnetlib.data.utils.HttpFetcher;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
import eu.dnetlib.rmi.enabling.ISLookUpService;

    
30
public class EnrichCollectionPlugin extends MdRecordPlugin {
31

    
32
	private Map<String, CnrCollection> colls = new HashMap<>();
33

    
34
	private static final Log log = LogFactory.getLog(EnrichCollectionPlugin.class);
35

    
36
	@Autowired
37
	private UniqueServiceLocator serviceLocator;
38

    
39
	@Override
40
	protected void reconfigure(final Map<String, String> params) {
41

    
42
		params.entrySet().forEach(e -> log.debug(String.format("******************* %s -> %s", e.getKey(), e.getValue())));
43

    
44
		try {
45
			final String profile = serviceLocator.getService(ISLookUpService.class).getResourceProfile(params.get("dsId"));
46
			final Document doc = new SAXReader().read(new StringReader(profile));
47
			final String baseUrl = doc.valueOf("//INTERFACE[@id='" + params.get("dsInterface") + "']/BASE_URL").trim();
48

    
49
			colls.clear();
50
			colls.putAll(listOaiCollections(baseUrl, null).stream()
51
					.map(this::createCollection)
52
					.filter(Objects::nonNull)
53
					.distinct()
54
					.collect(Collectors.toMap(CnrCollection::getCode, o -> o)));
55

    
56
		} catch (final Throwable e) {
57
			log.error("Error evaluating ListSets", e);
58
			throw new RuntimeException("Error evaluating ListSets", e);
59
		}
60

    
61
	}
62

    
63
	@Override
64
	protected boolean updateRecord(final String recordId, final MdRecord record) {
65
		log.debug("Updating record: " + recordId);
66
		for (final CnrCollection c : record.getInCollections()) {
67
			if (colls.containsKey(c.getCode())) {
68
				c.setName(colls.get(c.getCode()).getName());
69
				c.setAcronym(colls.get(c.getCode()).getAcronym());
70
			}
71
		}
72
		log.debug("Record updated: " + recordId);
73

    
74
		return true;
75
	}
76

    
77
	@SuppressWarnings("unchecked")
78
	private List<Node> listOaiCollections(final String baseUrl, final String token) {
79
		final SAXReader reader = new SAXReader();
80
		try {
81

    
82
			final StringWriter listSetsUrl = new StringWriter();
83
			listSetsUrl.append(baseUrl);
84
			listSetsUrl.append("?verb=ListSets");
85
			if (token != null) {
86
				listSetsUrl.append("&resumptionToken=");
87
				listSetsUrl.append(token);
88

    
89
			}
90
			log.info("ListSets: " + listSetsUrl);
91
			final String s = HttpFetcher.fetch(listSetsUrl.toString());
92
			final Document doc = reader.read(new StringReader(s));
93

    
94
			final List<Node> list = doc.selectNodes("//*[local-name() = 'ListSets']/*[local-name() = 'set']");
95

    
96
			final String nextToken = doc.valueOf("//*[local-name() = 'resumptionToken']").trim();
97
			if (StringUtils.isNotBlank(nextToken)) {
98
				list.addAll(listOaiCollections(baseUrl, nextToken));
99
			}
100
			return list;
101

    
102
		} catch (final DocumentException | URISyntaxException e) {
103
			log.error("Error listing sets from url: " + baseUrl, e);
104
			try {
105
				final Document doc = reader.read(getClass().getResourceAsStream("ListSets.xml"));
106
				return doc.selectNodes("//*[local-name() = 'ListSets']/*[local-name() = 'set']");
107
			} catch (final DocumentException e1) {
108
				log.error("Error listing sets from local file", e1);
109
				return new ArrayList<>();
110
			}
111
		}
112
	}
113

    
114
	private CnrCollection createCollection(final Node n) {
115

    
116
		final CnrCollection c = new CnrCollection();
117
		final String code = n.valueOf("./*[local-name() = 'setSpec']").trim();
118

    
119
		c.setCode(n.valueOf("./*[local-name() = 'setSpec']").trim());
120

    
121
		if (code.equalsIgnoreCase("openaire")) {
122
			c.setAcronym("openaire");
123
			c.setName("Openaire Collection");
124
		} else {
125
			final String desc = n.valueOf("./*[local-name() = 'setDescription']").trim();
126
			if (StringUtils.isNotBlank(desc)) {
127
				final Matcher m = Pattern.compile("^Prodotti della ricerca di (.+) \\- (.+)$").matcher(desc);
128
				if (m.matches()) {
129
					c.setAcronym(m.group(1));
130
					c.setName(m.group(2));
131
				} else {
132
					c.setAcronym(desc);
133
					c.setName(desc);
134
				}
135
			} else {
136
				final String name = n.valueOf("./*[local-name() = 'setName']").trim();
137
				c.setAcronym(name);
138
				c.setName(name);
139
			}
140

    
141
		}
142
		return c;
143
	}
144

    
145
	@Override
146
	protected void resetConfiguration() {
147
		colls.clear();
148
	}
149

    
150
}
(3-3/12)