Project

General

Profile

1
package eu.dnetlib.openaire.blacklist;
2

    
3
import java.io.StringReader;
4
import java.util.List;
5

    
6
import org.apache.commons.lang.StringUtils;
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9
import org.apache.solr.client.solrj.SolrQuery;
10
import org.apache.solr.client.solrj.impl.CloudSolrServer;
11
import org.apache.solr.client.solrj.response.QueryResponse;
12
import org.apache.solr.common.SolrDocument;
13
import org.apache.solr.common.SolrDocumentList;
14
import org.dom4j.Attribute;
15
import org.dom4j.Document;
16
import org.dom4j.DocumentException;
17
import org.dom4j.io.SAXReader;
18
import org.springframework.beans.factory.annotation.Autowired;
19

    
20
import com.google.common.base.Function;
21
import com.google.common.collect.Iterables;
22
import com.google.common.collect.Lists;
23

    
24
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
25
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
26
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
27

    
28
/**
29
 * The goal of this task is to return the original identifiers of objects merged in a representative object by deduplication. Created by
30
 * alessia on 09/02/16.
31
 */
32
public class OpenaireIdResolver {
33

    
34
	public final static String SOLR_COLLECTION_POSTFIX = "-index-openaire";
35
	private static final Log log = LogFactory.getLog(OpenaireIdResolver.class);
36
	private final static String RESULT_FIELD = "__result";
37
	private final static String XPATH_TO_MERGED = "//*[local-name()='entity']/*//children/result/@objidentifier";
38
	private final SAXReader saxReader = new SAXReader();
39
	@Autowired
40
	private UniqueServiceLocator serviceLocator;
41

    
42
	public List<String> resolveIdentifier(final String id) {
43
		if (StringUtils.isBlank(id)) return Lists.newArrayList();
44
		else {
45
			return findOriginalIds(id);
46
		}
47
	}
48

    
49
	protected List<String> findOriginalIds(final String id) {
50
		CloudSolrServer solrCore = null;
51
		final SolrQuery q = new SolrQuery("objidentifier:\"" + id + "\"");
52
		QueryResponse response = null;
53
		try {
54
			solrCore = new CloudSolrServer(getIndexEndpoint());
55
			solrCore.setDefaultCollection(getPublicIndexCollection() + SOLR_COLLECTION_POSTFIX);
56
			response = solrCore.query(q);
57
			final SolrDocumentList results = response.getResults();
58
			if (results.isEmpty()) {
59
				log.debug("Query " + q + " returned 0 documents");
60
				return Lists.newArrayList();
61
			}
62
			// my results contain the document with the given identifier
63
			final SolrDocument solrDoc = results.get(0);
64
			return extractMergedIdentifiers(solrDoc);
65
		} catch (final Exception e) {
66
			log.error("Can't get original ids for " + id + "\n ", e);
67
			throw new RuntimeException("Can't get original ids for " + id + "\n " + e);
68
		} finally {
69
			solrCore.shutdown();
70
		}
71
	}
72

    
73
	@SuppressWarnings("unchecked")
74
	protected List<String> extractMergedIdentifiers(final SolrDocument doc) throws DocumentException {
75
		final String xmlRecord = (String) doc.getFirstValue(RESULT_FIELD);
76
		final Document xmlDoc = this.saxReader.read(new StringReader(xmlRecord));
77
		return Lists.newArrayList(Iterables.transform(xmlDoc.selectNodes(XPATH_TO_MERGED), new Function<Attribute, String>() {
78

    
79
			@Override
80
			public String apply(final Attribute a) {
81
				return a.getStringValue();
82
			}
83
		}));
84
	}
85

    
86
	protected String getIndexEndpoint() throws ISLookUpException {
87
		return this.serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
88
				"for $x in collection('/db/DRIVER/ServiceResources/IndexServiceResourceType') return $x//PROTOCOL[./@name='solr']/@address/string()");
89
	}
90

    
91
	protected String getPublicIndexCollection() throws ISLookUpException {
92
		return this.serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
93
				"for $x in collection('/db/DRIVER/ServiceResources/SearchServiceResourceType') return $x[.//PROPERTY[@key='infrastructure']/@value='public']//PROPERTY[@key='mdformat']/@value/string()");
94
	}
95

    
96
	public UniqueServiceLocator getServiceLocator() {
97
		return serviceLocator;
98
	}
99

    
100
	public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
101
		this.serviceLocator = serviceLocator;
102
	}
103

    
104
}
(3-3/3)