Project

General

Profile

1
package eu.dnetlib.openaire.blacklist;
2

    
3
import java.io.StringReader;
4
import java.util.List;
5
import java.util.Set;
6

    
7
import com.google.common.base.Function;
8
import com.google.common.collect.Iterables;
9
import com.google.common.collect.Lists;
10
import com.google.common.collect.Sets;
11
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
12
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
13
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
14

    
15
import eu.dnetlib.functionality.index.client.solr.SolrIndexClient;
16
import eu.dnetlib.functionality.index.client.solr.SolrIndexClientFactory;
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20
import org.apache.solr.client.solrj.response.QueryResponse;
21
import org.apache.solr.common.SolrDocument;
22
import org.apache.solr.common.SolrDocumentList;
23
import org.dom4j.Attribute;
24
import org.dom4j.Document;
25
import org.dom4j.DocumentException;
26
import org.dom4j.io.SAXReader;
27
import org.springframework.beans.factory.annotation.Autowired;
28

    
29
/**
30
 * The goal of this task is to return the original identifiers of objects merged in a representative object by deduplication. Created by
31
 * alessia on 09/02/16.
32
 */
33
public class OpenaireIdResolver {
34

    
35
	private static final Log log = LogFactory.getLog(OpenaireIdResolver.class);
36
	public final static String SOLR_COLLECTION_POSTFIX = "-index-openaire";
37
	private final static String RESULT_FIELD = "__result";
38
	private final static String XPATH_TO_MERGED = "//*[local-name()='entity']/*//children/result/@objidentifier";
39
	private final SAXReader saxReader = new SAXReader();
40
	@Autowired
41
	private UniqueServiceLocator serviceLocator;
42
	@Autowired
43
	private SolrIndexClientFactory indexClientFactory;
44

    
45
	public Set<String> resolveIdentifier(final String id) {
46
		if (StringUtils.isBlank(id)) return  Sets.newHashSet();
47
		else {
48
			return findOriginalIds(id);
49
		}
50
	}
51

    
52
	protected Set<String> findOriginalIds(final String id) {
53
		try(final SolrIndexClient client = (SolrIndexClient) indexClientFactory.getClient(getPublicIndexCollection()+SOLR_COLLECTION_POSTFIX)) {
54

    
55
			final String query = String.format("objidentifier:\"%s\"", id);
56
			final QueryResponse response = client.query(query, 1);
57
			final SolrDocumentList results = response.getResults();
58
			if (results.isEmpty()) {
59
				log.debug("Query " + query + " returned 0 documents");
60
				return Sets.newHashSet();
61
			}
62
			// my results contain the document with the given identifier
63
			final SolrDocument solrDoc = results.get(0);
64
			return extractMergedIdentifiers(solrDoc);
65
		} catch (final Exception e) {
66
			log.error("Can't get original ids for " + id + "\n ", e);
67
			throw new RuntimeException("Can't get original ids for " + id + "\n " + e);
68
		}
69
	}
70

    
71
	@SuppressWarnings("unchecked")
72
	protected Set<String> extractMergedIdentifiers(final SolrDocument doc) throws DocumentException {
73
		final String xmlRecord = (String) doc.getFirstValue(RESULT_FIELD);
74
		final Document xmlDoc = this.saxReader.read(new StringReader(xmlRecord));
75
		return Sets.newHashSet(Iterables.transform(xmlDoc.selectNodes(XPATH_TO_MERGED), (Function<Attribute, String>) a -> a.getStringValue()));
76
	}
77

    
78
	protected String getIndexEndpoint() throws ISLookUpException {
79
		return this.serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
80
				"for $x in collection('/db/DRIVER/ServiceResources/IndexServiceResourceType') return $x//PROTOCOL[./@name='solr']/@address/string()");
81
	}
82

    
83
	protected String getPublicIndexCollection() throws ISLookUpException {
84
		return this.serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
85
				"for $x in collection('/db/DRIVER/ServiceResources/SearchServiceResourceType') return $x[.//PROPERTY[@key='infrastructure']/@value='public']//PROPERTY[@key='mdformat']/@value/string()");
86
	}
87

    
88
	public UniqueServiceLocator getServiceLocator() {
89
		return serviceLocator;
90
	}
91

    
92
	public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
93
		this.serviceLocator = serviceLocator;
94
	}
95

    
96
}
(3-3/3)