Revision 50543
Added by Claudio Atzori about 6 years ago
OpenaireIdResolver.java | ||
---|---|---|
3 | 3 |
import java.io.StringReader; |
4 | 4 |
import java.util.List; |
5 | 5 |
|
6 |
import com.google.common.base.Function; |
|
7 |
import com.google.common.collect.Iterables; |
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
10 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
11 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
12 |
import eu.dnetlib.functionality.index.client.solr.SolrIndexClient; |
|
13 |
import eu.dnetlib.functionality.index.client.solr.SolrIndexClientFactory; |
|
6 | 14 |
import org.apache.commons.lang.StringUtils; |
7 | 15 |
import org.apache.commons.logging.Log; |
8 | 16 |
import org.apache.commons.logging.LogFactory; |
9 |
import org.apache.solr.client.solrj.SolrQuery; |
|
10 |
import org.apache.solr.client.solrj.impl.CloudSolrClient; |
|
11 | 17 |
import org.apache.solr.client.solrj.response.QueryResponse; |
12 | 18 |
import org.apache.solr.common.SolrDocument; |
13 | 19 |
import org.apache.solr.common.SolrDocumentList; |
... | ... | |
17 | 23 |
import org.dom4j.io.SAXReader; |
18 | 24 |
import org.springframework.beans.factory.annotation.Autowired; |
19 | 25 |
|
20 |
import com.google.common.base.Function; |
|
21 |
import com.google.common.collect.Iterables; |
|
22 |
import com.google.common.collect.Lists; |
|
23 |
|
|
24 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
25 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
26 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
27 |
|
|
28 | 26 |
/** |
29 | 27 |
* The goal of this task is to return the original identifiers of objects merged in a representative object by deduplication. Created by |
30 | 28 |
* alessia on 09/02/16. |
31 | 29 |
*/ |
32 | 30 |
public class OpenaireIdResolver { |
33 | 31 |
|
34 |
public final static String SOLR_COLLECTION_POSTFIX = "-index-openaire"; |
|
35 | 32 |
private static final Log log = LogFactory.getLog(OpenaireIdResolver.class); |
36 | 33 |
private final static String RESULT_FIELD = "__result"; |
37 | 34 |
private final static String XPATH_TO_MERGED = "//*[local-name()='entity']/*//children/result/@objidentifier"; |
38 | 35 |
private final SAXReader saxReader = new SAXReader(); |
39 | 36 |
@Autowired |
40 | 37 |
private UniqueServiceLocator serviceLocator; |
38 |
@Autowired |
|
39 |
private SolrIndexClientFactory indexClientFactory; |
|
41 | 40 |
|
42 | 41 |
public List<String> resolveIdentifier(final String id) { |
43 | 42 |
if (StringUtils.isBlank(id)) return Lists.newArrayList(); |
... | ... | |
47 | 46 |
} |
48 | 47 |
|
49 | 48 |
protected List<String> findOriginalIds(final String id) { |
49 |
try(final SolrIndexClient client = (SolrIndexClient) indexClientFactory.getClient(getPublicIndexCollection())) { |
|
50 | 50 |
|
51 |
final SolrQuery q = new SolrQuery("objidentifier:\"" + id + "\""); |
|
52 |
|
|
53 |
try (final CloudSolrClient solrCore = new CloudSolrClient.Builder().withZkHost(getIndexEndpoint()).build()) { |
|
54 |
solrCore.setDefaultCollection(getPublicIndexCollection() + SOLR_COLLECTION_POSTFIX); |
|
55 |
final QueryResponse response = solrCore.query(q); |
|
51 |
final String query = String.format("objidentifier:\"%s\"", id); |
|
52 |
final QueryResponse response = client.query(query, 1); |
|
56 | 53 |
final SolrDocumentList results = response.getResults(); |
57 | 54 |
if (results.isEmpty()) { |
58 |
log.debug("Query " + q + " returned 0 documents"); |
|
55 |
log.debug("Query " + query + " returned 0 documents");
|
|
59 | 56 |
return Lists.newArrayList(); |
60 | 57 |
} |
61 | 58 |
// my results contain the document with the given identifier |
... | ... | |
71 | 68 |
protected List<String> extractMergedIdentifiers(final SolrDocument doc) throws DocumentException { |
72 | 69 |
final String xmlRecord = (String) doc.getFirstValue(RESULT_FIELD); |
73 | 70 |
final Document xmlDoc = this.saxReader.read(new StringReader(xmlRecord)); |
74 |
return Lists.newArrayList(Iterables.transform(xmlDoc.selectNodes(XPATH_TO_MERGED), new Function<Attribute, String>() { |
|
75 |
|
|
76 |
@Override |
|
77 |
public String apply(final Attribute a) { |
|
78 |
return a.getStringValue(); |
|
79 |
} |
|
80 |
})); |
|
71 |
return Lists.newArrayList(Iterables.transform(xmlDoc.selectNodes(XPATH_TO_MERGED), (Function<Attribute, String>) a -> a.getStringValue())); |
|
81 | 72 |
} |
82 | 73 |
|
83 | 74 |
protected String getIndexEndpoint() throws ISLookUpException { |
Also available in: Unified diff
Consolidated index modules and updated to Solr 7.2.0