Revision 55840
Added by Claudio Atzori almost 5 years ago
OpenaireIdResolver.java | ||
---|---|---|
3 | 3 |
import java.io.StringReader; |
4 | 4 |
import java.util.List; |
5 | 5 |
|
6 |
import com.google.common.base.Function; |
|
7 |
import com.google.common.collect.Iterables; |
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
10 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
11 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
12 |
|
|
13 |
import eu.dnetlib.functionality.index.client.solr.SolrIndexClient; |
|
14 |
import eu.dnetlib.functionality.index.client.solr.SolrIndexClientFactory; |
|
6 | 15 |
import org.apache.commons.lang.StringUtils; |
7 | 16 |
import org.apache.commons.logging.Log; |
8 | 17 |
import org.apache.commons.logging.LogFactory; |
9 |
import org.apache.solr.client.solrj.SolrQuery; |
|
10 |
import org.apache.solr.client.solrj.impl.CloudSolrServer; |
|
11 | 18 |
import org.apache.solr.client.solrj.response.QueryResponse; |
12 | 19 |
import org.apache.solr.common.SolrDocument; |
13 | 20 |
import org.apache.solr.common.SolrDocumentList; |
... | ... | |
17 | 24 |
import org.dom4j.io.SAXReader; |
18 | 25 |
import org.springframework.beans.factory.annotation.Autowired; |
19 | 26 |
|
20 |
import com.google.common.base.Function; |
|
21 |
import com.google.common.collect.Iterables; |
|
22 |
import com.google.common.collect.Lists; |
|
23 |
|
|
24 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
25 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
26 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
27 |
|
|
28 | 27 |
/** |
29 | 28 |
* The goal of this task is to return the original identifiers of objects merged into a representative object by deduplication. Created by |
30 | 29 |
* alessia on 09/02/16. |
31 | 30 |
*/ |
32 | 31 |
public class OpenaireIdResolver { |
33 | 32 |
|
34 |
public final static String SOLR_COLLECTION_POSTFIX = "-index-openaire"; |
|
35 | 33 |
private static final Log log = LogFactory.getLog(OpenaireIdResolver.class); |
36 | 34 |
private final static String RESULT_FIELD = "__result"; |
37 | 35 |
private final static String XPATH_TO_MERGED = "//*[local-name()='entity']/*//children/result/@objidentifier"; |
38 | 36 |
private final SAXReader saxReader = new SAXReader(); |
39 | 37 |
@Autowired |
40 | 38 |
private UniqueServiceLocator serviceLocator; |
39 |
@Autowired |
|
40 |
private SolrIndexClientFactory indexClientFactory; |
|
41 | 41 |
|
42 | 42 |
public List<String> resolveIdentifier(final String id) { |
43 | 43 |
if (StringUtils.isBlank(id)) return Lists.newArrayList(); |
... | ... | |
47 | 47 |
} |
48 | 48 |
|
49 | 49 |
protected List<String> findOriginalIds(final String id) { |
50 |
CloudSolrServer solrCore = null; |
|
51 |
final SolrQuery q = new SolrQuery("objidentifier:\"" + id + "\""); |
|
52 |
QueryResponse response = null; |
|
53 |
try { |
|
54 |
solrCore = new CloudSolrServer(getIndexEndpoint()); |
|
55 |
solrCore.setDefaultCollection(getPublicIndexCollection() + SOLR_COLLECTION_POSTFIX); |
|
56 |
response = solrCore.query(q); |
|
50 |
try(final SolrIndexClient client = (SolrIndexClient) indexClientFactory.getClient(getPublicIndexCollection())) { |
|
51 |
|
|
52 |
final String query = String.format("objidentifier:\"%s\"", id); |
|
53 |
final QueryResponse response = client.query(query, 1); |
|
57 | 54 |
final SolrDocumentList results = response.getResults(); |
58 | 55 |
if (results.isEmpty()) { |
59 |
log.debug("Query " + q + " returned 0 documents"); |
|
56 |
log.debug("Query " + query + " returned 0 documents");
|
|
60 | 57 |
return Lists.newArrayList(); |
61 | 58 |
} |
62 | 59 |
// my results contain the document with the given identifier |
... | ... | |
65 | 62 |
} catch (final Exception e) { |
66 | 63 |
log.error("Can't get original ids for " + id + "\n ", e); |
67 | 64 |
throw new RuntimeException("Can't get original ids for " + id + "\n " + e); |
68 |
} finally { |
|
69 |
solrCore.shutdown(); |
|
70 | 65 |
} |
71 | 66 |
} |
72 | 67 |
|
... | ... | |
74 | 69 |
protected List<String> extractMergedIdentifiers(final SolrDocument doc) throws DocumentException { |
75 | 70 |
final String xmlRecord = (String) doc.getFirstValue(RESULT_FIELD); |
76 | 71 |
final Document xmlDoc = this.saxReader.read(new StringReader(xmlRecord)); |
77 |
return Lists.newArrayList(Iterables.transform(xmlDoc.selectNodes(XPATH_TO_MERGED), new Function<Attribute, String>() { |
|
78 |
|
|
79 |
@Override |
|
80 |
public String apply(final Attribute a) { |
|
81 |
return a.getStringValue(); |
|
82 |
} |
|
83 |
})); |
|
72 |
return Lists.newArrayList(Iterables.transform(xmlDoc.selectNodes(XPATH_TO_MERGED), (Function<Attribute, String>) a -> a.getStringValue())); |
|
84 | 73 |
} |
85 | 74 |
|
86 | 75 |
protected String getIndexEndpoint() throws ISLookUpException { |
Also available in: Unified diff
adjusted dependencies