Revision 58672
Added by Alessia Bardi almost 4 years ago
modules/dnet-openaire-blacklist/trunk/src/test/java/eu/dnetlib/openaire/blacklist/OpenaireIdResolverTest.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.openaire.blacklist; |
2 | 2 |
|
3 |
import java.util.List;
|
|
3 |
import java.util.Set;
|
|
4 | 4 |
|
5 | 5 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
6 | 6 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
... | ... | |
44 | 44 |
@Test |
45 | 45 |
public void testFindOriginalIds() throws Exception { |
46 | 46 |
log.debug("Testing for " + id); |
47 |
List<String> originals = resolver.findOriginalIds(id);
|
|
47 |
Set<String> originals = resolver.findOriginalIds(id);
|
|
48 | 48 |
Assert.assertTrue(originals.contains("od_______165::deb6d865011c09f228af41a30e0ff1a4")); |
49 | 49 |
Assert.assertTrue(originals.contains("od_______165::00066fdc65f1e21677c69d976a8e852d")); |
50 | 50 |
Assert.assertTrue(originals.size() == 2); |
modules/dnet-openaire-blacklist/trunk/src/main/java/eu/dnetlib/openaire/blacklist/OpenaireIdResolver.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.io.StringReader; |
4 | 4 |
import java.util.List; |
5 |
import java.util.Set; |
|
5 | 6 |
|
6 | 7 |
import com.google.common.base.Function; |
7 | 8 |
import com.google.common.collect.Iterables; |
8 | 9 |
import com.google.common.collect.Lists; |
10 |
import com.google.common.collect.Sets; |
|
9 | 11 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
10 | 12 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
11 | 13 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
... | ... | |
40 | 42 |
@Autowired |
41 | 43 |
private SolrIndexClientFactory indexClientFactory; |
42 | 44 |
|
43 |
public List<String> resolveIdentifier(final String id) {
|
|
44 |
if (StringUtils.isBlank(id)) return Lists.newArrayList();
|
|
45 |
public Set<String> resolveIdentifier(final String id) {
|
|
46 |
if (StringUtils.isBlank(id)) return Sets.newHashSet();
|
|
45 | 47 |
else { |
46 | 48 |
return findOriginalIds(id); |
47 | 49 |
} |
48 | 50 |
} |
49 | 51 |
|
50 |
protected List<String> findOriginalIds(final String id) {
|
|
52 |
protected Set<String> findOriginalIds(final String id) {
|
|
51 | 53 |
try(final SolrIndexClient client = (SolrIndexClient) indexClientFactory.getClient(getPublicIndexCollection()+SOLR_COLLECTION_POSTFIX)) { |
52 | 54 |
|
53 | 55 |
final String query = String.format("objidentifier:\"%s\"", id); |
... | ... | |
55 | 57 |
final SolrDocumentList results = response.getResults(); |
56 | 58 |
if (results.isEmpty()) { |
57 | 59 |
log.debug("Query " + query + " returned 0 documents"); |
58 |
return Lists.newArrayList();
|
|
60 |
return Sets.newHashSet();
|
|
59 | 61 |
} |
60 | 62 |
// my results contain the document with the given identifier |
61 | 63 |
final SolrDocument solrDoc = results.get(0); |
... | ... | |
67 | 69 |
} |
68 | 70 |
|
69 | 71 |
@SuppressWarnings("unchecked") |
70 |
protected List<String> extractMergedIdentifiers(final SolrDocument doc) throws DocumentException {
|
|
72 |
protected Set<String> extractMergedIdentifiers(final SolrDocument doc) throws DocumentException {
|
|
71 | 73 |
final String xmlRecord = (String) doc.getFirstValue(RESULT_FIELD); |
72 | 74 |
final Document xmlDoc = this.saxReader.read(new StringReader(xmlRecord)); |
73 |
return Lists.newArrayList(Iterables.transform(xmlDoc.selectNodes(XPATH_TO_MERGED), (Function<Attribute, String>) a -> a.getStringValue()));
|
|
75 |
return Sets.newHashSet(Iterables.transform(xmlDoc.selectNodes(XPATH_TO_MERGED), (Function<Attribute, String>) a -> a.getStringValue()));
|
|
74 | 76 |
} |
75 | 77 |
|
76 | 78 |
protected String getIndexEndpoint() throws ISLookUpException { |
modules/dnet-openaire-blacklist/trunk/src/main/java/eu/dnetlib/openaire/blacklist/BlacklistManager.java | ||
---|---|---|
7 | 7 |
|
8 | 8 |
import javax.xml.ws.wsaddressing.W3CEndpointReference; |
9 | 9 |
|
10 |
import com.google.common.collect.Sets; |
|
10 | 11 |
import org.apache.commons.lang.StringUtils; |
11 | 12 |
import org.apache.commons.logging.Log; |
12 | 13 |
import org.apache.commons.logging.LogFactory; |
... | ... | |
40 | 41 |
@Autowired |
41 | 42 |
private OpenaireIdResolver openaireIdResolver; |
42 | 43 |
|
43 |
private List<String> getOriginalIds(final String id, final String entityType) {
|
|
44 |
List<String> originalIds = Lists.newArrayList();
|
|
44 |
private Set<String> getOriginalIds(final String id, final String entityType) {
|
|
45 |
Set<String> originalIds = Sets.newHashSet();
|
|
45 | 46 |
// We need to find original ids only for entities that are deduplicated: result, organization, person. |
46 | 47 |
if (entityType.equals("result") || entityType.equals("organization") || entityType.equals("person")) { |
47 | 48 |
originalIds = openaireIdResolver.resolveIdentifier(id); |
... | ... | |
50 | 51 |
} |
51 | 52 |
|
52 | 53 |
public void addToBlacklist(final BlacklistEntry entry) throws DatabaseException { |
53 |
final List<String> sourceIds = getOriginalIds(entry.getSourceObject(), entry.getSourceType());
|
|
54 |
final List<String> targetIds = getOriginalIds(entry.getTargetObject(), entry.getTargetType());
|
|
55 |
entry.setOriginalSourceObjects(sourceIds);
|
|
56 |
entry.setOriginalTargetObjects(targetIds);
|
|
54 |
final Set<String> sourceIds = getOriginalIds(entry.getSourceObject(), entry.getSourceType());
|
|
55 |
final Set<String> targetIds = getOriginalIds(entry.getTargetObject(), entry.getTargetType());
|
|
56 |
sourceIds.add(entry.getSourceObject());
|
|
57 |
targetIds.add(entry.getTargetObject());
|
|
57 | 58 |
|
58 | 59 |
final DatabaseService dbService = serviceLocator.getService(DatabaseService.class); |
59 | 60 |
final String addQuery = String.format( |
... | ... | |
71 | 72 |
asSqlParam(entry.getTargetObject()), |
72 | 73 |
asSqlParam(entry.getTargetType()), |
73 | 74 |
asSqlParam(entry.getTicketId()), |
74 |
asSqlParam(joinCollectionForSQL(sourceIds.isEmpty() ? Lists.newArrayList(entry.getSourceObject()) : sourceIds)),
|
|
75 |
asSqlParam(joinCollectionForSQL(targetIds.isEmpty() ? Lists.newArrayList(entry.getTargetObject()) : targetIds)),
|
|
75 |
asSqlParam(joinCollectionForSQL(sourceIds)), |
|
76 |
asSqlParam(joinCollectionForSQL(targetIds)), |
|
76 | 77 |
asSqlParam(entry.getNote())); |
77 | 78 |
log.debug("Adding new blacklist entry"); |
78 | 79 |
this.safeUpdateSql(dbService, blacklistDatabaseName, addQuery); |
modules/dnet-openaire-blacklist/trunk/src/main/java/eu/dnetlib/openaire/blacklist/BlacklistEntry.java | ||
---|---|---|
57 | 57 |
**/ |
58 | 58 |
private String sourceType, targetType; |
59 | 59 |
/** |
60 |
* If the source or the target are representatives, then the following strings are not empty and contain the json strings with the ids of the merged objects
|
|
60 |
* Lists of the identifiers that merge, or are merged into, the sourceObject and the targetObject, respectively.
|
|
61 | 61 |
**/ |
62 | 62 |
private List<String> originalSourceObjects = Lists.newArrayList(), originalTargetObjects = Lists.newArrayList(); |
63 | 63 |
|
Also available in: Unified diff
Ensure that the lists of original source and target objects contain the proper identifiers. As before, they may also contain representative identifiers. Asking for them should be enough to completely blacklist a relationship.