Revision 28089
Added by Claudio Atzori over 10 years ago
modules/dnet-pace-core/branches/1.1.0/src/test/java/eu/dnetlib/pace/util/DedupConfigTest.java | ||
---|---|---|
23 | 23 |
"entity.type = organization, " + |
24 | 24 |
"order.field = legalname, " + |
25 | 25 |
"ngram.fields = [legalname], " + |
26 |
"rootbuilder = [organization,projectOrganization,resultOrganization], " +
|
|
26 |
"rootbuilder = [organization,projectOrganization_participation_isParticipant,datasourceOrganization_provision_isProvidedBy], " +
|
|
27 | 27 |
"skiplist = [od_______908,od________18] }"; |
28 | 28 |
|
29 | 29 |
final DedupConfig cfg = DedupConfigLoader.load(s); |
... | ... | |
33 | 33 |
assertTrue(cfg.getDedupRun().equals("001")); |
34 | 34 |
assertTrue(cfg.getEntityType().equals(Type.organization)); |
35 | 35 |
assertTrue(cfg.getOrderField().equals("legalname")); |
36 |
assertTrue(Lists.newArrayList("organization", "projectOrganization", "resultOrganization").equals(cfg.getRootBuilderFamilies()));
|
|
36 |
assertTrue(Lists.newArrayList("organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy").equals(cfg.getRootBuilderFamilies()));
|
|
37 | 37 |
assertTrue(Sets.newHashSet("od_______908", "od________18").equals(cfg.getSkipList())); |
38 | 38 |
} |
39 | 39 |
|
... | ... | |
46 | 46 |
"entity.type = organization, " + |
47 | 47 |
"order.field = legalname, " + |
48 | 48 |
"ngram.fields = [legalname], " + |
49 |
"rootbuilder = [organization,projectOrganization,resultOrganization] }";
|
|
49 |
"rootbuilder = [organization,projectOrganization_participation_isParticipant,datasourceOrganization_provision_isProvidedBy] }";
|
|
50 | 50 |
|
51 | 51 |
final DedupConfig cfg = DedupConfigLoader.load(s); |
52 | 52 |
|
... | ... | |
54 | 54 |
assertTrue(cfg.getThreshold() == 0.99); |
55 | 55 |
assertTrue(cfg.getEntityType().equals(Type.organization)); |
56 | 56 |
assertTrue(cfg.getOrderField().equals("legalname")); |
57 |
assertTrue(Lists.newArrayList("organization", "projectOrganization", "resultOrganization").equals(cfg.getRootBuilderFamilies()));
|
|
57 |
assertTrue(Lists.newArrayList("organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy").equals(cfg.getRootBuilderFamilies()));
|
|
58 | 58 |
assertTrue(new HashSet<String>().equals(cfg.getSkipList())); |
59 | 59 |
} |
60 | 60 |
|
modules/dnet-pace-core/branches/1.1.0/src/main/java/eu/dnetlib/pace/util/DedupConfig.java | ||
---|---|---|
3 | 3 |
import java.util.List; |
4 | 4 |
import java.util.Set; |
5 | 5 |
|
6 |
import com.google.gson.Gson; |
|
6 |
import com.google.gson.GsonBuilder;
|
|
7 | 7 |
|
8 | 8 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
9 | 9 |
|
... | ... | |
12 | 12 |
/** |
13 | 13 |
* Entity type. |
14 | 14 |
*/ |
15 |
private Type entityType; |
|
15 |
private final Type entityType;
|
|
16 | 16 |
|
17 | 17 |
/** |
18 | 18 |
* Field name used to sort the values in the reducer phase. |
19 | 19 |
*/ |
20 |
private String orderField; |
|
20 |
private final String orderField;
|
|
21 | 21 |
|
22 | 22 |
/** |
23 | 23 |
* Column Families involved in the relations redirection. |
24 | 24 |
*/ |
25 |
private List<String> rootBuilderFamilies; |
|
26 |
|
|
25 |
private final List<String> rootBuilderFamilies;
|
|
26 |
|
|
27 | 27 |
/** |
28 | 28 |
* Set of datasource namespace prefixes that won't be deduplicated. |
29 | 29 |
*/ |
30 |
private Set<String> skipList; |
|
31 |
|
|
30 |
private final Set<String> skipList;
|
|
31 |
|
|
32 | 32 |
/** |
33 | 33 |
* Subprefix used to build the root id, allows multiple dedup runs. |
34 | 34 |
*/ |
35 |
private String dedupRun; |
|
36 |
|
|
35 |
private final String dedupRun;
|
|
36 |
|
|
37 | 37 |
/** |
38 | 38 |
* Similarity threshold. |
39 | 39 |
*/ |
40 |
private double threshold; |
|
40 |
private final double threshold;
|
|
41 | 41 |
|
42 | 42 |
public DedupConfig(Type entityType, String orderField, List<String> rootBuilderFamilies, String dedupRun, double threshold, Set<String> skipList) { |
43 | 43 |
super(); |
... | ... | |
72 | 72 |
public List<String> getRootBuilderFamilies() { |
73 | 73 |
return rootBuilderFamilies; |
74 | 74 |
} |
75 |
|
|
75 |
|
|
76 | 76 |
public String getDedupRun() { |
77 | 77 |
return dedupRun; |
78 | 78 |
} |
... | ... | |
84 | 84 |
public Set<String> getSkipList() { |
85 | 85 |
return skipList; |
86 | 86 |
} |
87 |
|
|
87 |
|
|
88 | 88 |
@Override |
89 | 89 |
public String toString() { |
90 |
return new Gson().toJson(this); |
|
90 |
return new GsonBuilder().setPrettyPrinting().create().toJson(this);
|
|
91 | 91 |
} |
92 | 92 |
|
93 | 93 |
} |
Also available in: Unified diff
Pretty-print the DedupConfig JSON output in toString(), make its fields final, and update the tests to the new rootbuilder family names.