Revision 56104
Added by Claudio Atzori almost 5 years ago
modules/dnet-deduplication/trunk/src/test/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestrationTest.java | ||
---|---|---|
4 | 4 |
import static org.junit.Assert.assertTrue; |
5 | 5 |
|
6 | 6 |
import java.io.IOException; |
7 |
import java.nio.charset.Charset; |
|
7 | 8 |
import java.util.Queue; |
8 | 9 |
|
10 |
import org.apache.commons.io.IOUtils; |
|
9 | 11 |
import org.junit.Before; |
10 | 12 |
import org.junit.Test; |
11 | 13 |
|
... | ... | |
42 | 44 |
assertNotNull(anotherDco); |
43 | 45 |
assertTrue(json.equals(anotherDco.toString())); |
44 | 46 |
} |
47 |
|
|
48 |
@Test |
|
49 |
public void testSerializationOrgs() throws IOException { |
|
50 |
|
|
51 |
final Entity e = new Entity("organization", "20", "Organization"); |
|
52 |
|
|
53 |
final String actionSetId = "001"; |
|
54 |
final Queue<DedupConfig> configurations = Lists.newLinkedList(); |
|
55 |
|
|
56 |
configurations.add(DedupConfig.load(IOUtils.toString(getClass().getResourceAsStream("organisation.conf.json"), Charset.forName("UTF-8")))); |
|
57 |
dco = new DedupConfigurationOrchestration(e, actionSetId, configurations); |
|
58 |
|
|
59 |
System.out.println(dco.toString()); |
|
60 |
|
|
61 |
} |
|
45 | 62 |
} |
modules/dnet-deduplication/trunk/src/test/resources/eu/dnetlib/msro/workflows/dedup/conf/organisation.conf.json | ||
---|---|---|
1 |
{ |
|
2 |
"wf" : { |
|
3 |
"threshold" : "0.9", |
|
4 |
"dedupRun" : "001", |
|
5 |
"entityType" : "organization", |
|
6 |
"orderField" : "legalname", |
|
7 |
"queueMaxSize" : "2000", |
|
8 |
"groupMaxSize" : "50", |
|
9 |
"slidingWindowSize" : "200", |
|
10 |
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ], |
|
11 |
"includeChildren" : "true" |
|
12 |
}, |
|
13 |
"pace" : { |
|
14 |
"clustering" : [ |
|
15 |
{ "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} }, |
|
16 |
{ "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } }, |
|
17 |
{ "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } } |
|
18 |
], |
|
19 |
"strictConditions" : [ |
|
20 |
{ "name" : "exactMatch", "fields" : [ "gridid" ] } |
|
21 |
], |
|
22 |
"conditions" : [ |
|
23 |
{ "name" : "exactMatch", "fields" : [ "country" ] }, |
|
24 |
{ "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] } |
|
25 |
], |
|
26 |
"model" : [ |
|
27 |
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/country/classid" }, |
|
28 |
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "false", "path" : "organization/metadata/legalshortname/value" }, |
|
29 |
{ "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} }, |
|
30 |
{ "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } }, |
|
31 |
{ "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" } |
|
32 |
], |
|
33 |
"blacklists" : { } |
|
34 |
} |
|
35 |
} |
modules/dnet-deduplication/trunk/src/main/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestration.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.workflows.dedup.conf; |
2 | 2 |
|
3 |
import java.io.IOException; |
|
3 | 4 |
import java.util.Queue; |
4 | 5 |
|
5 | 6 |
import com.google.gson.Gson; |
6 | 7 |
import com.google.gson.GsonBuilder; |
7 | 8 |
|
8 | 9 |
import eu.dnetlib.pace.config.DedupConfig; |
10 |
import eu.dnetlib.pace.util.PaceException; |
|
11 |
import org.codehaus.jackson.map.ObjectMapper; |
|
9 | 12 |
|
10 | 13 |
/** |
11 | 14 |
* The Class DedupConfigurationOrchestration. |
... | ... | |
87 | 90 |
* @return the dedup configuration orchestration |
88 | 91 |
*/ |
89 | 92 |
public static DedupConfigurationOrchestration fromJSON(final String json) { |
90 |
return new Gson().fromJson(json, DedupConfigurationOrchestration.class); |
|
93 |
try { |
|
94 |
return new ObjectMapper().readValue(json, DedupConfigurationOrchestration.class); |
|
95 |
} catch (IOException e) { |
|
96 |
throw new PaceException("unable to deserialise configuration", e); |
|
97 |
} |
|
91 | 98 |
} |
92 | 99 |
|
93 | 100 |
/* |
... | ... | |
97 | 104 |
*/ |
98 | 105 |
@Override |
99 | 106 |
public String toString() { |
100 |
return new GsonBuilder().setPrettyPrinting().create().toJson(this); |
|
107 |
try { |
|
108 |
return new ObjectMapper().writeValueAsString(this); |
|
109 |
} catch (IOException e) { |
|
110 |
throw new PaceException("unable to serialise configuration", e); |
|
111 |
} |
|
101 | 112 |
} |
102 | 113 |
|
103 | 114 |
} |
Also available in: Unified diff
Using Jackson to serialise dedup configurations