Revision 57657
Added by Michele De Bonis over 4 years ago
modules/dnet-deduplication/branches/tree-dedup/deploy.info | ||
---|---|---|
1 |
{ |
|
2 |
"type_source": "SVN", |
|
3 |
"goal": "package -U source:jar", |
|
4 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-deduplication/trunk", |
|
5 |
"deploy_repository": "dnet45-snapshots", |
|
6 |
"version": "4", |
|
7 |
"mail": "alessia.bardi@isti.cnr.it", |
|
8 |
"deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", |
|
9 |
"name": "dnet-deduplication" |
|
10 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/test/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestrationTest.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup.conf; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertNotNull; |
|
4 |
import static org.junit.Assert.assertTrue; |
|
5 |
|
|
6 |
import java.io.IOException; |
|
7 |
import java.nio.charset.Charset; |
|
8 |
import java.util.Queue; |
|
9 |
|
|
10 |
import org.apache.commons.io.IOUtils; |
|
11 |
import org.junit.Before; |
|
12 |
import org.junit.Test; |
|
13 |
|
|
14 |
import com.google.common.collect.Lists; |
|
15 |
|
|
16 |
import eu.dnetlib.pace.config.DedupConfig; |
|
17 |
|
|
18 |
public class DedupConfigurationOrchestrationTest { |
|
19 |
|
|
20 |
public DedupConfigurationOrchestration dco; |
|
21 |
|
|
22 |
@Before |
|
23 |
public void setUp() throws IOException { |
|
24 |
|
|
25 |
final Entity e = new Entity("result", "50", "Publication"); |
|
26 |
|
|
27 |
final String actionSetId = "001"; |
|
28 |
final Queue<DedupConfig> configurations = Lists.newLinkedList(); |
|
29 |
|
|
30 |
configurations.add(DedupConfig.loadDefault()); |
|
31 |
|
|
32 |
dco = new DedupConfigurationOrchestration(e, actionSetId, configurations); |
|
33 |
assertNotNull(dco); |
|
34 |
assertNotNull(dco.getActionSetId()); |
|
35 |
assertNotNull(dco.getEntity()); |
|
36 |
assertNotNull(dco.getConfigurations()); |
|
37 |
} |
|
38 |
|
|
39 |
@Test |
|
40 |
public void testSerialization() { |
|
41 |
|
|
42 |
final String json = dco.toString(); |
|
43 |
final DedupConfigurationOrchestration anotherDco = DedupConfigurationOrchestration.fromJSON(json); |
|
44 |
assertNotNull(anotherDco); |
|
45 |
assertTrue(json.equals(anotherDco.toString())); |
|
46 |
} |
|
47 |
|
|
48 |
@Test |
|
49 |
public void testSerializationOrgs() throws IOException { |
|
50 |
|
|
51 |
final Entity e = new Entity("organization", "20", "Organization"); |
|
52 |
|
|
53 |
final String actionSetId = "001"; |
|
54 |
final Queue<DedupConfig> configurations = Lists.newLinkedList(); |
|
55 |
|
|
56 |
configurations.add(DedupConfig.load(IOUtils.toString(getClass().getResourceAsStream("organisation.conf.json"), Charset.forName("UTF-8")))); |
|
57 |
dco = new DedupConfigurationOrchestration(e, actionSetId, configurations); |
|
58 |
|
|
59 |
System.out.println(dco.toString()); |
|
60 |
|
|
61 |
} |
|
62 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/test/java/eu/dnetlib/msro/workflows/dedup/SimilarityMeshBuilderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import org.junit.Before; |
|
6 |
import org.junit.Test; |
|
7 |
|
|
8 |
import com.google.common.collect.Lists; |
|
9 |
|
|
10 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
11 |
import eu.dnetlib.msro.workflows.hadoop.utils.Similarity; |
|
12 |
import eu.dnetlib.msro.workflows.hadoop.utils.SimilarityMeshBuilder; |
|
13 |
|
|
14 |
public class SimilarityMeshBuilderTest { |
|
15 |
|
|
16 |
private List<String> list; |
|
17 |
|
|
18 |
@Before |
|
19 |
public void setUp() throws Exception { |
|
20 |
list = Lists.newArrayList(); |
|
21 |
for (int i = 0; i < 10; i++) { |
|
22 |
list.add(i + ""); |
|
23 |
} |
|
24 |
} |
|
25 |
|
|
26 |
@Test |
|
27 |
public void test() { |
|
28 |
final List<Similarity> combinations = SimilarityMeshBuilder.build(Type.result, list); |
|
29 |
|
|
30 |
System.out.println(combinations); |
|
31 |
System.out.println(combinations.size()); |
|
32 |
|
|
33 |
} |
|
34 |
|
|
35 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/test/java/eu/dnetlib/msro/workflows/dedup/SerializationTest.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
|
|
5 |
import com.google.common.collect.Iterables; |
|
6 |
import com.googlecode.protobuf.format.JsonFormat; |
|
7 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
8 |
import org.apache.commons.codec.binary.Base64; |
|
9 |
import org.apache.commons.io.IOUtils; |
|
10 |
import org.apache.commons.logging.Log; |
|
11 |
import org.apache.commons.logging.LogFactory; |
|
12 |
import org.junit.Test; |
|
13 |
|
|
14 |
/** |
|
15 |
* Created by claudio on 05/04/16. |
|
16 |
*/ |
|
17 |
public class SerializationTest { |
|
18 |
|
|
19 |
private static final Log log = LogFactory.getLog(SerializationTest.class); |
|
20 |
|
|
21 |
@Test |
|
22 |
public void test() throws IOException { |
|
23 |
|
|
24 |
final String data = Iterables.getFirst(IOUtils.readLines(getClass().getResourceAsStream("oaf_data.base64")), ""); |
|
25 |
|
|
26 |
final byte[] oafBytes = Base64.decodeBase64(data); |
|
27 |
|
|
28 |
Oaf oaf = Oaf.parseFrom(oafBytes); |
|
29 |
|
|
30 |
JsonFormat jsonFormat = new JsonFormat(); |
|
31 |
String asJson = jsonFormat.printToString(oaf); |
|
32 |
|
|
33 |
log.info(asJson); |
|
34 |
|
|
35 |
} |
|
36 |
|
|
37 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/test/java/eu/dnetlib/data/dedup/DedupInspectorFunctionsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.dedup; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
5 |
import com.google.common.base.Function; |
|
6 |
import org.apache.commons.logging.Log; |
|
7 |
import org.apache.commons.logging.LogFactory; |
|
8 |
import org.junit.Before; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
import java.util.Map; |
|
12 |
|
|
13 |
public class DedupInspectorFunctionsTest { |
|
14 |
|
|
15 |
private static final Log log = LogFactory.getLog(DedupInspectorFunctionsTest.class); |
|
16 |
|
|
17 |
private String orgFromIndex = "<record>CAESigMIFCK7ARK4ARIlCiNJbnRlcm5hdGlvbmFsIFRlbm5pcyBGZWRlcmF0aW9uIEx0ZCoHCgVm\r\nYWxzZTIHCgVmYWxzZToHCgVmYWxzZUIHCgVmYWxzZUoHCgVmYWxzZVIHCgVmYWxzZVoHCgVmYWxz\r\nZWIHCgVmYWxzZWoHCgVmYWxzZXIHCgVmYWxzZYIBNAoCR0ISDlVuaXRlZCBLaW5nZG9tGg5kbmV0\r\nOmNvdW50cmllcyIOZG5ldDpjb3VudHJpZXNCMnJjdWtfX19fX19fXzo6ODYxOTIxQzMtNjcyMy00\r\nOUMwLUIyQ0UtRDcyODJBOUMxRTk2SkkKMTEwfG9wZW5haXJlX19fXzo6YWIyZDMzMTA3NDFlYTgw\r\nZDNiODcyNmY2NTE1MDI4NTgSFFJlc2VhcmNoIENvdW5jaWxzIFVLWgoyMDE3LTExLTA0YjEyMHxy\r\nY3VrX19fX19fX186OjVjYjFmZTZhYjg1NDcwMTBiMjJkOTAyN2U3MjUyMzVkagoyMDE5LTA1LTE5\r\nIqsBCAEQARoDMC45IiRkZWR1cC1zaW1pbGFyaXR5LW9yZ2FuaXphdGlvbi1zaW1wbGUqegoic3lz\r\naW1wb3J0OmNyb3Nzd2FsazplbnRpdHlyZWdpc3RyeRIic3lzaW1wb3J0OmNyb3Nzd2FsazplbnRp\r\ndHlyZWdpc3RyeRoXZG5ldDpwcm92ZW5hbmNlX2FjdGlvbnMiF2RuZXQ6cHJvdmVuYW5jZV9hY3Rp\r\nb25z\r\n\n</record>"; |
|
18 |
|
|
19 |
private DedupIndexDAO dao; |
|
20 |
|
|
21 |
@Before |
|
22 |
public void setUp() { |
|
23 |
dao = new DedupIndexDAO(); |
|
24 |
} |
|
25 |
|
|
26 |
@Test |
|
27 |
public void test_1() { |
|
28 |
|
|
29 |
Oaf oaf = dao.getXml2OafFunction().apply(orgFromIndex); |
|
30 |
System.out.println(oaf); |
|
31 |
|
|
32 |
|
|
33 |
Map<String, String> map = dao.getOaf2FieldMapFunction("organization", Lists.newArrayList("legalname", "legalshortname", "country", "websiteurl", "provenance")).apply(oaf); |
|
34 |
|
|
35 |
System.out.println(map); |
|
36 |
|
|
37 |
|
|
38 |
} |
|
39 |
|
|
40 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/test/resources/eu/dnetlib/msro/workflows/dedup/oaf_data.base64 | ||
---|---|---|
1 |
CAES6D0IMjrDDRLZAwpICghBcnRpY2xlcxI8CgdrZXl3b3JkEgdrZXl3b3JkGhNkbmV0OnJlc3VsdF9zdWJqZWN0IhNkbmV0OnJlc3VsdF9zdWJqZWN0Ev0BCrYBQW5hbHlzaXMgYW5kIFJlcG9ydCBvbiBPcmlnaW5hbCBEb2N1bWVudGFyeSBFdmlkZW5jZSBDb25jZXJuaW5nIHRoZSBVc2Ugb2YgT3BpdW0gaW4gSW5kaWE6IFtGdXJuaXNoZWQgdG8gdGhlIOKAnEJyaXRpc2ggTWVkaWNhbCBKb3VybmFs4oCdIGJ5IHVwd2FyZHMgb2YgMTAwIEluZGlhbiBNZWRpY2FsIE9mZmljZXJzLl0SQgoKbWFpbiB0aXRsZRIKbWFpbiB0aXRsZRoTZG5ldDpkYXRhQ2l0ZV90aXRsZSITZG5ldDpkYXRhQ2l0ZV90aXRsZSIMCgoxODk0LTAyLTE3QkoKC3B1YmxpY2F0aW9uEgtwdWJsaWNhdGlvbhoWZG5ldDpyZXN1bHRfdHlwb2xvZ2llcyIWZG5ldDpyZXN1bHRfdHlwb2xvZ2llc2IuCgNlbmcSB0VuZ2xpc2gaDmRuZXQ6bGFuZ3VhZ2VzIg5kbmV0Omxhbmd1YWdlc8oBAgoAMvgBGjkKBE9QRU4SC09wZW4gQWNjZXNzGhFkbmV0OmFjY2Vzc19tb2RlcyIRZG5ldDphY2Nlc3NfbW9kZXMiRQoEMDAwMRIHQXJ0aWNsZRoZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSIZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxKKGh0dHA6Ly9ldXJvcGVwbWMub3JnL2FydGljbGVzL1BNQzI0MDM4NTgy+AEaOQoET1BFThILT3BlbiBBY2Nlc3MaEWRuZXQ6YWNjZXNzX21vZGVzIhFkbmV0OmFjY2Vzc19tb2RlcyJFCgQwMDAxEgdBcnRpY2xlGhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlIhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlKkoKMTEwfG9wZW5kb2FyX19fXzo6OGI2ZGQ3ZGI5YWY0OWU2NzMwNmZlYjU5YThiZGM1MmMSFUV1cm9wZSBQdWJNZWQgQ2VudHJhbEooaHR0cDovL2V1cm9wZXBtYy5vcmcvYXJ0aWNsZXMvUE1DMjQwMzU3NzL4ARo5CgRPUEVOEgtPcGVuIEFjY2VzcxoRZG5ldDphY2Nlc3NfbW9kZXMiEWRuZXQ6YWNjZXNzX21vZGVzIkUKBDAwMDESB0FydGljbGUaGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UiGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UqSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsSihodHRwOi8vZXVyb3BlcG1jLm9yZy9hcnRpY2xlcy9QTUMyNDAzMzQyMvgBGjkKBE9QRU4SC09wZW4gQWNjZXNzGhFkbmV0OmFjY2Vzc19tb2RlcyIRZG5ldDphY2Nlc3NfbW9kZXMiRQoEMDAwMRIHQXJ0aWNsZRoZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSIZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxKKGh0dHA6Ly9ldXJvcGVwbWMub3JnL2FydGljbGVzL1BNQzI0MDM3OTMy+AEaOQoET1BFThILT3BlbiBBY2Nlc3MaEWRuZXQ6YWNjZXNzX21vZGVzIhFkbmV0OmFjY2Vzc19tb2RlcyJFCgQwMDAxEgdBcnRpY2xlGhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlIhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlKkoKMTEwfG9wZW5kb2FyX19fXzo6OGI2ZGQ3ZGI5YWY0OWU2NzMwNmZlYjU5YThiZGM1MmMSFUV1cm9wZSBQdWJNZWQgQ2VudHJhbEooaHR0cDovL2V1cm9wZXBtYy5vcmcvYXJ0aWNsZXMvUE1DMjQwMzQ1MEIYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNzkyQhhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM4MzdCGG9haTpldXJvcGVwbWMub3JnOjk5MzYyOUIYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNDYxQhhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM1MzZKSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsUkYKGG9haTpldXJvcGVwbWMub3JnOjk5Mzc5MhIqCgNvYWkSA29haRoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUjgKClBNQzI0MDMzNDISKgoDcG1jEgNwbWMaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1JGChhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM4MzcSKgoDb2FpEgNvYWkaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1I4CgpQTUMyNDAzNzkzEioKA3BtYxIDcG1jGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNSRgoYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNjI5EioKA29haRIDb2FpGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNSOAoKUE1DMjQwMzQ1MBIqCgNwbWMSA3BtYxoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUkYKGG9haTpldXJvcGVwbWMub3JnOjk5MzQ2MRIqCgNvYWkSA29haRoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUjgKClBNQzI0MDM1NzcSKgoDcG1jEgNwbWMaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1I4CgpQTUMyNDAzODU4EioKA3BtYxIDcG1jGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNSRgoYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNTM2EioKA29haRIDb2FpGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNaGDIwMTUtMDItMDZUMTM6MjU6MzMuODU0WmIxNTB8ZGVkdXBfd2ZfMDAxOjo0MTA0YTc3NDBhMTZhMjIzOWUzODcxYTEzYzZlNWQxY4IBlAgIMjrXBRLZAwpICghBcnRpY2xlcxI8CgdrZXl3b3JkEgdrZXl3b3JkGhNkbmV0OnJlc3VsdF9zdWJqZWN0IhNkbmV0OnJlc3VsdF9zdWJqZWN0Ev0BCrYBQW5hbHlzaXMgYW5kIFJlcG9ydCBvbiBPcmlnaW5hbCBEb2N1bWVudGFyeSBFdmlkZW5jZSBDb25jZXJuaW5nIHRoZSBVc2Ugb2YgT3BpdW0gaW4gSW5kaWE6IFtGdXJuaXNoZWQgdG8gdGhlIOKAnEJyaXRpc2ggTWVkaWNhbCBKb3VybmFs4oCdIGJ5IHVwd2FyZHMgb2YgMTAwIEluZGlhbiBNZWRpY2FsIE9mZmljZXJzLl0SQgoKbWFpbiB0aXRsZRIKbWFpbiB0aXRsZRoTZG5ldDpkYXRhQ2l0ZV90aXRsZSITZG5ldDpkYXRhQ2l0ZV90aXRsZSIMCgoxODk0LTAyLTAzQkoKC3B1YmxpY2F0aW9uEgtwdWJsaWNhdGlvbhoWZG5ldDpyZXN1bHRfdHlwb2xvZ2llcyIWZG5ldDpyZXN1bHRfdHlwb2xvZ2llc2IuCgNlbmcSB0VuZ2xpc2gaDmRuZXQ6bGFuZ3VhZ2VzIg5kbmV0Omxhbmd1YWdlc8oBAgoAMvgBGjkKBE9QRU4SC09wZW4gQWNjZXNzGhFkbmV0OmFjY2Vzc19tb2RlcyIRZG5ldDphY2Nlc3NfbW9kZXMiRQoEMDAwMRIHQXJ0aWNsZRoZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSIZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxKKGh0dHA6Ly9ldXJvcGVwbWMub3JnL2FydGljbGVzL1BNQzI0MDM4NThCGG9haTpldXJvcGVwbWMub3JnOjk5MzgzN0pKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxSOAoKUE1DMjQwMzg1OBIqCgNwbWMSA3BtYxoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUkYKGG9haTpldXJvcGVwbWMub3JnOjk5MzgzNxIqCgNvYWkSA29haRoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzWhgyMDE1LTAyLTA2VDEzOjI0OjQzLjM1NVpiMTUwfG9kX19fX19fXzkwODo6OTQxMTFjYzhhNTYwMTgzOGQ5MjQ1NTFmZWViMTI4OTSKAQCCAZQICDI61wUS2QMKSAoIQXJ0aWNsZXMSPAoHa2V5d29yZBIHa2V5d29yZBoTZG5ldDpyZXN1bHRfc3ViamVjdCITZG5ldDpyZXN1bHRfc3ViamVjdBL9AQq2AUFuYWx5c2lzIGFuZCBSZXBvcnQgb24gT3JpZ2luYWwgRG9jdW1lbnRhcnkgRXZpZGVuY2UgQ29uY2VybmluZyB0aGUgVXNlIG9mIE9waXVtIGluIEluZGlhOiBbRnVybmlzaGVkIHRvIHRoZSDigJxCcml0aXNoIE1lZGljYWwgSm91cm5hbOKAnSBieSB1cHdhcmRzIG9mIDEwMCBJbmRpYW4gTWVkaWNhbCBPZmZpY2Vycy5dEkIKCm1haW4gdGl0bGUSCm1haW4gdGl0bGUaE2RuZXQ6ZGF0YUNpdGVfdGl0bGUiE2RuZXQ6ZGF0YUNpdGVfdGl0bGUiDAoKMTg5NC0wMi0xMEJKCgtwdWJsaWNhdGlvbhILcHVibGljYXRpb24aFmRuZXQ6cmVzdWx0X3R5cG9sb2dpZXMiFmRuZXQ6cmVzdWx0X3R5cG9sb2dpZXNiLgoDZW5nEgdFbmdsaXNoGg5kbmV0Omxhbmd1YWdlcyIOZG5ldDpsYW5ndWFnZXPKAQIKADL4ARo5CgRPUEVOEgtPcGVuIEFjY2VzcxoRZG5ldDphY2Nlc3NfbW9kZXMiEWRuZXQ6YWNjZXNzX21vZGVzIkUKBDAwMDESB0FydGljbGUaGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UiGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UqSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsSihodHRwOi8vZXVyb3BlcG1jLm9yZy9hcnRpY2xlcy9QTUMyNDAzNTc3QhhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM2MjlKSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsUjgKClBNQzI0MDM1NzcSKgoDcG1jEgNwbWMaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1JGChhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM2MjkSKgoDb2FpEgNvYWkaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1oYMjAxNS0wMi0wNlQxMzoyMDoyMi43ODhaYjE1MHxvZF9fX19fX185MDg6OmJhNGY4OGYwMjhiOWJkM2FlYzYxZjc2NGZkNTA4MGQzigEAggGUCAgyOtcFEtkDCkgKCEFydGljbGVzEjwKB2tleXdvcmQSB2tleXdvcmQaE2RuZXQ6cmVzdWx0X3N1YmplY3QiE2RuZXQ6cmVzdWx0X3N1YmplY3QS/QEKtgFBbmFseXNpcyBhbmQgUmVwb3J0IG9uIE9yaWdpbmFsIERvY3VtZW50YXJ5IEV2aWRlbmNlIENvbmNlcm5pbmcgdGhlIFVzZSBvZiBPcGl1bSBpbiBJbmRpYTogW0Z1cm5pc2hlZCB0byB0aGUg4oCcQnJpdGlzaCBNZWRpY2FsIEpvdXJuYWzigJ0gYnkgdXB3YXJkcyBvZiAxMDAgSW5kaWFuIE1lZGljYWwgT2ZmaWNlcnMuXRJCCgptYWluIHRpdGxlEgptYWluIHRpdGxlGhNkbmV0OmRhdGFDaXRlX3RpdGxlIhNkbmV0OmRhdGFDaXRlX3RpdGxlIgwKCjE4OTQtMDEtMDZCSgoLcHVibGljYXRpb24SC3B1YmxpY2F0aW9uGhZkbmV0OnJlc3VsdF90eXBvbG9naWVzIhZkbmV0OnJlc3VsdF90eXBvbG9naWVzYi4KA2VuZxIHRW5nbGlzaBoOZG5ldDpsYW5ndWFnZXMiDmRuZXQ6bGFuZ3VhZ2VzygECCgAy+AEaOQoET1BFThILT3BlbiBBY2Nlc3MaEWRuZXQ6YWNjZXNzX21vZGVzIhFkbmV0OmFjY2Vzc19tb2RlcyJFCgQwMDAxEgdBcnRpY2xlGhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlIhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlKkoKMTEwfG9wZW5kb2FyX19fXzo6OGI2ZGQ3ZGI5YWY0OWU2NzMwNmZlYjU5YThiZGM1MmMSFUV1cm9wZSBQdWJNZWQgQ2VudHJhbEooaHR0cDovL2V1cm9wZXBtYy5vcmcvYXJ0aWNsZXMvUE1DMjQwMzM0MkIYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNDYxSkoKMTEwfG9wZW5kb2FyX19fXzo6OGI2ZGQ3ZGI5YWY0OWU2NzMwNmZlYjU5YThiZGM1MmMSFUV1cm9wZSBQdWJNZWQgQ2VudHJhbFI4CgpQTUMyNDAzMzQyEioKA3BtYxIDcG1jGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNSRgoYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNDYxEioKA29haRIDb2FpGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNaGDIwMTUtMDItMDZUMTM6MjQ6NTQuMjE4WmIxNTB8b2RfX19fX19fOTA4Ojo0OGNiZmYxOTk1MDc0YzI0NmU2MGY1YzM5YmZiMzVjNooBAIIBlAgIMjrXBRLZAwpICghBcnRpY2xlcxI8CgdrZXl3b3JkEgdrZXl3b3JkGhNkbmV0OnJlc3VsdF9zdWJqZWN0IhNkbmV0OnJlc3VsdF9zdWJqZWN0Ev0BCrYBQW5hbHlzaXMgYW5kIFJlcG9ydCBvbiBPcmlnaW5hbCBEb2N1bWVudGFyeSBFdmlkZW5jZSBDb25jZXJuaW5nIHRoZSBVc2Ugb2YgT3BpdW0gaW4gSW5kaWE6IFtGdXJuaXNoZWQgdG8gdGhlIOKAnEJyaXRpc2ggTWVkaWNhbCBKb3VybmFs4oCdIGJ5IHVwd2FyZHMgb2YgMTAwIEluZGlhbiBNZWRpY2FsIE9mZmljZXJzLl0SQgoKbWFpbiB0aXRsZRIKbWFpbiB0aXRsZRoTZG5ldDpkYXRhQ2l0ZV90aXRsZSITZG5ldDpkYXRhQ2l0ZV90aXRsZSIMCgoxODk0LTAxLTEzQkoKC3B1YmxpY2F0aW9uEgtwdWJsaWNhdGlvbhoWZG5ldDpyZXN1bHRfdHlwb2xvZ2llcyIWZG5ldDpyZXN1bHRfdHlwb2xvZ2llc2IuCgNlbmcSB0VuZ2xpc2gaDmRuZXQ6bGFuZ3VhZ2VzIg5kbmV0Omxhbmd1YWdlc8oBAgoAMvgBGjkKBE9QRU4SC09wZW4gQWNjZXNzGhFkbmV0OmFjY2Vzc19tb2RlcyIRZG5ldDphY2Nlc3NfbW9kZXMiRQoEMDAwMRIHQXJ0aWNsZRoZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSIZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxKKGh0dHA6Ly9ldXJvcGVwbWMub3JnL2FydGljbGVzL1BNQzI0MDM3OTNCGG9haTpldXJvcGVwbWMub3JnOjk5Mzc5MkpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxSOAoKUE1DMjQwMzc5MxIqCgNwbWMSA3BtYxoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUkYKGG9haTpldXJvcGVwbWMub3JnOjk5Mzc5MhIqCgNvYWkSA29haRoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzWhgyMDE1LTAyLTA2VDEzOjI0OjQzLjEyMlpiMTUwfG9kX19fX19fXzkwODo6NDEwNGE3NzQwYTE2YTIyMzllMzg3MWExM2M2ZTVkMWOKAQCCAZQICDI61wUS2QMKSAoIQXJ0aWNsZXMSPAoHa2V5d29yZBIHa2V5d29yZBoTZG5ldDpyZXN1bHRfc3ViamVjdCITZG5ldDpyZXN1bHRfc3ViamVjdBL9AQq2AUFuYWx5c2lzIGFuZCBSZXBvcnQgb24gT3JpZ2luYWwgRG9jdW1lbnRhcnkgRXZpZGVuY2UgQ29uY2VybmluZyB0aGUgVXNlIG9mIE9waXVtIGluIEluZGlhOiBbRnVybmlzaGVkIHRvIHRoZSDigJxCcml0aXNoIE1lZGljYWwgSm91cm5hbOKAnSBieSB1cHdhcmRzIG9mIDEwMCBJbmRpYW4gTWVkaWNhbCBPZmZpY2Vycy5dEkIKCm1haW4gdGl0bGUSCm1haW4gdGl0bGUaE2RuZXQ6ZGF0YUNpdGVfdGl0bGUiE2RuZXQ6ZGF0YUNpdGVfdGl0bGUiDAoKMTg5NC0wMi0xN0JKCgtwdWJsaWNhdGlvbhILcHVibGljYXRpb24aFmRuZXQ6cmVzdWx0X3R5cG9sb2dpZXMiFmRuZXQ6cmVzdWx0X3R5cG9sb2dpZXNiLgoDZW5nEgdFbmdsaXNoGg5kbmV0Omxhbmd1YWdlcyIOZG5ldDpsYW5ndWFnZXPKAQIKADL4ARo5CgRPUEVOEgtPcGVuIEFjY2VzcxoRZG5ldDphY2Nlc3NfbW9kZXMiEWRuZXQ6YWNjZXNzX21vZGVzIkUKBDAwMDESB0FydGljbGUaGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UiGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UqSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsSihodHRwOi8vZXVyb3BlcG1jLm9yZy9hcnRpY2xlcy9QTUMyNDAzNDUwQhhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM1MzZKSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsUjgKClBNQzI0MDM0NTASKgoDcG1jEgNwbWMaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1JGChhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM1MzYSKgoDb2FpEgNvYWkaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1oYMjAxNS0wMi0wNlQxMzoyNTozMy44NTRaYjE1MHxvZF9fX19fX185MDg6OmMzYjJiMjA4OWE2YTJkMmUzNDZiZDg1YWQ5NmM3MzlligEAigEAIoEBCAEQABoDMC45IiJkZWR1cC1zaW1pbGFyaXR5LXJlc3VsdC1sZXZlbnN0ZWluKlIKD3N5c2ltcG9ydDpkZWR1cBIPc3lzaW1wb3J0OmRlZHVwGhZkbmV0OnByb3ZlbmFuY2VBY3Rpb25zIhZkbmV0OnByb3ZlbmFuY2VBY3Rpb25zKUT7BZlTAQAA |
modules/dnet-deduplication/branches/tree-dedup/src/test/resources/eu/dnetlib/msro/workflows/dedup/conf/organisation.conf.json | ||
---|---|---|
1 |
{ |
|
2 |
"wf" : { |
|
3 |
"threshold" : "0.9", |
|
4 |
"dedupRun" : "001", |
|
5 |
"entityType" : "organization", |
|
6 |
"orderField" : "legalname", |
|
7 |
"queueMaxSize" : "2000", |
|
8 |
"groupMaxSize" : "50", |
|
9 |
"slidingWindowSize" : "200", |
|
10 |
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ], |
|
11 |
"includeChildren" : "true" |
|
12 |
}, |
|
13 |
"pace" : { |
|
14 |
"clustering" : [ |
|
15 |
{ "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} }, |
|
16 |
{ "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } }, |
|
17 |
{ "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } } |
|
18 |
], |
|
19 |
"strictConditions" : [ |
|
20 |
{ "name" : "exactMatch", "fields" : [ "gridid" ] } |
|
21 |
], |
|
22 |
"conditions" : [ |
|
23 |
{ "name" : "exactMatch", "fields" : [ "country" ] }, |
|
24 |
{ "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] } |
|
25 |
], |
|
26 |
"model" : [ |
|
27 |
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/country/classid" }, |
|
28 |
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "false", "path" : "organization/metadata/legalshortname/value" }, |
|
29 |
{ "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} }, |
|
30 |
{ "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } }, |
|
31 |
{ "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" } |
|
32 |
], |
|
33 |
"blacklists" : { } |
|
34 |
} |
|
35 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/actions/PrepareConfiguredActionSetJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.actions; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import com.google.common.collect.Lists; |
|
7 |
import com.google.common.collect.Maps; |
|
8 |
import com.google.gson.Gson; |
|
9 |
import com.googlecode.sarasvati.Arc; |
|
10 |
import com.googlecode.sarasvati.NodeToken; |
|
11 |
import eu.dnetlib.actionmanager.set.RawSet; |
|
12 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
13 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
14 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
|
15 |
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestration; |
|
16 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
import org.apache.commons.logging.Log; |
|
19 |
import org.apache.commons.logging.LogFactory; |
|
20 |
import org.springframework.beans.factory.annotation.Autowired; |
|
21 |
|
|
22 |
/** |
|
23 |
* The Class PrepareConfiguredActionSetJobNode. |
|
24 |
*/ |
|
25 |
public class PrepareConfiguredActionSetJobNode extends SimpleJobNode { |
|
26 |
|
|
27 |
/** |
|
28 |
* logger. |
|
29 |
*/ |
|
30 |
private static final Log log = LogFactory.getLog(PrepareConfiguredActionSetJobNode.class); |
|
31 |
|
|
32 |
/** |
|
33 |
* The dedup config sequence param. |
|
34 |
*/ |
|
35 |
private String dedupConfigSequenceParam; |
|
36 |
|
|
37 |
/** |
|
38 |
* The job property. |
|
39 |
*/ |
|
40 |
private String jobProperty; |
|
41 |
|
|
42 |
/** |
|
43 |
* The action set path param name. |
|
44 |
*/ |
|
45 |
private String actionSetPathParam; |
|
46 |
|
|
47 |
/** |
|
48 |
* The service locator. |
|
49 |
*/ |
|
50 |
@Autowired |
|
51 |
private UniqueServiceLocator serviceLocator; |
|
52 |
|
|
53 |
/* |
|
54 |
* (non-Javadoc) |
|
55 |
* |
|
56 |
* @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken) |
|
57 |
*/ |
|
58 |
@Override |
|
59 |
protected String execute(final NodeToken token) throws Exception { |
|
60 |
|
|
61 |
final List<Map<String, String>> setList = Lists.newArrayList(); |
|
62 |
|
|
63 |
final Map<String, String> set = Maps.newHashMap(); |
|
64 |
|
|
65 |
final String actionSetId = getActionSetId(token); |
|
66 |
final ISLookUpService isLookUpService = serviceLocator.getService(ISLookUpService.class); |
|
67 |
final String basePath = isLookUpService.getResourceProfileByQuery( |
|
68 |
"/RESOURCE_PROFILE[./HEADER/RESOURCE_TYPE/@value='ActionManagerServiceResourceType']//SERVICE_PROPERTIES/PROPERTY[@key='basePath']/@value/string()"); |
|
69 |
if (StringUtils.isBlank(basePath)) { |
|
70 |
throw new IllegalStateException("missing basePath in ActionManagerService"); |
|
71 |
} |
|
72 |
|
|
73 |
final String actionSetDirectory = isLookUpService.getResourceProfileByQuery( |
|
74 |
"/RESOURCE_PROFILE[./HEADER/RESOURCE_TYPE/@value='ActionManagerSetDSResourceType' and .//SET/@id = '"+actionSetId+"']//SET/@ directory/string()"); |
|
75 |
|
|
76 |
if (StringUtils.isBlank(actionSetDirectory)) { |
|
77 |
throw new IllegalStateException("missing directory in ActionSet profile: " + actionSetId); |
|
78 |
} |
|
79 |
|
|
80 |
final String rawSetId = RawSet.newInstance().getId(); |
|
81 |
set.put("rawset", rawSetId); |
|
82 |
set.put("creationDate", DateUtils.now_ISO8601()); |
|
83 |
set.put("set", actionSetId); |
|
84 |
set.put("enabled", "true"); |
|
85 |
set.put("jobProperty", getJobProperty()); |
|
86 |
|
|
87 |
token.getEnv().setAttribute(set.get("jobProperty"), set.get("rawset")); |
|
88 |
|
|
89 |
final String path = basePath + "/" + actionSetDirectory + "/" + rawSetId; |
|
90 |
log.info("using action set path: " + path); |
|
91 |
token.getEnv().setAttribute(getActionSetPathParam(), path); |
|
92 |
|
|
93 |
setList.add(set); |
|
94 |
final String sets = new Gson().toJson(setList); |
|
95 |
log.debug("built set: " + sets); |
|
96 |
|
|
97 |
token.getEnv().setAttribute("sets", sets); |
|
98 |
|
|
99 |
return Arc.DEFAULT_ARC; |
|
100 |
} |
|
101 |
|
|
102 |
/** |
|
103 |
* Gets the action set id. |
|
104 |
* |
|
105 |
* @param token the token |
|
106 |
* @return the action set id |
|
107 |
*/ |
|
108 |
private String getActionSetId(final NodeToken token) { |
|
109 |
final String json = token.getEnv().getAttribute(getDedupConfigSequenceParam()); |
|
110 |
final DedupConfigurationOrchestration dco = DedupConfigurationOrchestration.fromJSON(json); |
|
111 |
final String actionSetId = dco.getActionSetId(); |
|
112 |
log.info("found actionSetId in workflow env: " + actionSetId); |
|
113 |
return actionSetId; |
|
114 |
} |
|
115 |
|
|
116 |
/** |
|
117 |
* Gets the dedup config sequence param. |
|
118 |
* |
|
119 |
* @return the dedup config sequence param |
|
120 |
*/ |
|
121 |
public String getDedupConfigSequenceParam() { |
|
122 |
return dedupConfigSequenceParam; |
|
123 |
} |
|
124 |
|
|
125 |
/** |
|
126 |
* Sets the dedup config sequence param. |
|
127 |
* |
|
128 |
* @param dedupConfigSequenceParam the new dedup config sequence param |
|
129 |
*/ |
|
130 |
public void setDedupConfigSequenceParam(final String dedupConfigSequenceParam) { |
|
131 |
this.dedupConfigSequenceParam = dedupConfigSequenceParam; |
|
132 |
} |
|
133 |
|
|
134 |
/** |
|
135 |
* Gets the job property. |
|
136 |
* |
|
137 |
* @return the job property |
|
138 |
*/ |
|
139 |
public String getJobProperty() { |
|
140 |
return jobProperty; |
|
141 |
} |
|
142 |
|
|
143 |
/** |
|
144 |
* Sets the job property. |
|
145 |
* |
|
146 |
* @param jobProperty the new job property |
|
147 |
*/ |
|
148 |
public void setJobProperty(final String jobProperty) { |
|
149 |
this.jobProperty = jobProperty; |
|
150 |
} |
|
151 |
|
|
152 |
public String getActionSetPathParam() { |
|
153 |
return actionSetPathParam; |
|
154 |
} |
|
155 |
|
|
156 |
public void setActionSetPathParam(final String actionSetPathParam) { |
|
157 |
this.actionSetPathParam = actionSetPathParam; |
|
158 |
} |
|
159 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/DedupDuplicateScanJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import org.apache.commons.collections.CollectionUtils; |
|
4 |
import org.apache.commons.logging.Log; |
|
5 |
import org.apache.commons.logging.LogFactory; |
|
6 |
|
|
7 |
import com.googlecode.sarasvati.Arc; |
|
8 |
import com.googlecode.sarasvati.Engine; |
|
9 |
import com.googlecode.sarasvati.NodeToken; |
|
10 |
|
|
11 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
|
12 |
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestration; |
|
13 |
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener; |
|
14 |
|
|
15 |
public class DedupDuplicateScanJobNode extends DedupConfigurationAwareJobNode { |
|
16 |
|
|
17 |
private static final Log log = LogFactory.getLog(DedupDuplicateScanJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
18 |
|
|
19 |
private class DedupBlackboardWorkflowJobListener extends BlackboardWorkflowJobListener { |
|
20 |
|
|
21 |
public DedupBlackboardWorkflowJobListener(final Engine engine, final NodeToken token) { |
|
22 |
super(engine, token); |
|
23 |
} |
|
24 |
|
|
25 |
@Override |
|
26 |
protected void onDone(final BlackboardJob job) { |
|
27 |
|
|
28 |
final DedupConfigurationOrchestration confs = dedupConfigurations(getToken()); |
|
29 |
|
|
30 |
confs.getConfigurations().poll(); |
|
31 |
|
|
32 |
log.info("checking dedup configs queue, size: " + confs.getConfigurations().size()); |
|
33 |
|
|
34 |
if (CollectionUtils.isEmpty(confs.getConfigurations())) { |
|
35 |
log.info("dedup similarity scan done"); |
|
36 |
super.complete(job, "done"); |
|
37 |
} else { |
|
38 |
log.debug("remaining confs: " + confs); |
|
39 |
|
|
40 |
getToken().getEnv().setAttribute(getDedupConfigSequenceParam(), confs.toString()); |
|
41 |
|
|
42 |
super.complete(job, Arc.DEFAULT_ARC); |
|
43 |
} |
|
44 |
} |
|
45 |
} |
|
46 |
|
|
47 |
@Override |
|
48 |
protected BlackboardWorkflowJobListener generateBlackboardListener(final Engine engine, final NodeToken token) { |
|
49 |
return new DedupBlackboardWorkflowJobListener(engine, token); |
|
50 |
} |
|
51 |
|
|
52 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestrationLoader.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup.conf; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Queue; |
|
6 |
|
|
7 |
import javax.annotation.Resource; |
|
8 |
|
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
import org.dom4j.Document; |
|
12 |
import org.dom4j.DocumentException; |
|
13 |
import org.dom4j.Element; |
|
14 |
import org.dom4j.io.SAXReader; |
|
15 |
|
|
16 |
import com.google.common.collect.Lists; |
|
17 |
|
|
18 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; |
|
19 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
20 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
21 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
22 |
import eu.dnetlib.pace.config.DedupConfig; |
|
23 |
|
|
24 |
/** |
|
25 |
* The Class DedupConfigurationOrchestrationLoader. |
|
26 |
*/ |
|
27 |
public class DedupConfigurationOrchestrationLoader { |
|
28 |
|
|
29 |
/** The Constant log. */ |
|
30 |
private static final Log log = LogFactory.getLog(DedupConfigurationOrchestrationLoader.class); |
|
31 |
|
|
32 |
/** The service locator. */ |
|
33 |
@Resource |
|
34 |
private UniqueServiceLocator serviceLocator; |
|
35 |
|
|
36 |
/** |
|
37 |
* Load the dedup orchestration profile from the IS. |
|
38 |
* |
|
39 |
* @param id |
|
40 |
* the id |
|
41 |
* @return the dedup configuration orchestration |
|
42 |
* @throws ISLookUpDocumentNotFoundException |
|
43 |
* the IS look up document not found exception |
|
44 |
* @throws ISLookUpException |
|
45 |
* the IS look up exception |
|
46 |
* @throws DocumentException |
|
47 |
* the document exception |
|
48 |
*/ |
|
49 |
public DedupConfigurationOrchestration loadByActionSetId(final String id) throws Exception { |
|
50 |
|
|
51 |
final ISLookUpService isLookUpService = serviceLocator.getService(ISLookUpService.class); |
|
52 |
|
|
53 |
final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", id); |
|
54 |
log.info("loading dedup orchestration: " + xquery); |
|
55 |
|
|
56 |
return parseOrchestrationProfile(isLookUpService, isLookUpService.getResourceProfileByQuery(xquery)); |
|
57 |
} |
|
58 |
|
|
59 |
public List<DedupConfigurationOrchestration> loadByEntityName(final String entityName) throws Exception { |
|
60 |
|
|
61 |
final ISLookUpService isLookUpService = serviceLocator.getService(ISLookUpService.class); |
|
62 |
|
|
63 |
final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ENTITY/@name = '%s']", entityName); |
|
64 |
log.info("loading dedup orchestration: " + xquery); |
|
65 |
|
|
66 |
final List<DedupConfigurationOrchestration> res = Lists.newArrayList(); |
|
67 |
|
|
68 |
for (final String profile : isLookUpService.quickSearchProfile(xquery)) { |
|
69 |
res.add(parseOrchestrationProfile(isLookUpService, profile)); |
|
70 |
} |
|
71 |
|
|
72 |
return res; |
|
73 |
} |
|
74 |
|
|
75 |
private DedupConfigurationOrchestration parseOrchestrationProfile(final ISLookUpService isLookUpService, final String dedupOrchestation) |
|
76 |
throws DocumentException, |
|
77 |
ISLookUpException, ISLookUpDocumentNotFoundException { |
|
78 |
final Document doc = new SAXReader().read(new StringReader(dedupOrchestation)); |
|
79 |
|
|
80 |
final Element e = (Element) doc.selectSingleNode("//DEDUPLICATION/ENTITY"); |
|
81 |
final Entity entity = new Entity(e.attributeValue("name"), e.attributeValue("code"), e.attributeValue("label")); |
|
82 |
|
|
83 |
final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); |
|
84 |
final Queue<DedupConfig> configurations = Lists.newLinkedList(); |
|
85 |
|
|
86 |
for (final Object o : doc.selectNodes("//SCAN_SEQUENCE/SCAN")) { |
|
87 |
configurations.add(loadConfig(isLookUpService, actionSetId, o)); |
|
88 |
} |
|
89 |
|
|
90 |
final DedupConfigurationOrchestration dco = new DedupConfigurationOrchestration(entity, actionSetId, configurations); |
|
91 |
|
|
92 |
log.debug("loaded dedup configuration orchestration: " + dco.toString()); |
|
93 |
log.info("loaded dedup configuration orchestration, size: " + dco.getConfigurations().size()); |
|
94 |
return dco; |
|
95 |
} |
|
96 |
|
|
97 |
private DedupConfig loadConfig(final ISLookUpService isLookUpService, final String actionSetId, final Object o) throws ISLookUpException, |
|
98 |
ISLookUpDocumentNotFoundException { |
|
99 |
final Element s = (Element) o; |
|
100 |
final String configProfileId = s.attributeValue("id"); |
|
101 |
final String conf = |
|
102 |
isLookUpService.getResourceProfileByQuery(String.format( |
|
103 |
"for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()", |
|
104 |
configProfileId)); |
|
105 |
log.debug("loaded dedup configuration from IS profile: " + conf); |
|
106 |
final DedupConfig dedupConfig = DedupConfig.load(conf); |
|
107 |
dedupConfig.getWf().setConfigurationId(actionSetId); |
|
108 |
return dedupConfig; |
|
109 |
} |
|
110 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/conf/Entity.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup.conf; |
|
2 |
|
|
3 |
import com.google.gson.GsonBuilder; |
|
4 |
|
|
5 |
/** |
|
6 |
* The Class Entity. |
|
7 |
*/ |
|
8 |
public class Entity { |
|
9 |
|
|
10 |
/** The name. */ |
|
11 |
private String name; |
|
12 |
|
|
13 |
/** The code. */ |
|
14 |
private String code; |
|
15 |
|
|
16 |
/** The label. */ |
|
17 |
private String label; |
|
18 |
|
|
19 |
public Entity() {} |
|
20 |
|
|
21 |
/** |
|
22 |
* Instantiates a new entity. |
|
23 |
* |
|
24 |
* @param name |
|
25 |
* the name |
|
26 |
* @param code |
|
27 |
* the code |
|
28 |
* @param label |
|
29 |
* the label |
|
30 |
*/ |
|
31 |
public Entity(final String name, final String code, final String label) { |
|
32 |
super(); |
|
33 |
this.setName(name); |
|
34 |
this.setCode(code); |
|
35 |
this.setLabel(label); |
|
36 |
} |
|
37 |
|
|
38 |
/** |
|
39 |
* Gets the name. |
|
40 |
* |
|
41 |
* @return the name |
|
42 |
*/ |
|
43 |
public String getName() { |
|
44 |
return name; |
|
45 |
} |
|
46 |
|
|
47 |
/** |
|
48 |
* Gets the code. |
|
49 |
* |
|
50 |
* @return the code |
|
51 |
*/ |
|
52 |
public String getCode() { |
|
53 |
return code; |
|
54 |
} |
|
55 |
|
|
56 |
/** |
|
57 |
* Gets the label. |
|
58 |
* |
|
59 |
* @return the label |
|
60 |
*/ |
|
61 |
public String getLabel() { |
|
62 |
return label; |
|
63 |
} |
|
64 |
|
|
65 |
public void setName(final String name) { |
|
66 |
this.name = name; |
|
67 |
} |
|
68 |
|
|
69 |
public void setCode(final String code) { |
|
70 |
this.code = code; |
|
71 |
} |
|
72 |
|
|
73 |
public void setLabel(final String label) { |
|
74 |
this.label = label; |
|
75 |
} |
|
76 |
|
|
77 |
/* |
|
78 |
* (non-Javadoc) |
|
79 |
* |
|
80 |
* @see java.lang.Object#toString() |
|
81 |
*/ |
|
82 |
@Override |
|
83 |
public String toString() { |
|
84 |
return new GsonBuilder().setPrettyPrinting().create().toJson(this); |
|
85 |
} |
|
86 |
|
|
87 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestration.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup.conf; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.util.Queue; |
|
5 |
|
|
6 |
import com.google.gson.Gson; |
|
7 |
import com.google.gson.GsonBuilder; |
|
8 |
|
|
9 |
import eu.dnetlib.pace.config.DedupConfig; |
|
10 |
import eu.dnetlib.pace.util.PaceException; |
|
11 |
import org.codehaus.jackson.map.ObjectMapper; |
|
12 |
|
|
13 |
/** |
|
14 |
* The Class DedupConfigurationOrchestration. |
|
15 |
*/ |
|
16 |
public class DedupConfigurationOrchestration { |
|
17 |
|
|
18 |
/** The entity. */ |
|
19 |
private Entity entity; |
|
20 |
|
|
21 |
/** The action set id. */ |
|
22 |
private String actionSetId; |
|
23 |
|
|
24 |
/** The configurations. */ |
|
25 |
private Queue<DedupConfig> configurations; |
|
26 |
|
|
27 |
public DedupConfigurationOrchestration() {} |
|
28 |
|
|
29 |
/** |
|
30 |
* Instantiates a new dedup configuration orchestration. |
|
31 |
* |
|
32 |
* @param entity |
|
33 |
* the entity |
|
34 |
* @param actionSetId |
|
35 |
* the action set id |
|
36 |
* @param configurations |
|
37 |
* the configurations |
|
38 |
*/ |
|
39 |
public DedupConfigurationOrchestration(final Entity entity, final String actionSetId, final Queue<DedupConfig> configurations) { |
|
40 |
super(); |
|
41 |
this.setEntity(entity); |
|
42 |
this.setActionSetId(actionSetId); |
|
43 |
this.setConfigurations(configurations); |
|
44 |
} |
|
45 |
|
|
46 |
/** |
|
47 |
* Gets the entity. |
|
48 |
* |
|
49 |
* @return the entity |
|
50 |
*/ |
|
51 |
public Entity getEntity() { |
|
52 |
return entity; |
|
53 |
} |
|
54 |
|
|
55 |
/** |
|
56 |
* Gets the action set id. |
|
57 |
* |
|
58 |
* @return the action set id |
|
59 |
*/ |
|
60 |
public String getActionSetId() { |
|
61 |
return actionSetId; |
|
62 |
} |
|
63 |
|
|
64 |
/** |
|
65 |
* Gets the configurations. |
|
66 |
* |
|
67 |
* @return the configurations |
|
68 |
*/ |
|
69 |
public Queue<DedupConfig> getConfigurations() { |
|
70 |
return configurations; |
|
71 |
} |
|
72 |
|
|
73 |
public void setEntity(final Entity entity) { |
|
74 |
this.entity = entity; |
|
75 |
} |
|
76 |
|
|
77 |
public void setActionSetId(final String actionSetId) { |
|
78 |
this.actionSetId = actionSetId; |
|
79 |
} |
|
80 |
|
|
81 |
public void setConfigurations(final Queue<DedupConfig> configurations) { |
|
82 |
this.configurations = configurations; |
|
83 |
} |
|
84 |
|
|
85 |
/** |
|
86 |
* From json. |
|
87 |
* |
|
88 |
* @param json |
|
89 |
* the json |
|
90 |
* @return the dedup configuration orchestration |
|
91 |
*/ |
|
92 |
public static DedupConfigurationOrchestration fromJSON(final String json) { |
|
93 |
try { |
|
94 |
return new ObjectMapper().readValue(json, DedupConfigurationOrchestration.class); |
|
95 |
} catch (IOException e) { |
|
96 |
throw new PaceException("unable to deserialise configuration", e); |
|
97 |
} |
|
98 |
} |
|
99 |
|
|
100 |
/* |
|
101 |
* (non-Javadoc) |
|
102 |
* |
|
103 |
* @see java.lang.Object#toString() |
|
104 |
*/ |
|
105 |
@Override |
|
106 |
public String toString() { |
|
107 |
try { |
|
108 |
return new ObjectMapper().writeValueAsString(this); |
|
109 |
} catch (IOException e) { |
|
110 |
throw new PaceException("unable to serialise configuration", e); |
|
111 |
} |
|
112 |
} |
|
113 |
|
|
114 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/ResetCountersJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import org.apache.commons.lang.StringUtils; |
|
6 |
import org.apache.commons.logging.Log; |
|
7 |
import org.apache.commons.logging.LogFactory; |
|
8 |
|
|
9 |
import com.google.common.base.Splitter; |
|
10 |
import com.googlecode.sarasvati.Arc; |
|
11 |
import com.googlecode.sarasvati.NodeToken; |
|
12 |
|
|
13 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
14 |
|
|
15 |
public class ResetCountersJobNode extends SimpleJobNode { |
|
16 |
|
|
17 |
private static final Log log = LogFactory.getLog(ResetCountersJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
18 |
|
|
19 |
private String attributesCSV; |
|
20 |
|
|
21 |
@Override |
|
22 |
protected String execute(final NodeToken token) throws Exception { |
|
23 |
|
|
24 |
if (StringUtils.isNotBlank(getAttributesCSV())) { |
|
25 |
|
|
26 |
log.info("got wf attributes CSV: " + getAttributesCSV()); |
|
27 |
|
|
28 |
final Splitter splitter = Splitter.on(",").trimResults().omitEmptyStrings(); |
|
29 |
final List<String> wfAttrs = splitter.splitToList(getAttributesCSV()); |
|
30 |
|
|
31 |
for (final String attr : wfAttrs) { |
|
32 |
resetWorkflowParam(token, attr); |
|
33 |
} |
|
34 |
|
|
35 |
} else { |
|
36 |
log.info("attribute list is empty, nothing to do here."); |
|
37 |
} |
|
38 |
|
|
39 |
return Arc.DEFAULT_ARC; |
|
40 |
} |
|
41 |
|
|
42 |
private void resetWorkflowParam(final NodeToken token, final String attribute) { |
|
43 |
final String count = token.getFullEnv().getAttribute(attribute); |
|
44 |
if (StringUtils.isNotBlank(count)) { |
|
45 |
log.info(String.format("found loop counter '%s', value '%s'", attribute, count)); |
|
46 |
token.getFullEnv().setAttribute(attribute, 0); |
|
47 |
|
|
48 |
log.info(String.format("set '%s', to 0", attribute)); |
|
49 |
} else { |
|
50 |
log.info("loop counter was not found in workflow env, nothing to do here."); |
|
51 |
} |
|
52 |
} |
|
53 |
|
|
54 |
public String getAttributesCSV() { |
|
55 |
return attributesCSV; |
|
56 |
} |
|
57 |
|
|
58 |
public void setAttributesCSV(final String attributesCSV) { |
|
59 |
this.attributesCSV = attributesCSV; |
|
60 |
} |
|
61 |
|
|
62 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/DedupGrouperJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import org.apache.commons.logging.Log; |
|
4 |
import org.apache.commons.logging.LogFactory; |
|
5 |
|
|
6 |
import com.googlecode.sarasvati.Arc; |
|
7 |
import com.googlecode.sarasvati.Engine; |
|
8 |
import com.googlecode.sarasvati.NodeToken; |
|
9 |
|
|
10 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
|
11 |
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener; |
|
12 |
|
|
13 |
public class DedupGrouperJobNode extends DedupConfigurationAwareJobNode { |
|
14 |
|
|
15 |
// TODO factor out this constant, it should be a configuration parameter |
|
16 |
public static final int DEDUP_GROUPER_MAX_LOOPS = 10; |
|
17 |
|
|
18 |
public static final String DEDUP_GROUPER_LOOPER = "dedup.grouper.looper"; |
|
19 |
public static final String DEDUP_GROUPER_CURR_WRITTEN_RELS = "dedup.grouper.written.rels"; |
|
20 |
public static final String DEDUP_GROUPER_PREV_WRITTEN_RELS = "dedup.grouper.prev.written.rels"; |
|
21 |
|
|
22 |
private static final Log log = LogFactory.getLog(DedupGrouperJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
23 |
|
|
24 |
private class DedupBlackboardWorkflowJobListener extends BlackboardWorkflowJobListener { |
|
25 |
|
|
26 |
public DedupBlackboardWorkflowJobListener(final Engine engine, final NodeToken token) { |
|
27 |
super(engine, token); |
|
28 |
} |
|
29 |
|
|
30 |
@Override |
|
31 |
protected void onDone(final BlackboardJob job) { |
|
32 |
|
|
33 |
final int times = currentIteration(getToken()); |
|
34 |
final String curr = job.getParameters().get(DEDUP_GROUPER_CURR_WRITTEN_RELS); |
|
35 |
|
|
36 |
if (times == 0) { |
|
37 |
getToken().getFullEnv().setAttribute(DEDUP_GROUPER_PREV_WRITTEN_RELS, -1); |
|
38 |
} |
|
39 |
|
|
40 |
if ((times >= DEDUP_GROUPER_MAX_LOOPS) || isStable(getToken(), curr)) { |
|
41 |
super.complete(job, "done"); |
|
42 |
} else { |
|
43 |
log.info("incrementing dedup.grouper.looper to " + (times + 1)); |
|
44 |
getToken().getFullEnv().setAttribute(DEDUP_GROUPER_LOOPER, times + 1); |
|
45 |
getToken().getFullEnv().setAttribute(DEDUP_GROUPER_PREV_WRITTEN_RELS, curr); |
|
46 |
super.complete(job, Arc.DEFAULT_ARC); |
|
47 |
} |
|
48 |
} |
|
49 |
} |
|
50 |
|
|
51 |
private int currentIteration(final NodeToken token) { |
|
52 |
try { |
|
53 |
final String sTimes = token.getFullEnv().getAttribute(DEDUP_GROUPER_LOOPER); |
|
54 |
log.info("read dedup.grouper.looper from fullEnv: '" + sTimes + "'"); |
|
55 |
return Integer.parseInt(sTimes); |
|
56 |
} catch (final NumberFormatException e) { |
|
57 |
log.info("got empty dedup.grouper.looper, initializing to 0"); |
|
58 |
return 0; |
|
59 |
} |
|
60 |
} |
|
61 |
|
|
62 |
private boolean isStable(final NodeToken token, final String sCurr) { |
|
63 |
final String sPrev = token.getFullEnv().getAttribute(DEDUP_GROUPER_PREV_WRITTEN_RELS); |
|
64 |
|
|
65 |
log.info("Comparing written rels, prev=" + sPrev + ", curr=" + sCurr); |
|
66 |
try { |
|
67 |
final boolean b = Integer.parseInt(sCurr) == Integer.parseInt(sPrev); |
|
68 |
if (b) { |
|
69 |
log.info(" --- The number of written rels is STABLE"); |
|
70 |
} |
|
71 |
return b; |
|
72 |
} catch (final Exception e) { |
|
73 |
log.error("Invalid parsing of written rels counters - curr: " + sCurr + ", prev: " + sPrev); |
|
74 |
return false; |
|
75 |
} |
|
76 |
} |
|
77 |
|
|
78 |
@Override |
|
79 |
protected BlackboardWorkflowJobListener generateBlackboardListener(final Engine engine, final NodeToken token) { |
|
80 |
return new DedupBlackboardWorkflowJobListener(engine, token); |
|
81 |
} |
|
82 |
|
|
83 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/QueryUserActionDbJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
|
|
5 |
import javax.annotation.Resource; |
|
6 |
import javax.xml.ws.wsaddressing.W3CEndpointReference; |
|
7 |
|
|
8 |
import org.apache.commons.io.IOUtils; |
|
9 |
import org.apache.commons.lang.StringUtils; |
|
10 |
import org.apache.commons.logging.Log; |
|
11 |
import org.apache.commons.logging.LogFactory; |
|
12 |
|
|
13 |
import com.googlecode.sarasvati.Arc; |
|
14 |
import com.googlecode.sarasvati.NodeToken; |
|
15 |
|
|
16 |
import eu.dnetlib.enabling.database.rmi.DatabaseService; |
|
17 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
18 |
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestration; |
|
19 |
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode; |
|
20 |
|
|
21 |
public class QueryUserActionDbJobNode extends AsyncJobNode { |
|
22 |
|
|
23 |
/** |
|
24 |
* logger. |
|
25 |
*/ |
|
26 |
private static final Log log = LogFactory.getLog(QueryUserActionDbJobNode.class); |
|
27 |
|
|
28 |
private String db; |
|
29 |
private String dbParam; |
|
30 |
private String dbProperty; |
|
31 |
|
|
32 |
private String sql; |
|
33 |
private String sqlForSize; |
|
34 |
private String xslt; |
|
35 |
private String outputEprParam; |
|
36 |
|
|
37 |
@Resource |
|
38 |
private UniqueServiceLocator serviceLocator; |
|
39 |
|
|
40 |
/** The dedup config sequence param. */ |
|
41 |
private String dedupConfigSequenceParam; |
|
42 |
|
|
43 |
@Override |
|
44 |
protected String execute(final NodeToken token) throws Exception { |
|
45 |
|
|
46 |
final String actionSetId = getActionSetId(token); |
|
47 |
if (StringUtils.isBlank(actionSetId)) throw new IllegalArgumentException("empty action set id"); |
|
48 |
|
|
49 |
final String sqlText = String.format(readFromClasspath(getSql()), actionSetId); |
|
50 |
log.info("executing query: " + sqlText); |
|
51 |
|
|
52 |
final String sqlTextForsize = String.format(StringUtils.isBlank(getSqlForSize()) ? "" : readFromClasspath(getSqlForSize()), actionSetId); |
|
53 |
if (!sqlTextForsize.isEmpty()) { |
|
54 |
log.info("using sql for size: " + sqlTextForsize); |
|
55 |
} |
|
56 |
|
|
57 |
W3CEndpointReference epr = null; |
|
58 |
|
|
59 |
final DatabaseService dbService = serviceLocator.getService(DatabaseService.class); |
|
60 |
|
|
61 |
if (StringUtils.isNotBlank(xslt)) { |
|
62 |
final String xsltText = IOUtils.toString(getClass().getResourceAsStream(xslt)); |
|
63 |
|
|
64 |
if (StringUtils.isBlank(sqlForSize)) { |
|
65 |
epr = dbService.xsltSearchSQL(findDb(token), sqlText, xsltText); |
|
66 |
} else { |
|
67 |
epr = dbService.alternativeXsltSearchSQL(findDb(token), sqlText, sqlTextForsize, xsltText); |
|
68 |
} |
|
69 |
} else { |
|
70 |
if (StringUtils.isBlank(sqlForSize)) { |
|
71 |
epr = dbService.searchSQL(findDb(token), sqlText); |
|
72 |
} else { |
|
73 |
epr = dbService.alternativeSearchSQL(findDb(token), sqlText, sqlTextForsize); |
|
74 |
} |
|
75 |
} |
|
76 |
|
|
77 |
token.getEnv().setAttribute(outputEprParam, epr.toString()); |
|
78 |
|
|
79 |
return Arc.DEFAULT_ARC; |
|
80 |
} |
|
81 |
|
|
82 |
private String findDb(final NodeToken token) { |
|
83 |
if ((dbParam != null) && !dbParam.isEmpty()) return token.getEnv().getAttribute(dbParam); |
|
84 |
else if ((dbProperty != null) && !dbProperty.isEmpty()) return getPropertyFetcher().getProperty(dbProperty); |
|
85 |
else return db; |
|
86 |
} |
|
87 |
|
|
88 |
private String readFromClasspath(final String path) throws IOException { |
|
89 |
return IOUtils.toString(getClass().getResourceAsStream(path)); |
|
90 |
} |
|
91 |
|
|
92 |
/** |
|
93 |
* Gets the action set id. |
|
94 |
* |
|
95 |
* @param token |
|
96 |
* the token |
|
97 |
* @return the action set id |
|
98 |
*/ |
|
99 |
private String getActionSetId(final NodeToken token) { |
|
100 |
final String json = token.getEnv().getAttribute(getDedupConfigSequenceParam()); |
|
101 |
final DedupConfigurationOrchestration dco = DedupConfigurationOrchestration.fromJSON(json); |
|
102 |
final String actionSetId = dco.getActionSetId(); |
|
103 |
log.info("found actionSetId in workflow env: " + actionSetId); |
|
104 |
return actionSetId; |
|
105 |
} |
|
106 |
|
|
107 |
public String getDb() { |
|
108 |
return db; |
|
109 |
} |
|
110 |
|
|
111 |
public void setDb(final String db) { |
|
112 |
this.db = db; |
|
113 |
} |
|
114 |
|
|
115 |
public String getDbParam() { |
|
116 |
return dbParam; |
|
117 |
} |
|
118 |
|
|
119 |
public void setDbParam(final String dbParam) { |
|
120 |
this.dbParam = dbParam; |
|
121 |
} |
|
122 |
|
|
123 |
public String getSql() { |
|
124 |
return sql; |
|
125 |
} |
|
126 |
|
|
127 |
public void setSql(final String sql) { |
|
128 |
this.sql = sql; |
|
129 |
} |
|
130 |
|
|
131 |
public String getXslt() { |
|
132 |
return xslt; |
|
133 |
} |
|
134 |
|
|
135 |
public void setXslt(final String xslt) { |
|
136 |
this.xslt = xslt; |
|
137 |
} |
|
138 |
|
|
139 |
public String getOutputEprParam() { |
|
140 |
return outputEprParam; |
|
141 |
} |
|
142 |
|
|
143 |
public void setOutputEprParam(final String outputEprParam) { |
|
144 |
this.outputEprParam = outputEprParam; |
|
145 |
} |
|
146 |
|
|
147 |
public String getDbProperty() { |
|
148 |
return dbProperty; |
|
149 |
} |
|
150 |
|
|
151 |
public void setDbProperty(final String dbProperty) { |
|
152 |
this.dbProperty = dbProperty; |
|
153 |
} |
|
154 |
|
|
155 |
public String getSqlForSize() { |
|
156 |
return sqlForSize; |
|
157 |
} |
|
158 |
|
|
159 |
public void setSqlForSize(final String sqlForSize) { |
|
160 |
this.sqlForSize = sqlForSize; |
|
161 |
} |
|
162 |
|
|
163 |
public String getDedupConfigSequenceParam() { |
|
164 |
return dedupConfigSequenceParam; |
|
165 |
} |
|
166 |
|
|
167 |
public void setDedupConfigSequenceParam(final String dedupConfigSequenceParam) { |
|
168 |
this.dedupConfigSequenceParam = dedupConfigSequenceParam; |
|
169 |
} |
|
170 |
|
|
171 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/PrepareDedupIndexJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import javax.annotation.Resource; |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
import org.springframework.beans.factory.annotation.Autowired; |
|
12 |
import org.springframework.beans.factory.annotation.Required; |
|
13 |
|
|
14 |
import com.google.common.collect.Iterables; |
|
15 |
import com.google.gson.Gson; |
|
16 |
import com.googlecode.sarasvati.Arc; |
|
17 |
import com.googlecode.sarasvati.NodeToken; |
|
18 |
|
|
19 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
20 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
21 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
22 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
|
23 |
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestrationLoader; |
|
24 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
25 |
|
|
26 |
public class PrepareDedupIndexJobNode extends SimpleJobNode { |
|
27 |
|
|
28 |
/** |
|
29 |
* logger. |
|
30 |
*/ |
|
31 |
private static final Log log = LogFactory.getLog(PrepareDedupIndexJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
32 |
|
|
33 |
@Resource |
|
34 |
private UniqueServiceLocator serviceLocator; |
|
35 |
|
|
36 |
@Autowired |
|
37 |
private DedupConfigurationOrchestrationLoader dedupOrchestrationLoader; |
|
38 |
|
|
39 |
private String rottenRecordsPathParam; |
|
40 |
|
|
41 |
private String hbaseTable; |
|
42 |
|
|
43 |
private String dedupConfig; |
|
44 |
|
|
45 |
@Override |
|
46 |
protected String execute(final NodeToken token) throws Exception { |
|
47 |
|
|
48 |
log.info("start preparing job"); |
|
49 |
|
|
50 |
final String fields = getFields(env("format", token), env("layout", token)); |
|
51 |
token.getEnv().setAttribute("index.fields", fields); |
|
52 |
|
|
53 |
if (!StringUtils.isBlank(getRottenRecordsPathParam())) { |
|
54 |
token.getEnv().setAttribute(getRottenRecordsPathParam(), "/tmp" + getFileName(token, "rottenrecords")); |
|
55 |
} |
|
56 |
|
|
57 |
token.getEnv().setAttribute("index.solr.url", getIndexSolrUrlZk()); |
|
58 |
token.getEnv().setAttribute("index.solr.collection", getCollectionName(token)); |
|
59 |
|
|
60 |
token.getEnv().setAttribute("index.shutdown.wait.time", getIndexSolrShutdownWait()); |
|
61 |
token.getEnv().setAttribute("index.buffer.flush.threshold", getIndexBufferFlushTreshold()); |
|
62 |
token.getEnv().setAttribute("index.solr.sim.mode", isFeedingSimulationMode()); |
|
63 |
|
|
64 |
token.getEnv().setAttribute("index.feed.timestamp", DateUtils.now_ISO8601()); |
|
65 |
|
|
66 |
@SuppressWarnings("unchecked") |
|
67 |
final List<Map<String, String>> sets = new Gson().fromJson(token.getEnv().getAttribute("sets"), List.class); |
|
68 |
|
|
69 |
token.getEnv().setAttribute("actionset", Iterables.getOnlyElement(sets).get("set")); |
|
70 |
token.getEnv().setAttribute("entityType", token.getEnv().getAttribute("entityType")); |
|
71 |
token.getEnv().setAttribute("entityTypeId", token.getEnv().getAttribute("entityTypeId")); |
|
72 |
|
|
73 |
return Arc.DEFAULT_ARC; |
|
74 |
} |
|
75 |
|
|
76 |
private String getFields(final String format, final String layout) throws ISLookUpException { |
|
77 |
return isLookup(String |
|
78 |
.format("<FIELDS>{for $x in collection('')//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value = 'MDFormatDSResourceType' and .//NAME='%s']//LAYOUT[@name='%s']/FIELDS/FIELD return $x[string(@path)]}</FIELDS>", |
|
79 |
format, layout)); |
|
80 |
} |
|
81 |
|
|
82 |
public String getIndexSolrUrlZk() throws ISLookUpException { |
|
83 |
return isLookup("for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='IndexServiceResourceType'] return $x//PROTOCOL[./@name='solr']/@address/string()"); |
|
84 |
} |
|
85 |
|
|
86 |
public String getIndexSolrShutdownWait() throws ISLookUpException { |
|
87 |
return queryForServiceProperty("solr:feedingShutdownTolerance"); |
|
88 |
} |
|
89 |
|
|
90 |
public String getIndexBufferFlushTreshold() throws ISLookUpException { |
|
91 |
return queryForServiceProperty("solr:feedingBufferFlushThreshold"); |
|
92 |
} |
|
93 |
|
|
94 |
public String isFeedingSimulationMode() throws ISLookUpException { |
|
95 |
return queryForServiceProperty("solr:feedingSimulationMode"); |
|
96 |
} |
|
97 |
|
|
98 |
private String queryForServiceProperty(final String key) throws ISLookUpException { |
|
99 |
return isLookup("for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='IndexServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" |
|
100 |
+ key + "']/@value/string()"); |
|
101 |
} |
|
102 |
|
|
103 |
private String isLookup(final String xquery) throws ISLookUpException { |
|
104 |
log.debug("quering for service property: " + xquery); |
|
105 |
final String res = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery); |
|
106 |
if (StringUtils.isBlank(res)) throw new IllegalStateException("unable to find unique service property, xquery: " + xquery); |
|
107 |
return res; |
|
108 |
} |
|
109 |
|
|
110 |
private String getFileName(final NodeToken token, final String fileNamePrefix) { |
|
111 |
return "/" + fileNamePrefix + "_" + getHbaseTable() + "_" + token.getEnv().getAttribute("format") + ".seq"; |
|
112 |
} |
|
113 |
|
|
114 |
private String getCollectionName(final NodeToken token) { |
|
115 |
return env("format", token) + "-" + env("layout", token) + "-" + env("interpretation", token); |
|
116 |
} |
|
117 |
|
|
118 |
private String env(final String s, final NodeToken token) { |
|
119 |
return token.getEnv().getAttribute(s); |
|
120 |
} |
|
121 |
|
|
122 |
public String getHbaseTable() { |
|
123 |
return hbaseTable; |
|
124 |
} |
|
125 |
|
|
126 |
@Required |
|
127 |
public void setHbaseTable(final String hbaseTable) { |
|
128 |
this.hbaseTable = hbaseTable; |
|
129 |
} |
|
130 |
|
|
131 |
public String getRottenRecordsPathParam() { |
|
132 |
return rottenRecordsPathParam; |
|
133 |
} |
|
134 |
|
|
135 |
public void setRottenRecordsPathParam(final String rottenRecordsPathParam) { |
|
136 |
this.rottenRecordsPathParam = rottenRecordsPathParam; |
|
137 |
} |
|
138 |
|
|
139 |
public String getDedupConfig() { |
|
140 |
return dedupConfig; |
|
141 |
} |
|
142 |
|
|
143 |
public void setDedupConfig(final String dedupConfig) { |
|
144 |
this.dedupConfig = dedupConfig; |
|
145 |
} |
|
146 |
|
|
147 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/DedupConfigurationAwareJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import com.googlecode.sarasvati.NodeToken; |
|
4 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
|
5 |
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestration; |
|
6 |
import eu.dnetlib.msro.workflows.hadoop.SubmitHadoopJobNode; |
|
7 |
import eu.dnetlib.pace.config.DedupConfig; |
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
|
|
12 |
public class DedupConfigurationAwareJobNode extends SubmitHadoopJobNode { |
|
13 |
|
|
14 |
private static final Log log = LogFactory.getLog(DedupConfigurationAwareJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
15 |
|
|
16 |
private String dedupConfigSequenceParam; |
|
17 |
|
|
18 |
@Override |
|
19 |
protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception { |
|
20 |
super.prepareJob(job, token); |
|
21 |
|
|
22 |
final DedupConfigurationOrchestration dedupConfigurations = dedupConfigurations(token); |
|
23 |
final DedupConfig currentConf = dedupConfigurations.getConfigurations().peek(); |
|
24 |
|
|
25 |
log.debug("using dedup configuration: '" + currentConf + "'"); |
|
26 |
|
|
27 |
job.getParameters().put("dedup.conf", currentConf.toString()); |
|
28 |
|
|
29 |
token.getEnv().setAttribute("dedup.conf", currentConf.toString()); |
|
30 |
} |
|
31 |
|
|
32 |
protected DedupConfigurationOrchestration dedupConfigurations(final NodeToken token) { |
|
33 |
final String configs = token.getFullEnv().getAttribute(getDedupConfigSequenceParam()); |
|
34 |
if ((configs == null) || configs.trim().isEmpty()) |
|
35 |
throw new IllegalStateException("Cannot find dedup configurations in workflow env: '" + getDedupConfigSequenceParam() + "'"); |
|
36 |
|
|
37 |
return DedupConfigurationOrchestration.fromJSON(configs); |
|
38 |
} |
|
39 |
|
|
40 |
protected String getEntityType(final NodeToken token) { |
|
41 |
final String entityType = token.getEnv().getAttribute("entityType"); |
|
42 |
if (StringUtils.isBlank(entityType)) throw new IllegalStateException("Cannot find 'entityType' parameter in workflow env."); |
|
43 |
return entityType; |
|
44 |
} |
|
45 |
|
|
46 |
// ////////// |
|
47 |
|
|
48 |
public String getDedupConfigSequenceParam() { |
|
49 |
return dedupConfigSequenceParam; |
|
50 |
} |
|
51 |
|
|
52 |
public void setDedupConfigSequenceParam(final String dedupConfigSequenceParam) { |
|
53 |
this.dedupConfigSequenceParam = dedupConfigSequenceParam; |
|
54 |
} |
|
55 |
|
|
56 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/FinalizeDedupIndexJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import static java.lang.String.format; |
|
4 |
|
|
5 |
import org.apache.commons.lang.StringUtils; |
|
6 |
import org.apache.commons.logging.Log; |
|
7 |
import org.apache.commons.logging.LogFactory; |
|
8 |
|
|
9 |
import com.googlecode.sarasvati.NodeToken; |
|
10 |
|
|
11 |
import eu.dnetlib.data.provision.index.rmi.IndexService; |
|
12 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; |
|
13 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
14 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
15 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
|
16 |
import eu.dnetlib.functionality.index.solr.feed.InputDocumentFactory; |
|
17 |
import eu.dnetlib.msro.rmi.MSROException; |
|
18 |
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode; |
|
19 |
|
|
20 |
public class FinalizeDedupIndexJobNode extends BlackboardJobNode { |
|
21 |
|
|
22 |
private static final Log log = LogFactory.getLog(FinalizeDedupIndexJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
23 |
|
|
24 |
@Override |
|
25 |
protected String obtainServiceId(final NodeToken token) { |
|
26 |
return getServiceLocator().getServiceId(IndexService.class); |
|
27 |
} |
|
28 |
|
|
29 |
@Override |
|
30 |
protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception { |
|
31 |
final String indexDsId = getEnvParam(token, "index_id"); |
|
32 |
|
|
33 |
log.info("preparing blackboard job DELETE_BY_QUERY index: " + indexDsId); |
|
34 |
|
|
35 |
final String backendId = getBackendId(indexDsId); |
|
36 |
if (StringUtils.isBlank(backendId)) |
|
37 |
throw new MSROException("empty index backend Id"); |
|
38 |
|
|
39 |
job.setAction("DELETE_BY_QUERY"); |
|
40 |
job.getParameters().put("id", indexDsId); |
|
41 |
job.getParameters().put("backend_Id", backendId); |
|
42 |
job.getParameters().put("query", |
|
43 |
buildQuery(getEnvParam(token, "entityType"), getEnvParam(token, "index.feed.timestamp"), getEnvParam(token, "actionset"))); |
|
44 |
} |
|
45 |
|
|
46 |
private String buildQuery(final String entityType, final String version, final String actionset) { |
|
47 |
final String query = |
|
48 |
String.format("__dsversion:{* TO %s} AND oaftype:%s AND actionset:%s", InputDocumentFactory.getParsedDateField(version), entityType, actionset); |
|
49 |
|
|
50 |
log.info("delete by query: " + query); |
|
51 |
|
|
52 |
return query; |
|
53 |
} |
|
54 |
|
|
55 |
private String getEnvParam(final NodeToken token, final String name) throws MSROException { |
|
56 |
final String value = token.getEnv().getAttribute(name); |
|
57 |
|
|
58 |
if (StringUtils.isBlank(value)) |
|
59 |
throw new MSROException(format("unable to finalize index feeding, cannot find property '%s' in the workflow env.", name)); |
|
60 |
|
|
61 |
return value; |
|
62 |
} |
|
63 |
|
|
64 |
public String getBackendId(final String indexDsId) throws ISLookUpDocumentNotFoundException, ISLookUpException { |
|
65 |
return getServiceLocator().getService(ISLookUpService.class).getResourceProfileByQuery( |
|
66 |
"//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + indexDsId + "']//BACKEND/text()"); |
|
67 |
} |
|
68 |
|
|
69 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/SetHdfsPathJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import com.googlecode.sarasvati.Arc; |
|
4 |
import com.googlecode.sarasvati.NodeToken; |
|
5 |
|
|
6 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
7 |
|
|
8 |
public class SetHdfsPathJobNode extends SimpleJobNode { |
|
9 |
|
|
10 |
private String path; |
|
11 |
private String pathParam; |
|
12 |
|
|
13 |
@Override |
|
14 |
protected String execute(final NodeToken token) throws Exception { |
|
15 |
|
|
16 |
token.getEnv().setAttribute(getPathParam(), getPath()); |
|
17 |
|
|
18 |
return Arc.DEFAULT_ARC; |
|
19 |
} |
|
20 |
|
|
21 |
public String getPath() { |
|
22 |
return path; |
|
23 |
} |
|
24 |
|
|
25 |
public void setPath(final String path) { |
|
26 |
this.path = path; |
|
27 |
} |
|
28 |
|
|
29 |
public String getPathParam() { |
|
30 |
return pathParam; |
|
31 |
} |
|
32 |
|
|
33 |
public void setPathParam(final String pathParam) { |
|
34 |
this.pathParam = pathParam; |
|
35 |
} |
|
36 |
|
|
37 |
} |
modules/dnet-deduplication/branches/tree-dedup/src/main/java/eu/dnetlib/msro/workflows/dedup/DedupConfigurationSetterJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dedup; |
|
2 |
|
|
3 |
import org.apache.commons.lang.StringUtils; |
|
4 |
import org.springframework.beans.factory.annotation.Autowired; |
|
5 |
|
|
6 |
import com.googlecode.sarasvati.Arc; |
|
7 |
import com.googlecode.sarasvati.NodeToken; |
|
8 |
|
|
9 |
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestration; |
|
10 |
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestrationLoader; |
|
11 |
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode; |
|
12 |
|
|
13 |
public class DedupConfigurationSetterJobNode extends AsyncJobNode { |
|
14 |
|
|
15 |
private String dedupConfigSequence; |
|
16 |
|
|
17 |
private String dedupConfigSequenceParam; |
|
18 |
|
|
19 |
@Autowired |
|
20 |
private DedupConfigurationOrchestrationLoader dedupOrchestrationLoader; |
|
21 |
|
|
22 |
@Override |
|
23 |
protected String execute(final NodeToken token) throws Exception { |
|
24 |
|
|
25 |
if (StringUtils.isBlank(getDedupConfigSequence())) throw new IllegalArgumentException("missing configuration sequence"); |
|
26 |
|
|
27 |
final DedupConfigurationOrchestration dedupOrchestration = dedupOrchestrationLoader.loadByActionSetId(getDedupConfigSequence()); |
|
28 |
|
|
29 |
token.getEnv().setAttribute("entityType", dedupOrchestration.getEntity().getName()); |
|
30 |
token.getEnv().setAttribute("entityTypeId", dedupOrchestration.getEntity().getCode()); |
|
31 |
|
|
32 |
token.getEnv().setAttribute(getDedupConfigSequenceParam(), dedupOrchestration.toString()); |
|
33 |
|
Also available in: Unified diff
create branch for tree-dedup