Revision 33553
Added by Marek Horst over 9 years ago
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/deploy.info | ||
---|---|---|
1 |
[ |
|
2 |
{ |
|
3 |
"type_source": "SVN", |
|
4 |
"goal": "package -U -T 4C source:jar", |
|
5 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-collapsers/trunk/", |
|
6 |
"deploy_repository": "dnet4-snapshots", |
|
7 |
"version": "4", |
|
8 |
"mail": "m.horst@icm.edu.pl", |
|
9 |
"deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", |
|
10 |
"name": "icm-iis-collapsers" |
|
11 |
}, |
|
12 |
{ |
|
13 |
"type_source": "SVN", |
|
14 |
"goal": "clean verify -U -e -X", |
|
15 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-collapsers/trunk/", |
|
16 |
"nightly" : "true", |
|
17 |
"cron" : "H H * * *", |
|
18 |
"version": "4", |
|
19 |
"mail": "d.tkaczyk@icm.edu.pl,m.horst@icm.edu.pl", |
|
20 |
"name": "icm-iis-collapsers-embedded-integration-test" |
|
21 |
} |
|
22 |
] |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/basic/BestFilledCollapserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.basic; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.collapsers.SampleData; |
|
4 |
import eu.dnetlib.iis.importer.schemas.DocumentMetadata; |
|
5 |
import java.util.ArrayList; |
|
6 |
import java.util.List; |
|
7 |
import static org.junit.Assert.assertNull; |
|
8 |
import org.junit.Test; |
|
9 |
import org.python.google.common.collect.Lists; |
|
10 |
|
|
11 |
/** |
|
12 |
* |
|
13 |
* @author Dominika Tkaczyk |
|
14 |
* |
|
15 |
*/ |
|
16 |
public class BestFilledCollapserTest { |
|
17 |
|
|
18 |
public static final List<DocumentMetadata> emptyList = |
|
19 |
new ArrayList<DocumentMetadata>(); |
|
20 |
|
|
21 |
public static final List<DocumentMetadata> list1 = |
|
22 |
Lists.newArrayList(SampleData.metadataRecord11); |
|
23 |
|
|
24 |
public static final List<DocumentMetadata> list2 = |
|
25 |
Lists.newArrayList(SampleData.metadataRecord12); |
|
26 |
|
|
27 |
public static final List<DocumentMetadata> list123 = |
|
28 |
Lists.newArrayList(SampleData.metadataRecord11, SampleData.metadataRecord12, SampleData.metadataRecord13); |
|
29 |
|
|
30 |
public static final List<DocumentMetadata> list321 = |
|
31 |
Lists.newArrayList(SampleData.metadataRecord13, SampleData.metadataRecord12, SampleData.metadataRecord11); |
|
32 |
|
|
33 |
|
|
34 |
@Test |
|
35 |
public void testBestFilledEmpty() throws Exception { |
|
36 |
BestFilledCollapser<DocumentMetadata> collapser = new BestFilledCollapser<DocumentMetadata>(); |
|
37 |
|
|
38 |
assertNull(collapser.collapse(null)); |
|
39 |
assertNull(collapser.collapse(emptyList)); |
|
40 |
} |
|
41 |
|
|
42 |
@Test |
|
43 |
public void testBestFilledDefaultFieldSet() throws Exception { |
|
44 |
BestFilledCollapser<DocumentMetadata> collapser = new BestFilledCollapser<DocumentMetadata>(); |
|
45 |
|
|
46 |
SampleData.assertEqualRecords( |
|
47 |
list1, |
|
48 |
collapser.collapse(list1)); |
|
49 |
SampleData.assertEqualRecords( |
|
50 |
list1, |
|
51 |
collapser.collapse(list123)); |
|
52 |
SampleData.assertEqualRecords( |
|
53 |
list1, |
|
54 |
collapser.collapse(list321)); |
|
55 |
} |
|
56 |
|
|
57 |
@Test |
|
58 |
public void testBestFilled() throws Exception { |
|
59 |
BestFilledCollapser<DocumentMetadata> collapser = new BestFilledCollapser<DocumentMetadata>(); |
|
60 |
collapser.setFields(SampleData.significantFields); |
|
61 |
|
|
62 |
SampleData.assertEqualRecords( |
|
63 |
list1, |
|
64 |
collapser.collapse(list1)); |
|
65 |
SampleData.assertEqualRecords( |
|
66 |
list2, |
|
67 |
collapser.collapse(list123)); |
|
68 |
SampleData.assertEqualRecords( |
|
69 |
list2, |
|
70 |
collapser.collapse(list321)); |
|
71 |
} |
|
72 |
|
|
73 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/basic/DocumentTextCollapserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.basic; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.collapsers.SampleData; |
|
4 |
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText; |
|
5 |
import java.util.ArrayList; |
|
6 |
import java.util.List; |
|
7 |
import static org.junit.Assert.assertNull; |
|
8 |
import org.junit.Test; |
|
9 |
import org.python.google.common.collect.Lists; |
|
10 |
|
|
11 |
/** |
|
12 |
* |
|
13 |
* @author Dominika Tkaczyk |
|
14 |
* |
|
15 |
*/ |
|
16 |
public class DocumentTextCollapserTest { |
|
17 |
|
|
18 |
public static final List<DocumentText> oneElementList = |
|
19 |
Lists.newArrayList( |
|
20 |
DocumentText.newBuilder().setId("id").setText("This is text").build() |
|
21 |
); |
|
22 |
|
|
23 |
public static final List<DocumentText> list = |
|
24 |
Lists.newArrayList( |
|
25 |
DocumentText.newBuilder().setId("id").setText("This is text").build(), |
|
26 |
DocumentText.newBuilder().setId("id").setText("This is another text").build(), |
|
27 |
DocumentText.newBuilder().setId("id").setText("This is a duplicated text").build() |
|
28 |
); |
|
29 |
|
|
30 |
public static final List<DocumentText> collapsedList = |
|
31 |
Lists.newArrayList( |
|
32 |
DocumentText.newBuilder().setId("id").setText("This is text\n\nThis is another text\n\nThis is a duplicated text").build() |
|
33 |
); |
|
34 |
|
|
35 |
|
|
36 |
@Test |
|
37 |
public void testDocumentTextCollapserEmpty() throws Exception { |
|
38 |
DocumentTextCollapser collapser = new DocumentTextCollapser(); |
|
39 |
|
|
40 |
assertNull(collapser.collapse(null)); |
|
41 |
assertNull(collapser.collapse(new ArrayList<DocumentText>())); |
|
42 |
} |
|
43 |
|
|
44 |
@Test |
|
45 |
public void testDocumentTextCollapser() throws Exception { |
|
46 |
DocumentTextCollapser collapser = new DocumentTextCollapser(); |
|
47 |
|
|
48 |
SampleData.assertEqualRecords( |
|
49 |
oneElementList, |
|
50 |
collapser.collapse(oneElementList)); |
|
51 |
SampleData.assertEqualRecords( |
|
52 |
collapsedList, |
|
53 |
collapser.collapse(list)); |
|
54 |
} |
|
55 |
|
|
56 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/basic/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.basic; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* @author Dominika Tkaczyk |
|
10 |
* @author Michal Oniszczuk |
|
11 |
*/ |
|
12 |
@Category(IntegrationTest.class) |
|
13 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
14 |
|
|
15 |
@Test |
|
16 |
public void testDefaultWorkflow() throws Exception { |
|
17 |
runWorkflow("eu/dnetlib/iis/collapsers/basic_collapser/default/oozie_app"); |
|
18 |
} |
|
19 |
|
|
20 |
/* |
|
21 |
@Test |
|
22 |
public void testCitationWorkflow() throws Exception { |
|
23 |
runWorkflow("eu/dnetlib/iis/collapsers/collapser/citation/oozie_app"); |
|
24 |
} |
|
25 |
*/ |
|
26 |
@Test |
|
27 |
public void testDocumentTextWorkflow() throws Exception { |
|
28 |
runWorkflow("eu/dnetlib/iis/collapsers/basic_collapser/documenttext/oozie_app"); |
|
29 |
} |
|
30 |
|
|
31 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/basic/BestFilledMergingCollapserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.basic; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.collapsers.SampleData; |
|
4 |
import eu.dnetlib.iis.importer.schemas.DocumentMetadata; |
|
5 |
import java.util.ArrayList; |
|
6 |
import java.util.List; |
|
7 |
import static org.junit.Assert.assertNull; |
|
8 |
import org.junit.Test; |
|
9 |
import org.python.google.common.collect.Lists; |
|
10 |
|
|
11 |
/** |
|
12 |
* |
|
13 |
* @author Dominika Tkaczyk |
|
14 |
* |
|
15 |
*/ |
|
16 |
public class BestFilledMergingCollapserTest { |
|
17 |
|
|
18 |
public static final List<DocumentMetadata> emptyList = |
|
19 |
new ArrayList<DocumentMetadata>(); |
|
20 |
|
|
21 |
public static final List<DocumentMetadata> list1 = |
|
22 |
Lists.newArrayList(SampleData.metadataRecord11); |
|
23 |
|
|
24 |
public static final List<DocumentMetadata> mergedList12 = |
|
25 |
Lists.newArrayList(SampleData.mergedRecord1112); |
|
26 |
|
|
27 |
public static final List<DocumentMetadata> mergedList21 = |
|
28 |
Lists.newArrayList(SampleData.mergedRecord1211); |
|
29 |
|
|
30 |
public static final List<DocumentMetadata> list123 = |
|
31 |
Lists.newArrayList(SampleData.metadataRecord11, SampleData.metadataRecord12, SampleData.metadataRecord13); |
|
32 |
|
|
33 |
public static final List<DocumentMetadata> list321 = |
|
34 |
Lists.newArrayList(SampleData.metadataRecord13, SampleData.metadataRecord12, SampleData.metadataRecord11); |
|
35 |
|
|
36 |
|
|
37 |
@Test |
|
38 |
public void testBestFilledEmpty() throws Exception { |
|
39 |
BestFilledMergingCollapser<DocumentMetadata> collapser = new BestFilledMergingCollapser<DocumentMetadata>(); |
|
40 |
|
|
41 |
assertNull(collapser.collapse(null)); |
|
42 |
assertNull(collapser.collapse(emptyList)); |
|
43 |
} |
|
44 |
|
|
45 |
@Test |
|
46 |
public void testBestFilledMergingDefaultFieldSet() throws Exception { |
|
47 |
BestFilledMergingCollapser<DocumentMetadata> collapser = new BestFilledMergingCollapser<DocumentMetadata>(); |
|
48 |
|
|
49 |
SampleData.assertEqualRecords( |
|
50 |
list1, |
|
51 |
collapser.collapse(list1)); |
|
52 |
SampleData.assertEqualRecords( |
|
53 |
mergedList12, |
|
54 |
collapser.collapse(list321)); |
|
55 |
} |
|
56 |
|
|
57 |
@Test |
|
58 |
public void testBestFilledMerging() throws Exception { |
|
59 |
BestFilledMergingCollapser<DocumentMetadata> collapser = new BestFilledMergingCollapser<DocumentMetadata>(); |
|
60 |
collapser.setFields(SampleData.significantFields); |
|
61 |
|
|
62 |
SampleData.assertEqualRecords( |
|
63 |
list1, |
|
64 |
collapser.collapse(list1)); |
|
65 |
SampleData.assertEqualRecords( |
|
66 |
mergedList21, |
|
67 |
collapser.collapse(list321)); |
|
68 |
} |
|
69 |
|
|
70 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/CollapserUtilsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers; |
|
2 |
|
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.List; |
|
5 |
import org.apache.avro.generic.IndexedRecord; |
|
6 |
import static org.junit.Assert.*; |
|
7 |
import org.junit.Test; |
|
8 |
import org.python.google.common.collect.Lists; |
|
9 |
|
|
10 |
/** |
|
11 |
* |
|
12 |
* @author Dominika Tkaczyk |
|
13 |
* |
|
14 |
*/ |
|
15 |
public class CollapserUtilsTest { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testHaveEqualSchema() { |
|
19 |
assertTrue(CollapserUtils.haveEqualSchema(null)); |
|
20 |
assertTrue(CollapserUtils.haveEqualSchema(new ArrayList<IndexedRecord>())); |
|
21 |
assertTrue(CollapserUtils.haveEqualSchema( |
|
22 |
Lists.newArrayList((IndexedRecord)SampleData.envMetadataRecord11, SampleData.envMetadataRecord12))); |
|
23 |
assertFalse(CollapserUtils.haveEqualSchema( |
|
24 |
Lists.newArrayList((IndexedRecord)SampleData.envMetadataRecord11, SampleData.envMetadataRecord12, SampleData.envTextRecord))); |
|
25 |
} |
|
26 |
|
|
27 |
@Test |
|
28 |
public void testIsOriginSchema() { |
|
29 |
assertTrue(CollapserUtils.isEnvelopeSchema(SampleData.envMetadataRecord11.getSchema())); |
|
30 |
assertFalse(CollapserUtils.isEnvelopeSchema(SampleData.textRecord.getSchema())); |
|
31 |
} |
|
32 |
|
|
33 |
@Test |
|
34 |
public void testGetOriginValue() { |
|
35 |
assertEquals("origin1", CollapserUtils.getOriginValue(SampleData.envMetadataRecord11)); |
|
36 |
assertEquals("origin1", CollapserUtils.getOriginValue(SampleData.envTextRecord)); |
|
37 |
} |
|
38 |
|
|
39 |
@Test |
|
40 |
public void testGetDataRecord() { |
|
41 |
SampleData.assertEqualRecords( |
|
42 |
SampleData.metadataRecord11, |
|
43 |
CollapserUtils.getDataRecord(SampleData.envMetadataRecord11)); |
|
44 |
SampleData.assertEqualRecords( |
|
45 |
SampleData.textRecord, |
|
46 |
CollapserUtils.getDataRecord(SampleData.envTextRecord)); |
|
47 |
} |
|
48 |
|
|
49 |
@Test |
|
50 |
public void testGetNumberOfFilledFields() { |
|
51 |
assertEquals(8, CollapserUtils.getNumberOfFilledFields(SampleData.metadataRecord11, null)); |
|
52 |
assertEquals(2, CollapserUtils.getNumberOfFilledFields(SampleData.metadataRecord11, SampleData.significantFields)); |
|
53 |
} |
|
54 |
|
|
55 |
@Test |
|
56 |
public void testSortByFilledFields() { |
|
57 |
List<IndexedRecord> empty = new ArrayList<IndexedRecord>(); |
|
58 |
CollapserUtils.sortByFilledDataFields(empty, SampleData.significantFields); |
|
59 |
assertTrue(empty.isEmpty()); |
|
60 |
|
|
61 |
List<IndexedRecord> oneElement = Lists.newArrayList((IndexedRecord)SampleData.metadataRecord13); |
|
62 |
CollapserUtils.sortByFilledDataFields(oneElement, SampleData.significantFields); |
|
63 |
assertEquals(Lists.newArrayList(SampleData.metadataRecord13), |
|
64 |
oneElement); |
|
65 |
|
|
66 |
List<IndexedRecord> list = Lists.newArrayList( |
|
67 |
(IndexedRecord)SampleData.metadataRecord11, SampleData.metadataRecord12, SampleData.metadataRecord13); |
|
68 |
CollapserUtils.sortByFilledDataFields(list, SampleData.significantFields); |
|
69 |
assertEquals(Lists.newArrayList(SampleData.metadataRecord12, SampleData.metadataRecord11, SampleData.metadataRecord13), |
|
70 |
list); |
|
71 |
|
|
72 |
CollapserUtils.sortByFilledDataFields(list, null); |
|
73 |
assertEquals(Lists.newArrayList(SampleData.metadataRecord11, SampleData.metadataRecord12, SampleData.metadataRecord13), |
|
74 |
list); |
|
75 |
} |
|
76 |
|
|
77 |
@Test |
|
78 |
public void testMerge() { |
|
79 |
SampleData.assertEqualRecords( |
|
80 |
SampleData.mergedRecord1112, |
|
81 |
CollapserUtils.merge(SampleData.metadataRecord11, SampleData.metadataRecord12)); |
|
82 |
|
|
83 |
SampleData.assertEqualRecords( |
|
84 |
SampleData.mergedRecord2221, |
|
85 |
CollapserUtils.merge(SampleData.metadataRecord22, SampleData.metadataRecord21)); |
|
86 |
} |
|
87 |
|
|
88 |
@Test |
|
89 |
public void testGetNestedFieldValue() { |
|
90 |
assertNull(CollapserUtils.getNestedFieldValue(null, null)); |
|
91 |
assertNull(CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, null)); |
|
92 |
assertNull(CollapserUtils.getNestedFieldValue(null, "notnull")); |
|
93 |
|
|
94 |
assertNull(CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "field")); |
|
95 |
assertNull(CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "id.field")); |
|
96 |
|
|
97 |
assertEquals("id-1", CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "id")); |
|
98 |
assertEquals(1990, CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "year")); |
|
99 |
assertEquals(true, CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "publicationType.article")); |
|
100 |
|
|
101 |
assertEquals("id-1", CollapserUtils.getNestedFieldValue(SampleData.envMetadataRecord11, "data.id")); |
|
102 |
assertEquals(1990, CollapserUtils.getNestedFieldValue(SampleData.envMetadataRecord11, "data.year")); |
|
103 |
assertEquals(true, CollapserUtils.getNestedFieldValue(SampleData.envMetadataRecord11, "data.publicationType.article")); |
|
104 |
} |
|
105 |
|
|
106 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/union/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.union; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author Dominika Tkaczyk |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testWorkflow2Inputs() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/collapsers/union/input_2/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
@Test |
|
22 |
public void testWorkflow3Inputs() throws Exception { |
|
23 |
runWorkflow("eu/dnetlib/iis/collapsers/union/input_3/oozie_app"); |
|
24 |
} |
|
25 |
|
|
26 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/multiple_input/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.multiple_input; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author Dominika Tkaczyk |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testDefaultWorkflow() throws Exception { |
|
19 |
WorkflowConfiguration wc = new WorkflowConfiguration(); |
|
20 |
wc.setTimeoutInSeconds(720); |
|
21 |
runWorkflow("eu/dnetlib/iis/collapsers/multiple_input_collapser/default/oozie_app", wc); |
|
22 |
} |
|
23 |
|
|
24 |
@Test |
|
25 |
public void testDocumentTextWorkflow() throws Exception { |
|
26 |
WorkflowConfiguration wc = new WorkflowConfiguration(); |
|
27 |
wc.setTimeoutInSeconds(720); |
|
28 |
runWorkflow("eu/dnetlib/iis/collapsers/multiple_input_collapser/documenttext/oozie_app", wc); |
|
29 |
} |
|
30 |
|
|
31 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/PMCCitationCollapserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.origins; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import java.util.Arrays; |
|
7 |
import java.util.HashMap; |
|
8 |
import java.util.List; |
|
9 |
import java.util.Map; |
|
10 |
|
|
11 |
import org.junit.Test; |
|
12 |
|
|
13 |
import eu.dnetlib.iis.common.citations.schemas.Citation; |
|
14 |
import eu.dnetlib.iis.common.citations.schemas.CitationEntry; |
|
15 |
|
|
16 |
/** |
|
17 |
* {@link PMCCitationCollapser} test class. |
|
18 |
* @author mhorst |
|
19 |
* |
|
20 |
*/ |
|
21 |
public class PMCCitationCollapserTest { |
|
22 |
|
|
23 |
@Test |
|
24 |
public void testCollapsingWhenPmcTargetIdSet() throws Exception { |
|
25 |
PMCCitationCollapser collapser = new PMCCitationCollapser(); |
|
26 |
Map<String,List<Citation>> objects = new HashMap<String, List<Citation>>(); |
|
27 |
|
|
28 |
String sourceId = "sourceId"; |
|
29 |
String pmcTargetId = "pmcTargetId"; |
|
30 |
String cermineTargetId = "cermineTargetId"; |
|
31 |
String text = "citation text"; |
|
32 |
|
|
33 |
objects.put("ingested", Arrays.asList(new Citation[] { |
|
34 |
Citation |
|
35 |
.newBuilder() |
|
36 |
.setSourceDocumentId(sourceId) |
|
37 |
.setEntry( |
|
38 |
CitationEntry.newBuilder() |
|
39 |
.setConfidenceLevel(1f) |
|
40 |
.setDestinationDocumentId(pmcTargetId) |
|
41 |
.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>()) |
|
42 |
.setRawText(text).build()).build() |
|
43 |
})); |
|
44 |
objects.put("matched", Arrays.asList(new Citation[] { |
|
45 |
Citation |
|
46 |
.newBuilder() |
|
47 |
.setSourceDocumentId(sourceId) |
|
48 |
.setEntry( |
|
49 |
CitationEntry |
|
50 |
.newBuilder() |
|
51 |
.setConfidenceLevel(0.1f) |
|
52 |
.setDestinationDocumentId(cermineTargetId) |
|
53 |
.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>()) |
|
54 |
.setRawText(text).build()).build() |
|
55 |
})); |
|
56 |
|
|
57 |
List<Citation> results = collapser.collapseBetweenOrigins(objects); |
|
58 |
assertNotNull(results); |
|
59 |
assertEquals(1, results.size()); |
|
60 |
assertEquals(pmcTargetId,results.get(0).getEntry().getDestinationDocumentId()); |
|
61 |
assertEquals(new Float(1f),results.get(0).getEntry().getConfidenceLevel()); |
|
62 |
} |
|
63 |
|
|
64 |
@Test |
|
65 |
public void testCollapsingWhenPmcTargetIdNotSet() throws Exception { |
|
66 |
PMCCitationCollapser collapser = new PMCCitationCollapser(); |
|
67 |
Map<String,List<Citation>> objects = new HashMap<String, List<Citation>>(); |
|
68 |
|
|
69 |
String sourceId = "sourceId"; |
|
70 |
String cermineTargetId = "cermineTargetId"; |
|
71 |
String text = "citation text"; |
|
72 |
|
|
73 |
objects.put("ingested", Arrays.asList(new Citation[] { |
|
74 |
Citation |
|
75 |
.newBuilder() |
|
76 |
.setSourceDocumentId(sourceId) |
|
77 |
.setEntry( |
|
78 |
CitationEntry.newBuilder() |
|
79 |
.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>()) |
|
80 |
.setRawText(text).build()).build() |
|
81 |
})); |
|
82 |
objects.put("matched", Arrays.asList(new Citation[] { |
|
83 |
Citation |
|
84 |
.newBuilder() |
|
85 |
.setSourceDocumentId(sourceId) |
|
86 |
.setEntry( |
|
87 |
CitationEntry |
|
88 |
.newBuilder() |
|
89 |
.setConfidenceLevel(0.1f) |
|
90 |
.setDestinationDocumentId(cermineTargetId) |
|
91 |
.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>()) |
|
92 |
.setRawText(text).build()).build() |
|
93 |
})); |
|
94 |
|
|
95 |
List<Citation> results = collapser.collapseBetweenOrigins(objects); |
|
96 |
assertNotNull(results); |
|
97 |
assertEquals(1, results.size()); |
|
98 |
assertEquals(cermineTargetId,results.get(0).getEntry().getDestinationDocumentId()); |
|
99 |
assertEquals(new Float(0.1f),results.get(0).getEntry().getConfidenceLevel()); |
|
100 |
} |
|
101 |
|
|
102 |
@Test |
|
103 |
public void testCollapsingWithDifferentText() throws Exception { |
|
104 |
PMCCitationCollapser collapser = new PMCCitationCollapser(); |
|
105 |
Map<String,List<Citation>> objects = new HashMap<String, List<Citation>>(); |
|
106 |
|
|
107 |
String sourceId = "sourceId"; |
|
108 |
String pmcTargetId = "pmcTargetId"; |
|
109 |
String cermineTargetId = "cermineTargetId"; |
|
110 |
String textPmc = "pmc citation text"; |
|
111 |
String textCermine = "cermine citation text"; |
|
112 |
|
|
113 |
objects.put("ingested", Arrays.asList(new Citation[] { |
|
114 |
Citation |
|
115 |
.newBuilder() |
|
116 |
.setSourceDocumentId(sourceId) |
|
117 |
.setEntry( |
|
118 |
CitationEntry.newBuilder() |
|
119 |
.setConfidenceLevel(1f) |
|
120 |
.setDestinationDocumentId(pmcTargetId) |
|
121 |
.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>()) |
|
122 |
.setRawText(textPmc).build()).build() |
|
123 |
})); |
|
124 |
objects.put("matched", Arrays.asList(new Citation[] { |
|
125 |
Citation |
|
126 |
.newBuilder() |
|
127 |
.setSourceDocumentId(sourceId) |
|
128 |
.setEntry( |
|
129 |
CitationEntry |
|
130 |
.newBuilder() |
|
131 |
.setConfidenceLevel(0.1f) |
|
132 |
.setDestinationDocumentId(cermineTargetId) |
|
133 |
.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>()) |
|
134 |
.setRawText(textCermine).build()).build() |
|
135 |
})); |
|
136 |
|
|
137 |
List<Citation> results = collapser.collapseBetweenOrigins(objects); |
|
138 |
assertNotNull(results); |
|
139 |
assertEquals(2, results.size()); |
|
140 |
assertEquals(pmcTargetId,results.get(0).getEntry().getDestinationDocumentId()); |
|
141 |
assertEquals(new Float(1f),results.get(0).getEntry().getConfidenceLevel()); |
|
142 |
assertEquals(textPmc,results.get(0).getEntry().getRawText()); |
|
143 |
|
|
144 |
assertEquals(cermineTargetId,results.get(1).getEntry().getDestinationDocumentId()); |
|
145 |
assertEquals(new Float(0.1f),results.get(1).getEntry().getConfidenceLevel()); |
|
146 |
assertEquals(textCermine,results.get(1).getEntry().getRawText()); |
|
147 |
} |
|
148 |
} |
|
0 | 149 |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/OriginConfidenceMergingCollapserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.origins; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import eu.dnetlib.iis.collapsers.SampleData; |
|
5 |
import eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope; |
|
6 |
import eu.dnetlib.iis.importer.schemas.DocumentMetadata; |
|
7 |
import java.util.ArrayList; |
|
8 |
import java.util.List; |
|
9 |
import static org.junit.Assert.assertNull; |
|
10 |
import org.junit.Test; |
|
11 |
|
|
12 |
/** |
|
13 |
* |
|
14 |
* @author Dominika Tkaczyk |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class OriginConfidenceMergingCollapserTest { |
|
18 |
|
|
19 |
public static final List<DocumentMetadataEnvelope> emptyList = new ArrayList<DocumentMetadataEnvelope>(); |
|
20 |
|
|
21 |
public static final List<DocumentMetadataEnvelope> oneElementList = |
|
22 |
Lists.newArrayList(SampleData.envMetadataRecord11); |
|
23 |
|
|
24 |
public static final List<DocumentMetadata> mergedOneElementList = |
|
25 |
Lists.newArrayList(SampleData.metadataRecord11); |
|
26 |
|
|
27 |
public static final List<DocumentMetadataEnvelope> list = |
|
28 |
Lists.newArrayList(SampleData.envMetadataRecord11, SampleData.envMetadataRecord21); |
|
29 |
|
|
30 |
public static final List<DocumentMetadata> mergedList = |
|
31 |
Lists.newArrayList(SampleData.mergedRecord1121); |
|
32 |
|
|
33 |
|
|
34 |
@Test |
|
35 |
public void testOriginConfidenceMerging() throws Exception { |
|
36 |
OriginConfidenceMergingCollapser<DocumentMetadataEnvelope, DocumentMetadata> collapser = |
|
37 |
new OriginConfidenceMergingCollapser<DocumentMetadataEnvelope, DocumentMetadata>(); |
|
38 |
collapser.setOrigins(SampleData.origins); |
|
39 |
|
|
40 |
assertNull(collapser.collapse(null)); |
|
41 |
assertNull(collapser.collapse(emptyList)); |
|
42 |
SampleData.assertEqualRecords( |
|
43 |
mergedOneElementList, |
|
44 |
collapser.collapse(oneElementList)); |
|
45 |
SampleData.assertEqualRecords( |
|
46 |
mergedList, |
|
47 |
collapser.collapse(list)); |
|
48 |
} |
|
49 |
|
|
50 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/DocumentTextCollapserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.origins; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.collapsers.SampleData; |
|
4 |
import eu.dnetlib.iis.collapsers.schemas.DocumentTextEnvelope; |
|
5 |
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText; |
|
6 |
import java.util.ArrayList; |
|
7 |
import java.util.List; |
|
8 |
import static org.junit.Assert.assertNull; |
|
9 |
import org.junit.Test; |
|
10 |
import org.python.google.common.collect.Lists; |
|
11 |
|
|
12 |
/** |
|
13 |
* |
|
14 |
* @author Dominika Tkaczyk |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class DocumentTextCollapserTest { |
|
18 |
|
|
19 |
public static final List<String> origins = Lists.newArrayList("origin1", "origin2"); |
|
20 |
|
|
21 |
public static final DocumentTextEnvelope record1 = DocumentTextEnvelope.newBuilder() |
|
22 |
.setOrigin("origin1") |
|
23 |
.setData(DocumentText.newBuilder().setId("id").setText("This is text").build()) |
|
24 |
.build(); |
|
25 |
|
|
26 |
public static final DocumentTextEnvelope record2 = DocumentTextEnvelope.newBuilder() |
|
27 |
.setOrigin("origin2") |
|
28 |
.setData(DocumentText.newBuilder().setId("id").setText("This is text").build()) |
|
29 |
.build(); |
|
30 |
|
|
31 |
public static final DocumentTextEnvelope record3 = DocumentTextEnvelope.newBuilder() |
|
32 |
.setOrigin("origin2") |
|
33 |
.setData(DocumentText.newBuilder().setId("id").setText("This is another text").build()) |
|
34 |
.build(); |
|
35 |
|
|
36 |
public static final DocumentTextEnvelope record4 = DocumentTextEnvelope.newBuilder() |
|
37 |
.setOrigin("origin2") |
|
38 |
.setData(DocumentText.newBuilder().setId("id").setText("This is a duplicated text").build()) |
|
39 |
.build(); |
|
40 |
|
|
41 |
|
|
42 |
public static final DocumentText collapsed1 = DocumentText.newBuilder() |
|
43 |
.setId("id") |
|
44 |
.setText("This is text").build(); |
|
45 |
|
|
46 |
public static final DocumentText collapsed2 = DocumentText.newBuilder() |
|
47 |
.setId("id") |
|
48 |
.setText("This is text\n\nThis is text\n\nThis is another text\n\nThis is a duplicated text").build(); |
|
49 |
|
|
50 |
|
|
51 |
public static final List<DocumentTextEnvelope> oneElementList = |
|
52 |
Lists.newArrayList(record1); |
|
53 |
|
|
54 |
public static final List<DocumentText> collapsedOneElementList = |
|
55 |
Lists.newArrayList(collapsed1); |
|
56 |
|
|
57 |
public static final List<DocumentTextEnvelope> list = |
|
58 |
Lists.newArrayList(record1, record2, record3, record4); |
|
59 |
|
|
60 |
public static final List<DocumentText> collapsedList = |
|
61 |
Lists.newArrayList(collapsed2); |
|
62 |
|
|
63 |
|
|
64 |
@Test |
|
65 |
public void testDocumentTextCollapserEmpty() throws Exception { |
|
66 |
DocumentTextCollapser collapser = new DocumentTextCollapser(); |
|
67 |
collapser.setOrigins(origins); |
|
68 |
|
|
69 |
assertNull(collapser.collapse(null)); |
|
70 |
assertNull(collapser.collapse(new ArrayList<DocumentTextEnvelope>())); |
|
71 |
} |
|
72 |
|
|
73 |
@Test |
|
74 |
public void testDocumentTextCollapser() throws Exception { |
|
75 |
DocumentTextCollapser collapser = new DocumentTextCollapser(); |
|
76 |
collapser.setOrigins(origins); |
|
77 |
|
|
78 |
SampleData.assertEqualRecords( |
|
79 |
collapsedOneElementList, |
|
80 |
collapser.collapse(oneElementList)); |
|
81 |
SampleData.assertEqualRecords( |
|
82 |
collapsedList, |
|
83 |
collapser.collapse(list)); |
|
84 |
} |
|
85 |
|
|
86 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/OriginConfidenceCollapserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.origins; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.collapsers.SampleData; |
|
4 |
import eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope; |
|
5 |
import eu.dnetlib.iis.importer.schemas.DocumentMetadata; |
|
6 |
import java.util.ArrayList; |
|
7 |
import java.util.List; |
|
8 |
import static org.junit.Assert.assertNull; |
|
9 |
import org.junit.Test; |
|
10 |
import org.python.google.common.collect.Lists; |
|
11 |
|
|
12 |
/** |
|
13 |
* |
|
14 |
* @author Dominika Tkaczyk |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class OriginConfidenceCollapserTest { |
|
18 |
|
|
19 |
public static final List<DocumentMetadataEnvelope> emptyList = new ArrayList<DocumentMetadataEnvelope>(); |
|
20 |
|
|
21 |
public static final List<DocumentMetadataEnvelope> oneElementList = |
|
22 |
Lists.newArrayList(SampleData.envMetadataRecord11); |
|
23 |
|
|
24 |
public static final List<DocumentMetadata> mergedOneElementList = |
|
25 |
Lists.newArrayList(SampleData.metadataRecord11); |
|
26 |
|
|
27 |
public static final List<DocumentMetadataEnvelope> list = |
|
28 |
Lists.newArrayList(SampleData.envMetadataRecord11, SampleData.envMetadataRecord21); |
|
29 |
|
|
30 |
|
|
31 |
@Test |
|
32 |
public void testOriginConfidence() throws Exception { |
|
33 |
OriginConfidenceCollapser<DocumentMetadataEnvelope, DocumentMetadata> collapser = |
|
34 |
new OriginConfidenceCollapser<DocumentMetadataEnvelope, DocumentMetadata>(); |
|
35 |
collapser.setOrigins(SampleData.origins); |
|
36 |
|
|
37 |
assertNull(collapser.collapse(null)); |
|
38 |
assertNull(collapser.collapse(emptyList)); |
|
39 |
SampleData.assertEqualRecords( |
|
40 |
mergedOneElementList, |
|
41 |
collapser.collapse(oneElementList)); |
|
42 |
SampleData.assertEqualRecords( |
|
43 |
mergedOneElementList, |
|
44 |
collapser.collapse(list)); |
|
45 |
} |
|
46 |
|
|
47 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers.origins; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* @author Dominika Tkaczyk |
|
10 |
* @author Michal Oniszczuk |
|
11 |
*/ |
|
12 |
@Category(IntegrationTest.class) |
|
13 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
14 |
|
|
15 |
@Test |
|
16 |
public void testDefaultWorkflow() throws Exception { |
|
17 |
runWorkflow("eu/dnetlib/iis/collapsers/origins_collapser/default/oozie_app"); |
|
18 |
} |
|
19 |
|
|
20 |
/* |
|
21 |
@Test |
|
22 |
public void testCitationWorkflow() throws Exception { |
|
23 |
runWorkflow("eu/dnetlib/iis/collapsers/collapser/citation/oozie_app"); |
|
24 |
} |
|
25 |
*/ |
|
26 |
|
|
27 |
@Test |
|
28 |
public void testDocumentTextWorkflow() throws Exception { |
|
29 |
runWorkflow("eu/dnetlib/iis/collapsers/origins_collapser/documenttext/oozie_app"); |
|
30 |
} |
|
31 |
|
|
32 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/SampleData.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.collapsers; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope; |
|
4 |
import eu.dnetlib.iis.collapsers.schemas.DocumentTextEnvelope; |
|
5 |
import eu.dnetlib.iis.importer.schemas.DocumentMetadata; |
|
6 |
import eu.dnetlib.iis.importer.schemas.PublicationType; |
|
7 |
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText; |
|
8 |
import java.util.List; |
|
9 |
import org.apache.avro.generic.GenericData; |
|
10 |
import org.apache.avro.generic.IndexedRecord; |
|
11 |
import static org.junit.Assert.assertEquals; |
|
12 |
import static org.junit.Assert.assertTrue; |
|
13 |
import org.python.google.common.collect.Lists; |
|
14 |
|
|
15 |
/** |
|
16 |
* |
|
17 |
* @author Dominika Tkaczyk |
|
18 |
*/ |
|
19 |
public class SampleData { |
|
20 |
|
|
21 |
/* parameters */ |
|
22 |
|
|
23 |
public static final List<String> origins = Lists.newArrayList("origin1", "origin2"); |
|
24 |
|
|
25 |
public static final List<String> significantFields = Lists.newArrayList("title", "authorIds", "abstract", "journal", "year"); |
|
26 |
|
|
27 |
|
|
28 |
/* input records */ |
|
29 |
|
|
30 |
public static final DocumentMetadata metadataRecord11 = DocumentMetadata.newBuilder() |
|
31 |
.setId("id-1") |
|
32 |
.setAbstract$("abstract 1") |
|
33 |
.setLanguage("en") |
|
34 |
.setYear(1990) |
|
35 |
.setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2")) |
|
36 |
.setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2")) |
|
37 |
.setPublisher("publisher 1") |
|
38 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
39 |
.build(); |
|
40 |
|
|
41 |
public static final DocumentMetadataEnvelope envMetadataRecord11 = DocumentMetadataEnvelope.newBuilder() |
|
42 |
.setOrigin("origin1") |
|
43 |
.setData(metadataRecord11).build(); |
|
44 |
|
|
45 |
public static final DocumentMetadata metadataRecord12 = DocumentMetadata.newBuilder() |
|
46 |
.setId("id-1") |
|
47 |
.setAbstract$("abstract 2") |
|
48 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2")) |
|
49 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
50 |
.setTitle("title 2") |
|
51 |
.setYear(1991) |
|
52 |
.build(); |
|
53 |
|
|
54 |
public static final DocumentMetadataEnvelope envMetadataRecord12 = DocumentMetadataEnvelope.newBuilder() |
|
55 |
.setOrigin("origin1") |
|
56 |
.setData(metadataRecord12).build(); |
|
57 |
|
|
58 |
public static final DocumentMetadata metadataRecord13 = DocumentMetadata.newBuilder() |
|
59 |
.setId("id-1") |
|
60 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
61 |
.build(); |
|
62 |
|
|
63 |
public static final DocumentMetadataEnvelope envMetadataRecord13 = DocumentMetadataEnvelope.newBuilder() |
|
64 |
.setOrigin("origin1") |
|
65 |
.setData(metadataRecord13).build(); |
|
66 |
|
|
67 |
public static final DocumentMetadata metadataRecord21 = DocumentMetadata.newBuilder() |
|
68 |
.setId("id-1") |
|
69 |
.setAbstract$("abstract 3") |
|
70 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23")) |
|
71 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
72 |
.setTitle("title 3") |
|
73 |
.setYear(1999) |
|
74 |
.build(); |
|
75 |
|
|
76 |
public static final DocumentMetadataEnvelope envMetadataRecord21 = DocumentMetadataEnvelope.newBuilder() |
|
77 |
.setOrigin("origin2") |
|
78 |
.setData(metadataRecord21).build(); |
|
79 |
|
|
80 |
public static final DocumentMetadata metadataRecord22 = DocumentMetadata.newBuilder() |
|
81 |
.setId("id-1") |
|
82 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
83 |
.build(); |
|
84 |
|
|
85 |
public static final DocumentMetadataEnvelope envMetadataRecord22 = DocumentMetadataEnvelope.newBuilder() |
|
86 |
.setOrigin("origin2") |
|
87 |
.setData(metadataRecord22).build(); |
|
88 |
|
|
89 |
public static final DocumentText textRecord = DocumentText.newBuilder() |
|
90 |
.setId("text-1") |
|
91 |
.setText("text text") |
|
92 |
.build(); |
|
93 |
|
|
94 |
public static final DocumentTextEnvelope envTextRecord = DocumentTextEnvelope.newBuilder() |
|
95 |
.setOrigin("origin1") |
|
96 |
.setData(textRecord).build(); |
|
97 |
|
|
98 |
|
|
99 |
/* merged records */ |
|
100 |
|
|
101 |
public static final DocumentMetadata mergedRecord1112 = DocumentMetadata.newBuilder() |
|
102 |
.setId("id-1") |
|
103 |
.setAbstract$("abstract 1") |
|
104 |
.setLanguage("en") |
|
105 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2")) |
|
106 |
.setTitle("title 2") |
|
107 |
.setYear(1990) |
|
108 |
.setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2")) |
|
109 |
.setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2")) |
|
110 |
.setPublisher("publisher 1") |
|
111 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
112 |
.build(); |
|
113 |
|
|
114 |
public static final DocumentMetadata mergedRecord1211 = DocumentMetadata.newBuilder() |
|
115 |
.setId("id-1") |
|
116 |
.setAbstract$("abstract 2") |
|
117 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2")) |
|
118 |
.setTitle("title 2") |
|
119 |
.setLanguage("en") |
|
120 |
.setYear(1991) |
|
121 |
.setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2")) |
|
122 |
.setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2")) |
|
123 |
.setPublisher("publisher 1") |
|
124 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
125 |
.build(); |
|
126 |
|
|
127 |
public static final DocumentMetadata mergedRecord1121 = DocumentMetadata.newBuilder() |
|
128 |
.setId("id-1") |
|
129 |
.setAbstract$("abstract 1") |
|
130 |
.setLanguage("en") |
|
131 |
.setYear(1990) |
|
132 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23")) |
|
133 |
.setTitle("title 3") |
|
134 |
.setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2")) |
|
135 |
.setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2")) |
|
136 |
.setPublisher("publisher 1") |
|
137 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
138 |
.build(); |
|
139 |
|
|
140 |
public static final DocumentMetadata mergedRecord2221 = DocumentMetadata.newBuilder() |
|
141 |
.setId("id-1") |
|
142 |
.setAbstract$("abstract 3") |
|
143 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23")) |
|
144 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
145 |
.setTitle("title 3") |
|
146 |
.setYear(1999) |
|
147 |
.build(); |
|
148 |
|
|
149 |
|
|
150 |
/* collapsed records */ |
|
151 |
|
|
152 |
// within no merge, between no merge |
|
153 |
public static final DocumentMetadata recordWNoMergeBNoMerge = DocumentMetadata.newBuilder() |
|
154 |
.setId("id-1") |
|
155 |
.setAbstract$("abstract 2") |
|
156 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2")) |
|
157 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
158 |
.setTitle("title 2") |
|
159 |
.setYear(1991) |
|
160 |
.build(); |
|
161 |
|
|
162 |
// within merge, between no merge |
|
163 |
public static final DocumentMetadata recordWMergeBNoMerge = DocumentMetadata.newBuilder() |
|
164 |
.setId("id-1") |
|
165 |
.setAbstract$("abstract 2") |
|
166 |
.setLanguage("en") |
|
167 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2")) |
|
168 |
.setTitle("title 2") |
|
169 |
.setYear(1991) |
|
170 |
.setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2")) |
|
171 |
.setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2")) |
|
172 |
.setPublisher("publisher 1") |
|
173 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
174 |
.build(); |
|
175 |
|
|
176 |
// within no merge, between merge |
|
177 |
public static final DocumentMetadata recordWNoMergeBMerge = DocumentMetadata.newBuilder() |
|
178 |
.setId("id-1") |
|
179 |
.setAbstract$("abstract 2") |
|
180 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2")) |
|
181 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
182 |
.setTitle("title 2") |
|
183 |
.setYear(1991) |
|
184 |
.build(); |
|
185 |
|
|
186 |
// within merge, between merge |
|
187 |
public static final DocumentMetadata recordWMergeBMerge = DocumentMetadata.newBuilder() |
|
188 |
.setId("id-1") |
|
189 |
.setAbstract$("abstract 2") |
|
190 |
.setLanguage("en") |
|
191 |
.setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2")) |
|
192 |
.setTitle("title 2") |
|
193 |
.setYear(1991) |
|
194 |
.setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2")) |
|
195 |
.setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2")) |
|
196 |
.setPublisher("publisher 1") |
|
197 |
.setPublicationType(PublicationType.newBuilder().setArticle(true).build()) |
|
198 |
.build(); |
|
199 |
|
|
200 |
public static void assertEqualRecords(IndexedRecord expected, IndexedRecord actual) { |
|
201 |
assertEquals("Records are not equal: \nExpected: " + expected + "\nActual: " + actual + "\n", |
|
202 |
0, GenericData.get().compare(expected, actual, expected.getSchema())); |
|
203 |
} |
|
204 |
|
|
205 |
public static <T extends IndexedRecord> void assertEqualRecords(List<T> expected, List<T> actual) { |
|
206 |
assertEquals("Records lists have different sizes: " + expected.size() + " and " + actual.size() + "\n", |
|
207 |
expected.size(), actual.size()); |
|
208 |
List<T> actualCopy = Lists.newArrayList(actual); |
|
209 |
for (T exp : expected) { |
|
210 |
T found = null; |
|
211 |
for (T act : actualCopy) { |
|
212 |
if (0 == GenericData.get().compare(exp, act, exp.getSchema())) { |
|
213 |
found = act; |
|
214 |
} |
|
215 |
} |
|
216 |
assertTrue( |
|
217 |
"Expected record " + exp.toString() + " was not found among the actual records\n", |
|
218 |
found != null); |
|
219 |
actualCopy.remove(found); |
|
220 |
} |
|
221 |
} |
|
222 |
|
|
223 |
} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/documenttext/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/documenttext/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-basic_collapser_documenttext"> |
|
2 |
<start to="producer"/> |
|
3 |
<action name="producer"> |
|
4 |
<java> |
|
5 |
<job-tracker>${jobTracker}</job-tracker> |
|
6 |
<name-node>${nameNode}</name-node> |
|
7 |
<!-- The data generated by this node is deleted in this section --> |
|
8 |
<prepare> |
|
9 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
10 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
11 |
</prepare> |
|
12 |
<configuration> |
|
13 |
<property> |
|
14 |
<name>mapred.job.queue.name</name> |
|
15 |
<value>${queueName}</value> |
|
16 |
</property> |
|
17 |
</configuration> |
|
18 |
<!-- This is a simple wrapper for the Java code --> |
|
19 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
20 |
<!-- The business Java code that gets to be executed --> |
|
21 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
22 |
<!-- Specification of the output ports --> |
|
23 |
<arg>-C{input, |
|
24 |
eu.dnetlib.iis.metadataextraction.schemas.DocumentText, |
|
25 |
eu/dnetlib/iis/collapsers/basic_collapser/documenttext/data/texts.json}</arg> |
|
26 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
27 |
directory has to be specified as well --> |
|
28 |
<arg>-SworkingDir=${workingDir}/producer/working_dir</arg> |
|
29 |
<arg>-Oinput=${workingDir}/producer/input</arg> |
|
30 |
</java> |
|
31 |
<ok to="basic_collapser"/> |
|
32 |
<error to="fail"/> |
|
33 |
</action> |
|
34 |
<action name="basic_collapser"> |
|
35 |
<sub-workflow> |
|
36 |
<app-path>${wf:appPath()}/basic_collapser</app-path> |
|
37 |
<configuration> |
|
38 |
<property> |
|
39 |
<name>jobTracker</name> |
|
40 |
<value>${jobTracker}</value> |
|
41 |
</property> |
|
42 |
<property> |
|
43 |
<name>nameNode</name> |
|
44 |
<value>${nameNode}</value> |
|
45 |
</property> |
|
46 |
<property> |
|
47 |
<name>queueName</name> |
|
48 |
<value>${queueName}</value> |
|
49 |
</property> |
|
50 |
<!-- Working directory of the subworkflow --> |
|
51 |
<property> |
|
52 |
<name>workingDir</name> |
|
53 |
<value>${workingDir}/basic_collapser/working_dir</value> |
|
54 |
</property> |
|
55 |
<property> |
|
56 |
<name>blocking_field</name> |
|
57 |
<value>id</value> |
|
58 |
</property> |
|
59 |
<property> |
|
60 |
<name>record_collapser</name> |
|
61 |
<value>eu.dnetlib.iis.collapsers.basic.DocumentTextCollapser</value> |
|
62 |
</property> |
|
63 |
<property> |
|
64 |
<name>schema</name> |
|
65 |
<value>eu.dnetlib.iis.metadataextraction.schemas.DocumentText</value> |
|
66 |
</property> |
|
67 |
<!-- Input ports. --> |
|
68 |
<property> |
|
69 |
<name>input</name> |
|
70 |
<value>${workingDir}/producer/input</value> |
|
71 |
</property> |
|
72 |
<!-- Output port bound to given path --> |
|
73 |
<property> |
|
74 |
<name>output</name> |
|
75 |
<value>${workingDir}/basic_collapser/output</value> |
|
76 |
</property> |
|
77 |
</configuration> |
|
78 |
</sub-workflow> |
|
79 |
<ok to="consumer"/> |
|
80 |
<error to="fail"/> |
|
81 |
</action> |
|
82 |
<action name="consumer"> |
|
83 |
<java> |
|
84 |
<job-tracker>${jobTracker}</job-tracker> |
|
85 |
<name-node>${nameNode}</name-node> |
|
86 |
<!-- The data generated by this node is deleted in this section --> |
|
87 |
<prepare> |
|
88 |
<delete path="${nameNode}${workingDir}/consumer" /> |
|
89 |
<mkdir path="${nameNode}${workingDir}/consumer" /> |
|
90 |
</prepare> |
|
91 |
<configuration> |
|
92 |
<property> |
|
93 |
<name>mapred.job.queue.name</name> |
|
94 |
<value>${queueName}</value> |
|
95 |
</property> |
|
96 |
</configuration> |
|
97 |
<!-- This is a simple wrapper for the Java code --> |
|
98 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
99 |
<!-- The business Java code that gets to be executed --> |
|
100 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg> |
|
101 |
<!-- Specification of the input ports --> |
|
102 |
<arg>-C{output, |
|
103 |
eu.dnetlib.iis.metadataextraction.schemas.DocumentText, |
|
104 |
eu/dnetlib/iis/collapsers/basic_collapser/documenttext/data/output.json}</arg> |
|
105 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
106 |
directory has to be specified as well --> |
|
107 |
<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg> |
|
108 |
<arg>-Ioutput=${workingDir}/basic_collapser/output</arg> |
|
109 |
</java> |
|
110 |
<ok to="end" /> |
|
111 |
<error to="fail" /> |
|
112 |
</action> |
|
113 |
<kill name="fail"> |
|
114 |
<message>Unfortunately, the workflow failed -- error message: |
|
115 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
116 |
</kill> |
|
117 |
<end name="end"/> |
|
118 |
</workflow-app> |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/documenttext/data/texts.json | ||
---|---|---|
1 |
{"id": "id-6", "text": "This is a different text"} |
|
2 |
{"id": "id-3", "text": "This is another text"} |
|
3 |
{"id": "id-3", "text": "This is yet another text"} |
|
4 |
{"id": "id-6", "text": "This is a totally different text"} |
|
5 |
{"id": "id-7", "text": "This is a text"} |
|
6 |
{"id": "id-4", "text": "This is not a text"} |
|
7 |
{"id": "id-6", "text": "This is a duplicated text"} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/documenttext/data/output.json | ||
---|---|---|
1 |
{"id": "id-3", "text": "This is another text\n\nThis is yet another text"} |
|
2 |
{"id": "id-6", "text": "This is a different text\n\nThis is a totally different text\n\nThis is a duplicated text"} |
|
3 |
{"id": "id-7", "text": "This is a text"} |
|
4 |
{"id": "id-4", "text": "This is not a text"} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/default/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/default/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-basic_collapser_default"> |
|
2 |
<start to="producer"/> |
|
3 |
<action name="producer"> |
|
4 |
<java> |
|
5 |
<job-tracker>${jobTracker}</job-tracker> |
|
6 |
<name-node>${nameNode}</name-node> |
|
7 |
<!-- The data generated by this node is deleted in this section --> |
|
8 |
<prepare> |
|
9 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
10 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
11 |
</prepare> |
|
12 |
<configuration> |
|
13 |
<property> |
|
14 |
<name>mapred.job.queue.name</name> |
|
15 |
<value>${queueName}</value> |
|
16 |
</property> |
|
17 |
</configuration> |
|
18 |
<!-- This is a simple wrapper for the Java code --> |
|
19 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
20 |
<!-- The business Java code that gets to be executed --> |
|
21 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
22 |
<!-- Specification of the output ports --> |
|
23 |
<arg>-C{input, |
|
24 |
eu.dnetlib.iis.importer.schemas.DocumentMetadata, |
|
25 |
eu/dnetlib/iis/collapsers/basic_collapser/default/data/metadata.json}</arg> |
|
26 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
27 |
directory has to be specified as well --> |
|
28 |
<arg>-SworkingDir=${workingDir}/producer/working_dir</arg> |
|
29 |
<arg>-Oinput=${workingDir}/producer/input</arg> |
|
30 |
</java> |
|
31 |
<ok to="basic_collapser"/> |
|
32 |
<error to="fail"/> |
|
33 |
</action> |
|
34 |
<action name="basic_collapser"> |
|
35 |
<sub-workflow> |
|
36 |
<app-path>${wf:appPath()}/basic_collapser</app-path> |
|
37 |
<configuration> |
|
38 |
<property> |
|
39 |
<name>jobTracker</name> |
|
40 |
<value>${jobTracker}</value> |
|
41 |
</property> |
|
42 |
<property> |
|
43 |
<name>nameNode</name> |
|
44 |
<value>${nameNode}</value> |
|
45 |
</property> |
|
46 |
<property> |
|
47 |
<name>queueName</name> |
|
48 |
<value>${queueName}</value> |
|
49 |
</property> |
|
50 |
<!-- Working directory of the subworkflow --> |
|
51 |
<property> |
|
52 |
<name>workingDir</name> |
|
53 |
<value>${workingDir}/basic_collapser/working_dir</value> |
|
54 |
</property> |
|
55 |
<property> |
|
56 |
<name>blocking_field</name> |
|
57 |
<value>id</value> |
|
58 |
</property> |
|
59 |
<property> |
|
60 |
<name>schema</name> |
|
61 |
<value>eu.dnetlib.iis.importer.schemas.DocumentMetadata</value> |
|
62 |
</property> |
|
63 |
<property> |
|
64 |
<name>significant_fields</name> |
|
65 |
<value>title,authorIds,journal,year</value> |
|
66 |
</property> |
|
67 |
<!-- Input ports. --> |
|
68 |
<property> |
|
69 |
<name>input</name> |
|
70 |
<value>${workingDir}/producer/input</value> |
|
71 |
</property> |
|
72 |
<!-- Output port bound to given path --> |
|
73 |
<property> |
|
74 |
<name>output</name> |
|
75 |
<value>${workingDir}/basic_collapser/output</value> |
|
76 |
</property> |
|
77 |
</configuration> |
|
78 |
</sub-workflow> |
|
79 |
<ok to="consumer"/> |
|
80 |
<error to="fail"/> |
|
81 |
</action> |
|
82 |
<action name="consumer"> |
|
83 |
<java> |
|
84 |
<job-tracker>${jobTracker}</job-tracker> |
|
85 |
<name-node>${nameNode}</name-node> |
|
86 |
<!-- The data generated by this node is deleted in this section --> |
|
87 |
<prepare> |
|
88 |
<delete path="${nameNode}${workingDir}/consumer" /> |
|
89 |
<mkdir path="${nameNode}${workingDir}/consumer" /> |
|
90 |
</prepare> |
|
91 |
<configuration> |
|
92 |
<property> |
|
93 |
<name>mapred.job.queue.name</name> |
|
94 |
<value>${queueName}</value> |
|
95 |
</property> |
|
96 |
</configuration> |
|
97 |
<!-- This is a simple wrapper for the Java code --> |
|
98 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
99 |
<!-- The business Java code that gets to be executed --> |
|
100 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg> |
|
101 |
<!-- Specification of the input ports --> |
|
102 |
<arg>-C{output, |
|
103 |
eu.dnetlib.iis.importer.schemas.DocumentMetadata, |
|
104 |
eu/dnetlib/iis/collapsers/basic_collapser/default/data/output.json}</arg> |
|
105 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
106 |
directory has to be specified as well --> |
|
107 |
<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg> |
|
108 |
<arg>-Ioutput=${workingDir}/basic_collapser/output</arg> |
|
109 |
</java> |
|
110 |
<ok to="end" /> |
|
111 |
<error to="fail" /> |
|
112 |
</action> |
|
113 |
<kill name="fail"> |
|
114 |
<message>Unfortunately, the workflow failed -- error message: |
|
115 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
116 |
</kill> |
|
117 |
<end name="end"/> |
|
118 |
</workflow-app> |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/default/data/metadata.json | ||
---|---|---|
1 |
{"id": "id-1", "title": "Ender's Game", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": "Journal-1", "year": 2010, "publisher": "", "publicationType": {"article": false, "dataset": false}, "authorIds": ["id-1", "id-2", "id-3"], "datasourceIds": null} |
|
2 |
{"id": "id-2", "title": "Seventh Son (Tales of Alvin Maker)", "abstract": "The tales of Alvin Maker", "language": null, "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": "Journal", "year": 1993, "publisher": null, "publicationType": {"article": true, "dataset": false}, "authorIds": ["id"], "datasourceIds": null} |
|
3 |
{"id": "id-1", "title": "Enders game", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": "Journal-1", "year": null, "publisher": "", "publicationType": {"article": false, "dataset": false}, "authorIds": ["id-1", "id-256", "id-3"], "datasourceIds": null} |
|
4 |
{"id": "id-2", "title": "Seventh Son (Tales of Alvin Maker) 67", "abstract": "The tales of Alvin Maker", "language": null, "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": "Journal", "year": 1993, "publisher": null, "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null} |
|
5 |
{"id": "id-3", "title": "A Disk", "abstract": "A flat circular world carried on the back of a giant turtle - Discworld", "language": "eng", "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": null, "year": 2003, "publisher": "HarperTorch", "publicationType": {"article": true, "dataset": false}, "authorIds": ["id-1", "id-2"], "datasourceIds": null} |
|
6 |
{"id": "id-5", "title": "HArry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": null, "datasourceIds": null} |
|
7 |
{"id": "id-4", "title": "null", "abstract": null, "language": "pl", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null} |
|
8 |
{"id": "id-3", "title": null, "abstract": "A flat, circular world carried on the back of a giant turtle - Discworld", "language": "eng", "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": null, "year": 2003, "publisher": "HarperTorch", "publicationType": {"article": true, "dataset": false}, "authorIds": ["id-1", "id-2"], "datasourceIds": null} |
|
9 |
{"id": "id-5", "title": "Harry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": ["id"], "datasourceIds": null} |
|
10 |
{"id": "id-4", "title": null, "abstract": null, "language": "eng", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null} |
|
11 |
{"id": "id-8", "title": "Harry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": null, "datasourceIds": null} |
|
12 |
{"id": "id-6", "title": null, "abstract": null, "language": "eng", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/default/data/output.json | ||
---|---|---|
1 |
{"id": "id-1", "title": "Ender's Game", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": "Journal-1", "year": 2010, "publisher": "", "publicationType": {"article": false, "dataset": false}, "authorIds": ["id-1", "id-2", "id-3"], "datasourceIds": null} |
|
2 |
{"id": "id-2", "title": "Seventh Son (Tales of Alvin Maker)", "abstract": "The tales of Alvin Maker", "language": null, "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": "Journal", "year": 1993, "publisher": null, "publicationType": {"article": true, "dataset": false}, "authorIds": ["id"], "datasourceIds": null} |
|
3 |
{"id": "id-3", "title": "A Disk", "abstract": "A flat circular world carried on the back of a giant turtle - Discworld", "language": "eng", "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": null, "year": 2003, "publisher": "HarperTorch", "publicationType": {"article": true, "dataset": false}, "authorIds": ["id-1", "id-2"], "datasourceIds": null} |
|
4 |
{"id": "id-4", "title": "null", "abstract": null, "language": "pl", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null} |
|
5 |
{"id": "id-5", "title": "Harry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": ["id"], "datasourceIds": null} |
|
6 |
{"id": "id-6", "title": null, "abstract": null, "language": "eng", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null} |
|
7 |
{"id": "id-8", "title": "Harry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": null, "datasourceIds": null} |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/citation/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app |
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/citation/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-collapser_citation"> |
|
2 |
<start to="producer"/> |
|
3 |
<action name="producer"> |
|
4 |
<java> |
|
5 |
<job-tracker>${jobTracker}</job-tracker> |
|
6 |
<name-node>${nameNode}</name-node> |
|
7 |
<!-- The data generated by this node is deleted in this section --> |
|
8 |
<prepare> |
|
9 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
10 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
11 |
</prepare> |
|
12 |
<configuration> |
|
13 |
<property> |
|
14 |
<name>mapred.job.queue.name</name> |
|
15 |
<value>${queueName}</value> |
|
16 |
</property> |
|
17 |
</configuration> |
|
18 |
<!-- This is a simple wrapper for the Java code --> |
|
19 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
20 |
<!-- The business Java code that gets to be executed --> |
|
21 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
22 |
<!-- Specification of the output ports --> |
|
23 |
<arg>-C{input, |
|
24 |
eu.dnetlib.iis.collapsers.schemas.CitationEnvelope, |
|
25 |
eu/dnetlib/iis/collapsers/collapser/citation/data/citation.json}</arg> |
|
26 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
27 |
directory has to be specified as well --> |
|
28 |
<arg>-SworkingDir=${workingDir}/producer/working_dir</arg> |
|
29 |
<arg>-Oinput=${workingDir}/producer/input</arg> |
|
30 |
</java> |
|
31 |
<ok to="collapser"/> |
|
32 |
<error to="fail"/> |
|
33 |
</action> |
|
34 |
<action name="collapser"> |
|
35 |
<sub-workflow> |
|
36 |
<app-path>${wf:appPath()}/collapser</app-path> |
|
37 |
<configuration> |
|
38 |
<property> |
|
39 |
<name>jobTracker</name> |
|
40 |
<value>${jobTracker}</value> |
|
41 |
</property> |
|
42 |
<property> |
|
43 |
<name>nameNode</name> |
|
44 |
<value>${nameNode}</value> |
|
45 |
</property> |
|
46 |
<property> |
|
47 |
<name>queueName</name> |
|
48 |
<value>${queueName}</value> |
|
49 |
</property> |
|
50 |
<!-- Working directory of the subworkflow --> |
|
51 |
<property> |
|
52 |
<name>workingDir</name> |
|
53 |
<value>${workingDir}/collapser/working_dir</value> |
|
54 |
</property> |
|
55 |
<!-- Input ports & parameters. --> |
|
56 |
<property> |
|
57 |
<name>blocking_field</name> |
|
58 |
<value>sourceDocumentId</value> |
|
59 |
</property> |
|
60 |
<property> |
|
61 |
<name>origins</name> |
|
62 |
<value>orig,cermine</value> |
|
63 |
</property> |
|
64 |
<property> |
|
65 |
<name>schema_input</name> |
|
66 |
<value>eu.dnetlib.iis.collapsers.schemas.CitationEnvelope</value> |
|
67 |
</property> |
|
68 |
<property> |
|
69 |
<name>schema_output</name> |
|
70 |
<value>eu.dnetlib.iis.citationmatching.schemas.Citation</value> |
|
71 |
</property> |
|
72 |
<property> |
|
73 |
<name>input</name> |
|
74 |
<value>${workingDir}/producer/input</value> |
|
75 |
</property> |
|
76 |
<!-- Output port bound to given path --> |
|
77 |
<property> |
|
78 |
<name>output</name> |
|
79 |
<value>${workingDir}/collapser_collapser/output</value> |
|
80 |
</property> |
|
81 |
</configuration> |
|
82 |
</sub-workflow> |
|
83 |
<ok to="consumer"/> |
|
84 |
<error to="fail"/> |
|
85 |
</action> |
|
86 |
<action name="consumer"> |
|
87 |
<java> |
|
88 |
<job-tracker>${jobTracker}</job-tracker> |
|
89 |
<name-node>${nameNode}</name-node> |
|
90 |
<!-- The data generated by this node is deleted in this section --> |
|
91 |
<prepare> |
|
92 |
<delete path="${nameNode}${workingDir}/consumer" /> |
|
93 |
<mkdir path="${nameNode}${workingDir}/consumer" /> |
|
94 |
</prepare> |
|
95 |
<configuration> |
|
96 |
<property> |
|
97 |
<name>mapred.job.queue.name</name> |
|
98 |
<value>${queueName}</value> |
|
99 |
</property> |
Also available in: Unified diff
[maven-release-plugin] copy for tag icm-iis-collapsers-1.0.0