Revision 35252
Added by Marek Horst about 9 years ago
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/deploy.info | ||
---|---|---|
1 |
[ |
|
2 |
{ |
|
3 |
"type_source": "SVN", |
|
4 |
"goal": "package -U -T 4C source:jar", |
|
5 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-transformers/trunk/", |
|
6 |
"deploy_repository": "dnet4-snapshots", |
|
7 |
"version": "4", |
|
8 |
"mail": "m.horst@icm.edu.pl,d.tkaczyk@icm.edu.pl", |
|
9 |
"deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", |
|
10 |
"name": "icm-iis-transformers" |
|
11 |
}, |
|
12 |
{ |
|
13 |
"type_source": "SVN", |
|
14 |
"goal": "clean verify -U -e -X", |
|
15 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-transformers/trunk/", |
|
16 |
"nightly" : "true", |
|
17 |
"cron" : "H H * * *", |
|
18 |
"version": "4", |
|
19 |
"mail": "m.horst@icm.edu.pl", |
|
20 |
"name": "icm-iis-transformers-embedded-integration-test" |
|
21 |
} |
|
22 |
] |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/referenceextraction/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.referenceextraction; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author mhorst |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testWorkflow() throws Exception { |
|
19 |
WorkflowConfiguration wf = new WorkflowConfiguration(); |
|
20 |
wf.setTimeoutInSeconds(720); |
|
21 |
runWorkflow("eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/oozie_app", wf); |
|
22 |
} |
|
23 |
|
|
24 |
} |
|
0 | 25 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/export/documentmetadata/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.export.documentmetadata; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author mhorst |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testWorkflow() throws Exception { |
|
19 |
WorkflowConfiguration wf = new WorkflowConfiguration(); |
|
20 |
wf.setTimeoutInSeconds(720); |
|
21 |
runWorkflow("eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/oozie_app", wf); |
|
22 |
} |
|
23 |
|
|
24 |
} |
|
0 | 25 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.export.documenttodataset_without_imported_data; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author Dominika Tkaczyk |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testWorkflow() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.export.documenttoproject_without_imported_data; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author Dominika Tkaczyk |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testWorkflow() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/sampledataproducer/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.importer.documentmetadata.idextractor; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author mhorst |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testIdExtraction() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/sampledataproducer/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
} |
|
0 | 22 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/metadataextraction/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.metadataextraction; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author mhorst |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testChecksumPreprocessing() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/transformers/metadataextraction/checksum/preprocessing/sampledataproducer/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
@Test |
|
22 |
public void testChecksumPostprocessingText() throws Exception { |
|
23 |
runWorkflow("eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/text/sampledataproducer/oozie_app"); |
|
24 |
} |
|
25 |
|
|
26 |
@Test |
|
27 |
public void testChecksumPostprocessingMeta() throws Exception { |
|
28 |
runWorkflow("eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/meta/sampledataproducer/oozie_app"); |
|
29 |
} |
|
30 |
} |
|
0 | 31 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/IntegerFirstNotEmptyTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.udfs; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import java.io.IOException; |
|
5 |
import junit.framework.TestCase; |
|
6 |
import org.apache.pig.data.TupleFactory; |
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author Dominika Tkaczyk |
|
12 |
*/ |
|
13 |
public class IntegerFirstNotEmptyTest extends TestCase { |
|
14 |
|
|
15 |
@Test |
|
16 |
public void testUDF() throws IOException { |
|
17 |
IntegerFirstNotEmpty udf = new IntegerFirstNotEmpty(); |
|
18 |
TupleFactory factory = TupleFactory.getInstance(); |
|
19 |
|
|
20 |
assertNull(udf.exec(null)); |
|
21 |
assertNull(udf.exec(factory.newTuple())); |
|
22 |
assertNull(udf.exec(factory.newTuple((Integer) null))); |
|
23 |
assertEquals(125, (int)udf.exec(factory.newTuple(Integer.valueOf(125)))); |
|
24 |
assertNull(udf.exec(factory.newTuple(Lists.newArrayList()))); |
|
25 |
assertNull(udf.exec(factory.newTuple(Lists.newArrayList(null, null)))); |
|
26 |
assertEquals(23, (int)udf.exec(factory.newTuple(Lists.newArrayList(23, null, 256, 90)))); |
|
27 |
assertEquals(256, (int)udf.exec(factory.newTuple(Lists.newArrayList(null, null, null, 256, 567)))); |
|
28 |
} |
|
29 |
|
|
30 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/StringBagsMergerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.udfs; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import java.io.IOException; |
|
5 |
import junit.framework.TestCase; |
|
6 |
import org.apache.pig.data.BagFactory; |
|
7 |
import org.apache.pig.data.DataBag; |
|
8 |
import org.apache.pig.data.TupleFactory; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
/** |
|
12 |
* |
|
13 |
* @author Dominika Tkaczyk |
|
14 |
*/ |
|
15 |
public class StringBagsMergerTest extends TestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testUDF() throws IOException { |
|
19 |
StringBagsMerger udf = new StringBagsMerger(); |
|
20 |
TupleFactory tupleFactory = TupleFactory.getInstance(); |
|
21 |
BagFactory bagFactory = BagFactory.getInstance(); |
|
22 |
DataBag emptyBag = bagFactory.newDefaultBag(); |
|
23 |
DataBag bag1 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), |
|
24 |
tupleFactory.newTuple("tup2"))); |
|
25 |
DataBag bag2 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup3"), |
|
26 |
tupleFactory.newTuple("tup4"))); |
|
27 |
DataBag bag3 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), |
|
28 |
tupleFactory.newTuple("tup4"), |
|
29 |
tupleFactory.newTuple("tup5"))); |
|
30 |
DataBag bag4 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), |
|
31 |
tupleFactory.newTuple("tup2"), |
|
32 |
tupleFactory.newTuple("tup3"), |
|
33 |
tupleFactory.newTuple("tup4"), |
|
34 |
tupleFactory.newTuple("tup5"))); |
|
35 |
|
|
36 |
assertNull(udf.exec(null)); |
|
37 |
assertNull(udf.exec(tupleFactory.newTuple())); |
|
38 |
assertNull(udf.exec(tupleFactory.newTuple((DataBag)null))); |
|
39 |
assertNull(udf.exec(tupleFactory.newTuple(emptyBag))); |
|
40 |
assertEquals(bag1, udf.exec(tupleFactory.newTuple(bag1))); |
|
41 |
assertEquals(bag4, udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag1, emptyBag, bag2, bag3)))); |
|
42 |
} |
|
43 |
|
|
44 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/EmptyBagToNullTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.udfs; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import java.io.IOException; |
|
5 |
import junit.framework.TestCase; |
|
6 |
import org.apache.pig.data.BagFactory; |
|
7 |
import org.apache.pig.data.DataBag; |
|
8 |
import org.apache.pig.data.TupleFactory; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
/** |
|
12 |
* |
|
13 |
* @author Dominika Tkaczyk |
|
14 |
*/ |
|
15 |
public class EmptyBagToNullTest extends TestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testUDF() throws IOException { |
|
19 |
EmptyBagToNull udf = new EmptyBagToNull(); |
|
20 |
TupleFactory tupleFactory = TupleFactory.getInstance(); |
|
21 |
BagFactory bagFactory = BagFactory.getInstance(); |
|
22 |
|
|
23 |
DataBag emptyBag = bagFactory.newDefaultBag(); |
|
24 |
DataBag bag = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), tupleFactory.newTuple())); |
|
25 |
|
|
26 |
assertNull(udf.exec(null)); |
|
27 |
assertNull(udf.exec(tupleFactory.newTuple())); |
|
28 |
assertNull(udf.exec(tupleFactory.newTuple((DataBag)null))); |
|
29 |
assertNull(udf.exec(tupleFactory.newTuple(emptyBag))); |
|
30 |
assertEquals(bag, udf.exec(tupleFactory.newTuple(bag))); |
|
31 |
assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag, emptyBag)))); |
|
32 |
} |
|
33 |
|
|
34 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/NullToEmptyBagTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.udfs; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import java.io.IOException; |
|
5 |
import junit.framework.TestCase; |
|
6 |
import org.apache.pig.data.BagFactory; |
|
7 |
import org.apache.pig.data.DataBag; |
|
8 |
import org.apache.pig.data.TupleFactory; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
/** |
|
12 |
* |
|
13 |
* @author Dominika Tkaczyk |
|
14 |
*/ |
|
15 |
public class NullToEmptyBagTest extends TestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testUDF() throws IOException { |
|
19 |
NullToEmptyBag udf = new NullToEmptyBag(); |
|
20 |
TupleFactory tupleFactory = TupleFactory.getInstance(); |
|
21 |
BagFactory bagFactory = BagFactory.getInstance(); |
|
22 |
DataBag emptyBag = bagFactory.newDefaultBag(); |
|
23 |
DataBag bag = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), tupleFactory.newTuple())); |
|
24 |
|
|
25 |
assertNull(udf.exec(null)); |
|
26 |
assertNull(udf.exec(tupleFactory.newTuple())); |
|
27 |
assertEquals(emptyBag, udf.exec(tupleFactory.newTuple((DataBag)null))); |
|
28 |
assertEquals(emptyBag, udf.exec(tupleFactory.newTuple(emptyBag))); |
|
29 |
assertEquals(bag, udf.exec(tupleFactory.newTuple(bag))); |
|
30 |
assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag, emptyBag)))); |
|
31 |
} |
|
32 |
|
|
33 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/NullTupleFieldsToNullTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.udfs; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import java.io.IOException; |
|
5 |
import junit.framework.TestCase; |
|
6 |
import org.apache.pig.data.BagFactory; |
|
7 |
import org.apache.pig.data.DataBag; |
|
8 |
import org.apache.pig.data.Tuple; |
|
9 |
import org.apache.pig.data.TupleFactory; |
|
10 |
import org.junit.Test; |
|
11 |
|
|
12 |
/** |
|
13 |
* |
|
14 |
* @author Dominika Tkaczyk |
|
15 |
*/ |
|
16 |
public class NullTupleFieldsToNullTest extends TestCase { |
|
17 |
|
|
18 |
@Test |
|
19 |
public void testUDF() throws IOException { |
|
20 |
NullTupleFieldsToNull udf = new NullTupleFieldsToNull(); |
|
21 |
TupleFactory tupleFactory = TupleFactory.getInstance(); |
|
22 |
BagFactory bagFactory = BagFactory.getInstance(); |
|
23 |
|
|
24 |
DataBag emptyBag = bagFactory.newDefaultBag(); |
|
25 |
DataBag bag = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), null)); |
|
26 |
Tuple nullTuple = tupleFactory.newTuple(Lists.newArrayList(null, null, null)); |
|
27 |
Tuple tuple = tupleFactory.newTuple(Lists.newArrayList(null, null, "tup1")); |
|
28 |
|
|
29 |
assertNull(udf.exec(null)); |
|
30 |
assertNull(udf.exec(tupleFactory.newTuple())); |
|
31 |
assertNull(udf.exec(tupleFactory.newTuple((Tuple)null))); |
|
32 |
assertNull(udf.exec(tupleFactory.newTuple(tupleFactory.newTuple()))); |
|
33 |
assertNull(udf.exec(tupleFactory.newTuple(nullTuple))); |
|
34 |
assertEquals(tuple, udf.exec(tupleFactory.newTuple(tuple))); |
|
35 |
assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag, emptyBag)))); |
|
36 |
} |
|
37 |
|
|
38 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/StringMapsMergerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.udfs; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
|
|
5 |
import java.io.IOException; |
|
6 |
import java.util.HashMap; |
|
7 |
import java.util.Map; |
|
8 |
|
|
9 |
import junit.framework.TestCase; |
|
10 |
|
|
11 |
import org.apache.pig.data.TupleFactory; |
|
12 |
import org.junit.Test; |
|
13 |
|
|
14 |
/** |
|
15 |
* |
|
16 |
* @author Dominika Tkaczyk |
|
17 |
*/ |
|
18 |
public class StringMapsMergerTest extends TestCase { |
|
19 |
|
|
20 |
@SuppressWarnings({ "unchecked", "rawtypes" }) |
|
21 |
@Test |
|
22 |
public void testUDF() throws IOException { |
|
23 |
StringMapsMerger udf = new StringMapsMerger(); |
|
24 |
TupleFactory tupleFactory = TupleFactory.getInstance(); |
|
25 |
Map<String, String> emptyMap = new HashMap<String, String>(); |
|
26 |
Map<String, String> map1 = new HashMap<String, String>(); |
|
27 |
map1.put("12", "12"); |
|
28 |
map1.put("3", "bag"); |
|
29 |
map1.put("1", ""); |
|
30 |
Map<String, String> map2 = new HashMap<String, String>(); |
|
31 |
map2.put("12", "null"); |
|
32 |
map2.put("3", "data"); |
|
33 |
map2.put("35", "empty"); |
|
34 |
Map<String, String> map3 = new HashMap<String, String>(); |
|
35 |
map3.put("1", "notempty"); |
|
36 |
Map<String, String> map4 = new HashMap<String, String>(); |
|
37 |
map4.put("1", ""); |
|
38 |
map4.put("3", "bag"); |
|
39 |
map4.put("12", "12"); |
|
40 |
map4.put("35", "empty"); |
|
41 |
|
|
42 |
assertNull(udf.exec(null)); |
|
43 |
assertNull(udf.exec(tupleFactory.newTuple())); |
|
44 |
assertNull(udf.exec(tupleFactory.newTuple((Map)null))); |
|
45 |
assertNull(udf.exec(tupleFactory.newTuple(emptyMap))); |
|
46 |
assertEquals(map1, udf.exec(tupleFactory.newTuple(map1))); |
|
47 |
assertEquals(map4, udf.exec(tupleFactory.newTuple(Lists.newArrayList(map1, emptyMap, map2, null, map3)))); |
|
48 |
} |
|
49 |
|
|
50 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/StringBagsDifferenceTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.udfs; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import java.io.IOException; |
|
5 |
import junit.framework.TestCase; |
|
6 |
import org.apache.pig.data.BagFactory; |
|
7 |
import org.apache.pig.data.DataBag; |
|
8 |
import org.apache.pig.data.TupleFactory; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
/** |
|
12 |
* |
|
13 |
* @author Dominika Tkaczyk |
|
14 |
*/ |
|
15 |
public class StringBagsDifferenceTest extends TestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testUDF() throws IOException { |
|
19 |
StringBagsDifference udf = new StringBagsDifference(); |
|
20 |
TupleFactory tupleFactory = TupleFactory.getInstance(); |
|
21 |
BagFactory bagFactory = BagFactory.getInstance(); |
|
22 |
DataBag nullBag = null; |
|
23 |
DataBag emptyBag = bagFactory.newDefaultBag(); |
|
24 |
DataBag bag1 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), |
|
25 |
tupleFactory.newTuple("tup2"))); |
|
26 |
DataBag bag2 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup3"), |
|
27 |
tupleFactory.newTuple("tup4"))); |
|
28 |
DataBag bag3 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), |
|
29 |
tupleFactory.newTuple("tup4"), |
|
30 |
tupleFactory.newTuple("tup5"))); |
|
31 |
DataBag bag4 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), |
|
32 |
tupleFactory.newTuple("tup5"))); |
|
33 |
|
|
34 |
assertNull(udf.exec(null)); |
|
35 |
assertNull(udf.exec(tupleFactory.newTuple())); |
|
36 |
assertNull(udf.exec(tupleFactory.newTuple(nullBag))); |
|
37 |
assertNull(udf.exec(tupleFactory.newTuple(emptyBag))); |
|
38 |
assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag1, emptyBag, bag2, bag3)))); |
|
39 |
assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(nullBag, bag3, bag2)))); |
|
40 |
assertEquals(bag1, udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag1, bag2)))); |
|
41 |
assertEquals(bag4, udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag3, bag2)))); |
|
42 |
assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag4, bag3)))); |
|
43 |
} |
|
44 |
|
|
45 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/StringFirstNotEmptyTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.udfs; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import java.io.IOException; |
|
5 |
import junit.framework.TestCase; |
|
6 |
import org.apache.pig.data.TupleFactory; |
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author Dominika Tkaczyk |
|
12 |
*/ |
|
13 |
public class StringFirstNotEmptyTest extends TestCase { |
|
14 |
|
|
15 |
@Test |
|
16 |
public void testUDF() throws IOException { |
|
17 |
StringFirstNotEmpty udf = new StringFirstNotEmpty(); |
|
18 |
TupleFactory factory = TupleFactory.getInstance(); |
|
19 |
|
|
20 |
assertNull(udf.exec(null)); |
|
21 |
assertNull(udf.exec(factory.newTuple())); |
|
22 |
assertNull(udf.exec(factory.newTuple((String) null))); |
|
23 |
assertEquals("tup", udf.exec(factory.newTuple("tup"))); |
|
24 |
assertNull(udf.exec(factory.newTuple(Lists.newArrayList()))); |
|
25 |
assertNull(udf.exec(factory.newTuple(Lists.newArrayList(null, null)))); |
|
26 |
assertEquals("val1", udf.exec(factory.newTuple(Lists.newArrayList("val1", null, "256", "90")))); |
|
27 |
assertEquals("k256", udf.exec(factory.newTuple(Lists.newArrayList(null, null, null, "k256", "567")))); |
|
28 |
assertEquals("k256", udf.exec(factory.newTuple(Lists.newArrayList(null, "", null, "k256", "567")))); |
|
29 |
} |
|
30 |
|
|
31 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/metadatamerger/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.metadatamerger; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author Dominika Tkaczyk |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testJoin() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/transformers/metadatamerger/sampledataproducer/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/citationmatching/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.citationmatching; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author Dominika Tkaczyk |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testJoin() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/transformers/citationmatching/sampledataproducer/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/common/existencefilter/sampledataproducer/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.common.existencefilter.sampledataproducer; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author Mateusz Fedoryszak |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testWorkflow() throws Exception { |
|
19 |
WorkflowConfiguration wf = new WorkflowConfiguration(); |
|
20 |
wf.setTimeoutInSeconds(720); |
|
21 |
runWorkflow("eu/dnetlib/iis/transformers/common/existencefilter/sampledataproducer/oozie_app", wf); |
|
22 |
} |
|
23 |
|
|
24 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/documentssimilarity/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.documentssimilarity; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author Michal Oniszczuk (m.oniszczuk@icm.edu.pl) |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testWorkflow() throws Exception { |
|
19 |
WorkflowConfiguration wf = new WorkflowConfiguration(); |
|
20 |
wf.setTimeoutInSeconds(720); |
|
21 |
runWorkflow("eu/dnetlib/iis/transformers/documentssimilarity/sampledataproducer/oozie_app", wf); |
|
22 |
} |
|
23 |
|
|
24 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/ingest/pmc/metadata/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.ingest.pmc.metadata; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author mhorst |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testWorkflow() throws Exception { |
|
19 |
WorkflowConfiguration wf = new WorkflowConfiguration(); |
|
20 |
wf.setTimeoutInSeconds(720); |
|
21 |
runWorkflow("eu/dnetlib/iis/transformers/ingest/pmc/metadata/sampledataproducer/oozie_app", wf); |
|
22 |
} |
|
23 |
|
|
24 |
} |
|
0 | 25 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/documentssimilarity_with_fulltext/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.documentssimilarity_with_fulltext; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author Michal Oniszczuk (m.oniszczuk@icm.edu.pl) |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testWorkflow() throws Exception { |
|
19 |
WorkflowConfiguration wf = new WorkflowConfiguration(); |
|
20 |
wf.setTimeoutInSeconds(720); |
|
21 |
runWorkflow("eu/dnetlib/iis/transformers/documentssimilarity_with_fulltext/sampledataproducer/oozie_app", wf); |
|
22 |
} |
|
23 |
|
|
24 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/idreplacer/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.idreplacer; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author Dominika Tkaczyk |
|
11 |
* @author Michal Oniszczuk |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testReplacer1Field() throws Exception { |
|
19 |
runWorkflow("eu/dnetlib/iis/transformers/idreplacer/replacer_1_field/oozie_app"); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testReplacer2Fields() throws Exception { |
|
24 |
runWorkflow("eu/dnetlib/iis/transformers/idreplacer/replacer_2_fields/oozie_app"); |
|
25 |
} |
|
26 |
|
|
27 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/metricsprimary/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.metricsprimary; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author Dominika Tkaczyk |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testWorkflow() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/transformers/metricsprimary/sampledataproducer/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/statistics/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.statistics; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |
|
9 |
* |
|
10 |
* @author Dominika Tkaczyk |
|
11 |
* |
|
12 |
*/ |
|
13 |
@Category(IntegrationTest.class) |
|
14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void testWorkflow() throws Exception { |
|
18 |
runWorkflow("eu/dnetlib/iis/transformers/statistics/sampledataproducer/oozie_app"); |
|
19 |
} |
|
20 |
|
|
21 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/documentsclassification/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.documentsclassification; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import org.junit.Test; |
|
6 |
import org.junit.experimental.categories.Category; |
|
7 |
|
|
8 |
/** |

9 |
* Integration test that runs the workflow at eu/dnetlib/iis/transformers/documentsclassification/sampledataproducer/oozie_app. |

10 |
* @author Dominika Tkaczyk |

11 |
* |

12 |
*/ |

13 |
@Category(IntegrationTest.class) |

14 |
public class WorkflowTest extends AbstractWorkflowTestCase { |

15 |


16 |
@Test |

17 |
public void testWorkflow() throws Exception { |

18 |
runWorkflow("eu/dnetlib/iis/transformers/documentsclassification/sampledataproducer/oozie_app"); |

19 |
} |

20 |


21 |
} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
transformer_project_toconcept classpath eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/oozie_app |
|
0 | 3 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.4" name="test-transformers_referenceextraction_project_toconcept"> |
|
2 |
|
|
3 |
<start to="producer"/> |
|
4 |
<action name="producer"> |
|
5 |
<java> |
|
6 |
<job-tracker>${jobTracker}</job-tracker> |
|
7 |
<name-node>${nameNode}</name-node> |
|
8 |
<!-- The data generated by this node is deleted in this section --> |
|
9 |
<prepare> |
|
10 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
11 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
12 |
</prepare> |
|
13 |
<configuration> |
|
14 |
<property> |
|
15 |
<name>mapred.job.queue.name</name> |
|
16 |
<value>${queueName}</value> |
|
17 |
</property> |
|
18 |
</configuration> |
|
19 |
<!-- This is a simple wrapper for the Java code --> |
|
20 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
21 |
<!-- The business Java code that gets to be executed --> |
|
22 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
23 |
<!-- Specification of the output ports --> |
|
24 |
<arg>-C{document_to_project, |
|
25 |
eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject, |
|
26 |
eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/document_to_project.json}</arg> |
|
27 |
<arg>-C{project, |
|
28 |
eu.dnetlib.iis.importer.schemas.Project, |
|
29 |
eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/project.json}</arg> |
|
30 |
<arg>-C{concept, |
|
31 |
eu.dnetlib.iis.importer.schemas.Concept, |
|
32 |
eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/concept.json}</arg> |
|
33 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
34 |
directory has to be specified as well --> |
|
35 |
<arg>-SworkingDir=${workingDir}/producer/working_dir</arg> |
|
36 |
<arg>-Odocument_to_project=${workingDir}/producer/document_to_project</arg> |
|
37 |
<arg>-Oproject=${workingDir}/producer/project</arg> |
|
38 |
<arg>-Oconcept=${workingDir}/producer/concept</arg> |
|
39 |
</java> |
|
40 |
<ok to="transformer_project_toconcept"/> |
|
41 |
<error to="fail"/> |
|
42 |
</action> |
|
43 |
<action name="transformer_project_toconcept"> |
|
44 |
<sub-workflow> |
|
45 |
<app-path>${wf:appPath()}/transformer_project_toconcept</app-path> |
|
46 |
<configuration> |
|
47 |
<property> |
|
48 |
<name>jobTracker</name> |
|
49 |
<value>${jobTracker}</value> |
|
50 |
</property> |
|
51 |
<property> |
|
52 |
<name>nameNode</name> |
|
53 |
<value>${nameNode}</value> |
|
54 |
</property> |
|
55 |
<property> |
|
56 |
<name>queueName</name> |
|
57 |
<value>${queueName}</value> |
|
58 |
</property> |
|
59 |
<!-- Working directory of the subworkflow --> |
|
60 |
<property> |
|
61 |
<name>workingDir</name> |
|
62 |
<value>${workingDir}/transformer_project_toconcept/working_dir</value> |
|
63 |
</property> |
|
64 |
<property> |
|
65 |
<name>grant_id_param_name</name> |
|
66 |
<value>CD_PROJECT_NUMBER</value> |
|
67 |
</property> |
|
68 |
<!-- Input ports. --> |
|
69 |
<property> |
|
70 |
<name>input_document_to_project</name> |
|
71 |
<value>${workingDir}/producer/document_to_project</value> |
|
72 |
</property> |
|
73 |
<property> |
|
74 |
<name>input_project</name> |
|
75 |
<value>${workingDir}/producer/project</value> |
|
76 |
</property> |
|
77 |
<property> |
|
78 |
<name>input_concept</name> |
|
79 |
<value>${workingDir}/producer/concept</value> |
|
80 |
</property> |
|
81 |
<!-- Output port bound to given path --> |
|
82 |
<property> |
|
83 |
<name>output</name> |
|
84 |
<value>${workingDir}/transformer_project_toconcept/output</value> |
|
85 |
</property> |
|
86 |
</configuration> |
|
87 |
</sub-workflow> |
|
88 |
<ok to="consumer"/> |
|
89 |
<error to="fail"/> |
|
90 |
</action> |
|
91 |
<action name="consumer"> |
|
92 |
<java> |
|
93 |
<job-tracker>${jobTracker}</job-tracker> |
|
94 |
<name-node>${nameNode}</name-node> |
|
95 |
<configuration> |
|
96 |
<property> |
|
97 |
<name>mapred.job.queue.name</name> |
|
98 |
<value>${queueName}</value> |
|
99 |
</property> |
|
100 |
</configuration> |
|
101 |
<!-- This is a simple wrapper for the Java code --> |
|
102 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
103 |
<!-- The business Java code that gets to be executed --> |
|
104 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg> |
|
105 |
<!-- Specification of the input ports --> |
|
106 |
<arg>-C{output, |
|
107 |
eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId, |
|
108 |
eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/output.json}</arg> |
|
109 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
110 |
directory has to be specified as well --> |
|
111 |
<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg> |
|
112 |
<arg>-Ioutput=${workingDir}/transformer_project_toconcept/output</arg> |
|
113 |
</java> |
|
114 |
<ok to="end" /> |
|
115 |
<error to="fail" /> |
|
116 |
</action> |
|
117 |
<kill name="fail"> |
|
118 |
<message>Unfortunately, the workflow failed -- error message: |
|
119 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
120 |
</kill> |
|
121 |
<end name="end"/> |
|
122 |
|
|
123 |
</workflow-app> |
|
0 | 124 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/concept.json | ||
---|---|---|
1 |
{"id": "concept-id1", "label": "concept id1", "params": {"CD_PROJECT_NUMBER": "123456", "CD_FRAMEWORK": "FP7"}} |
|
2 |
{"id": "concept-id2", "label": "concept id2", "params": {"CD_PROJECT_NUMBER": "654321", "CD_FRAMEWORK": "FP7"}} |
|
3 |
{"id": "concept-id3", "label": "concept id3", "params": {"CD_PROJECT_NUMBER": "999999", "CD_FRAMEWORK": "FP7"}} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/output.json | ||
---|---|---|
1 |
{"documentId": "docId1", "conceptId": "concept-id1", "confidenceLevel": 0.5} |
|
2 |
{"documentId": "docId1", "conceptId": "concept-id2", "confidenceLevel": 0.1} |
|
3 |
{"documentId": "docId2", "conceptId": "concept-id3", "confidenceLevel": 0.3} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/document_to_project.json | ||
---|---|---|
1 |
{"documentId": "docId1", "projectId": "projId1", "confidenceLevel": 0.5} |
|
2 |
{"documentId": "docId1", "projectId": "projId2", "confidenceLevel": 0.1} |
|
3 |
{"documentId": "docId2", "projectId": "projId3", "confidenceLevel": 0.3} |
|
4 |
{"documentId": "docId2", "projectId": "nonexistingProjId", "confidenceLevel": 0.1} |
|
5 |
{"documentId": "docId2", "projectId": "noFetProjId", "confidenceLevel": 0.1} |
|
6 |
{"documentId": "docId3", "projectId": "nonexistingProjId", "confidenceLevel": 0.1} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/project.json | ||
---|---|---|
1 |
{"id":"projId1","projectAcronym":"project 1","projectGrantId":"123456","fundingClass":"FP7"} |
|
2 |
{"id":"projId2","projectAcronym":"project 2","projectGrantId":"654321","fundingClass":"FP7"} |
|
3 |
{"id":"projId3","projectAcronym":"project 3","projectGrantId":"999999","fundingClass":"FP7"} |
|
4 |
{"id":"noFetProjId","projectAcronym":"non fet project","projectGrantId":"000000","fundingClass":"FP7"} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
transformer_export_documentmetadata classpath eu/dnetlib/iis/transformers/export/documentmetadata/oozie_app |
|
0 | 3 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-transformers_export_documentmetadata_sampledataproducer"> |
|
2 |
<start to="producer"/> |
|
3 |
<action name="producer"> |
|
4 |
<java> |
|
5 |
<job-tracker>${jobTracker}</job-tracker> |
|
6 |
<name-node>${nameNode}</name-node> |
|
7 |
<!-- The data generated by this node is deleted in this section --> |
|
8 |
<prepare> |
|
9 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
10 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
11 |
</prepare> |
|
12 |
<configuration> |
|
13 |
<property> |
|
14 |
<name>mapred.job.queue.name</name> |
|
15 |
<value>${queueName}</value> |
|
16 |
</property> |
|
17 |
</configuration> |
|
18 |
<!-- This is a simple wrapper for the Java code --> |
|
19 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
20 |
<!-- The business Java code that gets to be executed --> |
|
21 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
22 |
<!-- Specification of the output ports --> |
|
23 |
<arg>-C{extracted_metadata, |
|
24 |
eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata, |
|
25 |
eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/extr_metadata.json}</arg> |
|
26 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
27 |
directory has to be specified as well --> |
|
28 |
<arg>-SworkingDir=${workingDir}/producer/working_dir</arg> |
|
29 |
<arg>-Oextracted_metadata=${workingDir}/producer/extr_metadata</arg> |
|
30 |
</java> |
|
31 |
<ok to="transformer_export_documentmetadata"/> |
|
32 |
<error to="fail"/> |
|
33 |
</action> |
|
34 |
<action name="transformer_export_documentmetadata"> |
|
35 |
<sub-workflow> |
|
36 |
<app-path>${wf:appPath()}/transformer_export_documentmetadata</app-path> |
|
37 |
<configuration> |
|
38 |
<property> |
|
39 |
<name>jobTracker</name> |
|
40 |
<value>${jobTracker}</value> |
|
41 |
</property> |
|
42 |
<property> |
|
43 |
<name>nameNode</name> |
|
44 |
<value>${nameNode}</value> |
|
45 |
</property> |
|
46 |
<property> |
|
47 |
<name>queueName</name> |
|
48 |
<value>${queueName}</value> |
|
49 |
</property> |
|
50 |
<!-- Working directory of the subworkflow --> |
|
51 |
<property> |
|
52 |
<name>workingDir</name> |
|
53 |
<value>${workingDir}/transformer_export_documentmetadata/working_dir</value> |
|
54 |
</property> |
|
55 |
<!-- Input ports. --> |
|
56 |
<property> |
|
57 |
<name>input_extracted_metadata</name> |
|
58 |
<value>${workingDir}/producer/extr_metadata</value> |
|
59 |
</property> |
|
60 |
<!-- Output port bound to given path --> |
|
61 |
<property> |
|
62 |
<name>output_metadata</name> |
|
63 |
<value>${workingDir}/transformer_export_documentmetadata/output_metadata</value> |
|
64 |
</property> |
|
65 |
</configuration> |
|
66 |
</sub-workflow> |
|
67 |
<ok to="consumer"/> |
|
68 |
<error to="fail"/> |
|
69 |
</action> |
|
70 |
<action name="consumer"> |
|
71 |
<java> |
|
72 |
<job-tracker>${jobTracker}</job-tracker> |
|
73 |
<name-node>${nameNode}</name-node> |
|
74 |
<configuration> |
|
75 |
<property> |
|
76 |
<name>mapred.job.queue.name</name> |
|
77 |
<value>${queueName}</value> |
|
78 |
</property> |
|
79 |
</configuration> |
|
80 |
<!-- This is a simple wrapper for the Java code --> |
|
81 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
82 |
<!-- The business Java code that gets to be executed --> |
|
83 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg> |
|
84 |
<!-- Specification of the input ports --> |
|
85 |
<arg>-C{output_metadata, |
|
86 |
eu.dnetlib.iis.export.schemas.DocumentMetadata, |
|
87 |
eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/output_metadata.json}</arg> |
|
88 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
89 |
directory has to be specified as well --> |
|
90 |
<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg> |
|
91 |
<arg>-Ioutput_metadata=${workingDir}/transformer_export_documentmetadata/output_metadata</arg> |
|
92 |
</java> |
|
93 |
<ok to="end" /> |
|
94 |
<error to="fail" /> |
|
95 |
</action> |
|
96 |
<kill name="fail"> |
|
97 |
<message>Unfortunately, the workflow failed -- error message: |
|
98 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
99 |
</kill> |
|
100 |
<end name="end"/> |
|
101 |
</workflow-app> |
|
0 | 102 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/output_metadata.json | ||
---|---|---|
1 |
{"id": "id-2", "affiliations": []} |
|
2 |
{"id": "id-3", "affiliations": [{"organization": "some-org", "countryName": "pl", "countryCode": "PL", "address": null, "rawText": "some raw text"}]} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/extr_metadata.json | ||
---|---|---|
1 |
{"publisher": "Tor Science Fiction", "affiliations": null, "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}, {"authorFullName": "author-3", "affiliationPositions": null}], "language": "eng", "title": "Enders Game", "externalIdentifiers": null, "journal": "Journal-2", "id": "id-1", "pages": {"start": "123", "end": "128"}, "volume": null, "references": null, "year": null, "keywords": null, "issue": null, "abstract": null} |
|
2 |
{"publisher": null, "affiliations": [], "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}], "language": null, "title": null, "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "id": "id-2", "pages": null, "volume": "124", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}, {"position": 2, "basicMetadata": {"publisher": null, "title": "The Other Wind", "url": null, "series": null, "authors": null, "volume": "vol.23", "edition": null, "source": null, "year": "2003", "issue": null, "pages": null, "location": null}, "text": "Ursula K. Le Guin, The Other Wind, 2003"}], "year": 1970, "keywords": ["kwd_1", "kwd_3", "kwd_5"], "issue": null, "abstract": "The tales"} |
|
3 |
{"publisher": "Harp3r T0rch", "affiliations": [{"organization": "some-org", "countryName": "pl", "countryCode": "PL", "address": null, "rawText": "some raw text"}], "authors": null, "language": "en", "title": "Small Gods", "externalIdentifiers": null, "journal": "Journal", "id": "id-3", "pages": null, "volume": "32", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}], "year": null, "keywords": null, "issue": "4", "abstract": null} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
transformer_export_researchinitiatives classpath eu/dnetlib/iis/transformers/export/researchinitiatives/oozie_app |
|
0 | 3 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-transformers_export_researchinitiatives_sampledataproducer"> |
|
2 |
|
|
3 |
<start to="producer"/> |
|
4 |
|
|
5 |
<action name="producer"> |
|
6 |
<java> |
|
7 |
<job-tracker>${jobTracker}</job-tracker> |
|
8 |
<name-node>${nameNode}</name-node> |
|
9 |
<!-- The data generated by this node is deleted in this section --> |
|
10 |
<prepare> |
|
11 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
12 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
13 |
</prepare> |
|
14 |
<configuration> |
|
15 |
<property> |
|
16 |
<name>mapred.job.queue.name</name> |
|
17 |
<value>${queueName}</value> |
|
18 |
</property> |
|
19 |
</configuration> |
|
20 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
21 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
22 |
<arg>-C{document_to_concept_id, |
|
23 |
eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId, |
|
24 |
eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_id.json}</arg> |
|
25 |
<arg>-SworkingDir=${workingDir}/producer/working_dir</arg> |
|
26 |
<arg>-Odocument_to_concept_id=${workingDir}/producer/output</arg> |
|
27 |
</java> |
|
28 |
<ok to="transformer_export_researchinitiatives"/> |
|
29 |
<error to="fail"/> |
|
30 |
</action> |
|
31 |
|
|
32 |
<action name="transformer_export_researchinitiatives"> |
|
33 |
<sub-workflow> |
|
34 |
<app-path>${wf:appPath()}/transformer_export_researchinitiatives</app-path> |
|
35 |
<configuration> |
|
36 |
<property> |
|
37 |
<name>jobTracker</name> |
|
38 |
<value>${jobTracker}</value> |
|
39 |
</property> |
|
40 |
<property> |
|
41 |
<name>nameNode</name> |
|
42 |
<value>${nameNode}</value> |
|
43 |
</property> |
|
44 |
<property> |
|
45 |
<name>queueName</name> |
|
46 |
<value>${queueName}</value> |
|
47 |
</property> |
|
48 |
<property> |
|
49 |
<name>workingDir</name> |
|
50 |
<value>${workingDir}/transformer_export_researchinitiatives/working_dir</value> |
|
51 |
</property> |
|
52 |
<property> |
|
53 |
<name>input_document_to_research_initiative</name> |
|
54 |
<value>${workingDir}/producer/output</value> |
|
55 |
</property> |
|
56 |
<property> |
|
57 |
<name>output_document_to_research_initiatives</name> |
|
58 |
<value>${workingDir}/transformer_export_researchinitiatives/output</value> |
|
59 |
</property> |
|
60 |
</configuration> |
|
61 |
</sub-workflow> |
|
62 |
<ok to="consumer"/> |
|
63 |
<error to="fail"/> |
|
64 |
</action> |
|
65 |
|
|
66 |
<action name="consumer"> |
|
67 |
<java> |
|
68 |
<job-tracker>${jobTracker}</job-tracker> |
|
69 |
<name-node>${nameNode}</name-node> |
|
70 |
<configuration> |
|
71 |
<property> |
|
72 |
<name>mapred.job.queue.name</name> |
|
73 |
<value>${queueName}</value> |
|
74 |
</property> |
|
75 |
</configuration> |
|
76 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
77 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg> |
|
78 |
<arg>-C{output, |
|
79 |
eu.dnetlib.iis.export.schemas.DocumentToConceptIds, |
|
80 |
eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_ids.json}</arg> |
|
81 |
<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg> |
|
82 |
<arg>-Ioutput=${workingDir}/transformer_export_researchinitiatives/output</arg> |
|
83 |
</java> |
|
84 |
<ok to="end" /> |
|
85 |
<error to="fail" /> |
|
86 |
</action> |
|
87 |
|
|
88 |
<kill name="fail"> |
|
89 |
<message>Unfortunately, the workflow failed -- error message: |
|
90 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
91 |
</kill> |
|
92 |
|
|
93 |
<end name="end"/> |
|
94 |
|
|
95 |
</workflow-app> |
|
0 | 96 |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_id.json | ||
---|---|---|
1 |
{"documentId": "docId1", "conceptId": "concept1", "confidenceLevel": 0.9} |
|
2 |
{"documentId": "docId1", "conceptId": "concept1", "confidenceLevel": null} |
|
3 |
{"documentId": "docId2", "conceptId": "concept1", "confidenceLevel": 0.9} |
|
4 |
{"documentId": "docId2", "conceptId": "concept2", "confidenceLevel": 0.8} |
|
5 |
{"documentId": "docId3", "conceptId": "concept3", "confidenceLevel": 0.7} |
|
6 |
{"documentId": "docId3", "conceptId": "concept2", "confidenceLevel": null} |
|
7 |
{"documentId": "docId3", "conceptId": "concept1", "confidenceLevel": 0.7} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_ids.json | ||
---|---|---|
1 |
{"documentId": "docId1", "conceptIds": ["concept1"]} |
|
2 |
{"documentId": "docId2", "conceptIds": ["concept1", "concept2"]} |
|
3 |
{"documentId": "docId3", "conceptIds": ["concept1", "concept2", "concept3"]} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/output_metadata.json | ||
---|---|---|
1 |
{"id": "id-2", "affiliations": []} |
|
2 |
{"id": "id-3", "affiliations": [{"organization": "some-org", "countryName": "pl", "countryCode": "PL", "address": null, "rawText": "some raw text"}]} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/extr_metadata.json | ||
---|---|---|
1 |
{"publisher": "Tor Science Fiction", "affiliations": null, "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}, {"authorFullName": "author-3", "affiliationPositions": null}], "language": "eng", "title": "Enders Game", "externalIdentifiers": null, "journal": "Journal-2", "id": "id-1", "pages": {"start": "123", "end": "128"}, "volume": null, "references": null, "year": null, "keywords": null, "issue": null, "abstract": null} |
|
2 |
{"publisher": null, "affiliations": [], "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}], "language": null, "title": null, "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "id": "id-2", "pages": null, "volume": "124", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}, {"position": 2, "basicMetadata": {"publisher": null, "title": "The Other Wind", "url": null, "series": null, "authors": null, "volume": "vol.23", "edition": null, "source": null, "year": "2003", "issue": null, "pages": null, "location": null}, "text": "Ursula K. Le Guin, The Other Wind, 2003"}], "year": 1970, "keywords": ["kwd_1", "kwd_3", "kwd_5"], "issue": null, "abstract": "The tales"} |
|
3 |
{"publisher": "Harp3r T0rch", "affiliations": [{"organization": "some-org", "countryName": "pl", "countryCode": "PL", "address": null, "rawText": "some raw text"}], "authors": null, "language": "en", "title": "Small Gods", "externalIdentifiers": null, "journal": "Journal", "id": "id-3", "pages": null, "volume": "32", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}], "year": null, "keywords": null, "issue": "4", "abstract": null} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
transformer_export_documenttodataset_without_imported_data classpath eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/oozie_app |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-transformers_export_documenttodataset_without_imported_data_sampledataproducer"> |
|
2 |
<start to="producer"/> |
|
3 |
<action name="producer"> |
|
4 |
<java> |
|
5 |
<job-tracker>${jobTracker}</job-tracker> |
|
6 |
<name-node>${nameNode}</name-node> |
|
7 |
<!-- The data generated by this node is deleted in this section --> |
|
8 |
<prepare> |
|
9 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
10 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
11 |
</prepare> |
|
12 |
<configuration> |
|
13 |
<property> |
|
14 |
<name>mapred.job.queue.name</name> |
|
15 |
<value>${queueName}</value> |
|
16 |
</property> |
|
17 |
</configuration> |
|
18 |
<!-- This is a simple wrapper for the Java code --> |
|
19 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
20 |
<!-- The business Java code that gets to be executed --> |
|
21 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
22 |
<!-- Specification of the output ports --> |
|
23 |
<arg>-C{document_to_dataset, |
|
24 |
eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet, |
|
25 |
eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_to_dataset.json}</arg> |
|
26 |
<arg>-C{document_relation, |
|
27 |
eu.dnetlib.iis.importer.schemas.DocumentRelation, |
|
28 |
eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_relation.json}</arg> |
|
29 |
|
|
30 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
31 |
directory has to be specified as well --> |
|
32 |
<arg>-SworkingDir=${workingDir}/producer/working_dir</arg> |
|
33 |
<arg>-Odocument_to_dataset=${workingDir}/producer/document_to_dataset</arg> |
|
34 |
<arg>-Odocument_relation=${workingDir}/producer/document_relation</arg> |
|
35 |
</java> |
|
36 |
<ok to="transformer_export_docdataset_without_imported"/> |
|
37 |
<error to="fail"/> |
|
38 |
</action> |
|
39 |
<action name="transformer_export_docdataset_without_imported"> |
|
40 |
<sub-workflow> |
|
41 |
<app-path>${wf:appPath()}/transformer_export_documenttodataset_without_imported_data</app-path> |
|
42 |
<configuration> |
|
43 |
<property> |
|
44 |
<name>jobTracker</name> |
|
45 |
<value>${jobTracker}</value> |
|
46 |
</property> |
|
47 |
<property> |
|
48 |
<name>nameNode</name> |
|
49 |
<value>${nameNode}</value> |
|
50 |
</property> |
|
51 |
<property> |
|
52 |
<name>queueName</name> |
|
53 |
<value>${queueName}</value> |
|
54 |
</property> |
|
55 |
<!-- Working directory of the subworkflow --> |
|
56 |
<property> |
|
57 |
<name>workingDir</name> |
|
58 |
<value>${workingDir}/transformer_export_documenttodataset_without_imported_data/working_dir</value> |
|
59 |
</property> |
|
60 |
<!-- Input ports. --> |
|
61 |
<property> |
|
62 |
<name>input_document_to_dataset</name> |
|
63 |
<value>${workingDir}/producer/document_to_dataset</value> |
|
64 |
</property> |
|
65 |
<property> |
|
66 |
<name>input_document_relation</name> |
|
67 |
<value>${workingDir}/producer/document_relation</value> |
|
68 |
</property> |
|
69 |
<!-- Output port bound to given path --> |
|
70 |
<property> |
|
71 |
<name>output_document_to_dataset</name> |
|
72 |
<value>${workingDir}/transformer_export_documenttodataset_without_imported_data/document_to_dataset</value> |
|
73 |
</property> |
|
74 |
</configuration> |
|
75 |
</sub-workflow> |
|
76 |
<ok to="consumer"/> |
|
77 |
<error to="fail"/> |
|
78 |
</action> |
|
79 |
<action name="consumer"> |
|
80 |
<java> |
|
81 |
<job-tracker>${jobTracker}</job-tracker> |
|
82 |
<name-node>${nameNode}</name-node> |
|
83 |
<configuration> |
|
84 |
<property> |
|
85 |
<name>mapred.job.queue.name</name> |
|
86 |
<value>${queueName}</value> |
|
87 |
</property> |
|
88 |
</configuration> |
|
89 |
<!-- This is a simple wrapper for the Java code --> |
|
90 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
91 |
<!-- The business Java code that gets to be executed --> |
|
92 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg> |
|
93 |
<!-- Specification of the input ports --> |
|
94 |
<arg>-C{document_to_dataset, |
|
95 |
eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet, |
|
96 |
eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/output_document_to_dataset.json}</arg> |
|
97 |
<!-- All input and output ports have to be bound to paths in HDFS, working |
|
98 |
directory has to be specified as well --> |
|
99 |
<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg> |
|
100 |
<arg>-Idocument_to_dataset=${workingDir}/transformer_export_documenttodataset_without_imported_data/document_to_dataset</arg> |
|
101 |
</java> |
|
102 |
<ok to="end" /> |
|
103 |
<error to="fail" /> |
|
104 |
</action> |
|
105 |
<kill name="fail"> |
|
106 |
<message>Unfortunately, the workflow failed -- error message: |
|
107 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
108 |
</kill> |
|
109 |
<end name="end"/> |
|
110 |
</workflow-app> |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_relation.json | ||
---|---|---|
1 |
{"id": "id-1", "referencedIds": []} |
|
2 |
{"id": "id-3", "referencedIds": ["refid-4", "refid-2"]} |
|
3 |
{"id": "id-2", "referencedIds": ["refid-1", "refid-4", "refid-2", "refid-48", "refid-32"]} |
|
4 |
{"id": "id-4", "referencedIds": []} |
|
5 |
{"id": "id-5", "referencedIds": ["refid-4"]} |
|
6 |
{"id": "id-6", "referencedIds": ["refid-4", "refid-48", "refid-32"]} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_to_dataset.json | ||
---|---|---|
1 |
{"documentId": "id-1", "datasetId": "8095", "confidenceLevel": null} |
|
2 |
{"documentId": "id-2", "datasetId": "refid-48", "confidenceLevel": 1} |
|
3 |
{"documentId": "id-2", "datasetId": "refid-14", "confidenceLevel": null} |
|
4 |
{"documentId": "id-8", "datasetId": "0820", "confidenceLevel": null} |
|
5 |
{"documentId": "id-1", "datasetId": "refid-32", "confidenceLevel": 1.1} |
|
6 |
{"documentId": "id-2", "datasetId": "refid-32", "confidenceLevel": null} |
|
7 |
{"documentId": "id-5", "datasetId": "refid-4", "confidenceLevel": 2} |
|
8 |
{"documentId": "id-8", "datasetId": "5103", "confidenceLevel": 2} |
|
9 |
{"documentId": "id-1", "datasetId": "5103", "confidenceLevel": null} |
|
10 |
{"documentId": "id-2", "datasetId": "refid-148", "confidenceLevel": 0.9} |
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/output_document_to_dataset.json | ||
---|---|---|
1 |
{"documentId": "id-1", "datasetId": "8095", "confidenceLevel": null} |
|
2 |
{"documentId": "id-2", "datasetId": "refid-14", "confidenceLevel": null} |
|
3 |
{"documentId": "id-8", "datasetId": "0820", "confidenceLevel": null} |
|
4 |
{"documentId": "id-1", "datasetId": "refid-32", "confidenceLevel": 1.1} |
Also available in: Unified diff
creating IIS-CDH-5.3.0 branch