Project

General

Profile

« Previous | Next » 

Revision 35252

Added by Marek Horst about 9 years ago

creating IIS-CDH-5.3.0 branch

View differences:

modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/deploy.info
1
[
2
{
3
  "type_source": "SVN", 
4
  "goal": "package -U -T 4C source:jar", 
5
  "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-transformers/trunk/", 
6
  "deploy_repository": "dnet4-snapshots", 
7
  "version": "4",
8
  "mail": "m.horst@icm.edu.pl,d.tkaczyk@icm.edu.pl",
9
  "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", 
10
  "name": "icm-iis-transformers"
11
},
12
{
13
  "type_source": "SVN",
14
  "goal": "clean verify -U -e -X",
15
  "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-transformers/trunk/",
16
  "nightly" : "true",
17
  "cron" : "H H * * *",
18
  "version": "4",
19
  "mail": "m.horst@icm.edu.pl",
20
  "name": "icm-iis-transformers-embedded-integration-test"
21
}
22
]
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/referenceextraction/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.referenceextraction;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import eu.dnetlib.iis.core.WorkflowConfiguration;
6
import org.junit.Test;
7
import org.junit.experimental.categories.Category;
8

  
9
/**
10
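 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/oozie_app} with a 720-second timeout.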
11
 * @author mhorst
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
	public void testWorkflow() throws Exception {
19
        WorkflowConfiguration wf = new WorkflowConfiguration();
20
        wf.setTimeoutInSeconds(720);
21
        runWorkflow("eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/oozie_app", wf);
22
    }
23

  
24
}
0 25

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/export/documentmetadata/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.export.documentmetadata;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import eu.dnetlib.iis.core.WorkflowConfiguration;
6
import org.junit.Test;
7
import org.junit.experimental.categories.Category;
8

  
9
/**
10
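 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/oozie_app} with a 720-second timeout.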
11
 * @author mhorst
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
	public void testWorkflow() throws Exception {
19
        WorkflowConfiguration wf = new WorkflowConfiguration();
20
        wf.setTimeoutInSeconds(720);
21
        runWorkflow("eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/oozie_app", wf);
22
    }
23

  
24
}
0 25

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.export.documenttodataset_without_imported_data;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
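 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/oozie_app}.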
10
 * @author Dominika Tkaczyk
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
	public void testWorkflow() throws Exception {
18
        runWorkflow("eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/oozie_app");
19
    }
20

  
21
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.export.documenttoproject_without_imported_data;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
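 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/sampledataproducer/oozie_app}.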
10
 * @author Dominika Tkaczyk
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
	public void testWorkflow() throws Exception {
18
        runWorkflow("eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/sampledataproducer/oozie_app");
19
    }
20

  
21
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.importer.documentmetadata.idextractor;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
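 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/sampledataproducer/oozie_app}.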
10
 * @author mhorst
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
    public void testIdExtraction() throws Exception {
18
        runWorkflow("eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/sampledataproducer/oozie_app");
19
    }
20

  
21
}
0 22

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/metadataextraction/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.metadataextraction;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
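 * Integration tests that run the checksum preprocessing and the text/meta postprocessing workflow applications under {@code eu/dnetlib/iis/transformers/metadataextraction/checksum}.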
10
 * @author mhorst
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
    public void testChecksumPreprocessing() throws Exception {
18
        runWorkflow("eu/dnetlib/iis/transformers/metadataextraction/checksum/preprocessing/sampledataproducer/oozie_app");
19
    }
20
    
21
    @Test
22
    public void testChecksumPostprocessingText() throws Exception {
23
        runWorkflow("eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/text/sampledataproducer/oozie_app");
24
    }
25
    
26
    @Test
27
    public void testChecksumPostprocessingMeta() throws Exception {
28
        runWorkflow("eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/meta/sampledataproducer/oozie_app");
29
    }
30
}
0 31

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/IntegerFirstNotEmptyTest.java
1
package eu.dnetlib.iis.transformers.udfs;
2

  
3
import com.google.common.collect.Lists;
4
import java.io.IOException;
5
import junit.framework.TestCase;
6
import org.apache.pig.data.TupleFactory;
7
import org.junit.Test;
8

  
9
/**
10
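 * Checks that {@link IntegerFirstNotEmpty} returns the first non-null integer of the input tuple, or null when there is none.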
11
 * @author Dominika Tkaczyk
12
 */
13
public class IntegerFirstNotEmptyTest extends TestCase {
14
    
15
    @Test
16
	public void testUDF() throws IOException {
17
        IntegerFirstNotEmpty udf = new IntegerFirstNotEmpty();
18
        TupleFactory factory = TupleFactory.getInstance();
19
        
20
        assertNull(udf.exec(null));
21
        assertNull(udf.exec(factory.newTuple()));
22
        assertNull(udf.exec(factory.newTuple((Integer) null)));
23
        assertEquals(125, (int)udf.exec(factory.newTuple(Integer.valueOf(125))));
24
        assertNull(udf.exec(factory.newTuple(Lists.newArrayList())));
25
        assertNull(udf.exec(factory.newTuple(Lists.newArrayList(null, null))));
26
        assertEquals(23, (int)udf.exec(factory.newTuple(Lists.newArrayList(23, null, 256, 90))));
27
        assertEquals(256, (int)udf.exec(factory.newTuple(Lists.newArrayList(null, null, null, 256, 567))));
28
    }
29
    
30
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/StringBagsMergerTest.java
1
package eu.dnetlib.iis.transformers.udfs;
2

  
3
import com.google.common.collect.Lists;
4
import java.io.IOException;
5
import junit.framework.TestCase;
6
import org.apache.pig.data.BagFactory;
7
import org.apache.pig.data.DataBag;
8
import org.apache.pig.data.TupleFactory;
9
import org.junit.Test;
10

  
11
/**
12
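 * Checks that {@link StringBagsMerger} returns null for null or empty input and otherwise the union of the input bags without duplicate tuples.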
13
 * @author Dominika Tkaczyk
14
 */
15
public class StringBagsMergerTest extends TestCase {
16
    
17
    @Test
18
	public void testUDF() throws IOException {
19
        StringBagsMerger udf = new StringBagsMerger();
20
        TupleFactory tupleFactory = TupleFactory.getInstance();
21
        BagFactory bagFactory = BagFactory.getInstance();
22
        DataBag emptyBag = bagFactory.newDefaultBag();
23
        DataBag bag1 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), 
24
                                                                   tupleFactory.newTuple("tup2")));
25
        DataBag bag2 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup3"), 
26
                                                                   tupleFactory.newTuple("tup4")));
27
        DataBag bag3 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), 
28
                                                                   tupleFactory.newTuple("tup4"),
29
                                                                   tupleFactory.newTuple("tup5")));
30
        DataBag bag4 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), 
31
                                                                   tupleFactory.newTuple("tup2"),
32
                                                                   tupleFactory.newTuple("tup3"),
33
                                                                   tupleFactory.newTuple("tup4"),
34
                                                                   tupleFactory.newTuple("tup5")));
35
        
36
        assertNull(udf.exec(null));
37
        assertNull(udf.exec(tupleFactory.newTuple()));
38
        assertNull(udf.exec(tupleFactory.newTuple((DataBag)null)));
39
        assertNull(udf.exec(tupleFactory.newTuple(emptyBag)));
40
        assertEquals(bag1, udf.exec(tupleFactory.newTuple(bag1)));
41
        assertEquals(bag4, udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag1, emptyBag, bag2, bag3))));
42
    }
43
    
44
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/EmptyBagToNullTest.java
1
package eu.dnetlib.iis.transformers.udfs;
2

  
3
import com.google.common.collect.Lists;
4
import java.io.IOException;
5
import junit.framework.TestCase;
6
import org.apache.pig.data.BagFactory;
7
import org.apache.pig.data.DataBag;
8
import org.apache.pig.data.TupleFactory;
9
import org.junit.Test;
10

  
11
/**
12
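 * Checks that {@link EmptyBagToNull} returns null for null or empty input, passes a non-empty bag through, and returns null for a two-field tuple.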
13
 * @author Dominika Tkaczyk
14
 */
15
public class EmptyBagToNullTest extends TestCase {
16
    
17
    @Test
18
	public void testUDF() throws IOException {
19
        EmptyBagToNull udf = new EmptyBagToNull();
20
        TupleFactory tupleFactory = TupleFactory.getInstance();
21
        BagFactory bagFactory = BagFactory.getInstance();
22
        
23
        DataBag emptyBag = bagFactory.newDefaultBag();
24
        DataBag bag = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), tupleFactory.newTuple()));
25
        
26
        assertNull(udf.exec(null));
27
        assertNull(udf.exec(tupleFactory.newTuple()));
28
        assertNull(udf.exec(tupleFactory.newTuple((DataBag)null)));
29
        assertNull(udf.exec(tupleFactory.newTuple(emptyBag)));
30
        assertEquals(bag, udf.exec(tupleFactory.newTuple(bag)));
31
        assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag, emptyBag))));
32
    }
33
    
34
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/NullToEmptyBagTest.java
1
package eu.dnetlib.iis.transformers.udfs;
2

  
3
import com.google.common.collect.Lists;
4
import java.io.IOException;
5
import junit.framework.TestCase;
6
import org.apache.pig.data.BagFactory;
7
import org.apache.pig.data.DataBag;
8
import org.apache.pig.data.TupleFactory;
9
import org.junit.Test;
10

  
11
/**
12
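 * Checks that {@link NullToEmptyBag} maps a null bag to an empty bag and passes other bags through; null, empty and two-field tuples are expected to yield null.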
13
 * @author Dominika Tkaczyk
14
 */
15
public class NullToEmptyBagTest extends TestCase {
16
    
17
    @Test
18
	public void testUDF() throws IOException {
19
        NullToEmptyBag udf = new NullToEmptyBag();
20
        TupleFactory tupleFactory = TupleFactory.getInstance();
21
        BagFactory bagFactory = BagFactory.getInstance();
22
        DataBag emptyBag = bagFactory.newDefaultBag();
23
        DataBag bag = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), tupleFactory.newTuple()));
24
        
25
        assertNull(udf.exec(null));
26
        assertNull(udf.exec(tupleFactory.newTuple()));
27
        assertEquals(emptyBag, udf.exec(tupleFactory.newTuple((DataBag)null)));
28
        assertEquals(emptyBag, udf.exec(tupleFactory.newTuple(emptyBag)));
29
        assertEquals(bag, udf.exec(tupleFactory.newTuple(bag)));
30
        assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag, emptyBag))));
31
    }
32
    
33
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/NullTupleFieldsToNullTest.java
1
package eu.dnetlib.iis.transformers.udfs;
2

  
3
import com.google.common.collect.Lists;
4
import java.io.IOException;
5
import junit.framework.TestCase;
6
import org.apache.pig.data.BagFactory;
7
import org.apache.pig.data.DataBag;
8
import org.apache.pig.data.Tuple;
9
import org.apache.pig.data.TupleFactory;
10
import org.junit.Test;
11

  
12
/**
13
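 * Checks that {@link NullTupleFieldsToNull} returns null when the nested tuple is null, empty or holds only nulls, and returns the nested tuple unchanged otherwise.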
14
 * @author Dominika Tkaczyk
15
 */
16
public class NullTupleFieldsToNullTest extends TestCase {
17
    
18
    @Test
19
	public void testUDF() throws IOException {
20
        NullTupleFieldsToNull udf = new NullTupleFieldsToNull();
21
        TupleFactory tupleFactory = TupleFactory.getInstance();
22
        BagFactory bagFactory = BagFactory.getInstance();
23
        
24
        DataBag emptyBag = bagFactory.newDefaultBag();
25
        DataBag bag = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), null));
26
        Tuple nullTuple = tupleFactory.newTuple(Lists.newArrayList(null, null, null));
27
        Tuple tuple = tupleFactory.newTuple(Lists.newArrayList(null, null, "tup1"));
28
        
29
        assertNull(udf.exec(null));
30
        assertNull(udf.exec(tupleFactory.newTuple()));
31
        assertNull(udf.exec(tupleFactory.newTuple((Tuple)null)));
32
        assertNull(udf.exec(tupleFactory.newTuple(tupleFactory.newTuple())));
33
        assertNull(udf.exec(tupleFactory.newTuple(nullTuple)));
34
        assertEquals(tuple, udf.exec(tupleFactory.newTuple(tuple)));
35
        assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag, emptyBag))));
36
    }
37
    
38
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/StringMapsMergerTest.java
1
package eu.dnetlib.iis.transformers.udfs;
2

  
3
import com.google.common.collect.Lists;
4

  
5
import java.io.IOException;
6
import java.util.HashMap;
7
import java.util.Map;
8

  
9
import junit.framework.TestCase;
10

  
11
import org.apache.pig.data.TupleFactory;
12
import org.junit.Test;
13

  
14
/**
15
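 * Checks that {@link StringMapsMerger} merges the input maps with earlier maps taking precedence on duplicate keys, and returns null for null or empty input.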
16
 * @author Dominika Tkaczyk
17
 */
18
public class StringMapsMergerTest extends TestCase {
19
    
20
    @SuppressWarnings({ "unchecked", "rawtypes" })
21
	@Test
22
	public void testUDF() throws IOException {
23
        StringMapsMerger udf = new StringMapsMerger();
24
        TupleFactory tupleFactory = TupleFactory.getInstance();
25
        Map<String, String> emptyMap = new HashMap<String, String>();
26
        Map<String, String> map1 = new HashMap<String, String>();
27
        map1.put("12", "12");
28
        map1.put("3", "bag");
29
        map1.put("1", "");
30
        Map<String, String> map2 = new HashMap<String, String>();
31
        map2.put("12", "null");
32
        map2.put("3", "data");
33
        map2.put("35", "empty");
34
        Map<String, String> map3 = new HashMap<String, String>();
35
        map3.put("1", "notempty");
36
        Map<String, String> map4 = new HashMap<String, String>();
37
        map4.put("1", "");
38
        map4.put("3", "bag");
39
        map4.put("12", "12");
40
        map4.put("35", "empty");
41
        
42
        assertNull(udf.exec(null));
43
        assertNull(udf.exec(tupleFactory.newTuple()));
44
        assertNull(udf.exec(tupleFactory.newTuple((Map)null)));
45
        assertNull(udf.exec(tupleFactory.newTuple(emptyMap)));
46
        assertEquals(map1, udf.exec(tupleFactory.newTuple(map1)));
47
        assertEquals(map4, udf.exec(tupleFactory.newTuple(Lists.newArrayList(map1, emptyMap, map2, null, map3))));
48
    }
49
    
50
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/StringBagsDifferenceTest.java
1
package eu.dnetlib.iis.transformers.udfs;
2

  
3
import com.google.common.collect.Lists;
4
import java.io.IOException;
5
import junit.framework.TestCase;
6
import org.apache.pig.data.BagFactory;
7
import org.apache.pig.data.DataBag;
8
import org.apache.pig.data.TupleFactory;
9
import org.junit.Test;
10

  
11
/**
12
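 * Checks {@link StringBagsDifference}: two bags yield the first bag's tuples that are absent from the second; null is expected for null or empty input, other field counts, or an empty difference.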
13
 * @author Dominika Tkaczyk
14
 */
15
public class StringBagsDifferenceTest extends TestCase {
16
    
17
    @Test
18
	public void testUDF() throws IOException {
19
        StringBagsDifference udf = new StringBagsDifference();
20
        TupleFactory tupleFactory = TupleFactory.getInstance();
21
        BagFactory bagFactory = BagFactory.getInstance();
22
        DataBag nullBag = null;
23
        DataBag emptyBag = bagFactory.newDefaultBag();
24
        DataBag bag1 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), 
25
                                                                   tupleFactory.newTuple("tup2")));
26
        DataBag bag2 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup3"), 
27
                                                                   tupleFactory.newTuple("tup4")));
28
        DataBag bag3 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), 
29
                                                                   tupleFactory.newTuple("tup4"),
30
                                                                   tupleFactory.newTuple("tup5")));
31
        DataBag bag4 = bagFactory.newDefaultBag(Lists.newArrayList(tupleFactory.newTuple("tup1"), 
32
                                                                   tupleFactory.newTuple("tup5")));
33
        
34
        assertNull(udf.exec(null));
35
        assertNull(udf.exec(tupleFactory.newTuple()));
36
        assertNull(udf.exec(tupleFactory.newTuple(nullBag)));
37
        assertNull(udf.exec(tupleFactory.newTuple(emptyBag)));
38
        assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag1, emptyBag, bag2, bag3))));
39
        assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(nullBag, bag3, bag2))));
40
        assertEquals(bag1, udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag1, bag2))));
41
        assertEquals(bag4, udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag3, bag2))));
42
        assertNull(udf.exec(tupleFactory.newTuple(Lists.newArrayList(bag4, bag3))));
43
    }
44
    
45
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/udfs/StringFirstNotEmptyTest.java
1
package eu.dnetlib.iis.transformers.udfs;
2

  
3
import com.google.common.collect.Lists;
4
import java.io.IOException;
5
import junit.framework.TestCase;
6
import org.apache.pig.data.TupleFactory;
7
import org.junit.Test;
8

  
9
/**
10
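 * Checks that {@link StringFirstNotEmpty} returns the first non-null, non-empty string of the input tuple, or null when there is none.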
11
 * @author Dominika Tkaczyk
12
 */
13
public class StringFirstNotEmptyTest extends TestCase {
14
    
15
    @Test
16
	public void testUDF() throws IOException {
17
        StringFirstNotEmpty udf = new StringFirstNotEmpty();
18
        TupleFactory factory = TupleFactory.getInstance();
19
        
20
        assertNull(udf.exec(null));
21
        assertNull(udf.exec(factory.newTuple()));
22
        assertNull(udf.exec(factory.newTuple((String) null)));
23
        assertEquals("tup", udf.exec(factory.newTuple("tup")));
24
        assertNull(udf.exec(factory.newTuple(Lists.newArrayList())));
25
        assertNull(udf.exec(factory.newTuple(Lists.newArrayList(null, null))));
26
        assertEquals("val1", udf.exec(factory.newTuple(Lists.newArrayList("val1", null, "256", "90"))));
27
        assertEquals("k256", udf.exec(factory.newTuple(Lists.newArrayList(null, null, null, "k256", "567"))));
28
        assertEquals("k256", udf.exec(factory.newTuple(Lists.newArrayList(null, "", null, "k256", "567"))));
29
    }
30
    
31
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/metadatamerger/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.metadatamerger;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
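 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/metadatamerger/sampledataproducer/oozie_app}.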
10
 * @author Dominika Tkaczyk
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
	public void testJoin() throws Exception {
18
        runWorkflow("eu/dnetlib/iis/transformers/metadatamerger/sampledataproducer/oozie_app");
19
    }
20

  
21
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/citationmatching/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.citationmatching;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
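 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/citationmatching/sampledataproducer/oozie_app}.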
10
 * @author Dominika Tkaczyk
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
	public void testJoin() throws Exception {
18
    	runWorkflow("eu/dnetlib/iis/transformers/citationmatching/sampledataproducer/oozie_app");
19
    }
20

  
21
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/common/existencefilter/sampledataproducer/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.common.existencefilter.sampledataproducer;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import eu.dnetlib.iis.core.WorkflowConfiguration;
6
import org.junit.Test;
7
import org.junit.experimental.categories.Category;
8

  
9
/**
10
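 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/common/existencefilter/sampledataproducer/oozie_app} with a 720-second timeout.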
11
 * @author Mateusz Fedoryszak
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
	public void testWorkflow() throws Exception {
19
        WorkflowConfiguration wf = new WorkflowConfiguration();
20
        wf.setTimeoutInSeconds(720);
21
        runWorkflow("eu/dnetlib/iis/transformers/common/existencefilter/sampledataproducer/oozie_app", wf);
22
    }
23

  
24
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/documentssimilarity/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.documentssimilarity;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import eu.dnetlib.iis.core.WorkflowConfiguration;
6
import org.junit.Test;
7
import org.junit.experimental.categories.Category;
8

  
9
/**
10
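 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/documentssimilarity/sampledataproducer/oozie_app} with a 720-second timeout.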
11
 * @author Michal Oniszczuk (m.oniszczuk@icm.edu.pl)
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
	public void testWorkflow() throws Exception {
19
        WorkflowConfiguration wf = new WorkflowConfiguration();
20
        wf.setTimeoutInSeconds(720);
21
        runWorkflow("eu/dnetlib/iis/transformers/documentssimilarity/sampledataproducer/oozie_app", wf);
22
    }
23

  
24
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/ingest/pmc/metadata/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.ingest.pmc.metadata;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import eu.dnetlib.iis.core.WorkflowConfiguration;
6
import org.junit.Test;
7
import org.junit.experimental.categories.Category;
8

  
9
/**
10
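 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/ingest/pmc/metadata/sampledataproducer/oozie_app} with a 720-second timeout.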
11
 * @author mhorst
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
	public void testWorkflow() throws Exception {
19
        WorkflowConfiguration wf = new WorkflowConfiguration();
20
        wf.setTimeoutInSeconds(720);
21
        runWorkflow("eu/dnetlib/iis/transformers/ingest/pmc/metadata/sampledataproducer/oozie_app", wf);
22
    }
23

  
24
}
0 25

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/documentssimilarity_with_fulltext/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.documentssimilarity_with_fulltext;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import eu.dnetlib.iis.core.WorkflowConfiguration;
6
import org.junit.Test;
7
import org.junit.experimental.categories.Category;
8

  
9
/**
10
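 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/documentssimilarity_with_fulltext/sampledataproducer/oozie_app} with a 720-second timeout.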
11
 * @author Michal Oniszczuk (m.oniszczuk@icm.edu.pl)
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
	public void testWorkflow() throws Exception {
19
        WorkflowConfiguration wf = new WorkflowConfiguration();
20
        wf.setTimeoutInSeconds(720);
21
        runWorkflow("eu/dnetlib/iis/transformers/documentssimilarity_with_fulltext/sampledataproducer/oozie_app", wf);
22
    }
23

  
24
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/idreplacer/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.idreplacer;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
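 * Integration tests that run the {@code replacer_1_field} and {@code replacer_2_fields} workflow applications under {@code eu/dnetlib/iis/transformers/idreplacer}.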
10
 * @author Dominika Tkaczyk
11
 * @author Michal Oniszczuk
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
    public void testReplacer1Field() throws Exception {
19
        runWorkflow("eu/dnetlib/iis/transformers/idreplacer/replacer_1_field/oozie_app");
20
    }
21

  
22
    @Test
23
	public void testReplacer2Fields() throws Exception {
24
    	runWorkflow("eu/dnetlib/iis/transformers/idreplacer/replacer_2_fields/oozie_app");
25
    }
26

  
27
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/metricsprimary/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.metricsprimary;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
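 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/metricsprimary/sampledataproducer/oozie_app}.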
10
 * @author Dominika Tkaczyk
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
	public void testWorkflow() throws Exception {
18
        runWorkflow("eu/dnetlib/iis/transformers/metricsprimary/sampledataproducer/oozie_app");
19
    }
20

  
21
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/statistics/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.statistics;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
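 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/statistics/sampledataproducer/oozie_app}.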
10
 * @author Dominika Tkaczyk
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
	public void testWorkflow() throws Exception {
18
        runWorkflow("eu/dnetlib/iis/transformers/statistics/sampledataproducer/oozie_app");
19
    }
20

  
21
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/transformers/documentsclassification/WorkflowTest.java
1
package eu.dnetlib.iis.transformers.documentsclassification;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
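 * Integration test that runs the workflow application at {@code eu/dnetlib/iis/transformers/documentsclassification/sampledataproducer/oozie_app}.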
10
 * @author Dominika Tkaczyk
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
	public void testWorkflow() throws Exception {
18
        runWorkflow("eu/dnetlib/iis/transformers/documentsclassification/sampledataproducer/oozie_app");
19
    }
20

  
21
}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
transformer_project_toconcept classpath eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/oozie_app
0 3

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.4" name="test-transformers_referenceextraction_project_toconcept">
2
    
3
    <start to="producer"/>
4
    <action name="producer">
5
        <java>
6
            <job-tracker>${jobTracker}</job-tracker>
7
            <name-node>${nameNode}</name-node>
8
			<!-- The data generated by this node is deleted in this section -->
9
			<prepare>
10
				<delete path="${nameNode}${workingDir}/producer" />
11
				<mkdir path="${nameNode}${workingDir}/producer" />
12
			</prepare>
13
            <configuration>
14
                <property>
15
                    <name>mapred.job.queue.name</name>
16
                    <value>${queueName}</value>
17
                </property>
18
            </configuration>
19
            <!-- This is a simple wrapper for the Java code -->
20
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
21
			<!-- The business Java code that gets to be executed -->
22
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
23
			<!-- Specification of the output ports -->
24
            <arg>-C{document_to_project,
25
				eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject,
26
				eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/document_to_project.json}</arg>
27
			<arg>-C{project,
28
				eu.dnetlib.iis.importer.schemas.Project,
29
				eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/project.json}</arg>
30
			<arg>-C{concept,
31
				eu.dnetlib.iis.importer.schemas.Concept,
32
				eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/concept.json}</arg>
33
			<!-- All input and output ports have to be bound to paths in HDFS, working 
34
				directory has to be specified as well -->
35
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
36
            <arg>-Odocument_to_project=${workingDir}/producer/document_to_project</arg>
37
            <arg>-Oproject=${workingDir}/producer/project</arg>
38
            <arg>-Oconcept=${workingDir}/producer/concept</arg>
39
        </java>
40
        <ok to="transformer_project_toconcept"/>
41
        <error to="fail"/>
42
    </action>
43
    <action name="transformer_project_toconcept">
44
        <sub-workflow>
45
            <app-path>${wf:appPath()}/transformer_project_toconcept</app-path>
46
            <configuration>
47
                <property>
48
                    <name>jobTracker</name>
49
                    <value>${jobTracker}</value>
50
                </property>
51
                <property>
52
                    <name>nameNode</name>
53
                    <value>${nameNode}</value>
54
                </property>
55
                <property>
56
                    <name>queueName</name>
57
                    <value>${queueName}</value>
58
                </property>
59
                <!-- Working directory of the subworkflow -->
60
                <property>
61
                    <name>workingDir</name>
62
                    <value>${workingDir}/transformer_project_toconcept/working_dir</value>
63
                </property>
64
                <property>
65
                    <name>grant_id_param_name</name>
66
                    <value>CD_PROJECT_NUMBER</value>
67
                </property>
68
                <!-- Input ports. -->
69
                <property>
70
                    <name>input_document_to_project</name>
71
                    <value>${workingDir}/producer/document_to_project</value>
72
                </property>
73
                <property>
74
                    <name>input_project</name>
75
                    <value>${workingDir}/producer/project</value>
76
                </property>
77
                <property>
78
                    <name>input_concept</name>
79
                    <value>${workingDir}/producer/concept</value>
80
                </property>
81
                <!-- Output port bound to given path -->
82
                <property>
83
                    <name>output</name>
84
                    <value>${workingDir}/transformer_project_toconcept/output</value>
85
                </property>
86
            </configuration>
87
        </sub-workflow>
88
        <ok to="consumer"/>
89
        <error to="fail"/>
90
    </action>
91
    <action name="consumer">
92
		<java>
93
			<job-tracker>${jobTracker}</job-tracker>
94
			<name-node>${nameNode}</name-node>
95
			<configuration>
96
				<property>
97
					<name>mapred.job.queue.name</name>
98
					<value>${queueName}</value>
99
				</property>
100
			</configuration>
101
			<!-- This is a simple wrapper for the Java code -->
102
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
103
			<!-- The business Java code that gets to be executed -->
104
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
105
			<!-- Specification of the input ports -->
106
			<arg>-C{output,
107
				eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId,
108
				eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/output.json}</arg>
109
			<!-- All input and output ports have to be bound to paths in HDFS, working 
110
				directory has to be specified as well -->
111
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
112
			<arg>-Ioutput=${workingDir}/transformer_project_toconcept/output</arg>
113
		</java>
114
		<ok to="end" />
115
		<error to="fail" />
116
	</action>    
117
    <kill name="fail">
118
		<message>Unfortunately, the workflow failed -- error message:
119
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
120
    </kill>
121
    <end name="end"/>
122
    
123
</workflow-app>
0 124

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/concept.json
1
{"id": "concept-id1", "label": "concept id1", "params": {"CD_PROJECT_NUMBER": "123456", "CD_FRAMEWORK": "FP7"}}
2
{"id": "concept-id2", "label": "concept id2", "params": {"CD_PROJECT_NUMBER": "654321", "CD_FRAMEWORK": "FP7"}}
3
{"id": "concept-id3", "label": "concept id3", "params": {"CD_PROJECT_NUMBER": "999999", "CD_FRAMEWORK": "FP7"}}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/output.json
1
{"documentId": "docId1", "conceptId": "concept-id1", "confidenceLevel": 0.5}
2
{"documentId": "docId1", "conceptId": "concept-id2", "confidenceLevel": 0.1}
3
{"documentId": "docId2", "conceptId": "concept-id3", "confidenceLevel": 0.3}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/document_to_project.json
1
{"documentId": "docId1", "projectId": "projId1", "confidenceLevel": 0.5}
2
{"documentId": "docId1", "projectId": "projId2", "confidenceLevel": 0.1}
3
{"documentId": "docId2", "projectId": "projId3", "confidenceLevel": 0.3}
4
{"documentId": "docId2", "projectId": "nonexistingProjId", "confidenceLevel": 0.1}
5
{"documentId": "docId2", "projectId": "noFetProjId", "confidenceLevel": 0.1}
6
{"documentId": "docId3", "projectId": "nonexistingProjId", "confidenceLevel": 0.1}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/project.json
1
{"id":"projId1","projectAcronym":"project 1","projectGrantId":"123456","fundingClass":"FP7"}
2
{"id":"projId2","projectAcronym":"project 2","projectGrantId":"654321","fundingClass":"FP7"}
3
{"id":"projId3","projectAcronym":"project 3","projectGrantId":"999999","fundingClass":"FP7"}
4
{"id":"noFetProjId","projectAcronym":"non fet project","projectGrantId":"000000","fundingClass":"FP7"}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
transformer_export_documentmetadata classpath eu/dnetlib/iis/transformers/export/documentmetadata/oozie_app
0 3

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-transformers_export_documentmetadata_sampledataproducer">
2
    <start to="producer"/>
3
    <action name="producer">
4
        <java>
5
            <job-tracker>${jobTracker}</job-tracker>
6
            <name-node>${nameNode}</name-node>
7
			<!-- The data generated by this node is deleted in this section -->
8
			<prepare>
9
				<delete path="${nameNode}${workingDir}/producer" />
10
				<mkdir path="${nameNode}${workingDir}/producer" />
11
			</prepare>
12
            <configuration>
13
                <property>
14
                    <name>mapred.job.queue.name</name>
15
                    <value>${queueName}</value>
16
                </property>
17
            </configuration>
18
            <!-- This is a simple wrapper for the Java code -->
19
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
20
			<!-- The business Java code that gets to be executed -->
21
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
22
			<!-- Specification of the output ports -->
23
            <arg>-C{extracted_metadata,
24
				eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata,
25
				eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/extr_metadata.json}</arg>
26
			<!-- All input and output ports have to be bound to paths in HDFS, working 
27
				directory has to be specified as well -->
28
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
            <arg>-Oextracted_metadata=${workingDir}/producer/extr_metadata</arg>
30
        </java>
31
        <ok to="transformer_export_documentmetadata"/>
32
        <error to="fail"/>
33
    </action>
34
    <action name="transformer_export_documentmetadata">
35
        <sub-workflow>
36
            <app-path>${wf:appPath()}/transformer_export_documentmetadata</app-path>
37
            <configuration>
38
                <property>
39
                    <name>jobTracker</name>
40
                    <value>${jobTracker}</value>
41
                </property>
42
                <property>
43
                    <name>nameNode</name>
44
                    <value>${nameNode}</value>
45
                </property>
46
                <property>
47
                    <name>queueName</name>
48
                    <value>${queueName}</value>
49
                </property>
50
                <!-- Working directory of the subworkflow -->
51
                <property>
52
                    <name>workingDir</name>
53
                    <value>${workingDir}/transformer_export_documentmetadata/working_dir</value>
54
                </property>
55
                <!-- Input ports. -->
56
                <property>
57
                    <name>input_extracted_metadata</name>
58
                    <value>${workingDir}/producer/extr_metadata</value>
59
                </property>
60
                <!-- Output port bound to given path -->
61
                <property>
62
                    <name>output_metadata</name>
63
                    <value>${workingDir}/transformer_export_documentmetadata/output_metadata</value>
64
                </property>
65
            </configuration>
66
        </sub-workflow>
67
        <ok to="consumer"/>
68
        <error to="fail"/>
69
    </action>
70
    <action name="consumer">
71
		<java>
72
			<job-tracker>${jobTracker}</job-tracker>
73
			<name-node>${nameNode}</name-node>
74
			<configuration>
75
				<property>
76
					<name>mapred.job.queue.name</name>
77
					<value>${queueName}</value>
78
				</property>
79
			</configuration>
80
			<!-- This is a simple wrapper for the Java code -->
81
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
82
			<!-- The business Java code that gets to be executed -->
83
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
84
			<!-- Specification of the input ports -->
85
			<arg>-C{output_metadata,
86
				eu.dnetlib.iis.export.schemas.DocumentMetadata,
87
				eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/output_metadata.json}</arg>
88
			<!-- All input and output ports have to be bound to paths in HDFS, working 
89
				directory has to be specified as well -->
90
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
91
			<arg>-Ioutput_metadata=${workingDir}/transformer_export_documentmetadata/output_metadata</arg>
92
		</java>
93
		<ok to="end" />
94
		<error to="fail" />
95
	</action>    
96
    <kill name="fail">
97
		<message>Unfortunately, the workflow failed -- error message:
98
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
99
    </kill>
100
    <end name="end"/>
101
</workflow-app>
0 102

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/output_metadata.json
1
{"id": "id-2", "affiliations": []}
2
{"id": "id-3", "affiliations": [{"organization": "some-org", "countryName": "pl", "countryCode": "PL", "address": null, "rawText": "some raw text"}]}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/extr_metadata.json
1
{"publisher": "Tor Science Fiction", "affiliations": null, "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}, {"authorFullName": "author-3", "affiliationPositions": null}], "language": "eng", "title": "Enders Game", "externalIdentifiers": null, "journal": "Journal-2", "id": "id-1", "pages": {"start": "123", "end": "128"}, "volume": null, "references": null, "year": null, "keywords": null, "issue": null, "abstract": null}
2
{"publisher": null, "affiliations": [], "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}], "language": null, "title": null, "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "id": "id-2", "pages": null, "volume": "124", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}, {"position": 2, "basicMetadata": {"publisher": null, "title": "The Other Wind", "url": null, "series": null, "authors": null, "volume": "vol.23", "edition": null, "source": null, "year": "2003", "issue": null, "pages": null, "location": null}, "text": "Ursula K. Le Guin, The Other Wind, 2003"}], "year": 1970, "keywords": ["kwd_1", "kwd_3", "kwd_5"], "issue": null, "abstract": "The tales"}
3
{"publisher": "Harp3r T0rch", "affiliations": [{"organization": "some-org", "countryName": "pl", "countryCode": "PL", "address": null, "rawText": "some raw text"}], "authors": null, "language": "en", "title": "Small Gods", "externalIdentifiers": null, "journal": "Journal", "id": "id-3", "pages": null, "volume": "32", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}], "year": null, "keywords": null, "issue": "4", "abstract": null}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
transformer_export_researchinitiatives classpath eu/dnetlib/iis/transformers/export/researchinitiatives/oozie_app
0 3

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-transformers_export_researchinitiatives_sampledataproducer">
2

  
3
    <start to="producer"/>
4

  
5
    <action name="producer">
6
        <java>
7
            <job-tracker>${jobTracker}</job-tracker>
8
            <name-node>${nameNode}</name-node>
9
			<!-- The data generated by this node is deleted in this section -->
10
			<prepare>
11
				<delete path="${nameNode}${workingDir}/producer" />
12
				<mkdir path="${nameNode}${workingDir}/producer" />
13
			</prepare>
14
            <configuration>
15
                <property>
16
                    <name>mapred.job.queue.name</name>
17
                    <value>${queueName}</value>
18
                </property>
19
            </configuration>
20
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
21
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
22
            <arg>-C{document_to_concept_id,
23
				eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId,
24
				eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_id.json}</arg>
25
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
26
            <arg>-Odocument_to_concept_id=${workingDir}/producer/output</arg>
27
        </java>
28
        <ok to="transformer_export_researchinitiatives"/>
29
        <error to="fail"/>
30
    </action>
31
    
32
    <action name="transformer_export_researchinitiatives">
33
        <sub-workflow>
34
            <app-path>${wf:appPath()}/transformer_export_researchinitiatives</app-path>
35
            <configuration>
36
                <property>
37
                    <name>jobTracker</name>
38
                    <value>${jobTracker}</value>
39
                </property>
40
                <property>
41
                    <name>nameNode</name>
42
                    <value>${nameNode}</value>
43
                </property>
44
                <property>
45
                    <name>queueName</name>
46
                    <value>${queueName}</value>
47
                </property>
48
                <property>
49
                    <name>workingDir</name>
50
                    <value>${workingDir}/transformer_export_researchinitiatives/working_dir</value>
51
                </property>
52
                <property>
53
                    <name>input_document_to_research_initiative</name>
54
                    <value>${workingDir}/producer/output</value>
55
                </property>
56
                <property>
57
                    <name>output_document_to_research_initiatives</name>
58
                    <value>${workingDir}/transformer_export_researchinitiatives/output</value>
59
                </property>
60
            </configuration>
61
        </sub-workflow>
62
        <ok to="consumer"/>
63
        <error to="fail"/>
64
    </action>
65

  
66
    <action name="consumer">
67
		<java>
68
			<job-tracker>${jobTracker}</job-tracker>
69
			<name-node>${nameNode}</name-node>
70
			<configuration>
71
				<property>
72
					<name>mapred.job.queue.name</name>
73
					<value>${queueName}</value>
74
				</property>
75
			</configuration>
76
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
77
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
78
			<arg>-C{output,
79
				eu.dnetlib.iis.export.schemas.DocumentToConceptIds,
80
				eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_ids.json}</arg>
81
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
82
			<arg>-Ioutput=${workingDir}/transformer_export_researchinitiatives/output</arg>
83
		</java>
84
		<ok to="end" />
85
		<error to="fail" />
86
	</action>    
87

  
88
    <kill name="fail">
89
		<message>Unfortunately, the workflow failed -- error message:
90
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
91
    </kill>
92

  
93
    <end name="end"/>
94

  
95
</workflow-app>
0 96

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_id.json
1
{"documentId": "docId1", "conceptId": "concept1", "confidenceLevel": 0.9}
2
{"documentId": "docId1", "conceptId": "concept1", "confidenceLevel": null}
3
{"documentId": "docId2", "conceptId": "concept1", "confidenceLevel": 0.9}
4
{"documentId": "docId2", "conceptId": "concept2", "confidenceLevel": 0.8}
5
{"documentId": "docId3", "conceptId": "concept3", "confidenceLevel": 0.7}
6
{"documentId": "docId3", "conceptId": "concept2", "confidenceLevel": null}
7
{"documentId": "docId3", "conceptId": "concept1", "confidenceLevel": 0.7}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_ids.json
1
{"documentId": "docId1", "conceptIds": ["concept1"]}
2
{"documentId": "docId2", "conceptIds": ["concept1", "concept2"]}
3
{"documentId": "docId3", "conceptIds": ["concept1", "concept2", "concept3"]}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/output_metadata.json
1
{"id": "id-2", "affiliations": []}
2
{"id": "id-3", "affiliations": [{"organization": "some-org", "countryName": "pl", "countryCode": "PL", "address": null, "rawText": "some raw text"}]}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/extr_metadata.json
1
{"publisher": "Tor Science Fiction", "affiliations": null, "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}, {"authorFullName": "author-3", "affiliationPositions": null}], "language": "eng", "title": "Enders Game", "externalIdentifiers": null, "journal": "Journal-2", "id": "id-1", "pages": {"start": "123", "end": "128"}, "volume": null, "references": null, "year": null, "keywords": null, "issue": null, "abstract": null}
2
{"publisher": null, "affiliations": [], "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}], "language": null, "title": null, "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "id": "id-2", "pages": null, "volume": "124", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}, {"position": 2, "basicMetadata": {"publisher": null, "title": "The Other Wind", "url": null, "series": null, "authors": null, "volume": "vol.23", "edition": null, "source": null, "year": "2003", "issue": null, "pages": null, "location": null}, "text": "Ursula K. Le Guin, The Other Wind, 2003"}], "year": 1970, "keywords": ["kwd_1", "kwd_3", "kwd_5"], "issue": null, "abstract": "The tales"}
3
{"publisher": "Harp3r T0rch", "affiliations": [{"organization": "some-org", "countryName": "pl", "countryCode": "PL", "address": null, "rawText": "some raw text"}], "authors": null, "language": "en", "title": "Small Gods", "externalIdentifiers": null, "journal": "Journal", "id": "id-3", "pages": null, "volume": "32", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}], "year": null, "keywords": null, "issue": "4", "abstract": null}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
transformer_export_documenttodataset_without_imported_data classpath eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/oozie_app
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-transformers_export_documenttodataset_without_imported_data_sampledataproducer">
2
    <start to="producer"/>
3
    <action name="producer">
4
        <java>
5
            <job-tracker>${jobTracker}</job-tracker>
6
            <name-node>${nameNode}</name-node>
7
			<!-- The data generated by this node is deleted in this section -->
8
			<prepare>
9
				<delete path="${nameNode}${workingDir}/producer" />
10
				<mkdir path="${nameNode}${workingDir}/producer" />
11
			</prepare>
12
            <configuration>
13
                <property>
14
                    <name>mapred.job.queue.name</name>
15
                    <value>${queueName}</value>
16
                </property>
17
            </configuration>
18
            <!-- This is a simple wrapper for the Java code -->
19
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
20
			<!-- The business Java code that gets to be executed -->
21
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
22
			<!-- Specification of the output ports -->
23
            <arg>-C{document_to_dataset,
24
				eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet,
25
				eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_to_dataset.json}</arg>
26
            <arg>-C{document_relation,
27
				eu.dnetlib.iis.importer.schemas.DocumentRelation,
28
				eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_relation.json}</arg>
29
                             
30
			<!-- All input and output ports have to be bound to paths in HDFS, working 
31
				directory has to be specified as well -->
32
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
33
            <arg>-Odocument_to_dataset=${workingDir}/producer/document_to_dataset</arg>
34
            <arg>-Odocument_relation=${workingDir}/producer/document_relation</arg>
35
        </java>
36
        <ok to="transformer_export_docdataset_without_imported"/>
37
        <error to="fail"/>
38
    </action>
39
    <action name="transformer_export_docdataset_without_imported">
40
        <sub-workflow>
41
            <app-path>${wf:appPath()}/transformer_export_documenttodataset_without_imported_data</app-path>
42
            <configuration>
43
                <property>
44
                    <name>jobTracker</name>
45
                    <value>${jobTracker}</value>
46
                </property>
47
                <property>
48
                    <name>nameNode</name>
49
                    <value>${nameNode}</value>
50
                </property>
51
                <property>
52
                    <name>queueName</name>
53
                    <value>${queueName}</value>
54
                </property>
55
                <!-- Working directory of the subworkflow -->
56
                <property>
57
                    <name>workingDir</name>
58
                    <value>${workingDir}/transformer_export_documenttodataset_without_imported_data/working_dir</value>
59
                </property>
60
                <!-- Input ports. -->
61
                <property>
62
                    <name>input_document_to_dataset</name>
63
                    <value>${workingDir}/producer/document_to_dataset</value>
64
                </property>
65
                <property>
66
                    <name>input_document_relation</name>
67
                    <value>${workingDir}/producer/document_relation</value>
68
                </property>
69
                <!-- Output port bound to given path -->
70
                <property>
71
                    <name>output_document_to_dataset</name>
72
                    <value>${workingDir}/transformer_export_documenttodataset_without_imported_data/document_to_dataset</value>
73
                </property>
74
            </configuration>
75
        </sub-workflow>
76
        <ok to="consumer"/>
77
        <error to="fail"/>
78
    </action>
79
    <action name="consumer">
80
		<java>
81
			<job-tracker>${jobTracker}</job-tracker>
82
			<name-node>${nameNode}</name-node>
83
			<configuration>
84
				<property>
85
					<name>mapred.job.queue.name</name>
86
					<value>${queueName}</value>
87
				</property>
88
			</configuration>
89
			<!-- This is a simple wrapper for the Java code -->
90
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
91
			<!-- The business Java code that gets to be executed -->
92
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
93
			<!-- Specification of the input ports -->
94
			<arg>-C{document_to_dataset,
95
				eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet,
96
				eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/output_document_to_dataset.json}</arg>
97
			<!-- All input and output ports have to be bound to paths in HDFS, working 
98
				directory has to be specified as well -->
99
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
100
			<arg>-Idocument_to_dataset=${workingDir}/transformer_export_documenttodataset_without_imported_data/document_to_dataset</arg>
101
		</java>
102
		<ok to="end" />
103
		<error to="fail" />
104
	</action>    
105
    <kill name="fail">
106
		<message>Unfortunately, the workflow failed -- error message:
107
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
108
    </kill>
109
    <end name="end"/>
110
</workflow-app>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_relation.json
1
{"id": "id-1", "referencedIds": []}
2
{"id": "id-3", "referencedIds": ["refid-4", "refid-2"]}
3
{"id": "id-2", "referencedIds": ["refid-1", "refid-4", "refid-2", "refid-48", "refid-32"]}
4
{"id": "id-4", "referencedIds": []}
5
{"id": "id-5", "referencedIds": ["refid-4"]}
6
{"id": "id-6", "referencedIds": ["refid-4", "refid-48", "refid-32"]}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_to_dataset.json
1
{"documentId": "id-1", "datasetId": "8095", "confidenceLevel": null}
2
{"documentId": "id-2", "datasetId": "refid-48", "confidenceLevel": 1}
3
{"documentId": "id-2", "datasetId": "refid-14", "confidenceLevel": null}
4
{"documentId": "id-8", "datasetId": "0820", "confidenceLevel": null}
5
{"documentId": "id-1", "datasetId": "refid-32", "confidenceLevel": 1.1}
6
{"documentId": "id-2", "datasetId": "refid-32", "confidenceLevel": null}
7
{"documentId": "id-5", "datasetId": "refid-4", "confidenceLevel": 2}
8
{"documentId": "id-8", "datasetId": "5103", "confidenceLevel": 2}
9
{"documentId": "id-1", "datasetId": "5103", "confidenceLevel": null}
10
{"documentId": "id-2", "datasetId": "refid-148", "confidenceLevel": 0.9}
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/output_document_to_dataset.json
1
{"documentId": "id-1", "datasetId": "8095", "confidenceLevel": null}
2
{"documentId": "id-2", "datasetId": "refid-14", "confidenceLevel": null}
3
{"documentId": "id-8", "datasetId": "0820", "confidenceLevel": null}
4
{"documentId": "id-1", "datasetId": "refid-32", "confidenceLevel": 1.1}
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff