Project

General

Profile

« Previous | Next » 

Revision 33553

Added by Marek Horst over 9 years ago

[maven-release-plugin] copy for tag icm-iis-collapsers-1.0.0

View differences:

modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/deploy.info
1
[
2
{
3
  "type_source": "SVN", 
4
  "goal": "package -U -T 4C source:jar", 
5
  "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-collapsers/trunk/", 
6
  "deploy_repository": "dnet4-snapshots", 
7
  "version": "4",
8
  "mail": "m.horst@icm.edu.pl",
9
  "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", 
10
  "name": "icm-iis-collapsers"
11
},
12
{
13
  "type_source": "SVN",
14
  "goal": "clean verify -U -e -X",
15
  "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-collapsers/trunk/",
16
  "nightly" : "true",
17
  "cron" : "H H * * *",
18
  "version": "4",
19
  "mail": "d.tkaczyk@icm.edu.pl,m.horst@icm.edu.pl",
20
  "name": "icm-iis-collapsers-embedded-integration-test"
21
}
22
]
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/basic/BestFilledCollapserTest.java
1
package eu.dnetlib.iis.collapsers.basic;
2

  
3
import eu.dnetlib.iis.collapsers.SampleData;
4
import eu.dnetlib.iis.importer.schemas.DocumentMetadata;
5
import java.util.ArrayList;
6
import java.util.List;
7
import static org.junit.Assert.assertNull;
8
import org.junit.Test;
9
import org.python.google.common.collect.Lists;
10

  
11
/**
12
 * 
13
 * @author Dominika Tkaczyk
14
 *
15
 */
16
public class BestFilledCollapserTest {
17

  
18
    public static final List<DocumentMetadata> emptyList = 
19
            new ArrayList<DocumentMetadata>();
20
    
21
    public static final List<DocumentMetadata> list1 = 
22
            Lists.newArrayList(SampleData.metadataRecord11);
23
    
24
    public static final List<DocumentMetadata> list2 = 
25
            Lists.newArrayList(SampleData.metadataRecord12);
26

  
27
    public static final List<DocumentMetadata> list123 =
28
            Lists.newArrayList(SampleData.metadataRecord11, SampleData.metadataRecord12, SampleData.metadataRecord13);
29

  
30
    public static final List<DocumentMetadata> list321 =
31
            Lists.newArrayList(SampleData.metadataRecord13, SampleData.metadataRecord12, SampleData.metadataRecord11);
32
    
33
    
34
    @Test
35
	public void testBestFilledEmpty() throws Exception {
36
        BestFilledCollapser<DocumentMetadata> collapser = new BestFilledCollapser<DocumentMetadata>();
37
        
38
        assertNull(collapser.collapse(null));
39
        assertNull(collapser.collapse(emptyList));
40
    }
41
    
42
    @Test
43
	public void testBestFilledDefaultFieldSet() throws Exception {
44
        BestFilledCollapser<DocumentMetadata> collapser = new BestFilledCollapser<DocumentMetadata>();
45
        
46
        SampleData.assertEqualRecords(
47
                list1,
48
                collapser.collapse(list1));
49
        SampleData.assertEqualRecords(
50
                list1,
51
                collapser.collapse(list123));
52
        SampleData.assertEqualRecords(
53
                list1, 
54
                collapser.collapse(list321));
55
    }
56

  
57
    @Test
58
	public void testBestFilled() throws Exception {
59
        BestFilledCollapser<DocumentMetadata> collapser = new BestFilledCollapser<DocumentMetadata>();
60
        collapser.setFields(SampleData.significantFields);
61
        
62
        SampleData.assertEqualRecords(
63
                list1,
64
                collapser.collapse(list1));
65
        SampleData.assertEqualRecords(
66
                list2, 
67
                collapser.collapse(list123));
68
        SampleData.assertEqualRecords(
69
                list2, 
70
                collapser.collapse(list321));
71
    }
72

  
73
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/basic/DocumentTextCollapserTest.java
1
package eu.dnetlib.iis.collapsers.basic;
2

  
3
import eu.dnetlib.iis.collapsers.SampleData;
4
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
5
import java.util.ArrayList;
6
import java.util.List;
7
import static org.junit.Assert.assertNull;
8
import org.junit.Test;
9
import org.python.google.common.collect.Lists;
10

  
11
/**
12
 * 
13
 * @author Dominika Tkaczyk
14
 *
15
 */
16
public class DocumentTextCollapserTest {
17

  
18
    public static final List<DocumentText> oneElementList = 
19
            Lists.newArrayList(
20
                DocumentText.newBuilder().setId("id").setText("This is text").build()
21
            );
22
    
23
    public static final List<DocumentText> list =
24
            Lists.newArrayList(
25
                DocumentText.newBuilder().setId("id").setText("This is text").build(),
26
                DocumentText.newBuilder().setId("id").setText("This is another text").build(),
27
                DocumentText.newBuilder().setId("id").setText("This is a duplicated text").build()
28
            );
29

  
30
    public static final List<DocumentText> collapsedList =
31
            Lists.newArrayList(
32
                DocumentText.newBuilder().setId("id").setText("This is text\n\nThis is another text\n\nThis is a duplicated text").build()
33
            );
34
  
35
    
36
    @Test
37
	public void testDocumentTextCollapserEmpty() throws Exception {
38
        DocumentTextCollapser collapser = new DocumentTextCollapser();
39
   
40
        assertNull(collapser.collapse(null));
41
        assertNull(collapser.collapse(new ArrayList<DocumentText>()));
42
    }
43
    
44
    @Test
45
	public void testDocumentTextCollapser() throws Exception {
46
        DocumentTextCollapser collapser = new DocumentTextCollapser();
47
        
48
        SampleData.assertEqualRecords(
49
                oneElementList,
50
                collapser.collapse(oneElementList));
51
        SampleData.assertEqualRecords(
52
                collapsedList,
53
                collapser.collapse(list));
54
    }
55

  
56
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/basic/WorkflowTest.java
1
package eu.dnetlib.iis.collapsers.basic;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
 * @author Dominika Tkaczyk
10
 * @author Michal Oniszczuk
11
 */
12
@Category(IntegrationTest.class)
13
public class WorkflowTest extends AbstractWorkflowTestCase {
14

  
15
    @Test
16
    public void testDefaultWorkflow() throws Exception {
17
        runWorkflow("eu/dnetlib/iis/collapsers/basic_collapser/default/oozie_app");
18
    }
19

  
20
    /*
21
    @Test
22
    public void testCitationWorkflow() throws Exception {
23
        runWorkflow("eu/dnetlib/iis/collapsers/collapser/citation/oozie_app");
24
    }
25
*/
26
    @Test
27
    public void testDocumentTextWorkflow() throws Exception {
28
        runWorkflow("eu/dnetlib/iis/collapsers/basic_collapser/documenttext/oozie_app");
29
    }
30

  
31
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/basic/BestFilledMergingCollapserTest.java
1
package eu.dnetlib.iis.collapsers.basic;
2

  
3
import eu.dnetlib.iis.collapsers.SampleData;
4
import eu.dnetlib.iis.importer.schemas.DocumentMetadata;
5
import java.util.ArrayList;
6
import java.util.List;
7
import static org.junit.Assert.assertNull;
8
import org.junit.Test;
9
import org.python.google.common.collect.Lists;
10

  
11
/**
12
 * 
13
 * @author Dominika Tkaczyk
14
 *
15
 */
16
public class BestFilledMergingCollapserTest {
17

  
18
    public static final List<DocumentMetadata> emptyList = 
19
            new ArrayList<DocumentMetadata>();
20
    
21
    public static final List<DocumentMetadata> list1 = 
22
            Lists.newArrayList(SampleData.metadataRecord11);
23
    
24
    public static final List<DocumentMetadata> mergedList12 = 
25
            Lists.newArrayList(SampleData.mergedRecord1112);
26

  
27
    public static final List<DocumentMetadata> mergedList21 = 
28
            Lists.newArrayList(SampleData.mergedRecord1211);
29
    
30
    public static final List<DocumentMetadata> list123 =
31
            Lists.newArrayList(SampleData.metadataRecord11, SampleData.metadataRecord12, SampleData.metadataRecord13);
32

  
33
    public static final List<DocumentMetadata> list321 =
34
            Lists.newArrayList(SampleData.metadataRecord13, SampleData.metadataRecord12, SampleData.metadataRecord11);
35
    
36
    
37
    @Test
38
	public void testBestFilledEmpty() throws Exception {
39
        BestFilledMergingCollapser<DocumentMetadata> collapser = new BestFilledMergingCollapser<DocumentMetadata>();
40
        
41
        assertNull(collapser.collapse(null));
42
        assertNull(collapser.collapse(emptyList));
43
    }
44
    
45
    @Test
46
	public void testBestFilledMergingDefaultFieldSet() throws Exception {
47
        BestFilledMergingCollapser<DocumentMetadata> collapser = new BestFilledMergingCollapser<DocumentMetadata>();
48
                
49
        SampleData.assertEqualRecords(
50
                list1,
51
                collapser.collapse(list1));
52
        SampleData.assertEqualRecords(
53
                mergedList12,
54
                collapser.collapse(list321));
55
    }
56
    
57
    @Test
58
	public void testBestFilledMerging() throws Exception {
59
        BestFilledMergingCollapser<DocumentMetadata> collapser = new BestFilledMergingCollapser<DocumentMetadata>();
60
        collapser.setFields(SampleData.significantFields);
61
        
62
        SampleData.assertEqualRecords(
63
                list1,
64
                collapser.collapse(list1));
65
        SampleData.assertEqualRecords(
66
                mergedList21,
67
                collapser.collapse(list321));
68
    }
69
   
70
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/CollapserUtilsTest.java
1
package eu.dnetlib.iis.collapsers;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5
import org.apache.avro.generic.IndexedRecord;
6
import static org.junit.Assert.*;
7
import org.junit.Test;
8
import org.python.google.common.collect.Lists;
9

  
10
/**
11
 * 
12
 * @author Dominika Tkaczyk
13
 *
14
 */
15
public class CollapserUtilsTest {
16

  
17
    @Test
18
    public void testHaveEqualSchema() {
19
        assertTrue(CollapserUtils.haveEqualSchema(null));
20
        assertTrue(CollapserUtils.haveEqualSchema(new ArrayList<IndexedRecord>()));
21
        assertTrue(CollapserUtils.haveEqualSchema(
22
                Lists.newArrayList((IndexedRecord)SampleData.envMetadataRecord11, SampleData.envMetadataRecord12)));
23
        assertFalse(CollapserUtils.haveEqualSchema(
24
                Lists.newArrayList((IndexedRecord)SampleData.envMetadataRecord11, SampleData.envMetadataRecord12, SampleData.envTextRecord)));
25
    }
26
    
27
    @Test
28
    public void testIsOriginSchema() {
29
        assertTrue(CollapserUtils.isEnvelopeSchema(SampleData.envMetadataRecord11.getSchema()));
30
        assertFalse(CollapserUtils.isEnvelopeSchema(SampleData.textRecord.getSchema()));
31
    }
32
    
33
    @Test
34
    public void testGetOriginValue() {
35
        assertEquals("origin1", CollapserUtils.getOriginValue(SampleData.envMetadataRecord11));
36
        assertEquals("origin1", CollapserUtils.getOriginValue(SampleData.envTextRecord));
37
    }
38
    
39
    @Test
40
    public void testGetDataRecord() {
41
        SampleData.assertEqualRecords(
42
                SampleData.metadataRecord11, 
43
                CollapserUtils.getDataRecord(SampleData.envMetadataRecord11));
44
        SampleData.assertEqualRecords(
45
                SampleData.textRecord, 
46
                CollapserUtils.getDataRecord(SampleData.envTextRecord));
47
    }
48
        
49
    @Test
50
    public void testGetNumberOfFilledFields() {
51
        assertEquals(8, CollapserUtils.getNumberOfFilledFields(SampleData.metadataRecord11, null));
52
        assertEquals(2, CollapserUtils.getNumberOfFilledFields(SampleData.metadataRecord11, SampleData.significantFields));
53
    }
54
    
55
    @Test
56
    public void testSortByFilledFields() {
57
        List<IndexedRecord> empty = new ArrayList<IndexedRecord>();
58
        CollapserUtils.sortByFilledDataFields(empty, SampleData.significantFields);
59
        assertTrue(empty.isEmpty());
60
        
61
        List<IndexedRecord> oneElement = Lists.newArrayList((IndexedRecord)SampleData.metadataRecord13);
62
        CollapserUtils.sortByFilledDataFields(oneElement, SampleData.significantFields);
63
        assertEquals(Lists.newArrayList(SampleData.metadataRecord13),
64
                oneElement);
65
        
66
        List<IndexedRecord> list = Lists.newArrayList(
67
                (IndexedRecord)SampleData.metadataRecord11, SampleData.metadataRecord12, SampleData.metadataRecord13);
68
        CollapserUtils.sortByFilledDataFields(list, SampleData.significantFields);
69
        assertEquals(Lists.newArrayList(SampleData.metadataRecord12, SampleData.metadataRecord11, SampleData.metadataRecord13),
70
                list);
71
       
72
        CollapserUtils.sortByFilledDataFields(list, null);
73
        assertEquals(Lists.newArrayList(SampleData.metadataRecord11, SampleData.metadataRecord12, SampleData.metadataRecord13),
74
                list);
75
    }
76
    
77
    @Test
78
    public void testMerge() {
79
        SampleData.assertEqualRecords(
80
                SampleData.mergedRecord1112,
81
                CollapserUtils.merge(SampleData.metadataRecord11, SampleData.metadataRecord12));
82
        
83
        SampleData.assertEqualRecords(
84
                SampleData.mergedRecord2221,
85
                CollapserUtils.merge(SampleData.metadataRecord22, SampleData.metadataRecord21));
86
    }
87
    
88
    @Test
89
    public void testGetNestedFieldValue() {
90
        assertNull(CollapserUtils.getNestedFieldValue(null, null));
91
        assertNull(CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, null));
92
        assertNull(CollapserUtils.getNestedFieldValue(null, "notnull"));
93
        
94
        assertNull(CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "field"));
95
        assertNull(CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "id.field"));
96
        
97
        assertEquals("id-1", CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "id"));
98
        assertEquals(1990, CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "year"));
99
        assertEquals(true, CollapserUtils.getNestedFieldValue(SampleData.metadataRecord11, "publicationType.article"));
100
        
101
        assertEquals("id-1", CollapserUtils.getNestedFieldValue(SampleData.envMetadataRecord11, "data.id"));
102
        assertEquals(1990, CollapserUtils.getNestedFieldValue(SampleData.envMetadataRecord11, "data.year"));
103
        assertEquals(true, CollapserUtils.getNestedFieldValue(SampleData.envMetadataRecord11, "data.publicationType.article"));
104
    }
105
    
106
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/union/WorkflowTest.java
1
package eu.dnetlib.iis.collapsers.union;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
 * 
10
 * @author Dominika Tkaczyk
11
 *
12
 */
13
@Category(IntegrationTest.class)
14
public class WorkflowTest extends AbstractWorkflowTestCase {
15

  
16
    @Test
17
	public void testWorkflow2Inputs() throws Exception {
18
    	runWorkflow("eu/dnetlib/iis/collapsers/union/input_2/oozie_app");
19
    }
20

  
21
    @Test
22
	public void testWorkflow3Inputs() throws Exception {
23
    	runWorkflow("eu/dnetlib/iis/collapsers/union/input_3/oozie_app");
24
    }
25
    
26
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/multiple_input/WorkflowTest.java
1
package eu.dnetlib.iis.collapsers.multiple_input;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import eu.dnetlib.iis.core.WorkflowConfiguration;
6
import org.junit.Test;
7
import org.junit.experimental.categories.Category;
8

  
9
/**
10
 * 
11
 * @author Dominika Tkaczyk
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
	public void testDefaultWorkflow() throws Exception {
19
        WorkflowConfiguration wc = new WorkflowConfiguration();
20
        wc.setTimeoutInSeconds(720);
21
    	runWorkflow("eu/dnetlib/iis/collapsers/multiple_input_collapser/default/oozie_app", wc);
22
    }
23
   
24
    @Test
25
	public void testDocumentTextWorkflow() throws Exception {
26
        WorkflowConfiguration wc = new WorkflowConfiguration();
27
        wc.setTimeoutInSeconds(720);
28
    	runWorkflow("eu/dnetlib/iis/collapsers/multiple_input_collapser/documenttext/oozie_app", wc);
29
    }
30

  
31
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/PMCCitationCollapserTest.java
1
package eu.dnetlib.iis.collapsers.origins;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5

  
6
import java.util.Arrays;
7
import java.util.HashMap;
8
import java.util.List;
9
import java.util.Map;
10

  
11
import org.junit.Test;
12

  
13
import eu.dnetlib.iis.common.citations.schemas.Citation;
14
import eu.dnetlib.iis.common.citations.schemas.CitationEntry;
15

  
16
/**
17
 * {@link PMCCitationCollapser} test class.
18
 * @author mhorst
19
 *
20
 */
21
public class PMCCitationCollapserTest {
22

  
23
	@Test
24
	public void testCollapsingWhenPmcTargetIdSet() throws Exception {
25
		PMCCitationCollapser collapser = new PMCCitationCollapser();
26
		Map<String,List<Citation>> objects = new HashMap<String, List<Citation>>();
27
		
28
		String sourceId = "sourceId";
29
		String pmcTargetId = "pmcTargetId";
30
		String cermineTargetId = "cermineTargetId";
31
		String text = "citation text";
32
		
33
		objects.put("ingested", Arrays.asList(new Citation[] {
34
				Citation
35
						.newBuilder()
36
						.setSourceDocumentId(sourceId)
37
						.setEntry(
38
								CitationEntry.newBuilder()
39
										.setConfidenceLevel(1f)
40
										.setDestinationDocumentId(pmcTargetId)
41
										.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>())
42
										.setRawText(text).build()).build()		
43
		}));
44
		objects.put("matched", Arrays.asList(new Citation[] {
45
				Citation
46
						.newBuilder()
47
						.setSourceDocumentId(sourceId)
48
						.setEntry(
49
								CitationEntry
50
										.newBuilder()
51
										.setConfidenceLevel(0.1f)
52
										.setDestinationDocumentId(cermineTargetId)
53
										.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>())
54
										.setRawText(text).build()).build()
55
		}));
56
		
57
		List<Citation> results = collapser.collapseBetweenOrigins(objects);
58
		assertNotNull(results);
59
		assertEquals(1, results.size());
60
		assertEquals(pmcTargetId,results.get(0).getEntry().getDestinationDocumentId());
61
		assertEquals(new Float(1f),results.get(0).getEntry().getConfidenceLevel());
62
	}
63
	
64
	@Test
65
	public void testCollapsingWhenPmcTargetIdNotSet() throws Exception {
66
		PMCCitationCollapser collapser = new PMCCitationCollapser();
67
		Map<String,List<Citation>> objects = new HashMap<String, List<Citation>>();
68
		
69
		String sourceId = "sourceId";
70
		String cermineTargetId = "cermineTargetId";
71
		String text = "citation text";
72
		
73
		objects.put("ingested", Arrays.asList(new Citation[] {
74
				Citation
75
						.newBuilder()
76
						.setSourceDocumentId(sourceId)
77
						.setEntry(
78
								CitationEntry.newBuilder()
79
										.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>())
80
										.setRawText(text).build()).build()		
81
		}));
82
		objects.put("matched", Arrays.asList(new Citation[] {
83
				Citation
84
						.newBuilder()
85
						.setSourceDocumentId(sourceId)
86
						.setEntry(
87
								CitationEntry
88
										.newBuilder()
89
										.setConfidenceLevel(0.1f)
90
										.setDestinationDocumentId(cermineTargetId)
91
										.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>())
92
										.setRawText(text).build()).build()
93
		}));
94
		
95
		List<Citation> results = collapser.collapseBetweenOrigins(objects);
96
		assertNotNull(results);
97
		assertEquals(1, results.size());
98
		assertEquals(cermineTargetId,results.get(0).getEntry().getDestinationDocumentId());
99
		assertEquals(new Float(0.1f),results.get(0).getEntry().getConfidenceLevel());
100
	}
101
	
102
	@Test
103
	public void testCollapsingWithDifferentText() throws Exception {
104
		PMCCitationCollapser collapser = new PMCCitationCollapser();
105
		Map<String,List<Citation>> objects = new HashMap<String, List<Citation>>();
106
		
107
		String sourceId = "sourceId";
108
		String pmcTargetId = "pmcTargetId";
109
		String cermineTargetId = "cermineTargetId";
110
		String textPmc = "pmc citation text";
111
		String textCermine = "cermine citation text";
112
		
113
		objects.put("ingested", Arrays.asList(new Citation[] {
114
				Citation
115
						.newBuilder()
116
						.setSourceDocumentId(sourceId)
117
						.setEntry(
118
								CitationEntry.newBuilder()
119
										.setConfidenceLevel(1f)
120
										.setDestinationDocumentId(pmcTargetId)
121
										.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>())
122
										.setRawText(textPmc).build()).build()		
123
		}));
124
		objects.put("matched", Arrays.asList(new Citation[] {
125
				Citation
126
						.newBuilder()
127
						.setSourceDocumentId(sourceId)
128
						.setEntry(
129
								CitationEntry
130
										.newBuilder()
131
										.setConfidenceLevel(0.1f)
132
										.setDestinationDocumentId(cermineTargetId)
133
										.setExternalDestinationDocumentIds(new HashMap<CharSequence, CharSequence>())
134
										.setRawText(textCermine).build()).build()
135
		}));
136
		
137
		List<Citation> results = collapser.collapseBetweenOrigins(objects);
138
		assertNotNull(results);
139
		assertEquals(2, results.size());
140
		assertEquals(pmcTargetId,results.get(0).getEntry().getDestinationDocumentId());
141
		assertEquals(new Float(1f),results.get(0).getEntry().getConfidenceLevel());
142
		assertEquals(textPmc,results.get(0).getEntry().getRawText());
143
		
144
		assertEquals(cermineTargetId,results.get(1).getEntry().getDestinationDocumentId());
145
		assertEquals(new Float(0.1f),results.get(1).getEntry().getConfidenceLevel());
146
		assertEquals(textCermine,results.get(1).getEntry().getRawText());
147
	}
148
}
0 149

  
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/OriginConfidenceMergingCollapserTest.java
1
package eu.dnetlib.iis.collapsers.origins;
2

  
3
import com.google.common.collect.Lists;
4
import eu.dnetlib.iis.collapsers.SampleData;
5
import eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope;
6
import eu.dnetlib.iis.importer.schemas.DocumentMetadata;
7
import java.util.ArrayList;
8
import java.util.List;
9
import static org.junit.Assert.assertNull;
10
import org.junit.Test;
11

  
12
/**
13
 * 
14
 * @author Dominika Tkaczyk
15
 *
16
 */
17
public class OriginConfidenceMergingCollapserTest {
18

  
19
    public static final List<DocumentMetadataEnvelope> emptyList = new ArrayList<DocumentMetadataEnvelope>();
20
  
21
    public static final List<DocumentMetadataEnvelope> oneElementList = 
22
            Lists.newArrayList(SampleData.envMetadataRecord11);
23
    
24
    public static final List<DocumentMetadata> mergedOneElementList = 
25
            Lists.newArrayList(SampleData.metadataRecord11);
26
    
27
    public static final List<DocumentMetadataEnvelope> list = 
28
            Lists.newArrayList(SampleData.envMetadataRecord11, SampleData.envMetadataRecord21);
29
    
30
    public static final List<DocumentMetadata> mergedList = 
31
            Lists.newArrayList(SampleData.mergedRecord1121);
32
    
33
    
34
    @Test
35
	public void testOriginConfidenceMerging() throws Exception {
36
        OriginConfidenceMergingCollapser<DocumentMetadataEnvelope, DocumentMetadata> collapser = 
37
                new OriginConfidenceMergingCollapser<DocumentMetadataEnvelope, DocumentMetadata>();
38
        collapser.setOrigins(SampleData.origins);
39

  
40
        assertNull(collapser.collapse(null));
41
        assertNull(collapser.collapse(emptyList));
42
        SampleData.assertEqualRecords(
43
                mergedOneElementList,
44
                collapser.collapse(oneElementList));
45
        SampleData.assertEqualRecords(
46
                mergedList,
47
                collapser.collapse(list));
48
    }
49
    
50
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/DocumentTextCollapserTest.java
1
package eu.dnetlib.iis.collapsers.origins;
2

  
3
import eu.dnetlib.iis.collapsers.SampleData;
4
import eu.dnetlib.iis.collapsers.schemas.DocumentTextEnvelope;
5
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
6
import java.util.ArrayList;
7
import java.util.List;
8
import static org.junit.Assert.assertNull;
9
import org.junit.Test;
10
import org.python.google.common.collect.Lists;
11

  
12
/**
13
 * 
14
 * @author Dominika Tkaczyk
15
 *
16
 */
17
public class DocumentTextCollapserTest {
18

  
19
    public static final List<String> origins = Lists.newArrayList("origin1", "origin2");
20
    
21
    public static final DocumentTextEnvelope record1 = DocumentTextEnvelope.newBuilder()
22
            .setOrigin("origin1")
23
            .setData(DocumentText.newBuilder().setId("id").setText("This is text").build())
24
            .build();
25
    
26
    public static final DocumentTextEnvelope record2 = DocumentTextEnvelope.newBuilder()
27
            .setOrigin("origin2")
28
            .setData(DocumentText.newBuilder().setId("id").setText("This is text").build())
29
            .build();
30
    
31
    public static final DocumentTextEnvelope record3 = DocumentTextEnvelope.newBuilder()
32
            .setOrigin("origin2")
33
            .setData(DocumentText.newBuilder().setId("id").setText("This is another text").build())
34
            .build();
35
    
36
    public static final DocumentTextEnvelope record4 = DocumentTextEnvelope.newBuilder()
37
            .setOrigin("origin2")
38
            .setData(DocumentText.newBuilder().setId("id").setText("This is a duplicated text").build())
39
            .build();
40

  
41
    
42
    public static final DocumentText collapsed1 = DocumentText.newBuilder()
43
            .setId("id")
44
            .setText("This is text").build();
45
    
46
    public static final DocumentText collapsed2 = DocumentText.newBuilder()
47
            .setId("id")
48
            .setText("This is text\n\nThis is text\n\nThis is another text\n\nThis is a duplicated text").build();
49
    
50
    
51
    public static final List<DocumentTextEnvelope> oneElementList = 
52
            Lists.newArrayList(record1);
53

  
54
    public static final List<DocumentText> collapsedOneElementList = 
55
            Lists.newArrayList(collapsed1);
56
    
57
    public static final List<DocumentTextEnvelope> list =
58
            Lists.newArrayList(record1, record2, record3, record4);
59
    
60
    public static final List<DocumentText> collapsedList =
61
            Lists.newArrayList(collapsed2);
62
    
63
    
64
    @Test
65
	public void testDocumentTextCollapserEmpty() throws Exception {
66
        DocumentTextCollapser collapser = new DocumentTextCollapser();
67
        collapser.setOrigins(origins);
68
        
69
        assertNull(collapser.collapse(null));
70
        assertNull(collapser.collapse(new ArrayList<DocumentTextEnvelope>()));
71
    }
72
    
73
    @Test
74
	public void testDocumentTextCollapser() throws Exception {
75
        DocumentTextCollapser collapser = new DocumentTextCollapser();
76
        collapser.setOrigins(origins);
77
        
78
        SampleData.assertEqualRecords(
79
                collapsedOneElementList,
80
                collapser.collapse(oneElementList));
81
        SampleData.assertEqualRecords(
82
                collapsedList,
83
                collapser.collapse(list));
84
    }
85

  
86
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/OriginConfidenceCollapserTest.java
1
package eu.dnetlib.iis.collapsers.origins;
2

  
3
import eu.dnetlib.iis.collapsers.SampleData;
4
import eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope;
5
import eu.dnetlib.iis.importer.schemas.DocumentMetadata;
6
import java.util.ArrayList;
7
import java.util.List;
8
import static org.junit.Assert.assertNull;
9
import org.junit.Test;
10
import org.python.google.common.collect.Lists;
11

  
12
/**
13
 * 
14
 * @author Dominika Tkaczyk
15
 *
16
 */
17
public class OriginConfidenceCollapserTest {
18
    
19
    public static final List<DocumentMetadataEnvelope> emptyList = new ArrayList<DocumentMetadataEnvelope>();
20
  
21
    public static final List<DocumentMetadataEnvelope> oneElementList = 
22
            Lists.newArrayList(SampleData.envMetadataRecord11);
23
    
24
    public static final List<DocumentMetadata> mergedOneElementList = 
25
            Lists.newArrayList(SampleData.metadataRecord11);
26
    
27
    public static final List<DocumentMetadataEnvelope> list = 
28
            Lists.newArrayList(SampleData.envMetadataRecord11, SampleData.envMetadataRecord21);
29
    
30
    
31
    @Test
32
	public void testOriginConfidence() throws Exception {
33
        OriginConfidenceCollapser<DocumentMetadataEnvelope, DocumentMetadata> collapser = 
34
                new OriginConfidenceCollapser<DocumentMetadataEnvelope, DocumentMetadata>();
35
        collapser.setOrigins(SampleData.origins);
36
        
37
        assertNull(collapser.collapse(null));
38
        assertNull(collapser.collapse(emptyList));
39
        SampleData.assertEqualRecords(
40
                mergedOneElementList,
41
                collapser.collapse(oneElementList));
42
        SampleData.assertEqualRecords(
43
                mergedOneElementList,
44
                collapser.collapse(list));
45
    }
46
    
47
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/origins/WorkflowTest.java
1
package eu.dnetlib.iis.collapsers.origins;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import org.junit.Test;
6
import org.junit.experimental.categories.Category;
7

  
8
/**
9
 * @author Dominika Tkaczyk
10
 * @author Michal Oniszczuk
11
 */
12
@Category(IntegrationTest.class)
13
public class WorkflowTest extends AbstractWorkflowTestCase {
14

  
15
    @Test
16
    public void testDefaultWorkflow() throws Exception {
17
        runWorkflow("eu/dnetlib/iis/collapsers/origins_collapser/default/oozie_app");
18
    }
19

  
20
/*
21
    @Test
22
    public void testCitationWorkflow() throws Exception {
23
        runWorkflow("eu/dnetlib/iis/collapsers/collapser/citation/oozie_app");
24
    }
25
*/
26
    
27
    @Test
28
    public void testDocumentTextWorkflow() throws Exception {
29
        runWorkflow("eu/dnetlib/iis/collapsers/origins_collapser/documenttext/oozie_app");
30
    }
31

  
32
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/java/eu/dnetlib/iis/collapsers/SampleData.java
1
package eu.dnetlib.iis.collapsers;
2

  
3
import eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope;
4
import eu.dnetlib.iis.collapsers.schemas.DocumentTextEnvelope;
5
import eu.dnetlib.iis.importer.schemas.DocumentMetadata;
6
import eu.dnetlib.iis.importer.schemas.PublicationType;
7
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
8
import java.util.List;
9
import org.apache.avro.generic.GenericData;
10
import org.apache.avro.generic.IndexedRecord;
11
import static org.junit.Assert.assertEquals;
12
import static org.junit.Assert.assertTrue;
13
import org.python.google.common.collect.Lists;
14

  
15
/**
16
 *
17
 * @author Dominika Tkaczyk
18
 */
19
public class SampleData {
20

  
21
    /* parameters */
22
    
23
    public static final List<String> origins = Lists.newArrayList("origin1", "origin2");
24
    
25
    public static final List<String> significantFields = Lists.newArrayList("title", "authorIds", "abstract", "journal", "year");
26

  
27
    
28
    /* input records */
29

  
30
    public static final DocumentMetadata metadataRecord11 = DocumentMetadata.newBuilder()
31
                .setId("id-1")
32
                .setAbstract$("abstract 1")
33
                .setLanguage("en")
34
                .setYear(1990)
35
                .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
36
                .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
37
                .setPublisher("publisher 1")
38
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
39
                .build();
40
    
41
    public static final DocumentMetadataEnvelope envMetadataRecord11 = DocumentMetadataEnvelope.newBuilder()
42
            .setOrigin("origin1")
43
            .setData(metadataRecord11).build();
44
    
45
    public static final DocumentMetadata metadataRecord12 = DocumentMetadata.newBuilder()
46
                .setId("id-1")
47
                .setAbstract$("abstract 2")
48
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
49
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
50
                .setTitle("title 2")
51
                .setYear(1991)
52
                .build();
53
    
54
    public static final DocumentMetadataEnvelope envMetadataRecord12 = DocumentMetadataEnvelope.newBuilder()
55
            .setOrigin("origin1")
56
            .setData(metadataRecord12).build();
57
    
58
    public static final DocumentMetadata metadataRecord13 = DocumentMetadata.newBuilder()
59
                .setId("id-1")
60
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
61
                .build();
62
    
63
    public static final DocumentMetadataEnvelope envMetadataRecord13 = DocumentMetadataEnvelope.newBuilder()
64
            .setOrigin("origin1")
65
            .setData(metadataRecord13).build();
66
    
67
    public static final DocumentMetadata metadataRecord21 = DocumentMetadata.newBuilder()
68
                .setId("id-1")
69
                .setAbstract$("abstract 3")
70
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23"))
71
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
72
                .setTitle("title 3")
73
                .setYear(1999)
74
                .build();
75
    
76
    public static final DocumentMetadataEnvelope envMetadataRecord21 = DocumentMetadataEnvelope.newBuilder()
77
            .setOrigin("origin2")
78
            .setData(metadataRecord21).build();
79
    
80
    public static final DocumentMetadata metadataRecord22 = DocumentMetadata.newBuilder()
81
                .setId("id-1")
82
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
83
                .build();
84
    
85
    public static final DocumentMetadataEnvelope envMetadataRecord22 = DocumentMetadataEnvelope.newBuilder()
86
            .setOrigin("origin2")
87
            .setData(metadataRecord22).build();
88
    
89
    public static final DocumentText textRecord = DocumentText.newBuilder()
90
                 .setId("text-1")
91
                 .setText("text text")
92
                 .build();
93
    
94
    public static final DocumentTextEnvelope envTextRecord = DocumentTextEnvelope.newBuilder()
95
            .setOrigin("origin1")
96
            .setData(textRecord).build();
97

  
98
    
99
    /* merged records */
100
    
101
    public static final DocumentMetadata mergedRecord1112 = DocumentMetadata.newBuilder()
102
                .setId("id-1")
103
                .setAbstract$("abstract 1")
104
                .setLanguage("en")
105
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
106
                .setTitle("title 2")
107
                .setYear(1990)
108
                .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
109
                .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
110
                .setPublisher("publisher 1")
111
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
112
                .build();
113
    
114
    public static final DocumentMetadata mergedRecord1211 = DocumentMetadata.newBuilder()
115
                 .setId("id-1")
116
                 .setAbstract$("abstract 2")
117
                 .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
118
                 .setTitle("title 2")
119
                 .setLanguage("en")
120
                 .setYear(1991)
121
                 .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
122
                 .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
123
                 .setPublisher("publisher 1")
124
                 .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
125
                 .build();
126
    
127
    public static final DocumentMetadata mergedRecord1121 = DocumentMetadata.newBuilder()
128
                .setId("id-1")
129
                .setAbstract$("abstract 1")
130
                .setLanguage("en")
131
                .setYear(1990)
132
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23"))
133
                .setTitle("title 3")
134
                .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
135
                .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
136
                .setPublisher("publisher 1")
137
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
138
                .build();
139
    
140
    public static final DocumentMetadata mergedRecord2221 = DocumentMetadata.newBuilder()
141
                .setId("id-1")
142
                .setAbstract$("abstract 3")
143
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23"))
144
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
145
                .setTitle("title 3")
146
                .setYear(1999)
147
                .build();
148
    
149
  
150
    /* collapsed records */
151
    
152
    // within no merge, between no merge
153
    public static final DocumentMetadata recordWNoMergeBNoMerge = DocumentMetadata.newBuilder()
154
                .setId("id-1")
155
                .setAbstract$("abstract 2")
156
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
157
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
158
                .setTitle("title 2")
159
                .setYear(1991)
160
                .build();
161

  
162
    // within merge, between no merge
163
    public static final DocumentMetadata recordWMergeBNoMerge = DocumentMetadata.newBuilder()
164
                .setId("id-1")
165
                .setAbstract$("abstract 2")
166
                .setLanguage("en")
167
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
168
                .setTitle("title 2")
169
                .setYear(1991)
170
                .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
171
                .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
172
                .setPublisher("publisher 1")
173
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
174
                .build();
175
    
176
    // within no merge, between merge
177
    public static final DocumentMetadata recordWNoMergeBMerge = DocumentMetadata.newBuilder()
178
                .setId("id-1")
179
                .setAbstract$("abstract 2")
180
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
181
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
182
                .setTitle("title 2")
183
                .setYear(1991)
184
                .build();
185
    
186
    // within merge, between merge
187
    public static final DocumentMetadata recordWMergeBMerge = DocumentMetadata.newBuilder()
188
                .setId("id-1")
189
                .setAbstract$("abstract 2")
190
                .setLanguage("en")
191
                .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
192
                .setTitle("title 2")
193
                .setYear(1991)
194
                .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
195
                .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
196
                .setPublisher("publisher 1")
197
                .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
198
                .build();
199
    
200
    public static void assertEqualRecords(IndexedRecord expected, IndexedRecord actual) {
201
        assertEquals("Records are not equal: \nExpected: " + expected + "\nActual: " + actual + "\n", 
202
                0, GenericData.get().compare(expected, actual, expected.getSchema()));
203
    }
204
    
205
    public static <T extends IndexedRecord> void assertEqualRecords(List<T> expected, List<T> actual) {
206
        assertEquals("Records lists have different sizes: " + expected.size() + " and " + actual.size() + "\n",
207
                expected.size(), actual.size());
208
        List<T> actualCopy = Lists.newArrayList(actual);
209
        for (T exp : expected) {
210
            T found = null;
211
            for (T act : actualCopy) {
212
                if (0 == GenericData.get().compare(exp, act, exp.getSchema())) {
213
                    found = act;
214
                }
215
            }
216
            assertTrue(
217
                    "Expected record " + exp.toString() + " was not found among the actual records\n", 
218
                    found != null);
219
            actualCopy.remove(found);
220
        }
221
    }
222

  
223
}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/documenttext/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/documenttext/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-basic_collapser_documenttext">
2
    <start to="producer"/>
3
    <action name="producer">
4
        <java>
5
            <job-tracker>${jobTracker}</job-tracker>
6
            <name-node>${nameNode}</name-node>
7
			<!-- The data generated by this node is deleted in this section -->
8
			<prepare>
9
				<delete path="${nameNode}${workingDir}/producer" />
10
				<mkdir path="${nameNode}${workingDir}/producer" />
11
			</prepare>
12
            <configuration>
13
                <property>
14
                    <name>mapred.job.queue.name</name>
15
                    <value>${queueName}</value>
16
                </property>
17
            </configuration>
18
            <!-- This is a simple wrapper for the Java code -->
19
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
20
			<!-- The business Java code that gets to be executed -->
21
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
22
			<!-- Specification of the output ports -->
23
			<arg>-C{input,
24
				eu.dnetlib.iis.metadataextraction.schemas.DocumentText,
25
				eu/dnetlib/iis/collapsers/basic_collapser/documenttext/data/texts.json}</arg>
26
			<!-- All input and output ports have to be bound to paths in HDFS, working 
27
				directory has to be specified as well -->
28
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
            <arg>-Oinput=${workingDir}/producer/input</arg>
30
        </java>
31
        <ok to="basic_collapser"/>
32
        <error to="fail"/>
33
    </action>
34
    <action name="basic_collapser">
35
        <sub-workflow>
36
            <app-path>${wf:appPath()}/basic_collapser</app-path>
37
            <configuration>
38
                <property>
39
                    <name>jobTracker</name>
40
                    <value>${jobTracker}</value>
41
                </property>
42
                <property>
43
                    <name>nameNode</name>
44
                    <value>${nameNode}</value>
45
                </property>
46
                <property>
47
                    <name>queueName</name>
48
                    <value>${queueName}</value>
49
                </property>
50
                <!-- Working directory of the subworkflow -->                
51
                <property>
52
                    <name>workingDir</name>
53
                    <value>${workingDir}/basic_collapser/working_dir</value>
54
                </property>
55
                <property>
56
                    <name>blocking_field</name>
57
                    <value>id</value>
58
                </property>
59
                <property>
60
                    <name>record_collapser</name>
61
                    <value>eu.dnetlib.iis.collapsers.basic.DocumentTextCollapser</value>
62
        		</property>
63
                <property>
64
                    <name>schema</name>
65
                    <value>eu.dnetlib.iis.metadataextraction.schemas.DocumentText</value>
66
                </property>
67
                <!-- Input ports. -->
68
                <property>
69
                    <name>input</name>
70
                    <value>${workingDir}/producer/input</value>
71
                </property>
72
                <!-- Output port bound to given path -->
73
                <property>
74
                    <name>output</name>
75
                    <value>${workingDir}/basic_collapser/output</value>
76
                </property>
77
            </configuration>
78
        </sub-workflow>
79
        <ok to="consumer"/>
80
        <error to="fail"/>
81
    </action>
82
    <action name="consumer">
83
		<java>
84
			<job-tracker>${jobTracker}</job-tracker>
85
			<name-node>${nameNode}</name-node>
86
			<!-- The data generated by this node is deleted in this section -->
87
			<prepare>
88
				<delete path="${nameNode}${workingDir}/consumer" />
89
				<mkdir path="${nameNode}${workingDir}/consumer" />
90
			</prepare>
91
			<configuration>
92
				<property>
93
					<name>mapred.job.queue.name</name>
94
					<value>${queueName}</value>
95
				</property>
96
			</configuration>
97
			<!-- This is a simple wrapper for the Java code -->
98
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
99
			<!-- The business Java code that gets to be executed -->
100
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
101
			<!-- Specification of the input ports -->
102
			<arg>-C{output,
103
				eu.dnetlib.iis.metadataextraction.schemas.DocumentText,
104
				eu/dnetlib/iis/collapsers/basic_collapser/documenttext/data/output.json}</arg>
105
    		<!-- All input and output ports have to be bound to paths in HDFS, working 
106
				directory has to be specified as well -->
107
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
108
			<arg>-Ioutput=${workingDir}/basic_collapser/output</arg>
109
		</java>
110
		<ok to="end" />
111
		<error to="fail" />
112
	</action>
113
    <kill name="fail">
114
		<message>Unfortunately, the workflow failed -- error message:
115
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
116
    </kill>
117
    <end name="end"/>
118
</workflow-app>
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/documenttext/data/texts.json
1
{"id": "id-6", "text": "This is a different text"}
2
{"id": "id-3", "text": "This is another text"}
3
{"id": "id-3", "text": "This is yet another text"}
4
{"id": "id-6", "text": "This is a totally different text"}
5
{"id": "id-7", "text": "This is a text"}
6
{"id": "id-4", "text": "This is not a text"}
7
{"id": "id-6", "text": "This is a duplicated text"}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/documenttext/data/output.json
1
{"id": "id-3", "text": "This is another text\n\nThis is yet another text"}
2
{"id": "id-6", "text": "This is a different text\n\nThis is a totally different text\n\nThis is a duplicated text"}
3
{"id": "id-7", "text": "This is a text"}
4
{"id": "id-4", "text": "This is not a text"}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/default/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/default/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-basic_collapser_default">
2
    <start to="producer"/>
3
    <action name="producer">
4
        <java>
5
            <job-tracker>${jobTracker}</job-tracker>
6
            <name-node>${nameNode}</name-node>
7
			<!-- The data generated by this node is deleted in this section -->
8
			<prepare>
9
				<delete path="${nameNode}${workingDir}/producer" />
10
				<mkdir path="${nameNode}${workingDir}/producer" />
11
			</prepare>
12
            <configuration>
13
                <property>
14
                    <name>mapred.job.queue.name</name>
15
                    <value>${queueName}</value>
16
                </property>
17
            </configuration>
18
            <!-- This is a simple wrapper for the Java code -->
19
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
20
			<!-- The business Java code that gets to be executed -->
21
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
22
			<!-- Specification of the output ports -->
23
			<arg>-C{input,
24
				eu.dnetlib.iis.importer.schemas.DocumentMetadata,
25
				eu/dnetlib/iis/collapsers/basic_collapser/default/data/metadata.json}</arg>
26
			<!-- All input and output ports have to be bound to paths in HDFS, working 
27
				directory has to be specified as well -->
28
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
            <arg>-Oinput=${workingDir}/producer/input</arg>
30
        </java>
31
        <ok to="basic_collapser"/>
32
        <error to="fail"/>
33
    </action>
34
    <action name="basic_collapser">
35
        <sub-workflow>
36
            <app-path>${wf:appPath()}/basic_collapser</app-path>
37
            <configuration>
38
                <property>
39
                    <name>jobTracker</name>
40
                    <value>${jobTracker}</value>
41
                </property>
42
                <property>
43
                    <name>nameNode</name>
44
                    <value>${nameNode}</value>
45
                </property>
46
                <property>
47
                    <name>queueName</name>
48
                    <value>${queueName}</value>
49
                </property>
50
                <!-- Working directory of the subworkflow -->                
51
                <property>
52
                    <name>workingDir</name>
53
                    <value>${workingDir}/basic_collapser/working_dir</value>
54
                </property>
55
                <property>
56
                    <name>blocking_field</name>
57
                    <value>id</value>
58
                </property>
59
                <property>
60
                    <name>schema</name>
61
                    <value>eu.dnetlib.iis.importer.schemas.DocumentMetadata</value>
62
                </property>
63
                <property>
64
                    <name>significant_fields</name>
65
                    <value>title,authorIds,journal,year</value>
66
                </property>
67
                <!-- Input ports. -->
68
                <property>
69
                    <name>input</name>
70
                    <value>${workingDir}/producer/input</value>
71
                </property>
72
                <!-- Output port bound to given path -->
73
                <property>
74
                    <name>output</name>
75
                    <value>${workingDir}/basic_collapser/output</value>
76
                </property>
77
            </configuration>
78
        </sub-workflow>
79
        <ok to="consumer"/>
80
        <error to="fail"/>
81
    </action>
82
    <action name="consumer">
83
		<java>
84
			<job-tracker>${jobTracker}</job-tracker>
85
			<name-node>${nameNode}</name-node>
86
			<!-- The data generated by this node is deleted in this section -->
87
			<prepare>
88
				<delete path="${nameNode}${workingDir}/consumer" />
89
				<mkdir path="${nameNode}${workingDir}/consumer" />
90
			</prepare>
91
			<configuration>
92
				<property>
93
					<name>mapred.job.queue.name</name>
94
					<value>${queueName}</value>
95
				</property>
96
			</configuration>
97
			<!-- This is a simple wrapper for the Java code -->
98
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
99
			<!-- The business Java code that gets to be executed -->
100
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
101
			<!-- Specification of the input ports -->
102
			<arg>-C{output,
103
				eu.dnetlib.iis.importer.schemas.DocumentMetadata,
104
				eu/dnetlib/iis/collapsers/basic_collapser/default/data/output.json}</arg>
105
    		<!-- All input and output ports have to be bound to paths in HDFS, working 
106
				directory has to be specified as well -->
107
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
108
			<arg>-Ioutput=${workingDir}/basic_collapser/output</arg>
109
		</java>
110
		<ok to="end" />
111
		<error to="fail" />
112
	</action>
113
    <kill name="fail">
114
		<message>Unfortunately, the workflow failed -- error message:
115
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
116
    </kill>
117
    <end name="end"/>
118
</workflow-app>
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/default/data/metadata.json
1
{"id": "id-1", "title": "Ender's Game", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": "Journal-1", "year": 2010, "publisher": "", "publicationType": {"article": false, "dataset": false}, "authorIds": ["id-1", "id-2", "id-3"], "datasourceIds": null}
2
{"id": "id-2", "title": "Seventh Son (Tales of Alvin Maker)", "abstract": "The tales of Alvin Maker", "language": null, "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": "Journal", "year": 1993, "publisher": null, "publicationType": {"article": true, "dataset": false}, "authorIds": ["id"], "datasourceIds": null}
3
{"id": "id-1", "title": "Enders game", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": "Journal-1", "year": null, "publisher": "", "publicationType": {"article": false, "dataset": false}, "authorIds": ["id-1", "id-256", "id-3"], "datasourceIds": null}
4
{"id": "id-2", "title": "Seventh Son (Tales of Alvin Maker) 67", "abstract": "The tales of Alvin Maker", "language": null, "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": "Journal", "year": 1993, "publisher": null, "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null}
5
{"id": "id-3", "title": "A Disk", "abstract": "A flat circular world carried on the back of a giant turtle - Discworld", "language": "eng", "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": null, "year": 2003, "publisher": "HarperTorch", "publicationType": {"article": true, "dataset": false}, "authorIds": ["id-1", "id-2"], "datasourceIds": null}
6
{"id": "id-5", "title": "HArry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": null, "datasourceIds": null}
7
{"id": "id-4", "title": "null", "abstract": null, "language": "pl", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null}
8
{"id": "id-3", "title": null, "abstract": "A flat, circular world carried on the back of a giant turtle - Discworld", "language": "eng", "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": null, "year": 2003, "publisher": "HarperTorch", "publicationType": {"article": true, "dataset": false}, "authorIds": ["id-1", "id-2"], "datasourceIds": null}
9
{"id": "id-5", "title": "Harry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": ["id"], "datasourceIds": null}
10
{"id": "id-4", "title": null, "abstract": null, "language": "eng", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null}
11
{"id": "id-8", "title": "Harry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": null, "datasourceIds": null}
12
{"id": "id-6", "title": null, "abstract": null, "language": "eng", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/default/data/output.json
1
{"id": "id-1", "title": "Ender's Game", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": "Journal-1", "year": 2010, "publisher": "", "publicationType": {"article": false, "dataset": false}, "authorIds": ["id-1", "id-2", "id-3"], "datasourceIds": null}
2
{"id": "id-2", "title": "Seventh Son (Tales of Alvin Maker)", "abstract": "The tales of Alvin Maker", "language": null, "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": "Journal", "year": 1993, "publisher": null, "publicationType": {"article": true, "dataset": false}, "authorIds": ["id"], "datasourceIds": null}
3
{"id": "id-3", "title": "A Disk", "abstract": "A flat circular world carried on the back of a giant turtle - Discworld", "language": "eng", "keywords": null, "externalIdentifiers": {"id-1": "val-1", "id-2": "val-2"}, "journal": null, "year": 2003, "publisher": "HarperTorch", "publicationType": {"article": true, "dataset": false}, "authorIds": ["id-1", "id-2"], "datasourceIds": null}
4
{"id": "id-4", "title": "null", "abstract": null, "language": "pl", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null}
5
{"id": "id-5", "title": "Harry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": ["id"], "datasourceIds": null}
6
{"id": "id-6", "title": null, "abstract": null, "language": "eng", "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": "Ballantine Books", "publicationType": {"article": true, "dataset": false}, "authorIds": null, "datasourceIds": null}
7
{"id": "id-8", "title": "Harry Potter and the Sorcerer's Stone", "abstract": null, "language": null, "keywords": ["kwd_1", "kwd_2", "kwd_3", "kwd_4"], "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "publicationType": {"article": false, "dataset": true}, "authorIds": null, "datasourceIds": null}
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/citation/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app
modules/icm-iis-collapsers/tags/icm-iis-collapsers-1.0.0/src/test/resources/eu/dnetlib/iis/collapsers/basic_collapser/citation/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-collapser_citation">
2
    <start to="producer"/>
3
    <action name="producer">
4
        <java>
5
            <job-tracker>${jobTracker}</job-tracker>
6
            <name-node>${nameNode}</name-node>
7
			<!-- The data generated by this node is deleted in this section -->
8
			<prepare>
9
				<delete path="${nameNode}${workingDir}/producer" />
10
				<mkdir path="${nameNode}${workingDir}/producer" />
11
			</prepare>
12
            <configuration>
13
                <property>
14
                    <name>mapred.job.queue.name</name>
15
                    <value>${queueName}</value>
16
                </property>
17
            </configuration>
18
            <!-- This is a simple wrapper for the Java code -->
19
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
20
			<!-- The business Java code that gets to be executed -->
21
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
22
			<!-- Specification of the output ports -->
23
			<arg>-C{input,
24
                eu.dnetlib.iis.collapsers.schemas.CitationEnvelope,
25
                eu/dnetlib/iis/collapsers/collapser/citation/data/citation.json}</arg>
26
			<!-- All input and output ports have to be bound to paths in HDFS, working
27
				directory has to be specified as well -->
28
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
            <arg>-Oinput=${workingDir}/producer/input</arg>
30
        </java>
31
        <ok to="collapser"/>
32
        <error to="fail"/>
33
    </action>
34
    <action name="collapser">
35
        <sub-workflow>
36
            <app-path>${wf:appPath()}/collapser</app-path>
37
            <configuration>
38
                <property>
39
                    <name>jobTracker</name>
40
                    <value>${jobTracker}</value>
41
                </property>
42
                <property>
43
                    <name>nameNode</name>
44
                    <value>${nameNode}</value>
45
                </property>
46
                <property>
47
                    <name>queueName</name>
48
                    <value>${queueName}</value>
49
                </property>
50
                <!-- Working directory of the subworkflow -->                
51
                <property>
52
                    <name>workingDir</name>
53
                    <value>${workingDir}/collapser/working_dir</value>
54
                </property>
55
                <!-- Input ports & parameters. -->
56
                <property>
57
                    <name>blocking_field</name>
58
                    <value>sourceDocumentId</value>
59
                </property>
60
                <property>
61
                    <name>origins</name>
62
                    <value>orig,cermine</value>
63
                </property>
64
                <property>
65
                    <name>schema_input</name>
66
                    <value>eu.dnetlib.iis.collapsers.schemas.CitationEnvelope</value>
67
                </property>
68
                <property>
69
                    <name>schema_output</name>
70
                    <value>eu.dnetlib.iis.citationmatching.schemas.Citation</value>
71
                </property>
72
                <property>
73
                    <name>input</name>
74
                    <value>${workingDir}/producer/input</value>
75
                </property>
76
                <!-- Output port bound to given path -->
77
                <property>
78
                    <name>output</name>
79
                    <value>${workingDir}/collapser_collapser/output</value>
80
                </property>
81
            </configuration>
82
        </sub-workflow>
83
        <ok to="consumer"/>
84
        <error to="fail"/>
85
    </action>
86
    <action name="consumer">
87
		<java>
88
			<job-tracker>${jobTracker}</job-tracker>
89
			<name-node>${nameNode}</name-node>
90
			<!-- The data generated by this node is deleted in this section -->
91
			<prepare>
92
				<delete path="${nameNode}${workingDir}/consumer" />
93
				<mkdir path="${nameNode}${workingDir}/consumer" />
94
			</prepare>
95
			<configuration>
96
				<property>
97
					<name>mapred.job.queue.name</name>
98
					<value>${queueName}</value>
99
				</property>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff