Project

General

Profile

« Previous | Next » 

Revision 37109

Added by Marek Horst over 9 years ago

Merging trunk changes into the IIS-CDH-5.3.0 branch

View differences:

modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_document_to_research_initiative
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input_document_to_research_initiative');
3
'schema', '$schema_input_document_to_research_initiative');
4 4

  
5 5
define avro_store_document_to_research_initiatives
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7 7
'index', '0',
8
'output_schema_class', '$schema_output_document_to_research_initiatives');
8
'schema', '$schema_output_document_to_research_initiatives');
9 9

  
10 10
documentToResearchInitiative = load '$input_document_to_research_initiative' using avro_load_document_to_research_initiative;
11 11

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/oozie_app/workflow.xml
30 30
			<arg>-C{concept,
31 31
				eu.dnetlib.iis.importer.schemas.Concept,
32 32
				eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/concept.json}</arg>
33
			<!-- All input and output ports have to be bound to paths in HDFS, working 
34
				directory has to be specified as well -->
35
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
33
			<!-- All input and output ports have to be bound to paths in HDFS -->
36 34
            <arg>-Odocument_to_project=${workingDir}/producer/document_to_project</arg>
37 35
            <arg>-Oproject=${workingDir}/producer/project</arg>
38 36
            <arg>-Oconcept=${workingDir}/producer/concept</arg>
......
106 104
			<arg>-C{output,
107 105
				eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId,
108 106
				eu/dnetlib/iis/transformers/referenceextraction/project/toconcept/sampledataproducer/data/output.json}</arg>
109
			<!-- All input and output ports have to be bound to paths in HDFS, working 
110
				directory has to be specified as well -->
111
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
107
			<!-- All input and output ports have to be bound to paths in HDFS -->
112 108
			<arg>-Ioutput=${workingDir}/transformer_project_toconcept/output</arg>
113 109
		</java>
114 110
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/preprocessing/oozie_app/workflow.xml
11 11
		</property>
12 12
	</parameters>
13 13
    
14
    <start to="transformer"/>
14
    <start to="generate-schema"/>
15
    
16
    <action name="generate-schema">
17
	    <java>
18
    		<job-tracker>${jobTracker}</job-tracker>
19
        	<name-node>${nameNode}</name-node>
20
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
21
	        <arg>eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</arg>
22
	        <capture-output />
23
	    </java>
24
	    <ok to="transformer" />
25
	    <error to="fail" />
26
	</action>
27
	
15 28
    <action name="transformer">
16 29
        <pig>
17 30
            <job-tracker>${jobTracker}</job-tracker>
......
28 41
            </configuration>
29 42
            <!-- Path to PIG script the workflow executes. -->
30 43
            <script>lib/scripts/transformer.pig</script>
31
            <param>schema_document_content_url=eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</param>
44
            <param>schema_document_content_url=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl']}</param>
32 45
            <param>input=${input}</param>
33 46
            <param>output=${output}</param>
34 47
        </pig>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/oozie_app/workflow.xml
23 23
            <arg>-C{extracted_metadata,
24 24
				eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata,
25 25
				eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/extr_metadata.json}</arg>
26
			<!-- All input and output ports have to be bound to paths in HDFS, working 
27
				directory has to be specified as well -->
28
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
26
			<!-- All input and output ports have to be bound to paths in HDFS -->
29 27
            <arg>-Oextracted_metadata=${workingDir}/producer/extr_metadata</arg>
30 28
        </java>
31 29
        <ok to="transformer_export_documentmetadata"/>
......
85 83
			<arg>-C{output_metadata,
86 84
				eu.dnetlib.iis.export.schemas.DocumentMetadata,
87 85
				eu/dnetlib/iis/transformers/export/documentmetadata/sampledataproducer/data/output_metadata.json}</arg>
88
			<!-- All input and output ports have to be bound to paths in HDFS, working 
89
				directory has to be specified as well -->
90
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
86
			<!-- All input and output ports have to be bound to paths in HDFS -->
91 87
			<arg>-Ioutput_metadata=${workingDir}/transformer_export_documentmetadata/output_metadata</arg>
92 88
		</java>
93 89
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/oozie_app/workflow.xml
22 22
            <arg>-C{document_to_concept_id,
23 23
				eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId,
24 24
				eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_id.json}</arg>
25
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
26 25
            <arg>-Odocument_to_concept_id=${workingDir}/producer/output</arg>
27 26
        </java>
28 27
        <ok to="transformer_export_researchinitiatives"/>
......
78 77
			<arg>-C{output,
79 78
				eu.dnetlib.iis.export.schemas.DocumentToConceptIds,
80 79
				eu/dnetlib/iis/transformers/export/researchinitiatives/sampledataproducer/data/document_to_concept_ids.json}</arg>
81
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
82 80
			<arg>-Ioutput=${workingDir}/transformer_export_researchinitiatives/output</arg>
83 81
		</java>
84 82
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/oozie_app/workflow.xml
27 27
				eu.dnetlib.iis.importer.schemas.DocumentRelation,
28 28
				eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/document_relation.json}</arg>
29 29
                             
30
			<!-- All input and output ports have to be bound to paths in HDFS, working 
31
				directory has to be specified as well -->
32
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
30
			<!-- All input and output ports have to be bound to paths in HDFS -->
33 31
            <arg>-Odocument_to_dataset=${workingDir}/producer/document_to_dataset</arg>
34 32
            <arg>-Odocument_relation=${workingDir}/producer/document_relation</arg>
35 33
        </java>
......
94 92
			<arg>-C{document_to_dataset,
95 93
				eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet,
96 94
				eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/sampledataproducer/data/output_document_to_dataset.json}</arg>
97
			<!-- All input and output ports have to be bound to paths in HDFS, working 
98
				directory has to be specified as well -->
99
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
95
			<!-- All input and output ports have to be bound to paths in HDFS -->
100 96
			<arg>-Idocument_to_dataset=${workingDir}/transformer_export_documenttodataset_without_imported_data/document_to_dataset</arg>
101 97
		</java>
102 98
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/sampledataproducer/oozie_app/workflow.xml
27 27
				eu.dnetlib.iis.importer.schemas.DocumentToProject,
28 28
				eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/sampledataproducer/data/imported_document_to_project.json}</arg>
29 29
              
30
			<!-- All input and output ports have to be bound to paths in HDFS, working 
31
				directory has to be specified as well -->
32
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
30
			<!-- All input and output ports have to be bound to paths in HDFS -->
33 31
            <arg>-Odocument_to_project=${workingDir}/producer/document_to_project</arg>
34 32
            <arg>-Oimported_document_to_project=${workingDir}/producer/imported_document_to_project</arg>
35 33
        </java>
......
94 92
			<arg>-C{document_to_project,
95 93
				eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject,
96 94
				eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/sampledataproducer/data/output_document_to_project.json}</arg>
97
			<!-- All input and output ports have to be bound to paths in HDFS, working 
98
				directory has to be specified as well -->
99
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
95
			<!-- All input and output ports have to be bound to paths in HDFS -->
100 96
			<arg>-Idocument_to_project=${workingDir}/transformer_export_documenttoproject_without_imported_data/document_to_project</arg>
101 97
		</java>
102 98
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/sampledataproducer/oozie_app/workflow.xml
25 25
            <arg>-C{document_metadata,
26 26
				eu.dnetlib.iis.importer.schemas.DocumentMetadata,
27 27
				eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/sampledataproducer/data/input_document_metadata.json}</arg>
28
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29 28
            <arg>-Odocument_metadata=${workingDir}/producer/document_metadata</arg>
30 29
        </java>
31 30
        <ok to="id-extractor"/>
......
82 81
			<arg>-C{output_identifier,
83 82
				eu.dnetlib.iis.common.schemas.DocumentId,
84 83
				eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/sampledataproducer/data/output_document_id.json}</arg>
85
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
86 84
			<arg>-Ioutput_identifier=${workingDir}/transformers_idextractor/output</arg>
87 85
		</java>
88 86
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/text/sampledataproducer/oozie_app/workflow.xml
26 26
            <arg>-C{document_text,
27 27
				eu.dnetlib.iis.metadataextraction.schemas.DocumentText,
28 28
				eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/text/sampledataproducer/data/input_document_text.json}</arg>
29
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
30 29
            <arg>-Odocument_content_url=${workingDir}/producer/document_content_url</arg>
31 30
            <arg>-Odocument_text=${workingDir}/producer/document_text</arg>
32 31
        </java>
......
86 85
			<arg>-C{output_metadata,
87 86
				eu.dnetlib.iis.metadataextraction.schemas.DocumentText,
88 87
				eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/text/sampledataproducer/data/output_document_text.json}</arg>
89
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
90 88
			<arg>-Ioutput_metadata=${workingDir}/transformer_metadataextraction_checksum_postprocessing_text/output</arg>
91 89
		</java>
92 90
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/oozie_app/workflow.xml
12 12
		</property>
13 13
	</parameters>
14 14
    
15
    <start to="transformer"/>
15
    <start to="generate-schema"/>
16
    
17
    <action name="generate-schema">
18
	    <java>
19
    		<job-tracker>${jobTracker}</job-tracker>
20
        	<name-node>${nameNode}</name-node>
21
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
22
	        <arg>eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet</arg>
23
	        <arg>eu.dnetlib.iis.importer.schemas.DocumentRelation</arg>
24
	        <capture-output />
25
	    </java>
26
	    <ok to="transformer" />
27
	    <error to="fail" />
28
	</action>
29
	
16 30
    <action name="transformer">
17 31
        <pig>
18 32
            <job-tracker>${jobTracker}</job-tracker>
......
31 45
            <script>lib/scripts/transformer.pig</script>
32 46
            
33 47
            <param>input_document_to_dataset=${input_document_to_dataset}</param>
34
            <param>schema_input_document_to_dataset=eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet</param>
48
            <param>schema_input_document_to_dataset=${wf:actionData('generate-schema')['eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet']}</param>
35 49
            
36 50
            <param>input_document_relation=${input_document_relation}</param>
37
            <param>schema_input_document_relation=eu.dnetlib.iis.importer.schemas.DocumentRelation</param>
51
            <param>schema_input_document_relation=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.schemas.DocumentRelation']}</param>
38 52
        
39 53
            <param>output_document_to_dataset=${output_document_to_dataset}</param>
40
            <param>schema_output_document_to_dataset=eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet</param>
54
            <param>schema_output_document_to_dataset=${wf:actionData('generate-schema')['eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet']}</param>
41 55
        </pig>
42 56
        <ok to="end"/>
43 57
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/citations/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_input_citations
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input');
3
'schema', '$schema_input');
4 4

  
5 5
define avro_store_output_citations
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7 7
'index', '0',
8
'output_schema_class', '$schema_output');
8
'schema', '$schema_output');
9 9

  
10 10
input_citations = load '$input' using avro_load_input_citations;
11 11

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadataextraction/skip_extracted/oozie_app/workflow.xml
27 27
		</property>
28 28
	</parameters>
29 29
    
30
    <start to="transformer"/>
30
    <start to="generate-schema"/>
31
    
32
    <action name="generate-schema">
33
	    <java>
34
    		<job-tracker>${jobTracker}</job-tracker>
35
        	<name-node>${nameNode}</name-node>
36
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
37
	        <arg>eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</arg>
38
	        <arg>eu.dnetlib.iis.metadataextraction.schemas.DocumentText</arg>
39
	        <arg>eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</arg>
40
	        <capture-output />
41
	    </java>
42
	    <ok to="transformer" />
43
	    <error to="fail" />
44
	</action>
45
	
31 46
    <action name="transformer">
32 47
        <pig>
33 48
            <job-tracker>${jobTracker}</job-tracker>
......
55 70
            <!-- Path to PIG script the workflow executes. -->
56 71
            <script>lib/scripts/transformer.pig</script>
57 72
            
58
            <param>schema_document_content=eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</param>
59
            <param>schema_document_text=eu.dnetlib.iis.metadataextraction.schemas.DocumentText</param>
60
            <param>schema_document_meta=eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</param>
73
            <param>schema_document_content=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl']}</param>
74
            <param>schema_document_text=${wf:actionData('generate-schema')['eu.dnetlib.iis.metadataextraction.schemas.DocumentText']}</param>
75
            <param>schema_document_meta=${wf:actionData('generate-schema')['eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata']}</param>
61 76
            
62 77
            <param>input_document_content=${input_document_content}</param>
63 78
            <param>input_document_text=${input_document_text}</param>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/documentmetadata/oozie_app/workflow.xml
11 11
		</property>
12 12
	</parameters>
13 13
    
14
    <start to="transformer"/>
14
    <start to="generate-schema"/>
15 15
    
16
    <action name="generate-schema">
17
	    <java>
18
    		<job-tracker>${jobTracker}</job-tracker>
19
        	<name-node>${nameNode}</name-node>
20
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
21
	        <arg>eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</arg>
22
	        <arg>eu.dnetlib.iis.export.schemas.DocumentMetadata</arg>
23
	        <capture-output />
24
	    </java>
25
	    <ok to="transformer" />
26
	    <error to="fail" />
27
	</action>
28
    
16 29
    <action name="transformer">
17 30
        <pig>
18 31
            <job-tracker>${jobTracker}</job-tracker>
......
31 44
            <script>lib/scripts/transformer/transformer.pig</script>
32 45
            
33 46
            <param>input_extracted_metadata=${input_extracted_metadata}</param>
34
            <param>schema_input_extracted_metadata=eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</param>
47
            <param>schema_input_extracted_metadata=${wf:actionData('generate-schema')['eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata']}</param>
35 48
           
36 49
            <param>output_metadata=${output_metadata}</param>
37
            <param>schema_output_metadata=eu.dnetlib.iis.export.schemas.DocumentMetadata</param>
50
            <param>schema_output_metadata=${wf:actionData('generate-schema')['eu.dnetlib.iis.export.schemas.DocumentMetadata']}</param>
38 51
        </pig>
39 52
        <ok to="end"/>
40 53
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/identifier/documenttodataset/oozie_app/workflow.xml
12 12
		</property>
13 13
	</parameters>
14 14
    
15
    <start to="transformer"/>
15
    <start to="generate-schema"/>
16
    
17
    <action name="generate-schema">
18
	    <java>
19
    		<job-tracker>${jobTracker}</job-tracker>
20
        	<name-node>${nameNode}</name-node>
21
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
22
	        <arg>eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet</arg>
23
	        <arg>eu.dnetlib.iis.importer.schemas.DocumentToMDStore</arg>
24
	        <capture-output />
25
	    </java>
26
	    <ok to="transformer" />
27
	    <error to="fail" />
28
	</action>
29
	
16 30
    <action name="transformer">
17 31
        <pig>
18 32
            <job-tracker>${jobTracker}</job-tracker>
......
31 45
            <script>lib/scripts/transformer.pig</script>
32 46
            
33 47
            <param>input_document_to_dataset=${input_document_to_dataset}</param>
34
            <param>schema_input_document_to_dataset=eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet</param>
48
            <param>schema_input_document_to_dataset=${wf:actionData('generate-schema')['eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet']}</param>
35 49
            
36 50
            <param>input_document_to_mdstore=${input_document_to_mdstore}</param>
37
            <param>schema_input_document_to_mdstore=eu.dnetlib.iis.importer.schemas.DocumentToMDStore</param>
51
            <param>schema_input_document_to_mdstore=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.schemas.DocumentToMDStore']}</param>
38 52
            
39 53
            <param>output_document_to_mdstore=${output_document_to_mdstore}</param>
40
            <param>schema_output_document_to_mdstore=eu.dnetlib.iis.importer.schemas.DocumentToMDStore</param>
54
            <param>schema_output_document_to_mdstore=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.schemas.DocumentToMDStore']}</param>
41 55
        </pig>
42 56
        <ok to="end"/>
43 57
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/identifier/documenttoproject/oozie_app/workflow.xml
9 9
		</property>
10 10
	</parameters>
11 11
    
12
    <start to="transformer"/>
12
    <start to="generate-schema"/>
13
    
14
    <action name="generate-schema">
15
	    <java>
16
    		<job-tracker>${jobTracker}</job-tracker>
17
        	<name-node>${nameNode}</name-node>
18
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
19
	        <arg>eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject</arg>
20
	        <arg>eu.dnetlib.iis.export.auxiliary.schemas.Identifier</arg>
21
	        <capture-output />
22
	    </java>
23
	    <ok to="transformer" />
24
	    <error to="fail" />
25
	</action>
26
	
13 27
    <action name="transformer">
14 28
        <pig>
15 29
            <job-tracker>${jobTracker}</job-tracker>
......
28 42
            <script>lib/scripts/transformer.pig</script>
29 43
            
30 44
            <param>input_document_to_project=${input_document_to_project}</param>
31
            <param>schema_input_document_to_project=eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject</param>
45
            <param>schema_input_document_to_project=${wf:actionData('generate-schema')['eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject']}</param>
32 46
            
33 47
            <param>output_identifier=${output_identifier}</param>
34
            <param>schema_output_identifier=eu.dnetlib.iis.export.auxiliary.schemas.Identifier</param>
48
            <param>schema_output_identifier=${wf:actionData('generate-schema')['eu.dnetlib.iis.export.auxiliary.schemas.Identifier']}</param>
35 49
        </pig>
36 50
        <ok to="end"/>
37 51
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/documenttodataset_without_imported_data/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_document_to_dataset
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input_document_to_dataset');
3
'schema', '$schema_input_document_to_dataset');
4 4

  
5 5
define avro_load_document_relation
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema_input_document_relation');
7
'schema', '$schema_input_document_relation');
8 8

  
9 9
define avro_store_document_to_dataset
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11 11
'index', '0',
12
'output_schema_class', '$schema_output_document_to_dataset');
12
'schema', '$schema_output_document_to_dataset');
13 13

  
14 14

  
15 15
documentToDataset = load '$input_document_to_dataset' using avro_load_document_to_dataset;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/oozie_app/workflow.xml
12 12
		</property>
13 13
	</parameters>
14 14
    
15
    <start to="transformer"/>
15
    <start to="generate-schema"/>
16
    
17
    <action name="generate-schema">
18
	    <java>
19
    		<job-tracker>${jobTracker}</job-tracker>
20
        	<name-node>${nameNode}</name-node>
21
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
22
	        <arg>eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject</arg>
23
	        <arg>eu.dnetlib.iis.importer.schemas.DocumentToProject</arg>
24
	        <capture-output />
25
	    </java>
26
	    <ok to="transformer" />
27
	    <error to="fail" />
28
	</action>
29
	
16 30
    <action name="transformer">
17 31
        <pig>
18 32
            <job-tracker>${jobTracker}</job-tracker>
......
31 45
            <script>lib/scripts/transformer.pig</script>
32 46
            
33 47
            <param>input_document_to_project=${input_document_to_project}</param>
34
            <param>schema_input_document_to_project=eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject</param>
48
            <param>schema_input_document_to_project=${wf:actionData('generate-schema')['eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject']}</param>
35 49
            
36 50
            <param>input_imported_document_to_project=${input_imported_document_to_project}</param>
37
            <param>schema_input_imported_document_to_project=eu.dnetlib.iis.importer.schemas.DocumentToProject</param>
51
            <param>schema_input_imported_document_to_project=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.schemas.DocumentToProject']}</param>
38 52
        
39 53
            <param>output_document_to_project=${output_document_to_project}</param>
40
            <param>schema_output_document_to_project=eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject</param>
54
            <param>schema_output_document_to_project=${wf:actionData('generate-schema')['eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject']}</param>
41 55
        </pig>
42 56
        <ok to="end"/>
43 57
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadatamerger/oozie_app/workflow.xml
15 15
		</property>
16 16
	</parameters>
17 17
    
18
    <start to="merger"/>
18
    <start to="generate-schema"/>
19 19
    
20
    <action name="generate-schema">
21
	    <java>
22
    		<job-tracker>${jobTracker}</job-tracker>
23
        	<name-node>${nameNode}</name-node>
24
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
25
	        <arg>eu.dnetlib.iis.importer.schemas.DocumentMetadata</arg>
26
	        <arg>eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</arg>
27
	        <arg>eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal</arg>
28
	        <capture-output />
29
	    </java>
30
	    <ok to="merger" />
31
	    <error to="fail" />
32
	</action>
33
    
20 34
    <action name="merger">
21 35
        <pig>
22 36
            <job-tracker>${jobTracker}</job-tracker>
......
35 49
            <script>lib/scripts/merger/merger.pig</script>
36 50

  
37 51
            <param>input_base_metadata=${input_base_metadata}</param>
38
            <param>schema_input_base_metadata=eu.dnetlib.iis.importer.schemas.DocumentMetadata</param>
52
            <param>schema_input_base_metadata=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.schemas.DocumentMetadata']}</param>
39 53
            
40 54
            <param>input_extracted_metadata=${input_extracted_metadata}</param>
41
            <param>schema_input_extracted_metadata=eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</param>
55
            <param>schema_input_extracted_metadata=${wf:actionData('generate-schema')['eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata']}</param>
42 56
           
43 57
            <param>output_merged_metadata=${output_merged_metadata}</param>
44
            <param>schema_output_merged_metadata=eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal</param>
58
            <param>schema_output_merged_metadata=${wf:actionData('generate-schema')['eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal']}</param>
45 59
        </pig>
46 60
        <ok to="end"/>
47 61
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadatamerger/oozie_app/lib/scripts/merger/merger.pig
1 1
define avro_load_base_metadata
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input_base_metadata');
3
'schema', '$schema_input_base_metadata');
4 4

  
5 5
define avro_load_extracted_metadata
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema_input_extracted_metadata');
7
'schema', '$schema_input_extracted_metadata');
8 8

  
9 9
define avro_store_merged_metadata
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11 11
'index', '0',
12
'output_schema_class', '$schema_output_merged_metadata');
12
'schema', '$schema_output_merged_metadata');
13 13

  
14 14
define FIRST_NOT_NULL_STR eu.dnetlib.iis.transformers.udfs.StringFirstNotEmpty;
15 15
define FIRST_NOT_NULL_INT eu.dnetlib.iis.transformers.udfs.IntegerFirstNotEmpty;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/meta/sampledataproducer/oozie_app/workflow.xml
26 26
            <arg>-C{extracted_document_metadata,
27 27
				eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata,
28 28
				eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/meta/sampledataproducer/data/input_extracted_document_metadata.json}</arg>
29
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
30 29
            <arg>-Odocument_content_url=${workingDir}/producer/document_content_url</arg>
31 30
            <arg>-Oextracted_document_metadata=${workingDir}/producer/extracted_document_metadata</arg>
32 31
        </java>
......
87 86
			<arg>-C{output_metadata,
88 87
				eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata,
89 88
				eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/meta/sampledataproducer/data/output_extracted_document_metadata.json}</arg>
90
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
91 89
			<arg>-Ioutput_metadata=${workingDir}/transformer_metadataextraction_checksum_postprocessing_meta/output</arg>
92 90
		</java>
93 91
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/preprocessing/sampledataproducer/oozie_app/workflow.xml
23 23
            <arg>-C{document_content_url,
24 24
				eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl,
25 25
				eu/dnetlib/iis/transformers/metadataextraction/checksum/preprocessing/sampledataproducer/data/input_document_content_url.json}</arg>
26
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
27 26
            <arg>-Odocument_content_url=${workingDir}/producer/document_content_url</arg>
28 27
        </java>
29 28
        <ok to="checksum_preprocessing"/>
......
78 77
			<arg>-C{output_metadata,
79 78
				eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl,
80 79
				eu/dnetlib/iis/transformers/metadataextraction/checksum/preprocessing/sampledataproducer/data/output_document_content_url.json}</arg>
81
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
82 80
			<arg>-Ioutput_metadata=${workingDir}/transformer_metadataextraction_checksum_preprocessing/output</arg>
83 81
		</java>
84 82
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/metadatamerger/sampledataproducer/oozie_app/workflow.xml
26 26
            <arg>-C{extracted_metadata,
27 27
				eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata,
28 28
				eu/dnetlib/iis/transformers/metadatamerger/sampledataproducer/data/extr_metadata.json}</arg>
29
			<!-- All input and output ports have to be bound to paths in HDFS, working 
30
				directory has to be specified as well -->
31
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
			<!-- All input and output ports have to be bound to paths in HDFS -->
32 30
            <arg>-Obase_metadata=${workingDir}/producer/base_metadata</arg>
33 31
            <arg>-Oextracted_metadata=${workingDir}/producer/extr_metadata</arg>
34 32
        </java>
......
93 91
			<arg>-C{merged_metadata,
94 92
				eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal,
95 93
				eu/dnetlib/iis/transformers/metadatamerger/sampledataproducer/data/merged_metadata.json}</arg>
96
			<!-- All input and output ports have to be bound to paths in HDFS, working 
97
				directory has to be specified as well -->
98
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
94
			<!-- All input and output ports have to be bound to paths in HDFS -->
99 95
			<arg>-Imerged_metadata=${workingDir}/transformer_metadatamerger/merged_metadata</arg>
100 96
		</java>
101 97
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/citationmatching/sampledataproducer/oozie_app/workflow.xml
26 26
            <arg>-C{metadata,
27 27
				eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal,
28 28
				eu/dnetlib/iis/transformers/citationmatching/sampledataproducer/data/metadata.json}</arg>
29
			<!-- All input and output ports have to be bound to paths in HDFS, working 
30
				directory has to be specified as well -->
31
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
			<!-- All input and output ports have to be bound to paths in HDFS -->
32 30
            <arg>-Operson=${workingDir}/producer/person</arg>
33 31
            <arg>-Ometadata=${workingDir}/producer/metadata</arg>
34 32
		</java>
......
93 91
			<arg>-C{citation_metadata,
94 92
				eu.dnetlib.iis.citationmatching.schemas.DocumentMetadata,
95 93
				eu/dnetlib/iis/transformers/citationmatching/sampledataproducer/data/citation_metadata.json}</arg>
96
			<!-- All input and output ports have to be bound to paths in HDFS, working 
97
				directory has to be specified as well -->
98
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
94
			<!-- All input and output ports have to be bound to paths in HDFS -->
99 95
			<arg>-Icitation_metadata=${workingDir}/transformer_citationmatching/citation_metadata</arg>
100 96
		</java>
101 97
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/common/existencefilter/sampledataproducer/oozie_app/workflow.xml
26 26
            <arg>-C{data,
27 27
            eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl,
28 28
            eu/dnetlib/iis/transformers/common/existencefilter/sampledataproducer/data/data.json}</arg>
29
            <!-- All input and output ports have to be bound to paths in HDFS, working
30
                directory has to be specified as well -->
31
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
            <!-- All input and output ports have to be bound to paths in HDFS -->
32 30
            <arg>-Oexistent_id=${workingDir}/producer/existent_id</arg>
33 31
            <arg>-Odata=${workingDir}/producer/data</arg>
34 32
        </java>
......
88 86
			<arg>-C{filtered,
89 87
                eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl,
90 88
                eu/dnetlib/iis/transformers/common/existencefilter/sampledataproducer/data/filtered.json}</arg>
91
    		<!-- All input and output ports have to be bound to paths in HDFS, working
92
				directory has to be specified as well -->
93
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
89
    		<!-- All input and output ports have to be bound to paths in HDFS -->
94 90
			<arg>-Ifiltered=${workingDir}/existencefilter/filtered</arg>
95 91
		</java>
96 92
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/documentssimilarity/sampledataproducer/oozie_app/workflow.xml
27 27
				eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal,
28 28
				eu/dnetlib/iis/transformers/documentssimilarity/sampledataproducer/data/input_metadata.json}</arg>
29 29

  
30
			<!-- All input and output ports have to be bound to paths in HDFS, working 
31
				directory has to be specified as well -->
32
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
30
			<!-- All input and output ports have to be bound to paths in HDFS -->
33 31
            <arg>-Oinput_person=${workingDir}/producer/input_person</arg>
34 32
            <arg>-Oinput_metadata=${workingDir}/producer/input_metadata</arg>
35 33
        </java>
......
94 92
			<arg>-C{output_document_metadata,
95 93
				eu.dnetlib.iis.documentssimilarity.schemas.DocumentMetadata,
96 94
				eu/dnetlib/iis/transformers/documentssimilarity/sampledataproducer/data/output_document_metadata.json}</arg>
97
			<!-- All input and output ports have to be bound to paths in HDFS, working 
98
				directory has to be specified as well -->
99
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
95
			<!-- All input and output ports have to be bound to paths in HDFS -->
100 96
			<arg>-Ioutput_document_metadata=${workingDir}/transformer_documentssimilarity/output_document_metadata</arg>
101 97
		</java>
102 98
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/ingest/pmc/metadata/sampledataproducer/oozie_app/workflow.xml
24 24
				eu.dnetlib.iis.ingest.pmc.metadata.schemas.ExtractedDocumentMetadata,
25 25
				eu/dnetlib/iis/transformers/ingest/pmc/metadata/sampledataproducer/data/input.json}</arg>
26 26

  
27
			<!-- All input and output ports have to be bound to paths in HDFS, working 
28
				directory has to be specified as well -->
29
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
27
			<!-- All input and output ports have to be bound to paths in HDFS -->
30 28
            <arg>-Oinput=${workingDir}/producer/input</arg>
31 29
        </java>
32 30
        <ok to="transformer"/>
......
86 84
			<arg>-C{output,
87 85
				eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata,
88 86
				eu/dnetlib/iis/transformers/ingest/pmc/metadata/sampledataproducer/data/output.json}</arg>
89
			<!-- All input and output ports have to be bound to paths in HDFS, working 
90
				directory has to be specified as well -->
91
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
87
			<!-- All input and output ports have to be bound to paths in HDFS -->
92 88
			<arg>-Ioutput=${workingDir}/transformer/output</arg>
93 89
		</java>
94 90
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/documentssimilarity_with_fulltext/sampledataproducer/oozie_app/workflow.xml
30 30
				eu.dnetlib.iis.metadataextraction.schemas.DocumentText,
31 31
				eu/dnetlib/iis/transformers/documentssimilarity_with_fulltext/sampledataproducer/data/input_document_text.json}</arg>
32 32

  
33
			<!-- All input and output ports have to be bound to paths in HDFS, working 
34
				directory has to be specified as well -->
35
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
33
			<!-- All input and output ports have to be bound to paths in HDFS -->
36 34
            <arg>-Oinput_person=${workingDir}/producer/input_person</arg>
37 35
            <arg>-Oinput_metadata=${workingDir}/producer/input_metadata</arg>
38 36
            <arg>-Oinput_document_text=${workingDir}/producer/input_document_text</arg>
......
102 100
			<arg>-C{output_document_metadata,
103 101
				eu.dnetlib.iis.documentssimilarity_with_fulltext.schemas.DocumentMetadata,
104 102
				eu/dnetlib/iis/transformers/documentssimilarity_with_fulltext/sampledataproducer/data/output_document_metadata.json}</arg>
105
			<!-- All input and output ports have to be bound to paths in HDFS, working 
106
				directory has to be specified as well -->
107
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
103
			<!-- All input and output ports have to be bound to paths in HDFS -->
108 104
			<arg>-Ioutput_document_metadata=${workingDir}/transformer_documentssimilarity_with_fulltext/output_document_metadata</arg>
109 105
		</java>
110 106
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/idreplacer/replacer_1_field/oozie_app/workflow.xml
26 26
            <arg>-C{id_mapping,
27 27
				eu.dnetlib.iis.common.schemas.IdentifierMapping,
28 28
				eu/dnetlib/iis/transformers/idreplacer/replacer_1_field/data/id_mapping.json}</arg>
29
			<!-- All input and output ports have to be bound to paths in HDFS, working 
30
				directory has to be specified as well -->
31
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
			<!-- All input and output ports have to be bound to paths in HDFS -->
32 30
            <arg>-Omain=${workingDir}/producer/main</arg>
33 31
            <arg>-Oid_mapping=${workingDir}/producer/id_mapping</arg>
34 32
        </java>
......
101 99
			<arg>-C{output,
102 100
				eu.dnetlib.iis.importer.schemas.DocumentMetadata,
103 101
				eu/dnetlib/iis/transformers/idreplacer/replacer_1_field/data/output.json}</arg>
104
    		<!-- All input and output ports have to be bound to paths in HDFS, working 
105
				directory has to be specified as well -->
106
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
102
    		<!-- All input and output ports have to be bound to paths in HDFS -->
107 103
			<arg>-Ioutput=${workingDir}/collapser_idreplacer/output</arg>
108 104
		</java>
109 105
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/idreplacer/replacer_2_fields/oozie_app/workflow.xml
26 26
            <arg>-C{id_mapping,
27 27
				eu.dnetlib.iis.common.schemas.IdentifierMapping,
28 28
				eu/dnetlib/iis/transformers/idreplacer/replacer_2_fields/data/id_mapping.json}</arg>
29
			<!-- All input and output ports have to be bound to paths in HDFS, working 
30
				directory has to be specified as well -->
31
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
29
			<!-- All input and output ports have to be bound to paths in HDFS -->
32 30
            <arg>-Omain=${workingDir}/producer/main</arg>
33 31
            <arg>-Oid_mapping=${workingDir}/producer/id_mapping</arg>
34 32
        </java>
......
105 103
			<arg>-C{output,
106 104
                eu.dnetlib.iis.citationmatching.schemas.Citation,
107 105
				eu/dnetlib/iis/transformers/idreplacer/replacer_2_fields/data/output.json}</arg>
108
    		<!-- All input and output ports have to be bound to paths in HDFS, working 
109
				directory has to be specified as well -->
110
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
106
    		<!-- All input and output ports have to be bound to paths in HDFS -->
111 107
			<arg>-Ioutput=${workingDir}/collapser_idreplacer/output</arg>
112 108
		</java>
113 109
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/metricsprimary/sampledataproducer/oozie_app/workflow.xml
29 29
            <arg>-C{person,
30 30
				eu.dnetlib.iis.importer.schemas.Person,
31 31
				eu/dnetlib/iis/transformers/metricsprimary/sampledataproducer/data/person.json}</arg>
32
			<!-- All input and output ports have to be bound to paths in HDFS, working 
33
				directory has to be specified as well -->
34
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
32
			<!-- All input and output ports have to be bound to paths in HDFS -->
35 33
            <arg>-Odocument=${workingDir}/producer/document</arg>
36 34
            <arg>-Ocitation=${workingDir}/producer/citation</arg>
37 35
            <arg>-Operson=${workingDir}/producer/person</arg>
......
108 106
            <arg>-C{person_id,
109 107
				eu.dnetlib.iis.metrics.primary.schemas.PersonId,
110 108
				eu/dnetlib/iis/transformers/metricsprimary/sampledataproducer/data/person_id.json}</arg>
111
			<!-- All input and output ports have to be bound to paths in HDFS, working 
112
				directory has to be specified as well -->
113
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
109
			<!-- All input and output ports have to be bound to paths in HDFS -->
114 110
			<arg>-Idocument_authors_citations=${workingDir}/transformer_metricsprimary/document_authors_citations</arg>
115 111
            <arg>-Iperson_id=${workingDir}/transformer_metricsprimary/person_id</arg>
116 112
		</java>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/statistics/sampledataproducer/oozie_app/workflow.xml
35 35
            <arg>-C{document_to_project,
36 36
				eu.dnetlib.iis.importer.schemas.DocumentToProject,
37 37
				eu/dnetlib/iis/transformers/statistics/sampledataproducer/data/document_to_project.json}</arg>
38
			<!-- All input and output ports have to be bound to paths in HDFS, working 
39
				directory has to be specified as well -->
40
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
38
			<!-- All input and output ports have to be bound to paths in HDFS -->
41 39
            <arg>-Odocument=${workingDir}/producer/document</arg>
42 40
            <arg>-Ocitation=${workingDir}/producer/citation</arg>
43 41
            <arg>-Operson=${workingDir}/producer/person</arg>
......
131 129
            <arg>-C{project_id,
132 130
				eu.dnetlib.iis.statistics.schemas.ProjectId,
133 131
				eu/dnetlib/iis/transformers/statistics/sampledataproducer/data/project_id.json}</arg>
134
			<!-- All input and output ports have to be bound to paths in HDFS, working 
135
				directory has to be specified as well -->
136
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
132
			<!-- All input and output ports have to be bound to paths in HDFS -->
137 133
			<arg>-Idocument_authors_citations=${workingDir}/transformer_statistics/document_authors_citations</arg>
138 134
            <arg>-Iperson_id=${workingDir}/transformer_statistics/person_id</arg>
139 135
            <arg>-Iproject_id=${workingDir}/transformer_statistics/project_id</arg>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/transformers/documentsclassification/sampledataproducer/oozie_app/workflow.xml
23 23
            <arg>-C{merged_metadata,
24 24
				eu.dnetlib.iis.transformers.metadatamerger.schemas.ExtractedDocumentMetadataMergedWithOriginal,
25 25
				eu/dnetlib/iis/transformers/documentsclassification/sampledataproducer/data/merged_metadata.json}</arg>
26
			<!-- All input and output ports have to be bound to paths in HDFS, working 
27
				directory has to be specified as well -->
28
            <arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
26
			<!-- All input and output ports have to be bound to paths in HDFS -->
29 27
            <arg>-Omerged_metadata=${workingDir}/producer/merged_metadata</arg>
30 28
        </java>
31 29
        <ok to="transformer_documentsclassification"/>
......
85 83
			<arg>-C{document_metadata,
86 84
				eu.dnetlib.iis.documentsclassification.schemas.DocumentMetadata,
87 85
				eu/dnetlib/iis/transformers/documentsclassification/sampledataproducer/data/abstract_metadata.json}</arg>
88
			<!-- All input and output ports have to be bound to paths in HDFS, working 
89
				directory has to be specified as well -->
90
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
86
			<!-- All input and output ports have to be bound to paths in HDFS -->
91 87
			<arg>-Idocument_metadata=${workingDir}/transformer_documentsclassification/document_metadata</arg>
92 88
		</java>
93 89
		<ok to="end" />
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/text/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_document_text
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_document_text');
3
'schema', '$schema_document_text');
4 4

  
5 5
define avro_load_document_content_url
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema_document_content_url');
7
'schema', '$schema_document_content_url');
8 8

  
9 9
define avro_store_document_text
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11 11
'index', '0',
12
'output_schema_class', '$schema_document_text');
12
'schema', '$schema_document_text');
13 13

  
14 14
sourceDocumentText = load '$input_document_text' using avro_load_document_text;
15 15
sourceDocumentContentUrl = load '$input_document_content_url' using avro_load_document_content_url;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/text/oozie_app/workflow.xml
15 15
		</property>
16 16
	</parameters>
17 17
    
18
    <start to="transformer"/>
18
    <start to="generate-schema"/>
19
    
20
    <action name="generate-schema">
21
	    <java>
22
    		<job-tracker>${jobTracker}</job-tracker>
23
        	<name-node>${nameNode}</name-node>
24
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
25
	        <arg>eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</arg>
26
	        <arg>eu.dnetlib.iis.metadataextraction.schemas.DocumentText</arg>
27
	        <capture-output />
28
	    </java>
29
	    <ok to="transformer" />
30
	    <error to="fail" />
31
	</action>
32
	
19 33
    <action name="transformer">
20 34
        <pig>
21 35
            <job-tracker>${jobTracker}</job-tracker>
......
32 46
            </configuration>
33 47
            <!-- Path to PIG script the workflow executes. -->
34 48
            <script>lib/scripts/transformer.pig</script>
35
            <param>schema_document_content_url=eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</param>
36
            <param>schema_document_text=eu.dnetlib.iis.metadataextraction.schemas.DocumentText</param>
49
            <param>schema_document_content_url=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl']}</param>
50
            <param>schema_document_text=${wf:actionData('generate-schema')['eu.dnetlib.iis.metadataextraction.schemas.DocumentText']}</param>
37 51
            <param>input_document_content_url=${input_document_content_url}</param>
38 52
            <param>input_document_text=${input_document_text}</param>
39 53
            <param>output=${output}</param>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/meta/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_extracted_document_metadata
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_extracted_document_metadata');
3
'schema', '$schema_extracted_document_metadata');
4 4

  
5 5
define avro_load_document_content_url
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema_document_content_url');
7
'schema', '$schema_document_content_url');
8 8

  
9 9
define avro_store_extracted_document_metadata
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11 11
'index', '0',
12
'output_schema_class', '$schema_extracted_document_metadata');
12
'schema', '$schema_extracted_document_metadata');
13 13

  
14 14
sourceDocumentMeta = load '$input_extracted_document_metadata' using avro_load_extracted_document_metadata;
15 15
sourceDocumentContentUrl = load '$input_document_content_url' using avro_load_document_content_url;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/postprocessing/meta/oozie_app/workflow.xml
15 15
		</property>
16 16
	</parameters>
17 17
    
18
    <start to="transformer"/>
18
    <start to="generate-schema"/>
19
    
20
    <action name="generate-schema">
21
	    <java>
22
    		<job-tracker>${jobTracker}</job-tracker>
23
        	<name-node>${nameNode}</name-node>
24
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
25
	        <arg>eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</arg>
26
	        <arg>eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</arg>
27
	        <capture-output />
28
	    </java>
29
	    <ok to="transformer" />
30
	    <error to="fail" />
31
	</action>
32
	
19 33
    <action name="transformer">
20 34
        <pig>
21 35
            <job-tracker>${jobTracker}</job-tracker>
......
32 46
            </configuration>
33 47
            <!-- Path to PIG script the workflow executes. -->
34 48
            <script>lib/scripts/transformer.pig</script>
35
            <param>schema_document_content_url=eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</param>
36
            <param>schema_extracted_document_metadata=eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</param>
49
            <param>schema_document_content_url=${wf:actionData('generate-schema')['eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl']}</param>
50
            <param>schema_extracted_document_metadata=${wf:actionData('generate-schema')['eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata']}</param>
37 51
            <param>input_document_content_url=${input_document_content_url}</param>
38 52
            <param>input_extracted_document_metadata=${input_extracted_document_metadata}</param>
39 53
            <param>output=${output}</param>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadataextraction/checksum/preprocessing/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_document_content_url
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_document_content_url');
3
'schema', '$schema_document_content_url');
4 4

  
5 5
define avro_store_document_content_url
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7 7
'index', '0',
8
'output_schema_class', '$schema_document_content_url');
8
'schema', '$schema_document_content_url');
9 9

  
10 10
sourceDocumentContentUrl = load '$input' using avro_load_document_content_url;
11 11

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/metadataextraction/skip_extracted/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_document_content
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_document_content');
3
'schema', '$schema_document_content');
4 4

  
5 5
define avro_load_document_text
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema_document_text');
7
'schema', '$schema_document_text');
8 8

  
9 9
define avro_load_document_meta
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11
'input_schema_class', '$schema_document_meta');
11
'schema', '$schema_document_meta');
12 12

  
13 13
define avro_store_document_content
14 14
org.apache.pig.piggybank.storage.avro.AvroStorage(
15 15
'index', '0',
16
'output_schema_class', '$schema_document_content');
16
'schema', '$schema_document_content');
17 17

  
18 18
define avro_store_document_text
19 19
org.apache.pig.piggybank.storage.avro.AvroStorage(
20 20
'index', '1',
21
'output_schema_class', '$schema_document_text');
21
'schema', '$schema_document_text');
22 22

  
23 23
define avro_store_document_meta
24 24
org.apache.pig.piggybank.storage.avro.AvroStorage(
25 25
'index', '2',
26
'output_schema_class', '$schema_document_meta');
26
'schema', '$schema_document_meta');
27 27

  
28 28
documentContent = load '$input_document_content' using avro_load_document_content;
29 29
documentText = load '$input_document_text' using avro_load_document_text;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/documentmetadata/oozie_app/lib/scripts/transformer/transformer.pig
1 1
define avro_load_extracted_metadata
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input_extracted_metadata');
3
'schema', '$schema_input_extracted_metadata');
4 4

  
5 5
define avro_store_metadata
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7 7
'index', '0',
8
'output_schema_class', '$schema_output_metadata');
8
'schema', '$schema_output_metadata');
9 9

  
10 10
extr_meta = load '$input_extracted_metadata' using avro_load_extracted_metadata;
11 11

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/identifier/documenttodataset/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_document_to_dataset
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input_document_to_dataset');
3
'schema', '$schema_input_document_to_dataset');
4 4

  
5 5
define avro_load_document_to_mdstore
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema_input_document_to_mdstore');
7
'schema', '$schema_input_document_to_mdstore');
8 8

  
9 9
define avro_store_document_to_mdstore
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11 11
'index', '0',
12
'output_schema_class', '$schema_output_document_to_mdstore');
12
'schema', '$schema_output_document_to_mdstore');
13 13

  
14 14
documentToDataset = load '$input_document_to_dataset' using avro_load_document_to_dataset;
15 15
datasetIds = foreach documentToDataset generate datasetId as id;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/identifier/documenttoproject/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_document_to_project
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input_document_to_project');
3
'schema', '$schema_input_document_to_project');
4 4

  
5 5
define avro_store_identifier
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7 7
'index', '0',
8
'output_schema_class', '$schema_output_identifier');
8
'schema', '$schema_output_identifier');
9 9

  
10 10
documentToProject = load '$input_document_to_project' using avro_load_document_to_project;
11 11
documentToProjectId = foreach documentToProject generate documentId;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/researchinitiatives/oozie_app/workflow.xml
9 9
		</property>
10 10
	</parameters>
11 11
    
12
    <start to="transformer"/>
12
    <start to="generate-schema"/>
13
    
14
    <action name="generate-schema">
15
	    <java>
16
    		<job-tracker>${jobTracker}</job-tracker>
17
        	<name-node>${nameNode}</name-node>
18
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
19
	        <arg>eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId</arg>
20
	        <arg>eu.dnetlib.iis.export.schemas.DocumentToConceptIds</arg>
21
	        <capture-output />
22
	    </java>
23
	    <ok to="transformer" />
24
	    <error to="fail" />
25
	</action>
26
	
13 27
    <action name="transformer">
14 28
        <pig>
15 29
            <job-tracker>${jobTracker}</job-tracker>
......
28 42
            <script>lib/scripts/transformer.pig</script>
29 43
            
30 44
            <param>input_document_to_research_initiative=${input_document_to_research_initiative}</param>
31
            <param>schema_input_document_to_research_initiative=eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId</param>
45
            <param>schema_input_document_to_research_initiative=${wf:actionData('generate-schema')['eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToConceptId']}</param>
32 46
            
33 47
            <param>output_document_to_research_initiatives=${output_document_to_research_initiatives}</param>
34
            <param>schema_output_document_to_research_initiatives=eu.dnetlib.iis.export.schemas.DocumentToConceptIds</param>
48
            <param>schema_output_document_to_research_initiatives=${wf:actionData('generate-schema')['eu.dnetlib.iis.export.schemas.DocumentToConceptIds']}</param>
35 49
        </pig>
36 50
        <ok to="end"/>
37 51
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/documenttoproject_without_imported_data/oozie_app/lib/scripts/transformer.pig
1 1
define avro_load_document_to_project
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input_document_to_project');
3
'schema', '$schema_input_document_to_project');
4 4

  
5 5
define avro_load_imported_document_to_project
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema_input_imported_document_to_project');
7
'schema', '$schema_input_imported_document_to_project');
8 8

  
9 9

  
10 10
define avro_store_document_to_project
11 11
org.apache.pig.piggybank.storage.avro.AvroStorage(
12 12
'index', '0',
13
'output_schema_class', '$schema_output_document_to_project');
13
'schema', '$schema_output_document_to_project');
14 14

  
15 15

  
16 16
documentToProject = load '$input_document_to_project' using avro_load_document_to_project;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/export/citations/oozie_app/workflow.xml
11 11
		</property>
12 12
	</parameters>
13 13
    
14
    <start to="transformer"/>
14
    <start to="generate-schema"/>
15 15
    
16
    <action name="generate-schema">
17
	    <java>
18
    		<job-tracker>${jobTracker}</job-tracker>
19
        	<name-node>${nameNode}</name-node>
20
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
21
	        <arg>eu.dnetlib.iis.common.citations.schemas.Citation</arg>
22
	        <arg>eu.dnetlib.iis.export.schemas.Citations</arg>
23
	        <capture-output />
24
	    </java>
25
	    <ok to="transformer" />
26
	    <error to="fail" />
27
	</action>
28
    
16 29
    <action name="transformer">
17 30
        <pig>
18 31
            <job-tracker>${jobTracker}</job-tracker>
......
31 44
            <script>lib/scripts/transformer.pig</script>
32 45

  
33 46
            <param>input=${input}</param>
34
            <param>schema_input=eu.dnetlib.iis.common.citations.schemas.Citation</param>
47
            <param>schema_input=${wf:actionData('generate-schema')['eu.dnetlib.iis.common.citations.schemas.Citation']}</param>
35 48
           
36 49
            <param>output=${output}</param>
37
            <param>schema_output=eu.dnetlib.iis.export.schemas.Citations</param>
50
            <param>schema_output=${wf:actionData('generate-schema')['eu.dnetlib.iis.export.schemas.Citations']}</param>
38 51
        </pig>
39 52
        <ok to="end"/>
40 53
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/avro2json/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.4" name="transformers_avro2json">
2
	
3
	<parameters>
4
		<property>
5
			<name>input</name>
6
			<description>input avro datastore</description>
7
		</property>
8
		<property>
9
			<name>output</name>
10
			<description>output json datastore</description>
11
		</property>
12
	</parameters>
13
	 
14
    <start to="decision-transformer"/>
15
    
16
    <decision name="decision-transformer">
17
        <switch>
18
            <case to="end">${input eq "$UNDEFINED$"}</case>
19
            <default to="transformer"/>
20
        </switch>
21
    </decision>
22
    
23
    <action name="transformer">
24
        <pig>
25
            <job-tracker>${jobTracker}</job-tracker>
26
            <name-node>${nameNode}</name-node>
27
			<!-- The data generated by this node is deleted in this section -->
28
			<prepare>
29
				<delete path="${nameNode}${output}" />
30
			</prepare>
31
            <configuration>
32
                <property>
33
                    <name>mapred.job.queue.name</name>
34
                    <value>${queueName}</value>
35
                </property>
36
            </configuration>
37
            <!-- Path to PIG script the workflow executes. -->
38
            <script>lib/scripts/transformer/transformer.pig</script>
39
            <param>input=${input}</param>
40
            <param>output=${output}</param>
41
        </pig>
42
        <ok to="end"/>
43
        <error to="fail"/>
44
    </action>
45
    
46
    <kill name="fail">
47
		<message>Unfortunately, the workflow failed -- error message:
48
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
49
    </kill>
50
    
51
    <end name="end"/>
52
</workflow-app>
0 53

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/citationmatching/oozie_app/lib/scripts/transformer/transformer.pig
1 1
define avro_load_person
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema_input_person');
3
'schema', '$schema_input_person');
4 4

  
5 5
define avro_load_metadata
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema_input_metadata');
7
'schema', '$schema_input_metadata');
8 8

  
9 9
define avro_store_citation_metadata
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11 11
'index', '0',
12
'output_schema_class', '$schema_output_citation_metadata');
12
'schema', '$schema_output_citation_metadata');
13 13

  
14 14
define CREATE_ARRAY eu.dnetlib.iis.transformers.udfs.NullToEmptyBag;
15 15
define LIST_TO_INDEXED_LIST eu.dnetlib.iis.transformers.udfs.StringListToListWithIndexes;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/common/thresholdfilter/oozie_app/lib/scripts/filter.pig
1
define avro_load_input
2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'schema', '$schema');
4

  
5
define avro_store_output
6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'index', '0',
8
'schema', '$schema');
9

  
10
input_records = load '$input' using avro_load_input;
11

  
12
output_records = filter input_records by ($threshold_field is not null) AND ($threshold_field >= $threshold_value);
13

  
14
store output_records into '$output' using avro_store_output;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/common/union/oozie_app/workflow.xml
15 15
		</property>
16 16
	</parameters>
17 17
    
18
    <start to="transformer"/>
18
    <start to="generate-schema"/>
19
    
20
    <action name="generate-schema">
21
	    <java>
22
    		<job-tracker>${jobTracker}</job-tracker>
23
        	<name-node>${nameNode}</name-node>
24
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
25
	        <arg>${schema}</arg>
26
	        <capture-output />
27
	    </java>
28
	    <ok to="transformer" />
29
	    <error to="fail" />
30
	</action>
31
	
19 32
    <action name="transformer">
20 33
        <pig>
21 34
            <job-tracker>${jobTracker}</job-tracker>
......
36 49
            <param>input_a=${input_a}</param>
37 50
            <param>input_b=${input_b}</param>
38 51
            <param>output=${output}</param>
39
            <param>schema=${schema}</param>
52
            <param>schema=${wf:actionData('generate-schema')[wf:conf('schema')]}</param>
40 53
        </pig>
41 54
        <ok to="end"/>
42 55
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/avro2json/job.properties
1
input=/share/import/doc_meta/2015-02-18_beta
2
output=${workingDir}/out
0 3

  
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/websiteusage/document/community_filter/oozie_app/workflow.xml
14 14
        </property>
15 15
    </parameters>
16 16

  
17
    <start to="filter"/>
17
    <start to="generate-schema"/>
18
    
19
    <action name="generate-schema">
20
	    <java>
21
    		<job-tracker>${jobTracker}</job-tracker>
22
        	<name-node>${nameNode}</name-node>
23
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
24
	        <arg>eu.dnetlib.iis.websiteusage.schemas.DocumentToCommunity</arg>
25
	        <arg>eu.dnetlib.iis.common.schemas.DocumentId</arg>
26
	        <capture-output />
27
	    </java>
28
	    <ok to="filter" />
29
	    <error to="fail" />
30
	</action>
18 31

  
19 32
    <action name="filter">
20 33
        <pig>
......
34 47
            <param>input_community=${input_community}</param>
35 48
            <param>input_document_id=${input_document_id}</param>
36 49
            <param>output=${output}</param>
37
            <param>schema_community=eu.dnetlib.iis.websiteusage.schemas.DocumentToCommunity</param>
38
            <param>schema_document_id=eu.dnetlib.iis.common.schemas.DocumentId</param>
50
            <param>schema_community=${wf:actionData('generate-schema')['eu.dnetlib.iis.websiteusage.schemas.DocumentToCommunity']}</param>
51
            <param>schema_document_id=${wf:actionData('generate-schema')['eu.dnetlib.iis.common.schemas.DocumentId']}</param>
39 52
        </pig>
40 53
        <ok to="end"/>
41 54
        <error to="fail"/>
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/common/union/oozie_app/lib/scripts/union.pig
1 1
define avro_load_input_a
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema');
3
'schema', '$schema');
4 4

  
5 5
define avro_load_input_b
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema');
7
'schema', '$schema');
8 8

  
9 9
define avro_store_output
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11 11
'index', '0',
12
'output_schema_class', '$schema');
12
'schema', '$schema');
13 13

  
14 14
input_a = load '$input_a' using avro_load_input_a;
15 15
input_b = load '$input_b' using avro_load_input_b;
modules/icm-iis-transformers/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/transformers/common/union4/oozie_app/lib/scripts/union.pig
1 1
define avro_load_input_a
2 2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'input_schema_class', '$schema');
3
'schema', '$schema');
4 4

  
5 5
define avro_load_input_b
6 6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'input_schema_class', '$schema');
7
'schema', '$schema');
8 8

  
9 9
define avro_load_input_c
10 10
org.apache.pig.piggybank.storage.avro.AvroStorage(
11
'input_schema_class', '$schema');
11
'schema', '$schema');
12 12

  
13 13
define avro_load_input_d
14 14
org.apache.pig.piggybank.storage.avro.AvroStorage(
15
'input_schema_class', '$schema');
15
'schema', '$schema');
16 16

  
17 17
define avro_store_output
18 18
org.apache.pig.piggybank.storage.avro.AvroStorage(
19 19
'index', '0',
20
'output_schema_class', '$schema');
20
'schema', '$schema');
21 21

  
22 22
input_a = load '$input_a' using avro_load_input_a;
23 23
input_b = load '$input_b' using avro_load_input_b;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff