Project

General

Profile

« Previous | Next » 

Revision 56733

[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.34

View differences:

modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-profiles/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-openaireplus-profiles"}
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/offlineHbaseLoadJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="073e55eb-c6f4-49a9-80b3-1a927612ba5b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="offlineHbaseLoad" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that loads a given entity type in the offline dedup table</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.OfflineHbaseLoadMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.compress.map.output" value="true" />	
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
29
			
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
		<!--  	Uncomment to override the default lib path -->			
34
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
38
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
39
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
40
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
41
        	</JOB_INTERFACE>
42
        	<SCAN>
43
        		<FILTERS operator="MUST_PASS_ALL">
44
        			<FILTER type="prefix" param="entityTypeId" />
45
        		</FILTERS>
46
        		<FAMILIES>
47
        			<FAMILY param="entityType" />
48
        		</FAMILIES>
49
        	</SCAN>
50
        </HADOOP_JOB>
51
        <STATUS>
52
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
            <RUNNING_INSTANCES value="0"/>
54
            <CUMULATIVE_RUN value="0" />
55
        </STATUS>
56
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
    </BODY>
58
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingQuickJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="13beed98-81bf-4fbd-ab4f-de071177997c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
    	<HADOOP_JOB name="iisPreprocessingQuickJob" type="oozie">
11
        	<DESCRIPTION>IIS preprocessing</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
				<!-- Cluster wide -->
14
                <PROPERTY key="queueName" value="default"/>
15
				<PROPERTY key="user.name" value="dnet" />
16

  
17
				<!-- Runtime -->
18
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing_quick_test"/>
19
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
20
                <PROPERTY key="input_referenceextraction_project" value="/user/marek.horst/share/referenceextraction/document_projects/2014-04-11"/>
21
                <PROPERTY key="input_referenceextraction_dataset" value="/user/marek.horst/share/referenceextraction/document_datasets/all/2014-04-11"/>
22
                <PROPERTY key="export_action_hbase_table_initialize" value="false"/>
23
            </STATIC_CONFIGURATION>
24
        	<JOB_INTERFACE>
25
 		       	<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" />
26
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
27
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records" />
28
        		<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" />
29
        		<PARAM name="import_database_service_location" required="true" description="database service endpoint" />
30
        		<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction" />
31
        		<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext" />
32
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
33
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
34
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
35
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
36
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
37
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
38
       			<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" />
39
       			<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" />
40
       			<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" />
41
     			<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" />
42
        	</JOB_INTERFACE>
43
        </HADOOP_JOB>
44
        <STATUS>
45
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
46
            <RUNNING_INSTANCES value="0"/>
47
            <CUMULATIVE_RUN value="0" />
48
        </STATUS>
49
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
50
    </BODY>
51
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="iisPreprocessingJob" type="oozie">
12
			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14
				<!-- Cluster wide -->
15
				<PROPERTY key="queueName" value="default"/>
16
				<PROPERTY key="user.name" value="dnet.beta"/>
17

  
18
				<!-- Runtime -->
19
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"/>
20
				<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"/>
21
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
22
				<PROPERTY key="export_action_hbase_table_initialize" value="false"/>
23
				<!-- <PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> -->
24
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/>
25
				<PROPERTY key="metadataextraction_excluded_checksums"
26
				          value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
27
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
28
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
29
				<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/>
30
			</STATIC_CONFIGURATION>
31
			<JOB_INTERFACE>
32
				<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
33
				<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
34
				<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records"/>
35
				<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/>
36
				<PARAM name="import_database_service_location" required="true" description="database service endpoint"/>
37
				<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction"/>
38
				<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext"/>
39
				<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
40
				<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
41
				<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
42
				<PARAM name="nameNode" required="true" description="hdfs name node"/>
43
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
44
				<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
45
				<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/>
46
				<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/>
47
				<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/>
48
				<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/>
49
			</JOB_INTERFACE>
50
		</HADOOP_JOB>
51
		<STATUS>
52
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
			<RUNNING_INSTANCES value="0"/>
54
			<CUMULATIVE_RUN value="0"/>
55
		</STATUS>
56
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
	</BODY>
58
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="iisMainJob" type="oozie">
12
			<DESCRIPTION>IIS main workflow</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- Cluster wide -->
16
				<PROPERTY key="queueName" value="default"/>
17
				<PROPERTY key="user.name" value="dnet.beta"/>
18

  
19
				<!-- Runtime -->
20
				<PROPERTY key="match_content_with_metadata" value="true"/>
21
				<PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/>
22
				<PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/>
23
				<!--<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> -->
24
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/>
25

  
26
				<PROPERTY key="export_action_hbase_table_initialize" value="true"/>
27
				<PROPERTY key="import_content_connection_timeout" value="180000"/>
28
				<PROPERTY key="import_content_read_timeout" value="180000"/>
29
				<PROPERTY key="import_resultset_client_read_timeout" value="180000"/>
30

  
31
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
32
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/>
33
				<PROPERTY key="metadataextraction_excluded_checksums"
34
				          value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
35
				<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/>
36
				<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/>
37
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
38
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
39
				<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/>
40
				<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/>
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
44
				<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing"/>
45
				<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
46
				<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/>
47
				<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
48
				<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
49
				<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
50
				<PARAM name="nameNode" required="true" description="hdfs name node"/>
51
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
52
				<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
53
				<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
54
				<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
55
				<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative"
56
				       required="true"/>
57
				<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
58
				<PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/>
59
				<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
60
				<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
61
				<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
62
				<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/>
63

  
64
				<!-- flags to enable/disable IIS modules -->
65
				<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module"/>
66
				<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module"/>
67
				<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module"/>
68
				<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module"/>
69
				<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module"/>
70
				<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module"/>
71
				<PARAM name="active_referenceextraction_researchinitiative" required="true"
72
				       description="enable/disable the research initiative extraction module"/>
73
				<PARAM name="active_statistics" required="true" description="enable/disable the statistics module"/>
74
				<PARAM name="active_referenceextraction_pdb" required="true" description="enable/disable the protein data bank extraction module"/>
75

  
76
			</JOB_INTERFACE>
77
		</HADOOP_JOB>
78
		<STATUS>
79
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
80
			<RUNNING_INSTANCES value="0"/>
81
			<CUMULATIVE_RUN value="0"/>
82
		</STATUS>
83
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
84
	</BODY>
85
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/importOrcidJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="249e40c2-6420-4207-b40a-e1236f77f1fc_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2019-05-29T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="importOrcidJob" type="mapreduce">
12
			<DESCRIPTION>map reduce job that imports the Orcid works (no doi) into actions</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.TextInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.OrcidImportMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.LongWritable"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>
23

  
24
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
25
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
26

  
27
				<!-- MISC -->
28
				<PROPERTY key="mapred.compress.map.output" value="false"/>
29
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
30
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
31

  
32
				<PROPERTY key="mapred.reduce.tasks" value="0"/>
33
				<PROPERTY key="dfs.blocksize" value="256M"/>
34

  
35
                <!-- Orcid Mapper Properties -->
36
                <PROPERTY key="setName" value="orcidworks-no-doi"/>
37
				<PROPERTY key="agentId" value="dnet"/>
38
				<PROPERTY key="agentName" value="D-Net"/>
39
				<PROPERTY key="invisible" value="true"/>
40

  
41
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
42

  
43
				<!--  	Uncomment to override the default lib path -->
44
				<!--  PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-orcid_beta.jar"/ -->
45
			</STATIC_CONFIGURATION>
46
			<JOB_INTERFACE>
47
				<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/>
48
				<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/>
49
			</JOB_INTERFACE>
50
			<SCAN>
51
				<FILTERS/>
52
				<FAMILIES/>
53
			</SCAN>
54
		</HADOOP_JOB>
55
		<STATUS>
56
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
57
			<RUNNING_INSTANCES value="0"/>
58
			<CUMULATIVE_RUN value="0" />
59
		</STATUS>
60
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
61
	</BODY>
62
</RESOURCE_PROFILE>
63

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupDeleteDedupRelsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="5626c94e-0005-416a-9ea4-48fc8af85ecd_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="deleteDedupRelsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that deletes the dedup rels used in the deduplication process</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupDeleteRelMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<!-- Map output value type: the generic Hadoop Writable interface, which lives in org.apache.hadoop.io -->
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Writable" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY param="entityType" />
47
			        <FAMILY value="resultResult_dedup_merges" />
48
			        <FAMILY value="resultResult_dedup_isMergedIn" />
49
			        <FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
50

  
51
			        <FAMILY value="personPerson_dedup_merges" />
52
			        <FAMILY value="personPerson_dedup_isMergedIn" />
53
			        <FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
54

  
55
			        <FAMILY value="organizationOrganization_dedup_merges" />
56
			        <FAMILY value="organizationOrganization_dedup_isMergedIn" />
57
			        <FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
58
        		</FAMILIES>
59
        	</SCAN>
60
        </HADOOP_JOB>
61
        <STATUS>
62
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
63
            <RUNNING_INSTANCES value="0"/>
64
            <CUMULATIVE_RUN value="0" />
65
        </STATUS>
66
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
67
    </BODY>
68
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJobV2.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="d730a831-a982-4034-a890-de98fd972e87_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2017-07-05T15:05:50+00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="iisMainJobV2" type="oozie">
11
			<DESCRIPTION>IIS main workflow</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- Cluster wide -->
15
				<PROPERTY key="user.name" value="dnet.production"/>
16

  
17
				<!-- Runtime -->
18
				<PROPERTY key="metadataextraction_default_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/>
19
				<!-- skipping _default chunk from property name since 2017.02.21 -->
20
				<PROPERTY key="metadataextraction_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/>
21
				<PROPERTY key="ingest_pmc_cache_location" value="/user/${user.name}/iis/cache/ingestpmc"/>
22
				<PROPERTY key="software_webcrawl_cache_location" value="/user/${user.name}/iis/cache/webcrawler"/>
23
				<PROPERTY key="import_content_objectstore_s3_keystore_location" value="jceks://hdfs/user/${user.name}/secrets/s3keyfile.jceks"/>
24
				<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/>
25
				<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/>
26
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
27
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
28
				<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/>
29
				<PROPERTY key="export_trust_level_threshold_document_software_url" value="0.0"/>
30
				<PROPERTY key="export_trust_level_threshold_matched_doc_organizations" value="0.0"/>
31
				<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_PROD"/>
32
				<PROPERTY key="reports_external_path" value="/user/${user.name}/iis/reports/${execution_environment}"/>
33
				<PROPERTY key="import_hbase_dump_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/tmp/db_openaireplus_services.export.2017.07.19"/>
34
				<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/>
35
			</STATIC_CONFIGURATION>
36
			<JOB_INTERFACE>
37
				<PARAM description="oozie job application absolute path" name="oozie.wf.application.path" required="true"/>
38
				<PARAM description="objectStore service endpoint" name="import_content_object_store_location" required="true"/>
39
				<PARAM description="csv list of the available object stores subject to processing" name="import_content_objectstores_csv" required="true"/>
40
				<PARAM description="mdstore service location" name="import_mdstore_service_location" required="true"/>
41
				<PARAM description="mdstore ids for dataset records" name="import_dataset_mdstore_ids_csv" required="true"/>
42
				<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
43
				<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
44
				<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" required="true"/>
45
				<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
46
				<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
47
				<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
48
				<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
49
				<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/>
50
				<PARAM description="software reference extraction actionset identifier" name="export_action_set_id_document_software_url" required="true"/>
51
				<PARAM description="target action set for affiliations" name="export_action_set_id_matched_doc_organizations" required="true"/>
52

  
53

  
54
				<!-- flags to enable/disable IIS modules -->
55
				<PARAM description="enable/disable the citation matching module" name="active_citationmatching" required="true"/>
56
				<PARAM description="enable/disable the document classification module" name="active_documentsclassification" required="true"/>
57
				<PARAM description="enable/disable the document similarity module" name="active_documentssimilarity" required="true"/>
58
				<PARAM description="enable/disable the dataset reference extraction module" name="active_referenceextraction_dataset" required="true"/>
59
				<PARAM description="enable/disable the project reference extraction module" name="active_referenceextraction_project" required="true"/>
60
				<PARAM description="enable/disable the research initiative extraction module" name="active_referenceextraction_researchinitiative" required="true"/>
61
				<PARAM description="enable/disable the protein data bank extraction module" name="active_referenceextraction_pdb" required="true"/>
62
				<PARAM description="enable/disable the software reference extraction algorithm" name="active_referenceextraction_software_url" required="true"/>
63
			</JOB_INTERFACE>
64
		</HADOOP_JOB>
65
		<STATUS>
66
			<LAST_SUBMISSION_DATE value="2017-07-02T09:39:51+00:00"/>
67
			<RUNNING_INSTANCES value="0"/>
68
			<CUMULATIVE_RUN value="13"/>
69
		</STATUS>
70
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
71
	</BODY>
72
</RESOURCE_PROFILE>
73

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerEnrichmentJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="ffcc359c-555a-4d86-9ee2-b8ad212a0185_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="brokerEnrichmentJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that calculates the enrichment events based on the publications dedup results</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
		        <!-- I/O FORMAT -->
15
		        <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
16
		        <PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/>
17

  
18
		        <!-- MAPPER -->
19
		        <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.EnrichmentMapper"/>
20
		        <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
21
		        <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
22

  
23
		        <!-- REDUCER -->
24
		        <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.EnrichmentReducer" />
25
		        <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
26
		        <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />
27

  
28

  
29
		        <!-- MISC -->
30
		        <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
31
		        <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
32
		        <PROPERTY key="mapreduce.map.speculative" value="false"/>
33
		        <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
34

  
35
		        <PROPERTY key="dfs.blocksize" value="256M"/>
36
		        <PROPERTY key="mapred.reduce.tasks" value="1"/>
37

  
38
		        <!-- ES -->
39
		        <PROPERTY key="es.nodes"
40
		                  value="ip-90-147-167-137.ct1.garrservices.it:9200,ip-90-147-167-126.ct1.garrservices.it:9200,ip-90-147-167-13.ct1.garrservices.it:9200,ip-90-147-167-125.ct1.garrservices.it:9200"/>
41
		        <PROPERTY key="es.nodes.resolve.hostname" value="false"/>
42
		        <PROPERTY key="es.nodes.wan.only" value="true"/>
43
		        <PROPERTY key="es.resource" value="events_{infra}/event"/>
44
		        <PROPERTY key="es.input.json" value="yes"/>
45
		        <PROPERTY key="es.mapping.id" value="eventId"/>
46

  
47
		        <!-- BROKER -->
48
		        <PROPERTY key="broker.datasource.id.whitelist" value=""/>
49
		        <PROPERTY key="broker.datasource.id.blacklist" value=""/>
50
		        <PROPERTY key="broker.datasource.untrusted.oa.list" value="opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"/>
51
		        <PROPERTY key="broker.datasource.type.whitelist" value="pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic"/>
52

  
53

  
54
		        <!-- <PROPERTY key="user.name" value="dnet" /> -->
55
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
56
				
57
		<!--  	Uncomment to override the default lib path -->			
58
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
59
        	</STATIC_CONFIGURATION>
60
        	<JOB_INTERFACE>
61
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
62
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
63
        	</JOB_INTERFACE>
64
        	<SCAN>
65
        		<FILTERS operator="MUST_PASS_ALL">
66
        			<FILTER type="prefix" value="50" />
67
        		</FILTERS>
68
        		<FAMILIES>
69
	       			<FAMILY value="result" />
70
        			<FAMILY value="resultResult_dedup_isMergedIn" />
71
        		</FAMILIES>
72
        	</SCAN>
73
        </HADOOP_JOB>
74
        <STATUS>
75
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
76
            <RUNNING_INSTANCES value="0"/>
77
            <CUMULATIVE_RUN value="0" />
78
        </STATUS>
79
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
80
    </BODY>
81
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/personCsvJoinJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="3f544a36-f123-4f5c-acf4-7c25f6591ec4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="personCsvJoinJob" type="mapreduce">
11
			<DESCRIPTION>map reduce job that joins person entities by "surname+first name letter" and serialise the output as csv</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" />
17
				<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
23

  
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupReducer" />
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
27
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />
28

  
29
				<!-- MISC -->
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />
32
				<PROPERTY key="mapred.reduce.tasks" value="10" />
33

  
34
				<PROPERTY key="hash.values.csv" value="manghip,pmanghi,corog,gcoro" />
35

  
36
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
37

  
38
				<!--  	Uncomment to override the default lib path -->
39
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
40

  
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="mapred.input.dir" required="true" description="input sequence file" />
44
			</JOB_INTERFACE>
45
		</HADOOP_JOB>
46
		<STATUS>
47
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
48
			<RUNNING_INSTANCES value="0"/>
49
			<CUMULATIVE_RUN value="0" />
50
		</STATUS>
51
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
52
	</BODY>
53
</RESOURCE_PROFILE>
54

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/propagationCountryInstitutionalOrganizationSaveToFile.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="7d4ab172-6772-4960-abfa-037537f58872_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2018-09-10T18:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="propagationCountryFromInstitutionalRepositoriesSaveToFile" type="mapreduce">
11
            <DESCRIPTION>map reduce job that propagates country value of the organization to products belonging to institutional repositories related to the organization (pubsrepository:institutional)</DESCRIPTION>
12
            <STATIC_CONFIGURATION><!-- I/O FORMAT -->
13
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
14
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
15

  
16
                <!-- MAPPER -->
17
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.PropagationCountryFromDsOrgResultMapper"/>
18
                <PROPERTY key="mapred.mapoutput.key.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.InstOrgKey"/>
19
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>
20

  
21
                <!-- REDUCER -->
22
                <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.PropagationCountryFromDsOrgResultFileReducer"/>
23
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
24
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
25

  
26
                <!-- PARTITIONER -->
27
                <PROPERTY key="mapred.partitioner.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.NaturalInstOrgKeyPartitioner"/>
28
                <PROPERTY key="mapreduce.partitioner.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.NaturalInstOrgKeyPartitioner"/>
29
                <PROPERTY key="mapred.output.value.groupfn.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.NaturalInstOrgKeyGroupingComparator"/>
30
                <PROPERTY key="mapreduce.output.value.groupfn.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.NaturalInstOrgKeyGroupingComparator"/>
31

  
32
                <!-- MISC -->
33
                <PROPERTY key="mapred.compress.map.output" value="true"/>
34
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
35
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
36
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
37
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
38
                <PROPERTY key="mapred.reduce.tasks" value="20"/>
39

  
40
                <!-- CSV of datasource typologies considered in the processing -->
41
                <PROPERTY key="datasource.types" value="pubsrepository::institutional"/>
42

  
43
                <!-- <PROPERTY key="user.name" value="dnet" /> --><!-- Overrides the default lib path with the country propagation assembly jar -->
44
                <PROPERTY key="job.lib" value="/user/dnet/openaire/dnet-mapreduce-jobs-assembly-country_propagation-1.1.1-SNAPSHOT.jar"/>
45
            </STATIC_CONFIGURATION>
46
            <JOB_INTERFACE>
47
                <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
48
                <PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
49
                <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
50
                <PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/><!--<PARAM name="countryPropagation.conf" required="true" description="configuration for country propagation" />-->
51
                <PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" />
52
            </JOB_INTERFACE>
53
            <SCAN/>
54
        </HADOOP_JOB>
55
        <STATUS>
56
            <LAST_SUBMISSION_DATE value="2018-09-18T12:13:07+02:00"/>
57
            <RUNNING_INSTANCES value="1"/>
58
            <CUMULATIVE_RUN value="30"/>
59
        </STATUS>
60
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
61
    </BODY>
62
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerEnrichmentProjectsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="17dd747e-f5f2-45d5-8554-9f70343bfe55_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="brokerEnrichmentProjectsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that calculates the project enrichment events based on the publications dedup results and the result-project relations</DESCRIPTION>
12

  
13
	        <STATIC_CONFIGURATION><!-- I/O FORMAT -->
14
		        <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
15
		        <PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/>
16

  
17
		        <!-- MAPPER -->
18
		        <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.ProjectEnrichmentMapper"/>
19
		        <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
20
		        <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
21

  
22
		        <!-- REDUCER -->
23
		        <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.ProjectEnrichmentReducer"/>
24
		        <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
25
		        <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
26

  
27
		        <!-- MISC -->
28
		        <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
29
		        <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
30
		        <PROPERTY key="mapreduce.map.speculative" value="false"/>
31
		        <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
32
		        <PROPERTY key="dfs.blocksize" value="256M"/>
33
		        <PROPERTY key="mapred.reduce.tasks" value="4"/>
34

  
35
				<PROPERTY key="broker.baseurl.publication" value="https://explore.openaire.eu/search/publication?articleId=%s"/>
36
				<PROPERTY key="broker.baseurl.dataset" value="https://explore.openaire.eu/search/dataset?datasetId=%s"/>
37
				<PROPERTY key="broker.baseurl.software" value="https://explore.openaire.eu/search/software?softwareId=%s"/>
38
				<PROPERTY key="broker.baseurl.other" value="https://explore.openaire.eu/search/other?orpId=%s"/>
39

  
40

  
41
		        <!-- ES -->
42
		        <PROPERTY key="es.nodes" value="ip-90-147-167-137.ct1.garrservices.it:9200,ip-90-147-167-126.ct1.garrservices.it:9200,ip-90-147-167-13.ct1.garrservices.it:9200,ip-90-147-167-125.ct1.garrservices.it:9200"/>
43
		        <PROPERTY key="es.nodes.resolve.hostname" value="false"/>
44
		        <PROPERTY key="es.batch.write.retry.count" value="10"/><!-- ES bulk-write tuning: retries per batch, then entries per bulk request -->
45
		        <PROPERTY key="es.batch.size.entries" value="500"/>
46
		        <PROPERTY key="es.nodes.wan.only" value="true"/>
47
		        <PROPERTY key="es.resource" value="events_{infra}/event"/>
48
		        <PROPERTY key="es.input.json" value="yes"/>
49
		        <PROPERTY key="es.mapping.id" value="eventId"/>
50

  
51
		        <!-- BROKER -->
52
		        <PROPERTY key="broker.datasource.id.whitelist" value=""/>
53
		        <PROPERTY key="broker.datasource.id.blacklist" value=""/>
54
		        <PROPERTY key="broker.datasource.untrusted.oa.list" value="opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"/>
55
		        <PROPERTY key="broker.datasource.type.whitelist" value="pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic"/>
56

  
57
		        <!-- <PROPERTY key="user.name" value="dnet" /> -->
58
		        <!--  	Uncomment to override the default lib path -->
59
		        <!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
60
	        </STATIC_CONFIGURATION>
61
	        <JOB_INTERFACE><!-- parameters the caller must supply when submitting the job -->
62
		        <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
63
		        <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
64
	        </JOB_INTERFACE>
65
        	<SCAN>
66
        		<FILTERS operator="MUST_PASS_ONE">
67
			        <FILTER type="prefix" value="40" />
68
        			<FILTER type="prefix" value="50" />
69
        		</FILTERS>
70
        		<FAMILIES>
71
	       			<FAMILY value="result" />
72
			        <FAMILY value="project" />
73
        			<FAMILY value="resultResult_dedup_isMergedIn" />
74
			        <FAMILY value="resultProject_outcome_produces" />
75
			        <FAMILY value="resultProject_outcome_isProducedBy" />
76
        		</FAMILIES>
77
        	</SCAN>
78
        </HADOOP_JOB>
79
        <STATUS>
80
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
81
            <RUNNING_INSTANCES value="0"/>
82
            <CUMULATIVE_RUN value="0" />
83
        </STATUS>
84
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
85
    </BODY>
86
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMinDistGraphJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="de888da6-2d10-4d42-a624-a44d4083414a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="dedupMinDistGraphJob" type="mapreduce">
12
			<DESCRIPTION>map reduce job that finds the minimum vertex in each connected component in the input graph (as adjacency lists)</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
23

  
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchReducer"/>
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
27
				<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
28

  
29
				<!-- MISC -->
30
				<PROPERTY key="mapred.compress.map.output" value="false"/>
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
33
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
35

  
36
				<PROPERTY key="mapred.reduce.tasks" value="1"/>
37
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
38

  
39
				<!--  	Uncomment to override the default lib path -->
40
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/>
44
				<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/>
45
			</JOB_INTERFACE>
46
			<SCAN>
47
				<FILTERS/>
48
				<FAMILIES/>
49
			</SCAN>
50
		</HADOOP_JOB>
51
		<STATUS>
52
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
			<RUNNING_INSTANCES value="0"/>
54
			<CUMULATIVE_RUN value="0"/>
55
		</STATUS>
56
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
	</BODY>
58
</RESOURCE_PROFILE>
59

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2GraphJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="5907741a-d97e-41ea-9dbe-963209aa58d9_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="dedupSimilarity2GraphJob" type="mapreduce">
12
			<DESCRIPTION>map only job that scans a given entity type and creates the similarRel graph as adjacency lists</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.HBaseToSimilarityGraphMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
23

  
24
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
25
				<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
26

  
27
				<!-- MISC -->
28
				<PROPERTY key="mapred.compress.map.output" value="false"/>
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
31
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
33

  
34
				<PROPERTY key="mapred.reduce.tasks" value="0"/>
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36

  
37
				<!--  	Uncomment to override the default lib path -->
38
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
			</STATIC_CONFIGURATION>
40
			<JOB_INTERFACE>
41
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
42
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
43
				<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/>
44
			</JOB_INTERFACE>
45
			<SCAN>
46
				<FILTERS operator="MUST_PASS_ALL">
47
					<FILTER type="prefix" param="entityTypeId"/>
48
				</FILTERS>
49
				<FAMILIES>
50
					<FAMILY value="resultResult_dedupSimilarity_isSimilarTo"/>
51
					<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo"/>
52
				</FAMILIES>
53
			</SCAN>
54
		</HADOOP_JOB>
55
		<STATUS>
56
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
57
			<RUNNING_INSTANCES value="0"/>
58
			<CUMULATIVE_RUN value="0"/>
59
		</STATUS>
60
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
61
	</BODY>
62
</RESOURCE_PROFILE>
63

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/DnetHadoopTransformationJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="bf711c9b-8c92-42ad-9fc2-797815e13760_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2019-04-11T11:54:24+02:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dnetHadoopTrasnformation" type="oozie">
11
            <DESCRIPTION>oozie workflow that applies a transformation rule to an input MDStore and writes the result to an output MDStore</DESCRIPTION>
12
            <STATIC_CONFIGURATION><!-- Cluster wide -->
13
                <PROPERTY key="queueName" value="default"/>
14
                <PROPERTY key="user.name" value="dnet"/><!-- Runtime -->
15
                <PROPERTY key="oozie.wf.application.path" value="/user/dnet/oozieApp/transformation"/>
16
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
17
                <PROPERTY key="oozie.use.system.libpath" value="True"/>
18
                <PROPERTY key="security_enabled" value="False"/>
19
                <PROPERTY key="dryrun" value="True"/>
20
                <PROPERTY key="oozie.action.sharelib.for.spark" value="spark2"/>
21
                <PROPERTY key="metadataEncoding" value="XML"/>
22
            </STATIC_CONFIGURATION>
23
            <JOB_INTERFACE>
24
                <PARAM description="the path of the input MDStore" name="mdstoreInputPath" required="true"/>
25
                <PARAM description="the path of the cleaned mdstore" name="mdstoreOutputPath" required="true"/>
26
                <PARAM description="The transformation Rule to apply" name="transformationRule" required="true"/>
27
                <PARAM description="The timestamp of the collection date" name="timestamp" required="true"/>
28
                <PARAM description="the Dnet Workflow Identifier" name="workflowId" required="true"/>
29
            </JOB_INTERFACE>
30
        </HADOOP_JOB>
31
        <STATUS>
32
            <LAST_SUBMISSION_DATE value="2019-04-11T15:34:37+02:00"/>
33
            <RUNNING_INSTANCES value="2"/>
34
            <CUMULATIVE_RUN value="71"/>
35
        </STATUS>
36
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
37
    </BODY>
38
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/publicationAnalysisJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="721fd82c-6444-41c9-ba23-5eb0652ddaeb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="publicationAnalysisJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that analyses publication features</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" />
17

  
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.PublicationAnalysisMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" />
22
			
23
				<!-- MISC -->
24
				
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapreduce.map.speculative" value="false" />
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
29
			
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
		<!--  	Uncomment to override the default lib path -->			
34
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        	</JOB_INTERFACE>
40
        	<SCAN>
41
        		<FILTERS operator="MUST_PASS_ALL">
42
        			<FILTER type="prefix" value="50" />
43
        		</FILTERS>
44
        		<FAMILIES>
45
	       			<FAMILY value="result" />
46
        		</FAMILIES>
47
        	</SCAN>
48
        </HADOOP_JOB>
49
        <STATUS>
50
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
51
            <RUNNING_INSTANCES value="0"/>
52
            <CUMULATIVE_RUN value="0" />
53
        </STATUS>
54
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
55
    </BODY>
56
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsPersonExportJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="4c63a9ab-057f-442c-8da2-9b956c41e645_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupRootsPersonExportJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that exports the representative publications as json</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.RootPersonExportMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
			
23
			
24
				<!-- MISC -->
25

  
26
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
28
				<PROPERTY key="mapreduce.map.speculative" value="false" />
29
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
30

  
31
				<PROPERTY key="dfs.blocksize" value="256M" />
32
			
33
				<PROPERTY key="mapred.reduce.tasks" value="1" />
34
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
35
				
36
		<!--  	Uncomment to override the default lib path -->			
37
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
38
        	</STATIC_CONFIGURATION>
39
        	<JOB_INTERFACE>
40
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
41
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
42
        		
43
        		<PARAM name="mapred.output.dir" required="true" description="target output directory on hdfs" />
44
        	</JOB_INTERFACE>
45
        	<SCAN>
46
        		<FILTERS operator="MUST_PASS_ALL">
47
        			<FILTER type="prefix" param="entityTypeId" />
48
        		</FILTERS>
49
        		<FAMILIES>
50
	       			<FAMILY param="entityType" />
51
        		</FAMILIES>
52
        	</SCAN>
53
        </HADOOP_JOB>
54
        <STATUS>
55
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
56
            <RUNNING_INSTANCES value="0"/>
57
            <CUMULATIVE_RUN value="0" />
58
        </STATUS>
59
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
60
    </BODY>
61
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/mdStoreHdfsImportAuthorsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="a53df5e3-8a38-4d3f-8f67-bf9fc43279a6_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="authorImportRecordsJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that maps xml metadata records from a sequence file into an hbase table</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
				
18
				<!-- MAPPER -->        	
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.AuthorImportRecordsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
			
23
				<!-- MISC -->
24
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
25
				<PROPERTY key="mapreduce.map.speculative" value="false" />
26
				<PROPERTY key="mapred.reduce.tasks" value="0" />
27
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
28
				
29
		<!--  	Uncomment to override the default lib path -->			
30
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
31
				
32
        	</STATIC_CONFIGURATION>
33
        	<JOB_INTERFACE>
34
        		<PARAM name="mapred.input.dir" required="true" description="input sequence file" />
35
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
36
        		<PARAM name="hbase.import.xslt" required="true" description="mapping" />
37
        	</JOB_INTERFACE>
38
        </HADOOP_JOB>
39
        <STATUS>
40
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
41
            <RUNNING_INSTANCES value="0"/>
42
            <CUMULATIVE_RUN value="0" />
43
        </STATUS>
44
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
45
    </BODY>
46
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/sqoopStatsUpdateJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="bf0ab07b-36bf-4164-ab73-342bfb11e51a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="StatsExportJob" type="oozie">
11
            <DESCRIPTION>Job for importing data from HBASE to the relational Stats Database</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
			
14
				<!-- Cluster wide -->
15
                <PROPERTY key="queueName" value="default"/>
16
                <PROPERTY key="user.name" value="dnet"/> <!-- username = sqoop?? -->
17
                <PROPERTY key="workingDir" value="/user/dnet/lib/stats/working_dir"/>
18
                <PROPERTY key="numReducers" value="1"/>
19
                
20
				<PROPERTY key="oozie.wf.application.path" value="hdfs://nmis-hadoop-cluster/user/eri.katsari/stats/oozie_app"/><!-- edit this property! -->
21
				<PROPERTY key="Stats_db_Url" value="jdbc:postgresql://node1.t.openaire.research-infrastructures.eu:5432/stats"/><!-- complete the jdbc url with the actual value! -->
22
				<PROPERTY key="Stats_db_User" value="sqoop"/>
23
				<PROPERTY key="Stats_db_Pass" value="sqoop"/>
24
				<PROPERTY key="Stats_db_Driver" value="org.postgresql.Driver"/>
25
				<PROPERTY key="Stats_db_table_map" value="datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultTopic=result_topics,category=category,context=context,claim=claim,concept=concept,datasourceLanguage=datasource_languages,resultLanguage=result_languages,resultDatasource=result_datasources"/>
26
				<PROPERTY key="Stats_sqoop_RecsPerStatement" value="10000"/>
27
				<PROPERTY key="Stats_sqoop_StatementPerTrans" value="1000000"/>
28
				<PROPERTY key="Stats_sqoop_ReducersCount" value="4"/>
29
				<PROPERTY key="Stats_output_Path" value="/tmp/stats/"/>
30
				<PROPERTY key="Stats_null_String_Field" value="null"/>
31
				<PROPERTY key="Stats_null_Numeric_Field" value="null"/>
32
				<PROPERTY key="Stats_enclosing_Character" value="#"/>
33
				<PROPERTY key="Stats_delim_Character" value="!"/>
34
				<PROPERTY key="out1" value="datasource"/>
35
				<PROPERTY key="out2" value="project"/>
36
				<PROPERTY key="out3" value="organization"/>
37
				<PROPERTY key="out4" value="datasourceOrganization"/>
38
				<PROPERTY key="out5" value="datasourceTopic"/>
39
				<PROPERTY key="out6" value="datasourceLanguage"/>
40
				<PROPERTY key="out7" value="projectOrganization"/>
41
				<PROPERTY key="out8" value="resultClaim"/>
42
				<PROPERTY key="out9" value="resultClassification"/>
43
				<PROPERTY key="out10" value="resultConcept"/>
44
				<PROPERTY key="out11" value="resultLanguage"/>
45
				<PROPERTY key="out12" value="resultOrganization"/>
46
				<PROPERTY key="out13" value="resultResult"/>
47
				<PROPERTY key="out14" value="resultProject"/>
48
				<PROPERTY key="out15" value="category"/>
49
				<PROPERTY key="out16" value="resultTopic"/>
50
				<PROPERTY key="out17" value="resultDatasource"/>
51
				<PROPERTY key="out18" value="result"/>
52
				<PROPERTY key="out19" value="claim"/>
53
				<PROPERTY key="out20" value="concept"/>
54
            </STATIC_CONFIGURATION>
55
            <JOB_INTERFACE>
56
                <PARAM name="nameNode" required="true" description="hdfs name node"/>
57
                <PARAM name="jobTracker" required="true" description="job tracker name"/>
58
                <PARAM name="Stats_Hbase_Source_Table" required="true" description="Hbase Table with Protobuffs."/>
59
                <PARAM name="Stats_indexConf" required="true" description="Index Entity Links configuration."/>
60
                <PARAM name="isLookupEndpoint" required="true" description="IS lookup service endpoint"/>
61
            </JOB_INTERFACE>
62
        </HADOOP_JOB>
63
        <STATUS>
64
            <LAST_SUBMISSION_DATE value="2014-11-14T19:57:25+00:00"/>
65
            <RUNNING_INSTANCES value="0"/>
66
            <CUMULATIVE_RUN value="75"/>
67
        </STATUS>
68
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
69
    </BODY>
70
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/coauthorUpdateJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="6d91b311-a7fd-48ff-98d2-1fed70850e3a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="coauthorUpdateJob" type="mapreduce">
11
 			<DESCRIPTION>update coauthors using a map {merged author id --> anchorId}</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.inputformat.class"	value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />			
18
        	
19
        		<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.CoAuthorUpdateMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
23
				
24
				
25
				<!-- MISC -->
26
				<PROPERTY key="mapred.output.compress" value="false" />
27
				<PROPERTY key="mapred.compress.map.output" value="true" />	
28
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
29
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
31
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
32
							
33
				<PROPERTY key="mapred.reduce.tasks" value="0" />
34
				
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
				
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
43
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> 
44
        	</JOB_INTERFACE>
45
        	<SCAN>
46
                <FILTERS operator="MUST_PASS_ALL">
47
                    <FILTER type="prefix" value="30"/>
48
                </FILTERS>
49
                <FAMILIES>
50
                    <FAMILY value="person"/>
51
                </FAMILIES>
52
        	</SCAN>
53
        </HADOOP_JOB>
54
        <STATUS>
55
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
56
            <RUNNING_INSTANCES value="0"/>
57
            <CUMULATIVE_RUN value="0" />
58
        </STATUS>
59
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
60
    </BODY>
61
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/predatoryJournalsJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="bab7a0b8-66b4-4e1a-a8d4-0bb9b3493f90_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="predatoryJournalsJob" type="mapreduce">
11
			<DESCRIPTION>map reduce job that counts the number of publications from journals linked to EC projects</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
17

  
18
				<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.misc.PredatoryJournalsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22

  
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.misc.PredatoryJournalsReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />
27

  
28
				<!-- MISC -->
29
				<PROPERTY key="mapred.output.compress" value="false" />
30
				<PROPERTY key="mapred.compress.map.output" value="true" />
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
33
				<PROPERTY key="mapreduce.map.speculative" value="false" />
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
35

  
36
				<PROPERTY key="mapred.reduce.tasks" value="1" />
37
				<PROPERTY key="dfs.blocksize" value="256M" />
38

  
39
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
40

  
41
				<!-- Overrides the default lib path; comment out the job.lib property below to use the default -->
42
				<PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-1.1.4-BETA-SNAPSHOT-predatoryJournals.jar"/>
43
			</STATIC_CONFIGURATION>
44
			<JOB_INTERFACE>
45
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
46
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
47
				<PARAM name="mapred.output.dir" required="true" description="target output directory on hdfs" />
48
			</JOB_INTERFACE>
49
			<SCAN>
50
				<FILTERS />
51
				<FAMILIES />
52
			</SCAN>
53
		</HADOOP_JOB>
54
		<STATUS>
55
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
56
			<RUNNING_INSTANCES value="0"/>
57
			<CUMULATIVE_RUN value="0" />
58
		</STATUS>
59
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
60
	</BODY>
61
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupPersonJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="29638605-235b-4cc1-9bf5-a5dd2fc84915_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupPersonJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT  -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.SimpleDedupPersonMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.SimpleDedupPersonReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="1000" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
				
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
43
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
45
        	</JOB_INTERFACE>
46
        	<SCAN>
47
        		<FILTERS operator="MUST_PASS_ALL">
48
        			<FILTER type="prefix" param="entityTypeId" />
49
        		</FILTERS>
50
        		<FAMILIES>
51
        			<FAMILY param="entityType" />
52
        		</FAMILIES>
53
        	</SCAN>
54
        </HADOOP_JOB>
55
        <STATUS>
56
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
57
            <RUNNING_INSTANCES value="0"/>
58
            <CUMULATIVE_RUN value="0" />
59
        </STATUS>
60
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
61
    </BODY>
62
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.34/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsExportJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="53f2a9b4-adf3-4ceb-9308-d88b53dc44c5_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupRootsExportJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that exports the representative publications as json</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.RootExportMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
			
23
			
24
				<!-- MISC -->
25

  
26
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
28
				<PROPERTY key="mapreduce.map.speculative" value="false" />
29
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
30

  
31
				<PROPERTY key="dfs.blocksize" value="256M" />
32
			
33
				<PROPERTY key="mapred.reduce.tasks" value="1" />
34
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
35
				
36
		<!--  	Uncomment to override the default lib path -->			
37
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
38
        	</STATIC_CONFIGURATION>
39
        	<JOB_INTERFACE>
40
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
41
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
42
        		
43
        		<PARAM name="mapred.output.dir" required="true" description="target output directory on hdfs" />
44
        	</JOB_INTERFACE>
45
        	<SCAN>
46
        		<FILTERS operator="MUST_PASS_ALL">
47
        			<FILTER type="prefix" param="entityTypeId" />
48
        		</FILTERS>
49
        		<FAMILIES>
50
	       			<FAMILY param="entityType" />
51
        		</FAMILIES>
52
        	</SCAN>
53
        </HADOOP_JOB>
54
        <STATUS>
55
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
56
            <RUNNING_INSTANCES value="0"/>
57
            <CUMULATIVE_RUN value="0" />
58
        </STATUS>
59
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
60
    </BODY>
61
</RESOURCE_PROFILE>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff