Project

General

Profile

« Previous | Next » 

Revision 54159

[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.27

View differences:

modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMarkDeletedEntityJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="667fe203-ee51-4dff-8c9c-b90e66e96eb4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="dedupMarkDeletedEntityJob" type="mapreduce">
12
			<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMarkDeletedEntityMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/>
23

  
24
				<!-- MISC -->
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
27
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
29

  
30
				<PROPERTY key="mapred.reduce.tasks" value="0"/>
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32

  
33
				<!--  	Uncomment to override the default lib path -->
34
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
			</STATIC_CONFIGURATION>
36
			<JOB_INTERFACE>
37
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
38
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
39
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
40
				<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/>
41
			</JOB_INTERFACE>
42
			<SCAN>
43
				<FILTERS operator="MUST_PASS_ALL">
44
					<FILTER type="prefix" param="entityTypeId"/>
45
				</FILTERS>
46
				<FAMILIES>
47
					<FAMILY param="entityType"/>
48
					<FAMILY value="resultResult_dedup_isMergedIn"/>
49
					<FAMILY value="organizationOrganization_dedup_isMergedIn"/>
50
				</FAMILIES>
51
			</SCAN>
52
		</HADOOP_JOB>
53
		<STATUS>
54
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
			<RUNNING_INSTANCES value="0"/>
56
			<CUMULATIVE_RUN value="0"/>
57
		</STATUS>
58
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
	</BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2HdfsActionsJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="baa63c0c-0ff3-4a15-93c1-e361800e9ca8_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="dedupSimilarity2HdfsActionsJob" type="mapreduce">
11
			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />
17

  
18
				<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupSimilarityToHdfsActionsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22

  
23
				<!-- OUTPUT -->
24
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
25
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />
26

  
27
				<!-- MISC -->
28
				<PROPERTY key="mapred.compress.map.output" value="true" />
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
33

  
34
				<PROPERTY key="mapred.output.compress" value="true"/>
35
				<PROPERTY key="mapred.output.compression.type" value="BLOCK"/>
36

  
37
				<PROPERTY key="mapred.reduce.tasks" value="10" />
38
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
39

  
40
				<!--  	Uncomment to override the default lib path -->
41
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
42
			</STATIC_CONFIGURATION>
43
			<JOB_INTERFACE>
44
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
45
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
46
				<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" />
47
				<PARAM name="dedup.conf" required="true" description="dedup configuration" />
48
				<PARAM name="rawSetId" required="true" description="raw set identifier" />
49
				<PARAM name="similarityCF" required="true" description="similarity column family name" />
50
			</JOB_INTERFACE>
51
			<SCAN>
52
				<FILTERS operator="MUST_PASS_ALL">
53
					<FILTER type="prefix" param="entityTypeId" />
54
				</FILTERS>
55
				<FAMILIES>
56
					<FAMILY param="entityType" />
57
					<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
58
					<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
59
				</FAMILIES>
60
			</SCAN>
61
		</HADOOP_JOB>
62
		<STATUS>
63
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
64
			<RUNNING_INSTANCES value="0"/>
65
			<CUMULATIVE_RUN value="0" />
66
		</STATUS>
67
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
68
	</BODY>
69
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJobV2.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="51e1660e-b1fe-4389-96b5-11a522b78c15_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2016-10-15T12:03:30+00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="iisPreprocessingJobV2" type="oozie">
11
			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13
				<!-- Cluster wide -->
14
				<PROPERTY key="user.name" value="dnet.production"/>
15

  
16
				<!-- Runtime -->
17
				<PROPERTY key="metadataextraction_default_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/>
18
				<!-- skipping _default chunk from property name since 2017.02.21 -->
19
				<PROPERTY key="metadataextraction_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/>
20
				<PROPERTY key="ingest_pmc_cache_location" value="/user/${user.name}/iis/cache/ingestpmc"/>
21
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
22
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
23
				<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_PROD"/>
24
				<PROPERTY key="reports_external_path" value="/user/${user.name}/iis/reports/${execution_environment}"/>
25
				<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/>
26
				<PROPERTY key="import_project_stream_endpoint_url" value="http://services.openaire.eu:8980/provision/mvc/openaire/export/streamProjectDetails.do"/>
27
			</STATIC_CONFIGURATION>
28
			<JOB_INTERFACE>
29
				<PARAM description="objectStore service endpoint" name="import_content_object_store_location" required="true"/>
30
				<PARAM description="mdstore service location" name="import_mdstore_service_location" required="true"/>
31
				<PARAM description="mdstore id for dataset records" name="import_dataset_mdstore_ids_csv" required="true"/>
32
				<PARAM description="mdstore id for WoS records" name="import_wos_mdstore_id" required="true"/>
33
				<PARAM description="database service endpoint" name="import_database_service_location" required="true"/>
34
				<PARAM description="objectstore ids subject to dataset reference extraction" name="import_content_datacite_objectstores_csv" required="true"/>
35
				<PARAM description="objectstore ids for WoS plaintext" name="import_content_wos_plaintext_objectstores_csv" required="true"/>
36
				<PARAM description="oozie job application absolute path" name="oozie.wf.application.path" required="true"/>
37
				<PARAM description="target action set for project references" name="export_action_set_id_document_referencedProjects" required="true"/>
38
				<PARAM description="target action set for dataset references" name="export_action_set_id_document_referencedDatasets" required="true"/>
39
				<PARAM description="target action set for WoS entities" name="export_action_set_id_entity_wos" required="true"/>
40
				<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
41
			</JOB_INTERFACE>
42
		</HADOOP_JOB>
43
		<STATUS>
44
			<LAST_SUBMISSION_DATE value="2017-07-20T07:13:23+00:00"/>
45
			<RUNNING_INSTANCES value="1"/>
46
			<CUMULATIVE_RUN value="23"/>
47
		</STATUS>
48
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
49
	</BODY>
50
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/prepareBrokerDataJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="e0f1b6fa-e2bd-445d-9c37-e48ed8a8561b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="prepareBrokerDataJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the joined OAF objects</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />				
27
				
28
				<!-- MISC -->
29
				<PROPERTY key="mapred.output.compress" value="false" />						
30
				<PROPERTY key="mapred.compress.map.output" value="true" />	
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
33
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
35
							
36
				<PROPERTY key="mapred.reduce.tasks" value="100" />
37
				<PROPERTY key="dfs.blocksize" value="256M" />
38
				
39
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
40
				
41
		<!--  	Uncomment to override the default lib path -->			
42
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
43
        	</STATIC_CONFIGURATION>
44
        	<JOB_INTERFACE>
45
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
46
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
47
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> 
48
        		
49
        		<PARAM name="index.entity.links" required="true" description="entity joiner configuration" /> 
50
        		<PARAM name="contextmap" required="true" description="context map (ContextDSResources)" />        		
51
        	</JOB_INTERFACE>
52
        	<SCAN>
53
        		<FILTERS />
54
        		<FAMILIES />
55
        	</SCAN>
56
        </HADOOP_JOB>
57
        <STATUS>
58
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
59
            <RUNNING_INSTANCES value="0"/>
60
            <CUMULATIVE_RUN value="0" />
61
        </STATUS>
62
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
63
    </BODY>
64
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="iisMainJob" type="oozie">
12
			<DESCRIPTION>IIS main workflow</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- Cluster wide -->
16
				<PROPERTY key="queueName" value="default"/>
17
				<PROPERTY key="user.name" value="dnet.beta"/>
18

  
19
				<!-- Runtime -->
20
				<PROPERTY key="match_content_with_metadata" value="true"/>
21
				<PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/>
22
				<PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/>
23
				<!--<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> -->
24
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/>
25

  
26
				<PROPERTY key="export_action_hbase_table_initialize" value="true"/>
27
				<PROPERTY key="import_content_connection_timeout" value="180000"/>
28
				<PROPERTY key="import_content_read_timeout" value="180000"/>
29
				<PROPERTY key="import_resultset_client_read_timeout" value="180000"/>
30

  
31
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
32
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/>
33
				<PROPERTY key="metadataextraction_excluded_checksums"
34
				          value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
35
				<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/>
36
				<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/>
37
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
38
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
39
				<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/>
40
				<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/>
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
44
				<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing"/>
45
				<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
46
				<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/>
47
				<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
48
				<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
49
				<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
50
				<PARAM name="nameNode" required="true" description="hdfs name node"/>
51
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
52
				<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
53
				<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
54
				<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
55
				<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative"
56
				       required="true"/>
57
				<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
58
				<PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/>
59
				<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
60
				<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
61
				<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
62
				<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/>
63

  
64
				<!-- flags to enable/disable IIS modules -->
65
				<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module"/>
66
				<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module"/>
67
				<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module"/>
68
				<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module"/>
69
				<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module"/>
70
				<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module"/>
71
				<PARAM name="active_referenceextraction_researchinitiative" required="true"
72
				       description="enable/disable the research initiative extraction module"/>
73
				<PARAM name="active_statistics" required="true" description="enable/disable the statistics module"/>
74
				<PARAM name="active_referenceextraction_pdb" required="true" description="enable/disable the protein data bank extraction module"/>
75

  
76
			</JOB_INTERFACE>
77
		</HADOOP_JOB>
78
		<STATUS>
79
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
80
			<RUNNING_INSTANCES value="0"/>
81
			<CUMULATIVE_RUN value="0"/>
82
		</STATUS>
83
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
84
	</BODY>
85
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupExportPersonFullnameJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="ba309300-76f2-40d1-afe3-b77016f443e9_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="exportPersonFullnamesJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that exports the person fullnames on a text file on HDFS</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
17

  
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.preprocess.ExportFullnameMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22

  
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.preprocess.ExportFullnameReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />				
27
	
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
33
				
34
				<PROPERTY key="mapred.reduce.tasks" value="1" />
35
				<PROPERTY key="dfs.blocksize" value="256M" />
36
				<PROPERTY key="mapred.textoutputformat.separator" value="" />
37
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
38
	
39
		<!--  	Uncomment to override the default lib path -->			
40
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
41
        	</STATIC_CONFIGURATION>
42
        	<JOB_INTERFACE>
43
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
45
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
46
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
47
        	</JOB_INTERFACE>
48
        	<SCAN caching="10">
49
        		<FILTERS operator="MUST_PASS_ALL">
50
        			<FILTER type="prefix" param="entityTypeId" />
51
        		</FILTERS>
52
        		<FAMILIES/>        		
53
        	</SCAN>
54
        </HADOOP_JOB>
55
        <STATUS>
56
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
57
            <RUNNING_INSTANCES value="0"/>
58
            <CUMULATIVE_RUN value="0" />
59
        </STATUS>
60
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
61
    </BODY>
62
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/personCsvJoinJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="3f544a36-f123-4f5c-acf4-7c25f6591ec4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="personCsvJoinJob" type="mapreduce">
11
			<DESCRIPTION>map reduce job that joins person entities by "surname+first name letter" and serialises the output as csv</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" />
17
				<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
23

  
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupReducer" />
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
27
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />
28

  
29
				<!-- MISC -->
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />
32
				<PROPERTY key="mapred.reduce.tasks" value="10" />
33

  
34
				<PROPERTY key="hash.values.csv" value="manghip,pmanghi,corog,gcoro" />
35

  
36
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
37

  
38
				<!--  	Uncomment to override the default lib path -->
39
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
40

  
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="mapred.input.dir" required="true" description="input sequence file" />
44
			</JOB_INTERFACE>
45
		</HADOOP_JOB>
46
		<STATUS>
47
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
48
			<RUNNING_INSTANCES value="0"/>
49
			<CUMULATIVE_RUN value="0" />
50
		</STATUS>
51
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
52
	</BODY>
53
</RESOURCE_PROFILE>
54

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/elasticsearchTestJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="719b5d2b-4156-4936-bbc3-41d908ec3c57_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="elastisearchTestJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that indexes over ES</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.es.ElasticsearchFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.BytesWritable" />
22
				
23
			
24
				<!-- MISC -->
25
				<PROPERTY key="es.nodes" value="146.48.87.110:9200" />
26
				<PROPERTY key="es.resource" value="openaire/oaf" />
27
				<PROPERTY key="es.input.json" value="yes" />
28
				
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30

  
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
33
	
34
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
35
				
36
		<!--  	Uncomment to override the default lib path -->			
37
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
38
        	</STATIC_CONFIGURATION>
39
        	<JOB_INTERFACE>
40
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
41
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
42
        		
43
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" />         		
44
        	</JOB_INTERFACE>
45
        	<SCAN>
46
        		<FILTERS />
47
        		<FAMILIES />
48
        	</SCAN>
49
        </HADOOP_JOB>
50
        <STATUS>
51
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
52
            <RUNNING_INSTANCES value="0"/>
53
            <CUMULATIVE_RUN value="0" />
54
        </STATUS>
55
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
56
    </BODY>
57
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/promoteMultipleActionSets.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="fada2678-061e-4139-9444-549eff793126_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB type="mapreduce" name="promoteMultipleActionSetsJob">
12
			<DESCRIPTION>map only job that promotes actions from a set on HDFS to the data table</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.DelegatingInputFormat"/>
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>
17

  
18
				<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.actions2.PromoteActionSetFromHDFSMapper"/>
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
21
				<PROPERTY value="org.apache.hadoop.hbase.client.Put" key="mapred.mapoutput.value.class"/>
22

  
23
				<!-- MISC -->
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
26
				<PROPERTY value="false" key="mapreduce.map.speculative"/>
27
				<PROPERTY value="false" key="mapreduce.reduce.speculative"/>
28
				<PROPERTY value="true" key="mapred.output.compress"/>
29
				<PROPERTY value="BLOCK" key="mapred.output.compression.type"/>
30
				<PROPERTY value="0" key="mapred.reduce.tasks"/>
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32

  
33
				<!--  	Uncomment to override the default lib path -->
34
				<!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-0.0.8.5-SNAPSHOT-exportIds.jar"/> -->
35
			</STATIC_CONFIGURATION>
36
			<JOB_INTERFACE>
37
				<PARAM required="true" name="mapred.input.dir.formats" description="source sequence file on hdfs"/>
38
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
39
			</JOB_INTERFACE>
40
		</HADOOP_JOB>
41
		<STATUS>
42
			<LAST_SUBMISSION_DATE value="2016-04-14T18:22:06+02:00"/>
43
			<RUNNING_INSTANCES value="0"/>
44
			<CUMULATIVE_RUN value="4"/>
45
		</STATUS>
46
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
47
	</BODY>
48
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGTCleanerJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="888ef72f-701a-4d59-8b8a-2ad01986f975_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="gtCleanerJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that deletes the non-GT rows</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.GTCleanerMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Delete" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY param="entityType" />
47
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
48
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
49
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
50
        		</FAMILIES>
51
        	</SCAN>
52
        </HADOOP_JOB>
53
        <STATUS>
54
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
            <RUNNING_INSTANCES value="0"/>
56
            <CUMULATIVE_RUN value="0" />
57
        </STATUS>
58
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
    </BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/promoteSingleActionSet.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="54ee1986-50a3-4e5b-a0d3-b411b12c7329_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="promoteSingleActionSetJob" type="mapreduce">
12
			<DESCRIPTION>map only job that imports the inference actions in the main hbase table</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.actions2.PromoteSingleActionSetMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/>
23

  
24
				<!-- MISC -->
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
27
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
29

  
30

  
31
				<PROPERTY key="mapred.output.compress" value="true"/>
32
				<PROPERTY key="mapred.output.compression.type" value="BLOCK"/>
33

  
34
				<PROPERTY key="mapred.reduce.tasks" value="0"/>
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36

  
37
				<!--  	Uncomment to override the default lib path -->
38
				<!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-0.0.8.5-SNAPSHOT-exportIds.jar"/> -->
39
			</STATIC_CONFIGURATION>
40
			<JOB_INTERFACE>
41
				<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/>
42
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
43
			</JOB_INTERFACE>
44
		</HADOOP_JOB>
45
		<STATUS>
46
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
47
			<RUNNING_INSTANCES value="0"/>
48
			<CUMULATIVE_RUN value="0"/>
49
		</STATUS>
50
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
51
	</BODY>
52
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupDeleteDedupRelsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="5626c94e-0005-416a-9ea4-48fc8af85ecd_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="deleteDedupRelsJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that deletes the dedup rels used in the deduplication process</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupDeleteRelMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<!-- Map output value type: declared as the generic Hadoop Writable interface (org.apache.hadoop.io),
				     since the org.apache.hadoop.hbase.io package does not provide a Writable type. -->
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Writable" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY param="entityType" />
47
			        <FAMILY value="resultResult_dedup_merges" />
48
			        <FAMILY value="resultResult_dedup_isMergedIn" />
49
			        <FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
50

  
51
			        <FAMILY value="personPerson_dedup_merges" />
52
			        <FAMILY value="personPerson_dedup_isMergedIn" />
53
			        <FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
54

  
55
			        <FAMILY value="organizationOrganization_dedup_merges" />
56
			        <FAMILY value="organizationOrganization_dedup_isMergedIn" />
57
			        <FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
58
        		</FAMILIES>
59
        	</SCAN>
60
        </HADOOP_JOB>
61
        <STATUS>
62
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
63
            <RUNNING_INSTANCES value="0"/>
64
            <CUMULATIVE_RUN value="0" />
65
        </STATUS>
66
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
67
    </BODY>
68
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/buildMergedToAnchorMapJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="4e16c8dd-8944-4266-8b5c-62e4b26e3090_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="buildMergedToAnchorMapJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that builds a map {merged author id --> anchorId}</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.BuildMergedAnchorMapMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.BuildMergedAnchorMapReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />				
27
				
28
				<!-- MISC -->
29
				<PROPERTY key="mapred.output.compress" value="false" />
30
				<PROPERTY key="mapred.compress.map.output" value="true" />	
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
33
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
35
							
36
				<PROPERTY key="mapred.reduce.tasks" value="1" />
37
				<PROPERTY key="dfs.blocksize" value="256M" />
38
				<PROPERTY key="mapred.textoutputformat.separator" value="=" />
39
				
40
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
41
				
42
		<!--  	Uncomment to override the default lib path -->			
43
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
44
        	</STATIC_CONFIGURATION>
45
        	<JOB_INTERFACE>
46
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
47
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
48
        		<PARAM name="mapred.output.dir" required="true" description="target text file on hdfs" /> 
49
        	</JOB_INTERFACE>
50
        	<SCAN>
51
                <FILTERS operator="MUST_PASS_ALL">
52
                    <FILTER type="prefix" value="30"/>
53
                </FILTERS>
54
                <FAMILIES>
55
                    <FAMILY value="person"/>
56
	                <FAMILY value="personPerson_dedup_isMergedIn" />
57
                </FAMILIES>
58
        	</SCAN>
59
        </HADOOP_JOB>
60
        <STATUS>
61
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
62
            <RUNNING_INSTANCES value="0"/>
63
            <CUMULATIVE_RUN value="0" />
64
        </STATUS>
65
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
66
    </BODY>
67
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/exportIdentifiersJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="d5a7c415-932b-442e-91c2-46f648945ac2_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="exportIdentifiersJob" type="mapreduce">
12
			<DESCRIPTION>map only job that exports the publication identifiers as json</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataexport.ExportResultIdentifiersMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>
23

  
24
				<!-- MISC -->
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
27
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
29

  
30
				<PROPERTY key="dfs.blocksize" value="256M"/>
31

  
32
				<PROPERTY key="mapred.reduce.tasks" value="1"/>
33
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
34

  
35
				<!--  	The default lib path is overridden by the property below; comment it out to use the default -->
36
				<PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-0.0.8.5-SNAPSHOT-exportIds.jar"/>
37
			</STATIC_CONFIGURATION>
38
			<JOB_INTERFACE>
39
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
40
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
41

  
42
				<PARAM name="mapred.output.dir" required="true" description="target text file on hdfs"/>
43
			</JOB_INTERFACE>
44
			<SCAN>
45
				<FILTERS operator="MUST_PASS_ALL">
46
					<FILTER type="prefix" value="50"/>
47
				</FILTERS>
48
				<FAMILIES>
49
					<FAMILY value="result"/>
50
				</FAMILIES>
51
			</SCAN>
52
		</HADOOP_JOB>
53
		<STATUS>
54
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
			<RUNNING_INSTANCES value="0"/>
56
			<CUMULATIVE_RUN value="0"/>
57
		</STATUS>
58
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
	</BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceExportJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="62c1e44c-14b6-4639-9083-29bf432e6759_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="informationSpaceExportJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that exports the whole information space table as json</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataexport.ExportInformationSpaceMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
			
23
			
24
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
25
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />	
26
			
27
				<!-- MISC -->
28

  
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
33

  
34
				<PROPERTY key="dfs.blocksize" value="256M" />
35
				
36
				<PROPERTY key="mapred.compress.map.output" value="true" />	
37
				<PROPERTY key="mapred.output.compress" value="true" />
38
				<PROPERTY key="mapred.output.compression.type" value="BLOCK" />
39
				
40
			
41
				<PROPERTY key="mapred.reduce.tasks" value="0" />
42
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
43
				
44
		<!--  	Uncomment to override the default lib path -->			
45
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
46
        	</STATIC_CONFIGURATION>
47
        	<JOB_INTERFACE>
48
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
49
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
50
        		
51
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" />         		
52
        	</JOB_INTERFACE>
53
        	<SCAN>
54
        		<FILTERS />
55
        		<FAMILIES />
56
        	</SCAN>
57
        </HADOOP_JOB>
58
        <STATUS>
59
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
60
            <RUNNING_INSTANCES value="0"/>
61
            <CUMULATIVE_RUN value="0" />
62
        </STATUS>
63
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
64
    </BODY>
65
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/calculatePersonDistributionStep1Job.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER
4
                value="f61bc720-2821-4871-937d-64b79f098714_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2015-06-15T10:50:29+02:00"/>
9
    </HEADER>
10
    <BODY>
11
        <HADOOP_JOB name="calculatePersonDistributionStep1Job" type="mapreduce">
12
            <DESCRIPTION>map reduce job that performs tests for notificationBroker service (step 1/2)</DESCRIPTION>
13
            <STATIC_CONFIGURATION>
14

  
15
                <!-- I/O FORMAT -->
16
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
18

  
19
                <!-- MAPPER -->
20
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Mapper"/>
21
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>
23

  
24
                <!-- REDUCER -->
25
                <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Reducer"/>
26
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
27
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
28

  
29
                <!-- MISC -->
30
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
31
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
32
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
33
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
34
                <PROPERTY key="mapred.reduce.tasks" value="100"/>
35

  
36
                <!-- <PROPERTY key="user.name" value="dnet" /> -->
37

  
38
                <!--  	Uncomment to override the default lib path -->
39
                <!-- <PROPERTY key="job.lib" value="/user/michele.artini/lib/dnet-mapreduce-jobs-assembly-0.0.6.6-SNAPSHOT.jar"/> -->
40
            </STATIC_CONFIGURATION>
41
            <JOB_INTERFACE>
42
                <PARAM description="source hbase table" name="hbase.mapred.inputtable" required="true"/>
43
                <PARAM description="source hbase table" name="hbase.mapreduce.inputtable" required="true"/>
44
            </JOB_INTERFACE>
45
            <SCAN>
46
                <FILTERS operator="MUST_PASS_ONE">
47
                    <FILTER type="prefix" value="50"/>
48
                    <FILTER type="prefix" value="10"/>
49
                </FILTERS>
50
                <FAMILIES>
51
                    <FAMILY value="result"/>
52
                    <FAMILY value="datasource"/>
53
                </FAMILIES>
54
            </SCAN>
55
        </HADOOP_JOB>
56
        <STATUS>
57
            <LAST_SUBMISSION_DATE value="2015-06-15T11:10:17+02:00"/>
58
            <RUNNING_INSTANCES value="0"/>
59
            <CUMULATIVE_RUN value="7"/>
60
        </STATUS>
61
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
62
    </BODY>
63
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/pom.xml
1
<?xml version="1.0" ?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-openaireplus-profiles</artifactId>
12
	<packaging>jar</packaging>
13
	<version>1.0.27</version>
14
	<scm>
15
   		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27</developerConnection>
16
	</scm>
17

  
18
	<dependencies>
19

  
20
	</dependencies>
21
</project>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/invalidRecordDoiExporterJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="7edd375c-1d1e-4f2b-81c9-1b7d4f65c233_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2018-10-22T10:34:48+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="invalidRecordDoiExporterJob" type="mapreduce">
11
            <DESCRIPTION>map only job that ensures xml records are valid</DESCRIPTION>
12
            <STATIC_CONFIGURATION><!-- I/O FORMAT -->
13
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
14
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"/><!-- MAPPER -->
15
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.GetInvalidXmlRecordsMapper"/>
16
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
17
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/><!-- JOB GLOBAL -->
18
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
19
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/><!-- MISC -->
20
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
21
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
22
                <PROPERTY key="mapred.reduce.tasks" value="1"/><!-- <PROPERTY key="user.name" value="dnet" /> --><!--  	Uncomment to override the default lib path -->
23
                <!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-1.1.4-BETA-SNAPSHOT-FilterXmlRecordsMapper.jar"/> -->
24
                <PROPERTY key="mapred.output.compress" value="false"/>
25
            </STATIC_CONFIGURATION>
26
            <JOB_INTERFACE>
27
                <PARAM description="input sequence file" name="mapred.input.dir" required="true"/>
28
                <PARAM description="target file" name="mapred.output.dir" required="true"/>
29
            </JOB_INTERFACE>
30
        </HADOOP_JOB>
31
        <STATUS>
32
            <LAST_SUBMISSION_DATE value="2018-10-22T10:35:08+00:00"/>
33
            <RUNNING_INSTANCES value="0"/>
34
            <CUMULATIVE_RUN value="20"/>
35
        </STATUS>
36
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
37
    </BODY>
38
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindPersonRootsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="8ec4731e-4e91-4863-9a4b-7f0a8ca0542e_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupFindPersonRootsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that find the root of a similarity group (person)</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="100" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
				
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
43
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
45
        	</JOB_INTERFACE>
46
        	<SCAN>
47
        		<FILTERS operator="MUST_PASS_ALL">
48
        			<FILTER type="prefix" param="entityTypeId" />
49
        		</FILTERS>
50
        		<FAMILIES>
51
        			<FAMILY param="entityType" />
52
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
53
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
54
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
55
        		</FAMILIES>
56
        	</SCAN>
57
        </HADOOP_JOB>
58
        <STATUS>
59
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
60
            <RUNNING_INSTANCES value="0"/>
61
            <CUMULATIVE_RUN value="0" />
62
        </STATUS>
63
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
64
    </BODY>
65
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/prepareIndexDataJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="ed8c0a4e-7cf2-49df-bfed-fcfab0699ade_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="prepareIndexDataJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the xml records</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />				
27
				
28
				<!-- MISC -->
29
				<PROPERTY key="mapred.output.compress" value="false" />						
30
				<PROPERTY key="mapred.compress.map.output" value="true" />	
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
33
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
35

  
36
				<PROPERTY key="mapred.fairscheduler.pool" value="solr"/>
37
				<PROPERTY key="mapred.reduce.tasks" value="500" />
38
				<PROPERTY key="dfs.blocksize" value="256M" />
39
		        <PROPERTY key="ui.other.datasourcetypes" value="scholarcomminfra, infospace, pubsrepository::mock, entityregistry, entityregistry::projects, entityregistry::repositories, websource"/>
40

  
41
				
42
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
43
				
44
		<!--  	Uncomment to override the default lib path -->			
45
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
46
        	</STATIC_CONFIGURATION>
47
        	<JOB_INTERFACE>
48
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
49
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
50
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> 
51
        		
52
        		<PARAM name="index.entity.links" required="true" description="entity joiner configuration" /> 
53
        		<PARAM name="contextmap" required="true" description="context map (ContextDSResources)" />
54

  
55
	        </JOB_INTERFACE>
56
        	<SCAN>
57
        		<FILTERS />
58
        		<FAMILIES />
59
        	</SCAN>
60
        </HADOOP_JOB>
61
        <STATUS>
62
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
63
            <RUNNING_INSTANCES value="0"/>
64
            <CUMULATIVE_RUN value="0" />
65
        </STATUS>
66
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
67
    </BODY>
68
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMergeCoAuthors.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="141e7add-0765-4679-b04d-c9668c011003_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="mergeCoAuthorsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that builds the roots and redirects the rels</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17

  
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.CoAuthorMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22

  
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.CoAuthorReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
27
	
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
33
				
34
				<PROPERTY key="max.coauthors" value="50" />				
35
			
36
				<PROPERTY key="mapred.reduce.tasks" value="100" />
37
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
38
	
39
		<!--  	Uncomment to override the default lib path -->			
40
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
41
        	</STATIC_CONFIGURATION>
42
        	<JOB_INTERFACE>
43
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
45
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
46
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />        		
47
        	</JOB_INTERFACE>
48
        	<SCAN caching="10">
49
        		<FILTERS operator="MUST_PASS_ALL">
50
        			<FILTER type="prefix" param="entityTypeId" />
51
        		</FILTERS>
52
        		<FAMILIES/>        		
53
        	</SCAN>
54
        </HADOOP_JOB>
55
        <STATUS>
56
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
57
            <RUNNING_INSTANCES value="0"/>
58
            <CUMULATIVE_RUN value="0" />
59
        </STATUS>
60
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
61
    </BODY>
62
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/xmlRecordValidatorJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="d051952c-6f2f-4374-ac14-7fbb8d8fa430_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2018-10-22T10:34:48+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="xmlRecordValidatorJob" type="mapreduce">
11
            <DESCRIPTION>map only job that ensures xml records are valid</DESCRIPTION>
12
            <STATIC_CONFIGURATION><!-- I/O FORMAT -->
13
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
14
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/><!-- MAPPER -->
15
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.FilterXmlRecordsMapper"/>
16
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
17
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/><!-- JOB GLOBAL -->
18
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
19
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/><!-- MISC -->
20
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
21
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
22
                <!-- NOTE(review): the DESCRIPTION calls this a "map only" job and no reducer class is configured in this profile, yet 100 reduce tasks are requested here; the other map-only profiles in this module set mapred.reduce.tasks to 0. Presumably intentional (e.g. to bound the number of output files) - TODO confirm. -->
                <PROPERTY key="mapred.reduce.tasks" value="100"/><!-- <PROPERTY key="user.name" value="dnet" /> --><!--  	Uncomment to override the default lib path -->
23
                <!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-1.1.4-BETA-SNAPSHOT-FilterXmlRecordsMapper.jar"/> -->
24
                <PROPERTY key="mapred.output.compress" value="true"/>
25
                <PROPERTY key="mapred.output.compression.type" value="BLOCK"/>
26
            </STATIC_CONFIGURATION>
27
            <JOB_INTERFACE>
28
                <PARAM description="input sequence file" name="mapred.input.dir" required="true"/>
29
                <PARAM description="target file" name="mapred.output.dir" required="true"/>
30
            </JOB_INTERFACE>
31
        </HADOOP_JOB>
32
        <STATUS>
33
            <LAST_SUBMISSION_DATE value="2018-10-22T10:35:08+00:00"/>
34
            <RUNNING_INSTANCES value="0"/>
35
            <CUMULATIVE_RUN value="20"/>
36
        </STATUS>
37
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
38
    </BODY>
39
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/mdStoreHdfsImportAuthorsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="a53df5e3-8a38-4d3f-8f67-bf9fc43279a6_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="authorImportRecordsJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that maps xml metadata records from a sequence file into an hbase table</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
				
18
				<!-- MAPPER -->        	
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.AuthorImportRecordsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
			
23
				<!-- MISC -->
24
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
25
				<PROPERTY key="mapreduce.map.speculative" value="false" />
26
				<PROPERTY key="mapred.reduce.tasks" value="0" />
27
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
28
				
29
		<!--  	Uncomment to override the default lib path -->			
30
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
31
				
32
        	</STATIC_CONFIGURATION>
33
        	<JOB_INTERFACE>
34
        		<PARAM name="mapred.input.dir" required="true" description="input sequence file" />
35
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
36
        		<PARAM name="hbase.import.xslt" required="true" description="mapping" />
37
        	</JOB_INTERFACE>
38
        </HADOOP_JOB>
39
        <STATUS>
40
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
41
            <RUNNING_INSTANCES value="0"/>
42
            <CUMULATIVE_RUN value="0" />
43
        </STATUS>
44
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
45
    </BODY>
46
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/publicationAnalysisJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="721fd82c-6444-41c9-ba23-5eb0652ddaeb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="publicationAnalysisJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that analyses publication features</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" />
17

  
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.PublicationAnalysisMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" />
22
			
23
				<!-- MISC -->
24
				
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapreduce.map.speculative" value="false" />
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
29
			
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
		<!--  	Uncomment to override the default lib path -->			
34
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        	</JOB_INTERFACE>
40
        	<SCAN>
41
        		<FILTERS operator="MUST_PASS_ALL">
42
        			<FILTER type="prefix" value="50" />
43
        		</FILTERS>
44
        		<FAMILIES>
45
	       			<FAMILY value="result" />
46
        		</FAMILIES>
47
        	</SCAN>
48
        </HADOOP_JOB>
49
        <STATUS>
50
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
51
            <RUNNING_INSTANCES value="0"/>
52
            <CUMULATIVE_RUN value="0" />
53
        </STATUS>
54
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
55
    </BODY>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff