Project

General

Profile

« Previous | Next » 

Revision 52186

[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.23

View differences:

modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-profiles/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-openaireplus-profiles"}
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerJoinProjectPublicationJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="f377cc29-0130-41a7-9edc-7e88230a1b01_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="brokerJoinProjectPublicationJob" type="mapreduce">
11
			<DESCRIPTION>map reduce job that enriches the publications with the project metadata, starting from the projects linked to them</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>
17

  
18
				<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.join.ProjectPublicationJoinMapper"/>
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
22

  
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.join.ProjectPublicationJoinReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />
27

  
28
				<!-- MISC -->
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
31
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
33

  
34
				<PROPERTY key="dfs.blocksize" value="256M"/>
35

  
36
				<PROPERTY key="mapred.reduce.tasks" value="100"/>
37

  
38
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
39

  
40
				<!--  	Uncomment to override the default lib path -->
41
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
42
			</STATIC_CONFIGURATION>
43
			<JOB_INTERFACE>
44
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
45
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
46
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
47
				<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
48
			</JOB_INTERFACE>
49
			<SCAN>
50
				<FILTERS operator="MUST_PASS_ONE">
51
					<FILTER type="prefix" value="50" />
52
					<FILTER type="prefix" value="40" />
53
				</FILTERS>
54
				<FAMILIES>
55
					<FAMILY value="result" />
56
					<FAMILY value="project" />
57
					<FAMILY value="resultProject_outcome_produces" />
58
				</FAMILIES>
59
			</SCAN>
60
		</HADOOP_JOB>
61
		<STATUS>
62
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
63
			<RUNNING_INSTANCES value="0"/>
64
			<CUMULATIVE_RUN value="0" />
65
		</STATUS>
66
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
67
	</BODY>
68
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingQuickJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="13beed98-81bf-4fbd-ab4f-de071177997c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
    	<HADOOP_JOB name="iisPreprocessingQuickJob" type="oozie">
11
        	<DESCRIPTION>IIS preprocessing</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
				<!-- Cluster wide -->
14
                <PROPERTY key="queueName" value="default"/>
15
				<PROPERTY key="user.name" value="dnet" />
16

  
17
				<!-- Runtime -->
18
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing_quick_test"/>
19
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
20
                <PROPERTY key="input_referenceextraction_project" value="/user/marek.horst/share/referenceextraction/document_projects/2014-04-11"/>
21
                <PROPERTY key="input_referenceextraction_dataset" value="/user/marek.horst/share/referenceextraction/document_datasets/all/2014-04-11"/>
22
                <PROPERTY key="export_action_hbase_table_initialize" value="false"/>
23
            </STATIC_CONFIGURATION>
24
        	<JOB_INTERFACE>
25
 		       	<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" />
26
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
27
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records" />
28
        		<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" />
29
        		<PARAM name="import_database_service_location" required="true" description="database service endpoint" />
30
        		<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction" />
31
        		<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext" />
32
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
33
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
34
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
35
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
36
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
37
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
38
       			<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" />
39
       			<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" />
40
       			<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" />
41
     			<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" />
42
        	</JOB_INTERFACE>
43
        </HADOOP_JOB>
44
        <STATUS>
45
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
46
            <RUNNING_INSTANCES value="0"/>
47
            <CUMULATIVE_RUN value="0" />
48
        </STATUS>
49
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
50
    </BODY>
51
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/offlineHbaseLoadJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="073e55eb-c6f4-49a9-80b3-1a927612ba5b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="offlineHbaseLoad" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that loads a given entity type in the offline dedup table</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.OfflineHbaseLoadMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.compress.map.output" value="true" />	
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
29
			
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
		<!--  	Uncomment to override the default lib path -->			
34
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
38
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
39
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
40
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
41
        	</JOB_INTERFACE>
42
        	<SCAN>
43
        		<FILTERS operator="MUST_PASS_ALL">
44
        			<FILTER type="prefix" param="entityTypeId" />
45
        		</FILTERS>
46
        		<FAMILIES>
47
        			<FAMILY param="entityType" />
48
        		</FAMILIES>
49
        	</SCAN>
50
        </HADOOP_JOB>
51
        <STATUS>
52
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
            <RUNNING_INSTANCES value="0"/>
54
            <CUMULATIVE_RUN value="0" />
55
        </STATUS>
56
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
    </BODY>
58
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="iisPreprocessingJob" type="oozie">
12
			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14
				<!-- Cluster wide -->
15
				<PROPERTY key="queueName" value="default"/>
16
				<PROPERTY key="user.name" value="dnet.beta"/>
17

  
18
				<!-- Runtime -->
19
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"/>
20
				<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"/> <!-- NOTE(review): the same key is also declared as a required PARAM in this job's JOB_INTERFACE; confirm which value takes precedence at submission time -->
21
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
22
				<PROPERTY key="export_action_hbase_table_initialize" value="false"/>
23
				<!-- <PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> -->
24
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/>
25
				<PROPERTY key="metadataextraction_excluded_checksums"
26
				          value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
27
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
28
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
29
				<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/>
30
			</STATIC_CONFIGURATION>
31
			<JOB_INTERFACE>
32
				<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
33
				<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
34
				<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records"/>
35
				<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/>
36
				<PARAM name="import_database_service_location" required="true" description="database service endpoint"/>
37
				<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction"/>
38
				<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext"/>
39
				<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
40
				<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
41
				<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
42
				<PARAM name="nameNode" required="true" description="hdfs name node"/>
43
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
44
				<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
45
				<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/>
46
				<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/>
47
				<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/>
48
				<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/>
49
			</JOB_INTERFACE>
50
		</HADOOP_JOB>
51
		<STATUS>
52
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
			<RUNNING_INSTANCES value="0"/>
54
			<CUMULATIVE_RUN value="0"/>
55
		</STATUS>
56
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
	</BODY>
58
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindRootsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="61f9270e-ffc1-4095-9f76-3852e4d227fb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupFindRootsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that finds the root of a similarity group</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY param="entityType" />
47
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
48
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
49
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
50
        		</FAMILIES>
51
        	</SCAN>
52
        </HADOOP_JOB>
53
        <STATUS>
54
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
            <RUNNING_INSTANCES value="0"/>
56
            <CUMULATIVE_RUN value="0" />
57
        </STATUS>
58
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
    </BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="iisMainJob" type="oozie">
12
			<DESCRIPTION>IIS main workflow</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- Cluster wide -->
16
				<PROPERTY key="queueName" value="default"/>
17
				<PROPERTY key="user.name" value="dnet.beta"/>
18

  
19
				<!-- Runtime -->
20
				<PROPERTY key="match_content_with_metadata" value="true"/>
21
				<PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/>
22
				<PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/>
23
				<!--<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> -->
24
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/>
25

  
26
				<PROPERTY key="export_action_hbase_table_initialize" value="true"/>
27
				<PROPERTY key="import_content_connection_timeout" value="180000"/>
28
				<PROPERTY key="import_content_read_timeout" value="180000"/>
29
				<PROPERTY key="import_resultset_client_read_timeout" value="180000"/>
30

  
31
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
32
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/>
33
				<PROPERTY key="metadataextraction_excluded_checksums"
34
				          value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
35
				<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/>
36
				<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/>
37
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
38
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
39
				<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/>
40
				<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/>
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
44
				<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing"/>
45
				<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
46
				<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/>
47
				<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
48
				<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
49
				<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
50
				<PARAM name="nameNode" required="true" description="hdfs name node"/>
51
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
52
				<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
53
				<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
54
				<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
55
				<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative"
56
				       required="true"/>
57
				<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
58
				<PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/>
59
				<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
60
				<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
61
				<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
62
				<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/>
63

  
64
				<!-- flags to enable/disable IIS modules -->
65
				<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module"/> <!-- NOTE(review): the description mentions affiliation matching while the flag name refers to metadata extraction export; confirm which is intended -->
66
				<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module"/>
67
				<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module"/>
68
				<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module"/>
69
				<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module"/>
70
				<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module"/>
71
				<PARAM name="active_referenceextraction_researchinitiative" required="true"
72
				       description="enable/disable the research initiative extraction module"/>
73
				<PARAM name="active_statistics" required="true" description="enable/disable the statistics module"/>
74
				<PARAM name="active_referenceextraction_pdb" required="true" description="enable/disable the protein data bank extraction module"/>
75

  
76
			</JOB_INTERFACE>
77
		</HADOOP_JOB>
78
		<STATUS>
79
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
80
			<RUNNING_INSTANCES value="0"/>
81
			<CUMULATIVE_RUN value="0"/>
82
		</STATUS>
83
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
84
	</BODY>
85
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupDeleteDedupRelsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="5626c94e-0005-416a-9ea4-48fc8af85ecd_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="deleteDedupRelsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that deletes the dedup rels used in the deduplication process</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupDeleteRelMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Writable" /> <!-- Writable is defined in org.apache.hadoop.io -->
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY param="entityType" />
47
			        <FAMILY value="resultResult_dedup_merges" />
48
			        <FAMILY value="resultResult_dedup_isMergedIn" />
49
			        <FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
50

  
51
			        <FAMILY value="personPerson_dedup_merges" />
52
			        <FAMILY value="personPerson_dedup_isMergedIn" />
53
			        <FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
54

  
55
			        <FAMILY value="organizationOrganization_dedup_merges" />
56
			        <FAMILY value="organizationOrganization_dedup_isMergedIn" />
57
			        <FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
58
        		</FAMILIES>
59
        	</SCAN>
60
        </HADOOP_JOB>
61
        <STATUS>
62
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
63
            <RUNNING_INSTANCES value="0"/>
64
            <CUMULATIVE_RUN value="0" />
65
        </STATUS>
66
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
67
    </BODY>
68
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMarkDeletedEntityJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="667fe203-ee51-4dff-8c9c-b90e66e96eb4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="dedupMarkDeletedEntityJob" type="mapreduce">
12
			<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION> <!-- NOTE(review): the job name and its mapper (DedupMarkDeletedEntityMapper) suggest this job marks deleted entities rather than closing the similarity mesh; confirm the description -->
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMarkDeletedEntityMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/>
23

  
24
				<!-- MISC -->
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
27
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
29

  
30
				<PROPERTY key="mapred.reduce.tasks" value="0"/>
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32

  
33
				<!--  	Uncomment to override the default lib path -->
34
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
			</STATIC_CONFIGURATION>
36
			<JOB_INTERFACE>
37
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
38
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
39
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
40
				<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/>
41
			</JOB_INTERFACE>
42
			<SCAN>
43
				<FILTERS operator="MUST_PASS_ALL">
44
					<FILTER type="prefix" param="entityTypeId"/>
45
				</FILTERS>
46
				<FAMILIES>
47
					<FAMILY param="entityType"/>
48
					<FAMILY value="resultResult_dedup_isMergedIn"/>
49
					<FAMILY value="organizationOrganization_dedup_isMergedIn"/>
50
				</FAMILIES>
51
			</SCAN>
52
		</HADOOP_JOB>
53
		<STATUS>
54
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
			<RUNNING_INSTANCES value="0"/>
56
			<CUMULATIVE_RUN value="0"/>
57
		</STATUS>
58
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
	</BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJobV2.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="51e1660e-b1fe-4389-96b5-11a522b78c15_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2016-10-15T12:03:30+00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="iisPreprocessingJobV2" type="oozie">
11
			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13
				<!-- Cluster wide -->
14
				<PROPERTY key="user.name" value="dnet.production"/>
15

  
16
				<!-- Runtime -->
17
				<PROPERTY key="metadataextraction_default_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/>
18
				<!-- skipping _default chunk from property name since 2017.02.21 -->
19
				<PROPERTY key="metadataextraction_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/>
20
				<PROPERTY key="ingest_pmc_cache_location" value="/user/${user.name}/iis/cache/ingestpmc"/>
21
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
22
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
23
				<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_PROD"/>
24
				<PROPERTY key="reports_external_path" value="/user/${user.name}/iis/reports/${execution_environment}"/>
25
				<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/>
26
				<PROPERTY key="import_project_stream_endpoint_url" value="http://services.openaire.eu:8980/provision/mvc/openaire/export/streamProjectDetails.do"/>
27
			</STATIC_CONFIGURATION>
28
			<JOB_INTERFACE>
29
				<PARAM description="objectStore service endpoint" name="import_content_object_store_location" required="true"/>
30
				<PARAM description="mdstore service location" name="import_mdstore_service_location" required="true"/>
31
				<PARAM description="mdstore ids for dataset records" name="import_dataset_mdstore_ids_csv" required="true"/>
32
				<PARAM description="mdstore id for WoS records" name="import_wos_mdstore_id" required="true"/>
33
				<PARAM description="database service endpoint" name="import_database_service_location" required="true"/>
34
				<PARAM description="objectstore ids subject to dataset reference extraction" name="import_content_datacite_objectstores_csv" required="true"/>
35
				<PARAM description="objectstore ids for WoS plaintext" name="import_content_wos_plaintext_objectstores_csv" required="true"/>
36
				<PARAM description="oozie job application absolute path" name="oozie.wf.application.path" required="true"/>
37
				<PARAM description="target action set for project references" name="export_action_set_id_document_referencedProjects" required="true"/>
38
				<PARAM description="target action set for dataset references" name="export_action_set_id_document_referencedDatasets" required="true"/>
39
				<PARAM description="target action set for WoS entities" name="export_action_set_id_entity_wos" required="true"/>
40
				<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
41
			</JOB_INTERFACE>
42
		</HADOOP_JOB>
43
		<STATUS>
44
			<LAST_SUBMISSION_DATE value="2017-07-20T07:13:23+00:00"/>
45
			<RUNNING_INSTANCES value="1"/>
46
			<CUMULATIVE_RUN value="23"/>
47
		</STATUS>
48
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
49
	</BODY>
50
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerEnrichmentJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="ffcc359c-555a-4d86-9ee2-b8ad212a0185_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="brokerEnrichmentJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that calculates the enrichment events based on the publications dedup results</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
		        <!-- I/O FORMAT -->
15
		        <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
16
		        <PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/>
17

  
18
		        <!-- MAPPER -->
19
		        <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.EnrichmentMapper"/>
20
		        <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
21
		        <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
22

  
23
		        <!-- REDUCER -->
24
		        <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.EnrichmentReducer" />
25
		        <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
26
		        <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />
27

  
28

  
29
		        <!-- MISC -->
30
		        <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
31
		        <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
32
		        <PROPERTY key="mapreduce.map.speculative" value="false"/>
33
		        <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
34

  
35
		        <PROPERTY key="dfs.blocksize" value="256M"/>
36
		        <PROPERTY key="mapred.reduce.tasks" value="1"/>
37

  
38
		        <!-- ES -->
39
		        <PROPERTY key="es.nodes"
40
		                  value="ip-90-147-167-137.ct1.garrservices.it:9200,ip-90-147-167-126.ct1.garrservices.it:9200,ip-90-147-167-13.ct1.garrservices.it:9200,ip-90-147-167-125.ct1.garrservices.it:9200"/>
41
		        <PROPERTY key="es.nodes.resolve.hostname" value="false"/>
42
		        <PROPERTY key="es.nodes.wan.only" value="true"/>
43
		        <PROPERTY key="es.resource" value="events/event"/>
44
		        <PROPERTY key="es.input.json" value="yes"/>
45

  
46

  
47
		        <!-- BROKER -->
48
		        <PROPERTY key="broker.datasource.id.whitelist" value=""/>
49
		        <PROPERTY key="broker.datasource.id.blacklist" value=""/>
50
		        <PROPERTY key="broker.datasource.untrusted.oa.list" value="opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"/>
51
		        <PROPERTY key="broker.datasource.type.whitelist" value="pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic"/>
52

  
53

  
54
		        <!-- <PROPERTY key="user.name" value="dnet" /> -->
55
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
56
				
57
		<!--  	Uncomment to override the default lib path -->			
58
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
59
        	</STATIC_CONFIGURATION>
60
        	<JOB_INTERFACE>
61
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
62
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
63
        	</JOB_INTERFACE>
64
        	<SCAN>
65
        		<FILTERS operator="MUST_PASS_ALL">
66
        			<FILTER type="prefix" value="50" />
67
        		</FILTERS>
68
        		<FAMILIES>
69
	       			<FAMILY value="result" />
70
        			<FAMILY value="resultResult_dedup_isMergedIn" />
71
        		</FAMILIES>
72
        	</SCAN>
73
        </HADOOP_JOB>
74
        <STATUS>
75
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
76
            <RUNNING_INSTANCES value="0"/>
77
            <CUMULATIVE_RUN value="0" />
78
        </STATUS>
79
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
80
    </BODY>
81
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/calculatePersonDistributionStep1Job.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER
4
                value="f61bc720-2821-4871-937d-64b79f098714_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2015-06-15T10:50:29+02:00"/>
9
    </HEADER>
10
    <BODY>
11
        <HADOOP_JOB name="calculatePersonDistributionStep1Job" type="mapreduce">
12
            <DESCRIPTION>map reduce job that performs tests for notificationBroker service (step 1/2)</DESCRIPTION>
13
            <STATIC_CONFIGURATION>
14

  
15
                <!-- I/O FORMAT -->
16
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
18

  
19
                <!-- MAPPER -->
20
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Mapper"/>
21
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>
23

  
24
                <!-- REDUCER -->
25
                <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Reducer"/>
26
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
27
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
28

  
29
                <!-- MISC -->
30
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
31
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
32
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
33
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
34
                <PROPERTY key="mapred.reduce.tasks" value="100"/>
35

  
36
                <!-- <PROPERTY key="user.name" value="dnet" /> -->
37

  
38
                <!--  	Uncomment to override the default lib path -->
39
                <!-- <PROPERTY key="job.lib" value="/user/michele.artini/lib/dnet-mapreduce-jobs-assembly-0.0.6.6-SNAPSHOT.jar"/> -->
40
            </STATIC_CONFIGURATION>
41
            <JOB_INTERFACE>
42
                <PARAM description="source hbase table" name="hbase.mapred.inputtable" required="true"/>
43
                <PARAM description="source hbase table" name="hbase.mapreduce.inputtable" required="true"/>
44
            </JOB_INTERFACE>
45
            <SCAN>
46
                <FILTERS operator="MUST_PASS_ONE">
47
                    <FILTER type="prefix" value="50"/>
48
                    <FILTER type="prefix" value="10"/>
49
                </FILTERS>
50
                <FAMILIES>
51
                    <FAMILY value="result"/>
52
                    <FAMILY value="datasource"/>
53
                </FAMILIES>
54
            </SCAN>
55
        </HADOOP_JOB>
56
        <STATUS>
57
            <LAST_SUBMISSION_DATE value="2015-06-15T11:10:17+02:00"/>
58
            <RUNNING_INSTANCES value="0"/>
59
            <CUMULATIVE_RUN value="7"/>
60
        </STATUS>
61
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
62
    </BODY>
63
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJobV2.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="d730a831-a982-4034-a890-de98fd972e87_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2017-07-05T15:05:50+00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="iisMainJobV2" type="oozie">
11
			<DESCRIPTION>IIS main workflow</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- Cluster wide -->
15
				<PROPERTY key="user.name" value="dnet.production"/>
16

  
17
				<!-- Runtime -->
18
				<PROPERTY key="metadataextraction_default_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/>
19
				<!-- skipping _default chunk from property name since 2017.02.21 -->
20
				<PROPERTY key="metadataextraction_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/>
21
				<PROPERTY key="ingest_pmc_cache_location" value="/user/${user.name}/iis/cache/ingestpmc"/>
22
				<PROPERTY key="software_webcrawl_cache_location" value="/user/${user.name}/iis/cache/webcrawler"/>
23
				<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/>
24
				<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/>
25
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
26
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
27
				<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/>
28
				<PROPERTY key="export_trust_level_threshold_document_software_url" value="0.0"/>
29
				<PROPERTY key="export_trust_level_threshold_matched_doc_organizations" value="0.0"/>
30
				<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_PROD"/>
31
				<PROPERTY key="reports_external_path" value="/user/${user.name}/iis/reports/${execution_environment}"/>
32
				<PROPERTY key="import_hbase_dump_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/tmp/db_openaireplus_services.export.2017.07.19"/>
33
				<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/>
34
			</STATIC_CONFIGURATION>
35
			<JOB_INTERFACE>
36
				<PARAM description="oozie job application absolute path" name="oozie.wf.application.path" required="true"/>
37
				<PARAM description="objectStore service endpoint" name="import_content_object_store_location" required="true"/>
38
				<PARAM description="csv list of the available object stores subject to processing" name="import_content_objectstores_csv" required="true"/>
39
				<PARAM description="mdstore service location" name="import_mdstore_service_location" required="true"/>
40
				<PARAM description="mdstore ids for dataset records" name="import_dataset_mdstore_ids_csv" required="true"/>
41
				<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
42
				<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
43
				<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" required="true"/>
44
				<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
45
				<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
46
				<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
47
				<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
48
				<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/>
49
				<PARAM description="software reference extraction actionset identifier" name="export_action_set_id_document_software_url" required="true"/>
50
				<PARAM description="target action set for affiliations" name="export_action_set_id_matched_doc_organizations" required="true"/>
51

  
52

  
53
				<!-- flags to enable/disable IIS modules -->
54
				<PARAM description="enable/disable the citation matching module" name="active_citationmatching" required="true"/>
55
				<PARAM description="enable/disable the document classification module" name="active_documentsclassification" required="true"/>
56
				<PARAM description="enable/disable the document similarity module" name="active_documentssimilarity" required="true"/>
57
				<PARAM description="enable/disable the dataset reference extraction module" name="active_referenceextraction_dataset" required="true"/>
58
				<PARAM description="enable/disable the project reference extraction module" name="active_referenceextraction_project" required="true"/>
59
				<PARAM description="enable/disable the research initiative extraction module" name="active_referenceextraction_researchinitiative" required="true"/>
60
				<PARAM description="enable/disable the protein data bank extraction module" name="active_referenceextraction_pdb" required="true"/>
61
				<PARAM description="enable/disable the software reference extraction algorithm" name="active_referenceextraction_software_url" required="true"/>
62
			</JOB_INTERFACE>
63
		</HADOOP_JOB>
64
		<STATUS>
65
			<LAST_SUBMISSION_DATE value="2017-07-02T09:39:51+00:00"/>
66
			<RUNNING_INSTANCES value="0"/>
67
			<CUMULATIVE_RUN value="13"/>
68
		</STATUS>
69
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
70
	</BODY>
71
</RESOURCE_PROFILE>
72

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/personCsvJoinJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="3f544a36-f123-4f5c-acf4-7c25f6591ec4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<HADOOP_JOB name="personCsvJoinJob" type="mapreduce">
11
			<DESCRIPTION>map reduce job that joins person entities by "surname+first name letter" and serialises the output as csv</DESCRIPTION>
12
			<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" />
17
				<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
23

  
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupReducer" />
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
27
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />
28

  
29
				<!-- MISC -->
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />
32
				<PROPERTY key="mapred.reduce.tasks" value="10" />
33

  
34
				<PROPERTY key="hash.values.csv" value="manghip,pmanghi,corog,gcoro" />
35

  
36
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
37

  
38
				<!--  	Uncomment to override the default lib path -->
39
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
40

  
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="mapred.input.dir" required="true" description="input sequence file" />
44
			</JOB_INTERFACE>
45
		</HADOOP_JOB>
46
		<STATUS>
47
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
48
			<RUNNING_INSTANCES value="0"/>
49
			<CUMULATIVE_RUN value="0" />
50
		</STATUS>
51
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
52
	</BODY>
53
</RESOURCE_PROFILE>
54

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupCandidateScanJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="675f1436-205a-4b19-8b6b-35e1c17fb125_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupCandidateScanJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.compress.map.output" value="true" />	
30
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
33
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
34
			
35
				<PROPERTY key="mapred.reduce.tasks" value="1000" />
36
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
37
				
38
		<!--  	Uncomment to override the default lib path -->			
39
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
40
        	</STATIC_CONFIGURATION>
41
        	<JOB_INTERFACE>
42
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
43
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
44
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
45
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
46
        		<PARAM name="dedup.conf" required="true" description="dedup configuration" />
47
        	</JOB_INTERFACE>
48
        	<SCAN>
49
        		<FILTERS operator="MUST_PASS_ALL">
50
        			<FILTER type="prefix" param="entityTypeId" />
51
        		</FILTERS>
52
        		<FAMILIES>
53
        			<FAMILY param="entityType" />
54
           			<FAMILY value="resultResult_dedup_merges" />
55
        			<FAMILY value="personPerson_dedup_merges" />
56
     				<FAMILY value="organizationOrganization_dedup_merges" />        			
57
        		</FAMILIES>
58
        	</SCAN>
59
        </HADOOP_JOB>
60
        <STATUS>
61
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
62
            <RUNNING_INSTANCES value="0"/>
63
            <CUMULATIVE_RUN value="0" />
64
        </STATUS>
65
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
66
    </BODY>
67
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceExportJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="62c1e44c-14b6-4639-9083-29bf432e6759_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="informationSpaceExportJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that exports the whole information space table as json</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataexport.ExportInformationSpaceMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
			
23
			
24
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
25
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />	
26
			
27
				<!-- MISC -->
28

  
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
33

  
34
				<PROPERTY key="dfs.blocksize" value="256M" />
35
				
36
				<PROPERTY key="mapred.compress.map.output" value="true" />	
37
				<PROPERTY key="mapred.output.compress" value="true" />
38
				<PROPERTY key="mapred.output.compression.type" value="BLOCK" />
39
				
40
			
41
				<PROPERTY key="mapred.reduce.tasks" value="0" />
42
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
43
				
44
		<!--  	Uncomment to override the default lib path -->			
45
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
46
        	</STATIC_CONFIGURATION>
47
        	<JOB_INTERFACE>
48
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
49
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
50
        		
51
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" />         		
52
        	</JOB_INTERFACE>
53
        	<SCAN>
54
        		<FILTERS />
55
        		<FAMILIES />
56
        	</SCAN>
57
        </HADOOP_JOB>
58
        <STATUS>
59
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
60
            <RUNNING_INSTANCES value="0"/>
61
            <CUMULATIVE_RUN value="0" />
62
        </STATUS>
63
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
64
    </BODY>
65
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGrouperJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="6b2d8db3-346f-4ddc-8591-39fd488c1191_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupGrouperJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />		        	
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupGrouperMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
47
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
48
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
49
        		</FAMILIES>
50
        	</SCAN>
51
        </HADOOP_JOB>
52
        <STATUS>
53
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
54
            <RUNNING_INSTANCES value="0"/>
55
            <CUMULATIVE_RUN value="0" />
56
        </STATUS>
57
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
58
    </BODY>
59
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/exportIdentifiersJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="d5a7c415-932b-442e-91c2-46f648945ac2_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="exportIdentifiersJob" type="mapreduce">
12
			<DESCRIPTION>map only job that exports the publication identifiers as json</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataexport.ExportResultIdentifiersMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>
23

  
24
				<!-- MISC -->
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
27
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
29

  
30
				<PROPERTY key="dfs.blocksize" value="256M"/>
31

  
32
				<PROPERTY key="mapred.reduce.tasks" value="1"/>
33
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
34

  
35
				<!-- Overrides the default lib path; comment out the job.lib PROPERTY below to fall back to the default -->
36
				<PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-0.0.8.5-SNAPSHOT-exportIds.jar"/>
37
			</STATIC_CONFIGURATION>
38
			<JOB_INTERFACE>
39
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
40
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
41

  
42
				<PARAM name="mapred.output.dir" required="true" description="target text output directory on hdfs"/>
43
			</JOB_INTERFACE>
44
			<SCAN>
45
				<FILTERS operator="MUST_PASS_ALL">
46
					<FILTER type="prefix" value="50"/>
47
				</FILTERS>
48
				<FAMILIES>
49
					<FAMILY value="result"/>
50
				</FAMILIES>
51
			</SCAN>
52
		</HADOOP_JOB>
53
		<STATUS>
54
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
			<RUNNING_INSTANCES value="0"/>
56
			<CUMULATIVE_RUN value="0"/>
57
		</STATUS>
58
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
	</BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupBuildRootsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="895ce6a9-4131-4954-b9ed-949ff78f5448_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupBuildRootsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that builds the roots and redirects the rels</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17

  
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22

  
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
	
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="500" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
	
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
43
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
45
        	</JOB_INTERFACE>
46
        	<SCAN caching="10">
47
        		<FILTERS operator="MUST_PASS_ALL">
48
        			<FILTER type="prefix" param="entityTypeId" />
49
        		</FILTERS>
50
        		<FAMILIES/>        		
51
        	</SCAN>
52
        </HADOOP_JOB>
53
        <STATUS>
54
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
            <RUNNING_INSTANCES value="0"/>
56
            <CUMULATIVE_RUN value="0" />
57
        </STATUS>
58
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
    </BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2ActionsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="5c4b4dbf-8198-4f7a-9a35-367c7b0a7391_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupSimilarity2ActionsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupSimilarityToActionsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.compress.map.output" value="true" />	
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
29
			
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
		<!--  	Uncomment to override the default lib path -->			
34
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
38
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
39
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
40
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
41
        		<PARAM name="dedup.conf" required="true" description="dedup configuration" />
42
        		<PARAM name="rawSetId" required="true" description="raw set identifier" />
43
        		<PARAM name="similarityCF" required="true" description="similarity column family name" />      			
44
        	</JOB_INTERFACE>
45
        	<SCAN>
46
        		<FILTERS operator="MUST_PASS_ALL">
47
        			<FILTER type="prefix" param="entityTypeId" />
48
        		</FILTERS>
49
        		<FAMILIES>
50
        			<FAMILY param="entityType" />
51
           			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
52
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
53
        		</FAMILIES>
54
        	</SCAN>
55
        </HADOOP_JOB>
56
        <STATUS>
57
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
58
            <RUNNING_INSTANCES value="0"/>
59
            <CUMULATIVE_RUN value="0" />
60
        </STATUS>
61
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
62
    </BODY>
63
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMinDistGraphJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="de888da6-2d10-4d42-a624-a44d4083414a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="dedupMinDistGraphJob" type="mapreduce">
12
			<DESCRIPTION>map reduce job that finds the minimum vertex in each connected component in the input graph (as adjacency lists)</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
23

  
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchReducer"/>
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
27
				<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
28

  
29
				<!-- MISC -->
30
				<PROPERTY key="mapred.compress.map.output" value="false"/>
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
33
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
35

  
36
				<PROPERTY key="mapred.reduce.tasks" value="1"/>
37
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
38

  
39
				<!--  	Uncomment to override the default lib path -->
40
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/>
44
				<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/>
45
			</JOB_INTERFACE>
46
			<SCAN>
47
				<FILTERS/>
48
				<FAMILIES/>
49
			</SCAN>
50
		</HADOOP_JOB>
51
		<STATUS>
52
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
			<RUNNING_INSTANCES value="0"/>
54
			<CUMULATIVE_RUN value="0"/>
55
		</STATUS>
56
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
	</BODY>
58
</RESOURCE_PROFILE>
59

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerEnrichmentProjectsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="17dd747e-f5f2-45d5-8554-9f70343bfe55_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="brokerEnrichmentProjectsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that calculates the enrichment events based on the publications dedup results</DESCRIPTION>
12

  
13
	        <STATIC_CONFIGURATION><!-- I/O FORMAT -->
14
		        <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
15
		        <PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/>
16

  
17
		        <!-- MAPPER -->
18
		        <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.ProjectEnrichmentMapper"/>
19
		        <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
20
		        <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
21

  
22
		        <!-- REDUCER -->
23
		        <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.ProjectEnrichmentReducer"/>
24
		        <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
25
		        <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
26

  
27
		        <!-- MISC -->
28
		        <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
29
		        <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
30
		        <PROPERTY key="mapreduce.map.speculative" value="false"/>
31
		        <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
32
		        <PROPERTY key="dfs.blocksize" value="256M"/>
33
		        <PROPERTY key="mapred.reduce.tasks" value="4"/>
34

  
35
		        <!-- ES -->
36
		        <PROPERTY key="es.nodes" value="ip-90-147-167-137.ct1.garrservices.it:9200,ip-90-147-167-126.ct1.garrservices.it:9200,ip-90-147-167-13.ct1.garrservices.it:9200,ip-90-147-167-125.ct1.garrservices.it:9200"/>
37
		        <PROPERTY key="es.nodes.resolve.hostname" value="false"/>
38
		        <PROPERTY key="es.batch.write.retry.count" value="10"/>
39
		        <PROPERTY key="es.batch.size.entries" value="500"/>
40
		        <PROPERTY key="es.nodes.wan.only" value="true"/>
41
		        <PROPERTY key="es.resource" value="events/event"/>
42
		        <PROPERTY key="es.input.json" value="yes"/>
43

  
44
		        <!-- BROKER -->
45
		        <PROPERTY key="broker.datasource.id.whitelist" value=""/>
46
		        <PROPERTY key="broker.datasource.id.blacklist" value=""/>
47
		        <PROPERTY key="broker.datasource.untrusted.oa.list" value="opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"/>
48
		        <PROPERTY key="broker.datasource.type.whitelist" value="pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic"/><!-- <PROPERTY key="user.name" value="dnet" /> --><!-- No job.lib override is set for this job; add a job.lib PROPERTY here to override the default lib path -->
49
	        </STATIC_CONFIGURATION>
50
	        <JOB_INTERFACE>
51
		        <PARAM description="source hbase table" name="hbase.mapred.inputtable" required="true"/>
52
		        <PARAM description="source hbase table" name="hbase.mapreduce.inputtable" required="true"/>
53
	        </JOB_INTERFACE>
54
        	<SCAN>
55
        		<FILTERS operator="MUST_PASS_ONE">
56
			        <FILTER type="prefix" value="40" />
57
        			<FILTER type="prefix" value="50" />
58
        		</FILTERS>
59
        		<FAMILIES>
60
	       			<FAMILY value="result" />
61
			        <FAMILY value="project" />
62
        			<FAMILY value="resultResult_dedup_isMergedIn" />
63
			        <FAMILY value="resultProject_outcome_produces" />
64
			        <FAMILY value="resultProject_outcome_isProducedBy" />
65
        		</FAMILIES>
66
        	</SCAN>
67
        </HADOOP_JOB>
68
        <STATUS>
69
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
70
            <RUNNING_INSTANCES value="0"/>
71
            <CUMULATIVE_RUN value="0" />
72
        </STATUS>
73
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
74
    </BODY>
75
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupAnchorStatsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="2af384c6-9118-426d-9394-d7bbc42d707c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupAnchorStatsJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that calculates statistics on the person anchors</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" />
17

  
18
        	
19
        		<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.AnchorStatsMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
23
				
24
				<!-- JOB GLOBAL -->		
25
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" />
26
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/>
27
				
28
				<!-- MISC -->					
29
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
31
				<PROPERTY key="mapred.reduce.tasks" value="0" />
32
				<!--<PROPERTY key="mapred.fairscheduler.pool" value="solr"/> -->
33
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
34
				
35
		<!--  	Uncomment to override the default lib path -->			
36
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
37
        	</STATIC_CONFIGURATION>
38
        	<JOB_INTERFACE>
39
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
40
	       		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
41
        	</JOB_INTERFACE>
42
        	<SCAN>
43
        		<FILTERS operator="MUST_PASS_ALL">
44
        			<FILTER type="prefix" param="entityTypeId" />
45
        		</FILTERS>
46
        		<FAMILIES>
47
        			<FAMILY param="entityType" />
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff