Revision 52186
Added by Claudio Atzori about 6 years ago
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/deploy.info | ||
---|---|---|
1 |
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-profiles/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-openaireplus-profiles"} |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerJoinProjectPublicationJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="f377cc29-0130-41a7-9edc-7e88230a1b01_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="brokerJoinProjectPublicationJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that enriches the publications with the project metadata, starting from the projects linked to them</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.join.ProjectPublicationJoinMapper"/> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.join.ProjectPublicationJoinReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
33 |
|
|
34 |
<PROPERTY key="dfs.blocksize" value="256M"/> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="100"/> |
|
37 |
|
|
38 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
39 |
|
|
40 |
<!-- Uncomment to override the default lib path --> |
|
41 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
42 |
</STATIC_CONFIGURATION> |
|
43 |
<JOB_INTERFACE> |
|
44 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
45 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
46 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
47 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
48 |
</JOB_INTERFACE> |
|
49 |
<SCAN> |
|
50 |
<FILTERS operator="MUST_PASS_ONE"> |
|
51 |
<FILTER type="prefix" value="50" /> |
|
52 |
<FILTER type="prefix" value="40" /> |
|
53 |
</FILTERS> |
|
54 |
<FAMILIES> |
|
55 |
<FAMILY value="result" /> |
|
56 |
<FAMILY value="project" /> |
|
57 |
<FAMILY value="resultProject_outcome_produces" /> |
|
58 |
</FAMILIES> |
|
59 |
</SCAN> |
|
60 |
</HADOOP_JOB> |
|
61 |
<STATUS> |
|
62 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
63 |
<RUNNING_INSTANCES value="0"/> |
|
64 |
<CUMULATIVE_RUN value="0" /> |
|
65 |
</STATUS> |
|
66 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
67 |
</BODY> |
|
68 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingQuickJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="13beed98-81bf-4fbd-ab4f-de071177997c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="iisPreprocessingQuickJob" type="oozie"> |
|
11 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
<!-- Cluster wide --> |
|
14 |
<PROPERTY key="queueName" value="default"/> |
|
15 |
<PROPERTY key="user.name" value="dnet" /> |
|
16 |
|
|
17 |
<!-- Runtime --> |
|
18 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing_quick_test"/> |
|
19 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
20 |
<PROPERTY key="input_referenceextraction_project" value="/user/marek.horst/share/referenceextraction/document_projects/2014-04-11"/> |
|
21 |
<PROPERTY key="input_referenceextraction_dataset" value="/user/marek.horst/share/referenceextraction/document_datasets/all/2014-04-11"/> |
|
22 |
<PROPERTY key="export_action_hbase_table_initialize" value="false"/> |
|
23 |
</STATIC_CONFIGURATION> |
|
24 |
<JOB_INTERFACE> |
|
25 |
<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" /> |
|
26 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" /> |
|
27 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records" /> |
|
28 |
<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" /> |
|
29 |
<PARAM name="import_database_service_location" required="true" description="database service endpoint" /> |
|
30 |
<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction" /> |
|
31 |
<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext" /> |
|
32 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" /> |
|
33 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" /> |
|
34 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" /> |
|
35 |
<PARAM name="nameNode" required="true" description="hdfs name node" /> |
|
36 |
<PARAM name="jobTracker" required="true" description="job tracker name" /> |
|
37 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" /> |
|
38 |
<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" /> |
|
39 |
<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" /> |
|
40 |
<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" /> |
|
41 |
<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" /> |
|
42 |
</JOB_INTERFACE> |
|
43 |
</HADOOP_JOB> |
|
44 |
<STATUS> |
|
45 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
46 |
<RUNNING_INSTANCES value="0"/> |
|
47 |
<CUMULATIVE_RUN value="0" /> |
|
48 |
</STATUS> |
|
49 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
50 |
</BODY> |
|
51 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/offlineHbaseLoadJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="073e55eb-c6f4-49a9-80b3-1a927612ba5b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="offlineHbaseLoad" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that loads a given entity type in the offline dedup table</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.OfflineHbaseLoadMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
40 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId" /> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY param="entityType" /> |
|
48 |
</FAMILIES> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0" /> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="iisPreprocessingJob" type="oozie"> |
|
12 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
<!-- Cluster wide --> |
|
15 |
<PROPERTY key="queueName" value="default"/> |
|
16 |
<PROPERTY key="user.name" value="dnet.beta"/> |
|
17 |
|
|
18 |
<!-- Runtime --> |
|
19 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"/> |
|
20 |
<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"/> |
|
21 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
22 |
<PROPERTY key="export_action_hbase_table_initialize" value="false"/> |
|
23 |
<!-- <PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> --> |
|
24 |
<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/> |
|
25 |
<PROPERTY key="metadataextraction_excluded_checksums" |
|
26 |
value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/> |
|
27 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
28 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
29 |
<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/> |
|
30 |
</STATIC_CONFIGURATION> |
|
31 |
<JOB_INTERFACE> |
|
32 |
<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/> |
|
33 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/> |
|
34 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records"/> |
|
35 |
<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/> |
|
36 |
<PARAM name="import_database_service_location" required="true" description="database service endpoint"/> |
|
37 |
<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction"/> |
|
38 |
<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext"/> |
|
39 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/> |
|
40 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/> |
|
41 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/> |
|
42 |
<PARAM name="nameNode" required="true" description="hdfs name node"/> |
|
43 |
<PARAM name="jobTracker" required="true" description="job tracker name"/> |
|
44 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/> |
|
45 |
<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/> |
|
46 |
<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/> |
|
47 |
<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/> |
|
48 |
<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/> |
|
49 |
</JOB_INTERFACE> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0"/> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindRootsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="61f9270e-ffc1-4095-9f76-3852e4d227fb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupFindRootsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that finds the root of a similarity group</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
31 |
|
|
32 |
<!-- Uncomment to override the default lib path --> |
|
33 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
40 |
</JOB_INTERFACE> |
|
41 |
<SCAN> |
|
42 |
<FILTERS operator="MUST_PASS_ALL"> |
|
43 |
<FILTER type="prefix" param="entityTypeId" /> |
|
44 |
</FILTERS> |
|
45 |
<FAMILIES> |
|
46 |
<FAMILY param="entityType" /> |
|
47 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
48 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
49 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
50 |
</FAMILIES> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0" /> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="iisMainJob" type="oozie"> |
|
12 |
<DESCRIPTION>IIS main workflow</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- Cluster wide --> |
|
16 |
<PROPERTY key="queueName" value="default"/> |
|
17 |
<PROPERTY key="user.name" value="dnet.beta"/> |
|
18 |
|
|
19 |
<!-- Runtime --> |
|
20 |
<PROPERTY key="match_content_with_metadata" value="true"/> |
|
21 |
<PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/> |
|
22 |
<PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/> |
|
23 |
<!--<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> --> |
|
24 |
<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/> |
|
25 |
|
|
26 |
<PROPERTY key="export_action_hbase_table_initialize" value="true"/> |
|
27 |
<PROPERTY key="import_content_connection_timeout" value="180000"/> |
|
28 |
<PROPERTY key="import_content_read_timeout" value="180000"/> |
|
29 |
<PROPERTY key="import_resultset_client_read_timeout" value="180000"/> |
|
30 |
|
|
31 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
32 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/> |
|
33 |
<PROPERTY key="metadataextraction_excluded_checksums" |
|
34 |
value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/> |
|
35 |
<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/> |
|
36 |
<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/> |
|
37 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
38 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
39 |
<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/> |
|
40 |
<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/> |
|
44 |
<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing"/> |
|
45 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/> |
|
46 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/> |
|
47 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/> |
|
48 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/> |
|
49 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/> |
|
50 |
<PARAM name="nameNode" required="true" description="hdfs name node"/> |
|
51 |
<PARAM name="jobTracker" required="true" description="job tracker name"/> |
|
52 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/> |
|
53 |
<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/> |
|
54 |
<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/> |
|
55 |
<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" |
|
56 |
required="true"/> |
|
57 |
<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/> |
|
58 |
<PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/> |
|
59 |
<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/> |
|
60 |
<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/> |
|
61 |
<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/> |
|
62 |
<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/> |
|
63 |
|
|
64 |
<!-- flags to enable/disable IIS modules --> |
|
65 |
<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module"/> |
|
66 |
<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module"/> |
|
67 |
<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module"/> |
|
68 |
<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module"/> |
|
69 |
<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module"/> |
|
70 |
<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module"/> |
|
71 |
<PARAM name="active_referenceextraction_researchinitiative" required="true" |
|
72 |
description="enable/disable the research initiative extraction module"/> |
|
73 |
<PARAM name="active_statistics" required="true" description="enable/disable the statistics module"/> |
|
74 |
<PARAM name="active_referenceextraction_pdb" required="true" description="enable/disable the protein data bank extraction module"/> |
|
75 |
|
|
76 |
</JOB_INTERFACE> |
|
77 |
</HADOOP_JOB> |
|
78 |
<STATUS> |
|
79 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
80 |
<RUNNING_INSTANCES value="0"/> |
|
81 |
<CUMULATIVE_RUN value="0"/> |
|
82 |
</STATUS> |
|
83 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
84 |
</BODY> |
|
85 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupDeleteDedupRelsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="5626c94e-0005-416a-9ea4-48fc8af85ecd_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="deleteDedupRelsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that deletes the dedup rels used in the deduplication process</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupDeleteRelMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<!-- Writable is the Hadoop interface org.apache.hadoop.io.Writable (the hbase.io package has no such class) --> <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Writable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
31 |
|
|
32 |
<!-- Uncomment to override the default lib path --> |
|
33 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
40 |
</JOB_INTERFACE> |
|
41 |
<SCAN> |
|
42 |
<FILTERS operator="MUST_PASS_ALL"> |
|
43 |
<FILTER type="prefix" param="entityTypeId" /> |
|
44 |
</FILTERS> |
|
45 |
<FAMILIES> |
|
46 |
<FAMILY param="entityType" /> |
|
47 |
<FAMILY value="resultResult_dedup_merges" /> |
|
48 |
<FAMILY value="resultResult_dedup_isMergedIn" /> |
|
49 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
50 |
|
|
51 |
<FAMILY value="personPerson_dedup_merges" /> |
|
52 |
<FAMILY value="personPerson_dedup_isMergedIn" /> |
|
53 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
54 |
|
|
55 |
<FAMILY value="organizationOrganization_dedup_merges" /> |
|
56 |
<FAMILY value="organizationOrganization_dedup_isMergedIn" /> |
|
57 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
58 |
</FAMILIES> |
|
59 |
</SCAN> |
|
60 |
</HADOOP_JOB> |
|
61 |
<STATUS> |
|
62 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
63 |
<RUNNING_INSTANCES value="0"/> |
|
64 |
<CUMULATIVE_RUN value="0" /> |
|
65 |
</STATUS> |
|
66 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
67 |
</BODY> |
|
68 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMarkDeletedEntityJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="667fe203-ee51-4dff-8c9c-b90e66e96eb4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupMarkDeletedEntityJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMarkDeletedEntityMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/> |
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0"/> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
39 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
40 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId"/> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY param="entityType"/> |
|
48 |
<FAMILY value="resultResult_dedup_isMergedIn"/> |
|
49 |
<FAMILY value="organizationOrganization_dedup_isMergedIn"/> |
|
50 |
</FAMILIES> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0"/> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJobV2.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="51e1660e-b1fe-4389-96b5-11a522b78c15_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2016-10-15T12:03:30+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="iisPreprocessingJobV2" type="oozie"> |
|
11 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
<!-- Cluster wide --> |
|
14 |
<PROPERTY key="user.name" value="dnet.production"/> |
|
15 |
|
|
16 |
<!-- Runtime --> |
|
17 |
<PROPERTY key="metadataextraction_default_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/> |
|
18 |
<!-- skipping _default chunk from property name since 2017.02.21 --> |
|
19 |
<PROPERTY key="metadataextraction_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/> |
|
20 |
<PROPERTY key="ingest_pmc_cache_location" value="/user/${user.name}/iis/cache/ingestpmc"/> |
|
21 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
22 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
23 |
<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_PROD"/> |
|
24 |
<PROPERTY key="reports_external_path" value="/user/${user.name}/iis/reports/${execution_environment}"/> |
|
25 |
<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/> |
|
26 |
<PROPERTY key="import_project_stream_endpoint_url" value="http://services.openaire.eu:8980/provision/mvc/openaire/export/streamProjectDetails.do"/> |
|
27 |
</STATIC_CONFIGURATION> |
|
28 |
<JOB_INTERFACE> |

29 |
<PARAM description="objectStore service endpoint" name="import_content_object_store_location" required="true"/> |

30 |
<PARAM description="mdstore service location" name="import_mdstore_service_location" required="true"/> |

31 |
<PARAM description="mdstore ids for dataset records" name="import_dataset_mdstore_ids_csv" required="true"/> |

32 |
<PARAM description="mdstore id for WoS records" name="import_wos_mdstore_id" required="true"/> |

33 |
<PARAM description="database service endpoint" name="import_database_service_location" required="true"/> |

34 |
<PARAM description="objectstore ids subject to dataset reference extraction" name="import_content_datacite_objectstores_csv" required="true"/> |

35 |
<PARAM description="objectstore ids for WoS plaintext" name="import_content_wos_plaintext_objectstores_csv" required="true"/> |

36 |
<PARAM description="oozie job application absolute path" name="oozie.wf.application.path" required="true"/> |

37 |
<PARAM description="target action set for project references" name="export_action_set_id_document_referencedProjects" required="true"/> |

38 |
<PARAM description="target action set for dataset references" name="export_action_set_id_document_referencedDatasets" required="true"/> |

39 |
<PARAM description="target action set for WoS entities" name="export_action_set_id_entity_wos" required="true"/> |

40 |
<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/> |

41 |
</JOB_INTERFACE> |
|
42 |
</HADOOP_JOB> |
|
43 |
<STATUS> |
|
44 |
<LAST_SUBMISSION_DATE value="2017-07-20T07:13:23+00:00"/> |
|
45 |
<RUNNING_INSTANCES value="1"/> |
|
46 |
<CUMULATIVE_RUN value="23"/> |
|
47 |
</STATUS> |
|
48 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
49 |
</BODY> |
|
50 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerEnrichmentJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="ffcc359c-555a-4d86-9ee2-b8ad212a0185_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="brokerEnrichmentJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that calculates the enrichment events based on the publications dedup results</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.EnrichmentMapper"/> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.EnrichmentReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
|
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
31 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
34 |
|
|
35 |
<PROPERTY key="dfs.blocksize" value="256M"/> |
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1"/> |
|
37 |
|
|
38 |
<!-- ES --> |
|
39 |
<PROPERTY key="es.nodes" |
|
40 |
value="ip-90-147-167-137.ct1.garrservices.it:9200,ip-90-147-167-126.ct1.garrservices.it:9200,ip-90-147-167-13.ct1.garrservices.it:9200,ip-90-147-167-125.ct1.garrservices.it:9200"/> |
|
41 |
<PROPERTY key="es.nodes.resolve.hostname" value="false"/> |
|
42 |
<PROPERTY key="es.nodes.wan.only" value="true"/> |
|
43 |
<PROPERTY key="es.resource" value="events/event"/> |
|
44 |
<PROPERTY key="es.input.json" value="yes"/> |
|
45 |
|
|
46 |
|
|
47 |
<!-- BROKER --> |
|
48 |
<PROPERTY key="broker.datasource.id.whitelist" value=""/> |
|
49 |
<PROPERTY key="broker.datasource.id.blacklist" value=""/> |
|
50 |
<PROPERTY key="broker.datasource.untrusted.oa.list" value="opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"/> |
|
51 |
<PROPERTY key="broker.datasource.type.whitelist" value="pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic"/> |
|
52 |
|
|
53 |
|
|
54 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
55 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
56 |
|
|
57 |
<!-- Uncomment to override the default lib path --> |
|
58 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
59 |
</STATIC_CONFIGURATION> |
|
60 |
<JOB_INTERFACE> |
|
61 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
62 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
63 |
</JOB_INTERFACE> |
|
64 |
<SCAN> |
|
65 |
<FILTERS operator="MUST_PASS_ALL"> |
|
66 |
<FILTER type="prefix" value="50" /> |
|
67 |
</FILTERS> |
|
68 |
<FAMILIES> |
|
69 |
<FAMILY value="result" /> |
|
70 |
<FAMILY value="resultResult_dedup_isMergedIn" /> |
|
71 |
</FAMILIES> |
|
72 |
</SCAN> |
|
73 |
</HADOOP_JOB> |
|
74 |
<STATUS> |
|
75 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
76 |
<RUNNING_INSTANCES value="0"/> |
|
77 |
<CUMULATIVE_RUN value="0" /> |
|
78 |
</STATUS> |
|
79 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
80 |
</BODY> |
|
81 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/calculatePersonDistributionStep1Job.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="f61bc720-2821-4871-937d-64b79f098714_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2015-06-15T10:50:29+02:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="calculatePersonDistributionStep1Job" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that performs tests for notificationBroker service (step 1/2)</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Mapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Reducer"/> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
31 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="100"/> |
|
35 |
|
|
36 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
37 |
|
|
38 |
<!-- Uncomment to override the default lib path --> |
|
39 |
<!-- <PROPERTY key="job.lib" value="/user/michele.artini/lib/dnet-mapreduce-jobs-assembly-0.0.6.6-SNAPSHOT.jar"/> --> |
|
40 |
</STATIC_CONFIGURATION> |
|
41 |
<JOB_INTERFACE> |
|
42 |
<PARAM description="source hbase table" name="hbase.mapred.inputtable" required="true"/> |
|
43 |
<PARAM description="source hbase table" name="hbase.mapreduce.inputtable" required="true"/> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS operator="MUST_PASS_ONE"> |
|
47 |
<FILTER type="prefix" value="50"/> |
|
48 |
<FILTER type="prefix" value="10"/> |
|
49 |
</FILTERS> |
|
50 |
<FAMILIES> |
|
51 |
<FAMILY value="result"/> |
|
52 |
<FAMILY value="datasource"/> |
|
53 |
</FAMILIES> |
|
54 |
</SCAN> |
|
55 |
</HADOOP_JOB> |
|
56 |
<STATUS> |
|
57 |
<LAST_SUBMISSION_DATE value="2015-06-15T11:10:17+02:00"/> |
|
58 |
<RUNNING_INSTANCES value="0"/> |
|
59 |
<CUMULATIVE_RUN value="7"/> |
|
60 |
</STATUS> |
|
61 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
62 |
</BODY> |
|
63 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJobV2.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="d730a831-a982-4034-a890-de98fd972e87_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2017-07-05T15:05:50+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="iisMainJobV2" type="oozie"> |
|
11 |
<DESCRIPTION>IIS main workflow</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- Cluster wide --> |
|
15 |
<PROPERTY key="user.name" value="dnet.production"/> |
|
16 |
|
|
17 |
<!-- Runtime --> |
|
18 |
<PROPERTY key="metadataextraction_default_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/> |
|
19 |
<!-- skipping _default chunk from property name since 2017.02.21 --> |
|
20 |
<PROPERTY key="metadataextraction_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/> |
|
21 |
<PROPERTY key="ingest_pmc_cache_location" value="/user/${user.name}/iis/cache/ingestpmc"/> |
|
22 |
<PROPERTY key="software_webcrawl_cache_location" value="/user/${user.name}/iis/cache/webcrawler"/> |
|
23 |
<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/> |
|
24 |
<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/> |
|
25 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
26 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
27 |
<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/> |
|
28 |
<PROPERTY key="export_trust_level_threshold_document_software_url" value="0.0"/> |
|
29 |
<PROPERTY key="export_trust_level_threshold_matched_doc_organizations" value="0.0"/> |
|
30 |
<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_PROD"/> |
|
31 |
<PROPERTY key="reports_external_path" value="/user/${user.name}/iis/reports/${execution_environment}"/> |
|
32 |
<PROPERTY key="import_hbase_dump_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/tmp/db_openaireplus_services.export.2017.07.19"/> |
|
33 |
<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM description="oozie job application absolute path" name="oozie.wf.application.path" required="true"/> |
|
37 |
<PARAM description="objectStore service endpoint" name="import_content_object_store_location" required="true"/> |
|
38 |
<PARAM description="csv list of the available object stores subject to processing" name="import_content_objectstores_csv" required="true"/> |
|
39 |
<PARAM description="mdstore service location" name="import_mdstore_service_location" required="true"/> |
|
40 |
<PARAM description="mdstore ids for dataset records" name="import_dataset_mdstore_ids_csv" required="true"/> |
|
41 |
<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/> |
|
42 |
<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/> |
|
43 |
<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" required="true"/> |
|
44 |
<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/> |
|
45 |
<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/> |
|
46 |
<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/> |
|
47 |
<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/> |
|
48 |
<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/> |
|
49 |
<PARAM description="software reference extraction actionset identifier" name="export_action_set_id_document_software_url" required="true"/> |
|
50 |
<PARAM description="target action set for affiliations" name="export_action_set_id_matched_doc_organizations" required="true"/> |
|
51 |
|
|
52 |
|
|
53 |
<!-- flags to enable/disable IIS modules --> |
|
54 |
<PARAM description="enable/disable the citation matching module" name="active_citationmatching" required="true"/> |
|
55 |
<PARAM description="enable/disable the document classification module" name="active_documentsclassification" required="true"/> |
|
56 |
<PARAM description="enable/disable the document similarity module" name="active_documentssimilarity" required="true"/> |
|
57 |
<PARAM description="enable/disable the dataset reference extraction module" name="active_referenceextraction_dataset" required="true"/> |
|
58 |
<PARAM description="enable/disable the project reference extraction module" name="active_referenceextraction_project" required="true"/> |
|
59 |
<PARAM description="enable/disable the research initiative extraction module" name="active_referenceextraction_researchinitiative" required="true"/> |
|
60 |
<PARAM description="enable/disable the protein data bank extraction module" name="active_referenceextraction_pdb" required="true"/> |
|
61 |
<PARAM description="enable/disable the software reference extraction algorithm" name="active_referenceextraction_software_url" required="true"/> |
|
62 |
</JOB_INTERFACE> |
|
63 |
</HADOOP_JOB> |
|
64 |
<STATUS> |
|
65 |
<LAST_SUBMISSION_DATE value="2017-07-02T09:39:51+00:00"/> |
|
66 |
<RUNNING_INSTANCES value="0"/> |
|
67 |
<CUMULATIVE_RUN value="13"/> |
|
68 |
</STATUS> |
|
69 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
70 |
</BODY> |
|
71 |
</RESOURCE_PROFILE> |
|
72 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/personCsvJoinJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="3f544a36-f123-4f5c-acf4-7c25f6591ec4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="personCsvJoinJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that joins person entities by "surname+first name letter" and serialise the output as csv</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" /> |
|
17 |
<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupMapper" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupReducer" /> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapred.reduce.tasks" value="10" /> |
|
33 |
|
|
34 |
<PROPERTY key="hash.values.csv" value="manghip,pmanghi,corog,gcoro" /> |
|
35 |
|
|
36 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
37 |
|
|
38 |
<!-- Uncomment to override the default lib path --> |
|
39 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
40 |
|
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="input sequence file" /> |
|
44 |
</JOB_INTERFACE> |
|
45 |
</HADOOP_JOB> |
|
46 |
<STATUS> |
|
47 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
48 |
<RUNNING_INSTANCES value="0"/> |
|
49 |
<CUMULATIVE_RUN value="0" /> |
|
50 |
</STATUS> |
|
51 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
52 |
</BODY> |
|
53 |
</RESOURCE_PROFILE> |
|
54 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupCandidateScanJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="675f1436-205a-4b19-8b6b-35e1c17fb125_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupCandidateScanJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
30 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
33 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
34 |
|
|
35 |
<PROPERTY key="mapred.reduce.tasks" value="1000" /> |
|
36 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
37 |
|
|
38 |
<!-- Uncomment to override the default lib path --> |
|
39 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
40 |
</STATIC_CONFIGURATION> |
|
41 |
<JOB_INTERFACE> |
|
42 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
43 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
44 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
45 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
46 |
<PARAM name="dedup.conf" required="true" description="dedup configuration" /> |
|
47 |
</JOB_INTERFACE> |
|
48 |
<SCAN> |
|
49 |
<FILTERS operator="MUST_PASS_ALL"> |
|
50 |
<FILTER type="prefix" param="entityTypeId" /> |
|
51 |
</FILTERS> |
|
52 |
<FAMILIES> |
|
53 |
<FAMILY param="entityType" /> |
|
54 |
<FAMILY value="resultResult_dedup_merges" /> |
|
55 |
<FAMILY value="personPerson_dedup_merges" /> |
|
56 |
<FAMILY value="organizationOrganization_dedup_merges" /> |
|
57 |
</FAMILIES> |
|
58 |
</SCAN> |
|
59 |
</HADOOP_JOB> |
|
60 |
<STATUS> |
|
61 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
62 |
<RUNNING_INSTANCES value="0"/> |
|
63 |
<CUMULATIVE_RUN value="0" /> |
|
64 |
</STATUS> |
|
65 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
66 |
</BODY> |
|
67 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceExportJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="62c1e44c-14b6-4639-9083-29bf432e6759_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="informationSpaceExportJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that exports the whole information space table as json</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataexport.ExportInformationSpaceMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
|
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
37 |
<PROPERTY key="mapred.output.compress" value="true" /> |
|
38 |
<PROPERTY key="mapred.output.compression.type" value="BLOCK" /> |
|
39 |
|
|
40 |
|
|
41 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
42 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
43 |
|
|
44 |
<!-- Uncomment to override the default lib path --> |
|
45 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
46 |
</STATIC_CONFIGURATION> |
|
47 |
<JOB_INTERFACE> |
|
48 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
49 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
50 |
|
|
51 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
52 |
</JOB_INTERFACE> |
|
53 |
<SCAN> |
|
54 |
<FILTERS /> |
|
55 |
<FAMILIES /> |
|
56 |
</SCAN> |
|
57 |
</HADOOP_JOB> |
|
58 |
<STATUS> |
|
59 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
60 |
<RUNNING_INSTANCES value="0"/> |
|
61 |
<CUMULATIVE_RUN value="0" /> |
|
62 |
</STATUS> |
|
63 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
64 |
</BODY> |
|
65 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGrouperJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="6b2d8db3-346f-4ddc-8591-39fd488c1191_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupGrouperJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupGrouperMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
31 |
|
|
32 |
<!-- Uncomment to override the default lib path --> |
|
33 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
40 |
</JOB_INTERFACE> |
|
41 |
<SCAN> |
|
42 |
<FILTERS operator="MUST_PASS_ALL"> |
|
43 |
<FILTER type="prefix" param="entityTypeId" /> |
|
44 |
</FILTERS> |
|
45 |
<FAMILIES> |
|
46 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
47 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
48 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
49 |
</FAMILIES> |
|
50 |
</SCAN> |
|
51 |
</HADOOP_JOB> |
|
52 |
<STATUS> |
|
53 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
54 |
<RUNNING_INSTANCES value="0"/> |
|
55 |
<CUMULATIVE_RUN value="0" /> |
|
56 |
</STATUS> |
|
57 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
58 |
</BODY> |
|
59 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/exportIdentifiersJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="d5a7c415-932b-442e-91c2-46f648945ac2_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="exportIdentifiersJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that exports the publication identifiers as json</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataexport.ExportResultIdentifiersMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/> |
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
29 |
|
|
30 |
<PROPERTY key="dfs.blocksize" value="256M"/> |
|
31 |
|
|
32 |
<PROPERTY key="mapred.reduce.tasks" value="1"/><!-- NOTE(review): DESCRIPTION calls this a "map only job", yet one reduce task is requested and no mapreduce.reduce.class is set; confirm this is intended (presumably to collect the output into a single file) --> |
|
33 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
34 |
|
|
35 |
<!-- Overrides the default lib path; comment out the job.lib property below to fall back to the default --> |
|
36 |
<PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-0.0.8.5-SNAPSHOT-exportIds.jar"/> |
|
37 |
</STATIC_CONFIGURATION> |
|
38 |
<JOB_INTERFACE> |
|
39 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
40 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
41 |
|
|
42 |
<!-- Output is written through TextOutputFormat (see mapreduce.outputformat.class above), not as a sequence file --><PARAM name="mapred.output.dir" required="true" description="target text output directory on hdfs"/> |
|
43 |
</JOB_INTERFACE> |
|
44 |
<SCAN> |
|
45 |
<FILTERS operator="MUST_PASS_ALL"> |
|
46 |
<FILTER type="prefix" value="50"/> |
|
47 |
</FILTERS> |
|
48 |
<FAMILIES> |
|
49 |
<FAMILY value="result"/> |
|
50 |
</FAMILIES> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0"/> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupBuildRootsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="895ce6a9-4131-4954-b9ed-949ff78f5448_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupBuildRootsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that builds the roots and redirects the rels</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="500" /> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
42 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
43 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
44 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN caching="10"> |
|
47 |
<FILTERS operator="MUST_PASS_ALL"> |
|
48 |
<FILTER type="prefix" param="entityTypeId" /> |
|
49 |
</FILTERS> |
|
50 |
<FAMILIES/> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0" /> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2ActionsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="5c4b4dbf-8198-4f7a-9a35-367c7b0a7391_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupSimilarity2ActionsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupSimilarityToActionsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /><!-- NOTE(review): zero reduce tasks and no reducer class are configured, while DESCRIPTION says "map reduce job"; confirm which one is accurate --> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
40 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
41 |
<PARAM name="dedup.conf" required="true" description="dedup configuration" /> |
|
42 |
<PARAM name="rawSetId" required="true" description="raw set identifier" /> |
|
43 |
<PARAM name="similarityCF" required="true" description="similarity column family name" /> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS operator="MUST_PASS_ALL"> |
|
47 |
<FILTER type="prefix" param="entityTypeId" /> |
|
48 |
</FILTERS> |
|
49 |
<FAMILIES> |
|
50 |
<FAMILY param="entityType" /> |
|
51 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
52 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
53 |
</FAMILIES> |
|
54 |
</SCAN> |
|
55 |
</HADOOP_JOB> |
|
56 |
<STATUS> |
|
57 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
58 |
<RUNNING_INSTANCES value="0"/> |
|
59 |
<CUMULATIVE_RUN value="0" /> |
|
60 |
</STATUS> |
|
61 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
62 |
</BODY> |
|
63 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMinDistGraphJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="de888da6-2d10-4d42-a624-a44d4083414a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupMinDistGraphJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that finds the minimum vertex in each connected component in the input graph (as adjacency lists)</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchReducer"/> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="false"/> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1"/> |
|
37 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
38 |
|
|
39 |
<!-- Uncomment to override the default lib path --> |
|
40 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/> |
|
44 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS/> |
|
48 |
<FAMILIES/> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0"/> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
|
59 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerEnrichmentProjectsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="17dd747e-f5f2-45d5-8554-9f70343bfe55_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="brokerEnrichmentProjectsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that calculates the enrichment events based on the publications dedup results</DESCRIPTION> |
|
12 |
|
|
13 |
<STATIC_CONFIGURATION><!-- I/O FORMAT --> |
|
14 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
15 |
<PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/> |
|
16 |
|
|
17 |
<!-- MAPPER --> |
|
18 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.ProjectEnrichmentMapper"/> |
|
19 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
20 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
21 |
|
|
22 |
<!-- REDUCER --> |
|
23 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.ProjectEnrichmentReducer"/> |
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
29 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
30 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
31 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
32 |
<PROPERTY key="dfs.blocksize" value="256M"/> |
|
33 |
<PROPERTY key="mapred.reduce.tasks" value="4"/> |
|
34 |
|
|
35 |
<!-- ES --> |
|
36 |
<PROPERTY key="es.nodes" value="ip-90-147-167-137.ct1.garrservices.it:9200,ip-90-147-167-126.ct1.garrservices.it:9200,ip-90-147-167-13.ct1.garrservices.it:9200,ip-90-147-167-125.ct1.garrservices.it:9200"/> |
|
37 |
<PROPERTY key="es.nodes.resolve.hostname" value="false"/> |
|
38 |
<!-- Key written without trailing whitespace so that it equals the es-hadoop setting name --><PROPERTY key="es.batch.write.retry.count" value="10"/> |
|
39 |
<!-- Key written without trailing whitespace so that it equals the es-hadoop setting name --><PROPERTY key="es.batch.size.entries" value="500"/> |
|
40 |
<PROPERTY key="es.nodes.wan.only" value="true"/> |
|
41 |
<PROPERTY key="es.resource" value="events/event"/> |
|
42 |
<PROPERTY key="es.input.json" value="yes"/> |
|
43 |
|
|
44 |
<!-- BROKER --> |
|
45 |
<PROPERTY key="broker.datasource.id.whitelist" value=""/> |
|
46 |
<PROPERTY key="broker.datasource.id.blacklist" value=""/> |
|
47 |
<PROPERTY key="broker.datasource.untrusted.oa.list" value="opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"/> |
|
48 |
<PROPERTY key="broker.datasource.type.whitelist" value="pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic"/><!-- <PROPERTY key="user.name" value="dnet" /> --><!-- No job.lib override is declared in this profile: the "uncomment to override the default lib path" hint left here had no property to uncomment --> |
|
49 |
</STATIC_CONFIGURATION> |
|
50 |
<JOB_INTERFACE> |
|
51 |
<PARAM description="source hbase table" name="hbase.mapred.inputtable" required="true"/> |
|
52 |
<PARAM description="source hbase table" name="hbase.mapreduce.inputtable" required="true"/> |
|
53 |
</JOB_INTERFACE> |
|
54 |
<SCAN> |
|
55 |
<FILTERS operator="MUST_PASS_ONE"> |
|
56 |
<FILTER type="prefix" value="40" /> |
|
57 |
<FILTER type="prefix" value="50" /> |
|
58 |
</FILTERS> |
|
59 |
<FAMILIES> |
|
60 |
<FAMILY value="result" /> |
|
61 |
<FAMILY value="project" /> |
|
62 |
<FAMILY value="resultResult_dedup_isMergedIn" /> |
|
63 |
<FAMILY value="resultProject_outcome_produces" /> |
|
64 |
<FAMILY value="resultProject_outcome_isProducedBy" /> |
|
65 |
</FAMILIES> |
|
66 |
</SCAN> |
|
67 |
</HADOOP_JOB> |
|
68 |
<STATUS> |
|
69 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
70 |
<RUNNING_INSTANCES value="0"/> |
|
71 |
<CUMULATIVE_RUN value="0" /> |
|
72 |
</STATUS> |
|
73 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
74 |
</BODY> |
|
75 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.23/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupAnchorStatsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="2af384c6-9118-426d-9394-d7bbc42d707c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupAnchorStatsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that calculates statistics on the person anchors</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" /> |
|
17 |
|
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.AnchorStatsMapper" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
23 |
|
|
24 |
<!-- JOB GLOBAL --> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
31 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
32 |
<!--<PROPERTY key="mapred.fairscheduler.pool" value="solr"/> --> |
|
33 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
34 |
|
|
35 |
<!-- Uncomment to override the default lib path --> |
|
36 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
37 |
</STATIC_CONFIGURATION> |
|
38 |
<JOB_INTERFACE> |
|
39 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
40 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId" /> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY param="entityType" /> |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.23