Revision 54159
Added by Claudio Atzori over 4 years ago
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/deploy.info | ||
---|---|---|
1 |
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-profiles/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-openaireplus-profiles"} |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/offlineHbaseLoadJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="073e55eb-c6f4-49a9-80b3-1a927612ba5b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="offlineHbaseLoad" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that loads a given entity type in the offline dedup table</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.OfflineHbaseLoadMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
40 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId" /> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY param="entityType" /> |
|
48 |
</FAMILIES> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0" /> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingQuickJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="13beed98-81bf-4fbd-ab4f-de071177997c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="iisPreprocessingQuickJob" type="oozie"> |
|
11 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
<!-- Cluster wide --> |
|
14 |
<PROPERTY key="queueName" value="default"/> |
|
15 |
<PROPERTY key="user.name" value="dnet" /> |
|
16 |
|
|
17 |
<!-- Runtime --> |
|
18 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing_quick_test"/> |
|
19 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
20 |
<PROPERTY key="input_referenceextraction_project" value="/user/marek.horst/share/referenceextraction/document_projects/2014-04-11"/> |
|
21 |
<PROPERTY key="input_referenceextraction_dataset" value="/user/marek.horst/share/referenceextraction/document_datasets/all/2014-04-11"/> |
|
22 |
<PROPERTY key="export_action_hbase_table_initialize" value="false"/> |
|
23 |
</STATIC_CONFIGURATION> |
|
24 |
<JOB_INTERFACE> |
|
25 |
<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" /> |
|
26 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" /> |
|
27 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records" /> |
|
28 |
<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" /> |
|
29 |
<PARAM name="import_database_service_location" required="true" description="database service endpoint" /> |
|
30 |
<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction" /> |
|
31 |
<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext" /> |
|
32 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" /> |
|
33 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" /> |
|
34 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" /> |
|
35 |
<PARAM name="nameNode" required="true" description="hdfs name node" /> |
|
36 |
<PARAM name="jobTracker" required="true" description="job tracker name" /> |
|
37 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" /> |
|
38 |
<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" /> |
|
39 |
<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" /> |
|
40 |
<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" /> |
|
41 |
<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" /> |
|
42 |
</JOB_INTERFACE> |
|
43 |
</HADOOP_JOB> |
|
44 |
<STATUS> |
|
45 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
46 |
<RUNNING_INSTANCES value="0"/> |
|
47 |
<CUMULATIVE_RUN value="0" /> |
|
48 |
</STATUS> |
|
49 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
50 |
</BODY> |
|
51 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="iisPreprocessingJob" type="oozie"> |
|
12 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
<!-- Cluster wide --> |
|
15 |
<PROPERTY key="queueName" value="default"/> |
|
16 |
<PROPERTY key="user.name" value="dnet.beta"/> |
|
17 |
|
|
18 |
<!-- Runtime --> |
|
19 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"/> |
|
20 |
<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"/> |
|
21 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
22 |
<PROPERTY key="export_action_hbase_table_initialize" value="false"/> |
|
23 |
<!-- <PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> --> |
|
24 |
<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/> |
|
25 |
<PROPERTY key="metadataextraction_excluded_checksums" |
|
26 |
value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/> |
|
27 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
28 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
29 |
<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/> |
|
30 |
</STATIC_CONFIGURATION> |
|
31 |
<JOB_INTERFACE> |
|
32 |
<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/> |
|
33 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/> |
|
34 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/> |
|
35 |
<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/> |
|
36 |
<PARAM name="import_database_service_location" required="true" description="database service endpoint"/> |
|
37 |
<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction"/> |
|
38 |
<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext"/> |
|
39 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/> |
|
40 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/> |
|
41 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/> |
|
42 |
<PARAM name="nameNode" required="true" description="hdfs name node"/> |
|
43 |
<PARAM name="jobTracker" required="true" description="job tracker name"/> |
|
44 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/> |
|
45 |
<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/> |
|
46 |
<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/> |
|
47 |
<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/> |
|
48 |
<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/> |
|
49 |
</JOB_INTERFACE> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0"/> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="iisMainJob" type="oozie"> |
|
12 |
<DESCRIPTION>IIS main workflow</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- Cluster wide --> |
|
16 |
<PROPERTY key="queueName" value="default"/> |
|
17 |
<PROPERTY key="user.name" value="dnet.beta"/> |
|
18 |
|
|
19 |
<!-- Runtime --> |
|
20 |
<PROPERTY key="match_content_with_metadata" value="true"/> |
|
21 |
<PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/> |
|
22 |
<PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/> |
|
23 |
<!--<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> --> |
|
24 |
<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/> |
|
25 |
|
|
26 |
<PROPERTY key="export_action_hbase_table_initialize" value="true"/> |
|
27 |
<PROPERTY key="import_content_connection_timeout" value="180000"/> |
|
28 |
<PROPERTY key="import_content_read_timeout" value="180000"/> |
|
29 |
<PROPERTY key="import_resultset_client_read_timeout" value="180000"/> |
|
30 |
|
|
31 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
32 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/> |
|
33 |
<PROPERTY key="metadataextraction_excluded_checksums" |
|
34 |
value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/> |
|
35 |
<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/> |
|
36 |
<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/> |
|
37 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
38 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
39 |
<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/> |
|
40 |
<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/> |
|
44 |
<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing"/> |
|
45 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/> |
|
46 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/> |
|
47 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/> |
|
48 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/> |
|
49 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/> |
|
50 |
<PARAM name="nameNode" required="true" description="hdfs name node"/> |
|
51 |
<PARAM name="jobTracker" required="true" description="job tracker name"/> |
|
52 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/> |
|
53 |
<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/> |
|
54 |
<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/> |
|
55 |
<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" |
|
56 |
required="true"/> |
|
57 |
<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/> |
|
58 |
<PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/> |
|
59 |
<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/> |
|
60 |
<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/> |
|
61 |
<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/> |
|
62 |
<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/> |
|
63 |
|
|
64 |
<!-- flags to enable/disable IIS modules --> |
|
65 |
<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module"/> |
|
66 |
<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module"/> |
|
67 |
<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module"/> |
|
68 |
<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module"/> |
|
69 |
<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module"/> |
|
70 |
<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module"/> |
|
71 |
<PARAM name="active_referenceextraction_researchinitiative" required="true" |
|
72 |
description="enable/disable the research initiative extraction module"/> |
|
73 |
<PARAM name="active_statistics" required="true" description="enable/disable the statistics module"/> |
|
74 |
<PARAM name="active_referenceextraction_pdb" required="true" description="enable/disable the protein data bank extraction module"/> |
|
75 |
|
|
76 |
</JOB_INTERFACE> |
|
77 |
</HADOOP_JOB> |
|
78 |
<STATUS> |
|
79 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
80 |
<RUNNING_INSTANCES value="0"/> |
|
81 |
<CUMULATIVE_RUN value="0"/> |
|
82 |
</STATUS> |
|
83 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
84 |
</BODY> |
|
85 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupDeleteDedupRelsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="5626c94e-0005-416a-9ea4-48fc8af85ecd_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="deleteDedupRelsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that deletes the dedup rels used in the deduplication process</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupDeleteRelMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.Writable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
31 |
|
|
32 |
<!-- Uncomment to override the default lib path --> |
|
33 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
40 |
</JOB_INTERFACE> |
|
41 |
<SCAN> |
|
42 |
<FILTERS operator="MUST_PASS_ALL"> |
|
43 |
<FILTER type="prefix" param="entityTypeId" /> |
|
44 |
</FILTERS> |
|
45 |
<FAMILIES> |
|
46 |
<FAMILY param="entityType" /> |
|
47 |
<FAMILY value="resultResult_dedup_merges" /> |
|
48 |
<FAMILY value="resultResult_dedup_isMergedIn" /> |
|
49 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
50 |
|
|
51 |
<FAMILY value="personPerson_dedup_merges" /> |
|
52 |
<FAMILY value="personPerson_dedup_isMergedIn" /> |
|
53 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
54 |
|
|
55 |
<FAMILY value="organizationOrganization_dedup_merges" /> |
|
56 |
<FAMILY value="organizationOrganization_dedup_isMergedIn" /> |
|
57 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
58 |
</FAMILIES> |
|
59 |
</SCAN> |
|
60 |
</HADOOP_JOB> |
|
61 |
<STATUS> |
|
62 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
63 |
<RUNNING_INSTANCES value="0"/> |
|
64 |
<CUMULATIVE_RUN value="0" /> |
|
65 |
</STATUS> |
|
66 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
67 |
</BODY> |
|
68 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJobV2.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="d730a831-a982-4034-a890-de98fd972e87_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2017-07-05T15:05:50+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="iisMainJobV2" type="oozie"> |
|
11 |
<DESCRIPTION>IIS main workflow</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- Cluster wide --> |
|
15 |
<PROPERTY key="user.name" value="dnet.production"/> |
|
16 |
|
|
17 |
<!-- Runtime --> |
|
18 |
<PROPERTY key="metadataextraction_default_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/> |
|
19 |
<!-- skipping _default chunk from property name since 2017.02.21 --> |
|
20 |
<PROPERTY key="metadataextraction_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/> |
|
21 |
<PROPERTY key="ingest_pmc_cache_location" value="/user/${user.name}/iis/cache/ingestpmc"/> |
|
22 |
<PROPERTY key="software_webcrawl_cache_location" value="/user/${user.name}/iis/cache/webcrawler"/> |
|
23 |
<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/> |
|
24 |
<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/> |
|
25 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
26 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
27 |
<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/> |
|
28 |
<PROPERTY key="export_trust_level_threshold_document_software_url" value="0.0"/> |
|
29 |
<PROPERTY key="export_trust_level_threshold_matched_doc_organizations" value="0.0"/> |
|
30 |
<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_PROD"/> |
|
31 |
<PROPERTY key="reports_external_path" value="/user/${user.name}/iis/reports/${execution_environment}"/> |
|
32 |
<PROPERTY key="import_hbase_dump_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/tmp/db_openaireplus_services.export.2017.07.19"/> |
|
33 |
<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM description="oozie job application absolute path" name="oozie.wf.application.path" required="true"/> |
|
37 |
<PARAM description="objectStore service endpoint" name="import_content_object_store_location" required="true"/> |
|
38 |
<PARAM description="csv list of the available object stores subject to processing" name="import_content_objectstores_csv" required="true"/> |
|
39 |
<PARAM description="mdstore service location" name="import_mdstore_service_location" required="true"/> |
|
40 |
<PARAM description="mdstore ids for dataset records" name="import_dataset_mdstore_ids_csv" required="true"/> |
|
41 |
<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/> |
|
42 |
<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/> |
|
43 |
<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" required="true"/> |
|
44 |
<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/> |
|
45 |
<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/> |
|
46 |
<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/> |
|
47 |
<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/> |
|
48 |
<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/> |
|
49 |
<PARAM description="software reference extraction actionset identifier" name="export_action_set_id_document_software_url" required="true"/> |
|
50 |
<PARAM description="target action set for affiliations" name="export_action_set_id_matched_doc_organizations" required="true"/> |
|
51 |
|
|
52 |
|
|
53 |
<!-- flags to enable/disable IIS modules --> |
|
54 |
<PARAM description="enable/disable the citation matching module" name="active_citationmatching" required="true"/> |
|
55 |
<PARAM description="enable/disable the document classification module" name="active_documentsclassification" required="true"/> |
|
56 |
<PARAM description="enable/disable the document similarity module" name="active_documentssimilarity" required="true"/> |
|
57 |
<PARAM description="enable/disable the dataset reference extraction module" name="active_referenceextraction_dataset" required="true"/> |
|
58 |
<PARAM description="enable/disable the project reference extraction module" name="active_referenceextraction_project" required="true"/> |
|
59 |
<PARAM description="enable/disable the research initiative extraction module" name="active_referenceextraction_researchinitiative" required="true"/> |
|
60 |
<PARAM description="enable/disable the protein data bank extraction module" name="active_referenceextraction_pdb" required="true"/> |
|
61 |
<PARAM description="enable/disable the software reference extraction algorithm" name="active_referenceextraction_software_url" required="true"/> |
|
62 |
</JOB_INTERFACE> |
|
63 |
</HADOOP_JOB> |
|
64 |
<STATUS> |
|
65 |
<LAST_SUBMISSION_DATE value="2017-07-02T09:39:51+00:00"/> |
|
66 |
<RUNNING_INSTANCES value="0"/> |
|
67 |
<CUMULATIVE_RUN value="13"/> |
|
68 |
</STATUS> |
|
69 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
70 |
</BODY> |
|
71 |
</RESOURCE_PROFILE> |
|
72 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerEnrichmentJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="ffcc359c-555a-4d86-9ee2-b8ad212a0185_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="brokerEnrichmentJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that calculates the enrichment events based on the publications dedup results</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.EnrichmentMapper"/> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.EnrichmentReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
|
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
31 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
34 |
|
|
35 |
<PROPERTY key="dfs.blocksize" value="256M"/> |
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1"/> |
|
37 |
|
|
38 |
<!-- ES --> |
|
39 |
<PROPERTY key="es.nodes" |
|
40 |
value="ip-90-147-167-137.ct1.garrservices.it:9200,ip-90-147-167-126.ct1.garrservices.it:9200,ip-90-147-167-13.ct1.garrservices.it:9200,ip-90-147-167-125.ct1.garrservices.it:9200"/> |
|
41 |
<PROPERTY key="es.nodes.resolve.hostname" value="false"/> |
|
42 |
<PROPERTY key="es.nodes.wan.only" value="true"/> |
|
43 |
<PROPERTY key="es.resource" value="events_{infra}/event"/> |
|
44 |
<PROPERTY key="es.input.json" value="yes"/> |
|
45 |
<PROPERTY key="es.mapping.id" value="eventId"/> |
|
46 |
|
|
47 |
<!-- BROKER --> |
|
48 |
<PROPERTY key="broker.datasource.id.whitelist" value=""/> |
|
49 |
<PROPERTY key="broker.datasource.id.blacklist" value=""/> |
|
50 |
<PROPERTY key="broker.datasource.untrusted.oa.list" value="opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"/> |
|
51 |
<PROPERTY key="broker.datasource.type.whitelist" value="pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic"/> |
|
52 |
|
|
53 |
|
|
54 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
55 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
56 |
|
|
57 |
<!-- Uncomment to override the default lib path --> |
|
58 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
59 |
</STATIC_CONFIGURATION> |
|
60 |
<JOB_INTERFACE> |
|
61 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
62 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
63 |
</JOB_INTERFACE> |
|
64 |
<SCAN> |
|
65 |
<FILTERS operator="MUST_PASS_ALL"> |
|
66 |
<FILTER type="prefix" value="50" /> |
|
67 |
</FILTERS> |
|
68 |
<FAMILIES> |
|
69 |
<FAMILY value="result" /> |
|
70 |
<FAMILY value="resultResult_dedup_isMergedIn" /> |
|
71 |
</FAMILIES> |
|
72 |
</SCAN> |
|
73 |
</HADOOP_JOB> |
|
74 |
<STATUS> |
|
75 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
76 |
<RUNNING_INSTANCES value="0"/> |
|
77 |
<CUMULATIVE_RUN value="0" /> |
|
78 |
</STATUS> |
|
79 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
80 |
</BODY> |
|
81 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/personCsvJoinJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="3f544a36-f123-4f5c-acf4-7c25f6591ec4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="personCsvJoinJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that joins person entities by "surname+first name letter" and serialise the output as csv</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" /> |
|
17 |
<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupMapper" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupReducer" /> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapred.reduce.tasks" value="10" /> |
|
33 |
|
|
34 |
<PROPERTY key="hash.values.csv" value="manghip,pmanghi,corog,gcoro" /> |
|
35 |
|
|
36 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
37 |
|
|
38 |
<!-- Uncomment to override the default lib path --> |
|
39 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
40 |
|
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="input sequence file" /> |
|
44 |
</JOB_INTERFACE> |
|
45 |
</HADOOP_JOB> |
|
46 |
<STATUS> |
|
47 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
48 |
<RUNNING_INSTANCES value="0"/> |
|
49 |
<CUMULATIVE_RUN value="0" /> |
|
50 |
</STATUS> |
|
51 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
52 |
</BODY> |
|
53 |
</RESOURCE_PROFILE> |
|
54 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/propagationCountryInstitutionalOrganizationSaveToFile.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="7d4ab172-6772-4960-abfa-037537f58872_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2018-09-10T18:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="propagationCountryFromInstitutionalRepositoriesSaveToFile" type="mapreduce"> |
|
11 |
      <DESCRIPTION>map reduce job that propagates the country value of the organization to products belonging to institutional repositories related to the organization (pubsrepository::institutional)</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION><!-- I/O FORMAT --> |
|
13 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
14 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |
|
15 |
|
|
16 |
<!-- MAPPER --> |
|
17 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.PropagationCountryFromDsOrgResultMapper"/> |
|
18 |
<PROPERTY key="mapred.mapoutput.key.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.InstOrgKey"/> |
|
19 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/> |
|
20 |
|
|
21 |
<!-- REDUCER --> |
|
22 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.PropagationCountryFromDsOrgResultFileReducer"/> |
|
23 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
24 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |
|
25 |
|
|
26 |
<!-- PARTITIONER --> |
|
27 |
<PROPERTY key="mapred.partitioner.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.NaturalInstOrgKeyPartitioner"/> |
|
28 |
<PROPERTY key="mapreduce.partitioner.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.NaturalInstOrgKeyPartitioner"/> |
|
29 |
<PROPERTY key="mapred.output.value.groupfn.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.NaturalInstOrgKeyGroupingComparator"/> |
|
30 |
<PROPERTY key="mapreduce.output.value.groupfn.class" value="eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories.NaturalInstOrgKeyGroupingComparator"/> |
|
31 |
|
|
32 |
<!-- MISC --> |
|
33 |
<PROPERTY key="mapred.compress.map.output" value="true"/> |
|
34 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
35 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
36 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
37 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
38 |
<PROPERTY key="mapred.reduce.tasks" value="20"/> |
|
39 |
|
|
40 |
<!-- CSV of datasource typologies considered in the processing --> |
|
41 |
<PROPERTY key="datasource.types" value="pubsrepository::institutional"/> |
|
42 |
|
|
43 |
<!-- <PROPERTY key="user.name" value="dnet" /> --><!-- Uncomment to override the default lib path --> |
|
44 |
<PROPERTY key="job.lib" value="/user/dnet/openaire/dnet-mapreduce-jobs-assembly-country_propagation-1.1.1-SNAPSHOT.jar"/> |
|
45 |
</STATIC_CONFIGURATION> |
|
46 |
<JOB_INTERFACE> |
|
47 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
48 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
49 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
50 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/><!--<PARAM name="countryPropagation.conf" required="true" description="configuration for country propagation" />--> |
|
51 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
52 |
</JOB_INTERFACE> |
|
53 |
<SCAN/> |
|
54 |
</HADOOP_JOB> |
|
55 |
<STATUS> |
|
56 |
<LAST_SUBMISSION_DATE value="2018-09-18T12:13:07+02:00"/> |
|
57 |
<RUNNING_INSTANCES value="1"/> |
|
58 |
<CUMULATIVE_RUN value="30"/> |
|
59 |
</STATUS> |
|
60 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
61 |
</BODY> |
|
62 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerEnrichmentProjectsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="17dd747e-f5f2-45d5-8554-9f70343bfe55_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="brokerEnrichmentProjectsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that calculates the enrichment events based on the publications dedup results</DESCRIPTION> |
|
12 |
|
|
13 |
<STATIC_CONFIGURATION><!-- I/O FORMAT --> |
|
14 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
15 |
<PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/> |
|
16 |
|
|
17 |
<!-- MAPPER --> |
|
18 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.ProjectEnrichmentMapper"/> |
|
19 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
20 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
21 |
|
|
22 |
<!-- REDUCER --> |
|
23 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.enrich.ProjectEnrichmentReducer"/> |
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
29 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
30 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
31 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
32 |
<PROPERTY key="dfs.blocksize" value="256M"/> |
|
33 |
<PROPERTY key="mapred.reduce.tasks" value="4"/> |
|
34 |
|
|
35 |
<PROPERTY key="broker.baseurl.publication" value="https://explore.openaire.eu/search/publication?articleId=%s"/> |
|
36 |
<PROPERTY key="broker.baseurl.dataset" value="https://explore.openaire.eu/search/dataset?datasetId=%s"/> |
|
37 |
<PROPERTY key="broker.baseurl.software" value="https://explore.openaire.eu/search/software?softwareId=%s"/> |
|
38 |
<PROPERTY key="broker.baseurl.other" value="https://explore.openaire.eu/search/other?orpId=%s"/> |
|
39 |
|
|
40 |
|
|
41 |
<!-- ES --> |
|
42 |
<PROPERTY key="es.nodes" value="ip-90-147-167-137.ct1.garrservices.it:9200,ip-90-147-167-126.ct1.garrservices.it:9200,ip-90-147-167-13.ct1.garrservices.it:9200,ip-90-147-167-125.ct1.garrservices.it:9200"/> |
|
43 |
<PROPERTY key="es.nodes.resolve.hostname" value="false"/> |
|
44 |
<PROPERTY key="es.batch.write.retry.count " value="10"/> |
|
45 |
<PROPERTY key="es.batch.size.entries " value="500"/> |
|
46 |
<PROPERTY key="es.nodes.wan.only" value="true"/> |
|
47 |
<PROPERTY key="es.resource" value="events_{infra}/event"/> |
|
48 |
<PROPERTY key="es.input.json" value="yes"/> |
|
49 |
<PROPERTY key="es.mapping.id" value="eventId"/> |
|
50 |
|
|
51 |
<!-- BROKER --> |
|
52 |
<PROPERTY key="broker.datasource.id.whitelist" value=""/> |
|
53 |
<PROPERTY key="broker.datasource.id.blacklist" value=""/> |
|
54 |
<PROPERTY key="broker.datasource.untrusted.oa.list" value="opendoar____::8b6dd7db9af49e67306feb59a8bdc52c"/> |
|
55 |
<PROPERTY key="broker.datasource.type.whitelist" value="pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic"/> |
|
56 |
|
|
57 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
58 |
<!-- Uncomment to override the default lib path --> |
|
59 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
60 |
</STATIC_CONFIGURATION> |
|
61 |
<JOB_INTERFACE> |
|
62 |
<PARAM description="source hbase table" name="hbase.mapred.inputtable" required="true"/> |
|
63 |
<PARAM description="source hbase table" name="hbase.mapreduce.inputtable" required="true"/> |
|
64 |
</JOB_INTERFACE> |
|
65 |
<SCAN> |
|
66 |
<FILTERS operator="MUST_PASS_ONE"> |
|
67 |
<FILTER type="prefix" value="40" /> |
|
68 |
<FILTER type="prefix" value="50" /> |
|
69 |
</FILTERS> |
|
70 |
<FAMILIES> |
|
71 |
<FAMILY value="result" /> |
|
72 |
<FAMILY value="project" /> |
|
73 |
<FAMILY value="resultResult_dedup_isMergedIn" /> |
|
74 |
<FAMILY value="resultProject_outcome_produces" /> |
|
75 |
<FAMILY value="resultProject_outcome_isProducedBy" /> |
|
76 |
</FAMILIES> |
|
77 |
</SCAN> |
|
78 |
</HADOOP_JOB> |
|
79 |
<STATUS> |
|
80 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
81 |
<RUNNING_INSTANCES value="0"/> |
|
82 |
<CUMULATIVE_RUN value="0" /> |
|
83 |
</STATUS> |
|
84 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
85 |
</BODY> |
|
86 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMinDistGraphJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="de888da6-2d10-4d42-a624-a44d4083414a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupMinDistGraphJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that finds the minimum vertex in each connected component in the input graph (as adjacency lists)</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchReducer"/> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="false"/> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1"/> |
|
37 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
38 |
|
|
39 |
<!-- Uncomment to override the default lib path --> |
|
40 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/> |
|
44 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS/> |
|
48 |
<FAMILIES/> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0"/> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
|
59 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2GraphJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="5907741a-d97e-41ea-9dbe-963209aa58d9_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupSimilarity2GraphJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that scans a given entity type and creates the similarRel graph as adjacency lists</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.HBaseToSimilarityGraphMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
23 |
|
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.compress.map.output" value="false"/> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="0"/> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
42 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
43 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS operator="MUST_PASS_ALL"> |
|
47 |
<FILTER type="prefix" param="entityTypeId"/> |
|
48 |
</FILTERS> |
|
49 |
<FAMILIES> |
|
50 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo"/> |
|
51 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo"/> |
|
52 |
</FAMILIES> |
|
53 |
</SCAN> |
|
54 |
</HADOOP_JOB> |
|
55 |
<STATUS> |
|
56 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
57 |
<RUNNING_INSTANCES value="0"/> |
|
58 |
<CUMULATIVE_RUN value="0"/> |
|
59 |
</STATUS> |
|
60 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
61 |
</BODY> |
|
62 |
</RESOURCE_PROFILE> |
|
63 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/publicationAnalysisJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="721fd82c-6444-41c9-ba23-5eb0652ddaeb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="publicationAnalysisJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that analyses publication features</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.PublicationAnalysisMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
|
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
</JOB_INTERFACE> |
|
40 |
<SCAN> |
|
41 |
<FILTERS operator="MUST_PASS_ALL"> |
|
42 |
<FILTER type="prefix" value="50" /> |
|
43 |
</FILTERS> |
|
44 |
<FAMILIES> |
|
45 |
<FAMILY value="result" /> |
|
46 |
</FAMILIES> |
|
47 |
</SCAN> |
|
48 |
</HADOOP_JOB> |
|
49 |
<STATUS> |
|
50 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
51 |
<RUNNING_INSTANCES value="0"/> |
|
52 |
<CUMULATIVE_RUN value="0" /> |
|
53 |
</STATUS> |
|
54 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
55 |
</BODY> |
|
56 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsPersonExportJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="4c63a9ab-057f-442c-8da2-9b956c41e645_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupRootsPersonExportJob" type="mapreduce"> |
|
11 |
      <DESCRIPTION>map only job that exports the representative persons as json</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.RootPersonExportMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
|
|
26 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
29 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
30 |
|
|
31 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
32 |
|
|
33 |
<PROPERTY key="mapred.reduce.tasks" value="1" /> |
|
34 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
35 |
|
|
36 |
<!-- Uncomment to override the default lib path --> |
|
37 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
38 |
</STATIC_CONFIGURATION> |
|
39 |
<JOB_INTERFACE> |
|
40 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
41 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
42 |
|
|
43 |
      <PARAM name="mapred.output.dir" required="true" description="target text file on hdfs" /> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS operator="MUST_PASS_ALL"> |
|
47 |
<FILTER type="prefix" param="entityTypeId" /> |
|
48 |
</FILTERS> |
|
49 |
<FAMILIES> |
|
50 |
<FAMILY param="entityType" /> |
|
51 |
</FAMILIES> |
|
52 |
</SCAN> |
|
53 |
</HADOOP_JOB> |
|
54 |
<STATUS> |
|
55 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
56 |
<RUNNING_INSTANCES value="0"/> |
|
57 |
<CUMULATIVE_RUN value="0" /> |
|
58 |
</STATUS> |
|
59 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
60 |
</BODY> |
|
61 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/mdStoreHdfsImportAuthorsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="a53df5e3-8a38-4d3f-8f67-bf9fc43279a6_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="authorImportRecordsJob" type="mapreduce"> |
|
11 |
      <DESCRIPTION>map only job that maps xml metadata records from a sequence file into an hbase table</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.AuthorImportRecordsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
26 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
27 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
28 |
|
|
29 |
<!-- Uncomment to override the default lib path --> |
|
30 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
31 |
|
|
32 |
</STATIC_CONFIGURATION> |
|
33 |
<JOB_INTERFACE> |
|
34 |
<PARAM name="mapred.input.dir" required="true" description="input sequence file" /> |
|
35 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
36 |
<PARAM name="hbase.import.xslt" required="true" description="mapping" /> |
|
37 |
</JOB_INTERFACE> |
|
38 |
</HADOOP_JOB> |
|
39 |
<STATUS> |
|
40 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
41 |
<RUNNING_INSTANCES value="0"/> |
|
42 |
<CUMULATIVE_RUN value="0" /> |
|
43 |
</STATUS> |
|
44 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
45 |
</BODY> |
|
46 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/sqoopStatsUpdateJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="bf0ab07b-36bf-4164-ab73-342bfb11e51a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="StatsExportJob" type="oozie"> |
|
11 |
<DESCRIPTION>Job for importing data from HBASE to the relational Stats Database</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- Cluster wide --> |
|
15 |
<PROPERTY key="queueName" value="default"/> |
|
16 |
<PROPERTY key="user.name" value="dnet"/> <!-- username = sqoop?? --> |
|
17 |
<PROPERTY key="workingDir" value="/user/dnet/lib/stats/working_dir"/> |
|
18 |
<PROPERTY key="numReducers" value="1"/> |
|
19 |
|
|
20 |
<PROPERTY key="oozie.wf.application.path" value="hdfs://nmis-hadoop-cluster/user/eri.katsari/stats/oozie_app"/><!-- environment-specific: the value points at a user directory (/user/eri.katsari) on hdfs://nmis-hadoop-cluster; edit this property for the target deployment --> |
|
21 |
<PROPERTY key="Stats_db_Url" value="jdbc:postgresql://node1.t.openaire.research-infrastructures.eu:5432/stats"/><!-- complete the jdbc url with the actual value! --> |
|
22 |
<PROPERTY key="Stats_db_User" value="sqoop"/> |
|
23 |
<PROPERTY key="Stats_db_Pass" value="sqoop"/><!-- NOTE(review): the database password is stored in clear text in this profile; consider supplying it at deployment time instead --> |
|
24 |
<PROPERTY key="Stats_db_Driver" value="org.postgresql.Driver"/> |
|
25 |
<PROPERTY key="Stats_db_table_map" value="datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultTopic=result_topics,category=category,context=context,claim=claim,concept=concept,resultDatasource=result_datasources"/><!-- comma-separated key=value pairs; each key is listed exactly once --> |
|
26 |
<PROPERTY key="Stats_sqoop_RecsPerStatement" value="10000"/> |
|
27 |
<PROPERTY key="Stats_sqoop_StatementPerTrans" value="1000000"/> |
|
28 |
<PROPERTY key="Stats_sqoop_ReducersCount" value="4"/> |
|
29 |
<PROPERTY key="Stats_output_Path" value="/tmp/stats/"/> |
|
30 |
<PROPERTY key="Stats_null_String_Field" value="null"/> |
|
31 |
<PROPERTY key="Stats_null_Numeric_Field" value="null"/> |
|
32 |
<PROPERTY key="Stats_enclosing_Character" value="#"/> |
|
33 |
<PROPERTY key="Stats_delim_Character" value="!"/> |
|
34 |
<PROPERTY key="out1" value="datasource"/> |
|
35 |
<PROPERTY key="out2" value="project"/> |
|
36 |
<PROPERTY key="out3" value="organization"/> |
|
37 |
<PROPERTY key="out4" value="datasourceOrganization"/> |
|
38 |
<PROPERTY key="out5" value="datasourceTopic"/> |
|
39 |
<PROPERTY key="out6" value="datasourceLanguage"/> |
|
40 |
<PROPERTY key="out7" value="projectOrganization"/> |
|
41 |
<PROPERTY key="out8" value="resultClaim"/> |
|
42 |
<PROPERTY key="out9" value="resultClassification"/> |
|
43 |
<PROPERTY key="out10" value="resultConcept"/> |
|
44 |
<PROPERTY key="out11" value="resultLanguage"/> |
|
45 |
<PROPERTY key="out12" value="resultOrganization"/> |
|
46 |
<PROPERTY key="out13" value="resultResult"/> |
|
47 |
<PROPERTY key="out14" value="resultProject"/> |
|
48 |
<PROPERTY key="out15" value="category"/> |
|
49 |
<PROPERTY key="out16" value="resultTopic"/> |
|
50 |
<PROPERTY key="out17" value="resultDatasource"/> |
|
51 |
<PROPERTY key="out18" value="result"/> |
|
52 |
<PROPERTY key="out19" value="claim"/> |
|
53 |
<PROPERTY key="out20" value="concept"/> |
|
54 |
</STATIC_CONFIGURATION> |
|
55 |
<JOB_INTERFACE> |
|
56 |
<PARAM name="nameNode" required="true" description="hdfs name node"/> |
|
57 |
<PARAM name="jobTracker" required="true" description="job tracker name"/> |
|
58 |
<PARAM name="Stats_Hbase_Source_Table" required="true" description="Hbase Table with Protobuffs."/> |
|
59 |
<PARAM name="Stats_indexConf" required="true" description="Index Entity Links configuration."/> |
|
60 |
<PARAM name="isLookupEndpoint" required="true" description="IS lookup service endpoint"/> |
|
61 |
</JOB_INTERFACE> |
|
62 |
</HADOOP_JOB> |
|
63 |
<STATUS> |
|
64 |
<LAST_SUBMISSION_DATE value="2014-11-14T19:57:25+00:00"/> |
|
65 |
<RUNNING_INSTANCES value="0"/> |
|
66 |
<CUMULATIVE_RUN value="75"/> |
|
67 |
</STATUS> |
|
68 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
69 |
</BODY> |
|
70 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/predatoryJournalsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="bab7a0b8-66b4-4e1a-a8d4-0bb9b3493f90_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="predatoryJournalsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that counts the number of publications from journals linked to EC projects</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.misc.PredatoryJournalsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.misc.PredatoryJournalsReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.output.compress" value="false" /> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1" /> |
|
37 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
38 |
|
|
39 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
40 |
|
|
41 |
<!-- Overrides the default lib path; comment this property out to fall back to the default --> |
|
42 |
<PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-1.1.4-BETA-SNAPSHOT-predatoryJournals.jar"/> |
|
43 |
</STATIC_CONFIGURATION> |
|
44 |
<JOB_INTERFACE> |
|
45 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
46 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
47 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
48 |
</JOB_INTERFACE> |
|
49 |
<SCAN> |
|
50 |
<FILTERS /> |
|
51 |
<FAMILIES /> |
|
52 |
</SCAN> |
|
53 |
</HADOOP_JOB> |
|
54 |
<STATUS> |
|
55 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
56 |
<RUNNING_INSTANCES value="0"/> |
|
57 |
<CUMULATIVE_RUN value="0" /> |
|
58 |
</STATUS> |
|
59 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
60 |
</BODY> |
|
61 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/coauthorUpdateJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="6d91b311-a7fd-48ff-98d2-1fed70850e3a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="coauthorUpdateJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>update coauthors using a map {merged author id --&gt; anchorId}</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT: input and output format classes, each key declared once --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
|
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.CoAuthorUpdateMapper" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" /> |
|
23 |
|
|
24 |
|
|
25 |
<!-- MISC --> |
|
26 |
<PROPERTY key="mapred.output.compress" value="false" /> |
|
27 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
28 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
29 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
32 |
|
|
33 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
34 |
|
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
42 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
43 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /><!-- NOTE(review): the output format configured in this profile is TableOutputFormat, but no hbase.mapred.outputtable / hbase.mapreduce.outputtable parameter is declared here; confirm that this is the intended output parameter --> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS operator="MUST_PASS_ALL"> |
|
47 |
<FILTER type="prefix" value="30"/> |
|
48 |
</FILTERS> |
|
49 |
<FAMILIES> |
|
50 |
<FAMILY value="person"/> |
|
51 |
</FAMILIES> |
|
52 |
</SCAN> |
|
53 |
</HADOOP_JOB> |
|
54 |
<STATUS> |
|
55 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
56 |
<RUNNING_INSTANCES value="0"/> |
|
57 |
<CUMULATIVE_RUN value="0" /> |
|
58 |
</STATUS> |
|
59 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
60 |
</BODY> |
|
61 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupPersonJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="29638605-235b-4cc1-9bf5-a5dd2fc84915_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupPersonJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.SimpleDedupPersonMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.SimpleDedupPersonReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="1000" /> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
42 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
43 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
44 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS operator="MUST_PASS_ALL"> |
|
48 |
<FILTER type="prefix" param="entityTypeId" /> |
|
49 |
</FILTERS> |
|
50 |
<FAMILIES> |
|
51 |
<FAMILY param="entityType" /> |
|
52 |
</FAMILIES> |
|
53 |
</SCAN> |
|
54 |
</HADOOP_JOB> |
|
55 |
<STATUS> |
|
56 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
57 |
<RUNNING_INSTANCES value="0"/> |
|
58 |
<CUMULATIVE_RUN value="0" /> |
|
59 |
</STATUS> |
|
60 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
61 |
</BODY> |
|
62 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsExportJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="53f2a9b4-adf3-4ceb-9308-d88b53dc44c5_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupRootsExportJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that exports the representative publications as json</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.RootExportMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
|
|
26 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
29 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
30 |
|
|
31 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
32 |
|
|
33 |
<PROPERTY key="mapred.reduce.tasks" value="1" /><!-- NOTE(review): DESCRIPTION says "map only job", yet one reduce task is requested and no mapreduce.reduce.class is declared in this profile; confirm the intended value --> |
|
34 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
35 |
|
|
36 |
<!-- Uncomment to override the default lib path --> |
|
37 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
38 |
</STATIC_CONFIGURATION> |
|
39 |
<JOB_INTERFACE> |
|
40 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
41 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
42 |
|
|
43 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS operator="MUST_PASS_ALL"> |
|
47 |
<FILTER type="prefix" param="entityTypeId" /> |
|
48 |
</FILTERS> |
|
49 |
<FAMILIES> |
|
50 |
<FAMILY param="entityType" /> |
|
51 |
</FAMILIES> |
|
52 |
</SCAN> |
|
53 |
</HADOOP_JOB> |
|
54 |
<STATUS> |
|
55 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
56 |
<RUNNING_INSTANCES value="0"/> |
|
57 |
<CUMULATIVE_RUN value="0" /> |
|
58 |
</STATUS> |
|
59 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
60 |
</BODY> |
|
61 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceImportJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="b7d51a07-6996-4841-9a4a-685a044638e3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="informationSpaceImportJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that import the whole information space table from a sequence file holding a json dump</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.ImportInformationSpaceDumpMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
26 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
27 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
28 |
|
|
29 |
<!-- Uncomment to override the default lib path --> |
|
30 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
31 |
</STATIC_CONFIGURATION> |
|
32 |
<JOB_INTERFACE> |
|
33 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
34 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
35 |
|
|
36 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" /> |
|
37 |
</JOB_INTERFACE> |
|
38 |
<SCAN> |
|
39 |
<FILTERS /> |
|
40 |
<FAMILIES /> |
|
41 |
</SCAN> |
|
42 |
</HADOOP_JOB> |
|
43 |
<STATUS> |
|
44 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
45 |
<RUNNING_INSTANCES value="0"/> |
|
46 |
<CUMULATIVE_RUN value="0" /> |
|
47 |
</STATUS> |
|
48 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
49 |
</BODY> |
|
50 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/xmlRecordCounterJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="16845094-5871-4584-b934-590b5d005836_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2018-10-22T10:34:48+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="xmlRecordCounterJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that count information from the index xml records</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION><!-- I/O FORMAT --> |
|
13 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
14 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat"/><!-- MAPPER --> |
|
15 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.CountXmlRecordsMapper"/> |
|
16 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable"/> |
|
17 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable"/><!-- JOB GLOBAL --> |
|
18 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable"/> |
|
19 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/><!-- MISC --> |
|
20 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
21 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
22 |
<PROPERTY key="mapred.reduce.tasks" value="0"/><!-- <PROPERTY key="user.name" value="dnet" /> --><!-- Uncomment to override the default lib path --> |
|
23 |
<!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-1.1.4-BETA-SNAPSHOT-FilterXmlRecordsMapper.jar"/> --> |
|
24 |
</STATIC_CONFIGURATION> |
|
25 |
<JOB_INTERFACE> |
|
26 |
<PARAM description="input sequence file" name="mapred.input.dir" required="true"/> |
|
27 |
</JOB_INTERFACE> |
|
28 |
</HADOOP_JOB> |
|
29 |
<STATUS> |
|
30 |
<LAST_SUBMISSION_DATE value="2018-10-22T10:35:08+00:00"/> |
|
31 |
<RUNNING_INSTANCES value="0"/> |
|
32 |
<CUMULATIVE_RUN value="20"/> |
|
33 |
</STATUS> |
|
34 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
35 |
</BODY> |
|
36 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/brokerAdditionJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="a525f9b5-b3e5-495d-921a-a0abe734039d_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="brokerAdditionJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map job that calculates the addition events</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.27