Revision 54159
Added by Claudio Atzori over 5 years ago
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMarkDeletedEntityJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="667fe203-ee51-4dff-8c9c-b90e66e96eb4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupMarkDeletedEntityJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMarkDeletedEntityMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/> |
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0"/> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
39 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
40 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId"/> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY param="entityType"/> |
|
48 |
<FAMILY value="resultResult_dedup_isMergedIn"/> |
|
49 |
<FAMILY value="organizationOrganization_dedup_isMergedIn"/> |
|
50 |
</FAMILIES> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0"/> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2HdfsActionsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="baa63c0c-0ff3-4a15-93c1-e361800e9ca8_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupSimilarity2HdfsActionsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupSimilarityToHdfsActionsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
<!-- OUTPUT --> |
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.output.compress" value="true"/> |
|
35 |
<PROPERTY key="mapred.output.compression.type" value="BLOCK"/> |
|
36 |
|
|
37 |
<PROPERTY key="mapred.reduce.tasks" value="10" /> |
|
38 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
39 |
|
|
40 |
<!-- Uncomment to override the default lib path --> |
|
41 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
42 |
</STATIC_CONFIGURATION> |
|
43 |
<JOB_INTERFACE> |
|
44 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
45 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
46 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
47 |
<PARAM name="dedup.conf" required="true" description="dedup configuration" /> |
|
48 |
<PARAM name="rawSetId" required="true" description="raw set identifier" /> |
|
49 |
<PARAM name="similarityCF" required="true" description="similarity column family name" /> |
|
50 |
</JOB_INTERFACE> |
|
51 |
<SCAN> |
|
52 |
<FILTERS operator="MUST_PASS_ALL"> |
|
53 |
<FILTER type="prefix" param="entityTypeId" /> |
|
54 |
</FILTERS> |
|
55 |
<FAMILIES> |
|
56 |
<FAMILY param="entityType" /> |
|
57 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
58 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
59 |
</FAMILIES> |
|
60 |
</SCAN> |
|
61 |
</HADOOP_JOB> |
|
62 |
<STATUS> |
|
63 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
64 |
<RUNNING_INSTANCES value="0"/> |
|
65 |
<CUMULATIVE_RUN value="0" /> |
|
66 |
</STATUS> |
|
67 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
68 |
</BODY> |
|
69 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJobV2.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="51e1660e-b1fe-4389-96b5-11a522b78c15_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2016-10-15T12:03:30+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="iisPreprocessingJobV2" type="oozie"> |
|
11 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
<!-- Cluster wide --> |
|
14 |
<PROPERTY key="user.name" value="dnet.production"/> |
|
15 |
|
|
16 |
<!-- Runtime --> |
|
17 |
<PROPERTY key="metadataextraction_default_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/> |
|
18 |
<!-- skipping _default chunk from property name since 2017.02.21 --> |
|
19 |
<PROPERTY key="metadataextraction_cache_location" value="/user/${user.name}/iis/cache/metadataextraction"/> |
|
20 |
<PROPERTY key="ingest_pmc_cache_location" value="/user/${user.name}/iis/cache/ingestpmc"/> |
|
21 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
22 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
23 |
<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_PROD"/> |
|
24 |
<PROPERTY key="reports_external_path" value="/user/${user.name}/iis/reports/${execution_environment}"/> |
|
25 |
<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/> |
|
26 |
<PROPERTY key="import_project_stream_endpoint_url" value="http://services.openaire.eu:8980/provision/mvc/openaire/export/streamProjectDetails.do"/> |
|
27 |
</STATIC_CONFIGURATION> |
|
28 |
<JOB_INTERFACE> |
|
29 |
<PARAM description="objectStore service endpoint" name="import_content_object_store_location" required="true"/> |
|
30 |
<PARAM description="mdstore service location" name="import_mdstore_service_location" required="true"/> |
|
31 |
<PARAM description="mdstore id for dataset records" name="import_dataset_mdstore_ids_csv" required="true"/> |
|
32 |
<PARAM description="mdstore id for WoS records" name="import_wos_mdstore_id" required="true"/> |
|
33 |
<PARAM description="database service endpoint" name="import_database_service_location" required="true"/> |
|
34 |
<PARAM description="objectstore ids subject to dataset reference extraction" name="import_content_datacite_objectstores_csv" required="true"/> |
|
35 |
<PARAM description="objectstore ids for WoS plaintext" name="import_content_wos_plaintext_objectstores_csv" required="true"/> |
|
36 |
<PARAM description="oozie job application absolute path" name="oozie.wf.application.path" required="true"/> |
|
37 |
<PARAM description="target action set for project references" name="export_action_set_id_document_referencedProjects" required="true"/> |
|
38 |
<PARAM description="target action set for dataset references" name="export_action_set_id_document_referencedDatasets" required="true"/> |
|
39 |
<PARAM description="target action set for WoS entities" name="export_action_set_id_entity_wos" required="true"/> |
|
40 |
<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/> |
|
41 |
</JOB_INTERFACE> |
|
42 |
</HADOOP_JOB> |
|
43 |
<STATUS> |
|
44 |
<LAST_SUBMISSION_DATE value="2017-07-20T07:13:23+00:00"/> |
|
45 |
<RUNNING_INSTANCES value="1"/> |
|
46 |
<CUMULATIVE_RUN value="23"/> |
|
47 |
</STATUS> |
|
48 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
49 |
</BODY> |
|
50 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/prepareBrokerDataJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="e0f1b6fa-e2bd-445d-9c37-e48ed8a8561b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="prepareBrokerDataJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the joined OAF objects</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.output.compress" value="false" /> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="100" /> |
|
37 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
38 |
|
|
39 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
40 |
|
|
41 |
<!-- Uncomment to override the default lib path --> |
|
42 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
43 |
</STATIC_CONFIGURATION> |
|
44 |
<JOB_INTERFACE> |
|
45 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
46 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
47 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
48 |
|
|
49 |
<PARAM name="index.entity.links" required="true" description="entity joiner configuration" /> |
|
50 |
<PARAM name="contextmap" required="true" description="context map (ContextDSResources)" /> |
|
51 |
</JOB_INTERFACE> |
|
52 |
<SCAN> |
|
53 |
<FILTERS /> |
|
54 |
<FAMILIES /> |
|
55 |
</SCAN> |
|
56 |
</HADOOP_JOB> |
|
57 |
<STATUS> |
|
58 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
59 |
<RUNNING_INSTANCES value="0"/> |
|
60 |
<CUMULATIVE_RUN value="0" /> |
|
61 |
</STATUS> |
|
62 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
63 |
</BODY> |
|
64 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="iisMainJob" type="oozie"> |
|
12 |
<DESCRIPTION>IIS main workflow</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- Cluster wide --> |
|
16 |
<PROPERTY key="queueName" value="default"/> |
|
17 |
<PROPERTY key="user.name" value="dnet.beta"/> |
|
18 |
|
|
19 |
<!-- Runtime --> |
|
20 |
<PROPERTY key="match_content_with_metadata" value="true"/> |
|
21 |
<PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/> |
|
22 |
<PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/> |
|
23 |
<!--<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/> --> |
|
24 |
<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction_updated_model"/> |
|
25 |
|
|
26 |
<PROPERTY key="export_action_hbase_table_initialize" value="true"/> |
|
27 |
<PROPERTY key="import_content_connection_timeout" value="180000"/> |
|
28 |
<PROPERTY key="import_content_read_timeout" value="180000"/> |
|
29 |
<PROPERTY key="import_resultset_client_read_timeout" value="180000"/> |
|
30 |
|
|
31 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
32 |
<PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/> |
|
33 |
<PROPERTY key="metadataextraction_excluded_checksums" |
|
34 |
value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/> |
|
35 |
<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/> |
|
36 |
<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/> |
|
37 |
<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/> |
|
38 |
<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/> |
|
39 |
<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/> |
|
40 |
<PROPERTY key="output_remote_location" value="hdfs://namenode.hadoop.dm.openaire.eu/var/lib/dnet/actionManager_BETA"/> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/> |
|
44 |
<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing"/> |
|
45 |
<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/> |
|
46 |
<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/> |
|
47 |
<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/> |
|
48 |
<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/> |
|
49 |
<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/> |
|
50 |
<PARAM name="nameNode" required="true" description="hdfs name node"/> |
|
51 |
<PARAM name="jobTracker" required="true" description="job tracker name"/> |
|
52 |
<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/> |
|
53 |
<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/> |
|
54 |
<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/> |
|
55 |
<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" |
|
56 |
required="true"/> |
|
57 |
<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/> |
|
58 |
<PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/> |
|
59 |
<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/> |
|
60 |
<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/> |
|
61 |
<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/> |
|
62 |
<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/> |
|
63 |
|
|
64 |
<!-- flags to enable/disable IIS modules --> |
|
65 |
<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module"/> |
|
66 |
<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module"/> |
|
67 |
<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module"/> |
|
68 |
<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module"/> |
|
69 |
<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module"/> |
|
70 |
<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module"/> |
|
71 |
<PARAM name="active_referenceextraction_researchinitiative" required="true" |
|
72 |
description="enable/disable the research initiative extraction module"/> |
|
73 |
<PARAM name="active_statistics" required="true" description="enable/disable the statistics module"/> |
|
74 |
<PARAM name="active_referenceextraction_pdb" required="true" description="enable/disable the protein data bank extraction module"/> |
|
75 |
|
|
76 |
</JOB_INTERFACE> |
|
77 |
</HADOOP_JOB> |
|
78 |
<STATUS> |
|
79 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
80 |
<RUNNING_INSTANCES value="0"/> |
|
81 |
<CUMULATIVE_RUN value="0"/> |
|
82 |
</STATUS> |
|
83 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
84 |
</BODY> |
|
85 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupExportPersonFullnameJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="ba309300-76f2-40d1-afe3-b77016f443e9_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="exportPersonFullnamesJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that exports the person fullnames on a text file on HDFS</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.preprocess.ExportFullnameMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.preprocess.ExportFullnameReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="1" /> |
|
35 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
36 |
<PROPERTY key="mapred.textoutputformat.separator" value="" /> |
|
37 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
38 |
|
|
39 |
<!-- Uncomment to override the default lib path --> |
|
40 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
44 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
45 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
46 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
47 |
</JOB_INTERFACE> |
|
48 |
<SCAN caching="10"> |
|
49 |
<FILTERS operator="MUST_PASS_ALL"> |
|
50 |
<FILTER type="prefix" param="entityTypeId" /> |
|
51 |
</FILTERS> |
|
52 |
<FAMILIES/> |
|
53 |
</SCAN> |
|
54 |
</HADOOP_JOB> |
|
55 |
<STATUS> |
|
56 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
57 |
<RUNNING_INSTANCES value="0"/> |
|
58 |
<CUMULATIVE_RUN value="0" /> |
|
59 |
</STATUS> |
|
60 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
61 |
</BODY> |
|
62 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/personCsvJoinJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="3f544a36-f123-4f5c-acf4-7c25f6591ec4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="personCsvJoinJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that joins person entities by "surname+first name letter" and serialises the output as csv</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" /> |
|
17 |
<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupMapper" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.JoinPersonGroupReducer" /> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapred.reduce.tasks" value="10" /> |
|
33 |
|
|
34 |
<PROPERTY key="hash.values.csv" value="manghip,pmanghi,corog,gcoro" /> |
|
35 |
|
|
36 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
37 |
|
|
38 |
<!-- Uncomment to override the default lib path --> |
|
39 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
40 |
|
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="input sequence file" /> |
|
44 |
</JOB_INTERFACE> |
|
45 |
</HADOOP_JOB> |
|
46 |
<STATUS> |
|
47 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
48 |
<RUNNING_INSTANCES value="0"/> |
|
49 |
<CUMULATIVE_RUN value="0" /> |
|
50 |
</STATUS> |
|
51 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
52 |
</BODY> |
|
53 |
</RESOURCE_PROFILE> |
|
54 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/elasticsearchTestJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="719b5d2b-4156-4936-bbc3-41d908ec3c57_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="elastisearchTestJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that indexes over ES</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.es.ElasticsearchFeedMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.BytesWritable" /> |
|
22 |
|
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="es.nodes" value="146.48.87.110:9200" /> |
|
26 |
<PROPERTY key="es.resource" value="openaire/oaf" /> |
|
27 |
<PROPERTY key="es.input.json" value="yes" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
|
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
33 |
|
|
34 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
35 |
|
|
36 |
<!-- Uncomment to override the default lib path --> |
|
37 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
38 |
</STATIC_CONFIGURATION> |
|
39 |
<JOB_INTERFACE> |
|
40 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
41 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
42 |
|
|
43 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS /> |
|
47 |
<FAMILIES /> |
|
48 |
</SCAN> |
|
49 |
</HADOOP_JOB> |
|
50 |
<STATUS> |
|
51 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
52 |
<RUNNING_INSTANCES value="0"/> |
|
53 |
<CUMULATIVE_RUN value="0" /> |
|
54 |
</STATUS> |
|
55 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
56 |
</BODY> |
|
57 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/promoteMultipleActionSets.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="fada2678-061e-4139-9444-549eff793126_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB type="mapreduce" name="promoteMultipleActionSetsJob"> |
|
12 |
<DESCRIPTION>map only job that promotes actions from a set on HDFS to the data table</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.DelegatingInputFormat"/> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.actions2.PromoteActionSetFromHDFSMapper"/> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
21 |
<PROPERTY value="org.apache.hadoop.hbase.client.Put" key="mapred.mapoutput.value.class"/> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
26 |
<PROPERTY value="false" key="mapreduce.map.speculative"/> |
|
27 |
<PROPERTY value="false" key="mapreduce.reduce.speculative"/> |
|
28 |
<PROPERTY value="true" key="mapred.output.compress"/> |
|
29 |
<PROPERTY value="BLOCK" key="mapred.output.compression.type"/> |
|
30 |
<PROPERTY value="0" key="mapred.reduce.tasks"/> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-0.0.8.5-SNAPSHOT-exportIds.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM required="true" name="mapred.input.dir.formats" description="source sequence file on hdfs"/> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
39 |
</JOB_INTERFACE> |
|
40 |
</HADOOP_JOB> |
|
41 |
<STATUS> |
|
42 |
<LAST_SUBMISSION_DATE value="2016-04-14T18:22:06+02:00"/> |
|
43 |
<RUNNING_INSTANCES value="0"/> |
|
44 |
<CUMULATIVE_RUN value="4"/> |
|
45 |
</STATUS> |
|
46 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
47 |
</BODY> |
|
48 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGTCleanerJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="888ef72f-701a-4d59-8b8a-2ad01986f975_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="gtCleanerJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that deletes the non-GT rows</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.GTCleanerMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Delete" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
31 |
|
|
32 |
<!-- Uncomment to override the default lib path --> |
|
33 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
40 |
</JOB_INTERFACE> |
|
41 |
<SCAN> |
|
42 |
<FILTERS operator="MUST_PASS_ALL"> |
|
43 |
<FILTER type="prefix" param="entityTypeId" /> |
|
44 |
</FILTERS> |
|
45 |
<FAMILIES> |
|
46 |
<FAMILY param="entityType" /> |
|
47 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
48 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
49 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
50 |
</FAMILIES> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0" /> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/promoteSingleActionSet.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="54ee1986-50a3-4e5b-a0d3-b411b12c7329_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="promoteSingleActionSetJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that imports the inference actions in the main hbase table</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.actions2.PromoteSingleActionSetMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/> |
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
29 |
|
|
30 |
|
|
31 |
<PROPERTY key="mapred.output.compress" value="true"/> |
|
32 |
<PROPERTY key="mapred.output.compression.type" value="BLOCK"/> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="0"/> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-0.0.8.5-SNAPSHOT-exportIds.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/> |
|
42 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
43 |
</JOB_INTERFACE> |
|
44 |
</HADOOP_JOB> |
|
45 |
<STATUS> |
|
46 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
47 |
<RUNNING_INSTANCES value="0"/> |
|
48 |
<CUMULATIVE_RUN value="0"/> |
|
49 |
</STATUS> |
|
50 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
51 |
</BODY> |
|
52 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupDeleteDedupRelsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="5626c94e-0005-416a-9ea4-48fc8af85ecd_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="deleteDedupRelsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that deletes the dedup rels used in the deduplication process</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupDeleteRelMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Writable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
31 |
|
|
32 |
<!-- Uncomment to override the default lib path --> |
|
33 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
40 |
</JOB_INTERFACE> |
|
41 |
<SCAN> |
|
42 |
<FILTERS operator="MUST_PASS_ALL"> |
|
43 |
<FILTER type="prefix" param="entityTypeId" /> |
|
44 |
</FILTERS> |
|
45 |
<FAMILIES> |
|
46 |
<FAMILY param="entityType" /> |
|
47 |
<FAMILY value="resultResult_dedup_merges" /> |
|
48 |
<FAMILY value="resultResult_dedup_isMergedIn" /> |
|
49 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
50 |
|
|
51 |
<FAMILY value="personPerson_dedup_merges" /> |
|
52 |
<FAMILY value="personPerson_dedup_isMergedIn" /> |
|
53 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
54 |
|
|
55 |
<FAMILY value="organizationOrganization_dedup_merges" /> |
|
56 |
<FAMILY value="organizationOrganization_dedup_isMergedIn" /> |
|
57 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
58 |
</FAMILIES> |
|
59 |
</SCAN> |
|
60 |
</HADOOP_JOB> |
|
61 |
<STATUS> |
|
62 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
63 |
<RUNNING_INSTANCES value="0"/> |
|
64 |
<CUMULATIVE_RUN value="0" /> |
|
65 |
</STATUS> |
|
66 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
67 |
</BODY> |
|
68 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/buildMergedToAnchorMapJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="4e16c8dd-8944-4266-8b5c-62e4b26e3090_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="buildMergedToAnchorMapJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that builds a map {merged author id --> anchorId}</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.BuildMergedAnchorMapMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.BuildMergedAnchorMapReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.output.compress" value="false" /> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1" /> |
|
37 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
38 |
<PROPERTY key="mapred.textoutputformat.separator" value="=" /> |
|
39 |
|
|
40 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
41 |
|
|
42 |
<!-- Uncomment to override the default lib path --> |
|
43 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
44 |
</STATIC_CONFIGURATION> |
|
45 |
<JOB_INTERFACE> |
|
46 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
47 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
48 |
<PARAM name="mapred.output.dir" required="true" description="target text output directory on hdfs" /> |
|
49 |
</JOB_INTERFACE> |
|
50 |
<SCAN> |
|
51 |
<FILTERS operator="MUST_PASS_ALL"> |
|
52 |
<FILTER type="prefix" value="30"/> |
|
53 |
</FILTERS> |
|
54 |
<FAMILIES> |
|
55 |
<FAMILY value="person"/> |
|
56 |
<FAMILY value="personPerson_dedup_isMergedIn" /> |
|
57 |
</FAMILIES> |
|
58 |
</SCAN> |
|
59 |
</HADOOP_JOB> |
|
60 |
<STATUS> |
|
61 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
62 |
<RUNNING_INSTANCES value="0"/> |
|
63 |
<CUMULATIVE_RUN value="0" /> |
|
64 |
</STATUS> |
|
65 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
66 |
</BODY> |
|
67 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/exportIdentifiersJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="d5a7c415-932b-442e-91c2-46f648945ac2_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="exportIdentifiersJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that exports the publication identifiers as json</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataexport.ExportResultIdentifiersMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/> |
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
29 |
|
|
30 |
<PROPERTY key="dfs.blocksize" value="256M"/> |
|
31 |
|
|
32 |
<PROPERTY key="mapred.reduce.tasks" value="1"/> |
|
33 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
34 |
|
|
35 |
<!-- Uncomment to override the default lib path --> |
|
36 |
<PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-0.0.8.5-SNAPSHOT-exportIds.jar"/> |
|
37 |
</STATIC_CONFIGURATION> |
|
38 |
<JOB_INTERFACE> |
|
39 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
40 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
41 |
|
|
42 |
<PARAM name="mapred.output.dir" required="true" description="target text output directory on hdfs"/> |
|
43 |
</JOB_INTERFACE> |
|
44 |
<SCAN> |
|
45 |
<FILTERS operator="MUST_PASS_ALL"> |
|
46 |
<FILTER type="prefix" value="50"/> |
|
47 |
</FILTERS> |
|
48 |
<FAMILIES> |
|
49 |
<FAMILY value="result"/> |
|
50 |
</FAMILIES> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0"/> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceExportJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="62c1e44c-14b6-4639-9083-29bf432e6759_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="informationSpaceExportJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that exports the whole information space table as json</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataexport.ExportInformationSpaceMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
|
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
37 |
<PROPERTY key="mapred.output.compress" value="true" /> |
|
38 |
<PROPERTY key="mapred.output.compression.type" value="BLOCK" /> |
|
39 |
|
|
40 |
|
|
41 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
42 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
43 |
|
|
44 |
<!-- Uncomment to override the default lib path --> |
|
45 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
46 |
</STATIC_CONFIGURATION> |
|
47 |
<JOB_INTERFACE> |
|
48 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
49 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
50 |
|
|
51 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> |
|
52 |
</JOB_INTERFACE> |
|
53 |
<SCAN> |
|
54 |
<FILTERS /> |
|
55 |
<FAMILIES /> |
|
56 |
</SCAN> |
|
57 |
</HADOOP_JOB> |
|
58 |
<STATUS> |
|
59 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
60 |
<RUNNING_INSTANCES value="0"/> |
|
61 |
<CUMULATIVE_RUN value="0" /> |
|
62 |
</STATUS> |
|
63 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
64 |
</BODY> |
|
65 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/calculatePersonDistributionStep1Job.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="f61bc720-2821-4871-937d-64b79f098714_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2015-06-15T10:50:29+02:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="calculatePersonDistributionStep1Job" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that performs tests for notificationBroker service (step 1/2)</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Mapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Reducer"/> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
31 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="100"/> |
|
35 |
|
|
36 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
37 |
|
|
38 |
<!-- Uncomment to override the default lib path --> |
|
39 |
<!-- <PROPERTY key="job.lib" value="/user/michele.artini/lib/dnet-mapreduce-jobs-assembly-0.0.6.6-SNAPSHOT.jar"/> --> |
|
40 |
</STATIC_CONFIGURATION> |
|
41 |
<JOB_INTERFACE> |
|
42 |
<PARAM description="source hbase table" name="hbase.mapred.inputtable" required="true"/> |
|
43 |
<PARAM description="source hbase table" name="hbase.mapreduce.inputtable" required="true"/> |
|
44 |
</JOB_INTERFACE> |
|
45 |
<SCAN> |
|
46 |
<FILTERS operator="MUST_PASS_ONE"> |
|
47 |
<FILTER type="prefix" value="50"/> |
|
48 |
<FILTER type="prefix" value="10"/> |
|
49 |
</FILTERS> |
|
50 |
<FAMILIES> |
|
51 |
<FAMILY value="result"/> |
|
52 |
<FAMILY value="datasource"/> |
|
53 |
</FAMILIES> |
|
54 |
</SCAN> |
|
55 |
</HADOOP_JOB> |
|
56 |
<STATUS> |
|
57 |
<LAST_SUBMISSION_DATE value="2015-06-15T11:10:17+02:00"/> |
|
58 |
<RUNNING_INSTANCES value="0"/> |
|
59 |
<CUMULATIVE_RUN value="7"/> |
|
60 |
</STATUS> |
|
61 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
62 |
</BODY> |
|
63 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/pom.xml | ||
---|---|---|
1 |
<?xml version="1.0" ?> |
|
2 |
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
3 |
<parent> |
|
4 |
<groupId>eu.dnetlib</groupId> |
|
5 |
<artifactId>dnet45-parent</artifactId> |
|
6 |
<version>1.0.0</version> |
|
7 |
<relativePath /> |
|
8 |
</parent> |
|
9 |
<modelVersion>4.0.0</modelVersion> |
|
10 |
<groupId>eu.dnetlib</groupId> |
|
11 |
<artifactId>dnet-openaireplus-profiles</artifactId> |
|
12 |
<packaging>jar</packaging> |
|
13 |
<version>1.0.27</version> |
|
14 |
<scm> |
|
15 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27</developerConnection> |
|
16 |
</scm> |
|
17 |
|
|
18 |
<dependencies> |
|
19 |
|
|
20 |
</dependencies> |
|
21 |
</project> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/invalidRecordDoiExporterJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="7edd375c-1d1e-4f2b-81c9-1b7d4f65c233_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2018-10-22T10:34:48+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="invalidRecordDoiExporterJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that ensures xml records are valid</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION><!-- I/O FORMAT --> |
|
13 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
14 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"/><!-- MAPPER --> |
|
15 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.GetInvalidXmlRecordsMapper"/> |
|
16 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
17 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/><!-- JOB GLOBAL --> |
|
18 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
19 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/><!-- MISC --> |
|
20 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
21 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
22 |
<PROPERTY key="mapred.reduce.tasks" value="1"/><!-- <PROPERTY key="user.name" value="dnet" /> --><!-- Uncomment to override the default lib path --> |
|
23 |
<!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-1.1.4-BETA-SNAPSHOT-FilterXmlRecordsMapper.jar"/> --> |
|
24 |
<PROPERTY key="mapred.output.compress" value="false"/> |
|
25 |
</STATIC_CONFIGURATION> |
|
26 |
<JOB_INTERFACE> |
|
27 |
<PARAM description="input sequence file" name="mapred.input.dir" required="true"/> |
|
28 |
<PARAM description="target file" name="mapred.output.dir" required="true"/> |
|
29 |
</JOB_INTERFACE> |
|
30 |
</HADOOP_JOB> |
|
31 |
<STATUS> |
|
32 |
<LAST_SUBMISSION_DATE value="2018-10-22T10:35:08+00:00"/> |
|
33 |
<RUNNING_INSTANCES value="0"/> |
|
34 |
<CUMULATIVE_RUN value="20"/> |
|
35 |
</STATUS> |
|
36 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
37 |
</BODY> |
|
38 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindPersonRootsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="8ec4731e-4e91-4863-9a4b-7f0a8ca0542e_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupFindPersonRootsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that finds the root of a similarity group (person)</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="100" /> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
42 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
43 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
44 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS operator="MUST_PASS_ALL"> |
|
48 |
<FILTER type="prefix" param="entityTypeId" /> |
|
49 |
</FILTERS> |
|
50 |
<FAMILIES> |
|
51 |
<FAMILY param="entityType" /> |
|
52 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
53 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
54 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
55 |
</FAMILIES> |
|
56 |
</SCAN> |
|
57 |
</HADOOP_JOB> |
|
58 |
<STATUS> |
|
59 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
60 |
<RUNNING_INSTANCES value="0"/> |
|
61 |
<CUMULATIVE_RUN value="0" /> |
|
62 |
</STATUS> |
|
63 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
64 |
</BODY> |
|
65 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/prepareIndexDataJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Hadoop job profile "prepareIndexDataJob" (type: mapreduce).
        Reads from an HBase table (TableInputFormat), runs PrepareFeedMapper and PrepareFeedReducer
        with 500 reduce tasks, and writes Text/Text pairs to a sequence file
        (SequenceFileOutputFormat).
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="ed8c0a4e-7cf2-49df-bfed-fcfab0699ade_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): 2001-12-31T12:00:00 looks like a template placeholder date; confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <HADOOP_JOB name="prepareIndexDataJob" type="mapreduce">
            <DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the xml records</DESCRIPTION>
            <STATIC_CONFIGURATION>

                <!-- I/O FORMAT: HBase table in, sequence file out -->
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>

                <!-- MAPPER -->
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedMapper"/>
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>

                <!-- REDUCER -->
                <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedReducer"/>
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>

                <!-- MISC -->
                <!-- Final output is left uncompressed; the intermediate map output is compressed. -->
                <PROPERTY key="mapred.output.compress" value="false"/>
                <PROPERTY key="mapred.compress.map.output" value="true"/>
                <!-- Speculative execution is switched off under both the mapred.* and the mapreduce.* key spellings. -->
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>

                <PROPERTY key="mapred.fairscheduler.pool" value="solr"/>
                <PROPERTY key="mapred.reduce.tasks" value="500"/>
                <PROPERTY key="dfs.blocksize" value="256M"/>
                <!-- NOTE(review): comma-separated list; presumably consumed by the feed mapper/reducer. Confirm which component reads it. -->
                <PROPERTY key="ui.other.datasourcetypes" value="scholarcomminfra, infospace, pubsrepository::mock, entityregistry, entityregistry::projects, entityregistry::repositories, websource"/>

                <!-- <PROPERTY key="user.name" value="dnet" /> -->

                <!-- Uncomment to override the default lib path -->
                <!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
            </STATIC_CONFIGURATION>
            <!--
                Parameters the caller must supply: the source table (declared under two key
                spellings), the HDFS output location, and two configuration payloads.
            -->
            <JOB_INTERFACE>
                <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
                <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
                <PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/>

                <PARAM name="index.entity.links" required="true" description="entity joiner configuration"/>
                <PARAM name="contextmap" required="true" description="context map (ContextDSResources)"/>
            </JOB_INTERFACE>
            <!-- The scan declares no filters and no column families. -->
            <SCAN>
                <FILTERS/>
                <FAMILIES/>
            </SCAN>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMergeCoAuthors.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Hadoop job profile "mergeCoAuthorsJob" (type: mapreduce).
        Reads from an HBase table (TableInputFormat), runs CoAuthorMapper and CoAuthorReducer
        with 100 reduce tasks, and writes to an HBase table (TableOutputFormat).
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="141e7add-0765-4679-b04d-c9668c011003_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): 2001-12-31T12:00:00 looks like a template placeholder date; confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <HADOOP_JOB name="mergeCoAuthorsJob" type="mapreduce">
            <!--
                NOTE(review): this description talks about building roots and redirecting rels, while
                the job name and the configured classes refer to co-authors. It may have been copied
                from another profile; confirm and reword if so.
            -->
            <DESCRIPTION>map reduce job that builds the roots and redirects the rels</DESCRIPTION>
            <STATIC_CONFIGURATION>

                <!-- I/O FORMAT: HBase table in, HBase table out -->
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>

                <!-- MAPPER -->
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.CoAuthorMapper"/>
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>

                <!-- REDUCER -->
                <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.CoAuthorReducer"/>
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>

                <!-- MISC -->
                <!-- Speculative execution is switched off under both the mapred.* and the mapreduce.* key spellings. -->
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>

                <!-- NOTE(review): presumably an upper bound on co-authors handled per author; verify in CoAuthorMapper/CoAuthorReducer. -->
                <PROPERTY key="max.coauthors" value="50"/>

                <PROPERTY key="mapred.reduce.tasks" value="100"/>
                <!-- <PROPERTY key="user.name" value="dnet" /> -->

                <!-- Uncomment to override the default lib path -->
                <!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
            </STATIC_CONFIGURATION>
            <!--
                Parameters the caller must supply. The source and target table are each declared
                under two key spellings (hbase.mapred.* and hbase.mapreduce.*).
            -->
            <JOB_INTERFACE>
                <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
                <PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
                <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
                <PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/>
            </JOB_INTERFACE>
            <!--
                Table scan with caching="10", one prefix filter and no explicit column families.
                NOTE(review): "entityTypeId" is referenced through param="..." but is not listed in
                JOB_INTERFACE; verify where it is supplied.
            -->
            <SCAN caching="10">
                <FILTERS operator="MUST_PASS_ALL">
                    <FILTER type="prefix" param="entityTypeId"/>
                </FILTERS>
                <FAMILIES/>
            </SCAN>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/xmlRecordValidatorJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Hadoop job profile "xmlRecordValidatorJob" (type: mapreduce).
        Sequence file in, sequence file out; records pass through FilterXmlRecordsMapper and are
        emitted as Text/Text pairs. The final output is block-compressed.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="d051952c-6f2f-4374-ac14-7fbb8d8fa430_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2018-10-22T10:34:48+00:00"/>
    </HEADER>
    <BODY>
        <HADOOP_JOB type="mapreduce" name="xmlRecordValidatorJob">
            <DESCRIPTION>map only job that ensures xml records are valid</DESCRIPTION>
            <STATIC_CONFIGURATION>
                <!-- Input and output formats -->
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>

                <!-- Mapper class and the types it emits -->
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.FilterXmlRecordsMapper"/>
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>

                <!-- Job-level output types -->
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>

                <!-- Miscellaneous: map-side speculative execution is off under both key spellings -->
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
                <!--
                    NOTE(review): the description says "map only" and no reducer class is configured,
                    yet 100 reduce tasks are requested. Confirm this is intentional (for example to
                    control the number of output files) rather than a leftover.
                -->
                <PROPERTY key="mapred.reduce.tasks" value="100"/>
                <!-- <PROPERTY key="user.name" value="dnet" /> -->

                <!-- Enable the next property to point the job at a specific jar instead of the default lib path -->
                <!-- <PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-assembly-1.1.4-BETA-SNAPSHOT-FilterXmlRecordsMapper.jar"/> -->
                <PROPERTY key="mapred.output.compress" value="true"/>
                <PROPERTY key="mapred.output.compression.type" value="BLOCK"/>
            </STATIC_CONFIGURATION>
            <!-- Both the input and the output location must be provided by the caller. -->
            <JOB_INTERFACE>
                <PARAM name="mapred.input.dir" required="true" description="input sequence file"/>
                <PARAM name="mapred.output.dir" required="true" description="target file"/>
            </JOB_INTERFACE>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2018-10-22T10:35:08+00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="20"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/mdStoreHdfsImportAuthorsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Hadoop job profile "authorImportRecordsJob" (type: mapreduce).
        Reads a sequence file (SequenceFileInputFormat), runs AuthorImportRecordsMapper with zero
        reduce tasks, and writes Put operations to an HBase table (TableOutputFormat).
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="a53df5e3-8a38-4d3f-8f67-bf9fc43279a6_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): 2001-12-31T12:00:00 looks like a template placeholder date; confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <HADOOP_JOB name="authorImportRecordsJob" type="mapreduce">
            <DESCRIPTION>map only job that maps xml metadata records from a sequence file into an hbase table</DESCRIPTION>
            <STATIC_CONFIGURATION>

                <!-- I/O FORMAT: sequence file in, HBase table out -->
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>

                <!-- MAPPER -->
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.AuthorImportRecordsMapper"/>
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/>

                <!-- MISC -->
                <!-- Map-side speculative execution is switched off under both key spellings. -->
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
                <!-- Zero reduce tasks, matching the "map only" description. -->
                <PROPERTY key="mapred.reduce.tasks" value="0"/>
                <!-- <PROPERTY key="user.name" value="dnet" /> -->

                <!-- Uncomment to override the default lib path -->
                <!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->

            </STATIC_CONFIGURATION>
            <!--
                Parameters the caller must supply.
                NOTE(review): only the hbase.mapred.outputtable spelling is declared here; confirm
                that it is the key read by the configured output format.
            -->
            <JOB_INTERFACE>
                <PARAM name="mapred.input.dir" required="true" description="input sequence file"/>
                <PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
                <PARAM name="hbase.import.xslt" required="true" description="mapping"/>
            </JOB_INTERFACE>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.27/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/publicationAnalysisJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <!--
        Hadoop job profile "publicationAnalysisJob" (type: mapreduce).
        Reads from an HBase table (TableInputFormat), runs PublicationAnalysisMapper with zero
        reduce tasks, and discards job output (NullOutputFormat with NullWritable key and value).
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="721fd82c-6444-41c9-ba23-5eb0652ddaeb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <!-- NOTE(review): 2001-12-31T12:00:00 looks like a template placeholder date; confirm. -->
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
    </HEADER>
    <BODY>
        <HADOOP_JOB name="publicationAnalysisJob" type="mapreduce">
            <DESCRIPTION>map only job that analyses publication features</DESCRIPTION>
            <STATIC_CONFIGURATION>

                <!-- I/O FORMAT: HBase table in, no output written -->
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat"/>

                <!-- MAPPER -->
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.PublicationAnalysisMapper"/>
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable"/>
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable"/>

                <!-- MISC -->

                <!-- Speculative execution is switched off under both the mapred.* and the mapreduce.* key spellings. -->
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>

                <!-- Zero reduce tasks, matching the "map only" description. -->
                <PROPERTY key="mapred.reduce.tasks" value="0"/>
                <!-- <PROPERTY key="user.name" value="dnet" /> -->

                <!-- Uncomment to override the default lib path -->
                <!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
            </STATIC_CONFIGURATION>
            <!-- The source table must be supplied by the caller, declared under two key spellings. -->
            <JOB_INTERFACE>
                <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
                <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
            </JOB_INTERFACE>
            <!--
                Table scan restricted by a fixed prefix filter and a single column family.
                NOTE(review): presumably "50" is the row-key prefix of result entities, matching the
                "result" family; confirm.
            -->
            <SCAN>
                <FILTERS operator="MUST_PASS_ALL">
                    <FILTER type="prefix" value="50"/>
                </FILTERS>
                <FAMILIES>
                    <FAMILY value="result"/>
                </FAMILIES>
            </SCAN>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
            <RUNNING_INSTANCES value="0"/>
            <CUMULATIVE_RUN value="0"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.27