Revision 56002
Added by Enrico Ottonello almost 5 years ago
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/dm/import.infospace.xml | ||
---|---|---|
12 | 12 |
<METAWORKFLOW_SECTION>InfoSpace Provision</METAWORKFLOW_SECTION> |
13 | 13 |
<ADMIN_EMAIL>alessia.bardi@isti.cnr.it,claudio.atzori@isti.cnr.it</ADMIN_EMAIL> |
14 | 14 |
<CONFIGURATION status="EXECUTABLE"> |
15 |
<WORKFLOW id="e03f256e-1e4d-4b3d-9c07-91faf5d25207_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Import ScholExplorer"> |
|
16 |
</WORKFLOW> |
|
17 |
</CONFIGURATION> |
|
15 |
<WORKFLOW id="e03f256e-1e4d-4b3d-9c07-91faf5d25207_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Import ScholExplorer"> |
|
16 |
<WORKFLOW id="e03f256e-1e4d-4b3d-9c07-91faf5d25208_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Import DOIboost"/> |
|
17 |
<WORKFLOW id="970c4826-cf6d-4dfe-850c-41e508d341fa_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Import organizations from DOIboost"/> |
|
18 |
<WORKFLOW id="ff25219c-e485-4440-8bc0-a8bbe42512ac_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Import Grid.AC"/> |
|
19 |
<WORKFLOW id="7c8765af-1253-4bd7-8806-315b73bf7319_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="Import Orcid"/> |
|
20 |
</WORKFLOW> |
|
21 |
</CONFIGURATION> |
|
18 | 22 |
<SCHEDULING enabled="false"> |
19 | 23 |
<CRON>29 5 22 ? * *</CRON> |
20 | 24 |
<MININTERVAL>10080</MININTERVAL> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importOrcid.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="7c8765af-1253-4bd7-8806-315b73bf7319_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2019-05-29T10:54:33+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<WORKFLOW_NAME>Import Orcid</WORKFLOW_NAME> |
|
11 |
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE> |
|
12 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
13 |
<CONFIGURATION start="manual"> |
|
14 |
<!-- Start node of the workflow. The user-managed hdfsPath value is registered under the name given by hdfsPathParam ("inputPath"); the importActionSet node maps that name to 'mapred.input.dir'. NOTE(review): exact storage semantics depend on the SetHdfsFile node implementation; confirm there. -->
<NODE isStart="true" name="setInputPath" type="SetHdfsFile"> |

15 |
<DESCRIPTION>set the hdfs input path</DESCRIPTION> |

16 |
<PARAMETERS> |

17 |
<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/data/orcid</PARAM> |

18 |
<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM> |

19 |
</PARAMETERS> |

20 |
<ARCS> |

21 |
<ARC to="prepareActionSets"/> |

22 |
</ARCS> |

23 |
</NODE> |
|
24 |
<!-- Declares the action sets handled by this workflow through the "sets" parameter: a single entry, 'orcidworks-no-doi', enabled, with its job property and enabling property names. NOTE(review): how PrepareActionSets consumes these keys is not visible here; confirm against the node implementation. -->
<NODE name="prepareActionSets" type="PrepareActionSets"> |

25 |
<DESCRIPTION>prepare action sets</DESCRIPTION> |

26 |
<PARAMETERS> |

27 |
<PARAM managedBy="system" name="sets" required="true" type="string"> |

28 |
[ |

29 |
{ |

30 |
'set' : 'orcidworks-no-doi', |

31 |
'jobProperty' : 'export_action_set_orcidworks_no_doi', |

32 |
'enablingProperty' : 'active_orcidworks_no_doi', |

33 |
'enabled' : 'true' |

34 |
} |

35 |
] |

36 |
</PARAM> |

37 |
</PARAMETERS> |

38 |
<ARCS> |

39 |
<ARC to="extractOutputPath"/> |

40 |
</ARCS> |

41 |
</NODE> |
|
42 |
<!-- Runs after prepareActionSets and before importActionSet. The hdfsOutputPathParam value ("outputPath") is the name that importActionSet maps to 'mapred.output.dir'. NOTE(review): presumably the path comes from the action set prepared by the previous node; confirm against ExtractOutputPath. -->
<NODE name="extractOutputPath" type="ExtractOutputPath"> |

43 |
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION> |

44 |
<PARAMETERS> |

45 |
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM> |

46 |
</PARAMETERS> |

47 |
<ARCS> |

48 |
<ARC to="importActionSet"/> |

49 |
</ARCS> |

50 |
</NODE> |
|
51 |
<!-- Submits the hadoop job named by the hadoopJob parameter (importOrcidJob) on the DM cluster. envParams maps the job parameters 'mapred.input.dir' and 'mapred.output.dir' to the names 'inputPath' and 'outputPath' set by the setInputPath and extractOutputPath nodes. NOTE(review): isJoin="true" is set although only one arc (from extractOutputPath) targets this node in this profile; confirm it is intended. -->
<NODE isJoin="true" name="importActionSet" type="SubmitHadoopJob"> |

52 |
<DESCRIPTION>submit the importOrcidJob hadoop job</DESCRIPTION> |

53 |
<PARAMETERS> |

54 |
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">importOrcidJob</PARAM> |

55 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |

56 |
<PARAM managedBy="system" name="envParams" required="true" type="string"> |

57 |
{ |

58 |
'mapred.input.dir':'inputPath', |

59 |
'mapred.output.dir':'outputPath' |

60 |
} |

61 |
</PARAM> |

62 |
<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM> |

63 |
</PARAMETERS> |

64 |
<ARCS> |

65 |
<ARC to="updateActionSets"/> |

66 |
</ARCS> |

67 |
</NODE> |
|
68 |
<!-- Last node of the workflow: takes no parameters and its only arc leads to "success". NOTE(review): what UpdateActionSets actually updates is not visible in this profile; confirm against the node implementation. -->
<NODE name="updateActionSets" type="UpdateActionSets"> |

69 |
<DESCRIPTION>update action sets</DESCRIPTION> |

70 |
<PARAMETERS/> |

71 |
<ARCS> |

72 |
<ARC to="success"/> |

73 |
</ARCS> |

74 |
</NODE> |
|
75 |
</CONFIGURATION> |
|
76 |
<STATUS/> |
|
77 |
</BODY> |
|
78 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/importOrcidJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="249e40c2-6420-4207-b40a-e1236f77f1fc_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2019-05-29T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<!-- Hadoop job profile "importOrcidJob": a mapreduce job with zero reduce tasks that reads text input and writes sequence-file output using OrcidImportMapper. The two JOB_INTERFACE parameters are required and must be supplied by the caller. -->
<HADOOP_JOB name="importOrcidJob" type="mapreduce"> |

12 |
<DESCRIPTION>map reduce job that imports the Orcid works (no doi) into actions</DESCRIPTION> |

13 |
<STATIC_CONFIGURATION> |

14 |


15 |
<!-- I/O FORMAT --> |

16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.TextInputFormat"/> |

17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |

18 |


19 |
<!-- MAPPER --> |

20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.OrcidImportMapper"/> |

21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.LongWritable"/> |

22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/> |

23 |


24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |

25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |

26 |


27 |
<!-- MISC --> |

28 |
<PROPERTY key="mapred.compress.map.output" value="false"/> |

29 |
<!-- Map-side speculative execution is disabled under both the legacy (mapred.*) and the current (mapreduce.*) property name. -->
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |

30 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |

31 |


32 |
<!-- Zero reduce tasks: the job is map-only. -->
<PROPERTY key="mapred.reduce.tasks" value="0"/> |

33 |
<PROPERTY key="dfs.blocksize" value="256M"/> |

34 |


35 |
<!-- Orcid Mapper Properties --> |

36 |
<PROPERTY key="setName" value="orcidworks-no-doi"/> |

37 |
<PROPERTY key="agentId" value="dnet"/> |

38 |
<PROPERTY key="agentName" value="D-Net"/> |

39 |
<PROPERTY key="invisible" value="true"/> |

40 |


41 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |

42 |


43 |
<!-- Uncomment to override the default lib path --> |

44 |
<!-- PROPERTY key="job.lib" value="/lib/dnet/snapshots/dnet-mapreduce-jobs-orcid_beta.jar"/ --> |

45 |
</STATIC_CONFIGURATION> |

46 |
<JOB_INTERFACE> |

47 |
<PARAM name="mapred.input.dir" required="true" description="source text input path on hdfs"/> |

48 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/> |

49 |
</JOB_INTERFACE> |

50 |
<SCAN> |

51 |
<FILTERS/> |

52 |
<FAMILIES/> |

53 |
</SCAN> |

54 |
</HADOOP_JOB> |
|
55 |
<STATUS/> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
|
59 |
|
Also available in: Unified diff
Added workflow configuration and Hadoop job for importing ORCID publications without a DOI