1 |
60316
|
claudio.at
|
<!--
  SET_INFO: entry node of this workflow fragment (isStart="true"), node type SetProviderInfo.
  Declares three required, system-managed string parameters whose values are template
  placeholders:
    providerId   <= $params.("dataprovider:id")$
    providerName <= $params.("dataprovider:name")$
    api          <= $params.("dataprovider:interface")$
  Its only outgoing arc leads to the node named "obtainParams".
  NOTE(review): the bare "|", number and author-name lines interleaved with the markup look
  like residue from a revision-blame export rather than workflow content; confirm and strip
  them before this text is loaded as XML.
-->
<NODE name="SET_INFO" isStart="true" type="SetProviderInfo">
|
2 |
57299
|
sandro.lab
|
<DESCRIPTION>Set information about current provider</DESCRIPTION>
|
3 |
|
|
<PARAMETERS>
|
4 |
|
|
<PARAM required="true" type="string" name="providerId" managedBy="system">$params.("dataprovider:id")$</PARAM>
|
5 |
|
|
<PARAM required="true" type="string" name="providerName" managedBy="system">$params.("dataprovider:name")$</PARAM>
|
6 |
|
|
<PARAM required="true" type="string" name="api" managedBy="system">$params.("dataprovider:interface")$</PARAM>
|
7 |
|
|
</PARAMETERS>
|
8 |
|
|
<ARCS>
|
9 |
|
|
<ARC to="obtainParams"/>
|
10 |
|
|
</ARCS>
|
11 |
|
|
</NODE>
|
12 |
|
|
|
13 |
|
|
<!--
  obtainParams: node of type ObtainOpenaireDataSourceParams.
  Takes a single required, system-managed string parameter, providerId, filled from the
  $params.("dataprovider:id")$ placeholder.
  Its only outgoing arc leads to the node named "PREPARE_ENV_COLLECTION".
  NOTE(review): which values this node type publishes for later nodes is not visible in
  this file; check the node implementation before relying on any of them.
-->
<NODE name="obtainParams" type="ObtainOpenaireDataSourceParams">
|
14 |
|
|
<DESCRIPTION>Obtain data source params</DESCRIPTION>
|
15 |
|
|
<PARAMETERS>
|
16 |
|
|
<PARAM required="true" type="string" name="providerId" managedBy="system">$params.("dataprovider:id")$</PARAM>
|
17 |
|
|
</PARAMETERS>
|
18 |
|
|
<ARCS>
|
19 |
60316
|
claudio.at
|
<ARC to="PREPARE_ENV_COLLECTION"/>
|
20 |
57299
|
sandro.lab
|
</ARCS>
|
21 |
|
|
</NODE>
|
22 |
60316
|
claudio.at
|
|
23 |
57299
|
sandro.lab
|
<!--
  PREPARE_ENV_COLLECTION: node of type PrepareEnvCollectHadoopJobNode.
  Parameters declared here:
    mdId              required, system-managed, category MDSTORE_ID, value from $params.("harv_id")$
    collectionMode    required, user-managed, no default; restricted by
                      validValues(['REFRESH','INCREMENTAL'])
    metadataEncoding  required, user-managed, default XML
    fromDateOverride  optional, user-managed string, no default
    untilDateOverride optional, user-managed string, no default
    maxNumberOfRetry  optional, user-managed int, default 5
    requestDelay      optional, user-managed int, default 0
    retryDelay        optional, user-managed int, default 60
    connectTimeOut    optional, user-managed int, default 30
    readTimeOut       optional, user-managed int, default 60
  NOTE(review): the units of the delay and timeout values are not stated in this file
  (presumably seconds); confirm against the node implementation.
  Its only outgoing arc leads to the node named "COLLECT_HADOOP".
-->
<NODE name="PREPARE_ENV_COLLECTION" type="PrepareEnvCollectHadoopJobNode">
|
24 |
60316
|
claudio.at
|
<DESCRIPTION>Set in the environment all the variable needed to the collection oozie job</DESCRIPTION>
|
25 |
57299
|
sandro.lab
|
<PARAMETERS>
|
26 |
60318
|
claudio.at
|
<PARAM required="true" type="string" name="mdId" managedBy="system" category="MDSTORE_ID">$params.("harv_id")$</PARAM>
|
27 |
60316
|
claudio.at
|
<PARAM required="true" type="string" name="collectionMode" managedBy="user" function="validValues(['REFRESH','INCREMENTAL'])"></PARAM>
|
28 |
60321
|
claudio.at
|
<PARAM required="true" type="string" name="metadataEncoding" managedBy="user">XML</PARAM>
|
29 |
60381
|
claudio.at
|
<PARAM managedBy="user" name="fromDateOverride" required="false" type="string"/>
|
30 |
60972
|
claudio.at
|
<PARAM managedBy="user" name="untilDateOverride" required="false" type="string"/>
|
31 |
60380
|
claudio.at
|
<PARAM managedBy="user" name="maxNumberOfRetry" required="false" type="int">5</PARAM>
|
32 |
|
|
<PARAM managedBy="user" name="requestDelay" required="false" type="int">0</PARAM>
|
33 |
|
|
<PARAM managedBy="user" name="retryDelay" required="false" type="int">60</PARAM>
|
34 |
|
|
<PARAM managedBy="user" name="connectTimeOut" required="false" type="int">30</PARAM>
|
35 |
|
|
<PARAM managedBy="user" name="readTimeOut" required="false" type="int">60</PARAM>
|
36 |
57299
|
sandro.lab
|
</PARAMETERS>
|
37 |
|
|
<ARCS>
|
38 |
|
|
<ARC to="COLLECT_HADOOP"/>
|
39 |
|
|
</ARCS>
|
40 |
|
|
</NODE>
|
41 |
60316
|
claudio.at
|
|
42 |
60384
|
claudio.at
|
<!--
  COLLECT_HADOOP: node of type SubmitDnetHadoopJobNode.
  Parameters declared here:
    hadoopJob             required, system-managed, fixed value executeOozieJob
    cluster               required, user-managed, default DHP
    envParams             required, system-managed; the value is a JSON object of string pairs.
                          Every pair has the same key and value except
                          "mdStoreID" : "mdId" and "oozie.wf.application.path" : "oozieWfPath".
                          NOTE(review): presumably each key is a job property name and each value
                          the workflow environment attribute it is read from; confirm.
    params                required, system-managed; JSON object with the single entry
                          "collection_java_xmx" : "-Xmx300m"
    oozieReportActionsCsv required, system-managed; comma separated list
                          BeginRead,StartTransaction,CollectionWorker
  Both JSON values are element text, so keep them free of XML comments and stray characters.
  Its only outgoing arc leads to the node named "UPDATE_INFO".
-->
<NODE name="COLLECT_HADOOP" type="SubmitDnetHadoopJobNode">
|
43 |
57299
|
sandro.lab
|
<DESCRIPTION>Start the Hadoop Job</DESCRIPTION>
|
44 |
|
|
<PARAMETERS>
|
45 |
60316
|
claudio.at
|
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">executeOozieJob</PARAM>
|
46 |
57299
|
sandro.lab
|
<PARAM managedBy="user" name="cluster" required="true" type="string">DHP</PARAM>
|
47 |
|
|
<PARAM managedBy="system" name="envParams" required="true" type="string">
|
48 |
|
|
{
|
49 |
|
|
"apiDescription":"apiDescription",
|
50 |
60316
|
claudio.at
|
"dataSourceInfo":"dataSourceInfo",
|
51 |
60321
|
claudio.at
|
"identifierPath":"identifierPath",
|
52 |
|
|
"metadataEncoding":"metadataEncoding",
|
53 |
57299
|
sandro.lab
|
"timestamp":"timestamp",
|
54 |
60318
|
claudio.at
|
"workflowId":"workflowId",
|
55 |
60321
|
claudio.at
|
"mdStoreID":"mdId",
|
56 |
|
|
"collectionMode":"collectionMode",
|
57 |
60380
|
claudio.at
|
"maxNumberOfRetry":"maxNumberOfRetry",
|
58 |
60383
|
claudio.at
|
"requestDelay":"requestDelay",
|
59 |
60380
|
claudio.at
|
"retryDelay":"retryDelay",
|
60 |
|
|
"connectTimeOut":"connectTimeOut",
|
61 |
|
|
"readTimeOut":"readTimeOut",
|
62 |
60366
|
claudio.at
|
"dnetMessageManagerURL":"dnetMessageManagerURL",
|
63 |
60318
|
claudio.at
|
"oozie.wf.application.path":"oozieWfPath"
|
64 |
57299
|
sandro.lab
|
}
|
65 |
|
|
</PARAM>
|
66 |
60384
|
claudio.at
|
<PARAM managedBy="system" name="params" required="true" type="string">
|
67 |
|
|
{
|
68 |
|
|
"collection_java_xmx" : "-Xmx300m"
|
69 |
|
|
}
|
70 |
60385
|
claudio.at
|
</PARAM>
|
71 |
60363
|
claudio.at
|
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">BeginRead,StartTransaction,CollectionWorker</PARAM>
|
72 |
57299
|
sandro.lab
|
</PARAMETERS>
|
73 |
|
|
<ARCS>
|
74 |
60316
|
claudio.at
|
<ARC to="UPDATE_INFO"/>
|
75 |
57299
|
sandro.lab
|
</ARCS>
|
76 |
|
|
</NODE>
|
77 |
|
|
|
78 |
|
|
<!--
  UPDATE_INFO: node of type MDStoreToApiExtraFieldHadoop.
  All six parameters are required, system-managed strings:
    mdId                <= $params.("harv_id")$
    datasourceId        <= $params.("dataprovider:id")$
    datasourceInterface <= $params.("dataprovider:interface")$
    extraFieldForTotal  =  last_collection_total
    extraFieldForDate   =  last_collection_date
    extraFieldForMdId   =  last_collection_mdId
  Its only outgoing arc targets "success"; no NODE with that name appears in this part of
  the file, so it is presumably a predefined terminal state (confirm).
  NOTE(review): "datasouce" in the DESCRIPTION below looks like a typo for "datasource".
  It is left untouched here because the text is element content that consumers may display.
-->
<NODE name="UPDATE_INFO" type="MDStoreToApiExtraFieldHadoop">
|
79 |
|
|
<DESCRIPTION>Update datasouce API extra fields</DESCRIPTION>
|
80 |
|
|
<PARAMETERS>
|
81 |
|
|
<PARAM required="true" type="string" name="mdId" managedBy="system">$params.("harv_id")$</PARAM>
|
82 |
|
|
<PARAM required="true" type="string" name="datasourceId" managedBy="system">$params.("dataprovider:id")$</PARAM>
|
83 |
|
|
<PARAM required="true" type="string" name="datasourceInterface" managedBy="system">$params.("dataprovider:interface")$</PARAM>
|
84 |
|
|
<PARAM required="true" type="string" name="extraFieldForTotal" managedBy="system">last_collection_total</PARAM>
|
85 |
|
|
<PARAM required="true" type="string" name="extraFieldForDate" managedBy="system">last_collection_date</PARAM>
|
86 |
|
|
<PARAM required="true" type="string" name="extraFieldForMdId" managedBy="system">last_collection_mdId</PARAM>
|
87 |
|
|
</PARAMETERS>
|
88 |
|
|
<ARCS>
|
89 |
|
|
<ARC to="success"/>
|
90 |
|
|
</ARCS>
|
91 |
|
|
</NODE>
|