1 |
60316
|
claudio.at
|
<!--
  SET_INFO: the start node of this workflow (isStart="true").
  It hands three required, system-managed string parameters (provider id,
  provider name and interface) to a node of type SetProviderInfo, and then
  follows its single arc to obtainParams.
  NOTE(review): the $params.("...")$ values look like template placeholders,
  presumably expanded when the workflow is instantiated. Confirm against the
  workflow engine.
-->
<NODE name="SET_INFO"
      type="SetProviderInfo"
      isStart="true">
  <DESCRIPTION>Set information about current provider</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="providerId"
           type="string"
           required="true"
           managedBy="system">$params.("dataprovider:id")$</PARAM>
    <PARAM name="providerName"
           type="string"
           required="true"
           managedBy="system">$params.("dataprovider:name")$</PARAM>
    <PARAM name="api"
           type="string"
           required="true"
           managedBy="system">$params.("dataprovider:interface")$</PARAM>
  </PARAMETERS>
  <ARCS>
    <!-- Only outgoing transition of this node. -->
    <ARC to="obtainParams"/>
  </ARCS>
</NODE>
|
12 |
|
|
|
13 |
|
|
<!--
  obtainParams: node of type ObtainOpenaireDataSourceParams.
  Its only parameter is the required, system-managed providerId, filled from
  the "dataprovider:id" placeholder. Its single arc leads to
  PREPARE_ENV_COLLECTION.
  NOTE(review): which data source params are obtained, and where they are
  stored, is not visible here. Check the node implementation.
-->
<NODE name="obtainParams"
      type="ObtainOpenaireDataSourceParams">
  <DESCRIPTION>Obtain data source params</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="providerId"
           type="string"
           required="true"
           managedBy="system">$params.("dataprovider:id")$</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="PREPARE_ENV_COLLECTION"/>
  </ARCS>
</NODE>
|
22 |
60316
|
claudio.at
|
|
23 |
57299
|
sandro.lab
|
<!--
  PREPARE_ENV_COLLECTION: node of type PrepareEnvCollectHadoopJobNode.
  Parameters declared here:
    mdId             required, system-managed, category MDSTORE_ID, filled
                     from the "harv_id" placeholder
    collectionMode   required, user-managed, no preset value; its function
                     attribute names REFRESH and INCREMENTAL as valid values
    metadataEncoding required, user-managed, preset to XML
    fromDateOverride optional, user-managed, no preset value
  The single arc leads to COLLECT_HADOOP.
  NOTE(review): per the DESCRIPTION this node puts the variables for the
  collection job into the environment; the variable names it sets are not
  visible in this file.
-->
<NODE name="PREPARE_ENV_COLLECTION"
      type="PrepareEnvCollectHadoopJobNode">
  <DESCRIPTION>Set in the environment all the variable needed to the collection oozie job</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="mdId"
           type="string"
           required="true"
           managedBy="system"
           category="MDSTORE_ID">$params.("harv_id")$</PARAM>
    <PARAM name="collectionMode"
           type="string"
           required="true"
           managedBy="user"
           function="validValues(['REFRESH','INCREMENTAL'])"/>
    <PARAM name="metadataEncoding"
           type="string"
           required="true"
           managedBy="user">XML</PARAM>
    <PARAM name="fromDateOverride"
           type="string"
           required="false"
           managedBy="user"/>
  </PARAMETERS>
  <ARCS>
    <ARC to="COLLECT_HADOOP"/>
  </ARCS>
</NODE>
|
35 |
60316
|
claudio.at
|
|
36 |
|
|
<!--
  COLLECT_HADOOP: node of type SubmitHadoopJob.
  Parameters declared here:
    hadoopJob  required, system-managed, fixed to executeOozieJob
    cluster    required, user-managed, preset to DHP
    envParams  required, system-managed; its text is a JSON object with nine
               string-to-string pairs
  The single arc leads to UPDATE_INFO.
  NOTE(review): each envParams pair presumably maps a job property (key) to
  the name of an environment attribute that supplies its value. Confirm
  against the SubmitHadoopJob implementation before renaming either side.
-->
<NODE name="COLLECT_HADOOP"
      type="SubmitHadoopJob">
  <DESCRIPTION>Start the Hadoop Job</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="hadoopJob"
           type="string"
           required="true"
           managedBy="system">executeOozieJob</PARAM>
    <PARAM name="cluster"
           type="string"
           required="true"
           managedBy="user">DHP</PARAM>
    <PARAM name="envParams"
           type="string"
           required="true"
           managedBy="system">
      {
        "apiDescription":"apiDescription",
        "dataSourceInfo":"dataSourceInfo",
        "identifierPath":"identifierPath",
        "metadataEncoding":"metadataEncoding",
        "timestamp":"timestamp",
        "workflowId":"workflowId",
        "mdStoreID":"mdId",
        "collectionMode":"collectionMode",
        "oozie.wf.application.path":"oozieWfPath"
      }
    </PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="UPDATE_INFO"/>
  </ARCS>
</NODE>
|
59 |
|
|
|
60 |
|
|
<!--
  UPDATE_INFO: node of type MDStoreToApiExtraFieldHadoop.
  All six parameters are required, system-managed strings:
    mdId, datasourceId, datasourceInterface
        filled from the "harv_id", "dataprovider:id" and
        "dataprovider:interface" placeholders
    extraFieldForTotal, extraFieldForDate, extraFieldForMdId
        fixed to last_collection_total, last_collection_date and
        last_collection_mdId
  The single arc leads to "success".
  NOTE(review): the three extraField* values are presumably the names of the
  API extra fields this node writes. Confirm against the node
  implementation.
-->
<NODE name="UPDATE_INFO"
      type="MDStoreToApiExtraFieldHadoop">
  <DESCRIPTION>Update datasource API extra fields</DESCRIPTION>
  <PARAMETERS>
    <PARAM name="mdId"
           type="string"
           required="true"
           managedBy="system">$params.("harv_id")$</PARAM>
    <PARAM name="datasourceId"
           type="string"
           required="true"
           managedBy="system">$params.("dataprovider:id")$</PARAM>
    <PARAM name="datasourceInterface"
           type="string"
           required="true"
           managedBy="system">$params.("dataprovider:interface")$</PARAM>
    <PARAM name="extraFieldForTotal"
           type="string"
           required="true"
           managedBy="system">last_collection_total</PARAM>
    <PARAM name="extraFieldForDate"
           type="string"
           required="true"
           managedBy="system">last_collection_date</PARAM>
    <PARAM name="extraFieldForMdId"
           type="string"
           required="true"
           managedBy="system">last_collection_mdId</PARAM>
  </PARAMETERS>
  <ARCS>
    <ARC to="success"/>
  </ARCS>
</NODE>
|