<RESOURCE_PROFILE>
    <!--
        Workflow template profile (RESOURCE_TYPE "WorkflowTemplateDSResourceType").

        Graph declared under WORKFLOW, as given by the ARC elements:
          promoteActions (isStart="true") has arcs to querySimilarities and queryDissimilarities
          querySimilarities, then buildSimilarityMesh, then storeSimilarities, then wait
          queryDissimilarities, then storeDissimilarities, then wait
          wait (isJoin="true") has an arc to "success"

        NOTE(review): no node named "success" is declared in this template; presumably it is a
        terminal node supplied by the workflow engine. Confirm against the engine.
    -->
    <HEADER>
        <RESOURCE_IDENTIFIER value="72a974b4-a13f-48e2-a18c-98e0effb4579_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
        <RESOURCE_KIND value="WorkflowTemplateDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>

            <!--
                Template-level parameters. All five are declared required="true" and type="string".
                The ref="..." attributes on node PARAMs below use these names.
            -->
            <PARAMETERS>

                <PARAM name="set" description="csv list of action sets to be promoted" required="true" type="string"/>
                <PARAM name="mappingSimilarities" description="xslt mapping function for similarities" required="true" type="string"/>
                <PARAM name="mappingDissimilarities" description="xslt mapping function for dissimilarities" required="true" type="string"/>

                <PARAM name="hbaseTable" description="HBase table name" required="true" type="string"/>
                <PARAM name="cluster" description="Hadoop cluster name" required="true" type="string"/>
            </PARAMETERS>

            <WORKFLOW>

                <!-- Start node: receives the "set" and "hbaseTable" template parameters, then forks into both branches. -->
                <NODE isStart="true" name="promoteActions" type="PromoteActionsHDFS">
                    <DESCRIPTION>Promote Actions</DESCRIPTION>
                    <PARAMETERS>
                        <!-- NOTE(review): the list below is presumably a sample/previous value for the "set" parameter; confirm. -->
                        <!-- dedup-similarity-organization-simple,dedup-similarity-result-levenstein,iis-dataset-entities-main,iis-referenced-datasets-preprocessing,iis-dataset-entities-preprocessing,iis-document-citations,iis-document-classes,iis-referenced-projects-main,iis-document-similarities,iis-document-statistics,iis-extracted-metadata,iis-referenced-datasets-main,iis-referenced-projects-preprocessing,iis-researchinitiative,iis-wos-entities,iis-referenceextraction-pdb -->
                        <PARAM name="set" ref="set"/>
                        <PARAM name="tableName" ref="hbaseTable"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="querySimilarities"/>
                        <ARC to="queryDissimilarities"/>
                    </ARCS>
                </NODE>


                <!--
                    Similarities branch, step 1.
                    The "db" PARAM is bound with property="..." rather than ref/value
                    (presumably resolved from a configuration property; confirm).
                    The outputEprParam value "simEpr" matches the inputEprParam of buildSimilarityMesh.
                -->
                <NODE name="querySimilarities" type="QueryDb">
                    <DESCRIPTION>query similarity</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="db" property="dnet.dedup.db.name"/>
                        <PARAM name="sql" value="/eu/dnetlib/msro/workflows/dedup/querySimilarities.sql"/>
                        <PARAM name="outputEprParam" value="simEpr"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="buildSimilarityMesh"/>
                    </ARCS>
                </NODE>

                <!-- Similarities branch, step 2: input "simEpr", output "simMeshEpr" (the inputEprParam of storeSimilarities). -->
                <NODE name="buildSimilarityMesh" type="BuildSimilarityMeshJob">
                    <DESCRIPTION>build mesh</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="inputEprParam" value="simEpr"/>
                        <PARAM name="outputEprParam" value="simMeshEpr"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="storeSimilarities"/>
                    </ARCS>
                </NODE>

                <!-- Similarities branch, step 3: input "simMeshEpr"; table, cluster and mapping come from template parameters. -->
                <NODE name="storeSimilarities" type="StoreHBase">
                    <DESCRIPTION>Store ResultSet to HBase</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="inputEprParam" value="simMeshEpr"/>
                        <PARAM name="hbaseTable" ref="hbaseTable"/>
                        <PARAM name="cluster" ref="cluster"/>
                        <PARAM name="mapping" ref="mappingSimilarities"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="wait"/>
                    </ARCS>
                </NODE>

                <!--
                    Dissimilarities branch, step 1: same node type and "db" property as querySimilarities,
                    with a different sql resource. The outputEprParam value "dissimEpr" matches the
                    inputEprParam of storeDissimilarities.
                -->
                <NODE name="queryDissimilarities" type="QueryDb">
                    <DESCRIPTION>query dissimilarity</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="db" property="dnet.dedup.db.name"/>
                        <PARAM name="sql" value="/eu/dnetlib/msro/workflows/dedup/queryDissimilarities.sql"/>
                        <PARAM name="outputEprParam" value="dissimEpr"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="storeDissimilarities"/>
                    </ARCS>
                </NODE>

                <!--
                    Dissimilarities branch, step 2. The node type is DeleteFromHBase (not StoreHBase),
                    so the description states a delete; the node name is kept because the ARC in
                    queryDissimilarities targets it.
                -->
                <NODE name="storeDissimilarities" type="DeleteFromHBase">
                    <DESCRIPTION>Delete ResultSet from HBase</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="inputEprParam" value="dissimEpr"/>
                        <PARAM name="hbaseTable" ref="hbaseTable"/>
                        <PARAM name="cluster" ref="cluster"/>
                        <PARAM name="mapping" ref="mappingDissimilarities"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="wait"/>
                    </ARCS>
                </NODE>

                <!-- Join node (isJoin="true"): targeted by both storeSimilarities and storeDissimilarities. -->
                <NODE isJoin="true" name="wait">
                    <DESCRIPTION/>
                    <PARAMETERS/>
                    <ARCS>
                        <ARC to="success"/>
                    </ARCS>
                </NODE>

            </WORKFLOW>
        </CONFIGURATION>
    </BODY>
</RESOURCE_PROFILE>