Revision 58106
Added by Sandro La Bruzzo about 4 years ago
modules/dnet-hadoop-services/trunk/src/main/resources/eu/dnetlib/data/hadoop/applicationContext-dnet-hadoop-service.properties | ||
---|---|---|
1 |
services.hadoop.clients={"DM":{"oozie":"true","mapred":"true","hbase":"true"},"IIS":{"oozie":"true","mapred":"false","hbase":"false"}} |
|
1 |
services.hadoop.clients={"DM":{"oozie":"true","mapred":"true","hbase":"true"},"IIS":{"oozie":"true","mapred":"false","hbase":"false"},"GARR":{"oozie":"true","mapred":"false","hbase":"false"}}
|
|
2 | 2 |
services.hadoop.hbase.tablefeeder.batchsize=500 |
3 | 3 |
services.hadoop.jobregistry.size=100 |
4 | 4 |
services.hadoop.lib.path=/user/dnet/lib/dnet-mapreduce-jobs-assembly-1.0.0-SNAPSHOT.jar |
modules/dnet-hadoop-services/trunk/src/main/resources/eu/dnetlib/data/hadoop/config/applicationContext-hadoop.xml | ||
---|---|---|
8 | 8 |
<bean id="DM" class="eu.dnetlib.data.hadoop.config.ConfigurationFactory" |
9 | 9 |
p:defaults="${services.data.hadoop.dm.properties}" /> |
10 | 10 |
|
11 |
<bean id="GARR" class="eu.dnetlib.data.hadoop.config.ConfigurationFactory" |
|
12 |
p:defaults="${services.data.hadoop.garr.properties}" /> |
|
13 |
|
|
11 | 14 |
<bean id="IIS" class="eu.dnetlib.data.hadoop.config.ConfigurationFactory" |
12 | 15 |
p:defaults="${services.data.hadoop.iis.properties}" /> |
13 | 16 |
|
modules/dnet-hadoop-services/trunk/src/main/resources/eu/dnetlib/data/hadoop/config/hadoop-default.dm.garr.properties | ||
---|---|---|
1 |
dnet.clustername = GARR |
|
2 |
##CORE-SITE |
|
3 |
#fs.defaultFS = hdfs://dm-cluster-nn |
|
4 |
# |
|
5 |
#hadoop.security.authentication = simple |
|
6 |
#hadoop.security.auth_to_local = DEFAULT |
|
7 |
# |
|
8 |
#hadoop.rpc.socket.factory.class.default = org.apache.hadoop.net.StandardSocketFactory |
|
9 |
# |
|
10 |
##HBASE-SITE |
|
11 |
#hbase.rootdir = hdfs://dm-cluster-nn/hbase |
|
12 |
# |
|
13 |
#hbase.security.authentication = simple |
|
14 |
#zookeeper.znode.rootserver = root-region-server |
|
15 |
#hbase.zookeeper.quorum = namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
|
16 |
#hbase.zookeeper.property.clientPort = 2181 |
|
17 |
#hbase.zookeeper.client.port = 2181 |
|
18 |
#zookeeper.znode.parent = /hbase |
|
19 |
# |
|
20 |
##HDFS-SITE |
|
21 |
#dfs.replication = 2 |
|
22 |
#dfs.nameservices = dm-cluster-nn |
|
23 |
#dfs.ha.namenodes.dm-cluster-nn = nn1,nn2 |
|
24 |
# |
|
25 |
#dfs.namenode.rpc-address.dm-cluster-nn.nn1=namenode1.hadoop.dm.openaire.eu:8020 |
|
26 |
#dfs.namenode.http-address.dm-cluster-nn.nn1=namenode1.hadoop.dm.openaire.eu:50070 |
|
27 |
#dfs.namenode.rpc-address.dm-cluster-nn.nn2=namenode2.hadoop.dm.openaire.eu:8020 |
|
28 |
#dfs.namenode.http-address.dm-cluster-nn.nn2=namenode2.hadoop.dm.openaire.eu:50070 |
|
29 |
# |
|
30 |
#dfs.client.failover.proxy.provider.dm-cluster-nn=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider |
|
31 |
|
|
32 |
|
|
33 |
|
|
34 |
#OOZIE SERVER |
|
35 |
oozie.service.loc = http://hadoop-edge3.garr-pa1.d4science.org:11000/oozie/ |
modules/dnet-hadoop-services/trunk/src/main/resources/eu/dnetlib/data/hadoop/config/hadoop-default.iis.icm.properties | ||
---|---|---|
41 | 41 |
mapred.reducer.new-api = true |
42 | 42 |
|
43 | 43 |
#OOZIE SERVER |
44 |
oozie.service.loc = http://oozie.hadoop.iis.openaire.eu:11000/oozie |
|
44 |
oozie.service.loc = http://iis-cdh5-test-m1.ocean.icm.edu.pl:11000/oozie/ |
modules/dnet-hadoop-services/trunk/src/main/resources/eu/dnetlib/data/hadoop/config/applicationContext-hadoop.properties | ||
---|---|---|
1 | 1 |
services.data.hadoop.dm.properties = classpath:/eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties |
2 | 2 |
services.data.hadoop.iis.properties = classpath:/eu/dnetlib/data/hadoop/config/hadoop-default.iis.icm.properties |
3 |
services.data.hadoop.garr.properties = classpath:/eu/dnetlib/data/hadoop/config/hadoop-default.dm.garr.properties |
|
3 | 4 |
|
4 | 5 |
services.data.hadoop.hdfs.seqfilewriterfactory.keyclass = org.apache.hadoop.io.Text |
5 | 6 |
services.data.hadoop.hdfs.seqfilewriterfactory.valueclass = org.apache.hadoop.io.Text |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/dedup/FindDedupConfigurationJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.nodes.dedup; |
|
2 |
|
|
3 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
4 |
import eu.dnetlib.msro.workflows.graph.Arc; |
|
5 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
6 |
import eu.dnetlib.msro.workflows.procs.Env; |
|
7 |
import eu.dnetlib.rmi.enabling.ISLookUpException; |
|
8 |
import eu.dnetlib.rmi.enabling.ISLookUpService; |
|
9 |
import org.apache.commons.lang3.StringUtils; |
|
10 |
import org.springframework.beans.factory.annotation.Autowired; |
|
11 |
|
|
12 |
import java.io.ByteArrayOutputStream; |
|
13 |
import java.util.List; |
|
14 |
import java.util.zip.GZIPOutputStream; |
|
15 |
|
|
16 |
public class FindDedupConfigurationJobNode extends SimpleJobNode { |
|
17 |
|
|
18 |
|
|
19 |
private String configName; |
|
20 |
|
|
21 |
@Autowired |
|
22 |
UniqueServiceLocator uniqueServiceLocator; |
|
23 |
|
|
24 |
private final static String query ="for $x in collection('/db/DRIVER/DedupConfigurationDSResources/DedupConfigurationDSResourceType') where $x//RESOURCE_IDENTIFIER/@value ='%s' return $x//DEDUPLICATION/text()"; |
|
25 |
|
|
26 |
@Override |
|
27 |
protected String execute(Env env) throws Exception { |
|
28 |
|
|
29 |
|
|
30 |
if (StringUtils.isBlank(configName)) throw new IllegalArgumentException("missing configuration sequence"); |
|
31 |
|
|
32 |
env.setAttribute("dconf", getProfile()); |
|
33 |
return Arc.DEFAULT_ARC; |
|
34 |
} |
|
35 |
|
|
36 |
|
|
37 |
private String getProfile() throws Exception { |
|
38 |
ISLookUpService service = uniqueServiceLocator.getService(ISLookUpService.class); |
|
39 |
|
|
40 |
List<String> resourceProfile = service.quickSearchProfile(String.format(query,configName)); |
|
41 |
|
|
42 |
|
|
43 |
if (resourceProfile.size()!= 1) throw new IllegalStateException("Query should return on result query:"+String.format(query,configName)); |
|
44 |
|
|
45 |
return compressArgument(resourceProfile.get(0)); |
|
46 |
|
|
47 |
|
|
48 |
} |
|
49 |
|
|
50 |
private static String compressArgument(final String value) throws Exception{ |
|
51 |
ByteArrayOutputStream out = new ByteArrayOutputStream(); |
|
52 |
GZIPOutputStream gzip = new GZIPOutputStream(out); |
|
53 |
gzip.write(value.getBytes()); |
|
54 |
gzip.close(); |
|
55 |
return java.util.Base64.getEncoder().encodeToString(out.toByteArray()); |
|
56 |
} |
|
57 |
|
|
58 |
public String getConfigName() { |
|
59 |
return configName; |
|
60 |
} |
|
61 |
|
|
62 |
public void setConfigName(String configName) { |
|
63 |
this.configName = configName; |
|
64 |
} |
|
65 |
|
|
66 |
|
|
67 |
} |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/msro/workflows/nodes/applicationContext-msro-dli-nodes.xml | ||
---|---|---|
27 | 27 |
p:mongoDBName="${services.mdstore.mongodb.db}" |
28 | 28 |
scope="prototype"/> |
29 | 29 |
|
30 |
|
|
31 |
<bean id="wfNodeFindDedupConfiguration" class="eu.dnetlib.msro.workflows.nodes.dedup.FindDedupConfigurationJobNode" |
|
32 |
scope="prototype"/> |
|
33 |
|
|
30 | 34 |
</beans> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/dedup.dli.spark.global.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="1dd10bf0-5c97-470c-9938-ae8e57a422fc_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2020-02-05T18:13:51.0Z"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<WORKFLOW_NAME menuSection="InfoSpace Deduplication">InfoSpace Deduplication using Spark</WORKFLOW_NAME> |
|
11 |
<WORKFLOW_DESCRIPTION>InfoSpace Deduplication using Spark</WORKFLOW_DESCRIPTION> |
|
12 |
<WORKFLOW_INFO/> |
|
13 |
<WORKFLOW_FAMILY>InfoSpace Deduplication</WORKFLOW_FAMILY> |
|
14 |
<WORKFLOW_PRIORITY>35</WORKFLOW_PRIORITY> |
|
15 |
<CONFIGURATION status="EXECUTABLE" start="MANUAL"> |
|
16 |
|
|
17 |
<PARAMETERS> |
|
18 |
<PARAM description="Oozie Job name" function="listProfiles('HadoopJobConfigurationDSResourceType', '//HADOOP_JOB/@name','executeOozie')" managedBy="user" name="oozieJobName" required="true" type="string"/> |
|
19 |
<PARAM name="workingDirPath" description="working dir where all the intermediate versions of the graph are generated" required="true" type="string" managedBy="user"/> |
|
20 |
<PARAM name="cluster" description="Hadoop cluster logical name" required="true" managedBy="user" type="string" function="validValues(['DM','IIS', 'GARR'])"/> |
|
21 |
<PARAM name="reusePublication" description="reuse publications on HDFS?" required="true" type="boolean" managedBy="user"/> |
|
22 |
<PARAM name="reuseResolvedPublication" description="reuse resolved publications on HDFS?" required="true" type="boolean" managedBy="user"/> |
|
23 |
<PARAM name="reuseDataset" description="reuse datasets on HDFS?" required="true" type="boolean" managedBy="user"/> |
|
24 |
<PARAM name="reuseResolvedDataset" description="reuse resolved datasets on HDFS?" required="true" type="boolean" managedBy="user"/> |
|
25 |
<PARAM name="reuseUnresolved" description="reuse unresolved objects on HDFS?" required="true" type="boolean" managedBy="user"/> |
|
26 |
<PARAM name="dedupConfigDataset" description="dedup configuration orchestration name" required="true" type="string" function="obtainValues('dedupOrchestrations', {})" managedBy="user"/> |
|
27 |
<PARAM name="dedupConfigPublication" description="dedup configuration orchestration name" required="true" type="string" function="listProfiles('DedupConfigurationDSResources', '//DESCRIPTION', '')" managedBy="user"/> |
|
28 |
<PARAM name="dedupConfigUnknown" description="dedup configuration orchestration name" required="true" type="string" function="obtainValues('dedupOrchestrations', {})" managedBy="user"/> |
|
29 |
</PARAMETERS> |
|
30 |
<WORKFLOW> |
|
31 |
<NODE name="pmf2hdfs" type="LaunchWorkflowTemplate"> |
|
32 |
<DESCRIPTION>import PMF Publications to HDFS DIR</DESCRIPTION> |
|
33 |
<PARAMETERS> |
|
34 |
<PARAM name="wfTemplateId" value="4a268738-b635-4d86-9a4a-52bec6d20866_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
35 |
<PARAM name="wfTemplateParams"> |
|
36 |
<MAP> |
|
37 |
<ENTRY key="cluster" ref="cluster"/> |
|
38 |
<ENTRY key="reuseMdRecords" ref="reusePublication"/> |
|
39 |
<ENTRY key="mdFormat" value="PMF"/> |
|
40 |
<ENTRY key="sourcePath" value="${workingDirPath}/xml/pmf.dli.seq"/> |
|
41 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
42 |
<ENTRY key="targetPath" value="${workingDirPath}/input/0"/> |
|
43 |
<ENTRY key="entity" value="publication"/> |
|
44 |
</MAP> |
|
45 |
</PARAM> |
|
46 |
</PARAMETERS> |
|
47 |
<ARCS> |
|
48 |
<ARC to="extractPublication"/> |
|
49 |
</ARCS> |
|
50 |
</NODE> |
|
51 |
<NODE name="extractPublication" type="SubmitHadoopJob"> |
|
52 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
53 |
<PARAMETERS> |
|
54 |
<PARAM name="cluster" ref="cluster"/> |
|
55 |
<PARAM name="hadoopJob" ref="oozieJobName"/> |
|
56 |
<PARAM name="jobParams"> |
|
57 |
<MAP> |
|
58 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/scholexplorer/extractentities/oozie_app"/> |
|
59 |
<ENTRY key="sourcePath" value="${workingDirPath}/input/0"/> |
|
60 |
<ENTRY key="targetPath" value="${workingDirPath}/extracted"/> |
|
61 |
<ENTRY key="targetDir" value="0"/> |
|
62 |
<ENTRY key="entities" value="publication,unknown,relation"/> |
|
63 |
</MAP> |
|
64 |
</PARAM> |
|
65 |
</PARAMETERS> |
|
66 |
<ARCS> |
|
67 |
<ARC to="pmfResolved2hdfs"/> |
|
68 |
</ARCS> |
|
69 |
</NODE> |
|
70 |
<NODE name="pmfResolved2hdfs" type="LaunchWorkflowTemplate"> |
|
71 |
<DESCRIPTION>import PMF Publications to HDFS DIR</DESCRIPTION> |
|
72 |
<PARAMETERS> |
|
73 |
<PARAM name="wfTemplateId" value="4a268738-b635-4d86-9a4a-52bec6d20866_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
74 |
<PARAM name="wfTemplateParams"> |
|
75 |
<MAP> |
|
76 |
<ENTRY key="cluster" ref="cluster"/> |
|
77 |
<ENTRY key="reuseMdRecords" ref="reuseResolvedPublication"/> |
|
78 |
<ENTRY key="mdFormat" value="PMF"/> |
|
79 |
<ENTRY key="interpretation" value="resolved"/> |
|
80 |
<ENTRY key="sourcePath" value="${workingDirPath}/xml/pmf.dli.resolved.seq"/> |
|
81 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
82 |
<ENTRY key="targetPath" value="${workingDirPath}/input/1"/> |
|
83 |
<ENTRY key="entity" value="publication"/> |
|
84 |
</MAP> |
|
85 |
</PARAM> |
|
86 |
</PARAMETERS> |
|
87 |
<ARCS> |
|
88 |
<ARC to="extractPublicationResolved"/> |
|
89 |
</ARCS> |
|
90 |
</NODE> |
|
91 |
<NODE name="extractPublicationResolved" type="SubmitHadoopJob"> |
|
92 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
93 |
<PARAMETERS> |
|
94 |
<PARAM name="cluster" ref="cluster"/> |
|
95 |
<PARAM name="hadoopJob" ref="oozieJobName"/> |
|
96 |
<PARAM name="jobParams"> |
|
97 |
<MAP> |
|
98 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/scholexplorer/extractentities/oozie_app"/> |
|
99 |
<ENTRY key="sourcePath" value="${workingDirPath}/input/1"/> |
|
100 |
<ENTRY key="targetPath" value="${workingDirPath}/extracted"/> |
|
101 |
<ENTRY key="targetDir" value="1"/> |
|
102 |
<ENTRY key="entities" value="publication"/> |
|
103 |
</MAP> |
|
104 |
</PARAM> |
|
105 |
</PARAMETERS> |
|
106 |
<ARCS> |
|
107 |
<ARC to="dmf2hdfs"/> |
|
108 |
</ARCS> |
|
109 |
</NODE> |
|
110 |
<NODE name="dmf2hdfs" type="LaunchWorkflowTemplate"> |
|
111 |
<DESCRIPTION>import PMF Publications to HDFS DIR</DESCRIPTION> |
|
112 |
<PARAMETERS> |
|
113 |
<PARAM name="wfTemplateId" value="4a268738-b635-4d86-9a4a-52bec6d20866_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
114 |
<PARAM name="wfTemplateParams"> |
|
115 |
<MAP> |
|
116 |
<ENTRY key="cluster" ref="cluster"/> |
|
117 |
<ENTRY key="reuseMdRecords" ref="reuseDataset"/> |
|
118 |
<ENTRY key="mdFormat" value="DMF"/> |
|
119 |
<ENTRY key="sourcePath" value="${workingDirPath}/xml/dmf.dli.seq"/> |
|
120 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
121 |
<ENTRY key="targetPath" value="${workingDirPath}/input/2"/> |
|
122 |
<ENTRY key="entity" value="dataset"/> |
|
123 |
</MAP> |
|
124 |
</PARAM> |
|
125 |
</PARAMETERS> |
|
126 |
<ARCS> |
|
127 |
<ARC to="extractDataset"/> |
|
128 |
</ARCS> |
|
129 |
</NODE> |
|
130 |
<NODE name="extractDataset" type="SubmitHadoopJob"> |
|
131 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
132 |
<PARAMETERS> |
|
133 |
<PARAM name="cluster" ref="cluster"/> |
|
134 |
<PARAM name="hadoopJob" ref="oozieJobName"/> |
|
135 |
<PARAM name="jobParams"> |
|
136 |
<MAP> |
|
137 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/scholexplorer/extractentities/oozie_app"/> |
|
138 |
<ENTRY key="sourcePath" value="${workingDirPath}/input/2"/> |
|
139 |
<ENTRY key="targetPath" value="${workingDirPath}/extracted"/> |
|
140 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
141 |
<ENTRY key="targetDir" value="2"/> |
|
142 |
<ENTRY key="entities" value="dataset,unknown,relation"/> |
|
143 |
</MAP> |
|
144 |
</PARAM> |
|
145 |
</PARAMETERS> |
|
146 |
<ARCS> |
|
147 |
<ARC to="dmfResolved2hdfs"/> |
|
148 |
</ARCS> |
|
149 |
</NODE> |
|
150 |
<NODE name="dmfResolved2hdfs" type="LaunchWorkflowTemplate"> |
|
151 |
<DESCRIPTION>import PMF Publications to HDFS DIR</DESCRIPTION> |
|
152 |
<PARAMETERS> |
|
153 |
<PARAM name="wfTemplateId" value="4a268738-b635-4d86-9a4a-52bec6d20866_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
154 |
<PARAM name="wfTemplateParams"> |
|
155 |
<MAP> |
|
156 |
<ENTRY key="cluster" ref="cluster"/> |
|
157 |
<ENTRY key="reuseMdRecords" ref="reuseResolvedDataset"/> |
|
158 |
<ENTRY key="mdFormat" value="DMF"/> |
|
159 |
<ENTRY key="interpretation" value="resolved"/> |
|
160 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
161 |
<ENTRY key="sourcePath" value="${workingDirPath}/xml/dmf.dli.resolved.seq"/> |
|
162 |
<ENTRY key="targetPath" value="${workingDirPath}/input/3"/> |
|
163 |
<ENTRY key="entity" value="dataset"/> |
|
164 |
</MAP> |
|
165 |
</PARAM> |
|
166 |
</PARAMETERS> |
|
167 |
<ARCS> |
|
168 |
<ARC to="extractDatasetResolved"/> |
|
169 |
</ARCS> |
|
170 |
</NODE> |
|
171 |
<NODE name="extractDatasetResolved" type="SubmitHadoopJob"> |
|
172 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
173 |
<PARAMETERS> |
|
174 |
<PARAM name="cluster" ref="cluster"/> |
|
175 |
<PARAM name="hadoopJob" ref="oozieJobName"/> |
|
176 |
<PARAM name="jobParams"> |
|
177 |
<MAP> |
|
178 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/scholexplorer/extractentities/oozie_app"/> |
|
179 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
180 |
<ENTRY key="sourcePath" value="${workingDirPath}/input/3"/> |
|
181 |
<ENTRY key="targetPath" value="${workingDirPath}/extracted"/> |
|
182 |
<ENTRY key="targetDir" value="3"/> |
|
183 |
<ENTRY key="entities" value="dataset"/> |
|
184 |
</MAP> |
|
185 |
</PARAM> |
|
186 |
</PARAMETERS> |
|
187 |
<ARCS> |
|
188 |
<ARC to="mergeDataset"/> |
|
189 |
</ARCS> |
|
190 |
</NODE> |
|
191 |
<NODE isStart="true" name="mergeDataset" type="SubmitHadoopJob"> |
|
192 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
193 |
<PARAMETERS> |
|
194 |
<PARAM name="cluster" ref="cluster"/> |
|
195 |
<PARAM name="hadoopJob" ref="oozieJobName"/> |
|
196 |
<PARAM name="jobParams"> |
|
197 |
<MAP> |
|
198 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/mergeentities/oozie_app"/> |
|
199 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
200 |
<ENTRY key="sourcePath" value="${workingDirPath}/extracted"/> |
|
201 |
<ENTRY key="targetPath" value="${workingDirPath}/graph"/> |
|
202 |
<ENTRY key="entity" value="dataset"/> |
|
203 |
</MAP> |
|
204 |
</PARAM> |
|
205 |
</PARAMETERS> |
|
206 |
<ARCS> |
|
207 |
<ARC to="mergePublication"/> |
|
208 |
</ARCS> |
|
209 |
</NODE> |
|
210 |
<NODE name="mergePublication" type="SubmitHadoopJob"> |
|
211 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
212 |
<PARAMETERS> |
|
213 |
<PARAM name="cluster" ref="cluster"/> |
|
214 |
<PARAM name="hadoopJob" ref="oozieJobName"/> |
|
215 |
<PARAM name="jobParams"> |
|
216 |
<MAP> |
|
217 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/mergeentities/oozie_app"/> |
|
218 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
219 |
<ENTRY key="sourcePath" value="${workingDirPath}/extracted"/> |
|
220 |
<ENTRY key="targetPath" value="${workingDirPath}/graph"/> |
|
221 |
<ENTRY key="entity" value="publication"/> |
|
222 |
</MAP> |
|
223 |
</PARAM> |
|
224 |
</PARAMETERS> |
|
225 |
<ARCS> |
|
226 |
<ARC to="mergeUnknown"/> |
|
227 |
</ARCS> |
|
228 |
</NODE> |
|
229 |
<NODE name="mergeUnknown" type="SubmitHadoopJob"> |
|
230 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
231 |
<PARAMETERS> |
|
232 |
<PARAM name="cluster" ref="cluster"/> |
|
233 |
<PARAM name="hadoopJob" ref="oozieJobName"/> |
|
234 |
<PARAM name="jobParams"> |
|
235 |
<MAP> |
|
236 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/mergeentities/oozie_app"/> |
|
237 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
238 |
<ENTRY key="sourcePath" value="${workingDirPath}/extracted"/> |
|
239 |
<ENTRY key="targetPath" value="${workingDirPath}/graph"/> |
|
240 |
<ENTRY key="entity" value="unknown"/> |
|
241 |
</MAP> |
|
242 |
</PARAM> |
|
243 |
</PARAMETERS> |
|
244 |
<ARCS> |
|
245 |
<ARC to="mergeRelation"/> |
|
246 |
</ARCS> |
|
247 |
</NODE> |
|
248 |
<NODE name="mergeRelation" type="SubmitHadoopJob"> |
|
249 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
250 |
<PARAMETERS> |
|
251 |
<PARAM name="cluster" ref="cluster"/> |
|
252 |
<PARAM name="hadoopJob" ref="oozieJobName"/> |
|
253 |
<PARAM name="jobParams"> |
|
254 |
<MAP> |
|
255 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/mergeentities/oozie_app"/> |
|
256 |
<ENTRY key="workingDirPath" value="${workingDirPath}"/> |
|
257 |
<ENTRY key="sourcePath" value="${workingDirPath}/extracted"/> |
|
258 |
<ENTRY key="targetPath" value="${workingDirPath}/graph"/> |
|
259 |
<ENTRY key="entity" value="relation"/> |
|
260 |
</MAP> |
|
261 |
</PARAM> |
|
262 |
</PARAMETERS> |
|
263 |
<ARCS> |
|
264 |
<ARC to="success"/> |
|
265 |
</ARCS> |
|
266 |
</NODE> |
|
267 |
<NODE name="dedupPublication" type="LaunchWorkflowTemplate"> |
|
268 |
<DESCRIPTION>import PMF Publications to HDFS DIR</DESCRIPTION> |
|
269 |
<PARAMETERS> |
|
270 |
<PARAM name="wfTemplateId" value="b8e1afcf-e5ca-47d0-9ee8-47da90e1a9c3_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
271 |
<PARAM name="wfTemplateParams"> |
|
272 |
<MAP> |
|
273 |
<ENTRY key="cluster" ref="cluster"/> |
|
274 |
<ENTRY key="sourcePath" value="${workingDirPath}/graph"/> |
|
275 |
<ENTRY key="targetPath" value="${workingDirPath}/dedupGraphWD"/> |
|
276 |
<ENTRY key="entity" value="publication"/> |
|
277 |
<ENTRY key="dedup_conf" ref="dedupConfigPublication"/> |
|
278 |
<ENTRY key="oozieJob" ref="oozieJobName"/> |
|
279 |
</MAP> |
|
280 |
</PARAM> |
|
281 |
</PARAMETERS> |
|
282 |
<ARCS> |
|
283 |
<ARC to="success"/> |
|
284 |
</ARCS> |
|
285 |
</NODE> |
|
286 |
</WORKFLOW> |
|
287 |
</CONFIGURATION> |
|
288 |
<NOTIFICATIONS/> |
|
289 |
<SCHEDULING enabled="false"> |
|
290 |
<CRON>29 5 22 ? * *</CRON> |
|
291 |
<MININTERVAL>10080</MININTERVAL> |
|
292 |
</SCHEDULING> |
|
293 |
<STATUS/> |
|
294 |
</BODY> |
|
295 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/dedupEntitywf.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="b8e1afcf-e5ca-47d0-9ee8-47da90e1a9c3_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowTemplateDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2020-02-14T12:27:20+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<PARAMETERS> |
|
12 |
<PARAM description="HDFS path" name="sourcePath" required="true" type="string"/> |
|
13 |
<PARAM description="HDFS path" name="targetPath" required="true" type="string"/> |
|
14 |
<PARAM description="HDFS path" name="entity" required="true" type="string"/> |
|
15 |
<PARAM description="Hadoop cluster name" name="cluster" required="true" type="string"/> |
|
16 |
<PARAM description="Dedup Configuration" name="dedup_conf" required="true" type="string"/> |
|
17 |
</PARAMETERS> |
|
18 |
<WORKFLOW> |
|
19 |
<NODE isStart="true" name="findDedupConfiguration" type="FindDedupConfiguration"> |
|
20 |
<DESCRIPTION>Find Dedup configuration</DESCRIPTION> |
|
21 |
<PARAMETERS> |
|
22 |
<PARAM name="configName" ref="dedup_conf"/> |
|
23 |
</PARAMETERS> |
|
24 |
<ARCS> |
|
25 |
<ARC to="dedupPublication"/> |
|
26 |
</ARCS> |
|
27 |
</NODE> |
|
28 |
|
|
29 |
|
|
30 |
<NODE name="dedupPublication" type="SubmitHadoopJob"> |
|
31 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
32 |
<PARAMETERS> |
|
33 |
<PARAM name="cluster" ref="cluster"/> |
|
34 |
<PARAM name="hadoopJob" value="executeOozieJobGARR"/> |
|
35 |
<PARAM name="jobParams"> |
|
36 |
<MAP> |
|
37 |
<ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/dedup/oozie_app"/> |
|
38 |
<ENTRY key="sourcePath" ref="sourcePath"/> |
|
39 |
<ENTRY key="targetPath" ref="targetPath"/> |
|
40 |
<ENTRY key="entity" ref="entity"/> |
|
41 |
<ENTRY key="dedupConf" env="dconf"/> |
|
42 |
</MAP> |
|
43 |
</PARAM> |
|
44 |
</PARAMETERS> |
|
45 |
<ARCS> |
|
46 |
<ARC to="success"/> |
|
47 |
</ARCS> |
|
48 |
</NODE> |
|
49 |
</WORKFLOW> |
|
50 |
</CONFIGURATION> |
|
51 |
</BODY> |
|
52 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/mergeDLIEntities.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="6f111858-1719-48a9-8e90-8f829497f7a8_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowTemplateDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2016-06-20T07:52:08+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<PARAMETERS> |
|
12 |
<PARAM name="targetDir" description="metadata interpretation name" required="false" type="string" default="cleaned"/> |
|
13 |
<PARAM name="sourcePath" description="HDFS path" required="true" type="string"/> |
|
14 |
<PARAM name="targetPath" description="HDFS path" required="true" type="string"/> |
|
15 |
<PARAM name="entity" description="HDFS path" required="true" type="string"/> |
|
16 |
<PARAM name="cluster" description="Hadoop cluster name" required="true" type="string"/> |
|
17 |
</PARAMETERS> |
|
18 |
<WORKFLOW> |
|
19 |
|
|
20 |
|
|
21 |
<NODE name="convertEntity" type="SubmitHadoopJob"> |
|
22 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
23 |
<PARAMETERS> |
|
24 |
<PARAM name="cluster" ref="cluster"/> |
|
25 |
<PARAM name="hadoopJob" value="importMetadataToHDFS"/> |
|
26 |
<PARAM name="jobParams"> |
|
27 |
<MAP> |
|
28 |
<ENTRY key="sourcePath" ref="sourcePath"/> |
|
29 |
<ENTRY key="targetPath" ref="targetPath"/> |
|
30 |
<ENTRY key="entity" ref="entity"/> |
|
31 |
</MAP> |
|
32 |
</PARAM> |
|
33 |
</PARAMETERS> |
|
34 |
<ARCS> |
|
35 |
<ARC to="success"/> |
|
36 |
</ARCS> |
|
37 |
</NODE> |
|
38 |
|
|
39 |
</WORKFLOW> |
|
40 |
</CONFIGURATION> |
|
41 |
</BODY> |
|
42 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResourceType/importMetadataToHDFS.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="ea5ea1a0-a750-42db-8dfa-1606158698da_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2019-04-12T13:16:20+02:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="importMetadataToHDFS" type="oozie"> |
|
11 |
<DESCRIPTION>Import XML Data into sequence File</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION><!-- Cluster wide --> |
|
13 |
<PROPERTY key="queueName" value="default"/> |
|
14 |
<PROPERTY key="user.name" value="sandro.labruzzo"/><!-- Runtime --> |
|
15 |
<PROPERTY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/scholexplorer/oozie_app/"/> |
|
16 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
17 |
<PROPERTY key="oozie.use.system.libpath" value="True"/> |
|
18 |
<PROPERTY key="security_enabled" value="False"/> |
|
19 |
<PROPERTY key="dryrun" value="True"/> |
|
20 |
<PROPERTY key="oozie.action.sharelib.for.spark" value="spark2"/> |
|
21 |
<PROPERTY key="sparkDriverMemory" value="3G"/> |
|
22 |
<PROPERTY key="sparkExecutorMemory" value="3G"/> |
|
23 |
<PROPERTY key="metadataEncoding" value="XML"/> |
|
24 |
|
|
25 |
</STATIC_CONFIGURATION> |
|
26 |
<JOB_INTERFACE> |
|
27 |
<PARAM description="the path of the input hdfs file contains xml" name="sourcePath" required="true"/> |
|
28 |
<PARAM description="the path of the result hdfs containing OAF entities" name="targetPath" required="true"/> |
|
29 |
<PARAM description="The entity type" name="entity" required="true"/> |
|
30 |
</JOB_INTERFACE> |
|
31 |
</HADOOP_JOB> |
|
32 |
<STATUS> |
|
33 |
<LAST_SUBMISSION_DATE value="2019-04-17T17:46:31+02:00"/> |
|
34 |
<RUNNING_INSTANCES value="2"/> |
|
35 |
<CUMULATIVE_RUN value="83"/> |
|
36 |
</STATUS> |
|
37 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
38 |
</BODY> |
|
39 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResourceType/mergeEntitiesToHDFS.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="80b642da-533c-4c6f-b896-fbba12146175_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2019-04-12T13:16:20+02:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="mergeEntitiesToHDFS" type="oozie"> |
|
11 |
<DESCRIPTION>Import XML Data into sequence File</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION><!-- Cluster wide --> |
|
13 |
<PROPERTY key="queueName" value="default"/> |
|
14 |
<PROPERTY key="user.name" value="sandro.labruzzo"/><!-- Runtime --> |
|
15 |
<PROPERTY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/mergeentities/oozie_app/"/> |
|
16 |
<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/> |
|
17 |
<PROPERTY key="oozie.use.system.libpath" value="True"/> |
|
18 |
<PROPERTY key="security_enabled" value="False"/> |
|
19 |
<PROPERTY key="dryrun" value="True"/> |
|
20 |
<PROPERTY key="oozie.action.sharelib.for.spark" value="spark2"/> |
|
21 |
<PROPERTY key="sparkDriverMemory" value="4G"/> |
|
22 |
<PROPERTY key="sparkExecutorMemory" value="4G"/> |
|
23 |
<PROPERTY key="metadataEncoding" value="XML"/> |
|
24 |
</STATIC_CONFIGURATION> |
|
25 |
<JOB_INTERFACE> |
|
26 |
<PARAM description="the path of the input hdfs file contains extracted Entities" name="sourcePath" required="true"/> |
|
27 |
<PARAM description="the baseDir path of the result hdfs containing OAF merged entities" name="targetPath" required="true"/> |
|
28 |
<PARAM description="The entity type" name="entity" required="true"/> |
|
29 |
</JOB_INTERFACE> |
|
30 |
</HADOOP_JOB> |
|
31 |
<STATUS> |
|
32 |
<LAST_SUBMISSION_DATE value="2019-04-17T17:46:31+02:00"/> |
|
33 |
<RUNNING_INSTANCES value="2"/> |
|
34 |
<CUMULATIVE_RUN value="83"/> |
|
35 |
</STATUS> |
|
36 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
37 |
</BODY> |
|
38 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResourceType/executeOozieJobICM.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="ed671880-d053-4692-90e4-5901ddde661b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2020-02-17T15:36:24+01:00"/>
    </HEADER>
    <BODY>
        <!-- Oozie job profile for the ICM cluster: imports XML data into a sequence file. -->
        <HADOOP_JOB name="executeOozieJobICM" type="oozie">
            <DESCRIPTION>Import XML Data into sequence File</DESCRIPTION>
            <STATIC_CONFIGURATION><!-- Cluster wide -->
                <PROPERTY key="queueName" value="default"/>
                <PROPERTY key="user.name" value="sandro.labruzzo"/>
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
                <PROPERTY key="oozie.use.system.libpath" value="True"/>
                <PROPERTY key="security_enabled" value="False"/>
                <PROPERTY key="dryrun" value="True"/>
                <PROPERTY key="oozie.action.sharelib.for.spark" value="spark2"/>
                <PROPERTY key="sparkDriverMemory" value="4G"/>
                <PROPERTY key="sparkExecutorMemory" value="4G"/>
                <PROPERTY key="metadataEncoding" value="XML"/>
                <PROPERTY key="jobTracker" value="yarnRM"/>
                <PROPERTY key="nameNode" value="hdfs://nameservice1"/>
                <!-- FIX: the inner double quotes were unescaped inside a double-quoted
                     attribute, producing malformed XML; escaped as &quot; -->
                <PROPERTY key="sparkExtraOPT" value="--conf spark.extraListeners=&quot;com.cloudera.spark.lineage.NavigatorAppListener&quot; --conf spark.sql.queryExecutionListeners=&quot;com.cloudera.spark.lineage.NavigatorQueryListener&quot; --conf spark.sql.warehouse.dir=&quot;/user/hive/warehouse&quot;"/>
                <PROPERTY key="projectVersion" value="1.0.5-SNAPSHOT"/>
            </STATIC_CONFIGURATION>
            <JOB_INTERFACE>
                <!-- FIX: description was copy-pasted from an input-file param;
                     oozie.wf.application.path is the workflow application directory on HDFS -->
                <PARAM description="HDFS path of the oozie workflow application directory" name="oozie.wf.application.path" required="true"/>
            </JOB_INTERFACE>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2020-02-17T14:18:37+01:00"/>
            <RUNNING_INSTANCES value="4"/>
            <CUMULATIVE_RUN value="105"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResourceType/executeOozieJobGARR.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="c277d01e-d10b-4150-8d9f-caf524e40fe3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2019-04-12T13:16:20+02:00"/>
    </HEADER>
    <BODY>
        <!-- Oozie job profile for the GARR cluster: imports XML data into a sequence file. -->
        <HADOOP_JOB name="executeOozieJobGARR" type="oozie">
            <DESCRIPTION>Import XML Data into sequence File</DESCRIPTION>
            <STATIC_CONFIGURATION><!-- Cluster wide -->
                <PROPERTY key="queueName" value="default"/>
                <PROPERTY key="user.name" value="sandro.labruzzo"/><!-- Runtime -->
                <!-- <PROPERTY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/scholexplorer/oozie_app/"/>-->
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
                <PROPERTY key="oozie.use.system.libpath" value="True"/>
                <PROPERTY key="security_enabled" value="False"/>
                <PROPERTY key="dryrun" value="True"/>
                <PROPERTY key="oozie.action.sharelib.for.spark" value="spark2"/>
                <PROPERTY key="sparkDriverMemory" value="4G"/>
                <PROPERTY key="sparkExecutorMemory" value="4G"/>
                <PROPERTY key="metadataEncoding" value="XML"/>
                <PROPERTY key="jobTracker" value="hadoop-rm3.garr-pa1.d4science.org:8032"/>
                <PROPERTY key="nameNode" value="hdfs://hadoop-rm1.garr-pa1.d4science.org:8020"/>
                <PROPERTY key="projectVersion" value="1.0.5-SNAPSHOT"/>
            </STATIC_CONFIGURATION>
            <JOB_INTERFACE>
                <!-- FIX: description was copy-pasted from an input-file param;
                     oozie.wf.application.path is the workflow application directory on HDFS -->
                <PARAM description="HDFS path of the oozie workflow application directory" name="oozie.wf.application.path" required="true"/>
            </JOB_INTERFACE>
        </HADOOP_JOB>
        <STATUS>
            <LAST_SUBMISSION_DATE value="2019-04-17T17:46:31+02:00"/>
            <RUNNING_INSTANCES value="2"/>
            <CUMULATIVE_RUN value="83"/>
        </STATUS>
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-graph/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/metadata2hdfs.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="4a268738-b635-4d86-9a4a-52bec6d20866_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
        <RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
        <RESOURCE_KIND value="WorkflowTemplateDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2020-02-17T16:16:24+01:00"/>
    </HEADER>
    <BODY>
        <!-- Workflow template: export mdstore records to HDFS, then run the oozie
             conversion job on the chosen Hadoop cluster. -->
        <CONFIGURATION>
            <PARAMETERS>
                <PARAM description="reuse metadata records?" name="reuseMdRecords" required="true" type="boolean"/>
                <PARAM description="metadata format name" name="mdFormat" required="true" type="string"/>
                <PARAM default="cleaned" description="metadata interpretation name" name="interpretation" required="false" type="string"/>
                <PARAM description="HDFS path" name="sourcePath" required="true" type="string"/>
                <PARAM description="HDFS path" name="targetPath" required="true" type="string"/>
                <!-- FIX: description said "HDFS path" (copy/paste error); this param is
                     the entity type forwarded to the hadoop job's "entity" parameter -->
                <PARAM description="entity type" name="entity" required="true" type="string"/>
                <PARAM description="Hadoop cluster name" name="cluster" required="true" type="string"/>
                <PARAM description="Oozie Job Name" name="oozieJob" required="true" type="string"/>
            </PARAMETERS>
            <WORKFLOW>
                <NODE isStart="true" name="reuseHdfsRecords" type="ReuseHdfsRecords">
                    <DESCRIPTION>reuse mdstore records</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="reuseMdRecords" ref="reuseMdRecords"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC name="true" to="doneExport"/>
                        <ARC name="false" to="exportRecords"/>
                    </ARCS>
                </NODE>
                <NODE name="exportRecords" type="MDStoreBatchExporter">
                    <DESCRIPTION>Fetch mdstore records</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="format" ref="mdFormat"/>
                        <PARAM name="layout" value="store"/>
                        <PARAM name="interpretation" ref="interpretation"/>
                        <PARAM name="outputEprParam" value="records_epr"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="storeHdfsRecords"/>
                    </ARCS>
                </NODE>
                <NODE name="storeHdfsRecords" type="StoreHdfsRecords">
                    <DESCRIPTION>Store records to HDFS</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="inputEprParam" value="records_epr"/>
                        <!-- FIX: ref was "hdfsPath", which is not declared among this
                             template's PARAMETERS and would resolve to nothing. The
                             exported records are the input of the conversion step, so
                             reference sourcePath. NOTE(review): confirm against the
                             StoreHdfsRecords node implementation. -->
                        <PARAM name="hdfsPath" ref="sourcePath"/>
                        <PARAM name="cluster" ref="cluster"/>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="doneExport"/>
                    </ARCS>
                </NODE>
                <NODE name="doneExport">
                    <DESCRIPTION/>
                    <PARAMETERS/>
                    <ARCS>
                        <ARC to="convertEntity"/>
                    </ARCS>
                </NODE>
                <NODE name="convertEntity" type="SubmitHadoopJob">
                    <DESCRIPTION>Run M/R import Job</DESCRIPTION>
                    <PARAMETERS>
                        <PARAM name="cluster" ref="cluster"/>
                        <PARAM name="hadoopJob" ref="oozieJob"/>
                        <PARAM name="jobParams">
                            <MAP>
                                <ENTRY key="sourcePath" ref="sourcePath"/>
                                <ENTRY key="targetPath" ref="targetPath"/>
                                <ENTRY key="entity" ref="entity"/>
                                <ENTRY key="oozie.wf.application.path" value="/user/sandro.labruzzo/graph/scholexplorer/oozie_app"/>
                            </MAP>
                        </PARAM>
                    </PARAMETERS>
                    <ARCS>
                        <ARC to="success"/>
                    </ARCS>
                </NODE>
            </WORKFLOW>
        </CONFIGURATION>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-core-components/trunk/src/main/java/eu/dnetlib/rmi/data/hadoop/ClusterName.java | ||
---|---|---|
15 | 15 |
*/ |
16 | 16 |
public enum ClusterName { |
17 | 17 |
DM, // Data Management |
18 |
GARR, // Data Management at GARR with spark2 |
|
18 | 19 |
IIS; // Information Inference Service(s) |
19 | 20 |
|
20 | 21 |
public static List<String> asStringList() { |
modules/dnet-msro-service/trunk/src/test/java/eu/dnetlib/graph/GraphLoaderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.graph;

import org.junit.Test;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Verifies the ${placeholder} substitution regex used by the workflow graph loader.
 */
public class GraphLoaderTest {

    // Matches ${name} and captures the placeholder name in group 1.
    final String regex = "\\$\\{(\\w*)\\}";

    final Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);

    @Test
    public void testRegEx() {
        String string = "${1234}/sdfjasdfpojawpdf/${dd}";
        final Matcher matcher = pattern.matcher(string);
        int k = 0;
        // The matcher keeps scanning the ORIGINAL char sequence, so each
        // group(0) found here is a distinct placeholder of the input string;
        // Pattern.quote prevents the ${...} metacharacters from being
        // re-interpreted by replaceAll.
        while (matcher.find()) {
            string = string.replaceAll(Pattern.quote(matcher.group(0)), "VALORE" + k++);
        }
        // FIX: the test previously only printed to stdout and asserted nothing,
        // so it could never fail. Pin the expected substitution result.
        final String expected = "VALORE0/sdfjasdfpojawpdf/VALORE1";
        if (!expected.equals(string)) {
            throw new AssertionError("placeholder substitution failed, got: " + string);
        }
    }
}
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/graph/GraphLoader.java | ||
---|---|---|
5 | 5 |
import java.util.List; |
6 | 6 |
import java.util.Map; |
7 | 7 |
import java.util.Set; |
8 |
import java.util.regex.Matcher; |
|
9 |
import java.util.regex.Pattern; |
|
8 | 10 |
import java.util.stream.Collectors; |
9 | 11 |
|
10 | 12 |
import javax.annotation.Resource; |
... | ... | |
29 | 31 |
*/ |
30 | 32 |
public class GraphLoader { |
31 | 33 |
|
32 |
private static final Log log = LogFactory.getLog(GraphLoader.class);
|
|
34 |
private static final Log log = LogFactory.getLog(GraphLoader.class);
|
|
33 | 35 |
|
34 |
private NodeHelper nodeHelper;
|
|
36 |
private NodeHelper nodeHelper;
|
|
35 | 37 |
|
36 |
@Resource(name = "propertyFetcher") |
|
37 |
private PropertyFetcher propertyFetcher; |
|
38 |
private String regExRef = "\\$\\{(\\w*)\\}"; |
|
38 | 39 |
|
39 |
public Graph loadGraph(final Document doc, final Map<String, String> globalParams) throws MSROException { |
|
40 |
final Graph graph = new Graph(); |
|
40 |
final Pattern pattern = Pattern.compile(regExRef, Pattern.MULTILINE); |
|
41 | 41 |
|
42 |
for (final Object o : doc.selectNodes("//CONFIGURATION/WORKFLOW/NODE")) { |
|
43 |
final Element n = (Element) o; |
|
44 |
final String nodeName = n.valueOf("@name"); |
|
45 |
final String nodeType = n.valueOf("@type"); |
|
46 |
final boolean isStart = StringUtils.equalsIgnoreCase(n.valueOf("@isStart"), "true"); |
|
47 |
final boolean isJoin = StringUtils.equalsIgnoreCase(n.valueOf("@isJoin"), "true"); |
|
48 | 42 |
|
49 |
final Map<String, GraphNodeParameter> params = calculateParamsForNode(n, globalParams); |
|
43 |
@Resource(name = "propertyFetcher") |
|
44 |
private PropertyFetcher propertyFetcher; |
|
50 | 45 |
|
51 |
if (isStart) { |
|
52 |
graph.addNode(GraphNode.newStartNode(nodeName, nodeType, params)); |
|
53 |
} else if (isJoin) { |
|
54 |
graph.addNode(GraphNode.newJoinNode(nodeName, nodeType, params)); |
|
55 |
} else { |
|
56 |
graph.addNode(GraphNode.newNode(nodeName, nodeType, params)); |
|
57 |
} |
|
46 |
public Graph loadGraph(final Document doc, final Map<String, String> globalParams) throws MSROException { |
|
47 |
final Graph graph = new Graph(); |
|
58 | 48 |
|
59 |
for (final Object o1 : n.selectNodes(".//ARC")) {
|
|
60 |
final Element a = (Element) o1;
|
|
61 |
final String arcName = a.valueOf("@name");
|
|
62 |
final String to = a.valueOf("@to");
|
|
63 |
graph.addArc(new Arc(StringUtils.isNotBlank(arcName) ? arcName : Arc.DEFAULT_ARC, nodeName, to));
|
|
64 |
}
|
|
49 |
for (final Object o : doc.selectNodes("//CONFIGURATION/WORKFLOW/NODE")) {
|
|
50 |
final Element n = (Element) o;
|
|
51 |
final String nodeName = n.valueOf("@name");
|
|
52 |
final String nodeType = n.valueOf("@type");
|
|
53 |
final boolean isStart = StringUtils.equalsIgnoreCase(n.valueOf("@isStart"), "true");
|
|
54 |
final boolean isJoin = StringUtils.equalsIgnoreCase(n.valueOf("@isJoin"), "true");
|
|
65 | 55 |
|
66 |
graph.addNode(GraphNode.newSuccessNode()); |
|
67 |
} |
|
56 |
final Map<String, GraphNodeParameter> params = calculateParamsForNode(n, globalParams); |
|
68 | 57 |
|
69 |
checkValidity(graph); |
|
58 |
if (isStart) { |
|
59 |
graph.addNode(GraphNode.newStartNode(nodeName, nodeType, params)); |
|
60 |
} else if (isJoin) { |
|
61 |
graph.addNode(GraphNode.newJoinNode(nodeName, nodeType, params)); |
|
62 |
} else { |
|
63 |
graph.addNode(GraphNode.newNode(nodeName, nodeType, params)); |
|
64 |
} |
|
70 | 65 |
|
71 |
return graph; |
|
72 |
} |
|
66 |
for (final Object o1 : n.selectNodes(".//ARC")) { |
|
67 |
final Element a = (Element) o1; |
|
68 |
final String arcName = a.valueOf("@name"); |
|
69 |
final String to = a.valueOf("@to"); |
|
70 |
graph.addArc(new Arc(StringUtils.isNotBlank(arcName) ? arcName : Arc.DEFAULT_ARC, nodeName, to)); |
|
71 |
} |
|
73 | 72 |
|
74 |
public Map<String, GraphNodeParameter> calculateParamsForNode(final Node node, final Map<String, String> globalParams) { |
|
73 |
graph.addNode(GraphNode.newSuccessNode()); |
|
74 |
} |
|
75 | 75 |
|
76 |
final Map<String, GraphNodeParameter> params = new HashMap<>();
|
|
76 |
checkValidity(graph);
|
|
77 | 77 |
|
78 |
if (node != null) { |
|
79 |
for (final Object o : node.selectNodes(".//PARAM")) { |
|
80 |
final Element p = (Element) o; |
|
78 |
return graph; |
|
79 |
} |
|
81 | 80 |
|
82 |
final String pName = p.valueOf("@name"); |
|
83 |
final GraphNodeParameter pValue = calculateSimpleValue((Element) o, globalParams); |
|
81 |
public Map<String, GraphNodeParameter> calculateParamsForNode(final Node node, final Map<String, String> globalParams) { |
|
84 | 82 |
|
85 |
if (pValue != null) { |
|
86 |
params.put(pName, pValue); |
|
87 |
} else if (p.selectSingleNode("./MAP") != null) { |
|
83 |
final Map<String, GraphNodeParameter> params = new HashMap<>(); |
|
88 | 84 |
|
89 |
@SuppressWarnings("unchecked") |
|
90 |
final Map<String, GraphNodeParameter> map = ((List<Element>) p.selectNodes("./MAP/ENTRY")) |
|
91 |
.stream() |
|
92 |
.collect(Collectors.toMap( |
|
93 |
e -> e.valueOf("@key"), |
|
94 |
e -> { |
|
95 |
final GraphNodeParameter gnp = calculateSimpleValue(e, globalParams); |
|
96 |
if (gnp == null) { |
|
97 |
final String msg = String.format("missing value for param: \"%s\"", e.valueOf("@key")); |
|
98 |
log.debug(msg); |
|
99 |
return GraphNodeParameter.newNullParam(); |
|
100 |
} |
|
101 |
return gnp; |
|
102 |
})); |
|
85 |
if (node != null) { |
|
86 |
for (final Object o : node.selectNodes(".//PARAM")) { |
|
87 |
final Element p = (Element) o; |
|
103 | 88 |
|
104 |
params.put(pName, GraphNodeParameter.newMapParam(map)); |
|
89 |
final String pName = p.valueOf("@name"); |
|
90 |
final GraphNodeParameter pValue = calculateSimpleValue((Element) o, globalParams); |
|
105 | 91 |
|
106 |
} else if (p.selectSingleNode("./LIST") != null) { |
|
107 |
@SuppressWarnings("unchecked") |
|
108 |
final List<GraphNodeParameter> list = ((List<Element>) p.selectNodes("./LIST/ITEM")) |
|
109 |
.stream() |
|
110 |
.map(e -> calculateSimpleValue(e, globalParams)) |
|
111 |
.collect(Collectors.toList()); |
|
112 |
params.put(pName, GraphNodeParameter.newListParam(list)); |
|
113 |
} |
|
114 |
} |
|
115 |
} |
|
92 |
if (pValue != null) { |
|
93 |
params.put(pName, pValue); |
|
94 |
} else if (p.selectSingleNode("./MAP") != null) { |
|
116 | 95 |
|
117 |
return params; |
|
118 |
} |
|
96 |
@SuppressWarnings("unchecked") final Map<String, GraphNodeParameter> map = ((List<Element>) p.selectNodes("./MAP/ENTRY")) |
|
97 |
.stream() |
|
98 |
.collect(Collectors.toMap( |
|
99 |
e -> e.valueOf("@key"), |
|
100 |
e -> { |
|
101 |
final GraphNodeParameter gnp = calculateSimpleValue(e, globalParams); |
|
102 |
if (gnp == null) { |
|
103 |
final String msg = String.format("missing value for param: \"%s\"", e.valueOf("@key")); |
|
104 |
log.debug(msg); |
|
105 |
return GraphNodeParameter.newNullParam(); |
|
106 |
} |
|
107 |
return gnp; |
|
108 |
})); |
|
119 | 109 |
|
120 |
private GraphNodeParameter calculateSimpleValue(final Element elem, final Map<String, String> globalParams) { |
|
121 |
final String value = elem.valueOf("@value"); |
|
122 |
final String ref = elem.valueOf("@ref"); |
|
123 |
final String prop = elem.valueOf("@property"); |
|
124 |
final String envRef = elem.valueOf("@env"); |
|
110 |
params.put(pName, GraphNodeParameter.newMapParam(map)); |
|
125 | 111 |
|
126 |
if (StringUtils.isNotBlank(ref) && StringUtils.isNotBlank(globalParams.get(ref))) { |
|
127 |
return GraphNodeParameter.newSimpleParam(globalParams.get(ref)); |
|
128 |
} else if (StringUtils.isNotBlank(envRef)) { |
|
129 |
return GraphNodeParameter.newEnvParam(envRef); |
|
130 |
} else if (StringUtils.isNotBlank(value)) { |
|
131 |
return GraphNodeParameter.newSimpleParam(value); |
|
132 |
} else if (StringUtils.isNotBlank(prop)) { |
|
133 |
return GraphNodeParameter.newSimpleParam(this.propertyFetcher.getProperty(prop)); |
|
134 |
} else { |
|
135 |
return null; |
|
136 |
} |
|
137 |
} |
|
112 |
} else if (p.selectSingleNode("./LIST") != null) { |
|
113 |
@SuppressWarnings("unchecked") final List<GraphNodeParameter> list = ((List<Element>) p.selectNodes("./LIST/ITEM")) |
|
114 |
.stream() |
|
115 |
.map(e -> calculateSimpleValue(e, globalParams)) |
|
116 |
.collect(Collectors.toList()); |
|
117 |
params.put(pName, GraphNodeParameter.newListParam(list)); |
|
118 |
} |
|
119 |
} |
|
120 |
} |
|
138 | 121 |
|
139 |
private void checkValidity(final Graph graph) throws MSROException { |
|
122 |
return params; |
|
123 |
} |
|
140 | 124 |
|
141 |
final Set<String> nodesFromArcs = new HashSet<String>(); |
|
125 |
private GraphNodeParameter calculateSimpleValue(final Element elem, final Map<String, String> globalParams) { |
|
126 |
String value = elem.valueOf("@value"); |
|
127 |
final String ref = elem.valueOf("@ref"); |
|
128 |
final String prop = elem.valueOf("@property"); |
|
129 |
final String envRef = elem.valueOf("@env"); |
|
142 | 130 |
|
143 |
boolean foundSuccess = false; |
|
144 |
boolean foundStart = false; |
|
131 |
if (StringUtils.isNotBlank(ref) && StringUtils.isNotBlank(globalParams.get(ref))) { |
|
132 |
return GraphNodeParameter.newSimpleParam(globalParams.get(ref)); |
|
133 |
} else if (StringUtils.isNotBlank(envRef)) { |
|
134 |
return GraphNodeParameter.newEnvParam(envRef); |
|
135 |
} else if (StringUtils.isNotBlank(value)) { |
|
136 |
Matcher matcher = pattern.matcher(value); |
|
137 |
while (matcher.find()) { |
|
138 |
final String rName = matcher.group(1); |
|
139 |
final String rValue = globalParams.get(rName); |
|
140 |
if (StringUtils.isBlank(rValue)) { |
|
141 |
return null; |
|
142 |
} |
|
143 |
value = value.replaceAll(Pattern.quote(matcher.group(0)), rValue); |
|
144 |
System.out.println("NEW VALUE "+value); |
|
145 |
} |
|
146 |
return GraphNodeParameter.newSimpleParam(value); |
|
147 |
} else if (StringUtils.isNotBlank(prop)) { |
|
148 |
return GraphNodeParameter.newSimpleParam(this.propertyFetcher.getProperty(prop)); |
|
149 |
} else { |
|
150 |
return null; |
|
151 |
} |
|
145 | 152 |
|
146 |
for (final Arc arc : graph.getArcs()) { |
|
147 |
if (StringUtils.isBlank(arc.getFrom()) || StringUtils.isBlank(arc.getFrom())) { throw new MSROException("Invalid arc: missing from e/o to"); } |
|
148 |
if (StringUtils.equals(arc.getTo(), GraphNode.SUCCESS_NODE)) { |
|
149 |
foundSuccess = true; |
|
150 |
} |
|
151 |
nodesFromArcs.add(arc.getFrom()); |
|
152 |
nodesFromArcs.add(arc.getTo()); |
|
153 |
} |
|
153 |
} |
|
154 | 154 |
|
155 |
if (!foundSuccess) { throw new MSROException("Arc to success not found"); }
|
|
155 |
private void checkValidity(final Graph graph) throws MSROException {
|
|
156 | 156 |
|
157 |
final Set<String> diff = Sets.symmetricDifference(graph.nodeNames(), nodesFromArcs); |
|
158 |
if (!diff.isEmpty()) { throw new MSROException("Missing or invalid nodes in arcs: " + diff); } |
|
157 |
final Set<String> nodesFromArcs = new HashSet<String>(); |
|
159 | 158 |
|
160 |
for (final GraphNode n : graph.nodes()) { |
|
161 |
if (StringUtils.isBlank(n.getName())) { throw new MSROException("Invalid node: missing name"); } |
|
162 |
if (n.isStart()) { |
|
163 |
foundStart = true; |
|
164 |
} |
|
165 |
if (!this.nodeHelper.isValidType(n.getType())) { throw new MSROException("Invalid node type: " + n.getType()); } |
|
166 |
} |
|
167 |
if (!foundStart) { throw new MSROException("Start node not found"); } |
|
168 |
} |
|
159 |
boolean foundSuccess = false; |
|
160 |
boolean foundStart = false; |
|
169 | 161 |
|
170 |
public NodeHelper getNodeHelper() { |
|
171 |
return this.nodeHelper; |
|
172 |
} |
|
162 |
for (final Arc arc : graph.getArcs()) { |
|
163 |
if (StringUtils.isBlank(arc.getFrom()) || StringUtils.isBlank(arc.getFrom())) { |
|
164 |
throw new MSROException("Invalid arc: missing from e/o to"); |
|
165 |
} |
|
166 |
if (StringUtils.equals(arc.getTo(), GraphNode.SUCCESS_NODE)) { |
|
167 |
foundSuccess = true; |
|
168 |
} |
|
169 |
nodesFromArcs.add(arc.getFrom()); |
|
170 |
nodesFromArcs.add(arc.getTo()); |
|
171 |
} |
|
173 | 172 |
|
174 |
@Required |
|
175 |
public void setNodeHelper(final NodeHelper nodeHelper) { |
|
176 |
this.nodeHelper = nodeHelper; |
|
177 |
} |
|
173 |
if (!foundSuccess) { |
|
174 |
throw new MSROException("Arc to success not found"); |
|
175 |
} |
|
178 | 176 |
|
177 |
final Set<String> diff = Sets.symmetricDifference(graph.nodeNames(), nodesFromArcs); |
|
178 |
if (!diff.isEmpty()) { |
|
179 |
throw new MSROException("Missing or invalid nodes in arcs: " + diff); |
|
180 |
} |
|
181 |
|
|
182 |
for (final GraphNode n : graph.nodes()) { |
|
183 |
if (StringUtils.isBlank(n.getName())) { |
|
184 |
throw new MSROException("Invalid node: missing name"); |
|
185 |
} |
|
186 |
if (n.isStart()) { |
|
187 |
foundStart = true; |
|
188 |
} |
|
189 |
if (!this.nodeHelper.isValidType(n.getType())) { |
|
190 |
throw new MSROException("Invalid node type: " + n.getType()); |
|
191 |
} |
|
192 |
} |
|
193 |
if (!foundStart) { |
|
194 |
throw new MSROException("Start node not found"); |
|
195 |
} |
|
196 |
} |
|
197 |
|
|
198 |
public NodeHelper getNodeHelper() { |
|
199 |
return this.nodeHelper; |
|
200 |
} |
|
201 |
|
|
202 |
@Required |
|
203 |
public void setNodeHelper(final NodeHelper nodeHelper) { |
|
204 |
this.nodeHelper = nodeHelper; |
|
205 |
} |
|
206 |
|
|
179 | 207 |
} |
Also available in: Unified diff
migration to spark