Revision 43601
Added by Claudio Atzori over 7 years ago
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/PrepareIISParamsV2.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.NoSuchElementException; |
|
5 |
import javax.annotation.Resource; |
|
6 |
|
|
7 |
import com.google.common.base.Joiner; |
|
8 |
import com.google.common.base.Splitter; |
|
9 |
import com.google.common.collect.Iterables; |
|
10 |
import com.google.common.collect.Lists; |
|
11 |
import com.googlecode.sarasvati.NodeToken; |
|
12 |
import eu.dnetlib.data.hadoop.config.ClusterName; |
|
13 |
import eu.dnetlib.data.hadoop.config.ConfigurationEnumerator; |
|
14 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
|
15 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
16 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
17 |
import eu.dnetlib.msro.rmi.MSROException; |
|
18 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
19 |
import org.apache.commons.logging.Log; |
|
20 |
import org.apache.commons.logging.LogFactory; |
|
21 |
import org.apache.hadoop.conf.Configuration; |
|
22 |
import org.springframework.beans.factory.annotation.Required; |
|
23 |
|
|
24 |
public abstract class PrepareIISParamsV2 extends SimpleJobNode { |
|
25 |
|
|
26 |
private static final Log log = LogFactory.getLog(PrepareIISParamsV2.class); |
|
27 |
|
|
28 |
@Resource |
|
29 |
protected ConfigurationEnumerator configurationEnumerator; |
|
30 |
|
|
31 |
@Resource |
|
32 |
private UniqueServiceLocator serviceLocator; |
|
33 |
|
|
34 |
private String clusterName; |
|
35 |
|
|
36 |
private String clusterParam = "cluster"; |
|
37 |
|
|
38 |
private String oozieWfAppPath; |
|
39 |
|
|
40 |
private String oozieWfAppPathParam = "oozie.wf.application.path"; |
|
41 |
|
|
42 |
private String xqueryMdStoreService; |
|
43 |
|
|
44 |
private String mdStoreStoreLocationParam = "import_mdstore_service_location"; |
|
45 |
|
|
46 |
private String xqueryObjectStoreService; |
|
47 |
|
|
48 |
private String objectStoreLocationParam = "import_content_object_store_location"; |
|
49 |
|
|
50 |
private String xqueryIsLookupService; |
|
51 |
|
|
52 |
private String islookupLocationParam = "import_islookup_service_location"; |
|
53 |
|
|
54 |
private String importProjectConceptsContextIdParam = "import_project_concepts_context_id"; |
|
55 |
|
|
56 |
private String importProjectConceptsContextId; |
|
57 |
|
|
58 |
private String xqueryDatasetStore; |
|
59 |
|
|
60 |
private String mdStoreDatasetParam = "import_dataset_mdstore_ids_csv"; |
|
61 |
|
|
62 |
private String objectStoreBlacklistCSV = ""; |
|
63 |
|
|
64 |
private String importHbaseDumpLocationParam = "import_hbase_dump_location"; |
|
65 |
|
|
66 |
private String importHbaseDumpLocation; |
|
67 |
|
|
68 |
protected void prepare(final NodeToken token) throws Exception { |
|
69 |
|
|
70 |
token.getEnv().setAttribute(getClusterParam(), getClusterName()); |
|
71 |
|
|
72 |
// Assumes we only have one mdStore service instance |
|
73 |
token.getEnv().setAttribute(getMdStoreStoreLocationParam(), getServiceEndpoint(getXqueryMdStoreService())); |
|
74 |
// Assumes we only have one objectStore service instance |
|
75 |
token.getEnv().setAttribute(getObjectStoreLocationParam(), getServiceEndpoint(getXqueryObjectStoreService())); |
|
76 |
|
|
77 |
token.getEnv().setAttribute(getIslookupLocationParam(), getServiceEndpoint(getXqueryIsLookupService())); |
|
78 |
token.getEnv().setAttribute(getImportProjectConceptsContextIdParam(), getImportProjectConceptsContextId()); |
|
79 |
|
|
80 |
Configuration dmConf = configurationEnumerator.get(ClusterName.DM); |
|
81 |
String dmNameNode = dmConf.get("fs.defaultFS"); |
|
82 |
token.getEnv().setAttribute(getImportHbaseDumpLocationParam(), getURI(dmNameNode, getImportHbaseDumpLocation())); |
|
83 |
|
|
84 |
Configuration conf = configurationEnumerator.get(ClusterName.valueOf(getClusterName())); |
|
85 |
String nameNode = conf.get("fs.defaultFS"); |
|
86 |
|
|
87 |
token.getEnv().setAttribute("nameNode", nameNode); |
|
88 |
token.getEnv().setAttribute("jobTracker", conf.get("mapred.job.tracker")); |
|
89 |
token.getEnv().setAttribute(getOozieWfAppPathParam(), getURI(nameNode, getOozieWfAppPath())); |
|
90 |
token.getEnv().setAttribute(getMdStoreDatasetParam(), asCSV(getProfileIds(getXqueryDatasetStore()))); |
|
91 |
} |
|
92 |
|
|
93 |
protected String getServiceEndpoint(final String xquery) throws MSROException { |
|
94 |
try { |
|
95 |
return Iterables.getOnlyElement(serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery)); |
|
96 |
} catch (ISLookUpException e) { |
|
97 |
throw new MSROException("unable to fetch service endpoint", e); |
|
98 |
} catch (NoSuchElementException e) { |
|
99 |
throw new MSROException("unable to find service endpoint, xquery: " + getXqueryMdStoreService(), e); |
|
100 |
} catch (IllegalArgumentException e) { |
|
101 |
throw new MSROException("more than one services found, we assume to have only one available", e); |
|
102 |
} |
|
103 |
} |
|
104 |
|
|
105 |
protected String getProfileId(final String xquery) throws MSROException { |
|
106 |
try { |
|
107 |
return Iterables.getOnlyElement(serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery)); |
|
108 |
} catch (ISLookUpException e) { |
|
109 |
throw new MSROException("unable to fetch profile id", e); |
|
110 |
} catch (NoSuchElementException e) { |
|
111 |
throw new MSROException("unable to find profile profile, xquery: " + xquery, e); |
|
112 |
} catch (IllegalArgumentException e) { |
|
113 |
throw new MSROException("more than one profile profiles was found, we assume to have only one available, xquery: " + xquery, e); |
|
114 |
} |
|
115 |
} |
|
116 |
|
|
117 |
protected List<String> getProfileIds(final String xquery) throws MSROException { |
|
118 |
try { |
|
119 |
List<String> ids = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery); |
|
120 |
|
|
121 |
if (ids.isEmpty()) { |
|
122 |
log.warn("couldn't find any profile, xquery: " + xquery); |
|
123 |
} |
|
124 |
|
|
125 |
return ids; |
|
126 |
} catch (ISLookUpException e) { |
|
127 |
throw new MSROException("unable to fetch profile ids, x query: " + xquery, e); |
|
128 |
} |
|
129 |
} |
|
130 |
|
|
131 |
protected String getFilteredObjectStoreCSV(final String xquery) throws MSROException { |
|
132 |
return asCSV(filter(getProfileIds(xquery), asList(getObjectStoreBlacklistCSV()))); |
|
133 |
} |
|
134 |
|
|
135 |
protected List<String> filter(final List<String> list, final List<String> filter) { |
|
136 |
if (filter == null || filter.isEmpty()) { return list; } |
|
137 |
list.removeAll(filter); |
|
138 |
return list; |
|
139 |
} |
|
140 |
|
|
141 |
protected String asCSV(final List<String> list) { |
|
142 |
return Joiner.on(",").skipNulls().join(list); |
|
143 |
} |
|
144 |
|
|
145 |
protected List<String> asList(final String csv) { |
|
146 |
return Lists.newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(csv)); |
|
147 |
} |
|
148 |
|
|
149 |
private String getURI(final String nameNode, final String relative) { |
|
150 |
// TODO ensure to return a valid URI |
|
151 |
return nameNode + relative; |
|
152 |
} |
|
153 |
|
|
154 |
private String getZkQuorumCSV(final Configuration conf, final String zkPort) { |
|
155 |
return Joiner.on(":" + zkPort + ",").join(Splitter.on(",").omitEmptyStrings().split(conf.get("hbase.zookeeper.quorum"))); |
|
156 |
} |
|
157 |
|
|
158 |
@Required |
|
159 |
public void setXqueryMdStoreService(final String xqueryMdStoreService) { |
|
160 |
this.xqueryMdStoreService = xqueryMdStoreService; |
|
161 |
} |
|
162 |
|
|
163 |
public String getXqueryMdStoreService() { |
|
164 |
return xqueryMdStoreService; |
|
165 |
} |
|
166 |
|
|
167 |
public String getMdStoreStoreLocationParam() { |
|
168 |
return mdStoreStoreLocationParam; |
|
169 |
} |
|
170 |
|
|
171 |
public void setMdStoreStoreLocationParam(final String mdStoreStoreLocationParam) { |
|
172 |
this.mdStoreStoreLocationParam = mdStoreStoreLocationParam; |
|
173 |
} |
|
174 |
|
|
175 |
public String getClusterName() { |
|
176 |
return clusterName; |
|
177 |
} |
|
178 |
|
|
179 |
public void setClusterName(final String clusterName) { |
|
180 |
this.clusterName = clusterName; |
|
181 |
} |
|
182 |
|
|
183 |
public String getClusterParam() { |
|
184 |
return clusterParam; |
|
185 |
} |
|
186 |
|
|
187 |
public void setClusterParam(final String clusterParam) { |
|
188 |
this.clusterParam = clusterParam; |
|
189 |
} |
|
190 |
|
|
191 |
public String getOozieWfAppPathParam() { |
|
192 |
return oozieWfAppPathParam; |
|
193 |
} |
|
194 |
|
|
195 |
public void setOozieWfAppPathParam(final String oozieWfAppPathParam) { |
|
196 |
this.oozieWfAppPathParam = oozieWfAppPathParam; |
|
197 |
} |
|
198 |
|
|
199 |
public String getOozieWfAppPath() { |
|
200 |
return oozieWfAppPath; |
|
201 |
} |
|
202 |
|
|
203 |
public void setOozieWfAppPath(final String oozieWfAppPath) { |
|
204 |
this.oozieWfAppPath = oozieWfAppPath; |
|
205 |
} |
|
206 |
|
|
207 |
@Required |
|
208 |
public String getXqueryDatasetStore() { |
|
209 |
return xqueryDatasetStore; |
|
210 |
} |
|
211 |
|
|
212 |
public void setXqueryDatasetStore(final String xqueryDatasetStore) { |
|
213 |
this.xqueryDatasetStore = xqueryDatasetStore; |
|
214 |
} |
|
215 |
|
|
216 |
public String getMdStoreDatasetParam() { |
|
217 |
return mdStoreDatasetParam; |
|
218 |
} |
|
219 |
|
|
220 |
public void setMdStoreDatasetParam(final String mdStoreDatasetParam) { |
|
221 |
this.mdStoreDatasetParam = mdStoreDatasetParam; |
|
222 |
} |
|
223 |
|
|
224 |
public String getXqueryObjectStoreService() { |
|
225 |
return xqueryObjectStoreService; |
|
226 |
} |
|
227 |
|
|
228 |
@Required |
|
229 |
public void setXqueryObjectStoreService(final String xqueryObjectStoreService) { |
|
230 |
this.xqueryObjectStoreService = xqueryObjectStoreService; |
|
231 |
} |
|
232 |
|
|
233 |
public String getObjectStoreLocationParam() { |
|
234 |
return objectStoreLocationParam; |
|
235 |
} |
|
236 |
|
|
237 |
public void setObjectStoreLocationParam(final String objectStoreLocationParam) { |
|
238 |
this.objectStoreLocationParam = objectStoreLocationParam; |
|
239 |
} |
|
240 |
|
|
241 |
public String getObjectStoreBlacklistCSV() { |
|
242 |
return objectStoreBlacklistCSV; |
|
243 |
} |
|
244 |
|
|
245 |
public void setObjectStoreBlacklistCSV(final String objectStoreBlacklistCSV) { |
|
246 |
this.objectStoreBlacklistCSV = objectStoreBlacklistCSV; |
|
247 |
} |
|
248 |
|
|
249 |
public String getXqueryIsLookupService() { |
|
250 |
return xqueryIsLookupService; |
|
251 |
} |
|
252 |
|
|
253 |
@Required |
|
254 |
public void setXqueryIsLookupService(final String xqueryIsLookupService) { |
|
255 |
this.xqueryIsLookupService = xqueryIsLookupService; |
|
256 |
} |
|
257 |
|
|
258 |
public String getIslookupLocationParam() { |
|
259 |
return islookupLocationParam; |
|
260 |
} |
|
261 |
|
|
262 |
public void setIslookupLocationParam(final String islookupLocationParam) { |
|
263 |
this.islookupLocationParam = islookupLocationParam; |
|
264 |
} |
|
265 |
|
|
266 |
public String getImportProjectConceptsContextIdParam() { |
|
267 |
return importProjectConceptsContextIdParam; |
|
268 |
} |
|
269 |
|
|
270 |
public void setImportProjectConceptsContextIdParam(final String importProjectConceptsContextIdParam) { |
|
271 |
this.importProjectConceptsContextIdParam = importProjectConceptsContextIdParam; |
|
272 |
} |
|
273 |
|
|
274 |
public String getImportProjectConceptsContextId() { |
|
275 |
return importProjectConceptsContextId; |
|
276 |
} |
|
277 |
|
|
278 |
public void setImportProjectConceptsContextId(final String importProjectConceptsContextId) { |
|
279 |
this.importProjectConceptsContextId = importProjectConceptsContextId; |
|
280 |
} |
|
281 |
|
|
282 |
public String getImportHbaseDumpLocationParam() { |
|
283 |
return importHbaseDumpLocationParam; |
|
284 |
} |
|
285 |
|
|
286 |
public void setImportHbaseDumpLocationParam(final String importHbaseDumpLocationParam) { |
|
287 |
this.importHbaseDumpLocationParam = importHbaseDumpLocationParam; |
|
288 |
} |
|
289 |
|
|
290 |
public String getImportHbaseDumpLocation() { |
|
291 |
return importHbaseDumpLocation; |
|
292 |
} |
|
293 |
|
|
294 |
public void setImportHbaseDumpLocation(final String importHbaseDumpLocation) { |
|
295 |
this.importHbaseDumpLocation = importHbaseDumpLocation; |
|
296 |
} |
|
297 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/PrepareIISPreprocessingParamsV2JobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes; |
|
2 |
|
|
3 |
import com.googlecode.sarasvati.Arc; |
|
4 |
import com.googlecode.sarasvati.NodeToken; |
|
5 |
import org.springframework.beans.factory.annotation.Required; |
|
6 |
|
|
7 |
public class PrepareIISPreprocessingParamsV2JobNode extends PrepareIISParamsV2 { |
|
8 |
|
|
9 |
private String xqueryWosMDStore; |
|
10 |
|
|
11 |
private String mdStoreWosParam = "import_wos_mdstore_id"; |
|
12 |
|
|
13 |
private String xqueryDatabaseService; |
|
14 |
|
|
15 |
private String databaseServiceLocationParam = "import_database_service_location"; |
|
16 |
|
|
17 |
private String xqueryDataciteObjectStore; |
|
18 |
|
|
19 |
private String dataciteObjectStoreParam = "import_content_datacite_objectstores_csv"; |
|
20 |
|
|
21 |
private String xqueryWosObjectStore; |
|
22 |
|
|
23 |
private String wosObjectStorePlaintextParam = "import_content_wos_plaintext_objectstores_csv"; |
|
24 |
|
|
25 |
@Override |
|
26 |
protected String execute(final NodeToken token) throws Exception { |
|
27 |
super.prepare(token); |
|
28 |
|
|
29 |
token.getEnv().setAttribute(getMdStoreWosParam(), getProfileId(getXqueryWosMDStore())); |
|
30 |
token.getEnv().setAttribute(getDatabaseServiceLocationParam(), getServiceEndpoint(getXqueryDatabaseService())); |
|
31 |
|
|
32 |
token.getEnv().setAttribute(getDataciteObjectStoreParam(), getFilteredObjectStoreCSV(getXqueryDataciteObjectStore())); |
|
33 |
token.getEnv().setAttribute(getWosObjectStorePlaintextParam(), getProfileId(getXqueryWosObjectStore())); |
|
34 |
|
|
35 |
return Arc.DEFAULT_ARC; |
|
36 |
} |
|
37 |
|
|
38 |
public String getXqueryWosMDStore() { |
|
39 |
return xqueryWosMDStore; |
|
40 |
} |
|
41 |
|
|
42 |
@Required |
|
43 |
public void setXqueryWosMDStore(final String xqueryWosMDStore) { |
|
44 |
this.xqueryWosMDStore = xqueryWosMDStore; |
|
45 |
} |
|
46 |
|
|
47 |
public String getMdStoreWosParam() { |
|
48 |
return mdStoreWosParam; |
|
49 |
} |
|
50 |
|
|
51 |
public void setMdStoreWosParam(final String mdStoreWosParam) { |
|
52 |
this.mdStoreWosParam = mdStoreWosParam; |
|
53 |
} |
|
54 |
|
|
55 |
public String getXqueryDatabaseService() { |
|
56 |
return xqueryDatabaseService; |
|
57 |
} |
|
58 |
|
|
59 |
@Required |
|
60 |
public void setXqueryDatabaseService(final String xqueryDatabaseService) { |
|
61 |
this.xqueryDatabaseService = xqueryDatabaseService; |
|
62 |
} |
|
63 |
|
|
64 |
public String getDatabaseServiceLocationParam() { |
|
65 |
return databaseServiceLocationParam; |
|
66 |
} |
|
67 |
|
|
68 |
public void setDatabaseServiceLocationParam(final String databaseServiceLocationParam) { |
|
69 |
this.databaseServiceLocationParam = databaseServiceLocationParam; |
|
70 |
} |
|
71 |
|
|
72 |
public String getWosObjectStorePlaintextParam() { |
|
73 |
return wosObjectStorePlaintextParam; |
|
74 |
} |
|
75 |
|
|
76 |
public void setWosObjectStorePlaintextParam(final String wosObjectStorePlaintextParam) { |
|
77 |
this.wosObjectStorePlaintextParam = wosObjectStorePlaintextParam; |
|
78 |
} |
|
79 |
|
|
80 |
public String getXqueryWosObjectStore() { |
|
81 |
return xqueryWosObjectStore; |
|
82 |
} |
|
83 |
|
|
84 |
@Required |
|
85 |
public void setXqueryWosObjectStore(final String xqueryWosObjectStore) { |
|
86 |
this.xqueryWosObjectStore = xqueryWosObjectStore; |
|
87 |
} |
|
88 |
|
|
89 |
public String getXqueryDataciteObjectStore() { |
|
90 |
return xqueryDataciteObjectStore; |
|
91 |
} |
|
92 |
|
|
93 |
@Required |
|
94 |
public void setXqueryDataciteObjectStore(final String xqueryDataciteObjectStore) { |
|
95 |
this.xqueryDataciteObjectStore = xqueryDataciteObjectStore; |
|
96 |
} |
|
97 |
|
|
98 |
public String getDataciteObjectStoreParam() { |
|
99 |
return dataciteObjectStoreParam; |
|
100 |
} |
|
101 |
|
|
102 |
public void setDataciteObjectStoreParam(final String dataciteObjectStoreParam) { |
|
103 |
this.dataciteObjectStoreParam = dataciteObjectStoreParam; |
|
104 |
} |
|
105 |
|
|
106 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/PrepareIISMainParamsV2JobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import com.google.gson.Gson; |
|
7 |
import com.googlecode.sarasvati.Arc; |
|
8 |
import com.googlecode.sarasvati.NodeToken; |
|
9 |
import org.springframework.beans.factory.annotation.Required; |
|
10 |
|
|
11 |
public class PrepareIISMainParamsV2JobNode extends PrepareIISParamsV2 { |
|
12 |
|
|
13 |
private String xqueryObjectStores; |
|
14 |
|
|
15 |
private String objectStoreParam = "import_content_objectstores_csv"; |
|
16 |
|
|
17 |
// Enable/Disable inference modules |
|
18 |
|
|
19 |
@Override |
|
20 |
protected String execute(final NodeToken token) throws Exception { |
|
21 |
|
|
22 |
super.prepare(token); |
|
23 |
|
|
24 |
token.getEnv().setAttribute(getObjectStoreParam(), getFilteredObjectStoreCSV(getXqueryObjectStores())); |
|
25 |
|
|
26 |
@SuppressWarnings("unchecked") |
|
27 |
final List<Map<String, String>> sets = new Gson().fromJson(token.getEnv().getAttribute("sets"), List.class); |
|
28 |
for (Map<String, String> set : sets) { |
|
29 |
token.getEnv().setAttribute(set.get("enablingProperty"), set.get("enabled")); |
|
30 |
} |
|
31 |
|
|
32 |
return Arc.DEFAULT_ARC; |
|
33 |
} |
|
34 |
|
|
35 |
public String getXqueryObjectStores() { |
|
36 |
return xqueryObjectStores; |
|
37 |
} |
|
38 |
|
|
39 |
@Required |
|
40 |
public void setXqueryObjectStores(final String xqueryObjectStores) { |
|
41 |
this.xqueryObjectStores = xqueryObjectStores; |
|
42 |
} |
|
43 |
|
|
44 |
public String getObjectStoreParam() { |
|
45 |
return objectStoreParam; |
|
46 |
} |
|
47 |
|
|
48 |
public void setObjectStoreParam(final String objectStoreParam) { |
|
49 |
this.objectStoreParam = objectStoreParam; |
|
50 |
} |
|
51 |
|
|
52 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/iis/iis.meta.xml | ||
---|---|---|
14 | 14 |
<ADMIN_EMAIL /> |
15 | 15 |
<CONFIGURATION status="EXECUTABLE"> |
16 | 16 |
<WORKFLOW id="4801c33c-66ca-4ab6-af64-aa812194ec66_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IIS Preprocessing"> |
17 |
<WORKFLOW id="4801c33c-66ca-4ab6-af64-aa812194ec67_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IIS Preprocessing V2"/> |
|
17 | 18 |
<WORKFLOW id="dc19a8d1-93a8-4ffb-9fed-950f4156eedb_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IIS Main"/> |
19 |
<WORKFLOW id="dc19a8d1-93a8-4ffb-9fed-950f4156eedc_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IIS Main V2"/> |
|
18 | 20 |
</WORKFLOW> |
19 | 21 |
</CONFIGURATION> |
20 | 22 |
<SCHEDULING enabled="false"> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/iis/iis-preprocessingV2.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="4801c33c-66ca-4ab6-af64-aa812194ec67_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<WORKFLOW_NAME>IIS preprocessing V2</WORKFLOW_NAME> |
|
12 |
<WORKFLOW_TYPE>IIS</WORKFLOW_TYPE> |
|
13 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
14 |
<CONFIGURATION start="manual"> |
|
15 |
<NODE name="start" isStart="true"> |
|
16 |
<DESCRIPTION>start</DESCRIPTION> |
|
17 |
<PARAMETERS/> |
|
18 |
<ARCS> |
|
19 |
<ARC to="prepareActionSets" /> |
|
20 |
<ARC to="prepareParameters" /> |
|
21 |
</ARCS> |
|
22 |
</NODE> |
|
23 |
<NODE name="prepareActionSets" type="PrepareActionSets"> |
|
24 |
<DESCRIPTION>prepare action sets</DESCRIPTION> |
|
25 |
<PARAMETERS> |
|
26 |
<PARAM required="true" type="string" name="sets" managedBy="system"> |
|
27 |
[ |
|
28 |
{ |
|
29 |
'set' : 'iis-referenced-projects-preprocessing', |
|
30 |
'jobProperty' : 'export_action_set_id_document_referencedProjects', |
|
31 |
'enablingProperty' : 'active_referencedProjects_export', |
|
32 |
'enabled' : 'true' |
|
33 |
}, |
|
34 |
{ |
|
35 |
'set' : 'iis-referenced-datasets-preprocessing', |
|
36 |
'jobProperty' : 'export_action_set_id_document_referencedDatasets', |
|
37 |
'enablingProperty' : 'active_referencedDatasets_export', |
|
38 |
'enabled' : 'true' |
|
39 |
}, |
|
40 |
{ |
|
41 |
'set' : 'iis-wos-entities', |
|
42 |
'jobProperty' : 'export_action_set_id_entity_wos', |
|
43 |
'enablingProperty' : 'active_entity_wos_export', |
|
44 |
'enabled' : 'true' |
|
45 |
}, |
|
46 |
{ |
|
47 |
'set' : 'iis-dataset-entities-preprocessing', |
|
48 |
'jobProperty' : 'export_action_set_id_entity_dataset', |
|
49 |
'enablingProperty' : 'active_entity_dataset_export', |
|
50 |
'enabled' : 'true' |
|
51 |
} |
|
52 |
] |
|
53 |
</PARAM> |
|
54 |
</PARAMETERS> |
|
55 |
<ARCS> |
|
56 |
<ARC to="preprocessing" /> |
|
57 |
</ARCS> |
|
58 |
</NODE> |
|
59 |
<NODE name="prepareParameters" type="PreparePreprocessParamsV2"> |
|
60 |
<DESCRIPTION>prepare parameters</DESCRIPTION> |
|
61 |
<PARAMETERS> |
|
62 |
<PARAM required="true" type="string" name="islookupLocationParam" managedBy="system">import_islookup_service_location</PARAM> |
|
63 |
<PARAM required="true" type="string" name="objectStoreLocationParam" managedBy="system">import_content_object_store_location</PARAM> |
|
64 |
<PARAM required="true" type="string" name="mdStoreStoreLocationParam" managedBy="system">import_mdstore_service_location</PARAM> |
|
65 |
<PARAM required="true" type="string" name="mdStoreDatasetParam" managedBy="system">import_dataset_mdstore_ids_csv</PARAM> |
|
66 |
<PARAM required="true" type="string" name="mdStoreWosParam" managedBy="system">import_wos_mdstore_id</PARAM> |
|
67 |
<PARAM required="true" type="string" name="databaseServiceLocationParam" managedBy="system">import_database_service_location</PARAM> |
|
68 |
<PARAM required="true" type="string" name="dataciteObjectStoreParam" managedBy="system">import_content_datacite_objectstores_csv</PARAM> |
|
69 |
<PARAM required="true" type="string" name="wosObjectStorePlaintextParam" managedBy="system">import_content_wos_plaintext_objectstores_csv</PARAM> |
|
70 |
<PARAM required="true" type="string" name="oozieWfAppPathParam" managedBy="system">oozie.wf.application.path</PARAM> |
|
71 |
<PARAM required="true" type="string" name="oozieWfAppPath" managedBy="user">/tmp/integration/apps/preprocessing</PARAM> |
|
72 |
<PARAM required="true" type="string" name="clusterName" managedBy="user" function="validValues(['IIS','DM'])">DM</PARAM> |
|
73 |
<PARAM required="false" type="string" name="objectStoreBlacklistCSV" managedBy="user"></PARAM> |
|
74 |
<PARAM required="true" type="string" name="importProjectConceptsContextIdParam" managedBy="system">import_project_concepts_context_id</PARAM> |
|
75 |
<PARAM required="true" type="string" name="importProjectConceptsContextId" managedBy="user">fet-fp7</PARAM> |
|
76 |
</PARAMETERS> |
|
77 |
<ARCS> |
|
78 |
<ARC to="preprocessing" /> |
|
79 |
</ARCS> |
|
80 |
</NODE> |
|
81 |
<NODE name="preprocessing" type="SubmitHadoopJob" isJoin="true"> |
|
82 |
<DESCRIPTION>IIS preprocessing</DESCRIPTION> |
|
83 |
<PARAMETERS> |
|
84 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">iisPreprocessingJob</PARAM> |
|
85 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
86 |
{ |
|
87 |
'cluster' : 'cluster', |
|
88 |
'nameNode' : 'nameNode', |
|
89 |
'jobTracker' : 'jobTracker', |
|
90 |
'oozie.wf.application.path' : 'oozie.wf.application.path', |
|
91 |
'import_content_object_store_location' : 'import_content_object_store_location', |
|
92 |
'import_mdstore_service_location' : 'import_mdstore_service_location', |
|
93 |
'import_islookup_service_location' : 'import_islookup_service_location', |
|
94 |
'import_project_concepts_context_id' : 'import_project_concepts_context_id', |
|
95 |
'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', |
|
96 |
'import_wos_mdstore_id' : 'import_wos_mdstore_id', |
|
97 |
'import_database_service_location' : 'import_database_service_location', |
|
98 |
'import_content_datacite_objectstores_csv' : 'import_content_datacite_objectstores_csv', |
|
99 |
'import_content_wos_plaintext_objectstores_csv' : 'import_content_wos_plaintext_objectstores_csv', |
|
100 |
'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', |
|
101 |
'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', |
|
102 |
'export_action_set_id_entity_wos' : 'export_action_set_id_entity_wos', |
|
103 |
'export_action_set_id_entity_dataset' : 'export_action_set_id_entity_dataset', |
|
104 |
'output_remote_location' : 'actionManagerBasePath' |
|
105 |
} |
|
106 |
</PARAM> |
|
107 |
<PARAM required="true" type="string" name="sysParams" managedBy="system"> |
|
108 |
{ |
|
109 |
'export_action_hbase_table_name' : 'hbase.actions.table', |
|
110 |
'import_database_dbname' : 'dnet.openaire.db.name' |
|
111 |
} |
|
112 |
</PARAM> |
|
113 |
<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM> |
|
114 |
</PARAMETERS> |
|
115 |
<ARCS> |
|
116 |
<ARC to="updateActionSets" /> |
|
117 |
</ARCS> |
|
118 |
</NODE> |
|
119 |
<NODE name="updateActionSets" type="UpdateActionSets"> |
|
120 |
<DESCRIPTION>update action sets</DESCRIPTION> |
|
121 |
<PARAMETERS/> |
|
122 |
<ARCS> |
|
123 |
<ARC to="success" /> |
|
124 |
</ARCS> |
|
125 |
</NODE> |
|
126 |
</CONFIGURATION> |
|
127 |
<STATUS /> |
|
128 |
</BODY> |
|
129 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/iis/iis-mainV2.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="dc19a8d1-93a8-4ffb-9fed-950f4156eedc_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" /> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType" /> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources" /> |
|
8 |
<RESOURCE_URI value="" /> |
|
9 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" /> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>IIS main workflow V2</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_TYPE>IIS</WORKFLOW_TYPE> |
|
14 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
15 |
<CONFIGURATION start="manual"> |
|
16 |
<NODE name="start" isStart="true"> |
|
17 |
<DESCRIPTION>start</DESCRIPTION> |
|
18 |
<PARAMETERS /> |
|
19 |
<ARCS> |
|
20 |
<ARC to="prepareActionSets" /> |
|
21 |
</ARCS> |
|
22 |
</NODE> |
|
23 |
<NODE name="prepareActionSets" type="PrepareActionSets"> |
|
24 |
<DESCRIPTION>prepare action sets</DESCRIPTION> |
|
25 |
<PARAMETERS> |
|
26 |
<PARAM required="true" type="string" name="sets" managedBy="system"> |
|
27 |
[ |
|
28 |
{ |
|
29 |
'set' : 'iis-document-affiliation', |
|
30 |
'jobProperty' : 'export_action_set_id_affiliation_matching', |
|
31 |
'enablingProperty' : 'active_document_affiliation', |
|
32 |
'enabled' : 'true' |
|
33 |
}, |
|
34 |
{ |
|
35 |
'set' : 'iis-referenced-projects-main', |
|
36 |
'jobProperty' : 'export_action_set_id_document_referencedProjects', |
|
37 |
'enablingProperty' : 'active_referenceextraction_project', |
|
38 |
'enabled' : 'true' |
|
39 |
}, |
|
40 |
{ |
|
41 |
'set' : 'iis-referenced-datasets-main', |
|
42 |
'jobProperty' : 'export_action_set_id_document_referencedDatasets', |
|
43 |
'enablingProperty' : 'active_referenceextraction_dataset', |
|
44 |
'enabled' : 'true' |
|
45 |
}, |
|
46 |
{ |
|
47 |
'set' : 'iis-dataset-entities-main', |
|
48 |
'jobProperty' : 'export_action_set_id_entity_dataset', |
|
49 |
'enablingProperty' : 'active_referenceextraction_dataset', |
|
50 |
'enabled' : 'true' |
|
51 |
}, |
|
52 |
{ |
|
53 |
'set' : 'iis-researchinitiative', |
|
54 |
'jobProperty' : 'export_action_set_id_document_research_initiative', |
|
55 |
'enablingProperty' : |
|
56 |
'active_referenceextraction_researchinitiative', |
|
57 |
'enabled' : 'true' |
|
58 |
}, |
|
59 |
{ |
|
60 |
'set' : 'iis-document-similarities', |
|
61 |
'jobProperty' : 'export_action_set_id_document_similarities_standard', |
|
62 |
'enablingProperty' : 'active_documentssimilarity', |
|
63 |
'enabled' : 'true' |
|
64 |
}, |
|
65 |
{ |
|
66 |
'set' : 'iis-document-classes', |
|
67 |
'jobProperty' : 'export_action_set_id_document_classes', |
|
68 |
'enablingProperty' : 'active_documentsclassification', |
|
69 |
'enabled' : 'true' |
|
70 |
}, |
|
71 |
{ |
|
72 |
'set' : 'iis-document-citations', |
|
73 |
'jobProperty' : 'export_action_set_id_document_referencedDocuments', |
|
74 |
'enablingProperty' : 'active_citationmatching', |
|
75 |
'enabled' : 'true' |
|
76 |
}, |
|
77 |
{ |
|
78 |
'set' : 'iis-referenceextraction-pdb', |
|
79 |
'jobProperty' : 'export_action_set_id_document_pdb', |
|
80 |
'enablingProperty' : 'active_referenceextraction_pdb', |
|
81 |
'enabled' : 'true' |
|
82 |
}, |
|
83 |
{ |
|
84 |
'set' : 'iis-referenceextraction-software', |
|
85 |
'jobProperty' : 'export_action_set_id_document_software_url', |
|
86 |
'enablingProperty' : 'active_referenceextraction_software_url', |
|
87 |
'enabled' : 'true' |
|
88 |
} |
|
89 |
] |
|
90 |
</PARAM> |
|
91 |
</PARAMETERS> |
|
92 |
<ARCS> |
|
93 |
<ARC to="prepareParameters" /> |
|
94 |
</ARCS> |
|
95 |
</NODE> |
|
96 |
<NODE name="prepareParameters" type="PrepareIisMainParamsV2"> |
|
97 |
<DESCRIPTION>prepare parameters</DESCRIPTION> |
|
98 |
<PARAMETERS> |
|
99 |
<PARAM required="true" type="string" name="islookupLocationParam" managedBy="system">import_islookup_service_location</PARAM> |
|
100 |
<PARAM required="true" type="string" name="objectStoreParam" managedBy="system">import_content_objectstores_csv</PARAM> |
|
101 |
<PARAM required="true" type="string" name="objectStoreLocationParam" managedBy="system">import_content_object_store_location</PARAM> |
|
102 |
<PARAM required="true" type="string" name="mdStoreStoreLocationParam" managedBy="system">import_mdstore_service_location</PARAM> |
|
103 |
<PARAM required="true" type="string" name="mdStoreDatasetParam" managedBy="system">import_dataset_mdstore_ids_csv</PARAM> |
|
104 |
<PARAM required="true" type="string" name="oozieWfAppPathParam" managedBy="system">oozie.wf.application.path</PARAM> |
|
105 |
<PARAM required="true" type="string" name="oozieWfAppPath" managedBy="user">/tmp/integration/apps/main</PARAM> |
|
106 |
<PARAM required="true" type="string" name="clusterName" managedBy="user" function="validValues(['IIS','DM'])">IIS</PARAM> |
|
107 |
<PARAM required="true" type="string" name="importHbaseDumpLocation" managedBy="user"></PARAM> |
|
108 |
<PARAM required="true" type="string" name="importHbaseDumpLocationParam" managedBy="system">import_hbase_dump_location</PARAM> |
|
109 |
<PARAM required="false" type="string" name="objectStoreBlacklistCSV" managedBy="user"></PARAM> |
|
110 |
<PARAM required="true" type="string" name="importProjectConceptsContextIdParam" managedBy="system">import_project_concepts_context_id</PARAM> |
|
111 |
<PARAM required="true" type="string" name="importProjectConceptsContextId" managedBy="user">fet-fp7</PARAM> |
|
112 |
</PARAMETERS> |
|
113 |
<ARCS> |
|
114 |
<ARC to="main" /> |
|
115 |
</ARCS> |
|
116 |
</NODE> |
|
117 |
<NODE name="main" type="SubmitHadoopJob" isJoin="true"> |
|
118 |
<DESCRIPTION>IIS main</DESCRIPTION> |
|
119 |
<PARAMETERS> |
|
120 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">iisMainJob</PARAM> |
|
121 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
122 |
{ |
|
123 |
'cluster' : 'cluster', |
|
124 |
'nameNode' : 'nameNode', |
|
125 |
'jobTracker' : 'jobTracker', |
|
126 |
'oozie.wf.application.path' : 'oozie.wf.application.path', |
|
127 |
|
|
128 |
'active_document_affiliation' : 'active_document_affiliation', |
|
129 |
'active_referenceextraction_project' : 'active_referenceextraction_project', |
|
130 |
'active_referenceextraction_dataset' : 'active_referenceextraction_dataset', |
|
131 |
'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative', |
|
132 |
'active_documentsclassification' : 'active_documentsclassification', |
|
133 |
'active_documentssimilarity' : 'active_documentssimilarity', |
|
134 |
'active_citationmatching' : 'active_citationmatching', |
|
135 |
'active_referenceextraction_pdb' : 'active_referenceextraction_pdb', |
|
136 |
'active_referenceextraction_software_url' : 'active_referenceextraction_software_url', |
|
137 |
|
|
138 |
'import_content_objectstores_csv' : 'import_content_objectstores_csv', |
|
139 |
'import_content_object_store_location' : 'import_content_object_store_location', |
|
140 |
'import_mdstore_service_location' : 'import_mdstore_service_location', |
|
141 |
'import_islookup_service_location' : 'import_islookup_service_location', |
|
142 |
'import_project_concepts_context_id' : 'import_project_concepts_context_id', |
|
143 |
'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', |
|
144 |
|
|
145 |
'export_action_set_id_affiliation_matching' : 'export_action_set_id_affiliation_matching', |
|
146 |
'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', |
|
147 |
'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', |
|
148 |
'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative', |
|
149 |
'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard', |
|
150 |
|
|
151 |
'export_action_set_id_document_classes' : 'export_action_set_id_document_classes', |
|
152 |
'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments', |
|
153 |
'export_action_set_id_entity_dataset' : 'export_action_set_id_entity_dataset', |
|
154 |
'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb', |
|
155 |
'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url', |
|
156 |
|
|
157 |
'output_remote_location' : 'actionManagerBasePath' |
|
158 |
} |
|
159 |
</PARAM> |
|
160 |
<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM> |
|
161 |
</PARAMETERS> |
|
162 |
<ARCS> |
|
163 |
<ARC to="updateActionSets" /> |
|
164 |
</ARCS> |
|
165 |
</NODE> |
|
166 |
<NODE name="updateActionSets" type="UpdateActionSets"> |
|
167 |
<DESCRIPTION>update action sets</DESCRIPTION> |
|
168 |
<PARAMETERS /> |
|
169 |
<ARCS> |
|
170 |
<ARC to="success" /> |
|
171 |
</ARCS> |
|
172 |
</NODE> |
|
173 |
</CONFIGURATION> |
|
174 |
<STATUS /> |
|
175 |
</BODY> |
|
176 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/applicationContext-msro-openaire-nodes.xml | ||
---|---|---|
165 | 165 |
p:xqueryObjectStoreService="${dnet.openaire.iis.objecstore.endpoint.xquery}" |
166 | 166 |
p:xqueryIsLookupService="${dnet.openaire.iis.islookup.endpoint.xquery}"/> |
167 | 167 |
|
168 |
<bean id="wfNodePrepareIisMainParamsV2" |
|
169 |
class="eu.dnetlib.msro.openaireplus.workflows.nodes.PrepareIISMainParamsV2JobNode" |
|
170 |
scope="prototype" p:xqueryMdStoreService="${dnet.openaire.iis.mdstore.endpoint.xquery}" |
|
171 |
p:xqueryDatasetStore="${dnet.openaire.iis.mdstore.dataset.xquery}" |
|
172 |
p:xqueryObjectStores="${dnet.openaire.iis.objecstores.xquery}" |
|
173 |
p:xqueryObjectStoreService="${dnet.openaire.iis.objecstore.endpoint.xquery}" |
|
174 |
p:xqueryIsLookupService="${dnet.openaire.iis.islookup.endpoint.xquery}"/> |
|
175 |
|
|
168 | 176 |
<bean id="wfNodePreparePreprocessParams" |
169 | 177 |
class="eu.dnetlib.msro.openaireplus.workflows.nodes.PrepareIISPreprocessingParamsJobNode" |
170 | 178 |
scope="prototype" p:xqueryDatabaseService="${dnet.openaire.iis.dbservice.endpoint.xquery}" |
... | ... | |
176 | 184 |
p:xqueryWosObjectStore="${dnet.openaire.iis.objecstores.wos.xquery}" |
177 | 185 |
p:xqueryIsLookupService="${dnet.openaire.iis.islookup.endpoint.xquery}"/> |
178 | 186 |
|
187 |
<bean id="wfNodePreparePreprocessParamsV2" |
|
188 |
class="eu.dnetlib.msro.openaireplus.workflows.nodes.PrepareIISPreprocessingParamsV2JobNode" |
|
189 |
scope="prototype" p:xqueryDatabaseService="${dnet.openaire.iis.dbservice.endpoint.xquery}" |
|
190 |
p:xqueryMdStoreService="${dnet.openaire.iis.mdstore.endpoint.xquery}" |
|
191 |
p:xqueryObjectStoreService="${dnet.openaire.iis.objecstore.endpoint.xquery}" |
|
192 |
p:xqueryDatasetStore="${dnet.openaire.iis.mdstore.dataset.xquery}" |
|
193 |
p:xqueryWosMDStore="${dnet.openaire.iis.mdstore.wos.xquery}" |
|
194 |
p:xqueryDataciteObjectStore="${dnet.openaire.iis.objecstores.dataset.xquery}" |
|
195 |
p:xqueryWosObjectStore="${dnet.openaire.iis.objecstores.wos.xquery}" |
|
196 |
p:xqueryIsLookupService="${dnet.openaire.iis.islookup.endpoint.xquery}"/> |
|
197 |
|
|
179 | 198 |
<bean id="wfNodeIISCacheBuilder" |
180 | 199 |
class="eu.dnetlib.msro.openaireplus.workflows.nodes.IISCacheBuilderJobNode" |
181 | 200 |
scope="prototype" p:xqueryObjectStoreService="${dnet.openaire.iis.objecstore.endpoint.xquery}" /> |
Also available in: Unified diff
Introduced iisCacheBuilder and CDH5-specific inference workflows