Project

General

Profile

« Previous | Next » 

Revision 43601

introduced iisCacheBuilder and CDH5 specific inference workflows

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/PrepareIISParamsV2.java
1
package eu.dnetlib.msro.openaireplus.workflows.nodes;
2

  
3
import java.util.List;
4
import java.util.NoSuchElementException;
5
import javax.annotation.Resource;
6

  
7
import com.google.common.base.Joiner;
8
import com.google.common.base.Splitter;
9
import com.google.common.collect.Iterables;
10
import com.google.common.collect.Lists;
11
import com.googlecode.sarasvati.NodeToken;
12
import eu.dnetlib.data.hadoop.config.ClusterName;
13
import eu.dnetlib.data.hadoop.config.ConfigurationEnumerator;
14
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
15
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
16
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
17
import eu.dnetlib.msro.rmi.MSROException;
18
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
19
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21
import org.apache.hadoop.conf.Configuration;
22
import org.springframework.beans.factory.annotation.Required;
23

  
24
public abstract class PrepareIISParamsV2 extends SimpleJobNode {
25

  
26
	private static final Log log = LogFactory.getLog(PrepareIISParamsV2.class);
27

  
28
	@Resource
29
	protected ConfigurationEnumerator configurationEnumerator;
30

  
31
	@Resource
32
	private UniqueServiceLocator serviceLocator;
33

  
34
	private String clusterName;
35

  
36
	private String clusterParam = "cluster";
37

  
38
	private String oozieWfAppPath;
39

  
40
	private String oozieWfAppPathParam = "oozie.wf.application.path";
41

  
42
	private String xqueryMdStoreService;
43

  
44
	private String mdStoreStoreLocationParam = "import_mdstore_service_location";
45

  
46
	private String xqueryObjectStoreService;
47

  
48
	private String objectStoreLocationParam = "import_content_object_store_location";
49

  
50
	private String xqueryIsLookupService;
51

  
52
	private String islookupLocationParam = "import_islookup_service_location";
53

  
54
	private String importProjectConceptsContextIdParam = "import_project_concepts_context_id";
55

  
56
	private String importProjectConceptsContextId;
57

  
58
	private String xqueryDatasetStore;
59

  
60
	private String mdStoreDatasetParam = "import_dataset_mdstore_ids_csv";
61

  
62
	private String objectStoreBlacklistCSV = "";
63

  
64
	private String importHbaseDumpLocationParam = "import_hbase_dump_location";
65

  
66
	private String importHbaseDumpLocation;
67

  
68
	/**
	 * Populates the environment of the given token with the parameters shared by the IIS workflows: cluster name, service
	 * endpoints, project concepts context id, HBase dump location, name node, job tracker, oozie application path and the
	 * CSV of the dataset mdstore ids.
	 *
	 * @param token
	 *            the workflow token whose environment receives the attributes
	 * @throws Exception
	 *             propagated from the service lookups (see getServiceEndpoint and getProfileIds) or from the configuration access
	 */
	protected void prepare(final NodeToken token) throws Exception {
69

  
70
		token.getEnv().setAttribute(getClusterParam(), getClusterName());
71

  
72
		// Assumes we only have one mdStore service instance
73
		token.getEnv().setAttribute(getMdStoreStoreLocationParam(), getServiceEndpoint(getXqueryMdStoreService()));
74
		// Assumes we only have one objectStore service instance
75
		token.getEnv().setAttribute(getObjectStoreLocationParam(), getServiceEndpoint(getXqueryObjectStoreService()));
76

  
77
		token.getEnv().setAttribute(getIslookupLocationParam(), getServiceEndpoint(getXqueryIsLookupService()));
78
		token.getEnv().setAttribute(getImportProjectConceptsContextIdParam(), getImportProjectConceptsContextId());
79

  
80
		// the HBase dump location is always resolved against the DM cluster name node, whatever clusterName is set to
		Configuration dmConf = configurationEnumerator.get(ClusterName.DM);
81
		String dmNameNode = dmConf.get("fs.defaultFS");
82
		token.getEnv().setAttribute(getImportHbaseDumpLocationParam(), getURI(dmNameNode, getImportHbaseDumpLocation()));
83

  
84
		// the remaining Hadoop settings are read from the configuration of the cluster selected via clusterName
		// NOTE(review): clusterName must match a ClusterName constant, valueOf presumably fails otherwise - confirm
		Configuration conf = configurationEnumerator.get(ClusterName.valueOf(getClusterName()));
85
		String nameNode = conf.get("fs.defaultFS");
86

  
87
		token.getEnv().setAttribute("nameNode", nameNode);
88
		token.getEnv().setAttribute("jobTracker", conf.get("mapred.job.tracker"));
89
		token.getEnv().setAttribute(getOozieWfAppPathParam(), getURI(nameNode, getOozieWfAppPath()));
90
		// all the profile ids matched by the dataset store xquery, joined as CSV
		token.getEnv().setAttribute(getMdStoreDatasetParam(), asCSV(getProfileIds(getXqueryDatasetStore())));
91
	}
92

  
93
	protected String getServiceEndpoint(final String xquery) throws MSROException {
94
		try {
95
			return Iterables.getOnlyElement(serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery));
96
		} catch (ISLookUpException e) {
97
			throw new MSROException("unable to fetch service endpoint", e);
98
		} catch (NoSuchElementException e) {
99
			throw new MSROException("unable to find service endpoint, xquery: " + getXqueryMdStoreService(), e);
100
		} catch (IllegalArgumentException e) {
101
			throw new MSROException("more than one services found, we assume to have only one available", e);
102
		}
103
	}
104

  
105
	protected String getProfileId(final String xquery) throws MSROException {
106
		try {
107
			return Iterables.getOnlyElement(serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery));
108
		} catch (ISLookUpException e) {
109
			throw new MSROException("unable to fetch profile id", e);
110
		} catch (NoSuchElementException e) {
111
			throw new MSROException("unable to find profile profile, xquery: " + xquery, e);
112
		} catch (IllegalArgumentException e) {
113
			throw new MSROException("more than one profile profiles was found, we assume to have only one available, xquery: " + xquery, e);
114
		}
115
	}
116

  
117
	protected List<String> getProfileIds(final String xquery) throws MSROException {
118
		try {
119
			List<String> ids = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery);
120

  
121
			if (ids.isEmpty()) {
122
				log.warn("couldn't find any profile, xquery: " + xquery);
123
			}
124

  
125
			return ids;
126
		} catch (ISLookUpException e) {
127
			throw new MSROException("unable to fetch profile ids, x query: " + xquery, e);
128
		}
129
	}
130

  
131
	protected String getFilteredObjectStoreCSV(final String xquery) throws MSROException {
132
		return asCSV(filter(getProfileIds(xquery), asList(getObjectStoreBlacklistCSV())));
133
	}
134

  
135
	protected List<String> filter(final List<String> list, final List<String> filter) {
136
		if (filter == null || filter.isEmpty()) { return list; }
137
		list.removeAll(filter);
138
		return list;
139
	}
140

  
141
	protected String asCSV(final List<String> list) {
142
		return Joiner.on(",").skipNulls().join(list);
143
	}
144

  
145
	protected List<String> asList(final String csv) {
146
		return Lists.newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(csv));
147
	}
148

  
149
	private String getURI(final String nameNode, final String relative) {
150
		// TODO ensure to return a valid URI
151
		return nameNode + relative;
152
	}
153

  
154
	private String getZkQuorumCSV(final Configuration conf, final String zkPort) {
155
		return Joiner.on(":" + zkPort + ",").join(Splitter.on(",").omitEmptyStrings().split(conf.get("hbase.zookeeper.quorum")));
156
	}
157

  
158
	@Required
159
	public void setXqueryMdStoreService(final String xqueryMdStoreService) {
160
		this.xqueryMdStoreService = xqueryMdStoreService;
161
	}
162

  
163
	public String getXqueryMdStoreService() {
164
		return xqueryMdStoreService;
165
	}
166

  
167
	public String getMdStoreStoreLocationParam() {
168
		return mdStoreStoreLocationParam;
169
	}
170

  
171
	public void setMdStoreStoreLocationParam(final String mdStoreStoreLocationParam) {
172
		this.mdStoreStoreLocationParam = mdStoreStoreLocationParam;
173
	}
174

  
175
	public String getClusterName() {
176
		return clusterName;
177
	}
178

  
179
	public void setClusterName(final String clusterName) {
180
		this.clusterName = clusterName;
181
	}
182

  
183
	public String getClusterParam() {
184
		return clusterParam;
185
	}
186

  
187
	public void setClusterParam(final String clusterParam) {
188
		this.clusterParam = clusterParam;
189
	}
190

  
191
	public String getOozieWfAppPathParam() {
192
		return oozieWfAppPathParam;
193
	}
194

  
195
	public void setOozieWfAppPathParam(final String oozieWfAppPathParam) {
196
		this.oozieWfAppPathParam = oozieWfAppPathParam;
197
	}
198

  
199
	public String getOozieWfAppPath() {
200
		return oozieWfAppPath;
201
	}
202

  
203
	public void setOozieWfAppPath(final String oozieWfAppPath) {
204
		this.oozieWfAppPath = oozieWfAppPath;
205
	}
206

  
207
	@Required
208
	public String getXqueryDatasetStore() {
209
		return xqueryDatasetStore;
210
	}
211

  
212
	public void setXqueryDatasetStore(final String xqueryDatasetStore) {
213
		this.xqueryDatasetStore = xqueryDatasetStore;
214
	}
215

  
216
	public String getMdStoreDatasetParam() {
217
		return mdStoreDatasetParam;
218
	}
219

  
220
	public void setMdStoreDatasetParam(final String mdStoreDatasetParam) {
221
		this.mdStoreDatasetParam = mdStoreDatasetParam;
222
	}
223

  
224
	public String getXqueryObjectStoreService() {
225
		return xqueryObjectStoreService;
226
	}
227

  
228
	@Required
229
	public void setXqueryObjectStoreService(final String xqueryObjectStoreService) {
230
		this.xqueryObjectStoreService = xqueryObjectStoreService;
231
	}
232

  
233
	public String getObjectStoreLocationParam() {
234
		return objectStoreLocationParam;
235
	}
236

  
237
	public void setObjectStoreLocationParam(final String objectStoreLocationParam) {
238
		this.objectStoreLocationParam = objectStoreLocationParam;
239
	}
240

  
241
	public String getObjectStoreBlacklistCSV() {
242
		return objectStoreBlacklistCSV;
243
	}
244

  
245
	public void setObjectStoreBlacklistCSV(final String objectStoreBlacklistCSV) {
246
		this.objectStoreBlacklistCSV = objectStoreBlacklistCSV;
247
	}
248

  
249
	public String getXqueryIsLookupService() {
250
		return xqueryIsLookupService;
251
	}
252

  
253
	@Required
254
	public void setXqueryIsLookupService(final String xqueryIsLookupService) {
255
		this.xqueryIsLookupService = xqueryIsLookupService;
256
	}
257

  
258
	public String getIslookupLocationParam() {
259
		return islookupLocationParam;
260
	}
261

  
262
	public void setIslookupLocationParam(final String islookupLocationParam) {
263
		this.islookupLocationParam = islookupLocationParam;
264
	}
265

  
266
	public String getImportProjectConceptsContextIdParam() {
267
		return importProjectConceptsContextIdParam;
268
	}
269

  
270
	public void setImportProjectConceptsContextIdParam(final String importProjectConceptsContextIdParam) {
271
		this.importProjectConceptsContextIdParam = importProjectConceptsContextIdParam;
272
	}
273

  
274
	public String getImportProjectConceptsContextId() {
275
		return importProjectConceptsContextId;
276
	}
277

  
278
	public void setImportProjectConceptsContextId(final String importProjectConceptsContextId) {
279
		this.importProjectConceptsContextId = importProjectConceptsContextId;
280
	}
281

  
282
	public String getImportHbaseDumpLocationParam() {
283
		return importHbaseDumpLocationParam;
284
	}
285

  
286
	public void setImportHbaseDumpLocationParam(final String importHbaseDumpLocationParam) {
287
		this.importHbaseDumpLocationParam = importHbaseDumpLocationParam;
288
	}
289

  
290
	public String getImportHbaseDumpLocation() {
291
		return importHbaseDumpLocation;
292
	}
293

  
294
	public void setImportHbaseDumpLocation(final String importHbaseDumpLocation) {
295
		this.importHbaseDumpLocation = importHbaseDumpLocation;
296
	}
297
}
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/PrepareIISPreprocessingParamsV2JobNode.java
1
package eu.dnetlib.msro.openaireplus.workflows.nodes;
2

  
3
import com.googlecode.sarasvati.Arc;
4
import com.googlecode.sarasvati.NodeToken;
5
import org.springframework.beans.factory.annotation.Required;
6

  
7
public class PrepareIISPreprocessingParamsV2JobNode extends PrepareIISParamsV2 {
8

  
9
	private String xqueryWosMDStore;
10

  
11
	private String mdStoreWosParam = "import_wos_mdstore_id";
12

  
13
	private String xqueryDatabaseService;
14

  
15
	private String databaseServiceLocationParam = "import_database_service_location";
16

  
17
	private String xqueryDataciteObjectStore;
18

  
19
	private String dataciteObjectStoreParam = "import_content_datacite_objectstores_csv";
20

  
21
	private String xqueryWosObjectStore;
22

  
23
	private String wosObjectStorePlaintextParam = "import_content_wos_plaintext_objectstores_csv";
24

  
25
	/**
	 * Prepares the environment for the preprocessing workflow: the parameters set by prepare() plus the WoS mdstore id, the
	 * database service endpoint, the CSV of the non blacklisted datacite object stores and the WoS object store id.
	 *
	 * @param token
	 *            the workflow token whose environment receives the attributes
	 * @return Arc.DEFAULT_ARC
	 * @throws Exception
	 *             propagated from prepare() or from the lookups performed here
	 */
	@Override
26
	protected String execute(final NodeToken token) throws Exception {
27
		super.prepare(token);
28

  
29
		// getProfileId fails unless the query matches exactly one profile
		token.getEnv().setAttribute(getMdStoreWosParam(), getProfileId(getXqueryWosMDStore()));
30
		token.getEnv().setAttribute(getDatabaseServiceLocationParam(), getServiceEndpoint(getXqueryDatabaseService()));
31

  
32
		// the datacite object stores are filtered against the objectStoreBlacklistCSV
		token.getEnv().setAttribute(getDataciteObjectStoreParam(), getFilteredObjectStoreCSV(getXqueryDataciteObjectStore()));
33
		token.getEnv().setAttribute(getWosObjectStorePlaintextParam(), getProfileId(getXqueryWosObjectStore()));
34

  
35
		return Arc.DEFAULT_ARC;
36
	}
37

  
38
	public String getXqueryWosMDStore() {
39
		return xqueryWosMDStore;
40
	}
41

  
42
	@Required
43
	public void setXqueryWosMDStore(final String xqueryWosMDStore) {
44
		this.xqueryWosMDStore = xqueryWosMDStore;
45
	}
46

  
47
	public String getMdStoreWosParam() {
48
		return mdStoreWosParam;
49
	}
50

  
51
	public void setMdStoreWosParam(final String mdStoreWosParam) {
52
		this.mdStoreWosParam = mdStoreWosParam;
53
	}
54

  
55
	public String getXqueryDatabaseService() {
56
		return xqueryDatabaseService;
57
	}
58

  
59
	@Required
60
	public void setXqueryDatabaseService(final String xqueryDatabaseService) {
61
		this.xqueryDatabaseService = xqueryDatabaseService;
62
	}
63

  
64
	public String getDatabaseServiceLocationParam() {
65
		return databaseServiceLocationParam;
66
	}
67

  
68
	public void setDatabaseServiceLocationParam(final String databaseServiceLocationParam) {
69
		this.databaseServiceLocationParam = databaseServiceLocationParam;
70
	}
71

  
72
	public String getWosObjectStorePlaintextParam() {
73
		return wosObjectStorePlaintextParam;
74
	}
75

  
76
	public void setWosObjectStorePlaintextParam(final String wosObjectStorePlaintextParam) {
77
		this.wosObjectStorePlaintextParam = wosObjectStorePlaintextParam;
78
	}
79

  
80
	public String getXqueryWosObjectStore() {
81
		return xqueryWosObjectStore;
82
	}
83

  
84
	@Required
85
	public void setXqueryWosObjectStore(final String xqueryWosObjectStore) {
86
		this.xqueryWosObjectStore = xqueryWosObjectStore;
87
	}
88

  
89
	public String getXqueryDataciteObjectStore() {
90
		return xqueryDataciteObjectStore;
91
	}
92

  
93
	@Required
94
	public void setXqueryDataciteObjectStore(final String xqueryDataciteObjectStore) {
95
		this.xqueryDataciteObjectStore = xqueryDataciteObjectStore;
96
	}
97

  
98
	public String getDataciteObjectStoreParam() {
99
		return dataciteObjectStoreParam;
100
	}
101

  
102
	public void setDataciteObjectStoreParam(final String dataciteObjectStoreParam) {
103
		this.dataciteObjectStoreParam = dataciteObjectStoreParam;
104
	}
105

  
106
}
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/PrepareIISMainParamsV2JobNode.java
1
package eu.dnetlib.msro.openaireplus.workflows.nodes;
2

  
3
import java.util.List;
4
import java.util.Map;
5

  
6
import com.google.gson.Gson;
7
import com.googlecode.sarasvati.Arc;
8
import com.googlecode.sarasvati.NodeToken;
9
import org.springframework.beans.factory.annotation.Required;
10

  
11
public class PrepareIISMainParamsV2JobNode extends PrepareIISParamsV2 {
12

  
13
	private String xqueryObjectStores;
14

  
15
	private String objectStoreParam = "import_content_objectstores_csv";
16

  
17
	// Enable/Disable inference modules
18

  
19
	@Override
20
	protected String execute(final NodeToken token) throws Exception {
21

  
22
		super.prepare(token);
23

  
24
		token.getEnv().setAttribute(getObjectStoreParam(), getFilteredObjectStoreCSV(getXqueryObjectStores()));
25

  
26
		@SuppressWarnings("unchecked")
27
		final List<Map<String, String>> sets = new Gson().fromJson(token.getEnv().getAttribute("sets"), List.class);
28
		for (Map<String, String> set : sets) {
29
			token.getEnv().setAttribute(set.get("enablingProperty"), set.get("enabled"));
30
		}
31

  
32
		return Arc.DEFAULT_ARC;
33
	}
34

  
35
	public String getXqueryObjectStores() {
36
		return xqueryObjectStores;
37
	}
38

  
39
	@Required
40
	public void setXqueryObjectStores(final String xqueryObjectStores) {
41
		this.xqueryObjectStores = xqueryObjectStores;
42
	}
43

  
44
	public String getObjectStoreParam() {
45
		return objectStoreParam;
46
	}
47

  
48
	public void setObjectStoreParam(final String objectStoreParam) {
49
		this.objectStoreParam = objectStoreParam;
50
	}
51

  
52
}
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/iis/iis.meta.xml
14 14
		<ADMIN_EMAIL />
15 15
		<CONFIGURATION status="EXECUTABLE">
16 16
			<WORKFLOW id="4801c33c-66ca-4ab6-af64-aa812194ec66_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IIS Preprocessing">
17
				<WORKFLOW id="4801c33c-66ca-4ab6-af64-aa812194ec67_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IIS Preprocessing V2"/>
17 18
				<WORKFLOW id="dc19a8d1-93a8-4ffb-9fed-950f4156eedb_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IIS Main"/>
19
				<WORKFLOW id="dc19a8d1-93a8-4ffb-9fed-950f4156eedc_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" name="IIS Main V2"/>
18 20
			</WORKFLOW>
19 21
		</CONFIGURATION>
20 22
		<SCHEDULING enabled="false">
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/iis/iis-preprocessingV2.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value="4801c33c-66ca-4ab6-af64-aa812194ec67_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
    </HEADER>
10
    <BODY>
11
        <WORKFLOW_NAME>IIS preprocessing V2</WORKFLOW_NAME>
12
        <WORKFLOW_TYPE>IIS</WORKFLOW_TYPE>
13
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
        <CONFIGURATION start="manual">
15
			<NODE name="start" isStart="true">				
16
				<DESCRIPTION>start</DESCRIPTION>
17
				<PARAMETERS/>
18
				<ARCS>
19
					<ARC to="prepareActionSets" />
20
					<ARC to="prepareParameters" />
21
				</ARCS>
22
			</NODE>
23
			<NODE name="prepareActionSets" type="PrepareActionSets">				
24
				<DESCRIPTION>prepare action sets</DESCRIPTION>
25
				<PARAMETERS>
26
					<PARAM required="true" type="string" name="sets" managedBy="system">
27
					[
28
						{
29
							'set' : 'iis-referenced-projects-preprocessing',
30
							'jobProperty' : 'export_action_set_id_document_referencedProjects',
31
							'enablingProperty' : 'active_referencedProjects_export',
32
							'enabled' : 'true'
33
						},
34
						{
35
							'set' : 'iis-referenced-datasets-preprocessing',
36
							'jobProperty' : 'export_action_set_id_document_referencedDatasets',
37
							'enablingProperty' : 'active_referencedDatasets_export',
38
							'enabled' : 'true'
39
						},
40
						{
41
							'set' : 'iis-wos-entities',
42
							'jobProperty' : 'export_action_set_id_entity_wos',
43
							'enablingProperty' : 'active_entity_wos_export',
44
							'enabled' : 'true'							
45
						},
46
						{
47
							'set' : 'iis-dataset-entities-preprocessing',
48
							'jobProperty' : 'export_action_set_id_entity_dataset',
49
							'enablingProperty' : 'active_entity_dataset_export',
50
							'enabled' : 'true'								
51
						}
52
					]
53
					</PARAM>
54
				</PARAMETERS>
55
				<ARCS>
56
					<ARC to="preprocessing" />
57
				</ARCS>
58
			</NODE>
59
			<NODE name="prepareParameters" type="PreparePreprocessParamsV2">
60
				<DESCRIPTION>prepare parameters</DESCRIPTION>
61
				<PARAMETERS>
62
					<PARAM required="true" type="string" name="islookupLocationParam" managedBy="system">import_islookup_service_location</PARAM>
63
					<PARAM required="true" type="string" name="objectStoreLocationParam" managedBy="system">import_content_object_store_location</PARAM>
64
					<PARAM required="true" type="string" name="mdStoreStoreLocationParam" managedBy="system">import_mdstore_service_location</PARAM>
65
					<PARAM required="true" type="string" name="mdStoreDatasetParam" managedBy="system">import_dataset_mdstore_ids_csv</PARAM>
66
					<PARAM required="true" type="string" name="mdStoreWosParam" managedBy="system">import_wos_mdstore_id</PARAM>
67
					<PARAM required="true" type="string" name="databaseServiceLocationParam" managedBy="system">import_database_service_location</PARAM>
68
					<PARAM required="true" type="string" name="dataciteObjectStoreParam" managedBy="system">import_content_datacite_objectstores_csv</PARAM>
69
					<PARAM required="true" type="string" name="wosObjectStorePlaintextParam" managedBy="system">import_content_wos_plaintext_objectstores_csv</PARAM>					
70
					<PARAM required="true" type="string" name="oozieWfAppPathParam" managedBy="system">oozie.wf.application.path</PARAM>
71
					<PARAM required="true" type="string" name="oozieWfAppPath" managedBy="user">/tmp/integration/apps/preprocessing</PARAM>
72
					<PARAM required="true" type="string" name="clusterName" managedBy="user" function="validValues(['IIS','DM'])">DM</PARAM>
73
					<PARAM required="false" type="string" name="objectStoreBlacklistCSV" managedBy="user"></PARAM>
74
					<PARAM required="true" type="string" name="importProjectConceptsContextIdParam" managedBy="system">import_project_concepts_context_id</PARAM>
75
					<PARAM required="true" type="string" name="importProjectConceptsContextId" managedBy="user">fet-fp7</PARAM>
76
				</PARAMETERS>
77
				<ARCS>
78
					<ARC to="preprocessing" />
79
				</ARCS>
80
			</NODE>				
81
			<NODE name="preprocessing" type="SubmitHadoopJob" isJoin="true">
82
				<DESCRIPTION>IIS preprocessing</DESCRIPTION>
83
				<PARAMETERS>
84
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">iisPreprocessingJob</PARAM>				
85
					<PARAM required="true" type="string" name="envParams" managedBy="system">
86
						{
87
							'cluster' : 'cluster',
88
							'nameNode' : 'nameNode',
89
							'jobTracker' : 'jobTracker',
90
							'oozie.wf.application.path' : 'oozie.wf.application.path',
91
							'import_content_object_store_location' : 'import_content_object_store_location',
92
							'import_mdstore_service_location' : 'import_mdstore_service_location',
93
							'import_islookup_service_location' : 'import_islookup_service_location', 
94
							'import_project_concepts_context_id' : 'import_project_concepts_context_id',
95
							'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv',
96
							'import_wos_mdstore_id' : 'import_wos_mdstore_id',
97
							'import_database_service_location' : 'import_database_service_location',
98
							'import_content_datacite_objectstores_csv' : 'import_content_datacite_objectstores_csv',
99
							'import_content_wos_plaintext_objectstores_csv' : 'import_content_wos_plaintext_objectstores_csv',
100
							'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects',
101
							'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets',
102
							'export_action_set_id_entity_wos' : 'export_action_set_id_entity_wos',
103
							'export_action_set_id_entity_dataset' : 'export_action_set_id_entity_dataset',
104
							'output_remote_location' : 'actionManagerBasePath'
105
						}
106
					</PARAM>
107
					<PARAM required="true" type="string" name="sysParams" managedBy="system">
108
						{ 	
109
							'export_action_hbase_table_name' : 'hbase.actions.table',
110
							'import_database_dbname' : 'dnet.openaire.db.name'
111
						}
112
					</PARAM>
113
					<PARAM required="true" type="boolean" name="simulation"	managedBy="user">false</PARAM>				
114
				</PARAMETERS>
115
				<ARCS>
116
					<ARC to="updateActionSets" />
117
				</ARCS>
118
			</NODE>
119
			<NODE name="updateActionSets" type="UpdateActionSets">				
120
				<DESCRIPTION>update action sets</DESCRIPTION>
121
				<PARAMETERS/>
122
				<ARCS>
123
					<ARC to="success" />
124
				</ARCS>
125
			</NODE>
126
        </CONFIGURATION>
127
        <STATUS />
128
    </BODY>
129
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/iis/iis-mainV2.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
			value="dc19a8d1-93a8-4ffb-9fed-950f4156eedc_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
6
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
7
		<RESOURCE_KIND value="WorkflowDSResources" />
8
		<RESOURCE_URI value="" />
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>IIS main workflow V2</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>IIS</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
			<NODE name="start" isStart="true">
17
				<DESCRIPTION>start</DESCRIPTION>
18
				<PARAMETERS />
19
				<ARCS>
20
					<ARC to="prepareActionSets" />
21
				</ARCS>
22
			</NODE>
23
			<NODE name="prepareActionSets" type="PrepareActionSets">
24
				<DESCRIPTION>prepare action sets</DESCRIPTION>
25
				<PARAMETERS>
26
					<PARAM required="true" type="string" name="sets" managedBy="system">
27
						[
28
						{
29
						'set' : 'iis-document-affiliation',
30
						'jobProperty' : 'export_action_set_id_affiliation_matching',
31
						'enablingProperty' : 'active_document_affiliation',
32
						'enabled' : 'true'
33
						},
34
						{
35
						'set' : 'iis-referenced-projects-main',
36
						'jobProperty' : 'export_action_set_id_document_referencedProjects',
37
						'enablingProperty' : 'active_referenceextraction_project',
38
						'enabled' : 'true'
39
						},
40
						{
41
						'set' : 'iis-referenced-datasets-main',
42
						'jobProperty' : 'export_action_set_id_document_referencedDatasets',
43
						'enablingProperty' : 'active_referenceextraction_dataset',
44
						'enabled' : 'true'
45
						},
46
						{
47
						'set' : 'iis-dataset-entities-main',
48
						'jobProperty' : 'export_action_set_id_entity_dataset',
49
						'enablingProperty' : 'active_referenceextraction_dataset',
50
						'enabled' : 'true'
51
						},
52
						{
53
						'set' : 'iis-researchinitiative',
54
						'jobProperty' : 'export_action_set_id_document_research_initiative',
55
						'enablingProperty' :
56
						'active_referenceextraction_researchinitiative',
57
						'enabled' : 'true'
58
						},
59
						{
60
						'set' : 'iis-document-similarities',
61
						'jobProperty' : 'export_action_set_id_document_similarities_standard',
62
						'enablingProperty' : 'active_documentssimilarity',
63
						'enabled' : 'true'
64
						},
65
						{
66
						'set' : 'iis-document-classes',
67
						'jobProperty' : 'export_action_set_id_document_classes',
68
						'enablingProperty' : 'active_documentsclassification',
69
						'enabled' : 'true'
70
						},
71
						{
72
						'set' : 'iis-document-citations',
73
						'jobProperty' : 'export_action_set_id_document_referencedDocuments',
74
						'enablingProperty' : 'active_citationmatching',
75
						'enabled' : 'true'
76
						},
77
						{
78
						'set' : 'iis-referenceextraction-pdb',
79
						'jobProperty' : 'export_action_set_id_document_pdb',
80
						'enablingProperty' : 'active_referenceextraction_pdb',
81
						'enabled' : 'true'
82
						},
83
						{
84
						'set' : 'iis-referenceextraction-software',
85
						'jobProperty' : 'export_action_set_id_document_software_url',
86
						'enablingProperty' : 'active_referenceextraction_software_url',
87
						'enabled' : 'true'
88
						}
89
						]
90
					</PARAM>
91
				</PARAMETERS>
92
				<ARCS>
93
					<ARC to="prepareParameters" />
94
				</ARCS>
95
			</NODE>
96
			<NODE name="prepareParameters" type="PrepareIisMainParamsV2">
97
				<DESCRIPTION>prepare parameters</DESCRIPTION>
98
				<PARAMETERS>
99
					<PARAM required="true" type="string" name="islookupLocationParam" managedBy="system">import_islookup_service_location</PARAM>
100
					<PARAM required="true" type="string" name="objectStoreParam" managedBy="system">import_content_objectstores_csv</PARAM>
101
					<PARAM required="true" type="string" name="objectStoreLocationParam" managedBy="system">import_content_object_store_location</PARAM>
102
					<PARAM required="true" type="string" name="mdStoreStoreLocationParam" managedBy="system">import_mdstore_service_location</PARAM>
103
					<PARAM required="true" type="string" name="mdStoreDatasetParam" managedBy="system">import_dataset_mdstore_ids_csv</PARAM>
104
					<PARAM required="true" type="string" name="oozieWfAppPathParam" managedBy="system">oozie.wf.application.path</PARAM>
105
					<PARAM required="true" type="string" name="oozieWfAppPath" managedBy="user">/tmp/integration/apps/main</PARAM>
106
					<PARAM required="true" type="string" name="clusterName"	managedBy="user" function="validValues(['IIS','DM'])">IIS</PARAM>
107
					<PARAM required="true" type="string" name="importHbaseDumpLocation" managedBy="user"></PARAM>
108
					<PARAM required="true" type="string" name="importHbaseDumpLocationParam" managedBy="system">import_hbase_dump_location</PARAM>
109
					<PARAM required="false" type="string" name="objectStoreBlacklistCSV" managedBy="user"></PARAM>
110
					<PARAM required="true" type="string" name="importProjectConceptsContextIdParam" managedBy="system">import_project_concepts_context_id</PARAM>
111
					<PARAM required="true" type="string" name="importProjectConceptsContextId" managedBy="user">fet-fp7</PARAM>
112
				</PARAMETERS>
113
				<ARCS>
114
					<ARC to="main" />
115
				</ARCS>
116
			</NODE>
117
			<!--
				Workflow node "main": a join node (isJoin="true") of type SubmitHadoopJob.
				- hadoopJob: name of the job to submit (iisMainJob).
				- envParams: quoted key/value pairs. Every entry uses the same string for key and value
				  except 'output_remote_location', whose value is 'actionManagerBasePath'.
				  NOTE(review): presumably key = job property name and value = workflow env attribute
				  to read it from; confirm against the SubmitHadoopJob node implementation.
				- simulation: user-managed boolean, false by default here.
				The only outgoing arc leads to the "updateActionSets" node.
			-->
			<NODE name="main" type="SubmitHadoopJob" isJoin="true">
118
				<DESCRIPTION>IIS main</DESCRIPTION>
119
				<PARAMETERS>
120
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">iisMainJob</PARAM>
121
					<PARAM required="true" type="string" name="envParams" managedBy="system">
122
						{
123
						'cluster' : 'cluster',
124
						'nameNode' : 'nameNode',
125
						'jobTracker' : 'jobTracker',
126
						'oozie.wf.application.path' : 'oozie.wf.application.path',
127

  
128
						'active_document_affiliation' : 'active_document_affiliation',
129
						'active_referenceextraction_project' : 'active_referenceextraction_project',
130
						'active_referenceextraction_dataset' : 'active_referenceextraction_dataset',
131
						'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative',
132
						'active_documentsclassification' : 'active_documentsclassification',
133
						'active_documentssimilarity' : 'active_documentssimilarity',
134
						'active_citationmatching' : 'active_citationmatching',
135
						'active_referenceextraction_pdb' : 'active_referenceextraction_pdb',
136
						'active_referenceextraction_software_url' : 'active_referenceextraction_software_url',
137

  
138
						'import_content_objectstores_csv' : 'import_content_objectstores_csv',
139
						'import_content_object_store_location' : 'import_content_object_store_location',
140
						'import_mdstore_service_location' : 'import_mdstore_service_location',
141
						'import_islookup_service_location' : 'import_islookup_service_location', 
142
						'import_project_concepts_context_id' : 'import_project_concepts_context_id',
143
						'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv',
144

  
145
						'export_action_set_id_affiliation_matching' : 'export_action_set_id_affiliation_matching',
146
						'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets',
147
						'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects',
148
						'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative',
149
						'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard',
150

  
151
						'export_action_set_id_document_classes' : 'export_action_set_id_document_classes',
152
						'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments',
153
						'export_action_set_id_entity_dataset' : 'export_action_set_id_entity_dataset',
154
						'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb',
155
						'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url',
156

  
157
						'output_remote_location' : 'actionManagerBasePath'
158
						}
159
					</PARAM>
160
					<PARAM required="true" type="boolean" name="simulation"	managedBy="user">false</PARAM>
161
				</PARAMETERS>
162
				<ARCS>
163
					<ARC to="updateActionSets" />
164
				</ARCS>
165
			</NODE>
166
			<!--
				Workflow node "updateActionSets" (type UpdateActionSets): reached from the "main" node,
				declares no parameters, and its only outgoing arc leads to "success".
			-->
			<NODE name="updateActionSets" type="UpdateActionSets">
167
				<DESCRIPTION>update action sets</DESCRIPTION>
168
				<PARAMETERS />
169
				<ARCS>
170
					<ARC to="success" />
171
				</ARCS>
172
			</NODE>
173
		</CONFIGURATION>
174
		<STATUS />
175
	</BODY>
176
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/applicationContext-msro-openaire-nodes.xml
165 165
          p:xqueryObjectStoreService="${dnet.openaire.iis.objecstore.endpoint.xquery}"
166 166
          p:xqueryIsLookupService="${dnet.openaire.iis.islookup.endpoint.xquery}"/>
167 167

  
168
    <!--
        Prototype-scoped bean for PrepareIISMainParamsV2JobNode.
        Each p:xquery* property is filled from a dnet.openaire.iis.* property placeholder.
        NOTE(review): the "objecstore"/"objecstores" spelling in the placeholder keys is kept as-is;
        it presumably matches the keys in the properties file - verify before renaming.
    -->
    <bean id="wfNodePrepareIisMainParamsV2"
169
          class="eu.dnetlib.msro.openaireplus.workflows.nodes.PrepareIISMainParamsV2JobNode"
170
          scope="prototype" p:xqueryMdStoreService="${dnet.openaire.iis.mdstore.endpoint.xquery}"
171
          p:xqueryDatasetStore="${dnet.openaire.iis.mdstore.dataset.xquery}"
172
          p:xqueryObjectStores="${dnet.openaire.iis.objecstores.xquery}"
173
          p:xqueryObjectStoreService="${dnet.openaire.iis.objecstore.endpoint.xquery}"
174
          p:xqueryIsLookupService="${dnet.openaire.iis.islookup.endpoint.xquery}"/>
175

  
168 176
    <bean id="wfNodePreparePreprocessParams"
169 177
          class="eu.dnetlib.msro.openaireplus.workflows.nodes.PrepareIISPreprocessingParamsJobNode"
170 178
          scope="prototype" p:xqueryDatabaseService="${dnet.openaire.iis.dbservice.endpoint.xquery}"
......
176 184
          p:xqueryWosObjectStore="${dnet.openaire.iis.objecstores.wos.xquery}"
177 185
          p:xqueryIsLookupService="${dnet.openaire.iis.islookup.endpoint.xquery}"/>
178 186

  
187
    <!--
        Prototype-scoped bean for PrepareIISPreprocessingParamsV2JobNode.
        Injects eight xquery properties (database, mdstore and objectstore service endpoints,
        dataset/WoS mdstores, Datacite/WoS objectstores, IS lookup endpoint) from
        dnet.openaire.iis.* property placeholders.
        NOTE(review): the "objecstore"/"objecstores" spelling in the placeholder keys is kept as-is;
        it presumably matches the keys in the properties file - verify before renaming.
    -->
    <bean id="wfNodePreparePreprocessParamsV2"
188
          class="eu.dnetlib.msro.openaireplus.workflows.nodes.PrepareIISPreprocessingParamsV2JobNode"
189
          scope="prototype" p:xqueryDatabaseService="${dnet.openaire.iis.dbservice.endpoint.xquery}"
190
          p:xqueryMdStoreService="${dnet.openaire.iis.mdstore.endpoint.xquery}"
191
          p:xqueryObjectStoreService="${dnet.openaire.iis.objecstore.endpoint.xquery}"
192
          p:xqueryDatasetStore="${dnet.openaire.iis.mdstore.dataset.xquery}"
193
          p:xqueryWosMDStore="${dnet.openaire.iis.mdstore.wos.xquery}"
194
          p:xqueryDataciteObjectStore="${dnet.openaire.iis.objecstores.dataset.xquery}"
195
          p:xqueryWosObjectStore="${dnet.openaire.iis.objecstores.wos.xquery}"
196
          p:xqueryIsLookupService="${dnet.openaire.iis.islookup.endpoint.xquery}"/>
197

  
179 198
    <!--
        Prototype-scoped bean for IISCacheBuilderJobNode.
        Its single property, xqueryObjectStoreService, is filled from the
        dnet.openaire.iis.objecstore.endpoint.xquery placeholder (key spelling kept as-is).
    -->
    <bean id="wfNodeIISCacheBuilder"
180 199
          class="eu.dnetlib.msro.openaireplus.workflows.nodes.IISCacheBuilderJobNode"
181 200
          scope="prototype" p:xqueryObjectStoreService="${dnet.openaire.iis.objecstore.endpoint.xquery}" />

Also available in: Unified diff