1 |
27525
|
claudio.at
|
package eu.dnetlib.msro.openaireplus.workflows.nodes;
|
2 |
|
|
|
3 |
27568
|
claudio.at
|
import java.util.List;
|
4 |
|
|
import java.util.NoSuchElementException;
|
5 |
27525
|
claudio.at
|
import javax.annotation.Resource;
|
6 |
|
|
|
7 |
|
|
import com.google.common.base.Joiner;
|
8 |
|
|
import com.google.common.base.Splitter;
|
9 |
|
|
import com.google.common.collect.Iterables;
|
10 |
31423
|
claudio.at
|
import com.google.common.collect.Lists;
|
11 |
27525
|
claudio.at
|
import com.googlecode.sarasvati.NodeToken;
|
12 |
|
|
import eu.dnetlib.data.hadoop.config.ClusterName;
|
13 |
|
|
import eu.dnetlib.data.hadoop.config.ConfigurationEnumerator;
|
14 |
27568
|
claudio.at
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
15 |
27525
|
claudio.at
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
16 |
32798
|
michele.ar
|
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
17 |
27568
|
claudio.at
|
import eu.dnetlib.msro.rmi.MSROException;
|
18 |
27525
|
claudio.at
|
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
|
19 |
43601
|
claudio.at
|
import org.apache.commons.logging.Log;
|
20 |
|
|
import org.apache.commons.logging.LogFactory;
|
21 |
|
|
import org.apache.hadoop.conf.Configuration;
|
22 |
|
|
import org.springframework.beans.factory.annotation.Required;
|
23 |
27525
|
claudio.at
|
|
24 |
43601
|
claudio.at
|
public abstract class PrepareIISParamsV2 extends SimpleJobNode {
|
25 |
27525
|
claudio.at
|
|
26 |
43601
|
claudio.at
|
private static final Log log = LogFactory.getLog(PrepareIISParamsV2.class);
|
27 |
32863
|
claudio.at
|
|
28 |
27525
|
claudio.at
|
@Resource
|
29 |
|
|
protected ConfigurationEnumerator configurationEnumerator;
|
30 |
|
|
|
31 |
32798
|
michele.ar
|
@Resource
|
32 |
|
|
private UniqueServiceLocator serviceLocator;
|
33 |
27525
|
claudio.at
|
|
34 |
|
|
private String clusterName;
|
35 |
|
|
|
36 |
|
|
private String clusterParam = "cluster";
|
37 |
|
|
|
38 |
|
|
private String oozieWfAppPath;
|
39 |
|
|
|
40 |
|
|
private String oozieWfAppPathParam = "oozie.wf.application.path";
|
41 |
|
|
|
42 |
|
|
private String xqueryMdStoreService;
|
43 |
|
|
|
44 |
|
|
private String mdStoreStoreLocationParam = "import_mdstore_service_location";
|
45 |
|
|
|
46 |
27568
|
claudio.at
|
private String xqueryObjectStoreService;
|
47 |
|
|
|
48 |
|
|
private String objectStoreLocationParam = "import_content_object_store_location";
|
49 |
|
|
|
50 |
33390
|
claudio.at
|
private String xqueryIsLookupService;
|
51 |
|
|
|
52 |
|
|
private String islookupLocationParam = "import_islookup_service_location";
|
53 |
|
|
|
54 |
48139
|
alessia.ba
|
private String importProjectConceptsContextCSVParam = "import_project_concepts_context_ids_csv";
|
55 |
33390
|
claudio.at
|
|
56 |
48139
|
alessia.ba
|
private String importProjectConceptsContextCSV;
|
57 |
33390
|
claudio.at
|
|
58 |
27568
|
claudio.at
|
private String xqueryDatasetStore;
|
59 |
|
|
|
60 |
29945
|
claudio.at
|
private String mdStoreDatasetParam = "import_dataset_mdstore_ids_csv";
|
61 |
27568
|
claudio.at
|
|
62 |
31423
|
claudio.at
|
private String objectStoreBlacklistCSV = "";
|
63 |
|
|
|
64 |
28824
|
claudio.at
|
protected void prepare(final NodeToken token) throws Exception {
|
65 |
27525
|
claudio.at
|
|
66 |
|
|
token.getEnv().setAttribute(getClusterParam(), getClusterName());
|
67 |
|
|
|
68 |
|
|
// Assumes we only have one mdStore service instance
|
69 |
27568
|
claudio.at
|
token.getEnv().setAttribute(getMdStoreStoreLocationParam(), getServiceEndpoint(getXqueryMdStoreService()));
|
70 |
|
|
// Assumes we only have one objectStore service instance
|
71 |
|
|
token.getEnv().setAttribute(getObjectStoreLocationParam(), getServiceEndpoint(getXqueryObjectStoreService()));
|
72 |
27525
|
claudio.at
|
|
73 |
33390
|
claudio.at
|
token.getEnv().setAttribute(getIslookupLocationParam(), getServiceEndpoint(getXqueryIsLookupService()));
|
74 |
48139
|
alessia.ba
|
token.getEnv().setAttribute(getImportProjectConceptsContextCSVParam(), getImportProjectConceptsContextCSV());
|
75 |
33390
|
claudio.at
|
|
76 |
27525
|
claudio.at
|
Configuration conf = configurationEnumerator.get(ClusterName.valueOf(getClusterName()));
|
77 |
|
|
String nameNode = conf.get("fs.defaultFS");
|
78 |
|
|
|
79 |
|
|
token.getEnv().setAttribute(getOozieWfAppPathParam(), getURI(nameNode, getOozieWfAppPath()));
|
80 |
31423
|
claudio.at
|
token.getEnv().setAttribute(getMdStoreDatasetParam(), asCSV(getProfileIds(getXqueryDatasetStore())));
|
81 |
27568
|
claudio.at
|
}
|
82 |
27525
|
claudio.at
|
|
83 |
28824
|
claudio.at
|
protected String getServiceEndpoint(final String xquery) throws MSROException {
|
84 |
27568
|
claudio.at
|
try {
|
85 |
32798
|
michele.ar
|
return Iterables.getOnlyElement(serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery));
|
86 |
27568
|
claudio.at
|
} catch (ISLookUpException e) {
|
87 |
|
|
throw new MSROException("unable to fetch service endpoint", e);
|
88 |
|
|
} catch (NoSuchElementException e) {
|
89 |
|
|
throw new MSROException("unable to find service endpoint, xquery: " + getXqueryMdStoreService(), e);
|
90 |
|
|
} catch (IllegalArgumentException e) {
|
91 |
|
|
throw new MSROException("more than one services found, we assume to have only one available", e);
|
92 |
|
|
}
|
93 |
27525
|
claudio.at
|
}
|
94 |
|
|
|
95 |
28824
|
claudio.at
|
protected String getProfileId(final String xquery) throws MSROException {
|
96 |
27568
|
claudio.at
|
try {
|
97 |
32798
|
michele.ar
|
return Iterables.getOnlyElement(serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery));
|
98 |
27568
|
claudio.at
|
} catch (ISLookUpException e) {
|
99 |
|
|
throw new MSROException("unable to fetch profile id", e);
|
100 |
|
|
} catch (NoSuchElementException e) {
|
101 |
|
|
throw new MSROException("unable to find profile profile, xquery: " + xquery, e);
|
102 |
|
|
} catch (IllegalArgumentException e) {
|
103 |
|
|
throw new MSROException("more than one profile profiles was found, we assume to have only one available, xquery: " + xquery, e);
|
104 |
|
|
}
|
105 |
|
|
}
|
106 |
|
|
|
107 |
31423
|
claudio.at
|
protected List<String> getProfileIds(final String xquery) throws MSROException {
|
108 |
27568
|
claudio.at
|
try {
|
109 |
32798
|
michele.ar
|
List<String> ids = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery);
|
110 |
27568
|
claudio.at
|
|
111 |
32863
|
claudio.at
|
if (ids.isEmpty()) {
|
112 |
|
|
log.warn("couldn't find any profile, xquery: " + xquery);
|
113 |
|
|
}
|
114 |
27568
|
claudio.at
|
|
115 |
31423
|
claudio.at
|
return ids;
|
116 |
27568
|
claudio.at
|
} catch (ISLookUpException e) {
|
117 |
|
|
throw new MSROException("unable to fetch profile ids, x query: " + xquery, e);
|
118 |
|
|
}
|
119 |
|
|
}
|
120 |
|
|
|
121 |
31423
|
claudio.at
|
protected String getFilteredObjectStoreCSV(final String xquery) throws MSROException {
|
122 |
|
|
return asCSV(filter(getProfileIds(xquery), asList(getObjectStoreBlacklistCSV())));
|
123 |
|
|
}
|
124 |
|
|
|
125 |
|
|
protected List<String> filter(final List<String> list, final List<String> filter) {
|
126 |
32798
|
michele.ar
|
if (filter == null || filter.isEmpty()) { return list; }
|
127 |
31423
|
claudio.at
|
list.removeAll(filter);
|
128 |
|
|
return list;
|
129 |
|
|
}
|
130 |
|
|
|
131 |
|
|
protected String asCSV(final List<String> list) {
|
132 |
|
|
return Joiner.on(",").skipNulls().join(list);
|
133 |
|
|
}
|
134 |
|
|
|
135 |
|
|
protected List<String> asList(final String csv) {
|
136 |
|
|
return Lists.newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(csv));
|
137 |
|
|
}
|
138 |
|
|
|
139 |
28824
|
claudio.at
|
private String getURI(final String nameNode, final String relative) {
|
140 |
27525
|
claudio.at
|
// TODO ensure to return a valid URI
|
141 |
|
|
return nameNode + relative;
|
142 |
|
|
}
|
143 |
|
|
|
144 |
28824
|
claudio.at
|
private String getZkQuorumCSV(final Configuration conf, final String zkPort) {
|
145 |
27525
|
claudio.at
|
return Joiner.on(":" + zkPort + ",").join(Splitter.on(",").omitEmptyStrings().split(conf.get("hbase.zookeeper.quorum")));
|
146 |
|
|
}
|
147 |
|
|
|
148 |
|
|
@Required
|
149 |
28824
|
claudio.at
|
public void setXqueryMdStoreService(final String xqueryMdStoreService) {
|
150 |
27525
|
claudio.at
|
this.xqueryMdStoreService = xqueryMdStoreService;
|
151 |
|
|
}
|
152 |
|
|
|
153 |
|
|
public String getXqueryMdStoreService() {
|
154 |
|
|
return xqueryMdStoreService;
|
155 |
|
|
}
|
156 |
|
|
|
157 |
|
|
public String getMdStoreStoreLocationParam() {
|
158 |
|
|
return mdStoreStoreLocationParam;
|
159 |
|
|
}
|
160 |
|
|
|
161 |
28824
|
claudio.at
|
public void setMdStoreStoreLocationParam(final String mdStoreStoreLocationParam) {
|
162 |
27525
|
claudio.at
|
this.mdStoreStoreLocationParam = mdStoreStoreLocationParam;
|
163 |
|
|
}
|
164 |
|
|
|
165 |
|
|
public String getClusterName() {
|
166 |
|
|
return clusterName;
|
167 |
|
|
}
|
168 |
|
|
|
169 |
28824
|
claudio.at
|
public void setClusterName(final String clusterName) {
|
170 |
27525
|
claudio.at
|
this.clusterName = clusterName;
|
171 |
|
|
}
|
172 |
|
|
|
173 |
|
|
public String getClusterParam() {
|
174 |
|
|
return clusterParam;
|
175 |
|
|
}
|
176 |
|
|
|
177 |
28824
|
claudio.at
|
public void setClusterParam(final String clusterParam) {
|
178 |
27525
|
claudio.at
|
this.clusterParam = clusterParam;
|
179 |
|
|
}
|
180 |
|
|
|
181 |
|
|
public String getOozieWfAppPathParam() {
|
182 |
|
|
return oozieWfAppPathParam;
|
183 |
|
|
}
|
184 |
|
|
|
185 |
28824
|
claudio.at
|
public void setOozieWfAppPathParam(final String oozieWfAppPathParam) {
|
186 |
27525
|
claudio.at
|
this.oozieWfAppPathParam = oozieWfAppPathParam;
|
187 |
|
|
}
|
188 |
|
|
|
189 |
|
|
public String getOozieWfAppPath() {
|
190 |
|
|
return oozieWfAppPath;
|
191 |
|
|
}
|
192 |
|
|
|
193 |
28824
|
claudio.at
|
public void setOozieWfAppPath(final String oozieWfAppPath) {
|
194 |
27525
|
claudio.at
|
this.oozieWfAppPath = oozieWfAppPath;
|
195 |
|
|
}
|
196 |
|
|
|
197 |
27568
|
claudio.at
|
@Required
|
198 |
|
|
public String getXqueryDatasetStore() {
|
199 |
|
|
return xqueryDatasetStore;
|
200 |
|
|
}
|
201 |
|
|
|
202 |
28824
|
claudio.at
|
public void setXqueryDatasetStore(final String xqueryDatasetStore) {
|
203 |
27568
|
claudio.at
|
this.xqueryDatasetStore = xqueryDatasetStore;
|
204 |
|
|
}
|
205 |
|
|
|
206 |
|
|
public String getMdStoreDatasetParam() {
|
207 |
|
|
return mdStoreDatasetParam;
|
208 |
|
|
}
|
209 |
|
|
|
210 |
28824
|
claudio.at
|
public void setMdStoreDatasetParam(final String mdStoreDatasetParam) {
|
211 |
27568
|
claudio.at
|
this.mdStoreDatasetParam = mdStoreDatasetParam;
|
212 |
|
|
}
|
213 |
|
|
|
214 |
|
|
public String getXqueryObjectStoreService() {
|
215 |
|
|
return xqueryObjectStoreService;
|
216 |
|
|
}
|
217 |
|
|
|
218 |
|
|
@Required
|
219 |
28824
|
claudio.at
|
public void setXqueryObjectStoreService(final String xqueryObjectStoreService) {
|
220 |
27568
|
claudio.at
|
this.xqueryObjectStoreService = xqueryObjectStoreService;
|
221 |
|
|
}
|
222 |
|
|
|
223 |
|
|
public String getObjectStoreLocationParam() {
|
224 |
|
|
return objectStoreLocationParam;
|
225 |
|
|
}
|
226 |
|
|
|
227 |
28824
|
claudio.at
|
public void setObjectStoreLocationParam(final String objectStoreLocationParam) {
|
228 |
27568
|
claudio.at
|
this.objectStoreLocationParam = objectStoreLocationParam;
|
229 |
|
|
}
|
230 |
|
|
|
231 |
31423
|
claudio.at
|
public String getObjectStoreBlacklistCSV() {
|
232 |
|
|
return objectStoreBlacklistCSV;
|
233 |
|
|
}
|
234 |
|
|
|
235 |
|
|
public void setObjectStoreBlacklistCSV(final String objectStoreBlacklistCSV) {
|
236 |
|
|
this.objectStoreBlacklistCSV = objectStoreBlacklistCSV;
|
237 |
|
|
}
|
238 |
|
|
|
239 |
33390
|
claudio.at
|
public String getXqueryIsLookupService() {
|
240 |
|
|
return xqueryIsLookupService;
|
241 |
|
|
}
|
242 |
|
|
|
243 |
|
|
@Required
|
244 |
|
|
public void setXqueryIsLookupService(final String xqueryIsLookupService) {
|
245 |
|
|
this.xqueryIsLookupService = xqueryIsLookupService;
|
246 |
|
|
}
|
247 |
|
|
|
248 |
|
|
public String getIslookupLocationParam() {
|
249 |
|
|
return islookupLocationParam;
|
250 |
|
|
}
|
251 |
|
|
|
252 |
|
|
public void setIslookupLocationParam(final String islookupLocationParam) {
|
253 |
|
|
this.islookupLocationParam = islookupLocationParam;
|
254 |
|
|
}
|
255 |
|
|
|
256 |
48139
|
alessia.ba
|
public String getImportProjectConceptsContextCSVParam() {
|
257 |
|
|
return importProjectConceptsContextCSVParam;
|
258 |
33390
|
claudio.at
|
}
|
259 |
|
|
|
260 |
48139
|
alessia.ba
|
public void setImportProjectConceptsContextCSVParam(final String importProjectConceptsContextCSVParam) {
|
261 |
|
|
this.importProjectConceptsContextCSVParam = importProjectConceptsContextCSVParam;
|
262 |
33390
|
claudio.at
|
}
|
263 |
|
|
|
264 |
48139
|
alessia.ba
|
public String getImportProjectConceptsContextCSV() {
|
265 |
|
|
return importProjectConceptsContextCSV;
|
266 |
33390
|
claudio.at
|
}
|
267 |
|
|
|
268 |
48139
|
alessia.ba
|
public void setImportProjectConceptsContextCSV(final String importProjectConceptsContextCSV) {
|
269 |
|
|
this.importProjectConceptsContextCSV = importProjectConceptsContextCSV;
|
270 |
33390
|
claudio.at
|
}
|
271 |
27525
|
claudio.at
|
}
|