Revision 60380
Added by Claudio Atzori about 3 years ago
PrepareEnvCollectHadoopJobNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.dhp; |
2 |
|
|
2 | 3 |
import java.text.SimpleDateFormat; |
3 | 4 |
import java.util.Date; |
4 | 5 |
import java.util.Iterator; |
... | ... | |
7 | 8 |
import java.util.stream.Collectors; |
8 | 9 |
|
9 | 10 |
import eu.dnetlib.common.logging.DnetLogger; |
10 |
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
|
|
11 |
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
|
11 | 12 |
import eu.dnetlib.dhp.model.mdstore.Provenance; |
12 | 13 |
import eu.dnetlib.enabling.datasources.common.Datasource; |
13 | 14 |
import eu.dnetlib.enabling.datasources.common.DsmNotFoundException; |
... | ... | |
30 | 31 |
|
31 | 32 |
import javax.annotation.Resource; |
32 | 33 |
|
34 |
import static eu.dnetlib.dhp.common.Constants.*; |
|
35 |
|
|
33 | 36 |
public class PrepareEnvCollectHadoopJobNode extends SimpleJobNode { |
34 | 37 |
|
35 | 38 |
private static final Log log = LogFactory.getLog(PrepareEnvCollectHadoopJobNode.class); |
... | ... | |
65 | 68 |
*/ |
66 | 69 |
private String metadataEncoding = "XML"; |
67 | 70 |
|
71 |
/** |
|
72 |
* Maximum number of allowed retires before failing |
|
73 |
*/ |
|
74 |
private int maxNumberOfRetry = 5; |
|
75 |
|
|
76 |
/** |
|
77 |
* Delay between request (Milliseconds) |
|
78 |
*/ |
|
79 |
private int requestDelay = 0; |
|
80 |
|
|
81 |
/** |
|
82 |
* Time to wait after a failure before retrying (Seconds) |
|
83 |
*/ |
|
84 |
private int retryDelay = 60; |
|
85 |
|
|
86 |
/** |
|
87 |
* Connect timeout (Seconds) |
|
88 |
*/ |
|
89 |
private int connectTimeOut = 30; |
|
90 |
|
|
91 |
/** |
|
92 |
* Read timeout (Seconds) |
|
93 |
*/ |
|
94 |
private int readTimeOut = 60; |
|
95 |
|
|
68 | 96 |
@Value("${dnet.openaire.dhp.collection.app.path}") |
69 | 97 |
private String oozieWfPath; |
70 | 98 |
|
... | ... | |
98 | 126 |
|
99 | 127 |
if (opt.isPresent()) { |
100 | 128 |
token.getEnv().setAttribute("mdId", getMdId()); |
101 |
token.getEnv().setAttribute("collectionMode", getCollectionMode());
|
|
102 |
token.getEnv().setAttribute("metadataEncoding", getMetadataEncoding());
|
|
103 |
token.getEnv().setAttribute("oozieWfPath", getOozieWfPath());
|
|
104 |
token.getEnv().setAttribute("dnetMessageManagerURL", getDnetMessageManagerURL());
|
|
129 |
token.getEnv().setAttribute(COLLECTION_MODE, getCollectionMode());
|
|
130 |
token.getEnv().setAttribute(METADATA_ENCODING, getMetadataEncoding());
|
|
131 |
token.getEnv().setAttribute(OOZIE_WF_PATH, getOozieWfPath());
|
|
132 |
token.getEnv().setAttribute(DNET_MESSAGE_MGR_URL, getDnetMessageManagerURL());
|
|
105 | 133 |
|
134 |
token.getEnv().setAttribute(MAX_NUMBER_OF_RETRY, getMaxNumberOfRetry()); |
|
135 |
token.getEnv().setAttribute(REQUEST_DELAY, getRequestDelay()); |
|
136 |
token.getEnv().setAttribute(CONNECT_TIMEOUT, getConnectTimeOut()); |
|
137 |
token.getEnv().setAttribute(READ_TIMEOUT, getReadTimeOut()); |
|
138 |
|
|
106 | 139 |
final ApiDescriptor api = opt.get(); |
107 | 140 |
if ("INCREMENTAL".equals(getCollectionMode())) { |
108 | 141 |
final String fromDate = calculateFromDate(token); |
... | ... | |
229 | 262 |
this.fromDateOverride = fromDateOverride; |
230 | 263 |
} |
231 | 264 |
|
265 |
public int getMaxNumberOfRetry() { |
|
266 |
return maxNumberOfRetry; |
|
267 |
} |
|
268 |
|
|
269 |
public void setMaxNumberOfRetry(int maxNumberOfRetry) { |
|
270 |
this.maxNumberOfRetry = maxNumberOfRetry; |
|
271 |
} |
|
272 |
|
|
273 |
public int getRequestDelay() { |
|
274 |
return requestDelay; |
|
275 |
} |
|
276 |
|
|
277 |
public void setRequestDelay(int requestDelay) { |
|
278 |
this.requestDelay = requestDelay; |
|
279 |
} |
|
280 |
|
|
281 |
public int getRetryDelay() { |
|
282 |
return retryDelay; |
|
283 |
} |
|
284 |
|
|
285 |
public void setRetryDelay(int retryDelay) { |
|
286 |
this.retryDelay = retryDelay; |
|
287 |
} |
|
288 |
|
|
289 |
public int getConnectTimeOut() { |
|
290 |
return connectTimeOut; |
|
291 |
} |
|
292 |
|
|
293 |
public void setConnectTimeOut(int connectTimeOut) { |
|
294 |
this.connectTimeOut = connectTimeOut; |
|
295 |
} |
|
296 |
|
|
297 |
public int getReadTimeOut() { |
|
298 |
return readTimeOut; |
|
299 |
} |
|
300 |
|
|
301 |
public void setReadTimeOut(int readTimeOut) { |
|
302 |
this.readTimeOut = readTimeOut; |
|
303 |
} |
|
304 |
|
|
232 | 305 |
public String getDnetMessageManagerURL() { |
233 | 306 |
return dnetMessageManagerURL; |
234 | 307 |
} |
... | ... | |
236 | 309 |
public void setDnetMessageManagerURL(String dnetMessageManagerURL) { |
237 | 310 |
this.dnetMessageManagerURL = dnetMessageManagerURL; |
238 | 311 |
} |
312 |
|
|
239 | 313 |
} |
Also available in: Unified diff
updated hadoop-specific collection workflow parameters: allow to configure the http client behaviour