Project

General

Profile

« Previous | Next » 

Revision 60380

updated hadoop-specific collection workflow parameters: allow to configure the http client behaviour

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/dhp/PrepareEnvCollectHadoopJobNode.java
1 1
package eu.dnetlib.msro.openaireplus.workflows.nodes.dhp;
2

  
2 3
import java.text.SimpleDateFormat;
3 4
import java.util.Date;
4 5
import java.util.Iterator;
......
7 8
import java.util.stream.Collectors;
8 9

  
9 10
import eu.dnetlib.common.logging.DnetLogger;
10
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
11
import eu.dnetlib.dhp.collection.ApiDescriptor;
11 12
import eu.dnetlib.dhp.model.mdstore.Provenance;
12 13
import eu.dnetlib.enabling.datasources.common.Datasource;
13 14
import eu.dnetlib.enabling.datasources.common.DsmNotFoundException;
......
30 31

  
31 32
import javax.annotation.Resource;
32 33

  
34
import static eu.dnetlib.dhp.common.Constants.*;
35

  
33 36
public class PrepareEnvCollectHadoopJobNode extends SimpleJobNode {
34 37

  
35 38
    private static final Log log = LogFactory.getLog(PrepareEnvCollectHadoopJobNode.class);
......
65 68
     */
66 69
    private String metadataEncoding = "XML";
67 70

  
71
    /**
72
     * Maximum number of allowed retires before failing
73
     */
74
    private int maxNumberOfRetry = 5;
75

  
76
    /**
77
     * Delay between request (Milliseconds)
78
     */
79
    private int requestDelay = 0;
80

  
81
    /**
82
     * Time to wait after a failure before retrying (Seconds)
83
     */
84
    private int retryDelay = 60;
85

  
86
    /**
87
     * Connect timeout (Seconds)
88
     */
89
    private int connectTimeOut = 30;
90

  
91
    /**
92
     * Read timeout (Seconds)
93
     */
94
    private int readTimeOut = 60;
95

  
68 96
    @Value("${dnet.openaire.dhp.collection.app.path}")
69 97
    private String oozieWfPath;
70 98

  
......
98 126

  
99 127
        if (opt.isPresent()) {
100 128
            token.getEnv().setAttribute("mdId", getMdId());
101
            token.getEnv().setAttribute("collectionMode", getCollectionMode());
102
            token.getEnv().setAttribute("metadataEncoding", getMetadataEncoding());
103
            token.getEnv().setAttribute("oozieWfPath", getOozieWfPath());
104
            token.getEnv().setAttribute("dnetMessageManagerURL", getDnetMessageManagerURL());
129
            token.getEnv().setAttribute(COLLECTION_MODE, getCollectionMode());
130
            token.getEnv().setAttribute(METADATA_ENCODING, getMetadataEncoding());
131
            token.getEnv().setAttribute(OOZIE_WF_PATH, getOozieWfPath());
132
            token.getEnv().setAttribute(DNET_MESSAGE_MGR_URL, getDnetMessageManagerURL());
105 133

  
134
            token.getEnv().setAttribute(MAX_NUMBER_OF_RETRY, getMaxNumberOfRetry());
135
            token.getEnv().setAttribute(REQUEST_DELAY, getRequestDelay());
136
            token.getEnv().setAttribute(CONNECT_TIMEOUT, getConnectTimeOut());
137
            token.getEnv().setAttribute(READ_TIMEOUT, getReadTimeOut());
138

  
106 139
            final ApiDescriptor api = opt.get();
107 140
            if ("INCREMENTAL".equals(getCollectionMode())) {
108 141
                final String fromDate = calculateFromDate(token);
......
229 262
        this.fromDateOverride = fromDateOverride;
230 263
    }
231 264

  
265
    public int getMaxNumberOfRetry() {
266
        return maxNumberOfRetry;
267
    }
268

  
269
    public void setMaxNumberOfRetry(int maxNumberOfRetry) {
270
        this.maxNumberOfRetry = maxNumberOfRetry;
271
    }
272

  
273
    public int getRequestDelay() {
274
        return requestDelay;
275
    }
276

  
277
    public void setRequestDelay(int requestDelay) {
278
        this.requestDelay = requestDelay;
279
    }
280

  
281
    public int getRetryDelay() {
282
        return retryDelay;
283
    }
284

  
285
    public void setRetryDelay(int retryDelay) {
286
        this.retryDelay = retryDelay;
287
    }
288

  
289
    public int getConnectTimeOut() {
290
        return connectTimeOut;
291
    }
292

  
293
    public void setConnectTimeOut(int connectTimeOut) {
294
        this.connectTimeOut = connectTimeOut;
295
    }
296

  
297
    public int getReadTimeOut() {
298
        return readTimeOut;
299
    }
300

  
301
    public void setReadTimeOut(int readTimeOut) {
302
        this.readTimeOut = readTimeOut;
303
    }
304

  
232 305
    public String getDnetMessageManagerURL() {
233 306
        return dnetMessageManagerURL;
234 307
    }
......
236 309
    public void setDnetMessageManagerURL(String dnetMessageManagerURL) {
237 310
        this.dnetMessageManagerURL = dnetMessageManagerURL;
238 311
    }
312

  
239 313
}
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/repo-hi/pubsRepositoryHadoop/collection.wf.st
26 26
        <PARAM required="true" type="string" name="mdId" managedBy="system" category="MDSTORE_ID">$params.("harv_id")$</PARAM>
27 27
        <PARAM required="true" type="string" name="collectionMode" managedBy="user" function="validValues(['REFRESH','INCREMENTAL'])"></PARAM>
28 28
        <PARAM required="true" type="string" name="metadataEncoding" managedBy="user">XML</PARAM>
29
        <PARAM managedBy="user" name="fromDateOverride" required="false" type="string"/>
29
        <PARAM managedBy="user" name="maxNumberOfRetry" required="false" type="int">5</PARAM>
30
        <PARAM managedBy="user" name="requestDelay" required="false" type="int">0</PARAM>
31
        <PARAM managedBy="user" name="retryDelay" required="false" type="int">60</PARAM>
32
        <PARAM managedBy="user" name="connectTimeOut" required="false" type="int">30</PARAM>
33
        <PARAM managedBy="user" name="readTimeOut" required="false" type="int">60</PARAM>
30 34
    </PARAMETERS>
31 35
    <ARCS>
32 36
        <ARC to="COLLECT_HADOOP"/>
......
48 52
                "workflowId":"workflowId",
49 53
                "mdStoreID":"mdId",
50 54
                "collectionMode":"collectionMode",
55
                "maxNumberOfRetry":"maxNumberOfRetry",
56
                "retryDelay":"retryDelay",
57
                "connectTimeOut":"connectTimeOut",
58
                "readTimeOut":"readTimeOut",
51 59
                "dnetMessageManagerURL":"dnetMessageManagerURL",
52 60
                "oozie.wf.application.path":"oozieWfPath"
53 61
            }

Also available in: Unified diff