Revision 37979
Added by Marek Horst over 9 years ago
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/dataset/DataciteMDStoreImporter.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.dataset; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.*; |
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_PAGESIZE; |
|
5 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; |
|
6 |
|
|
3 | 7 |
import java.io.StringReader; |
4 | 8 |
import java.security.InvalidParameterException; |
5 | 9 |
import java.util.Collections; |
... | ... | |
30 | 34 |
import eu.dnetlib.iis.core.java.porttype.PortType; |
31 | 35 |
import eu.dnetlib.iis.importer.schemas.DataSetReference; |
32 | 36 |
import eu.dnetlib.iis.importer.schemas.DocumentToMDStore; |
33 |
|
|
34 | 37 |
/** |
35 | 38 |
* Process module importing dataset identifiers from datacite xml dump |
36 | 39 |
* and writing output to avro datastore. |
... | ... | |
71 | 74 |
public void run(PortBindings portBindings, Configuration conf, |
72 | 75 |
Map<String, String> parameters) throws Exception { |
73 | 76 |
FileSystem fs = FileSystem.get(conf); |
74 |
if (!parameters.containsKey(WorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_SERVICE_LOCATION)) {
|
|
77 |
if (!parameters.containsKey(IMPORT_DATACITE_MDSTORE_SERVICE_LOCATION)) { |
|
75 | 78 |
throw new InvalidParameterException("unknown MDStore service location, " |
76 |
+ "required parameter '" + WorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_SERVICE_LOCATION + "' is missing!");
|
|
79 |
+ "required parameter '" + IMPORT_DATACITE_MDSTORE_SERVICE_LOCATION + "' is missing!"); |
|
77 | 80 |
} |
78 |
if (!parameters.containsKey(WorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_IDS_CSV)) {
|
|
81 |
if (!parameters.containsKey(IMPORT_DATACITE_MDSTORE_IDS_CSV)) { |
|
79 | 82 |
throw new InvalidParameterException("unknown MDStore identifier, " |
80 |
+ "required parameter '" + WorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_IDS_CSV + "' is missing!");
|
|
83 |
+ "required parameter '" + IMPORT_DATACITE_MDSTORE_IDS_CSV + "' is missing!"); |
|
81 | 84 |
} |
82 | 85 |
|
83 | 86 |
// setting result set client read timeout |
84 | 87 |
Long rsClientReadTimeout = null; |
85 |
if (parameters.containsKey(WorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)) {
|
|
88 |
if (parameters.containsKey(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)) { |
|
86 | 89 |
rsClientReadTimeout = Long.valueOf( |
87 |
parameters.get(WorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT));
|
|
90 |
parameters.get(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)); |
|
88 | 91 |
} |
89 | 92 |
|
90 | 93 |
DataFileWriter<DataSetReference> datasetRefWriter = null; |
... | ... | |
100 | 103 |
|
101 | 104 |
// initializing MDStore reader |
102 | 105 |
W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); |
103 |
eprBuilder.address(parameters.get(WorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_SERVICE_LOCATION));
|
|
106 |
eprBuilder.address(parameters.get(IMPORT_DATACITE_MDSTORE_SERVICE_LOCATION)); |
|
104 | 107 |
eprBuilder.build(); |
105 | 108 |
MDStoreService mdStore = new JaxwsServiceResolverImpl().getService( |
106 | 109 |
MDStoreService.class, eprBuilder.build()); |
107 |
String mdStoresCSV = parameters.get(WorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_IDS_CSV);
|
|
110 |
String mdStoresCSV = parameters.get(IMPORT_DATACITE_MDSTORE_IDS_CSV); |
|
108 | 111 |
if (mdStoresCSV!=null && !mdStoresCSV.isEmpty() && |
109 | 112 |
!WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(mdStoresCSV)) { |
110 | 113 |
String[] mdStoreIds = StringUtils.split(mdStoresCSV, |
... | ... | |
120 | 123 |
rsFactory.setTimeout(rsClientReadTimeout); |
121 | 124 |
} |
122 | 125 |
rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); |
123 |
rsFactory.setPageSize(parameters.containsKey(WorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_PAGESIZE)?
|
|
124 |
Integer.valueOf(parameters.get(WorkflowRuntimeParameters.IMPORT_DATACITE_MDSTORE_PAGESIZE)):
|
|
126 |
rsFactory.setPageSize(parameters.containsKey(IMPORT_DATACITE_MDSTORE_PAGESIZE)? |
|
127 |
Integer.valueOf(parameters.get(IMPORT_DATACITE_MDSTORE_PAGESIZE)): |
|
125 | 128 |
defaultPagesize); |
126 | 129 |
SAXParserFactory parserFactory = SAXParserFactory.newInstance(); |
127 | 130 |
SAXParser saxParser = parserFactory.newSAXParser(); |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/concept/ISLookupServiceBasedConceptImporter.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.concept; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION; |
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; |
|
5 |
|
|
3 | 6 |
import java.io.StringReader; |
4 | 7 |
import java.security.InvalidParameterException; |
5 | 8 |
import java.util.Collections; |
... | ... | |
22 | 25 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
23 | 26 |
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; |
24 | 27 |
import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; |
25 |
import eu.dnetlib.iis.common.WorkflowRuntimeParameters; |
|
26 | 28 |
import eu.dnetlib.iis.core.java.PortBindings; |
27 | 29 |
import eu.dnetlib.iis.core.java.Process; |
28 | 30 |
import eu.dnetlib.iis.core.java.io.DataStore; |
... | ... | |
69 | 71 |
public void run(PortBindings portBindings, Configuration conf, |
70 | 72 |
Map<String, String> parameters) throws Exception { |
71 | 73 |
FileSystem fs = FileSystem.get(conf); |
72 |
if (!parameters.containsKey(WorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION)) {
|
|
74 |
if (!parameters.containsKey(IMPORT_ISLOOKUP_SERVICE_LOCATION)) { |
|
73 | 75 |
throw new InvalidParameterException("unknown ISLookup service location, " |
74 |
+ "required parameter '" + WorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION + "' is missing!");
|
|
76 |
+ "required parameter '" + IMPORT_ISLOOKUP_SERVICE_LOCATION + "' is missing!"); |
|
75 | 77 |
} |
76 | 78 |
String contextIdsCSV; |
77 | 79 |
if (parameters.containsKey(PARAM_IMPORT_CONTEXT_IDS_CSV)) { |
... | ... | |
82 | 84 |
|
83 | 85 |
// setting result set client read timeout |
84 | 86 |
Long rsClientReadTimeout = null; |
85 |
if (parameters.containsKey(WorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)) {
|
|
87 |
if (parameters.containsKey(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)) { |
|
86 | 88 |
rsClientReadTimeout = Long.valueOf( |
87 |
parameters.get(WorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT));
|
|
89 |
parameters.get(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)); |
|
88 | 90 |
} |
89 | 91 |
|
90 | 92 |
W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); |
91 |
String isLookupServiceLocation = parameters.get(WorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION);
|
|
93 |
String isLookupServiceLocation = parameters.get(IMPORT_ISLOOKUP_SERVICE_LOCATION); |
|
92 | 94 |
eprBuilder.address(isLookupServiceLocation); |
93 | 95 |
eprBuilder.build(); |
94 | 96 |
ISLookUpService isLookupService = new JaxwsServiceResolverImpl().getService( |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/database/project/DatabaseServiceBasedProjectImporter.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.database.project; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_DATABASE_SERVICE_DBNAME; |
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_DATABASE_SERVICE_LOCATION; |
|
5 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; |
|
6 |
|
|
3 | 7 |
import java.io.StringReader; |
4 | 8 |
import java.io.StringWriter; |
5 | 9 |
import java.security.InvalidParameterException; |
... | ... | |
22 | 26 |
import eu.dnetlib.enabling.database.rmi.DatabaseService; |
23 | 27 |
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; |
24 | 28 |
import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; |
25 |
import eu.dnetlib.iis.common.WorkflowRuntimeParameters; |
|
26 | 29 |
import eu.dnetlib.iis.core.java.PortBindings; |
27 | 30 |
import eu.dnetlib.iis.core.java.Process; |
28 | 31 |
import eu.dnetlib.iis.core.java.io.DataStore; |
... | ... | |
68 | 71 |
public void run(PortBindings portBindings, Configuration conf, |
69 | 72 |
Map<String, String> parameters) throws Exception { |
70 | 73 |
FileSystem fs = FileSystem.get(conf); |
71 |
if (!parameters.containsKey(WorkflowRuntimeParameters.IMPORT_DATABASE_SERVICE_LOCATION)) {
|
|
74 |
if (!parameters.containsKey(IMPORT_DATABASE_SERVICE_LOCATION)) { |
|
72 | 75 |
throw new InvalidParameterException("unknown database service location, " |
73 |
+ "required parameter '" + WorkflowRuntimeParameters.IMPORT_DATABASE_SERVICE_LOCATION + "' is missing!");
|
|
76 |
+ "required parameter '" + IMPORT_DATABASE_SERVICE_LOCATION + "' is missing!"); |
|
74 | 77 |
} |
75 |
if (!parameters.containsKey(WorkflowRuntimeParameters.IMPORT_DATABASE_SERVICE_DBNAME)) {
|
|
78 |
if (!parameters.containsKey(IMPORT_DATABASE_SERVICE_DBNAME)) { |
|
76 | 79 |
throw new InvalidParameterException("unknown database holding projects name, " |
77 |
+ "required parameter '" + WorkflowRuntimeParameters.IMPORT_DATABASE_SERVICE_DBNAME + "' is missing!");
|
|
80 |
+ "required parameter '" + IMPORT_DATABASE_SERVICE_DBNAME + "' is missing!"); |
|
78 | 81 |
} |
79 | 82 |
|
80 | 83 |
// setting result set client read timeout |
81 | 84 |
Long rsClientReadTimeout = null; |
82 |
if (parameters.containsKey(WorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)) {
|
|
85 |
if (parameters.containsKey(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)) { |
|
83 | 86 |
rsClientReadTimeout = Long.valueOf( |
84 |
parameters.get(WorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT));
|
|
87 |
parameters.get(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)); |
|
85 | 88 |
} |
86 | 89 |
|
87 | 90 |
DataFileWriter<Project> projectWriter = null; |
... | ... | |
92 | 95 |
Project.SCHEMA$); |
93 | 96 |
// initializing database service client |
94 | 97 |
W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); |
95 |
eprBuilder.address(parameters.get(WorkflowRuntimeParameters.IMPORT_DATABASE_SERVICE_LOCATION));
|
|
98 |
eprBuilder.address(parameters.get(IMPORT_DATABASE_SERVICE_LOCATION)); |
|
96 | 99 |
eprBuilder.build(); |
97 | 100 |
DatabaseService databaseService = new JaxwsServiceResolverImpl().getService( |
98 | 101 |
DatabaseService.class, eprBuilder.build()); |
... | ... | |
106 | 109 |
writer, "utf-8"); |
107 | 110 |
|
108 | 111 |
W3CEndpointReference eprResult = databaseService.searchSQL( |
109 |
parameters.get(WorkflowRuntimeParameters.IMPORT_DATABASE_SERVICE_DBNAME),
|
|
112 |
parameters.get(IMPORT_DATABASE_SERVICE_DBNAME), |
|
110 | 113 |
writer.toString()); |
111 | 114 |
log.warn("obtained ResultSet EPR: " + eprResult.toString()); |
112 | 115 |
|
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/vocabulary/ISLookupServiceBasedVocabularyImporter.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.vocabulary; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION; |
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_VOCABULARY_CODE; |
|
5 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_VOCABULARY_OUTPUT_FILENAME; |
|
6 |
|
|
3 | 7 |
import java.io.StringReader; |
4 | 8 |
import java.security.InvalidParameterException; |
5 | 9 |
import java.util.Collections; |
... | ... | |
20 | 24 |
|
21 | 25 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
22 | 26 |
import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; |
23 |
import eu.dnetlib.iis.common.WorkflowRuntimeParameters; |
|
24 | 27 |
import eu.dnetlib.iis.core.java.PortBindings; |
25 | 28 |
import eu.dnetlib.iis.core.java.Process; |
26 | 29 |
import eu.dnetlib.iis.core.java.porttype.AnyPortType; |
... | ... | |
57 | 60 |
public void run(PortBindings portBindings, Configuration conf, |
58 | 61 |
Map<String, String> parameters) throws Exception { |
59 | 62 |
FileSystem fs = FileSystem.get(conf); |
60 |
if (!parameters.containsKey(WorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION)) {
|
|
63 |
if (!parameters.containsKey(IMPORT_ISLOOKUP_SERVICE_LOCATION)) { |
|
61 | 64 |
throw new InvalidParameterException("unknown ISLookup service location, " |
62 |
+ "required parameter '" + WorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION + "' is missing!");
|
|
65 |
+ "required parameter '" + IMPORT_ISLOOKUP_SERVICE_LOCATION + "' is missing!"); |
|
63 | 66 |
} |
64 |
if (!parameters.containsKey(WorkflowRuntimeParameters.IMPORT_VOCABULARY_OUTPUT_FILENAME)) {
|
|
67 |
if (!parameters.containsKey(IMPORT_VOCABULARY_OUTPUT_FILENAME)) { |
|
65 | 68 |
throw new InvalidParameterException("unknown output filename"); |
66 | 69 |
} |
67 | 70 |
String vocabularyCode = DEFAULT_VOCABULARY_CODE; |
68 |
if (parameters.containsKey(WorkflowRuntimeParameters.IMPORT_VOCABULARY_CODE)) {
|
|
69 |
vocabularyCode = parameters.get(WorkflowRuntimeParameters.IMPORT_VOCABULARY_CODE);
|
|
71 |
if (parameters.containsKey(IMPORT_VOCABULARY_CODE)) { |
|
72 |
vocabularyCode = parameters.get(IMPORT_VOCABULARY_CODE); |
|
70 | 73 |
} |
71 | 74 |
|
72 | 75 |
W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); |
73 |
String isLookupServiceLocation = parameters.get(WorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION);
|
|
76 |
String isLookupServiceLocation = parameters.get(IMPORT_ISLOOKUP_SERVICE_LOCATION); |
|
74 | 77 |
eprBuilder.address(isLookupServiceLocation); |
75 | 78 |
eprBuilder.build(); |
76 | 79 |
ISLookUpService isLookupService = new JaxwsServiceResolverImpl().getService( |
... | ... | |
93 | 96 |
FSDataOutputStream outputStream = fs.create( |
94 | 97 |
new Path( |
95 | 98 |
portBindings.getOutput().get(PORT_OUT_VOCABULARY), |
96 |
parameters.get(WorkflowRuntimeParameters.IMPORT_VOCABULARY_OUTPUT_FILENAME)),
|
|
99 |
parameters.get(IMPORT_VOCABULARY_OUTPUT_FILENAME)), |
|
97 | 100 |
true); |
98 | 101 |
try { |
99 | 102 |
properties.store(outputStream, null); |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/javamapreduce/hack/TableInputFormat.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.javamapreduce.hack; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_HBASE_REMOTE_ZOOKEEPER_CLIENTPORT; |
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_HBASE_REMOTE_ZOOKEEPER_QUORUM; |
|
5 |
|
|
3 | 6 |
import java.io.IOException; |
4 | 7 |
|
5 | 8 |
import org.apache.hadoop.conf.Configuration; |
... | ... | |
37 | 40 |
Configuration hbaseConf = HBaseConfiguration.create(sourceConfig); |
38 | 41 |
// overriding zookeeper properties when provided and not set to $UNDEFINED$ value |
39 | 42 |
String zookeeperQuorum = ProcessUtils.getParameterValue( |
40 |
WorkflowRuntimeParameters.IMPORT_HBASE_REMOTE_ZOOKEEPER_QUORUM,
|
|
43 |
IMPORT_HBASE_REMOTE_ZOOKEEPER_QUORUM, |
|
41 | 44 |
sourceConfig, null); |
42 | 45 |
if (zookeeperQuorum!=null && !zookeeperQuorum.trim().isEmpty() && |
43 | 46 |
!zookeeperQuorum.trim().equals( |
44 | 47 |
WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE)) { |
45 | 48 |
hbaseConf.set(HConstants.ZOOKEEPER_QUORUM, zookeeperQuorum.trim()); |
46 | 49 |
String zookeeperClientPort = ProcessUtils.getParameterValue( |
47 |
WorkflowRuntimeParameters.IMPORT_HBASE_REMOTE_ZOOKEEPER_CLIENTPORT,
|
|
50 |
IMPORT_HBASE_REMOTE_ZOOKEEPER_CLIENTPORT, |
|
48 | 51 |
sourceConfig, null); |
49 | 52 |
if (zookeeperClientPort!=null && !zookeeperClientPort.trim().isEmpty() && |
50 | 53 |
!zookeeperClientPort.trim().equals( |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/mapred/IISDataImporterMapper.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.mapred; |
2 | 2 |
|
3 | 3 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER; |
4 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.HBASE_ENCODING;
|
|
5 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV;
|
|
6 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.IMPORT_INFERENCE_PROVENANCE_BLACKLIST;
|
|
7 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.IMPORT_MERGE_BODY_WITH_UPDATES;
|
|
8 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.IMPORT_SKIP_DELETED_BY_INFERENCE;
|
|
9 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.IMPORT_TRUST_LEVEL_THRESHOLD;
|
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.HBASE_ENCODING;
|
|
5 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV;
|
|
6 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_INFERENCE_PROVENANCE_BLACKLIST;
|
|
7 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_MERGE_BODY_WITH_UPDATES;
|
|
8 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_SKIP_DELETED_BY_INFERENCE;
|
|
9 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_TRUST_LEVEL_THRESHOLD;
|
|
10 | 10 |
|
11 | 11 |
import java.io.IOException; |
12 | 12 |
import java.io.UnsupportedEncodingException; |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/content/ObjectStoreContentImporter.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.content; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER; |
|
4 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE; |
|
5 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV; |
|
6 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV; |
|
7 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES; |
|
8 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT; |
|
9 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC; |
|
10 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_OBJECSTORE_PAGESIZE; |
|
11 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC; |
|
12 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT; |
|
13 |
|
|
3 | 14 |
import java.nio.ByteBuffer; |
4 | 15 |
import java.util.Collection; |
5 | 16 |
import java.util.Collections; |
... | ... | |
27 | 38 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
28 | 39 |
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; |
29 | 40 |
import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; |
30 |
import eu.dnetlib.iis.common.WorkflowRuntimeParameters; |
|
31 | 41 |
import eu.dnetlib.iis.core.java.PortBindings; |
32 | 42 |
import eu.dnetlib.iis.core.java.Process; |
33 | 43 |
import eu.dnetlib.iis.core.java.porttype.AvroPortType; |
... | ... | |
118 | 128 |
public void run(PortBindings portBindings, Configuration conf, |
119 | 129 |
Map<String, String> parameters) throws Exception { |
120 | 130 |
int connectionTimeout = parameters.containsKey( |
121 |
WorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT)?
|
|
122 |
Integer.valueOf(parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT)):
|
|
131 |
IMPORT_CONTENT_CONNECTION_TIMEOUT)? |
|
132 |
Integer.valueOf(parameters.get(IMPORT_CONTENT_CONNECTION_TIMEOUT)): |
|
123 | 133 |
conf.getInt( |
124 |
WorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT, 60000);
|
|
134 |
IMPORT_CONTENT_CONNECTION_TIMEOUT, 60000); |
|
125 | 135 |
int readTimeout = parameters.containsKey( |
126 |
WorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT)?
|
|
127 |
Integer.valueOf(parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT)):
|
|
136 |
IMPORT_CONTENT_READ_TIMEOUT)? |
|
137 |
Integer.valueOf(parameters.get(IMPORT_CONTENT_READ_TIMEOUT)): |
|
128 | 138 |
conf.getInt( |
129 |
WorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT, 60000);
|
|
139 |
IMPORT_CONTENT_READ_TIMEOUT, 60000); |
|
130 | 140 |
|
131 | 141 |
ContentApprover contentApprover; |
132 | 142 |
int sizeLimitMegabytes = parameters.containsKey( |
133 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES)?
|
|
134 |
Integer.valueOf(parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES)):
|
|
143 |
IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES)? |
|
144 |
Integer.valueOf(parameters.get(IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES)): |
|
135 | 145 |
conf.getInt( |
136 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES,-1);
|
|
146 |
IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES,-1); |
|
137 | 147 |
if (sizeLimitMegabytes>0) { |
138 | 148 |
contentApprover = new ComplexContentApprover( |
139 | 149 |
new PDFHeaderBasedContentApprover(), |
... | ... | |
144 | 154 |
|
145 | 155 |
// looking for object stores |
146 | 156 |
String objectStoreLocation = parameters.containsKey( |
147 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC)?
|
|
148 |
parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC):
|
|
157 |
IMPORT_CONTENT_OBJECT_STORE_LOC)? |
|
158 |
parameters.get(IMPORT_CONTENT_OBJECT_STORE_LOC): |
|
149 | 159 |
conf.get( |
150 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC);
|
|
160 |
IMPORT_CONTENT_OBJECT_STORE_LOC); |
|
151 | 161 |
if (objectStoreLocation == null || objectStoreLocation.isEmpty()) { |
152 | 162 |
throw new RuntimeException("unknown object store service location: " |
153 | 163 |
+ "no parameter provided: '" + |
154 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC + "'");
|
|
164 |
IMPORT_CONTENT_OBJECT_STORE_LOC + "'"); |
|
155 | 165 |
} |
156 | 166 |
String[] objectStoreIds = null; |
157 | 167 |
String objectStoresCSV = parameters.containsKey( |
158 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV)?
|
|
159 |
parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV):
|
|
168 |
IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV)? |
|
169 |
parameters.get(IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV): |
|
160 | 170 |
conf.get( |
161 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV);
|
|
171 |
IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV); |
|
162 | 172 |
if (objectStoresCSV!=null && !objectStoresCSV.isEmpty() && |
163 |
!WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(objectStoresCSV)) {
|
|
173 |
!UNDEFINED_NONEMPTY_VALUE.equals(objectStoresCSV)) { |
|
164 | 174 |
objectStoreIds = StringUtils.split(objectStoresCSV, |
165 |
WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER);
|
|
175 |
DEFAULT_CSV_DELIMITER); |
|
166 | 176 |
} else { |
167 | 177 |
// looking for data sources |
168 | 178 |
String datasourcesCSV = parameters.containsKey( |
169 |
WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV)?
|
|
170 |
parameters.get(WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV):
|
|
179 |
IMPORT_APPROVED_DATASOURCES_CSV)? |
|
180 |
parameters.get(IMPORT_APPROVED_DATASOURCES_CSV): |
|
171 | 181 |
conf.get( |
172 |
WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV);
|
|
182 |
IMPORT_APPROVED_DATASOURCES_CSV); |
|
173 | 183 |
if (datasourcesCSV==null || datasourcesCSV.isEmpty() || |
174 |
WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(datasourcesCSV)) {
|
|
184 |
UNDEFINED_NONEMPTY_VALUE.equals(datasourcesCSV)) { |
|
175 | 185 |
log.warn("unable to locate object stores containing contents: neither '" + |
176 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV + "' nor '" +
|
|
177 |
WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV + "' parameter provided! "
|
|
186 |
IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV + "' nor '" + |
|
187 |
IMPORT_APPROVED_DATASOURCES_CSV + "' parameter provided! " |
|
178 | 188 |
+ "Empty content and text datastores will be created!"); |
179 | 189 |
objectStoreIds = new String[0]; |
180 | 190 |
} else { |
181 | 191 |
// finding objectstores based on datasources utilizing ISLookup service |
182 | 192 |
String lookupServiceLocation = parameters.containsKey( |
183 |
WorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC)?
|
|
184 |
parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC):
|
|
193 |
IMPORT_CONTENT_LOOKUP_SERVICE_LOC)? |
|
194 |
parameters.get(IMPORT_CONTENT_LOOKUP_SERVICE_LOC): |
|
185 | 195 |
conf.get( |
186 |
WorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC);
|
|
196 |
IMPORT_CONTENT_LOOKUP_SERVICE_LOC); |
|
187 | 197 |
if (lookupServiceLocation == null || lookupServiceLocation.isEmpty()) { |
188 | 198 |
throw new RuntimeException("unable to get objectstore id based on datasource id, " |
189 | 199 |
+ "unknown IS Lookup service location: no parameter provided: '" + |
190 |
WorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC + "'");
|
|
200 |
IMPORT_CONTENT_LOOKUP_SERVICE_LOC + "'"); |
|
191 | 201 |
} |
192 | 202 |
W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); |
193 | 203 |
eprBuilder = new W3CEndpointReferenceBuilder(); |
... | ... | |
196 | 206 |
ISLookUpService lookupService = new JaxwsServiceResolverImpl().getService( |
197 | 207 |
ISLookUpService.class, eprBuilder.build()); |
198 | 208 |
String[] datasourceIds = StringUtils.split(datasourcesCSV, |
199 |
WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER);
|
|
209 |
DEFAULT_CSV_DELIMITER); |
|
200 | 210 |
objectStoreIds = new String[datasourceIds.length]; |
201 | 211 |
for (int i=0; i<datasourceIds.length; i++) { |
202 | 212 |
objectStoreIds[i] = ObjectStoreContentProviderUtils.objectStoreIdLookup( |
... | ... | |
241 | 251 |
ResultSetClientFactory rsFactory = new ResultSetClientFactory(); |
242 | 252 |
rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); |
243 | 253 |
rsFactory.setPageSize(parameters.containsKey( |
244 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECSTORE_PAGESIZE)?
|
|
254 |
IMPORT_CONTENT_OBJECSTORE_PAGESIZE)? |
|
245 | 255 |
Integer.valueOf(parameters.get( |
246 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECSTORE_PAGESIZE)):
|
|
256 |
IMPORT_CONTENT_OBJECSTORE_PAGESIZE)): |
|
247 | 257 |
defaultPagesize); |
248 | 258 |
|
249 | 259 |
for (String record : rsFactory.getClient(objStoreResults)) { |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/content/ObjectStoreDocumentContentUrlImporterProcess.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.content; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER; |
|
4 |
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE; |
|
5 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV; |
|
6 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV; |
|
7 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV; |
|
8 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC; |
|
9 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_OBJECSTORE_PAGESIZE; |
|
10 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC; |
|
11 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; |
|
12 |
|
|
3 | 13 |
import java.util.Arrays; |
4 | 14 |
import java.util.Collections; |
5 | 15 |
import java.util.HashMap; |
... | ... | |
27 | 37 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
28 | 38 |
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; |
29 | 39 |
import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; |
30 |
import eu.dnetlib.iis.common.WorkflowRuntimeParameters; |
|
31 | 40 |
import eu.dnetlib.iis.core.java.PortBindings; |
32 | 41 |
import eu.dnetlib.iis.core.java.Process; |
33 | 42 |
import eu.dnetlib.iis.core.java.porttype.AvroPortType; |
... | ... | |
83 | 92 |
Map<String, String> parameters) throws Exception { |
84 | 93 |
// setting result set client read timeout |
85 | 94 |
Long rsClientReadTimeout = null; |
86 |
if (parameters.containsKey(WorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)) {
|
|
95 |
if (parameters.containsKey(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)) { |
|
87 | 96 |
rsClientReadTimeout = Long.valueOf( |
88 |
parameters.get(WorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT));
|
|
97 |
parameters.get(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT)); |
|
89 | 98 |
} |
90 | 99 |
|
91 | 100 |
// looking for object stores |
92 | 101 |
String objectStoreLocation = parameters.containsKey( |
93 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC)?
|
|
94 |
parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC):
|
|
102 |
IMPORT_CONTENT_OBJECT_STORE_LOC)? |
|
103 |
parameters.get(IMPORT_CONTENT_OBJECT_STORE_LOC): |
|
95 | 104 |
conf.get( |
96 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC);
|
|
105 |
IMPORT_CONTENT_OBJECT_STORE_LOC); |
|
97 | 106 |
if (objectStoreLocation == null || objectStoreLocation.isEmpty()) { |
98 | 107 |
throw new RuntimeException("unknown object store service location: " |
99 | 108 |
+ "no parameter provided: '" + |
100 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC + "'");
|
|
109 |
IMPORT_CONTENT_OBJECT_STORE_LOC + "'"); |
|
101 | 110 |
} |
102 | 111 |
|
103 | 112 |
// blacklisting objectstores |
104 | 113 |
Set<String> blacklistedObjectStoreIds = null; |
105 | 114 |
String blacklistedObjectStoresCSV = parameters.containsKey( |
106 |
WorkflowRuntimeParameters.IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV)?
|
|
107 |
parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV):
|
|
115 |
IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV)? |
|
116 |
parameters.get(IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV): |
|
108 | 117 |
conf.get( |
109 |
WorkflowRuntimeParameters.IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV);
|
|
118 |
IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV); |
|
110 | 119 |
if (blacklistedObjectStoresCSV!=null && !blacklistedObjectStoresCSV.isEmpty() && |
111 |
!WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(blacklistedObjectStoresCSV)) {
|
|
120 |
!UNDEFINED_NONEMPTY_VALUE.equals(blacklistedObjectStoresCSV)) { |
|
112 | 121 |
blacklistedObjectStoreIds = new HashSet<String>(Arrays.asList(StringUtils.split(blacklistedObjectStoresCSV, |
113 |
WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER)));
|
|
122 |
DEFAULT_CSV_DELIMITER))); |
|
114 | 123 |
} else { |
115 | 124 |
blacklistedObjectStoreIds = Collections.emptySet(); |
116 | 125 |
} |
... | ... | |
118 | 127 |
// |
119 | 128 |
String[] objectStoreIds = null; |
120 | 129 |
String objectStoresCSV = parameters.containsKey( |
121 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV)?
|
|
122 |
parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV):
|
|
130 |
IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV)? |
|
131 |
parameters.get(IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV): |
|
123 | 132 |
conf.get( |
124 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV);
|
|
133 |
IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV); |
|
125 | 134 |
if (objectStoresCSV!=null && !objectStoresCSV.isEmpty() && |
126 |
!WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(objectStoresCSV)) {
|
|
135 |
!UNDEFINED_NONEMPTY_VALUE.equals(objectStoresCSV)) { |
|
127 | 136 |
objectStoreIds = StringUtils.split(objectStoresCSV, |
128 |
WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER);
|
|
137 |
DEFAULT_CSV_DELIMITER); |
|
129 | 138 |
} else { |
130 | 139 |
// looking for data sources |
131 | 140 |
String datasourcesCSV = parameters.containsKey( |
132 |
WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV)?
|
|
133 |
parameters.get(WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV):
|
|
141 |
IMPORT_APPROVED_DATASOURCES_CSV)? |
|
142 |
parameters.get(IMPORT_APPROVED_DATASOURCES_CSV): |
|
134 | 143 |
conf.get( |
135 |
WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV);
|
|
144 |
IMPORT_APPROVED_DATASOURCES_CSV); |
|
136 | 145 |
if (datasourcesCSV==null || datasourcesCSV.isEmpty() || |
137 |
WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(datasourcesCSV)) {
|
|
146 |
UNDEFINED_NONEMPTY_VALUE.equals(datasourcesCSV)) { |
|
138 | 147 |
log.warn("unable to locate object stores containing contents: neither '" + |
139 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV + "' nor '" +
|
|
140 |
WorkflowRuntimeParameters.IMPORT_APPROVED_DATASOURCES_CSV + "' parameter provided! "
|
|
148 |
IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV + "' nor '" + |
|
149 |
IMPORT_APPROVED_DATASOURCES_CSV + "' parameter provided! " |
|
141 | 150 |
+ "Empty content and text datastores will be created!"); |
142 | 151 |
objectStoreIds = new String[0]; |
143 | 152 |
} else { |
144 | 153 |
// finding objectstores based on datasources utilizing ISLookup service |
145 | 154 |
String lookupServiceLocation = parameters.containsKey( |
146 |
WorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC)?
|
|
147 |
parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC):
|
|
155 |
IMPORT_CONTENT_LOOKUP_SERVICE_LOC)? |
|
156 |
parameters.get(IMPORT_CONTENT_LOOKUP_SERVICE_LOC): |
|
148 | 157 |
conf.get( |
149 |
WorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC);
|
|
158 |
IMPORT_CONTENT_LOOKUP_SERVICE_LOC); |
|
150 | 159 |
if (lookupServiceLocation == null || lookupServiceLocation.isEmpty()) { |
151 | 160 |
throw new RuntimeException("unable to get objectstore id based on datasource id, " |
152 | 161 |
+ "unknown IS Lookup service location: no parameter provided: '" + |
153 |
WorkflowRuntimeParameters.IMPORT_CONTENT_LOOKUP_SERVICE_LOC + "'");
|
|
162 |
IMPORT_CONTENT_LOOKUP_SERVICE_LOC + "'"); |
|
154 | 163 |
} |
155 | 164 |
W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); |
156 | 165 |
eprBuilder = new W3CEndpointReferenceBuilder(); |
... | ... | |
159 | 168 |
ISLookUpService lookupService = new JaxwsServiceResolverImpl().getService( |
160 | 169 |
ISLookUpService.class, eprBuilder.build()); |
161 | 170 |
String[] datasourceIds = StringUtils.split(datasourcesCSV, |
162 |
WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER);
|
|
171 |
DEFAULT_CSV_DELIMITER); |
|
163 | 172 |
objectStoreIds = new String[datasourceIds.length]; |
164 | 173 |
for (int i=0; i<datasourceIds.length; i++) { |
165 | 174 |
objectStoreIds[i] = ObjectStoreContentProviderUtils.objectStoreIdLookup( |
... | ... | |
210 | 219 |
} |
211 | 220 |
rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); |
212 | 221 |
rsFactory.setPageSize(parameters.containsKey( |
213 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECSTORE_PAGESIZE)?
|
|
222 |
IMPORT_CONTENT_OBJECSTORE_PAGESIZE)? |
|
214 | 223 |
Integer.valueOf(parameters.get( |
215 |
WorkflowRuntimeParameters.IMPORT_CONTENT_OBJECSTORE_PAGESIZE)):
|
|
224 |
IMPORT_CONTENT_OBJECSTORE_PAGESIZE)): |
|
216 | 225 |
defaultPagesize); |
217 | 226 |
for (String record : rsFactory.getClient(objStoreResults)) { |
218 | 227 |
ObjectStoreFile objStoreFile = ObjectStoreFile.createObject(record); |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/content/DocumentContentUrlBasedImporterMapper.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.content; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES; |
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT; |
|
5 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT; |
|
6 |
|
|
3 | 7 |
import java.io.IOException; |
4 | 8 |
import java.nio.ByteBuffer; |
5 | 9 |
|
... | ... | |
8 | 12 |
import org.apache.hadoop.mapreduce.Mapper; |
9 | 13 |
import org.apache.log4j.Logger; |
10 | 14 |
|
11 |
import eu.dnetlib.iis.common.WorkflowRuntimeParameters; |
|
12 | 15 |
import eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl; |
13 | 16 |
import eu.dnetlib.iis.importer.content.appover.ComplexContentApprover; |
14 | 17 |
import eu.dnetlib.iis.importer.content.appover.ContentApprover; |
... | ... | |
46 | 49 |
InterruptedException { |
47 | 50 |
// connection and approver related parameters |
48 | 51 |
this.connectionTimeout = context.getConfiguration().getInt( |
49 |
WorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT, 60000);
|
|
52 |
IMPORT_CONTENT_CONNECTION_TIMEOUT, 60000); |
|
50 | 53 |
this.readTimeout = context.getConfiguration().getInt( |
51 |
WorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT, 60000);
|
|
54 |
IMPORT_CONTENT_READ_TIMEOUT, 60000); |
|
52 | 55 |
int sizeLimitMegabytes = context.getConfiguration().getInt( |
53 |
WorkflowRuntimeParameters.IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES,-1);
|
|
56 |
IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES,-1); |
|
54 | 57 |
if (sizeLimitMegabytes>0) { |
55 | 58 |
this.contentApprover = new ComplexContentApprover( |
56 | 59 |
new PDFHeaderBasedContentApprover(), |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/content/DocumentTextUrlBasedImporterProcess.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.content; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT; |
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT; |
|
5 |
|
|
3 | 6 |
import java.io.IOException; |
4 | 7 |
import java.util.HashMap; |
5 | 8 |
import java.util.Map; |
... | ... | |
10 | 13 |
import org.apache.hadoop.fs.Path; |
11 | 14 |
import org.apache.log4j.Logger; |
12 | 15 |
|
13 |
import eu.dnetlib.iis.common.WorkflowRuntimeParameters; |
|
14 | 16 |
import eu.dnetlib.iis.core.java.PortBindings; |
15 | 17 |
import eu.dnetlib.iis.core.java.io.CloseableIterator; |
16 | 18 |
import eu.dnetlib.iis.core.java.io.DataStore; |
... | ... | |
75 | 77 |
Map<String, String> parameters) throws IOException{ |
76 | 78 |
|
77 | 79 |
readTimeout = parameters.containsKey( |
78 |
WorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT)?
|
|
79 |
Integer.valueOf(parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT)):
|
|
80 |
IMPORT_CONTENT_READ_TIMEOUT)? |
|
81 |
Integer.valueOf(parameters.get(IMPORT_CONTENT_READ_TIMEOUT)): |
|
80 | 82 |
conf.getInt( |
81 |
WorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT, 60000);
|
|
83 |
IMPORT_CONTENT_READ_TIMEOUT, 60000); |
|
82 | 84 |
connectionTimeout = parameters.containsKey( |
83 |
WorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT)?
|
|
84 |
Integer.valueOf(parameters.get(WorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT)):
|
|
85 |
IMPORT_CONTENT_CONNECTION_TIMEOUT)? |
|
86 |
Integer.valueOf(parameters.get(IMPORT_CONTENT_CONNECTION_TIMEOUT)): |
|
85 | 87 |
conf.getInt( |
86 |
WorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT, 60000);
|
|
88 |
IMPORT_CONTENT_CONNECTION_TIMEOUT, 60000); |
|
87 | 89 |
|
88 | 90 |
Map<String, Path> input = portBindings.getInput(); |
89 | 91 |
Map<String, Path> output = portBindings.getOutput(); |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/content/DocumentTextUrlBasedImporterMapper.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.importer.content; |
2 | 2 |
|
3 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT; |
|
4 |
import static eu.dnetlib.iis.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT; |
|
5 |
|
|
3 | 6 |
import java.io.IOException; |
4 | 7 |
|
5 | 8 |
import org.apache.avro.mapred.AvroKey; |
... | ... | |
7 | 10 |
import org.apache.hadoop.mapreduce.Mapper; |
8 | 11 |
import org.apache.log4j.Logger; |
9 | 12 |
|
10 |
import eu.dnetlib.iis.common.WorkflowRuntimeParameters; |
|
11 | 13 |
import eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl; |
12 | 14 |
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText; |
13 | 15 |
|
... | ... | |
37 | 39 |
InterruptedException { |
38 | 40 |
// connection related parameters |
39 | 41 |
this.connectionTimeout = context.getConfiguration().getInt( |
40 |
WorkflowRuntimeParameters.IMPORT_CONTENT_CONNECTION_TIMEOUT, 60000);
|
|
42 |
IMPORT_CONTENT_CONNECTION_TIMEOUT, 60000); |
|
41 | 43 |
this.readTimeout = context.getConfiguration().getInt( |
42 |
WorkflowRuntimeParameters.IMPORT_CONTENT_READ_TIMEOUT, 60000);
|
|
44 |
IMPORT_CONTENT_READ_TIMEOUT, 60000); |
|
43 | 45 |
} |
44 | 46 |
|
45 | 47 |
@Override |
modules/icm-iis-import/trunk/src/main/java/eu/dnetlib/iis/importer/ImportWorkflowRuntimeParameters.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.importer; |
|
2 |
|
|
3 |
/** |
|
4 |
* Import-related workflow parameters. |
|
5 |
* @author mhorst |
|
6 |
* |
|
7 |
*/ |
|
8 |
public abstract class ImportWorkflowRuntimeParameters { |
|
9 |
|
|
10 |
private ImportWorkflowRuntimeParameters() {} |
|
11 |
|
|
12 |
public static final String IMPORT_INFERENCE_PROVENANCE_BLACKLIST = "import.inference.provenance.blacklist"; |
|
13 |
public static final String IMPORT_SKIP_DELETED_BY_INFERENCE = "import.skip.deleted.by.inference"; |
|
14 |
public static final String IMPORT_TRUST_LEVEL_THRESHOLD = "import.trust.level.threshold"; |
|
15 |
public static final String IMPORT_APPROVED_DATASOURCES_CSV = "import.approved.datasources.csv"; |
|
16 |
public static final String IMPORT_MERGE_BODY_WITH_UPDATES = "import.merge.body.with.updates"; |
|
17 |
public static final String IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV = "import.content.approved.objectstores.csv"; |
|
18 |
public static final String IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV = "import.content.blacklisted.objectstores.csv"; |
|
19 |
public static final String IMPORT_CONTENT_OBJECSTORE_PAGESIZE = "import.content.objectstore.resultset.pagesize"; |
|
20 |
|
|
21 |
public static final String IMPORT_HBASE_TABLE_NAME = "import.hbase.table.name"; |
|
22 |
public static final String IMPORT_HBASE_REMOTE_ZOOKEEPER_QUORUM = "import.hbase.remote.zookeeper.quorum"; |
|
23 |
public static final String IMPORT_HBASE_REMOTE_ZOOKEEPER_CLIENTPORT = "import.hbase.remote.zookeeper.clientport"; |
|
24 |
|
|
25 |
public static final String IMPORT_CONTENT_OBJECT_STORE_LOC = "import.content.object.store.location"; |
|
26 |
public static final String IMPORT_CONTENT_LOOKUP_SERVICE_LOC = "import.content.lookup.service.location"; |
|
27 |
public static final String IMPORT_CONTENT_OBJECT_STORE_IDS_CSV = "import.content.object.store.ids.csv"; |
|
28 |
public static final String IMPORT_CONTENT_APPROVER_SIZELIMIT_MEGABYTES = "import.content.approver.sizelimit.megabytes"; |
|
29 |
public static final String IMPORT_CONTENT_CONNECTION_TIMEOUT = "import.content.connection.timeout"; |
|
30 |
public static final String IMPORT_CONTENT_READ_TIMEOUT = "import.content.read.timeout"; |
|
31 |
|
|
32 |
public static final String IMPORT_DATACITE_MDSTORE_IDS_CSV = "import.datacite.mdstore.ids.csv"; |
|
33 |
public static final String IMPORT_DATACITE_MDSTORE_PAGESIZE = "import.datacite.mdstore.resultset.pagesize"; |
|
34 |
public static final String IMPORT_DATACITE_MDSTORE_SERVICE_LOCATION = "import.datacite.mdstore.service.location"; |
|
35 |
|
|
36 |
public static final String IMPORT_DATABASE_SERVICE_LOCATION = "import.database.service.location"; |
|
37 |
public static final String IMPORT_DATABASE_SERVICE_DBNAME = "import.database.service.dbname"; |
|
38 |
|
|
39 |
public static final String IMPORT_ISLOOKUP_SERVICE_LOCATION = "import.islookup.service.location"; |
|
40 |
public static final String IMPORT_VOCABULARY_CODE = "import.vocabulary.code"; |
|
41 |
public static final String IMPORT_VOCABULARY_OUTPUT_FILENAME = "import.vocabulary.output.filename"; |
|
42 |
|
|
43 |
public static final String IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT = "import.resultset.client.read.timeout"; |
|
44 |
|
|
45 |
public static final String HBASE_ENCODING = "hbase.table.encoding"; |
|
46 |
} |
|
0 | 47 |
Also available in: Unified diff
#1395 WorkflowRuntimeParameters static fields cleanup: moving parameters to dedicated modules to prevent excessive modifications of the icm-iis-common module