Revision 52109
Added by Alessia Bardi almost 6 years ago
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/oai/OaiFeedMapper.java | ||
---|---|---|
12 | 12 |
import java.util.zip.ZipOutputStream; |
13 | 13 |
|
14 | 14 |
import com.google.common.base.Function; |
15 |
import com.google.common.base.Splitter; |
|
15 | 16 |
import com.google.common.collect.Iterables; |
16 | 17 |
import com.google.common.collect.Lists; |
17 | 18 |
import com.google.common.collect.Maps; |
... | ... | |
67 | 68 |
|
68 | 69 |
private Collection<String> enrichmentXPaths; |
69 | 70 |
|
71 |
private String[] parseDatePatterns; |
|
72 |
|
|
70 | 73 |
@Override |
71 | 74 |
protected void setup(final Context context) throws UnknownHostException { |
72 | 75 |
|
... | ... | |
74 | 77 |
String port = context.getConfiguration().get("services.publisher.oai.port"); |
75 | 78 |
String db = context.getConfiguration().get("services.publisher.oai.db"); |
76 | 79 |
String collectionName = context.getConfiguration().get("services.publisher.oai.collection"); |
80 |
String patterns = context.getConfiguration().get("services.publisher.oai.datepatterns"); |
|
81 |
this.parseDatePatterns = Splitter.on(',') |
|
82 |
.trimResults() |
|
83 |
.omitEmptyStrings().splitToList(patterns).toArray(new String[0]); |
|
77 | 84 |
|
78 | 85 |
System.out.println("Mongodb client params"); |
79 | 86 |
System.out.println("host: " + host); |
80 | 87 |
System.out.println("port: " + port); |
81 | 88 |
System.out.println("db: " + db); |
82 | 89 |
System.out.println("collection: " + collectionName); |
90 |
System.out.println("split date patterns: " + patterns); |
|
83 | 91 |
|
84 | 92 |
String[] formatLayoutInterp = collectionName.split("-"); |
85 | 93 |
format = formatLayoutInterp[0]; |
... | ... | |
190 | 198 |
try { |
191 | 199 |
return org.apache.commons.lang.time.DateUtils.parseDate( |
192 | 200 |
date, |
193 |
new String[] { "yyyy-MM-dd", "yyyy-MM-dd'T'HH:mm:ssXXX", "yyyy-MM-dd'T'HH:mm:ss.SSSX", "yyyy-MM-dd'T'HH:mm:ssZ", |
|
194 |
"yyyy-MM-dd'T'HH:mm:ss.SX" }); |
|
201 |
parseDatePatterns); |
|
195 | 202 |
}catch(Exception dateException2){ |
196 | 203 |
dateException2.printStackTrace(System.err); |
197 | 204 |
throw new RuntimeException(dateException2); |
... | ... | |
338 | 345 |
this.skipDuplicates = skipDuplicates; |
339 | 346 |
} |
340 | 347 |
|
348 |
public String[] getParseDatePatterns() { |
|
349 |
return parseDatePatterns; |
|
350 |
} |
|
351 |
|
|
352 |
public void setParseDatePatterns(final String[] parseDatePatterns) { |
|
353 |
this.parseDatePatterns = parseDatePatterns; |
|
354 |
} |
|
341 | 355 |
} |
Also available in: Unified diff
OAI M/R jobs now expect a new configuration parameter, 'services.publisher.oai.datepatterns', that lists the comma-separated date patterns to try when parsing dates.