Revision 57556
Added by Claudio Atzori over 4 years ago
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/DOIBoostToActions.java | ||
---|---|---|
10 | 10 |
import java.util.stream.Stream; |
11 | 11 |
import java.util.zip.Inflater; |
12 | 12 |
|
13 |
import com.google.common.collect.Lists; |
|
13 | 14 |
import com.google.gson.Gson; |
14 | 15 |
import com.google.gson.JsonElement; |
15 | 16 |
import com.google.gson.JsonObject; |
... | ... | |
44 | 45 |
public static final String SEPARATOR = "::"; |
45 | 46 |
public static final String DNET_LANGUAGES = "dnet:languages"; |
46 | 47 |
|
48 |
private static final List<String> DATE_TYPES = Lists.newArrayList("issued", "accepted", "published-online", "published-print"); |
|
49 |
|
|
50 |
|
|
51 |
|
|
47 | 52 |
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {{ |
48 | 53 |
put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft")); |
49 | 54 |
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid")); |
... | ... | |
391 | 396 |
.setQualifier(getQualifier("main title", "dnet:dataCite_title")) |
392 | 397 |
.build())); |
393 | 398 |
|
394 |
settingRelevantDate(rootElement, metadata, "issued", "issued", true); |
|
399 |
|
|
400 |
final String firstValidDate = getFirstValidDate(rootElement); |
|
401 |
if (StringUtils.isNotBlank(firstValidDate)) { |
|
402 |
setDate(metadata, "issued", firstValidDate, true); |
|
403 |
} else { |
|
404 |
context.incrementCounter("filtered", "missing_date", 1); |
|
405 |
return null; |
|
406 |
} |
|
395 | 407 |
settingRelevantDate(rootElement, metadata, "accepted", "accepted", false); |
396 | 408 |
settingRelevantDate(rootElement, metadata, "published-online", "published-online", false); |
397 | 409 |
settingRelevantDate(rootElement, metadata, "published-print", "published-print", false); |
... | ... | |
547 | 559 |
return root.has(key) && root.get(key).isJsonArray(); |
548 | 560 |
} |
549 | 561 |
|
562 |
private static String getFirstValidDate(final JsonObject root) { |
|
563 |
return DATE_TYPES.stream() |
|
564 |
.map(type -> getStringValue(root, type)) |
|
565 |
.filter(Objects::nonNull) |
|
566 |
.filter(DumpToActionsUtility::isValidDate) |
|
567 |
.findFirst() |
|
568 |
.orElseGet(null); |
|
569 |
} |
|
570 |
|
|
571 |
private static void setDate(ResultProtos.Result.Metadata.Builder metadata, |
|
572 |
final String dictionaryKey, |
|
573 |
final String date, |
|
574 |
final boolean addToDateOfAcceptance) { |
|
575 |
if (date == null) |
|
576 |
return; |
|
577 |
if (addToDateOfAcceptance) { |
|
578 |
metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build()); |
|
579 |
} else { |
|
580 |
metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder() |
|
581 |
.setValue(date) |
|
582 |
.setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date")) |
|
583 |
.build()); |
|
584 |
} |
|
585 |
} |
|
586 |
|
|
550 | 587 |
private static void settingRelevantDate(JsonObject rootElement, |
551 | 588 |
ResultProtos.Result.Metadata.Builder metadata, |
552 | 589 |
final String jsonKey, |
Also available in: Unified diff
DOIBOOST mapping: include dates formatted as \d{4}-\d{1,2}-\d{1,2}, discard records not providing at least one date