Project

General

Profile

« Previous | Next » 

Revision 57556

DOIBOOST mapping: include dates formatted as \d{4}-\d{1,2}-\d{1,2}, discard records not providing at least one date

View differences:

modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/DOIBoostToActions.java
10 10
import java.util.stream.Stream;
11 11
import java.util.zip.Inflater;
12 12

  
13
import com.google.common.collect.Lists;
13 14
import com.google.gson.Gson;
14 15
import com.google.gson.JsonElement;
15 16
import com.google.gson.JsonObject;
......
44 45
	public static final String SEPARATOR = "::";
45 46
	public static final String DNET_LANGUAGES = "dnet:languages";
46 47

  
48
	private static final List<String> DATE_TYPES = Lists.newArrayList("issued", "accepted", "published-online", "published-print");
49

  
50

  
51

  
47 52
	private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {{
48 53
		put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft"));
49 54
		put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
......
391 396
						.setQualifier(getQualifier("main title", "dnet:dataCite_title"))
392 397
						.build()));
393 398

  
394
		settingRelevantDate(rootElement, metadata, "issued", "issued", true);
399

  
400
		final String firstValidDate = getFirstValidDate(rootElement);
401
		if (StringUtils.isNotBlank(firstValidDate)) {
402
			setDate(metadata, "issued", firstValidDate, true);
403
		} else {
404
			context.incrementCounter("filtered", "missing_date", 1);
405
			return null;
406
		}
395 407
		settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
396 408
		settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
397 409
		settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);
......
547 559
		return root.has(key) && root.get(key).isJsonArray();
548 560
	}
549 561

  
562
	private static String getFirstValidDate(final JsonObject root) {
563
		return DATE_TYPES.stream()
564
			.map(type -> getStringValue(root, type))
565
			.filter(Objects::nonNull)
566
			.filter(DumpToActionsUtility::isValidDate)
567
			.findFirst()
568
			.orElseGet(null);
569
	}
570

  
571
	private static void setDate(ResultProtos.Result.Metadata.Builder metadata,
572
											final String dictionaryKey,
573
											final String date,
574
											final boolean addToDateOfAcceptance) {
575
		if (date == null)
576
			return;
577
		if (addToDateOfAcceptance) {
578
			metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build());
579
		} else {
580
			metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder()
581
					.setValue(date)
582
					.setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date"))
583
					.build());
584
		}
585
	}
586

  
550 587
	private static void settingRelevantDate(JsonObject rootElement,
551 588
			ResultProtos.Result.Metadata.Builder metadata,
552 589
			final String jsonKey,

Also available in: Unified diff