Project

General

Profile

« Previous | Next » 

Revision 57379

pick the first mergedIn identifier

View differences:

EnrichmentMapper.java
1 1
package eu.dnetlib.data.mapreduce.hbase.broker.enrich;
2 2

  
3 3
import java.io.IOException;
4
import java.util.List;
4 5
import java.util.Map;
6
import java.util.stream.Collectors;
5 7

  
6 8
import com.google.common.collect.Iterables;
7 9
import eu.dnetlib.data.mapreduce.util.DedupUtils;
......
54 56
	private byte[] getEmitKey(final Context context, final ImmutableBytesWritable key, final Map<byte[], byte[]> mergedIn) {
55 57
		if (MapUtils.isNotEmpty(mergedIn)) {
56 58
			context.getCounter(Type.result.name(), RelName.isMergedIn.name()).increment(1);
57
			return Iterables.getOnlyElement(mergedIn.keySet());
59

  
60
			//TODO a duplicate should be merged into only one representative object, but we noticed some cases where
61
			// a duplicate is merged into two different representative objects
62
			return mergedIn.keySet().iterator().next();
58 63
		} else {
59 64
			return key.copyBytes();
60 65
		}

Also available in: Unified diff