Project

General

Profile

« Previous | Next » 

Revision 57634

[broker] Factored out the method that obtains the key to be emitted by the enrichment map phase

View differences:

modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/EnrichmentMapper.java
53 53
		emit(context, outKey, body.toByteArray(), type);
54 54
	}
55 55

  
56
	private byte[] getEmitKey(final Context context, final ImmutableBytesWritable key, final Map<byte[], byte[]> mergedIn) {
57
		if (MapUtils.isNotEmpty(mergedIn)) {
58
			context.getCounter(Type.result.name(), RelName.isMergedIn.name()).increment(1);
59

  
60
			//TODO a duplicate should be merged into only one representative object, but we noticed some cases where
61
			// a duplicate is merged into two different representative objects
62
			return mergedIn.keySet().iterator().next();
63
		} else {
64
			return key.copyBytes();
65
		}
66
	}
67

  
68 56
}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/AbstractEnrichmentMapper.java
1 1
package eu.dnetlib.data.mapreduce.hbase.broker.enrich;
2 2

  
3
import eu.dnetlib.data.proto.DedupProtos;
4
import eu.dnetlib.data.proto.TypeProtos;
5
import org.apache.commons.collections.MapUtils;
3 6
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
4 7
import org.apache.hadoop.hbase.mapreduce.TableMapper;
5 8

  
9
import java.util.Map;
10

  
6 11
/**
7 12
 * Created by claudio on 21/02/2017.
8 13
 */
......
31 36
		}
32 37
	}
33 38

  
39
	protected byte[] getEmitKey(final Context context, final ImmutableBytesWritable key, final Map<byte[], byte[]> mergedIn) {
40
		if (MapUtils.isNotEmpty(mergedIn)) {
41
			context.getCounter(TypeProtos.Type.result.name(), DedupProtos.Dedup.RelName.isMergedIn.name()).increment(1);
42

  
43
			//TODO a duplicate should be merged into only one representative object, but we noticed some cases where
44
			// a duplicate is merged into two different representative objects
45
			return mergedIn.keySet().iterator().next();
46
		} else {
47
			return key.copyBytes();
48
		}
49
	}
50

  
34 51
}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/ProjectEnrichmentMapper.java
63 63
			} else if (DedupUtils.isRoot(key)) {
64 64
				emit(context, key.copyBytes(), oafBuilder.build().toByteArray(), "result merges");
65 65
			} else {
66
				emit(context, getRootId(mergedIn), oafBuilder.build().toByteArray(), "result mergedIn");
66
				emit(context, getEmitKey(context, key, mergedIn), oafBuilder.build().toByteArray(), "result mergedIn");
67 67
			}
68 68

  
69 69
			break;
......
111 111
		return OafRowKeyDecoder.decode(key.copyBytes()).getType();
112 112
	}
113 113

  
114
	private byte[] getRootId(final Map<byte[], byte[]> mergedIn) {
115
		return Iterables.getOnlyElement(mergedIn.keySet());
116
	}
117

  
118 114
}

Also available in: Unified diff