Revision 57634
Added by Claudio Atzori about 5 years ago
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/EnrichmentMapper.java | ||
---|---|---|
53 | 53 |
emit(context, outKey, body.toByteArray(), type); |
54 | 54 |
} |
55 | 55 |
|
56 |
private byte[] getEmitKey(final Context context, final ImmutableBytesWritable key, final Map<byte[], byte[]> mergedIn) { |
|
57 |
if (MapUtils.isNotEmpty(mergedIn)) { |
|
58 |
context.getCounter(Type.result.name(), RelName.isMergedIn.name()).increment(1); |
|
59 |
|
|
60 |
//TODO a duplicate should be merged into only one representative object, but we noticed some cases where |
|
61 |
// a duplicate is merged into two different representative objects |
|
62 |
return mergedIn.keySet().iterator().next(); |
|
63 |
} else { |
|
64 |
return key.copyBytes(); |
|
65 |
} |
|
66 |
} |
|
67 |
|
|
68 | 56 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/AbstractEnrichmentMapper.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.broker.enrich; |
2 | 2 |
|
3 |
import eu.dnetlib.data.proto.DedupProtos; |
|
4 |
import eu.dnetlib.data.proto.TypeProtos; |
|
5 |
import org.apache.commons.collections.MapUtils; |
|
3 | 6 |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
4 | 7 |
import org.apache.hadoop.hbase.mapreduce.TableMapper; |
5 | 8 |
|
9 |
import java.util.Map; |
|
10 |
|
|
6 | 11 |
/** |
7 | 12 |
* Created by claudio on 21/02/2017. |
8 | 13 |
*/ |
... | ... | |
31 | 36 |
} |
32 | 37 |
} |
33 | 38 |
|
39 |
protected byte[] getEmitKey(final Context context, final ImmutableBytesWritable key, final Map<byte[], byte[]> mergedIn) { |
|
40 |
if (MapUtils.isNotEmpty(mergedIn)) { |
|
41 |
context.getCounter(TypeProtos.Type.result.name(), DedupProtos.Dedup.RelName.isMergedIn.name()).increment(1); |
|
42 |
|
|
43 |
//TODO a duplicate should be merged into only one representative object, but we noticed some cases where |
|
44 |
// a duplicate is merged into two different representative objects |
|
45 |
return mergedIn.keySet().iterator().next(); |
|
46 |
} else { |
|
47 |
return key.copyBytes(); |
|
48 |
} |
|
49 |
} |
|
50 |
|
|
34 | 51 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/ProjectEnrichmentMapper.java | ||
---|---|---|
63 | 63 |
} else if (DedupUtils.isRoot(key)) { |
64 | 64 |
emit(context, key.copyBytes(), oafBuilder.build().toByteArray(), "result merges"); |
65 | 65 |
} else { |
66 |
emit(context, getRootId(mergedIn), oafBuilder.build().toByteArray(), "result mergedIn");
|
|
66 |
emit(context, getEmitKey(context, key, mergedIn), oafBuilder.build().toByteArray(), "result mergedIn");
|
|
67 | 67 |
} |
68 | 68 |
|
69 | 69 |
break; |
... | ... | |
111 | 111 |
return OafRowKeyDecoder.decode(key.copyBytes()).getType(); |
112 | 112 |
} |
113 | 113 |
|
114 |
private byte[] getRootId(final Map<byte[], byte[]> mergedIn) { |
|
115 |
return Iterables.getOnlyElement(mergedIn.keySet()); |
|
116 |
} |
|
117 |
|
|
118 | 114 |
} |
Also available in: Unified diff
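[broker] Factored out the method that obtains the key to be emitted by the enrichment map phase: getEmitKey moved from EnrichmentMapper into AbstractEnrichmentMapper, and ProjectEnrichmentMapper now calls it in place of its private getRootId.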