Revision 49029
Added by Claudio Atzori over 6 years ago
DedupReducer.java | ||
---|---|---|
6 | 6 |
import com.google.common.collect.Lists; |
7 | 7 |
import eu.dnetlib.data.mapreduce.JobParams; |
8 | 8 |
import eu.dnetlib.data.mapreduce.util.DedupUtils; |
9 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
10 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
11 | 9 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
12 | 10 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
13 | 11 |
import eu.dnetlib.pace.clustering.NGramUtils; |
... | ... | |
26 | 24 |
import org.apache.hadoop.hbase.mapreduce.TableReducer; |
27 | 25 |
import org.apache.hadoop.hbase.util.Bytes; |
28 | 26 |
import org.apache.hadoop.io.Text; |
29 |
import org.apache.hadoop.mapred.JobTracker.IllegalStateException; |
|
30 | 27 |
|
31 | 28 |
public class DedupReducer extends TableReducer<Text, ImmutableBytesWritable, ImmutableBytesWritable> { |
32 | 29 |
|
... | ... | |
54 | 51 |
log.info("reducing key: '" + key + "' records: " + q.size()); |
55 | 52 |
|
56 | 53 |
switch (Type.valueOf(dedupConf.getWf().getEntityType())) { |
57 |
case person: |
|
58 |
process(q, context); |
|
59 |
break; |
|
60 | 54 |
case result: |
61 | 55 |
process(simplifyQueue(q, key.toString(), context), context); |
62 | 56 |
break; |
Also available in: Unified diff
Commit message: getting rid of person entities