Revision 49029
Added by Claudio Atzori over 6 years ago
DedupBuildRootsMapper.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.dedup; |
2 | 2 |
|
3 | 3 |
import java.io.IOException; |
4 |
import java.nio.ByteBuffer; |
|
5 | 4 |
import java.util.Collections; |
6 | 5 |
import java.util.HashSet; |
7 | 6 |
import java.util.Map; |
8 | 7 |
import java.util.Set; |
9 | 8 |
|
10 | 9 |
import com.google.common.base.Function; |
10 |
import com.google.common.collect.Iterables; |
|
11 | 11 |
import com.google.common.collect.Sets; |
12 |
import org.apache.commons.logging.Log; |
|
13 |
import org.apache.commons.logging.LogFactory; |
|
14 |
import org.apache.hadoop.hbase.client.Result; |
|
15 |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
|
16 |
import org.apache.hadoop.hbase.mapreduce.TableMapper; |
|
17 |
import org.apache.hadoop.hbase.util.Bytes; |
|
18 |
import org.apache.hadoop.io.Text; |
|
19 |
|
|
20 |
import com.google.common.collect.Iterables; |
|
21 | 12 |
import com.google.protobuf.InvalidProtocolBufferException; |
22 |
|
|
23 | 13 |
import eu.dnetlib.data.mapreduce.JobParams; |
24 | 14 |
import eu.dnetlib.data.mapreduce.util.DedupUtils; |
25 | 15 |
import eu.dnetlib.data.mapreduce.util.OafDecoder; |
... | ... | |
27 | 17 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
28 | 18 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
29 | 19 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
30 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
31 | 20 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
32 | 21 |
import eu.dnetlib.data.transform.OafUtils; |
33 | 22 |
import eu.dnetlib.pace.config.DedupConfig; |
23 |
import org.apache.commons.logging.Log; |
|
24 |
import org.apache.commons.logging.LogFactory; |
|
25 |
import org.apache.hadoop.hbase.client.Result; |
|
26 |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
|
27 |
import org.apache.hadoop.hbase.mapreduce.TableMapper; |
|
28 |
import org.apache.hadoop.hbase.util.Bytes; |
|
29 |
import org.apache.hadoop.io.Text; |
|
34 | 30 |
|
35 | 31 |
public class DedupBuildRootsMapper extends TableMapper<Text, ImmutableBytesWritable> { |
36 | 32 |
|
... | ... | |
150 | 146 |
context.write(rootId, ibw); |
151 | 147 |
} |
152 | 148 |
|
153 |
private boolean checkHack(final byte[] rowkey, final byte[] rootId, final String family) { |
|
154 |
return dedupConf.getWf().getEntityType().equals(Type.result.toString()) && // we're deduplicating the results |
|
155 |
family.equals(RelType.personResult.toString()) && // we're dealing with a personResult relation |
|
156 |
!rootId.equals(DedupUtils.newIdBytes(new String(rowkey), dedupConf.getWf().getDedupRun())); |
|
157 |
} |
|
158 |
|
|
159 | 149 |
private boolean isRelMarkedDeleted(final Context context, final byte[] o) { |
160 | 150 |
try { |
161 | 151 |
final Oaf oaf = Oaf.parseFrom(o); |
Also available in: Unified diff
getting rid of person entities