Project

General

Profile

« Previous | Next » 

Revision 49029

getting rid of person entities

View differences:

DedupBuildRootsMapper.java
1 1
package eu.dnetlib.data.mapreduce.hbase.dedup;
2 2

  
3 3
import java.io.IOException;
4
import java.nio.ByteBuffer;
5 4
import java.util.Collections;
6 5
import java.util.HashSet;
7 6
import java.util.Map;
8 7
import java.util.Set;
9 8

  
10 9
import com.google.common.base.Function;
10
import com.google.common.collect.Iterables;
11 11
import com.google.common.collect.Sets;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.apache.hadoop.hbase.client.Result;
15
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
16
import org.apache.hadoop.hbase.mapreduce.TableMapper;
17
import org.apache.hadoop.hbase.util.Bytes;
18
import org.apache.hadoop.io.Text;
19

  
20
import com.google.common.collect.Iterables;
21 12
import com.google.protobuf.InvalidProtocolBufferException;
22

  
23 13
import eu.dnetlib.data.mapreduce.JobParams;
24 14
import eu.dnetlib.data.mapreduce.util.DedupUtils;
25 15
import eu.dnetlib.data.mapreduce.util.OafDecoder;
......
27 17
import eu.dnetlib.data.proto.KindProtos.Kind;
28 18
import eu.dnetlib.data.proto.OafProtos.Oaf;
29 19
import eu.dnetlib.data.proto.OafProtos.OafEntity;
30
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
31 20
import eu.dnetlib.data.proto.TypeProtos.Type;
32 21
import eu.dnetlib.data.transform.OafUtils;
33 22
import eu.dnetlib.pace.config.DedupConfig;
23
import org.apache.commons.logging.Log;
24
import org.apache.commons.logging.LogFactory;
25
import org.apache.hadoop.hbase.client.Result;
26
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
27
import org.apache.hadoop.hbase.mapreduce.TableMapper;
28
import org.apache.hadoop.hbase.util.Bytes;
29
import org.apache.hadoop.io.Text;
34 30

  
35 31
public class DedupBuildRootsMapper extends TableMapper<Text, ImmutableBytesWritable> {
36 32

  
......
150 146
		context.write(rootId, ibw);
151 147
	}
152 148

  
153
	private boolean checkHack(final byte[] rowkey, final byte[] rootId, final String family) {
154
		return dedupConf.getWf().getEntityType().equals(Type.result.toString()) && 	// we're deduplicating the results
155
				family.equals(RelType.personResult.toString()) && 			// we're dealing with a personResult relation
156
				!rootId.equals(DedupUtils.newIdBytes(new String(rowkey), dedupConf.getWf().getDedupRun()));
157
	}
158

  
159 149
	private boolean isRelMarkedDeleted(final Context context, final byte[] o) {
160 150
		try {
161 151
			final Oaf oaf = Oaf.parseFrom(o);

Also available in: Unified diff