Project

General

Profile

« Previous | Next » 

Revision 57533

fixed infospace export procedure, avoid to emit the same result more than once

View differences:

modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataexport/ExportInformationSpaceMapper2DHP.java
9 9
import eu.dnetlib.data.proto.TypeProtos;
10 10
import eu.dnetlib.dhp.schema.oaf.Oaf;
11 11
import eu.dnetlib.dhp.schema.oaf.Relation;
12
import org.apache.commons.lang.StringUtils;
12 13
import org.apache.hadoop.hbase.client.Result;
13 14
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
14 15
import org.apache.hadoop.hbase.mapreduce.TableMapper;
......
20 21
import java.util.Map;
21 22
import java.util.Map.Entry;
22 23
import java.util.NavigableMap;
24
import java.util.stream.Collectors;
23 25

  
24 26
/**
25 27
 * Exports Oaf objects as their json serialization.
......
66 68
			}
67 69

  
68 70
			final Map<byte[], NavigableMap<byte[], byte[]>> row = value.getNoVersionMap();
69

  
71
			
70 72
			for (byte[] cf : row.keySet()) {
71 73

  
72
				for (Entry<byte[], byte[]> q : row.get(cf).entrySet()) {
73
					final String key = new String(q.getKey());
74
					if(key.startsWith("update") || DedupUtils.BODY_S.equals(key)) {
75
						continue;
74
				for (Entry<byte[], byte[]> q : value.getFamilyMap(cf).entrySet().stream().filter(e -> {
75
					final String key = new String(e.getKey());
76
					boolean skip = key.startsWith("update") || key.equals(DedupUtils.BODY_S);
77
					if (skip) {
78
						context.getCounter("export", String.format("skipped %s", StringUtils.substring(key, 0, 6))).increment(1);
76 79
					}
80
					return !skip;
81
				}).collect(Collectors.toList())) {
77 82
					if (new String(q.getValue()).equals("")) {
78
						context.getCounter("export", "skipped relation " + new String(cf)).increment(1);
83
						context.getCounter("export", "skipped " + new String(cf)).increment(1);
79 84
					} else {
80 85
						emit(context, OafProtos.Oaf.parseFrom(q.getValue()));
81 86
					}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataexport/ProtoConverter.java
11 11

  
12 12
    public static Oaf convert(OafProtos.Oaf oaf) {
13 13
        try {
14
            if (oaf.getKind() == KindProtos.Kind.entity)
15
                return convertEntity(oaf);
16
            else {
17
                return convertRelation(oaf);
14
            switch (oaf.getKind()) {
15
                case entity:
16
                    return convertEntity(oaf);
17
                case relation:
18
                    return convertRelation(oaf);
19
                default:
20
                    throw new IllegalArgumentException("invalid kind " + oaf.getKind());
18 21
            }
19 22
        } catch (Throwable e) {
20 23
            throw new RuntimeException("error on getting " + JsonFormat.printToString(oaf), e);

Also available in: Unified diff