Revision 53710
Added by Claudio Atzori over 5 years ago
InfospaceCountsReducer.java | ||
---|---|---|
3 | 3 |
import eu.dnetlib.data.mapreduce.util.OafDecoder; |
4 | 4 |
import eu.dnetlib.data.mapreduce.util.OafHbaseUtils; |
5 | 5 |
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; |
6 |
import eu.dnetlib.data.proto.KindProtos; |
|
6 | 7 |
import eu.dnetlib.data.proto.OafProtos; |
7 | 8 |
import eu.dnetlib.data.proto.TypeProtos; |
8 | 9 |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
... | ... | |
13 | 14 |
|
14 | 15 |
public class InfospaceCountsReducer extends Reducer<Text, ImmutableBytesWritable, NullWritable, NullWritable> { |
15 | 16 |
|
17 |
public static final String ENTITY = KindProtos.Kind.entity.toString(); |
|
18 |
|
|
16 | 19 |
@Override |
17 | 20 |
protected void reduce(final Text key, final Iterable<ImmutableBytesWritable> values, final Context context) { |
18 | 21 |
try { |
... | ... | |
21 | 24 |
final OafDecoder decoder = OafHbaseUtils.decode(bytes); |
22 | 25 |
final TypeProtos.Type type = keyDecoder.getType(); |
23 | 26 |
|
27 |
final OafProtos.Oaf oaf = decoder.getOaf(); |
|
28 |
|
|
24 | 29 |
switch (decoder.getKind()) { |
25 | 30 |
case entity: |
26 |
incrementCounter(context, decoder.getKind().toString(), getEntityType(decoder.getOaf(), type), 1); |
|
31 |
if (deletedByInference(oaf)) { |
|
32 |
if (isInvisible(oaf)) { |
|
33 |
incrementCounter(context, ENTITY, String.format("%s (deleted true / invisible true)", getEntityType(oaf, type)), 1); |
|
34 |
} else { |
|
35 |
incrementCounter(context, ENTITY, String.format("%s (deleted true / invisible false)", getEntityType(oaf, type)), 1); |
|
36 |
} |
|
37 |
} else { |
|
38 |
|
|
39 |
if (isInvisible(oaf)) { |
|
40 |
incrementCounter(context, ENTITY, String.format("%s (deleted false / invisible true)", getEntityType(oaf, type)), 1); |
|
41 |
} else { |
|
42 |
incrementCounter(context, ENTITY, String.format("%s (deleted false / invisible false)", getEntityType(oaf, type)), 1); |
|
43 |
} |
|
44 |
} |
|
27 | 45 |
break; |
28 | 46 |
case relation: |
29 |
|
|
30 |
incrementCounter(context, decoder.getKind().toString(), decoder.getCFQ(), 1); |
|
47 |
if (deletedByInference(oaf)) { |
|
48 |
incrementCounter(context, String.format("%s (deleted true)", ENTITY), decoder.getCFQ(), 1); |
|
49 |
} else { |
|
50 |
incrementCounter(context, String.format("%s (deleted false)", ENTITY), decoder.getCFQ(), 1); |
|
51 |
} |
|
31 | 52 |
break; |
32 | 53 |
default: |
33 | 54 |
throw new IllegalArgumentException("unknow type: " + decoder.getKind()); |
... | ... | |
56 | 77 |
} |
57 | 78 |
} |
58 | 79 |
|
80 |
/**
 * Reads the deleted-by-inference flag of a decoded record.
 *
 * @param oaf the Oaf record to inspect
 * @return the value of {@code getDeletedbyinference()} taken from {@code oaf.getDataInfo()}
 */
private boolean deletedByInference(final OafProtos.Oaf oaf) { |
|
81 |
// NOTE(review): getDataInfo() is dereferenced without a presence check — confirm it cannot be null/unset here.
return oaf.getDataInfo().getDeletedbyinference(); |
|
82 |
} |
|
83 |
|
|
84 |
/**
 * Reads the invisible flag of a decoded record.
 *
 * @param oaf the Oaf record to inspect
 * @return the value of {@code getInvisible()} taken from {@code oaf.getDataInfo()}
 */
private boolean isInvisible(final OafProtos.Oaf oaf) { |
|
85 |
// NOTE(review): getDataInfo() is dereferenced without a presence check — confirm it cannot be null/unset here.
return oaf.getDataInfo().getInvisible(); |
|
86 |
} |
|
87 |
|
|
59 | 88 |
} |
Also available in: Unified diff
Added Mapper and Reducer classes for the infoSpace counts workflows