Revision 29732
Added by Claudio Atzori over 10 years ago
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dedup/DedupMapper.java | ||
---|---|---|
49 | 49 |
outKey = new Text(); |
50 | 50 |
ibw = new ImmutableBytesWritable(); |
51 | 51 |
|
52 |
System.out.println("dedup map phase \npace conf: " + paceConf.toString() + "\nwf conf: " + dedupConf.toString() + "\nblacklists: " + blackListMap); |
|
52 |
System.out.println("pace conf"); |
|
53 |
System.out.println("entity type: " + dedupConf.getEntityType()); |
|
54 |
System.out.println("clustering: " + paceConf.clusterings()); |
|
55 |
System.out.println("conditions: " + paceConf.conditions()); |
|
56 |
System.out.println("fields: " + paceConf.fields()); |
|
57 |
System.out.println("blacklists: " + blackListMap); |
|
58 |
System.out.println("wf conf: " + dedupConf.toString()); |
|
53 | 59 |
} |
54 | 60 |
|
55 | 61 |
@Override |
... | ... | |
61 | 67 |
if (body != null) { |
62 | 68 |
|
63 | 69 |
final OafEntity entity = OafDecoder.decode(body).getEntity(); |
64 |
if (entity.getType().equals(dedupConf.getEntityType())) { |
|
65 | 70 |
|
71 |
context.getCounter(entity.getType().toString(), "decoded").increment(1); |
|
72 |
|
|
73 |
if (entity.getType().equals(Type.valueOf(dedupConf.getEntityType()))) { |
|
74 |
|
|
66 | 75 |
// TODO: remove this hack - here because we don't want to dedup datasets |
67 | 76 |
if (entity.getType().equals(Type.result) && entity.getResult().getMetadata().getResulttype().getClassid().equals("dataset")) { return; } |
68 | 77 |
|
... | ... | |
75 | 84 |
} |
76 | 85 |
} |
77 | 86 |
|
78 |
private void emitNGrams(final Context context, final MapDocument doc, final Collection<String> collection) throws IOException, InterruptedException {
|
|
79 |
for (String ngram : collection) {
|
|
87 |
private void emitNGrams(final Context context, final MapDocument doc, final Collection<String> ngrams) throws IOException, InterruptedException {
|
|
88 |
for (String ngram : ngrams) {
|
|
80 | 89 |
outKey.set(ngram); |
81 | 90 |
ibw.set(doc.toByteArray()); |
82 | 91 |
context.write(outKey, ibw); |
Also available in: Unified diff
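More logging (per-setting configuration output and a "decoded" counter per entity type). Fixed the entity type check: the entity's type is now compared against Type.valueOf(dedupConf.getEntityType()) instead of the raw value returned by dedupConf.getEntityType().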