Revision 52804
Added by Claudio Atzori almost 6 years ago
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/SoftwareEnrichmentReducer.java | ||
---|---|---|
8 | 8 |
import com.google.common.collect.Lists; |
9 | 9 |
import com.google.common.collect.Maps; |
10 | 10 |
import com.google.common.collect.Streams; |
11 |
import eu.dnetlib.data.mapreduce.hbase.broker.ProjectEventFactory; |
|
12 | 11 |
import eu.dnetlib.data.mapreduce.hbase.broker.SoftwareEventFactory; |
13 | 12 |
import eu.dnetlib.data.mapreduce.hbase.broker.model.EventWrapper; |
14 | 13 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
... | ... | |
16 | 15 |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
17 | 16 |
import org.dom4j.DocumentException; |
18 | 17 |
|
18 |
import static eu.dnetlib.data.mapreduce.hbase.broker.enrich.SoftwareEnrichmentMapper.PUBLICATION; |
|
19 |
import static eu.dnetlib.data.mapreduce.hbase.broker.enrich.SoftwareEnrichmentMapper.SOFTWARE; |
|
19 | 20 |
import static eu.dnetlib.data.mapreduce.util.OafHbaseUtils.getKey; |
20 |
import static eu.dnetlib.data.mapreduce.hbase.broker.enrich.SoftwareEnrichmentMapper.*; |
|
21 | 21 |
|
22 | 22 |
/** |
23 | 23 |
* Created by claudio on 08/07/16. |
... | ... | |
98 | 98 |
final String otherId = other.getEntity().getId(); |
99 | 99 |
if (!currentId.equals(otherId)) { |
100 | 100 |
|
101 |
final float trust = similarity(current, other); |
|
102 |
events.addAll(ProjectEventFactory.process(context, current, other, trust)); |
|
101 |
final double similarity = similarity(current, other); |
|
102 |
|
|
103 |
if (similarity >= dedupConf.getWf().getThreshold()) { |
|
104 |
|
|
105 |
final float trust = scale(similarity); |
|
106 |
events.addAll(SoftwareEventFactory.process(context, current, other, trust)); |
|
107 |
} else { |
|
108 |
context.getCounter(counterGroup(), "d < " + dedupConf.getWf().getThreshold()).increment(1); |
|
109 |
} |
|
103 | 110 |
} |
104 | 111 |
} |
105 | 112 |
} |
Also available in: Unified diff
Avoid emitting enrichment events when the similarity score is below the threshold.