Project

General

Profile

« Previous | Next » 

Revision 52804

Avoid emitting enrichment events when the similarity score is below the threshold

View differences:

modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/SoftwareEnrichmentReducer.java
8 8
import com.google.common.collect.Lists;
9 9
import com.google.common.collect.Maps;
10 10
import com.google.common.collect.Streams;
11
import eu.dnetlib.data.mapreduce.hbase.broker.ProjectEventFactory;
12 11
import eu.dnetlib.data.mapreduce.hbase.broker.SoftwareEventFactory;
13 12
import eu.dnetlib.data.mapreduce.hbase.broker.model.EventWrapper;
14 13
import eu.dnetlib.data.proto.OafProtos.Oaf;
......
16 15
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
17 16
import org.dom4j.DocumentException;
18 17

  
18
import static eu.dnetlib.data.mapreduce.hbase.broker.enrich.SoftwareEnrichmentMapper.PUBLICATION;
19
import static eu.dnetlib.data.mapreduce.hbase.broker.enrich.SoftwareEnrichmentMapper.SOFTWARE;
19 20
import static eu.dnetlib.data.mapreduce.util.OafHbaseUtils.getKey;
20
import static eu.dnetlib.data.mapreduce.hbase.broker.enrich.SoftwareEnrichmentMapper.*;
21 21

  
22 22
/**
23 23
 * Created by claudio on 08/07/16.
......
98 98
						final String otherId = other.getEntity().getId();
99 99
						if (!currentId.equals(otherId)) {
100 100

  
101
							final float trust = similarity(current, other);
102
							events.addAll(ProjectEventFactory.process(context, current, other, trust));
101
							final double similarity = similarity(current, other);
102

  
103
							if (similarity >= dedupConf.getWf().getThreshold()) {
104

  
105
								final float trust = scale(similarity);
106
								events.addAll(SoftwareEventFactory.process(context, current, other, trust));
107
							} else {
108
								context.getCounter(counterGroup(), "d < " + dedupConf.getWf().getThreshold()).increment(1);
109
							}
103 110
						}
104 111
					}
105 112
				}

Also available in: Unified diff