Project

General

Profile

« Previous | Next » 

Revision 52803

avoid emitting enrichment events when the similarity score is below the threshold

View differences:

modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/EnrichmentReducer.java
66 66
						final String otherId = other.getEntity().getId();
67 67
						if (!currentId.equals(otherId)) {
68 68

  
69
							final float trust = similarity(current, other);
69
							final double similarity = similarity(current, other);
70 70

  
71
							if (!DedupUtils.isRoot(current.getEntity().getId())) {
72
								events.addAll(PIDEventFactory.process(current, other, trust));
73
								events.addAll(OAVersionEventFactory.process(current, other, trust, untrustedOaDsList));
74
								events.addAll(AbstractEventFactory.process(current, other, trust));
75
								events.addAll(PublicationDateEventFactory.process(current, other, trust));
71
							if (similarity >= dedupConf.getWf().getThreshold()) {
72

  
73
								final float trust = scale(similarity);
74
								if (!DedupUtils.isRoot(current.getEntity().getId())) {
75
									events.addAll(PIDEventFactory.process(current, other, trust));
76
									events.addAll(OAVersionEventFactory.process(current, other, trust, untrustedOaDsList));
77
									events.addAll(AbstractEventFactory.process(current, other, trust));
78
									events.addAll(PublicationDateEventFactory.process(current, other, trust));
79
								}
80

  
81
								events.addAll(SubjectEventFactory.process(context, current, other, trust));
82
							} else {
83
								context.getCounter(counterGroup(), "d < " + dedupConf.getWf().getThreshold()).increment(1);
76 84
							}
77
							events.addAll(SubjectEventFactory.process(context, current, other, trust));
78 85

  
79 86
						} else if (oafList.size() == 1) {
80 87
							events.addAll(SubjectEventFactory.process(context, current));
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/AbstractEnrichmentReducer.java
148 148
		});
149 149
	}
150 150

  
151
	protected float similarity(final Oaf oa, final Oaf ob) {
151
	protected double similarity(final Oaf oa, final Oaf ob) {
152 152

  
153 153
		final MapDocument a = ProtoDocumentBuilder.newInstance(oa.getEntity().getId(), oa.getEntity(), dedupConf.getPace().getModel());
154 154
		final MapDocument b = ProtoDocumentBuilder.newInstance(ob.getEntity().getId(), ob.getEntity(), dedupConf.getPace().getModel());
155 155

  
156 156
		final ScoreResult sr =  new PaceDocumentDistance().between(a, b, dedupConf);
157
		final float score = (float) Algorithms.scale(sr.getScore(), scaleLB, 1, 0, 1);
157
		return sr.getScore();
158
	}
158 159

  
160
	protected float scale(final double d) {
161
		final float score = (float) Algorithms.scale(d, scaleLB, 1, 0, 1);
162

  
159 163
		return MathUtils.round(score, SCORE_DECIMALS, BigDecimal.ROUND_HALF_DOWN);
160 164
	}
161 165

  
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/ProjectEnrichmentReducer.java
96 96
						final String otherId = other.getEntity().getId();
97 97
						if (!currentId.equals(otherId)) {
98 98

  
99
							final float trust = similarity(current, other);
100
							events.addAll(ProjectEventFactory.process(context, current, other, trust));
99
							final double similarity = similarity(current, other);
100

  
101
							if (similarity >= dedupConf.getWf().getThreshold()) {
102

  
103
								final float trust = scale(similarity);
104

  
105
								events.addAll(ProjectEventFactory.process(context, current, other, trust));
106
							} else {
107
								context.getCounter(counterGroup(), "d < " + dedupConf.getWf().getThreshold()).increment(1);
108
							}
109

  
101 110
						}
102 111
					}
103 112
				}

Also available in: Unified diff