Revision 52803
Added by Claudio Atzori almost 6 years ago
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/EnrichmentReducer.java | ||
---|---|---|
66 | 66 |
final String otherId = other.getEntity().getId(); |
67 | 67 |
if (!currentId.equals(otherId)) { |
68 | 68 |
|
69 |
final float trust = similarity(current, other);
|
|
69 |
final double similarity = similarity(current, other);
|
|
70 | 70 |
|
71 |
if (!DedupUtils.isRoot(current.getEntity().getId())) { |
|
72 |
events.addAll(PIDEventFactory.process(current, other, trust)); |
|
73 |
events.addAll(OAVersionEventFactory.process(current, other, trust, untrustedOaDsList)); |
|
74 |
events.addAll(AbstractEventFactory.process(current, other, trust)); |
|
75 |
events.addAll(PublicationDateEventFactory.process(current, other, trust)); |
|
71 |
if (similarity >= dedupConf.getWf().getThreshold()) { |
|
72 |
|
|
73 |
final float trust = scale(similarity); |
|
74 |
if (!DedupUtils.isRoot(current.getEntity().getId())) { |
|
75 |
events.addAll(PIDEventFactory.process(current, other, trust)); |
|
76 |
events.addAll(OAVersionEventFactory.process(current, other, trust, untrustedOaDsList)); |
|
77 |
events.addAll(AbstractEventFactory.process(current, other, trust)); |
|
78 |
events.addAll(PublicationDateEventFactory.process(current, other, trust)); |
|
79 |
} |
|
80 |
|
|
81 |
events.addAll(SubjectEventFactory.process(context, current, other, trust)); |
|
82 |
} else { |
|
83 |
context.getCounter(counterGroup(), "d < " + dedupConf.getWf().getThreshold()).increment(1); |
|
76 | 84 |
} |
77 |
events.addAll(SubjectEventFactory.process(context, current, other, trust)); |
|
78 | 85 |
|
79 | 86 |
} else if (oafList.size() == 1) { |
80 | 87 |
events.addAll(SubjectEventFactory.process(context, current)); |
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/AbstractEnrichmentReducer.java | ||
---|---|---|
148 | 148 |
}); |
149 | 149 |
} |
150 | 150 |
|
151 |
protected float similarity(final Oaf oa, final Oaf ob) {
|
|
151 |
protected double similarity(final Oaf oa, final Oaf ob) {
|
|
152 | 152 |
|
153 | 153 |
final MapDocument a = ProtoDocumentBuilder.newInstance(oa.getEntity().getId(), oa.getEntity(), dedupConf.getPace().getModel()); |
154 | 154 |
final MapDocument b = ProtoDocumentBuilder.newInstance(ob.getEntity().getId(), ob.getEntity(), dedupConf.getPace().getModel()); |
155 | 155 |
|
156 | 156 |
final ScoreResult sr = new PaceDocumentDistance().between(a, b, dedupConf); |
157 |
final float score = (float) Algorithms.scale(sr.getScore(), scaleLB, 1, 0, 1); |
|
157 |
return sr.getScore(); |
|
158 |
} |
|
158 | 159 |
|
160 |
protected float scale(final double d) { |
|
161 |
final float score = (float) Algorithms.scale(d, scaleLB, 1, 0, 1); |
|
162 |
|
|
159 | 163 |
return MathUtils.round(score, SCORE_DECIMALS, BigDecimal.ROUND_HALF_DOWN); |
160 | 164 |
} |
161 | 165 |
|
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/ProjectEnrichmentReducer.java | ||
---|---|---|
96 | 96 |
final String otherId = other.getEntity().getId(); |
97 | 97 |
if (!currentId.equals(otherId)) { |
98 | 98 |
|
99 |
final float trust = similarity(current, other); |
|
100 |
events.addAll(ProjectEventFactory.process(context, current, other, trust)); |
|
99 |
final double similarity = similarity(current, other); |
|
100 |
|
|
101 |
if (similarity >= dedupConf.getWf().getThreshold()) { |
|
102 |
|
|
103 |
final float trust = scale(similarity); |
|
104 |
|
|
105 |
events.addAll(ProjectEventFactory.process(context, current, other, trust)); |
|
106 |
} else { |
|
107 |
context.getCounter(counterGroup(), "d < " + dedupConf.getWf().getThreshold()).increment(1); |
|
108 |
} |
|
109 |
|
|
101 | 110 |
} |
102 | 111 |
} |
103 | 112 |
} |
Also available in: Unified diff
Avoid emitting enrichment events when the similarity score is below the threshold