Project

General

Profile

1
package eu.dnetlib.pace.distance;
2

    
3
import java.util.List;
4

    
5
import eu.dnetlib.pace.model.CondDef;
6
import eu.dnetlib.pace.model.Document;
7
import eu.dnetlib.pace.model.Field;
8
import eu.dnetlib.pace.model.FieldDef;
9

    
10
/**
11
 * The distance between two documents is given by the weighted mean of the field distances
12
 */
13
public class DistanceScorer {
14

    
15
	private List<FieldDef> fields;
16

    
17
	private List<CondDef> strictConditions;
18

    
19
	private List<CondDef> conditions;
20

    
21
	public DistanceScorer(final List<FieldDef> fields, final List<CondDef> strictConditions, final List<CondDef> conditions) {
22
		this.fields = fields;
23
		this.conditions = conditions;
24
		this.strictConditions = strictConditions;
25
	}
26

    
27
	public double distance(final Document a, final Document b) {
28

    
29
		double w = sumWeights(fields);
30
		double sum = 0.0;
31
		int cond = verify(a, b, strictConditions, true);
32

    
33
		if (cond > 0) return 1.0;
34
		if (cond < 0) return 0.0;
35

    
36
		if (verify(a, b, conditions, true) >= 0) {
37
			for (FieldDef fd : fields) {
38
				double d = fieldDistance(a, b, fd);
39

    
40
				if (d > 0) {
41
					sum += d;
42
				} else {
43
					w -= fd.getAlgo().getWeight();
44
				}
45
			}
46
			return w == 0 ? 0 : sum / w;
47
		}
48
		return 0.0;
49
	}
50

    
51
	private int verify(final Document a, final Document b, final List<CondDef> conditions, final boolean strict) {
52
		int cond = 0;
53

    
54
		for (CondDef cd : conditions) {
55
			int verify = cd.getConditionAlgo().verify(a, b);
56
			if (strict && (verify < 0)) return -1;
57
			cond += verify;
58
		}
59
		return cond;
60
	}
61

    
62
	private double fieldDistance(final Document a, final Document b, final FieldDef fd) {
63
		final double w = fd.getAlgo().getWeight();
64
		if ((w == 0)) return 0.0; // optimization for 0 weight
65
		else {
66
			Field va = getValue(a, fd);
67
			Field vb = getValue(b, fd);
68

    
69
			if (va.isEmpty() || vb.isEmpty()) {
70
				if (fd.isIgnoreMissing()) return -1;
71
				else return w;
72
			} else {
73

    
74
				if (va.getType().equals(vb.getType())) {
75
					double d = fd.getAlgo().distance(va, vb);
76
					return w * d;
77
				}
78
				throw new IllegalArgumentException("Types are differents type");
79
			}
80
		}
81
	}
82

    
83
	private Field getValue(final Document d, final FieldDef fd) {
84
		return d.values(fd.getName());
85
	}
86

    
87
	private double sumWeights(final List<FieldDef> fields) {
88
		double sum = 0.0;
89
		for (FieldDef fd : fields) {
90
			sum += fd.getAlgo().getWeight();
91
		}
92
		return sum;
93
	}
94

    
95
}
(4-4/18)