1
|
package eu.dnetlib.pace.distance;
|
2
|
|
3
|
import java.util.List;
|
4
|
|
5
|
import com.wcohen.ss.AbstractStringDistance;
|
6
|
|
7
|
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
8
|
import eu.dnetlib.pace.config.Type;
|
9
|
import eu.dnetlib.pace.model.Field;
|
10
|
import eu.dnetlib.pace.model.FieldList;
|
11
|
|
12
|
/**
|
13
|
* For the rest of the fields delegate the distance measure to the second string library.
|
14
|
*/
|
15
|
public abstract class SecondStringDistanceAlgo extends AbstractPaceFunctions implements DistanceAlgo {
|
16
|
|
17
|
// val aliases = Map(('â‚' to '₉') zip ('1' to '9'): _*) ++ Map(('â´' to 'â¹') zip ('4' to '9'): _*) ++ Map('¹' -> '1', '²' ->
|
18
|
// '2', * '³'
|
19
|
// -> '3')
|
20
|
|
21
|
/** The ssalgo. */
|
22
|
protected AbstractStringDistance ssalgo;
|
23
|
|
24
|
/** The weight. */
|
25
|
protected double weight = 0.0;
|
26
|
|
27
|
/**
|
28
|
* Instantiates a new second string distance algo.
|
29
|
*
|
30
|
* @param weight
|
31
|
* the weight
|
32
|
* @param ssalgo
|
33
|
* the ssalgo
|
34
|
*/
|
35
|
protected SecondStringDistanceAlgo(final double weight, final AbstractStringDistance ssalgo) {
|
36
|
this.ssalgo = ssalgo;
|
37
|
this.weight = weight;
|
38
|
}
|
39
|
|
40
|
/**
|
41
|
* Normalize.
|
42
|
*
|
43
|
* @param d
|
44
|
* the d
|
45
|
* @return the double
|
46
|
*/
|
47
|
protected abstract double normalize(double d);
|
48
|
|
49
|
/**
|
50
|
* Distance.
|
51
|
*
|
52
|
* @param a
|
53
|
* the a
|
54
|
* @param b
|
55
|
* the b
|
56
|
* @return the double
|
57
|
*/
|
58
|
public double distance(final String a, final String b) {
|
59
|
double score = ssalgo.score(a, b);
|
60
|
return normalize(score);
|
61
|
}
|
62
|
|
63
|
/**
|
64
|
* Distance.
|
65
|
*
|
66
|
* @param a
|
67
|
* the a
|
68
|
* @param b
|
69
|
* the b
|
70
|
* @return the double
|
71
|
*/
|
72
|
protected double distance(final List<String> a, final List<String> b) {
|
73
|
return distance(concat(a), concat(b));
|
74
|
}
|
75
|
|
76
|
/*
|
77
|
* (non-Javadoc)
|
78
|
*
|
79
|
* @see eu.dnetlib.pace.distance.DistanceAlgo#distance(eu.dnetlib.pace.model.Field, eu.dnetlib.pace.model.Field)
|
80
|
*/
|
81
|
@Override
|
82
|
public double distance(final Field a, final Field b) {
|
83
|
if (a.getType().equals(Type.String) && b.getType().equals(Type.String)) return distance(a.stringValue(), b.stringValue());
|
84
|
if (a.getType().equals(Type.List) && b.getType().equals(Type.List)) return distance(toList(a), toList(b));
|
85
|
|
86
|
throw new IllegalArgumentException("invalid types\n- A: " + a.toString() + "\n- B: " + b.toString());
|
87
|
}
|
88
|
|
89
|
/**
|
90
|
* To list.
|
91
|
*
|
92
|
* @param list
|
93
|
* the list
|
94
|
* @return the list
|
95
|
*/
|
96
|
protected List<String> toList(final Field list) {
|
97
|
return ((FieldList) list).stringList();
|
98
|
}
|
99
|
|
100
|
}
|