1
|
package eu.dnetlib.pace.distance.algo;
|
2
|
|
3
|
import eu.dnetlib.pace.distance.SecondStringDistanceAlgo;
|
4
|
import org.apache.commons.lang.StringUtils;
|
5
|
|
6
|
import com.wcohen.ss.AbstractStringDistance;
|
7
|
|
8
|
import eu.dnetlib.pace.config.Type;
|
9
|
import eu.dnetlib.pace.model.Field;
|
10
|
|
11
|
/**
|
12
|
* The Class SubStringLevenstein.
|
13
|
*/
|
14
|
public class SubStringLevenstein extends SecondStringDistanceAlgo {
|
15
|
|
16
|
/** The limit. */
|
17
|
protected int limit;
|
18
|
|
19
|
/**
|
20
|
* Instantiates a new sub string levenstein.
|
21
|
*
|
22
|
* @param w
|
23
|
* the w
|
24
|
*/
|
25
|
public SubStringLevenstein(final double w) {
|
26
|
super(w, new com.wcohen.ss.Levenstein());
|
27
|
}
|
28
|
|
29
|
/**
|
30
|
* Instantiates a new sub string levenstein.
|
31
|
*
|
32
|
* @param w
|
33
|
* the w
|
34
|
* @param limit
|
35
|
* the limit
|
36
|
*/
|
37
|
public SubStringLevenstein(final double w, final int limit) {
|
38
|
super(w, new com.wcohen.ss.Levenstein());
|
39
|
this.limit = limit;
|
40
|
}
|
41
|
|
42
|
/**
|
43
|
* Instantiates a new sub string levenstein.
|
44
|
*
|
45
|
* @param w
|
46
|
* the w
|
47
|
* @param limit
|
48
|
* the limit
|
49
|
* @param ssalgo
|
50
|
* the ssalgo
|
51
|
*/
|
52
|
protected SubStringLevenstein(final double w, final int limit, final AbstractStringDistance ssalgo) {
|
53
|
super(w, ssalgo);
|
54
|
this.limit = limit;
|
55
|
}
|
56
|
|
57
|
/*
|
58
|
* (non-Javadoc)
|
59
|
*
|
60
|
* @see eu.dnetlib.pace.distance.SecondStringDistanceAlgo#distance(eu.dnetlib.pace.model.Field, eu.dnetlib.pace.model.Field)
|
61
|
*/
|
62
|
@Override
|
63
|
public double distance(final Field a, final Field b) {
|
64
|
if (a.getType().equals(Type.String) && b.getType().equals(Type.String))
|
65
|
return distance(StringUtils.left(a.stringValue(), limit), StringUtils.left(b.stringValue(), limit));
|
66
|
|
67
|
throw new IllegalArgumentException("invalid types\n- A: " + a.toString() + "\n- B: " + b.toString());
|
68
|
}
|
69
|
|
70
|
/*
|
71
|
* (non-Javadoc)
|
72
|
*
|
73
|
* @see eu.dnetlib.pace.distance.DistanceAlgo#getWeight()
|
74
|
*/
|
75
|
@Override
|
76
|
public double getWeight() {
|
77
|
return super.weight;
|
78
|
}
|
79
|
|
80
|
/*
|
81
|
* (non-Javadoc)
|
82
|
*
|
83
|
* @see eu.dnetlib.pace.distance.SecondStringDistanceAlgo#normalize(double)
|
84
|
*/
|
85
|
@Override
|
86
|
protected double normalize(final double d) {
|
87
|
return 1 / Math.pow(Math.abs(d) + 1, 0.1);
|
88
|
}
|
89
|
|
90
|
}
|