1 |
26600
|
sandro.lab
|
package eu.dnetlib.pace.model;
|
2 |
|
|
|
3 |
33135
|
claudio.at
|
import java.util.List;
|
4 |
38423
|
claudio.at
|
import java.util.Map;
|
5 |
33135
|
claudio.at
|
|
6 |
|
|
import com.google.common.base.Splitter;
|
7 |
|
|
import com.google.common.collect.Lists;
|
8 |
36613
|
claudio.at
|
import com.google.gson.Gson;
|
9 |
|
|
import eu.dnetlib.pace.config.Algo;
|
10 |
26600
|
sandro.lab
|
import eu.dnetlib.pace.config.Type;
|
11 |
40910
|
claudio.at
|
import eu.dnetlib.pace.distance.*;
|
12 |
41642
|
claudio.at
|
import eu.dnetlib.pace.distance.algo.*;
|
13 |
26600
|
sandro.lab
|
|
14 |
|
|
/**
|
15 |
33135
|
claudio.at
|
* The schema is composed by field definitions (FieldDef). Each field has a type, a name, and an associated distance algorithm.
|
16 |
26600
|
sandro.lab
|
*/
|
17 |
|
|
public class FieldDef {
|
18 |
|
|
|
19 |
|
|
public final static String PATH_SEPARATOR = "/";
|
20 |
|
|
|
21 |
36613
|
claudio.at
|
private Algo algo;
|
22 |
|
|
|
23 |
26600
|
sandro.lab
|
private String name;
|
24 |
|
|
|
25 |
33135
|
claudio.at
|
private String path;
|
26 |
|
|
|
27 |
26600
|
sandro.lab
|
private boolean ignoreMissing;
|
28 |
|
|
|
29 |
36613
|
claudio.at
|
private Type type;
|
30 |
26600
|
sandro.lab
|
|
31 |
36613
|
claudio.at
|
private boolean overrideMatch;
|
32 |
|
|
|
33 |
|
|
private double weight;
|
34 |
|
|
|
35 |
|
|
private int limit = -1;
|
36 |
|
|
|
37 |
38523
|
claudio.at
|
private Map<String, String> params;
|
38 |
38423
|
claudio.at
|
|
39 |
36613
|
claudio.at
|
public FieldDef() {}
|
40 |
|
|
|
41 |
33135
|
claudio.at
|
// def apply(s: String): Field[A]
|
42 |
|
|
public Field apply(final Type type, final String s) {
|
43 |
26600
|
sandro.lab
|
switch (type) {
|
44 |
|
|
case Int:
|
45 |
33135
|
claudio.at
|
return new FieldValueImpl(type, name, Integer.parseInt(s));
|
46 |
26600
|
sandro.lab
|
case String:
|
47 |
33135
|
claudio.at
|
return new FieldValueImpl(type, name, s);
|
48 |
|
|
case List:
|
49 |
41504
|
claudio.at
|
return new FieldListImpl(name, type);
|
50 |
26600
|
sandro.lab
|
default:
|
51 |
|
|
throw new IllegalArgumentException("Casting not implemented for type " + type);
|
52 |
|
|
}
|
53 |
|
|
}
|
54 |
|
|
|
55 |
|
|
public String getName() {
|
56 |
33135
|
claudio.at
|
return name;
|
57 |
26600
|
sandro.lab
|
}
|
58 |
|
|
|
59 |
33135
|
claudio.at
|
public String getPath() {
|
60 |
|
|
return path;
|
61 |
|
|
}
|
62 |
|
|
|
63 |
|
|
public List<String> getPathList() {
|
64 |
|
|
return Lists.newArrayList(Splitter.on(PATH_SEPARATOR).split(getPath()));
|
65 |
|
|
}
|
66 |
|
|
|
67 |
36613
|
claudio.at
|
public DistanceAlgo getDistanceAlgo() {
|
68 |
|
|
switch (getAlgo()) {
|
69 |
|
|
case JaroWinkler:
|
70 |
|
|
return new JaroWinkler(getWeight());
|
71 |
|
|
case JaroWinklerTitle:
|
72 |
|
|
return new JaroWinklerTitle(getWeight());
|
73 |
|
|
case Level2JaroWinkler:
|
74 |
|
|
return new Level2JaroWinkler(getWeight());
|
75 |
37195
|
claudio.at
|
case Level2JaroWinklerTitle:
|
76 |
|
|
return new Level2JaroWinklerTitle(getWeight());
|
77 |
36613
|
claudio.at
|
case Level2Levenstein:
|
78 |
|
|
return new Level2Levenstein(getWeight());
|
79 |
|
|
case Levenstein:
|
80 |
|
|
return new Levenstein(getWeight());
|
81 |
37195
|
claudio.at
|
case LevensteinTitle:
|
82 |
|
|
return new LevensteinTitle(getWeight());
|
83 |
36613
|
claudio.at
|
case SubStringLevenstein:
|
84 |
|
|
return new SubStringLevenstein(getWeight(), getLimit());
|
85 |
|
|
case YearLevenstein:
|
86 |
|
|
return new YearLevenstein(getWeight(), getLimit());
|
87 |
|
|
case SortedJaroWinkler:
|
88 |
|
|
return new SortedJaroWinkler(getWeight());
|
89 |
|
|
case SortedLevel2JaroWinkler:
|
90 |
|
|
return new SortedLevel2JaroWinkler(getWeight());
|
91 |
53171
|
claudio.at
|
case urlMatcher:
|
92 |
|
|
return new UrlMatcher(getParams(), getWeight());
|
93 |
37300
|
claudio.at
|
case ExactMatch:
|
94 |
|
|
return new ExactMatch(getWeight());
|
95 |
41137
|
claudio.at
|
case MustBeDifferent:
|
96 |
|
|
return new MustBeDifferent(getWeight());
|
97 |
37304
|
claudio.at
|
case AlwaysMatch:
|
98 |
|
|
return new AlwaysMatch(getWeight());
|
99 |
38600
|
claudio.at
|
case PersonCoAnchorsDistance:
|
100 |
|
|
return new PersonCoAnchorsDistance(getParams(), getWeight());
|
101 |
|
|
case PersonCoAuthorSurnamesDistance:
|
102 |
|
|
return new PersonCoAuthorSurnamesDistance(getParams(), getWeight());
|
103 |
40910
|
claudio.at
|
case PersonDistance:
|
104 |
|
|
return new PersonDistance(getParams(), getWeight());
|
105 |
36613
|
claudio.at
|
case Null:
|
106 |
|
|
return new NullDistanceAlgo();
|
107 |
|
|
default:
|
108 |
|
|
return new NullDistanceAlgo();
|
109 |
|
|
}
|
110 |
26600
|
sandro.lab
|
}
|
111 |
|
|
|
112 |
|
|
public boolean isIgnoreMissing() {
|
113 |
|
|
return ignoreMissing;
|
114 |
|
|
}
|
115 |
|
|
|
116 |
36613
|
claudio.at
|
public Type getType() {
|
117 |
|
|
return type;
|
118 |
|
|
}
|
119 |
|
|
|
120 |
|
|
public void setType(final Type type) {
|
121 |
|
|
this.type = type;
|
122 |
|
|
}
|
123 |
|
|
|
124 |
|
|
public boolean isOverrideMatch() {
|
125 |
|
|
return overrideMatch;
|
126 |
|
|
}
|
127 |
|
|
|
128 |
|
|
public void setOverrideMatch(final boolean overrideMatch) {
|
129 |
|
|
this.overrideMatch = overrideMatch;
|
130 |
|
|
}
|
131 |
|
|
|
132 |
26600
|
sandro.lab
|
@Override
|
133 |
|
|
public String toString() {
|
134 |
36613
|
claudio.at
|
return new Gson().toJson(this);
|
135 |
26600
|
sandro.lab
|
}
|
136 |
33135
|
claudio.at
|
|
137 |
36613
|
claudio.at
|
public double getWeight() {
|
138 |
|
|
return weight;
|
139 |
|
|
}
|
140 |
|
|
|
141 |
|
|
public void setWeight(final double weight) {
|
142 |
|
|
this.weight = weight;
|
143 |
|
|
}
|
144 |
|
|
|
145 |
|
|
public Algo getAlgo() {
|
146 |
|
|
return algo;
|
147 |
|
|
}
|
148 |
|
|
|
149 |
|
|
public void setAlgo(final Algo algo) {
|
150 |
|
|
this.algo = algo;
|
151 |
|
|
}
|
152 |
|
|
|
153 |
|
|
public int getLimit() {
|
154 |
|
|
return limit;
|
155 |
|
|
}
|
156 |
|
|
|
157 |
|
|
public void setLimit(final int limit) {
|
158 |
|
|
this.limit = limit;
|
159 |
|
|
}
|
160 |
|
|
|
161 |
38523
|
claudio.at
|
public Map<String, String> getParams() {
|
162 |
38423
|
claudio.at
|
return params;
|
163 |
|
|
}
|
164 |
|
|
|
165 |
38523
|
claudio.at
|
public void setParams(final Map<String, String> params) {
|
166 |
38423
|
claudio.at
|
this.params = params;
|
167 |
|
|
}
|
168 |
|
|
|
169 |
26600
|
sandro.lab
|
}
|