Revision 53171
Added by Claudio Atzori over 5 years ago
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.pace.model; |
2 | 2 |
|
3 |
import java.net.MalformedURLException; |
|
4 |
import java.net.URL; |
|
3 | 5 |
import java.util.Iterator; |
4 | 6 |
import java.util.List; |
5 | 7 |
|
6 | 8 |
import eu.dnetlib.pace.config.Type; |
7 | 9 |
import org.apache.commons.collections.iterators.SingletonIterator; |
10 |
import org.apache.commons.lang.StringUtils; |
|
8 | 11 |
|
9 | 12 |
/** |
10 | 13 |
* The Class FieldValueImpl. |
... | ... | |
50 | 53 |
case List: |
51 | 54 |
List<?> list = (List<?>) value; |
52 | 55 |
return list.isEmpty() || ((FieldValueImpl) list.get(0)).isEmpty(); |
56 |
case URL: |
|
57 |
String str = value.toString(); |
|
58 |
return StringUtils.isNotBlank(str) && isValidURL(str); |
|
53 | 59 |
default: |
54 | 60 |
return true; |
55 | 61 |
} |
56 | 62 |
} |
57 | 63 |
|
64 |
private boolean isValidURL(final String s) { |
|
65 |
try { |
|
66 |
new URL(value.toString()); |
|
67 |
return true; |
|
68 |
} catch (MalformedURLException e) { |
|
69 |
return true; |
|
70 |
} |
|
71 |
} |
|
72 |
|
|
58 | 73 |
/* |
59 | 74 |
* (non-Javadoc) |
60 | 75 |
* |
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/model/FieldDef.java | ||
---|---|---|
88 | 88 |
return new SortedJaroWinkler(getWeight()); |
89 | 89 |
case SortedLevel2JaroWinkler: |
90 | 90 |
return new SortedLevel2JaroWinkler(getWeight()); |
91 |
case urlMatcher: |
|
92 |
return new UrlMatcher(getParams(), getWeight()); |
|
91 | 93 |
case ExactMatch: |
92 | 94 |
return new ExactMatch(getWeight()); |
93 | 95 |
case MustBeDifferent: |
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/config/Type.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.pace.config; |
2 | 2 |
|
3 | 3 |
public enum Type { |
4 |
String, Int, List, JSON |
|
4 |
String, Int, List, JSON, URL
|
|
5 | 5 |
} |
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/config/Algo.java | ||
---|---|---|
27 | 27 |
SortedJaroWinkler, |
28 | 28 |
/** The Sorted level2 jaro winkler. */ |
29 | 29 |
SortedLevel2JaroWinkler, |
30 |
/** Compares two urls */ |
|
31 |
urlMatcher, |
|
30 | 32 |
/** Exact match algo. */ |
31 | 33 |
ExactMatch, |
32 | 34 |
/** |
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.distance.algo; |
|
2 |
|
|
3 |
import eu.dnetlib.pace.distance.ConfigurableDistanceAlgo; |
|
4 |
import eu.dnetlib.pace.distance.DistanceAlgo; |
|
5 |
import eu.dnetlib.pace.model.Field; |
|
6 |
|
|
7 |
import java.net.MalformedURLException; |
|
8 |
import java.net.URL; |
|
9 |
import java.util.Map; |
|
10 |
|
|
11 |
public class UrlMatcher extends ConfigurableDistanceAlgo implements DistanceAlgo { |
|
12 |
|
|
13 |
public UrlMatcher(Map<String, String> params, double weight) { |
|
14 |
super(params, weight); |
|
15 |
} |
|
16 |
|
|
17 |
@Override |
|
18 |
public double distance(Field a, Field b) { |
|
19 |
|
|
20 |
final URL urlA = asUrl(getFirstValue(a)); |
|
21 |
final URL urlB = asUrl(getFirstValue(b)); |
|
22 |
|
|
23 |
return urlA.getHost().equalsIgnoreCase(urlB.getHost()) ? 1.0 : 0.0; |
|
24 |
} |
|
25 |
|
|
26 |
private URL asUrl(final String value) { |
|
27 |
try { |
|
28 |
return new URL(value); |
|
29 |
} catch (MalformedURLException e) { |
|
30 |
// should not happen as checked by pace typing |
|
31 |
throw new IllegalStateException("invalid URL: " + value); |
|
32 |
} |
|
33 |
} |
|
34 |
|
|
35 |
@Override |
|
36 |
public double getWeight() { |
|
37 |
return super.getWeigth(); |
|
38 |
} |
|
39 |
|
|
40 |
} |
Also available in: Unified diff
introduced url type and UrlMatcher