Project

General

Profile

« Previous | Next » 

Revision 53171

introduced url type and UrlMatcher

View differences:

modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/model/FieldValueImpl.java
1 1
package eu.dnetlib.pace.model;
2 2

  
3
import java.net.MalformedURLException;
4
import java.net.URL;
3 5
import java.util.Iterator;
4 6
import java.util.List;
5 7

  
6 8
import eu.dnetlib.pace.config.Type;
7 9
import org.apache.commons.collections.iterators.SingletonIterator;
10
import org.apache.commons.lang.StringUtils;
8 11

  
9 12
/**
10 13
 * The Class FieldValueImpl.
......
50 53
		case List:
51 54
			List<?> list = (List<?>) value;
52 55
			return list.isEmpty() || ((FieldValueImpl) list.get(0)).isEmpty();
56
		case URL:
57
			String str = value.toString();
58
			return StringUtils.isNotBlank(str) && isValidURL(str);
53 59
		default:
54 60
			return true;
55 61
		}
56 62
	}
57 63

  
64
	private boolean isValidURL(final String s) {
65
		try {
66
			new URL(value.toString());
67
			return true;
68
		} catch (MalformedURLException e) {
69
			return true;
70
		}
71
	}
72

  
58 73
	/*
59 74
	 * (non-Javadoc)
60 75
	 * 
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/model/FieldDef.java
88 88
			return new SortedJaroWinkler(getWeight());
89 89
		case SortedLevel2JaroWinkler:
90 90
			return new SortedLevel2JaroWinkler(getWeight());
91
		case urlMatcher:
92
			return new UrlMatcher(getParams(), getWeight());
91 93
		case ExactMatch:
92 94
			return new ExactMatch(getWeight());
93 95
		case MustBeDifferent:
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/config/Type.java
1 1
package eu.dnetlib.pace.config;
2 2

  
3 3
public enum Type {
4
	String, Int, List, JSON
4
	String, Int, List, JSON, URL
5 5
}
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/config/Algo.java
27 27
	SortedJaroWinkler,
28 28
	/** The Sorted level2 jaro winkler. */
29 29
	SortedLevel2JaroWinkler,
30
	/** Compares two urls */
31
	urlMatcher,
30 32
	/** Exact match algo. */
31 33
	ExactMatch,
32 34
	/**
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/distance/algo/UrlMatcher.java
1
package eu.dnetlib.pace.distance.algo;
2

  
3
import eu.dnetlib.pace.distance.ConfigurableDistanceAlgo;
4
import eu.dnetlib.pace.distance.DistanceAlgo;
5
import eu.dnetlib.pace.model.Field;
6

  
7
import java.net.MalformedURLException;
8
import java.net.URL;
9
import java.util.Map;
10

  
11
public class UrlMatcher extends ConfigurableDistanceAlgo implements DistanceAlgo {
12

  
13
    public UrlMatcher(Map<String, String> params, double weight) {
14
        super(params, weight);
15
    }
16

  
17
    @Override
18
    public double distance(Field a, Field b) {
19

  
20
        final URL urlA = asUrl(getFirstValue(a));
21
        final URL urlB = asUrl(getFirstValue(b));
22

  
23
        return urlA.getHost().equalsIgnoreCase(urlB.getHost()) ? 1.0 : 0.0;
24
    }
25

  
26
    private URL asUrl(final String value) {
27
        try {
28
            return new URL(value);
29
        } catch (MalformedURLException e) {
30
            // should not happen as checked by pace typing
31
            throw new IllegalStateException("invalid URL: " + value);
32
        }
33
    }
34

  
35
    @Override
36
    public double getWeight() {
37
        return super.getWeigth();
38
    }
39

  
40
}

Also available in: Unified diff