Project

General

Profile

1
package eu.dnetlib.pace.distance.algo;
2

    
3
import java.util.HashSet;
4
import java.util.Map;
5
import java.util.Set;
6

    
7
import com.google.common.base.Function;
8
import com.google.common.base.Predicates;
9
import com.google.common.collect.Iterables;
10
import com.google.common.collect.Sets;
11
import com.google.common.collect.Sets.SetView;
12
import eu.dnetlib.pace.distance.ConfigurableDistanceAlgo;
13
import eu.dnetlib.pace.distance.DistanceAlgo;
14
import eu.dnetlib.pace.model.Field;
15
import eu.dnetlib.pace.model.FieldListImpl;
16
import eu.dnetlib.pace.model.gt.CoAuthor;
17
import eu.dnetlib.pace.model.gt.CoAuthors;
18
import eu.dnetlib.pace.model.gt.GTAuthor;
19
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21

    
22
public class PersonDistance extends ConfigurableDistanceAlgo implements DistanceAlgo {
23

    
24
	/**
25
	 * logger.
26
	 */
27
	private static final Log log = LogFactory.getLog(PersonDistance.class); // NOPMD by marko on 11/24/08 5:02 PM
28

    
29
	private Integer commonAnchors = null;
30

    
31
	private Integer commonSurnames = null;
32

    
33
	public PersonDistance(final Map<String, String> params, final double weight) {
34
		super(params, weight);
35

    
36
		final String commonAnchors = getParams().get("common.anchors");
37
		this.commonAnchors = isEnabled(commonAnchors) ? Integer.parseInt(commonAnchors) : null;
38
		log.debug("min common anchors: " + commonAnchors);
39

    
40
		final String commonSurnames = getParams().get("common.surnames");
41
		this.commonSurnames = isEnabled(commonSurnames) ? Integer.parseInt(commonSurnames) : null;
42
		log.debug("min common surnames: " + commonSurnames);
43
	}
44

    
45
	@Override
46
	public double distance(final Field a, final Field b) {
47

    
48
		final GTAuthor ga = Iterables.getLast(GTAuthor.fromOafJson(((FieldListImpl) a).stringList()));
49
		final GTAuthor gb = Iterables.getLast(GTAuthor.fromOafJson(((FieldListImpl) b).stringList()));
50

    
51
		if (commonAnchors != null && (anchorsInCommon(ga, gb).size() >= commonAnchors)) // log.info("matched coauthor intersection");
52
			return 1.0;
53

    
54
		if (commonSurnames != null && (surnamesInCommon(ga, gb).size() >= commonSurnames))
55
			return 1.0;
56

    
57

    
58

    
59
		return 0;
60
	}
61

    
62
	private boolean isEnabled(final String property) {
63
		return (property != null) && (Integer.parseInt(property) >= 0);
64
	}
65

    
66
	private Set<String> anchorsInCommon(final GTAuthor a, final GTAuthor b) {
67
		final SetView<String> set = Sets.intersection(getAnchorIds(a.getCoAuthors()), getAnchorIds(b.getCoAuthors()));
68
		if (log.isDebugEnabled()) {
69
			log.debug(
70
					String.format("anchors intersection '%s' - '%s': size: %s, %s", a.getAuthor().getFullname(), b.getAuthor().getFullname(), set.size(), set));
71
		}
72
		return set;
73
	}
74

    
75
	private Set<String> getAnchorIds(final CoAuthors ca) {
76
		if ((ca == null) || ca.isEmpty()) return new HashSet<String>();
77
		final Iterable<String> anchorIds = Iterables.transform(ca, new Function<CoAuthor, String>() {
78

    
79
			@Override
80
			public String apply(final CoAuthor c) {
81
				return c.getAnchorId();
82
			}
83
		});
84
		final Iterable<String> filtered = Iterables.filter(anchorIds, Predicates.notNull());
85
		return Sets.newHashSet(filtered);
86
	}
87

    
88
	private Set<String> surnamesInCommon(final GTAuthor a, final GTAuthor b) {
89
		final SetView<String> set = Sets.intersection(getSurnames(a.getCoAuthors()), getSurnames(b.getCoAuthors()));
90
		if (log.isDebugEnabled()) {
91
			log.debug(String.format("surnames intersection '%s' - '%s' size: %s, %s", a.getAuthor().getFullname(), b.getAuthor().getFullname(),
92
					set.size(), set));
93
		}
94
		return set;
95
	}
96

    
97
	private Set<String> getSurnames(final CoAuthors ca) {
98
		if ((ca == null) || ca.isEmpty()) return new HashSet<String>();
99
		return Sets.newHashSet(Iterables.filter(Iterables.transform(ca, new Function<CoAuthor, String>() {
100

    
101
			@Override
102
			public String apply(final CoAuthor c) {
103
				return c.getSecondnames();
104
			}
105
		}), Predicates.notNull()));
106
	}
107

    
108
	@Override
109
	public double getWeight() {
110
		return getWeigth();
111
	}
112

    
113
}
(15-15/21)