1
|
package eu.dnetlib.pace.distance.algo;
|
2
|
|
3
|
import java.util.HashSet;
|
4
|
import java.util.Map;
|
5
|
import java.util.Set;
|
6
|
|
7
|
import com.google.common.base.Function;
|
8
|
import com.google.common.base.Predicates;
|
9
|
import com.google.common.collect.Iterables;
|
10
|
import com.google.common.collect.Sets;
|
11
|
import com.google.common.collect.Sets.SetView;
|
12
|
import eu.dnetlib.pace.distance.ConfigurableDistanceAlgo;
|
13
|
import eu.dnetlib.pace.distance.DistanceAlgo;
|
14
|
import eu.dnetlib.pace.model.Field;
|
15
|
import eu.dnetlib.pace.model.FieldListImpl;
|
16
|
import eu.dnetlib.pace.model.gt.CoAuthor;
|
17
|
import eu.dnetlib.pace.model.gt.CoAuthors;
|
18
|
import eu.dnetlib.pace.model.gt.GTAuthor;
|
19
|
import org.apache.commons.logging.Log;
|
20
|
import org.apache.commons.logging.LogFactory;
|
21
|
|
22
|
public class PersonDistance extends ConfigurableDistanceAlgo implements DistanceAlgo {
|
23
|
|
24
|
/**
|
25
|
* logger.
|
26
|
*/
|
27
|
private static final Log log = LogFactory.getLog(PersonDistance.class); // NOPMD by marko on 11/24/08 5:02 PM
|
28
|
|
29
|
private Integer commonAnchors = null;
|
30
|
|
31
|
private Integer commonSurnames = null;
|
32
|
|
33
|
public PersonDistance(final Map<String, String> params, final double weight) {
|
34
|
super(params, weight);
|
35
|
|
36
|
final String commonAnchors = getParams().get("common.anchors");
|
37
|
this.commonAnchors = isEnabled(commonAnchors) ? Integer.parseInt(commonAnchors) : null;
|
38
|
log.debug("min common anchors: " + commonAnchors);
|
39
|
|
40
|
final String commonSurnames = getParams().get("common.surnames");
|
41
|
this.commonSurnames = isEnabled(commonSurnames) ? Integer.parseInt(commonSurnames) : null;
|
42
|
log.debug("min common surnames: " + commonSurnames);
|
43
|
}
|
44
|
|
45
|
@Override
|
46
|
public double distance(final Field a, final Field b) {
|
47
|
|
48
|
final GTAuthor ga = Iterables.getLast(GTAuthor.fromOafJson(((FieldListImpl) a).stringList()));
|
49
|
final GTAuthor gb = Iterables.getLast(GTAuthor.fromOafJson(((FieldListImpl) b).stringList()));
|
50
|
|
51
|
if (commonAnchors != null && (anchorsInCommon(ga, gb).size() >= commonAnchors)) // log.info("matched coauthor intersection");
|
52
|
return 1.0;
|
53
|
|
54
|
if (commonSurnames != null && (surnamesInCommon(ga, gb).size() >= commonSurnames))
|
55
|
return 1.0;
|
56
|
|
57
|
|
58
|
|
59
|
return 0;
|
60
|
}
|
61
|
|
62
|
private boolean isEnabled(final String property) {
|
63
|
return (property != null) && (Integer.parseInt(property) >= 0);
|
64
|
}
|
65
|
|
66
|
private Set<String> anchorsInCommon(final GTAuthor a, final GTAuthor b) {
|
67
|
final SetView<String> set = Sets.intersection(getAnchorIds(a.getCoAuthors()), getAnchorIds(b.getCoAuthors()));
|
68
|
if (log.isDebugEnabled()) {
|
69
|
log.debug(
|
70
|
String.format("anchors intersection '%s' - '%s': size: %s, %s", a.getAuthor().getFullname(), b.getAuthor().getFullname(), set.size(), set));
|
71
|
}
|
72
|
return set;
|
73
|
}
|
74
|
|
75
|
private Set<String> getAnchorIds(final CoAuthors ca) {
|
76
|
if ((ca == null) || ca.isEmpty()) return new HashSet<String>();
|
77
|
final Iterable<String> anchorIds = Iterables.transform(ca, new Function<CoAuthor, String>() {
|
78
|
|
79
|
@Override
|
80
|
public String apply(final CoAuthor c) {
|
81
|
return c.getAnchorId();
|
82
|
}
|
83
|
});
|
84
|
final Iterable<String> filtered = Iterables.filter(anchorIds, Predicates.notNull());
|
85
|
return Sets.newHashSet(filtered);
|
86
|
}
|
87
|
|
88
|
private Set<String> surnamesInCommon(final GTAuthor a, final GTAuthor b) {
|
89
|
final SetView<String> set = Sets.intersection(getSurnames(a.getCoAuthors()), getSurnames(b.getCoAuthors()));
|
90
|
if (log.isDebugEnabled()) {
|
91
|
log.debug(String.format("surnames intersection '%s' - '%s' size: %s, %s", a.getAuthor().getFullname(), b.getAuthor().getFullname(),
|
92
|
set.size(), set));
|
93
|
}
|
94
|
return set;
|
95
|
}
|
96
|
|
97
|
private Set<String> getSurnames(final CoAuthors ca) {
|
98
|
if ((ca == null) || ca.isEmpty()) return new HashSet<String>();
|
99
|
return Sets.newHashSet(Iterables.filter(Iterables.transform(ca, new Function<CoAuthor, String>() {
|
100
|
|
101
|
@Override
|
102
|
public String apply(final CoAuthor c) {
|
103
|
return c.getSecondnames();
|
104
|
}
|
105
|
}), Predicates.notNull()));
|
106
|
}
|
107
|
|
108
|
@Override
|
109
|
public double getWeight() {
|
110
|
return getWeigth();
|
111
|
}
|
112
|
|
113
|
}
|