1 |
41642
|
claudio.at
|
package eu.dnetlib.pace.distance.algo;
|
2 |
38600
|
claudio.at
|
|
3 |
|
|
import java.util.HashSet;
|
4 |
|
|
import java.util.Map;
|
5 |
|
|
import java.util.Set;
|
6 |
|
|
|
7 |
|
|
import com.google.common.base.Function;
|
8 |
|
|
import com.google.common.base.Predicates;
|
9 |
|
|
import com.google.common.collect.Iterables;
|
10 |
|
|
import com.google.common.collect.Sets;
|
11 |
|
|
import com.google.common.collect.Sets.SetView;
|
12 |
41642
|
claudio.at
|
import eu.dnetlib.pace.distance.ConfigurableDistanceAlgo;
|
13 |
|
|
import eu.dnetlib.pace.distance.DistanceAlgo;
|
14 |
38600
|
claudio.at
|
import eu.dnetlib.pace.model.Field;
|
15 |
|
|
import eu.dnetlib.pace.model.FieldListImpl;
|
16 |
|
|
import eu.dnetlib.pace.model.gt.CoAuthor;
|
17 |
|
|
import eu.dnetlib.pace.model.gt.CoAuthors;
|
18 |
|
|
import eu.dnetlib.pace.model.gt.GTAuthor;
|
19 |
40910
|
claudio.at
|
import org.apache.commons.logging.Log;
|
20 |
|
|
import org.apache.commons.logging.LogFactory;
|
21 |
38600
|
claudio.at
|
|
22 |
|
|
public class PersonCoAnchorsDistance extends ConfigurableDistanceAlgo implements DistanceAlgo {
|
23 |
|
|
|
24 |
|
|
/**
|
25 |
|
|
* logger.
|
26 |
|
|
*/
|
27 |
|
|
private static final Log log = LogFactory.getLog(PersonCoAnchorsDistance.class); // NOPMD by marko on 11/24/08 5:02 PM
|
28 |
|
|
|
29 |
|
|
public PersonCoAnchorsDistance(final Map<String, String> params, final double weight) {
|
30 |
|
|
super(params, weight);
|
31 |
|
|
}
|
32 |
|
|
|
33 |
|
|
@Override
|
34 |
|
|
public double distance(final Field a, final Field b) {
|
35 |
|
|
|
36 |
41504
|
claudio.at
|
final GTAuthor ga = Iterables.getLast(GTAuthor.fromOafJson(((FieldListImpl) a).stringList()));
|
37 |
|
|
final GTAuthor gb = Iterables.getLast(GTAuthor.fromOafJson(((FieldListImpl) b).stringList()));
|
38 |
38600
|
claudio.at
|
|
39 |
|
|
final Integer commonAnchors = Integer.parseInt(getParams().get("common.anchors"));
|
40 |
|
|
// log.info("min common anchors: " + commonAnchors);
|
41 |
|
|
if (isEnabled(commonAnchors) && (anchorsInCommon(ga, gb).size() >= commonAnchors)) // log.info("matched coauthor intersection");
|
42 |
|
|
return 1.0;
|
43 |
|
|
|
44 |
|
|
return 0;
|
45 |
|
|
}
|
46 |
|
|
|
47 |
|
|
private boolean isEnabled(final Integer property) {
|
48 |
|
|
return (property != null) && (property >= 0);
|
49 |
|
|
}
|
50 |
|
|
|
51 |
|
|
private Set<String> anchorsInCommon(final GTAuthor a, final GTAuthor b) {
|
52 |
|
|
final SetView<String> intersection = Sets.intersection(getAnchorIds(a.getCoAuthors()), getAnchorIds(b.getCoAuthors()));
|
53 |
40910
|
claudio.at
|
//log.info(String.format("anchors intersection '%s' - '%s': %s", a.getAuthor().getFullname(), b.getAuthor().getFullname(), intersection.size()));
|
54 |
38600
|
claudio.at
|
return intersection;
|
55 |
|
|
}
|
56 |
|
|
|
57 |
|
|
private Set<String> getAnchorIds(final CoAuthors ca) {
|
58 |
|
|
if ((ca == null) || ca.isEmpty()) return new HashSet<String>();
|
59 |
|
|
final Iterable<String> anchorIds = Iterables.transform(ca, new Function<CoAuthor, String>() {
|
60 |
|
|
|
61 |
|
|
@Override
|
62 |
|
|
public String apply(final CoAuthor c) {
|
63 |
|
|
return c.getAnchorId();
|
64 |
|
|
}
|
65 |
|
|
});
|
66 |
|
|
final Iterable<String> filtered = Iterables.filter(anchorIds, Predicates.notNull());
|
67 |
|
|
return Sets.newHashSet(filtered);
|
68 |
|
|
}
|
69 |
|
|
|
70 |
|
|
@Override
|
71 |
|
|
public double getWeight() {
|
72 |
|
|
return getWeigth();
|
73 |
|
|
}
|
74 |
|
|
|
75 |
|
|
}
|