1
|
package eu.dnetlib.pace.model;
|
2
|
|
3
|
import java.util.ArrayList;
|
4
|
import java.util.Collections;
|
5
|
import java.util.List;
|
6
|
import java.util.Set;
|
7
|
|
8
|
import com.google.common.collect.Lists;
|
9
|
import com.google.common.collect.Sets;
|
10
|
|
11
|
public class PersonComparatorUtils {
|
12
|
|
13
|
private static final int MAX_FULLNAME_LENGTH = 50;
|
14
|
|
15
|
public static Set<String> getNgramsForPerson(String fullname) {
|
16
|
|
17
|
Set<String> set = Sets.newHashSet();
|
18
|
|
19
|
if (fullname.length() > MAX_FULLNAME_LENGTH) {
|
20
|
return set;
|
21
|
}
|
22
|
|
23
|
Person p = new Person(fullname, true);
|
24
|
|
25
|
if (p.isAccurate()) {
|
26
|
for (String name : p.getName()) {
|
27
|
for (String surname : p.getSurname()) {
|
28
|
set.add((name.charAt(0) + "_" + surname).toLowerCase());
|
29
|
}
|
30
|
}
|
31
|
} else {
|
32
|
List<String> list = p.getFullname();
|
33
|
for (int i = 0; i < list.size(); i++) {
|
34
|
if (list.get(i).length() > 1) {
|
35
|
for (int j = 0; j < list.size(); j++) {
|
36
|
if (i != j) {
|
37
|
set.add((list.get(j).charAt(0) + "_" + list.get(i)).toLowerCase());
|
38
|
}
|
39
|
}
|
40
|
}
|
41
|
}
|
42
|
}
|
43
|
|
44
|
return set;
|
45
|
}
|
46
|
|
47
|
public static boolean areSimilar(String s1, String s2) {
|
48
|
Person p1 = new Person(s1, true);
|
49
|
Person p2 = new Person(s2, true);
|
50
|
|
51
|
if (p1.isAccurate() && p2.isAccurate()) {
|
52
|
return verifyNames(p1.getName(), p2.getName()) && verifySurnames(p1.getSurname(), p2.getSurname());
|
53
|
} else {
|
54
|
return verifyFullnames(p1.getFullname(), p2.getFullname());
|
55
|
}
|
56
|
}
|
57
|
|
58
|
private static boolean verifyNames(List<String> list1, List<String> list2) {
|
59
|
return verifySimilarity(extractExtendedNames(list1), extractExtendedNames(list2))
|
60
|
&& verifySimilarity(extractInitials(list1), extractInitials(list2));
|
61
|
}
|
62
|
|
63
|
private static boolean verifySurnames(List<String> list1, List<String> list2) {
|
64
|
if (list1.size() != list2.size()) {
|
65
|
return false;
|
66
|
}
|
67
|
for (int i = 0; i < list1.size(); i++) {
|
68
|
if (!list1.get(i).equalsIgnoreCase(list2.get(i))) {
|
69
|
return false;
|
70
|
}
|
71
|
}
|
72
|
return true;
|
73
|
}
|
74
|
|
75
|
private static boolean verifyFullnames(List<String> list1, List<String> list2) {
|
76
|
Collections.sort(list1);
|
77
|
Collections.sort(list2);
|
78
|
return verifySimilarity(extractExtendedNames(list1), extractExtendedNames(list2))
|
79
|
&& verifySimilarity(extractInitials(list1), extractInitials(list2));
|
80
|
}
|
81
|
|
82
|
private static List<String> extractExtendedNames(List<String> list) {
|
83
|
ArrayList<String> res = Lists.newArrayList();
|
84
|
for (String s : list) {
|
85
|
if (s.length() > 1) {
|
86
|
res.add(s.toLowerCase());
|
87
|
}
|
88
|
}
|
89
|
return res;
|
90
|
}
|
91
|
|
92
|
private static List<String> extractInitials(List<String> list) {
|
93
|
ArrayList<String> res = Lists.newArrayList();
|
94
|
for (String s : list) {
|
95
|
res.add(s.substring(0, 1).toLowerCase());
|
96
|
}
|
97
|
return res;
|
98
|
}
|
99
|
|
100
|
private static boolean verifySimilarity(List<String> list1, List<String> list2) {
|
101
|
if (list1.size() > list2.size()) {
|
102
|
return verifySimilarity(list2, list1);
|
103
|
}
|
104
|
|
105
|
// NB: List2 is greater than list1 (or equal)
|
106
|
int pos = -1;
|
107
|
for (String s : list1) {
|
108
|
int curr = list2.indexOf(s);
|
109
|
if (curr > pos) {
|
110
|
list2.set(curr, "*"); // I invalidate the found element, example: "amm - amm"
|
111
|
pos = curr;
|
112
|
} else {
|
113
|
return false;
|
114
|
}
|
115
|
}
|
116
|
return true;
|
117
|
}
|
118
|
}
|