Project

General

Profile

1
package eu.dnetlib.pace.model;
2

    
3
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
10

    
11
public class PersonComparatorUtils {
12

    
13
	private static final int MAX_FULLNAME_LENGTH = 50;
14

    
15
	public static Set<String> getNgramsForPerson(String fullname) {
16

    
17
		Set<String> set = Sets.newHashSet();
18

    
19
		if (fullname.length() > MAX_FULLNAME_LENGTH) {
20
			return set;
21
		}
22

    
23
		Person p = new Person(fullname, true);
24

    
25
		if (p.isAccurate()) {
26
			for (String name : p.getName()) {
27
				for (String surname : p.getSurname()) {
28
					set.add((name.charAt(0) + "_" + surname).toLowerCase());
29
				}
30
			}
31
		} else {
32
			List<String> list = p.getFullname();
33
			for (int i = 0; i < list.size(); i++) {
34
				if (list.get(i).length() > 1) {
35
					for (int j = 0; j < list.size(); j++) {
36
						if (i != j) {
37
							set.add((list.get(j).charAt(0) + "_" + list.get(i)).toLowerCase());
38
						}
39
					}
40
				}
41
			}
42
		}
43

    
44
		return set;
45
	}
46

    
47
	public static boolean areSimilar(String s1, String s2) {
48
		Person p1 = new Person(s1, true);
49
		Person p2 = new Person(s2, true);
50

    
51
		if (p1.isAccurate() && p2.isAccurate()) {
52
			return verifyNames(p1.getName(), p2.getName()) && verifySurnames(p1.getSurname(), p2.getSurname());
53
		} else {
54
			return verifyFullnames(p1.getFullname(), p2.getFullname());
55
		}
56
	}
57

    
58
	private static boolean verifyNames(List<String> list1, List<String> list2) {
59
		return verifySimilarity(extractExtendedNames(list1), extractExtendedNames(list2))
60
				&& verifySimilarity(extractInitials(list1), extractInitials(list2));
61
	}
62

    
63
	private static boolean verifySurnames(List<String> list1, List<String> list2) {
64
		if (list1.size() != list2.size()) {
65
			return false;
66
		}
67
		for (int i = 0; i < list1.size(); i++) {
68
			if (!list1.get(i).equalsIgnoreCase(list2.get(i))) {
69
				return false;
70
			}
71
		}
72
		return true;
73
	}
74

    
75
	private static boolean verifyFullnames(List<String> list1, List<String> list2) {
76
		Collections.sort(list1);
77
		Collections.sort(list2);
78
		return verifySimilarity(extractExtendedNames(list1), extractExtendedNames(list2))
79
				&& verifySimilarity(extractInitials(list1), extractInitials(list2));
80
	}
81

    
82
	private static List<String> extractExtendedNames(List<String> list) {
83
		ArrayList<String> res = Lists.newArrayList();
84
		for (String s : list) {
85
			if (s.length() > 1) {
86
				res.add(s.toLowerCase());
87
			}
88
		}
89
		return res;
90
	}
91

    
92
	private static List<String> extractInitials(List<String> list) {
93
		ArrayList<String> res = Lists.newArrayList();
94
		for (String s : list) {
95
			res.add(s.substring(0, 1).toLowerCase());
96
		}
97
		return res;
98
	}
99

    
100
	private static boolean verifySimilarity(List<String> list1, List<String> list2) {
101
		if (list1.size() > list2.size()) {
102
			return verifySimilarity(list2, list1);
103
		}
104

    
105
		// NB: List2 is greater than list1 (or equal)
106
		int pos = -1;
107
		for (String s : list1) {
108
			int curr = list2.indexOf(s);
109
			if (curr > pos) {
110
				list2.set(curr, "*"); // I invalidate the found element, example: "amm - amm" 
111
				pos = curr;
112
			} else {
113
				return false;
114
			}
115
		}
116
		return true;
117
	}
118
}
(15-15/15)