Project

General

Profile

« Previous | Next » 

Revision 28483

branch 1.2

View differences:

DistanceAlgoTest.java
1
package eu.dnetlib.pace.distance;
2

  
3
import static java.lang.String.format;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import org.apache.commons.lang.StringUtils;
import org.junit.Before;
import org.junit.Test;

import com.google.common.collect.Lists;
import com.wcohen.ss.JaroWinkler;
11

  
12
/**
13
 * The Class DistanceAlgoTest.
14
 */
15
public class DistanceAlgoTest {
16

  
17
	/** The Constant alpha. */
18
	private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
19

  
20
	/** The Constant aliases_from. */
21
	private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎";
22

  
23
	/** The Constant aliases_to. */
24
	private static final String aliases_to = "0123456789+-=()n0123456789+-=()";
25

  
26
	/** The Constant TEST_STRING. */
27
	private final static String TEST_STRING = "Toshiba NB550D: netbook su piattaforma AMD Fusion⁽¹²⁾.";
28

  
29
	/**
30
	 * Setup.
31
	 */
32
	@Before
33
	public void setup() {
34
		System.out.println("****************************************************************");
35
		System.out.println("Test String    : " + TEST_STRING);
36
	}
37

  
38
	/**
39
	 * Test get numbers.
40
	 */
41
	@Test
42
	public void testGetNumbers() {
43
		System.out.println("Numbers        : " + getNumbers(TEST_STRING));
44
	}
45

  
46
	/**
47
	 * Test remove symbols.
48
	 */
49
	@Test
50
	public void testRemoveSymbols() {
51
		System.out.println("Without symbols: " + removeSymbols(TEST_STRING));
52
	}
53

  
54
	/**
55
	 * Test fix aliases.
56
	 */
57
	@Test
58
	public void testFixAliases() {
59
		System.out.println("Fixed aliases  : " + fixAliases(TEST_STRING));
60
	}
61

  
62
	/**
63
	 * Test jaro winkler.
64
	 */
65
	@Test
66
	public void testJaroWinkler() {
67

  
68
		JaroWinkler algo = new JaroWinkler();
69
		double d = algo.score("Artin Bardi Manghi", "Artini Bardi Manghi");
70

  
71
		System.out.println(format("d -> %s", d));
72
	}
73

  
74
	/**
75
	 * Gets the numbers.
76
	 * 
77
	 * @param s
78
	 *            the s
79
	 * @return the numbers
80
	 */
81
	private String getNumbers(final String s) {
82
		return s.replaceAll("\\D", "");
83
	}
84

  
85
	/**
86
	 * Removes the symbols.
87
	 * 
88
	 * @param s
89
	 *            the s
90
	 * @return the string
91
	 */
92
	private String removeSymbols(final String s) {
93
		final StringBuilder sb = new StringBuilder();
94

  
95
		for (char ch : Lists.charactersOf(s)) {
96
			sb.append(StringUtils.contains(alpha, ch) ? ch : " ");
97
		}
98
		return sb.toString().replaceAll("\\s+", " ");
99
	}
100

  
101
	/**
102
	 * Fix aliases.
103
	 * 
104
	 * @param s
105
	 *            the s
106
	 * @return the string
107
	 */
108
	private String fixAliases(final String s) {
109
		final StringBuilder sb = new StringBuilder();
110
		for (char ch : Lists.charactersOf(s)) {
111
			int i = StringUtils.indexOf(aliases_from, ch);
112
			sb.append(i >= 0 ? aliases_to.charAt(i) : ch);
113
		}
114
		return sb.toString();
115
	}
116

  
117
}

Also available in: Unified diff