Project

General

Profile

1
package eu.dnetlib.data.information.oai.publisher;
2

    
3
import java.text.Normalizer;
4

    
5
import org.apache.commons.lang3.StringEscapeUtils;
6
import org.junit.Ignore;
7
import org.junit.Test;
8

    
9
import static org.junit.Assert.assertEquals;
10
import static org.junit.Assert.assertFalse;
11

    
12
public class PublisherMiscTest {
13

    
14
	@Ignore
15
	@Test
16
	//FIXME: do we really need to change © into © ?
17
	public void test() {
18
		String id = "NonavCreation.filmportal.de/DIF_NonAVCreation_EUROPA_TM & © Aardman Animations, LTD";
19
		String newId = StringEscapeUtils.escapeXml11(id);
20
		assertEquals("NonavCreation.filmportal.de/DIF_NonAVCreation_EUROPA_TM & © Aardman Animations, LTD", newId);
21
		assertFalse(id.equals(newId));
22
	}
23

    
24
	@Test
25
	public void test2() {
26
		// Hochschulschriftenserver - Universität Frankfurt am Main
27
		String s = "Publikationenserver der Georg-August-Universität Göttingen";
28
		System.out.println("String to normalize: " + s);
29
		s = StringEscapeUtils.unescapeXml(s);
30
		System.out.println("unescaped: " + s);
31
		s = Normalizer.normalize(s, Normalizer.Form.NFD);
32
		System.out.println("normalized: " + s);
33
		// remove tilde, dots... over letters
34
		s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^-_]]", "");
35
		// change punctuation into an underscore
36
		s = s.replaceAll("[\\p{Punct}&&[^-_]]", "_");
37
		// remove all non-word charcheters
38
		s = s.replaceAll("[\\W&&[^-_]]", "");
39
		System.out.println("Converted setSpec to: " + s);
40
	}
41

    
42
}
    (1-1/1)