Project

General

Profile

1
package eu.dnetlib.data.utils;
2

    
3
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.dnetlib.data.mdstore.plugins.objects.Project;
16

    
17
/**
18
 * Created by sandro on 12/6/16.
19
 */
20
public class XsltFunctions {
21

    
22
	private static final Log log = LogFactory.getLog(XsltFunctions.class);
23

    
24
	public static String cleanNames(final String s) {
25
		if (StringUtils.isBlank(s)) { return ""; }
26
		return cleanNames(s, StringUtils.countMatches(s, ",") > 1 ? "," : ";");
27
	}
28

    
29
	private static String cleanNames(final String s, final String sep) {
30
		return Arrays.stream(s.split(sep))
31
				.map(String::trim)
32
				.map(XsltFunctions::clean)
33
				.map(XsltFunctions::capitalize)
34
				.filter(StringUtils::isNotBlank)
35
				.collect(Collectors.joining("#"));
36
	}
37

    
38
	private static String clean(final String s) {
39
		return s.replaceAll("\\(.*\\)", "")
40
				.replaceAll("\\[.*\\]", "")
41
				.replaceAll("(?i)^et\\.? al(\\.|\\s)*$", "")
42
				.replaceAll("(?i)\\s*et\\.? al(\\.|\\s)*$", "")
43
				.replaceAll("\\d|\\*", "")
44
				.replaceAll("^(\\s|\\-|\\.)+", "")
45
				.replaceAll("\\,", "")
46
				.replaceAll("\\.", ". ")
47
				.trim();
48
	}
49

    
50
	public static String cleanDoi(final String doi) {
51
		final String x = doi.replaceAll("\\?", "")
52
				.replaceAll("\\s*\\/\\s*", "/")
53
				.replaceAll("\\s*\\-\\s*", "-")
54
				.trim()
55
				.replaceAll("\\s", "_");
56

    
57
		if (!x.equals(doi)) {
58
			log.info("Cleaning doi: " + doi + " -> " + x);
59
		}
60

    
61
		return x;
62
	}
63

    
64
	public static String capitalize(final String s) {
65
		return Arrays.stream(s.split(" "))
66
				.map(String::toLowerCase)
67
				.map(StringUtils::capitalize)
68
				.map(XsltFunctions::fixApostrophes)
69
				.collect(Collectors.joining(" "));
70
	}
71

    
72
	public static String fixApostrophes(final String s) {
73
		return s.replaceAll("'a", "'A").replaceAll("'e", "'E").replaceAll("'i", "'I").replaceAll("'o", "'O").replaceAll("'u", "'U")
74
				.replaceAll("a'", "à").replaceAll("e'", "è").replaceAll("i'", "ì").replaceAll("o'", "ò").replaceAll("u'", "ù");
75
	}
76

    
77
	public static boolean isValidProject(final String id) {
78
		return Project.isValid(id);
79
	}
80

    
81
	public static String calculatePersonName(final String s) {
82
		final Pattern pattern = Pattern.compile("info:cnr-pdr\\/author\\/(.+):(.+)\\/(.+)\\/(.+)");
83
		final Matcher matcher = pattern.matcher(s);
84
		return matcher.find() ? capitalize(String.format("%s %s", matcher.group(4), matcher.group(3))) : "";
85
	}
86

    
87
	public static String serverName(final String s) {
88
		try {
89
			return new URL(s).getHost();
90
		} catch (final MalformedURLException e) {
91
			return "Unknown";
92
		}
93
	}
94

    
95
	public static String md5(final String s) {
96
		return DigestUtils.md5Hex(s);
97
	}
98

    
99
	// <xsl:for-each select="tokenize(istiFunction:cleanName(.), ';')">
100
	// <xsl:choose>
101
	// <xsl:when test="matches(normalize-space(.), ',(\s*[a-zA-Z]\.)+$')">
102
	// <creator>
103
	// <creatorName>
104
	// <xsl:value-of select="normalize-space(translate(.,',',' '))"/>
105
	// </creatorName>
106
	// </creator>
107
	// </xsl:when>
108
	//
109
	// <xsl:when test="matches(normalize-space(.), '(^[a-zA-Z\.]+,\s?[a-zA-Z\.\s]+$)|(^[a-zA-Z\.\s]+,\s?[a-zA-Z\.]+$)')">
110
	// <creator>
111
	// <creatorName>
112
	// <xsl:value-of select="normalize-space(translate(.,',',' '))"/>
113
	// </creatorName>
114
	// </creator>
115
	// </xsl:when>
116
	//
117
	// <xsl:otherwise>
118
	// <xsl:for-each select="tokenize(., ',')">
119
	// <creator>
120
	// <creatorName>
121
	// <xsl:value-of select="normalize-space(.)"/>
122
	// </creatorName>
123
	// </creator>
124
	// </xsl:for-each>
125
	// </xsl:otherwise>
126
	// </xsl:choose>
127
	// </xsl:for-each>
128
	//
129

    
130
}
(5-5/5)