Project

General

Profile

1
package eu.dnetlib.data.utils;
2

    
3
import java.net.MalformedURLException;
4
import java.net.URL;
5
import java.util.Arrays;
6
import java.util.regex.Matcher;
7
import java.util.regex.Pattern;
8
import java.util.stream.Collectors;
9

    
10
import org.apache.commons.codec.digest.DigestUtils;
11
import org.apache.commons.lang3.StringUtils;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14

    
15
import eu.dnetlib.data.mdstore.plugins.objects.Project;
16

    
17
/**
18
 * Created by sandro on 12/6/16.
19
 */
20
public class XsltFunctions {
21

    
22
	private static final Log log = LogFactory.getLog(XsltFunctions.class);
23

    
24
	public static String cleanNames(final String s) {
25
		if (StringUtils.isBlank(s)) { return ""; }
26
		return cleanNames(s, StringUtils.countMatches(s, ",") > 1 ? "," : ";");
27
	}
28

    
29
	public static String extractYear(final String s) {
30
		if (StringUtils.isBlank(s)) { return ""; }
31
		final Matcher m = Pattern.compile("^.*(\\d{4}).*$").matcher(s);
32
		return m.matches() ? m.group(1) : "";
33
	}
34

    
35
	private static String cleanNames(final String s, final String sep) {
36
		return Arrays.stream(s.split(sep))
37
				.map(String::trim)
38
				.map(XsltFunctions::clean)
39
				.map(XsltFunctions::capitalize)
40
				.filter(StringUtils::isNotBlank)
41
				.collect(Collectors.joining("#"));
42
	}
43

    
44
	private static String clean(final String s) {
45
		return s.replaceAll("\\(.*\\)", "")
46
				.replaceAll("\\[.*\\]", "")
47
				.replaceAll("(?i)^et\\.? al(\\.|\\s)*$", "")
48
				.replaceAll("(?i)\\s*et\\.? al(\\.|\\s)*$", "")
49
				.replaceAll("\\d|\\*", "")
50
				.replaceAll("^(\\s|\\-|\\.)+", "")
51
				.replaceAll("\\,", "")
52
				.replaceAll("\\.", ". ")
53
				.trim();
54
	}
55

    
56
	public static String cleanDoi(final String doi) {
57
		if (StringUtils.isBlank(doi)) { return ""; }
58

    
59
		final String x = doi.replaceAll("\\?", "")
60
				.replaceAll("\\s*\\/\\s*", "/")
61
				.replaceAll("\\s*\\-\\s*", "-")
62
				.trim()
63
				.replaceAll("\\s", "_")
64
				.toLowerCase();
65

    
66
		if (!x.equals(doi)) {
67
			log.info("Cleaning doi: " + doi + " -> " + x);
68
		}
69

    
70
		return x;
71
	}
72

    
73
	public static String capitalize(final String s) {
74
		return Arrays.stream(fixApostrophes(s.toLowerCase()).split(" "))
75
				.map(p -> Arrays.stream(p.split("-"))
76
						.map(StringUtils::capitalize)
77
						.collect(Collectors.joining("-")))
78
				.map(StringUtils::capitalize)
79
				.collect(Collectors.joining(" "));
80
	}
81

    
82
	public static String fixApostrophes(final String s) {
83
		return s.replaceAll("'a", "'A").replaceAll("'e", "'E").replaceAll("'i", "'I").replaceAll("'o", "'O").replaceAll("'u", "'U")
84
				.replaceAll("a'", "à").replaceAll("e'", "è").replaceAll("i'", "ì").replaceAll("o'", "ò").replaceAll("u'", "ù");
85
	}
86

    
87
	public static boolean isValidProject(final String id) {
88
		return Project.isValid(id);
89
	}
90

    
91
	public static String projectLongId(final String funder,
92
			final String program,
93
			final String code,
94
			final String jurisdiction,
95
			final String name,
96
			final String acronym) {
97

    
98
		return String.format("info:eu-repo/grantAgreement/%s/%s/%s/%s/%s/%s",
99
				funder.replaceAll("/", "%2F"),
100
				program.replaceAll("/", "%2F"),
101
				code.replaceAll("/", "%2F"),
102
				jurisdiction.replaceAll("/", "%2F"),
103
				name.replaceAll("/", "%2F"),
104
				acronym.replaceAll("/", "%2F"));
105

    
106
	}
107

    
108
	public static String calculatePersonName(final String s) {
109
		final Pattern pattern = Pattern.compile("info:cnr-pdr\\/author\\/(.+):(.+)\\/(.+)\\/(.+)");
110
		final Matcher matcher = pattern.matcher(s);
111
		return matcher.find() ? capitalize(String.format("%s, %s", matcher.group(3), matcher.group(4))) : "";
112
	}
113

    
114
	public static String serverName(final String s) {
115
		try {
116
			return new URL(s).getHost();
117
		} catch (final MalformedURLException e) {
118
			return "Unknown";
119
		}
120
	}
121

    
122
	public static String md5(final String s) {
123
		return DigestUtils.md5Hex(s);
124
	}
125

    
126
	// <xsl:for-each select="tokenize(istiFunction:cleanName(.), ';')">
127
	// <xsl:choose>
128
	// <xsl:when test="matches(normalize-space(.), ',(\s*[a-zA-Z]\.)+$')">
129
	// <creator>
130
	// <creatorName>
131
	// <xsl:value-of select="normalize-space(translate(.,',',' '))"/>
132
	// </creatorName>
133
	// </creator>
134
	// </xsl:when>
135
	//
136
	// <xsl:when test="matches(normalize-space(.), '(^[a-zA-Z\.]+,\s?[a-zA-Z\.\s]+$)|(^[a-zA-Z\.\s]+,\s?[a-zA-Z\.]+$)')">
137
	// <creator>
138
	// <creatorName>
139
	// <xsl:value-of select="normalize-space(translate(.,',',' '))"/>
140
	// </creatorName>
141
	// </creator>
142
	// </xsl:when>
143
	//
144
	// <xsl:otherwise>
145
	// <xsl:for-each select="tokenize(., ',')">
146
	// <creator>
147
	// <creatorName>
148
	// <xsl:value-of select="normalize-space(.)"/>
149
	// </creatorName>
150
	// </creator>
151
	// </xsl:for-each>
152
	// </xsl:otherwise>
153
	// </xsl:choose>
154
	// </xsl:for-each>
155
	//
156

    
157
}
(5-5/5)