Project

General

Profile

1 26600 sandro.lab
package eu.dnetlib.pace.clustering;
2
3
import java.util.Collection;
4
import java.util.Map;
5
import java.util.Set;
6
import java.util.StringTokenizer;
7
8
import com.google.common.collect.Sets;
9
10 28483 claudio.at
/**
11
 * The Class Acronyms.
12
 */
13 26600 sandro.lab
public class Acronyms extends AbstractClusteringFunction {
14
15 28483 claudio.at
	/**
16
	 * Instantiates a new acronyms.
17
	 *
18
	 * @param params
19
	 *            the params
20
	 */
21
	public Acronyms(final Map<String, Integer> params) {
22 26600 sandro.lab
		super(params);
23
	}
24
25 28483 claudio.at
	/*
26
	 * (non-Javadoc)
27
	 *
28
	 * @see eu.dnetlib.pace.clustering.AbstractClusteringFunction#doApply(java.lang.String)
29
	 */
30 26600 sandro.lab
	@Override
31 28483 claudio.at
	protected Collection<String> doApply(final String s) {
32 26600 sandro.lab
		return extractAcronyms(s, param("max"), param("minLen"), param("maxLen"));
33
	}
34 28483 claudio.at
35
	/**
36
	 * Extract acronyms.
37
	 *
38
	 * @param s
39
	 *            the s
40
	 * @param maxAcronyms
41
	 *            the max acronyms
42
	 * @param minLen
43
	 *            the min len
44
	 * @param maxLen
45
	 *            the max len
46
	 * @return the sets the
47
	 */
48
	private Set<String> extractAcronyms(final String s, final int maxAcronyms, final int minLen, final int maxLen) {
49
50 26600 sandro.lab
		final Set<String> acronyms = Sets.newLinkedHashSet();
51 28483 claudio.at
52 26600 sandro.lab
		for (int i = 0; i < maxAcronyms; i++) {
53 28483 claudio.at
54 26600 sandro.lab
			final StringTokenizer st = new StringTokenizer(s);
55
			final StringBuilder sb = new StringBuilder();
56 28483 claudio.at
57 26600 sandro.lab
			while (st.hasMoreTokens()) {
58
				final String token = st.nextToken();
59
				if (sb.length() > maxLen) {
60
					break;
61
				}
62 28483 claudio.at
				if ((token.length() > 1) && (i < token.length())) {
63 26600 sandro.lab
					sb.append(token.charAt(i));
64
				}
65
			}
66
			String acronym = sb.toString();
67
			if (acronym.length() > minLen) {
68
				acronyms.add(acronym);
69
			}
70
		}
71
		return acronyms;
72
	}
73
74
}