Project

General

Profile

1
package eu.dnetlib.pace.clustering;
2

    
3
import java.util.Collection;
4
import java.util.Map;
5
import java.util.Set;
6
import java.util.StringTokenizer;
7

    
8
import com.google.common.collect.Sets;
9

    
10
/**
11
 * The Class Acronyms.
12
 */
13
public class Acronyms extends AbstractClusteringFunction {
14

    
15
	/**
16
	 * Instantiates a new acronyms.
17
	 * 
18
	 * @param params
19
	 *            the params
20
	 */
21
	public Acronyms(final Map<String, Integer> params) {
22
		super(params);
23
	}
24

    
25
	/*
26
	 * (non-Javadoc)
27
	 * 
28
	 * @see eu.dnetlib.pace.clustering.AbstractClusteringFunction#doApply(java.lang.String)
29
	 */
30
	@Override
31
	protected Collection<String> doApply(final String s) {
32
		return extractAcronyms(s, param("max"), param("minLen"), param("maxLen"));
33
	}
34

    
35
	/**
36
	 * Extract acronyms.
37
	 * 
38
	 * @param s
39
	 *            the s
40
	 * @param maxAcronyms
41
	 *            the max acronyms
42
	 * @param minLen
43
	 *            the min len
44
	 * @param maxLen
45
	 *            the max len
46
	 * @return the sets the
47
	 */
48
	private Set<String> extractAcronyms(final String s, final int maxAcronyms, final int minLen, final int maxLen) {
49

    
50
		final Set<String> acronyms = Sets.newLinkedHashSet();
51

    
52
		for (int i = 0; i < maxAcronyms; i++) {
53

    
54
			final StringTokenizer st = new StringTokenizer(s);
55
			final StringBuilder sb = new StringBuilder();
56

    
57
			while (st.hasMoreTokens()) {
58
				final String token = st.nextToken();
59
				if (sb.length() > maxLen) {
60
					break;
61
				}
62
				if ((token.length() > 1) && (i < token.length())) {
63
					sb.append(token.charAt(i));
64
				}
65
			}
66
			String acronym = sb.toString();
67
			if (acronym.length() > minLen) {
68
				acronyms.add(acronym);
69
			}
70
		}
71
		return acronyms;
72
	}
73

    
74
}
(2-2/12)