Project

General

Profile

1
package eu.dnetlib.pace.clustering;
2

    
3
import java.util.Collection;
4
import java.util.Map;
5
import java.util.Set;
6
import java.util.StringTokenizer;
7

    
8
import com.google.common.collect.Sets;
9

    
10
public class Acronyms extends AbstractClusteringFunction {
11

    
12
	public Acronyms(Map<String, Integer> params) {
13
		super(params);
14
	}
15

    
16
	@Override
17
	protected Collection<String> doApply(String s) {
18
		return extractAcronyms(s, param("max"), param("minLen"), param("maxLen"));
19
	}
20
	
21
	private Set<String> extractAcronyms(final String s, int maxAcronyms, int minLen, int maxLen) {
22
		
23
		final Set<String> acronyms = Sets.newLinkedHashSet();
24
		
25
		for (int i = 0; i < maxAcronyms; i++) {
26
			
27
			final StringTokenizer st = new StringTokenizer(s);
28
			final StringBuilder sb = new StringBuilder();
29
			
30
			while (st.hasMoreTokens()) {
31
				final String token = st.nextToken();
32
				if (sb.length() > maxLen) {
33
					break;
34
				}
35
				if (token.length() > 1 && i < token.length()) {
36
					sb.append(token.charAt(i));
37
				}
38
			}
39
			String acronym = sb.toString();
40
			if (acronym.length() > minLen) {
41
				acronyms.add(acronym);
42
			}
43
		}
44
		return acronyms;
45
	}
46

    
47
}
(2-2/18)