Project

General

Profile

1
package eu.dnetlib.pace.clustering;
2

    
3
import java.util.Collection;
4
import java.util.Map;
5
import java.util.Set;
6

    
7
import com.google.common.collect.Sets;
8

    
9
public class SuffixPrefix extends AbstractClusteringFunction {
10

    
11
	public SuffixPrefix(Map<String, Integer> params) {
12
		super(params);
13
	}
14

    
15
	@Override
16
	protected Collection<String> doApply(String s) {
17
		return suffixPrefix(s, param("len"), param("max"));
18
	}
19
	
20
	private Collection<String> suffixPrefix(String s, int len, int max) {
21
		final Set<String> bigrams = Sets.newLinkedHashSet();
22
		int i = 0;
23
		while (++i < s.length() && bigrams.size() < max) {
24
			int j = s.indexOf(" ", i);
25

    
26
			int offset = j + len + 1 < s.length() ? j + len + 1 : s.length();
27

    
28
			if (j - len > 0) {
29
				String bigram = s.substring(j - len, offset).replaceAll(" ", "").trim();
30
				if (bigram.length() >= 4) {
31
					bigrams.add(bigram);
32
				}
33
			}
34
		}
35
		return bigrams;
36
	}
37

    
38
}
(18-18/19)