Project

General

Profile

1
package eu.dnetlib.pace.clustering;
2

    
3
import java.util.Collection;
4
import java.util.List;
5
import java.util.Map;
6

    
7
import com.google.common.collect.Lists;
8

    
9
public class NgramPairs extends Ngrams {
10

    
11
	public NgramPairs(Map<String, Integer> params) {
12
		super(params);
13
	}
14
	
15
	@Override
16
	protected Collection<String> doApply(String s) {
17
		return ngramPairs(Lists.newArrayList(getNgrams(s, param("ngramLen"), param("max") * 2, 1, 2)), param("max"));
18
	}
19

    
20
	protected Collection<String> ngramPairs(final List<String> ngrams, int maxNgrams) {
21
		Collection<String> res = Lists.newArrayList();
22
		int j = 0;
23
		for (int i = 0; i < ngrams.size() && res.size() < maxNgrams; i++) {
24
			if (++j >= ngrams.size()) {
25
				break;
26
			}
27
			res.add(ngrams.get(i) + ngrams.get(j));
28
			//System.out.println("-- " + concatNgrams);
29
		}
30
		return res;
31
	}
32

    
33
}
(11-11/19)