1
|
package eu.dnetlib.pace.clustering;
|
2
|
|
3
|
import java.util.Collection;
|
4
|
import java.util.List;
|
5
|
import java.util.Map;
|
6
|
|
7
|
import com.google.common.collect.Lists;
|
8
|
|
9
|
public class NgramPairs extends Ngrams {
|
10
|
|
11
|
public NgramPairs(Map<String, Integer> params) {
|
12
|
super(params);
|
13
|
}
|
14
|
|
15
|
@Override
|
16
|
protected Collection<String> doApply(String s) {
|
17
|
return ngramPairs(Lists.newArrayList(getNgrams(s, param("ngramLen"), param("max") * 2, 1, 2)), param("max"));
|
18
|
}
|
19
|
|
20
|
protected Collection<String> ngramPairs(final List<String> ngrams, int maxNgrams) {
|
21
|
Collection<String> res = Lists.newArrayList();
|
22
|
int j = 0;
|
23
|
for (int i = 0; i < ngrams.size() && res.size() < maxNgrams; i++) {
|
24
|
if (++j >= ngrams.size()) {
|
25
|
break;
|
26
|
}
|
27
|
res.add(ngrams.get(i) + ngrams.get(j));
|
28
|
//System.out.println("-- " + concatNgrams);
|
29
|
}
|
30
|
return res;
|
31
|
}
|
32
|
|
33
|
}
|