Revision 28483
Added by Claudio Atzori about 10 years ago
NgramPairs.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
|
|
7 |
import com.google.common.collect.Lists; |
|
8 |
|
|
9 |
/** |
|
10 |
* The Class NgramPairs. |
|
11 |
*/ |
|
12 |
public class NgramPairs extends Ngrams { |
|
13 |
|
|
14 |
/** |
|
15 |
* Instantiates a new ngram pairs. |
|
16 |
* |
|
17 |
* @param params |
|
18 |
* the params |
|
19 |
*/ |
|
20 |
public NgramPairs(final Map<String, Integer> params) { |
|
21 |
super(params); |
|
22 |
} |
|
23 |
|
|
24 |
/* |
|
25 |
* (non-Javadoc) |
|
26 |
* |
|
27 |
* @see eu.dnetlib.pace.clustering.Ngrams#doApply(java.lang.String) |
|
28 |
*/ |
|
29 |
@Override |
|
30 |
protected Collection<String> doApply(final String s) { |
|
31 |
return ngramPairs(Lists.newArrayList(getNgrams(s, param("ngramLen"), param("max") * 2, 1, 2)), param("max")); |
|
32 |
} |
|
33 |
|
|
34 |
/** |
|
35 |
* Ngram pairs. |
|
36 |
* |
|
37 |
* @param ngrams |
|
38 |
* the ngrams |
|
39 |
* @param maxNgrams |
|
40 |
* the max ngrams |
|
41 |
* @return the collection |
|
42 |
*/ |
|
43 |
private Collection<String> ngramPairs(final List<String> ngrams, final int maxNgrams) { |
|
44 |
Collection<String> res = Lists.newArrayList(); |
|
45 |
int j = 0; |
|
46 |
for (int i = 0; (i < ngrams.size()) && (res.size() < maxNgrams); i++) { |
|
47 |
if (++j >= ngrams.size()) { |
|
48 |
break; |
|
49 |
} |
|
50 |
res.add(ngrams.get(i) + ngrams.get(j)); |
|
51 |
// System.out.println("-- " + concatNgrams); |
|
52 |
} |
|
53 |
return res; |
|
54 |
} |
|
55 |
|
|
56 |
} |
|
0 | 57 |
Also available in: Unified diff
branch 1.2