Project

General

Profile

package eu.dnetlib.pace.clustering;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;

import com.google.common.collect.Sets;

import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.model.Field;

public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction {
14

    
15
	protected Map<String, Integer> params;
16
	
17
	public AbstractClusteringFunction(final Map<String, Integer> params) {
18
		this.params = params;
19
	}
20
	
21
	protected abstract Collection<String> doApply(String s);
22
	
23
	@Override
24
	public Collection<String> apply(List<Field> fields) {
25
		Collection<String> c = Sets.newLinkedHashSet();
26
		fields.stream().filter(f -> !f.isEmpty()).forEach(f -> c.addAll(filterBlacklisted(doApply(filterStopWords(normalize(f.stringValue()), stopwords)), ngramBlacklist)));
27
		return c;
28
	}
29

    
30
	public Map<String, Integer> getParams() {
31
		return params;
32
	}
33
	
34
	protected Integer param(String name) {
35
		return params.get(name);
36
	}
37
}
(1-1/19)