Project

General

Profile

1
package eu.dnetlib.pace.clustering;
2

    
3
import java.util.*;
4
import java.util.function.Function;
5
import java.util.function.Predicate;
6
import java.util.stream.Collectors;
7

    
8
import com.google.common.collect.Sets;
9

    
10
import eu.dnetlib.pace.common.AbstractPaceFunctions;
11
import eu.dnetlib.pace.model.Field;
12
import org.apache.commons.lang.StringUtils;
13

    
14
public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction {
15

    
16
	protected Map<String, Integer> params;
17
	
18
	public AbstractClusteringFunction(final Map<String, Integer> params) {
19
		this.params = params;
20
	}
21
	
22
	protected abstract Collection<String> doApply(String s);
23
	
24
	@Override
25
	public Collection<String> apply(List<Field> fields) {
26
		return fields.stream().filter(f -> !f.isEmpty())
27
				.map(Field::stringValue)
28
				.map(this::normalize)
29
				.map(s -> filterStopWords(s, stopwords))
30
				.map(this::doApply)
31
				.map(c -> filterBlacklisted(c, ngramBlacklist))
32
				.flatMap(c -> c.stream())
33
				.filter(StringUtils::isNotBlank)
34
				.collect(Collectors.toCollection(HashSet::new));
35
	}
36

    
37
	public Map<String, Integer> getParams() {
38
		return params;
39
	}
40
	
41
	protected Integer param(String name) {
42
		return params.get(name);
43
	}
44
}
(1-1/19)