Project

General

Profile

1
package eu.dnetlib.pace.clustering;
2

    
3
import java.util.*;
4
import java.util.function.Function;
5
import java.util.function.Predicate;
6
import java.util.stream.Collectors;
7

    
8
import com.google.common.collect.Sets;
9

    
10
import eu.dnetlib.pace.common.AbstractPaceFunctions;
11
import eu.dnetlib.pace.model.Field;
12

    
13
public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction {
14

    
15
	protected Map<String, Integer> params;
16
	
17
	public AbstractClusteringFunction(final Map<String, Integer> params) {
18
		this.params = params;
19
	}
20
	
21
	protected abstract Collection<String> doApply(String s);
22
	
23
	@Override
24
	public Collection<String> apply(List<Field> fields) {
25
		return fields.stream().filter(f -> !f.isEmpty())
26
				.map(Field::stringValue)
27
				.map(this::normalize)
28
				.map(s -> filterStopWords(s, stopwords))
29
				.map(this::doApply)
30
				.map(c -> filterBlacklisted(c, ngramBlacklist))
31
				.flatMap(c -> c.stream())
32
				.collect(Collectors.toCollection(HashSet::new));
33
	}
34

    
35
	public Map<String, Integer> getParams() {
36
		return params;
37
	}
38
	
39
	protected Integer param(String name) {
40
		return params.get(name);
41
	}
42
}
(1-1/19)