Project

General

Profile

« Previous | Next » 

Revision 53194

Added URL clustering.

View differences:

UrlClustering.java
1 1
package eu.dnetlib.pace.clustering;
2 2

  
3
import com.google.common.collect.Sets;
3
import eu.dnetlib.pace.common.AbstractPaceFunctions;
4
import eu.dnetlib.pace.model.Field;
4 5

  
5 6
import java.net.MalformedURLException;
6 7
import java.net.URL;
7 8
import java.util.Collection;
9
import java.util.HashSet;
10
import java.util.List;
8 11
import java.util.Map;
12
import java.util.stream.Collectors;
9 13

  
10
public class UrlClustering extends AbstractClusteringFunction {
14
public class UrlClustering extends AbstractPaceFunctions implements ClusteringFunction {
11 15

  
12
    public UrlClustering(Map<String, Integer> params) {
13
        super(params);
16
    protected Map<String, Integer> params;
17

  
18
    public UrlClustering(final Map<String, Integer> params) {
19
        this.params = params;
14 20
    }
15 21

  
16 22
    @Override
17
    protected Collection<String> doApply(String s) {
18
        return Sets.newHashSet(asUrl(s).getHost());
23
    public Collection<String> apply(List<Field> fields) {
24
        return fields.stream()
25
                .filter(f -> !f.isEmpty())
26
                .map(Field::stringValue)
27
                .map(this::asUrl)
28
                .map(URL::getHost)
29
                .collect(Collectors.toCollection(HashSet::new));
19 30
    }
20 31

  
32
    @Override
33
    public Map<String, Integer> getParams() {
34
        return null;
35
    }
36

  
21 37
    private URL asUrl(final String value) {
22 38
        try {
23 39
            return new URL(value);

Also available in: Unified diff