1
|
package eu.dnetlib.pace.clustering;
|
2
|
|
3
|
import com.google.common.collect.Sets;
|
4
|
|
5
|
import java.net.MalformedURLException;
|
6
|
import java.net.URL;
|
7
|
import java.util.Collection;
|
8
|
import java.util.Map;
|
9
|
|
10
|
public class UrlClustering extends AbstractClusteringFunction {
|
11
|
|
12
|
public UrlClustering(Map<String, Integer> params) {
|
13
|
super(params);
|
14
|
}
|
15
|
|
16
|
@Override
|
17
|
protected Collection<String> doApply(String s) {
|
18
|
return Sets.newHashSet(asUrl(s).getHost());
|
19
|
}
|
20
|
|
21
|
private URL asUrl(final String value) {
|
22
|
try {
|
23
|
return new URL(value);
|
24
|
} catch (MalformedURLException e) {
|
25
|
// should not happen as checked by pace typing
|
26
|
throw new IllegalStateException("invalid URL: " + value);
|
27
|
}
|
28
|
}
|
29
|
|
30
|
}
|