Project

General

Profile

1 43053 giorgos.al
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2
3
import java.net.URLDecoder;
4
import java.util.ArrayList;
5
import java.util.Set;
6
7 43143 giorgos.al
import com.google.common.collect.Multimap;
8
9 43053 giorgos.al
public class Blocking {
10
11
12
	public static ArrayList<String> tokenBlocking(Multimap<String, String >fields){
13
		ArrayList<String> blockingKeys = new ArrayList<>();
14
15
		Set<String> keys = fields.keySet();
16
		for(String key : keys){
17
18
			if(key.equals("id")) continue;
19
20 43055 eri.katsar
			for(String value : fields.get(key) ){
21 43053 giorgos.al
				try{
22
                    value = URLDecoder.decode(value, "UTF-8").replaceAll("_", " ");
23
                } catch (Exception ex) {
24
                    value = value.replaceAll("_", " ");
25
                }
26
				String[] tokens = value.split("[\\W_]");
27 43055 eri.katsar
28
					for (int i = 0; i < tokens.length; i++) {
29 43053 giorgos.al
                   String currentToken = tokens[i].trim();
30
                   if (0 < currentToken.length()) {
31
                      blockingKeys.add(currentToken);
32
                   }
33
				}
34
			}
35
		}
36
37
		return blockingKeys;
38
	}
39
40
}