Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

    
3
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Set;
import java.util.regex.Pattern;

import com.google.common.collect.Multimap;
8

    
9
public class Blocking {
10

    
11
	
12
	public static ArrayList<String> tokenBlocking(Multimap<String, String >fields){
13
		ArrayList<String> blockingKeys = new ArrayList<>();
14
		
15
		Set<String> keys = fields.keySet();
16
		for(String key : keys){
17
			
18
			if(key.equals("id")) continue;
19
			
20
			for(String value : fields.get(key) ){
21
				try{
22
                    value = URLDecoder.decode(value, "UTF-8").replaceAll("_", " ");
23
                } catch (Exception ex) {
24
                    value = value.replaceAll("_", " ");
25
                }
26
				String[] tokens = value.split("[\\W_]");
27

    
28
					for (int i = 0; i < tokens.length; i++) {
29
                   String currentToken = tokens[i].trim();
30
                   if (0 < currentToken.length()) {
31
                      blockingKeys.add(currentToken);
32
                   }
33
				}
34
			}
35
		}
36
		
37
		return blockingKeys;
38
	}
39
	
40
}
(1-1/3)