Project

General

Profile

« Previous | Next » 

Revision 45806

Added by Eri Katsari about 7 years ago

'updates'

View differences:

MyComparator.java
6 6
public class MyComparator {
7 7
    private static double FIELDS_SIMILARITY_THRESHOLD = 0.7;
8 8
    private static double RECORD_SIMILARITY_THRESHOLD = 0.8;
9
    private static String SEPARATOR = ",";
9
    private static final String LINE_SEPERATOR = "\t.\t";
10
    private static final String FIELD_DELIM = "\t";
10 11
    private static Map<String, String> sourceRecordMappings = new HashMap<>();
11 12

  
12 13
    static {
13 14
        //TODO remove later!!! make it configurable
14
        sourceRecordMappings.put("http://www.eurocris.org/ontologies/cerif/1.3#name",
15
                "http://www.w3.org/2000/01/rdf-schema#label");
16
        sourceRecordMappings.put("http://lod.openaire.eu/vocab/year", "http://purl.org/dc/terms/issued");
15
        sourceRecordMappings.put("<http://www.eurocris.org/ontologies/cerif/1.3#name>",
16
                "<http://www.w3.org/2000/01/rdf-schema#label>");
17
        sourceRecordMappings.put("<http://lod.openaire.eu/vocab/year>", "<http://purl.org/dc/terms/issued>");
17 18
    }
18 19

  
19 20
    public static double findMatchingPair(String source, String target) {
21
        Map<String, String> sourceRecordMap = getRecordsFiledMap(source);
22
        Map<String, String> targetRecordMap = getRecordsFiledMap(target);
20 23

  
21
        String[] sourceFields = source.split(SEPARATOR);
22
        Map<String, String> sourceFieldsMap = new HashMap<>();
23
        for (int j = 0; j < sourceFields.length; j++) {
24
            String[] split = sourceFields[j].split("\t");
25
            sourceFieldsMap.put(split[0], split[1]);
26
        }
27
        //get target fields
28
        String[] targetFields = target.split(",");
29
        Map<String, String> targetFieldsMap = new HashMap<>();
30
        for (int j = 0; j < targetFields.length; j++) {
31
            String[] split = targetFields[j].split("\t");
32
            targetFieldsMap.put(split[0], split[1]);
33
        }
34 24
        //similarity counters
35 25
        int matchedFields = 0;
36
        double totalFields = (double) targetFields.length;
26
        double totalFields = (double) sourceRecordMappings.size();
37 27
        double recordSimilarity = 0.0;
38 28

  
39
        for (Map.Entry<String, String> sourceField : sourceFieldsMap.entrySet()) {
29
        for (Map.Entry<String, String> sourceField : sourceRecordMap.entrySet()) {
40 30
            String correspondingTargetField = sourceRecordMappings.get(sourceField.getKey());
41
            String targetFieldValue = targetFieldsMap.get(correspondingTargetField);
31
            String targetFieldValue = targetRecordMap.get(correspondingTargetField);
42 32
            double fieldsSimilarity = compare(sourceField.getValue(), targetFieldValue);
43 33
            System.out.println(sourceField + "\n" + targetFieldValue + "\n : field similarity: " + fieldsSimilarity + "\n-----------------------------------------");
44 34
            recordSimilarity += fieldsSimilarity;
......
66 56
            }
67 57
        }
68 58
        //  System.out.println("Similar chars " + similarChars);
69
        return (sourceValue.length() >= targetValue.length() ? similarChars / (double) sourceValue.length() :  similarChars / (double)targetValue.length());
59
        return (sourceValue.length() >= targetValue.length() ? similarChars / (double) sourceValue.length() : similarChars / (double) targetValue.length());
70 60
    }
71 61

  
72

  
62
    /**
     * Parses one serialized record into a map from field name to field value.
     *
     * <p>Everything up to and including the first {@code FIELD_DELIM} is discarded
     * (presumably a leading record identifier - confirm against the callers' input
     * format). The remainder is trimmed, cut into entries on {@code LINE_SEPERATOR},
     * and each entry is cut on {@code FIELD_DELIM}; the first two pieces of an entry
     * become the key and the value.
     *
     * <p>NOTE(review): {@link String#split(String)} interprets its argument as a
     * regular expression, so the {@code '.'} inside {@code LINE_SEPERATOR} matches
     * any single character between the two tabs, not only a literal dot.
     *
     * @param source the raw record text
     * @return a new mutable map with one entry per parsed field; when a key occurs
     *         more than once the last value wins
     * @throws ArrayIndexOutOfBoundsException if an entry does not yield at least two
     *         pieces when split on {@code FIELD_DELIM}
     */
    private static Map<String, String> getRecordsFiledMap(String source) {
63
        // indexOf returns -1 when the delimiter is absent, so "+ 1" yields 0 and the whole string is kept.
        String sourceRecord = source.substring(source.indexOf(FIELD_DELIM) + 1).trim();
64
        String[] sourceTriples = sourceRecord.split(LINE_SEPERATOR);
65
        Map<String, String> sourceFieldsMap = new HashMap<>();
66
        for (String sourceTriple : sourceTriples) {
67
            String[] split = sourceTriple.split(FIELD_DELIM);
68
            // Only the first two pieces are used; any further pieces of the entry are ignored.
            sourceFieldsMap.put(split[0], split[1]);
69
        }
70
        return sourceFieldsMap;
71
    }
73 72
}

Also available in: Unified diff