Project

General

Profile

« Previous | Next » 

Revision 48226

Added by Eri Katsari almost 7 years ago

Changed token blocking to accept year as a token

View differences:

DatasetComparator.java
26 26

  
27 27
        String groundTruthPath = args[0];
28 28
        String linkageOutputPath = args[1];
29
        System.out.println("groundTruthPath  path is " + linkageOutputPath);
29
        System.out.println("groundTruthPath  path is " + groundTruthPath);
30 30
        System.out.println("linkage output path is " + linkageOutputPath);
31 31

  
32 32
        int trueMatches = 0;
......
38 38
            List linkageFiles = getFiles(fs, new Path(linkageOutputPath));
39 39
            Map linkageMap = getLinkageRecordMap(fs, linkageFiles);
40 40
            List groundTruthFiles = getFiles(fs, new Path(groundTruthPath));
41
            Map groundTruthMap = getGroundRecordMap(fs, groundTruthFiles);
41
           // Map groundTruthMap = getGroundRecordMap(fs, groundTruthFiles);
42 42

  
43
            System.out.println("ground truth map size" + groundTruthMap.size());
43
            //System.out.println("ground truth map size" + groundTruthMap.size());
44 44
            System.out.println("record map size" + linkageMap.size());
45 45
            Iterator linkageIterator = linkageMap.keySet().iterator();
46

  
47

  
48 46
            OutputStream trueMatchesOut = fs.create(new Path("trueMatches"));
49 47

  
50

  
51 48
            while (linkageIterator.hasNext()) {
52 49
                String key = (String) linkageIterator.next();
53 50
                String[] splitKey = key.split(" ");
54 51
                String reversedKey = splitKey[1] + " " + splitKey[0];
55 52

  
56
                if (!groundTruthMap.containsKey(key) && !groundTruthMap.containsKey(reversedKey)) {
57
                    linkageIterator.remove();
58
                    falseMatches++;
59
                } else {
53
                //if (!groundTruthMap.containsKey(key) && !groundTruthMap.containsKey(reversedKey))
54
                {
55
                 //   linkageIterator.remove();
56
                   // falseMatches++;
57
                }
58
                //elseblo
59
                {
60 60
                    trueMatches++;
61 61
                    trueMatchesOut.write((key + "\n").getBytes());
62 62
                }

Also available in: Unified diff