Revision 48226
Added by Eri Katsari almost 7 years ago
DatasetComparator.java | ||
---|---|---|
26 | 26 |
|
27 | 27 |
String groundTruthPath = args[0]; |
28 | 28 |
String linkageOutputPath = args[1]; |
29 |
System.out.println("groundTruthPath path is " + linkageOutputPath);
|
|
29 |
System.out.println("groundTruthPath path is " + groundTruthPath);
|
|
30 | 30 |
System.out.println("linkage output path is " + linkageOutputPath); |
31 | 31 |
|
32 | 32 |
int trueMatches = 0; |
... | ... | |
38 | 38 |
List linkageFiles = getFiles(fs, new Path(linkageOutputPath)); |
39 | 39 |
Map linkageMap = getLinkageRecordMap(fs, linkageFiles); |
40 | 40 |
List groundTruthFiles = getFiles(fs, new Path(groundTruthPath)); |
41 |
Map groundTruthMap = getGroundRecordMap(fs, groundTruthFiles); |
|
41 |
// Map groundTruthMap = getGroundRecordMap(fs, groundTruthFiles);
|
|
42 | 42 |
|
43 |
System.out.println("ground truth map size" + groundTruthMap.size()); |
|
43 |
//System.out.println("ground truth map size" + groundTruthMap.size());
|
|
44 | 44 |
System.out.println("record map size" + linkageMap.size()); |
45 | 45 |
Iterator linkageIterator = linkageMap.keySet().iterator(); |
46 |
|
|
47 |
|
|
48 | 46 |
OutputStream trueMatchesOut = fs.create(new Path("trueMatches")); |
49 | 47 |
|
50 |
|
|
51 | 48 |
while (linkageIterator.hasNext()) { |
52 | 49 |
String key = (String) linkageIterator.next(); |
53 | 50 |
String[] splitKey = key.split(" "); |
54 | 51 |
String reversedKey = splitKey[1] + " " + splitKey[0]; |
55 | 52 |
|
56 |
if (!groundTruthMap.containsKey(key) && !groundTruthMap.containsKey(reversedKey)) { |
|
57 |
linkageIterator.remove(); |
|
58 |
falseMatches++; |
|
59 |
} else { |
|
53 |
//if (!groundTruthMap.containsKey(key) && !groundTruthMap.containsKey(reversedKey)) |
|
54 |
{ |
|
55 |
// linkageIterator.remove(); |
|
56 |
// falseMatches++; |
|
57 |
} |
|
58 |
//elseblo |
|
59 |
{ |
|
60 | 60 |
trueMatches++; |
61 | 61 |
trueMatchesOut.write((key + "\n").getBytes()); |
62 | 62 |
} |
Also available in: Unified diff
Changed token blocking to accept year as a token