Revision 43414
Added by Eri Katsari almost 8 years ago
FrequencyCounter.java | ||
---|---|---|
3 | 3 |
import com.google.common.base.Functions; |
4 | 4 |
import com.google.common.collect.ImmutableSortedMap; |
5 | 5 |
import com.google.common.collect.Ordering; |
6 |
|
|
7 |
|
|
8 | 6 |
import org.apache.hadoop.conf.Configuration; |
9 | 7 |
import org.apache.hadoop.fs.FileSystem; |
10 | 8 |
import org.apache.hadoop.fs.LocatedFileStatus; |
... | ... | |
15 | 13 |
import java.io.*; |
16 | 14 |
import java.math.BigInteger; |
17 | 15 |
import java.util.*; |
16 |
import java.io.BufferedReader; |
|
17 |
import java.io.BufferedWriter; |
|
18 |
import java.io.IOException; |
|
19 |
import java.io.InputStreamReader; |
|
20 |
import java.io.OutputStreamWriter; |
|
21 |
import java.util.ArrayList; |
|
22 |
import java.util.HashMap; |
|
23 |
import java.util.List; |
|
24 |
import java.util.Map; |
|
18 | 25 |
|
19 | 26 |
|
20 | 27 |
public class FrequencyCounter { |
21 | 28 |
|
29 |
// Sort direction passed to sortMapByValue(Map, ORDERING_TYPE). |
// That method reverses its comparator only for DESC; any other value takes the non-reversed branch. |
private enum ORDERING_TYPE { |
|
30 |
ASC, DESC |
|
31 |
} |
|
32 |
|
|
22 | 33 |
public static void main(String[] args) throws Exception { |
23 | 34 |
String inputPath = "/tmp/lod_blocks/stats"; |
24 | 35 |
String outputPath = "/tmp/lod_blocks/stats/frequencyMap"; |
... | ... | |
29 | 40 |
FileSystem fs = FileSystem.get(new Configuration()); |
30 | 41 |
Path path = new Path(inputPath); |
31 | 42 |
List filenames = getFiles(fs, path); |
32 |
Map<String,Integer> frequencyMap = getFrequencyMap(fs, filenames);
|
|
33 |
Map<String,Integer> sortedMap = sortMapByValue(frequencyMap);
|
|
43 |
Map frequencyMap = getFrequencyMap(fs, filenames); |
|
44 |
Map sortedMap = sortMapByValue(frequencyMap, ORDERING_TYPE.DESC);
|
|
34 | 45 |
writeMap(fs, sortedMap, outputPath); |
35 | 46 |
fs.close(); |
36 | 47 |
} catch (Exception e) { |
... | ... | |
54 | 65 |
return fileNames; |
55 | 66 |
} |
56 | 67 |
|
57 |
|
|
58 |
// Walks the keys of sortedMap, parsing each key as a BigInteger block size, and keeps |
// a running total of block sizes plus a running n*(n-1)-based term; CC is their ratio. |
// Returns the local `statistics` map, which nothing in this method ever writes to, |
// so the returned map is always empty and CC is discarded. |
// NOTE(review): this method only carries single-column row numbers 58-78 in this diff, |
// which suggests it was removed by this revision - confirm against the unified diff. |
// Throws NumberFormatException (from new BigInteger(key)) if a key is not a decimal integer. |
private static Map getStatistics(Map<String,Integer> sortedMap){ |
|
59 |
// Never populated below; returned as-is. |
Map<Integer,Integer> statistics= new HashMap<Integer,Integer>(); |
|
60 |
double CC = 0d; |
|
61 |
// Redundant: CC was already initialised to 0d on the previous statement. |
CC = 0d; |
|
62 |
// Assigned but never read in this method. |
int lastBlockSize = 2; |
|
63 |
// Receives each key's frequency inside the loop but is never read afterwards. |
int f; |
|
64 |
BigInteger totalSizeOfBlocks = BigInteger.ZERO; |
|
65 |
BigInteger numberOfComparisons = BigInteger.ZERO; |
|
66 |
|
|
67 |
Set<String> keys = sortedMap.keySet(); |
|
68 |
for(String key : keys){ |
|
69 |
BigInteger blockSize = new BigInteger(key); |
|
70 |
// Unboxes the Integer value; the result is not used in any calculation below. |
f = sortedMap.get(key); |
|
71 |
// Adds the block size once per distinct key; the frequency f is not factored in. |
totalSizeOfBlocks = totalSizeOfBlocks.add(blockSize); |
|
72 |
// shiftLeft(1) doubles n*(n-1). |
// NOTE(review): if this is meant to count pairwise comparisons, n*(n-1)/2 would need shiftRight(1) - confirm intent. |
numberOfComparisons = numberOfComparisons.add(blockSize.multiply(blockSize.subtract(BigInteger.ONE)).shiftLeft(1)); |
|
73 |
// Floating-point division: while numberOfComparisons is still zero (keys "0" or "1" only), |
// this evaluates to NaN or Infinity rather than throwing. |
CC = totalSizeOfBlocks.doubleValue()/numberOfComparisons.doubleValue(); |
|
74 |
} |
|
75 |
|
|
76 |
|
|
77 |
return statistics; |
|
78 |
} |
|
79 |
|
|
80 |
private static Map<String,Integer> getFrequencyMap(FileSystem fs, List<String> fileNames) throws Exception { |
|
68 |
private static Map getFrequencyMap(FileSystem fs, List<String> fileNames) throws Exception { |
|
81 | 69 |
String line = null; |
82 | 70 |
; |
83 | 71 |
try { |
... | ... | |
95 | 83 |
|
96 | 84 |
if (frequencyMap.containsKey(split[1])) { |
97 | 85 |
frequencyMap.put(split[1], frequencyMap.get(split[1]) + 1); |
86 |
|
|
98 | 87 |
} else { |
99 | 88 |
frequencyMap.put(split[1], 1); |
100 | 89 |
|
... | ... | |
112 | 101 |
} |
113 | 102 |
|
114 | 103 |
|
115 |
// Diff hunk for sortMapByValue; two implementations are interleaved here and share the final closing brace. |
// NOTE(review): rows numbered 115-130 look like the previous revision and rows 104-113 like the |
// replacement, judging by the two-column context rows around them - confirm against the unified diff. |
// |
// Variant A (rows 115-130): generic, single-argument; sorts entries by value in ascending order. |
public static <K, V extends Comparable<? super V>> Map<K, V> |
|
116 |
sortMapByValue(Map<K, V> map) { |
|
117 |
// Copy the entries into a list so they can be sorted independently of the source map. |
List<Map.Entry<K, V>> list = |
|
118 |
new LinkedList<Map.Entry<K, V>>(map.entrySet()); |
|
119 |
Collections.sort(list, new Comparator<Map.Entry<K, V>>() { |
|
120 |
public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) { |
|
121 |
// Ascending by value; keys play no part in the comparison. |
return (o1.getValue()).compareTo(o2.getValue()); |
|
122 |
} |
|
123 |
}); |
|
104 |
// Variant B (rows 104-113): raw-typed, takes the sort direction as a second argument. |
// Keys are compared by the value the map holds for them, with the keys' own natural |
// order as a tie-break, so keys sharing a value are not treated as duplicates by the sorted map. |
// Returns an ImmutableSortedMap, so callers cannot modify the result. |
// NOTE(review): raw Map/Ordering types mean keys and values are assumed Comparable - not checked at compile time. |
public static Map sortMapByValue(Map map, ORDERING_TYPE ordering) { |
|
105 |
Ordering valueComparator; |
|
106 |
if (ordering == ORDERING_TYPE.DESC) { |
|
107 |
// reverse() is applied to the whole compound ordering, so the key tie-break is reversed as well. |
valueComparator = Ordering.natural().onResultOf(Functions.forMap(map)).compound(Ordering.natural()).reverse(); |
|
108 |
} else { |
|
109 |
valueComparator = Ordering.natural().onResultOf(Functions.forMap(map)).compound(Ordering.natural()); |
|
110 |
} |
|
111 |
return ImmutableSortedMap.copyOf(map, valueComparator); |
|
124 | 112 |
|
125 |
// Remainder of variant A: a LinkedHashMap keeps the sorted insertion order. |
Map<K, V> result = new LinkedHashMap<K, V>(); |
|
126 |
for (Map.Entry<K, V> entry : list) { |
|
127 |
result.put(entry.getKey(), entry.getValue()); |
|
128 |
} |
|
129 |
// Prints the sorted entries to standard output before returning them. |
System.out.println("sorted" + result.entrySet()); |
|
130 |
return result; |
|
113 |
|
|
131 | 114 |
} |
132 | 115 |
|
133 | 116 |
private static void writeMap(FileSystem fs, Map<String, Integer> map, String output) throws IOException { |
... | ... | |
136 | 119 |
// TO append data to a file, use fs.append(Path f) |
137 | 120 |
|
138 | 121 |
for (Map.Entry<String, Integer> entry : map.entrySet()) { |
139 |
br.write(entry.getKey() + ":" + entry.getValue() + ",");
|
|
122 |
br.append(entry.getKey() + ":" + entry.getValue() + ",");
|
|
140 | 123 |
} |
141 | 124 |
|
142 | 125 |
br.close(); |
Also available in: Unified diff
fixed ordering