Project

General

Profile

« Previous | Next » 

Revision 43424

More Methods added

View differences:

modules/dnet-openaire-lodinterlinking/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/FrequencyCounter.java
42 42
            List filenames = getFiles(fs, path);
43 43
            Map<Integer, Integer> frequencyMap = getFrequencyMap(fs, filenames);
44 44
            Map<Integer, Integer> sortedMap = sortMapByValue(frequencyMap, ORDERING_TYPE.ASC);
45
            Map<BigInteger,Double> statistics = getStatistics(sortedMap);
46
            
45 47
            System.out.println("Sorted Map is " + sortedMap.entrySet());
46 48
            writeMap(fs, sortedMap, outputPath);
47 49
            fs.close();
......
66 68
        return fileNames;
67 69
    }
68 70

  
69
    private static Map getFrequencyMap(FileSystem fs, List<String> fileNames) throws Exception {
71
    private static Map<Integer,Integer> getFrequencyMap(FileSystem fs, List<String> fileNames) throws Exception {
70 72
        String line = null;
71 73
        ;
72 74
        try {
......
101 103

  
102 104
    }
103 105

  
106
    private static Map<BigInteger,Double> getStatistics(Map<Integer,Integer> sortedMap){
107
    	Map<BigInteger,Double> statistics= new HashMap<BigInteger,Double>();
108
    	double CC = 0d;
109
		CC = 0d;
110
		int lastBlockSize = 2;
111
		int f;
112
		BigInteger totalSizeOfBlocks = BigInteger.ZERO;
113
		BigInteger numberOfComparisons = BigInteger.ZERO;
114
		
115
		Set<Integer> keys = sortedMap.keySet();
116
		for(Integer key : keys){
117
			BigInteger blockSize = BigInteger.valueOf(key.intValue());
118
			f = sortedMap.get(key);
119
			totalSizeOfBlocks = totalSizeOfBlocks.add(BigInteger.valueOf(f).multiply(blockSize));
120
			numberOfComparisons = numberOfComparisons.add(BigInteger.valueOf(f).multiply(blockSize.multiply(blockSize.subtract(BigInteger.ONE)).shiftLeft(1)));
121
			CC = totalSizeOfBlocks.doubleValue()/numberOfComparisons.doubleValue();
122
			statistics.put(blockSize, CC);
123
		}
124
    	
125
    	
126
    	return  statistics;
127
    }
128
    
129
    
130
    private static int optimalBlockSize(Map<BigInteger,Double> statistics){
131
    	int optimalBlockSize = statistics.;
132
    	
133
    	for(int i = statistics.size() -1; i >= 1; i--){
134
			if(Math.abs(statistics.get(i)._2 - statistics.get(i-1)._2) < eps){
135
				eps = Math.abs(statistics.get(i)._2 - statistics.get(i-1)._2);
136
				optimalBlockSize = statistics.get(i)._1;
137
			}
138
		}
104 139

  
105
    public static Map sortMapByValue(Map map, ORDERING_TYPE ordering) {
106
        Ordering valueComparator;
140
    	
141
    	
142
    	return optimalBlockSize;
143
    }
144

  
145
    public static Map<Integer,Integer> sortMapByValue(Map map, ORDERING_TYPE ordering) {
146
        Ordering<Integer> valueComparator;
107 147
        if (ordering == ORDERING_TYPE.DESC) {
108 148
            valueComparator = Ordering.natural().onResultOf(Functions.forMap(map)).compound(Ordering.natural()).reverse();
109 149
        } else {

Also available in: Unified diff