Project

General

Profile

« Previous | Next » 

Revision 43436

Fixed errors and performance issues in the optimal block size computation

View differences:

modules/dnet-openaire-lodinterlinking/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/FrequencyCounter.java
6 6
import com.lambdaworks.redis.RedisClient;
7 7
import com.lambdaworks.redis.RedisConnection;
8 8
import com.lambdaworks.redis.RedisStringsConnection;
9

  
10
import algorithms.edjoin.Entry;
11

  
9 12
import org.apache.hadoop.conf.Configuration;
10 13
import org.apache.hadoop.fs.FileSystem;
11 14
import org.apache.hadoop.fs.LocatedFileStatus;
......
141 144

  
142 145
    private static int getOptimalBlockSize(TreeMap<BigInteger, Double> statistics) {
143 146
        int optimalBlockSize = Integer.valueOf(statistics.lastEntry().getKey().intValue());
144
        double eps = 1d;
145
        NavigableSet<BigInteger> keys = statistics.descendingKeySet();
146
        BigInteger lastKey = keys.last();
147
        for (BigInteger key : keys) {
148
        	if (lastKey.equals(key)) break;
149
        	double diff = Math.abs(statistics.get(key) - statistics.get(key.subtract(BigInteger.ONE)));
150
            if (diff < eps) {
151
                eps = diff;
152
                optimalBlockSize = Integer.valueOf(key.intValue());
153
            }
147
        double eps = 1d;                
148
        NavigableMap<BigInteger, Double> statisticsRev = statistics.descendingMap();
149
        
150
        Iterator<Map.Entry<BigInteger,Double>> statisticsRevIt =  statisticsRev.entrySet().iterator();
151
      
152
        if (statisticsRevIt.hasNext()){
153
        	Map.Entry<BigInteger,Double> nextEntry = statisticsRevIt.next();        
154
	        while (statisticsRevIt.hasNext()){
155
	        	Map.Entry<BigInteger,Double> entry = nextEntry;	        	
156
	        	double diff = Math.abs(entry.getValue() - nextEntry.getValue());
157
	            if (diff < eps) {
158
	                eps = diff;
159
	                optimalBlockSize = Integer.valueOf(entry.getKey().intValue());
160
	            }
161
	        }
154 162
        }
155

  
156 163
        return optimalBlockSize;
157 164
    }
158 165

  

Also available in: Unified diff