Revision 43436
Added by Giorgos Alexiou over 8 years ago
modules/dnet-openaire-lodinterlinking/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/FrequencyCounter.java | ||
---|---|---|
6 | 6 |
import com.lambdaworks.redis.RedisClient; |
7 | 7 |
import com.lambdaworks.redis.RedisConnection; |
8 | 8 |
import com.lambdaworks.redis.RedisStringsConnection; |
9 |
|
|
10 |
import algorithms.edjoin.Entry; |
|
11 |
|
|
9 | 12 |
import org.apache.hadoop.conf.Configuration; |
10 | 13 |
import org.apache.hadoop.fs.FileSystem; |
11 | 14 |
import org.apache.hadoop.fs.LocatedFileStatus; |
... | ... | |
141 | 144 |
|
142 | 145 |
private static int getOptimalBlockSize(TreeMap<BigInteger, Double> statistics) { |
143 | 146 |
int optimalBlockSize = Integer.valueOf(statistics.lastEntry().getKey().intValue()); |
144 |
double eps = 1d; |
|
145 |
NavigableSet<BigInteger> keys = statistics.descendingKeySet(); |
|
146 |
BigInteger lastKey = keys.last(); |
|
147 |
for (BigInteger key : keys) { |
|
148 |
if (lastKey.equals(key)) break; |
|
149 |
double diff = Math.abs(statistics.get(key) - statistics.get(key.subtract(BigInteger.ONE))); |
|
150 |
if (diff < eps) { |
|
151 |
eps = diff; |
|
152 |
optimalBlockSize = Integer.valueOf(key.intValue()); |
|
153 |
} |
|
147 |
double eps = 1d; |
|
148 |
NavigableMap<BigInteger, Double> statisticsRev = statistics.descendingMap(); |
|
149 |
|
|
150 |
Iterator<Map.Entry<BigInteger,Double>> statisticsRevIt = statisticsRev.entrySet().iterator(); |
|
151 |
|
|
152 |
if (statisticsRevIt.hasNext()){ |
|
153 |
Map.Entry<BigInteger,Double> nextEntry = statisticsRevIt.next(); |
|
154 |
while (statisticsRevIt.hasNext()){ |
|
155 |
Map.Entry<BigInteger,Double> entry = nextEntry; |
|
156 |
double diff = Math.abs(entry.getValue() - nextEntry.getValue()); |
|
157 |
if (diff < eps) { |
|
158 |
eps = diff; |
|
159 |
optimalBlockSize = Integer.valueOf(entry.getKey().intValue()); |
|
160 |
} |
|
161 |
} |
|
154 | 162 |
} |
155 |
|
|
156 | 163 |
return optimalBlockSize; |
157 | 164 |
} |
158 | 165 |
|
Also available in: Unified diff
Fixed errors and performance issues in the optimal block size computation