Project

General

Profile

« Previous | Next » 

Revision 43414

Added by Eri Katsari almost 8 years ago

fixed ordering

View differences:

modules/dnet-openaire-lodinterlinking/trunk/dnet-openaire-lod-interlinking.iml
16 16
    <orderEntry type="library" name="Maven: org.apache.cxf:cxf-rt-transports-http:2.7.8" level="project" />
17 17
    <orderEntry type="library" name="Maven: eu.dnetlib:dnet-openaire-data-protos:3.6.1-SNAPSHOT" level="project" />
18 18
    <orderEntry type="library" name="Maven: org.mongodb:mongo-java-driver:2.9.3" level="project" />
19
    <orderEntry type="library" name="Maven: eu.dnetlib:cnr-rmi-api:2.0.0" level="project" />
20
    <orderEntry type="library" name="Maven: org.eclipse.rdf4j:rdf4j-model:2.0M2" level="project" />
21
    <orderEntry type="library" name="Maven: org.eclipse.rdf4j:rdf4j-util:2.0M2" level="project" />
22
    <orderEntry type="library" name="Maven: org.eclipse.rdf4j:rdf4j-rio-api:2.0M2" level="project" />
23
    <orderEntry type="library" name="Maven: biz.paluch.redis:lettuce:3.5.0.Final" level="project" />
24
    <orderEntry type="library" name="Maven: io.reactivex:rxjava:1.1.6" level="project" />
25
    <orderEntry type="library" name="Maven: io.netty:netty-common:4.0.37.Final" level="project" />
26
    <orderEntry type="library" name="Maven: io.netty:netty-transport:4.0.37.Final" level="project" />
27
    <orderEntry type="library" name="Maven: io.netty:netty-buffer:4.0.37.Final" level="project" />
28
    <orderEntry type="library" name="Maven: io.netty:netty-handler:4.0.37.Final" level="project" />
29
    <orderEntry type="library" name="Maven: io.netty:netty-codec:4.0.37.Final" level="project" />
30
    <orderEntry type="library" name="Maven: com.google.guava:guava:17.0" level="project" />
31
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-common:2.6.0-cdh5.4.7" level="project" />
32
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-annotations:2.6.0-cdh5.4.7" level="project" />
33
    <orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.1.1" level="project" />
34
    <orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
35
    <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty:6.1.26.cloudera.4" level="project" />
36
    <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-util:6.1.26.cloudera.4" level="project" />
37
    <orderEntry type="library" name="Maven: com.sun.jersey:jersey-core:1.9" level="project" />
38
    <orderEntry type="library" name="Maven: com.sun.jersey:jersey-json:1.9" level="project" />
39
    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-jaxrs:1.8.3" level="project" />
40
    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-xc:1.8.3" level="project" />
41
    <orderEntry type="library" name="Maven: com.sun.jersey:jersey-server:1.9" level="project" />
42
    <orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.9.0" level="project" />
43
    <orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:0.4" level="project" />
44
    <orderEntry type="library" name="Maven: org.apache.avro:avro:1.7.6-cdh5.4.7" level="project" />
45
    <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.0.5" level="project" />
46
    <orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:2.5.0" level="project" />
47
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-auth:2.6.0-cdh5.4.7" level="project" />
48
    <orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15" level="project" />
49
    <orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-i18n:2.0.0-M15" level="project" />
50
    <orderEntry type="library" name="Maven: org.apache.directory.api:api-asn1-api:1.0.0-M20" level="project" />
51
    <orderEntry type="library" name="Maven: org.apache.directory.api:api-util:1.0.0-M20" level="project" />
52
    <orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.7.1" level="project" />
53
    <orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.7.1" level="project" />
54
    <orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.7.1" level="project" />
55
    <orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:3.0.0" level="project" />
56
    <orderEntry type="library" name="Maven: org.htrace:htrace-core:3.0.4" level="project" />
57
    <orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.5-cdh5.4.7" level="project" />
58
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-core:2.6.0-mr1-cdh5.4.7" level="project" />
19
    <orderEntry type="library" name="Maven: com.sun.xml.bind:jaxb-impl:2.2.3-1" level="project" />
20
    <orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.2.2" level="project" />
21
    <orderEntry type="library" name="Maven: javax.xml.stream:stax-api:1.0-2" level="project" />
22
    <orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
23
    <orderEntry type="library" name="Maven: asm:asm:3.1" level="project" />
24
    <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.3" level="project" />
25
    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.1.2" level="project" />
26
    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.1.2" level="project" />
59 27
    <orderEntry type="library" name="Maven: javax.xml:jaxp-api:1.4.2" level="project" />
60 28
    <orderEntry type="library" name="Maven: org.apache.poi:poi:3.8" level="project" />
61 29
    <orderEntry type="library" name="Maven: commons-codec:commons-codec:1.5" level="project" />
......
70 38
    <orderEntry type="library" name="Maven: org.eclipse.rdf4j:rdf4j-rio-api:2.0M2" level="project" />
71 39
    <orderEntry type="library" name="Maven: org.eclipse.rdf4j:rdf4j-model:2.0M2" level="project" />
72 40
    <orderEntry type="library" name="Maven: org.eclipse.rdf4j:rdf4j-util:2.0M2" level="project" />
73
    <orderEntry type="library" name="Maven: biz.paluch.redis:lettuce:3.5.0.Final" level="project" />
74
    <orderEntry type="library" name="Maven: io.reactivex:rxjava:1.1.6" level="project" />
75
    <orderEntry type="library" name="Maven: io.netty:netty-common:4.0.37.Final" level="project" />
76
    <orderEntry type="library" name="Maven: io.netty:netty-transport:4.0.37.Final" level="project" />
77
    <orderEntry type="library" name="Maven: io.netty:netty-buffer:4.0.37.Final" level="project" />
78
    <orderEntry type="library" name="Maven: io.netty:netty-handler:4.0.37.Final" level="project" />
79
    <orderEntry type="library" name="Maven: io.netty:netty-codec:4.0.37.Final" level="project" />
80
    <orderEntry type="library" name="Maven: com.google.guava:guava:17.0" level="project" />
81 41
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-common:2.6.0-cdh5.4.7" level="project" />
82 42
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-annotations:2.6.0-cdh5.4.7" level="project" />
43
    <orderEntry type="library" name="Maven: com.google.guava:guava:11.0.2" level="project" />
83 44
    <orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
84 45
    <orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.1.1" level="project" />
85 46
    <orderEntry type="library" name="Maven: xmlenc:xmlenc:0.52" level="project" />
......
139 100
    <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-core:2.6.0-mr1-cdh5.4.7" level="project" />
140 101
    <orderEntry type="library" name="Maven: hsqldb:hsqldb:1.8.0.10" level="project" />
141 102
    <orderEntry type="library" name="Maven: org.eclipse.jdt:core:3.1.1" level="project" />
103
    <orderEntry type="library" name="Maven: biz.paluch.redis:lettuce:shaded:3.5.0.Final" level="project" />
142 104
  </component>
143 105
</module>
modules/dnet-openaire-lodinterlinking/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/FrequencyCounter.java
3 3
import com.google.common.base.Functions;
4 4
import com.google.common.collect.ImmutableSortedMap;
5 5
import com.google.common.collect.Ordering;
6

  
7

  
8 6
import org.apache.hadoop.conf.Configuration;
9 7
import org.apache.hadoop.fs.FileSystem;
10 8
import org.apache.hadoop.fs.LocatedFileStatus;
......
15 13
import java.io.*;
16 14
import java.math.BigInteger;
17 15
import java.util.*;
16
import java.io.BufferedReader;
17
import java.io.BufferedWriter;
18
import java.io.IOException;
19
import java.io.InputStreamReader;
20
import java.io.OutputStreamWriter;
21
import java.util.ArrayList;
22
import java.util.HashMap;
23
import java.util.List;
24
import java.util.Map;
18 25

  
19 26

  
20 27
public class FrequencyCounter {
21 28

  
29
    private enum ORDERING_TYPE {
30
        ASC, DESC
31
    }
32

  
22 33
    public static void main(String[] args) throws Exception {
23 34
        String inputPath = "/tmp/lod_blocks/stats";
24 35
        String outputPath = "/tmp/lod_blocks/stats/frequencyMap";
......
29 40
            FileSystem fs = FileSystem.get(new Configuration());
30 41
            Path path = new Path(inputPath);
31 42
            List filenames = getFiles(fs, path);
32
            Map<String,Integer> frequencyMap = getFrequencyMap(fs, filenames);
33
            Map<String,Integer> sortedMap = sortMapByValue(frequencyMap);
43
            Map frequencyMap = getFrequencyMap(fs, filenames);
44
            Map sortedMap = sortMapByValue(frequencyMap, ORDERING_TYPE.DESC);
34 45
            writeMap(fs, sortedMap, outputPath);
35 46
            fs.close();
36 47
        } catch (Exception e) {
......
54 65
        return fileNames;
55 66
    }
56 67

  
57
    
58
    private static Map getStatistics(Map<String,Integer> sortedMap){
59
    	Map<Integer,Integer> statistics= new HashMap<Integer,Integer>();
60
    	double CC = 0d;
61
		CC = 0d;
62
		int lastBlockSize = 2;
63
		int f;
64
		BigInteger totalSizeOfBlocks = BigInteger.ZERO;
65
		BigInteger numberOfComparisons = BigInteger.ZERO;
66
		
67
		Set<String> keys = sortedMap.keySet();
68
		for(String key : keys){
69
			BigInteger blockSize = new BigInteger(key);
70
			f = sortedMap.get(key);
71
			totalSizeOfBlocks = totalSizeOfBlocks.add(blockSize);
72
			numberOfComparisons = numberOfComparisons.add(blockSize.multiply(blockSize.subtract(BigInteger.ONE)).shiftLeft(1));
73
			CC = totalSizeOfBlocks.doubleValue()/numberOfComparisons.doubleValue();
74
		}
75
    	
76
    	
77
    	return  statistics;
78
    }
79
    
80
    private static Map<String,Integer> getFrequencyMap(FileSystem fs, List<String> fileNames) throws Exception {
68
    private static Map getFrequencyMap(FileSystem fs, List<String> fileNames) throws Exception {
81 69
        String line = null;
82 70
        ;
83 71
        try {
......
95 83

  
96 84
                    if (frequencyMap.containsKey(split[1])) {
97 85
                        frequencyMap.put(split[1], frequencyMap.get(split[1]) + 1);
86

  
98 87
                    } else {
99 88
                        frequencyMap.put(split[1], 1);
100 89

  
......
112 101
    }
113 102

  
114 103

  
115
    public static <K, V extends Comparable<? super V>> Map<K, V>
116
    sortMapByValue(Map<K, V> map) {
117
        List<Map.Entry<K, V>> list =
118
                new LinkedList<Map.Entry<K, V>>(map.entrySet());
119
        Collections.sort(list, new Comparator<Map.Entry<K, V>>() {
120
            public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) {
121
                return (o1.getValue()).compareTo(o2.getValue());
122
            }
123
        });
104
    public static Map sortMapByValue(Map map, ORDERING_TYPE ordering) {
105
        Ordering valueComparator;
106
        if (ordering == ORDERING_TYPE.DESC) {
107
            valueComparator = Ordering.natural().onResultOf(Functions.forMap(map)).compound(Ordering.natural()).reverse();
108
        } else {
109
            valueComparator = Ordering.natural().onResultOf(Functions.forMap(map)).compound(Ordering.natural());
110
        }
111
        return ImmutableSortedMap.copyOf(map, valueComparator);
124 112

  
125
        Map<K, V> result = new LinkedHashMap<K, V>();
126
        for (Map.Entry<K, V> entry : list) {
127
            result.put(entry.getKey(), entry.getValue());
128
        }
129
        System.out.println("sorted" + result.entrySet());
130
        return result;
113

  
131 114
    }
132 115

  
133 116
    private static void writeMap(FileSystem fs, Map<String, Integer> map, String output) throws IOException {
......
136 119
        // To append data to a file, use fs.append(Path f)
137 120

  
138 121
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
139
            br.write(entry.getKey() + ":" + entry.getValue() + ",");
122
            br.append(entry.getKey() + ":" + entry.getValue() + ",");
140 123
        }
141 124

  
142 125
        br.close();

Also available in: Unified diff