Project

General

Profile

« Previous | Next » 

Revision 52237

code cleaning

View differences:

modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HTTPWithFileName/HTTPWithFileNameCollectorIterable.java
40 40
        ft.start();
41 41
    }
42 42

  
43
  /*  private boolean containsFilter(String meta){
44
        if (filter == null || filter.isEmpty())
45
            return false;
46
        String[] filter = this.filter.split(";");
47
        for(String item:filter){
48
            if (meta.contains(item))
49
                return true;
50
        }
51
        return false;
52
    }
53 43

  
54
    private String addFilePath(String meta,String url, boolean isJson){
55
        String path = url.replace("metadata", "pdf");
56

  
57
        try {
58
            if(isJson)
59
                meta = meta.substring(0, meta.length() - 1) + ",'downloadFileUrl':'" + path.substring(0, path.indexOf(".json")) + ".pdf'}";
60
            else{
61

  
62
                    if (meta.contains("<!DOCTYPE")) {
63
                        meta = meta.substring(meta.indexOf("<!DOCTYPE"));
64
                        meta = meta.substring(meta.indexOf(">") + 1);
65
                    }
66
                    int index = meta.lastIndexOf("</");
67
                    meta = meta.substring(0, index) + "<downloadFileUrl>" + path.substring(0, path.indexOf(".xml")) + ".pdf</downloadFileUrl>" + meta.substring(index);
68

  
69

  
70
            }
71

  
72
        }catch(Exception ex){
73
            log.info("not file with extension .json or .xml");
74
        }
75

  
76

  
77
        if(isJson){
78
            JSONObject jsonobj = null;
79
            try {
80
                jsonobj = new JSONObject("{'resource':" + meta + "}");
81

  
82
                return XML.toString(jsonobj);
83
            }catch(Exception e){
84
                log.fatal("Impossible to transform json object to xml \n" + jsonobj + "\n " + e.getMessage() + "\n" + url);
85
                throw new RuntimeException();
86
            }
87
        }
88
        return meta;
89
    }
90

  
91
    private void recurFolder(String text, String url){
92
        Document doc = Jsoup.parse(text);
93
        Elements links = doc.select("a");
94
        for(Element e:links){
95
            if (!e.text().equals("../")){
96
                String file = e.attr("href");
97
                if(file.endsWith(".json") || file.endsWith(".xml"))
98
                    metas.add(url+file);
99
                else
100
                    urls.add(url+file);
101
            }
102
        }
103
    }*/
104

  
105

  
106 44
    @Override
107 45
    public Iterator<String> iterator() {
108 46

  
109 47

  
110 48

  
111 49
        return new Iterator<String>(){
112
    /*        int total = 0;
113
            int filtered = 0;
114
            public void fillQueue() {
115
                Connector c = new Connector();
116
                String url;
117
                while((metas.size()>0 || urls.size() > 0 ) && queue.size()<100){
118
                    log.debug("metas.size() = " + metas.size() + " urls.size() = " + urls.size() + " queue.size() = " +queue.size());
119
                    if (metas.size() > 0){
120
                        url = metas.remove(0);
121
                        try {
122
                            c.get(url);
123
                        } catch (CollectorServiceException e) {
124
                            log.info("Impossible to collect url: " + url + " error: " + e.getMessage());
125
                        }
126
                        if(c.isStatusOk()){
127
                            try {
128
                                String ret = c.getResponse();
129
                                if (ret != null && ret.length()>0) {
130
                                    if (!containsFilter(ret))
131
                                        queue.put(addFilePath(ret, url, url.endsWith(".json")));
132
                                    else
133
                                        filtered++;
134
                                    total++;
135
                                }
136
                            } catch (InterruptedException e) {
137
                                log.info("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
138 50

  
139

  
140
                            }
141
                        }
142
                    }else{
143
                        url = urls.remove(0);
144
                        try {
145
                            c.get(url);
146
                        } catch (CollectorServiceException e) {
147
                            log.info("Impossible to collect url: " + url + " error: " + e.getMessage());
148
                        }
149
                        if(c.isStatusOk()){
150
                            if (c.responseTypeContains("text/html")){
151
                                recurFolder(c.getResponse(),url);
152
                            }
153
                            else if(c.responseTypeContains("application/json") || c.responseTypeContains("application/xml")){
154
                                try {
155
                                    queue.put(addFilePath(c.getResponse(),url, c.responseTypeContains("application/json")));
156
                                } catch (InterruptedException e) {
157
                                    log.info("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
158
                                }
159
                            }
160
                        }
161

  
162
                    }
163

  
164
                }
165

  
166
            }*/
167 51
            @Override
168 52
            public boolean hasNext() {
169 53
                while (queue.isEmpty());
170 54
                if (queue.peek().equals(TERMINATOR))
171 55
                    log.info(String.format("Total number of metadata %d, Number of metadata filtered %d", total, filtered));
172 56
                return !queue.peek().equals(TERMINATOR);
173
                /*if (queue.isEmpty()){
174
                    fillQueue();
175
                }*/
176
               // if(queue.isEmpty()){
177
                    //log.info(String.format("Total number of metadata %d, Number of metadata filtered %d", total, filtered));
178
               //     return false;
179
                //}
180 57

  
181
                //return true;
182 58
            }
183 59

  
184 60
            @Override
......
294 170
                    return XML.toString(jsonobj);
295 171
                }catch(Exception e){
296 172
                    log.fatal("Impossible to transform json object to xml \n" + jsonobj + "\n " + e.getMessage() + "\n" + url);
297
                    throw new RuntimeException();
173
                   // throw new RuntimeException();
298 174
                }
299 175
            }
300 176
            return meta;

Also available in: Unified diff