Project

General

Profile

« Previous | Next » 

Revision 52026

Considered the case where metadata are given in XML format instead of JSON.

View differences:

modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HTTPWithFileName/HTTPWithFileNameCollectorIterable.java
23 23
    private static final Log log = LogFactory.getLog(HTTPWithFileNameCollectorIterable.class);
24 24

  
25 25
    private final ArrayList<String> urls = new ArrayList<>();
26
    private final ArrayList<String> jsons = new ArrayList<String>();
26
    private final ArrayList<String> metas = new ArrayList<String>();
27 27

  
28 28

  
29 29
    public HTTPWithFileNameCollectorIterable(String startUrl){
......
31 31
        urls.add(startUrl);
32 32
    }
33 33

  
34
    private String addFilePath(String json,String url){
34
    private String addFilePath(String meta,String url, boolean isJson){
35 35
        String path = url.replace("metadata", "pdf");
36 36
        try {
37
            json = json.substring(0, json.length() - 1) + ",'downloadFileUrl':'" + path.substring(0, path.indexOf(".json")) + ".pdf'}";
37
            if(isJson)
38
                meta = meta.substring(0, meta.length() - 1) + ",'downloadFileUrl':'" + path.substring(0, path.indexOf(".json")) + ".pdf'}";
39
            else
40
                meta = meta.substring(0, meta.length() - 10) + "<downloadFileUrl>'" + path.substring(0, path.indexOf(".xml")) + ".pdf</downloadFileUrl></article>";
38 41
        }catch(Exception ex){
39
            log.info("not file with extension .json");
42
            log.info("not file with extension .json or .xml");
40 43
        }
41 44

  
42
        JSONObject jsonobj = new JSONObject("{'metadata':"+json+"}");
43

  
44
        return XML.toString(jsonobj);
45
       //JSONObject jsonobj = new JSONObject("{'metadata':"+json+"}");
46
        if(isJson){
47
            JSONObject jsonobj = new JSONObject(meta);
48
            return XML.toString(jsonobj);
49
        }
50
        return meta;
45 51
    }
46 52

  
47 53
    private void recurFolder(String text, String url){
......
50 56
        for(Element e:links){
51 57
            if (!e.text().equals("../")){
52 58
                String file = e.attr("href");
53
                if(file.endsWith(".json"))
54
                    jsons.add(url+file);
59
                if(file.endsWith(".json") || file.endsWith(".xml"))
60
                    metas.add(url+file);
55 61
                else
56 62
                    urls.add(url+file);
57 63
            }
......
69 75
            public void fillQueue() {
70 76
                Connector c = new Connector();
71 77
                String url;
72
                while((jsons.size()>0 || urls.size() > 0 ) && queue.size()<100){
73
                    if (jsons.size() > 0){
74
                        url = jsons.remove(0);
78
                while((metas.size()>0 || urls.size() > 0 ) && queue.size()<100){
79
                    if (metas.size() > 0){
80
                        url = metas.remove(0);
75 81
                        try {
76 82
                            c.get(url);
77 83
                        } catch (CollectorServiceException e) {
......
81 87
                            try {
82 88
                                String ret = c.getResponse();
83 89
                                if (ret != null && ret.length()>0)
84
                                    queue.put(addFilePath(ret,url));
90
                                    queue.put(addFilePath(ret,url,url.endsWith(".json")));
85 91
                            } catch (InterruptedException e) {
86 92
                                log.error("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
87 93

  
......
98 104
                            if (c.responseTypeContains("text/html")){
99 105
                                recurFolder(c.getResponse(),url);
100 106
                            }
101
                            else if(c.responseTypeContains("application/json")){
107
                            else if(c.responseTypeContains("application/json") || c.responseTypeContains("application/xml")){
102 108
                                try {
103
                                    queue.put(addFilePath(c.getResponse(),url));
109
                                    queue.put(addFilePath(c.getResponse(),url, c.responseTypeContains("application/json")));
104 110
                                } catch (InterruptedException e) {
105 111
                                    log.error("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
106 112
                                }

Also available in: Unified diff