Project

General

Profile

« Previous | Next » 

Revision 51970

commit after refactoring

View differences:

modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/resourcesynck/Connector.java
1
package eu.dnetlib.data.collector.plugins.resourcesynck;
2

  
3
import eu.dnetlib.data.collector.plugins.HttpConnector;
4
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
5

  
6

  
7
/**
8
 * Created by miriam on 07/05/2018.
9
 */
10
public class Connector extends HttpConnector implements ConnectorInterface  {
11
    private String response;
12

  
13
    @Override
14
    public void get(final String requestUrl) throws CollectorServiceException {
15
        response = getInputSource(requestUrl);
16
    }
17

  
18
    @Override
19
    public String getResponse() {
20
        return response;
21
    }
22

  
23
    @Override
24
    public boolean isStatusOk() {
25
        return (response != null);
26
    }
27

  
28
    @Override
29
    public boolean responseTypeContains(String string) {
30
        String responseType = getResponseType();
31
        if (responseType != null)
32
            return responseType.contains(string);
33
        return false;
34
    }
35

  
36

  
37
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/resourcesynck/RSCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.resourcesynck;
2

  
3
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
4
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
5
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
6

  
7
/**
8
 * Created by miriam on 04/05/2018.
9
 */
10
public class RSCollectorPlugin extends AbstractCollectorPlugin {
11

  
12
    @Override
13
    public Iterable<String> collect(InterfaceDescriptor interfaceDescriptor, String s, String s1) throws CollectorServiceException {
14
        return new RSCollectorIterable(interfaceDescriptor.getBaseUrl());
15
    }
16
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/resourcesynck/ConnectorInterface.java
1
package eu.dnetlib.data.collector.plugins.resourcesynck;
2

  
3
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
4

  
5
/**
6
 * Created by miriam on 07/05/2018.
7
 */
8
public interface ConnectorInterface {
9

  
10
    public void get(final String requestUrl) throws CollectorServiceException;
11

  
12
    public String getResponse();
13

  
14
    public boolean isStatusOk();
15

  
16

  
17
    public boolean responseTypeContains(String string);
18

  
19
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/resourcesynck/RSCollectorIterable.java
1
package eu.dnetlib.data.collector.plugins.resourcesynck;
2

  
3
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6
import org.json.JSONObject;
7
import org.json.XML;
8
import org.jsoup.Jsoup;
9
import org.jsoup.nodes.Document;
10
import org.jsoup.nodes.Element;
11
import org.jsoup.select.Elements;
12

  
13
import java.util.ArrayList;
14
import java.util.Iterator;
15

  
16
import java.util.concurrent.ArrayBlockingQueue;
17
import java.util.function.Consumer;
18

  
19
/**
20
 * Created by miriam on 04/05/2018.
21
 */
22
public class RSCollectorIterable implements Iterable<String> {
23
    private static final Log log = LogFactory.getLog(RSCollectorIterable.class);
24
    private final ArrayBlockingQueue<String> queue = new ArrayBlockingQueue<String>(100);
25
    private final ArrayList<String> urls = new ArrayList<>();
26
    private final ArrayList<String> jsons = new ArrayList<String>();
27

  
28

  
29
    public RSCollectorIterable(String startUrl){
30

  
31
        urls.add(startUrl);
32
        fillQueue();
33
    }
34

  
35
    private String addFilePath(String json,String url){
36
        String path = url.replace("metadata", "pdf");
37
        try {
38
            json = json.substring(0, json.length() - 1) + ",'downloadFileUrl':'" + path.substring(0, path.indexOf(".json")) + ".pdf'}";
39
        }catch(Exception ex){
40
            log.info("not file with extension .json");
41
        }
42

  
43
        JSONObject jsonobj = new JSONObject("{'metadata':"+json+"}");
44

  
45
        return XML.toString(jsonobj);
46
    }
47

  
48
    private void recurFolder(String text, String url){
49
        Document doc = Jsoup.parse(text);
50
        Elements links = doc.select("a");
51
        for(Element e:links){
52
            if (!e.text().equals("../")){
53
                String file = e.attr("href");
54
                if(file.endsWith(".json"))
55
                    jsons.add(url+file);
56
                else
57
                    urls.add(url+file);
58
            }
59
        }
60
    }
61

  
62
    private void fillQueue() {
63
        Connector c = new Connector();
64
        String url;
65
        while((jsons.size()>0 || urls.size() > 0 ) && queue.size()<100){
66
            if (jsons.size() > 0){
67
                url = jsons.remove(0);
68
                try {
69
                    c.get(url);
70
                } catch (CollectorServiceException e) {
71
                    log.error("Impossible to collect url: " + url + " error: " + e.getMessage());
72
                }
73
                if(c.isStatusOk()){
74
                    try {
75
                        String ret = c.getResponse();
76
                        if (ret != null && ret.length()>0)
77
                            queue.put(addFilePath(ret,url));
78
                    } catch (InterruptedException e) {
79
                        log.error("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
80

  
81
                    }
82
                }
83
            }else{
84
                url = urls.remove(0);
85
                try {
86
                    c.get(url);
87
                } catch (CollectorServiceException e) {
88
                    log.error("Impossible to collect url: " + url + " error: " + e.getMessage());
89
                }
90
                if(c.isStatusOk()){
91
                    if (c.responseTypeContains("text/html")){
92
                        recurFolder(c.getResponse(),url);
93
                    }
94
                    else if(c.responseTypeContains("application/json")){
95
                        try {
96
                            queue.put(addFilePath(c.getResponse(),url));
97
                        } catch (InterruptedException e) {
98
                            log.error("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
99
                        }
100
                    }
101
                }
102

  
103
            }
104

  
105
        }
106

  
107
    }
108

  
109
    @Override
110
    public Iterator<String> iterator() {
111

  
112
        return new Iterator<String>(){
113

  
114
            @Override
115
            public boolean hasNext() {
116
                if (queue.isEmpty()){
117
                    fillQueue();
118
                }
119
                return (!queue.isEmpty());
120
            }
121

  
122
            @Override
123
            public String next() {
124
                return queue.poll(); 
125
            }
126

  
127
            @Override
128
            public void remove() {
129

  
130
            }
131

  
132
            @Override
133
            public void forEachRemaining(Consumer<? super String> action) {
134

  
135
            }
136
        };
137
    }
138

  
139

  
140
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/excel/Read.java
13 13

  
14 14
import eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin;
15 15
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
16
import org.apache.commons.lang3.StringUtils;
16 17
import org.apache.commons.logging.Log;
17 18
import org.apache.commons.logging.LogFactory;
18 19
import org.apache.poi.ss.usermodel.Cell;
......
114 115
	}
115 116

  
116 117
	private void fillMap(JSONObject json, HashMap<String,String> map, String elem){
117
		JSONArray arr = json.getJSONObject("replace").getJSONArray(elem);
118
		for(Object entry: arr) {
119
			try {
118
		try{
119
			final JSONArray arr = json.getJSONObject("replace").getJSONArray(elem);
120
			for(Object entry: arr)
120 121
				map.put(((JSONObject)entry).getString("from"), ((JSONObject)entry).getString("to"));
121
			}catch(Exception ex){
122
				ex.printStackTrace();
123
			}
122
		}catch(Throwable e){
123
			log.error("Problems filling the map for " + elem);
124
			throw(e);
124 125
		}
125 126

  
126 127
	}
127 128

  
129

  
130

  
128 131
	private void parseArguments() {
129
		JSONObject json = new JSONObject(argument);
130
		fillMap(json, map_header,"header");
131
		fillMap(json,map_body,"body");
132
		if (StringUtils.isNotEmpty(argument)){
133
			try{
134
				final JSONObject json = new JSONObject(argument);
135
				if(json.has("header"))
136
					fillMap(json, map_header,"header");
137
				if (json.has("body"))
138
					fillMap(json,map_body,"body");
132 139

  
133
		if (!(json.getJSONArray("replace_currency")==null)){
134
			replace_currency = true	;
135
			from_currency = json.getJSONArray("replace_currency").getJSONObject(0).getString("from");
136
			to_currency = json.getJSONArray("replace_currency").getJSONObject(0).getString("to");
140
				if(json.has("replace_currency"))
141
				{
142
					replace_currency = true	;
143
					from_currency = json.getJSONArray("replace_currency").getJSONObject(0).getString("from");
144
					to_currency = json.getJSONArray("replace_currency").getJSONObject(0).getString("to");
145

  
146
				}
147

  
148
				if (json.has("col_currency"))
149
					currency_column = json.getInt("col_currency");
150
			}catch(Throwable e){
151
				log.error("Problems while parsing the argument parameter.");
152
				throw (e);
153
			}
137 154
		}
138 155

  
139
		currency_column = json.getInt("col_currency");
140 156

  
157

  
141 158
	}
142 159

  
143 160
	private String applyReplace(String row, HashMap<String,String>replace){
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HTTPWithFileName/HTTPWithFileNameCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.HTTPWithFileName;
2

  
3
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
4
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
5
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
6

  
7
/**
8
 * Created by miriam on 04/05/2018.
9
 */
10
public class HTTPWithFileNameCollectorPlugin extends AbstractCollectorPlugin {
11

  
12
    @Override
13
    public Iterable<String> collect(InterfaceDescriptor interfaceDescriptor, String s, String s1) throws CollectorServiceException {
14
        return new HTTPWithFileNameCollectorIterable(interfaceDescriptor.getBaseUrl());
15
    }
16
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HTTPWithFileName/Connector.java
1
package eu.dnetlib.data.collector.plugins.HTTPWithFileName;
2

  
3
import eu.dnetlib.data.collector.plugins.HttpConnector;
4
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
5

  
6

  
7
/**
8
 * Created by miriam on 07/05/2018.
9
 */
10
public class Connector extends HttpConnector implements ConnectorInterface  {
11
    private String response;
12

  
13
    @Override
14
    public void get(final String requestUrl) throws CollectorServiceException {
15
        response = getInputSource(requestUrl);
16
    }
17

  
18
    @Override
19
    public String getResponse() {
20
        return response;
21
    }
22

  
23
    @Override
24
    public boolean isStatusOk() {
25
        return (response != null);
26
    }
27

  
28
    @Override
29
    public boolean responseTypeContains(String string) {
30
        String responseType = getResponseType();
31
        if (responseType != null)
32
            return responseType.contains(string);
33
        return false;
34
    }
35

  
36

  
37
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HTTPWithFileName/ConnectorInterface.java
1
package eu.dnetlib.data.collector.plugins.HTTPWithFileName;
2

  
3
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
4

  
5
/**
6
 * Created by miriam on 07/05/2018.
7
 */
8
public interface ConnectorInterface {
9

  
10
    public void get(final String requestUrl) throws CollectorServiceException;
11

  
12
    public String getResponse();
13

  
14
    public boolean isStatusOk();
15

  
16

  
17
    public boolean responseTypeContains(String string);
18

  
19
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HTTPWithFileName/HTTPWithFileNameCollectorIterable.java
1
package eu.dnetlib.data.collector.plugins.HTTPWithFileName;
2

  
3
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6
import org.json.JSONObject;
7
import org.json.XML;
8
import org.jsoup.Jsoup;
9
import org.jsoup.nodes.Document;
10
import org.jsoup.nodes.Element;
11
import org.jsoup.select.Elements;
12

  
13
import java.util.ArrayList;
14
import java.util.Iterator;
15

  
16
import java.util.concurrent.ArrayBlockingQueue;
17
import java.util.function.Consumer;
18

  
19
/**
20
 * Created by miriam on 04/05/2018.
21
 */
22
public class HTTPWithFileNameCollectorIterable implements Iterable<String> {
23
    private static final Log log = LogFactory.getLog(HTTPWithFileNameCollectorIterable.class);
24

  
25
    private final ArrayList<String> urls = new ArrayList<>();
26
    private final ArrayList<String> jsons = new ArrayList<String>();
27

  
28

  
29
    public HTTPWithFileNameCollectorIterable(String startUrl){
30

  
31
        urls.add(startUrl);
32
    }
33

  
34
    private String addFilePath(String json,String url){
35
        String path = url.replace("metadata", "pdf");
36
        try {
37
            json = json.substring(0, json.length() - 1) + ",'downloadFileUrl':'" + path.substring(0, path.indexOf(".json")) + ".pdf'}";
38
        }catch(Exception ex){
39
            log.info("not file with extension .json");
40
        }
41

  
42
        JSONObject jsonobj = new JSONObject("{'metadata':"+json+"}");
43

  
44
        return XML.toString(jsonobj);
45
    }
46

  
47
    private void recurFolder(String text, String url){
48
        Document doc = Jsoup.parse(text);
49
        Elements links = doc.select("a");
50
        for(Element e:links){
51
            if (!e.text().equals("../")){
52
                String file = e.attr("href");
53
                if(file.endsWith(".json"))
54
                    jsons.add(url+file);
55
                else
56
                    urls.add(url+file);
57
            }
58
        }
59
    }
60

  
61

  
62
    @Override
63
    public Iterator<String> iterator() {
64
        final ArrayBlockingQueue<String> queue = new ArrayBlockingQueue<String>(100);
65

  
66

  
67
        return new Iterator<String>(){
68

  
69
            public void fillQueue() {
70
                Connector c = new Connector();
71
                String url;
72
                while((jsons.size()>0 || urls.size() > 0 ) && queue.size()<100){
73
                    if (jsons.size() > 0){
74
                        url = jsons.remove(0);
75
                        try {
76
                            c.get(url);
77
                        } catch (CollectorServiceException e) {
78
                            log.error("Impossible to collect url: " + url + " error: " + e.getMessage());
79
                        }
80
                        if(c.isStatusOk()){
81
                            try {
82
                                String ret = c.getResponse();
83
                                if (ret != null && ret.length()>0)
84
                                    queue.put(addFilePath(ret,url));
85
                            } catch (InterruptedException e) {
86
                                log.error("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
87

  
88
                            }
89
                        }
90
                    }else{
91
                        url = urls.remove(0);
92
                        try {
93
                            c.get(url);
94
                        } catch (CollectorServiceException e) {
95
                            log.error("Impossible to collect url: " + url + " error: " + e.getMessage());
96
                        }
97
                        if(c.isStatusOk()){
98
                            if (c.responseTypeContains("text/html")){
99
                                recurFolder(c.getResponse(),url);
100
                            }
101
                            else if(c.responseTypeContains("application/json")){
102
                                try {
103
                                    queue.put(addFilePath(c.getResponse(),url));
104
                                } catch (InterruptedException e) {
105
                                    log.error("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
106
                                }
107
                            }
108
                        }
109

  
110
                    }
111

  
112
                }
113

  
114
            }
115
            @Override
116
            public boolean hasNext() {
117
                if (queue.isEmpty()){
118
                    fillQueue();
119
                }
120
                return (!queue.isEmpty());
121
            }
122

  
123
            @Override
124
            public String next() {
125
                return queue.poll(); 
126
            }
127

  
128
            @Override
129
            public void remove() {
130

  
131
            }
132

  
133
            @Override
134
            public void forEachRemaining(Consumer<? super String> action) {
135

  
136
            }
137
        };
138
    }
139

  
140

  
141
}

Also available in: Unified diff