Revision 55262
Added by Alessia Bardi about 5 years ago
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/datacite/DataciteESIterator.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.collector.plugins.datacite; |
2 | 2 |
|
3 | 3 |
|
4 |
import com.google.gson.Gson; |
|
5 |
import com.google.gson.GsonBuilder; |
|
6 |
|
|
7 |
import eu.dnetlib.data.collector.plugins.datacite.schema.DataciteSchema; |
|
8 |
import eu.dnetlib.data.collector.plugins.datacite.schema.Result; |
|
9 |
import org.apache.commons.codec.binary.Base64; |
|
10 |
import org.apache.commons.io.IOUtils; |
|
11 |
import org.apache.commons.lang3.StringUtils; |
|
12 |
|
|
13 | 4 |
import java.io.ByteArrayOutputStream; |
14 | 5 |
import java.io.IOException; |
15 | 6 |
import java.net.URL; |
... | ... | |
20 | 11 |
import java.util.zip.DataFormatException; |
21 | 12 |
import java.util.zip.Inflater; |
22 | 13 |
|
14 |
import com.google.gson.Gson; |
|
15 |
import com.google.gson.GsonBuilder; |
|
16 |
import eu.dnetlib.data.collector.plugins.datacite.schema.DataciteSchema; |
|
17 |
import eu.dnetlib.data.collector.plugins.datacite.schema.Result; |
|
18 |
import org.apache.commons.codec.binary.Base64; |
|
19 |
import org.apache.commons.io.IOUtils; |
|
20 |
import org.apache.commons.lang3.StringUtils; |
|
21 |
|
|
23 | 22 |
public class DataciteESIterator implements Iterator<String> { |
24 | 23 |
|
25 | 24 |
|
... | ... | |
31 | 30 |
|
32 | 31 |
private final Gson g = new GsonBuilder().create(); |
33 | 32 |
|
34 |
private static final String BASE_URL= "http://ip-90-147-167-25.ct1.garrservices.it:5000/new_scan";
|
|
33 |
private String baseURL = "http://ip-90-147-167-25.ct1.garrservices.it:5000";
|
|
35 | 34 |
|
36 |
private static final String NEXT_URL= "http://ip-90-147-167-25.ct1.garrservices.it:5000/scan/%s"; |
|
35 |
private static final String START_PATH = "new_scan"; |
|
36 |
private static final String NEXT_PATH = "scan/%s"; |
|
37 | 37 |
|
38 | 38 |
|
39 |
public DataciteESIterator(long timestamp) throws Exception { |
|
39 |
public DataciteESIterator(long timestamp, String baseUrl) throws Exception {
|
|
40 | 40 |
this.timestamp = timestamp; |
41 |
this.baseURL = baseUrl; |
|
41 | 42 |
currentPage = new ArrayDeque<>(); |
42 | 43 |
startRequest(); |
43 | 44 |
} |
... | ... | |
77 | 78 |
} |
78 | 79 |
|
79 | 80 |
private void startRequest() throws Exception { |
80 |
final URL startUrl = new URL(timestamp >0 ?BASE_URL + "?timestamp="+timestamp: BASE_URL); |
|
81 |
String url = baseURL+"/"+START_PATH; |
|
82 |
final URL startUrl = new URL(timestamp >0 ? url + "?timestamp="+timestamp : url); |
|
81 | 83 |
fillQueue(IOUtils.toString(startUrl.openStream())); |
82 | 84 |
} |
83 | 85 |
|
84 | 86 |
private void getNextPage() throws IOException { |
85 |
final URL startUrl = new URL(String.format(NEXT_URL,scrollId)); |
|
87 |
String url = baseURL+"/"+NEXT_PATH; |
|
88 |
final URL startUrl = new URL(String.format(url,scrollId)); |
|
86 | 89 |
fillQueue(IOUtils.toString(startUrl.openStream())); |
87 | 90 |
} |
88 | 91 |
|
... | ... | |
111 | 114 |
|
112 | 115 |
return nextItem; |
113 | 116 |
} |
117 |
|
|
118 |
public String getBaseURL() { |
|
119 |
return baseURL; |
|
120 |
} |
|
121 |
|
|
122 |
public void setBaseURL(final String baseURL) { |
|
123 |
this.baseURL = baseURL; |
|
124 |
} |
|
114 | 125 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/datacite/DataciteCollectorPlugin.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.collector.plugins.datacite; |
2 | 2 |
|
3 |
import java.util.Date; |
|
4 |
|
|
3 | 5 |
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin; |
4 | 6 |
import eu.dnetlib.data.collector.plugin.CollectorPlugin; |
5 | 7 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
6 | 8 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
7 | 9 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
8 |
|
|
9 | 10 |
import org.apache.commons.lang3.StringUtils; |
11 |
import org.apache.commons.logging.Log; |
|
12 |
import org.apache.commons.logging.LogFactory; |
|
10 | 13 |
|
11 |
import java.util.Date; |
|
12 |
|
|
13 | 14 |
public class DataciteCollectorPlugin extends AbstractCollectorPlugin implements CollectorPlugin { |
14 | 15 |
|
16 |
private static final Log log = LogFactory.getLog(DataciteCollectorPlugin.class); |
|
15 | 17 |
|
16 | 18 |
@Override |
17 | 19 |
public Iterable<String> collect(InterfaceDescriptor interfaceDescriptor, String fromDate, String untilDate) throws CollectorServiceException { |
18 | 20 |
|
21 |
String baseurl = interfaceDescriptor.getBaseUrl(); |
|
22 |
if(StringUtils.isBlank(baseurl)) throw new CollectorServiceException("baseUrl cannot be empty"); |
|
19 | 23 |
long timestamp= 0; |
20 | 24 |
if (StringUtils.isNotBlank(fromDate)) { |
21 | 25 |
Date parsed = new DateUtils().parse(fromDate); |
22 | 26 |
timestamp =parsed.getTime() /1000; |
27 |
log.debug("Querying for Datacite records from timestamp "+timestamp+" (date was "+fromDate+")"); |
|
23 | 28 |
} |
24 | 29 |
|
25 | 30 |
final long finalTimestamp = timestamp; |
26 | 31 |
return () -> { |
27 | 32 |
try { |
28 |
return new DataciteESIterator(finalTimestamp); |
|
33 |
return new DataciteESIterator(finalTimestamp, baseurl);
|
|
29 | 34 |
} catch (Exception e) { |
30 | 35 |
throw new RuntimeException(e ); |
31 | 36 |
} |
Also available in: Unified diff
Datacite plugin now gets the baseUrl from the InterfaceDescriptor