Revision 48028
Added by Claudio Atzori almost 7 years ago
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/excel/ReadExcelTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.excel; |
|
2 |
|
|
3 |
import eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin; |
|
4 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
|
5 |
import org.junit.Assert; |
|
6 |
import org.junit.Before; |
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
import java.util.HashMap; |
|
10 |
import java.util.Iterator; |
|
11 |
/** |
|
12 |
* Created by miriam on 10/05/2017. |
|
13 |
*/ |
|
14 |
public class ReadExcelTest { |
|
15 |
private InterfaceDescriptor descr; |
|
16 |
private Read r; |
|
17 |
private Object asserNotNul; |
|
18 |
|
|
19 |
@Before |
|
20 |
public void setUp() throws Exception { |
|
21 |
descr = new InterfaceDescriptor(); |
|
22 |
descr.setBaseUrl("https://pf.fwf.ac.at/en/research-in-practice/project-finder.xlsx?&&&search%5Bcall%5D=&search%5Bdecision_board_ids%5D=&search%5Bend_date%5D=&search%5Binstitute_name%5D=&search%5Blead_firstname%5D=&search%5Blead_lastname%5D=&search%5Bper_page%5D=10&search%5Bproject_number%5D=&search%5Bproject_title%5D=&search%5Bscience_discipline_id%5D=&search%5Bstart_date%5D=&search%5Bstatus_id%5D=&search%5Bwhat%5D=&action=index&controller=projects&locale=en&per_page=10" ); |
|
23 |
HashMap<String, String> params = new HashMap<String, String>(); |
|
24 |
|
|
25 |
params.put("argument", "{\"replace\":{\"header\":[{\"from\":\"&\",\"to\":\"and\"}],\"body\":[{\"from\":\"\\n\",\"to\":\" \"}]}," + |
|
26 |
"\"replace_currency\":[{\"from\":\"$\",\"to\":\"€\"}],\"col_currency\":10}"); |
|
27 |
params.put("header_row","4"); |
|
28 |
params.put("tmp_file","//tmp//fwf.xslx"); |
|
29 |
params.put("remove_empty_lines","yes"); |
|
30 |
params.put("remove_lines_with_id"," – "); |
|
31 |
params.put("col_id","1"); |
|
32 |
params.put("remove_tmp_file","no"); |
|
33 |
params.put("sheet_number","0"); |
|
34 |
params.put("file_to_save","/tmp/project_search.2017.05.10.csv"); |
|
35 |
params.put("separator", ","); |
|
36 |
params.put("quote","\""); |
|
37 |
descr.setParams(params); |
|
38 |
|
|
39 |
r = new Read(descr); |
|
40 |
r.setCollector(new HttpCSVCollectorPlugin()); |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void readExcelFromUrl()throws Exception{ |
|
45 |
Iterator<String> it = r.parseFile().iterator(); |
|
46 |
|
|
47 |
while(it.hasNext()){ |
|
48 |
Assert.assertNotNull(it.next()); |
|
49 |
//System.out.println(it.next()); |
|
50 |
} |
|
51 |
|
|
52 |
|
|
53 |
} |
|
54 |
} |
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/opentrial/OpentrialTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.opentrial; |
|
2 |
|
|
3 |
/** |
|
4 |
* Created by miriam on 07/03/2017. |
|
5 |
*/ |
|
6 |
|
|
7 |
import eu.dnetlib.data.collector.plugins.opentrial.OpenTrialIterator; |
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import java.io.BufferedWriter; |
|
11 |
import java.io.FileWriter; |
|
12 |
import java.io.PrintWriter; |
|
13 |
import java.util.Iterator; |
|
14 |
|
|
15 |
|
|
16 |
public class OpentrialTest { |
|
17 |
|
|
18 |
@Test |
|
19 |
public void importOpentrial() throws Exception { |
|
20 |
PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("opentrials.xml"))); |
|
21 |
OpenTrialIterator trial = new OpenTrialIterator("https://api.opentrials.net/v1/search?",null,null); |
|
22 |
Iterator<String> iterator = trial.iterator(); |
|
23 |
int parse_number = 0; |
|
24 |
while(iterator.hasNext() && parse_number < 30){ |
|
25 |
writer.println("<doc>" + iterator.next() + "</doc>"); |
|
26 |
parse_number++; |
|
27 |
} |
|
28 |
writer.close(); |
|
29 |
|
|
30 |
} |
|
31 |
|
|
32 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/excel/CSVFileWriter.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.excel; |
|
2 |
|
|
3 |
/** |
|
4 |
* Created by miriam on 10/05/2017. |
|
5 |
*/ |
|
6 |
import java.io.BufferedWriter; |
|
7 |
import java.io.FileOutputStream; |
|
8 |
import java.io.IOException; |
|
9 |
import java.io.OutputStreamWriter; |
|
10 |
import java.util.ArrayList; |
|
11 |
import org.apache.commons.csv.CSVPrinter; |
|
12 |
import org.apache.commons.csv.CSVFormat; |
|
13 |
|
|
14 |
public class CSVFileWriter { |
|
15 |
private static final String NEW_LINE_SEPARATOR = "\n"; |
|
16 |
|
|
17 |
private Object [] file_header ; |
|
18 |
private ArrayList<ArrayList<String>> projects = new ArrayList<ArrayList<String>>(); |
|
19 |
|
|
20 |
public void setHeader(String[] header){ |
|
21 |
this.file_header = header; |
|
22 |
} |
|
23 |
|
|
24 |
public void addProject(ArrayList<String> project) { |
|
25 |
projects.add(project); |
|
26 |
|
|
27 |
} |
|
28 |
|
|
29 |
public void writeFile(String csv_file_path){ |
|
30 |
BufferedWriter writer = null; |
|
31 |
CSVPrinter csvFilePrinter = null; |
|
32 |
|
|
33 |
CSVFormat csvFileFormat = CSVFormat.DEFAULT.withRecordSeparator(NEW_LINE_SEPARATOR); |
|
34 |
|
|
35 |
try{ |
|
36 |
writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csv_file_path),"UTF-8")); |
|
37 |
|
|
38 |
csvFilePrinter = new CSVPrinter(writer,csvFileFormat); |
|
39 |
csvFilePrinter.printRecord(file_header); |
|
40 |
|
|
41 |
for(ArrayList<String> project:projects){ |
|
42 |
csvFilePrinter.printRecord(project); |
|
43 |
} |
|
44 |
}catch(Exception e){ |
|
45 |
e.printStackTrace(); |
|
46 |
}finally{ |
|
47 |
try{ |
|
48 |
writer.flush(); |
|
49 |
writer.close(); |
|
50 |
csvFilePrinter.close(); |
|
51 |
}catch(IOException ioe){ |
|
52 |
ioe.printStackTrace(); |
|
53 |
} |
|
54 |
} |
|
55 |
} |
|
56 |
|
|
57 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/excel/Read.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.excel; |
|
2 |
|
|
3 |
/** |
|
4 |
* Created by miriam on 10/05/2017. |
|
5 |
*/ |
|
6 |
import java.io.File; |
|
7 |
import java.io.FileInputStream; |
|
8 |
import java.io.IOException; |
|
9 |
import java.net.URL; |
|
10 |
import java.util.ArrayList; |
|
11 |
import java.util.HashMap; |
|
12 |
import java.util.Iterator; |
|
13 |
|
|
14 |
import eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin; |
|
15 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
|
16 |
import org.apache.commons.logging.Log; |
|
17 |
import org.apache.commons.logging.LogFactory; |
|
18 |
import org.apache.poi.ss.usermodel.Cell; |
|
19 |
import org.apache.poi.ss.usermodel.DataFormatter; |
|
20 |
import org.apache.poi.ss.usermodel.Row; |
|
21 |
import org.apache.poi.ss.usermodel.Sheet; |
|
22 |
import org.apache.poi.ss.usermodel.Workbook; |
|
23 |
import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
|
24 |
import org.json.*; |
|
25 |
|
|
26 |
import org.apache.commons.io.FileUtils; |
|
27 |
|
|
28 |
public class Read { |
|
29 |
|
|
30 |
private static final Log log = LogFactory.getLog(Read.class); |
|
31 |
|
|
32 |
/** The descriptor. */ |
|
33 |
private InterfaceDescriptor descriptor; |
|
34 |
|
|
35 |
|
|
36 |
/*private final String EXCEL_FILE_URL ="https://pf.fwf.ac.at/en/research-in-practice/project-finder.xlsx?&&&search%5Bcall%5D=&search%5Bdecision_board_ids%5D=&search%5Bend_date%5D=&search%5Binstitute_name%5D=&search%5Blead_firstname%5D=&search%5Blead_lastname%5D=&search%5Bper_page%5D=10&search%5Bproject_number%5D=&search%5Bproject_title%5D=&search%5Bscience_discipline_id%5D=&search%5Bstart_date%5D=&search%5Bstatus_id%5D=&search%5Bwhat%5D=&action=index&controller=projects&locale=en&per_page=10"; |
|
37 |
private final String CSV_FILE_PATH = "//Users//miriam//Documents//svn//mirima//FWF//projects_search2017.05.09.5.csv"; |
|
38 |
private final String argument = "{\"replace\":{\"header\":[{\"from\":\"&\",\"to\":\"and\"}],\"body\":[{\"from\":\"\\n\",\"to\":\" \"}]}," + |
|
39 |
"\"replace_currency\":[{\"from\":\"$\",\"to\":\"€\"}]," |
|
40 |
+ "\"col_currency\":10}"; */ |
|
41 |
private Sheet sheet; |
|
42 |
private CSVFileWriter csv_writer = new CSVFileWriter(); |
|
43 |
private HashMap<String,String> map_header = new HashMap<String,String>(); |
|
44 |
private HashMap<String,String> map_body = new HashMap<String,String>(); |
|
45 |
private int header_row; |
|
46 |
private String file_to_save ; |
|
47 |
private boolean replace_currency = false; |
|
48 |
private String from_currency, to_currency; |
|
49 |
private boolean remove_empty, remove_tmp_file; |
|
50 |
private String remove_id; |
|
51 |
private int column_id; |
|
52 |
private int currency_column; |
|
53 |
private int sheet_number; |
|
54 |
private String tmp_file; |
|
55 |
private String argument; |
|
56 |
private String identifier; |
|
57 |
|
|
58 |
private HttpCSVCollectorPlugin collector; |
|
59 |
|
|
60 |
public HttpCSVCollectorPlugin getCollector() { |
|
61 |
return collector; |
|
62 |
} |
|
63 |
|
|
64 |
public void setCollector(HttpCSVCollectorPlugin collector) { |
|
65 |
this.collector = collector; |
|
66 |
} |
|
67 |
|
|
68 |
public Read(InterfaceDescriptor descriptor){ |
|
69 |
this.descriptor = descriptor; |
|
70 |
|
|
71 |
} |
|
72 |
|
|
73 |
private static String getCellValue( Cell cell) |
|
74 |
{ |
|
75 |
DataFormatter formatter = new DataFormatter(); |
|
76 |
String formattedCellValue = formatter.formatCellValue(cell); |
|
77 |
return formattedCellValue; |
|
78 |
|
|
79 |
} |
|
80 |
|
|
81 |
private void copyFile() throws IOException{ |
|
82 |
FileUtils.copyURLToFile(new URL(descriptor.getBaseUrl()), new File(tmp_file)); |
|
83 |
|
|
84 |
} |
|
85 |
|
|
86 |
private void parseDescriptor(){ |
|
87 |
HashMap<String, String> params = descriptor.getParams(); |
|
88 |
argument = params.get("argument"); |
|
89 |
header_row = Integer.parseInt(params.get("header_row")); |
|
90 |
tmp_file = params.get("tmp_file"); |
|
91 |
remove_empty = (params.get("remove_empty_lines") == "yes"); |
|
92 |
remove_id = params.get("remove_lines_with_id"); |
|
93 |
column_id = Integer.parseInt(params.get("col_id")); |
|
94 |
remove_tmp_file = (params.get("remove_tmp_file") == "yes"); |
|
95 |
sheet_number = Integer.parseInt(params.get("sheet_number")); |
|
96 |
file_to_save = params.get("file_to_save"); |
|
97 |
} |
|
98 |
private void init() throws IOException{ |
|
99 |
parseDescriptor(); |
|
100 |
log.info("Parsing the arguments"); |
|
101 |
parseArguments(); |
|
102 |
log.info("Copying the file in temp local file"); |
|
103 |
copyFile(); |
|
104 |
log.info("Extracting the sheet " + sheet_number); |
|
105 |
FileInputStream fis = new FileInputStream(tmp_file); |
|
106 |
Workbook workbook = new XSSFWorkbook(fis); |
|
107 |
sheet = workbook.getSheetAt(sheet_number); |
|
108 |
fis.close(); |
|
109 |
if(remove_tmp_file) { |
|
110 |
File f = new File(tmp_file); |
|
111 |
f.delete(); |
|
112 |
} |
|
113 |
|
|
114 |
} |
|
115 |
|
|
116 |
private void fillMap(JSONObject json, HashMap<String,String> map, String elem){ |
|
117 |
JSONArray arr = json.getJSONObject("replace").getJSONArray(elem); |
|
118 |
for(Object entry: arr) { |
|
119 |
try { |
|
120 |
map.put(((JSONObject)entry).getString("from"), ((JSONObject)entry).getString("to")); |
|
121 |
}catch(Exception ex){ |
|
122 |
ex.printStackTrace(); |
|
123 |
} |
|
124 |
} |
|
125 |
|
|
126 |
} |
|
127 |
|
|
128 |
private void parseArguments() { |
|
129 |
JSONObject json = new JSONObject(argument); |
|
130 |
fillMap(json, map_header,"header"); |
|
131 |
fillMap(json,map_body,"body"); |
|
132 |
|
|
133 |
if (!(json.getJSONArray("replace_currency")==null)){ |
|
134 |
replace_currency = true ; |
|
135 |
from_currency = json.getJSONArray("replace_currency").getJSONObject(0).getString("from"); |
|
136 |
to_currency = json.getJSONArray("replace_currency").getJSONObject(0).getString("to"); |
|
137 |
} |
|
138 |
|
|
139 |
currency_column = json.getInt("col_currency"); |
|
140 |
|
|
141 |
} |
|
142 |
|
|
143 |
private String applyReplace(String row, HashMap<String,String>replace){ |
|
144 |
for(String key: replace.keySet()){ |
|
145 |
if(row.contains(key)) |
|
146 |
row = row.replace(key, replace.get(key)); |
|
147 |
} |
|
148 |
return row; |
|
149 |
} |
|
150 |
|
|
151 |
private void getHeader(){ |
|
152 |
Row row = sheet.getRow(header_row); |
|
153 |
Iterator<Cell> cellIterator = row.cellIterator(); |
|
154 |
Cell cell; |
|
155 |
String project = ""; |
|
156 |
int count = 0; |
|
157 |
while (cellIterator.hasNext()){ |
|
158 |
cell = cellIterator.next(); |
|
159 |
project += applyReplace(cell.getStringCellValue(),map_header) + ";"; |
|
160 |
if(count++ == column_id) identifier = applyReplace(cell.getStringCellValue(),map_header); |
|
161 |
} |
|
162 |
project = project.substring(0, project.length() -1 ); |
|
163 |
csv_writer.setHeader(project.split(";")); |
|
164 |
|
|
165 |
} |
|
166 |
|
|
167 |
private void getData(){ |
|
168 |
Row row; |
|
169 |
Cell cell; |
|
170 |
String tmp; |
|
171 |
Iterator<Cell>cellIterator; |
|
172 |
for(int row_number = header_row + 1; row_number < sheet.getLastRowNum(); row_number++){ |
|
173 |
row = sheet.getRow(row_number); |
|
174 |
cellIterator = row.cellIterator(); |
|
175 |
int col_number = 0; |
|
176 |
|
|
177 |
boolean discard_row = false; |
|
178 |
ArrayList<String> al = new ArrayList<String>(); |
|
179 |
while(cellIterator.hasNext() && !discard_row){ |
|
180 |
cell = cellIterator.next(); |
|
181 |
tmp = getCellValue(cell); |
|
182 |
if (col_number == column_id && |
|
183 |
((remove_empty && tmp.trim().equals("")) || |
|
184 |
(!remove_id.equals("") && tmp.equals(remove_id)))) |
|
185 |
discard_row = true; |
|
186 |
|
|
187 |
if (replace_currency && col_number == currency_column) |
|
188 |
tmp = tmp.replace(from_currency,to_currency); |
|
189 |
|
|
190 |
al.add(applyReplace(tmp,map_body)); |
|
191 |
col_number ++; |
|
192 |
} |
|
193 |
if(!discard_row){ |
|
194 |
csv_writer.addProject(al); |
|
195 |
|
|
196 |
} |
|
197 |
} |
|
198 |
|
|
199 |
} |
|
200 |
|
|
201 |
private void writeCSVFile(){ |
|
202 |
|
|
203 |
csv_writer.writeFile(file_to_save); |
|
204 |
} |
|
205 |
|
|
206 |
private InterfaceDescriptor prepareHTTPCSVDescriptor(){ |
|
207 |
InterfaceDescriptor dex = new InterfaceDescriptor(); |
|
208 |
dex.setBaseUrl("file://"+file_to_save); |
|
209 |
HashMap<String, String> params = new HashMap<String, String>(); |
|
210 |
params.put("separator", descriptor.getParams().get("separator")); |
|
211 |
params.put("identifier",identifier); |
|
212 |
params.put("quote",descriptor.getParams().get("quote")); |
|
213 |
dex.setParams(params); |
|
214 |
return dex; |
|
215 |
} |
|
216 |
|
|
217 |
public Iterable<String> parseFile() throws Exception{ |
|
218 |
|
|
219 |
|
|
220 |
init(); |
|
221 |
log.info("Getting header elements"); |
|
222 |
getHeader(); |
|
223 |
log.info("Getting sheet data"); |
|
224 |
getData(); |
|
225 |
log.info("Writing the csv file"); |
|
226 |
writeCSVFile(); |
|
227 |
log.info("Preparing to parse csv"); |
|
228 |
|
|
229 |
return collector.collect(prepareHTTPCSVDescriptor(),"",""); |
|
230 |
|
|
231 |
} |
|
232 |
|
|
233 |
|
|
234 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/excel/ReadExcelPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.excel; |
|
2 |
|
|
3 |
|
|
4 |
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin; |
|
5 |
import eu.dnetlib.data.collector.plugins.HttpCSVCollectorPlugin; |
|
6 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
7 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
|
8 |
import org.apache.commons.logging.Log; |
|
9 |
import org.apache.commons.logging.LogFactory; |
|
10 |
import org.springframework.beans.factory.annotation.Autowired; |
|
11 |
import org.springframework.beans.factory.annotation.Required; |
|
12 |
|
|
13 |
/** |
|
14 |
* Created by miriam on 10/05/2017. |
|
15 |
*/ |
|
16 |
public class ReadExcelPlugin extends AbstractCollectorPlugin{ |
|
17 |
|
|
18 |
private static final Log log = LogFactory.getLog(ReadExcelPlugin.class); |
|
19 |
@Autowired |
|
20 |
HttpCSVCollectorPlugin httpCSVCollectorPlugin; |
|
21 |
|
|
22 |
|
|
23 |
|
|
24 |
@Override |
|
25 |
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) |
|
26 |
throws CollectorServiceException { |
|
27 |
Read r = new Read(interfaceDescriptor); |
|
28 |
r.setCollector(httpCSVCollectorPlugin); |
|
29 |
|
|
30 |
try { |
|
31 |
return r.parseFile(); |
|
32 |
}catch(Exception e){ |
|
33 |
log.error("Error importing excel file"); |
|
34 |
throw new CollectorServiceException(e); |
|
35 |
} |
|
36 |
|
|
37 |
|
|
38 |
} |
|
39 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/opentrial/OpenTrialIterator.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.opentrial; |
|
2 |
|
|
3 |
/** |
|
4 |
* Created by miriam on 07/03/2017. |
|
5 |
*/ |
|
6 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
7 |
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException; |
|
8 |
import org.apache.commons.io.IOUtils; |
|
9 |
import java.net.*; |
|
10 |
import java.util.Iterator; |
|
11 |
import java.util.concurrent.ArrayBlockingQueue; |
|
12 |
//import java.util.function.Consumer; |
|
13 |
|
|
14 |
import org.apache.commons.logging.Log; |
|
15 |
import org.apache.commons.logging.LogFactory; |
|
16 |
import org.json.*; |
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
public class OpenTrialIterator implements Iterable<String> { |
|
21 |
|
|
22 |
private final String base_url; |
|
23 |
private int total ; |
|
24 |
private ArrayBlockingQueue<String> trials = new ArrayBlockingQueue<String>(100); |
|
25 |
private int current = 0; |
|
26 |
private static final Log log = LogFactory.getLog(OpenTrialIterator.class); |
|
27 |
|
|
28 |
public OpenTrialIterator(String base_url, String from_date, String to_date)throws CollectorServiceException{ |
|
29 |
try { |
|
30 |
String q = "per_page=100"; |
|
31 |
if (!(from_date == null)) { |
|
32 |
if (!(to_date == null)) { |
|
33 |
q = "q=registration_date%3A%5B" + from_date + "%20TO%20" + to_date + "%5D&" + q; |
|
34 |
|
|
35 |
} else |
|
36 |
q = "q=registration_date%3A%5B" + from_date + "%20TO%20*%5D&" + q; |
|
37 |
} |
|
38 |
this.base_url = base_url+ q; |
|
39 |
log.info("url from which to collect " + this.base_url); |
|
40 |
prepare(); |
|
41 |
}catch(Exception ex){ |
|
42 |
throw new CollectorServiceException(ex); |
|
43 |
} |
|
44 |
} |
|
45 |
|
|
46 |
private void prepare()throws Exception { |
|
47 |
JSONObject json = new JSONObject(getPage(1)); |
|
48 |
total = json.getInt("total_count"); |
|
49 |
log.info("Total number of entries to collect: " + total); |
|
50 |
fillTrials(json); |
|
51 |
} |
|
52 |
|
|
53 |
|
|
54 |
@Override |
|
55 |
public Iterator<String> iterator() { |
|
56 |
return new Iterator<String>(){ |
|
57 |
|
|
58 |
private int page_number = 2; |
|
59 |
|
|
60 |
|
|
61 |
@Override |
|
62 |
public void remove(){ |
|
63 |
|
|
64 |
} |
|
65 |
|
|
66 |
@Override |
|
67 |
public String next() { |
|
68 |
try { |
|
69 |
if (trials.isEmpty()) { |
|
70 |
JSONObject json = new JSONObject(getPage(page_number)); |
|
71 |
fillTrials(json); |
|
72 |
page_number++; |
|
73 |
} |
|
74 |
return trials.poll(); |
|
75 |
}catch(Exception ex){ |
|
76 |
throw new CollectorServiceRuntimeException(ex); |
|
77 |
} |
|
78 |
} |
|
79 |
|
|
80 |
@Override |
|
81 |
public boolean hasNext(){ |
|
82 |
log.debug("More entries to collect: (" + current + "<" + total + "=" + (current < total)); |
|
83 |
return (current < total || !trials.isEmpty()); |
|
84 |
} |
|
85 |
|
|
86 |
|
|
87 |
}; |
|
88 |
|
|
89 |
} |
|
90 |
|
|
91 |
private void fillTrials(JSONObject json)throws CollectorServiceException{ |
|
92 |
|
|
93 |
JSONArray entries = json.getJSONArray("items"); |
|
94 |
for(Object entry: entries) { |
|
95 |
try { |
|
96 |
trials.put(XML.toString(entry)); |
|
97 |
}catch(Exception ex){ |
|
98 |
throw new CollectorServiceException(ex); |
|
99 |
} |
|
100 |
current++; |
|
101 |
} |
|
102 |
|
|
103 |
} |
|
104 |
private String getPage(int page_number)throws CollectorServiceException { |
|
105 |
|
|
106 |
try { |
|
107 |
URL url = new URL(base_url + "&page=" + page_number); |
|
108 |
URLConnection conn = url.openConnection(); |
|
109 |
conn.setRequestProperty("User-Agent", "Mozilla/5.0"); |
|
110 |
return (IOUtils.toString(conn.getInputStream())); |
|
111 |
}catch(Exception ex){ |
|
112 |
throw new CollectorServiceException(ex); |
|
113 |
} |
|
114 |
} |
|
115 |
|
|
116 |
|
|
117 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/opentrial/OpenTrialPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.opentrial; |
|
2 |
|
|
3 |
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin; |
|
4 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
5 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
/** |
|
10 |
* Created by miriam on 07/03/2017. |
|
11 |
*/ |
|
12 |
public class OpenTrialPlugin extends AbstractCollectorPlugin{ |
|
13 |
|
|
14 |
|
|
15 |
@Override |
|
16 |
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) |
|
17 |
throws CollectorServiceException { |
|
18 |
try { |
|
19 |
|
|
20 |
OpenTrialIterator iterator = new OpenTrialIterator(interfaceDescriptor.getBaseUrl(),fromDate,untilDate); |
|
21 |
return iterator; |
|
22 |
} catch (Exception e) { |
|
23 |
throw new CollectorServiceException("OOOPS something bad happen on creating iterator ", e); |
|
24 |
} |
|
25 |
|
|
26 |
} |
|
27 |
} |
modules/dnet-collector-plugins/trunk/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml | ||
---|---|---|
32 | 32 |
</property> |
33 | 33 |
</bean> |
34 | 34 |
|
35 |
<bean id="excelPlugin" class="eu.dnetlib.data.collector.plugins.excel.ReadExcelPlugin"> |
|
36 |
<property name="protocolDescriptor"> |
|
37 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolDescriptor" p:name="excelFile"> |
|
38 |
<property name="params"> |
|
39 |
<list> |
|
40 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
41 |
p:name="argument" /> |
|
42 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
43 |
p:name="header_row" /> |
|
44 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
45 |
p:name="file_to_save" /> |
|
46 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
47 |
p:name="remove_empty_lines" /> |
|
48 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
49 |
p:name="remove_lines_with_id" /> |
|
50 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
51 |
p:name="col_id" /> |
|
52 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
53 |
p:name="remove_tmp_file" /> |
|
54 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
55 |
p:name="sheet_number" /> |
|
56 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
57 |
p:name="tmp_file" /> |
|
58 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
59 |
p:name="separator" /> |
|
60 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" |
|
61 |
p:name="quote" /> |
|
62 |
</list> |
|
63 |
</property> |
|
64 |
</bean> |
|
65 |
</property> |
|
66 |
|
|
67 |
|
|
68 |
</bean> |
|
69 |
|
|
70 |
|
|
35 | 71 |
</beans> |
modules/dnet-collector-plugins/trunk/pom.xml | ||
---|---|---|
7 | 7 |
</parent> |
8 | 8 |
<groupId>eu.dnetlib</groupId> |
9 | 9 |
<artifactId>dnet-collector-plugins</artifactId> |
10 |
<version>1.3.4-SNAPSHOT</version>
|
|
10 |
<version>1.3.4</version> |
|
11 | 11 |
<scm> |
12 | 12 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-collector-plugins/trunk</developerConnection> |
13 | 13 |
</scm> |
... | ... | |
18 | 18 |
<version>[1.3.0,2.0.0)</version> |
19 | 19 |
</dependency> |
20 | 20 |
<dependency> |
21 |
<groupId>eu.dnetlib</groupId> |
|
22 |
<artifactId>dnet-modular-collector-service</artifactId> |
|
23 |
<version>[1.3.0,4.0.0)</version> |
|
24 |
</dependency> |
|
25 |
<dependency> |
|
21 | 26 |
<groupId>com.google.code.gson</groupId> |
22 | 27 |
<artifactId>gson</artifactId> |
23 | 28 |
<version>${google.gson.version}</version> |
... | ... | |
59 | 64 |
<artifactId>joda-time</artifactId> |
60 | 65 |
<version>2.9.2</version> |
61 | 66 |
</dependency> |
67 |
|
|
68 |
<dependency> |
|
69 |
<groupId>org.json</groupId> |
|
70 |
<artifactId>json</artifactId> |
|
71 |
<version>20160810</version> |
|
72 |
</dependency> |
|
73 |
<dependency> |
|
74 |
<groupId>org.apache.commons</groupId> |
|
75 |
<artifactId>commons-lang3</artifactId> |
|
76 |
<version>3.0</version> |
|
77 |
</dependency> |
|
78 |
|
|
79 |
<dependency> |
|
80 |
<groupId>org.apache.poi</groupId> |
|
81 |
<artifactId>poi</artifactId> |
|
82 |
<version>3.16</version> |
|
83 |
</dependency> |
|
84 |
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --> |
|
85 |
<dependency> |
|
86 |
<groupId>org.apache.poi</groupId> |
|
87 |
<artifactId>poi-ooxml</artifactId> |
|
88 |
<version>3.16</version> |
|
89 |
</dependency> |
|
62 | 90 |
</dependencies> |
63 |
<build> |
|
64 |
<plugins> |
|
65 |
<plugin> |
|
66 |
<groupId>org.apache.maven.plugins</groupId> |
|
67 |
<artifactId>maven-compiler-plugin</artifactId> |
|
68 |
<version>3.1</version> |
|
69 |
<configuration> |
|
70 |
<source>1.7</source> |
|
71 |
<target>${maven.compiler.target.version}</target> |
|
72 |
</configuration> |
|
73 |
</plugin> |
|
74 |
</plugins> |
|
75 |
</build> |
|
76 | 91 |
</project> |
Also available in: Unified diff
integrated latest changes from dnet40