Revision 63307
Added by Michele Artini about 1 month ago
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2Helper.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.projects.gtr2; |
|
2 |
|
|
3 |
import java.io.ByteArrayOutputStream; |
|
4 |
import java.io.StringWriter; |
|
5 |
|
|
6 |
import org.apache.commons.lang3.StringEscapeUtils; |
|
7 |
import org.apache.commons.logging.Log; |
|
8 |
import org.apache.commons.logging.LogFactory; |
|
9 |
|
|
10 |
import com.ximpleware.AutoPilot; |
|
11 |
import com.ximpleware.VTDGen; |
|
12 |
import com.ximpleware.VTDNav; |
|
13 |
|
|
14 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
15 |
|
|
16 |
public class Gtr2Helper { |
|
17 |
|
|
18 |
private static final Log log = LogFactory.getLog(Gtr2Helper.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
19 |
|
|
20 |
private VTDNav mainVTDNav; |
|
21 |
private AutoPilot mainAutoPilot; |
|
22 |
private StringWriter writer; |
|
23 |
private HttpConnector connector; |
|
24 |
// private BlockingQueue<String> fragment = new ArrayBlockingQueue<String>(20); |
|
25 |
|
|
26 |
public static String cleanURL(final String url) { |
|
27 |
String cleaned = url; |
|
28 |
if (cleaned.contains("gtr.gtr")) { |
|
29 |
cleaned = cleaned.replace("gtr.gtr", "gtr"); |
|
30 |
} |
|
31 |
if (cleaned.startsWith("http://")) { |
|
32 |
cleaned = cleaned.replaceFirst("http://", "https://"); |
|
33 |
} |
|
34 |
return cleaned; |
|
35 |
} |
|
36 |
|
|
37 |
public String processProject(final VTDNav vn, final String namespaces, final String projectUrl) throws Exception { |
|
38 |
writer = new StringWriter(); |
|
39 |
mainVTDNav = vn; |
|
40 |
mainAutoPilot = new AutoPilot(mainVTDNav); |
|
41 |
writer.write("<doc " + namespaces + ">"); |
|
42 |
writeFragment(mainVTDNav); |
|
43 |
|
|
44 |
mainAutoPilot.selectXPath("//link[@rel='FUND']"); |
|
45 |
|
|
46 |
while (mainAutoPilot.evalXPath() != -1) { |
|
47 |
processFunder(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), projectUrl); |
|
48 |
} |
|
49 |
|
|
50 |
mainAutoPilot.resetXPath(); |
|
51 |
mainAutoPilot.selectXPath(".//link[@rel='LEAD_ORG']"); |
|
52 |
while (mainAutoPilot.evalXPath() != -1) { |
|
53 |
processOrg(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), new String[] { |
|
54 |
"<ld-org>", "</ld-org>" |
|
55 |
}, projectUrl); |
|
56 |
} |
|
57 |
mainAutoPilot.resetXPath(); |
|
58 |
mainAutoPilot.selectXPath(".//link[@rel='PP_ORG']"); |
|
59 |
while (mainAutoPilot.evalXPath() != -1) { |
|
60 |
processOrg(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), new String[] { |
|
61 |
"<pp-org>", "</pp-org>" |
|
62 |
}, projectUrl); |
|
63 |
} |
|
64 |
|
|
65 |
// mainAutoPilot.resetXPath(); |
|
66 |
// mainAutoPilot.selectXPath(".//link[@rel='PARTICIPANT_ORG']"); |
|
67 |
// while (mainAutoPilot.evalXPath() != -1) { |
|
68 |
// processOrg(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), |
|
69 |
// new String[]{"<pp-org>", "</pp-org>"}, projectUrl); |
|
70 |
// } |
|
71 |
|
|
72 |
mainAutoPilot.resetXPath(); |
|
73 |
mainAutoPilot.selectXPath(".//link[@rel='PI_PER']"); |
|
74 |
while (mainAutoPilot.evalXPath() != -1) { |
|
75 |
processPerson(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), projectUrl); |
|
76 |
} |
|
77 |
writer.write("</doc>"); |
|
78 |
writer.close(); |
|
79 |
|
|
80 |
return writer.toString(); |
|
81 |
} |
|
82 |
|
|
83 |
private VTDNav setNavigator(final String httpUrl) { |
|
84 |
final VTDGen vg_tmp = new VTDGen(); |
|
85 |
connector = new HttpConnector(); |
|
86 |
try { |
|
87 |
final byte[] bytes = connector.getInputSource(cleanURL(httpUrl)).getBytes("UTF-8"); |
|
88 |
vg_tmp.setDoc(bytes); |
|
89 |
vg_tmp.parse(false); |
|
90 |
// vg_tmp.parseHttpUrl(httpUrl, false); |
|
91 |
return vg_tmp.getNav(); |
|
92 |
} catch (final Throwable e) { |
|
93 |
return null; |
|
94 |
} |
|
95 |
} |
|
96 |
|
|
97 |
private int evalXpath(final VTDNav fragmentVTDNav, final String xPath) throws Exception { |
|
98 |
|
|
99 |
final AutoPilot ap_tmp = new AutoPilot(fragmentVTDNav); |
|
100 |
ap_tmp.selectXPath(xPath); |
|
101 |
return ap_tmp.evalXPath(); |
|
102 |
} |
|
103 |
|
|
104 |
private void writeFragment(final VTDNav nav) throws Exception { |
|
105 |
final ByteArrayOutputStream b = new ByteArrayOutputStream(); |
|
106 |
nav.dumpFragment(b); |
|
107 |
final String ret = b.toString(); |
|
108 |
b.reset(); |
|
109 |
writer.write(ret); |
|
110 |
} |
|
111 |
|
|
112 |
private void writeNewTagAndInfo(final VTDNav vn, final String xPath, final String xmlOpenTag, final String xmlCloseTag, final String attrName) |
|
113 |
throws Exception { |
|
114 |
|
|
115 |
final int nav_res = evalXpath(vn, xPath); |
|
116 |
if (nav_res != -1) { |
|
117 |
String tmp = xmlOpenTag; |
|
118 |
if (attrName != null) { |
|
119 |
tmp += vn.toNormalizedString(vn.getAttrVal(attrName)); |
|
120 |
} else { |
|
121 |
tmp += StringEscapeUtils.escapeXml11(vn.toNormalizedString(vn.getText())); |
|
122 |
} |
|
123 |
tmp += xmlCloseTag; |
|
124 |
writer.write(tmp); |
|
125 |
} |
|
126 |
} |
|
127 |
|
|
128 |
private void processPerson(final String httpUrl, final String projectUrl) { |
|
129 |
log.debug(String.format("Getting person %s for project %s", httpUrl, projectUrl)); |
|
130 |
final VTDNav vn = setNavigator(cleanURL(httpUrl)); |
|
131 |
try { |
|
132 |
writeFragment(vn); |
|
133 |
} catch (final Throwable e) { |
|
134 |
log.debug(String.format("Exception in processPerson from %s \n Error message: \n %s", httpUrl, e.getMessage())); |
|
135 |
} |
|
136 |
|
|
137 |
} |
|
138 |
|
|
139 |
private void processOrg(final String httpUrl, final String[] tags, final String projectUrl) { |
|
140 |
log.debug(String.format("Getting org %s for project %s", httpUrl, projectUrl)); |
|
141 |
final VTDNav vn = setNavigator(cleanURL(httpUrl)); |
|
142 |
try { |
|
143 |
writeNewTagAndInfo(vn, "//name", tags[0] + "<name>", "</name>", null); |
|
144 |
vn.toElement(VTDNav.ROOT); |
|
145 |
writeNewTagAndInfo(vn, "//country", "<country>", "</country>", null); |
|
146 |
vn.toElement(VTDNav.ROOT); |
|
147 |
writeNewTagAndInfo(vn, ".", "<id>", "</id>" + tags[1], "id"); |
|
148 |
} catch (final Throwable e) { |
|
149 |
log.debug(String.format("Exception in processOrg from %s \n Error message: \n %s", httpUrl, e.getMessage())); |
|
150 |
} |
|
151 |
} |
|
152 |
|
|
153 |
private void processFunder(final String httpUrl, final String projectUrl) { |
|
154 |
log.debug(String.format("Getting funder %s for project %s", httpUrl, projectUrl)); |
|
155 |
final VTDNav vn = setNavigator(cleanURL(httpUrl)); |
|
156 |
try { |
|
157 |
final AutoPilot ap = new AutoPilot(vn); |
|
158 |
writeFragment(vn); |
|
159 |
ap.selectXPath(".//link[@rel='FUNDER']"); |
|
160 |
VTDNav tmp_vn; |
|
161 |
while (ap.evalXPath() != -1) { |
|
162 |
tmp_vn = setNavigator(vn.toNormalizedString(vn.getAttrVal("href"))); |
|
163 |
writeNewTagAndInfo(tmp_vn, "//name", "<funder> <name>", "</name></funder>", null); |
|
164 |
} |
|
165 |
} catch (final Throwable e) { |
|
166 |
log.debug(String.format("Exception in processFunder from %s \n Error message: \n %s", httpUrl, e.getMessage())); |
|
167 |
} |
|
168 |
} |
|
169 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2ProjectsIterator.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.projects.gtr2; |
|
2 |
|
|
3 |
import java.util.Iterator; |
|
4 |
import java.util.concurrent.ArrayBlockingQueue; |
|
5 |
import java.util.concurrent.TimeUnit; |
|
6 |
|
|
7 |
import org.apache.commons.lang3.StringUtils; |
|
8 |
import org.apache.commons.logging.Log; |
|
9 |
import org.apache.commons.logging.LogFactory; |
|
10 |
import org.joda.time.DateTime; |
|
11 |
import org.joda.time.format.DateTimeFormat; |
|
12 |
import org.joda.time.format.DateTimeFormatter; |
|
13 |
|
|
14 |
import com.ximpleware.AutoPilot; |
|
15 |
import com.ximpleware.VTDGen; |
|
16 |
import com.ximpleware.VTDNav; |
|
17 |
|
|
18 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
19 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
20 |
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException; |
|
21 |
|
|
22 |
/** |
|
23 |
* Created by alessia on 28/11/16. |
|
24 |
*/ |
|
25 |
public class Gtr2ProjectsIterator implements Iterator<String> { |
|
26 |
|
|
27 |
public static final String TERMINATOR = "ARNOLD"; |
|
28 |
public static final int WAIT_END_SECONDS = 600; |
|
29 |
public static final int PAGE_SZIE = 20; |
|
30 |
|
|
31 |
private static final Log log = LogFactory.getLog(Gtr2ProjectsIterator.class); |
|
32 |
|
|
33 |
private String queryURL; |
|
34 |
private int total = -1; |
|
35 |
private int startFromPage = 1; |
|
36 |
private int endAtPage; |
|
37 |
private VTDGen vg; |
|
38 |
private VTDNav vn; |
|
39 |
private AutoPilot ap; |
|
40 |
private String namespaces; |
|
41 |
private boolean incremental = false; |
|
42 |
private DateTime fromDate; |
|
43 |
private final DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd"); |
|
44 |
private final ArrayBlockingQueue<String> projects = new ArrayBlockingQueue<>(200); |
|
45 |
// private boolean finished = false; |
|
46 |
private String nextElement = "<doc></doc>"; |
|
47 |
private HttpConnector connector; |
|
48 |
|
|
49 |
@Override |
|
50 |
public boolean hasNext() { |
|
51 |
return !TERMINATOR.equals(nextElement); |
|
52 |
} |
|
53 |
|
|
54 |
@Override |
|
55 |
public String next() { |
|
56 |
try { |
|
57 |
return nextElement; |
|
58 |
} finally { |
|
59 |
try { |
|
60 |
nextElement = projects.poll(WAIT_END_SECONDS, TimeUnit.SECONDS); |
|
61 |
} catch (final InterruptedException e) { |
|
62 |
throw new RuntimeException(e); |
|
63 |
} |
|
64 |
} |
|
65 |
|
|
66 |
} |
|
67 |
|
|
68 |
@Override |
|
69 |
public void remove() { |
|
70 |
throw new UnsupportedOperationException(); |
|
71 |
} |
|
72 |
|
|
73 |
public Gtr2ProjectsIterator(final String baseUrl, final String fromDate) throws CollectorServiceException { |
|
74 |
prepare(baseUrl, fromDate); |
|
75 |
fillInfo(true); |
|
76 |
} |
|
77 |
|
|
78 |
public Gtr2ProjectsIterator(final String baseUrl, final String fromDate, final int startFromPage, final int endAtPage) throws CollectorServiceException { |
|
79 |
prepare(baseUrl, fromDate); |
|
80 |
this.setStartFromPage(startFromPage); |
|
81 |
this.setEndAtPage(endAtPage); |
|
82 |
fillInfo(false); |
|
83 |
} |
|
84 |
|
|
85 |
public Gtr2ProjectsIterator(final String baseUrl, final String fromDate, final String startFromPage, final String endAtPage) |
|
86 |
throws CollectorServiceException { |
|
87 |
prepare(baseUrl, fromDate); |
|
88 |
if (StringUtils.isNotBlank(startFromPage)) { |
|
89 |
this.setStartFromPage(Integer.parseInt(startFromPage)); |
|
90 |
} |
|
91 |
if (StringUtils.isNotBlank(endAtPage)) { |
|
92 |
this.setEndAtPage(Integer.parseInt(endAtPage)); |
|
93 |
} |
|
94 |
fillInfo(false); |
|
95 |
} |
|
96 |
|
|
97 |
private void prepare(final String baseUrl, final String fromDate) { |
|
98 |
connector = new HttpConnector(); |
|
99 |
queryURL = baseUrl + "/projects"; |
|
100 |
vg = new VTDGen(); |
|
101 |
this.incremental = StringUtils.isNotBlank(fromDate); |
|
102 |
if (incremental) { |
|
103 |
// I expect fromDate in the format 'yyyy-MM-dd'. See class |
|
104 |
// eu.dnetlib.msro.workflows.nodes.collect.FindDateRangeForIncrementalHarvestingJobNode |
|
105 |
this.fromDate = DateTime.parse(fromDate, simpleDateTimeFormatter); |
|
106 |
log.debug("fromDate string: " + fromDate + " -- parsed: " + this.fromDate.toString()); |
|
107 |
} |
|
108 |
} |
|
109 |
|
|
110 |
private void fillInfo(final boolean all) throws CollectorServiceException { |
|
111 |
try { |
|
112 |
// log.debug("Getting hit count from: " + queryURL); |
|
113 |
final byte[] bytes = connector.getInputSource(queryURL).getBytes("UTF-8"); |
|
114 |
vg.setDoc(bytes); |
|
115 |
vg.parse(false); |
|
116 |
// vg.parseHttpUrl(queryURL, false); |
|
117 |
initParser(); |
|
118 |
final String hitCount = vn.toNormalizedString(vn.getAttrVal("totalSize")); |
|
119 |
final String totalPages = vn.toNormalizedString(vn.getAttrVal("totalPages")); |
|
120 |
namespaces = "xmlns:ns1=\"" + vn.toNormalizedString(vn.getAttrVal("ns1")) + "\" "; |
|
121 |
namespaces += "xmlns:ns2=\"" + vn.toNormalizedString(vn.getAttrVal("ns2")) + "\" "; |
|
122 |
namespaces += "xmlns:ns3=\"" + vn.toNormalizedString(vn.getAttrVal("ns3")) + "\" "; |
|
123 |
namespaces += "xmlns:ns4=\"" + vn.toNormalizedString(vn.getAttrVal("ns4")) + "\" "; |
|
124 |
namespaces += "xmlns:ns5=\"" + vn.toNormalizedString(vn.getAttrVal("ns5")) + "\" "; |
|
125 |
namespaces += "xmlns:ns6=\"" + vn.toNormalizedString(vn.getAttrVal("ns6")) + "\" "; |
|
126 |
if (all) { |
|
127 |
setEndAtPage(Integer.parseInt(totalPages)); |
|
128 |
total = Integer.parseInt(hitCount); |
|
129 |
} |
|
130 |
final Thread ft = new Thread(new FillProjectList()); |
|
131 |
ft.start(); |
|
132 |
log.debug("Expected number of pages: " + (endAtPage - startFromPage + 1)); |
|
133 |
} catch (final NumberFormatException e) { |
|
134 |
log.error("Cannot set the total count or the number of pages"); |
|
135 |
throw new CollectorServiceException(e); |
|
136 |
} catch (final Throwable e) { |
|
137 |
throw new CollectorServiceException(e); |
|
138 |
} |
|
139 |
} |
|
140 |
|
|
141 |
private void initParser() { |
|
142 |
vn = vg.getNav(); |
|
143 |
ap = new AutoPilot(vn); |
|
144 |
} |
|
145 |
|
|
146 |
public String getQueryURL() { |
|
147 |
return queryURL; |
|
148 |
} |
|
149 |
|
|
150 |
public void setQueryURL(final String queryURL) { |
|
151 |
this.queryURL = queryURL; |
|
152 |
} |
|
153 |
|
|
154 |
public int getTotal() { |
|
155 |
return total; |
|
156 |
} |
|
157 |
|
|
158 |
public void setTotal(final int total) { |
|
159 |
this.total = total; |
|
160 |
} |
|
161 |
|
|
162 |
public int getEndAtPage() { |
|
163 |
return endAtPage; |
|
164 |
} |
|
165 |
|
|
166 |
public void setEndAtPage(final int endAtPage) { |
|
167 |
this.endAtPage = endAtPage; |
|
168 |
log.debug("Overriding endAtPage to " + endAtPage); |
|
169 |
} |
|
170 |
|
|
171 |
public VTDGen getVg() { |
|
172 |
return vg; |
|
173 |
} |
|
174 |
|
|
175 |
public void setVg(final VTDGen vg) { |
|
176 |
this.vg = vg; |
|
177 |
} |
|
178 |
|
|
179 |
public VTDNav getVn() { |
|
180 |
return vn; |
|
181 |
} |
|
182 |
|
|
183 |
public void setVn(final VTDNav vn) { |
|
184 |
this.vn = vn; |
|
185 |
} |
|
186 |
|
|
187 |
public AutoPilot getAp() { |
|
188 |
return ap; |
|
189 |
} |
|
190 |
|
|
191 |
public void setAp(final AutoPilot ap) { |
|
192 |
this.ap = ap; |
|
193 |
} |
|
194 |
|
|
195 |
public String getNamespaces() { |
|
196 |
return namespaces; |
|
197 |
} |
|
198 |
|
|
199 |
public void setNamespaces(final String namespaces) { |
|
200 |
this.namespaces = namespaces; |
|
201 |
} |
|
202 |
|
|
203 |
public int getStartFromPage() { |
|
204 |
return startFromPage; |
|
205 |
} |
|
206 |
|
|
207 |
public void setStartFromPage(final int startFromPage) { |
|
208 |
this.startFromPage = startFromPage; |
|
209 |
log.debug("Overriding startFromPage to " + startFromPage); |
|
210 |
} |
|
211 |
|
|
212 |
private class FillProjectList implements Runnable { |
|
213 |
|
|
214 |
private boolean morePages = true; |
|
215 |
private int pageNumber = startFromPage; |
|
216 |
|
|
217 |
@Override |
|
218 |
public void run() { |
|
219 |
String resultPageUrl = ""; |
|
220 |
try { |
|
221 |
do { |
|
222 |
resultPageUrl = getNextPageUrl(); |
|
223 |
log.debug("Page: " + resultPageUrl); |
|
224 |
// clear VGen before processing the next file |
|
225 |
vg.clear(); |
|
226 |
final byte[] bytes = connector.getInputSource(resultPageUrl).getBytes("UTF-8"); |
|
227 |
vg.setDoc(bytes); |
|
228 |
vg.parse(false); |
|
229 |
// vg.parseHttpUrl(resultPageUrl, false); |
|
230 |
initParser(); |
|
231 |
ap.selectXPath("//project"); |
|
232 |
while (ap.evalXPath() != -1) { |
|
233 |
final String projectHref = vn.toNormalizedString(vn.getAttrVal("href")); |
|
234 |
final ParseProject p = new ParseProject(projectHref); |
|
235 |
p.execute(); |
|
236 |
} |
|
237 |
ap.resetXPath(); |
|
238 |
|
|
239 |
} while (morePages); |
|
240 |
projects.put(TERMINATOR); |
|
241 |
|
|
242 |
} catch (final Throwable e) { |
|
243 |
log.error("Exception processing " + resultPageUrl + "\n" + e.getMessage()); |
|
244 |
} |
|
245 |
} |
|
246 |
|
|
247 |
private String getNextPageUrl() { |
|
248 |
final String url = queryURL + "?p=" + pageNumber; |
|
249 |
if (pageNumber == endAtPage) { |
|
250 |
morePages = false; |
|
251 |
} |
|
252 |
pageNumber++; |
|
253 |
return url; |
|
254 |
} |
|
255 |
|
|
256 |
} |
|
257 |
|
|
258 |
private class ParseProject { |
|
259 |
|
|
260 |
VTDNav vn1; |
|
261 |
VTDGen vg1; |
|
262 |
private final String projectRef; |
|
263 |
|
|
264 |
public ParseProject(final String projectHref) { |
|
265 |
projectRef = Gtr2Helper.cleanURL(projectHref); |
|
266 |
vg1 = new VTDGen(); |
|
267 |
try { |
|
268 |
final byte[] bytes = connector.getInputSource(projectRef).getBytes("UTF-8"); |
|
269 |
vg1.setDoc(bytes); |
|
270 |
vg1.parse(false); |
|
271 |
vn1 = vg1.getNav(); |
|
272 |
} catch (final Throwable e) { |
|
273 |
log.error("Exception processing " + projectRef + "\n" + e.getMessage()); |
|
274 |
} |
|
275 |
} |
|
276 |
|
|
277 |
private int projectsUpdate(final String attr) throws CollectorServiceException { |
|
278 |
try { |
|
279 |
final int index = vn1.getAttrVal(attr); |
|
280 |
if (index != -1) { |
|
281 |
final String d = vn1.toNormalizedString(index); |
|
282 |
final DateTime recordDate = DateTime.parse(d.substring(0, d.indexOf("T")), simpleDateTimeFormatter); |
|
283 |
// updated or created after the last time it was collected |
|
284 |
if (recordDate.isAfter(fromDate)) { |
|
285 |
log.debug("New project to collect"); |
|
286 |
return index; |
|
287 |
} |
|
288 |
return -1; |
|
289 |
} |
|
290 |
return index; |
|
291 |
} catch (final Throwable e) { |
|
292 |
throw new CollectorServiceException(e); |
|
293 |
} |
|
294 |
} |
|
295 |
|
|
296 |
private String collectProject() throws CollectorServiceException { |
|
297 |
try { |
|
298 |
final int p = vn1.getAttrVal("href"); |
|
299 |
final String projectHref = vn1.toNormalizedString(p); |
|
300 |
log.debug("Collecting project at " + projectHref); |
|
301 |
final Gtr2Helper gtr2Helper = new Gtr2Helper(); |
|
302 |
return gtr2Helper.processProject(vn1, namespaces, projectHref); |
|
303 |
} catch (final Throwable e) { |
|
304 |
throw new CollectorServiceException(e); |
|
305 |
} |
|
306 |
} |
|
307 |
|
|
308 |
private boolean add(final String attr) throws CollectorServiceException { |
|
309 |
return projectsUpdate(attr) != -1; |
|
310 |
} |
|
311 |
|
|
312 |
public void execute() { |
|
313 |
try { |
|
314 |
if (!incremental || incremental && (add("created") || add("updated"))) { |
|
315 |
projects.put(collectProject()); |
|
316 |
} |
|
317 |
} catch (final Throwable e) { |
|
318 |
log.error("Error on ParseProject " + e.getMessage()); |
|
319 |
throw new CollectorServiceRuntimeException(e); |
|
320 |
} |
|
321 |
} |
|
322 |
|
|
323 |
} |
|
324 |
|
|
325 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2CollectorPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.projects.gtr2; |
|
2 |
|
|
3 |
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin; |
|
4 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
5 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
|
6 |
|
|
7 |
/** |
|
8 |
* Plugin to collect metadata record about projects and fundings via the UKRI grt2 API. |
|
9 |
* <p> |
|
10 |
* Documentation : http://gtr.ukri.org/resources/api.html. |
|
11 |
* </p> |
|
12 |
* <p> |
|
13 |
* BaseURL: https://gtr.ukri.org/gtr/api The results returned by the API are XMLs. |
|
14 |
* </p> |
|
15 |
* <p> |
|
16 |
* Pagination: TO BE DEFINED. Exceeding the number of pages available will result in a HTTP response code of 404 |
|
17 |
* </p> |
|
18 |
* |
|
19 |
* @author alessia |
|
20 |
*/ |
|
21 |
public class Gtr2CollectorPlugin extends AbstractCollectorPlugin { |
|
22 |
|
|
23 |
@Override |
|
24 |
public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) |
|
25 |
throws CollectorServiceException { |
|
26 |
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); } |
|
27 |
|
|
28 |
return () -> { |
|
29 |
|
|
30 |
try { |
|
31 |
return new Gtr2ProjectsIterator(interfaceDescriptor.getBaseUrl(), fromDate, |
|
32 |
interfaceDescriptor.getParams().get("startPage"), |
|
33 |
interfaceDescriptor.getParams().get("endPage")); |
|
34 |
} catch (final CollectorServiceException e) { |
|
35 |
throw new RuntimeException(e); |
|
36 |
} |
|
37 |
}; |
|
38 |
} |
|
39 |
|
|
40 |
} |
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/projects/gtr2/VTDXMLTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.projects.gtr2; |
|
2 |
|
|
3 |
import java.io.BufferedWriter; |
|
4 |
import java.io.ByteArrayOutputStream; |
|
5 |
import java.io.FileWriter; |
|
6 |
import java.io.PrintWriter; |
|
7 |
|
|
8 |
import com.ximpleware.AutoPilot; |
|
9 |
import com.ximpleware.VTDGen; |
|
10 |
import com.ximpleware.VTDNav; |
|
11 |
import org.apache.commons.lang3.StringUtils; |
|
12 |
import org.junit.Ignore; |
|
13 |
import org.junit.Test; |
|
14 |
@Ignore |
|
15 |
public class VTDXMLTest { |
|
16 |
|
|
17 |
private VTDGen vg; |
|
18 |
private VTDNav vn; |
|
19 |
private AutoPilot ap; |
|
20 |
|
|
21 |
private VTDGen vg_tmp; |
|
22 |
private VTDNav vn_tmp; |
|
23 |
private AutoPilot ap_tmp; |
|
24 |
|
|
25 |
private PrintWriter writer; |
|
26 |
//TODO: use resource and not full path |
|
27 |
private String inputFilePath = |
|
28 |
"/Users/alessia/workspace/dnet/dnet-collector-plugins/src/test/resources/eu.dnetlib.data.collector.plugins.projects.gtr2/projects.xml"; |
|
29 |
|
|
30 |
@Test |
|
31 |
public void test() throws Exception { |
|
32 |
vg = new VTDGen(); |
|
33 |
vg.parseFile(inputFilePath, false); |
|
34 |
vn = vg.getNav(); |
|
35 |
ap = new AutoPilot(vn); |
|
36 |
String ns = ""; |
|
37 |
ap.selectXPath(".//projects"); |
|
38 |
ap.evalXPath(); |
|
39 |
ns += "xmlns:ns1=\"" + vn.toNormalizedString(vn.getAttrVal("ns1")) + "\" "; |
|
40 |
ns += "xmlns:ns2=\"" + vn.toNormalizedString(vn.getAttrVal("ns2")) + "\" "; |
|
41 |
ns += "xmlns:ns3=\"" + vn.toNormalizedString(vn.getAttrVal("ns3")) + "\" "; |
|
42 |
ns += "xmlns:ns4=\"" + vn.toNormalizedString(vn.getAttrVal("ns4")) + "\" "; |
|
43 |
ns += "xmlns:ns5=\"" + vn.toNormalizedString(vn.getAttrVal("ns5")) + "\" "; |
|
44 |
ns += "xmlns:ns6=\"" + vn.toNormalizedString(vn.getAttrVal("ns6")) + "\" "; |
|
45 |
|
|
46 |
ap.selectXPath("//project"); |
|
47 |
int res = -1; |
|
48 |
ByteArrayOutputStream b = new ByteArrayOutputStream(); |
|
49 |
int i = 0; |
|
50 |
while ((res = ap.evalXPath()) != -1) { |
|
51 |
writer = new PrintWriter(new BufferedWriter(new FileWriter("projectPackage_"+(++i)+".xml"))); |
|
52 |
System.out.println(res); |
|
53 |
writer.println("<doc " + ns + ">"); |
|
54 |
writeFragment(vn); |
|
55 |
VTDNav clone = vn.cloneNav(); |
|
56 |
AutoPilot ap2 = new AutoPilot(clone); |
|
57 |
ap2.selectXPath(".//link[@rel='FUND']"); |
|
58 |
vg_tmp = new VTDGen(); |
|
59 |
|
|
60 |
while (ap2.evalXPath() != -1) { |
|
61 |
//String fund = clone.toNormalizedString(clone.getAttrVal("href")); |
|
62 |
evalXpath(clone.toNormalizedString(clone.getAttrVal("href")), ".//link[@rel='FUNDER']"); |
|
63 |
String funder = vn_tmp.toNormalizedString(vn_tmp.getAttrVal("href")); |
|
64 |
vn_tmp.toElement(VTDNav.ROOT); |
|
65 |
writeFragment(vn_tmp); |
|
66 |
writeNewTagAndInfo(funder, "//name", "<funder> <name>", "</name></funder>", null); |
|
67 |
} |
|
68 |
ap2.resetXPath(); |
|
69 |
ap2.selectXPath(".//link[@rel='LEAD_ORG']"); |
|
70 |
while (ap2.evalXPath() != -1) { |
|
71 |
writeNewTagAndInfo(clone.toNormalizedString(clone.getAttrVal("href")), "//name", "<lead-org><name>", "</name>", null); |
|
72 |
writeNewTagAndInfo(clone.toNormalizedString(clone.getAttrVal("href")), ".", "<id>", "</id></lead-org>", "id"); |
|
73 |
} |
|
74 |
ap2.resetXPath(); |
|
75 |
ap2.selectXPath(".//link[@rel='PP_ORG']"); |
|
76 |
while (ap2.evalXPath() != -1) { |
|
77 |
writeNewTagAndInfo(clone.toNormalizedString(clone.getAttrVal("href")), "//name", "<pp-org><name>", "</name></pp-org>", null); |
|
78 |
writeNewTagAndInfo(clone.toNormalizedString(clone.getAttrVal("href")), ".", "<id>", "</id></lead-org>", "id"); |
|
79 |
} |
|
80 |
ap2.resetXPath(); |
|
81 |
|
|
82 |
ap2.selectXPath(".//link[@rel='PI_PER']"); |
|
83 |
while (ap2.evalXPath() != -1) { |
|
84 |
setNavigator(clone.toNormalizedString(clone.getAttrVal("href"))); |
|
85 |
vn_tmp.toElement(VTDNav.ROOT); |
|
86 |
writeFragment(vn_tmp); |
|
87 |
} |
|
88 |
writer.println("</doc>"); |
|
89 |
writer.close(); |
|
90 |
} |
|
91 |
|
|
92 |
} |
|
93 |
|
|
94 |
private void setNavigator(String httpUrl) { |
|
95 |
vg_tmp.clear(); |
|
96 |
vg_tmp.parseHttpUrl(httpUrl, false); |
|
97 |
vn_tmp = vg_tmp.getNav(); |
|
98 |
} |
|
99 |
|
|
100 |
private int evalXpath(String httpUrl, String xPath) throws Exception { |
|
101 |
setNavigator(httpUrl); |
|
102 |
ap_tmp = new AutoPilot(vn_tmp); |
|
103 |
ap_tmp.selectXPath(xPath); |
|
104 |
return ap_tmp.evalXPath(); |
|
105 |
} |
|
106 |
|
|
107 |
private void writeFragment(VTDNav nav) throws Exception { |
|
108 |
ByteArrayOutputStream b = new ByteArrayOutputStream(); |
|
109 |
nav.dumpFragment(b); |
|
110 |
writer.println(b); |
|
111 |
b.reset(); |
|
112 |
} |
|
113 |
|
|
114 |
private void writeNewTagAndInfo(String search, String xPath, String xmlOpenTag, String xmlCloseTag, String attrName) throws Exception { |
|
115 |
int nav_res = evalXpath(search, xPath); |
|
116 |
if (nav_res != -1) { |
|
117 |
writer.println(xmlOpenTag); |
|
118 |
if(StringUtils.isNotBlank(attrName)) writer.println(vn_tmp.toNormalizedString(vn_tmp.getAttrVal(attrName))); |
|
119 |
else |
|
120 |
writer.println(vn_tmp.toNormalizedString(vn_tmp.getText())); |
|
121 |
writer.println(xmlCloseTag); |
|
122 |
} |
|
123 |
} |
|
124 |
|
|
125 |
} |
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2Test.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.projects.gtr2; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import java.util.Iterator; |
|
7 |
|
|
8 |
import org.junit.Before; |
|
9 |
import org.junit.Ignore; |
|
10 |
import org.junit.Test; |
|
11 |
|
|
12 |
import com.ximpleware.VTDGen; |
|
13 |
|
|
14 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
15 |
import eu.dnetlib.miscutils.functional.xml.TryIndentXmlString; |
|
16 |
|
|
17 |
@Ignore |
|
18 |
public class Gtr2Test { |
|
19 |
|
|
20 |
private final String baseURL = "https://gtr.ukri.org/gtr/api"; |
|
21 |
private Gtr2Helper helper; |
|
22 |
private Gtr2ProjectsIterator iterator; |
|
23 |
private HttpConnector connector; |
|
24 |
|
|
25 |
@Before |
|
26 |
public void prepare() { |
|
27 |
helper = new Gtr2Helper(); |
|
28 |
// System.setProperty("jsse.enableSNIExtension","false"); |
|
29 |
} |
|
30 |
|
|
31 |
@Test |
|
32 |
public void testOne() throws Exception { |
|
33 |
System.out.println("one project"); |
|
34 |
final String url = "http://gtr.ukri.org/gtr/api/projects/0AE039A7-9A84-4943-AA36-001DB5763245"; |
|
35 |
final VTDGen vg_tmp = new VTDGen(); |
|
36 |
connector = new HttpConnector(); |
|
37 |
final String tmp = connector.getInputSource(url); |
|
38 |
final byte[] bytes = tmp.getBytes("UTF-8"); |
|
39 |
vg_tmp.setDoc(bytes); |
|
40 |
vg_tmp.parse(false); |
|
41 |
final String s = helper.processProject(vg_tmp.getNav(), "xmlns:ns=\"http:///afgshs\"", url); |
|
42 |
System.out.println(s); |
|
43 |
} |
|
44 |
|
|
45 |
@Test |
|
46 |
public void testPaging() throws Exception { |
|
47 |
iterator = new Gtr2ProjectsIterator(baseURL, null, 2, 2); |
|
48 |
final TryIndentXmlString indenter = new TryIndentXmlString(); |
|
49 |
|
|
50 |
while (iterator.hasNext()) { |
|
51 |
Thread.sleep(300); |
|
52 |
final String res = iterator.next(); |
|
53 |
assertNotNull(res); |
|
54 |
indenter.evaluate(res); |
|
55 |
System.out.println(res); |
|
56 |
} |
|
57 |
} |
|
58 |
|
|
59 |
@Test |
|
60 |
public void testOnePage() throws Exception { |
|
61 |
iterator = new Gtr2ProjectsIterator(baseURL, null, 12, 12); |
|
62 |
final int count = iterateAndCount(iterator); |
|
63 |
assertEquals(21, count); |
|
64 |
} |
|
65 |
|
|
66 |
@Test |
|
67 |
public void testIncrementalHarvestingNoRecords() throws Exception { |
|
68 |
System.out.println("incremental Harvesting"); |
|
69 |
iterator = new Gtr2ProjectsIterator(baseURL, "2050-12-12", 11, 13); |
|
70 |
final int count = iterateAndCount(iterator); |
|
71 |
assertEquals(1, count); |
|
72 |
} |
|
73 |
|
|
74 |
@Test |
|
75 |
public void testIncrementalHarvesting() throws Exception { |
|
76 |
System.out.println("incremental Harvesting"); |
|
77 |
iterator = new Gtr2ProjectsIterator(baseURL, "2016-11-30", 11, 11); |
|
78 |
final int count = iterateAndCount(iterator); |
|
79 |
assertEquals(21, count); |
|
80 |
} |
|
81 |
|
|
82 |
@Test |
|
83 |
@Ignore |
|
84 |
public void testCompleteHarvesting() throws Exception { |
|
85 |
System.out.println("testing complete harvesting"); |
|
86 |
iterator = new Gtr2ProjectsIterator(baseURL, null); |
|
87 |
// TryIndentXmlString indenter = new TryIndentXmlString(); |
|
88 |
// it.setEndAtPage(3); |
|
89 |
|
|
90 |
while (iterator.hasNext()) { |
|
91 |
final String res = iterator.next(); |
|
92 |
assertNotNull(res); |
|
93 |
// System.out.println(res); |
|
94 |
// Scanner keyboard = new Scanner(System.in); |
|
95 |
// System.out.println("press enter for next record"); |
|
96 |
// keyboard.nextLine(); |
|
97 |
|
|
98 |
} |
|
99 |
} |
|
100 |
|
|
101 |
private int iterateAndCount(final Iterator<String> iterator) throws Exception { |
|
102 |
int i = 0; |
|
103 |
while (iterator.hasNext()) { |
|
104 |
assertNotNull(iterator.next()); |
|
105 |
i++; |
|
106 |
} |
|
107 |
System.out.println("Got " + i + " projects"); |
|
108 |
return i; |
|
109 |
} |
|
110 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/AbstractGtr2CollectorPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.gtr2; |
|
2 |
|
|
3 |
import java.util.Iterator; |
|
4 |
|
|
5 |
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin; |
|
6 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
7 |
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor; |
|
8 |
|
|
9 |
public abstract class AbstractGtr2CollectorPlugin extends AbstractCollectorPlugin { |
|
10 |
|
|
11 |
@Override |
|
12 |
public final Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate) |
|
13 |
throws CollectorServiceException { |
|
14 |
|
|
15 |
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); } |
|
16 |
|
|
17 |
final String baseUrl = interfaceDescriptor.getBaseUrl(); |
|
18 |
final String startPage = interfaceDescriptor.getParams().get("startPage"); |
|
19 |
final String endPage = interfaceDescriptor.getParams().get("endPage"); |
|
20 |
|
|
21 |
return () -> { |
|
22 |
try { |
|
23 |
return createIterator(baseUrl, fromDate, startPage, endPage); |
|
24 |
} catch (final CollectorServiceException e) { |
|
25 |
throw new RuntimeException(e); |
|
26 |
} |
|
27 |
}; |
|
28 |
} |
|
29 |
|
|
30 |
protected abstract Iterator<String> createIterator(String baseUrl, final String fromDate, String startPage, String endPage) |
|
31 |
throws CollectorServiceException; |
|
32 |
|
|
33 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2Iterator.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.gtr2; |
|
2 |
|
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.HashMap; |
|
5 |
import java.util.Iterator; |
|
6 |
import java.util.LinkedList; |
|
7 |
import java.util.List; |
|
8 |
import java.util.Map; |
|
9 |
import java.util.Queue; |
|
10 |
import java.util.function.Function; |
|
11 |
|
|
12 |
import org.apache.commons.lang.math.NumberUtils; |
|
13 |
import org.apache.commons.lang3.StringUtils; |
|
14 |
import org.apache.commons.logging.Log; |
|
15 |
import org.apache.commons.logging.LogFactory; |
|
16 |
import org.dom4j.Document; |
|
17 |
import org.dom4j.DocumentException; |
|
18 |
import org.dom4j.DocumentHelper; |
|
19 |
import org.dom4j.Element; |
|
20 |
import org.joda.time.DateTime; |
|
21 |
|
|
22 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
23 |
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException; |
|
24 |
|
|
25 |
public abstract class Gtr2Iterator implements Iterator<String> { |
|
26 |
|
|
27 |
public static final int PAGE_SIZE = 20; |
|
28 |
|
|
29 |
private static final Log log = LogFactory.getLog(Gtr2Iterator.class); |
|
30 |
|
|
31 |
private final String baseUrl; |
|
32 |
private int currPage; |
|
33 |
private int endPage; |
|
34 |
private boolean incremental = false; |
|
35 |
private DateTime fromDate; |
|
36 |
|
|
37 |
private final Map<String, String> cache = new HashMap<>(); |
|
38 |
|
|
39 |
private final Queue<String> queue = new LinkedList<>(); |
|
40 |
|
|
41 |
private String nextElement; |
|
42 |
|
|
43 |
public Gtr2Iterator(final String baseUrl, final String fromDate, final String startPage, final String endPage) |
|
44 |
throws CollectorServiceException { |
|
45 |
|
|
46 |
this.baseUrl = baseUrl; |
|
47 |
this.currPage = NumberUtils.toInt(startPage, 1); |
|
48 |
this.endPage = NumberUtils.toInt(endPage, Integer.MAX_VALUE); |
|
49 |
this.incremental = StringUtils.isNotBlank(fromDate); |
|
50 |
|
|
51 |
if (this.incremental) { |
|
52 |
this.fromDate = Gtr2Helper.parseDate(fromDate); |
|
53 |
} |
|
54 |
|
|
55 |
prepareNextElement(); |
|
56 |
} |
|
57 |
|
|
58 |
@Override |
|
59 |
public boolean hasNext() { |
|
60 |
return nextElement != null; |
|
61 |
} |
|
62 |
|
|
63 |
@Override |
|
64 |
public String next() { |
|
65 |
try { |
|
66 |
return nextElement; |
|
67 |
} finally { |
|
68 |
prepareNextElement(); |
|
69 |
} |
|
70 |
} |
|
71 |
|
|
72 |
@Override |
|
73 |
public void remove() { |
|
74 |
throw new UnsupportedOperationException(); |
|
75 |
} |
|
76 |
|
|
77 |
private void prepareNextElement() { |
|
78 |
while (this.currPage <= this.endPage && queue.isEmpty()) { |
|
79 |
log.debug("FETCHING PAGE + " + currPage + "/" + endPage); |
|
80 |
this.queue.addAll(fetchPage(currPage++)); |
|
81 |
} |
|
82 |
this.nextElement = this.queue.poll(); |
|
83 |
} |
|
84 |
|
|
85 |
private List<String> fetchPage(final int pageNumber) { |
|
86 |
|
|
87 |
final List<String> res = new ArrayList<>(); |
|
88 |
try { |
|
89 |
final Document doc = Gtr2Helper.loadURL(urlForPage(baseUrl, pageNumber)); |
|
90 |
|
|
91 |
if (endPage == Integer.MAX_VALUE) { |
|
92 |
endPage = NumberUtils.toInt(doc.valueOf("/*/@*[local-name() = 'totalPages']")); |
|
93 |
} |
|
94 |
|
|
95 |
for (final Object po : doc.selectNodes(xpathForEntity())) { |
|
96 |
final Element mainEntity = (Element) ((Element) po).detach(); |
|
97 |
|
|
98 |
if (filterIncremental(mainEntity)) { |
|
99 |
res.add(expandMainEntity(mainEntity)); |
|
100 |
} else { |
|
101 |
log.debug("Skipped entity"); |
|
102 |
} |
|
103 |
|
|
104 |
} |
|
105 |
} catch (final Throwable e) { |
|
106 |
log.error("Exception fetching page " + pageNumber, e); |
|
107 |
throw new CollectorServiceRuntimeException("Exception fetching page " + pageNumber, e); |
|
108 |
} |
|
109 |
|
|
110 |
return res; |
|
111 |
} |
|
112 |
|
|
113 |
protected void addLinkedEntities(final Element master, final String relType, final Element newRoot, final Function<Document, Element> mapper) { |
|
114 |
|
|
115 |
for (final Object o : master.selectNodes(".//*[local-name()='link']")) { |
|
116 |
final String rel = ((Element) o).valueOf("@*[local-name()='rel']"); |
|
117 |
final String href = ((Element) o).valueOf("@*[local-name()='href']"); |
|
118 |
|
|
119 |
if (relType.equals(rel) && StringUtils.isNotBlank(href)) { |
|
120 |
final String cacheKey = relType + "#" + href; |
|
121 |
if (cache.containsKey(cacheKey)) { |
|
122 |
try { |
|
123 |
log.debug(" * from cache (" + relType + "): " + href); |
|
124 |
newRoot.add(DocumentHelper.parseText(cache.get(cacheKey)).getRootElement()); |
|
125 |
} catch (final DocumentException e) { |
|
126 |
log.error("Error retrieving cache element: " + cacheKey, e); |
|
127 |
throw new CollectorServiceRuntimeException("Error retrieving cache element: " + cacheKey, e); |
|
128 |
} |
|
129 |
} else { |
|
130 |
final Document doc = Gtr2Helper.loadURL(href); |
|
131 |
final Element elem = mapper.apply(doc); |
|
132 |
newRoot.add(elem); |
|
133 |
cache.put(cacheKey, elem.asXML()); |
|
134 |
} |
|
135 |
|
|
136 |
} |
|
137 |
} |
|
138 |
} |
|
139 |
|
|
140 |
private boolean filterIncremental(final Element e) { |
|
141 |
if (!incremental) { |
|
142 |
return true; |
|
143 |
} else if (Gtr2Helper.isAfter(e.valueOf("@*[local-name() = 'created']"), fromDate)) { |
|
144 |
return true; |
|
145 |
} else if (Gtr2Helper.isAfter(e.valueOf("@*[local-name() = 'updated']"), fromDate)) { |
|
146 |
return true; |
|
147 |
} else { |
|
148 |
return false; |
|
149 |
} |
|
150 |
} |
|
151 |
|
|
152 |
abstract protected String expandMainEntity(final Element mainEntity); |
|
153 |
|
|
154 |
abstract protected String urlForPage(final String baseUrl, final int pageNumber); |
|
155 |
|
|
156 |
abstract protected String xpathForEntity(); |
|
157 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2ProjectsCollectorPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.gtr2; |
|
2 |
|
|
3 |
import java.util.Iterator; |
|
4 |
|
|
5 |
import org.dom4j.Document; |
|
6 |
import org.dom4j.DocumentHelper; |
|
7 |
import org.dom4j.Element; |
|
8 |
|
|
9 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
10 |
|
|
11 |
/** |
|
12 |
* Plugin to collect metadata record about projects and fundings via the UKRI grt2 API. |
|
13 |
* <p> |
|
14 |
* Documentation : http://gtr.ukri.org/resources/api.html. |
|
15 |
* </p> |
|
16 |
* <p> |
|
17 |
* BaseURL: https://gtr.ukri.org/gtr/api The results returned by the API are XMLs. |
|
18 |
* </p> |
|
19 |
* <p> |
|
20 |
* Pagination: TO BE DEFINED. Exceeding the number of pages available will result in a HTTP response code of 404 |
|
21 |
* </p> |
|
22 |
* |
|
23 |
* @author alessia |
|
24 |
*/ |
|
25 |
public class Gtr2ProjectsCollectorPlugin extends AbstractGtr2CollectorPlugin { |
|
26 |
|
|
27 |
@Override |
|
28 |
protected Iterator<String> createIterator(final String baseUrl, final String fromDate, final String startPage, final String endPage) |
|
29 |
throws CollectorServiceException { |
|
30 |
|
|
31 |
return new Gtr2Iterator(baseUrl, fromDate, startPage, endPage) { |
|
32 |
|
|
33 |
@Override |
|
34 |
protected String urlForPage(final String baseUrl, final int pageNumber) { |
|
35 |
return baseUrl + "/projects?p=" + pageNumber; |
|
36 |
} |
|
37 |
|
|
38 |
@Override |
|
39 |
protected String xpathForEntity() { |
|
40 |
return "//*[local-name() = 'project']"; |
|
41 |
} |
|
42 |
|
|
43 |
@Override |
|
44 |
protected String expandMainEntity(final Element mainEntity) { |
|
45 |
|
|
46 |
final Element newRoot = DocumentHelper.createElement("doc"); |
|
47 |
|
|
48 |
newRoot.add(mainEntity); |
|
49 |
|
|
50 |
addLinkedEntities(mainEntity, "LEAD_ORG", newRoot, o -> asOrgElement("ld-org", o)); |
|
51 |
addLinkedEntities(mainEntity, "PP_ORG", newRoot, o -> asOrgElement("pp-org", o)); |
|
52 |
addLinkedEntities(mainEntity, "PI_PER", newRoot, o -> asPersonElement("pi-per", o)); |
|
53 |
|
|
54 |
return DocumentHelper.createDocument(newRoot).asXML(); |
|
55 |
} |
|
56 |
|
|
57 |
private Element asOrgElement(final String nodeName, final Document doc) { |
|
58 |
final Element newOrg = DocumentHelper.createElement(nodeName); |
|
59 |
newOrg.addElement("id").setText(doc.valueOf("/*/@*[local-name()='id']")); |
|
60 |
newOrg.addElement("name").setText(doc.valueOf("//*[local-name()='name']")); |
|
61 |
newOrg.addElement("country").setText(doc.valueOf("//*[local-name()='country']")); |
|
62 |
return newOrg; |
|
63 |
} |
|
64 |
|
|
65 |
private Element asPersonElement(final String nodeName, final Document doc) { |
|
66 |
final Element newPers = DocumentHelper.createElement(nodeName); |
|
67 |
newPers.addElement("id").setText(doc.valueOf("/*/@*[local-name()='id']")); |
|
68 |
newPers.addElement("firstName").setText(doc.valueOf("//*[local-name()='firstName']")); |
|
69 |
newPers.addElement("otherNames").setText(doc.valueOf("//*[local-name()='otherNames']")); |
|
70 |
newPers.addElement("surname").setText(doc.valueOf("//*[local-name()='surname']")); |
|
71 |
return newPers; |
|
72 |
} |
|
73 |
}; |
|
74 |
} |
|
75 |
|
|
76 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2PublicationsCollectorPlugin.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.gtr2; |
|
2 |
|
|
3 |
import java.util.Iterator; |
|
4 |
|
|
5 |
import org.dom4j.Document; |
|
6 |
import org.dom4j.DocumentHelper; |
|
7 |
import org.dom4j.Element; |
|
8 |
|
|
9 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException; |
|
10 |
|
|
11 |
/** |
|
12 |
* Plugin to collect metadata record about publications via the UKRI grt2 API. |
|
13 |
* <p> |
|
14 |
* Documentation : http://gtr.ukri.org/resources/api.html. |
|
15 |
* </p> |
|
16 |
* <p> |
|
17 |
* BaseURL: https://gtr.ukri.org/gtr/api The results returned by the API are XMLs. |
|
18 |
* </p> |
|
19 |
* <p> |
|
20 |
* Pagination: TO BE DEFINED. Exceeding the number of pages available will result in a HTTP response code of 404 |
|
21 |
* </p> |
|
22 |
* |
|
23 |
* @author alessia |
|
24 |
*/ |
|
25 |
public class Gtr2PublicationsCollectorPlugin extends AbstractGtr2CollectorPlugin { |
|
26 |
|
|
27 |
@Override |
|
28 |
protected Iterator<String> createIterator(final String baseUrl, final String fromDate, final String startPage, final String endPage) |
|
29 |
throws CollectorServiceException { |
|
30 |
return new Gtr2Iterator(baseUrl, fromDate, startPage, endPage) { |
|
31 |
|
|
32 |
@Override |
|
33 |
protected String urlForPage(final String baseUrl, final int pageNumber) { |
|
34 |
return baseUrl + "/outcomes/publications?p=" + pageNumber; |
|
35 |
} |
|
36 |
|
|
37 |
@Override |
|
38 |
protected String xpathForEntity() { |
|
39 |
return "//*[local-name() = 'publication']"; |
|
40 |
} |
|
41 |
|
|
42 |
@Override |
|
43 |
protected String expandMainEntity(final Element mainEntity) { |
|
44 |
final Element newRoot = DocumentHelper.createElement("doc"); |
|
45 |
newRoot.add(mainEntity); |
|
46 |
addLinkedEntities(mainEntity, "PROJECT", newRoot, o -> asProjectElement(o)); |
|
47 |
return DocumentHelper.createDocument(newRoot).asXML(); |
|
48 |
} |
|
49 |
|
|
50 |
private Element asProjectElement(final Document doc) { |
|
51 |
final Element newOrg = DocumentHelper.createElement("project"); |
|
52 |
newOrg.addElement("id").setText(doc.valueOf("/*/@*[local-name()='id']")); |
|
53 |
newOrg.addElement("code").setText(doc.valueOf("//*[local-name()='identifier' and @*[local-name()='type'] = 'RCUK']")); |
|
54 |
newOrg.addElement("title").setText(doc.valueOf("//*[local-name()='title']")); |
|
55 |
return newOrg; |
|
56 |
} |
|
57 |
|
|
58 |
}; |
|
59 |
} |
|
60 |
|
|
61 |
} |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2Helper.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.gtr2; |
|
2 |
|
|
3 |
import org.apache.commons.logging.Log; |
|
4 |
import org.apache.commons.logging.LogFactory; |
|
5 |
import org.dom4j.Document; |
|
6 |
import org.dom4j.DocumentHelper; |
|
7 |
import org.joda.time.DateTime; |
|
8 |
import org.joda.time.format.DateTimeFormat; |
|
9 |
import org.joda.time.format.DateTimeFormatter; |
|
10 |
|
|
11 |
import eu.dnetlib.data.collector.plugins.HttpConnector; |
|
12 |
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException; |
|
13 |
|
|
14 |
public class Gtr2Helper { |
|
15 |
|
|
16 |
private static final Log log = LogFactory.getLog(Gtr2Helper.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
17 |
|
|
18 |
private static final HttpConnector connector = new HttpConnector(); |
|
19 |
private static final DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd"); |
|
20 |
|
|
21 |
private static final int MAX_ATTEMPTS = 10; |
|
22 |
|
|
23 |
public static String cleanURL(final String url) { |
|
24 |
String cleaned = url; |
|
25 |
if (cleaned.contains("gtr.gtr")) { |
|
26 |
cleaned = cleaned.replace("gtr.gtr", "gtr"); |
|
27 |
} |
|
28 |
if (cleaned.startsWith("http://")) { |
|
29 |
cleaned = cleaned.replaceFirst("http://", "https://"); |
|
30 |
} |
|
31 |
return cleaned; |
|
32 |
} |
|
33 |
|
|
34 |
public static Document loadURL(final String url) { |
|
35 |
final String cleanUrl = cleanURL(url); |
|
36 |
return loadURL(cleanUrl, 0); |
|
37 |
} |
|
38 |
|
|
39 |
private static Document loadURL(final String cleanUrl, final int attempt) { |
|
40 |
try { |
|
41 |
log.debug(" * Downloading Url: " + cleanUrl); |
|
42 |
final byte[] bytes = connector.getInputSource(cleanUrl).getBytes("UTF-8"); |
|
43 |
return DocumentHelper.parseText(new String(bytes)); |
|
44 |
} catch (final Throwable e) { |
|
45 |
log.error("Error dowloading url: " + cleanUrl + ", attempt = " + attempt, e); |
|
46 |
if (attempt < MAX_ATTEMPTS) { |
|
47 |
try { |
|
48 |
Thread.sleep(60000); // I wait for a minute |
|
49 |
} catch (final InterruptedException e1) { |
|
50 |
throw new CollectorServiceRuntimeException("Error dowloading url: " + cleanUrl, e); |
|
51 |
} |
|
52 |
return loadURL(cleanUrl, attempt + 1); |
|
53 |
} else { |
|
54 |
throw new CollectorServiceRuntimeException("Error dowloading url: " + cleanUrl, e); |
|
55 |
} |
|
56 |
} |
|
57 |
} |
|
58 |
|
|
59 |
public static DateTime parseDate(final String s) { |
|
60 |
// I expect dates in the format 'yyyy-MM-dd'. See class |
|
61 |
// eu.dnetlib.msro.workflows.nodes.collect.FindDateRangeForIncrementalHarvestingJobNode |
|
62 |
return DateTime.parse(s.substring(0, s.indexOf("T")), simpleDateTimeFormatter); |
|
63 |
} |
|
64 |
|
|
65 |
public static boolean isAfter(final String d, final DateTime fromDate) { |
|
66 |
return Gtr2Helper.parseDate(d).isAfter(fromDate); |
|
67 |
} |
|
68 |
} |
modules/dnet-collector-plugins/trunk/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml | ||
---|---|---|
114 | 114 |
</property> |
115 | 115 |
</bean> |
116 | 116 |
|
117 |
<bean id="gtr2Plugin" class="eu.dnetlib.data.collector.plugins.projects.gtr2.Gtr2CollectorPlugin">
|
|
117 |
<bean id="gtr2ProjectsPlugin" class="eu.dnetlib.data.collector.plugins.gtr2.Gtr2ProjectsCollectorPlugin">
|
|
118 | 118 |
<property name="protocolDescriptor"> |
119 | 119 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolDescriptor" p:name="gtr2Projects"> |
120 | 120 |
<property name="params"> |
... | ... | |
126 | 126 |
</bean> |
127 | 127 |
</property> |
128 | 128 |
</bean> |
129 |
|
|
130 |
<bean id="gtr2PublicationsPlugin" class="eu.dnetlib.data.collector.plugins.gtr2.Gtr2PublicationsCollectorPlugin"> |
|
131 |
<property name="protocolDescriptor"> |
|
132 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolDescriptor" p:name="gtr2Publications"> |
|
133 |
<property name="params"> |
|
134 |
<list> |
|
135 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" p:name="startPage" p:optional="true" p:type="NUMBER"/> |
|
136 |
<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" p:name="endPage" p:optional="true" p:type="NUMBER"/> |
|
137 |
</list> |
|
138 |
</property> |
|
139 |
</bean> |
|
140 |
</property> |
|
141 |
</bean> |
|
129 | 142 |
|
130 | 143 |
<bean id="HTTPWithFileNamePlugin" class="eu.dnetlib.data.collector.plugins.httpfilename.HTTPWithFileNameCollectorPlugin"> |
131 | 144 |
<property name="protocolDescriptor"> |
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2ProjectsCollectorPluginTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.gtr2; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import java.util.Iterator; |
|
7 |
|
|
8 |
import org.junit.Ignore; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
import eu.dnetlib.miscutils.functional.xml.TryIndentXmlString; |
|
12 |
|
|
13 |
public class Gtr2ProjectsCollectorPluginTest { |
|
14 |
|
|
15 |
private static final String baseURL = "https://gtr.ukri.org/gtr/api"; |
|
16 |
|
|
17 |
private final Gtr2ProjectsCollectorPlugin plugin = new Gtr2ProjectsCollectorPlugin(); |
|
18 |
|
|
19 |
@Test |
|
20 |
@Ignore |
|
21 |
public void testOne() throws Exception { |
|
22 |
System.out.println("one project"); |
|
23 |
|
|
24 |
final Iterator<String> iterator = plugin.createIterator(baseURL, null, null, null); |
|
25 |
|
|
26 |
while (iterator.hasNext()) { |
|
27 |
final String res = iterator.next(); |
|
28 |
assertNotNull(res); |
|
29 |
System.out.println(res); |
|
30 |
return; |
|
31 |
} |
|
32 |
} |
|
33 |
|
|
34 |
@Test |
|
35 |
@Ignore |
|
36 |
public void testPaging() throws Exception { |
|
37 |
final Iterator<String> iterator = plugin.createIterator(baseURL, null, "2", "2"); |
|
38 |
|
|
39 |
final TryIndentXmlString indenter = new TryIndentXmlString(); |
|
40 |
|
|
41 |
while (iterator.hasNext()) { |
|
42 |
Thread.sleep(300); |
|
43 |
final String res = iterator.next(); |
|
44 |
assertNotNull(res); |
|
45 |
indenter.evaluate(res); |
|
46 |
System.out.println(res); |
|
47 |
} |
|
48 |
} |
|
49 |
|
|
50 |
@Test |
|
51 |
@Ignore |
|
52 |
public void testOnePage() throws Exception { |
|
53 |
final Iterator<String> iterator = plugin.createIterator(baseURL, null, "12", "12"); |
|
54 |
final int count = iterateAndCount(iterator); |
|
55 |
assertEquals(21, count); |
|
56 |
} |
|
57 |
|
|
58 |
@Test |
|
59 |
@Ignore |
|
60 |
public void testIncrementalHarvestingNoRecords() throws Exception { |
|
61 |
System.out.println("incremental Harvesting"); |
|
62 |
final Iterator<String> iterator = plugin.createIterator(baseURL, "2050-12-12", "11", "13"); |
|
63 |
final int count = iterateAndCount(iterator); |
|
64 |
assertEquals(1, count); |
|
65 |
} |
|
66 |
|
|
67 |
@Test |
|
68 |
@Ignore |
|
69 |
public void testIncrementalHarvesting() throws Exception { |
|
70 |
System.out.println("incremental Harvesting"); |
|
71 |
final Iterator<String> iterator = plugin.createIterator(baseURL, "2016-11-30", "11", "11"); |
|
72 |
final int count = iterateAndCount(iterator); |
|
73 |
assertEquals(21, count); |
|
74 |
} |
|
75 |
|
|
76 |
@Test |
|
77 |
@Ignore |
|
78 |
public void testCompleteHarvesting() throws Exception { |
|
79 |
System.out.println("testing complete harvesting"); |
|
80 |
final Iterator<String> iterator = plugin.createIterator(baseURL, null, null, null); |
|
81 |
// TryIndentXmlString indenter = new TryIndentXmlString(); |
|
82 |
// it.setEndAtPage(3); |
|
83 |
|
|
84 |
while (iterator.hasNext()) { |
|
85 |
final String res = iterator.next(); |
|
86 |
assertNotNull(res); |
|
87 |
// System.out.println(res); |
|
88 |
// Scanner keyboard = new Scanner(System.in); |
|
89 |
// System.out.println("press enter for next record"); |
|
90 |
// keyboard.nextLine(); |
|
91 |
|
|
92 |
} |
|
93 |
} |
|
94 |
|
|
95 |
private int iterateAndCount(final Iterator<String> iterator) throws Exception { |
|
96 |
int i = 0; |
|
97 |
while (iterator.hasNext()) { |
|
98 |
assertNotNull(iterator.next()); |
|
99 |
i++; |
|
100 |
} |
|
101 |
System.out.println("Got " + i + " projects"); |
|
102 |
return i; |
|
103 |
} |
|
104 |
} |
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2PublicationsCollectorPluginTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.gtr2; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import java.util.Iterator; |
|
7 |
|
|
8 |
import org.junit.Ignore; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
import eu.dnetlib.miscutils.functional.xml.TryIndentXmlString; |
|
12 |
|
|
13 |
public class Gtr2PublicationsCollectorPluginTest { |
|
14 |
|
|
15 |
private static final String baseURL = "https://gtr.ukri.org/gtr/api"; |
|
16 |
|
|
17 |
private final Gtr2PublicationsCollectorPlugin plugin = new Gtr2PublicationsCollectorPlugin(); |
|
18 |
|
|
19 |
@Test |
|
20 |
@Ignore |
|
21 |
public void testOne() throws Exception { |
|
22 |
System.out.println("one publication"); |
|
23 |
|
|
24 |
final Iterator<String> iterator = plugin.createIterator(baseURL, null, null, null); |
|
25 |
|
|
26 |
while (iterator.hasNext()) { |
|
27 |
final String res = iterator.next(); |
|
28 |
assertNotNull(res); |
|
29 |
System.out.println(res); |
|
30 |
return; |
|
31 |
} |
|
32 |
} |
|
33 |
|
|
34 |
@Test |
|
35 |
@Ignore |
|
36 |
public void testPaging() throws Exception { |
|
37 |
final Iterator<String> iterator = plugin.createIterator(baseURL, null, "2", "2"); |
|
38 |
|
|
39 |
final TryIndentXmlString indenter = new TryIndentXmlString(); |
|
40 |
|
|
41 |
while (iterator.hasNext()) { |
|
42 |
Thread.sleep(300); |
|
43 |
final String res = iterator.next(); |
|
44 |
assertNotNull(res); |
|
45 |
indenter.evaluate(res); |
|
46 |
System.out.println(res); |
|
47 |
} |
|
48 |
} |
|
49 |
|
|
50 |
@Test |
|
51 |
@Ignore |
|
52 |
public void testOnePage() throws Exception { |
|
53 |
final Iterator<String> iterator = plugin.createIterator(baseURL, null, "12", "12"); |
|
54 |
final int count = iterateAndCount(iterator); |
|
55 |
assertEquals(21, count); |
|
56 |
} |
|
57 |
|
|
58 |
@Test |
|
59 |
@Ignore |
|
60 |
public void testIncrementalHarvestingNoRecords() throws Exception { |
|
61 |
System.out.println("incremental Harvesting"); |
|
62 |
final Iterator<String> iterator = plugin.createIterator(baseURL, "2050-12-12", "11", "13"); |
|
63 |
final int count = iterateAndCount(iterator); |
|
64 |
assertEquals(1, count); |
|
65 |
} |
|
66 |
|
|
67 |
@Test |
|
68 |
@Ignore |
|
69 |
public void testIncrementalHarvesting() throws Exception { |
|
70 |
System.out.println("incremental Harvesting"); |
|
71 |
final Iterator<String> iterator = plugin.createIterator(baseURL, "2016-11-30", "11", "11"); |
|
72 |
final int count = iterateAndCount(iterator); |
|
73 |
assertEquals(21, count); |
|
74 |
} |
|
75 |
|
|
76 |
@Test |
|
77 |
@Ignore |
|
78 |
public void testCompleteHarvesting() throws Exception { |
|
79 |
System.out.println("testing complete harvesting"); |
|
80 |
final Iterator<String> iterator = plugin.createIterator(baseURL, null, null, null); |
|
81 |
// TryIndentXmlString indenter = new TryIndentXmlString(); |
|
82 |
// it.setEndAtPage(3); |
|
83 |
|
|
84 |
while (iterator.hasNext()) { |
|
85 |
final String res = iterator.next(); |
|
86 |
assertNotNull(res); |
|
87 |
// System.out.println(res); |
|
88 |
// Scanner keyboard = new Scanner(System.in); |
|
89 |
// System.out.println("press enter for next record"); |
|
90 |
// keyboard.nextLine(); |
|
91 |
|
|
92 |
} |
|
93 |
} |
|
94 |
|
|
95 |
private int iterateAndCount(final Iterator<String> iterator) throws Exception { |
|
96 |
int i = 0; |
|
97 |
while (iterator.hasNext()) { |
|
98 |
assertNotNull(iterator.next()); |
|
99 |
i++; |
|
100 |
} |
|
101 |
System.out.println("Got " + i + " publications"); |
|
102 |
return i; |
|
103 |
} |
|
104 |
} |
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/gtr2/VTDXMLTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collector.plugins.gtr2; |
|
2 |
|
|
3 |
import java.io.BufferedWriter; |
|
4 |
import java.io.ByteArrayOutputStream; |
|
5 |
import java.io.FileWriter; |
|
6 |
import java.io.PrintWriter; |
|
7 |
|
|
8 |
import com.ximpleware.AutoPilot; |
|
9 |
import com.ximpleware.VTDGen; |
|
10 |
import com.ximpleware.VTDNav; |
|
11 |
import org.apache.commons.lang3.StringUtils; |
|
12 |
import org.junit.Ignore; |
|
13 |
import org.junit.Test; |
|
14 |
@Ignore |
|
15 |
public class VTDXMLTest { |
|
16 |
|
|
17 |
private VTDGen vg; |
|
18 |
private VTDNav vn; |
|
19 |
private AutoPilot ap; |
|
20 |
|
|
21 |
private VTDGen vg_tmp; |
|
22 |
private VTDNav vn_tmp; |
|
23 |
private AutoPilot ap_tmp; |
|
24 |
|
|
25 |
private PrintWriter writer; |
|
26 |
//TODO: use resource and not full path |
|
27 |
private String inputFilePath = |
|
28 |
"/Users/alessia/workspace/dnet/dnet-collector-plugins/src/test/resources/eu.dnetlib.data.collector.plugins.projects.gtr2/projects.xml"; |
|
29 |
|
|
30 |
@Test |
|
31 |
public void test() throws Exception { |
Also available in: Unified diff
merge branch gtr2_michele