Project

General

Profile

« Previous | Next » 

Revision 63307

merge branch gtr2_michele

View differences:

modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2Helper.java
1
package eu.dnetlib.data.collector.plugins.projects.gtr2;
2

  
3
import java.io.ByteArrayOutputStream;
4
import java.io.StringWriter;
5

  
6
import org.apache.commons.lang3.StringEscapeUtils;
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9

  
10
import com.ximpleware.AutoPilot;
11
import com.ximpleware.VTDGen;
12
import com.ximpleware.VTDNav;
13

  
14
import eu.dnetlib.data.collector.plugins.HttpConnector;
15

  
16
public class Gtr2Helper {
17

  
18
	private static final Log log = LogFactory.getLog(Gtr2Helper.class); // NOPMD by marko on 11/24/08 5:02 PM
19

  
20
	private VTDNav mainVTDNav;
21
	private AutoPilot mainAutoPilot;
22
	private StringWriter writer;
23
	private HttpConnector connector;
24
	// private BlockingQueue<String> fragment = new ArrayBlockingQueue<String>(20);
25

  
26
	public static String cleanURL(final String url) {
27
		String cleaned = url;
28
		if (cleaned.contains("gtr.gtr")) {
29
			cleaned = cleaned.replace("gtr.gtr", "gtr");
30
		}
31
		if (cleaned.startsWith("http://")) {
32
			cleaned = cleaned.replaceFirst("http://", "https://");
33
		}
34
		return cleaned;
35
	}
36

  
37
	public String processProject(final VTDNav vn, final String namespaces, final String projectUrl) throws Exception {
38
		writer = new StringWriter();
39
		mainVTDNav = vn;
40
		mainAutoPilot = new AutoPilot(mainVTDNav);
41
		writer.write("<doc " + namespaces + ">");
42
		writeFragment(mainVTDNav);
43

  
44
		mainAutoPilot.selectXPath("//link[@rel='FUND']");
45

  
46
		while (mainAutoPilot.evalXPath() != -1) {
47
			processFunder(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), projectUrl);
48
		}
49

  
50
		mainAutoPilot.resetXPath();
51
		mainAutoPilot.selectXPath(".//link[@rel='LEAD_ORG']");
52
		while (mainAutoPilot.evalXPath() != -1) {
53
			processOrg(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), new String[] {
54
				"<ld-org>", "</ld-org>"
55
			}, projectUrl);
56
		}
57
		mainAutoPilot.resetXPath();
58
		mainAutoPilot.selectXPath(".//link[@rel='PP_ORG']");
59
		while (mainAutoPilot.evalXPath() != -1) {
60
			processOrg(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), new String[] {
61
				"<pp-org>", "</pp-org>"
62
			}, projectUrl);
63
		}
64

  
65
		// mainAutoPilot.resetXPath();
66
		// mainAutoPilot.selectXPath(".//link[@rel='PARTICIPANT_ORG']");
67
		// while (mainAutoPilot.evalXPath() != -1) {
68
		// processOrg(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
69
		// new String[]{"<pp-org>", "</pp-org>"}, projectUrl);
70
		// }
71

  
72
		mainAutoPilot.resetXPath();
73
		mainAutoPilot.selectXPath(".//link[@rel='PI_PER']");
74
		while (mainAutoPilot.evalXPath() != -1) {
75
			processPerson(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")), projectUrl);
76
		}
77
		writer.write("</doc>");
78
		writer.close();
79

  
80
		return writer.toString();
81
	}
82

  
83
	private VTDNav setNavigator(final String httpUrl) {
84
		final VTDGen vg_tmp = new VTDGen();
85
		connector = new HttpConnector();
86
		try {
87
			final byte[] bytes = connector.getInputSource(cleanURL(httpUrl)).getBytes("UTF-8");
88
			vg_tmp.setDoc(bytes);
89
			vg_tmp.parse(false);
90
			// vg_tmp.parseHttpUrl(httpUrl, false);
91
			return vg_tmp.getNav();
92
		} catch (final Throwable e) {
93
			return null;
94
		}
95
	}
96

  
97
	private int evalXpath(final VTDNav fragmentVTDNav, final String xPath) throws Exception {
98

  
99
		final AutoPilot ap_tmp = new AutoPilot(fragmentVTDNav);
100
		ap_tmp.selectXPath(xPath);
101
		return ap_tmp.evalXPath();
102
	}
103

  
104
	private void writeFragment(final VTDNav nav) throws Exception {
105
		final ByteArrayOutputStream b = new ByteArrayOutputStream();
106
		nav.dumpFragment(b);
107
		final String ret = b.toString();
108
		b.reset();
109
		writer.write(ret);
110
	}
111

  
112
	private void writeNewTagAndInfo(final VTDNav vn, final String xPath, final String xmlOpenTag, final String xmlCloseTag, final String attrName)
113
		throws Exception {
114

  
115
		final int nav_res = evalXpath(vn, xPath);
116
		if (nav_res != -1) {
117
			String tmp = xmlOpenTag;
118
			if (attrName != null) {
119
				tmp += vn.toNormalizedString(vn.getAttrVal(attrName));
120
			} else {
121
				tmp += StringEscapeUtils.escapeXml11(vn.toNormalizedString(vn.getText()));
122
			}
123
			tmp += xmlCloseTag;
124
			writer.write(tmp);
125
		}
126
	}
127

  
128
	private void processPerson(final String httpUrl, final String projectUrl) {
129
		log.debug(String.format("Getting person %s for project %s", httpUrl, projectUrl));
130
		final VTDNav vn = setNavigator(cleanURL(httpUrl));
131
		try {
132
			writeFragment(vn);
133
		} catch (final Throwable e) {
134
			log.debug(String.format("Exception in processPerson from %s \n Error message: \n %s", httpUrl, e.getMessage()));
135
		}
136

  
137
	}
138

  
139
	private void processOrg(final String httpUrl, final String[] tags, final String projectUrl) {
140
		log.debug(String.format("Getting org %s for project %s", httpUrl, projectUrl));
141
		final VTDNav vn = setNavigator(cleanURL(httpUrl));
142
		try {
143
			writeNewTagAndInfo(vn, "//name", tags[0] + "<name>", "</name>", null);
144
			vn.toElement(VTDNav.ROOT);
145
			writeNewTagAndInfo(vn, "//country", "<country>", "</country>", null);
146
			vn.toElement(VTDNav.ROOT);
147
			writeNewTagAndInfo(vn, ".", "<id>", "</id>" + tags[1], "id");
148
		} catch (final Throwable e) {
149
			log.debug(String.format("Exception in processOrg from %s \n Error message: \n %s", httpUrl, e.getMessage()));
150
		}
151
	}
152

  
153
	private void processFunder(final String httpUrl, final String projectUrl) {
154
		log.debug(String.format("Getting funder %s for project %s", httpUrl, projectUrl));
155
		final VTDNav vn = setNavigator(cleanURL(httpUrl));
156
		try {
157
			final AutoPilot ap = new AutoPilot(vn);
158
			writeFragment(vn);
159
			ap.selectXPath(".//link[@rel='FUNDER']");
160
			VTDNav tmp_vn;
161
			while (ap.evalXPath() != -1) {
162
				tmp_vn = setNavigator(vn.toNormalizedString(vn.getAttrVal("href")));
163
				writeNewTagAndInfo(tmp_vn, "//name", "<funder> <name>", "</name></funder>", null);
164
			}
165
		} catch (final Throwable e) {
166
			log.debug(String.format("Exception in processFunder from %s \n Error message: \n %s", httpUrl, e.getMessage()));
167
		}
168
	}
169
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2ProjectsIterator.java
1
package eu.dnetlib.data.collector.plugins.projects.gtr2;
2

  
3
import java.util.Iterator;
4
import java.util.concurrent.ArrayBlockingQueue;
5
import java.util.concurrent.TimeUnit;
6

  
7
import org.apache.commons.lang3.StringUtils;
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10
import org.joda.time.DateTime;
11
import org.joda.time.format.DateTimeFormat;
12
import org.joda.time.format.DateTimeFormatter;
13

  
14
import com.ximpleware.AutoPilot;
15
import com.ximpleware.VTDGen;
16
import com.ximpleware.VTDNav;
17

  
18
import eu.dnetlib.data.collector.plugins.HttpConnector;
19
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
20
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
21

  
22
/**
23
 * Created by alessia on 28/11/16.
24
 */
25
public class Gtr2ProjectsIterator implements Iterator<String> {
26

  
27
	public static final String TERMINATOR = "ARNOLD";
28
	public static final int WAIT_END_SECONDS = 600;
29
	public static final int PAGE_SZIE = 20;
30

  
31
	private static final Log log = LogFactory.getLog(Gtr2ProjectsIterator.class);
32

  
33
	private String queryURL;
34
	private int total = -1;
35
	private int startFromPage = 1;
36
	private int endAtPage;
37
	private VTDGen vg;
38
	private VTDNav vn;
39
	private AutoPilot ap;
40
	private String namespaces;
41
	private boolean incremental = false;
42
	private DateTime fromDate;
43
	private final DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
44
	private final ArrayBlockingQueue<String> projects = new ArrayBlockingQueue<>(200);
45
	// private boolean finished = false;
46
	private String nextElement = "<doc></doc>";
47
	private HttpConnector connector;
48

  
49
	@Override
50
	public boolean hasNext() {
51
		return !TERMINATOR.equals(nextElement);
52
	}
53

  
54
	@Override
55
	public String next() {
56
		try {
57
			return nextElement;
58
		} finally {
59
			try {
60
				nextElement = projects.poll(WAIT_END_SECONDS, TimeUnit.SECONDS);
61
			} catch (final InterruptedException e) {
62
				throw new RuntimeException(e);
63
			}
64
		}
65

  
66
	}
67

  
68
	@Override
69
	public void remove() {
70
		throw new UnsupportedOperationException();
71
	}
72

  
73
	public Gtr2ProjectsIterator(final String baseUrl, final String fromDate) throws CollectorServiceException {
74
		prepare(baseUrl, fromDate);
75
		fillInfo(true);
76
	}
77

  
78
	public Gtr2ProjectsIterator(final String baseUrl, final String fromDate, final int startFromPage, final int endAtPage) throws CollectorServiceException {
79
		prepare(baseUrl, fromDate);
80
		this.setStartFromPage(startFromPage);
81
		this.setEndAtPage(endAtPage);
82
		fillInfo(false);
83
	}
84

  
85
	public Gtr2ProjectsIterator(final String baseUrl, final String fromDate, final String startFromPage, final String endAtPage)
86
		throws CollectorServiceException {
87
		prepare(baseUrl, fromDate);
88
		if (StringUtils.isNotBlank(startFromPage)) {
89
			this.setStartFromPage(Integer.parseInt(startFromPage));
90
		}
91
		if (StringUtils.isNotBlank(endAtPage)) {
92
			this.setEndAtPage(Integer.parseInt(endAtPage));
93
		}
94
		fillInfo(false);
95
	}
96

  
97
	private void prepare(final String baseUrl, final String fromDate) {
98
		connector = new HttpConnector();
99
		queryURL = baseUrl + "/projects";
100
		vg = new VTDGen();
101
		this.incremental = StringUtils.isNotBlank(fromDate);
102
		if (incremental) {
103
			// I expect fromDate in the format 'yyyy-MM-dd'. See class
104
			// eu.dnetlib.msro.workflows.nodes.collect.FindDateRangeForIncrementalHarvestingJobNode
105
			this.fromDate = DateTime.parse(fromDate, simpleDateTimeFormatter);
106
			log.debug("fromDate string: " + fromDate + " -- parsed: " + this.fromDate.toString());
107
		}
108
	}
109

  
110
	private void fillInfo(final boolean all) throws CollectorServiceException {
111
		try {
112
			// log.debug("Getting hit count from: " + queryURL);
113
			final byte[] bytes = connector.getInputSource(queryURL).getBytes("UTF-8");
114
			vg.setDoc(bytes);
115
			vg.parse(false);
116
			// vg.parseHttpUrl(queryURL, false);
117
			initParser();
118
			final String hitCount = vn.toNormalizedString(vn.getAttrVal("totalSize"));
119
			final String totalPages = vn.toNormalizedString(vn.getAttrVal("totalPages"));
120
			namespaces = "xmlns:ns1=\"" + vn.toNormalizedString(vn.getAttrVal("ns1")) + "\" ";
121
			namespaces += "xmlns:ns2=\"" + vn.toNormalizedString(vn.getAttrVal("ns2")) + "\" ";
122
			namespaces += "xmlns:ns3=\"" + vn.toNormalizedString(vn.getAttrVal("ns3")) + "\" ";
123
			namespaces += "xmlns:ns4=\"" + vn.toNormalizedString(vn.getAttrVal("ns4")) + "\" ";
124
			namespaces += "xmlns:ns5=\"" + vn.toNormalizedString(vn.getAttrVal("ns5")) + "\" ";
125
			namespaces += "xmlns:ns6=\"" + vn.toNormalizedString(vn.getAttrVal("ns6")) + "\" ";
126
			if (all) {
127
				setEndAtPage(Integer.parseInt(totalPages));
128
				total = Integer.parseInt(hitCount);
129
			}
130
			final Thread ft = new Thread(new FillProjectList());
131
			ft.start();
132
			log.debug("Expected number of pages: " + (endAtPage - startFromPage + 1));
133
		} catch (final NumberFormatException e) {
134
			log.error("Cannot set the total count or the number of pages");
135
			throw new CollectorServiceException(e);
136
		} catch (final Throwable e) {
137
			throw new CollectorServiceException(e);
138
		}
139
	}
140

  
141
	private void initParser() {
142
		vn = vg.getNav();
143
		ap = new AutoPilot(vn);
144
	}
145

  
146
	public String getQueryURL() {
147
		return queryURL;
148
	}
149

  
150
	public void setQueryURL(final String queryURL) {
151
		this.queryURL = queryURL;
152
	}
153

  
154
	public int getTotal() {
155
		return total;
156
	}
157

  
158
	public void setTotal(final int total) {
159
		this.total = total;
160
	}
161

  
162
	public int getEndAtPage() {
163
		return endAtPage;
164
	}
165

  
166
	public void setEndAtPage(final int endAtPage) {
167
		this.endAtPage = endAtPage;
168
		log.debug("Overriding endAtPage to " + endAtPage);
169
	}
170

  
171
	public VTDGen getVg() {
172
		return vg;
173
	}
174

  
175
	public void setVg(final VTDGen vg) {
176
		this.vg = vg;
177
	}
178

  
179
	public VTDNav getVn() {
180
		return vn;
181
	}
182

  
183
	public void setVn(final VTDNav vn) {
184
		this.vn = vn;
185
	}
186

  
187
	public AutoPilot getAp() {
188
		return ap;
189
	}
190

  
191
	public void setAp(final AutoPilot ap) {
192
		this.ap = ap;
193
	}
194

  
195
	public String getNamespaces() {
196
		return namespaces;
197
	}
198

  
199
	public void setNamespaces(final String namespaces) {
200
		this.namespaces = namespaces;
201
	}
202

  
203
	public int getStartFromPage() {
204
		return startFromPage;
205
	}
206

  
207
	public void setStartFromPage(final int startFromPage) {
208
		this.startFromPage = startFromPage;
209
		log.debug("Overriding startFromPage to " + startFromPage);
210
	}
211

  
212
	private class FillProjectList implements Runnable {
213

  
214
		private boolean morePages = true;
215
		private int pageNumber = startFromPage;
216

  
217
		@Override
218
		public void run() {
219
			String resultPageUrl = "";
220
			try {
221
				do {
222
					resultPageUrl = getNextPageUrl();
223
					log.debug("Page: " + resultPageUrl);
224
					// clear VGen before processing the next file
225
					vg.clear();
226
					final byte[] bytes = connector.getInputSource(resultPageUrl).getBytes("UTF-8");
227
					vg.setDoc(bytes);
228
					vg.parse(false);
229
					// vg.parseHttpUrl(resultPageUrl, false);
230
					initParser();
231
					ap.selectXPath("//project");
232
					while (ap.evalXPath() != -1) {
233
						final String projectHref = vn.toNormalizedString(vn.getAttrVal("href"));
234
						final ParseProject p = new ParseProject(projectHref);
235
						p.execute();
236
					}
237
					ap.resetXPath();
238

  
239
				} while (morePages);
240
				projects.put(TERMINATOR);
241

  
242
			} catch (final Throwable e) {
243
				log.error("Exception processing " + resultPageUrl + "\n" + e.getMessage());
244
			}
245
		}
246

  
247
		private String getNextPageUrl() {
248
			final String url = queryURL + "?p=" + pageNumber;
249
			if (pageNumber == endAtPage) {
250
				morePages = false;
251
			}
252
			pageNumber++;
253
			return url;
254
		}
255

  
256
	}
257

  
258
	private class ParseProject {
259

  
260
		VTDNav vn1;
261
		VTDGen vg1;
262
		private final String projectRef;
263

  
264
		public ParseProject(final String projectHref) {
265
			projectRef = Gtr2Helper.cleanURL(projectHref);
266
			vg1 = new VTDGen();
267
			try {
268
				final byte[] bytes = connector.getInputSource(projectRef).getBytes("UTF-8");
269
				vg1.setDoc(bytes);
270
				vg1.parse(false);
271
				vn1 = vg1.getNav();
272
			} catch (final Throwable e) {
273
				log.error("Exception processing " + projectRef + "\n" + e.getMessage());
274
			}
275
		}
276

  
277
		private int projectsUpdate(final String attr) throws CollectorServiceException {
278
			try {
279
				final int index = vn1.getAttrVal(attr);
280
				if (index != -1) {
281
					final String d = vn1.toNormalizedString(index);
282
					final DateTime recordDate = DateTime.parse(d.substring(0, d.indexOf("T")), simpleDateTimeFormatter);
283
					// updated or created after the last time it was collected
284
					if (recordDate.isAfter(fromDate)) {
285
						log.debug("New project to collect");
286
						return index;
287
					}
288
					return -1;
289
				}
290
				return index;
291
			} catch (final Throwable e) {
292
				throw new CollectorServiceException(e);
293
			}
294
		}
295

  
296
		private String collectProject() throws CollectorServiceException {
297
			try {
298
				final int p = vn1.getAttrVal("href");
299
				final String projectHref = vn1.toNormalizedString(p);
300
				log.debug("Collecting project at " + projectHref);
301
				final Gtr2Helper gtr2Helper = new Gtr2Helper();
302
				return gtr2Helper.processProject(vn1, namespaces, projectHref);
303
			} catch (final Throwable e) {
304
				throw new CollectorServiceException(e);
305
			}
306
		}
307

  
308
		private boolean add(final String attr) throws CollectorServiceException {
309
			return projectsUpdate(attr) != -1;
310
		}
311

  
312
		public void execute() {
313
			try {
314
				if (!incremental || incremental && (add("created") || add("updated"))) {
315
					projects.put(collectProject());
316
				}
317
			} catch (final Throwable e) {
318
				log.error("Error on ParseProject " + e.getMessage());
319
				throw new CollectorServiceRuntimeException(e);
320
			}
321
		}
322

  
323
	}
324

  
325
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2CollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.projects.gtr2;
2

  
3
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
4
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
5
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
6

  
7
/**
8
 * Plugin to collect metadata records about projects and fundings via the UKRI gtr2 API.
9
 * <p>
10
 * Documentation : http://gtr.ukri.org/resources/api.html.
11
 * </p>
12
 * <p>
13
 * BaseURL: https://gtr.ukri.org/gtr/api The results returned by the API are XMLs.
14
 * </p>
15
 * <p>
16
 * Pagination: TO BE DEFINED. Exceeding the number of pages available will result in a HTTP response code of 404
17
 * </p>
18
 *
19
 * @author alessia
20
 */
21
public class Gtr2CollectorPlugin extends AbstractCollectorPlugin {
22

  
23
	@Override
24
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
25
		throws CollectorServiceException {
26
		if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); }
27

  
28
		return () -> {
29

  
30
			try {
31
				return new Gtr2ProjectsIterator(interfaceDescriptor.getBaseUrl(), fromDate,
32
					interfaceDescriptor.getParams().get("startPage"),
33
					interfaceDescriptor.getParams().get("endPage"));
34
			} catch (final CollectorServiceException e) {
35
				throw new RuntimeException(e);
36
			}
37
		};
38
	}
39

  
40
}
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/projects/gtr2/VTDXMLTest.java
1
package eu.dnetlib.data.collector.plugins.projects.gtr2;
2

  
3
import java.io.BufferedWriter;
4
import java.io.ByteArrayOutputStream;
5
import java.io.FileWriter;
6
import java.io.PrintWriter;
7

  
8
import com.ximpleware.AutoPilot;
9
import com.ximpleware.VTDGen;
10
import com.ximpleware.VTDNav;
11
import org.apache.commons.lang3.StringUtils;
12
import org.junit.Ignore;
13
import org.junit.Test;
14
@Ignore
15
public class VTDXMLTest {
16

  
17
	private VTDGen vg;
18
	private VTDNav vn;
19
	private AutoPilot ap;
20

  
21
	private VTDGen vg_tmp;
22
	private VTDNav vn_tmp;
23
	private AutoPilot ap_tmp;
24

  
25
	private PrintWriter writer;
26
	//TODO: use resource and not full path
27
	private String inputFilePath =
28
			"/Users/alessia/workspace/dnet/dnet-collector-plugins/src/test/resources/eu.dnetlib.data.collector.plugins.projects.gtr2/projects.xml";
29

  
30
	@Test
31
	public void test() throws Exception {
32
		vg = new VTDGen();
33
		vg.parseFile(inputFilePath, false);
34
		vn = vg.getNav();
35
		ap = new AutoPilot(vn);
36
		String ns = "";
37
		ap.selectXPath(".//projects");
38
		ap.evalXPath();
39
		ns += "xmlns:ns1=\"" + vn.toNormalizedString(vn.getAttrVal("ns1")) + "\" ";
40
		ns += "xmlns:ns2=\"" + vn.toNormalizedString(vn.getAttrVal("ns2")) + "\" ";
41
		ns += "xmlns:ns3=\"" + vn.toNormalizedString(vn.getAttrVal("ns3")) + "\" ";
42
		ns += "xmlns:ns4=\"" + vn.toNormalizedString(vn.getAttrVal("ns4")) + "\" ";
43
		ns += "xmlns:ns5=\"" + vn.toNormalizedString(vn.getAttrVal("ns5")) + "\" ";
44
		ns += "xmlns:ns6=\"" + vn.toNormalizedString(vn.getAttrVal("ns6")) + "\" ";
45

  
46
		ap.selectXPath("//project");
47
		int res = -1;
48
		ByteArrayOutputStream b = new ByteArrayOutputStream();
49
		int i = 0;
50
		while ((res = ap.evalXPath()) != -1) {
51
			writer = new PrintWriter(new BufferedWriter(new FileWriter("projectPackage_"+(++i)+".xml")));
52
			System.out.println(res);
53
			writer.println("<doc " + ns + ">");
54
			writeFragment(vn);
55
			VTDNav clone = vn.cloneNav();
56
			AutoPilot ap2 = new AutoPilot(clone);
57
			ap2.selectXPath(".//link[@rel='FUND']");
58
			vg_tmp = new VTDGen();
59

  
60
			while (ap2.evalXPath() != -1) {
61
				//String fund = clone.toNormalizedString(clone.getAttrVal("href"));
62
				evalXpath(clone.toNormalizedString(clone.getAttrVal("href")), ".//link[@rel='FUNDER']");
63
				String funder = vn_tmp.toNormalizedString(vn_tmp.getAttrVal("href"));
64
				vn_tmp.toElement(VTDNav.ROOT);
65
				writeFragment(vn_tmp);
66
				writeNewTagAndInfo(funder, "//name", "<funder> <name>", "</name></funder>", null);
67
			}
68
			ap2.resetXPath();
69
			ap2.selectXPath(".//link[@rel='LEAD_ORG']");
70
			while (ap2.evalXPath() != -1) {
71
				writeNewTagAndInfo(clone.toNormalizedString(clone.getAttrVal("href")), "//name", "<lead-org><name>", "</name>", null);
72
				writeNewTagAndInfo(clone.toNormalizedString(clone.getAttrVal("href")), ".", "<id>", "</id></lead-org>", "id");
73
			}
74
			ap2.resetXPath();
75
			ap2.selectXPath(".//link[@rel='PP_ORG']");
76
			while (ap2.evalXPath() != -1) {
77
				writeNewTagAndInfo(clone.toNormalizedString(clone.getAttrVal("href")), "//name", "<pp-org><name>", "</name></pp-org>", null);
78
				writeNewTagAndInfo(clone.toNormalizedString(clone.getAttrVal("href")), ".", "<id>", "</id></lead-org>", "id");
79
			}
80
			ap2.resetXPath();
81

  
82
			ap2.selectXPath(".//link[@rel='PI_PER']");
83
			while (ap2.evalXPath() != -1) {
84
				setNavigator(clone.toNormalizedString(clone.getAttrVal("href")));
85
				vn_tmp.toElement(VTDNav.ROOT);
86
				writeFragment(vn_tmp);
87
			}
88
			writer.println("</doc>");
89
			writer.close();
90
		}
91

  
92
	}
93

  
94
	private void setNavigator(String httpUrl) {
95
		vg_tmp.clear();
96
		vg_tmp.parseHttpUrl(httpUrl, false);
97
		vn_tmp = vg_tmp.getNav();
98
	}
99

  
100
	private int evalXpath(String httpUrl, String xPath) throws Exception {
101
		setNavigator(httpUrl);
102
		ap_tmp = new AutoPilot(vn_tmp);
103
		ap_tmp.selectXPath(xPath);
104
		return ap_tmp.evalXPath();
105
	}
106

  
107
	private void writeFragment(VTDNav nav) throws Exception {
108
		ByteArrayOutputStream b = new ByteArrayOutputStream();
109
		nav.dumpFragment(b);
110
		writer.println(b);
111
		b.reset();
112
	}
113

  
114
	private void writeNewTagAndInfo(String search, String xPath, String xmlOpenTag, String xmlCloseTag, String attrName) throws Exception {
115
		int nav_res = evalXpath(search, xPath);
116
		if (nav_res != -1) {
117
			writer.println(xmlOpenTag);
118
			if(StringUtils.isNotBlank(attrName)) writer.println(vn_tmp.toNormalizedString(vn_tmp.getAttrVal(attrName)));
119
			else
120
				writer.println(vn_tmp.toNormalizedString(vn_tmp.getText()));
121
			writer.println(xmlCloseTag);
122
		}
123
	}
124

  
125
}
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/projects/gtr2/Gtr2Test.java
1
package eu.dnetlib.data.collector.plugins.projects.gtr2;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5

  
6
import java.util.Iterator;
7

  
8
import org.junit.Before;
9
import org.junit.Ignore;
10
import org.junit.Test;
11

  
12
import com.ximpleware.VTDGen;
13

  
14
import eu.dnetlib.data.collector.plugins.HttpConnector;
15
import eu.dnetlib.miscutils.functional.xml.TryIndentXmlString;
16

  
17
@Ignore
18
public class Gtr2Test {
19

  
20
	private final String baseURL = "https://gtr.ukri.org/gtr/api";
21
	private Gtr2Helper helper;
22
	private Gtr2ProjectsIterator iterator;
23
	private HttpConnector connector;
24

  
25
	@Before
26
	public void prepare() {
27
		helper = new Gtr2Helper();
28
		// System.setProperty("jsse.enableSNIExtension","false");
29
	}
30

  
31
	@Test
32
	public void testOne() throws Exception {
33
		System.out.println("one project");
34
		final String url = "http://gtr.ukri.org/gtr/api/projects/0AE039A7-9A84-4943-AA36-001DB5763245";
35
		final VTDGen vg_tmp = new VTDGen();
36
		connector = new HttpConnector();
37
		final String tmp = connector.getInputSource(url);
38
		final byte[] bytes = tmp.getBytes("UTF-8");
39
		vg_tmp.setDoc(bytes);
40
		vg_tmp.parse(false);
41
		final String s = helper.processProject(vg_tmp.getNav(), "xmlns:ns=\"http:///afgshs\"", url);
42
		System.out.println(s);
43
	}
44

  
45
	@Test
46
	public void testPaging() throws Exception {
47
		iterator = new Gtr2ProjectsIterator(baseURL, null, 2, 2);
48
		final TryIndentXmlString indenter = new TryIndentXmlString();
49

  
50
		while (iterator.hasNext()) {
51
			Thread.sleep(300);
52
			final String res = iterator.next();
53
			assertNotNull(res);
54
			indenter.evaluate(res);
55
			System.out.println(res);
56
		}
57
	}
58

  
59
	@Test
60
	public void testOnePage() throws Exception {
61
		iterator = new Gtr2ProjectsIterator(baseURL, null, 12, 12);
62
		final int count = iterateAndCount(iterator);
63
		assertEquals(21, count);
64
	}
65

  
66
	@Test
67
	public void testIncrementalHarvestingNoRecords() throws Exception {
68
		System.out.println("incremental Harvesting");
69
		iterator = new Gtr2ProjectsIterator(baseURL, "2050-12-12", 11, 13);
70
		final int count = iterateAndCount(iterator);
71
		assertEquals(1, count);
72
	}
73

  
74
	@Test
75
	public void testIncrementalHarvesting() throws Exception {
76
		System.out.println("incremental Harvesting");
77
		iterator = new Gtr2ProjectsIterator(baseURL, "2016-11-30", 11, 11);
78
		final int count = iterateAndCount(iterator);
79
		assertEquals(21, count);
80
	}
81

  
82
	@Test
83
	@Ignore
84
	public void testCompleteHarvesting() throws Exception {
85
		System.out.println("testing complete harvesting");
86
		iterator = new Gtr2ProjectsIterator(baseURL, null);
87
		// TryIndentXmlString indenter = new TryIndentXmlString();
88
		// it.setEndAtPage(3);
89

  
90
		while (iterator.hasNext()) {
91
			final String res = iterator.next();
92
			assertNotNull(res);
93
			// System.out.println(res);
94
			// Scanner keyboard = new Scanner(System.in);
95
			// System.out.println("press enter for next record");
96
			// keyboard.nextLine();
97

  
98
		}
99
	}
100

  
101
	private int iterateAndCount(final Iterator<String> iterator) throws Exception {
102
		int i = 0;
103
		while (iterator.hasNext()) {
104
			assertNotNull(iterator.next());
105
			i++;
106
		}
107
		System.out.println("Got " + i + " projects");
108
		return i;
109
	}
110
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/AbstractGtr2CollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.gtr2;
2

  
3
import java.util.Iterator;
4

  
5
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
6
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
7
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
8

  
9
public abstract class AbstractGtr2CollectorPlugin extends AbstractCollectorPlugin {
10

  
11
	@Override
12
	public final Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
13
		throws CollectorServiceException {
14

  
15
		if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); }
16

  
17
		final String baseUrl = interfaceDescriptor.getBaseUrl();
18
		final String startPage = interfaceDescriptor.getParams().get("startPage");
19
		final String endPage = interfaceDescriptor.getParams().get("endPage");
20

  
21
		return () -> {
22
			try {
23
				return createIterator(baseUrl, fromDate, startPage, endPage);
24
			} catch (final CollectorServiceException e) {
25
				throw new RuntimeException(e);
26
			}
27
		};
28
	}
29

  
30
	protected abstract Iterator<String> createIterator(String baseUrl, final String fromDate, String startPage, String endPage)
31
		throws CollectorServiceException;
32

  
33
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2Iterator.java
1
package eu.dnetlib.data.collector.plugins.gtr2;
2

  
3
import java.util.ArrayList;
4
import java.util.HashMap;
5
import java.util.Iterator;
6
import java.util.LinkedList;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.Queue;
10
import java.util.function.Function;
11

  
12
import org.apache.commons.lang.math.NumberUtils;
13
import org.apache.commons.lang3.StringUtils;
14
import org.apache.commons.logging.Log;
15
import org.apache.commons.logging.LogFactory;
16
import org.dom4j.Document;
17
import org.dom4j.DocumentException;
18
import org.dom4j.DocumentHelper;
19
import org.dom4j.Element;
20
import org.joda.time.DateTime;
21

  
22
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
23
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
24

  
25
public abstract class Gtr2Iterator implements Iterator<String> {
26

  
27
	public static final int PAGE_SIZE = 20;
28

  
29
	private static final Log log = LogFactory.getLog(Gtr2Iterator.class);
30

  
31
	private final String baseUrl;
32
	private int currPage;
33
	private int endPage;
34
	private boolean incremental = false;
35
	private DateTime fromDate;
36

  
37
	private final Map<String, String> cache = new HashMap<>();
38

  
39
	private final Queue<String> queue = new LinkedList<>();
40

  
41
	private String nextElement;
42

  
43
	public Gtr2Iterator(final String baseUrl, final String fromDate, final String startPage, final String endPage)
44
		throws CollectorServiceException {
45

  
46
		this.baseUrl = baseUrl;
47
		this.currPage = NumberUtils.toInt(startPage, 1);
48
		this.endPage = NumberUtils.toInt(endPage, Integer.MAX_VALUE);
49
		this.incremental = StringUtils.isNotBlank(fromDate);
50

  
51
		if (this.incremental) {
52
			this.fromDate = Gtr2Helper.parseDate(fromDate);
53
		}
54

  
55
		prepareNextElement();
56
	}
57

  
58
	@Override
59
	public boolean hasNext() {
60
		return nextElement != null;
61
	}
62

  
63
	@Override
64
	public String next() {
65
		try {
66
			return nextElement;
67
		} finally {
68
			prepareNextElement();
69
		}
70
	}
71

  
72
	@Override
73
	public void remove() {
74
		throw new UnsupportedOperationException();
75
	}
76

  
77
	private void prepareNextElement() {
78
		while (this.currPage <= this.endPage && queue.isEmpty()) {
79
			log.debug("FETCHING PAGE + " + currPage + "/" + endPage);
80
			this.queue.addAll(fetchPage(currPage++));
81
		}
82
		this.nextElement = this.queue.poll();
83
	}
84

  
85
	private List<String> fetchPage(final int pageNumber) {
86

  
87
		final List<String> res = new ArrayList<>();
88
		try {
89
			final Document doc = Gtr2Helper.loadURL(urlForPage(baseUrl, pageNumber));
90

  
91
			if (endPage == Integer.MAX_VALUE) {
92
				endPage = NumberUtils.toInt(doc.valueOf("/*/@*[local-name() = 'totalPages']"));
93
			}
94

  
95
			for (final Object po : doc.selectNodes(xpathForEntity())) {
96
				final Element mainEntity = (Element) ((Element) po).detach();
97

  
98
				if (filterIncremental(mainEntity)) {
99
					res.add(expandMainEntity(mainEntity));
100
				} else {
101
					log.debug("Skipped entity");
102
				}
103

  
104
			}
105
		} catch (final Throwable e) {
106
			log.error("Exception fetching page " + pageNumber, e);
107
			throw new CollectorServiceRuntimeException("Exception fetching page " + pageNumber, e);
108
		}
109

  
110
		return res;
111
	}
112

  
113
	protected void addLinkedEntities(final Element master, final String relType, final Element newRoot, final Function<Document, Element> mapper) {
114

  
115
		for (final Object o : master.selectNodes(".//*[local-name()='link']")) {
116
			final String rel = ((Element) o).valueOf("@*[local-name()='rel']");
117
			final String href = ((Element) o).valueOf("@*[local-name()='href']");
118

  
119
			if (relType.equals(rel) && StringUtils.isNotBlank(href)) {
120
				final String cacheKey = relType + "#" + href;
121
				if (cache.containsKey(cacheKey)) {
122
					try {
123
						log.debug(" * from cache (" + relType + "): " + href);
124
						newRoot.add(DocumentHelper.parseText(cache.get(cacheKey)).getRootElement());
125
					} catch (final DocumentException e) {
126
						log.error("Error retrieving cache element: " + cacheKey, e);
127
						throw new CollectorServiceRuntimeException("Error retrieving cache element: " + cacheKey, e);
128
					}
129
				} else {
130
					final Document doc = Gtr2Helper.loadURL(href);
131
					final Element elem = mapper.apply(doc);
132
					newRoot.add(elem);
133
					cache.put(cacheKey, elem.asXML());
134
				}
135

  
136
			}
137
		}
138
	}
139

  
140
	private boolean filterIncremental(final Element e) {
141
		if (!incremental) {
142
			return true;
143
		} else if (Gtr2Helper.isAfter(e.valueOf("@*[local-name() = 'created']"), fromDate)) {
144
			return true;
145
		} else if (Gtr2Helper.isAfter(e.valueOf("@*[local-name() = 'updated']"), fromDate)) {
146
			return true;
147
		} else {
148
			return false;
149
		}
150
	}
151

  
152
	abstract protected String expandMainEntity(final Element mainEntity);
153

  
154
	abstract protected String urlForPage(final String baseUrl, final int pageNumber);
155

  
156
	abstract protected String xpathForEntity();
157
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2ProjectsCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.gtr2;
2

  
3
import java.util.Iterator;
4

  
5
import org.dom4j.Document;
6
import org.dom4j.DocumentHelper;
7
import org.dom4j.Element;
8

  
9
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
10

  
11
/**
12
 * Plugin to collect metadata record about projects and fundings via the UKRI grt2 API.
13
 * <p>
14
 * Documentation : http://gtr.ukri.org/resources/api.html.
15
 * </p>
16
 * <p>
17
 * BaseURL: https://gtr.ukri.org/gtr/api The results returned by the API are XMLs.
18
 * </p>
19
 * <p>
20
 * Pagination: TO BE DEFINED. Exceeding the number of pages available will result in a HTTP response code of 404
21
 * </p>
22
 *
23
 * @author alessia
24
 */
25
public class Gtr2ProjectsCollectorPlugin extends AbstractGtr2CollectorPlugin {
26

  
27
	@Override
28
	protected Iterator<String> createIterator(final String baseUrl, final String fromDate, final String startPage, final String endPage)
29
		throws CollectorServiceException {
30

  
31
		return new Gtr2Iterator(baseUrl, fromDate, startPage, endPage) {
32

  
33
			@Override
34
			protected String urlForPage(final String baseUrl, final int pageNumber) {
35
				return baseUrl + "/projects?p=" + pageNumber;
36
			}
37

  
38
			@Override
39
			protected String xpathForEntity() {
40
				return "//*[local-name() = 'project']";
41
			}
42

  
43
			@Override
44
			protected String expandMainEntity(final Element mainEntity) {
45

  
46
				final Element newRoot = DocumentHelper.createElement("doc");
47

  
48
				newRoot.add(mainEntity);
49

  
50
				addLinkedEntities(mainEntity, "LEAD_ORG", newRoot, o -> asOrgElement("ld-org", o));
51
				addLinkedEntities(mainEntity, "PP_ORG", newRoot, o -> asOrgElement("pp-org", o));
52
				addLinkedEntities(mainEntity, "PI_PER", newRoot, o -> asPersonElement("pi-per", o));
53

  
54
				return DocumentHelper.createDocument(newRoot).asXML();
55
			}
56

  
57
			private Element asOrgElement(final String nodeName, final Document doc) {
58
				final Element newOrg = DocumentHelper.createElement(nodeName);
59
				newOrg.addElement("id").setText(doc.valueOf("/*/@*[local-name()='id']"));
60
				newOrg.addElement("name").setText(doc.valueOf("//*[local-name()='name']"));
61
				newOrg.addElement("country").setText(doc.valueOf("//*[local-name()='country']"));
62
				return newOrg;
63
			}
64

  
65
			private Element asPersonElement(final String nodeName, final Document doc) {
66
				final Element newPers = DocumentHelper.createElement(nodeName);
67
				newPers.addElement("id").setText(doc.valueOf("/*/@*[local-name()='id']"));
68
				newPers.addElement("firstName").setText(doc.valueOf("//*[local-name()='firstName']"));
69
				newPers.addElement("otherNames").setText(doc.valueOf("//*[local-name()='otherNames']"));
70
				newPers.addElement("surname").setText(doc.valueOf("//*[local-name()='surname']"));
71
				return newPers;
72
			}
73
		};
74
	}
75

  
76
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2PublicationsCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.gtr2;
2

  
3
import java.util.Iterator;
4

  
5
import org.dom4j.Document;
6
import org.dom4j.DocumentHelper;
7
import org.dom4j.Element;
8

  
9
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
10

  
11
/**
12
 * Plugin to collect metadata record about publications via the UKRI grt2 API.
13
 * <p>
14
 * Documentation : http://gtr.ukri.org/resources/api.html.
15
 * </p>
16
 * <p>
17
 * BaseURL: https://gtr.ukri.org/gtr/api The results returned by the API are XMLs.
18
 * </p>
19
 * <p>
20
 * Pagination: TO BE DEFINED. Exceeding the number of pages available will result in a HTTP response code of 404
21
 * </p>
22
 *
23
 * @author alessia
24
 */
25
public class Gtr2PublicationsCollectorPlugin extends AbstractGtr2CollectorPlugin {
26

  
27
	@Override
28
	protected Iterator<String> createIterator(final String baseUrl, final String fromDate, final String startPage, final String endPage)
29
		throws CollectorServiceException {
30
		return new Gtr2Iterator(baseUrl, fromDate, startPage, endPage) {
31

  
32
			@Override
33
			protected String urlForPage(final String baseUrl, final int pageNumber) {
34
				return baseUrl + "/outcomes/publications?p=" + pageNumber;
35
			}
36

  
37
			@Override
38
			protected String xpathForEntity() {
39
				return "//*[local-name() = 'publication']";
40
			}
41

  
42
			@Override
43
			protected String expandMainEntity(final Element mainEntity) {
44
				final Element newRoot = DocumentHelper.createElement("doc");
45
				newRoot.add(mainEntity);
46
				addLinkedEntities(mainEntity, "PROJECT", newRoot, o -> asProjectElement(o));
47
				return DocumentHelper.createDocument(newRoot).asXML();
48
			}
49

  
50
			private Element asProjectElement(final Document doc) {
51
				final Element newOrg = DocumentHelper.createElement("project");
52
				newOrg.addElement("id").setText(doc.valueOf("/*/@*[local-name()='id']"));
53
				newOrg.addElement("code").setText(doc.valueOf("//*[local-name()='identifier' and @*[local-name()='type'] = 'RCUK']"));
54
				newOrg.addElement("title").setText(doc.valueOf("//*[local-name()='title']"));
55
				return newOrg;
56
			}
57

  
58
		};
59
	}
60

  
61
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2Helper.java
1
package eu.dnetlib.data.collector.plugins.gtr2;
2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
5
import org.dom4j.Document;
6
import org.dom4j.DocumentHelper;
7
import org.joda.time.DateTime;
8
import org.joda.time.format.DateTimeFormat;
9
import org.joda.time.format.DateTimeFormatter;
10

  
11
import eu.dnetlib.data.collector.plugins.HttpConnector;
12
import eu.dnetlib.data.collector.rmi.CollectorServiceRuntimeException;
13

  
14
public class Gtr2Helper {
15

  
16
	private static final Log log = LogFactory.getLog(Gtr2Helper.class); // NOPMD by marko on 11/24/08 5:02 PM
17

  
18
	private static final HttpConnector connector = new HttpConnector();
19
	private static final DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd");
20

  
21
	private static final int MAX_ATTEMPTS = 10;
22

  
23
	public static String cleanURL(final String url) {
24
		String cleaned = url;
25
		if (cleaned.contains("gtr.gtr")) {
26
			cleaned = cleaned.replace("gtr.gtr", "gtr");
27
		}
28
		if (cleaned.startsWith("http://")) {
29
			cleaned = cleaned.replaceFirst("http://", "https://");
30
		}
31
		return cleaned;
32
	}
33

  
34
	public static Document loadURL(final String url) {
35
		final String cleanUrl = cleanURL(url);
36
		return loadURL(cleanUrl, 0);
37
	}
38

  
39
	private static Document loadURL(final String cleanUrl, final int attempt) {
40
		try {
41
			log.debug("  * Downloading Url: " + cleanUrl);
42
			final byte[] bytes = connector.getInputSource(cleanUrl).getBytes("UTF-8");
43
			return DocumentHelper.parseText(new String(bytes));
44
		} catch (final Throwable e) {
45
			log.error("Error dowloading url: " + cleanUrl + ", attempt = " + attempt, e);
46
			if (attempt < MAX_ATTEMPTS) {
47
				try {
48
					Thread.sleep(60000); // I wait for a minute
49
				} catch (final InterruptedException e1) {
50
					throw new CollectorServiceRuntimeException("Error dowloading url: " + cleanUrl, e);
51
				}
52
				return loadURL(cleanUrl, attempt + 1);
53
			} else {
54
				throw new CollectorServiceRuntimeException("Error dowloading url: " + cleanUrl, e);
55
			}
56
		}
57
	}
58

  
59
	public static DateTime parseDate(final String s) {
60
		// I expect dates in the format 'yyyy-MM-dd'. See class
61
		// eu.dnetlib.msro.workflows.nodes.collect.FindDateRangeForIncrementalHarvestingJobNode
62
		return DateTime.parse(s.substring(0, s.indexOf("T")), simpleDateTimeFormatter);
63
	}
64

  
65
	public static boolean isAfter(final String d, final DateTime fromDate) {
66
		return Gtr2Helper.parseDate(d).isAfter(fromDate);
67
	}
68
}
modules/dnet-collector-plugins/trunk/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml
114 114
		</property>
115 115
	</bean>
116 116

  
117
	<bean id="gtr2Plugin" class="eu.dnetlib.data.collector.plugins.projects.gtr2.Gtr2CollectorPlugin">
117
	<bean id="gtr2ProjectsPlugin" class="eu.dnetlib.data.collector.plugins.gtr2.Gtr2ProjectsCollectorPlugin">
118 118
		<property name="protocolDescriptor">
119 119
			<bean class="eu.dnetlib.data.collector.rmi.ProtocolDescriptor" p:name="gtr2Projects">
120 120
				<property name="params">
......
126 126
			</bean>
127 127
		</property>
128 128
	</bean>
129
	
130
	<bean id="gtr2PublicationsPlugin" class="eu.dnetlib.data.collector.plugins.gtr2.Gtr2PublicationsCollectorPlugin">
131
		<property name="protocolDescriptor">
132
			<bean class="eu.dnetlib.data.collector.rmi.ProtocolDescriptor" p:name="gtr2Publications">
133
				<property name="params">
134
				<list>
135
					<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" p:name="startPage" p:optional="true" p:type="NUMBER"/>
136
					<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter" p:name="endPage" p:optional="true" p:type="NUMBER"/>
137
				</list>
138
				</property>
139
			</bean>
140
		</property>
141
	</bean>
129 142

  
130 143
	<bean id="HTTPWithFileNamePlugin" class="eu.dnetlib.data.collector.plugins.httpfilename.HTTPWithFileNameCollectorPlugin">
131 144
		<property name="protocolDescriptor">
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2ProjectsCollectorPluginTest.java
1
package eu.dnetlib.data.collector.plugins.gtr2;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5

  
6
import java.util.Iterator;
7

  
8
import org.junit.Ignore;
9
import org.junit.Test;
10

  
11
import eu.dnetlib.miscutils.functional.xml.TryIndentXmlString;
12

  
13
public class Gtr2ProjectsCollectorPluginTest {
14

  
15
	private static final String baseURL = "https://gtr.ukri.org/gtr/api";
16

  
17
	private final Gtr2ProjectsCollectorPlugin plugin = new Gtr2ProjectsCollectorPlugin();
18

  
19
	@Test
20
	@Ignore
21
	public void testOne() throws Exception {
22
		System.out.println("one project");
23

  
24
		final Iterator<String> iterator = plugin.createIterator(baseURL, null, null, null);
25

  
26
		while (iterator.hasNext()) {
27
			final String res = iterator.next();
28
			assertNotNull(res);
29
			System.out.println(res);
30
			return;
31
		}
32
	}
33

  
34
	@Test
35
	@Ignore
36
	public void testPaging() throws Exception {
37
		final Iterator<String> iterator = plugin.createIterator(baseURL, null, "2", "2");
38

  
39
		final TryIndentXmlString indenter = new TryIndentXmlString();
40

  
41
		while (iterator.hasNext()) {
42
			Thread.sleep(300);
43
			final String res = iterator.next();
44
			assertNotNull(res);
45
			indenter.evaluate(res);
46
			System.out.println(res);
47
		}
48
	}
49

  
50
	@Test
51
	@Ignore
52
	public void testOnePage() throws Exception {
53
		final Iterator<String> iterator = plugin.createIterator(baseURL, null, "12", "12");
54
		final int count = iterateAndCount(iterator);
55
		assertEquals(21, count);
56
	}
57

  
58
	@Test
59
	@Ignore
60
	public void testIncrementalHarvestingNoRecords() throws Exception {
61
		System.out.println("incremental Harvesting");
62
		final Iterator<String> iterator = plugin.createIterator(baseURL, "2050-12-12", "11", "13");
63
		final int count = iterateAndCount(iterator);
64
		assertEquals(1, count);
65
	}
66

  
67
	@Test
68
	@Ignore
69
	public void testIncrementalHarvesting() throws Exception {
70
		System.out.println("incremental Harvesting");
71
		final Iterator<String> iterator = plugin.createIterator(baseURL, "2016-11-30", "11", "11");
72
		final int count = iterateAndCount(iterator);
73
		assertEquals(21, count);
74
	}
75

  
76
	@Test
77
	@Ignore
78
	public void testCompleteHarvesting() throws Exception {
79
		System.out.println("testing complete harvesting");
80
		final Iterator<String> iterator = plugin.createIterator(baseURL, null, null, null);
81
		// TryIndentXmlString indenter = new TryIndentXmlString();
82
		// it.setEndAtPage(3);
83

  
84
		while (iterator.hasNext()) {
85
			final String res = iterator.next();
86
			assertNotNull(res);
87
			// System.out.println(res);
88
			// Scanner keyboard = new Scanner(System.in);
89
			// System.out.println("press enter for next record");
90
			// keyboard.nextLine();
91

  
92
		}
93
	}
94

  
95
	private int iterateAndCount(final Iterator<String> iterator) throws Exception {
96
		int i = 0;
97
		while (iterator.hasNext()) {
98
			assertNotNull(iterator.next());
99
			i++;
100
		}
101
		System.out.println("Got " + i + " projects");
102
		return i;
103
	}
104
}
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/gtr2/Gtr2PublicationsCollectorPluginTest.java
1
package eu.dnetlib.data.collector.plugins.gtr2;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5

  
6
import java.util.Iterator;
7

  
8
import org.junit.Ignore;
9
import org.junit.Test;
10

  
11
import eu.dnetlib.miscutils.functional.xml.TryIndentXmlString;
12

  
13
public class Gtr2PublicationsCollectorPluginTest {
14

  
15
	private static final String baseURL = "https://gtr.ukri.org/gtr/api";
16

  
17
	private final Gtr2PublicationsCollectorPlugin plugin = new Gtr2PublicationsCollectorPlugin();
18

  
19
	@Test
20
	@Ignore
21
	public void testOne() throws Exception {
22
		System.out.println("one publication");
23

  
24
		final Iterator<String> iterator = plugin.createIterator(baseURL, null, null, null);
25

  
26
		while (iterator.hasNext()) {
27
			final String res = iterator.next();
28
			assertNotNull(res);
29
			System.out.println(res);
30
			return;
31
		}
32
	}
33

  
34
	@Test
35
	@Ignore
36
	public void testPaging() throws Exception {
37
		final Iterator<String> iterator = plugin.createIterator(baseURL, null, "2", "2");
38

  
39
		final TryIndentXmlString indenter = new TryIndentXmlString();
40

  
41
		while (iterator.hasNext()) {
42
			Thread.sleep(300);
43
			final String res = iterator.next();
44
			assertNotNull(res);
45
			indenter.evaluate(res);
46
			System.out.println(res);
47
		}
48
	}
49

  
50
	@Test
51
	@Ignore
52
	public void testOnePage() throws Exception {
53
		final Iterator<String> iterator = plugin.createIterator(baseURL, null, "12", "12");
54
		final int count = iterateAndCount(iterator);
55
		assertEquals(21, count);
56
	}
57

  
58
	@Test
59
	@Ignore
60
	public void testIncrementalHarvestingNoRecords() throws Exception {
61
		System.out.println("incremental Harvesting");
62
		final Iterator<String> iterator = plugin.createIterator(baseURL, "2050-12-12", "11", "13");
63
		final int count = iterateAndCount(iterator);
64
		assertEquals(1, count);
65
	}
66

  
67
	@Test
68
	@Ignore
69
	public void testIncrementalHarvesting() throws Exception {
70
		System.out.println("incremental Harvesting");
71
		final Iterator<String> iterator = plugin.createIterator(baseURL, "2016-11-30", "11", "11");
72
		final int count = iterateAndCount(iterator);
73
		assertEquals(21, count);
74
	}
75

  
76
	@Test
77
	@Ignore
78
	public void testCompleteHarvesting() throws Exception {
79
		System.out.println("testing complete harvesting");
80
		final Iterator<String> iterator = plugin.createIterator(baseURL, null, null, null);
81
		// TryIndentXmlString indenter = new TryIndentXmlString();
82
		// it.setEndAtPage(3);
83

  
84
		while (iterator.hasNext()) {
85
			final String res = iterator.next();
86
			assertNotNull(res);
87
			// System.out.println(res);
88
			// Scanner keyboard = new Scanner(System.in);
89
			// System.out.println("press enter for next record");
90
			// keyboard.nextLine();
91

  
92
		}
93
	}
94

  
95
	private int iterateAndCount(final Iterator<String> iterator) throws Exception {
96
		int i = 0;
97
		while (iterator.hasNext()) {
98
			assertNotNull(iterator.next());
99
			i++;
100
		}
101
		System.out.println("Got " + i + " publications");
102
		return i;
103
	}
104
}
modules/dnet-collector-plugins/trunk/src/test/java/eu/dnetlib/data/collector/plugins/gtr2/VTDXMLTest.java
1
package eu.dnetlib.data.collector.plugins.gtr2;
2

  
3
import java.io.BufferedWriter;
4
import java.io.ByteArrayOutputStream;
5
import java.io.FileWriter;
6
import java.io.PrintWriter;
7

  
8
import com.ximpleware.AutoPilot;
9
import com.ximpleware.VTDGen;
10
import com.ximpleware.VTDNav;
11
import org.apache.commons.lang3.StringUtils;
12
import org.junit.Ignore;
13
import org.junit.Test;
14
@Ignore
15
public class VTDXMLTest {
16

  
17
	private VTDGen vg;
18
	private VTDNav vn;
19
	private AutoPilot ap;
20

  
21
	private VTDGen vg_tmp;
22
	private VTDNav vn_tmp;
23
	private AutoPilot ap_tmp;
24

  
25
	private PrintWriter writer;
26
	//TODO: use resource and not full path
27
	private String inputFilePath =
28
			"/Users/alessia/workspace/dnet/dnet-collector-plugins/src/test/resources/eu.dnetlib.data.collector.plugins.projects.gtr2/projects.xml";
29

  
30
	@Test
31
	public void test() throws Exception {
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff