Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.projects.gtr2;
2

    
3
import java.io.ByteArrayOutputStream;
4
import java.io.StringWriter;
5
import java.util.concurrent.*;
6

    
7
import com.ximpleware.AutoPilot;
8
import com.ximpleware.VTDGen;
9
import com.ximpleware.VTDNav;
10
import eu.dnetlib.data.collector.plugins.HttpConnector;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13
import org.apache.commons.lang3.*;
14

    
15
public class Gtr2Helper {
16

    
17
	private static final Log log = LogFactory.getLog(Gtr2Helper.class); // NOPMD by marko on 11/24/08 5:02 PM
18

    
19
	private VTDNav mainVTDNav;
20
	private AutoPilot mainAutoPilot;
21
	private StringWriter writer;
22
	private HttpConnector connector;
23
	//private BlockingQueue<String> fragment = new ArrayBlockingQueue<String>(20);
24

    
25
	public String processProject(final VTDNav vn, final String namespaces) throws Exception {
26
		//log.debug("Processing project at "+projectURL);
27
		writer = new StringWriter();
28
		mainVTDNav = vn;
29
		mainAutoPilot = new AutoPilot(mainVTDNav);
30
		writer.write("<doc " + namespaces + ">");
31
		writeFragment(mainVTDNav);
32

    
33
		mainAutoPilot.selectXPath("//link[@rel='FUND']");
34
		ExecutorService es = Executors.newFixedThreadPool(5);
35

    
36
		while (mainAutoPilot.evalXPath() != -1) {
37
			Thread t = new Thread(new ProcessFunder(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href"))));
38
			es.execute(t);
39
		}
40

    
41
		mainAutoPilot.resetXPath();
42
		mainAutoPilot.selectXPath(".//link[@rel='LEAD_ORG']");
43
		while (mainAutoPilot.evalXPath() != -1) {
44
			Thread t = new Thread(new Org(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
45
					new String[] { "<ld-org>", "</ld-org>" }));
46
			es.execute(t);
47
		}
48
		mainAutoPilot.resetXPath();
49
		mainAutoPilot.selectXPath(".//link[@rel='PP_ORG']");
50
		while (mainAutoPilot.evalXPath() != -1) {
51
			Thread t = new Thread(new Org(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
52
					new String[] { "<pp-org>","</pp-org>" }));
53
			es.execute(t);
54
		}
55
		mainAutoPilot.resetXPath();
56

    
57
		mainAutoPilot.selectXPath(".//link[@rel='PI_PER']");
58
		while (mainAutoPilot.evalXPath() != -1) {
59
			Thread t = new Thread(new PiPer(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href"))));
60
			es.execute(t);
61
		}
62
		es.shutdown();
63
		log.debug("Waiting threads");
64
		es.awaitTermination(10, TimeUnit.MINUTES);
65

    
66
		log.debug("Finished writing project");
67
		writer.write("</doc>");
68
		writer.close();
69

    
70
		return writer.toString();
71
	}
72

    
73
	private VTDNav setNavigator( String httpUrl) {
74
		if(httpUrl.contains("gtr.gtr"))
75
			httpUrl = httpUrl.replace("gtr.gtr", "gtr");
76
		VTDGen vg_tmp = new VTDGen();
77
		connector = new HttpConnector();
78
		try {
79
			byte[] bytes = connector.getInputSource(httpUrl).getBytes("UTF-8");
80
			vg_tmp.setDoc(bytes);
81
			vg_tmp.parse(false);
82
			//vg_tmp.parseHttpUrl(httpUrl, false);
83
			return vg_tmp.getNav();
84
		}catch (Throwable e){
85
			return null;
86
		}
87
	}
88

    
89
	private int evalXpath(final VTDNav fragmentVTDNav, final String xPath) throws Exception {
90

    
91
		AutoPilot ap_tmp = new AutoPilot(fragmentVTDNav);
92
		ap_tmp.selectXPath(xPath);
93
		return ap_tmp.evalXPath();
94
	}
95

    
96
	private void writeFragment(final VTDNav nav) throws Exception {
97
		ByteArrayOutputStream b = new ByteArrayOutputStream();
98
		nav.dumpFragment(b);
99
		String ret = b.toString();
100
		b.reset();
101
		writer.write(ret);
102
	}
103

    
104
	private void writeNewTagAndInfo(final VTDNav vn, final String xPath, final String xmlOpenTag, final String xmlCloseTag, final String attrName) throws Exception {
105

    
106
		int nav_res = evalXpath(vn, xPath);
107
		if (nav_res != -1) {
108
			String tmp = xmlOpenTag;
109
			if (attrName != null) tmp += (vn.toNormalizedString(vn.getAttrVal(attrName)));
110
			else
111
				tmp += (StringEscapeUtils.escapeXml11(vn.toNormalizedString(vn.getText())));
112
			tmp += (xmlCloseTag);
113
			writer.write(tmp);
114
		}
115
	}
116

    
117
	private class PiPer implements Runnable {
118

    
119
		private VTDNav vn;
120

    
121
		public PiPer(String httpURL) {
122
			if(httpURL.contains("gtr.gtr"))
123
				httpURL = httpURL.replace("gtr.gtr","gtr");
124
			vn = setNavigator(httpURL);
125
		}
126

    
127
		@Override
128
		public void run() {
129
			try {
130
				writeFragment(vn);
131
			} catch (Throwable e) {log.debug("Exception in  PiPer " + e.getMessage());}
132

    
133
		}
134
	}
135

    
136
	private class Org implements Runnable {
137

    
138
		private String[] tags;
139
		private VTDNav vn;
140

    
141
		public Org(String httpURL, final String[] tags) {
142
			if(httpURL.contains("gtr.gtr"))
143
				httpURL = httpURL.replace("gtr.gtr","gtr");
144
			vn = setNavigator(httpURL);
145
			this.tags = tags;
146
		}
147

    
148
		@Override
149
		public void run() {
150
			try {
151
				writeNewTagAndInfo(vn, "//name", tags[0]+"<name>", "</name>", null);
152
				vn.toElement(VTDNav.ROOT);
153
				writeNewTagAndInfo(vn, "//country", "<country>", "</country>", null);
154
				vn.toElement(VTDNav.ROOT);
155
				writeNewTagAndInfo(vn, ".", "<id>", "</id>"+tags[1], "id");
156
			} catch (Throwable e) {
157
				log.debug("Exception in  Org " + e.getMessage());
158
			}
159
		}
160

    
161
	}
162

    
163
	private class ProcessFunder implements Runnable {
164

    
165
		private VTDNav vn;
166

    
167
		public ProcessFunder(String httpURL) {
168
			if(httpURL.contains("gtr.gtr"))
169
				httpURL = httpURL.replace("gtr.gtr", "gtr");
170
			vn = setNavigator(httpURL);
171
		}
172

    
173
		@Override
174
		public void run() {
175

    
176
			try {
177
				AutoPilot ap = new AutoPilot(vn);
178
				writeFragment(vn);
179
				ap.selectXPath(".//link[@rel='FUNDER']");
180
				VTDNav tmp_vn;
181
				while (ap.evalXPath() != -1) {
182
					tmp_vn = setNavigator(vn.toNormalizedString(vn.getAttrVal("href")));
183
					writeNewTagAndInfo(tmp_vn, "//name", "<funder> <name>", "</name></funder>", null);
184
				}
185
			} catch (Throwable e) {log.debug("Eccezione in Funder" + e.getMessage());}
186
		}
187

    
188
	}
189
}
(2-2/3)