Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.projects.gtr2;
2

    
3
import java.io.ByteArrayOutputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

import com.ximpleware.AutoPilot;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import eu.dnetlib.data.collector.plugins.HttpConnector;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.lang3.*;
14

    
15
public class Gtr2Helper {
16

    
17
	private static final Log log = LogFactory.getLog(Gtr2Helper.class); // NOPMD by marko on 11/24/08 5:02 PM
18

    
19
	private VTDNav mainVTDNav;
20
	private AutoPilot mainAutoPilot;
21
	private StringWriter writer;
22
	private HttpConnector connector;
23
	//private BlockingQueue<String> fragment = new ArrayBlockingQueue<String>(20);
24

    
25
	public String processProject(final VTDNav vn, final String namespaces) throws Exception {
26
		//log.debug("Processing project at "+projectURL);
27
		writer = new StringWriter();
28
		mainVTDNav = vn;
29
		mainAutoPilot = new AutoPilot(mainVTDNav);
30
		writer.write("<doc " + namespaces + ">");
31
		writeFragment(mainVTDNav);
32

    
33
		mainAutoPilot.selectXPath("//link[@rel='FUND']");
34
		ExecutorService es = Executors.newFixedThreadPool(5);
35

    
36
		while (mainAutoPilot.evalXPath() != -1) {
37
			Thread t = new Thread(new ProcessFunder(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href"))));
38
			es.execute(t);
39
		}
40

    
41
		mainAutoPilot.resetXPath();
42
		mainAutoPilot.selectXPath(".//link[@rel='LEAD_ORG']");
43
		while (mainAutoPilot.evalXPath() != -1) {
44
			Thread t = new Thread(new Org(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
45
					new String[] { "<ld-org>", "</ld-org>" }));
46
			es.execute(t);
47
		}
48
		mainAutoPilot.resetXPath();
49
		mainAutoPilot.selectXPath(".//link[@rel='PP_ORG']");
50
		while (mainAutoPilot.evalXPath() != -1) {
51
			Thread t = new Thread(new Org(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
52
					new String[] { "<pp-org>","</pp-org>" }));
53
			es.execute(t);
54
		}
55
		mainAutoPilot.resetXPath();
56

    
57
		mainAutoPilot.selectXPath(".//link[@rel='PI_PER']");
58
		while (mainAutoPilot.evalXPath() != -1) {
59
			Thread t = new Thread(new PiPer(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href"))));
60
			es.execute(t);
61
		}
62
		es.shutdown();
63
		log.debug("Waiting threads");
64
		es.awaitTermination(10, TimeUnit.MINUTES);
65

    
66
		log.debug("Finished writing project");
67
		writer.write("</doc>");
68
		writer.close();
69

    
70
		return writer.toString();
71
	}
72

    
73
	private VTDNav setNavigator(final String httpUrl) {
74
		VTDGen vg_tmp = new VTDGen();
75
		connector = new HttpConnector();
76
		try {
77
			byte[] bytes = connector.getInputSource(httpUrl).getBytes("UTF-8");
78
			vg_tmp.setDoc(bytes);
79
			vg_tmp.parse(false);
80
			//vg_tmp.parseHttpUrl(httpUrl, false);
81
			return vg_tmp.getNav();
82
		}catch (Throwable e){
83
			return null;
84
		}
85
	}
86

    
87
	private int evalXpath(final VTDNav fragmentVTDNav, final String xPath) throws Exception {
88

    
89
		AutoPilot ap_tmp = new AutoPilot(fragmentVTDNav);
90
		ap_tmp.selectXPath(xPath);
91
		return ap_tmp.evalXPath();
92
	}
93

    
94
	private void writeFragment(final VTDNav nav) throws Exception {
95
		ByteArrayOutputStream b = new ByteArrayOutputStream();
96
		nav.dumpFragment(b);
97
		String ret = b.toString();
98
		b.reset();
99
		writer.write(ret);
100
	}
101

    
102
	private void writeNewTagAndInfo(final VTDNav vn, final String xPath, final String xmlOpenTag, final String xmlCloseTag, final String attrName) throws Exception {
103

    
104
		int nav_res = evalXpath(vn, xPath);
105
		if (nav_res != -1) {
106
			String tmp = xmlOpenTag;
107
			if (attrName != null) tmp += (vn.toNormalizedString(vn.getAttrVal(attrName)));
108
			else
109
				tmp += (StringEscapeUtils.escapeXml11(vn.toNormalizedString(vn.getText())));
110
			tmp += (xmlCloseTag);
111
			writer.write(tmp);
112
		}
113
	}
114

    
115
	private class PiPer implements Runnable {
116

    
117
		private VTDNav vn;
118

    
119
		public PiPer(String httpURL) {
120
			vn = setNavigator(httpURL);
121
		}
122

    
123
		@Override
124
		public void run() {
125
			try {
126
				writeFragment(vn);
127
			} catch (Throwable e) {log.debug("Eccezione in  PiPer " + e.getMessage());}
128

    
129
		}
130
	}
131

    
132
	private class Org implements Runnable {
133

    
134
		private String[] tags;
135
		private VTDNav vn;
136

    
137
		public Org(final String httpURL, final String[] tags) {
138
			vn = setNavigator(httpURL);
139
			this.tags = tags;
140
		}
141

    
142
		@Override
143
		public void run() {
144
			try {
145
				writeNewTagAndInfo(vn, "//name", tags[0]+"<name>", "</name>", null);
146
				vn.toElement(VTDNav.ROOT);
147
				writeNewTagAndInfo(vn, "//country", "<country>", "</country>", null);
148
				vn.toElement(VTDNav.ROOT);
149
				writeNewTagAndInfo(vn, ".", "<id>", "</id>"+tags[1], "id");
150
			} catch (Throwable e) {
151
				log.debug("Eccezione in  Org " + e.getMessage());
152
			}
153
		}
154

    
155
	}
156

    
157
	private class ProcessFunder implements Runnable {
158

    
159
		private VTDNav vn;
160

    
161
		public ProcessFunder(final String httpURL) {
162
			vn = setNavigator(httpURL);
163
		}
164

    
165
		@Override
166
		public void run() {
167

    
168
			try {
169
				AutoPilot ap = new AutoPilot(vn);
170
				writeFragment(vn);
171
				ap.selectXPath(".//link[@rel='FUNDER']");
172
				VTDNav tmp_vn;
173
				while (ap.evalXPath() != -1) {
174
					tmp_vn = setNavigator(vn.toNormalizedString(vn.getAttrVal("href")));
175
					writeNewTagAndInfo(tmp_vn, "//name", "<funder> <name>", "</name></funder>", null);
176
				}
177
			} catch (Throwable e) {log.debug("Eccezione in Funder" + e.getMessage());}
178
		}
179

    
180
	}
181
}
(2-2/3)