Project

General

Profile

1
package eu.dnetlib.data.collector.plugins.projects.gtr2;
2

    
3
import java.io.ByteArrayOutputStream;
4
import java.io.StringWriter;
5
import java.util.concurrent.*;
6

    
7
import com.ximpleware.AutoPilot;
8
import com.ximpleware.VTDGen;
9
import com.ximpleware.VTDNav;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.apache.commons.lang3.*;
13

    
14
public class Gtr2Helper {
15

    
16
	private static final Log log = LogFactory.getLog(Gtr2Helper.class); // NOPMD by marko on 11/24/08 5:02 PM
17

    
18
	private VTDNav mainVTDNav;
19
	private AutoPilot mainAutoPilot;
20
	private StringWriter writer;
21
	//private BlockingQueue<String> fragment = new ArrayBlockingQueue<String>(20);
22

    
23
	public String processProject(final VTDNav vn, final String namespaces) throws Exception {
24
		//log.debug("Processing project at "+projectURL);
25
		writer = new StringWriter();
26
		mainVTDNav = vn;
27
		mainAutoPilot = new AutoPilot(mainVTDNav);
28
		writer.write("<doc " + namespaces + ">");
29
		writeFragment(mainVTDNav);
30

    
31
		mainAutoPilot.selectXPath("//link[@rel='FUND']");
32
		ExecutorService es = Executors.newFixedThreadPool(5);
33

    
34
		while (mainAutoPilot.evalXPath() != -1) {
35
			Thread t = new Thread(new ProcessFunder(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href"))));
36
			es.execute(t);
37
		}
38

    
39
		mainAutoPilot.resetXPath();
40
		mainAutoPilot.selectXPath(".//link[@rel='LEAD_ORG']");
41
		while (mainAutoPilot.evalXPath() != -1) {
42
			Thread t = new Thread(new Org(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
43
					new String[] { "<ld-org>", "</ld-org>" }));
44
			es.execute(t);
45
		}
46
		mainAutoPilot.resetXPath();
47
		mainAutoPilot.selectXPath(".//link[@rel='PP_ORG']");
48
		while (mainAutoPilot.evalXPath() != -1) {
49
			Thread t = new Thread(new Org(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href")),
50
					new String[] { "<pp-org>","</pp-org>" }));
51
			es.execute(t);
52
		}
53
		mainAutoPilot.resetXPath();
54

    
55
		mainAutoPilot.selectXPath(".//link[@rel='PI_PER']");
56
		while (mainAutoPilot.evalXPath() != -1) {
57
			Thread t = new Thread(new PiPer(mainVTDNav.toNormalizedString(mainVTDNav.getAttrVal("href"))));
58
			es.execute(t);
59
		}
60
		es.shutdown();
61
		log.debug("Waiting threads");
62
		es.awaitTermination(10, TimeUnit.MINUTES);
63

    
64
		log.debug("Finished writing project");
65
		writer.write("</doc>");
66
		writer.close();
67

    
68
		return writer.toString();
69
	}
70

    
71
	private VTDNav setNavigator(final String httpUrl) {
72
		VTDGen vg_tmp = new VTDGen();
73
		vg_tmp.parseHttpUrl(httpUrl, false);
74
		return vg_tmp.getNav();
75
	}
76

    
77
	private int evalXpath(final VTDNav fragmentVTDNav, final String xPath) throws Exception {
78

    
79
		AutoPilot ap_tmp = new AutoPilot(fragmentVTDNav);
80
		ap_tmp.selectXPath(xPath);
81
		return ap_tmp.evalXPath();
82
	}
83

    
84
	private void writeFragment(final VTDNav nav) throws Exception {
85
		ByteArrayOutputStream b = new ByteArrayOutputStream();
86
		nav.dumpFragment(b);
87
		String ret = b.toString();
88
		b.reset();
89
		writer.write(ret);
90
	}
91

    
92
	private void writeNewTagAndInfo(final VTDNav vn, final String xPath, final String xmlOpenTag, final String xmlCloseTag, final String attrName) throws Exception {
93

    
94
		int nav_res = evalXpath(vn, xPath);
95
		if (nav_res != -1) {
96
			String tmp = xmlOpenTag;
97
			if (attrName != null) tmp += (vn.toNormalizedString(vn.getAttrVal(attrName)));
98
			else
99
				tmp += (StringEscapeUtils.escapeXml11(vn.toNormalizedString(vn.getText())));
100
			tmp += (xmlCloseTag);
101
			writer.write(tmp);
102
		}
103
	}
104

    
105
	private class PiPer implements Runnable {
106

    
107
		private VTDNav vn;
108

    
109
		public PiPer(String httpURL) {
110
			vn = setNavigator(httpURL);
111
		}
112

    
113
		@Override
114
		public void run() {
115
			try {
116
				writeFragment(vn);
117
			} catch (Throwable e) {log.debug("Eccezione in  PiPer " + e.getMessage());}
118

    
119
		}
120
	}
121

    
122
	private class Org implements Runnable {
123

    
124
		private String[] tags;
125
		private VTDNav vn;
126

    
127
		public Org(final String httpURL, final String[] tags) {
128
			vn = setNavigator(httpURL);
129
			this.tags = tags;
130
		}
131

    
132
		@Override
133
		public void run() {
134
			try {
135
				writeNewTagAndInfo(vn, "//name", tags[0]+"<name>", "</name>", null);
136
				vn.toElement(VTDNav.ROOT);
137
				writeNewTagAndInfo(vn, "//country", "<country>", "</country>", null);
138
				vn.toElement(VTDNav.ROOT);
139
				writeNewTagAndInfo(vn, ".", "<id>", "</id>"+tags[1], "id");
140
			} catch (Throwable e) {
141
				log.debug("Eccezione in  Org " + e.getMessage());
142
			}
143
		}
144

    
145
	}
146

    
147
	private class ProcessFunder implements Runnable {
148

    
149
		private VTDNav vn;
150

    
151
		public ProcessFunder(final String httpURL) {
152
			vn = setNavigator(httpURL);
153
		}
154

    
155
		@Override
156
		public void run() {
157

    
158
			try {
159
				AutoPilot ap = new AutoPilot(vn);
160
				writeFragment(vn);
161
				ap.selectXPath(".//link[@rel='FUNDER']");
162
				VTDNav tmp_vn;
163
				while (ap.evalXPath() != -1) {
164
					tmp_vn = setNavigator(vn.toNormalizedString(vn.getAttrVal("href")));
165
					writeNewTagAndInfo(tmp_vn, "//name", "<funder> <name>", "</name></funder>", null);
166
				}
167
			} catch (Throwable e) {log.debug("Eccezione in Funder" + e.getMessage());}
168
		}
169

    
170
	}
171
}
(2-2/3)