Project

General

Profile

1
package eu.dnetlib.msro.openaireplus.workflows.nodes.contexts;
2

    
3
import java.io.IOException;
4
import java.util.*;
5

    
6
import com.google.common.collect.Maps;
7
import com.ximpleware.AutoPilot;
8
import com.ximpleware.VTDException;
9
import com.ximpleware.VTDGen;
10
import com.ximpleware.VTDNav;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13
import org.dom4j.Document;
14
import org.dom4j.DocumentHelper;
15
import org.dom4j.Element;
16
import org.joda.time.DateTime;
17

    
18
import static eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions.md5;
19

    
20
public class BuildH2020FETTaxonomy {
21

    
22
	/** Logger for this class. */
	private static final Log log = LogFactory.getLog(BuildH2020FETTaxonomy.class);

	/** Project records still to be parsed; supplied through {@code setIterator}. */
	private Iterator<String> projects;

	/**
	 * Index of the parsed FET projects on three levels: FET type, then call identifier,
	 * then the value read from the project's {@code optional1} field.
	 */
	private Map<String, Map<String, Map<String, List<Info>>>> dic;

	/** Creates a builder whose project index is initially empty. */
	public BuildH2020FETTaxonomy() {
		this.dic = Maps.newHashMap();
	}
30

    
31
	public void setIterator(Iterator<String> projects) {
32
		this.projects = projects;
33
	}
34

    
35
	public String parseProjects() throws Exception {
36
		while (projects.hasNext())
37
			parse(projects.next());
38
		return getTaxonomy();
39
	}
40

    
41
	public String getTaxonomy() throws Exception {
42
		Document taxonomy = DocumentHelper.createDocument();
43
		Element root1 = taxonomy.addElement("RESOURCE_PROFILE");
44
		createHeader(root1);
45
		Element body = root1.addElement("BODY");
46
		Element configurationElement = body.addElement("CONFIGURATION");
47
		createContext(configurationElement);
48
		body.addElement("STATUS");
49
		body.addElement("SECURITY_PARAMETERS");
50
		return taxonomy.getDocument().asXML();
51

    
52
	}
53

    
54
	private void createContext(final Element configurationElement) {
55
		Element context = configurationElement.addElement("context");
56
		context.addAttribute("type", "community");
57
		context.addAttribute("id", "fet-h2020");
58
		context.addAttribute("label", "FET H2020");
59
		for (Map.Entry<String, Map<String, Map<String, List<Info>>>> entry : dic.entrySet()) {
60
			String type = entry.getKey();
61
			Element cat = context.addElement("category");
62
			String catid = "fet-h2020::" + type.toLowerCase();
63
			cat.addAttribute("id", catid);
64
			cat.addAttribute("label", "FET " + type);
65
			cat.addAttribute("claim", "false");
66
			//int count = 0;
67
			for (Map.Entry<String, Map<String, List<Info>>> entryst : dic.get(type).entrySet()) {
68
				String subtype = entryst.getKey();
69
				for (Map.Entry<String, List<Info>> entryp : dic.get(type).get(subtype).entrySet()) {
70
					String opt = entryp.getKey();
71
			//		count++;
72
					Element subcat = cat.addElement("concept");
73
					String proj_md5 = md5(opt).substring(0,12);
74
					//String subcatid = catid + "::" + count;
75
					String subcatid = catid + "::" + proj_md5;
76
					subcat.addAttribute("id", subcatid);
77
					Element pa = subcat.addElement("param");
78
					pa.addAttribute("name", "CALL_ID");
79
					List<Info> projectRows = dic.get(type).get(subtype).get(opt);
80
					if (!projectRows.isEmpty()) {
81
						for (Info row : projectRows) {
82
							createProjectConcept(row, subcat, subcatid);
83
						}
84
						subcat.addAttribute("label", projectRows.get(0).getCallname());
85
						subcat.addAttribute("claim", "false");
86
						pa.setText(projectRows.get(0).getCallID());
87
					} else throw new RuntimeException("The lis of projects for " + subcatid + " is empty. This exception should never be thrown.");
88
				}
89
			}
90
		}
91
	}
92

    
93
	private void createProjectConcept(Info row, Element father, String prefix) {
94
		String code = row.getCode();
95
		String acronym = row.getAcronym();
96
		String title = row.getTitle();
97
		String projcallid = row.getProjcallid();
98
		Element concept = father.addElement("concept");
99
		concept.addAttribute("id", prefix + "::" + code);
100
		concept.addAttribute("label", title);
101
		concept.addAttribute("claim", "true");
102
		Element p = concept.addElement("param");
103
		p.addAttribute("name", "CD_PROJ_ID");
104
		p.setText(code);
105
		p = concept.addElement("param");
106
		p.addAttribute("name", "CD_CALL_ID");
107
		p.setText(projcallid);
108
		p = concept.addElement("param");
109
		p.addAttribute("name", "CD_PROJECT_NUMBER");
110
		p.setText(code);
111
		p = concept.addElement("param");
112
		p.addAttribute("name", "CD_ACRONYM");
113
		p.setText(acronym);
114
		p = concept.addElement("param");
115
		p.addAttribute("name", "CD_FRAMEWORK");
116
		p.setText("H2020");
117
		p = concept.addElement("param");
118
		p.addAttribute("name", "funder");
119
		p.setText("EC");
120
	}
121

    
122
	private void createHeader(final Element profile) {
123
		Element header = profile.addElement("HEADER");
124
		Element rs = header.addElement("RESOURCE_IDENTIFIER");
125
		rs.addAttribute("value", "");
126
		Element rt = header.addElement("RESOURCE_TYPE");
127
		rt.addAttribute("value", "ContextDSResourceType");
128
		Element rk = header.addElement("RESOURCE_KIND");
129
		rk.addAttribute("value", "ContextDSResources");
130
		Element ru = header.addElement("RESOURCE_URI");
131
		ru.addAttribute("value", "");
132
		Element daoc = header.addElement("DATE_OF_CREATION");
133
		daoc.addAttribute("value", DateTime.now().toString());
134
	}
135

    
136
	private boolean parse(String project) throws VTDException, IOException {
137
		boolean fet = false;
138
		String callid = "";
139
		String projectid = "";
140
		VTDGen vg = new VTDGen();
141
		vg.setDoc(project.getBytes("UTF-8"));
142
		vg.parse(false);
143
		VTDNav vn = vg.getNav();
144
		AutoPilot ap = new AutoPilot(vn);
145
		ap.selectXPath("//metadata/ROWS/ROW[@table=\"projects\"]");
146

    
147
		while (ap.evalXPath() != -1) {
148
			AutoPilot ap1 = new AutoPilot(vn);
149
			ap1.selectXPath("./FIELD[@name=\"call_identifier\"]");
150
			if (ap1.evalXPath() != -1)
151
				callid = vn.toNormalizedString(vn.getText());
152
			ap1.clearVariableExprs();
153
			vn.toElement(VTDNav.PARENT);
154
			ap1.selectXPath("./FIELD[@name=\"optional1\"]");
155
			if (ap1.evalXPath() != -1)
156
				projectid = vn.toNormalizedString(vn.getText());
157
			if (callid.contains("FET")) {
158
				String type;
159
				if (callid.contains("OPEN"))
160
					type = "OPEN";
161
				else if (callid.contains("PROACT"))
162
					type = "PROACT";
163
				else if (callid.contains("HPC"))
164
					type = "HPC";
165
				else
166
					type = "FLAG";
167
				vn.toElement(VTDNav.ROOT);
168
				fet = true;
169
				insert(type, callid, projectid, new Info(vn));
170
			}
171
		}
172
		return fet;
173
	}
174

    
175
	private void insert(String type, String callid, String projcallid, Info row) {
176
		Map<String, Map<String, List<Info>>> dopen;
177
		Map<String, List<Info>> entry;
178
		List<Info> projects;
179
		if (dic.containsKey(type)) {
180
			dopen = dic.get(type);
181
			if (dopen.containsKey(callid)) {
182
				entry = dopen.get(callid);
183
				if (entry.containsKey(projcallid)) {
184
					entry.get(projcallid).add(row);
185
				} else {
186
					projects = new ArrayList<>();
187
					projects.add(row);
188
					entry.put(projcallid, projects);
189
				}
190
			} else {
191
				projects = new ArrayList<>();
192
				entry = new HashMap<>();
193
				projects.add(row);
194
				entry.put(projcallid, projects);
195
				dopen.put(callid, entry);
196
			}
197
		} else {
198
			projects = new ArrayList<>();
199
			entry = new HashMap<>();
200
			dopen = new HashMap<>();
201
			projects.add(row);
202
			entry.put(projcallid, projects);
203
			dopen.put(callid, entry);
204
			dic.put(type, dopen);
205
		}
206
	}
207

    
208
	public void parseFETProject(int projects_number) throws VTDException, IOException {
209
		int parsed = 0;
210
		while (projects.hasNext() && parsed < projects_number) {
211
			if (parse(projects.next()))
212
				parsed++;
213
		}
214
		log.debug(parsed);
215
	}
216

    
217
	private class Info {
218

    
219
		VTDNav vn;
220
		private String code, acronym, title, call_identifier, projcallid, callname;
221

    
222
		public String getCode() {
223
			return code;
224
		}
225

    
226
		public String getAcronym() {
227
			return acronym;
228
		}
229

    
230
		public String getTitle() {
231
			return title;
232
		}
233

    
234
		public String getCallID() {
235
			return call_identifier;
236
		}
237

    
238
		public String getProjcallid() {
239
			return projcallid;
240
		}
241

    
242
		public String getCallname() {
243
			return callname;
244
		}
245

    
246
		public Info(VTDNav vn) {
247
			this.vn = vn;
248
			parse();
249
		}
250

    
251
		private void parse() {
252
			try {
253
				AutoPilot ap = new AutoPilot(vn);
254
				ap.selectXPath("//FIELD");
255
				while (ap.evalXPath() != -1) {
256
					String aname = vn.toNormalizedString(vn.getAttrVal("name"));
257
					String text = "";
258
					if (vn.getText() != -1)
259
						text = vn.toNormalizedString(vn.getText());
260
					if (aname.equals("code"))
261
						code = text;
262
					else if (aname.equals("acronym"))
263
						acronym = text;
264
					else if (aname.equals("title"))
265
						title = text;
266
					else if (aname.equals("call_identifier"))
267
						call_identifier = text;
268
					else if (aname.equals("optional1"))
269
						projcallid = text;
270
					else if (aname.equals("optional2"))
271
						callname = text;
272
				}
273
			} catch (Exception e) {
274
			}
275
		}
276

    
277
	}
278

    
279
}
(1-1/8)