Project

General

Profile

1
package eu.dnetlib.data.collective.transformation.core.xsl.ext;
2

    
3
import java.io.StringReader;
4
import java.io.StringWriter;
5
import java.util.Collection;
6
import java.util.HashMap;
7
import java.util.Iterator;
8
import java.util.LinkedList;
9
import java.util.List;
10
import java.util.Map;
11

    
12
import javax.xml.XMLConstants;
13
import javax.xml.namespace.NamespaceContext;
14
import javax.xml.parsers.DocumentBuilder;
15
import javax.xml.parsers.DocumentBuilderFactory;
16
import javax.xml.transform.Result;
17
import javax.xml.transform.Transformer;
18
import javax.xml.transform.TransformerFactory;
19
import javax.xml.transform.dom.DOMSource;
20
import javax.xml.transform.stream.StreamResult;
21
import javax.xml.xpath.XPath;
22
import javax.xml.xpath.XPathFactory;
23

    
24
import org.apache.commons.text.StringEscapeUtils;
25
import org.apache.commons.logging.Log;
26
import org.apache.commons.logging.LogFactory;
27
import org.svenson.JSONParser;
28
import org.w3c.dom.Document;
29
import org.w3c.dom.Node;
30
import org.w3c.dom.NodeList;
31
import org.w3c.dom.Text;
32
import org.xml.sax.InputSource;
33

    
34
import eu.dnetlib.data.collective.transformation.engine.FunctionResults;
35
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
36
import eu.dnetlib.data.collective.transformation.engine.functions.IdentifierExtract;
37
import eu.dnetlib.data.collective.transformation.engine.functions.LookupRecord;
38
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
39
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
40
import eu.dnetlib.data.collective.transformation.engine.functions.Split;
41

    
42
/**
43
 * The class implements external XSLT functions
44
 * @author jochen
45
 *
46
 */
47
public class TransformationFunctionProxy {
48

    
49
	@SuppressWarnings("unused")
50
	private static final Log log = LogFactory.getLog(TransformationFunctionProxy.class);
51
	private static TransformationFunctionProxy tf;
52
	private RegularExpression regExprFunction = new RegularExpression();
53
	private Convert convertFunction;
54
	private IdentifierExtract identifierExtractFunction = new IdentifierExtract();
55
	private static DocumentBuilder docBuilder;
56
	private static Transformer transformer;
57
	private Split split = new Split();
58
	private Map<String, FunctionResults> mapOfResults = new HashMap<String, FunctionResults>();
59
	private LookupRecord lookupRecord;
60
	private static XPath xpath = XPathFactory.newInstance().newXPath();
61
	
62
	/**
63
	 * @return the transformationFunctionProxy instance
64
	 */
65
	public static TransformationFunctionProxy getInstance(){
66
		if ( tf == null ){
67
			tf = new TransformationFunctionProxy();
68
			try {
69
				DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
70
				dbf.setNamespaceAware(true);
71
				docBuilder = dbf.newDocumentBuilder();
72
				transformer  = TransformerFactory.newInstance().newTransformer();
73
				xpath.setNamespaceContext(new NamespaceContext() {
74
					
75
					@Override
76
					public Iterator getPrefixes(String namespaceURI) {
77
						// TODO Auto-generated method stub
78
						return null;
79
					}
80
					
81
					@Override
82
					public String getPrefix(String namespaceURI) {
83
						// TODO Auto-generated method stub
84
						return null;
85
					}
86
					
87
					@Override
88
					public String getNamespaceURI(String aPrefix) {
89
						if (aPrefix == null){
90
							throw new IllegalArgumentException("No prefix provided!");
91
						}else if (aPrefix.equals(XMLConstants.DEFAULT_NS_PREFIX)){
92
							return "http://namespace.openaire.eu";
93
						}else if (aPrefix.equals("dc")){
94
							return "http://purl.org/dc/elements/1.1/";
95
						}else{
96
							return XMLConstants.NULL_NS_URI;
97
						}
98
					}
99
				});
100

    
101
			} catch (Exception e) {
102
				log.fatal("error while instantiating DocumentBuilderFactory, Transfomer, Xpath.namespacecontext", e);
103
				throw new IllegalStateException(e);
104
			}
105
		}
106
		return tf;
107
	}
108
	
109
	/**
110
	 * @param uniqueKey
111
	 * @param i
112
	 * @return
113
	 */
114
	public String getValue(String uniqueKey, int i){
115
		if ( !mapOfResults.containsKey(uniqueKey)){
116
			throw new IllegalStateException("unknown key: " + uniqueKey);
117
		}
118
		return mapOfResults.get(uniqueKey).get(i);
119
	}
120
	
121
	/*
	 * Disabled legacy accessor, kept for reference only. This is a plain block
	 * comment on purpose, so that it is not read as Javadoc of the next live method.
	 *
	 * @param uniqueKey
	 * @param i
	 * @return
	 * @deprecated
	 */
//	public String convert(String uniqueKey, int i){
//		if (mapOfResults == null){
//			return "transformationFunctionProxy_convert not initialized";
//		}else{
//			if (!mapOfResults.containsKey(uniqueKey)){
//				throw new IllegalStateException("unknown key: " + uniqueKey);
//			}
//			return mapOfResults.get(uniqueKey).get(i);			
//		}
//	}
137
	
138
	/*
	 * Disabled legacy accessor (positional variant), kept for reference only. This is
	 * a plain block comment on purpose, so that it is not read as Javadoc of the next live method.
	 *
	 * @param uniqueKey
	 * @param i
	 * @param aPos
	 * @return
	 * @deprecated
	 */
//	public String convert(String uniqueKey, int i, int aPos){
//		if (mapOfResults == null){
//			return "transformationFunctionProxy_convert not initialized";
//		}else{
//			if (!mapOfResults.containsKey(uniqueKey)){
//				throw new IllegalStateException("unknown key: " + uniqueKey);
//			}
//			return mapOfResults.get(uniqueKey).get(i, aPos);
//		}
//	}
155
	
156
	/**
157
	 * @param uniqueKey
158
	 * @param i
159
	 * @return
160
	 */
161
	public String extract(String uniqueKey, int i){
162
		if (mapOfResults == null){
163
			return "transformationFunctionProxy_extract not initialized";
164
		}else{
165
			if (!mapOfResults.containsKey(uniqueKey)){
166
				throw new IllegalStateException("unknown key: " + uniqueKey);
167
			}
168
			return mapOfResults.get(uniqueKey).get(i);			
169
		}		
170
	}
171
	
172
	/**
173
 	 * normalize values given as an input value by using a vocabulary 
174
	 * @param aInput - the value as a String
175
	 * @param aVocabularyName - the name of the vocabulary, which must be known for the vocabulary registry
176
	 * @return
177
	 */
178
	public synchronized String convertString(String aInput, String aVocabularyName){
179
		List<String> values = new LinkedList<String>();
180
		values.add(aInput);
181
		try {
182
			log.debug("conversion input: " + aInput);
183
			String conversionResult = convertFunction.executeSingleValue(aVocabularyName, values);
184
			log.debug("conversion result: " + conversionResult);
185
			return conversionResult;
186
		} catch (ProcessingException e) {
187
			log.fatal("convert failed for args 'input': " + aInput + " , 'vocabularyName': " + aVocabularyName, e);
188
			throw new IllegalStateException(e);
189
		}
190
	}
191

    
192
	/**
193
	 * normalize values given as a NodeList by using a vocabulary 
194
	 * @param aInput - the input values as NodeList
195
	 * @param aVocabularyName - the name of the vocabulary, which must be known for the vocabulary registry
196
	 * @return
197
	 */
198
	public synchronized String convert(NodeList aInput, String aVocabularyName){
199
		List<String> values = new LinkedList<String>();
200
		getTextFromNodeList(aInput, values);
201
		try {
202
			return convertFunction.executeSingleValue(aVocabularyName, values);
203
		} catch (ProcessingException e) {
204
			throw new IllegalStateException(e);
205
		}
206
	}
207
	
208
	public synchronized String convert(NodeList aInput, String aVocabularyName, String aDefaultPattern, String aFunction){
209
		List<String> values = new LinkedList<String>();
210
		getTextFromNodeList(aInput, values);
211
		try {
212
			List<String> results = convertFunction.executeFilterByParams(aVocabularyName, values, aDefaultPattern, aFunction);
213
			if (results.size() > 0)
214
				return results.get(0);
215
			else
216
				return "";
217
		} catch (ProcessingException e) {
218
			throw new IllegalStateException(e);
219
		}
220
	}
221
	
222
	private void getTextFromNodeList(NodeList aNodeList, List<String> aTextvalues){
223
		for (int i = 0; i < aNodeList.getLength(); i++){
224
			Node n = aNodeList.item(i);
225
			if (n.getNodeType() == Node.ELEMENT_NODE)
226
				getTextFromNodeList(n.getChildNodes(), aTextvalues);
227
			else if (n instanceof Text)
228
				aTextvalues.add(n.getNodeValue());
229
		}
230
	}
231

    
232
	/**
233
	 * substitutes using regular expression
234
	 * @param aInput
235
	 * @param aReplacement
236
	 * @param aRegularExpression
237
	 * @return
238
	 */
239
	public synchronized String regExpr(String aInput, String aReplacement, String aRegularExpression){
240
		try {
241
			int lastSlash = aRegularExpression.lastIndexOf("/");
242
			String trailingOptions = aRegularExpression.substring(lastSlash);
243
//			log.debug("trailingOptions: " + trailingOptions);
244
			int replacementSlash = aRegularExpression.substring(0, lastSlash).lastIndexOf("/");
245
			String replacementFromExpression = aRegularExpression.substring(replacementSlash + 1, lastSlash);
246
//			log.debug("replacementFromExpr lengt: " + replacementFromExpression.length() + ", value: " + replacementFromExpression);
247
			String newRegExpr = aRegularExpression.substring(0, replacementSlash + 1) + aReplacement + replacementFromExpression + trailingOptions;
248
//			log.debug("newRegExpr: " + newRegExpr);
249
			return regExprFunction.executeSingleValue(newRegExpr, aInput, aReplacement);
250
		} catch (ProcessingException e) {
251
			throw new IllegalStateException(e);
252
		}
253
	}
254
	
255
	public String lookup(String aIdentifier, String aPropertyKey){
256
		log.debug("functionProxy.lookup: param identifier: " + aIdentifier + " , key: " + aPropertyKey);
257
		return this.lookupRecord.getPropertyValue(aIdentifier, aPropertyKey);
258
	}
259

    
260
	public synchronized Collection<String> split(NodeList aInput, String aRegularExpression, String aCallId){
261
		try {
262
			List<String> textValues = new LinkedList<String>();
263
			getTextFromNodeList(aInput, textValues);
264
			return split.executeAllValues(textValues, aRegularExpression);
265
			//return split.executeSingleValue(textValues, aRegularExpression, aCallId);
266
		}catch (ProcessingException e){
267
			throw new IllegalStateException(e);
268
		}
269
	}
270
	
271
	public synchronized String split(String aCallId){
272
		try {
273
			return split.executeSingleValue(aCallId);
274
		}catch (ProcessingException e){
275
			throw new IllegalStateException(e);
276
		}		
277
	}
278
	
279
	/**
280
	 * extract content that match pattern given by a regular expression from a given node
281
	 * @param xpathExprJson
282
	 * @param aInput
283
	 * @param aRegExpression
284
	 * @return nodeList
285
	 */
286
	public synchronized NodeList identifierExtract(String aXpathExprJson, Node aInput, String aRegExpression){
287
		String xpathExprJson = StringEscapeUtils.unescapeXml(aXpathExprJson);
288
		log.debug("unescape xpathExprJson: " + xpathExprJson);
289
		String regExpression = StringEscapeUtils.unescapeXml(aRegExpression);
290
		log.debug("unescape regExpr" + regExpression);
291

    
292
		try{
293
			List<String> xpathExprList = JSONParser.defaultJSONParser().parse(List.class, xpathExprJson);
294
			
295
			// workaround: rewrap, why ?
296
			DOMSource s = new DOMSource(aInput);
297
			StringWriter w = new StringWriter();
298
			Result r = new StreamResult(w);
299
			transformer.transform(s, r);
300
			Document doc = docBuilder.parse(new InputSource(new StringReader(w.toString())));
301
			
302
			return identifierExtractFunction.extract(xpathExprList, doc, regExpression, docBuilder.newDocument(), xpath);
303
		}catch(Exception e){
304
			log.fatal("identifierExtract failed for node value: " + aInput.getNodeValue(), e);
305
			throw new IllegalStateException(e.getMessage());
306
		}
307
	}
308
		
309
	/**
310
	 * @param key
311
	 * @param resultsFunction_getvalue
312
	 */
313
	public void setResults(String key, FunctionResults resultsFunction_getvalue) {
314
		mapOfResults.put(key, resultsFunction_getvalue);
315
	}
316

    
317
	/**
318
	 * @param convertFunction the convertFunction to set
319
	 */
320
	public void setConvertFunction(Convert convertFunction) {
321
		this.convertFunction = convertFunction;
322
	}
323

    
324
	/**
325
	 * @return the convertFunction
326
	 */
327
	public Convert getConvertFunction() {
328
		return convertFunction;
329
	}
330

    
331
	/**
332
	 * @return the lookupRecord
333
	 */
334
	public LookupRecord getLookupRecord() {
335
		return lookupRecord;
336
	}
337

    
338
	/**
339
	 * @param lookupRecord the lookupRecord to set
340
	 */
341
	public void setLookupRecord(LookupRecord lookupRecord) {
342
		this.lookupRecord = lookupRecord;
343
	}
344

    
345
}
    (1-1/1)