Revision 46151
Added by Alessia Bardi over 7 years ago
TransformationFunctionProxy.java | ||
---|---|---|
1 |
package eu.dnetlib.data.collective.transformation.core.xsl.ext; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.io.StringWriter; |
|
5 |
import java.util.*; |
|
6 |
import javax.xml.XMLConstants; |
|
7 |
import javax.xml.namespace.NamespaceContext; |
|
8 |
import javax.xml.parsers.DocumentBuilder; |
|
9 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
10 |
import javax.xml.transform.Result; |
|
11 |
import javax.xml.transform.Transformer; |
|
12 |
import javax.xml.transform.TransformerFactory; |
|
13 |
import javax.xml.transform.dom.DOMSource; |
|
14 |
import javax.xml.transform.stream.StreamResult; |
|
15 |
import javax.xml.xpath.XPath; |
|
16 |
import javax.xml.xpath.XPathFactory; |
|
17 |
|
|
18 |
import eu.dnetlib.data.collective.transformation.engine.FunctionResults; |
|
19 |
import eu.dnetlib.data.collective.transformation.engine.functions.*; |
|
20 |
import org.apache.commons.lang3.StringEscapeUtils; |
|
21 |
import org.apache.commons.logging.Log; |
|
22 |
import org.apache.commons.logging.LogFactory; |
|
23 |
import org.svenson.JSONParser; |
|
24 |
import org.w3c.dom.Document; |
|
25 |
import org.w3c.dom.Node; |
|
26 |
import org.w3c.dom.NodeList; |
|
27 |
import org.w3c.dom.Text; |
|
28 |
import org.xml.sax.InputSource; |
|
29 |
|
|
30 |
/** |
|
31 |
* The class implements external XSLT functions. |
|
32 |
* |
|
33 |
* Alessia: every method that is called as an external function must become a class in eu.dnetlib.saxon.ext, extend AbstractTransformatorExtFunction, and be annotated as a Spring Component. |
|
34 |
* Please see ConvertFunction for an example. |
|
35 |
* |
|
36 |
* @author jochen |
|
37 |
*/ |
|
38 |
public class TransformationFunctionProxy { |
|
39 |
|
|
40 |
@SuppressWarnings("unused") |
|
41 |
private static final Log log = LogFactory.getLog(TransformationFunctionProxy.class); |
|
42 |
private static TransformationFunctionProxy tf; |
|
43 |
private static DocumentBuilder docBuilder; |
|
44 |
private static Transformer transformer; |
|
45 |
private static XPath xpath = XPathFactory.newInstance().newXPath(); |
|
46 |
private RegularExpression regExprFunction = new RegularExpression(); |
|
47 |
private Convert convertFunction; |
|
48 |
private IdentifierExtract identifierExtractFunction = new IdentifierExtract(); |
|
49 |
private Split split = new Split(); |
|
50 |
private Map<String, FunctionResults> mapOfResults = new HashMap<String, FunctionResults>(); |
|
51 |
private LookupRecord lookupRecord; |
|
52 |
|
|
53 |
/** |
|
54 |
* @return the transformationFunctionProxy instance |
|
55 |
*/ |
|
56 |
public static TransformationFunctionProxy getInstance(final TransformerFactory transformerFactory) { |
|
57 |
if (tf == null) { |
|
58 |
tf = new TransformationFunctionProxy(); |
|
59 |
try { |
|
60 |
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); |
|
61 |
dbf.setNamespaceAware(true); |
|
62 |
docBuilder = dbf.newDocumentBuilder(); |
|
63 |
transformer = transformerFactory.newTransformer(); |
|
64 |
xpath.setNamespaceContext(new NamespaceContext() { |
|
65 |
|
|
66 |
@Override |
|
67 |
public Iterator getPrefixes(String namespaceURI) { |
|
68 |
// TODO Auto-generated method stub |
|
69 |
return null; |
|
70 |
} |
|
71 |
|
|
72 |
@Override |
|
73 |
public String getPrefix(String namespaceURI) { |
|
74 |
// TODO Auto-generated method stub |
|
75 |
return null; |
|
76 |
} |
|
77 |
|
|
78 |
@Override |
|
79 |
public String getNamespaceURI(String aPrefix) { |
|
80 |
if (aPrefix == null) { |
|
81 |
throw new IllegalArgumentException("No prefix provided!"); |
|
82 |
} else if (aPrefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) { |
|
83 |
return "http://namespace.openaire.eu"; |
|
84 |
} else if (aPrefix.equals("dc")) { |
|
85 |
return "http://purl.org/dc/elements/1.1/"; |
|
86 |
} else { |
|
87 |
return XMLConstants.NULL_NS_URI; |
|
88 |
} |
|
89 |
} |
|
90 |
}); |
|
91 |
|
|
92 |
} catch (Exception e) { |
|
93 |
log.fatal("error while instantiating DocumentBuilderFactory, Transfomer, Xpath.namespacecontext", e); |
|
94 |
throw new IllegalStateException(e); |
|
95 |
} |
|
96 |
} |
|
97 |
return tf; |
|
98 |
} |
|
99 |
|
|
100 |
/** |
|
101 |
* @param uniqueKey |
|
102 |
* @param i |
|
103 |
* @return |
|
104 |
*/ |
|
105 |
public String getValue(String uniqueKey, int i) { |
|
106 |
if (!mapOfResults.containsKey(uniqueKey)) { |
|
107 |
throw new IllegalStateException("unknown key: " + uniqueKey); |
|
108 |
} |
|
109 |
return mapOfResults.get(uniqueKey).get(i); |
|
110 |
} |
|
111 |
|
|
112 |
/** |
|
113 |
* @param uniqueKey |
|
114 |
* @param i |
|
115 |
* @return |
|
116 |
* @deprecated |
|
117 |
*/ |
|
118 |
// public String convert(String uniqueKey, int i){ |
|
119 |
// if (mapOfResults == null){ |
|
120 |
// return "transformationFunctionProxy_convert not initialized"; |
|
121 |
// }else{ |
|
122 |
// if (!mapOfResults.containsKey(uniqueKey)){ |
|
123 |
// throw new IllegalStateException("unknown key: " + uniqueKey); |
|
124 |
// } |
|
125 |
// return mapOfResults.get(uniqueKey).get(i); |
|
126 |
// } |
|
127 |
// } |
|
128 |
|
|
129 |
/** |
|
130 |
* @param uniqueKey |
|
131 |
* @param i |
|
132 |
* @param aPos |
|
133 |
* @return |
|
134 |
* @deprecated |
|
135 |
*/ |
|
136 |
// public String convert(String uniqueKey, int i, int aPos){ |
|
137 |
// if (mapOfResults == null){ |
|
138 |
// return "transformationFunctionProxy_convert not initialized"; |
|
139 |
// }else{ |
|
140 |
// if (!mapOfResults.containsKey(uniqueKey)){ |
|
141 |
// throw new IllegalStateException("unknown key: " + uniqueKey); |
|
142 |
// } |
|
143 |
// return mapOfResults.get(uniqueKey).get(i, aPos); |
|
144 |
// } |
|
145 |
// } |
|
146 |
|
|
147 |
/** |
|
148 |
* @param uniqueKey |
|
149 |
* @param i |
|
150 |
* @return |
|
151 |
*/ |
|
152 |
public String extract(String uniqueKey, int i) { |
|
153 |
if (mapOfResults == null) { |
|
154 |
return "transformationFunctionProxy_extract not initialized"; |
|
155 |
} else { |
|
156 |
if (!mapOfResults.containsKey(uniqueKey)) { |
|
157 |
throw new IllegalStateException("unknown key: " + uniqueKey); |
|
158 |
} |
|
159 |
return mapOfResults.get(uniqueKey).get(i); |
|
160 |
} |
|
161 |
} |
|
162 |
|
|
163 |
/** |
|
164 |
* normalize values given as an input value by using a vocabulary |
|
165 |
* |
|
166 |
* @param aInput - the value as a String |
|
167 |
* @param aVocabularyName - the name of the vocabulary, which must be known for the vocabulary registry |
|
168 |
* @return |
|
169 |
*/ |
|
170 |
public synchronized String convertString(String aInput, String aVocabularyName) { |
|
171 |
List<String> values = new LinkedList<String>(); |
|
172 |
values.add(aInput); |
|
173 |
try { |
|
174 |
log.debug("conversion input: " + aInput); |
|
175 |
String conversionResult = convertFunction.executeSingleValue(aVocabularyName, values); |
|
176 |
log.debug("conversion result: " + conversionResult); |
|
177 |
return conversionResult; |
|
178 |
} catch (ProcessingException e) { |
|
179 |
log.fatal("convert failed for args 'input': " + aInput + " , 'vocabularyName': " + aVocabularyName, e); |
|
180 |
throw new IllegalStateException(e); |
|
181 |
} |
|
182 |
} |
|
183 |
|
|
184 |
/** |
|
185 |
* normalize values given as a NodeList by using a vocabulary |
|
186 |
* |
|
187 |
* @param aInput - the input values as NodeList |
|
188 |
* @param aVocabularyName - the name of the vocabulary, which must be known for the vocabulary registry |
|
189 |
* @return |
|
190 |
*/ |
|
191 |
public synchronized String convert(NodeList aInput, String aVocabularyName) { |
|
192 |
List<String> values = new LinkedList<String>(); |
|
193 |
getTextFromNodeList(aInput, values); |
|
194 |
try { |
|
195 |
return convertFunction.executeSingleValue(aVocabularyName, values); |
|
196 |
} catch (ProcessingException e) { |
|
197 |
throw new IllegalStateException(e); |
|
198 |
} |
|
199 |
} |
|
200 |
|
|
201 |
public synchronized String convert(NodeList aInput, String aVocabularyName, String aDefaultPattern, String aFunction) { |
|
202 |
List<String> values = new LinkedList<String>(); |
|
203 |
getTextFromNodeList(aInput, values); |
|
204 |
try { |
|
205 |
List<String> results = convertFunction.executeFilterByParams(aVocabularyName, values, aDefaultPattern, aFunction); |
|
206 |
if (results.size() > 0) |
|
207 |
return results.get(0); |
|
208 |
else |
|
209 |
return ""; |
|
210 |
} catch (ProcessingException e) { |
|
211 |
throw new IllegalStateException(e); |
|
212 |
} |
|
213 |
} |
|
214 |
|
|
215 |
private void getTextFromNodeList(NodeList aNodeList, List<String> aTextvalues) { |
|
216 |
for (int i = 0; i < aNodeList.getLength(); i++) { |
|
217 |
Node n = aNodeList.item(i); |
|
218 |
if (n.getNodeType() == Node.ELEMENT_NODE) |
|
219 |
getTextFromNodeList(n.getChildNodes(), aTextvalues); |
|
220 |
else if (n instanceof Text) |
|
221 |
aTextvalues.add(n.getNodeValue()); |
|
222 |
} |
|
223 |
} |
|
224 |
|
|
225 |
/** |
|
226 |
* substitutes using regular expression |
|
227 |
* |
|
228 |
* @param aInput |
|
229 |
* @param aReplacement |
|
230 |
* @param aRegularExpression |
|
231 |
* @return |
|
232 |
*/ |
|
233 |
public synchronized String regExpr(String aInput, String aReplacement, String aRegularExpression) { |
|
234 |
try { |
|
235 |
int lastSlash = aRegularExpression.lastIndexOf("/"); |
|
236 |
String trailingOptions = aRegularExpression.substring(lastSlash); |
|
237 |
// log.debug("trailingOptions: " + trailingOptions); |
|
238 |
int replacementSlash = aRegularExpression.substring(0, lastSlash).lastIndexOf("/"); |
|
239 |
String replacementFromExpression = aRegularExpression.substring(replacementSlash + 1, lastSlash); |
|
240 |
// log.debug("replacementFromExpr lengt: " + replacementFromExpression.length() + ", value: " + replacementFromExpression); |
|
241 |
String newRegExpr = aRegularExpression.substring(0, replacementSlash + 1) + aReplacement + replacementFromExpression + trailingOptions; |
|
242 |
// log.debug("newRegExpr: " + newRegExpr); |
|
243 |
return regExprFunction.executeSingleValue(newRegExpr, aInput, aReplacement); |
|
244 |
} catch (ProcessingException e) { |
|
245 |
throw new IllegalStateException(e); |
|
246 |
} |
|
247 |
} |
|
248 |
|
|
249 |
public String lookup(String aIdentifier, String aPropertyKey) { |
|
250 |
log.debug("functionProxy.lookup: param identifier: " + aIdentifier + " , key: " + aPropertyKey); |
|
251 |
return this.lookupRecord.getPropertyValue(aIdentifier, aPropertyKey); |
|
252 |
} |
|
253 |
|
|
254 |
public synchronized Collection<String> split(NodeList aInput, String aRegularExpression, String aCallId) { |
|
255 |
try { |
|
256 |
List<String> textValues = new LinkedList<String>(); |
|
257 |
getTextFromNodeList(aInput, textValues); |
|
258 |
return split.executeAllValues(textValues, aRegularExpression); |
|
259 |
//return split.executeSingleValue(textValues, aRegularExpression, aCallId); |
|
260 |
} catch (ProcessingException e) { |
|
261 |
throw new IllegalStateException(e); |
|
262 |
} |
|
263 |
} |
|
264 |
|
|
265 |
public synchronized String split(String aCallId) { |
|
266 |
try { |
|
267 |
return split.executeSingleValue(aCallId); |
|
268 |
} catch (ProcessingException e) { |
|
269 |
throw new IllegalStateException(e); |
|
270 |
} |
|
271 |
} |
|
272 |
|
|
273 |
/** |
|
274 |
* extract content that match pattern given by a regular expression from a given node |
|
275 |
* |
|
276 |
* @param aXpathExprJson |
|
277 |
* @param aInput |
|
278 |
* @param aRegExpression |
|
279 |
* @return nodeList |
|
280 |
*/ |
|
281 |
public synchronized NodeList identifierExtract(String aXpathExprJson, Node aInput, String aRegExpression) { |
|
282 |
String xpathExprJson = StringEscapeUtils.unescapeXml(aXpathExprJson); |
|
283 |
log.debug("unescape xpathExprJson: " + xpathExprJson); |
|
284 |
String regExpression = StringEscapeUtils.unescapeXml(aRegExpression); |
|
285 |
log.debug("unescape regExpr" + regExpression); |
|
286 |
|
|
287 |
try { |
|
288 |
List<String> xpathExprList = JSONParser.defaultJSONParser().parse(List.class, xpathExprJson); |
|
289 |
|
|
290 |
// workaround: rewrap, why ? |
|
291 |
DOMSource s = new DOMSource(aInput); |
|
292 |
StringWriter w = new StringWriter(); |
|
293 |
Result r = new StreamResult(w); |
|
294 |
transformer.transform(s, r); |
|
295 |
Document doc = docBuilder.parse(new InputSource(new StringReader(w.toString()))); |
|
296 |
|
|
297 |
return identifierExtractFunction.extract(xpathExprList, doc, regExpression, docBuilder.newDocument(), xpath); |
|
298 |
} catch (Exception e) { |
|
299 |
log.fatal("identifierExtract failed for node value: " + aInput.getNodeValue(), e); |
|
300 |
throw new IllegalStateException(e.getMessage()); |
|
301 |
} |
|
302 |
} |
|
303 |
|
|
304 |
/** |
|
305 |
* @param key |
|
306 |
* @param resultsFunction_getvalue |
|
307 |
*/ |
|
308 |
public void setResults(String key, FunctionResults resultsFunction_getvalue) { |
|
309 |
mapOfResults.put(key, resultsFunction_getvalue); |
|
310 |
} |
|
311 |
|
|
312 |
/** |
|
313 |
* @return the convertFunction |
|
314 |
*/ |
|
315 |
public Convert getConvertFunction() { |
|
316 |
return convertFunction; |
|
317 |
} |
|
318 |
|
|
319 |
/** |
|
320 |
* @param convertFunction the convertFunction to set |
|
321 |
*/ |
|
322 |
public void setConvertFunction(Convert convertFunction) { |
|
323 |
this.convertFunction = convertFunction; |
|
324 |
} |
|
325 |
|
|
326 |
/** |
|
327 |
* @return the lookupRecord |
|
328 |
*/ |
|
329 |
public LookupRecord getLookupRecord() { |
|
330 |
return lookupRecord; |
|
331 |
} |
|
332 |
|
|
333 |
/** |
|
334 |
* @param lookupRecord the lookupRecord to set |
|
335 |
*/ |
|
336 |
public void setLookupRecord(LookupRecord lookupRecord) { |
|
337 |
this.lookupRecord = lookupRecord; |
|
338 |
} |
|
339 |
|
|
340 |
} |
Also available in: Unified diff
Branch for migration to Saxon HE