Project

General

Profile

1
package eu.dnetlib.data.collective.transformation.engine;
2

    
3
import java.io.StringReader;
4
import java.util.HashMap;
5
import java.util.LinkedList;
6
import java.util.List;
7
import java.util.Map;
8

    
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.dom4j.Document;
12
import org.dom4j.DocumentException;
13
import org.dom4j.DocumentHelper;
14
import org.dom4j.Node;
15
import org.dom4j.XPath;
16
import org.dom4j.io.SAXReader;
17

    
18
import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy;
19
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
20
import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
21
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
22
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
23
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue;
24
import eu.dnetlib.data.collective.transformation.rulelanguage.IRule;
25
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
26
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
27

    
28
/**
29
 * @author jochen
30
 *
31
 */
32
public class PreProcessor {
33

    
34
	@SuppressWarnings("unused")
35
	private static final Log log = LogFactory.getLog(PreProcessor.class);
36
	private Convert convertFunction;
37
	private Extract extractFunction;
38
	private RetrieveValue retrieveFunction;
39
	private RegularExpression regExprFunction;
40
	private TransformationFunctionProxy functionProxy;
41
	private SAXReader reader = new SAXReader();
42
	private Map<String, String> nsMap = new HashMap<String, String>();
43

    
44
	/**
45
	 * pre-process output values from object records using a function call
46
	 * @param aFunctionCall function call object
47
	 * @param aObjectRecords list of object records
48
	 * @param aNamespaceMap map of namespace prefixes and uris
49
	 */
50
	public void preprocess( 
51
			FunctionCall aFunctionCall, 
52
			List<String> aObjectRecords, 
53
			Map<String, String> aNamespaceMap, 
54
			Map<String, String> aStaticResults, 
55
			Map<String, String> aJobProperties, 
56
			Map<String, IRule> aVarRules){
57
		this.nsMap = aNamespaceMap;
58
		FunctionResults functionResults = new FunctionResults();
59
		
60
		try {
61
			if (aFunctionCall.getExternalFunctionName().equals("extract")){
62
				String featureName = aFunctionCall.getParameters().get(Extract.paramNameFeature);
63
				functionResults.addAll(extractFunction.execute(aObjectRecords, featureName));
64
			}else{
65
				for (String objRecord: aObjectRecords){
66
					String result = null;
67
						if (aFunctionCall.getExternalFunctionName().equals("convert")){
68
							if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){
69
								functionResults.add(aStaticResults.get(aFunctionCall.getUuid()));
70
							}else{
71
								String vocabName = aFunctionCall.getParameters().get(Convert.paramVocabularyName);
72
								String fieldExpr = aFunctionCall.getParameters().get(Convert.paramFieldValue);
73
								List<String> recordValues = getValuesFromRecord(objRecord, fieldExpr);
74
								if (aFunctionCall.isStatic())
75
									aStaticResults.put(aFunctionCall.getUuid(), convertFunction.executeSingleValue(vocabName, recordValues));
76
								else
77
									functionResults.add(convertFunction.executeAllValues(vocabName, recordValues));
78
							}
79
						}else if (aFunctionCall.getExternalFunctionName().equals("getValue")){
80
							if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid()))
81
								functionResults.add(aStaticResults.get(aFunctionCall.getUuid()));
82
							else{
83
								String functionName = aFunctionCall.getParameters().get(RetrieveValue.paramFunctionName);
84
								result = retrieveFunction.executeSingleValue(functionName, aFunctionCall.getArguments(), objRecord, nsMap);
85
								functionResults.add(result);
86
								if (aFunctionCall.isStatic())
87
									aStaticResults.put(aFunctionCall.getUuid(), result);
88
							}
89
						}else if (aFunctionCall.getExternalFunctionName().equals("regExpr")){
90
							// TODO
91
							if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){
92
								//log.debug("static functioncal; static result exist to compute regexpr: " + aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr));
93
//								functionResults.add(aStaticResults.get(aFunctionCall.getUuid()));
94
							}else{
95
//								log.debug("static functioncal to compute regexpr: " + aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr));
96
								String regularExpression = aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr); //.replaceAll("'", "");
97
								String expression1 = aFunctionCall.getParameters().get(RegularExpression.paramExpr1);
98
								List<String> recordValues = null;
99
								// distinguish xpath-expr, jobConst, var
100
//								log.debug("expression1: " + expression1);
101
								
102
								if (aJobProperties.containsKey(expression1)){
103
									recordValues = new LinkedList<String>();
104
									recordValues.add(aJobProperties.get(expression1));
105
								}else{
106
									recordValues = getValuesFromRecord(objRecord, expression1);
107
								}
108
								
109
								String expression2 = aFunctionCall.getParameters().get(RegularExpression.paramExpr2);
110
								String replacement = "";
111
								if (aJobProperties.containsKey(expression2)){
112
									replacement = aJobProperties.get(expression2);
113
								}else if (aVarRules.containsKey(expression2)){
114
									Rules varRule = (Rules)aVarRules.get(expression2);
115
									replacement = varRule.getConstant().replace("'", ""); // currently limited to constant rules.
116
								}else {
117
									replacement = getValuesFromRecord(objRecord, expression2).get(0); // get the first available value
118
								}
119
								List<String> regExprResults = new LinkedList<String>();
120
								for (String fieldValue: recordValues){
121
									try {
122
										int lastSlash = regularExpression.lastIndexOf("/");
123
										String trailingOptions = regularExpression.substring(lastSlash);
124
										int replacementSlash = regularExpression.substring(0, lastSlash).lastIndexOf("/");
125
										String replacementFromExpression = regularExpression.substring(replacementSlash + 1, lastSlash);
126
										String newRegExpr = regularExpression.substring(0, replacementSlash + 1) + replacement + replacementFromExpression + trailingOptions; // ???
127
										result = regExprFunction.executeSingleValue(newRegExpr, fieldValue, replacement);
128
										regExprResults.add(result);
129
									} catch (ProcessingException e) {
130
										throw new IllegalStateException(e);
131
									}
132
//									regExprResults.add(regExprFunction.executeSingleValue(regularExpression, fieldValue, expression2));
133
								}
134
								functionResults.add(regExprResults);
135
								// assuming 1 result only
136
								if (aFunctionCall.isStatic()){
137
									aStaticResults.put(aFunctionCall.getUuid(), result);
138
								}
139

    
140
								// unsupported
141
//								if (aFunctionCall.isStatic()){
142
//									aStaticResults.put(aFunctionCall.getUuid(), result);
143
//								}
144
							}
145
						}
146
				}			
147
			}		
148
		} catch (ProcessingException e) {
149
			throw new IllegalStateException(e);
150
		} catch (DocumentException e) {
151
			throw new IllegalStateException(e);
152
		}
153
		functionProxy.setResults(aFunctionCall.getUuid(), functionResults);
154
	}
155

    
156
	public void setFunctionProxy(TransformationFunctionProxy functionProxy) {
157
		this.functionProxy = functionProxy;
158
	}
159

    
160
	public TransformationFunctionProxy getFunctionProxy() {
161
		return functionProxy;
162
	}
163

    
164
	public void setConvertFunction(Convert convertFunction) {
165
		this.convertFunction = convertFunction;
166
	}
167

    
168
	public Convert getConvertFunction() {
169
		return convertFunction;
170
	}
171

    
172
	/**
173
	 * @param retrieveFunction the retrieveFunction to set
174
	 */
175
	public void setRetrieveFunction(RetrieveValue retrieveFunction) {
176
		this.retrieveFunction = retrieveFunction;
177
	}
178

    
179
	/**
180
	 * @return the retrieveFunction
181
	 */
182
	public RetrieveValue getRetrieveFunction() {
183
		return retrieveFunction;
184
	}
185
	
186
	/**
187
	 * @return the regExprFunction
188
	 */
189
	public RegularExpression getRegExprFunction() {
190
		return regExprFunction;
191
	}
192

    
193
	/**
194
	 * @param regExprFunction the regExprFunction to set
195
	 */
196
	public void setRegExprFunction(RegularExpression regExprFunction) {
197
		this.regExprFunction = regExprFunction;
198
	}
199

    
200
	/**
201
	 * @param extractFunction the extractFunction to set
202
	 */
203
	public void setExtractFunction(Extract extractFunction) {
204
		this.extractFunction = extractFunction;
205
	}
206

    
207
	/**
208
	 * @return the extractFunction
209
	 */
210
	public Extract getExtractFunction() {
211
		return extractFunction;
212
	}
213

    
214
	/**
215
	 * evaluate given XPath Expr applied on a record and return the values as a list of strings
216
	 * @param record
217
	 * @param xpathExpr
218
	 * @return list of strings
219
	 * @throws DocumentException
220
	 */
221
	@SuppressWarnings("unchecked")
222
	private List<String> getValuesFromRecord(String record, String xpathExpr) throws DocumentException{
223
		List<String> values = new LinkedList<String>();
224
		Document doc = reader.read(new StringReader(record));
225
		XPath xpath =  DocumentHelper.createXPath(xpathExpr);
226
		xpath.setNamespaceURIs(nsMap);
227
		Object context = xpath.evaluate(doc);
228
		if (context instanceof String)
229
			values.add((String)context);
230
		else if (context instanceof List)
231
			for (Node node: (List<Node>)context)
232
				values.add(node.getText());
233
		else if (context instanceof Node)
234
			values.add( ((Node)context).getText());
235
		else if (context instanceof Number)
236
			values.add( ((Number)context).intValue() + "");
237
		return values;
238
	}
239

    
240
}
(2-2/3)