Project

General

Profile

1
package eu.dnetlib.data.collective.transformation.engine;
2

    
3
import java.io.StringReader;
4
import java.util.*;
5
import javax.xml.transform.TransformerFactory;
6
import javax.xml.xpath.XPath;
7
import javax.xml.xpath.XPathConstants;
8
import javax.xml.xpath.XPathExpressionException;
9
import javax.xml.xpath.XPathFactory;
10

    
11
import eu.dnetlib.common.profile.ResourceDao;
12
import eu.dnetlib.data.collective.transformation.IDatabaseConnector;
13
import eu.dnetlib.data.collective.transformation.TransformationException;
14
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
15
import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy;
16
import eu.dnetlib.data.collective.transformation.engine.core.ITransformation;
17
import eu.dnetlib.data.collective.transformation.engine.functions.*;
18
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue.FUNCTION;
19
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
20
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type;
21
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
22
import eu.dnetlib.data.collective.transformation.utils.BlacklistConsumer;
23
import net.sf.saxon.expr.instruct.TerminationException;
24
import org.apache.commons.logging.Log;
25
import org.apache.commons.logging.LogFactory;
26
import org.springframework.core.io.Resource;
27
import org.w3c.dom.Node;
28
import org.xml.sax.InputSource;
29

    
30
// import eu.dnetlib.data.collective.transformation.engine.functions.Dblookup;
31

    
32
/**
33
 * @author jochen
34
 */
35
public class SimpleTransformationEngine {
36

    
37
	private static Log log = LogFactory.getLog(SimpleTransformationEngine.class);
38
	private final List<String> mdRecords = new LinkedList<String>();
39
	private ITransformation transformation;
40
	private VocabularyRegistry vocabularyRegistry;
41
	private IDatabaseConnector databaseConnector;
42
	private ResourceDao resourceDao;
43
	private IFeatureExtraction featureExtraction;
44
	private long totalTransformedRecords = 0;
45
	private long totalIgnoredRecords = 0;
46
	private String mappingFile;
47
	private boolean stylesheetParamsCalculated = false;
48
	private boolean preprocessingDone = false;
49
	private Map<String, String> stylesheetParams = new LinkedHashMap<String, String>();
50
	private Resource blacklistApi;
51
	private List<String> blacklistedRecords = new LinkedList<String>();
52
	private TransformerFactory transformerFactory;
53

    
54

    
55
	public SimpleTransformationEngine(final TransformerFactory transformerFactory){
56
		this.transformerFactory = transformerFactory;
57
	}
58
	/**
59
	 * execute any preprocessings declared in the transformation script prior starting the transformation of records
60
	 */
61
	public void preprocess(String dataSourceId) {
62
		for (Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
63
			Iterator<String> it = preprocMap.keySet().iterator();
64
			while (it.hasNext()) {
65
				String function = it.next();
66
				//				if (function.equals("dblookup")) {
67
				//					Dblookup fun = new Dblookup();
68
				//					fun.setDbConnector(databaseConnector);
69
				//					try {
70
				//						log.debug("preprocessingMap value: " + preprocMap.get(function));
71
				//						TransformationFunctionProxy.getInstance().setLookupRecord(fun.getResults(preprocMap.get(function)));
72
				//					} catch (Exception e) {
73
				//						log.debug(e.getMessage());
74
				//						throw new IllegalStateException(e);
75
				//					}
76
				//				}
77
				if (function.equals("blacklist")) {
78
					BlacklistConsumer bc = new BlacklistConsumer();
79
					try {
80
						blacklistedRecords = bc.getBlackList(blacklistApi.getURL() + dataSourceId);
81
					} catch (Exception e) {
82
						throw new IllegalStateException("error in preprocess: " + e.getMessage());
83
					}
84
				}
85
			}
86
		}
87
		log.debug("preprocessing done.");
88
	}
89

    
90
	/**
91
	 * check if blacklistedRecords exist and if so check if the current record is blacklisted by its objIdentifier
92
	 *
93
	 * @param aRecord
94
	 * @return
95
	 * @throws XPathExpressionException
96
	 * @throws ProcessingException
97
	 */
98
	private boolean isBlacklistRecord(String aRecord) {
99
		if (blacklistedRecords.size() == 0) return false;
100
		XPath xpath = XPathFactory.newInstance().newXPath();
101
		try {
102
			Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
103
			String objId = xpath.evaluate("//*[local-name()='objIdentifier']", root);
104
			if (blacklistedRecords.contains(objId)) return true;
105
		} catch (Exception e) {
106
			throw new IllegalStateException("error in isBlacklistRecord: " + e.getMessage());
107
		}
108
		return false;
109
	}
110

    
111
	/**
112
	 * transforms a source record
113
	 *
114
	 * @param sourceRecord the record to transform
115
	 * @return transformed record
116
	 */
117
	public String transform(final String sourceRecord) {
118
		List<String> objectRecords = new LinkedList<String>();
119
		objectRecords.add(sourceRecord);
120
		int index = 0;
121
		mdRecords.clear();
122
		initTransformationFunction();
123

    
124
		if (!stylesheetParamsCalculated) {
125
			try {
126
				calculateStylesheetParams(sourceRecord);
127
			} catch (Exception e) {
128
				throw new IllegalStateException("error in calculateStylesheetParams" + e.getMessage());
129
			}
130
		}
131

    
132
		if (!preprocessingDone) {
133
			// xpath sourceRecord dataSourceid
134
			preprocess(stylesheetParams.get("varBlacklistDataSourceId"));
135
			preprocessingDone = true;
136
		}
137

    
138
		if (isBlacklistRecord(sourceRecord)) {
139
			try {
140
				mdRecords.add(transformation.transformRecord(sourceRecord, ITransformation.XSLSyntaxcheckfailed));
141
			} catch (Exception e) {
142
				log.fatal(sourceRecord);
143
				throw new IllegalStateException(e);
144
			}
145
		} else if (!transformation.getRuleLanguageParser().isXslStylesheet()) {
146
			// iterate over all rules which are functionCalls
147
			log.debug("functionCalls size: " + transformation.getRuleLanguageParser().getFunctionCalls().size());
148
			for (FunctionCall functionCall : transformation.getRuleLanguageParser().getFunctionCalls()) {
149
				preprocess(objectRecords, functionCall);
150
			}
151
			for (String record : objectRecords) {
152
				// log.debug(record);
153
				try {
154
					log.debug("now run transformation for record with index: " + index);
155
					try {
156
						String transformedRecord = transformation.transformRecord(record, index);
157
						mdRecords.add(transformedRecord);
158
					} catch (TerminationException e) {
159
						log.debug("record transformation terminated.");
160
						String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
161
						log.debug(failedRecord);
162
						totalIgnoredRecords++;
163
						mdRecords.add(failedRecord);
164
					}
165
				} catch (TransformationException e) {
166
					log.error(sourceRecord);
167
					throw new IllegalStateException(e);
168
				}
169
				index++;
170
			}
171
		} else {
172
			for (String record : objectRecords) {
173
				// test for init params and assign values
174
				try {
175
					log.debug("now run transformation for record with index: " + index);
176
					try {
177
						String transformedRecord = transformation.transformRecord(record, stylesheetParams);
178
						mdRecords.add(transformedRecord);
179
					} catch (TerminationException e) {
180
						String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
181
						totalIgnoredRecords++;
182
						log.debug(failedRecord);
183
						mdRecords.add(failedRecord);
184
					}
185
				} catch (TransformationException e) {
186
					log.error(sourceRecord);
187
					throw new IllegalStateException(e);
188
				}
189
				index++;
190
			}
191
		}
192

    
193
		totalTransformedRecords = totalTransformedRecords + mdRecords.size();
194
		log.debug("objRecordSize: " + objectRecords.size() + ", mdRecordSize: " + mdRecords.size() + ", ignoredRecordSize: " + totalIgnoredRecords);
195
		return mdRecords.get(0);
196
	}
197

    
198
	private void calculateStylesheetParams(final String aRecord) throws XPathExpressionException, ProcessingException {
199
		stylesheetParamsCalculated = true;
200
		XPath xpath = XPathFactory.newInstance().newXPath();
201
		Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
202
		String datasourcePrefix = xpath.evaluate("//*[local-name()='datasourceprefix']", root);
203
		String profileXquery = "collection('/db/DRIVER/RepositoryServiceResources')//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=\"NamespacePrefix\"][value=\""
204
				+ datasourcePrefix + "\"]]";
205
		//String repositoryId = xpath.evaluate("//*[local-name()='repositoryId']", root);
206
		log.debug("profileXquery: " + profileXquery);
207
		// static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId",
208
		// xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
209
		RetrieveValue retrieveValue = new RetrieveValue();
210
		retrieveValue.setResourceDao(resourceDao);
211
		List<Argument> argList = new LinkedList<Argument>();
212
		argList.add(new Argument(Type.VALUE, profileXquery));
213
		Argument argXpath = new Argument(Type.INPUTFIELD, "//OFFICIAL_NAME");
214
		argList.add(argXpath);
215
		String varOfficialName = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
216
		stylesheetParams.put("varOfficialName", varOfficialName);
217
		argList.remove(argXpath);
218
		argXpath = new Argument(Type.INPUTFIELD, "//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value");
219
		argList.add(argXpath);
220
		String varDataSourceId = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
221
		stylesheetParams.put("varDataSourceId", varDataSourceId);
222
		argList.remove(argXpath);
223
		argXpath = new Argument(Type.INPUTFIELD, "//CONFIGURATION/DATASOURCE_TYPE");
224
		argList.add(argXpath);
225
		String varDsType = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
226
		stylesheetParams.put("varDsType", varDsType);
227
		argList.remove(argXpath);
228

    
229
		// if blacklist
230
		for (Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
231
			Iterator<String> it = preprocMap.keySet().iterator();
232
			while (it.hasNext()) {
233
				String function = it.next();
234
				if (function.equals("blacklist")) {
235
					argXpath = new Argument(Type.INPUTFIELD, preprocMap.get(function)); // blacklistDataSourceIdXpath
236
					argList.add(argXpath);
237
					String varBlacklistDataSourceId =
238
							retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
239
					stylesheetParams.put("varBlacklistDataSourceId", varBlacklistDataSourceId);
240
					argList.remove(argXpath);
241
				}
242
			}
243
		}
244
	}
245

    
246
	private void initTransformationFunction() {
247
		if (this.vocabularyRegistry == null) { throw new IllegalStateException("vocabularyReg is null"); }
248
		Convert convertFunction = new Convert();
249
		convertFunction.setVocabularyRegistry(this.vocabularyRegistry);
250
		TransformationFunctionProxy.getInstance(transformerFactory).setConvertFunction(convertFunction);
251

    
252
	}
253

    
254
	/**
255
	 * preprocesses function if function is configured resp.
256
	 *
257
	 * @param records       list of object records
258
	 * @param aFunctionCall
259
	 */
260
	private void preprocess(final List<String> records, final FunctionCall aFunctionCall) {
261
		try {
262
			log.debug("preprocess");
263
			if (transformation.getRuleLanguageParser() == null) { throw new IllegalStateException("rulelanguageparser not initialised"); }
264
			if (transformation.getRuleLanguageParser().getNamespaceDeclarations() == null) { throw new IllegalStateException("nsDecl is null"); }
265
			PreProcessor preProc = new PreProcessor();
266
			preProc.setConvertFunction(TransformationFunctionProxy.getInstance(transformerFactory).getConvertFunction());
267
			RetrieveValue retrieveValue = new RetrieveValue();
268
			retrieveValue.setResourceDao(resourceDao);
269
			preProc.setRetrieveFunction(retrieveValue);
270
			RegularExpression regExpr = new RegularExpression();
271
			preProc.setRegExprFunction(regExpr);
272
			TransformationFunctionProxy functionProxy = TransformationFunctionProxy.getInstance(transformerFactory);
273
			preProc.setFunctionProxy(functionProxy);
274
			Extract extractFunction = new Extract();
275
			extractFunction.setFeatureExtraction(featureExtraction);
276
			preProc.setExtractFunction(extractFunction);
277
			if (aFunctionCall.doPreprocess() || aFunctionCall.isStatic()) {
278
				// log.debug("now call preprocess with: " + aFunctionCall.getExternalFunctionName() + " " + aFunctionCall.getUuid());
279
				preProc.preprocess(
280
						aFunctionCall,
281
						records,
282
						transformation.getRuleLanguageParser().getNamespaceDeclarations(),
283
						transformation.getStaticTransformationResults(),
284
						transformation.getJobProperties(),
285
						transformation.getRuleLanguageParser().getVariableMappingRules());
286
				// log.debug("preprocess end");
287
			} else {
288
				log.debug("skip preprocessing for function: " + aFunctionCall.getExternalFunctionName());
289
			}
290

    
291
		} catch (Exception e) {
292
			throw new IllegalStateException(e);
293
		}
294

    
295
	}
296

    
297
	/**
298
	 * @return the transformation
299
	 */
300
	public ITransformation getTransformation() {
301
		return transformation;
302
	}
303

    
304
	/**
305
	 * @param transformation the transformation to set
306
	 */
307
	public void setTransformation(final ITransformation transformation) {
308
		this.transformation = transformation;
309
	}
310

    
311
	/**
312
	 * @return the vocabularyRegistry
313
	 */
314
	public VocabularyRegistry getVocabularyRegistry() {
315
		return vocabularyRegistry;
316
	}
317

    
318
	/**
319
	 * @param vocabularyRegistry the vocabularyRegistry to set
320
	 */
321
	public void setVocabularyRegistry(final VocabularyRegistry vocabularyRegistry) {
322
		this.vocabularyRegistry = vocabularyRegistry;
323
	}
324

    
325
	/**
326
	 * @return the resourceDao
327
	 */
328
	public ResourceDao getResourceDao() {
329
		return resourceDao;
330
	}
331

    
332
	/**
333
	 * @param resourceDao the resourceDao to set
334
	 */
335
	public void setResourceDao(final ResourceDao resourceDao) {
336
		this.resourceDao = resourceDao;
337
	}
338

    
339
	/**
340
	 * @return the featureExtraction
341
	 */
342
	public IFeatureExtraction getFeatureExtraction() {
343
		return featureExtraction;
344
	}
345

    
346
	/**
347
	 * @param featureExtraction the featureExtraction to set
348
	 */
349
	public void setFeatureExtraction(final IFeatureExtraction featureExtraction) {
350
		this.featureExtraction = featureExtraction;
351
	}
352

    
353
	/**
354
	 * @return the databaseConnector
355
	 */
356
	public IDatabaseConnector getDatabaseConnector() {
357
		return databaseConnector;
358
	}
359

    
360
	/**
361
	 * @param databaseConnector the databaseConnector to set
362
	 */
363
	public void setDatabaseConnector(final IDatabaseConnector databaseConnector) {
364
		this.databaseConnector = databaseConnector;
365
	}
366

    
367
	public long getTotalTransformedRecords() {
368
		return this.totalTransformedRecords;
369
	}
370

    
371
	public long getTotalIgnoredRecords() {
372
		return this.totalIgnoredRecords;
373
	}
374

    
375
	/**
376
	 * @return the mappingFile
377
	 */
378
	public String getMappingFile() {
379
		return mappingFile;
380
	}
381

    
382
	/**
383
	 * @param mappingFile the mappingFile to set
384
	 */
385
	public void setMappingFile(final String mappingFile) {
386
		this.mappingFile = mappingFile;
387
	}
388

    
389
	public Resource getBlacklistApi() {
390
		return blacklistApi;
391
	}
392

    
393
	public void setBlacklistApi(Resource blacklistApi) {
394
		this.blacklistApi = blacklistApi;
395
	}
396
}
(3-3/3)