Project

General

Profile

1
package eu.dnetlib.data.collective.transformation.engine;
2

    
3
import java.io.StringReader;
4
import java.util.Iterator;
5
import java.util.LinkedHashMap;
6
import java.util.LinkedList;
7
import java.util.List;
8
import java.util.Map;
9

    
10
import javax.xml.xpath.XPath;
11
import javax.xml.xpath.XPathConstants;
12
import javax.xml.xpath.XPathExpressionException;
13
import javax.xml.xpath.XPathFactory;
14

    
15
import org.apache.commons.lang3.StringUtils;
16
import org.apache.commons.logging.Log;
17
import org.apache.commons.logging.LogFactory;
18
import org.springframework.core.io.Resource;
19
import org.w3c.dom.Node;
20
import org.xml.sax.InputSource;
21

    
22
import eu.dnetlib.common.profile.ResourceDao;
23
import eu.dnetlib.data.collective.transformation.IDatabaseConnector;
24
import eu.dnetlib.data.collective.transformation.TransformationException;
25
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
26
import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy;
27
import eu.dnetlib.data.collective.transformation.engine.core.ITransformation;
28
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
29
// import eu.dnetlib.data.collective.transformation.engine.functions.Dblookup;
30
import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
31
import eu.dnetlib.data.collective.transformation.engine.functions.IFeatureExtraction;
32
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
33
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
34
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue;
35
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
36
import eu.dnetlib.data.collective.transformation.utils.BlacklistConsumer;
37
import eu.dnetlib.enabling.datasources.common.Api;
38
import eu.dnetlib.enabling.datasources.common.Datasource;
39
import eu.dnetlib.enabling.datasources.common.DsmException;
40
import eu.dnetlib.enabling.datasources.common.LocalDatasourceManager;
41
import net.sf.saxon.instruct.TerminationException;
42

    
43
/**
44
 * @author jochen
45
 *
46
 */
47
public class SimpleTransformationEngine {
48

    
49
	private LocalDatasourceManager<Datasource<?, ?, ?>, Api<?>> dsManager;
50

    
51
	private static Log log = LogFactory.getLog(SimpleTransformationEngine.class);
52
	private ITransformation transformation;
53
	private VocabularyRegistry vocabularyRegistry;
54
	private IDatabaseConnector databaseConnector;
55
	private ResourceDao resourceDao;
56
	private IFeatureExtraction featureExtraction;
57
	private final List<String> mdRecords = new LinkedList<>();
58
	private long totalTransformedRecords = 0;
59
	private long totalIgnoredRecords = 0;
60
	private String mappingFile;
61
	private boolean stylesheetParamsCalculated = false;
62
	private boolean preprocessingDone = false;
63
	private final Map<String, String> stylesheetParams = new LinkedHashMap<>();
64
	private Resource blacklistApi;
65
	private List<String> blacklistedRecords = new LinkedList<>();
66

    
67
	/**
68
	 * execute any preprocessings declared in the transformation script prior starting the transformation of records
69
	 */
70
	public void preprocess(final String dataSourceId) {
71
		for (final Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
72
			final Iterator<String> it = preprocMap.keySet().iterator();
73
			while (it.hasNext()) {
74
				final String function = it.next();
75
				// if (function.equals("dblookup")) {
76
				// Dblookup fun = new Dblookup();
77
				// fun.setDbConnector(databaseConnector);
78
				// try {
79
				// log.debug("preprocessingMap value: " + preprocMap.get(function));
80
				// TransformationFunctionProxy.getInstance().setLookupRecord(fun.getResults(preprocMap.get(function)));
81
				// } catch (Exception e) {
82
				// log.debug(e.getMessage());
83
				// throw new IllegalStateException(e);
84
				// }
85
				// }
86
				if (function.equals("blacklist")) {
87
					final BlacklistConsumer bc = new BlacklistConsumer();
88
					try {
89
						blacklistedRecords = bc.getBlackList(blacklistApi.getURL() + dataSourceId);
90
					} catch (final Exception e) {
91
						throw new IllegalStateException("error in preprocess: " + e.getMessage());
92
					}
93
				}
94
			}
95
		}
96
		log.debug("preprocessing done.");
97
	}
98

    
99
	/**
100
	 * check if blacklistedRecords exist and if so check if the current record is blacklisted by its objIdentifier
101
	 *
102
	 * @param aRecord
103
	 * @return
104
	 * @throws XPathExpressionException
105
	 * @throws ProcessingException
106
	 */
107
	private boolean isBlacklistRecord(final String aRecord) {
108
		if (blacklistedRecords.size() == 0) { return false; }
109
		final XPath xpath = XPathFactory.newInstance().newXPath();
110
		try {
111
			final Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
112
			final String objId = xpath.evaluate("//*[local-name()='objIdentifier']", root);
113
			if (blacklistedRecords.contains(objId)) { return true; }
114
		} catch (final Exception e) {
115
			throw new IllegalStateException("error in isBlacklistRecord: " + e.getMessage());
116
		}
117
		return false;
118
	}
119

    
120
	/**
121
	 * transforms a source record
122
	 *
123
	 * @param sourceRecord
124
	 *            the record to transform
125
	 * @return transformed record
126
	 */
127
	public String transform(final String sourceRecord) {
128
		final List<String> objectRecords = new LinkedList<>();
129
		objectRecords.add(sourceRecord);
130
		int index = 0;
131
		mdRecords.clear();
132
		initTransformationFunction();
133

    
134
		if (!stylesheetParamsCalculated) {
135
			try {
136
				calculateStylesheetParams(sourceRecord);
137
			} catch (final Exception e) {
138
				throw new IllegalStateException("error in calculateStylesheetParams" + e.getMessage(), e);
139
			}
140
		}
141

    
142
		if (!preprocessingDone) {
143
			// xpath sourceRecord dataSourceid
144
			preprocess(stylesheetParams.get("varBlacklistDataSourceId"));
145
			preprocessingDone = true;
146
		}
147

    
148
		if (isBlacklistRecord(sourceRecord)) {
149
			try {
150
				mdRecords.add(transformation.transformRecord(sourceRecord, ITransformation.XSLSyntaxcheckfailed));
151
			} catch (final Exception e) {
152
				log.fatal(sourceRecord);
153
				throw new IllegalStateException(e);
154
			}
155
		} else if (!transformation.getRuleLanguageParser().isXslStylesheet()) {
156
			// iterate over all rules which are functionCalls
157
			log.debug("functionCalls size: " + transformation.getRuleLanguageParser().getFunctionCalls().size());
158
			for (final FunctionCall functionCall : transformation.getRuleLanguageParser().getFunctionCalls()) {
159
				preprocess(objectRecords, functionCall);
160
			}
161
			for (final String record : objectRecords) {
162
				// log.debug(record);
163
				try {
164
					log.debug("now run transformation for record with index: " + index);
165
					try {
166
						final String transformedRecord = transformation.transformRecord(record, index);
167
						mdRecords.add(transformedRecord);
168
					} catch (final TerminationException e) {
169
						log.debug("record transformation terminated.");
170
						final String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
171
						log.debug(failedRecord);
172
						totalIgnoredRecords++;
173
						mdRecords.add(failedRecord);
174
					}
175
				} catch (final TransformationException e) {
176
					log.error(sourceRecord);
177
					throw new IllegalStateException(e);
178
				}
179
				index++;
180
			}
181
		} else {
182
			for (final String record : objectRecords) {
183
				// test for init params and assign values
184
				try {
185
					log.debug("now run transformation for record with index: " + index);
186
					try {
187
						final String transformedRecord = transformation.transformRecord(record, stylesheetParams);
188
						mdRecords.add(transformedRecord);
189
					} catch (final TerminationException e) {
190
						final String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
191
						totalIgnoredRecords++;
192
						log.debug(failedRecord);
193
						mdRecords.add(failedRecord);
194
					}
195
				} catch (final TransformationException e) {
196
					log.error(sourceRecord);
197
					throw new IllegalStateException(e);
198
				}
199
				index++;
200
			}
201
		}
202

    
203
		totalTransformedRecords = totalTransformedRecords + mdRecords.size();
204
		log.debug("objRecordSize: " + objectRecords.size() + ", mdRecordSize: " + mdRecords.size() + ", ignoredRecordSize: " + totalIgnoredRecords);
205
		return mdRecords.get(0);
206
	}
207

    
208
	private void calculateStylesheetParams(final String aRecord) throws XPathExpressionException, ProcessingException, DsmException {
209
		stylesheetParamsCalculated = true;
210
		final XPath xpath = XPathFactory.newInstance().newXPath();
211
		final Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
212

    
213
		final String dsId = xpath.evaluate("//*[local-name()='repositoryId']", root);
214
		final String nsPrefix = xpath.evaluate("//*[local-name()='datasourceprefix']", root);
215

    
216
		final Datasource<?, ?, ?> ds;
217
		if (StringUtils.isNotBlank(nsPrefix)) {
218
			ds = dsManager.getDsByNsPrefix(nsPrefix);
219
		} else if (StringUtils.isNotBlank(dsId)) { 
220
			ds = dsManager.getDs(dsId);
221
		} else {
222
			ds = null;
223
		}
224

    
225
		if (ds != null) {
226
			stylesheetParams.put("varOfficialName", ds.getOfficialname());
227
			stylesheetParams.put("varDataSourceId", ds.getId());
228
			stylesheetParams.put("varDsType", ds.getEoscDatasourceType());
229
		}
230
		// if blacklist
231
		for (final Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
232
			final Iterator<String> it = preprocMap.keySet().iterator();
233
			while (it.hasNext()) {
234
				final String function = it.next();
235
				if (function.equals("blacklist")) {
236
					// TODO
237
					// stylesheetParams.put("varBlacklistDataSourceId", varBlacklistDataSourceId);
238
				}
239
			}
240
		}
241
	}
242

    
243
	private void initTransformationFunction() {
244
		if (this.vocabularyRegistry == null) { throw new IllegalStateException("vocabularyReg is null"); }
245
		final Convert convertFunction = new Convert();
246
		convertFunction.setVocabularyRegistry(this.vocabularyRegistry);
247
		TransformationFunctionProxy.getInstance().setConvertFunction(convertFunction);
248

    
249
	}
250

    
251
	/**
252
	 * preprocesses function if function is configured resp.
253
	 *
254
	 * @param records
255
	 *            list of object records
256
	 * @param aFunctionCall
257
	 */
258
	private void preprocess(final List<String> records, final FunctionCall aFunctionCall) {
259
		try {
260
			log.debug("preprocess");
261
			if (transformation.getRuleLanguageParser() == null) { throw new IllegalStateException("rulelanguageparser not initialised"); }
262
			if (transformation.getRuleLanguageParser().getNamespaceDeclarations() == null) { throw new IllegalStateException("nsDecl is null"); }
263
			final PreProcessor preProc = new PreProcessor();
264
			preProc.setConvertFunction(TransformationFunctionProxy.getInstance().getConvertFunction());
265
			final RetrieveValue retrieveValue = new RetrieveValue();
266
			retrieveValue.setResourceDao(resourceDao);
267
			retrieveValue.setDsManager(dsManager);
268
			preProc.setRetrieveFunction(retrieveValue);
269
			final RegularExpression regExpr = new RegularExpression();
270
			preProc.setRegExprFunction(regExpr);
271
			final TransformationFunctionProxy functionProxy = TransformationFunctionProxy.getInstance();
272
			preProc.setFunctionProxy(functionProxy);
273
			final Extract extractFunction = new Extract();
274
			extractFunction.setFeatureExtraction(featureExtraction);
275
			preProc.setExtractFunction(extractFunction);
276
			if (aFunctionCall.doPreprocess() || aFunctionCall.isStatic()) {
277
				// log.debug("now call preprocess with: " + aFunctionCall.getExternalFunctionName() + " " + aFunctionCall.getUuid());
278
				preProc.preprocess(aFunctionCall, records, transformation.getRuleLanguageParser().getNamespaceDeclarations(), transformation
279
					.getStaticTransformationResults(), transformation.getJobProperties(), transformation.getRuleLanguageParser().getVariableMappingRules());
280
				// log.debug("preprocess end");
281
			} else {
282
				log.debug("skip preprocessing for function: " + aFunctionCall.getExternalFunctionName());
283
			}
284

    
285
		} catch (final Exception e) {
286
			throw new IllegalStateException(e);
287
		}
288

    
289
	}
290

    
291
	/**
292
	 * @param transformation
293
	 *            the transformation to set
294
	 */
295
	public void setTransformation(final ITransformation transformation) {
296
		this.transformation = transformation;
297
	}
298

    
299
	/**
300
	 * @return the transformation
301
	 */
302
	public ITransformation getTransformation() {
303
		return transformation;
304
	}
305

    
306
	/**
307
	 * @param vocabularyRegistry
308
	 *            the vocabularyRegistry to set
309
	 */
310
	public void setVocabularyRegistry(final VocabularyRegistry vocabularyRegistry) {
311
		this.vocabularyRegistry = vocabularyRegistry;
312
	}
313

    
314
	/**
315
	 * @return the vocabularyRegistry
316
	 */
317
	public VocabularyRegistry getVocabularyRegistry() {
318
		return vocabularyRegistry;
319
	}
320

    
321
	/**
322
	 * @return the resourceDao
323
	 */
324
	public ResourceDao getResourceDao() {
325
		return resourceDao;
326
	}
327

    
328
	/**
329
	 * @param resourceDao
330
	 *            the resourceDao to set
331
	 */
332
	public void setResourceDao(final ResourceDao resourceDao) {
333
		this.resourceDao = resourceDao;
334
	}
335

    
336
	/**
337
	 * @param featureExtraction
338
	 *            the featureExtraction to set
339
	 */
340
	public void setFeatureExtraction(final IFeatureExtraction featureExtraction) {
341
		this.featureExtraction = featureExtraction;
342
	}
343

    
344
	/**
345
	 * @return the featureExtraction
346
	 */
347
	public IFeatureExtraction getFeatureExtraction() {
348
		return featureExtraction;
349
	}
350

    
351
	/**
352
	 * @return the databaseConnector
353
	 */
354
	public IDatabaseConnector getDatabaseConnector() {
355
		return databaseConnector;
356
	}
357

    
358
	/**
359
	 * @param databaseConnector
360
	 *            the databaseConnector to set
361
	 */
362
	public void setDatabaseConnector(final IDatabaseConnector databaseConnector) {
363
		this.databaseConnector = databaseConnector;
364
	}
365

    
366
	public long getTotalTransformedRecords() {
367
		return this.totalTransformedRecords;
368
	}
369

    
370
	public long getTotalIgnoredRecords() {
371
		return this.totalIgnoredRecords;
372
	}
373

    
374
	/**
375
	 * @return the mappingFile
376
	 */
377
	public String getMappingFile() {
378
		return mappingFile;
379
	}
380

    
381
	/**
382
	 * @param mappingFile
383
	 *            the mappingFile to set
384
	 */
385
	public void setMappingFile(final String mappingFile) {
386
		this.mappingFile = mappingFile;
387
	}
388

    
389
	public Resource getBlacklistApi() {
390
		return blacklistApi;
391
	}
392

    
393
	public void setBlacklistApi(final Resource blacklistApi) {
394
		this.blacklistApi = blacklistApi;
395
	}
396

    
397
	public LocalDatasourceManager<Datasource<?, ?, ?>, Api<?>> getDsManager() {
398
		return dsManager;
399
	}
400

    
401
	public void setDsManager(final LocalDatasourceManager<Datasource<?, ?, ?>, Api<?>> dsManager) {
402
		this.dsManager = dsManager;
403
	}
404
}
(3-3/3)