Project

General

Profile

1
package eu.dnetlib.data.collective.transformation.engine;
2

    
3
import java.io.StringReader;
4
import java.util.HashMap;
5
import java.util.Iterator;
6
import java.util.LinkedHashMap;
7
import java.util.LinkedList;
8
import java.util.List;
9
import java.util.Map;
10

    
11
import javax.xml.xpath.XPath;
12
import javax.xml.xpath.XPathConstants;
13
import javax.xml.xpath.XPathExpressionException;
14
import javax.xml.xpath.XPathFactory;
15

    
16
import net.sf.saxon.instruct.TerminationException;
17

    
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20
import org.springframework.core.io.Resource;
21
import org.w3c.dom.Node;
22
import org.xml.sax.InputSource;
23

    
24
import eu.dnetlib.common.profile.ResourceDao;
25
import eu.dnetlib.data.collective.transformation.IDatabaseConnector;
26
import eu.dnetlib.data.collective.transformation.TransformationException;
27
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
28
import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy;
29
import eu.dnetlib.data.collective.transformation.engine.core.ITransformation;
30
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
31
// import eu.dnetlib.data.collective.transformation.engine.functions.Dblookup;
32
import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
33
import eu.dnetlib.data.collective.transformation.engine.functions.IFeatureExtraction;
34
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
35
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
36
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue;
37
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue.FUNCTION;
38
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
39
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type;
40
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
41
import eu.dnetlib.data.collective.transformation.utils.BlacklistConsumer;
42

    
43
/**
44
 * @author jochen
45
 *
46
 */
47
public class SimpleTransformationEngine {
48

    
49
	private static Log log = LogFactory.getLog(SimpleTransformationEngine.class);
50
	private ITransformation transformation;
51
	private VocabularyRegistry vocabularyRegistry;
52
	private IDatabaseConnector databaseConnector;
53
	private ResourceDao resourceDao;
54
	private IFeatureExtraction featureExtraction;
55
	private final List<String> mdRecords = new LinkedList<String>();
56
	private long totalTransformedRecords = 0;
57
	private long totalIgnoredRecords = 0;
58
	private String mappingFile;
59
	private boolean stylesheetParamsCalculated = false;
60
	private boolean preprocessingDone = false;
61
	private Map<String, String> stylesheetParams = new LinkedHashMap<String, String>();
62
	private Resource blacklistApi;
63
	private List<String> blacklistedRecords = new LinkedList<String>();
64

    
65

    
66
	/**
67
	 * execute any preprocessings declared in the transformation script prior starting the transformation of records
68
	 */
69
	public void preprocess(String dataSourceId) {
70
		for (Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
71
			Iterator<String> it = preprocMap.keySet().iterator();
72
			while (it.hasNext()) {
73
				String function = it.next();
74
//				if (function.equals("dblookup")) {
75
//					Dblookup fun = new Dblookup();
76
//					fun.setDbConnector(databaseConnector);
77
//					try {
78
//						log.debug("preprocessingMap value: " + preprocMap.get(function));
79
//						TransformationFunctionProxy.getInstance().setLookupRecord(fun.getResults(preprocMap.get(function)));
80
//					} catch (Exception e) {
81
//						log.debug(e.getMessage());
82
//						throw new IllegalStateException(e);
83
//					}
84
//				}
85
				if (function.equals("blacklist")) {
86
					BlacklistConsumer bc = new BlacklistConsumer();
87
					try{
88
						blacklistedRecords = bc.getBlackList(blacklistApi.getURL() + dataSourceId);						
89
					}catch(Exception e){
90
						throw new IllegalStateException("error in preprocess: " + e.getMessage());
91
					}
92
				}
93
			}
94
		}
95
		log.debug("preprocessing done.");
96
	}
97

    
98
	/**
99
	 * check if blacklistedRecords exist and if so check if the current record is blacklisted by its objIdentifier
100
	 * @param aRecord
101
	 * @return
102
	 * @throws XPathExpressionException
103
	 * @throws ProcessingException
104
	 */
105
	private boolean isBlacklistRecord(String aRecord){
106
		if (blacklistedRecords.size() == 0) return false;		
107
		XPath xpath = XPathFactory.newInstance().newXPath();
108
		try{
109
			Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
110
			String objId = xpath.evaluate("//*[local-name()='objIdentifier']", root);
111
			if (blacklistedRecords.contains(objId)) return true;			
112
		}catch(Exception e){
113
			throw new IllegalStateException("error in isBlacklistRecord: " + e.getMessage());
114
		}		
115
		return false;
116
	}
117
	
118
	/**
119
	 * transforms a source record
120
	 *
121
	 * @param sourceRecord
122
	 *            the record to transform
123
	 * @return transformed record
124
	 */
125
	public String transform(final String sourceRecord) {
126
		List<String> objectRecords = new LinkedList<String>();
127
		objectRecords.add(sourceRecord);
128
		int index = 0;
129
		mdRecords.clear();
130
		initTransformationFunction();
131

    
132
		if (!stylesheetParamsCalculated) {
133
			try{
134
				calculateStylesheetParams(sourceRecord);
135
			}catch(Exception e){
136
				throw new IllegalStateException("error in calculateStylesheetParams" + e.getMessage());
137
			}
138
		}
139
		
140
		if (!preprocessingDone){
141
			// xpath sourceRecord dataSourceid
142
			preprocess(stylesheetParams.get("varBlacklistDataSourceId"));
143
			preprocessingDone = true;
144
		}
145
		
146
		if (isBlacklistRecord(sourceRecord)){
147
			try{
148
				mdRecords.add(transformation.transformRecord(sourceRecord, ITransformation.XSLSyntaxcheckfailed));
149
			}catch(Exception e){
150
				log.fatal(sourceRecord);
151
				throw new IllegalStateException(e);				
152
			}
153
		}else if (!transformation.getRuleLanguageParser().isXslStylesheet()) {
154
			// iterate over all rules which are functionCalls
155
			log.debug("functionCalls size: " + transformation.getRuleLanguageParser().getFunctionCalls().size());
156
			for (FunctionCall functionCall : transformation.getRuleLanguageParser().getFunctionCalls()) {
157
				preprocess(objectRecords, functionCall);
158
			}
159
			for (String record : objectRecords) {
160
				// log.debug(record);
161
				try {
162
					log.debug("now run transformation for record with index: " + index);
163
					try{
164
						String transformedRecord = transformation.transformRecord(record, index);
165
						mdRecords.add(transformedRecord);
166
					} catch (TerminationException e){
167
						log.debug("record transformation terminated.");
168
						String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
169
						log.debug(failedRecord);
170
						totalIgnoredRecords++;
171
						mdRecords.add(failedRecord);
172
					}
173
				} catch (TransformationException e) {
174
					log.error(sourceRecord);
175
					throw new IllegalStateException(e);
176
				}
177
				index++;
178
			}
179
		} else {
180
			for (String record : objectRecords) {
181
				// test for init params and assign values
182
				try {
183
					log.debug("now run transformation for record with index: " + index);
184
					try{
185
						String transformedRecord = transformation.transformRecord(record, stylesheetParams);
186
						mdRecords.add(transformedRecord);
187
					}catch(TerminationException e){
188
						String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
189
						totalIgnoredRecords++;
190
						log.debug(failedRecord);
191
						mdRecords.add(failedRecord);
192
					}
193
				} catch (TransformationException e) {
194
					log.error(sourceRecord);
195
					throw new IllegalStateException(e);
196
				}
197
				index++;
198
			}
199
		}
200

    
201
		totalTransformedRecords = totalTransformedRecords + mdRecords.size();
202
		log.debug("objRecordSize: " + objectRecords.size() + ", mdRecordSize: " + mdRecords.size() + ", ignoredRecordSize: " + totalIgnoredRecords);
203
		return mdRecords.get(0);
204
	}
205

    
206
	private void calculateStylesheetParams(final String aRecord) throws XPathExpressionException, ProcessingException {
207
		stylesheetParamsCalculated = true;
208
		XPath xpath = XPathFactory.newInstance().newXPath();
209
		Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
210
		String datasourcePrefix = xpath.evaluate("//*[local-name()='datasourceprefix']", root);
211
		String profileXquery = "collection('/db/DRIVER/RepositoryServiceResources')//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=\"NamespacePrefix\"][value=\"" + datasourcePrefix + "\"]]";
212
		//String repositoryId = xpath.evaluate("//*[local-name()='repositoryId']", root);
213
		log.debug("profileXquery: " + profileXquery);
214
		// static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId",
215
		// xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
216
		RetrieveValue retrieveValue = new RetrieveValue();
217
		retrieveValue.setResourceDao(resourceDao);
218
		List<Argument> argList = new LinkedList<Argument>();
219
		argList.add(new Argument(Type.VALUE, profileXquery));
220
		Argument argXpath = new Argument(Type.INPUTFIELD, "//OFFICIAL_NAME");
221
		argList.add(argXpath);
222
		String varOfficialName = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
223
		stylesheetParams.put("varOfficialName", varOfficialName);
224
		argList.remove(argXpath);
225
		argXpath = new Argument(Type.INPUTFIELD, "//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value");
226
		argList.add(argXpath);
227
		String varDataSourceId = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
228
		stylesheetParams.put("varDataSourceId", varDataSourceId);
229
		argList.remove(argXpath);
230
		argXpath = new Argument(Type.INPUTFIELD, "//CONFIGURATION/DATASOURCE_TYPE");
231
		argList.add(argXpath);
232
		String varDsType = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
233
		stylesheetParams.put("varDsType", varDsType);
234
		argList.remove(argXpath);
235
		
236
		// if blacklist
237
		for (Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
238
			Iterator<String> it = preprocMap.keySet().iterator();
239
			while (it.hasNext()) {	
240
				String function = it.next();
241
				if (function.equals("blacklist")) {
242
					argXpath = new Argument(Type.INPUTFIELD, preprocMap.get(function)); // blacklistDataSourceIdXpath
243
					argList.add(argXpath);
244
					String varBlacklistDataSourceId = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
245
					stylesheetParams.put("varBlacklistDataSourceId", varBlacklistDataSourceId);					
246
					argList.remove(argXpath);
247
				}
248
			}
249
		}
250
	}
251

    
252
	private void initTransformationFunction() {
253
		if (this.vocabularyRegistry == null) { throw new IllegalStateException("vocabularyReg is null"); }
254
		Convert convertFunction = new Convert();
255
		convertFunction.setVocabularyRegistry(this.vocabularyRegistry);
256
		TransformationFunctionProxy.getInstance().setConvertFunction(convertFunction);
257

    
258
	}
259

    
260
	/**
261
	 * preprocesses function if function is configured resp.
262
	 *
263
	 * @param records
264
	 *            list of object records
265
	 * @param aFunctionCall
266
	 */
267
	private void preprocess(final List<String> records, final FunctionCall aFunctionCall) {
268
		try {
269
			log.debug("preprocess");
270
			if (transformation.getRuleLanguageParser() == null) { throw new IllegalStateException("rulelanguageparser not initialised"); }
271
			if (transformation.getRuleLanguageParser().getNamespaceDeclarations() == null) { throw new IllegalStateException("nsDecl is null"); }
272
			PreProcessor preProc = new PreProcessor();
273
			preProc.setConvertFunction(TransformationFunctionProxy.getInstance().getConvertFunction());
274
			RetrieveValue retrieveValue = new RetrieveValue();
275
			retrieveValue.setResourceDao(resourceDao);
276
			preProc.setRetrieveFunction(retrieveValue);
277
			RegularExpression regExpr = new RegularExpression();
278
			preProc.setRegExprFunction(regExpr);
279
			TransformationFunctionProxy functionProxy = TransformationFunctionProxy.getInstance();
280
			preProc.setFunctionProxy(functionProxy);
281
			Extract extractFunction = new Extract();
282
			extractFunction.setFeatureExtraction(featureExtraction);
283
			preProc.setExtractFunction(extractFunction);
284
			if (aFunctionCall.doPreprocess() || aFunctionCall.isStatic()) {
285
				// log.debug("now call preprocess with: " + aFunctionCall.getExternalFunctionName() + " " + aFunctionCall.getUuid());
286
				preProc.preprocess(
287
						aFunctionCall,
288
						records,
289
						transformation.getRuleLanguageParser().getNamespaceDeclarations(),
290
						transformation.getStaticTransformationResults(),
291
						transformation.getJobProperties(),
292
						transformation.getRuleLanguageParser().getVariableMappingRules());
293
				// log.debug("preprocess end");
294
			} else {
295
				log.debug("skip preprocessing for function: " + aFunctionCall.getExternalFunctionName());
296
			}
297

    
298
		} catch (Exception e) {
299
			throw new IllegalStateException(e);
300
		}
301

    
302
	}
303

    
304
	/**
305
	 * @param transformation
306
	 *            the transformation to set
307
	 */
308
	public void setTransformation(final ITransformation transformation) {
309
		this.transformation = transformation;
310
	}
311

    
312
	/**
313
	 * @return the transformation
314
	 */
315
	public ITransformation getTransformation() {
316
		return transformation;
317
	}
318

    
319
	/**
320
	 * @param vocabularyRegistry
321
	 *            the vocabularyRegistry to set
322
	 */
323
	public void setVocabularyRegistry(final VocabularyRegistry vocabularyRegistry) {
324
		this.vocabularyRegistry = vocabularyRegistry;
325
	}
326

    
327
	/**
328
	 * @return the vocabularyRegistry
329
	 */
330
	public VocabularyRegistry getVocabularyRegistry() {
331
		return vocabularyRegistry;
332
	}
333

    
334
	/**
335
	 * @return the resourceDao
336
	 */
337
	public ResourceDao getResourceDao() {
338
		return resourceDao;
339
	}
340

    
341
	/**
342
	 * @param resourceDao
343
	 *            the resourceDao to set
344
	 */
345
	public void setResourceDao(final ResourceDao resourceDao) {
346
		this.resourceDao = resourceDao;
347
	}
348

    
349
	/**
350
	 * @param featureExtraction
351
	 *            the featureExtraction to set
352
	 */
353
	public void setFeatureExtraction(final IFeatureExtraction featureExtraction) {
354
		this.featureExtraction = featureExtraction;
355
	}
356

    
357
	/**
358
	 * @return the featureExtraction
359
	 */
360
	public IFeatureExtraction getFeatureExtraction() {
361
		return featureExtraction;
362
	}
363

    
364
	/**
365
	 * @return the databaseConnector
366
	 */
367
	public IDatabaseConnector getDatabaseConnector() {
368
		return databaseConnector;
369
	}
370

    
371
	/**
372
	 * @param databaseConnector
373
	 *            the databaseConnector to set
374
	 */
375
	public void setDatabaseConnector(final IDatabaseConnector databaseConnector) {
376
		this.databaseConnector = databaseConnector;
377
	}
378

    
379
	public long getTotalTransformedRecords() {
380
		return this.totalTransformedRecords;
381
	}
382

    
383
	public long getTotalIgnoredRecords() {
384
		return this.totalIgnoredRecords;
385
	}
386

    
387
	/**
388
	 * @return the mappingFile
389
	 */
390
	public String getMappingFile() {
391
		return mappingFile;
392
	}
393

    
394
	/**
395
	 * @param mappingFile
396
	 *            the mappingFile to set
397
	 */
398
	public void setMappingFile(final String mappingFile) {
399
		this.mappingFile = mappingFile;
400
	}
401

    
402
	public Resource getBlacklistApi() {
403
		return blacklistApi;
404
	}
405

    
406
	public void setBlacklistApi(Resource blacklistApi) {
407
		this.blacklistApi = blacklistApi;
408
	}
409
}
(3-3/3)