Project

General

Profile

« Previous | Next » 

Revision 53622

[maven-release-plugin] copy for tag unibi-data-collective-transformation-common-2.2.7

View differences:

modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/pom.xml
1
<?xml version="1.0" ?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
	</parent>
8
	<modelVersion>4.0.0</modelVersion>
9
	<groupId>eu.dnetlib</groupId>
10
	<artifactId>unibi-data-collective-transformation-common</artifactId>
11
	<packaging>jar</packaging>
12
	<version>2.2.7</version>
13
	<scm>
14
	  <developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7</developerConnection>
15
	</scm>
16
	<dependencies>
17
		<dependency>
18
			<groupId>commons-beanutils</groupId>
19
			<artifactId>commons-beanutils</artifactId>
20
			<version>1.8.3</version>
21
		</dependency>
22
		<dependency>
23
			<groupId>org.apache.commons</groupId>
24
			<artifactId>commons-text</artifactId>
25
			<version>1.3</version>
26
		</dependency>
27
		<dependency>
28
			<groupId>junit</groupId>
29
			<artifactId>junit</artifactId>
30
			<version>${junit.version}</version>
31
			<scope>test</scope>
32
		</dependency>
33
		<dependency>
34
			<groupId>org.mockito</groupId>
35
			<artifactId>mockito-core</artifactId>
36
			<version>${mockito.version}</version>
37
			<scope>test</scope>
38
		</dependency>
39
		<dependency>
40
			<groupId>org.svenson</groupId>
41
			<artifactId>svenson-json</artifactId>
42
			<version>[1.4.0,1.5.0)</version>
43
		</dependency>
44
		<dependency>
45
			<groupId>org.slf4j</groupId>
46
			<artifactId>slf4j-api</artifactId>
47
			<version>1.6.4</version>
48
			<scope>compile</scope>
49
		</dependency>
50
		<dependency>
51
			<groupId>org.slf4j</groupId>
52
			<artifactId>slf4j-log4j12</artifactId>
53
			<version>1.6.4</version>
54
			<scope>runtime</scope>
55
		</dependency>
56
		<dependency>
57
			<groupId>com.google.guava</groupId>
58
			<artifactId>guava</artifactId>
59
			<version>${google.guava.version}</version>
60
		</dependency>
61
		<dependency>
62
			<groupId>eu.dnetlib</groupId>
63
			<artifactId>cnr-service-common</artifactId>
64
			<version>[2.1.0,3.0.0)</version>
65
		</dependency>
66
		<dependency>
67
			<groupId>com.sun.xsom</groupId>
68
			<artifactId>xsom</artifactId>
69
			<version>20110809</version>
70
		</dependency>		
71
	</dependencies>
72
</project>
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/LookupRecord.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collective.transformation.engine.functions;
5

  
6
import java.util.HashMap;
7
import java.util.LinkedHashMap;
8
import java.util.Properties;
9

  
10
/**
 * In-memory lookup table that stores string properties grouped by a record key.
 *
 * @author jochen
 */
public class LookupRecord {

	/** value handed out when either the record or the property is not present */
	private static final String UNKNOWN = "UNKNOWN";

	/** properties per record key; a linked map is used, so keys keep their insertion order */
	private final HashMap<String, Properties> recordMap = new LinkedHashMap<String, Properties>();

	/**
	 * Stores a property for a record; the record is created on first use and an
	 * already existing property of that record is overwritten.
	 *
	 * @param aRecordKey key of the record
	 * @param aPropertyKey name of the property
	 * @param aPropertyValue value of the property
	 */
	public void setRecord(String aRecordKey, String aPropertyKey, String aPropertyValue){
		Properties existing = recordMap.get(aRecordKey);
		if (existing != null){
			existing.setProperty(aPropertyKey, aPropertyValue);
			return;
		}
		// the property is set before the record is registered, so a rejected
		// key/value never leaves an empty record behind
		Properties created = new Properties();
		created.setProperty(aPropertyKey, aPropertyValue);
		recordMap.put(aRecordKey, created);
	}

	/**
	 * Looks up a property of a record.
	 *
	 * @param aRecordKey key of the record
	 * @param aPropertyKey name of the property
	 * @return the stored value, or "UNKNOWN" if the record or the property does not exist
	 */
	public String getPropertyValue(String aRecordKey, String aPropertyKey){
		Properties properties = recordMap.get(aRecordKey);
		return properties == null ? UNKNOWN : properties.getProperty(aPropertyKey, UNKNOWN);
	}

}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabulary.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collective.transformation.engine.functions;
5

  
6
import java.text.ParseException;
7
import java.text.SimpleDateFormat;
8
import java.util.Arrays;
9
import java.util.Date;
10
//import java.util.HashMap;
11
import java.util.LinkedList;
12
import java.util.List;
13
//import java.util.Map;
14

  
15

  
16

  
17

  
18
import org.apache.oro.text.perl.Perl5Util;
19

  
20

  
21
/**
22
 * @author jochen
23
 *
24
 */
25
public class DateVocabulary extends Vocabulary{
26

  
27
	private static final String filterFuncMin = "min()";
28
	private String pattern_1 = "/^(\\d{4,4}-\\d{1,2}-\\d{1,2})/";
29
	private String pattern_2 = "/^(\\d{4,4}-\\d{1,2})$/";
30
	private String pattern_3 = "/^(\\d{4,4})$/";
31
	private String pattern_4 = "/^(\\d{1,2}.\\d{1,2}.\\d{4,4})$/";
32
	private SimpleDateFormat df;
33
	
34
	private transient Perl5Util perl5 = new Perl5Util();
35

  
36
	public String encoding(List<String> aKeys) throws ProcessingException{
37
		String tempKey_1 = null;
38
		String tempKey_2 = null;
39
		String tempKey_3 = null;
40
		String currentKey = null;
41
		String twoDigitFormat = String.format("%%0%dd", 2);
42

  
43
		try{
44
			for (String key: aKeys){
45
				key = key.trim();
46
				currentKey = key;
47
				if (perl5.match(pattern_1, key)){
48
					String[] dateSplitted = perl5.getMatch().toString().split("-");
49
					String dateNormalized = dateSplitted[0] + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[1])) + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[2]));
50
					return dateNormalized;
51
				}else if (perl5.match(pattern_2, key)){
52
					String[] dateSplitted = key.split("-");
53
					tempKey_1 = dateSplitted[0] + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[1])) + "-01";
54
				}else if (perl5.match(pattern_3, key)){
55
					tempKey_2 = key + "-01-01";
56
				}else if (perl5.match(pattern_4, key)){
57
					String[] components = key.split("[\\-\\/\\.]");
58
					// ignore this key if it has less than 3 components
59
					if (components.length >= 3)
60
						tempKey_3 = components[2] + "-" + String.format(twoDigitFormat, Integer.parseInt(components[1])) + "-" + String.format(twoDigitFormat, Integer.parseInt(components[0]));					
61
				}
62
			}			
63
		}catch(Throwable e){
64
			throw new ProcessingException("Exception thrown in Datevocabulary (tried to match for value '" + currentKey + "'):", e);
65
		}
66
		if (tempKey_1 != null){
67
			return tempKey_1;
68
		}else if (tempKey_2 != null){
69
			return tempKey_2;
70
		}else if (tempKey_3 != null){
71
			return tempKey_3;
72
		}else{
73
			return "";			
74
		}
75
	}
76
	
77
	@Override
78
	public List<String> encoding(List<String> aKeys, String aDefaultPattern,
79
			String aFilterFunction) throws ProcessingException {
80
		List<String> evList = new LinkedList<String>();
81
		df = new SimpleDateFormat(aDefaultPattern);
82
		for (String v: aKeys){
83
			String ev = encoding(Arrays.asList(new String[]{v}));
84
			if (ev.length() > 0){
85
				try {
86
					if (aFilterFunction.trim().length() > 0 && !evList.isEmpty())
87
						evList.add( filter(df.parse(ev), df.parse(evList.remove(0)), aFilterFunction) );
88
					else
89
						evList.add(df.format(df.parse(ev)));
90
				} catch (ParseException e) {
91
					throw new ProcessingException("invalid date format: " + ev);
92
				}
93
			}
94
		}
95
		return evList;
96
	}
97
	
98
	private String filter(Date d1, Date d2, String filter) throws ProcessingException{
99
		if (filter.equals(filterFuncMin))
100
			if (d1.before(d2))
101
				return df.format(d1);
102
			else
103
				return df.format(d2);
104
		else
105
			throw new ProcessingException("unsupported filter function: " + filter);
106
	}
107
	
108
}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IVocabulary.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collective.transformation.engine.functions;
5

  
6
import java.util.List;
7

  
8
/**
9
 * @author jochen
10
 *
11
 */
12
public interface IVocabulary {
13

  
14
	/**
15
	 * return the encoding for a given list of values
16
	 * @param keys
17
	 * @return the encoding as string
18
	 * @throws ProcessingException
19
	 */
20
	public String encoding(List<String> keys) throws ProcessingException;
21
	
22
	/**
23
	 * return the encoding for a given list of values using a default pattern and applying a filter function
24
	 * @param aKeys
25
	 * @param aDefaultPattern
26
	 * @param aFilterFunction
27
	 * @return the list of encoded values
28
	 * @throws ProcessingException
29
	 */
30
	public List<String> encoding(List<String> aKeys, String aDefaultPattern, String aFilterFunction) throws ProcessingException;
31
}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java
1
package eu.dnetlib.data.collective.transformation.engine.functions;
2

  
3
import java.util.HashSet;
4
import java.util.LinkedList;
5
import java.util.List;
6
import java.util.Set;
7
import java.util.regex.Matcher;
8
import java.util.regex.Pattern;
9

  
10
import javax.xml.parsers.ParserConfigurationException;
11
import javax.xml.xpath.XPath;
12
import javax.xml.xpath.XPathConstants;
13
import javax.xml.xpath.XPathExpressionException;
14

  
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.w3c.dom.Document;
18
import org.w3c.dom.DocumentFragment;
19
import org.w3c.dom.Element;
20
import org.w3c.dom.Node;
21
import org.w3c.dom.NodeList;
22

  
23
public class IdentifierExtract extends AbstractTransformationFunction{
24
	public static final Log log = LogFactory.getLog(IdentifierExtract.class);
25
	public static final String paramXpathExprJson = "xpathExprJson";
26
	public static final String paramXpathExprInSource = "xpathExprInputSource";
27
	public static final String paramRegExpr = "regExpr";
28
	
29
	@Override
30
	String execute() throws ProcessingException {
31
		// TODO Auto-generated method stub
32
		return null;
33
	}
34

  
35
	/**
36
	 * extract content matched by a regular expression pattern from a given node and return matched content as a node-list
37
	 * @param aXpathExprList
38
	 * @param aInput
39
	 * @param aRegExpression
40
	 * @param aDocument
41
	 * @param aXpath
42
	 * @return nodeList
43
	 * @throws ProcessingException
44
	 */
45
	public NodeList extract(List<String> aXpathExprList, Node aInput,
46
			String aRegExpression, Document aDocument, XPath aXpath) throws ProcessingException {
47
		
48
		log.debug("xpathExprList: " + aXpathExprList);
49
		log.debug("regExpr: " + aRegExpression);
50
		Set<String> identifierSet = new HashSet<String>();
51
		
52
//		log.debug("node: length: " + aInput.getChildNodes().getLength());
53
		log.debug("regular expression : " + aRegExpression);
54
		Pattern p = Pattern.compile(aRegExpression);
55
		try {
56
			List<String> textList = extractText(aXpathExprList, aInput, aXpath);
57
			for (String text: textList){
58
				log.debug("text as input : " + text);
59
				Matcher m = p.matcher(text);
60
				while (m.find()){
61
					log.debug("extracted identifier: " + m.group());
62
					identifierSet.add(m.group());
63
				}
64
			}
65
			return toNodeList(identifierSet, aDocument);
66
		} catch (XPathExpressionException e) {
67
			e.printStackTrace();
68
			throw new ProcessingException(e);
69
		} catch (ParserConfigurationException e) {
70
			e.printStackTrace();
71
			throw new ProcessingException(e);
72
		}
73
	}
74
	
75
	/**
76
	 * create a list of nodes from a list of string values
77
	 * @param aValueSet, set of unique values
78
	 * @param aDocument
79
	 * @return nodeList
80
	 */
81
	private NodeList toNodeList(Set<String> aValueSet, Document aDocument){
82
		DocumentFragment dFrag = aDocument.createDocumentFragment();
83
		Element root = aDocument.createElement("root");
84
		dFrag.appendChild(root);
85
		for (String value: aValueSet){
86
			Element eVal = aDocument.createElement("value");
87
			eVal.setTextContent(value);
88
			root.appendChild(eVal);
89
		}
90
		return dFrag.getChildNodes();
91
	}
92
	
93
	/**
94
	 * extract text from a given node using a list of given xpath expressions
95
	 * @param aXpathExprList
96
	 * @param aInput
97
	 * @param aXpath
98
	 * @return list of strings
99
	 * @throws XPathExpressionException
100
	 * @throws ParserConfigurationException 
101
	 */
102
	private List<String> extractText(List<String> aXpathExprList, Node aInput, XPath aXpath) throws XPathExpressionException, ParserConfigurationException{
103
		
104
		List<String> resultList = new LinkedList<String>();
105
		for (String xpathExpr: aXpathExprList){
106
			NodeList nodeList = (NodeList)aXpath.evaluate(xpathExpr, aInput, XPathConstants.NODESET);
107
			log.debug("extract text: nodelist length: " + nodeList.getLength()); 
108
			for (int i = 0; i < nodeList.getLength(); i++){
109
				resultList.add(nodeList.item(i).getTextContent());
110
			}
111
		}
112
		return resultList;
113
	}
114
}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ProcessingException.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collective.transformation.engine.functions;
5

  
6
/**
 * Checked exception raised by the transformation functions of this package.
 *
 * @author jochen
 */
public class ProcessingException extends Exception {

	private static final long serialVersionUID = -8648116731979859467L;

	/**
	 * Creates an exception without message and cause.
	 */
	public ProcessingException() {
		super();
	}

	/**
	 * Creates an exception with a message.
	 *
	 * @param message the detail message
	 */
	public ProcessingException(String message) {
		super(message);
	}

	/**
	 * Creates an exception that wraps a cause.
	 *
	 * @param cause the underlying problem
	 */
	public ProcessingException(Throwable cause) {
		super(cause);
	}

	/**
	 * Creates an exception with a message and a cause.
	 *
	 * @param message the detail message
	 * @param cause the underlying problem
	 */
	public ProcessingException(String message, Throwable cause) {
		super(message, cause);
	}

}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/PersonVocabulary.java
1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collective.transformation.engine.functions;
5

  
6
import java.util.List;
7

  
8
import prototype.Person;
9

  
10
/**
11
 * @author jochen
12
 *
13
 */
14
public class PersonVocabulary extends Vocabulary{
15

  
16
	@Override
17
	public String encoding(List<String> keys)throws ProcessingException{
18
		Person p;
19
		String result = "";
20
		for (String input: keys){
21
			p = new Person(input);
22
			result = p.getNormalisedFullname();
23
		}
24
		return result;
25
	}
26
}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngine.java
1
package eu.dnetlib.data.collective.transformation.engine;
2

  
3
import java.io.StringReader;
4
import java.util.HashMap;
5
import java.util.Iterator;
6
import java.util.LinkedHashMap;
7
import java.util.LinkedList;
8
import java.util.List;
9
import java.util.Map;
10

  
11
import javax.xml.xpath.XPath;
12
import javax.xml.xpath.XPathConstants;
13
import javax.xml.xpath.XPathExpressionException;
14
import javax.xml.xpath.XPathFactory;
15

  
16
import net.sf.saxon.instruct.TerminationException;
17

  
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20
import org.springframework.core.io.Resource;
21
import org.w3c.dom.Node;
22
import org.xml.sax.InputSource;
23

  
24
import eu.dnetlib.common.profile.ResourceDao;
25
import eu.dnetlib.data.collective.transformation.IDatabaseConnector;
26
import eu.dnetlib.data.collective.transformation.TransformationException;
27
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
28
import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy;
29
import eu.dnetlib.data.collective.transformation.engine.core.ITransformation;
30
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
31
// import eu.dnetlib.data.collective.transformation.engine.functions.Dblookup;
32
import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
33
import eu.dnetlib.data.collective.transformation.engine.functions.IFeatureExtraction;
34
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
35
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
36
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue;
37
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue.FUNCTION;
38
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
39
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type;
40
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
41
import eu.dnetlib.data.collective.transformation.utils.BlacklistConsumer;
42

  
43
/**
44
 * @author jochen
45
 *
46
 */
47
public class SimpleTransformationEngine {
48

  
49
	private static Log log = LogFactory.getLog(SimpleTransformationEngine.class);
50
	private ITransformation transformation;
51
	private VocabularyRegistry vocabularyRegistry;
52
	private IDatabaseConnector databaseConnector;
53
	private ResourceDao resourceDao;
54
	private IFeatureExtraction featureExtraction;
55
	private final List<String> mdRecords = new LinkedList<String>();
56
	private long totalTransformedRecords = 0;
57
	private long totalIgnoredRecords = 0;
58
	private String mappingFile;
59
	private boolean stylesheetParamsCalculated = false;
60
	private boolean preprocessingDone = false;
61
	private Map<String, String> stylesheetParams = new LinkedHashMap<String, String>();
62
	private Resource blacklistApi;
63
	private List<String> blacklistedRecords = new LinkedList<String>();
64

  
65

  
66
	/**
67
	 * execute any preprocessings declared in the transformation script prior starting the transformation of records
68
	 */
69
	public void preprocess(String dataSourceId) {
70
		for (Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
71
			Iterator<String> it = preprocMap.keySet().iterator();
72
			while (it.hasNext()) {
73
				String function = it.next();
74
//				if (function.equals("dblookup")) {
75
//					Dblookup fun = new Dblookup();
76
//					fun.setDbConnector(databaseConnector);
77
//					try {
78
//						log.debug("preprocessingMap value: " + preprocMap.get(function));
79
//						TransformationFunctionProxy.getInstance().setLookupRecord(fun.getResults(preprocMap.get(function)));
80
//					} catch (Exception e) {
81
//						log.debug(e.getMessage());
82
//						throw new IllegalStateException(e);
83
//					}
84
//				}
85
				if (function.equals("blacklist")) {
86
					BlacklistConsumer bc = new BlacklistConsumer();
87
					try{
88
						blacklistedRecords = bc.getBlackList(blacklistApi.getURL() + dataSourceId);						
89
					}catch(Exception e){
90
						throw new IllegalStateException("error in preprocess: " + e.getMessage());
91
					}
92
				}
93
			}
94
		}
95
		log.debug("preprocessing done.");
96
	}
97

  
98
	/**
99
	 * check if blacklistedRecords exist and if so check if the current record is blacklisted by its objIdentifier
100
	 * @param aRecord
101
	 * @return
102
	 * @throws XPathExpressionException
103
	 * @throws ProcessingException
104
	 */
105
	private boolean isBlacklistRecord(String aRecord){
106
		if (blacklistedRecords.size() == 0) return false;		
107
		XPath xpath = XPathFactory.newInstance().newXPath();
108
		try{
109
			Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
110
			String objId = xpath.evaluate("//*[local-name()='objIdentifier']", root);
111
			if (blacklistedRecords.contains(objId)) return true;			
112
		}catch(Exception e){
113
			throw new IllegalStateException("error in isBlacklistRecord: " + e.getMessage());
114
		}		
115
		return false;
116
	}
117
	
118
	/**
119
	 * transforms a source record
120
	 *
121
	 * @param sourceRecord
122
	 *            the record to transform
123
	 * @return transformed record
124
	 */
125
	public String transform(final String sourceRecord) {
126
		List<String> objectRecords = new LinkedList<String>();
127
		objectRecords.add(sourceRecord);
128
		int index = 0;
129
		mdRecords.clear();
130
		initTransformationFunction();
131

  
132
		if (!stylesheetParamsCalculated) {
133
			try{
134
				calculateStylesheetParams(sourceRecord);
135
			}catch(Exception e){
136
				throw new IllegalStateException("error in calculateStylesheetParams" + e.getMessage());
137
			}
138
		}
139
		
140
		if (!preprocessingDone){
141
			// xpath sourceRecord dataSourceid
142
			preprocess(stylesheetParams.get("varBlacklistDataSourceId"));
143
			preprocessingDone = true;
144
		}
145
		
146
		if (isBlacklistRecord(sourceRecord)){
147
			try{
148
				mdRecords.add(transformation.transformRecord(sourceRecord, ITransformation.XSLSyntaxcheckfailed));
149
			}catch(Exception e){
150
				log.fatal(sourceRecord);
151
				throw new IllegalStateException(e);				
152
			}
153
		}else if (!transformation.getRuleLanguageParser().isXslStylesheet()) {
154
			// iterate over all rules which are functionCalls
155
			log.debug("functionCalls size: " + transformation.getRuleLanguageParser().getFunctionCalls().size());
156
			for (FunctionCall functionCall : transformation.getRuleLanguageParser().getFunctionCalls()) {
157
				preprocess(objectRecords, functionCall);
158
			}
159
			for (String record : objectRecords) {
160
				// log.debug(record);
161
				try {
162
					log.debug("now run transformation for record with index: " + index);
163
					try{
164
						String transformedRecord = transformation.transformRecord(record, index);
165
						mdRecords.add(transformedRecord);
166
					} catch (TerminationException e){
167
						log.debug("record transformation terminated.");
168
						String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
169
						log.debug(failedRecord);
170
						totalIgnoredRecords++;
171
						mdRecords.add(failedRecord);
172
					}
173
				} catch (TransformationException e) {
174
					log.error(sourceRecord);
175
					throw new IllegalStateException(e);
176
				}
177
				index++;
178
			}
179
		} else {
180
			for (String record : objectRecords) {
181
				// test for init params and assign values
182
				try {
183
					log.debug("now run transformation for record with index: " + index);
184
					try{
185
						String transformedRecord = transformation.transformRecord(record, stylesheetParams);
186
						mdRecords.add(transformedRecord);
187
					}catch(TerminationException e){
188
						String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
189
						totalIgnoredRecords++;
190
						log.debug(failedRecord);
191
						mdRecords.add(failedRecord);
192
					}
193
				} catch (TransformationException e) {
194
					log.error(sourceRecord);
195
					throw new IllegalStateException(e);
196
				}
197
				index++;
198
			}
199
		}
200

  
201
		totalTransformedRecords = totalTransformedRecords + mdRecords.size();
202
		log.debug("objRecordSize: " + objectRecords.size() + ", mdRecordSize: " + mdRecords.size() + ", ignoredRecordSize: " + totalIgnoredRecords);
203
		return mdRecords.get(0);
204
	}
205

  
206
	private void calculateStylesheetParams(final String aRecord) throws XPathExpressionException, ProcessingException {
207
		stylesheetParamsCalculated = true;
208
		XPath xpath = XPathFactory.newInstance().newXPath();
209
		Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
210
		String datasourcePrefix = xpath.evaluate("//*[local-name()='datasourceprefix']", root);
211
		String profileXquery = "collection('/db/DRIVER/RepositoryServiceResources')//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=\"NamespacePrefix\"][value=\"" + datasourcePrefix + "\"]]";
212
		//String repositoryId = xpath.evaluate("//*[local-name()='repositoryId']", root);
213
		log.debug("profileXquery: " + profileXquery);
214
		// static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId",
215
		// xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
216
		RetrieveValue retrieveValue = new RetrieveValue();
217
		retrieveValue.setResourceDao(resourceDao);
218
		List<Argument> argList = new LinkedList<Argument>();
219
		argList.add(new Argument(Type.VALUE, profileXquery));
220
		Argument argXpath = new Argument(Type.INPUTFIELD, "//OFFICIAL_NAME");
221
		argList.add(argXpath);
222
		String varOfficialName = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
223
		stylesheetParams.put("varOfficialName", varOfficialName);
224
		argList.remove(argXpath);
225
		argXpath = new Argument(Type.INPUTFIELD, "//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value");
226
		argList.add(argXpath);
227
		String varDataSourceId = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
228
		stylesheetParams.put("varDataSourceId", varDataSourceId);
229
		argList.remove(argXpath);
230
		argXpath = new Argument(Type.INPUTFIELD, "//CONFIGURATION/DATASOURCE_TYPE");
231
		argList.add(argXpath);
232
		String varDsType = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
233
		stylesheetParams.put("varDsType", varDsType);
234
		argList.remove(argXpath);
235
		
236
		// if blacklist
237
		for (Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
238
			Iterator<String> it = preprocMap.keySet().iterator();
239
			while (it.hasNext()) {	
240
				String function = it.next();
241
				if (function.equals("blacklist")) {
242
					argXpath = new Argument(Type.INPUTFIELD, preprocMap.get(function)); // blacklistDataSourceIdXpath
243
					argList.add(argXpath);
244
					String varBlacklistDataSourceId = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
245
					stylesheetParams.put("varBlacklistDataSourceId", varBlacklistDataSourceId);					
246
					argList.remove(argXpath);
247
				}
248
			}
249
		}
250
	}
251

  
252
	private void initTransformationFunction() {
253
		if (this.vocabularyRegistry == null) { throw new IllegalStateException("vocabularyReg is null"); }
254
		Convert convertFunction = new Convert();
255
		convertFunction.setVocabularyRegistry(this.vocabularyRegistry);
256
		TransformationFunctionProxy.getInstance().setConvertFunction(convertFunction);
257

  
258
	}
259

  
260
	/**
261
	 * preprocesses function if function is configured resp.
262
	 *
263
	 * @param records
264
	 *            list of object records
265
	 * @param aFunctionCall
266
	 */
267
	private void preprocess(final List<String> records, final FunctionCall aFunctionCall) {
268
		try {
269
			log.debug("preprocess");
270
			if (transformation.getRuleLanguageParser() == null) { throw new IllegalStateException("rulelanguageparser not initialised"); }
271
			if (transformation.getRuleLanguageParser().getNamespaceDeclarations() == null) { throw new IllegalStateException("nsDecl is null"); }
272
			PreProcessor preProc = new PreProcessor();
273
			preProc.setConvertFunction(TransformationFunctionProxy.getInstance().getConvertFunction());
274
			RetrieveValue retrieveValue = new RetrieveValue();
275
			retrieveValue.setResourceDao(resourceDao);
276
			preProc.setRetrieveFunction(retrieveValue);
277
			RegularExpression regExpr = new RegularExpression();
278
			preProc.setRegExprFunction(regExpr);
279
			TransformationFunctionProxy functionProxy = TransformationFunctionProxy.getInstance();
280
			preProc.setFunctionProxy(functionProxy);
281
			Extract extractFunction = new Extract();
282
			extractFunction.setFeatureExtraction(featureExtraction);
283
			preProc.setExtractFunction(extractFunction);
284
			if (aFunctionCall.doPreprocess() || aFunctionCall.isStatic()) {
285
				// log.debug("now call preprocess with: " + aFunctionCall.getExternalFunctionName() + " " + aFunctionCall.getUuid());
286
				preProc.preprocess(
287
						aFunctionCall,
288
						records,
289
						transformation.getRuleLanguageParser().getNamespaceDeclarations(),
290
						transformation.getStaticTransformationResults(),
291
						transformation.getJobProperties(),
292
						transformation.getRuleLanguageParser().getVariableMappingRules());
293
				// log.debug("preprocess end");
294
			} else {
295
				log.debug("skip preprocessing for function: " + aFunctionCall.getExternalFunctionName());
296
			}
297

  
298
		} catch (Exception e) {
299
			throw new IllegalStateException(e);
300
		}
301

  
302
	}
303

  
304
	/**
305
	 * @param transformation
306
	 *            the transformation to set
307
	 */
308
	public void setTransformation(final ITransformation transformation) {
309
		this.transformation = transformation;
310
	}
311

  
312
	/**
313
	 * @return the transformation
314
	 */
315
	public ITransformation getTransformation() {
316
		return transformation;
317
	}
318

  
319
	/**
320
	 * @param vocabularyRegistry
321
	 *            the vocabularyRegistry to set
322
	 */
323
	public void setVocabularyRegistry(final VocabularyRegistry vocabularyRegistry) {
324
		this.vocabularyRegistry = vocabularyRegistry;
325
	}
326

  
327
	/**
328
	 * @return the vocabularyRegistry
329
	 */
330
	public VocabularyRegistry getVocabularyRegistry() {
331
		return vocabularyRegistry;
332
	}
333

  
334
	/**
335
	 * @return the resourceDao
336
	 */
337
	public ResourceDao getResourceDao() {
338
		return resourceDao;
339
	}
340

  
341
	/**
342
	 * @param resourceDao
343
	 *            the resourceDao to set
344
	 */
345
	public void setResourceDao(final ResourceDao resourceDao) {
346
		this.resourceDao = resourceDao;
347
	}
348

  
349
	/**
350
	 * @param featureExtraction
351
	 *            the featureExtraction to set
352
	 */
353
	public void setFeatureExtraction(final IFeatureExtraction featureExtraction) {
354
		this.featureExtraction = featureExtraction;
355
	}
356

  
357
	/**
358
	 * @return the featureExtraction
359
	 */
360
	public IFeatureExtraction getFeatureExtraction() {
361
		return featureExtraction;
362
	}
363

  
364
	/**
365
	 * @return the databaseConnector
366
	 */
367
	public IDatabaseConnector getDatabaseConnector() {
368
		return databaseConnector;
369
	}
370

  
371
	/**
372
	 * @param databaseConnector
373
	 *            the databaseConnector to set
374
	 */
375
	public void setDatabaseConnector(final IDatabaseConnector databaseConnector) {
376
		this.databaseConnector = databaseConnector;
377
	}
378

  
379
	public long getTotalTransformedRecords() {
380
		return this.totalTransformedRecords;
381
	}
382

  
383
	public long getTotalIgnoredRecords() {
384
		return this.totalIgnoredRecords;
385
	}
386

  
387
	/**
388
	 * @return the mappingFile
389
	 */
390
	public String getMappingFile() {
391
		return mappingFile;
392
	}
393

  
394
	/**
395
	 * @param mappingFile
396
	 *            the mappingFile to set
397
	 */
398
	public void setMappingFile(final String mappingFile) {
399
		this.mappingFile = mappingFile;
400
	}
401

  
402
	public Resource getBlacklistApi() {
403
		return blacklistApi;
404
	}
405

  
406
	public void setBlacklistApi(Resource blacklistApi) {
407
		this.blacklistApi = blacklistApi;
408
	}
409
}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSTermVisitorImpl.java
1
package eu.dnetlib.data.collective.transformation.core.schema.visitor;
2

  
3
import org.apache.commons.lang3.NotImplementedException;
4

  
5
import com.sun.xml.xsom.XSElementDecl;
6
import com.sun.xml.xsom.XSModelGroup;
7
import com.sun.xml.xsom.XSModelGroupDecl;
8
import com.sun.xml.xsom.XSParticle;
9
import com.sun.xml.xsom.XSWildcard;
10
import com.sun.xml.xsom.visitor.XSTermVisitor;
11

  
12
/**
13
 * @author jochen
14
 *
15
 */
16
public class XSTermVisitorImpl implements XSTermVisitor {
17

  
18
	private Visitor visitor;
19
	
20
	@Override
21
	public void elementDecl(XSElementDecl aElementDecl) {
22
		if (aElementDecl.isLocal()){
23
			  this.visitor.elementDecl(aElementDecl);
24
		}else{
25
			// ignore non local element declarations
26
		}
27
	}
28

  
29
	@Override
30
	public void modelGroup(XSModelGroup aModelGroup) {
31
		XSContentTypeVisitorImpl contentTypeVisitor = new XSContentTypeVisitorImpl();
32
		contentTypeVisitor.setVisitor(this.visitor);
33
		for (XSParticle p: aModelGroup.getChildren()){
34
			contentTypeVisitor.particle(p);
35
		}										
36
	}
37

  
38
	@Override
39
	public void modelGroupDecl(XSModelGroupDecl arg0) {
40
		throw new NotImplementedException("TODO modelGroupDecl");
41
	}
42

  
43
	@Override
44
	public void wildcard(XSWildcard arg0) {
45
		throw new NotImplementedException("TODO wildcard");
46
	}
47

  
48
	public void setVisitor(Visitor visitor) {
49
		this.visitor = visitor;
50
	}
51

  
52
	public Visitor getVisitor() {
53
		return visitor;
54
	}
55

  
56
}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImpl.java
1
package eu.dnetlib.data.collective.transformation.engine.core;
2

  
3
import java.io.Reader;
4
import java.io.StringReader;
5
import java.io.StringWriter;
6
import java.util.ArrayList;
7
import java.util.HashMap;
8
import java.util.LinkedHashMap;
9
import java.util.List;
10
import java.util.Map;
11
import java.util.Properties;
12

  
13
import javax.xml.transform.OutputKeys;
14
import javax.xml.transform.Source;
15
import javax.xml.transform.Templates;
16
import javax.xml.transform.Transformer;
17
import javax.xml.transform.TransformerConfigurationException;
18
import javax.xml.transform.TransformerException;
19
import javax.xml.transform.TransformerFactory;
20
import javax.xml.transform.stream.StreamResult;
21
import javax.xml.transform.stream.StreamSource;
22

  
23
import net.sf.saxon.FeatureKeys;
24
import net.sf.saxon.instruct.TerminationException;
25

  
26
import org.apache.commons.logging.Log;
27
import org.apache.commons.logging.LogFactory;
28
import org.dom4j.Document;
29
import org.dom4j.DocumentException;
30
import org.dom4j.DocumentHelper;
31
import org.dom4j.Element;
32
import org.dom4j.Node;
33
import org.dom4j.io.SAXReader;
34
import org.springframework.core.io.Resource;
35

  
36
import eu.dnetlib.data.collective.transformation.TransformationException;
37
import eu.dnetlib.data.collective.transformation.core.schema.SchemaInspector;
38
import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser;
39
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
40
import eu.dnetlib.data.collective.transformation.utils.NamespaceContextImpl;
41

  
42
/**
43
 * @author jochen
44
 *
45
 */
46
public class TransformationImpl implements
47
		ITransformation {
48

  
49
	private static final String rootElement = "record";
50
	private final Log log = LogFactory.getLog(TransformationImpl.class);
51
	private Document xslDoc;
52
	private SAXReader reader = new SAXReader();
53
	private Transformer transformer;
54
	private Transformer transformerFailed;
55
	protected RuleLanguageParser ruleLanguageParser;
56
	private StylesheetBuilder stylesheetBuilder;
57
	// cache static transformation results, valid for one transformation job
58
	private Map<String, String> staticResults = new LinkedHashMap<String, String>();
59
	private Map<String, String> jobConstantMap = new HashMap<String, String>();
60
	
61
	@javax.annotation.Resource(name="template")
62
	private Resource template;
63
	
64
	private Resource schema;
65
	
66
	private Source xsltSyntaxcheckFailed;
67
	
68
	/**
69
	 * initializes the transformation with the underlying XSL-template
70
	 */
71
	public void init(){
72
		try {
73
			xslDoc = reader.read(template.getInputStream());
74
			Resource xslResource = template.createRelative(XSLSyntaxcheckfailed);
75
			String systemId = xslResource.getURL().toExternalForm();
76
			xsltSyntaxcheckFailed = new StreamSource(xslResource.getInputStream(), systemId);
77

  
78
		} catch (Throwable e) {
79
			log.error("cannot initialize this transformation.", e);
80
			throw new IllegalStateException(e);
81
		}
82

  
83
	}
84

  
85
	public void addJobConstant(String aKey, String aValue){
86
		this.jobConstantMap.put(aKey, aValue);
87
	}
88

  
89
	/**
90
	 * creates a new Transformer object using a stylesheet based on the transformation rules
91
	 */
92
	public void configureTransformation()throws TransformerConfigurationException{
93
        final List<TransformerException> errorList = new ArrayList<TransformerException>();
94
        
95
        javax.xml.transform.ErrorListener listener = new javax.xml.transform.ErrorListener() {
96
			
97
			@Override
98
			public void warning(TransformerException exception) throws TransformerException {
99
				// TODO Auto-generated method stub
100
				
101
			}
102
			
103
			@Override
104
			public void fatalError(TransformerException exception) throws TransformerException {
105
				// TODO Auto-generated method stub
106
				errorList.add(exception);
107
				throw exception;
108
			}
109
			
110
			@Override
111
			public void error(TransformerException exception) throws TransformerException {
112
				// TODO Auto-generated method stub
113
				
114
			}
115
		};
116

  
117
		TransformerFactory factory = TransformerFactory.newInstance();
118
		factory.setAttribute(FeatureKeys.ALLOW_EXTERNAL_FUNCTIONS, Boolean.TRUE);
119
		factory.setErrorListener(listener);
120
		Templates templates = null;		
121
		try{
122
			if (this.ruleLanguageParser.isXslStylesheet()){
123
				templates = factory.newTemplates(new StreamSource(new StringReader(ruleLanguageParser.getXslStylesheet())));
124
			}else{
125
				templates = factory.newTemplates(new StreamSource(createStylesheet()));			
126
			}		
127

  
128
			transformer = templates.newTransformer();
129
			//((net.sf.saxon.Controller)transformer).setMessageEmitter(mw);
130
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
131
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
132
			transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
133
			
134
			Templates templateFailed = factory.newTemplates(xsltSyntaxcheckFailed);
135
			transformerFailed = templateFailed.newTransformer();			
136
		}catch(TransformerConfigurationException e){
137
			if (!errorList.isEmpty()) {
138
                System.out.println(errorList.get(0).getMessageAndLocation()); // todo it seems the location information is not yet correct
139
                throw new TransformerConfigurationException(errorList.get(0).getMessageAndLocation());
140
			}else{
141
				throw e;
142
			}
143
		}		
144

  
145
		//((net.sf.saxon.Controller)transformerFailed).setMessageEmitter(mw);
146
	}
147
	
148
	/* (non-Javadoc)
149
	 * @see eu.dnetlib.data.collective.transformation.engine.core.ITransformation#transformRecord(java.lang.String, int)
150
	 */
151
	public String transformRecord(String record, int index)throws TerminationException, TransformationException{
152
		try {
153
			StreamSource s = new StreamSource(new StringReader(record));
154
			StringWriter writer = new StringWriter();
155
			StreamResult r = new StreamResult(writer);
156
			transformer.setParameter("index", index);
157
			transformer.transform(s , r);
158
			return writer.toString();
159
		}catch (TerminationException e) {
160
			log.debug(e.getLocalizedMessage());
161
			throw e;
162
		} catch (TransformerException e) {
163
			log.error(e);
164
			throw new TransformationException(e);
165
		}
166
	}
167
	
168
	public String transformRecord(String record, Map<String, String> parameters) throws TerminationException, TransformationException{
169
		try {
170
			StreamSource s = new StreamSource(new StringReader(record));
171
			StringWriter writer = new StringWriter();
172
			StreamResult r = new StreamResult(writer);
173
			for (String key: parameters.keySet()){
174
				transformer.setParameter(key, parameters.get(key));				
175
			}
176
			transformer.transform(s , r);
177
			return writer.toString();
178
		}catch (TerminationException e){
179
			log.debug(e.getLocalizedMessage());
180
			throw e;
181
		} catch (TransformerException e) {
182
			log.error(e);
183
			throw new TransformationException(e);
184
		}		
185
	}
186
	
187
	public String transformRecord(String record, String stylesheetName) throws TransformationException{
188
		if (!stylesheetName.equals(XSLSyntaxcheckfailed))
189
			throw new IllegalArgumentException("in TransformationImpl: stylesheetname " + stylesheetName + " is unsupported!" );
190
		try{
191
			StreamSource s = new StreamSource(new StringReader(record));
192
			StringWriter w = new StringWriter();
193
			StreamResult r = new StreamResult(w);
194
			transformerFailed.transform(s, r);
195
			return w.toString();			
196
		}catch (TransformerException e){
197
			log.error(e);
198
			throw new TransformationException(e);
199
		}
200
	}
201
	
202
	public String dumpStylesheet(){
203
		return xslDoc.asXML();
204

  
205
//		StringWriter writer = new StringWriter();
206
//		try {
207
//			Transformer tXsl = transformer; //.newTransformer();
208
//			tXsl.setOutputProperty(OutputKeys.INDENT, "yes");
209
//			tXsl.setOutputProperty(OutputKeys.METHOD, "xml");
210
//			tXsl.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
211
//
212
//		StreamResult r = new StreamResult(writer);
213
//		Source s = new StreamSource(new StringReader(xslDoc.asXML()));
214
//			tXsl.transform(s, r);
215
//		} catch (TransformerException e) {
216
//			// TODO Auto-generated catch block
217
//			e.printStackTrace();
218
//		}
219
//		return writer.toString();
220
	}
221
	
222
	
223
	/**
224
	 * sets the XSL template
225
	 * @param template - resource to access the XSL template
226
	 */
227
	public void setTemplate(Resource template) {
228
		this.template = template;
229
	}
230

  
231
	/**
232
	 * @return the resource to access the XSL template
233
	 */
234
	public Resource getTemplate() {
235
		return template;
236
	}
237

  
238
	public void setRuleLanguageParser(RuleLanguageParser ruleLanguageParser) {
239
		this.ruleLanguageParser = ruleLanguageParser;
240
	}
241

  
242
	public RuleLanguageParser getRuleLanguageParser() {
243
		return ruleLanguageParser;
244
	}
245

  
246
	/**
247
	 * @param stylesheetBuilder the stylesheetBuilder to set
248
	 */
249
	public void setStylesheetBuilder(StylesheetBuilder stylesheetBuilder) {
250
		this.stylesheetBuilder = stylesheetBuilder;
251
	}
252

  
253
	/**
254
	 * @return the stylesheetBuilder
255
	 */
256
	public StylesheetBuilder getStylesheetBuilder() {
257
		return stylesheetBuilder;
258
	}
259
			
260
	/**
261
	 * @return the transformation rules as String object
262
	 */
263
	protected String getTransformationRules(){
264
		// add job-properties to the rules as variables
265
		for (String key: jobConstantMap.keySet()){
266
			Rules r = new Rules();
267
			r.setVariable(key);
268
			r.setConstant("'" + jobConstantMap.get(key) + "'");
269
			ruleLanguageParser.getVariableMappingRules().put(JOBCONST_DATASINKID, r);
270
		}
271
		if (this.stylesheetBuilder == null){
272
			// create DMF compliant stylesheet builder
273
			this.stylesheetBuilder = new StylesheetBuilder();
274
			this.stylesheetBuilder.setRuleLanguageParser(this.ruleLanguageParser);
275
			NamespaceContextImpl namespaceContext = new NamespaceContextImpl();
276
			for (String prefix: ruleLanguageParser.getNamespaceDeclarations().keySet()){
277
				namespaceContext.addNamespace(prefix, ruleLanguageParser.getNamespaceDeclarations().get(prefix));				
278
			}
279
			SchemaInspector inspector = new SchemaInspector();
280
			try {
281
				inspector.inspect(this.schema.getURL(), rootElement);
282
			} catch (Exception e) {
283
				throw new IllegalStateException(e);
284
			}
285
			this.stylesheetBuilder.setNamespaceContext(namespaceContext);
286
			this.stylesheetBuilder.setSchemaInspector(inspector);
287
		}
288
		return this.stylesheetBuilder.createTemplate();
289
	}
290
	
291
	/**
292
	 * creates a stylesheet from transformation rules;
293
	 * <p>don't call this method multiple times, unless transformation configuration changes, then re-init and configure transformation</p>
294
	 * @return the stylesheet
295
	 */
296
	private Reader createStylesheet(){
297
		try { 
298
			Document rulesDoc = DocumentHelper.parseText(getTransformationRules());
299
			for(String key: this.ruleLanguageParser.getNamespaceDeclarations().keySet()){
300
				xslDoc.getRootElement().addNamespace(key, this.ruleLanguageParser.getNamespaceDeclarations().get(key));				
301
			}
302
			@SuppressWarnings("unchecked")
303
			List<Node> nodes = rulesDoc.getRootElement().selectNodes("//xsl:template");
304
			
305
			@SuppressWarnings("unchecked")
306
			List<Node> varNodes = rulesDoc.getRootElement().selectNodes("/templateroot/xsl:param");
307
			for (Node node: varNodes){				
308
				xslDoc.getRootElement().add( ((Element)node).detach() );
309
			}
310
			
311
//			xslDoc.getRootElement().add(rulesDoc.getRootElement().selectSingleNode("//xsl:param[@name='var1']").detach());
312
			for (Node node: nodes){				
313
				xslDoc.getRootElement().add( ((Element)node).detach() ); //  (rulesDoc.getRootElement().aget);				
314
			}
315
		} catch (DocumentException e) {
316
			log.error("error in creating stylesheet: " + e);
317
			throw new IllegalStateException(e);
318
		}
319
		return new StringReader(xslDoc.asXML());
320
	}
321

  
322
	/**
323
	 * @param schema the schema to set
324
	 */
325
	public void setSchema(Resource schema) {
326
		this.schema = schema;
327
	}
328

  
329
	/**
330
	 * @return the schema
331
	 */
332
	public Resource getSchema() {
333
		return schema;
334
	}
335

  
336
	@Override
337
	public Map<String, String> getStaticTransformationResults() {
338
		return this.staticResults;
339
	}
340

  
341
	@Override
342
	public Map<String, String> getJobProperties() {
343
		// TODO Auto-generated method stub
344
		return this.jobConstantMap;
345
	}
346

  
347
	@Override
348
	public Properties getLogInformation() {
349
		// TODO Auto-generated method stub
350
		return null;
351
	}
352

  
353
}
modules/unibi-data-collective-transformation-common/tags/unibi-data-collective-transformation-common-2.2.7/src/main/java/eu/dnetlib/data/collective/transformation/engine/PreProcessor.java
1
package eu.dnetlib.data.collective.transformation.engine;
2

  
3
import java.io.StringReader;
4
import java.util.HashMap;
5
import java.util.LinkedList;
6
import java.util.List;
7
import java.util.Map;
8

  
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.dom4j.Document;
12
import org.dom4j.DocumentException;
13
import org.dom4j.DocumentHelper;
14
import org.dom4j.Node;
15
import org.dom4j.XPath;
16
import org.dom4j.io.SAXReader;
17

  
18
import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy;
19
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
20
import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
21
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
22
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
23
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue;
24
import eu.dnetlib.data.collective.transformation.rulelanguage.IRule;
25
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
26
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
27

  
28
/**
29
 * @author jochen
30
 *
31
 */
32
public class PreProcessor {
33

  
34
	@SuppressWarnings("unused")
35
	private static final Log log = LogFactory.getLog(PreProcessor.class);
36
	private Convert convertFunction;
37
	private Extract extractFunction;
38
	private RetrieveValue retrieveFunction;
39
	private RegularExpression regExprFunction;
40
	private TransformationFunctionProxy functionProxy;
41
	private SAXReader reader = new SAXReader();
42
	private Map<String, String> nsMap = new HashMap<String, String>();
43

  
44
	/**
45
	 * pre-process output values from object records using a function call
46
	 * @param aFunctionCall function call object
47
	 * @param aObjectRecords list of object records
48
	 * @param aNamespaceMap map of namespace prefixes and uris
49
	 */
50
	public void preprocess( 
51
			FunctionCall aFunctionCall, 
52
			List<String> aObjectRecords, 
53
			Map<String, String> aNamespaceMap, 
54
			Map<String, String> aStaticResults, 
55
			Map<String, String> aJobProperties, 
56
			Map<String, IRule> aVarRules){
57
		this.nsMap = aNamespaceMap;
58
		FunctionResults functionResults = new FunctionResults();
59
		
60
		try {
61
			if (aFunctionCall.getExternalFunctionName().equals("extract")){
62
				String featureName = aFunctionCall.getParameters().get(Extract.paramNameFeature);
63
				functionResults.addAll(extractFunction.execute(aObjectRecords, featureName));
64
			}else{
65
				for (String objRecord: aObjectRecords){
66
					String result = null;
67
						if (aFunctionCall.getExternalFunctionName().equals("convert")){
68
							if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){
69
								functionResults.add(aStaticResults.get(aFunctionCall.getUuid()));
70
							}else{
71
								String vocabName = aFunctionCall.getParameters().get(Convert.paramVocabularyName);
72
								String fieldExpr = aFunctionCall.getParameters().get(Convert.paramFieldValue);
73
								List<String> recordValues = getValuesFromRecord(objRecord, fieldExpr);
74
								if (aFunctionCall.isStatic())
75
									aStaticResults.put(aFunctionCall.getUuid(), convertFunction.executeSingleValue(vocabName, recordValues));
76
								else
77
									functionResults.add(convertFunction.executeAllValues(vocabName, recordValues));
78
							}
79
						}else if (aFunctionCall.getExternalFunctionName().equals("getValue")){
80
							if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid()))
81
								functionResults.add(aStaticResults.get(aFunctionCall.getUuid()));
82
							else{
83
								String functionName = aFunctionCall.getParameters().get(RetrieveValue.paramFunctionName);
84
								result = retrieveFunction.executeSingleValue(functionName, aFunctionCall.getArguments(), objRecord, nsMap);
85
								functionResults.add(result);
86
								if (aFunctionCall.isStatic())
87
									aStaticResults.put(aFunctionCall.getUuid(), result);
88
							}
89
						}else if (aFunctionCall.getExternalFunctionName().equals("regExpr")){
90
							// TODO
91
							if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){
92
								//log.debug("static functioncal; static result exist to compute regexpr: " + aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr));
93
//								functionResults.add(aStaticResults.get(aFunctionCall.getUuid()));
94
							}else{
95
//								log.debug("static functioncal to compute regexpr: " + aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr));
96
								String regularExpression = aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr); //.replaceAll("'", "");
97
								String expression1 = aFunctionCall.getParameters().get(RegularExpression.paramExpr1);
98
								List<String> recordValues = null;
99
								// distinguish xpath-expr, jobConst, var
100
//								log.debug("expression1: " + expression1);
101
								
102
								if (aJobProperties.containsKey(expression1)){
103
									recordValues = new LinkedList<String>();
104
									recordValues.add(aJobProperties.get(expression1));
105
								}else{
106
									recordValues = getValuesFromRecord(objRecord, expression1);
107
								}
108
								
109
								String expression2 = aFunctionCall.getParameters().get(RegularExpression.paramExpr2);
110
								String replacement = "";
111
								if (aJobProperties.containsKey(expression2)){
112
									replacement = aJobProperties.get(expression2);
113
								}else if (aVarRules.containsKey(expression2)){
114
									Rules varRule = (Rules)aVarRules.get(expression2);
115
									replacement = varRule.getConstant().replace("'", ""); // currently limited to constant rules.
116
								}else {
117
									replacement = getValuesFromRecord(objRecord, expression2).get(0); // get the first available value
118
								}
119
								List<String> regExprResults = new LinkedList<String>();
120
								for (String fieldValue: recordValues){
121
									try {
122
										int lastSlash = regularExpression.lastIndexOf("/");
123
										String trailingOptions = regularExpression.substring(lastSlash);
124
										int replacementSlash = regularExpression.substring(0, lastSlash).lastIndexOf("/");
125
										String replacementFromExpression = regularExpression.substring(replacementSlash + 1, lastSlash);
126
										String newRegExpr = regularExpression.substring(0, replacementSlash + 1) + replacement + replacementFromExpression + trailingOptions; // ???
127
										result = regExprFunction.executeSingleValue(newRegExpr, fieldValue, replacement);
128
										regExprResults.add(result);
129
									} catch (ProcessingException e) {
130
										throw new IllegalStateException(e);
131
									}
132
//									regExprResults.add(regExprFunction.executeSingleValue(regularExpression, fieldValue, expression2));
133
								}
134
								functionResults.add(regExprResults);
135
								// assuming 1 result only
136
								if (aFunctionCall.isStatic()){
137
									aStaticResults.put(aFunctionCall.getUuid(), result);
138
								}
139

  
140
								// unsupported
141
//								if (aFunctionCall.isStatic()){
142
//									aStaticResults.put(aFunctionCall.getUuid(), result);
143
//								}
144
							}
145
						}
146
				}			
147
			}		
148
		} catch (ProcessingException e) {
149
			throw new IllegalStateException(e);
150
		} catch (DocumentException e) {
151
			throw new IllegalStateException(e);
152
		}
153
		functionProxy.setResults(aFunctionCall.getUuid(), functionResults);
154
	}
155

  
156
	public void setFunctionProxy(TransformationFunctionProxy functionProxy) {
157
		this.functionProxy = functionProxy;
158
	}
159

  
160
	public TransformationFunctionProxy getFunctionProxy() {
161
		return functionProxy;
162
	}
163

  
164
	public void setConvertFunction(Convert convertFunction) {
165
		this.convertFunction = convertFunction;
166
	}
167

  
168
	public Convert getConvertFunction() {
169
		return convertFunction;
170
	}
171

  
172
	/**
173
	 * @param retrieveFunction the retrieveFunction to set
174
	 */
175
	public void setRetrieveFunction(RetrieveValue retrieveFunction) {
176
		this.retrieveFunction = retrieveFunction;
177
	}
178

  
179
	/**
180
	 * @return the retrieveFunction
181
	 */
182
	public RetrieveValue getRetrieveFunction() {
183
		return retrieveFunction;
184
	}
185
	
186
	/**
187
	 * @return the regExprFunction
188
	 */
189
	public RegularExpression getRegExprFunction() {
190
		return regExprFunction;
191
	}
192

  
193
	/**
194
	 * @param regExprFunction the regExprFunction to set
195
	 */
196
	public void setRegExprFunction(RegularExpression regExprFunction) {
197
		this.regExprFunction = regExprFunction;
198
	}
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff