Project

General

Profile

« Previous | Next » 

Revision 27074

Fixed the IdentifierExtract function so that it returns only unique identifier values.

View differences:

modules/unibi-data-collective-transformation-common/trunk/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java
1 1
package eu.dnetlib.data.collective.transformation.engine.functions;
2 2

  
3
import java.util.HashSet;
3 4
import java.util.LinkedList;
4 5
import java.util.List;
6
import java.util.Set;
5 7
import java.util.regex.Matcher;
6 8
import java.util.regex.Pattern;
7 9

  
......
42 44
	 */
43 45
	public NodeList extract(List<String> aXpathExprList, Node aInput,
44 46
			String aRegExpression, Document aDocument, XPath aXpath) throws ProcessingException {
45
		List<String> identifierList = new LinkedList<String>();
47
		Set<String> identifierSet = new HashSet<String>();
46 48
		
47 49
//		log.debug("node: length: " + aInput.getChildNodes().getLength());
48 50
		log.debug("regular expression : " + aRegExpression);
......
54 56
				Matcher m = p.matcher(text);
55 57
				while (m.find()){
56 58
					log.debug("extracted identifier: " + m.group());
57
					identifierList.add(m.group());
59
					identifierSet.add(m.group());
58 60
				}
59 61
			}
60
			return toNodeList(identifierList, aDocument);
62
			return toNodeList(identifierSet, aDocument);
61 63
		} catch (XPathExpressionException e) {
62 64
			e.printStackTrace();
63 65
			throw new ProcessingException(e);
......
69 71
	
70 72
	/**
71 73
	 * create a list of nodes from a list of string values
72
	 * @param aValueList
74
	 * @param aValueSet, set of unique values
73 75
	 * @param aDocument
74 76
	 * @return nodeList
75 77
	 */
76
	private NodeList toNodeList(List<String> aValueList, Document aDocument){
78
	private NodeList toNodeList(Set<String> aValueSet, Document aDocument){
77 79
		DocumentFragment dFrag = aDocument.createDocumentFragment();
78 80
		Element root = aDocument.createElement("root");
79 81
		dFrag.appendChild(root);
80
		for (String value: aValueList){
82
		for (String value: aValueSet){
81 83
			Element eVal = aDocument.createElement("value");
82 84
			eVal.setTextContent(value);
83 85
			root.appendChild(eVal);

Also available in: Unified diff