Revision 27074
Added by Jochen Schirrwagen over 10 years ago
modules/unibi-data-collective-transformation-common/trunk/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.collective.transformation.engine.functions; |
2 | 2 |
|
3 |
import java.util.HashSet; |
|
3 | 4 |
import java.util.LinkedList; |
4 | 5 |
import java.util.List; |
6 |
import java.util.Set; |
|
5 | 7 |
import java.util.regex.Matcher; |
6 | 8 |
import java.util.regex.Pattern; |
7 | 9 |
|
... | ... | |
42 | 44 |
*/ |
43 | 45 |
public NodeList extract(List<String> aXpathExprList, Node aInput, |
44 | 46 |
String aRegExpression, Document aDocument, XPath aXpath) throws ProcessingException { |
45 |
List<String> identifierList = new LinkedList<String>();
|
|
47 |
Set<String> identifierSet = new HashSet<String>();
|
|
46 | 48 |
|
47 | 49 |
// log.debug("node: length: " + aInput.getChildNodes().getLength()); |
48 | 50 |
log.debug("regular expression : " + aRegExpression); |
... | ... | |
54 | 56 |
Matcher m = p.matcher(text); |
55 | 57 |
while (m.find()){ |
56 | 58 |
log.debug("extracted identifier: " + m.group()); |
57 |
identifierList.add(m.group());
|
|
59 |
identifierSet.add(m.group());
|
|
58 | 60 |
} |
59 | 61 |
} |
60 |
return toNodeList(identifierList, aDocument);
|
|
62 |
return toNodeList(identifierSet, aDocument);
|
|
61 | 63 |
} catch (XPathExpressionException e) { |
62 | 64 |
e.printStackTrace(); |
63 | 65 |
throw new ProcessingException(e); |
... | ... | |
69 | 71 |
|
70 | 72 |
/** |
71 | 73 |
* create a list of nodes from a list of string values |
72 |
* @param aValueList
|
|
74 |
* @param aValueSet, set of unique values
|
|
73 | 75 |
* @param aDocument |
74 | 76 |
* @return nodeList |
75 | 77 |
*/ |
76 |
private NodeList toNodeList(List<String> aValueList, Document aDocument){
|
|
78 |
private NodeList toNodeList(Set<String> aValueSet, Document aDocument){
|
|
77 | 79 |
DocumentFragment dFrag = aDocument.createDocumentFragment(); |
78 | 80 |
Element root = aDocument.createElement("root"); |
79 | 81 |
dFrag.appendChild(root); |
80 |
for (String value: aValueList){
|
|
82 |
for (String value: aValueSet){
|
|
81 | 83 |
Element eVal = aDocument.createElement("value"); |
82 | 84 |
eVal.setTextContent(value); |
83 | 85 |
root.appendChild(eVal); |
Also available in: Unified diff
Fixed the IdentifierExtract function to return only unique identifier values.