1
|
package eu.dnetlib.msro.workflows.nodes.claims;
|
2
|
|
3
|
import java.io.StringReader;
|
4
|
import java.util.function.UnaryOperator;
|
5
|
|
6
|
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory;
|
7
|
import eu.dnetlib.msro.workflows.graph.Arc;
|
8
|
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
|
9
|
import eu.dnetlib.msro.workflows.nodes.hadoop.SetHdfsFileJobNode;
|
10
|
import eu.dnetlib.msro.workflows.procs.Env;
|
11
|
import eu.dnetlib.rmi.common.ResultSet;
|
12
|
import org.apache.commons.logging.Log;
|
13
|
import org.apache.commons.logging.LogFactory;
|
14
|
import org.dom4j.Document;
|
15
|
import org.dom4j.Element;
|
16
|
import org.dom4j.Namespace;
|
17
|
import org.dom4j.QName;
|
18
|
import org.dom4j.io.SAXReader;
|
19
|
import org.springframework.beans.factory.annotation.Autowired;
|
20
|
|
21
|
public class DecapsuleClaimsJobNode extends SimpleJobNode {
|
22
|
|
23
|
/**
|
24
|
* logger.
|
25
|
*/
|
26
|
private static final Log log = LogFactory.getLog(SetHdfsFileJobNode.class);
|
27
|
|
28
|
private static final Namespace OAF_NS = new Namespace("oaf", "http://namespace.openaire.eu/oaf");
|
29
|
private static final double DEFAULT_TRUST = 0.9;
|
30
|
|
31
|
private String inputEprParam;
|
32
|
private String outputEprParam;
|
33
|
|
34
|
@Autowired
|
35
|
private ResultSetFactory resultSetFactory;
|
36
|
|
37
|
@Override
|
38
|
protected String execute(final Env env) throws Exception {
|
39
|
final SAXReader reader = new SAXReader();
|
40
|
|
41
|
final UnaryOperator<String> mapper = row -> {
|
42
|
try {
|
43
|
final Document docRow = reader.read(new StringReader(row));
|
44
|
|
45
|
final String xml = docRow.valueOf("//ROW/FIELD[@name = 'xml']");
|
46
|
final String provenance = docRow.valueOf("//ROW/FIELD[@name = 'provenance']");
|
47
|
|
48
|
final Document doc = reader.read(new StringReader(xml));
|
49
|
final Element record = (Element) doc.selectSingleNode("//*[local-name() = 'record']");
|
50
|
final Element about = record.addElement(new QName("about"));
|
51
|
final Element dataInfo = about.addElement(new QName("datainfo", OAF_NS));
|
52
|
|
53
|
dataInfo.addElement(new QName("inferred", OAF_NS)).setText("false");
|
54
|
dataInfo.addElement(new QName("deletedbyinference", OAF_NS)).setText("false");
|
55
|
dataInfo.addElement(new QName("trust", OAF_NS)).setText(Double.toString(DEFAULT_TRUST));
|
56
|
dataInfo.addElement(new QName("inferenceprovenance", OAF_NS)).setText("");
|
57
|
|
58
|
final Element action = dataInfo.addElement(new QName("provenanceaction", OAF_NS));
|
59
|
action.addAttribute("schemename", "dnet:provenanceActions");
|
60
|
action.addAttribute("schemeid", "dnet:provenanceActions");
|
61
|
action.addAttribute("classname", provenance);
|
62
|
action.addAttribute("classid", provenance);
|
63
|
|
64
|
return doc.asXML();
|
65
|
} catch (final Throwable e) {
|
66
|
log.error("Error parsing row: " + row, e);
|
67
|
throw new IllegalArgumentException("Error parsing row: " + row, e);
|
68
|
}
|
69
|
};
|
70
|
|
71
|
final ResultSet<?> inputEpr = env.getAttribute(getInputEprParam(), ResultSet.class);
|
72
|
final ResultSet<String> epr = this.resultSetFactory.map(inputEpr, String.class, mapper);
|
73
|
|
74
|
env.setAttribute(getOutputEprParam(), epr);
|
75
|
|
76
|
return Arc.DEFAULT_ARC;
|
77
|
}
|
78
|
|
79
|
public String getInputEprParam() {
|
80
|
return this.inputEprParam;
|
81
|
}
|
82
|
|
83
|
public void setInputEprParam(final String inputEprParam) {
|
84
|
this.inputEprParam = inputEprParam;
|
85
|
}
|
86
|
|
87
|
public String getOutputEprParam() {
|
88
|
return this.outputEprParam;
|
89
|
}
|
90
|
|
91
|
public void setOutputEprParam(final String outputEprParam) {
|
92
|
this.outputEprParam = outputEprParam;
|
93
|
}
|
94
|
|
95
|
}
|