Revision 46228
Added by Alessia Bardi over 7 years ago
modules/dnet-msro-service/branches/saxonHE/src/main/java/eu/dnetlib/msro/workflows/nodes/transform/X3MTransformJobNode.java | ||
---|---|---|
3 | 3 |
import java.io.ByteArrayOutputStream; |
4 | 4 |
import java.io.IOException; |
5 | 5 |
import java.io.InputStream; |
6 |
import java.io.PrintStream;
|
|
6 |
import java.time.LocalDateTime;
|
|
7 | 7 |
import java.util.List; |
8 |
import javax.xml.parsers.DocumentBuilder; |
|
9 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
8 | 10 |
|
9 | 11 |
import eu.delving.x3ml.X3MLEngineFactory; |
10 | 12 |
import eu.delving.x3ml.X3MLEngineFactory.OutputFormat; |
11 | 13 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
12 | 14 |
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory; |
15 |
import eu.dnetlib.miscutils.functional.xml.SaxonHelper; |
|
16 |
import eu.dnetlib.miscutils.functional.xml.XMLIndenter; |
|
13 | 17 |
import eu.dnetlib.msro.workflows.graph.Arc; |
14 | 18 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
15 | 19 |
import eu.dnetlib.msro.workflows.procs.Env; |
... | ... | |
17 | 21 |
import eu.dnetlib.rmi.enabling.ISLookUpException; |
18 | 22 |
import eu.dnetlib.rmi.enabling.ISLookUpService; |
19 | 23 |
import eu.dnetlib.rmi.manager.MSROException; |
24 |
import net.sf.saxon.s9api.SaxonApiException; |
|
25 |
import net.sf.saxon.s9api.Serializer.Property; |
|
26 |
import net.sf.saxon.s9api.XPathSelector; |
|
20 | 27 |
import org.apache.commons.io.IOUtils; |
28 |
import org.apache.commons.lang3.StringUtils; |
|
21 | 29 |
import org.apache.commons.logging.Log; |
22 | 30 |
import org.apache.commons.logging.LogFactory; |
23 | 31 |
import org.springframework.beans.factory.annotation.Autowired; |
32 |
import org.w3c.dom.Document; |
|
33 |
import org.w3c.dom.Element; |
|
34 |
import org.w3c.dom.Node; |
|
24 | 35 |
|
25 | 36 |
public class X3MTransformJobNode extends SimpleJobNode { |
26 | 37 |
|
... | ... | |
33 | 44 |
|
34 | 45 |
private boolean verboseLogging; |
35 | 46 |
|
47 |
private XPathSelector xpathSelectorMetadata; |
|
48 |
private XPathSelector xpathSelectorHeader; |
|
49 |
private XPathSelector xpathSelectorFooter; |
|
50 |
|
|
36 | 51 |
@Autowired |
37 | 52 |
private ResultSetFactory resultSetFactory; |
38 | 53 |
@Autowired |
39 | 54 |
private UniqueServiceLocator serviceLocator; |
55 |
@Autowired |
|
56 |
private SaxonHelper saxonHelper; |
|
40 | 57 |
|
41 | 58 |
@Override |
42 | 59 |
protected String execute(final Env env) throws Exception { |
60 |
log.debug("Mapping profile id: " + mappingProfileId); |
|
61 |
log.debug("Mapping Policy profile id: " + mappingPolicyProfileId); |
|
62 |
|
|
63 |
LocalDateTime now = LocalDateTime.now(); |
|
43 | 64 |
final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class); |
44 | 65 |
if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); } |
66 |
prepareXpathSelectors(); |
|
45 | 67 |
|
46 |
X3MLEngineFactory x3mEngineFactory = getConfiguredX3MEngineFactory(); |
|
47 |
|
|
48 | 68 |
final ResultSet<String> rsOut = this.resultSetFactory.map(rsIn, String.class, record -> { |
69 |
//log.debug("Transforming: " + record); |
|
70 |
String metadata = extractFromRecord(record, xpathSelectorMetadata); |
|
49 | 71 |
final ByteArrayOutputStream os = new ByteArrayOutputStream(); |
50 |
final PrintStream ps = new PrintStream(os);
|
|
72 |
X3MLEngineFactory x3mEngineFactory = getConfiguredX3MEngineFactory();
|
|
51 | 73 |
try { |
52 |
x3mEngineFactory.withInput(IOUtils.toInputStream(record, "UTF-8"))
|
|
53 |
.withOutput(ps, OutputFormat.RDF_XML);
|
|
74 |
x3mEngineFactory.withInput(IOUtils.toInputStream(metadata, "UTF-8"))
|
|
75 |
.withOutput(os, OutputFormat.RDF_XML);
|
|
54 | 76 |
} catch (IOException e) { |
55 |
log.fatal("Could not get stream from record. Cause: "+e);
|
|
77 |
log.fatal("Could not get stream from record. Cause: " + e);
|
|
56 | 78 |
throw new RuntimeException(e); |
57 | 79 |
} |
58 | 80 |
x3mEngineFactory.execute(); |
59 |
return new String(os.toByteArray()); |
|
81 |
String res = new String(os.toByteArray()); |
|
82 |
log.debug("Transformed: " + res); |
|
83 |
|
|
84 |
String header = extractFromRecord(record, xpathSelectorHeader); |
|
85 |
String provenanceFooter = extractFromRecord(record, xpathSelectorFooter); |
|
86 |
return buildXML(header, now.toString(), res, provenanceFooter); |
|
60 | 87 |
}); |
61 | 88 |
|
62 | 89 |
env.setAttribute(this.outputEprParam, rsOut); |
... | ... | |
64 | 91 |
return Arc.DEFAULT_ARC; |
65 | 92 |
} |
66 | 93 |
|
67 |
private X3MLEngineFactory getConfiguredX3MEngineFactory() throws IOException, ISLookUpException { |
|
68 |
InputStream mapping = getProfileAsStream(mappingProfileId); |
|
69 |
InputStream policy = getProfileAsStream(mappingPolicyProfileId); |
|
70 |
X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create() |
|
71 |
.withGeneratorPolicy(policy) |
|
72 |
.withMappings(mapping); |
|
73 |
if(verboseLogging) |
|
74 |
x3mEngineFactory.withVerboseLogging(); |
|
75 |
return x3mEngineFactory; |
|
94 |
private String buildXML(String header, String transformationDate, String metadata, String provenance) { |
|
95 |
try { |
|
96 |
XMLIndenter xmlHelper = new XMLIndenter(); |
|
97 |
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); |
|
98 |
DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); |
|
99 |
// root elements |
|
100 |
Document doc = docBuilder.newDocument(); |
|
101 |
Element rootElement = doc.createElementNS("http://www.openarchives.org/OAI/2.0/", "oai:record"); |
|
102 |
Element headerElem = docBuilder.parse(IOUtils.toInputStream(header, "UTF-8")).getDocumentElement(); |
|
103 |
Node headerNode = doc.importNode(headerElem, true); |
|
104 |
rootElement.appendChild(headerNode); |
|
105 |
Element transDate = doc.createElementNS("http://www.driver-repository.eu/namespace/dr", "dr:dateOfTransformation"); |
|
106 |
transDate.setTextContent(transformationDate); |
|
107 |
headerNode.appendChild(transDate); |
|
108 |
Element metadataElement = doc.createElementNS("http://www.openarchives.org/OAI/2.0/", "oai:metadata"); |
|
109 |
Element contentElem = docBuilder.parse(IOUtils.toInputStream(metadata, "UTF-8")).getDocumentElement(); |
|
110 |
Node contentNode = doc.importNode(contentElem, true); |
|
111 |
metadataElement.appendChild(contentNode); |
|
112 |
rootElement.appendChild(metadataElement); |
|
113 |
Element aboutElem = docBuilder.parse(IOUtils.toInputStream(provenance, "UTF-8")).getDocumentElement(); |
|
114 |
Node aboutNode = doc.importNode(aboutElem, true); |
|
115 |
rootElement.appendChild(aboutNode); |
|
116 |
|
|
117 |
doc.appendChild(rootElement); |
|
118 |
return xmlHelper.indent(doc); |
|
119 |
} catch (Exception e) { |
|
120 |
throw new RuntimeException("Cannot build the transformed xml file", e); |
|
121 |
} |
|
122 |
|
|
76 | 123 |
} |
77 | 124 |
|
125 |
private void prepareXpathSelectors() throws SaxonApiException { |
|
126 |
xpathSelectorHeader = this.saxonHelper.help().prepareXPathSelector("//oai:header", "oai", "http://www.openarchives.org/OAI/2.0/"); |
|
127 |
xpathSelectorMetadata = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/*", "oai", "http://www.openarchives.org/OAI/2.0/"); |
|
128 |
xpathSelectorFooter = this.saxonHelper.help().prepareXPathSelector("//oai:about", "oai", "http://www.openarchives.org/OAI/2.0/"); |
|
129 |
} |
|
130 |
|
|
131 |
private String extractFromRecord(final String record, final XPathSelector xPathSelector) { |
|
132 |
try { |
|
133 |
String s = this.saxonHelper.help().setSerializerProperty(Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector); |
|
134 |
log.debug("Extracted: " + s); |
|
135 |
return s; |
|
136 |
} catch (SaxonApiException e) { |
|
137 |
throw new RuntimeException("Cannot extract content ", e); |
|
138 |
} |
|
139 |
} |
|
140 |
|
|
141 |
private X3MLEngineFactory getConfiguredX3MEngineFactory() { |
|
142 |
InputStream mapping = null; |
|
143 |
try { |
|
144 |
mapping = getProfileAsStream(mappingProfileId); |
|
145 |
X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(mapping); |
|
146 |
if (StringUtils.isNotBlank(mappingPolicyProfileId)) { |
|
147 |
InputStream policy = getProfileAsStream(mappingPolicyProfileId); |
|
148 |
x3mEngineFactory.withGeneratorPolicy(policy); |
|
149 |
} |
|
150 |
if (verboseLogging) |
|
151 |
x3mEngineFactory.withVerboseLogging(); |
|
152 |
return x3mEngineFactory; |
|
153 |
} catch (ISLookUpException | IOException e) { |
|
154 |
throw new RuntimeException("Cannot create X3MLEngineFactory", e); |
|
155 |
} |
|
156 |
|
|
157 |
} |
|
158 |
|
|
78 | 159 |
private InputStream getProfileAsStream(String profId) throws ISLookUpException, IOException { |
79 | 160 |
String xquery = "string(collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" + |
80 | 161 |
profId + "']//CODE)"; |
81 | 162 |
List<String> res = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery); |
82 |
if(res.isEmpty()){
|
|
83 |
throw new RuntimeException("Can't find transformation rule CODE for "+profId);
|
|
163 |
if (res.isEmpty()) {
|
|
164 |
throw new RuntimeException("Can't find transformation rule CODE for " + profId);
|
|
84 | 165 |
} |
85 |
String code =res.get(0); |
|
166 |
String code = res.get(0); |
|
167 |
// log.debug(code); |
|
86 | 168 |
return IOUtils.toInputStream(code, "UTF-8"); |
87 | 169 |
} |
88 | 170 |
|
Also available in: Unified diff
Building correct D-Net XMLs after transformation with X3M