Project

General

Profile

« Previous | Next » 

Revision 46228

Building correct D-Net XMLs after transformation with X3M

View differences:

modules/dnet-msro-service/branches/saxonHE/src/main/java/eu/dnetlib/msro/workflows/nodes/transform/X3MTransformJobNode.java
3 3
import java.io.ByteArrayOutputStream;
4 4
import java.io.IOException;
5 5
import java.io.InputStream;
6
import java.io.PrintStream;
6
import java.time.LocalDateTime;
7 7
import java.util.List;
8
import javax.xml.parsers.DocumentBuilder;
9
import javax.xml.parsers.DocumentBuilderFactory;
8 10

  
9 11
import eu.delving.x3ml.X3MLEngineFactory;
10 12
import eu.delving.x3ml.X3MLEngineFactory.OutputFormat;
11 13
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
12 14
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory;
15
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
16
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
13 17
import eu.dnetlib.msro.workflows.graph.Arc;
14 18
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
15 19
import eu.dnetlib.msro.workflows.procs.Env;
......
17 21
import eu.dnetlib.rmi.enabling.ISLookUpException;
18 22
import eu.dnetlib.rmi.enabling.ISLookUpService;
19 23
import eu.dnetlib.rmi.manager.MSROException;
24
import net.sf.saxon.s9api.SaxonApiException;
25
import net.sf.saxon.s9api.Serializer.Property;
26
import net.sf.saxon.s9api.XPathSelector;
20 27
import org.apache.commons.io.IOUtils;
28
import org.apache.commons.lang3.StringUtils;
21 29
import org.apache.commons.logging.Log;
22 30
import org.apache.commons.logging.LogFactory;
23 31
import org.springframework.beans.factory.annotation.Autowired;
32
import org.w3c.dom.Document;
33
import org.w3c.dom.Element;
34
import org.w3c.dom.Node;
24 35

  
25 36
public class X3MTransformJobNode extends SimpleJobNode {
26 37

  
......
33 44

  
34 45
	private boolean verboseLogging;
35 46

  
47
	private XPathSelector xpathSelectorMetadata;
48
	private XPathSelector xpathSelectorHeader;
49
	private XPathSelector xpathSelectorFooter;
50

  
36 51
	@Autowired
37 52
	private ResultSetFactory resultSetFactory;
38 53
	@Autowired
39 54
	private UniqueServiceLocator serviceLocator;
55
	@Autowired
56
	private SaxonHelper saxonHelper;
40 57

  
41 58
	@Override
42 59
	protected String execute(final Env env) throws Exception {
60
		log.debug("Mapping profile id: " + mappingProfileId);
61
		log.debug("Mapping Policy profile id: " + mappingPolicyProfileId);
62

  
63
		LocalDateTime now = LocalDateTime.now();
43 64
		final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class);
44 65
		if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); }
66
		prepareXpathSelectors();
45 67

  
46
		X3MLEngineFactory x3mEngineFactory = getConfiguredX3MEngineFactory();
47

  
48 68
		final ResultSet<String> rsOut = this.resultSetFactory.map(rsIn, String.class, record -> {
69
			//log.debug("Transforming: " + record);
70
			String metadata = extractFromRecord(record, xpathSelectorMetadata);
49 71
			final ByteArrayOutputStream os = new ByteArrayOutputStream();
50
			final PrintStream ps = new PrintStream(os);
72
			X3MLEngineFactory x3mEngineFactory = getConfiguredX3MEngineFactory();
51 73
			try {
52
				x3mEngineFactory.withInput(IOUtils.toInputStream(record, "UTF-8"))
53
						.withOutput(ps, OutputFormat.RDF_XML);
74
				x3mEngineFactory.withInput(IOUtils.toInputStream(metadata, "UTF-8"))
75
						.withOutput(os, OutputFormat.RDF_XML);
54 76
			} catch (IOException e) {
55
				log.fatal("Could not get stream from record. Cause: "+e);
77
				log.fatal("Could not get stream from record. Cause: " + e);
56 78
				throw new RuntimeException(e);
57 79
			}
58 80
			x3mEngineFactory.execute();
59
			return new String(os.toByteArray());
81
			String res = new String(os.toByteArray());
82
			log.debug("Transformed: " + res);
83

  
84
			String header = extractFromRecord(record, xpathSelectorHeader);
85
			String provenanceFooter = extractFromRecord(record, xpathSelectorFooter);
86
			return buildXML(header, now.toString(), res, provenanceFooter);
60 87
		});
61 88

  
62 89
		env.setAttribute(this.outputEprParam, rsOut);
......
64 91
		return Arc.DEFAULT_ARC;
65 92
	}
66 93

  
67
	private X3MLEngineFactory getConfiguredX3MEngineFactory() throws IOException, ISLookUpException {
68
		InputStream mapping = getProfileAsStream(mappingProfileId);
69
		InputStream policy = getProfileAsStream(mappingPolicyProfileId);
70
		X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create()
71
				.withGeneratorPolicy(policy)
72
				.withMappings(mapping);
73
		if(verboseLogging)
74
			x3mEngineFactory.withVerboseLogging();
75
		return x3mEngineFactory;
94
	/**
	 * Assembles a new {@code oai:record} XML document from the given parts and returns it as a string.
	 * <p>
	 * The resulting record contains, in this order: the document element parsed from {@code header}
	 * (with an extra {@code dr:dateOfTransformation} child appended to it), an {@code oai:metadata}
	 * element wrapping the document element parsed from {@code metadata}, and the document element
	 * parsed from {@code provenance}.
	 *
	 * @param header             XML string; its document element becomes the first child of the record
	 * @param transformationDate text content of the {@code dr:dateOfTransformation} element
	 * @param metadata           XML string; its document element is placed inside {@code oai:metadata}
	 * @param provenance         XML string; its document element becomes the last child of the record
	 * @return the value produced by {@code XMLIndenter.indent(doc)} for the assembled document
	 * @throws RuntimeException wrapping any exception raised while parsing or building the document
	 */
	private String buildXML(String header, String transformationDate, String metadata, String provenance) {
95
		try {
96
			XMLIndenter xmlHelper = new XMLIndenter();
			// NOTE(review): the factory is used with its default configuration. JAXP factories are not
			// namespace-aware by default, while this method mixes createElementNS with parsed fragments,
			// and DTD/external-entity processing is not disabled here - confirm both are intended.
97
			DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
98
			DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
99
			// root elements
100
			Document doc = docBuilder.newDocument();
101
			Element rootElement = doc.createElementNS("http://www.openarchives.org/OAI/2.0/", "oai:record");
			// Each input string is parsed as a standalone document; its root element is then imported
			// (deep copy) into the target document before being attached.
102
			Element headerElem = docBuilder.parse(IOUtils.toInputStream(header, "UTF-8")).getDocumentElement();
103
			Node headerNode = doc.importNode(headerElem, true);
104
			rootElement.appendChild(headerNode);
105
			Element transDate = doc.createElementNS("http://www.driver-repository.eu/namespace/dr", "dr:dateOfTransformation");
106
			transDate.setTextContent(transformationDate);
			// The transformation date is added inside the imported header, not directly under the record.
107
			headerNode.appendChild(transDate);
108
			Element metadataElement = doc.createElementNS("http://www.openarchives.org/OAI/2.0/", "oai:metadata");
109
			Element contentElem = docBuilder.parse(IOUtils.toInputStream(metadata, "UTF-8")).getDocumentElement();
110
			Node contentNode = doc.importNode(contentElem, true);
111
			metadataElement.appendChild(contentNode);
112
			rootElement.appendChild(metadataElement);
113
			Element aboutElem = docBuilder.parse(IOUtils.toInputStream(provenance, "UTF-8")).getDocumentElement();
114
			Node aboutNode = doc.importNode(aboutElem, true);
115
			rootElement.appendChild(aboutNode);
116

  
117
			doc.appendChild(rootElement);
118
			return xmlHelper.indent(doc);
119
		} catch (Exception e) {
			// Every failure (parser configuration, malformed input, I/O) is reported the same way.
120
			throw new RuntimeException("Cannot build the transformed xml file", e);
121
		}
122

  
76 123
	}
77 124

  
125
	/**
	 * Initializes the three XPath selector fields used to split an input record.
	 * <p>
	 * Each selector is prepared through {@code saxonHelper.help().prepareXPathSelector(...)} with the
	 * prefix {@code oai} bound to {@code http://www.openarchives.org/OAI/2.0/}:
	 * {@code //oai:header} for the header, {@code //oai:metadata/*} for the children of the metadata
	 * element, and {@code //oai:about} for the footer.
	 *
	 * @throws SaxonApiException propagated from the selector preparation (presumably when an
	 *                           expression cannot be compiled - confirm against SaxonHelper)
	 */
	private void prepareXpathSelectors() throws SaxonApiException {
126
		xpathSelectorHeader = this.saxonHelper.help().prepareXPathSelector("//oai:header", "oai", "http://www.openarchives.org/OAI/2.0/");
127
		xpathSelectorMetadata = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/*", "oai", "http://www.openarchives.org/OAI/2.0/");
128
		xpathSelectorFooter = this.saxonHelper.help().prepareXPathSelector("//oai:about", "oai", "http://www.openarchives.org/OAI/2.0/");
129
	}
130

  
131
	/**
	 * Evaluates the given XPath selector against a record and returns the result as a string.
	 * <p>
	 * The serializer is configured with {@code OMIT_XML_DECLARATION = "yes"} before evaluation, and
	 * the extracted string is logged at debug level.
	 *
	 * @param record        the record (as a string) to evaluate the selector on
	 * @param xPathSelector the prepared selector to evaluate
	 * @return the string returned by {@code evaluateSingleAsString} (presumably the serialized
	 *         single match - confirm against SaxonHelper)
	 * @throws RuntimeException wrapping the {@link SaxonApiException} raised by the evaluation
	 */
	private String extractFromRecord(final String record, final XPathSelector xPathSelector) {
132
		try {
133
			String s = this.saxonHelper.help().setSerializerProperty(Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector);
134
			log.debug("Extracted: " + s);
135
			return s;
136
		} catch (SaxonApiException e) {
			// Converted to unchecked; the original exception is kept as the cause.
137
			throw new RuntimeException("Cannot extract content ", e);
138
		}
139
	}
140

  
141
	/**
	 * Creates an {@link X3MLEngineFactory} configured from this node's settings.
	 * <p>
	 * The mappings are always loaded from the profile identified by {@code mappingProfileId}. A
	 * generator policy is added only when {@code mappingPolicyProfileId} is not blank, and verbose
	 * logging is switched on only when {@code verboseLogging} is set.
	 *
	 * @return the configured factory
	 * @throws RuntimeException wrapping an {@link ISLookUpException} or {@link IOException} raised
	 *                          while loading either profile
	 */
	private X3MLEngineFactory getConfiguredX3MEngineFactory() {
142
		InputStream mapping = null;
143
		try {
144
			mapping = getProfileAsStream(mappingProfileId);
145
			X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(mapping);
			// The policy profile is optional: it is looked up only if an id has been configured.
146
			if (StringUtils.isNotBlank(mappingPolicyProfileId)) {
147
				InputStream policy = getProfileAsStream(mappingPolicyProfileId);
148
				x3mEngineFactory.withGeneratorPolicy(policy);
149
			}
150
			if (verboseLogging)
151
				x3mEngineFactory.withVerboseLogging();
152
			return x3mEngineFactory;
153
		} catch (ISLookUpException | IOException e) {
			// Converted to unchecked; the original exception is kept as the cause.
154
			throw new RuntimeException("Cannot create X3MLEngineFactory", e);
155
		}
156

  
157
	}
158

  
78 159
	/**
	 * Looks up the {@code CODE} content of a transformation rule profile and returns it as a stream.
	 * <p>
	 * An XQuery over {@code /db/DRIVER/TransformationRuleDSResources} selects the profile whose
	 * {@code RESOURCE_IDENTIFIER/@value} equals {@code profId}; the query is run through
	 * {@code ISLookUpService.quickSearchProfile} and the first result is returned UTF-8 encoded.
	 * <p>
	 * NOTE(review): {@code profId} is concatenated into the XQuery string without escaping - confirm
	 * that profile ids can never contain a single quote or come from untrusted input.
	 *
	 * @param profId identifier of the transformation rule profile
	 * @return a stream over the first result of the query
	 * @throws ISLookUpException declared by this method; raised by the lookup service call
	 * @throws IOException       declared by this method; raised when creating the stream
	 * @throws RuntimeException  when the query returns no result
	 */
	private InputStream getProfileAsStream(String profId) throws ISLookUpException, IOException {
79 160
		String xquery = "string(collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" +
80 161
				profId + "']//CODE)";
81 162
		List<String> res = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery);
82
		if(res.isEmpty()){
83
			throw new RuntimeException("Can't find transformation rule CODE for "+profId);
163
		if (res.isEmpty()) {
164
			throw new RuntimeException("Can't find transformation rule CODE for " + profId);
84 165
		}
85
		String code =res.get(0);
166
		String code = res.get(0);
167
		//	log.debug(code);
86 168
		return IOUtils.toInputStream(code, "UTF-8");
87 169
	}
88 170

  

Also available in: Unified diff