Revision 57486
Added by Enrico Ottonello over 4 years ago
modules/dnet-msro-service/branches/saxonHE-SOLR772/src/test/java/eu/dnetlib/msro/workflows/nodes/transform/X3MTransformJobNodeTest.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.nodes.transform; |
|
2 |
|
|
3 |
import java.time.Duration; |
|
4 |
import java.time.LocalDateTime; |
|
5 |
|
|
6 |
import org.junit.Test; |
|
7 |
|
|
8 |
/** |
|
9 |
* Created by Alessia Bardi on 13/04/2017. |
|
10 |
* |
|
11 |
* @author Alessia Bardi |
|
12 |
*/ |
|
13 |
public class X3MTransformJobNodeTest { |
|
14 |
|
|
15 |
private X3MTransformJobNode transformJob = new X3MTransformJobNode(); |
|
16 |
private String header = "<oai:header xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">\n" |
|
17 |
+ " <dri:objIdentifier>ariadne_mock::0000023f507999464aa2b78875b7e5d6</dri:objIdentifier>\n" |
|
18 |
+ " <dri:recordIdentifier>2420500</dri:recordIdentifier>\n" |
|
19 |
+ " <dri:dateOfCollection>2017-04-10T18:44:46.85+02:00</dri:dateOfCollection>\n" |
|
20 |
+ " <dri:datasourceprefix>ariadne_mock</dri:datasourceprefix>\n" |
|
21 |
+ " <dri:datasourcename>Ariadne Mock</dri:datasourcename>\n" |
|
22 |
+ " <dri:dateOfTransformation>2017-04-12T16:31:45.766</dri:dateOfTransformation>\n" |
|
23 |
+ " <dri:invalid value=\"true\">\n" |
|
24 |
+ " <dri:error vocabularies=\"dnet:languages\" xpath=\"//*[local-name()='P72_has_language']\"\n" |
|
25 |
+ " term=\"en\"/>\n" |
|
26 |
+ " </dri:invalid>\n" |
|
27 |
+ " </oai:header>"; |
|
28 |
private String footer = "<oai:about xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">\n" |
|
29 |
+ " <provenance xmlns=\"http://www.openarchives.org/OAI/2.0/provenance\"\n" |
|
30 |
+ " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" |
|
31 |
+ " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd\">\n" |
|
32 |
+ " <originDescription xmlns=\"\" altered=\"true\" harvestDate=\"2017-04-10T18:44:46.85+02:00\">\n" |
|
33 |
+ " <baseURL>sftp%3A%2F%2Fariadne2.isti.cnr.it%2F..%2F..%2Fdata%2Ftransform%2Facdm_correct</baseURL>\n" |
|
34 |
+ " <identifier/>\n" |
|
35 |
+ " <datestamp/>\n" |
|
36 |
+ " <metadataNamespace/>\n" |
|
37 |
+ " </originDescription>\n" |
|
38 |
+ " </provenance>\n" |
|
39 |
+ " </oai:about>"; |
|
40 |
private String metadata=" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n" |
|
41 |
+ " xmlns:dbpedia-owl=\"http://dbpedia.org/ontology/\"\n" |
|
42 |
+ " xmlns:acdm=\"http://registry.ariadne-infrastructure.eu/\"\n" |
|
43 |
+ " xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\"\n" |
|
44 |
+ " xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"\n" |
|
45 |
+ " xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n" |
|
46 |
+ " xmlns:frbr=\"http://www.cidoc-crm.org/frbroo/\" xmlns:dcterms=\"http://purl.org/dc/terms/\"\n" |
|
47 |
+ " xmlns:dcat=\"http://www.w3.org/ns/dcat#\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"\n" |
|
48 |
+ " xmlns:crm=\"http://www.cidoc-crm.org/cidoc-crm/\"\n" |
|
49 |
+ " xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n" |
|
50 |
+ " <frbr:F30_Publication_Event rdf:about=\"uuid:AAAA\"/>\n" |
|
51 |
+ " <crm:E65_Creation rdf:about=\"uuid:AAAB\"/>\n" |
|
52 |
+ " <crm:E73_Information_Object\n" |
|
53 |
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/dataset/2420500\">\n" |
|
54 |
+ " <crm:P129_is_about>\n" |
|
55 |
+ " <crm:E73_Information_Object\n" |
|
56 |
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/CAIRN\">\n" |
|
57 |
+ " <crm:P129_is_about>CAIRN</crm:P129_is_about>\n" |
|
58 |
+ " </crm:E73_Information_Object>\n" |
|
59 |
+ " </crm:P129_is_about>\n" |
|
60 |
+ " <crm:P129_is_about>\n" |
|
61 |
+ " <crm:E73_Information_Object\n" |
|
62 |
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/HUT%20CIRCLE\">\n" |
|
63 |
+ " <crm:P129_is_about>HUT CIRCLE</crm:P129_is_about>\n" |
|
64 |
+ " </crm:E73_Information_Object>\n" |
|
65 |
+ " </crm:P129_is_about>\n" |
|
66 |
+ " <crm:P104_is_subject_to>ADS Terms and Conditions</crm:P104_is_subject_to>\n" |
|
67 |
+ " <crm:P129_is_about>\n" |
|
68 |
+ " <crm:E73_Information_Object\n" |
|
69 |
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/SHEEPFOLD\">\n" |
|
70 |
+ " <crm:P129_is_about>SHEEPFOLD</crm:P129_is_about>\n" |
|
71 |
+ " </crm:E73_Information_Object>\n" |
|
72 |
+ " </crm:P129_is_about>\n" |
|
73 |
+ " <crm:P129_is_about>\n" |
|
74 |
+ " <crm:E73_Information_Object\n" |
|
75 |
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/FIELD%20SYSTEM\">\n" |
|
76 |
+ " <crm:P129_is_about>FIELD SYSTEM</crm:P129_is_about>\n" |
|
77 |
+ " </crm:E73_Information_Object>\n" |
|
78 |
+ " </crm:P129_is_about>\n" |
|
79 |
+ " <crm:P102_has_title>MID GLEN CROE</crm:P102_has_title>\n" |
|
80 |
+ " <crm:P165_incorporates>\n" |
|
81 |
+ " <crm:E33_Linguistic_Object rdf:about=\"uuid:AAAG\">\n" |
|
82 |
+ " <crm:P72_has_language>en</crm:P72_has_language>\n" |
|
83 |
+ " </crm:E33_Linguistic_Object>\n" |
|
84 |
+ " </crm:P165_incorporates>\n" |
|
85 |
+ " <crm:P67_refers_to>\n" |
|
86 |
+ " <crm:E1_CRM_Entity rdf:about=\"uuid:AAAH\">\n" |
|
87 |
+ " <crm:P2_has_type>Sites and monuments databases or\n" |
|
88 |
+ " inventories</crm:P2_has_type>\n" |
|
89 |
+ " </crm:E1_CRM_Entity>\n" |
|
90 |
+ " </crm:P67_refers_to>\n" |
|
91 |
+ " <crm:P93i_was_taken_out_of_existence_by>\n" |
|
92 |
+ " <crm:E6_Destruction rdf:about=\"uuid:AAAE\">\n" |
|
93 |
+ " <crm:P4_has_time-span>\n" |
|
94 |
+ " <crm:E52_Time-Span rdf:about=\"uuid:AAAF\">\n" |
|
95 |
+ " <crm:P81_ongoing_throughout>2013-12-09\n" |
|
96 |
+ " 00:00:00.0</crm:P81_ongoing_throughout>\n" |
|
97 |
+ " </crm:E52_Time-Span>\n" |
|
98 |
+ " </crm:P4_has_time-span>\n" |
|
99 |
+ " </crm:E6_Destruction>\n" |
|
100 |
+ " </crm:P93i_was_taken_out_of_existence_by>\n" |
|
101 |
+ " <crm:P94i_was_created_by>\n" |
|
102 |
+ " <frbr:F30_Publication_Event rdf:about=\"uuid:AAAC\">\n" |
|
103 |
+ " <crm:P4_has_time-span>\n" |
|
104 |
+ " <crm:E52_Time-Span rdf:about=\"uuid:AAAD\">\n" |
|
105 |
+ " <crm:P81_ongoing_throughout>2013-12-09\n" |
|
106 |
+ " 00:00:00.0</crm:P81_ongoing_throughout>\n" |
|
107 |
+ " </crm:E52_Time-Span>\n" |
|
108 |
+ " </crm:P4_has_time-span>\n" |
|
109 |
+ " </frbr:F30_Publication_Event>\n" |
|
110 |
+ " </crm:P94i_was_created_by>\n" |
|
111 |
+ " <crm:P129_is_about>\n" |
|
112 |
+ " <crm:E73_Information_Object\n" |
|
113 |
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/BUILDING\">\n" |
|
114 |
+ " <crm:P129_is_about>BUILDING</crm:P129_is_about>\n" |
|
115 |
+ " </crm:E73_Information_Object>\n" |
|
116 |
+ " </crm:P129_is_about>\n" |
|
117 |
+ " <crm:P1_is_identified_by>2420500</crm:P1_is_identified_by>\n" |
|
118 |
+ " <crm:P106i_forms_part_of>http://registry.ariadne-infrastructure.eu/collection/22721290</crm:P106i_forms_part_of>\n" |
|
119 |
+ " <crm:P3_has_note>Multiple instances of: SHEEPFOLD<br /><br />Multiple\n" |
|
120 |
+ " instances of: BUILDING<br />Multiple instances of: FIELD SYSTEM<br\n" |
|
121 |
+ " />Possible instance of: CAIRN<br />Multiple instances of: HUT\n" |
|
122 |
+ " CIRCLE<br />Possible instance of: HUT CIRCLE</crm:P3_has_note>\n" |
|
123 |
+ " </crm:E73_Information_Object>\n" |
|
124 |
+ " </rdf:RDF>"; |
|
125 |
|
|
126 |
@Test |
|
127 |
public void testBuildXML(){ |
|
128 |
LocalDateTime now = LocalDateTime.now(); |
|
129 |
String res = transformJob.buildXML(header, now.toString(), metadata, footer); |
|
130 |
LocalDateTime end = LocalDateTime.now(); |
|
131 |
System.out.println("Building XML took:"+Duration.between(now, end).toMillis()); |
|
132 |
} |
|
133 |
} |
modules/dnet-msro-service/branches/saxonHE-SOLR772/src/main/java/eu/dnetlib/msro/workflows/nodes/transform/X3MTransformJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.nodes.transform; |
|
2 |
|
|
3 |
import java.time.Duration; |
|
4 |
import java.time.Instant; |
|
5 |
import java.time.LocalDateTime; |
|
6 |
import java.util.List; |
|
7 |
import java.util.Map; |
|
8 |
import javax.xml.parsers.DocumentBuilder; |
|
9 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
10 |
|
|
11 |
import com.google.common.collect.Maps; |
|
12 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
13 |
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory; |
|
14 |
import eu.dnetlib.miscutils.functional.xml.SaxonHelper; |
|
15 |
import eu.dnetlib.miscutils.functional.xml.XMLIndenter; |
|
16 |
import eu.dnetlib.msro.workflows.graph.Arc; |
|
17 |
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode; |
|
18 |
import eu.dnetlib.msro.workflows.procs.Env; |
|
19 |
import eu.dnetlib.rmi.common.ResultSet; |
|
20 |
import eu.dnetlib.rmi.enabling.ISLookUpException; |
|
21 |
import eu.dnetlib.rmi.enabling.ISLookUpService; |
|
22 |
import eu.dnetlib.rmi.manager.MSROException; |
|
23 |
import net.sf.saxon.s9api.SaxonApiException; |
|
24 |
import net.sf.saxon.s9api.Serializer.Property; |
|
25 |
import net.sf.saxon.s9api.XPathSelector; |
|
26 |
import org.apache.commons.io.IOUtils; |
|
27 |
import org.apache.commons.lang3.StringUtils; |
|
28 |
import org.apache.commons.logging.Log; |
|
29 |
import org.apache.commons.logging.LogFactory; |
|
30 |
import org.springframework.beans.factory.annotation.Autowired; |
|
31 |
import org.w3c.dom.Document; |
|
32 |
import org.w3c.dom.Element; |
|
33 |
import org.w3c.dom.Node; |
|
34 |
|
|
35 |
public class X3MTransformJobNode extends AsyncJobNode { |
|
36 |
|
|
37 |
private static final Log log = LogFactory.getLog(X3MTransformJobNode.class); |
|
38 |
private static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/"; |
|
39 |
private static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri"; |
|
40 |
|
|
41 |
private String inputEprParam; |
|
42 |
private String outputEprParam; |
|
43 |
|
|
44 |
private String mappingPolicyProfileId; |
|
45 |
private String mappingProfileIds; |
|
46 |
|
|
47 |
private boolean verboseLogging; |
|
48 |
|
|
49 |
private XPathSelector xpathSelectorMetadata; |
|
50 |
private XPathSelector xpathSelectorHeader; |
|
51 |
private XPathSelector xpathSelectorFooter; |
|
52 |
private XPathSelector xpathSelectorObjIdentifier; |
|
53 |
|
|
54 |
/** |
|
55 |
* true to pass the full record to X3m-engine. False to pass only what's in the metadata section. |
|
56 |
**/ |
|
57 |
private boolean passFullRecord; |
|
58 |
|
|
59 |
|
|
60 |
@Autowired |
|
61 |
private ResultSetFactory resultSetFactory; |
|
62 |
@Autowired |
|
63 |
private UniqueServiceLocator serviceLocator; |
|
64 |
@Autowired |
|
65 |
private SaxonHelper saxonHelper; |
|
66 |
|
|
67 |
@Override |
|
68 |
protected String execute(final Env env) throws Exception { |
|
69 |
log.info("Mapping profile ids read from node configuration: " + mappingProfileIds); |
|
70 |
log.info("Mapping Policy profile id read from node configuration: " + mappingPolicyProfileId); |
|
71 |
final String[] mappings = getMappingsCode(mappingProfileIds.split(",")); |
|
72 |
final String policy = getProfileCode(mappingPolicyProfileId); |
|
73 |
|
|
74 |
LocalDateTime now = LocalDateTime.now(); |
|
75 |
final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class); |
|
76 |
if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); } |
|
77 |
prepareXpathSelectors(); |
|
78 |
|
|
79 |
|
|
80 |
final ResultSet<String> rsOut = this.resultSetFactory.map(rsIn, String.class, record -> { |
|
81 |
//JUST FOR DEBUGGING THE TIMEOUT OF THE MONGO CURSOR: is there a metadata record that it is really slow to transform? |
|
82 |
if(log.isDebugEnabled()) { |
|
83 |
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier); |
|
84 |
log.debug("Transforming record objIdentifier: " + objIdentifier); |
|
85 |
} |
|
86 |
ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappings, policy, verboseLogging); |
|
87 |
|
|
88 |
String toTransform = record; |
|
89 |
Instant startExtraction = Instant.now(); |
|
90 |
if(!isPassFullRecord()) { |
|
91 |
log.debug("Extracting XML from the metadata block"); |
|
92 |
toTransform = extractFromRecord(record, xpathSelectorMetadata); |
|
93 |
} |
|
94 |
|
|
95 |
String header = extractFromRecord(record, xpathSelectorHeader); |
|
96 |
String provenanceFooter = extractFromRecord(record, xpathSelectorFooter); |
|
97 |
Instant endExtraction = Instant.now(); |
|
98 |
|
|
99 |
Instant startTransform = Instant.now(); |
|
100 |
String transformed = mappingFunction.apply(toTransform); |
|
101 |
Instant endTransform = Instant.now(); |
|
102 |
|
|
103 |
if(log.isDebugEnabled()){ |
|
104 |
log.debug("Extraction took "+ Duration.between(startExtraction, endExtraction).toMillis()+" ms"); |
|
105 |
log.debug("Transformation took "+ Duration.between(startTransform, endTransform).toMillis()+" ms"); |
|
106 |
log.debug("Total mapping time: "+Duration.between(startExtraction, endTransform).toMillis()+" ms"); |
|
107 |
} |
|
108 |
String res = buildXML(header, now.toString(), transformed, provenanceFooter); |
|
109 |
if(log.isDebugEnabled()) { |
|
110 |
log.debug("SOURCE:\n"+toTransform); |
|
111 |
log.debug("TRANFORMED:\n"+res); |
|
112 |
} |
|
113 |
return res; |
|
114 |
}); |
|
115 |
|
|
116 |
env.setAttribute(this.outputEprParam, rsOut); |
|
117 |
|
|
118 |
return Arc.DEFAULT_ARC; |
|
119 |
} |
|
120 |
|
|
121 |
private String[] getMappingsCode(String[] mappingIds) throws ISLookUpException { |
|
122 |
String[] mappings = new String[mappingIds.length]; |
|
123 |
for(int i =0; i < mappingIds.length; i++){ |
|
124 |
mappings[i] = getProfileCode(mappingIds[i]); |
|
125 |
} |
|
126 |
return mappings; |
|
127 |
} |
|
128 |
|
|
129 |
protected String buildXML(final String header, final String transformationDate, final String metadata, final String provenance) { |
|
130 |
Instant start = Instant.now(); |
|
131 |
try { |
|
132 |
XMLIndenter xmlHelper = new XMLIndenter(); |
|
133 |
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); |
|
134 |
DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); |
|
135 |
// root elements |
|
136 |
Document doc = docBuilder.newDocument(); |
|
137 |
Element rootElement = doc.createElementNS(OAI_NAMESPACE_URI, "oai:record"); |
|
138 |
Element headerElem = docBuilder.parse(IOUtils.toInputStream(header, "UTF-8")).getDocumentElement(); |
|
139 |
Node headerNode = doc.importNode(headerElem, true); |
|
140 |
rootElement.appendChild(headerNode); |
|
141 |
Element transDate = doc.createElementNS(DRI_NAMESPACE_URI, "dri:dateOfTransformation"); |
|
142 |
transDate.setTextContent(transformationDate); |
|
143 |
headerNode.appendChild(transDate); |
|
144 |
Element metadataElement = doc.createElementNS(OAI_NAMESPACE_URI, "oai:metadata"); |
|
145 |
Element contentElem = docBuilder.parse(IOUtils.toInputStream(metadata, "UTF-8")).getDocumentElement(); |
|
146 |
Node contentNode = doc.importNode(contentElem, true); |
|
147 |
metadataElement.appendChild(contentNode); |
|
148 |
rootElement.appendChild(metadataElement); |
|
149 |
Element aboutElem = docBuilder.parse(IOUtils.toInputStream(provenance, "UTF-8")).getDocumentElement(); |
|
150 |
Node aboutNode = doc.importNode(aboutElem, true); |
|
151 |
rootElement.appendChild(aboutNode); |
|
152 |
|
|
153 |
doc.appendChild(rootElement); |
|
154 |
Instant startIndent = Instant.now(); |
|
155 |
String res = xmlHelper.indent(doc); |
|
156 |
Instant end = Instant.now(); |
|
157 |
if(log.isDebugEnabled()){ |
|
158 |
log.debug("XML built in "+ Duration.between(start, end).toMillis()+" ms"); |
|
159 |
log.debug("Serialization with indent took "+ Duration.between(startIndent, end).toMillis()+" ms"); |
|
160 |
} |
|
161 |
return res; |
|
162 |
} catch (Exception e) { |
|
163 |
throw new RuntimeException("Cannot build the transformed xml file", e); |
|
164 |
} |
|
165 |
|
|
166 |
} |
|
167 |
|
|
168 |
private void prepareXpathSelectors() throws SaxonApiException { |
|
169 |
Map<String, String> namespaces = Maps.newHashMap(); |
|
170 |
namespaces.put("oai", OAI_NAMESPACE_URI); |
|
171 |
namespaces.put("dri", DRI_NAMESPACE_URI); |
|
172 |
xpathSelectorHeader = this.saxonHelper.help().prepareXPathSelector("//oai:header", namespaces); |
|
173 |
xpathSelectorMetadata = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/*", namespaces); |
|
174 |
xpathSelectorFooter = this.saxonHelper.help().prepareXPathSelector("//oai:about", namespaces); |
|
175 |
xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/*[local-name()='objIdentifier']/text()", namespaces); |
|
176 |
|
|
177 |
} |
|
178 |
|
|
179 |
private String extractFromRecord(final String record, final XPathSelector xPathSelector) { |
|
180 |
try { |
|
181 |
return this.saxonHelper.help().setSerializerProperty(Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector); |
|
182 |
} catch (SaxonApiException e) { |
|
183 |
throw new RuntimeException("Cannot extract content ", e); |
|
184 |
} |
|
185 |
} |
|
186 |
|
|
187 |
private String getProfileCode(String profId) throws ISLookUpException { |
|
188 |
if (StringUtils.isBlank(profId)) return null; |
|
189 |
String xquery = "string(collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" + |
|
190 |
profId + "']//CODE)"; |
|
191 |
List<String> res = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery); |
|
192 |
if (res.isEmpty() || StringUtils.isBlank(res.get(0))) { |
|
193 |
throw new RuntimeException("Can't find transformation rule CODE for " + profId); |
|
194 |
} |
|
195 |
return res.get(0); |
|
196 |
} |
|
197 |
|
|
198 |
public String getInputEprParam() { |
|
199 |
return this.inputEprParam; |
|
200 |
} |
|
201 |
|
|
202 |
public void setInputEprParam(final String inputEprParam) { |
|
203 |
this.inputEprParam = inputEprParam; |
|
204 |
} |
|
205 |
|
|
206 |
public String getOutputEprParam() { |
|
207 |
return this.outputEprParam; |
|
208 |
} |
|
209 |
|
|
210 |
public void setOutputEprParam(final String outputEprParam) { |
|
211 |
this.outputEprParam = outputEprParam; |
|
212 |
} |
|
213 |
|
|
214 |
public String getMappingPolicyProfileId() { |
|
215 |
return mappingPolicyProfileId; |
|
216 |
} |
|
217 |
|
|
218 |
public void setMappingPolicyProfileId(final String mappingPolicyProfileId) { |
|
219 |
this.mappingPolicyProfileId = mappingPolicyProfileId; |
|
220 |
} |
|
221 |
|
|
222 |
public String getMappingProfileIds() { |
|
223 |
return mappingProfileIds; |
|
224 |
} |
|
225 |
|
|
226 |
public void setMappingProfileIds(final String mappingProfileIds) { |
|
227 |
this.mappingProfileIds = mappingProfileIds; |
|
228 |
} |
|
229 |
|
|
230 |
public boolean isVerboseLogging() { |
|
231 |
return verboseLogging; |
|
232 |
} |
|
233 |
|
|
234 |
public void setVerboseLogging(final boolean verboseLogging) { |
|
235 |
this.verboseLogging = verboseLogging; |
|
236 |
} |
|
237 |
|
|
238 |
public ResultSetFactory getResultSetFactory() { |
|
239 |
return resultSetFactory; |
|
240 |
} |
|
241 |
|
|
242 |
public void setResultSetFactory(final ResultSetFactory resultSetFactory) { |
|
243 |
this.resultSetFactory = resultSetFactory; |
|
244 |
} |
|
245 |
|
|
246 |
public UniqueServiceLocator getServiceLocator() { |
|
247 |
return serviceLocator; |
|
248 |
} |
|
249 |
|
|
250 |
public void setServiceLocator(final UniqueServiceLocator serviceLocator) { |
|
251 |
this.serviceLocator = serviceLocator; |
|
252 |
} |
|
253 |
|
|
254 |
public boolean isPassFullRecord() { |
|
255 |
return passFullRecord; |
|
256 |
} |
|
257 |
|
|
258 |
public void setPassFullRecord(final boolean passFullRecord) { |
|
259 |
this.passFullRecord = passFullRecord; |
|
260 |
} |
|
261 |
} |
modules/dnet-msro-service/branches/saxonHE-SOLR772/src/main/java/eu/dnetlib/msro/workflows/nodes/transform/ApplyX3Mapping.java | ||
---|---|---|
19 | 19 |
public class ApplyX3Mapping implements Function<String, String> { |
20 | 20 |
|
21 | 21 |
private static final Log log = LogFactory.getLog(ApplyX3Mapping.class); |
22 |
private String[] mappings; |
|
23 | 22 |
private String generatorPolicy; |
24 | 23 |
private boolean verboseLogging; |
25 | 24 |
private URL mappingUrl; |
26 |
|
|
27 |
public ApplyX3Mapping(final String[] mappings, final String generatorPolicy, final boolean verboseLogging) { |
|
28 |
this.mappings = mappings; |
|
29 |
this.generatorPolicy = generatorPolicy; |
|
30 |
this.verboseLogging = verboseLogging; |
|
31 |
} |
|
32 | 25 |
|
33 | 26 |
public ApplyX3Mapping(final URL mappingUrl, final String generatorPolicy, final boolean verboseLogging) { |
34 | 27 |
this.mappingUrl = mappingUrl; |
... | ... | |
38 | 31 |
|
39 | 32 |
@Override |
40 | 33 |
public String apply(final String metadata) { |
41 |
InputStream[] mappingStreams = null; |
|
42 |
if (mappingUrl==null) { |
|
43 |
mappingStreams = new InputStream[mappings.length]; |
|
44 |
try { |
|
45 |
for (int i = 0; i < mappings.length; i++) { |
|
46 |
mappingStreams[i] = getStream(mappings[i]); |
|
47 |
} |
|
48 |
}catch(IOException e){ |
|
49 |
log.error("Can't create mappingStreams for mappings"); |
|
50 |
closeStreams(mappingStreams); |
|
51 |
throw new RuntimeException(e); |
|
52 |
} |
|
53 |
} |
|
54 | 34 |
try ( InputStream policyStream = getStream(generatorPolicy); |
55 | 35 |
InputStream metadataStream = getStream(metadata); |
56 | 36 |
final ByteArrayOutputStream os = new ByteArrayOutputStream()){ |
57 |
X3MLEngineFactory x3mEngineFactory = null; |
|
58 |
if (mappingUrl==null) { |
|
59 |
x3mEngineFactory = getConfiguredX3MEngineFactory(mappingStreams, policyStream); |
|
60 |
} |
|
61 |
else { |
|
62 |
x3mEngineFactory = getConfiguredX3MEngineFactory(mappingUrl, policyStream); |
|
63 |
} |
|
37 |
X3MLEngineFactory x3mEngineFactory = getConfiguredX3MEngineFactory(mappingUrl, policyStream); |
|
64 | 38 |
x3mEngineFactory.withInput(metadataStream).withOutput(os, OutputFormat.RDF_XML_PLAIN); |
65 | 39 |
x3mEngineFactory.execute(); |
66 | 40 |
return new String(os.toByteArray()); |
... | ... | |
68 | 42 |
log.error("Error transforming record: "+e.getMessage()+"\n"+metadata); |
69 | 43 |
throw new RuntimeException(e); |
70 | 44 |
} |
71 |
finally { |
|
72 |
if (mappingStreams!=null) { |
|
73 |
closeStreams(mappingStreams); |
|
74 |
} |
|
75 |
} |
|
76 | 45 |
} |
77 | 46 |
|
78 |
private void closeStreams(InputStream[] streams){ |
|
79 |
for(InputStream s: streams){ |
|
80 |
try { |
|
81 |
if(s != null) s.close(); |
|
82 |
} catch (IOException e) { |
|
83 |
log.error("Can't close stream"+e.getMessage()); |
|
84 |
} |
|
85 |
} |
|
86 |
} |
|
87 |
|
|
88 |
private X3MLEngineFactory getConfiguredX3MEngineFactory(InputStream[] mappings, InputStream policy) { |
|
89 |
X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(mappings); |
|
90 |
if (policy != null) { |
|
91 |
x3mEngineFactory.withGeneratorPolicy(policy); |
|
92 |
} |
|
93 |
if (verboseLogging) |
|
94 |
x3mEngineFactory.withVerboseLogging(); |
|
95 |
//to enable real UUID |
|
96 |
x3mEngineFactory.withUuidSize(0); |
|
97 |
return x3mEngineFactory; |
|
98 |
} |
|
99 |
|
|
100 | 47 |
private InputStream getStream(String s) throws IOException { |
101 | 48 |
if (StringUtils.isNotBlank(s)) { |
102 | 49 |
return IOUtils.toInputStream(s, "UTF-8"); |
modules/dnet-msro-service/branches/saxonHE-SOLR772/src/main/resources/eu/dnetlib/msro/service/applicationContext-msro-nodes.xml | ||
---|---|---|
261 | 261 |
class="eu.dnetlib.msro.workflows.nodes.oai.SetCurrentOAIDBJobNode" |
262 | 262 |
scope="prototype" /> |
263 | 263 |
|
264 |
|
|
265 |
<bean id="wfNodeX3MTransform" |
|
266 |
class="eu.dnetlib.msro.workflows.nodes.transform.X3MTransformJobNode" |
|
267 |
scope="prototype" /> |
|
268 |
|
|
269 | 264 |
<bean id="wfNodeXMLSchemaValidator" |
270 | 265 |
class="eu.dnetlib.msro.workflows.nodes.xmlvalidation.XMLSchemaValidatorJobNode" |
271 | 266 |
scope="prototype" /> |
Also available in: Unified diff
Removed the mapping from the profiles section; the mapping is now taken only from a URL. The X3M transformation node was moved to the dnet-ariadneplus module.