Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import java.io.StringReader;
4
import java.util.Arrays;
5
import java.util.List;
6
import java.util.stream.Collectors;
7
import javax.xml.transform.Transformer;
8
import javax.xml.transform.stream.StreamSource;
9

    
10
import com.google.common.base.Function;
11
import com.ximpleware.AutoPilot;
12
import com.ximpleware.VTDGen;
13
import com.ximpleware.VTDNav;
14
import eu.dnetlib.data.graph.model.DNGFDecoder;
15
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
16
import org.apache.commons.codec.binary.Base64;
17
import org.apache.commons.lang3.StringUtils;
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20
import org.dom4j.io.DocumentResult;
21

    
22
public class XsltRowTransformer implements Function<String, List<Row>> {
23

    
24
	private static final Log log = LogFactory.getLog(XsltRowTransformer.class); // NOPMD by marko on 11/24/08 5:02 PM
25

    
26
	public static final String DATA_XPATH = "/ROWS/ROW";
27
	
28
	public static final List<String> ATTRS = Arrays.asList("value", "mode");
29

    
30
	private Transformer transformer;
31

    
32
	public XsltRowTransformer(final Transformer transformer) {
33
		this.transformer = transformer;
34
		log.info(String.format("using trasformer: '%s'", getTransformerClassName()));
35
	}
36

    
37
	private List<Row> transform(final String xml) {
38
		final DocumentResult result = new DocumentResult();
39
		try {
40
			transformer.transform(new StreamSource(new StringReader(xml)), result);
41

    
42
			final VTDGen parser = new VTDGen();
43
			parser.setDoc(result.getDocument().asXML().getBytes());
44
			parser.parse(true);
45

    
46
			final VTDNav nav = parser.getNav();
47
			final AutoPilot autoPilot = new AutoPilot(nav);
48
			return VtdUtilityParser.getTextValuesWithAttributes(autoPilot, nav, DATA_XPATH, ATTRS)
49
					.stream()
50
					.map(this::asRow)
51
					.collect(Collectors.toList());
52
		} catch (Exception e) {
53
			log.error("Error parsing xml:\n" + xml, e);
54
			throw new RuntimeException("Unable to parse document:\n" + xml, e);
55
		}
56
	}
57

    
58
	private Row asRow(final Node base64Node) {
59
		final String base64 = base64Node.getAttributes().get("value");
60
		if (StringUtils.isBlank(base64))
61
			throw new IllegalArgumentException("empty protocolbuffer value, check xslt");
62

    
63
		final DNGFDecoder d = DNGFDecoder.decode(Base64.decodeBase64(base64));
64
		final String qualifier = getQualifier(base64Node, d);
65
		return new Row(d.getColumnFamily(), d.getRowkey(), Arrays.asList(new Column<>(qualifier, d.getDNGF().toByteArray())));
66
	}
67

    
68
	private String getQualifier(final Node base64Node, final DNGFDecoder d) {
69
		if("update".equals(base64Node.getAttributes().get("mode"))) {
70
			return "update_" + System.nanoTime();
71
		}
72
		return d.getQualifier();
73
	}
74

    
75
	public String getTransformerClassName() {
76
		return transformer != null ? transformer.getClass().getName() : "unknown";
77
	}
78

    
79
	@Override
80
	public List<Row> apply(final String xml) {
81
		return transform(xml);
82
	}
83

    
84
}
(12-12/13)