Project

General

Profile

1
package eu.dnetlib.data.mdstore.modular;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.util.HashMap;
5
import java.util.Map;
6
import java.util.Stack;
7

    
8
import javax.xml.stream.XMLInputFactory;
9
import javax.xml.stream.XMLStreamConstants;
10
import javax.xml.stream.XMLStreamException;
11
import javax.xml.stream.XMLStreamReader;
12

    
13
import static eu.dnetlib.data.mdstore.modular.MDStoreConstants.*;
14

    
15
/**
16
 * This method outperforms SimpleRecordParser by a vast amount, especially since we are just getting stuff in the
17
 * header.
18
 * 
19
 * @author marko
20
 * 
21
 */
22
public class StreamingRecordParser implements RecordParser {
23

    
24
	private long ts;
25

    
26
	@Override
27
	public Map<String, String> parseRecord(String record) {
28

    
29
		try {
30
			XMLInputFactory factory = XMLInputFactory.newInstance();
31
			XMLStreamReader parser = factory.createXMLStreamReader(new ByteArrayInputStream(record.getBytes()));
32

    
33
			HashMap<String, String> res = new HashMap<String, String>();
34
			res.put(TIMESTAMP, String.valueOf(getTimestamp()));
35

    
36
			Stack<String> elementStack = new Stack<String>();
37
			elementStack.push("/");
38

    
39
			while (parser.hasNext()) {
40
				int event = parser.next();
41

    
42
				if (event == XMLStreamConstants.END_ELEMENT) {
43
					elementStack.pop();
44
				} else if (event == XMLStreamConstants.START_ELEMENT) {
45
					final String localName = parser.getLocalName();
46
					elementStack.push(localName);
47

    
48
					if (OBJIDENTIFIER.equals(localName)) {
49
						parser.next();
50

    
51
						res.put(ID, parser.getText().trim());
52

    
53
					} else if ("identifier".equals(localName) && "efgEntity".equals(grandParent(elementStack))) {
54
						if (!res.containsKey("originalId")) {
55
							parser.next();
56
//							log.info("ZZZZZZ OK: found identifier at right depth " + elementStack);
57
							res.put("originalId", parser.getText().trim());
58
						}
59
					}
60

    
61
					else if ("identifier".equals(localName)) {
62

    
63
//						log.info("ZZZZZZ: found identifier not at right depth " + elementStack + " grand parent " + grandParent(elementStack));
64
					}
65

    
66
					if (res.containsKey(ID) && res.containsKey("originalId"))
67
						return res;
68
				}
69
			}
70
			return res;
71
		} catch (XMLStreamException e) {
72
			throw new IllegalStateException(e);
73
		}
74

    
75
	}
76

    
77
	private String grandParent(Stack<String> elementStack) {
78
		if (elementStack.size() <= 3)
79
			return "";
80
		return elementStack.get(elementStack.size() - 3);
81
	}
82

    
83
	@Override
84
	public void setTimestamp(final long ts) {
85
		this.ts = ts;
86
	}
87

    
88
	@Override
89
	public long getTimestamp() {
90
		return ts;
91
	}
92

    
93
}
(13-13/13)