Project

General

Profile

1
package eu.dnetlib.data.mdstore.modular;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.util.HashMap;
5
import java.util.Map;
6
import java.util.Stack;
7

    
8
import javax.xml.stream.XMLInputFactory;
9
import javax.xml.stream.XMLStreamConstants;
10
import javax.xml.stream.XMLStreamException;
11
import javax.xml.stream.XMLStreamReader;
12

    
13
/**
14
 * This method outperforms SimpleRecordParser by a vast amount, especially since we are just getting stuff in the header.
15
 *
16
 * @author marko
17
 *
18
 */
19
public class StreamingRecordParser implements RecordParser {
20

    
21
	@Override
22
	public Map<String, String> parseRecord(final String record) {
23

    
24
		try {
25
			XMLInputFactory factory = XMLInputFactory.newInstance();
26
			XMLStreamReader parser = factory.createXMLStreamReader(new ByteArrayInputStream(record.getBytes()));
27

    
28
			HashMap<String, String> res = new HashMap<String, String>();
29

    
30
			Stack<String> elementStack = new Stack<String>();
31
			elementStack.push("/");
32

    
33
			while (parser.hasNext()) {
34
				int event = parser.next();
35
				if (event == XMLStreamConstants.END_ELEMENT) {
36
					elementStack.pop();
37
				} else if (event == XMLStreamConstants.START_ELEMENT) {
38
					final String localName = parser.getLocalName();
39
					elementStack.push(localName);
40

    
41
					if ("objIdentifier".equals(localName)) {
42
						parser.next();
43

    
44
						res.put("id", parser.getText().trim());
45

    
46
					} else if ("identifier".equals(localName) && "efgEntity".equals(grandParent(elementStack))) {
47
						if (!res.containsKey("originalId")) {
48
							parser.next();
49
							// log.info("ZZZZZZ OK: found identifier at right depth " + elementStack);
50
							res.put("originalId", parser.getText().trim());
51
						}
52
					}
53

    
54
					else if ("identifier".equals(localName)) {
55

    
56
						// log.info("ZZZZZZ: found identifier not at right depth " + elementStack + " grand parent " +
57
						// grandParent(elementStack));
58
					}
59

    
60
					if (res.containsKey("id") && res.containsKey("originalId")) { return res; }
61
				}
62
			}
63
			return res;
64
		} catch (XMLStreamException e) {
65
			throw new IllegalStateException(e);
66
		}
67

    
68
	}
69

    
70
	private String grandParent(final Stack<String> elementStack) {
71
		if (elementStack.size() <= 3) { return ""; }
72
		return elementStack.get(elementStack.size() - 3);
73
	}
74

    
75
}
(20-20/20)