Project

General

Profile

1
package eu.dnetlib.data.mdstore.modular;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.util.HashMap;
5
import java.util.Map;
6
import java.util.Stack;
7

    
8
import javax.xml.stream.XMLInputFactory;
9
import javax.xml.stream.XMLStreamConstants;
10
import javax.xml.stream.XMLStreamException;
11
import javax.xml.stream.XMLStreamReader;
12

    
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15

    
16
import static eu.dnetlib.data.mdstore.modular.MDStoreConstants.*;
17

    
18
/**
19
 * This method outperforms SimpleRecordParser by a vast amount, especially since we are just getting stuff in the
20
 * header.
21
 * 
22
 * @author marko
23
 * 
24
 */
25
public class StreamingRecordParser implements RecordParser {
26

    
27
	private static final Log log = LogFactory.getLog(StreamingRecordParser.class);
28
	private long ts;
29

    
30
	@Override
31
	public Map<String, String> parseRecord(String record) {
32

    
33
		try {
34
			XMLInputFactory factory = XMLInputFactory.newInstance();
35
			XMLStreamReader parser = factory.createXMLStreamReader(new ByteArrayInputStream(record.getBytes()));
36

    
37
			HashMap<String, String> res = new HashMap<String, String>();
38
			res.put(TIMESTAMP, String.valueOf(getTimestamp()));
39

    
40
			Stack<String> elementStack = new Stack<String>();
41
			elementStack.push("/");
42

    
43
			while (parser.hasNext()) {
44
				int event = parser.next();
45

    
46
				if (event == XMLStreamConstants.END_ELEMENT) {
47
					elementStack.pop();
48
				} else if (event == XMLStreamConstants.START_ELEMENT) {
49
					final String localName = parser.getLocalName();
50
					elementStack.push(localName);
51

    
52
					if (OBJIDENTIFIER.equals(localName)) {
53
						parser.next();
54

    
55
						res.put(ID, parser.getText().trim());
56

    
57
					} else if ("identifier".equals(localName) && "efgEntity".equals(grandParent(elementStack))) {
58
						if (!res.containsKey("originalId")) {
59
							parser.next();
60
//							log.info("ZZZZZZ OK: found identifier at right depth " + elementStack);
61
							res.put("originalId", parser.getText().trim());
62
						}
63
					}
64

    
65
					else if ("identifier".equals(localName)) {
66

    
67
//						log.info("ZZZZZZ: found identifier not at right depth " + elementStack + " grand parent " + grandParent(elementStack));
68
					}
69

    
70
					if (res.containsKey(ID) && res.containsKey("originalId"))
71
						return res;
72
				}
73
			}
74
			return res;
75
		} catch (XMLStreamException e) {
76
			throw new IllegalStateException(e);
77
		}
78

    
79
	}
80

    
81
	private String grandParent(Stack<String> elementStack) {
82
		if (elementStack.size() <= 3)
83
			return "";
84
		return elementStack.get(elementStack.size() - 3);
85
	}
86

    
87
	@Override
88
	public void setTimestamp(final long ts) {
89
		this.ts = ts;
90
		log.debug("RecordParser date set to "+ts);
91
	}
92

    
93
	@Override
94
	public long getTimestamp() {
95
		return ts;
96
	}
97

    
98
}
(13-13/13)