1
|
package eu.dnetlib.data.mdstore.modular;
|
2
|
|
3
|
import java.io.StringReader;
|
4
|
import java.util.HashMap;
|
5
|
import java.util.Map;
|
6
|
|
7
|
import javax.xml.xpath.XPath;
|
8
|
import javax.xml.xpath.XPathFactory;
|
9
|
|
10
|
import org.apache.commons.logging.Log;
|
11
|
import org.apache.commons.logging.LogFactory;
|
12
|
import org.xml.sax.InputSource;
|
13
|
|
14
|
/**
|
15
|
* Terrible implementation of a record parser.
|
16
|
*
|
17
|
* @author marko
|
18
|
*
|
19
|
*/
|
20
|
public class SimpleRecordParser implements RecordParser {
|
21
|
static final Log log = LogFactory.getLog(SimpleRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM
|
22
|
|
23
|
private long ts;
|
24
|
|
25
|
@Override
|
26
|
public Map<String, String> parseRecord(String record) {
|
27
|
Map<String, String> props = new HashMap<String, String>();
|
28
|
props.put("timestamp", String.valueOf(getTimestamp()));
|
29
|
|
30
|
try {
|
31
|
// DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
|
32
|
XPath xpath = XPathFactory.newInstance().newXPath();
|
33
|
|
34
|
// Document doc = builder.parse(new InputSource(new StringReader(record)));
|
35
|
InputSource doc = new InputSource(new StringReader(record));
|
36
|
|
37
|
props.put("id", xpath.evaluate("//*[local-name()='objIdentifier']", doc));
|
38
|
props.put("originalId", xpath.evaluate("//*[local-name()='efgEntity']/*/*[local-name()='identifier']", doc));
|
39
|
|
40
|
// String date = xpath.evaluate("//*[local-name()='dateOfCollection'][1]", doc);
|
41
|
// props.put("date", new Date(date).getTime());
|
42
|
|
43
|
} catch (Exception e) {
|
44
|
log.warn("got exception while parsing document", e);
|
45
|
log.warn("record is:");
|
46
|
log.warn(record);
|
47
|
log.warn("------------");
|
48
|
}
|
49
|
return props;
|
50
|
|
51
|
}
|
52
|
|
53
|
@Override
|
54
|
public void setTimestamp(final long ts) {
|
55
|
this.ts = ts;
|
56
|
}
|
57
|
|
58
|
@Override
|
59
|
public long getTimestamp() {
|
60
|
return ts;
|
61
|
}
|
62
|
|
63
|
}
|