1
|
package eu.dnetlib.oai.mongo;
|
2
|
|
3
|
import java.io.ByteArrayInputStream;
|
4
|
import java.io.IOException;
|
5
|
import java.io.StringReader;
|
6
|
import java.util.Date;
|
7
|
import java.util.List;
|
8
|
import java.util.zip.ZipEntry;
|
9
|
import java.util.zip.ZipInputStream;
|
10
|
|
11
|
import com.google.common.collect.Sets;
|
12
|
import com.mongodb.DBObject;
|
13
|
import eu.dnetlib.oai.conf.OAIConfigurationReader;
|
14
|
import eu.dnetlib.oai.info.RecordInfo;
|
15
|
import eu.dnetlib.rmi.provision.OaiPublisherRuntimeException;
|
16
|
import org.apache.commons.io.IOUtils;
|
17
|
import org.apache.commons.lang3.StringEscapeUtils;
|
18
|
import org.dom4j.Document;
|
19
|
import org.dom4j.DocumentException;
|
20
|
import org.dom4j.io.SAXReader;
|
21
|
import org.springframework.beans.factory.annotation.Autowired;
|
22
|
|
23
|
/**
|
24
|
* Helper class to generate a RecordInfo object from a Mongo DBObject.
|
25
|
*
|
26
|
* @author alessia
|
27
|
*/
|
28
|
public class RecordInfoGenerator {
|
29
|
|
30
|
@Autowired
|
31
|
private MetadataExtractor metadataExtractor;
|
32
|
@Autowired
|
33
|
private ProvenanceExtractor provenanceExtractor;
|
34
|
|
35
|
@SuppressWarnings("unchecked")
|
36
|
public RecordInfo transformDBObject(final DBObject object, final boolean includeBody) {
|
37
|
if ((object == null) || object.keySet().isEmpty()) { return null; }
|
38
|
String id = (String) object.get(OAIConfigurationReader.ID_FIELD);
|
39
|
// need to escape the identifier, otherwise the XML breaks
|
40
|
id = StringEscapeUtils.escapeXml11(id);
|
41
|
final boolean deleted = (Boolean) object.get("deleted");
|
42
|
final RecordInfo record = new RecordInfo();
|
43
|
record.setIdentifier(id);
|
44
|
record.setInternalId(object.get("_id").toString());
|
45
|
record.setDatestamp((Date) object.get(OAIConfigurationReader.DATESTAMP_FIELD));
|
46
|
record.setDeleted(deleted);
|
47
|
final List<String> sets = (List<String>) object.get(OAIConfigurationReader.SET_FIELD);
|
48
|
if (sets != null) {
|
49
|
record.setSetspecs(Sets.newHashSet(sets));
|
50
|
}
|
51
|
if (includeBody && !deleted) {
|
52
|
final String body = decompressRecord((byte[]) object.get(OAIConfigurationReader.BODY_FIELD));
|
53
|
final SAXReader reader = new SAXReader();
|
54
|
Document doc;
|
55
|
try {
|
56
|
doc = reader.read(new StringReader(body));
|
57
|
record.setMetadata(this.metadataExtractor.apply(doc));
|
58
|
record.setProvenance(this.provenanceExtractor.apply(doc));
|
59
|
} catch (final DocumentException e) {
|
60
|
throw new OaiPublisherRuntimeException(e);
|
61
|
}
|
62
|
}
|
63
|
return record;
|
64
|
|
65
|
}
|
66
|
|
67
|
public String decompressRecord(final byte[] input) {
|
68
|
|
69
|
try {
|
70
|
final ByteArrayInputStream bis = new ByteArrayInputStream(input);
|
71
|
final ZipInputStream zis = new ZipInputStream(bis);
|
72
|
ZipEntry ze;
|
73
|
ze = zis.getNextEntry();
|
74
|
if (ze == null) { throw new OaiPublisherRuntimeException("cannot decompress null zip entry "); }
|
75
|
if (!ze.getName()
|
76
|
.equals(OAIConfigurationReader.BODY_FIELD)) { throw new OaiPublisherRuntimeException("cannot decompress zip entry name :" + ze.getName()); }
|
77
|
return IOUtils.toString(zis);
|
78
|
} catch (final IOException e) {
|
79
|
throw new OaiPublisherRuntimeException(e);
|
80
|
}
|
81
|
|
82
|
}
|
83
|
|
84
|
public MetadataExtractor getMetadataExtractor() {
|
85
|
return this.metadataExtractor;
|
86
|
}
|
87
|
|
88
|
public void setMetadataExtractor(final MetadataExtractor metadataExtractor) {
|
89
|
this.metadataExtractor = metadataExtractor;
|
90
|
}
|
91
|
|
92
|
public ProvenanceExtractor getProvenanceExtractor() {
|
93
|
return this.provenanceExtractor;
|
94
|
}
|
95
|
|
96
|
public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) {
|
97
|
this.provenanceExtractor = provenanceExtractor;
|
98
|
}
|
99
|
}
|