Project

General

Profile

1
package eu.dnetlib.oai.mongo;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.io.IOException;
5
import java.io.StringReader;
6
import java.util.Date;
7
import java.util.List;
8
import java.util.zip.ZipEntry;
9
import java.util.zip.ZipInputStream;
10

    
11
import javax.annotation.Resource;
12

    
13
import org.apache.commons.io.IOUtils;
14
import org.apache.commons.lang3.StringEscapeUtils;
15
import org.dom4j.Document;
16
import org.dom4j.DocumentException;
17
import org.dom4j.io.SAXReader;
18

    
19
import com.google.common.collect.Sets;
20
import com.mongodb.DBObject;
21

    
22
import eu.dnetlib.oai.conf.OAIConfigurationReader;
23
import eu.dnetlib.oai.info.RecordInfo;
24
import eu.dnetlib.rmi.provision.OaiPublisherRuntimeException;
25

    
26
/**
27
 * Helper class to generate a RecordInfo object from a Mongo DBObject.
28
 *
29
 * @author alessia
30
 */
31
public class RecordInfoGenerator {
32

    
33
	@Resource
34
	private MetadataExtractor metadataExtractor;
35
	@Resource
36
	private ProvenanceExtractor provenanceExtractor;
37

    
38
	@SuppressWarnings("unchecked")
39
	public RecordInfo transformDBObject(final DBObject object, final boolean includeBody) {
40
		if ((object == null) || object.keySet().isEmpty()) { return null; }
41
		String id = (String) object.get(OAIConfigurationReader.ID_FIELD);
42
		// need to escape the identifier, otherwise the XML breaks
43
		id = StringEscapeUtils.escapeXml11(id);
44
		final boolean deleted = (Boolean) object.get("deleted");
45
		final RecordInfo record = new RecordInfo();
46
		record.setIdentifier(id);
47
		record.setInternalId(object.get("_id").toString());
48
		record.setDatestamp((Date) object.get(OAIConfigurationReader.DATESTAMP_FIELD));
49
		record.setDeleted(deleted);
50
		final List<String> sets = (List<String>) object.get(OAIConfigurationReader.SET_FIELD);
51
		if (sets != null) {
52
			record.setSetspecs(Sets.newHashSet(sets));
53
		}
54
		if (includeBody && !deleted) {
55
			final String body = decompressRecord((byte[]) object.get(OAIConfigurationReader.BODY_FIELD));
56
			final SAXReader reader = new SAXReader();
57
			Document doc;
58
			try {
59
				doc = reader.read(new StringReader(body));
60
				record.setMetadata(this.metadataExtractor.apply(doc));
61
				record.setProvenance(this.provenanceExtractor.apply(doc));
62
			} catch (final DocumentException e) {
63
				throw new OaiPublisherRuntimeException(e);
64
			}
65
		}
66
		return record;
67

    
68
	}
69

    
70
	public String decompressRecord(final byte[] input) {
71

    
72
		try {
73
			final ByteArrayInputStream bis = new ByteArrayInputStream(input);
74
			final ZipInputStream zis = new ZipInputStream(bis);
75
			ZipEntry ze;
76
			ze = zis.getNextEntry();
77
			if (ze == null) { throw new OaiPublisherRuntimeException("cannot decompress null zip entry "); }
78
			if (!ze.getName()
79
					.equals(OAIConfigurationReader.BODY_FIELD)) { throw new OaiPublisherRuntimeException("cannot decompress zip entry name :" + ze.getName()); }
80
			return IOUtils.toString(zis);
81
		} catch (final IOException e) {
82
			throw new OaiPublisherRuntimeException(e);
83
		}
84

    
85
	}
86

    
87
	public MetadataExtractor getMetadataExtractor() {
88
		return this.metadataExtractor;
89
	}
90

    
91
	public void setMetadataExtractor(final MetadataExtractor metadataExtractor) {
92
		this.metadataExtractor = metadataExtractor;
93
	}
94

    
95
	public ProvenanceExtractor getProvenanceExtractor() {
96
		return this.provenanceExtractor;
97
	}
98

    
99
	public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) {
100
		this.provenanceExtractor = provenanceExtractor;
101
	}
102
}
(6-6/6)