Project

General

Profile

1
package eu.dnetlib.oai.mongo;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.io.IOException;
5
import java.io.StringReader;
6
import java.util.Date;
7
import java.util.List;
8
import java.util.zip.ZipEntry;
9
import java.util.zip.ZipInputStream;
10

    
11
import com.google.common.collect.Sets;
12
import com.mongodb.DBObject;
13
import eu.dnetlib.oai.conf.OAIConfigurationReader;
14
import eu.dnetlib.oai.info.RecordInfo;
15
import eu.dnetlib.rmi.provision.OaiPublisherRuntimeException;
16
import org.apache.commons.io.IOUtils;
17
import org.apache.commons.lang3.StringEscapeUtils;
18
import org.dom4j.Document;
19
import org.dom4j.DocumentException;
20
import org.dom4j.io.SAXReader;
21
import org.springframework.beans.factory.annotation.Autowired;
22

    
23
/**
24
 * Helper class to generate a RecordInfo object from a Mongo DBObject.
25
 *
26
 * @author alessia
27
 */
28
public class RecordInfoGenerator {
29

    
30
	@Autowired
31
	private MetadataExtractor metadataExtractor;
32
	@Autowired
33
	private ProvenanceExtractor provenanceExtractor;
34

    
35
	@SuppressWarnings("unchecked")
36
	public RecordInfo transformDBObject(final DBObject object, final boolean includeBody) {
37
		if ((object == null) || object.keySet().isEmpty()) { return null; }
38
		String id = (String) object.get(OAIConfigurationReader.ID_FIELD);
39
		// need to escape the identifier, otherwise the XML breaks
40
		id = StringEscapeUtils.escapeXml11(id);
41
		final boolean deleted = (Boolean) object.get("deleted");
42
		final RecordInfo record = new RecordInfo();
43
		record.setIdentifier(id);
44
		record.setInternalId(object.get("_id").toString());
45
		record.setDatestamp((Date) object.get(OAIConfigurationReader.DATESTAMP_FIELD));
46
		record.setDeleted(deleted);
47
		final List<String> sets = (List<String>) object.get(OAIConfigurationReader.SET_FIELD);
48
		if (sets != null) {
49
			record.setSetspecs(Sets.newHashSet(sets));
50
		}
51
		if (includeBody && !deleted) {
52
			final String body = decompressRecord((byte[]) object.get(OAIConfigurationReader.BODY_FIELD));
53
			final SAXReader reader = new SAXReader();
54
			Document doc;
55
			try {
56
				doc = reader.read(new StringReader(body));
57
				record.setMetadata(this.metadataExtractor.apply(doc));
58
				record.setProvenance(this.provenanceExtractor.apply(doc));
59
			} catch (final DocumentException e) {
60
				throw new OaiPublisherRuntimeException(e);
61
			}
62
		}
63
		return record;
64

    
65
	}
66

    
67
	public String decompressRecord(final byte[] input) {
68

    
69
		try {
70
			final ByteArrayInputStream bis = new ByteArrayInputStream(input);
71
			final ZipInputStream zis = new ZipInputStream(bis);
72
			ZipEntry ze;
73
			ze = zis.getNextEntry();
74
			if (ze == null) { throw new OaiPublisherRuntimeException("cannot decompress null zip entry "); }
75
			if (!ze.getName()
76
					.equals(OAIConfigurationReader.BODY_FIELD)) { throw new OaiPublisherRuntimeException("cannot decompress zip entry name :" + ze.getName()); }
77
			return IOUtils.toString(zis);
78
		} catch (final IOException e) {
79
			throw new OaiPublisherRuntimeException(e);
80
		}
81

    
82
	}
83

    
84
	public MetadataExtractor getMetadataExtractor() {
85
		return this.metadataExtractor;
86
	}
87

    
88
	public void setMetadataExtractor(final MetadataExtractor metadataExtractor) {
89
		this.metadataExtractor = metadataExtractor;
90
	}
91

    
92
	public ProvenanceExtractor getProvenanceExtractor() {
93
		return this.provenanceExtractor;
94
	}
95

    
96
	public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) {
97
		this.provenanceExtractor = provenanceExtractor;
98
	}
99
}
(6-6/6)