Project

General

Profile

1
package eu.dnetlib.oai.mongo;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.io.IOException;
5
import java.io.StringReader;
6
import java.util.Date;
7
import java.util.List;
8
import java.util.zip.ZipEntry;
9
import java.util.zip.ZipInputStream;
10
import javax.annotation.Resource;
11

    
12
import com.google.common.collect.Sets;
13
import com.mongodb.DBObject;
14
import eu.dnetlib.data.information.oai.publisher.OaiPublisherRuntimeException;
15
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader;
16
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo;
17
import org.apache.commons.io.IOUtils;
18
import org.apache.commons.lang.StringEscapeUtils;
19
import org.dom4j.Document;
20
import org.dom4j.DocumentException;
21
import org.dom4j.io.SAXReader;
22

    
23
/**
24
 * Helper class to generate a RecordInfo object from a Mongo DBObject.
25
 *
26
 * @author alessia
27
 */
28
public class RecordInfoGenerator {
29

    
30
	@Resource
31
	private MetadataExtractor metadataExtractor;
32
	@Resource
33
	private ProvenanceExtractor provenanceExtractor;
34

    
35
	@SuppressWarnings("unchecked")
36
	public RecordInfo transformDBObject(final DBObject object, final boolean includeBody) {
37
		if ((object == null) || object.keySet().isEmpty()) return null;
38
		String id = (String) object.get(OAIConfigurationReader.ID_FIELD);
39
		// need to escape the identifier, otherwise the XML breaks
40
		id = StringEscapeUtils.escapeXml(id);
41
		boolean deleted = (Boolean) object.get("deleted");
42
		RecordInfo record = new RecordInfo();
43
		record.setIdentifier(id);
44
		record.setInternalId(object.get("_id").toString());
45
		record.setDatestamp((Date) object.get(OAIConfigurationReader.DATESTAMP_FIELD));
46
		record.setDeleted(deleted);
47
		List<String> sets = (List<String>) object.get(OAIConfigurationReader.SET_FIELD);
48
		if (sets != null) {
49
			record.setSetspecs(Sets.newHashSet(sets));
50
		}
51
		if (includeBody && !deleted) {
52
			String body = decompressRecord((byte[]) object.get(OAIConfigurationReader.BODY_FIELD));
53
			final SAXReader reader = new SAXReader();
54
			Document doc;
55
			try {
56
				doc = reader.read(new StringReader(body));
57
				record.setMetadata(this.metadataExtractor.evaluate(doc));
58
				record.setProvenance(this.provenanceExtractor.evaluate(doc));
59
			} catch (DocumentException e) {
60
				throw new OaiPublisherRuntimeException(e);
61
			}
62
		}
63
		return record;
64

    
65
	}
66

    
67
	public String decompressRecord(final byte[] input) {
68

    
69
		try {
70
			ByteArrayInputStream bis = new ByteArrayInputStream(input);
71
			ZipInputStream zis = new ZipInputStream(bis);
72
			ZipEntry ze;
73
			ze = zis.getNextEntry();
74
			if (ze == null)
75
				throw new OaiPublisherRuntimeException("cannot decompress null zip entry ");
76
			if (!ze.getName().equals(OAIConfigurationReader.BODY_FIELD))
77
				throw new OaiPublisherRuntimeException("cannot decompress zip entry name :" + ze.getName());
78
			return IOUtils.toString(zis);
79
		} catch (IOException e) {
80
			throw new OaiPublisherRuntimeException(e);
81
		}
82

    
83
	}
84

    
85
	public MetadataExtractor getMetadataExtractor() {
86
		return metadataExtractor;
87
	}
88

    
89
	public void setMetadataExtractor(final MetadataExtractor metadataExtractor) {
90
		this.metadataExtractor = metadataExtractor;
91
	}
92

    
93
	public ProvenanceExtractor getProvenanceExtractor() {
94
		return provenanceExtractor;
95
	}
96

    
97
	public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) {
98
		this.provenanceExtractor = provenanceExtractor;
99
	}
100
}
(6-6/6)