Revision 42184
Added by Michele Artini about 8 years ago
RecordInfoGenerator.java | ||
---|---|---|
7 | 7 |
import java.util.List; |
8 | 8 |
import java.util.zip.ZipEntry; |
9 | 9 |
import java.util.zip.ZipInputStream; |
10 |
|
|
10 | 11 |
import javax.annotation.Resource; |
11 | 12 |
|
12 |
import com.google.common.collect.Sets; |
|
13 |
import com.mongodb.DBObject; |
|
14 |
import eu.dnetlib.data.information.oai.publisher.OaiPublisherRuntimeException; |
|
15 |
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader; |
|
16 |
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo; |
|
17 | 13 |
import org.apache.commons.io.IOUtils; |
18 |
import org.apache.commons.lang.StringEscapeUtils; |
|
14 |
import org.apache.commons.lang3.StringEscapeUtils;
|
|
19 | 15 |
import org.dom4j.Document; |
20 | 16 |
import org.dom4j.DocumentException; |
21 | 17 |
import org.dom4j.io.SAXReader; |
22 | 18 |
|
19 |
import com.google.common.collect.Sets; |
|
20 |
import com.mongodb.DBObject; |
|
21 |
|
|
22 |
import eu.dnetlib.oai.conf.OAIConfigurationReader; |
|
23 |
import eu.dnetlib.oai.info.RecordInfo; |
|
24 |
import eu.dnetlib.rmi.provision.OaiPublisherRuntimeException; |
|
25 |
|
|
23 | 26 |
/** |
24 | 27 |
* Helper class to generate a RecordInfo object from a Mongo DBObject. |
25 | 28 |
* |
... | ... | |
34 | 37 |
|
35 | 38 |
@SuppressWarnings("unchecked") |
36 | 39 |
public RecordInfo transformDBObject(final DBObject object, final boolean includeBody) { |
37 |
if ((object == null) || object.keySet().isEmpty()) return null;
|
|
40 |
if ((object == null) || object.keySet().isEmpty()) { return null; }
|
|
38 | 41 |
String id = (String) object.get(OAIConfigurationReader.ID_FIELD); |
39 | 42 |
// need to escape the identifier, otherwise the XML breaks |
40 |
id = StringEscapeUtils.escapeXml(id); |
|
41 |
boolean deleted = (Boolean) object.get("deleted"); |
|
42 |
RecordInfo record = new RecordInfo(); |
|
43 |
id = StringEscapeUtils.escapeXml11(id);
|
|
44 |
final boolean deleted = (Boolean) object.get("deleted");
|
|
45 |
final RecordInfo record = new RecordInfo();
|
|
43 | 46 |
record.setIdentifier(id); |
44 | 47 |
record.setInternalId(object.get("_id").toString()); |
45 | 48 |
record.setDatestamp((Date) object.get(OAIConfigurationReader.DATESTAMP_FIELD)); |
46 | 49 |
record.setDeleted(deleted); |
47 |
List<String> sets = (List<String>) object.get(OAIConfigurationReader.SET_FIELD); |
|
50 |
final List<String> sets = (List<String>) object.get(OAIConfigurationReader.SET_FIELD);
|
|
48 | 51 |
if (sets != null) { |
49 | 52 |
record.setSetspecs(Sets.newHashSet(sets)); |
50 | 53 |
} |
51 | 54 |
if (includeBody && !deleted) { |
52 |
String body = decompressRecord((byte[]) object.get(OAIConfigurationReader.BODY_FIELD)); |
|
55 |
final String body = decompressRecord((byte[]) object.get(OAIConfigurationReader.BODY_FIELD));
|
|
53 | 56 |
final SAXReader reader = new SAXReader(); |
54 | 57 |
Document doc; |
55 | 58 |
try { |
56 | 59 |
doc = reader.read(new StringReader(body)); |
57 |
record.setMetadata(this.metadataExtractor.evaluate(doc));
|
|
58 |
record.setProvenance(this.provenanceExtractor.evaluate(doc));
|
|
59 |
} catch (DocumentException e) { |
|
60 |
record.setMetadata(this.metadataExtractor.apply(doc));
|
|
61 |
record.setProvenance(this.provenanceExtractor.apply(doc));
|
|
62 |
} catch (final DocumentException e) {
|
|
60 | 63 |
throw new OaiPublisherRuntimeException(e); |
61 | 64 |
} |
62 | 65 |
} |
... | ... | |
67 | 70 |
public String decompressRecord(final byte[] input) { |
68 | 71 |
|
69 | 72 |
try { |
70 |
ByteArrayInputStream bis = new ByteArrayInputStream(input); |
|
71 |
ZipInputStream zis = new ZipInputStream(bis); |
|
73 |
final ByteArrayInputStream bis = new ByteArrayInputStream(input);
|
|
74 |
final ZipInputStream zis = new ZipInputStream(bis);
|
|
72 | 75 |
ZipEntry ze; |
73 | 76 |
ze = zis.getNextEntry(); |
74 |
if (ze == null) |
|
75 |
throw new OaiPublisherRuntimeException("cannot decompress null zip entry "); |
|
76 |
if (!ze.getName().equals(OAIConfigurationReader.BODY_FIELD)) |
|
77 |
throw new OaiPublisherRuntimeException("cannot decompress zip entry name :" + ze.getName()); |
|
77 |
if (ze == null) { throw new OaiPublisherRuntimeException("cannot decompress null zip entry "); } |
|
78 |
if (!ze.getName() |
|
79 |
.equals(OAIConfigurationReader.BODY_FIELD)) { throw new OaiPublisherRuntimeException("cannot decompress zip entry name :" + ze.getName()); } |
|
78 | 80 |
return IOUtils.toString(zis); |
79 |
} catch (IOException e) { |
|
81 |
} catch (final IOException e) {
|
|
80 | 82 |
throw new OaiPublisherRuntimeException(e); |
81 | 83 |
} |
82 | 84 |
|
83 | 85 |
} |
84 | 86 |
|
85 | 87 |
public MetadataExtractor getMetadataExtractor() { |
86 |
return metadataExtractor; |
|
88 |
return this.metadataExtractor;
|
|
87 | 89 |
} |
88 | 90 |
|
89 | 91 |
public void setMetadataExtractor(final MetadataExtractor metadataExtractor) { |
... | ... | |
91 | 93 |
} |
92 | 94 |
|
93 | 95 |
public ProvenanceExtractor getProvenanceExtractor() { |
94 |
return provenanceExtractor; |
|
96 |
return this.provenanceExtractor;
|
|
95 | 97 |
} |
96 | 98 |
|
97 | 99 |
public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) { |
Also available in: Unified diff
oai import