Revision 42181
Added by Sandro La Bruzzo almost 8 years ago
modules/dnet-core-components/trunk/src/main/java/eu/dnetlib/rmi/provision/OaiPublisherException.java | ||
---|---|---|
1 |
package eu.dnetlib.rmi.provision; |
|
2 |
|
|
3 |
import javax.xml.ws.WebFault; |
|
4 |
|
|
5 |
import eu.dnetlib.rmi.common.RMIException; |
|
6 |
|
|
7 |
/** |
|
8 |
* OAI publisher specific exception. |
|
9 |
* |
|
10 |
* @author michele |
|
11 |
*/ |
|
12 |
@WebFault |
|
13 |
public class OaiPublisherException extends RMIException { |
|
14 |
|
|
15 |
/** |
|
16 |
* serialization. |
|
17 |
*/ |
|
18 |
private static final long serialVersionUID = 6833698764790681184L; |
|
19 |
|
|
20 |
/** |
|
21 |
* Creates an exception. |
|
22 |
* |
|
23 |
* @param e exception |
|
24 |
*/ |
|
25 |
public OaiPublisherException(final Throwable e) { |
|
26 |
super(e); |
|
27 |
} |
|
28 |
|
|
29 |
/** |
|
30 |
* Creates an exception. |
|
31 |
* |
|
32 |
* @param message message |
|
33 |
* @param e exception |
|
34 |
*/ |
|
35 |
public OaiPublisherException(final String message, final Throwable e) { |
|
36 |
super(message, e); |
|
37 |
} |
|
38 |
|
|
39 |
/** |
|
40 |
* Creates an exception. |
|
41 |
* |
|
42 |
* @param message message |
|
43 |
*/ |
|
44 |
public OaiPublisherException(final String message) { |
|
45 |
super(message); |
|
46 |
} |
|
47 |
|
|
48 |
} |
modules/dnet-core-components/trunk/src/main/java/eu/dnetlib/rmi/provision/OaiPublisherRuntimeException.java | ||
---|---|---|
1 |
package eu.dnetlib.rmi.provision; |
|
2 |
|
|
3 |
public class OaiPublisherRuntimeException extends RuntimeException { |
|
4 |
|
|
5 |
/** |
|
6 |
* |
|
7 |
*/ |
|
8 |
private static final long serialVersionUID = -4295850381530160166L; |
|
9 |
|
|
10 |
public OaiPublisherRuntimeException() { |
|
11 |
super(); |
|
12 |
} |
|
13 |
|
|
14 |
public OaiPublisherRuntimeException(final Throwable e) { |
|
15 |
super(e); |
|
16 |
} |
|
17 |
|
|
18 |
public OaiPublisherRuntimeException(final String msg, final Throwable e) { |
|
19 |
super(msg, e); |
|
20 |
} |
|
21 |
|
|
22 |
public OaiPublisherRuntimeException(final String msg) { |
|
23 |
super(msg); |
|
24 |
} |
|
25 |
|
|
26 |
} |
modules/dnet-core-components/trunk/src/main/java/eu/dnetlib/rmi/provision/GroupResult.java | ||
---|---|---|
30 | 30 |
/** |
31 | 31 |
* Builds a groupResult. |
32 | 32 |
* |
33 |
* @param fieldName
|
|
34 |
* @param fieldValue
|
|
33 |
* @param name
|
|
34 |
* @param value
|
|
35 | 35 |
* @param count |
36 | 36 |
*/ |
37 | 37 |
public GroupResult(final String name, final String value, final int count) { |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/RecordChangeDetector.java | ||
---|---|---|
1 |
package eu.dnetlib.oai; |
|
2 |
|
|
3 |
public interface RecordChangeDetector { |
|
4 |
|
|
5 |
/** |
|
6 |
* Checks if the two records have differences based on logics that vary on the actual implementor class. |
|
7 |
* |
|
8 |
* @param record1 first record to compare |
|
9 |
* @param record2 second record to compare |
|
10 |
* @return true if the two records differ based on the implementor's logics |
|
11 |
*/ |
|
12 |
boolean differs(final String record1, final String record2); |
|
13 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/mongo/DNetOAIMongoCursor.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.mongo; |
|
2 |
|
|
3 |
import java.util.Iterator; |
|
4 |
|
|
5 |
import com.google.common.collect.Lists; |
|
6 |
import com.mongodb.DBObject; |
|
7 |
import com.mongodb.client.MongoCursor; |
|
8 |
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo; |
|
9 |
import eu.dnetlib.data.oai.store.Cursor; |
|
10 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
11 |
|
|
12 |
public class DNetOAIMongoCursor implements Cursor { |
|
13 |
|
|
14 |
/** |
|
15 |
* Underlying mongo cursor. |
|
16 |
*/ |
|
17 |
private MongoCursor<DBObject> dbCursor; |
|
18 |
private int size = 0; |
|
19 |
/** |
|
20 |
* Function to apply to records before delivering. |
|
21 |
*/ |
|
22 |
private UnaryFunction<String, String> function; |
|
23 |
|
|
24 |
/** |
|
25 |
* true if the RecordInfo returned by this Cursor must include the record body, false otherwise. |
|
26 |
*/ |
|
27 |
private boolean bodyIncluded; |
|
28 |
|
|
29 |
private RecordInfoGenerator recordInfoGenerator; |
|
30 |
private MetadataExtractor metadataExtractor; |
|
31 |
private ProvenanceExtractor provenanceExtractor; |
|
32 |
|
|
33 |
public DNetOAIMongoCursor() { |
|
34 |
super(); |
|
35 |
} |
|
36 |
|
|
37 |
public DNetOAIMongoCursor(final MongoCursor<DBObject> dbCursor, final boolean bodyIncluded, final RecordInfoGenerator recordInfoGenerator, |
|
38 |
final MetadataExtractor metadataExtractor) { |
|
39 |
this(dbCursor, null, bodyIncluded, recordInfoGenerator, metadataExtractor); |
|
40 |
} |
|
41 |
|
|
42 |
public DNetOAIMongoCursor(final MongoCursor<DBObject> dbCursor, final UnaryFunction<String, String> function, final boolean bodyIncluded, |
|
43 |
final RecordInfoGenerator recordInfoGenerator, final MetadataExtractor metadataExtractor) { |
|
44 |
super(); |
|
45 |
this.dbCursor = dbCursor; |
|
46 |
this.size = 0; |
|
47 |
this.function = function; |
|
48 |
this.bodyIncluded = bodyIncluded; |
|
49 |
this.recordInfoGenerator = recordInfoGenerator; |
|
50 |
this.metadataExtractor = metadataExtractor; |
|
51 |
} |
|
52 |
|
|
53 |
/** |
|
54 |
* {@inheritDoc} |
|
55 |
*/ |
|
56 |
@Override |
|
57 |
public int count() { |
|
58 |
//I can do it because MongoCursor are always created from queries with "limit", so I do not expect the creation of the list to explode |
|
59 |
//to not exagerate, I'll get the size only if the current size is 0 |
|
60 |
if (size == 0) |
|
61 |
size = Lists.newArrayList(dbCursor).size(); |
|
62 |
return size; |
|
63 |
} |
|
64 |
|
|
65 |
/** |
|
66 |
* {@inheritDoc} |
|
67 |
* |
|
68 |
* @see Iterable#iterator() |
|
69 |
*/ |
|
70 |
@Override |
|
71 |
public Iterator<RecordInfo> iterator() { |
|
72 |
|
|
73 |
return new Iterator<RecordInfo>() { |
|
74 |
|
|
75 |
@Override |
|
76 |
public boolean hasNext() { |
|
77 |
return dbCursor.hasNext(); |
|
78 |
} |
|
79 |
|
|
80 |
@Override |
|
81 |
public RecordInfo next() { |
|
82 |
DBObject res = dbCursor.next(); |
|
83 |
RecordInfo info = recordInfoGenerator.transformDBObject(res, bodyIncluded); |
|
84 |
if ((function != null) && bodyIncluded && (info != null)) { |
|
85 |
info.setMetadata(function.evaluate(info.getMetadata())); |
|
86 |
} |
|
87 |
return info; |
|
88 |
} |
|
89 |
|
|
90 |
@Override |
|
91 |
public void remove() { |
|
92 |
throw new UnsupportedOperationException(); |
|
93 |
} |
|
94 |
|
|
95 |
}; |
|
96 |
} |
|
97 |
|
|
98 |
public UnaryFunction<String, String> getFunction() { |
|
99 |
return function; |
|
100 |
} |
|
101 |
|
|
102 |
public void setFunction(final UnaryFunction<String, String> function) { |
|
103 |
this.function = function; |
|
104 |
} |
|
105 |
|
|
106 |
public MongoCursor<DBObject> getDbCursor() { |
|
107 |
return dbCursor; |
|
108 |
} |
|
109 |
|
|
110 |
public void setDbCursor(final MongoCursor<DBObject> dbCursor) { |
|
111 |
this.dbCursor = dbCursor; |
|
112 |
} |
|
113 |
|
|
114 |
@Override |
|
115 |
public boolean isBodyIncluded() { |
|
116 |
return this.bodyIncluded; |
|
117 |
} |
|
118 |
|
|
119 |
@Override |
|
120 |
public void setBodyIncluded(final boolean bodyIncluded) { |
|
121 |
this.bodyIncluded = bodyIncluded; |
|
122 |
} |
|
123 |
|
|
124 |
public RecordInfoGenerator getRecordInfoGenerator() { |
|
125 |
return recordInfoGenerator; |
|
126 |
} |
|
127 |
|
|
128 |
public void setRecordInfoGenerator(final RecordInfoGenerator recordInfoGenerator) { |
|
129 |
this.recordInfoGenerator = recordInfoGenerator; |
|
130 |
} |
|
131 |
|
|
132 |
public MetadataExtractor getMetadataExtractor() { |
|
133 |
return metadataExtractor; |
|
134 |
} |
|
135 |
|
|
136 |
public void setMetadataExtractor(final MetadataExtractor metadataExtractor) { |
|
137 |
this.metadataExtractor = metadataExtractor; |
|
138 |
} |
|
139 |
|
|
140 |
public ProvenanceExtractor getProvenanceExtractor() { |
|
141 |
return provenanceExtractor; |
|
142 |
} |
|
143 |
|
|
144 |
public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) { |
|
145 |
this.provenanceExtractor = provenanceExtractor; |
|
146 |
} |
|
147 |
|
|
148 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/actions/RefreshConfigAction.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.actions; |
|
2 |
|
|
3 |
import eu.dnetlib.data.oai.store.conf.OAISetHelper; |
|
4 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
|
5 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardServerHandler; |
|
6 |
import org.springframework.beans.factory.annotation.Autowired; |
|
7 |
|
|
8 |
public class RefreshConfigAction extends AbstractOAIStoreAction { |
|
9 |
|
|
10 |
@Autowired |
|
11 |
private OAISetHelper oaiSetHelper; |
|
12 |
|
|
13 |
@Override |
|
14 |
public void execute(final BlackboardServerHandler handler, final BlackboardJob job) throws Exception { |
|
15 |
String dbName = job.getParameters().get("oai_dbName"); |
|
16 |
oaiSetHelper.loadConfiguration(dbName); |
|
17 |
handler.done(job); |
|
18 |
} |
|
19 |
|
|
20 |
public OAISetHelper getOaiSetHelper() { |
|
21 |
return oaiSetHelper; |
|
22 |
} |
|
23 |
|
|
24 |
public void setOaiSetHelper(final OAISetHelper oaiSetHelper) { |
|
25 |
this.oaiSetHelper = oaiSetHelper; |
|
26 |
} |
|
27 |
|
|
28 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/actions/OAIStoreActions.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.actions; |
|
2 |
|
|
3 |
public enum OAIStoreActions { |
|
4 |
SYNC, COUNT_SETS, REFRESH_CONFIG, ENSURE_INDEXES, CREATE_STORE, CREATE_OAI_INDEX, DROP_STORE |
|
5 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/mongo/RecordInfoGenerator.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.mongo; |
|
2 |
|
|
3 |
import java.io.ByteArrayInputStream; |
|
4 |
import java.io.IOException; |
|
5 |
import java.io.StringReader; |
|
6 |
import java.util.Date; |
|
7 |
import java.util.List; |
|
8 |
import java.util.zip.ZipEntry; |
|
9 |
import java.util.zip.ZipInputStream; |
|
10 |
import javax.annotation.Resource; |
|
11 |
|
|
12 |
import com.google.common.collect.Sets; |
|
13 |
import com.mongodb.DBObject; |
|
14 |
import eu.dnetlib.data.information.oai.publisher.OaiPublisherRuntimeException; |
|
15 |
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader; |
|
16 |
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo; |
|
17 |
import org.apache.commons.io.IOUtils; |
|
18 |
import org.apache.commons.lang.StringEscapeUtils; |
|
19 |
import org.dom4j.Document; |
|
20 |
import org.dom4j.DocumentException; |
|
21 |
import org.dom4j.io.SAXReader; |
|
22 |
|
|
23 |
/** |
|
24 |
* Helper class to generate a RecordInfo object from a Mongo DBObject. |
|
25 |
* |
|
26 |
* @author alessia |
|
27 |
*/ |
|
28 |
public class RecordInfoGenerator { |
|
29 |
|
|
30 |
@Resource |
|
31 |
private MetadataExtractor metadataExtractor; |
|
32 |
@Resource |
|
33 |
private ProvenanceExtractor provenanceExtractor; |
|
34 |
|
|
35 |
@SuppressWarnings("unchecked") |
|
36 |
public RecordInfo transformDBObject(final DBObject object, final boolean includeBody) { |
|
37 |
if ((object == null) || object.keySet().isEmpty()) return null; |
|
38 |
String id = (String) object.get(OAIConfigurationReader.ID_FIELD); |
|
39 |
// need to escape the identifier, otherwise the XML breaks |
|
40 |
id = StringEscapeUtils.escapeXml(id); |
|
41 |
boolean deleted = (Boolean) object.get("deleted"); |
|
42 |
RecordInfo record = new RecordInfo(); |
|
43 |
record.setIdentifier(id); |
|
44 |
record.setInternalId(object.get("_id").toString()); |
|
45 |
record.setDatestamp((Date) object.get(OAIConfigurationReader.DATESTAMP_FIELD)); |
|
46 |
record.setDeleted(deleted); |
|
47 |
List<String> sets = (List<String>) object.get(OAIConfigurationReader.SET_FIELD); |
|
48 |
if (sets != null) { |
|
49 |
record.setSetspecs(Sets.newHashSet(sets)); |
|
50 |
} |
|
51 |
if (includeBody && !deleted) { |
|
52 |
String body = decompressRecord((byte[]) object.get(OAIConfigurationReader.BODY_FIELD)); |
|
53 |
final SAXReader reader = new SAXReader(); |
|
54 |
Document doc; |
|
55 |
try { |
|
56 |
doc = reader.read(new StringReader(body)); |
|
57 |
record.setMetadata(this.metadataExtractor.evaluate(doc)); |
|
58 |
record.setProvenance(this.provenanceExtractor.evaluate(doc)); |
|
59 |
} catch (DocumentException e) { |
|
60 |
throw new OaiPublisherRuntimeException(e); |
|
61 |
} |
|
62 |
} |
|
63 |
return record; |
|
64 |
|
|
65 |
} |
|
66 |
|
|
67 |
public String decompressRecord(final byte[] input) { |
|
68 |
|
|
69 |
try { |
|
70 |
ByteArrayInputStream bis = new ByteArrayInputStream(input); |
|
71 |
ZipInputStream zis = new ZipInputStream(bis); |
|
72 |
ZipEntry ze; |
|
73 |
ze = zis.getNextEntry(); |
|
74 |
if (ze == null) |
|
75 |
throw new OaiPublisherRuntimeException("cannot decompress null zip entry "); |
|
76 |
if (!ze.getName().equals(OAIConfigurationReader.BODY_FIELD)) |
|
77 |
throw new OaiPublisherRuntimeException("cannot decompress zip entry name :" + ze.getName()); |
|
78 |
return IOUtils.toString(zis); |
|
79 |
} catch (IOException e) { |
|
80 |
throw new OaiPublisherRuntimeException(e); |
|
81 |
} |
|
82 |
|
|
83 |
} |
|
84 |
|
|
85 |
public MetadataExtractor getMetadataExtractor() { |
|
86 |
return metadataExtractor; |
|
87 |
} |
|
88 |
|
|
89 |
public void setMetadataExtractor(final MetadataExtractor metadataExtractor) { |
|
90 |
this.metadataExtractor = metadataExtractor; |
|
91 |
} |
|
92 |
|
|
93 |
public ProvenanceExtractor getProvenanceExtractor() { |
|
94 |
return provenanceExtractor; |
|
95 |
} |
|
96 |
|
|
97 |
public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) { |
|
98 |
this.provenanceExtractor = provenanceExtractor; |
|
99 |
} |
|
100 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/actions/CreateStoreAction.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.actions; |
|
2 |
|
|
3 |
import javax.annotation.Resource; |
|
4 |
|
|
5 |
import eu.dnetlib.data.oai.store.mongo.MongoPublisherStore; |
|
6 |
import eu.dnetlib.data.oai.store.mongo.MongoPublisherStoreDAO; |
|
7 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
|
8 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardServerHandler; |
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
|
|
12 |
public class CreateStoreAction extends AbstractOAIStoreAction { |
|
13 |
|
|
14 |
private static final Log log = LogFactory.getLog(CreateStoreAction.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
15 |
|
|
16 |
@Resource |
|
17 |
private MongoPublisherStoreDAO mongoPublisherStoreDAO; |
|
18 |
|
|
19 |
@Override |
|
20 |
public void execute(final BlackboardServerHandler handler, final BlackboardJob job) throws Exception { |
|
21 |
String mdformat = job.getParameters().get("format"); |
|
22 |
String layout = job.getParameters().get("layout"); |
|
23 |
String interp = job.getParameters().get("interpretation"); |
|
24 |
String dbName = job.getParameters().get("oai_dbName"); |
|
25 |
if (this.mongoPublisherStoreDAO.getStore(mdformat, interp, layout, dbName) == null) { |
|
26 |
MongoPublisherStore store = this.mongoPublisherStoreDAO.createStore(mdformat, interp, layout, dbName); |
|
27 |
log.info("Created store with id: " + store.getId() + "on db " + dbName); |
|
28 |
} else { |
|
29 |
log.info("Store already exists for format=" + mdformat + " layout=" + layout + " interpretation=" + interp + " on db " + dbName); |
|
30 |
} |
|
31 |
handler.done(job); |
|
32 |
} |
|
33 |
|
|
34 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/OAIProperties.java | ||
---|---|---|
1 |
package eu.dnetlib.oai; |
|
2 |
|
|
3 |
import eu.dnetlib.data.information.oai.publisher.OAIController.DELETED_SUPPORT; |
|
4 |
|
|
5 |
public class OAIProperties { |
|
6 |
|
|
7 |
/** |
|
8 |
* forwarded url header name, default "X-Forwarded-Url". |
|
9 |
*/ |
|
10 |
private String forwardedUrlHeaderName = "X-Forwarded-Url"; |
|
11 |
|
|
12 |
/** |
|
13 |
* optional base url. If present it overrides the X-Forwarded-Url. |
|
14 |
*/ |
|
15 |
private String baseUrl; |
|
16 |
private String repoName = "Driver Service for supporting Open Archive Initiative requests"; |
|
17 |
private String repoEmail = "artini@isti.cnr.it"; |
|
18 |
private String earliestDatestamp = "1970-01-01"; |
|
19 |
private DELETED_SUPPORT deletedRecordSupport; |
|
20 |
private String dateGranularity = "YYYY-MM-DD"; |
|
21 |
|
|
22 |
public String getBaseUrl() { |
|
23 |
return baseUrl; |
|
24 |
} |
|
25 |
|
|
26 |
public void setBaseUrl(String baseUrl) { |
|
27 |
this.baseUrl = baseUrl; |
|
28 |
} |
|
29 |
|
|
30 |
public String getRepoName() { |
|
31 |
return repoName; |
|
32 |
} |
|
33 |
|
|
34 |
public void setRepoName(String repoName) { |
|
35 |
this.repoName = repoName; |
|
36 |
} |
|
37 |
|
|
38 |
public String getRepoEmail() { |
|
39 |
return repoEmail; |
|
40 |
} |
|
41 |
|
|
42 |
public void setRepoEmail(String repoEmail) { |
|
43 |
this.repoEmail = repoEmail; |
|
44 |
} |
|
45 |
|
|
46 |
public String getEarliestDatestamp() { |
|
47 |
return earliestDatestamp; |
|
48 |
} |
|
49 |
|
|
50 |
public void setEarliestDatestamp(String earliestDatestamp) { |
|
51 |
this.earliestDatestamp = earliestDatestamp; |
|
52 |
} |
|
53 |
|
|
54 |
public String getDeletedRecordSupport() { |
|
55 |
return deletedRecordSupport.toString(); |
|
56 |
} |
|
57 |
|
|
58 |
public void setDeletedRecordSupport(String deletedRecordSupport) { |
|
59 |
this.deletedRecordSupport = DELETED_SUPPORT.valueOf(deletedRecordSupport.trim().toUpperCase()); |
|
60 |
} |
|
61 |
|
|
62 |
public String getDateGranularity() { |
|
63 |
return dateGranularity; |
|
64 |
} |
|
65 |
|
|
66 |
public void setDateGranularity(String dateGranularity) { |
|
67 |
this.dateGranularity = dateGranularity; |
|
68 |
} |
|
69 |
|
|
70 |
public String getForwardedUrlHeaderName() { |
|
71 |
return forwardedUrlHeaderName; |
|
72 |
} |
|
73 |
|
|
74 |
public void setForwardedUrlHeaderName(String forwardedUrlHeaderName) { |
|
75 |
this.forwardedUrlHeaderName = forwardedUrlHeaderName; |
|
76 |
} |
|
77 |
|
|
78 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/mongo/MongoPublisherStore.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.mongo; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.util.Collection; |
|
5 |
import java.util.Date; |
|
6 |
import java.util.List; |
|
7 |
import java.util.concurrent.ArrayBlockingQueue; |
|
8 |
import java.util.concurrent.BlockingQueue; |
|
9 |
import java.util.concurrent.TimeUnit; |
|
10 |
import java.util.zip.ZipEntry; |
|
11 |
import java.util.zip.ZipOutputStream; |
|
12 |
|
|
13 |
import com.google.common.base.Function; |
|
14 |
import com.google.common.base.Predicate; |
|
15 |
import com.google.common.base.Stopwatch; |
|
16 |
import com.google.common.collect.Iterables; |
|
17 |
import com.google.common.collect.Lists; |
|
18 |
import com.google.common.collect.Multimap; |
|
19 |
import com.mongodb.BasicDBObject; |
|
20 |
import com.mongodb.BasicDBObjectBuilder; |
|
21 |
import com.mongodb.DBObject; |
|
22 |
import com.mongodb.WriteConcern; |
|
23 |
import com.mongodb.client.FindIterable; |
|
24 |
import com.mongodb.client.ListIndexesIterable; |
|
25 |
import com.mongodb.client.MongoCollection; |
|
26 |
import com.mongodb.client.MongoDatabase; |
|
27 |
import com.mongodb.client.model.Filters; |
|
28 |
import com.mongodb.client.model.IndexOptions; |
|
29 |
import com.mongodb.client.model.Sorts; |
|
30 |
import com.mongodb.client.model.UpdateOptions; |
|
31 |
import com.mongodb.client.result.DeleteResult; |
|
32 |
import com.mongodb.client.result.UpdateResult; |
|
33 |
import eu.dnetlib.data.information.oai.publisher.OaiPublisherRuntimeException; |
|
34 |
import eu.dnetlib.data.information.oai.publisher.PublisherField; |
|
35 |
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader; |
|
36 |
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo; |
|
37 |
import eu.dnetlib.data.information.oai.publisher.info.SetInfo; |
|
38 |
import eu.dnetlib.data.oai.store.PublisherStore; |
|
39 |
import eu.dnetlib.data.oai.store.RecordChangeDetector; |
|
40 |
import eu.dnetlib.data.oai.store.parser.MongoQueryParser; |
|
41 |
import eu.dnetlib.data.oai.store.parser.PublisherRecordParser; |
|
42 |
import eu.dnetlib.data.oai.store.sets.MongoSetCollection; |
|
43 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
44 |
import org.apache.commons.io.output.ByteArrayOutputStream; |
|
45 |
import org.apache.commons.lang.StringUtils; |
|
46 |
import org.apache.commons.logging.Log; |
|
47 |
import org.apache.commons.logging.LogFactory; |
|
48 |
import org.bson.conversions.Bson; |
|
49 |
import org.bson.types.Binary; |
|
50 |
|
|
51 |
public class MongoPublisherStore implements PublisherStore<DNetOAIMongoCursor> { |
|
52 |
|
|
53 |
private static final Log log = LogFactory.getLog(MongoPublisherStore.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
54 |
|
|
55 |
private String id, metadataFormat, interpretation, layout; |
|
56 |
/** |
|
57 |
* Keeps information about the fields to be created in mongo. |
|
58 |
**/ |
|
59 |
private List<PublisherField> mongoFields; |
|
60 |
|
|
61 |
private MongoCollection<DBObject> collection; |
|
62 |
private MongoCollection<DBObject> discardedCollection; |
|
63 |
|
|
64 |
private RecordInfoGenerator recordInfoGenerator; |
|
65 |
private MetadataExtractor metadataExtractor; |
|
66 |
|
|
67 |
private MongoQueryParser queryParser; |
|
68 |
|
|
69 |
private RecordChangeDetector recordChangeDetector; |
|
70 |
|
|
71 |
private MongoSetCollection mongoSetCollection; |
|
72 |
|
|
73 |
/** |
|
74 |
* Used to generate the OAI identifiers compliant to the protocol. See |
|
75 |
* http://www.openarchives.org/OAI/openarchivesprotocol.html#UniqueIdentifier. |
|
76 |
*/ |
|
77 |
private String idScheme; |
|
78 |
/** |
|
79 |
* Used to generate the OAI identifiers compliant to the protocol. See |
|
80 |
* http://www.openarchives.org/OAI/openarchivesprotocol.html#UniqueIdentifier. |
|
81 |
*/ |
|
82 |
private String idNamespace; |
|
83 |
|
|
84 |
private boolean alwaysNewRecord; |
|
85 |
|
|
86 |
public MongoPublisherStore() { |
|
87 |
super(); |
|
88 |
} |
|
89 |
|
|
90 |
public MongoPublisherStore(final String id, |
|
91 |
final String metadataFormat, |
|
92 |
final String interpretation, |
|
93 |
final String layout, |
|
94 |
final MongoCollection<DBObject> collection, |
|
95 |
final List<PublisherField> mongoFields, |
|
96 |
final MongoQueryParser queryParser, |
|
97 |
final RecordInfoGenerator recordInfoGenerator, |
|
98 |
final String idScheme, |
|
99 |
final String idNamespace, |
|
100 |
final MetadataExtractor metadataExtractor, |
|
101 |
final RecordChangeDetector recordChangeDetector, |
|
102 |
final boolean alwaysNewRecord, |
|
103 |
final MongoDatabase mongodb) { |
|
104 |
super(); |
|
105 |
this.id = id; |
|
106 |
this.metadataFormat = metadataFormat; |
|
107 |
this.interpretation = interpretation; |
|
108 |
this.layout = layout; |
|
109 |
this.collection = collection; |
|
110 |
this.discardedCollection = mongodb.getCollection("discarded-" + collection.getNamespace().getCollectionName(), DBObject.class); |
|
111 |
this.mongoFields = mongoFields; |
|
112 |
this.queryParser = queryParser; |
|
113 |
this.recordInfoGenerator = recordInfoGenerator; |
|
114 |
this.idScheme = idScheme; |
|
115 |
this.idNamespace = idNamespace; |
|
116 |
this.recordChangeDetector = recordChangeDetector; |
|
117 |
this.alwaysNewRecord = alwaysNewRecord; |
|
118 |
} |
|
119 |
|
|
120 |
@Override |
|
121 |
public RecordInfo getRecord(final String recordId) { |
|
122 |
Bson query = Filters.eq(OAIConfigurationReader.ID_FIELD, recordId); |
|
123 |
DBObject result = this.collection.find(query).first(); |
|
124 |
log.debug(result); |
|
125 |
return this.recordInfoGenerator.transformDBObject(result, true); |
|
126 |
} |
|
127 |
|
|
128 |
@Override |
|
129 |
public RecordInfo getRecord(final String recordId, final UnaryFunction<String, String> unaryFunction) { |
|
130 |
RecordInfo result = this.getRecord(recordId); |
|
131 |
if (result != null) { |
|
132 |
String transformedBody = unaryFunction.evaluate(result.getMetadata()); |
|
133 |
result.setMetadata(transformedBody); |
|
134 |
} |
|
135 |
return result; |
|
136 |
} |
|
137 |
|
|
138 |
@Override |
|
139 |
public DNetOAIMongoCursor getRecords(final String queryString, final boolean bodyIncluded, final int limit) { |
|
140 |
FindIterable<DBObject> iter = loggedFindByQuery(queryString, limit); |
|
141 |
return new DNetOAIMongoCursor(iter.iterator(), bodyIncluded, this.recordInfoGenerator, this.metadataExtractor); |
|
142 |
} |
|
143 |
|
|
144 |
@Override |
|
145 |
public DNetOAIMongoCursor getRecords(final String queryString, |
|
146 |
final UnaryFunction<String, String> unaryFunction, |
|
147 |
final boolean bodyIncluded, |
|
148 |
final int limit) { |
|
149 |
FindIterable<DBObject> iter = loggedFindByQuery(queryString, limit); |
|
150 |
return new DNetOAIMongoCursor(iter.iterator(), unaryFunction, bodyIncluded, this.recordInfoGenerator, this.metadataExtractor); |
|
151 |
} |
|
152 |
|
|
153 |
private FindIterable<DBObject> loggedFindByQuery(final String queryString, final int limit) { |
|
154 |
Bson query = this.queryParser.parse(queryString); |
|
155 |
long start = System.currentTimeMillis(); |
|
156 |
Bson sortByIdAsc = Sorts.orderBy(Sorts.ascending("_id")); |
|
157 |
FindIterable<DBObject> iter = this.collection.find(query).sort(sortByIdAsc).limit(limit); |
|
158 |
long end = System.currentTimeMillis(); |
|
159 |
log.debug("Query:" + query + "\ntime to get mongo iterable (ms): " + (end - start)); |
|
160 |
return iter; |
|
161 |
} |
|
162 |
|
|
163 |
@Override |
|
164 |
public List<PublisherField> getIndices() { |
|
165 |
return this.mongoFields; |
|
166 |
} |
|
167 |
|
|
168 |
/** |
|
169 |
* <p> |
|
170 |
* Ensure indices on the configuration-defined fields and on the system fields DATESTAMP_FIELD and LAST_COLLECTION_DATE_FIELD. |
|
171 |
* <p> |
|
172 |
* <p> |
|
173 |
* Note that by default ID_FIELD, SET_FIELD, DELETED_FIELD, BODY_FIELD, UPDATED_FIELD are not indexed. If you want an index on those, |
|
174 |
* then you have to specify it in the configuration file of the OAI Publisher: <br> |
|
175 |
* <INDEX name="deleted"> |
|
176 |
* </p> |
|
177 |
* <p> |
|
178 |
* {@inheritDoc} |
|
179 |
*/ |
|
180 |
@Override |
|
181 |
public void ensureIndices() { |
|
182 |
final ListIndexesIterable<BasicDBObject> indexesIterable = this.collection.listIndexes(BasicDBObject.class); |
|
183 |
final IndexOptions indexOptions = new IndexOptions().background(true); |
|
184 |
Stopwatch sw = Stopwatch.createUnstarted(); |
|
185 |
sw.start(); |
|
186 |
// I want to keep the composite indexes that might have been defined manually |
|
187 |
log.debug("Ensuring currently defined composite indexes:"); |
|
188 |
for (BasicDBObject o : indexesIterable) { |
|
189 |
BasicDBObject fieldIndexed = (BasicDBObject) o.get("key"); |
|
190 |
if (fieldIndexed.keySet().size() > 1) { |
|
191 |
log.debug(o); |
|
192 |
this.collection.createIndex(fieldIndexed, indexOptions); |
|
193 |
} |
|
194 |
} |
|
195 |
|
|
196 |
// Indexes on single fields. |
|
197 |
for (PublisherField field : this.mongoFields) { |
|
198 |
BasicDBObject mongoIdx = new BasicDBObject(field.getFieldName(), 1); |
|
199 |
log.debug("Creating index : " + mongoIdx); |
|
200 |
this.collection.createIndex(mongoIdx, indexOptions); |
|
201 |
} |
|
202 |
log.debug("Creating index over : " + OAIConfigurationReader.DATESTAMP_FIELD); |
|
203 |
this.collection.createIndex(new BasicDBObject(OAIConfigurationReader.DATESTAMP_FIELD, 1), indexOptions); |
|
204 |
log.debug("Creating index over : " + OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD); |
|
205 |
this.collection.createIndex(new BasicDBObject(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, 1), indexOptions); |
|
206 |
sw.stop(); |
|
207 |
log.info("All indexes have been updated in " + sw.elapsed(TimeUnit.MILLISECONDS) + " milliseconds"); |
|
208 |
} |
|
209 |
|
|
210 |
/** |
|
211 |
* Creates a compound index over the specified fields on the given store. |
|
212 |
* <p> |
|
213 |
* The creation is performed on the background |
|
214 |
* </p> |
|
215 |
* |
|
216 |
* @param fieldNames List of fields to be included in the compound index |
|
217 |
* @theStore MongoPublisherStore where to create the index |
|
218 |
*/ |
|
219 |
public void createCompoundIndex(final List<String> fieldNames) { |
|
220 |
if ((fieldNames == null) || fieldNames.isEmpty()) { |
|
221 |
log.fatal("No fields specified for the creation of the compound index"); |
|
222 |
} |
|
223 |
BasicDBObjectBuilder theIndexBuilder = BasicDBObjectBuilder.start(); |
|
224 |
for (String f : fieldNames) { |
|
225 |
theIndexBuilder.add(f, 1); |
|
226 |
} |
|
227 |
BasicDBObject theIndex = (BasicDBObject) theIndexBuilder.get(); |
|
228 |
log.info("Creating index " + theIndex + " on " + this.getId()); |
|
229 |
this.getCollection().createIndex(theIndex, new IndexOptions().background(true)); |
|
230 |
} |
|
231 |
|
|
232 |
private void dropDiscarded(final String source) { |
|
233 |
if (StringUtils.isBlank(source)) { |
|
234 |
log.debug("Dropping discarded records from publisherStore " + id); |
|
235 |
discardedCollection.drop(); |
|
236 |
} else { |
|
237 |
log.debug("Dropping discarded records for source " + source + " from publisherStore " + id); |
|
238 |
discardedCollection.deleteMany(Filters.eq(OAIConfigurationReader.SET_FIELD, source)); |
|
239 |
} |
|
240 |
} |
|
241 |
|
|
242 |
@Override |
|
243 |
public int feed(final Iterable<String> records, final String source) { |
|
244 |
final BlockingQueue<Object> queue = new ArrayBlockingQueue<Object>(80); |
|
245 |
final Object sentinel = new Object(); |
|
246 |
this.dropDiscarded(source); |
|
247 |
final Date feedDate = new Date(); |
|
248 |
Thread background = new Thread(new Runnable() { |
|
249 |
|
|
250 |
@Override |
|
251 |
public void run() { |
|
252 |
//For fast feeding we want to use a collection with unack write concern |
|
253 |
final MongoCollection<DBObject> unackCollection = collection.withWriteConcern(WriteConcern.UNACKNOWLEDGED); |
|
254 |
while (true) { |
|
255 |
try { |
|
256 |
Object record = queue.take(); |
|
257 |
if (record == sentinel) { |
|
258 |
break; |
|
259 |
} |
|
260 |
safeFeedRecord((String) record, source, feedDate, unackCollection); |
|
261 |
} catch (InterruptedException e) { |
|
262 |
log.fatal("got exception in background thread", e); |
|
263 |
throw new IllegalStateException(e); |
|
264 |
} |
|
265 |
} |
|
266 |
} |
|
267 |
}); |
|
268 |
background.start(); |
|
269 |
long startFeed = feedDate.getTime(); |
|
270 |
try { |
|
271 |
log.info("feeding publisherStore " + id); |
|
272 |
for (final String record : records) { |
|
273 |
queue.put(record); |
|
274 |
} |
|
275 |
queue.put(sentinel); |
|
276 |
log.info("finished feeding publisherStore " + id); |
|
277 |
|
|
278 |
background.join(); |
|
279 |
} catch (InterruptedException e) { |
|
280 |
throw new IllegalStateException(e); |
|
281 |
} |
|
282 |
long endFeed = System.currentTimeMillis(); |
|
283 |
log.fatal("OAI STORE " + id + " FEEDING COMPLETED IN " + (endFeed - startFeed) + "ms"); |
|
284 |
this.setDeletedFlags(feedDate, source); |
|
285 |
return this.count(); |
|
286 |
} |
|
287 |
|
|
288 |
/** |
|
289 |
* Launches the thread that flags the records to be considered as 'deleted'. |
|
290 |
* <p> |
|
291 |
* The datestamp of the deleted records must be updated as well, according to the OAI specs available at |
|
292 |
* http://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords: if a repository does keep track of deletions then the |
|
293 |
* datestamp of the deleted record must be the date and time that it was deleted. |
|
294 |
* </p> |
|
295 |
* |
|
296 |
* @param feedDate |
|
297 |
* @param source |
|
298 |
*/ |
|
299 |
private void setDeletedFlags(final Date feedDate, final String source) { |
|
300 |
//get the collection with ACKNOWLEDGE Write concern |
|
301 |
final MongoCollection<DBObject> ackCollection = collection.withWriteConcern(WriteConcern.ACKNOWLEDGED); |
|
302 |
Thread deletedSetter = new Thread(new Runnable() { |
|
303 |
|
|
304 |
@Override |
|
305 |
public void run() { |
|
306 |
Bson filter = Filters.and(Filters.eq(OAIConfigurationReader.DELETED_FIELD, false), |
|
307 |
Filters.lt(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, feedDate)); |
|
308 |
if (!StringUtils.isBlank(source)) { |
|
309 |
filter = Filters.and(filter, Filters.eq(OAIConfigurationReader.SET_FIELD, source)); |
|
310 |
} |
|
311 |
log.debug("Delete flag query: " + filter); |
|
312 |
BasicDBObject update = new BasicDBObject("$set", |
|
313 |
BasicDBObjectBuilder.start(OAIConfigurationReader.DELETED_FIELD, true).append(OAIConfigurationReader.DATESTAMP_FIELD, feedDate) |
|
314 |
.append(OAIConfigurationReader.UPDATED_FIELD, true).get()); |
|
315 |
log.debug("Updating as: " + update.toString()); |
|
316 |
final UpdateResult updateResult = ackCollection.updateMany(filter, update, new UpdateOptions().upsert(false)); |
|
317 |
log.debug("Deleted flags set for source: " + source + " #records = " + updateResult.getModifiedCount()); |
|
318 |
} |
|
319 |
}); |
|
320 |
|
|
321 |
deletedSetter.start(); |
|
322 |
try { |
|
323 |
deletedSetter.join(); |
|
324 |
} catch (InterruptedException e) { |
|
325 |
throw new IllegalStateException(e); |
|
326 |
} |
|
327 |
} |
|
328 |
|
|
329 |
@Override |
|
330 |
public void drop() { |
|
331 |
this.collection.drop(); |
|
332 |
} |
|
333 |
|
|
334 |
@Override |
|
335 |
public void drop(final String queryString) { |
|
336 |
Bson query = this.queryParser.parse(queryString); |
|
337 |
final DeleteResult deleteResult = this.collection.deleteMany(query); |
|
338 |
log.debug("Deleted by query: " + queryString + " #deleted: " + deleteResult.getDeletedCount()); |
|
339 |
|
|
340 |
} |
|
341 |
|
|
342 |
@Override |
|
343 |
public int count() { |
|
344 |
return (int) this.collection.count(); |
|
345 |
} |
|
346 |
|
|
347 |
@Override |
|
348 |
public int count(final String queryString) { |
|
349 |
if (StringUtils.isBlank(queryString)) return (int) this.collection.count(); |
|
350 |
Bson query = this.queryParser.parse(queryString); |
|
351 |
return (int) this.collection.count(query); |
|
352 |
} |
|
353 |
|
|
354 |
public List<String> getDistinctSetNamesFromRecords() { |
|
355 |
log.info("Going to ask for all distinct sets in the oaistore " + id + ": this may take a long time..."); |
|
356 |
return Lists.newArrayList(this.collection.distinct(OAIConfigurationReader.SET_FIELD, String.class)); |
|
357 |
} |
|
358 |
|
|
359 |
// ***********************************************************************************************// |
|
360 |
// Feed utilities |
|
361 |
// ***********************************************************************************************// |
|
362 |
private boolean safeFeedRecord(final String record, final String source, final Date feedDate, final MongoCollection<DBObject> unackCollection) { |
|
363 |
try { |
|
364 |
if (!record.isEmpty()) return feedRecord(record, source, feedDate, unackCollection); |
|
365 |
} catch (final Throwable e) { |
|
366 |
log.error("Got unhandled exception while parsing record", e); |
|
367 |
discardedCollection.insertOne(new BasicDBObject(OAIConfigurationReader.SET_FIELD, source).append(OAIConfigurationReader.BODY_FIELD, record)); |
|
368 |
} |
|
369 |
return false; |
|
370 |
} |
|
371 |
|
|
372 |
/** |
|
373 |
* Feed the record to the store. |
|
374 |
* |
|
375 |
* @return true if the record is new, false otherwise |
|
376 |
*/ |
|
377 |
private boolean feedRecord(final String record, final String source, final Date feedDate, final MongoCollection<DBObject> unackCollection) { |
|
378 |
PublisherRecordParser parser = new PublisherRecordParser(this.mongoFields); |
|
379 |
final Multimap<String, String> recordProperties = parser.parseRecord(record); |
|
380 |
String id = ""; |
|
381 |
String oaiID = ""; |
|
382 |
if (recordProperties.containsKey(OAIConfigurationReader.ID_FIELD)) { |
|
383 |
id = recordProperties.get(OAIConfigurationReader.ID_FIELD).iterator().next(); |
|
384 |
oaiID = getOAIIdentifier(id); |
|
385 |
if (isNewRecord(oaiID)) { |
|
386 |
feedNew(oaiID, record, recordProperties, feedDate, unackCollection); |
|
387 |
return true; |
|
388 |
} else { |
|
389 |
if (isChanged(oaiID, record)) { |
|
390 |
updateRecord(oaiID, record, recordProperties, feedDate, unackCollection); |
|
391 |
} else { |
|
392 |
// it is not changed, I only have to update the last collection date |
|
393 |
handleRecord(oaiID, feedDate, unackCollection); |
|
394 |
} |
|
395 |
} |
|
396 |
} else { |
|
397 |
log.error("parsed record seems invalid -- no identifier property with name: " + OAIConfigurationReader.ID_FIELD); |
|
398 |
discardedCollection.insertOne(new BasicDBObject(OAIConfigurationReader.SET_FIELD, source).append(OAIConfigurationReader.BODY_FIELD, record).append( |
|
399 |
OAIConfigurationReader.DATESTAMP_FIELD, feedDate)); |
|
400 |
} |
|
401 |
return false; |
|
402 |
} |
|
403 |
|
|
404 |
private BasicDBObject createBasicObject(final String oaiID, final String record, final Multimap<String, String> recordProperties) { |
|
405 |
BasicDBObject obj = new BasicDBObject(); |
|
406 |
for (final String key : recordProperties.keySet()) { |
|
407 |
if (key.equals(OAIConfigurationReader.ID_FIELD)) { |
|
408 |
obj.put(key, oaiID); |
|
409 |
} else { |
|
410 |
Collection<String> values = recordProperties.get(key); |
|
411 |
if (key.equals(OAIConfigurationReader.SET_FIELD)) { |
|
412 |
|
|
413 |
Iterable<String> setSpecs = Iterables.transform(values, new Function<String, String>() { |
|
414 |
|
|
415 |
@Override |
|
416 |
public String apply(final String s) { |
|
417 |
return mongoSetCollection.normalizeSetSpec(s); |
|
418 |
} |
|
419 |
|
|
420 |
}); |
|
421 |
obj.put(key, setSpecs); |
|
422 |
} else { |
|
423 |
// let's check if the key is the name of a repeatable field or not |
|
424 |
PublisherField keyField = Iterables.find(this.mongoFields, new Predicate<PublisherField>() { |
|
425 |
|
|
426 |
@Override |
|
427 |
public boolean apply(final PublisherField field) { |
|
428 |
return field.getFieldName().equals(key); |
|
429 |
} |
|
430 |
}, null); |
|
431 |
if (keyField == null) { |
|
432 |
log.warn("Expected field to index: " + key + " could not be found, but we keep going..."); |
|
433 |
} |
|
434 |
if ((keyField != null) && !keyField.isRepeatable()) { |
|
435 |
if ((values != null) && !values.isEmpty()) { |
|
436 |
obj.put(key, values.iterator().next()); |
|
437 |
} |
|
438 |
} else { |
|
439 |
obj.put(key, values); |
|
440 |
} |
|
441 |
} |
|
442 |
} |
|
443 |
} |
|
444 |
try { |
|
445 |
obj.put(OAIConfigurationReader.BODY_FIELD, createCompressRecord(record)); |
|
446 |
obj.put(OAIConfigurationReader.DELETED_FIELD, false); |
|
447 |
return obj; |
|
448 |
} catch (IOException e) { |
|
449 |
throw new OaiPublisherRuntimeException(e); |
|
450 |
} |
|
451 |
} |
|
452 |
|
|
453 |
/** |
|
454 |
* @param record |
|
455 |
* @throws IOException |
|
456 |
*/ |
|
457 |
public Binary createCompressRecord(final String record) throws IOException { |
|
458 |
ByteArrayOutputStream os = new ByteArrayOutputStream(); |
|
459 |
ZipOutputStream zos = new ZipOutputStream(os); |
|
460 |
ZipEntry entry = new ZipEntry(OAIConfigurationReader.BODY_FIELD); |
|
461 |
zos.putNextEntry(entry); |
|
462 |
zos.write(record.getBytes()); |
|
463 |
zos.closeEntry(); |
|
464 |
zos.flush(); |
|
465 |
zos.close(); |
|
466 |
return new Binary(os.toByteArray()); |
|
467 |
} |
|
468 |
|
|
469 |
private void feedNew(final String oaiID, |
|
470 |
final String record, |
|
471 |
final Multimap<String, String> recordProperties, |
|
472 |
final Date feedDate, |
|
473 |
final MongoCollection<DBObject> unackCollection) { |
|
474 |
log.debug("New record received. Assigned oai id: " + oaiID); |
|
475 |
DBObject obj = this.createBasicObject(oaiID, record, recordProperties); |
|
476 |
obj.put(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, feedDate); |
|
477 |
obj.put(OAIConfigurationReader.DATESTAMP_FIELD, feedDate); |
|
478 |
obj.put(OAIConfigurationReader.UPDATED_FIELD, false); |
|
479 |
unackCollection.insertOne(obj); |
|
480 |
this.upsertSets(recordProperties.get(OAIConfigurationReader.SET_FIELD)); |
|
481 |
} |
|
482 |
|
|
483 |
private void updateRecord(final String oaiID, |
|
484 |
final String record, |
|
485 |
final Multimap<String, String> recordProperties, |
|
486 |
final Date feedDate, |
|
487 |
final MongoCollection<DBObject> unackCollection) { |
|
488 |
log.debug("updating record " + oaiID); |
|
489 |
BasicDBObject obj = this.createBasicObject(oaiID, record, recordProperties); |
|
490 |
obj.put(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, feedDate); |
|
491 |
obj.put(OAIConfigurationReader.DATESTAMP_FIELD, feedDate); |
|
492 |
obj.put(OAIConfigurationReader.UPDATED_FIELD, true); |
|
493 |
Bson oldObj = Filters.eq(OAIConfigurationReader.ID_FIELD, oaiID); |
|
494 |
unackCollection.updateOne(oldObj, obj, new UpdateOptions().upsert(true)); |
|
495 |
this.upsertSets(recordProperties.get(OAIConfigurationReader.SET_FIELD)); |
|
496 |
} |
|
497 |
|
|
498 |
public void upsertSets(final Iterable<String> setNames) { |
|
499 |
// feed the list of sets, if any |
|
500 |
if (setNames != null) { |
|
501 |
for (String setName : setNames) { |
|
502 |
if (StringUtils.isNotBlank(setName)) { |
|
503 |
final SetInfo set = new SetInfo(); |
|
504 |
String setSpec = this.mongoSetCollection.normalizeSetSpec(setName); |
|
505 |
set.setSetSpec(setSpec); |
|
506 |
set.setSetName(setName); |
|
507 |
set.setSetDescription("This set contains metadata records whose provenance is " + setName); |
|
508 |
set.setEnabled(true); |
|
509 |
String query = "(" + OAIConfigurationReader.SET_FIELD + " = \"" + setSpec + "\") "; |
|
510 |
set.setQuery(query); |
|
511 |
this.mongoSetCollection.upsertSet(set, false, getCollection().getNamespace().getDatabaseName()); |
|
512 |
} |
|
513 |
} |
|
514 |
} |
|
515 |
} |
|
516 |
|
|
517 |
private void handleRecord(final String oaiID, final Date lastCollectionDate, final MongoCollection<DBObject> unackCollection) { |
|
518 |
log.debug("handling unchanged record " + oaiID); |
|
519 |
Bson oldObj = Filters.eq(OAIConfigurationReader.ID_FIELD, oaiID); |
|
520 |
BasicDBObject update = new BasicDBObject("$set", new BasicDBObject(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, lastCollectionDate)); |
|
521 |
unackCollection.updateOne(oldObj, update, new UpdateOptions().upsert(true)); |
|
522 |
} |
|
523 |
|
|
524 |
private boolean isNewRecord(final String oaiIdentifier) { |
|
525 |
if (alwaysNewRecord || (collection.count() == 0)) return true; |
|
526 |
return this.collection.find(Filters.eq(OAIConfigurationReader.ID_FIELD, oaiIdentifier)).first() == null; |
|
527 |
} |
|
528 |
|
|
529 |
// ***********************************************************************************************// |
|
530 |
// Setters / Getters / Basic utilities |
|
531 |
// ***********************************************************************************************// |
|
532 |
|
|
533 |
private boolean isChanged(final String oaiID, final String record) { |
|
534 |
RecordInfo oldRecord = getRecord(oaiID); |
|
535 |
if (oldRecord == null) return StringUtils.isBlank(record); |
|
536 |
return this.recordChangeDetector.differs(oldRecord.getMetadata(), record); |
|
537 |
} |
|
538 |
|
|
539 |
private String getOAIIdentifier(final String id) { |
|
540 |
return this.idScheme + ":" + this.idNamespace + ":" + id; |
|
541 |
} |
|
542 |
|
|
543 |
@Override |
|
544 |
public int hashCode() { |
|
545 |
final int prime = 31; |
|
546 |
int result = 1; |
|
547 |
result = (prime * result) + ((collection == null) ? 0 : collection.hashCode()); |
|
548 |
result = (prime * result) + ((id == null) ? 0 : id.hashCode()); |
|
549 |
result = (prime * result) + ((interpretation == null) ? 0 : interpretation.hashCode()); |
|
550 |
result = (prime * result) + ((layout == null) ? 0 : layout.hashCode()); |
|
551 |
result = (prime * result) + ((metadataFormat == null) ? 0 : metadataFormat.hashCode()); |
|
552 |
return result; |
|
553 |
} |
|
554 |
|
|
555 |
@Override |
|
556 |
public boolean equals(final Object obj) { |
|
557 |
if (this == obj) return true; |
|
558 |
if (obj == null) return false; |
|
559 |
if (!(obj instanceof MongoPublisherStore)) return false; |
|
560 |
MongoPublisherStore other = (MongoPublisherStore) obj; |
|
561 |
if (collection == null) { |
|
562 |
if (other.collection != null) return false; |
|
563 |
} else if (!collection.equals(other.collection)) return false; |
|
564 |
if (id == null) { |
|
565 |
if (other.id != null) return false; |
|
566 |
} else if (!id.equals(other.id)) return false; |
|
567 |
if (interpretation == null) { |
|
568 |
if (other.interpretation != null) return false; |
|
569 |
} else if (!interpretation.equals(other.interpretation)) return false; |
|
570 |
if (layout == null) { |
|
571 |
if (other.layout != null) return false; |
|
572 |
} else if (!layout.equals(other.layout)) return false; |
|
573 |
if (metadataFormat == null) { |
|
574 |
if (other.metadataFormat != null) return false; |
|
575 |
} else if (!metadataFormat.equals(other.metadataFormat)) return false; |
|
576 |
return true; |
|
577 |
} |
|
578 |
|
|
579 |
public MongoCollection<DBObject> getCollection() { |
|
580 |
return collection; |
|
581 |
} |
|
582 |
|
|
583 |
public void setCollection(final MongoCollection<DBObject> collection) { |
|
584 |
this.collection = collection; |
|
585 |
} |
|
586 |
|
|
587 |
public MongoQueryParser getQueryParser() { |
|
588 |
return queryParser; |
|
589 |
} |
|
590 |
|
|
591 |
public void setQueryParser(final MongoQueryParser queryParser) { |
|
592 |
this.queryParser = queryParser; |
|
593 |
} |
|
594 |
|
|
595 |
public MongoCollection<DBObject> getDiscardedCollection() { |
|
596 |
return discardedCollection; |
|
597 |
} |
|
598 |
|
|
599 |
public void setDiscardedCollection(final MongoCollection<DBObject> discardedCollection) { |
|
600 |
this.discardedCollection = discardedCollection; |
|
601 |
} |
|
602 |
|
|
603 |
public String getIdScheme() { |
|
604 |
return idScheme; |
|
605 |
} |
|
606 |
|
|
607 |
public void setIdScheme(final String idScheme) { |
|
608 |
this.idScheme = idScheme; |
|
609 |
} |
|
610 |
|
|
611 |
public String getIdNamespace() { |
|
612 |
return idNamespace; |
|
613 |
} |
|
614 |
|
|
615 |
public void setIdNamespace(final String idNamespace) { |
|
616 |
this.idNamespace = idNamespace; |
|
617 |
} |
|
618 |
|
|
619 |
public RecordInfoGenerator getRecordInfoGenerator() { |
|
620 |
return recordInfoGenerator; |
|
621 |
} |
|
622 |
|
|
623 |
public void setRecordInfoGenerator(final RecordInfoGenerator recordInfoGenerator) { |
|
624 |
this.recordInfoGenerator = recordInfoGenerator; |
|
625 |
} |
|
626 |
|
|
627 |
public MetadataExtractor getMetadataExtractor() { |
|
628 |
return metadataExtractor; |
|
629 |
} |
|
630 |
|
|
631 |
public void setMetadataExtractor(final MetadataExtractor metadataExtractor) { |
|
632 |
this.metadataExtractor = metadataExtractor; |
|
633 |
} |
|
634 |
|
|
635 |
public RecordChangeDetector getRecordChangeDetector() { |
|
636 |
return recordChangeDetector; |
|
637 |
} |
|
638 |
|
|
639 |
public void setRecordChangeDetector(final RecordChangeDetector recordChangeDetector) { |
|
640 |
this.recordChangeDetector = recordChangeDetector; |
|
641 |
} |
|
642 |
|
|
643 |
@Override |
|
644 |
public String getId() { |
|
645 |
return this.id; |
|
646 |
} |
|
647 |
|
|
648 |
public void setId(final String id) { |
|
649 |
this.id = id; |
|
650 |
} |
|
651 |
|
|
652 |
@Override |
|
653 |
public String getMetadataFormat() { |
|
654 |
return this.metadataFormat; |
|
655 |
} |
|
656 |
|
|
657 |
public void setMetadataFormat(final String metadataFormat) { |
|
658 |
this.metadataFormat = metadataFormat; |
|
659 |
} |
|
660 |
|
|
661 |
@Override |
|
662 |
public String getInterpretation() { |
|
663 |
return this.interpretation; |
|
664 |
} |
|
665 |
|
|
666 |
public void setInterpretation(final String interpretation) { |
|
667 |
this.interpretation = interpretation; |
|
668 |
} |
|
669 |
|
|
670 |
@Override |
|
671 |
public String getLayout() { |
|
672 |
return this.layout; |
|
673 |
} |
|
674 |
|
|
675 |
public void setLayout(final String layout) { |
|
676 |
this.layout = layout; |
|
677 |
} |
|
678 |
|
|
679 |
public MongoSetCollection getMongoSetCollection() { |
|
680 |
return mongoSetCollection; |
|
681 |
} |
|
682 |
|
|
683 |
public void setMongoSetCollection(final MongoSetCollection mongoSetCollection) { |
|
684 |
this.mongoSetCollection = mongoSetCollection; |
|
685 |
} |
|
686 |
|
|
687 |
public List<PublisherField> getMongoFields() { |
|
688 |
return mongoFields; |
|
689 |
} |
|
690 |
|
|
691 |
public void setMongoFields(final List<PublisherField> mongoFields) { |
|
692 |
this.mongoFields = mongoFields; |
|
693 |
} |
|
694 |
|
|
695 |
public boolean isAlwaysNewRecord() { |
|
696 |
return alwaysNewRecord; |
|
697 |
} |
|
698 |
|
|
699 |
public void setAlwaysNewRecord(final boolean alwaysNewRecord) { |
|
700 |
this.alwaysNewRecord = alwaysNewRecord; |
|
701 |
} |
|
702 |
|
|
703 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/mongo/MetadataExtractor.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.mongo; |
|
2 |
|
|
3 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
4 |
import org.dom4j.Document; |
|
5 |
import org.dom4j.Node; |
|
6 |
|
|
7 |
/** |
|
8 |
* Function to skip the header of the record and deliver only its metadata content as XML String. |
|
9 |
*/ |
|
10 |
public class MetadataExtractor implements UnaryFunction<String, Document> { |
|
11 |
|
|
12 |
@Override |
|
13 |
public String evaluate(final Document xmlDoc) { |
|
14 |
Node metadataNode = xmlDoc.selectSingleNode("//*[local-name() = 'metadata']/*"); |
|
15 |
//Node metadataNode = xmlDoc.selectSingleNode("/*[local-name()='record']/*[local-name() = 'metadata']/*"); |
|
16 |
return metadataNode.asXML(); |
|
17 |
} |
|
18 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/info/SetInfo.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.info; |
|
2 |
|
|
3 |
public class SetInfo { |
|
4 |
|
|
5 |
private String setSpec; |
|
6 |
private String setName; |
|
7 |
private String setDescription; |
|
8 |
private String query; |
|
9 |
private boolean enabled; |
|
10 |
|
|
11 |
public SetInfo() { |
|
12 |
} |
|
13 |
|
|
14 |
public SetInfo(final String spec, final String name, final String description, final boolean enabled) { |
|
15 |
this.setSpec = spec; |
|
16 |
this.setName = name; |
|
17 |
this.setDescription = description; |
|
18 |
this.enabled = enabled; |
|
19 |
} |
|
20 |
|
|
21 |
/** |
|
22 |
* This is a string-only constructor to be used by the SplittedQueryExecutor, which enables us to create instances of this class |
|
23 |
* directly from the information in the IS. |
|
24 |
* |
|
25 |
* @param spec |
|
26 |
* @param name |
|
27 |
* @param description |
|
28 |
* @param enabled |
|
29 |
*/ |
|
30 |
public SetInfo(final String spec, final String name, final String description, final String enabled) { |
|
31 |
this(spec, name, description, Boolean.valueOf(enabled)); |
|
32 |
} |
|
33 |
|
|
34 |
public SetInfo(final String spec, final String name, final String description, final String query, final boolean enabled) { |
|
35 |
this(spec, name, description, enabled); |
|
36 |
this.query = query; |
|
37 |
} |
|
38 |
|
|
39 |
/** |
|
40 |
* This is a string-only constructor to be used by the SplittedQueryExecutor, which enables us to create instances of this class |
|
41 |
* directly from the information in the IS. |
|
42 |
* |
|
43 |
* @param spec |
|
44 |
* @param name |
|
45 |
* @param description |
|
46 |
* @param query |
|
47 |
* @param enabled |
|
48 |
*/ |
|
49 |
public SetInfo(final String spec, final String name, final String description, final String query, final String enabled) { |
|
50 |
this(spec, name, description, enabled); |
|
51 |
this.query = query; |
|
52 |
} |
|
53 |
|
|
54 |
public String getSetSpec() { |
|
55 |
return setSpec; |
|
56 |
} |
|
57 |
|
|
58 |
public void setSetSpec(final String setSpec) { |
|
59 |
this.setSpec = setSpec; |
|
60 |
} |
|
61 |
|
|
62 |
public String getSetDescription() { |
|
63 |
return setDescription; |
|
64 |
} |
|
65 |
|
|
66 |
public void setSetDescription(final String setDescription) { |
|
67 |
this.setDescription = setDescription; |
|
68 |
} |
|
69 |
|
|
70 |
public String getQuery() { |
|
71 |
return query; |
|
72 |
} |
|
73 |
|
|
74 |
public void setQuery(final String query) { |
|
75 |
this.query = query; |
|
76 |
} |
|
77 |
|
|
78 |
public String getSetName() { |
|
79 |
return setName; |
|
80 |
} |
|
81 |
|
|
82 |
public void setSetName(final String setName) { |
|
83 |
this.setName = setName; |
|
84 |
} |
|
85 |
|
|
86 |
public boolean isEnabled() { |
|
87 |
return enabled; |
|
88 |
} |
|
89 |
|
|
90 |
public void setEnabled(final boolean enabled) { |
|
91 |
this.enabled = enabled; |
|
92 |
} |
|
93 |
|
|
94 |
@Override |
|
95 |
public String toString() { |
|
96 |
return "SetInfo [setSpec=" + setSpec + ", setName=" + setName + ", setDescription=" + setDescription + ", query=" + query + ", enabled=" + enabled |
|
97 |
+ "]"; |
|
98 |
} |
|
99 |
|
|
100 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/mongo/ProvenanceExtractor.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.mongo; |
|
2 |
|
|
3 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
4 |
import org.dom4j.Document; |
|
5 |
import org.dom4j.Node; |
|
6 |
|
|
7 |
/** |
|
8 |
* Function to deliver only the about/provenance content of a record as XML String. |
|
9 |
*/ |
|
10 |
public class ProvenanceExtractor implements UnaryFunction<String, Document> { |
|
11 |
|
|
12 |
@Override |
|
13 |
public String evaluate(final Document xmlDoc) { |
|
14 |
// Node provenanceNode = xmlDoc.selectSingleNode("//*[local-name() = 'about']/*[local-name() = 'provenance']"); |
|
15 |
Node provenanceNode = xmlDoc.selectSingleNode("/*[local-name()='record']/*[local-name() = 'about']/*[local-name() = 'provenance']"); |
|
16 |
if (provenanceNode != null) return provenanceNode.asXML(); |
|
17 |
else return null; |
|
18 |
} |
|
19 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/info/ListDocumentsInfo.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.info; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
public class ListDocumentsInfo { |
|
6 |
|
|
7 |
private List<RecordInfo> docs = null; |
|
8 |
private ResumptionToken resumptionToken = null; |
|
9 |
private int nMaxElements = 0; |
|
10 |
private int cursor = 0; |
|
11 |
private String metadataPrefix = ""; |
|
12 |
|
|
13 |
public List<RecordInfo> getDocs() { |
|
14 |
return docs; |
|
15 |
} |
|
16 |
|
|
17 |
public void setDocs(final List<RecordInfo> docs) { |
|
18 |
this.docs = docs; |
|
19 |
} |
|
20 |
|
|
21 |
public ResumptionToken getResumptionToken() { |
|
22 |
return resumptionToken; |
|
23 |
} |
|
24 |
|
|
25 |
public void setResumptionToken(final ResumptionToken resumptionToken) { |
|
26 |
this.resumptionToken = resumptionToken; |
|
27 |
} |
|
28 |
|
|
29 |
public int getnMaxElements() { |
|
30 |
return nMaxElements; |
|
31 |
} |
|
32 |
|
|
33 |
public void setnMaxElements(final int nMaxElements) { |
|
34 |
this.nMaxElements = nMaxElements; |
|
35 |
} |
|
36 |
|
|
37 |
public int getCursor() { |
|
38 |
return cursor; |
|
39 |
} |
|
40 |
|
|
41 |
public void setCursor(final int cursor) { |
|
42 |
this.cursor = cursor; |
|
43 |
} |
|
44 |
|
|
45 |
public String getMetadataPrefix() { |
|
46 |
return metadataPrefix; |
|
47 |
} |
|
48 |
|
|
49 |
public void setMetadataPrefix(final String metadataPrefix) { |
|
50 |
this.metadataPrefix = metadataPrefix; |
|
51 |
} |
|
52 |
|
|
53 |
} |
modules/dnet-data-provision-services/trunk/src/main/java/eu/dnetlib/oai/info/RecordInfo.java | ||
---|---|---|
1 |
package eu.dnetlib.oai.info; |
|
2 |
|
|
3 |
import java.text.SimpleDateFormat; |
|
4 |
import java.util.Date; |
|
5 |
import java.util.Set; |
|
6 |
import java.util.TimeZone; |
|
7 |
|
|
8 |
import com.google.common.collect.Sets; |
|
9 |
|
|
10 |
public class RecordInfo { |
|
11 |
|
|
12 |
private String prefix = ""; |
|
13 |
private String identifier = ""; |
|
14 |
private Date datestamp = null; |
|
15 |
private Set<String> setspecs = Sets.newHashSet(); |
|
16 |
private String metadata = ""; |
|
17 |
private String internalId = ""; |
|
18 |
private boolean deleted; |
|
19 |
private String provenance = ""; |
|
20 |
|
|
21 |
public RecordInfo() { |
|
22 |
} |
|
23 |
|
|
24 |
public RecordInfo(final String prefix, final String identifier, final Date datestamp, final String setspec, final String metadata, final boolean deleted) { |
|
25 |
this(prefix, identifier, datestamp, Sets.newHashSet(setspec), metadata, deleted); |
|
26 |
} |
|
27 |
|
|
28 |
public RecordInfo(final String prefix, final String identifier, final Date datestamp, final Set<String> setspecs, final String metadata, |
|
29 |
final boolean deleted) { |
|
30 |
super(); |
|
31 |
this.prefix = prefix; |
|
32 |
this.identifier = identifier; |
|
33 |
this.datestamp = datestamp; |
|
34 |
this.setspecs = setspecs; |
|
35 |
this.metadata = metadata; |
|
36 |
this.deleted = deleted; |
|
37 |
} |
|
38 |
|
Also available in: Unified diff
Added OAI STORE Service which doesn't work