Project

General

Profile

1
package eu.dnetlib.data.information.oai.publisher.core;
2

    
3
import java.util.Iterator;
4
import java.util.List;
5
import javax.annotation.Resource;
6

    
7
import com.google.common.collect.Lists;
8
import eu.dnetlib.data.information.oai.publisher.*;
9
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader;
10
import eu.dnetlib.data.information.oai.publisher.info.ListDocumentsInfo;
11
import eu.dnetlib.data.information.oai.publisher.info.MDFInfo;
12
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo;
13
import eu.dnetlib.data.information.oai.publisher.info.ResumptionTokenImpl;
14
import eu.dnetlib.data.oai.store.Cursor;
15
import eu.dnetlib.data.oai.store.PublisherStore;
16
import eu.dnetlib.data.oai.store.PublisherStoreDAO;
17
import eu.dnetlib.miscutils.functional.UnaryFunction;
18
import org.apache.commons.lang3.StringUtils;
19
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21

    
22
public class DNetOAICore extends AbstractOAICore {
23

    
24
	/** Logger for this class. */
	private static final Log log = LogFactory.getLog(DNetOAICore.class); // NOPMD by marko on 11/24/08 5:02 PM
25

    
26
	/** DAO used to obtain the publisher stores; injected through the resource name given in the annotation below. */
	@Resource(name = "mongodbPublisherStoreDao")
27
	private PublisherStoreDAO<PublisherStore<Cursor>, Cursor> publisherStoreDAO;
28

    
29
	/** Default date, exposed through getDefaultDate()/setDefaultDate(); not read by any other method visible in this class. */
	private String defaultDate = "2008-01-01T12:00:00Z";
30

    
31
	@Override
32
	protected RecordInfo getRecordById(final MDFInfo mdf, final String id) throws OaiPublisherException {
33
		PublisherStore<Cursor> store = this.publisherStoreDAO.getStoreFor(mdf.getPrefix(), getCurrentDBName());
34
		if (store == null)
35
			throw new OaiPublisherRuntimeException("Missing store for metadata prefix " + mdf.getPrefix() + ". Please check OAI publisher configuration.");
36
		RecordInfo record = null;
37
		if (StringUtils.isBlank(mdf.getTransformationRuleID())) {
38
			record = store.getRecord(id);
39
		} else {
40
			UnaryFunction<String, String> function = getLookupClient().getUnaryFunctionFromTDSRule(mdf.getTransformationRuleID());
41
			record = store.getRecord(id, function);
42
		}
43
		if (record != null) {
44
			record.setPrefix(mdf.getPrefix());
45
		}
46
		return record;
47
	}
48

    
49
	/**
50
	 * 
51
	 * {@inheritDoc}
52
	 * 
53
	 * @see eu.dnetlib.data.information.oai.publisher.core.AbstractOAICore#getDocuments(boolean, java.lang.String, java.lang.String,
54
	 *      java.lang.String, java.lang.String)
55
	 */
56
	@Override
57
	protected ListDocumentsInfo getDocuments(final boolean onlyIdentifiers, final String set, final String metadataPrefix, final String from, final String until)
58
			throws OaiPublisherException {
59
		MDFInfo mdf = obtainMDFInfo(metadataPrefix);
60
		boolean hasDateRange = StringUtils.isNotBlank(from) || StringUtils.isNotBlank(until);
61
		String query = this.generateQuery(mdf, set, from, until, hasDateRange);
62
		int total = this.countTotal(hasDateRange, query, set, mdf);
63
		log.debug("Total number of records: " + total);
64
		Cursor results = this.getCursor(query, onlyIdentifiers, mdf);
65
		ListDocumentsInfo res = this.prepareListDocumentsInfo(results, mdf, query, set, 0, total, hasDateRange);
66
		log.debug("Delivering " + res.getDocs().size() + " in a page");
67
		return res;
68
	}
69

    
70
	@Override
71
	protected ListDocumentsInfo getDocuments(final boolean onlyIdentifiers, final String resumptionToken) throws OaiPublisherException {
72
		ResumptionTokenImpl resToken = new ResumptionTokenImpl();
73
		resToken.deserialize(resumptionToken);
74

    
75
		log.debug(resToken.toString());
76

    
77
		MDFInfo mdf = obtainMDFInfo(resToken.getMetadataPrefix());
78
		String lastID = resToken.getLastObjIdentifier();
79
		String query = resToken.getQuery();
80
		String newQuery = "";
81
		if (StringUtils.isNotBlank(query)) {
82
			newQuery = query + " AND ";
83
		}
84
		newQuery += " _id > \"" + lastID + "\"";
85
		log.debug("NEW QUERY BECAUSE of resumptionToken: " + newQuery);
86
		int total = this.countTotal(resToken.hasDateRange(), query, resToken.getRequestedSet(), mdf);
87
		Cursor results = this.getCursor(newQuery, onlyIdentifiers, mdf);
88
		int oldCount = resToken.getnMaxElements();
89
		// if the number of records changed, then for sure we can invalidate the resumption token, unless we have a new total of -1 (date
90
		// range queries can't be counted for performance reasons)
91
		if ((total != -1) && (oldCount != total)) throw new BadResumptionTokenException(resumptionToken);
92
		long start = System.currentTimeMillis();
93
		ListDocumentsInfo res = this.prepareListDocumentsInfo(results, mdf, query, resToken.getRequestedSet(), resToken.getnRead(), resToken.getnMaxElements(),
94
				resToken.hasDateRange());
95
		res.setCursor(resToken.getnRead());
96
		long end = System.currentTimeMillis();
97
		log.debug("ListDocumentInfo ready in (ms): " + (end - start));
98
		return res;
99
	}
100

    
101
	protected ListDocumentsInfo prepareListDocumentsInfo(final Cursor results,
102
			final MDFInfo mdf,
103
			final String query,
104
			final String requestedSet,
105
			final int read,
106
			final int totalNumber,
107
			final boolean hasDateRange) throws OaiPublisherException {
108
		ListDocumentsInfo documentList = new ListDocumentsInfo();
109
		documentList.setnMaxElements(totalNumber);
110
		documentList.setMetadataPrefix(mdf.getPrefix());
111
		documentList.setCursor(0);
112
		if (documentList.getnMaxElements() == 0) throw new NoRecordsMatchException(OAIError.noRecordsMatch.getMessage());
113

    
114
		List<RecordInfo> theRecords = this.generateOAIRecords(mdf, requestedSet, results);
115
		documentList.setDocs(theRecords);
116

    
117
		if ((theRecords == null) || theRecords.isEmpty()) throw new NoRecordsMatchException("noRecordsMatch: 'documents' is null or empty");
118

    
119
		if ((documentList.getnMaxElements() > (read + theRecords.size())) || (documentList.getnMaxElements() == -1)) {
120
			String lastID = theRecords.get(theRecords.size() - 1).getInternalId();
121
			ResumptionTokenImpl nextToken = new ResumptionTokenImpl();
122
			nextToken.setDateRange(hasDateRange);
123
			nextToken.setLastObjIdentifier(lastID);
124
			nextToken.setMetadataPrefix(mdf.getPrefix());
125
			nextToken.setnMaxElements(totalNumber);
126
			nextToken.setnRead(read + theRecords.size());
127
			nextToken.setQuery(query);
128
			nextToken.setRequestedSet(requestedSet);
129
			documentList.setResumptionToken(nextToken);
130
		}
131

    
132
		return documentList;
133
	}
134

    
135
	protected Cursor getCursor(final String query, final boolean onlyIdentifiers, final MDFInfo mdfInfo) {
136
		PublisherStore<Cursor> store = this.publisherStoreDAO.getStore(mdfInfo.getSourceFormatName(), mdfInfo.getSourceFormatInterpretation(),
137
				mdfInfo.getSourceFormatLayout(), getCurrentDBName());
138
		if (store == null)
139
			throw new OaiPublisherRuntimeException("Missing store for metadata prefix " + mdfInfo.getPrefix() + ". Please check OAI publisher configuration.");
140
		Cursor results = null;
141
		if (StringUtils.isBlank(mdfInfo.getTransformationRuleID())) {
142
			results = store.getRecords(query, !onlyIdentifiers, pageSize);
143
		} else {
144
			UnaryFunction<String, String> function = getLookupClient().getUnaryFunctionFromTDSRule(mdfInfo.getTransformationRuleID());
145
			results = store.getRecords(query, function, !onlyIdentifiers, pageSize);
146
		}
147
		log.debug("Got cursor");
148
		return results;
149
	}
150

    
151
	/**
152
	 * Generates the List of RecordInfo to be delivered.
153
	 *
154
	 * @param mdf
155
	 *            MDFInfo, the requested metadata format information.
156
	 * @param requestedSet
157
	 *            set specified in the request. It is blank if no set was requested.
158
	 * @param cursor
159
	 *            Cursor instance to use to get the records.
160
	 * @return List of RecordInfo instances
161
	 */
162
	protected List<RecordInfo> generateOAIRecords(final MDFInfo mdf, final String requestedSet, final Cursor cursor) {
163
		final List<RecordInfo> documents = Lists.newArrayList();
164
		Iterator<RecordInfo> cursorIterator = cursor.iterator();
165
		while (cursorIterator.hasNext()) {
166
			RecordInfo current = cursorIterator.next();
167
			current.addSetspec(requestedSet);
168
			current.setPrefix(mdf.getPrefix());
169
			documents.add(current);
170
		}
171
		return documents;
172
	}
173

    
174
	protected String generateQuery(final MDFInfo mdf, final String set, final String from, final String until, final boolean hasDateRange) {
175
		String datestampIndexName = OAIConfigurationReader.DATESTAMP_FIELD;
176

    
177
		String query = mdf.getBaseQuery();
178
		if (!StringUtils.isBlank(set)) {
179
			if (!StringUtils.isBlank(query)) {
180
				query += " AND ";
181
			}
182
			query += getSetCollection().getSetQuery(set, getCurrentDBName());
183
		}
184
		if (hasDateRange) {
185
			if (!StringUtils.isBlank(query)) {
186
				query += " AND ";
187
			}
188
			if ((from != null) && (until != null)) {
189
				query += datestampIndexName + " >= " + from + " AND " + datestampIndexName + " <= " + until;
190
			} else if (from != null) {
191
				query += datestampIndexName + " >= " + from;
192
			} else if (until != null) {
193
				query += datestampIndexName + " <= " + until;
194
			}
195
		}
196

    
197
		log.info("QUERY GENERATED: \n" + query);
198
		return query;
199
	}
200

    
201
	private int countTotal(final boolean hasDateRange, final String query, final String set, final MDFInfo mdFormat) {
202
		int total = 0;
203
		if (hasDateRange) {
204
			// Counting in the store by date ranges is too expensive and delays to much the response
205
			total = -1;
206
		} else {
207
			String theSet = set;
208
			if (StringUtils.isBlank(set)) {
209
				theSet = "ALL";
210
			}
211
			log.debug("SET::: " + theSet);
212
			total = getSetCollection().count(theSet, mdFormat.getPrefix(), getCurrentDBName());
213
		}
214
		return total;
215
	}
216

    
217
	public String getDefaultDate() {
218
		return defaultDate;
219
	}
220

    
221
	public void setDefaultDate(final String defaultDate) {
222
		this.defaultDate = defaultDate;
223
	}
224

    
225
	public PublisherStoreDAO<PublisherStore<Cursor>, Cursor> getPublisherStoreDAO() {
226
		return publisherStoreDAO;
227
	}
228

    
229
	public void setPublisherStoreDAO(final PublisherStoreDAO<PublisherStore<Cursor>, Cursor> publisherStoreDAO) {
230
		this.publisherStoreDAO = publisherStoreDAO;
231
	}
232

    
233
	public int getPageSize() {
234
		return pageSize;
235
	}
236

    
237
	public void setPageSize(final int pageSize) {
238
		this.pageSize = pageSize;
239
	}
240

    
241
}
(2-2/2)