Project

General

Profile

1 37005 sandro.lab
package eu.dnetlib.data.objectstore.filesystem;
2
3 43652 claudio.at
import java.io.ByteArrayInputStream;
4
import java.io.IOException;
5 37005 sandro.lab
import java.nio.file.FileSystems;
6
import java.nio.file.Files;
7
import java.nio.file.Path;
8
import java.util.regex.Pattern;
9
10 41664 alessia.ba
import com.google.common.base.Function;
11
import com.google.common.collect.Iterables;
12 37005 sandro.lab
import com.google.common.collect.Lists;
13
import com.google.gson.Gson;
14
import com.mongodb.BasicDBObject;
15
import com.mongodb.DBObject;
16 41664 alessia.ba
import com.mongodb.client.MongoCollection;
17
import com.mongodb.client.model.Filters;
18
import com.mongodb.client.result.DeleteResult;
19 37005 sandro.lab
import eu.dnetlib.data.objectstore.modular.ObjectStoreRecord;
20
import eu.dnetlib.data.objectstore.modular.connector.ObjectStore;
21
import eu.dnetlib.data.objectstore.rmi.MetadataObjectRecord;
22
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFile;
23
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFileNotFoundException;
24
import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException;
25
import eu.dnetlib.enabling.resultset.ResultSetListener;
26
import eu.dnetlib.miscutils.collections.Pair;
27 41052 alessia.ba
import org.apache.commons.lang.StringUtils;
28
import org.apache.commons.logging.Log;
29
import org.apache.commons.logging.LogFactory;
30 41664 alessia.ba
import org.bson.conversions.Bson;
31 37005 sandro.lab
32
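/**
 * {@link ObjectStore} implementation that saves object payloads as files under a base folder of the default file system and keeps one metadata document
 * per object (id, MIME type, MD5, size, file path and URI) in a MongoDB collection.
 *
 * @author sandro
 */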
37 38932 sandro.lab
public class FileSystemObjectStore implements ObjectStore {
38 37005 sandro.lab
39
	/** Name of the metadata field that stores the URL under which the object can be retrieved. */
42
	private static final String URI_FIELD = "uri";
43
44
	/** Name of the metadata field that stores the absolute file system path of the object. */
47
	private static final String FS_PATH_FIELD = "fsPath";
48
49
	/** The Constant log. */
50
	private static final Log log = LogFactory.getLog(FileSystemObjectStore.class); // NOPMD by marko on 11/24/08 5:02 PM
51
52
	/** The id. */
53
	private final String id;
54
55
	/** The interpretation. */
56
	private final String interpretation;
57
58
	/** The base path. */
59
	private final String basePath;
60
61
	/** The base uri. */
62 38932 sandro.lab
	private final String baseURI;
63 37005 sandro.lab
64
	/** The mongo metadata. */
65 41664 alessia.ba
	private final MongoCollection<DBObject> mongoMetadata;
66 37005 sandro.lab
67
	/**
	 * Creates an object store backed by a file system folder and a MongoDB metadata collection.
	 *
	 * @param identifier
	 *            the object store identifier; it is also the name of the store folder resolved under {@code basePath}
	 * @param interpretation
	 *            the interpretation associated with this store
	 * @param basePath
	 *            the folder under which the store folder is resolved
	 * @param mongoMetadata
	 *            the collection holding the metadata documents of the stored objects
	 * @param baseURI
	 *            the base URI used when building the URL of a stored object
	 */
	public FileSystemObjectStore(final String identifier, final String interpretation, final String basePath, final MongoCollection<DBObject> mongoMetadata,
			final String baseURI) {
		// Plain field initialisation; the assignments are independent of each other.
		this.id = identifier;
		this.interpretation = interpretation;
		this.basePath = basePath;
		this.baseURI = baseURI;
		this.mongoMetadata = mongoMetadata;
	}
89
90
	/**
91
	 * {@inheritDoc}
92 38932 sandro.lab
	 *
93 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getId()
94
	 */
95
	@Override
96
	public String getId() {
97
		return this.id;
98
	}
99
100
	/**
101
	 * {@inheritDoc}
102 38932 sandro.lab
	 *
103 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getInterpretation()
104
	 */
105
	@Override
106
	public String getInterpretation() {
107
		return this.interpretation;
108
	}
109
110
	/**
111
	 * {@inheritDoc}
112 38932 sandro.lab
	 *
113 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feed(java.lang.Iterable, boolean)
114
	 */
115
	@Override
116
	public int feed(final Iterable<ObjectStoreRecord> records, final boolean incremental) throws ObjectStoreServiceException {
117 38932 sandro.lab
		if (records == null)
118 37005 sandro.lab
			return 0;
119
120 43652 claudio.at
		Path baseDirPath = FileSystems.getDefault().getPath(getBasePath()).resolve(getId());
121 37005 sandro.lab
		if (!Files.exists(baseDirPath))
122 38932 sandro.lab
			throw new ObjectStoreServiceException("Error can't feed objects because the folder " + baseDirPath + " does not exist");
123 37005 sandro.lab
124 38932 sandro.lab
		int addedCounter = 0;
125
		for (ObjectStoreRecord record : records) {
126 41664 alessia.ba
			String url = feedObject(record);
127 43652 claudio.at
			if (StringUtils.isNotBlank(url)) {
128 41664 alessia.ba
				addedCounter++;
129 43652 claudio.at
			}
130 41664 alessia.ba
		}
131
		return addedCounter;
132
	}
133 37005 sandro.lab
134 41664 alessia.ba
	/**
135
	 * {@inheritDoc}
136
	 *
137
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feedMetadataRecord(java.lang.Iterable, boolean)
138
	 *
139
	 * This method handles the case of web crawl files and other cases when the metadata in mdstores are also the objects to put into the objectstores.
140
	 */
141
	@Override
142
	public int  feedMetadataRecord(final Iterable<MetadataObjectRecord> records, final boolean incremental) throws ObjectStoreServiceException {
143
		Iterable<ObjectStoreRecord> it = Iterables.transform(records, new Function<MetadataObjectRecord, ObjectStoreRecord>() {
144
			@Override
145
			public ObjectStoreRecord apply(final MetadataObjectRecord metadataObjectRecord) {
146
				ObjectStoreRecord r = new ObjectStoreRecord();
147
				r.setInputStream(new ByteArrayInputStream(metadataObjectRecord.getRecord().getBytes()));
148
				ObjectStoreFile fileMetadata = new ObjectStoreFile();
149
				fileMetadata.setObjectID(metadataObjectRecord.getId());
150
				fileMetadata.setMimeType(metadataObjectRecord.getMime());
151
				r.setFileMetadata(fileMetadata);
152
				return r;
153
			}
154
		});
155
		return feed(it, incremental);
156
	}
157
158
	/**
159
	 * {@inheritDoc}
160
	 *
161
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feedObjectRecord(eu.dnetlib.data.objectstore.modular.ObjectStoreRecord)
162
	 */
163
	@Override
164
	public String feedObjectRecord(final ObjectStoreRecord record) throws ObjectStoreServiceException {
165
		return feedObject(record);
166
	}
167
168
	private String feedObject(final ObjectStoreRecord record) {
169
		if (record != null) {
170 37005 sandro.lab
			String objectIdentifier = record.getFileMetadata().getObjectID();
171 41664 alessia.ba
			if (StringUtils.isNotBlank(objectIdentifier)) {
172 43652 claudio.at
				final Path objResolvedPath = FileSystemUtility.objectStoreFilePath(basePath, id, objectIdentifier);
173
174 41664 alessia.ba
				if (Files.notExists(objResolvedPath)) {
175 37005 sandro.lab
					try {
176 43652 claudio.at
						log.debug("Creation of folder " + objResolvedPath.getParent());
177
						Files.createDirectories(objResolvedPath.getParent());
178
						log.debug("Folder " + objResolvedPath.getParent() + " created");
179 37005 sandro.lab
						String md5Sum = null;
180
						Integer size = 0;
181 38932 sandro.lab
						if (record.getInputStream() != null) {
182 41664 alessia.ba
							Pair<String, Integer> infos = FileSystemUtility.saveAndGenerateMD5(record.getInputStream(), objResolvedPath);
183 37005 sandro.lab
							md5Sum = infos.getKey();
184
							size = infos.getValue();
185
						}
186 43652 claudio.at
						final String url =
187
								ModularObjectStoreRESTService.retrieveURL(getBaseURI(), getBasePath(), getId(), record.getFileMetadata().getObjectID());
188 41664 alessia.ba
						if (StringUtils.isNotBlank(md5Sum)) {
189 37005 sandro.lab
							double timestamp = System.currentTimeMillis();
190
							BasicDBObject metadata = new BasicDBObject();
191
							metadata.put("id", record.getFileMetadata().getObjectID());
192
							metadata.put("mime", record.getFileMetadata().getMimeType());
193
							metadata.put("originalObject", record.getFileMetadata().toJSON());
194
							metadata.put("timestamp", timestamp);
195
							metadata.put("md5Sum", md5Sum);
196
							metadata.put("size", size);
197 41664 alessia.ba
							metadata.put(FS_PATH_FIELD, objResolvedPath.toAbsolutePath().toString());
198
							metadata.put(URI_FIELD, url);
199 38932 sandro.lab
							log.debug("saving metadata object to the collection: " + metadata.toString());
200 41664 alessia.ba
							mongoMetadata.insertOne(metadata);
201 37005 sandro.lab
						}
202 41664 alessia.ba
						return url;
203 37005 sandro.lab
					} catch (Exception e) {
204
						log.error("Something bad happen on inserting Record", e);
205 38932 sandro.lab
						log.error("Record: " + new Gson().toJson(record.getFileMetadata()));
206
					} finally {
207 41664 alessia.ba
						if (record.getInputStream() != null) {
208 38932 sandro.lab
							try {
209
								record.getInputStream().close();
210
							} catch (Exception e) {
211
								log.error("Error on close inputStream", e);
212
							}
213
						}
214 37005 sandro.lab
					}
215 38484 sandro.lab
				} else {
216 43652 claudio.at
					log.debug("The File in the path" + objResolvedPath.getParent() + "exists ");
217 37005 sandro.lab
				}
218
			}
219
		}
220 41664 alessia.ba
		log.warn("Record for object store is null");
221
		return null;
222 37005 sandro.lab
	}
223
224
	/**
225
	 * {@inheritDoc}
226 38932 sandro.lab
	 *
227 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliver(java.lang.Long, java.lang.Long)
228
	 */
229
	@Override
230
	public ResultSetListener deliver(final Long from, final Long until) throws ObjectStoreServiceException {
231 41664 alessia.ba
		FileSystemObjectStoreResultSetListener resultSet = new FileSystemObjectStoreResultSetListener();
232 43652 claudio.at
		resultSet.setBaseURI(getBaseURI());
233 41664 alessia.ba
		resultSet.setMongoCollection(mongoMetadata);
234 43652 claudio.at
		resultSet.setObjectStoreID(getId());
235 41664 alessia.ba
		resultSet.setFromDate(from);
236
		resultSet.setUntilDate(until);
237 43652 claudio.at
		resultSet.setBasePath(getBasePath());
238 41664 alessia.ba
		return resultSet;
239 37005 sandro.lab
	}
240
241
	/**
242
	 * {@inheritDoc}
243 38932 sandro.lab
	 *
244 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliverIds(java.lang.Iterable)
245
	 */
246
	@Override
247
	public ResultSetListener deliverIds(final Iterable<String> ids) throws ObjectStoreServiceException {
248 41664 alessia.ba
		FileSystemObjectStoreResultSetListener resultSet = new FileSystemObjectStoreResultSetListener();
249 43652 claudio.at
		resultSet.setBaseURI(getBaseURI());
250 41664 alessia.ba
		resultSet.setMongoCollection(mongoMetadata);
251 43652 claudio.at
		resultSet.setObjectStoreID(getId());
252 41664 alessia.ba
		resultSet.setRecords(Lists.newArrayList(ids));
253 43652 claudio.at
		resultSet.setBasePath(basePath);
254 41664 alessia.ba
		return resultSet;
255 37005 sandro.lab
	}
256
257
	/**
258
	 * {@inheritDoc}
259 38932 sandro.lab
	 *
260 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliverObject(java.lang.String)
261
	 */
262
	@Override
263
	public ObjectStoreFile deliverObject(final String objectId) throws ObjectStoreServiceException {
264 41664 alessia.ba
		Bson query = Filters.eq("id", objectId);
265
		DBObject resultQuery = mongoMetadata.find(query).first();
266
		checkAndGetFsPathField(resultQuery, objectId);
267 43652 claudio.at
		return ObjectStoreFileUtility.build(resultQuery, getBaseURI(), getId(), basePath);
268 37005 sandro.lab
	}
269
270 41664 alessia.ba
	private String checkAndGetFsPathField(final DBObject resultQuery, final String objectId) throws ObjectStoreServiceException {
271
		if (resultQuery == null || !resultQuery.containsField(FS_PATH_FIELD))
272
			throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " not found or missing " + FS_PATH_FIELD + " field");
273
		String pathStr = (String) resultQuery.get(FS_PATH_FIELD);
274
		if (StringUtils.isBlank(pathStr))
275
			throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " with blank " + FS_PATH_FIELD);
276
		return pathStr;
277
	}
278
279 37005 sandro.lab
	/**
280
	 * {@inheritDoc}
281 38932 sandro.lab
	 *
282 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getSize()
283
	 */
284
	@Override
285
	public int getSize() throws ObjectStoreServiceException {
286 41664 alessia.ba
		return (int) mongoMetadata.count();
287 37005 sandro.lab
	}
288
289
	/**
290
	 * {@inheritDoc}
291 38932 sandro.lab
	 *
292 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deleteObject(java.lang.String)
293
	 */
294
	@Override
295
	public void deleteObject(final String objectId) throws ObjectStoreServiceException {
296 41664 alessia.ba
		Bson query = Filters.eq("id", objectId);
297
		DBObject response = mongoMetadata.find(query).first();
298
		String pathStr = checkAndGetFsPathField(response, objectId);
299
		Path path = FileSystems.getDefault().getPath(pathStr);
300 38932 sandro.lab
		if (Files.notExists(path))
301
			throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " not found in the assigned path " + path);
302 37005 sandro.lab
		try {
303
			Files.delete(path);
304
		} catch (IOException e) {
305 38932 sandro.lab
			throw new ObjectStoreServiceException("An error occurs on delete file ", e);
306 37005 sandro.lab
		}
307 41664 alessia.ba
		mongoMetadata.deleteOne(query);
308 37005 sandro.lab
	}
309
310
	/**
311
	 * {@inheritDoc}
312 38932 sandro.lab
	 *
313 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getObject(java.lang.String)
314
	 */
315
	@Override
316
	public String getObject(final String recordId) throws ObjectStoreServiceException {
317 41664 alessia.ba
		Bson query = Filters.eq("id", recordId);
318
		DBObject response = mongoMetadata.find(query).first();
319 38932 sandro.lab
		if (response == null || !response.containsField(URI_FIELD))
320 37005 sandro.lab
			return null;
321 38932 sandro.lab
		return (String) response.get(URI_FIELD);
322 37005 sandro.lab
	}
323
324
	/**
325
	 * {@inheritDoc}
326 38932 sandro.lab
	 *
327 37005 sandro.lab
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#existIDStartsWith(java.lang.String)
328
	 */
329
	@Override
330
	public boolean existIDStartsWith(final String startId) throws ObjectStoreServiceException {
331 41664 alessia.ba
		Bson query = Filters.regex("id", Pattern.compile(startId));
332 38932 sandro.lab
		return mongoMetadata.count(query) > 0;
333 37005 sandro.lab
	}
334
335 41466 sandro.lab
	@Override
336
	public boolean dropContent() throws ObjectStoreServiceException {
337 43652 claudio.at
		if (getBasePath() == null) {
338 41466 sandro.lab
			throw new ObjectStoreServiceException("Error on dropping object store base_path required");
339
		}
340 43652 claudio.at
		final Path baseDirPath = FileSystems.getDefault().getPath(getBasePath()).resolve(getId());
341 41466 sandro.lab
		try {
342
			FileSystemUtility.deleteFolderRecursive(baseDirPath);
343
		} catch (IOException e) {
344
			throw new ObjectStoreServiceException("Error on dropping store ", e);
345
		}
346
		log.info("Deleted folder" + baseDirPath.toString());
347
		if (!Files.exists(baseDirPath)) {
348
			log.info("Recreating folder " + baseDirPath);
349
			try {
350
				Files.createDirectory(baseDirPath);
351
			} catch (IOException e) {
352
				throw new ObjectStoreServiceException("Error on dropping store ", e);
353
			}
354
		}
355 41664 alessia.ba
		final DeleteResult deleteResult = this.mongoMetadata.deleteMany(new BasicDBObject());
356
		log.info("Dropped content for object store " + id + ". " + deleteResult.getDeletedCount() + " object(s) deleted.");
357 41466 sandro.lab
		return true;
358
	}
359
360 41664 alessia.ba
	@Override
361
	public String toString() {
362
		return "FileSystemObjectStore{" +
363 43652 claudio.at
				"id='" + getId() + '\'' +
364
				", interpretation='" + getInterpretation() + '\'' +
365
				", basePath='" + getBasePath() + '\'' +
366
				", baseURI='" + getBaseURI() + '\'' +
367 41664 alessia.ba
				'}';
368
	}
369 41466 sandro.lab
370 37005 sandro.lab
	/**
371
	 * Gets the base uri.
372
	 *
373
	 * @return the baseURI
374
	 */
375
	public String getBaseURI() {
376
		return baseURI;
377
	}
378
379 43652 claudio.at
	public String getBasePath() {
380
		return basePath;
381
	}
382 37005 sandro.lab
}