Project

General

Profile

1
package eu.dnetlib.data.objectstore.filesystem;
2

    
3
import java.io.ByteArrayInputStream;
4
import java.io.IOException;
5
import java.nio.file.FileSystems;
6
import java.nio.file.Files;
7
import java.nio.file.Path;
8
import java.util.regex.Pattern;
9

    
10
import com.google.common.collect.Iterables;
11
import com.google.common.collect.Lists;
12
import com.google.gson.Gson;
13
import com.mongodb.BasicDBObject;
14
import com.mongodb.DBObject;
15
import com.mongodb.client.MongoCollection;
16
import com.mongodb.client.model.Filters;
17
import com.mongodb.client.result.DeleteResult;
18
import eu.dnetlib.data.objectstore.modular.ObjectStoreRecord;
19
import eu.dnetlib.data.objectstore.modular.connector.ObjectStore;
20
import eu.dnetlib.data.objectstore.rmi.MetadataObjectRecord;
21
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFile;
22
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFileNotFoundException;
23
import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException;
24
import eu.dnetlib.enabling.resultset.ResultSetListener;
25
import eu.dnetlib.miscutils.collections.Pair;
26
import org.apache.commons.lang3.StringUtils;
27
import org.apache.commons.logging.Log;
28
import org.apache.commons.logging.LogFactory;
29
import org.bson.conversions.Bson;
30

    
31
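/**
 * {@link ObjectStore} implementation that keeps object payloads on the file system, in the folder
 * obtained by resolving the store identifier against the base path, and records one metadata
 * document per stored object in a MongoDB collection.
 *
 * @author sandro
 */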
36
public class FileSystemObjectStore implements ObjectStore {
37

    
38
	/**
39
	 *
40
	 */
41
	private static final String URI_FIELD = "uri";
42

    
43
	/**
44
	 *
45
	 */
46
	private static final String FS_PATH_FIELD = "fsPath";
47

    
48
	/** The Constant log. */
49
	private static final Log log = LogFactory.getLog(FileSystemObjectStore.class); // NOPMD by marko on 11/24/08 5:02 PM
50

    
51
	/** The id. */
52
	private final String id;
53

    
54
	/** The interpretation. */
55
	private final String interpretation;
56

    
57
	/** The base path. */
58
	private final String basePath;
59

    
60
	/** The base uri. */
61
	private final String baseURI;
62

    
63
	/** The mongo metadata. */
64
	private final MongoCollection<DBObject> mongoMetadata;
65

    
66
	/**
	 * Creates an object store bound to a folder on the file system and to a metadata collection.
	 * The arguments are stored as given; nothing is validated or created here.
	 *
	 * @param identifier
	 *            the identifier of the object store
	 * @param interpretation
	 *            the interpretation of the object store
	 * @param basePath
	 *            the root folder below which the store folder lives
	 * @param mongoMetadata
	 *            the MongoDB collection holding the metadata of the stored objects
	 * @param baseURI
	 *            the base URI used to build the URLs of the stored objects
	 */
	public FileSystemObjectStore(final String identifier, final String interpretation, final String basePath, final MongoCollection<DBObject> mongoMetadata,
			final String baseURI) {
		this.id = identifier;
		this.interpretation = interpretation;
		this.basePath = basePath;
		this.baseURI = baseURI;
		this.mongoMetadata = mongoMetadata;
	}
88

    
89
	/**
90
	 * {@inheritDoc}
91
	 *
92
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getId()
93
	 */
94
	@Override
95
	public String getId() {
96
		return this.id;
97
	}
98

    
99
	/**
100
	 * {@inheritDoc}
101
	 *
102
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getInterpretation()
103
	 */
104
	@Override
105
	public String getInterpretation() {
106
		return this.interpretation;
107
	}
108

    
109
	/**
110
	 * {@inheritDoc}
111
	 *
112
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feed(java.lang.Iterable, boolean)
113
	 */
114
	@Override
115
	public int feed(final Iterable<ObjectStoreRecord> records, final boolean incremental) throws ObjectStoreServiceException {
116
		if (records == null)
117
			return 0;
118

    
119
		Path baseDirPath = FileSystems.getDefault().getPath(getBasePath()).resolve(getId());
120
		if (!Files.exists(baseDirPath))
121
			throw new ObjectStoreServiceException("Error can't feed objects because the folder " + baseDirPath + " does not exist");
122

    
123
		int addedCounter = 0;
124
		int nulls = 0;
125
		for (ObjectStoreRecord record : records) {
126
			String url = feedObject(record);
127
			if (StringUtils.isNotBlank(url)) {
128
				addedCounter++;
129
			} else {
130
				nulls++;
131
			}
132
		}
133
		if (nulls > 0) {
134
			log.warn(String.format("Found %s null records", nulls));
135
		}
136
		return addedCounter;
137
	}
138

    
139
	/**
140
	 * {@inheritDoc}
141
	 *
142
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feedMetadataRecord(java.lang.Iterable, boolean)
143
	 *
144
	 * This method handles the case of web crawl files and other cases when the metadata in mdstores are also the objects to put into the objectstores.
145
	 */
146
	@Override
147
	public int  feedMetadataRecord(final Iterable<MetadataObjectRecord> records, final boolean incremental) throws ObjectStoreServiceException {
148
		Iterable<ObjectStoreRecord> it = Iterables.transform(records, mor -> {
149
			ObjectStoreRecord r = new ObjectStoreRecord();
150
			r.setInputStream(new ByteArrayInputStream(mor.getRecord().getBytes()));
151
			ObjectStoreFile fileMetadata = new ObjectStoreFile();
152
			fileMetadata.setObjectID(mor.getId());
153
			fileMetadata.setMimeType(mor.getMime());
154
			r.setFileMetadata(fileMetadata);
155
			return r;
156
		});
157
		return feed(it, incremental);
158
	}
159

    
160
	/**
161
	 * {@inheritDoc}
162
	 *
163
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feedObjectRecord(eu.dnetlib.data.objectstore.modular.ObjectStoreRecord)
164
	 */
165
	@Override
166
	public String feedObjectRecord(final ObjectStoreRecord record) throws ObjectStoreServiceException {
167
		return feedObject(record);
168
	}
169

    
170
	private String feedObject(final ObjectStoreRecord record) {
171
		if (record != null) {
172
			String objectIdentifier = record.getFileMetadata().getObjectID();
173
			if (StringUtils.isNotBlank(objectIdentifier)) {
174
				final Path objResolvedPath = FileSystemUtility.objectStoreFilePath(basePath, id, objectIdentifier);
175

    
176
				if (Files.notExists(objResolvedPath)) {
177
					try {
178
						log.debug("Creation of folder " + objResolvedPath.getParent());
179
						Files.createDirectories(objResolvedPath.getParent());
180
						log.debug("Folder " + objResolvedPath.getParent() + " created");
181
						String md5Sum = null;
182
						Long size = new Long(0);
183
						if (record.getInputStream() != null) {
184
							Pair<String, Long> infos = FileSystemUtility.saveAndGenerateMD5(record.getInputStream(), objResolvedPath);
185
							md5Sum = infos.getKey();
186
							size = infos.getValue();
187
						}
188
						final String url =
189
								ModularObjectStoreRESTService.retrieveURL(getBaseURI(), getBasePath(), getId(), record.getFileMetadata().getObjectID());
190
						if (StringUtils.isNotBlank(md5Sum)) {
191
							double timestamp = System.currentTimeMillis();
192
							BasicDBObject metadata = new BasicDBObject();
193
							metadata.put("id", record.getFileMetadata().getObjectID());
194
							metadata.put("mime", record.getFileMetadata().getMimeType());
195
							metadata.put("originalObject", record.getFileMetadata().toJSON());
196
							metadata.put("timestamp", timestamp);
197
							metadata.put("md5Sum", md5Sum);
198
							metadata.put("size", size);
199
							metadata.put(FS_PATH_FIELD, objResolvedPath.toAbsolutePath().toString());
200
							metadata.put(URI_FIELD, url);
201
							log.debug("saving metadata object to the collection: " + metadata.toString());
202
							mongoMetadata.insertOne(metadata);
203
						}
204
						return url;
205
					} catch (Exception e) {
206
						log.error("Something bad happen on inserting Record", e);
207
						log.error("Record: " + new Gson().toJson(record.getFileMetadata()));
208
					} finally {
209
						if (record.getInputStream() != null) {
210
							try {
211
								record.getInputStream().close();
212
							} catch (Exception e) {
213
								log.error("Error on close inputStream", e);
214
							}
215
						}
216
					}
217
				} else {
218
					log.debug("The File in the path" + objResolvedPath.getParent() + "exists ");
219
				}
220
			}
221
		}
222
		return null;
223
	}
224

    
225
	/**
226
	 * {@inheritDoc}
227
	 *
228
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliver(java.lang.Long, java.lang.Long)
229
	 */
230
	@Override
231
	public ResultSetListener deliver(final Long from, final Long until) throws ObjectStoreServiceException {
232
		FileSystemObjectStoreResultSetListener resultSet = new FileSystemObjectStoreResultSetListener();
233
		resultSet.setBaseURI(getBaseURI());
234
		resultSet.setMongoCollection(mongoMetadata);
235
		resultSet.setObjectStoreID(getId());
236
		resultSet.setFromDate(from);
237
		resultSet.setUntilDate(until);
238
		resultSet.setBasePath(getBasePath());
239
		return resultSet;
240
	}
241

    
242
	/**
243
	 * {@inheritDoc}
244
	 *
245
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliverIds(java.lang.Iterable)
246
	 */
247
	@Override
248
	public ResultSetListener deliverIds(final Iterable<String> ids) throws ObjectStoreServiceException {
249
		FileSystemObjectStoreResultSetListener resultSet = new FileSystemObjectStoreResultSetListener();
250
		resultSet.setBaseURI(getBaseURI());
251
		resultSet.setMongoCollection(mongoMetadata);
252
		resultSet.setObjectStoreID(getId());
253
		resultSet.setRecords(Lists.newArrayList(ids));
254
		resultSet.setBasePath(basePath);
255
		return resultSet;
256
	}
257

    
258
	/**
259
	 * {@inheritDoc}
260
	 *
261
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliverObject(java.lang.String)
262
	 */
263
	@Override
264
	public ObjectStoreFile deliverObject(final String objectId) throws ObjectStoreServiceException {
265
		Bson query = Filters.eq("id", objectId);
266
		DBObject resultQuery = mongoMetadata.find(query).first();
267
		checkAndGetFsPathField(resultQuery, objectId);
268
		return ObjectStoreFileUtility.build(resultQuery, getBaseURI(), getId(), basePath);
269
	}
270

    
271
	private String checkAndGetFsPathField(final DBObject resultQuery, final String objectId) throws ObjectStoreServiceException {
272
		if (resultQuery == null || !resultQuery.containsField(FS_PATH_FIELD))
273
			throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " not found or missing " + FS_PATH_FIELD + " field");
274
		String pathStr = (String) resultQuery.get(FS_PATH_FIELD);
275
		if (StringUtils.isBlank(pathStr))
276
			throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " with blank " + FS_PATH_FIELD);
277
		return pathStr;
278
	}
279

    
280
	/**
281
	 * {@inheritDoc}
282
	 *
283
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getSize()
284
	 */
285
	@Override
286
	public int getSize() throws ObjectStoreServiceException {
287
		return (int) mongoMetadata.count();
288
	}
289

    
290
	/**
291
	 * {@inheritDoc}
292
	 *
293
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deleteObject(java.lang.String)
294
	 */
295
	@Override
296
	public void deleteObject(final String objectId) throws ObjectStoreServiceException {
297
		Bson query = Filters.eq("id", objectId);
298
		DBObject response = mongoMetadata.find(query).first();
299
		String pathStr = checkAndGetFsPathField(response, objectId);
300
		Path path = FileSystems.getDefault().getPath(pathStr);
301
		if (Files.notExists(path))
302
			throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " not found in the assigned path " + path);
303
		try {
304
			Files.delete(path);
305
		} catch (IOException e) {
306
			throw new ObjectStoreServiceException("An error occurs on delete file ", e);
307
		}
308
		mongoMetadata.deleteOne(query);
309
	}
310

    
311
	/**
312
	 * {@inheritDoc}
313
	 *
314
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getObject(java.lang.String)
315
	 */
316
	@Override
317
	public String getObject(final String recordId) throws ObjectStoreServiceException {
318
		Bson query = Filters.eq("id", recordId);
319
		DBObject response = mongoMetadata.find(query).first();
320
		if (response == null || !response.containsField(URI_FIELD))
321
			return null;
322
		return (String) response.get(URI_FIELD);
323
	}
324

    
325
	/**
326
	 * {@inheritDoc}
327
	 *
328
	 * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#existIDStartsWith(java.lang.String)
329
	 */
330
	@Override
331
	public boolean existIDStartsWith(final String startId) throws ObjectStoreServiceException {
332
		Bson query = Filters.regex("id", Pattern.compile(startId));
333
		return mongoMetadata.count(query) > 0;
334
	}
335

    
336
	@Override
337
	public boolean dropContent() throws ObjectStoreServiceException {
338
		if (getBasePath() == null) {
339
			throw new ObjectStoreServiceException("Error on dropping object store base_path required");
340
		}
341
		final Path baseDirPath = FileSystems.getDefault().getPath(getBasePath()).resolve(getId());
342
		try {
343
			FileSystemUtility.deleteFolderRecursive(baseDirPath);
344
		} catch (IOException e) {
345
			throw new ObjectStoreServiceException("Error on dropping store ", e);
346
		}
347
		log.info("Deleted folder" + baseDirPath.toString());
348
		if (!Files.exists(baseDirPath)) {
349
			log.info("Recreating folder " + baseDirPath);
350
			try {
351
				Files.createDirectory(baseDirPath);
352
			} catch (IOException e) {
353
				throw new ObjectStoreServiceException("Error on dropping store ", e);
354
			}
355
		}
356
		final DeleteResult deleteResult = this.mongoMetadata.deleteMany(new BasicDBObject());
357
		log.info("Dropped content for object store " + id + ". " + deleteResult.getDeletedCount() + " object(s) deleted.");
358
		return true;
359
	}
360

    
361
	@Override
362
	public String toString() {
363
		return "FileSystemObjectStore{" +
364
				"id='" + getId() + '\'' +
365
				", interpretation='" + getInterpretation() + '\'' +
366
				", basePath='" + getBasePath() + '\'' +
367
				", baseURI='" + getBaseURI() + '\'' +
368
				'}';
369
	}
370

    
371
	/**
372
	 * Gets the base uri.
373
	 *
374
	 * @return the baseURI
375
	 */
376
	public String getBaseURI() {
377
		return baseURI;
378
	}
379

    
380
	public String getBasePath() {
381
		return basePath;
382
	}
383
}
(1-1/8)