1
|
package eu.dnetlib.data.objectstore.filesystem;
|
2
|
|
3
|
import com.google.common.base.Function;
|
4
|
import com.google.common.collect.Iterables;
|
5
|
import com.google.common.collect.Lists;
|
6
|
import com.google.gson.Gson;
|
7
|
import com.mongodb.BasicDBObject;
|
8
|
import com.mongodb.DBObject;
|
9
|
import com.mongodb.client.MongoCollection;
|
10
|
import com.mongodb.client.model.Filters;
|
11
|
import com.mongodb.client.result.DeleteResult;
|
12
|
import eu.dnetlib.data.objectstore.modular.ObjectStoreRecord;
|
13
|
import eu.dnetlib.data.objectstore.modular.connector.ObjectStore;
|
14
|
import eu.dnetlib.data.objectstore.rmi.MetadataObjectRecord;
|
15
|
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFile;
|
16
|
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFileNotFoundException;
|
17
|
import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException;
|
18
|
import eu.dnetlib.enabling.resultset.ResultSetListener;
|
19
|
import eu.dnetlib.miscutils.collections.Pair;
|
20
|
import org.apache.commons.lang.StringUtils;
|
21
|
import org.apache.commons.logging.Log;
|
22
|
import org.apache.commons.logging.LogFactory;
|
23
|
import org.bson.conversions.Bson;
|
24
|
|
25
|
import java.io.ByteArrayInputStream;
|
26
|
import java.io.IOException;
|
27
|
import java.nio.file.FileSystems;
|
28
|
import java.nio.file.Files;
|
29
|
import java.nio.file.Path;
|
30
|
import java.util.regex.Pattern;
|
31
|
|
32
|
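/**
 * {@link ObjectStore} implementation that writes object payloads to the local file system and keeps
 * their metadata (id, mime type, md5, size, file-system path, URI) in a MongoDB collection.
 *
 * @author sandro
 */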
|
37
|
public class FileSystemObjectStore implements ObjectStore {
|
38
|
|
39
|
/**
|
40
|
*
|
41
|
*/
|
42
|
private static final String URI_FIELD = "uri";
|
43
|
|
44
|
/**
|
45
|
*
|
46
|
*/
|
47
|
private static final String FS_PATH_FIELD = "fsPath";
|
48
|
|
49
|
/** The Constant log. */
|
50
|
private static final Log log = LogFactory.getLog(FileSystemObjectStore.class); // NOPMD by marko on 11/24/08 5:02 PM
|
51
|
|
52
|
/** The id. */
|
53
|
private final String id;
|
54
|
|
55
|
/** The interpretation. */
|
56
|
private final String interpretation;
|
57
|
|
58
|
/** The base path. */
|
59
|
private final String basePath;
|
60
|
|
61
|
/** The base uri. */
|
62
|
private final String baseURI;
|
63
|
|
64
|
/** The mongo metadata. */
|
65
|
private final MongoCollection<DBObject> mongoMetadata;
|
66
|
|
67
|
/**
|
68
|
* Instantiates a new file system object store.
|
69
|
*
|
70
|
* @param identifier
|
71
|
* the identifier
|
72
|
* @param interpretation
|
73
|
* the interpretation
|
74
|
* @param basePath
|
75
|
* the base path
|
76
|
* @param mongoMetadata
|
77
|
* the mongo metadata
|
78
|
* @param baseURI
|
79
|
* the base uri
|
80
|
*/
|
81
|
public FileSystemObjectStore(final String identifier, final String interpretation, final String basePath, final MongoCollection<DBObject> mongoMetadata,
|
82
|
final String baseURI) {
|
83
|
this.id = identifier;
|
84
|
this.basePath = basePath;
|
85
|
this.interpretation = interpretation;
|
86
|
this.mongoMetadata = mongoMetadata;
|
87
|
this.baseURI = baseURI;
|
88
|
}
|
89
|
|
90
|
/**
|
91
|
* {@inheritDoc}
|
92
|
*
|
93
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getId()
|
94
|
*/
|
95
|
@Override
|
96
|
public String getId() {
|
97
|
return this.id;
|
98
|
}
|
99
|
|
100
|
/**
|
101
|
* {@inheritDoc}
|
102
|
*
|
103
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getInterpretation()
|
104
|
*/
|
105
|
@Override
|
106
|
public String getInterpretation() {
|
107
|
return this.interpretation;
|
108
|
}
|
109
|
|
110
|
/**
|
111
|
* {@inheritDoc}
|
112
|
*
|
113
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feed(java.lang.Iterable, boolean)
|
114
|
*/
|
115
|
@Override
|
116
|
public int feed(final Iterable<ObjectStoreRecord> records, final boolean incremental) throws ObjectStoreServiceException {
|
117
|
if (records == null)
|
118
|
return 0;
|
119
|
|
120
|
Path baseDirPath = FileSystems.getDefault().getPath(getBasePath()).resolve(getId());
|
121
|
if (!Files.exists(baseDirPath))
|
122
|
throw new ObjectStoreServiceException("Error can't feed objects because the folder " + baseDirPath + " does not exist");
|
123
|
|
124
|
int addedCounter = 0;
|
125
|
for (ObjectStoreRecord record : records) {
|
126
|
String url = feedObject(record);
|
127
|
if (StringUtils.isNotBlank(url)) {
|
128
|
addedCounter++;
|
129
|
}
|
130
|
}
|
131
|
return addedCounter;
|
132
|
}
|
133
|
|
134
|
/**
|
135
|
* {@inheritDoc}
|
136
|
*
|
137
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feedMetadataRecord(java.lang.Iterable, boolean)
|
138
|
*
|
139
|
* This method handles the case of web crawl files and other cases when the metadata in mdstores are also the objects to put into the objectstores.
|
140
|
*/
|
141
|
@Override
|
142
|
public int feedMetadataRecord(final Iterable<MetadataObjectRecord> records, final boolean incremental) throws ObjectStoreServiceException {
|
143
|
Iterable<ObjectStoreRecord> it = Iterables.transform(records, new Function<MetadataObjectRecord, ObjectStoreRecord>() {
|
144
|
@Override
|
145
|
public ObjectStoreRecord apply(final MetadataObjectRecord metadataObjectRecord) {
|
146
|
ObjectStoreRecord r = new ObjectStoreRecord();
|
147
|
r.setInputStream(new ByteArrayInputStream(metadataObjectRecord.getRecord().getBytes()));
|
148
|
ObjectStoreFile fileMetadata = new ObjectStoreFile();
|
149
|
fileMetadata.setObjectID(metadataObjectRecord.getId());
|
150
|
fileMetadata.setMimeType(metadataObjectRecord.getMime());
|
151
|
r.setFileMetadata(fileMetadata);
|
152
|
return r;
|
153
|
}
|
154
|
});
|
155
|
return feed(it, incremental);
|
156
|
}
|
157
|
|
158
|
/**
|
159
|
* {@inheritDoc}
|
160
|
*
|
161
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feedObjectRecord(eu.dnetlib.data.objectstore.modular.ObjectStoreRecord)
|
162
|
*/
|
163
|
@Override
|
164
|
public String feedObjectRecord(final ObjectStoreRecord record) throws ObjectStoreServiceException {
|
165
|
return feedObject(record);
|
166
|
}
|
167
|
|
168
|
private String feedObject(final ObjectStoreRecord record) {
|
169
|
if (record != null) {
|
170
|
String objectIdentifier = record.getFileMetadata().getObjectID();
|
171
|
if (StringUtils.isNotBlank(objectIdentifier)) {
|
172
|
final Path objResolvedPath = FileSystemUtility.objectStoreFilePath(basePath, id, objectIdentifier);
|
173
|
|
174
|
if (Files.notExists(objResolvedPath)) {
|
175
|
try {
|
176
|
log.debug("Creation of folder " + objResolvedPath.getParent());
|
177
|
Files.createDirectories(objResolvedPath.getParent());
|
178
|
log.debug("Folder " + objResolvedPath.getParent() + " created");
|
179
|
String md5Sum = null;
|
180
|
Long size = new Long(0);
|
181
|
if (record.getInputStream() != null) {
|
182
|
Pair<String, Long> infos = FileSystemUtility.saveAndGenerateMD5(record.getInputStream(), objResolvedPath);
|
183
|
md5Sum = infos.getKey();
|
184
|
size = infos.getValue();
|
185
|
}
|
186
|
final String url =
|
187
|
ModularObjectStoreRESTService.retrieveURL(getBaseURI(), getBasePath(), getId(), record.getFileMetadata().getObjectID());
|
188
|
if (StringUtils.isNotBlank(md5Sum)) {
|
189
|
double timestamp = System.currentTimeMillis();
|
190
|
BasicDBObject metadata = new BasicDBObject();
|
191
|
metadata.put("id", record.getFileMetadata().getObjectID());
|
192
|
metadata.put("mime", record.getFileMetadata().getMimeType());
|
193
|
metadata.put("originalObject", record.getFileMetadata().toJSON());
|
194
|
metadata.put("timestamp", timestamp);
|
195
|
metadata.put("md5Sum", md5Sum);
|
196
|
metadata.put("size", size);
|
197
|
metadata.put(FS_PATH_FIELD, objResolvedPath.toAbsolutePath().toString());
|
198
|
metadata.put(URI_FIELD, url);
|
199
|
log.debug("saving metadata object to the collection: " + metadata.toString());
|
200
|
mongoMetadata.insertOne(metadata);
|
201
|
}
|
202
|
return url;
|
203
|
} catch (Exception e) {
|
204
|
log.error("Something bad happen on inserting Record", e);
|
205
|
log.error("Record: " + new Gson().toJson(record.getFileMetadata()));
|
206
|
} finally {
|
207
|
if (record.getInputStream() != null) {
|
208
|
try {
|
209
|
record.getInputStream().close();
|
210
|
} catch (Exception e) {
|
211
|
log.error("Error on close inputStream", e);
|
212
|
}
|
213
|
}
|
214
|
}
|
215
|
} else {
|
216
|
log.debug("The File in the path" + objResolvedPath.getParent() + "exists ");
|
217
|
}
|
218
|
}
|
219
|
}
|
220
|
log.warn("Record for object store is null");
|
221
|
return null;
|
222
|
}
|
223
|
|
224
|
/**
|
225
|
* {@inheritDoc}
|
226
|
*
|
227
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliver(java.lang.Long, java.lang.Long)
|
228
|
*/
|
229
|
@Override
|
230
|
public ResultSetListener deliver(final Long from, final Long until) throws ObjectStoreServiceException {
|
231
|
FileSystemObjectStoreResultSetListener resultSet = new FileSystemObjectStoreResultSetListener();
|
232
|
resultSet.setBaseURI(getBaseURI());
|
233
|
resultSet.setMongoCollection(mongoMetadata);
|
234
|
resultSet.setObjectStoreID(getId());
|
235
|
resultSet.setFromDate(from);
|
236
|
resultSet.setUntilDate(until);
|
237
|
resultSet.setBasePath(getBasePath());
|
238
|
return resultSet;
|
239
|
}
|
240
|
|
241
|
/**
|
242
|
* {@inheritDoc}
|
243
|
*
|
244
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliverIds(java.lang.Iterable)
|
245
|
*/
|
246
|
@Override
|
247
|
public ResultSetListener deliverIds(final Iterable<String> ids) throws ObjectStoreServiceException {
|
248
|
FileSystemObjectStoreResultSetListener resultSet = new FileSystemObjectStoreResultSetListener();
|
249
|
resultSet.setBaseURI(getBaseURI());
|
250
|
resultSet.setMongoCollection(mongoMetadata);
|
251
|
resultSet.setObjectStoreID(getId());
|
252
|
resultSet.setRecords(Lists.newArrayList(ids));
|
253
|
resultSet.setBasePath(basePath);
|
254
|
return resultSet;
|
255
|
}
|
256
|
|
257
|
/**
|
258
|
* {@inheritDoc}
|
259
|
*
|
260
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliverObject(java.lang.String)
|
261
|
*/
|
262
|
@Override
|
263
|
public ObjectStoreFile deliverObject(final String objectId) throws ObjectStoreServiceException {
|
264
|
Bson query = Filters.eq("id", objectId);
|
265
|
DBObject resultQuery = mongoMetadata.find(query).first();
|
266
|
checkAndGetFsPathField(resultQuery, objectId);
|
267
|
return ObjectStoreFileUtility.build(resultQuery, getBaseURI(), getId(), basePath);
|
268
|
}
|
269
|
|
270
|
private String checkAndGetFsPathField(final DBObject resultQuery, final String objectId) throws ObjectStoreServiceException {
|
271
|
if (resultQuery == null || !resultQuery.containsField(FS_PATH_FIELD))
|
272
|
throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " not found or missing " + FS_PATH_FIELD + " field");
|
273
|
String pathStr = (String) resultQuery.get(FS_PATH_FIELD);
|
274
|
if (StringUtils.isBlank(pathStr))
|
275
|
throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " with blank " + FS_PATH_FIELD);
|
276
|
return pathStr;
|
277
|
}
|
278
|
|
279
|
/**
|
280
|
* {@inheritDoc}
|
281
|
*
|
282
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getSize()
|
283
|
*/
|
284
|
@Override
|
285
|
public int getSize() throws ObjectStoreServiceException {
|
286
|
return (int) mongoMetadata.count();
|
287
|
}
|
288
|
|
289
|
/**
|
290
|
* {@inheritDoc}
|
291
|
*
|
292
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deleteObject(java.lang.String)
|
293
|
*/
|
294
|
@Override
|
295
|
public void deleteObject(final String objectId) throws ObjectStoreServiceException {
|
296
|
Bson query = Filters.eq("id", objectId);
|
297
|
DBObject response = mongoMetadata.find(query).first();
|
298
|
String pathStr = checkAndGetFsPathField(response, objectId);
|
299
|
Path path = FileSystems.getDefault().getPath(pathStr);
|
300
|
if (Files.notExists(path))
|
301
|
throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " not found in the assigned path " + path);
|
302
|
try {
|
303
|
Files.delete(path);
|
304
|
} catch (IOException e) {
|
305
|
throw new ObjectStoreServiceException("An error occurs on delete file ", e);
|
306
|
}
|
307
|
mongoMetadata.deleteOne(query);
|
308
|
}
|
309
|
|
310
|
/**
|
311
|
* {@inheritDoc}
|
312
|
*
|
313
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getObject(java.lang.String)
|
314
|
*/
|
315
|
@Override
|
316
|
public String getObject(final String recordId) throws ObjectStoreServiceException {
|
317
|
Bson query = Filters.eq("id", recordId);
|
318
|
DBObject response = mongoMetadata.find(query).first();
|
319
|
if (response == null || !response.containsField(URI_FIELD))
|
320
|
return null;
|
321
|
return (String) response.get(URI_FIELD);
|
322
|
}
|
323
|
|
324
|
/**
|
325
|
* {@inheritDoc}
|
326
|
*
|
327
|
* @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#existIDStartsWith(java.lang.String)
|
328
|
*/
|
329
|
@Override
|
330
|
public boolean existIDStartsWith(final String startId) throws ObjectStoreServiceException {
|
331
|
Bson query = Filters.regex("id", Pattern.compile(startId));
|
332
|
return mongoMetadata.count(query) > 0;
|
333
|
}
|
334
|
|
335
|
@Override
|
336
|
public boolean dropContent() throws ObjectStoreServiceException {
|
337
|
if (getBasePath() == null) {
|
338
|
throw new ObjectStoreServiceException("Error on dropping object store base_path required");
|
339
|
}
|
340
|
final Path baseDirPath = FileSystems.getDefault().getPath(getBasePath()).resolve(getId());
|
341
|
try {
|
342
|
FileSystemUtility.deleteFolderRecursive(baseDirPath);
|
343
|
} catch (IOException e) {
|
344
|
throw new ObjectStoreServiceException("Error on dropping store ", e);
|
345
|
}
|
346
|
log.info("Deleted folder" + baseDirPath.toString());
|
347
|
if (!Files.exists(baseDirPath)) {
|
348
|
log.info("Recreating folder " + baseDirPath);
|
349
|
try {
|
350
|
Files.createDirectory(baseDirPath);
|
351
|
} catch (IOException e) {
|
352
|
throw new ObjectStoreServiceException("Error on dropping store ", e);
|
353
|
}
|
354
|
}
|
355
|
final DeleteResult deleteResult = this.mongoMetadata.deleteMany(new BasicDBObject());
|
356
|
log.info("Dropped content for object store " + id + ". " + deleteResult.getDeletedCount() + " object(s) deleted.");
|
357
|
return true;
|
358
|
}
|
359
|
|
360
|
@Override
|
361
|
public String toString() {
|
362
|
return "FileSystemObjectStore{" +
|
363
|
"id='" + getId() + '\'' +
|
364
|
", interpretation='" + getInterpretation() + '\'' +
|
365
|
", basePath='" + getBasePath() + '\'' +
|
366
|
", baseURI='" + getBaseURI() + '\'' +
|
367
|
'}';
|
368
|
}
|
369
|
|
370
|
/**
|
371
|
* Gets the base uri.
|
372
|
*
|
373
|
* @return the baseURI
|
374
|
*/
|
375
|
public String getBaseURI() {
|
376
|
return baseURI;
|
377
|
}
|
378
|
|
379
|
public String getBasePath() {
|
380
|
return basePath;
|
381
|
}
|
382
|
}
|