Revision 54585
Added by Sandro La Bruzzo over 5 years ago
modules/s3-objectStore-migration/trunk/s3_migration.py | ||
---|---|---|
1 |
import boto |
|
2 |
import boto.s3.connection |
|
3 |
import sys |
|
4 |
from pymongo import MongoClient |
|
5 |
import os.path |
|
6 |
|
|
7 |
|
|
8 |
def exportItemForMongoCollection(obsId, db, bucket, log_file):
    """Upload the files of one object store to S3 and record the new metadata.

    Every document of the source collection ``db[obsId[:36]]`` is read. When
    the path stored under ``fsPath`` is an existing file, it is uploaded to
    ``bucket`` under the key ``<obsId>/<id>``; the document, stripped of
    ``_id`` and ``fsPath`` and extended with an ``uri`` field of the form
    ``s3://<bucket name>/<key name>``, is then inserted into the destination
    collection ``db['s3_' + obsId[:36]]``. When the file does not exist, one
    line describing the missing object is written to ``log_file`` and the
    document is skipped.

    Args:
        obsId: identifier of the object store; its first 36 characters name
            the source collection.
        db: mapping-like database handle indexed by collection name.
        bucket: bucket object providing ``new_key(name)`` and ``name``.
        log_file: writable text file object receiving the missing-file lines.
    """
    collection_name = obsId[:36]
    destination_collection = db['s3_' + collection_name]
    source_collection = db[collection_name]
    print('Start to exporting objectStore :%s ' % obsId)
    i = 0
    total = source_collection.estimated_document_count()
    cursor = source_collection.find(no_cursor_timeout=True)
    try:
        for item in cursor:
            fs_path = item['fsPath']
            objectId = item['id']
            if not os.path.isfile(fs_path):
                # File objects have no ``writeline`` method; use ``write``
                # and terminate the line explicitly.
                log_file.write(
                    'Missing file for objectStoreid: %s ObjectId:%s path: %s\n'
                    % (obsId, objectId, fs_path))
                continue
            key = bucket.new_key('%s/%s' % (obsId, objectId))
            key.set_contents_from_filename(fs_path)
            i += 1
            # Drop the source identifier and the local path before the
            # document is stored in the destination collection.
            item.pop('_id', None)
            item.pop('fsPath')
            item['uri'] = 's3://%s/%s' % (bucket.name, key.name)
            destination_collection.insert_one(item)
            if i % 1000 == 0:
                print("Exported %i/%i" % (i, total))
    finally:
        # A cursor opened with no_cursor_timeout=True is not reaped
        # automatically, so it must be closed explicitly.
        cursor.close()
|
30 |
|
|
31 |
|
|
32 |
|
|
33 |
|
|
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
|
|
39 |
def start_import(metadataCollection, bucket, log_file):
    """Export every object store listed in a metadata collection.

    Connects to MongoDB with the default ``MongoClient()`` settings, opens the
    ``objectStore`` database and, for each document of the collection named
    ``metadataCollection``, calls ``exportItemForMongoCollection`` with the
    document's ``obsId``.

    Args:
        metadataCollection: name of the collection listing the object stores.
        bucket: destination bucket, passed through to the exporter.
        log_file: writable file object, passed through to the exporter.
    """
    client = MongoClient()
    try:
        db = client['objectStore']
        cursor = db[metadataCollection].find(no_cursor_timeout=True)
        try:
            for item in cursor:
                exportItemForMongoCollection(item['obsId'], db, bucket, log_file)
        finally:
            # A no-timeout cursor must be closed explicitly, even when an
            # export fails midway.
            cursor.close()
    finally:
        client.close()
|
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
|
|
51 |
if __name__ == '__main__':
    # Usage: python scriptName.py s3cfgPath objectstoreBucket
    args = sys.argv
    # The previous check (`not len(args) is not 3`) was a double negation that
    # reported an error exactly when the argument count was correct, and the
    # script kept running afterwards. Reject wrong usage and stop.
    if len(args) != 3:
        print("Error on applying script usage: python scriptName.py s3cfgPath objectstoreBucket")
        sys.exit(1)

    # Read the "name = value" lines of the configuration file. Splitting on
    # the first '=' only keeps values that themselves contain '='.
    props = {}
    with open(args[1]) as f:
        for line in f:
            name, separator, value = line.partition('=')
            if separator:
                props[name.strip()] = value.strip()

    bname = args[2]
    conn = boto.connect_s3(
        aws_access_key_id=props['access_key'],
        aws_secret_access_key=props['secret_key'],
        host=props['host_base'],
        calling_format=boto.s3.connection.OrdinaryCallingFormat())
    bucket = conn.get_bucket(bname, validate=True)

    # Close the log even if the migration aborts with an exception.
    log_file = open('s3_migration.log', 'w')
    try:
        start_import('metadataObjectStore', bucket, log_file)
    finally:
        log_file.close()
Also available in: Unified diff
added script for s3 migration