1
|
import boto
|
2
|
import boto.s3.connection
|
3
|
import sys
|
4
|
from pymongo import MongoClient
|
5
|
import os.path
|
6
|
|
7
|
|
8
|
def exportItemForMongoCollection(obsId, db, bucket, log_file):
    """Upload the files of one object store to S3 and record their new URIs.

    Every document of ``db[obsId[:36]]`` is read for its ``id`` and ``fsPath``
    fields.  When ``fsPath`` names an existing regular file, that file is
    uploaded to ``bucket`` under the key ``<obsId>/<id>`` and the document --
    with ``_id`` and ``fsPath`` removed and a ``uri`` field of the form
    ``s3://<bucket name>/<key name>`` added -- is inserted into
    ``db['s3_' + obsId[:36]]``.  Otherwise one line describing the missing
    file is written to ``log_file``.

    Args:
        obsId: Object store identifier.  Only its first 36 characters are
            used for the collection names (presumably the length of a UUID --
            TODO confirm); the full value is used in the S3 key.
        db: Database handle; ``db[name]`` must return a collection.
        bucket: Bucket offering ``new_key(name)`` and a ``name`` attribute.
        log_file: Writable text file that receives the missing-file report.

    Raises:
        KeyError: If a source document lacks ``id`` or ``fsPath``.
    """
    collection_name = obsId[:36]
    destination_collection = db['s3_' + collection_name]
    source_collection = db[collection_name]
    print('Start to exporting objectStore :%s ' % obsId)

    exported = 0
    total = source_collection.estimated_document_count()

    # The cursor is opened with no_cursor_timeout, so it has to be closed
    # explicitly, including when an upload or insert fails.
    cursor = source_collection.find(no_cursor_timeout=True)
    try:
        for item in cursor:
            fs_path = item['fsPath']
            object_id = item['id']

            if not os.path.isfile(fs_path):
                # File objects have no ``writeline``; write the line and its
                # terminator explicitly so that each report is on its own line.
                log_file.write(
                    'Missing file for objectStoreid: %s ObjectId:%s path: %s'
                    % (obsId, object_id, fs_path) + '\n'
                )
                continue

            key = bucket.new_key('%s/%s' % (obsId, object_id))
            key.set_contents_from_filename(fs_path)
            exported += 1

            # The destination document gets a fresh _id and points to S3
            # instead of the local file system.
            item.pop('_id', None)
            item.pop('fsPath')
            item['uri'] = 's3://%s/%s' % (bucket.name, key.name)
            destination_collection.insert_one(item)

            if exported % 1000 == 0:
                print("Exported %i/%i" % (exported, total))
    finally:
        cursor.close()
|
30
|
|
31
|
|
32
|
|
33
|
|
34
|
|
35
|
|
36
|
|
37
|
|
38
|
|
39
|
def start_import(metadataCollection, bucket, log_file):
    """Export every object store listed in a metadata collection.

    Connects to MongoDB with the default ``MongoClient()`` settings, opens the
    ``objectStore`` database and, for each document of the collection named
    ``metadataCollection``, passes the document's ``obsId`` to
    ``exportItemForMongoCollection``.

    Args:
        metadataCollection: Name of the collection whose documents carry the
            ``obsId`` of each object store to export.
        bucket: Destination bucket, forwarded unchanged to the exporter.
        log_file: Writable file, forwarded unchanged to the exporter.
    """
    client = MongoClient()
    try:
        db = client['objectStore']
        # no_cursor_timeout keeps the cursor open while each (potentially
        # long) export runs, so it has to be closed explicitly afterwards.
        cursor = db[metadataCollection].find(no_cursor_timeout=True)
        try:
            for item in cursor:
                exportItemForMongoCollection(item['obsId'], db, bucket, log_file)
        finally:
            cursor.close()
    finally:
        client.close()
|
46
|
|
47
|
|
48
|
|
49
|
|
50
|
|
51
|
def _load_s3cfg(path):
    """Return the ``key = value`` pairs found in the file at ``path``.

    Keys and values are stripped of surrounding whitespace.  Lines without an
    ``=`` are ignored.  Only the first ``=`` of a line separates key from
    value, so values may themselves contain ``=``.
    """
    props = {}
    with open(path) as cfg:
        for line in cfg:
            key, separator, value = line.partition('=')
            if separator:
                props[key.strip()] = value.strip()
    return props


if __name__ == '__main__':
    # Usage: python scriptName.py s3cfgPath objectstoreBucket
    args = sys.argv
    if len(args) != 3:
        print("Error on applying script usage: python scriptName.py s3cfgPath objectstoreBucket")
        sys.exit(1)

    props = _load_s3cfg(args[1])
    bname = args[2]

    conn = boto.connect_s3(
        aws_access_key_id=props['access_key'],
        aws_secret_access_key=props['secret_key'],
        host=props['host_base'],
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )
    bucket = conn.get_bucket(bname, validate=True)

    # The context manager closes the log even when the import fails midway.
    with open('s3_migration.log', 'w') as log_file:
        start_import('metadataObjectStore', bucket, log_file)
|