Project

General

Profile

1 54585 sandro.lab
import boto
2
import boto.s3.connection
3
import sys
4
from pymongo import MongoClient
5
import os.path
6 54586 sandro.lab
import time
7 54585 sandro.lab
8
9 54586 sandro.lab
10
def file_exist(bucket, file_path):
    """Tell whether a non-empty object is stored under ``file_path``.

    Args:
        bucket: Object exposing ``get_key(name)``; the result is treated as
            "no such key" when it is ``None``.
        file_path: Key name to look up in the bucket.

    Returns:
        True when ``bucket.get_key(file_path)`` yields a key whose ``size``
        is greater than zero, False otherwise.
    """
    key = bucket.get_key(file_path)
    # A zero-sized key is reported as absent, so the caller uploads it again.
    return bool(key is not None and key.size > 0)
15 54586 sandro.lab
16 54585 sandro.lab
def exportItemForMongoCollection(obsId, db, bucket, log_file, retry_delay=10):
    """Upload the files referenced by one object store collection to ``bucket``.

    Each document of ``db[obsId[:36]]`` is read for its ``fsPath`` and ``id``
    fields. When ``fsPath`` is an existing file and the bucket does not yet
    hold a non-empty key named ``<obsId>/<id>``, the file is uploaded and a
    copy of the document (without ``_id`` and ``fsPath``, with an added
    ``uri``) is inserted into ``db['s3_' + obsId[:36]]``.

    Args:
        obsId: Object store identifier; its first 36 characters name the
            Mongo collections, the full value prefixes the bucket keys.
        db: Mapping-like database handle indexed by collection name.
        bucket: Bucket exposing ``get_key``, ``new_key`` and ``name``.
        log_file: Writable text file receiving one line per missing file.
        retry_delay: Seconds to wait before the single upload retry.

    Raises:
        Exception: whatever the second upload attempt raises; only the
            first failure is retried.
    """
    destination_collection = db['s3_' + obsId[:36]]
    source_collection = db[obsId[:36]]
    print('Start to exporting objectStore :%s ' % obsId)
    exported = 0
    total = source_collection.estimated_document_count()
    cursor = source_collection.find(no_cursor_timeout=True)
    try:
        for item in cursor:
            fs_path = item['fsPath']
            objectId = item['id']
            if not os.path.isfile(fs_path):
                # File objects have write(), not writeline(); terminate the
                # record explicitly so each missing file gets its own line.
                log_file.write(
                    'Missing file for objectStoreid: %s ObjectId:%s path: %s'
                    % (obsId, objectId, fs_path) + '\n')
                continue
            key_name = '%s/%s' % (obsId, objectId)
            if file_exist(bucket, key_name):
                # Already uploaded; nothing is inserted for it here.
                # NOTE(review): presumably a previous run inserted the
                # destination document — confirm.
                continue
            key = bucket.new_key(key_name)
            try:
                key.set_contents_from_filename(fs_path)
            except Exception:
                # One retry after a pause; a second failure propagates.
                time.sleep(retry_delay)
                key.set_contents_from_filename(fs_path)
            exported += 1
            item.pop('_id', None)
            item.pop('fsPath')
            item['uri'] = 's3://%s/%s' % (bucket.name, key.name)
            destination_collection.insert_one(item)
            # Report only when the counter has just advanced, otherwise
            # skipped items would repeat the same progress line.
            if exported % 1000 == 0:
                print("Exported %i/%i" % (exported, total))
    finally:
        # The cursor was opened with no_cursor_timeout=True, so it is
        # closed explicitly here.
        cursor.close()
43
44
45
def start_import(metadataCollection, bucket, log_file):
    """Export every object store listed in a metadata collection.

    Opens a ``MongoClient()`` with no arguments, selects the ``objectStore``
    database and, for each document of the named collection, passes its
    ``obsId`` field to ``exportItemForMongoCollection``.

    Args:
        metadataCollection: Name of the collection listing the object stores.
        bucket: Destination bucket, forwarded unchanged to the exporter.
        log_file: Writable log file, forwarded unchanged to the exporter.
    """
    client = MongoClient()
    try:
        db = client['objectStore']
        cursor = db[metadataCollection].find(no_cursor_timeout=True)
        try:
            for item in cursor:
                exportItemForMongoCollection(item['obsId'], db, bucket, log_file)
        finally:
            # Opened with no_cursor_timeout=True, so close it explicitly.
            cursor.close()
    finally:
        client.close()
52
53
54
55
56
57
def parse_s3cfg(cfg_path):
    """Read ``name = value`` pairs from a configuration file.

    Each line is split on its first ``=``; name and value are stripped of
    surrounding whitespace. Lines without ``=`` are ignored. Splitting only
    once keeps values that themselves contain ``=``.

    Args:
        cfg_path: Path of the configuration file to read.

    Returns:
        dict mapping each name to its value; a later line overrides an
        earlier one with the same name.
    """
    props = {}
    with open(cfg_path) as cfg:
        for line in cfg:
            name, sep, value = line.partition('=')
            if sep:
                props[name.strip()] = value.strip()
    return props


if __name__ == '__main__':
    # Usage: python scriptName.py s3cfgPath objectstoreBucket
    args = sys.argv
    if len(args) != 3:
        print("Error on applying script usage: python scriptName.py s3cfgPath objectstoreBucket")
        sys.exit(1)

    props = parse_s3cfg(args[1])
    bname = args[2]
    conn = boto.connect_s3(
        aws_access_key_id=props['access_key'],
        aws_secret_access_key=props['secret_key'],
        host=props['host_base'],
        calling_format=boto.s3.connection.OrdinaryCallingFormat())
    bucket = conn.get_bucket(bname, validate=True)
    log_file = open('s3_migration.log', 'w')
    try:
        start_import('metadataObjectStore', bucket, log_file)
    finally:
        # Close the log even when the import aborts part-way.
        log_file.close()