Project

General

Profile

1 54585 sandro.lab
import boto
2
import boto.s3.connection
3
import sys
4 54591 sandro.lab
import pymongo
5 54585 sandro.lab
from pymongo import MongoClient
6
import os.path
7 54586 sandro.lab
import time
8 54585 sandro.lab
9
10 54586 sandro.lab
11
def file_exist(bucket, file_path):
    """Return True when ``file_path`` is present in ``bucket`` with a non-zero size.

    ``bucket.get_key(file_path)`` is expected to return ``None`` when the key
    is absent; a key whose ``size`` is 0 is treated as not existing.
    """
    key = bucket.get_key(file_path)
    return key is not None and key.size > 0
16 54586 sandro.lab
17 54585 sandro.lab
def _upload_with_retry(key, fs_path, max_retries=10, retry_delay=10):
    """Upload the file at ``fs_path`` into ``key``; return True on success.

    One initial attempt is made, followed by up to ``max_retries`` further
    attempts, sleeping ``retry_delay`` seconds before each retry. Returns
    False when every attempt raised.
    """
    for attempt in range(max_retries + 1):
        try:
            key.set_contents_from_filename(fs_path)
            return True
        except Exception as e:
            # Deliberately broad: any upload failure is reported and retried.
            print("Error on saving object on S3")
            print(e)
            if attempt < max_retries:
                print("Sleeping %i seconds and retry" % retry_delay)
                time.sleep(retry_delay)
    return False


def exportItemForMongoCollection(obsId, db, bucket, log_file,
                                 max_retries=10, retry_delay=10):
    """Copy the files of one object store to ``bucket`` and record them.

    Iterates the source collection ``db[obsId[:36]]``. For every item that has
    no record with the same ``id`` in ``db['s3_' + obsId[:36]]``:

    * if ``item['fsPath']`` is not a file on disk, a line is written to
      ``log_file`` and the item is skipped;
    * otherwise, unless the key ``<obsId>/<id>`` already exists in the bucket,
      the file is uploaded and the item (without ``_id`` and ``fsPath``, plus
      an ``uri`` field) is inserted into the destination collection.

    An item whose upload fails on every attempt is logged to ``log_file`` and
    is NOT inserted, so a later run will try it again.

    Parameters:
        obsId: object store identifier; its first 36 characters name the
            collections (presumably a UUID prefix - confirm with the schema).
        db: mapping-like database handle returning collections by name.
        bucket: bucket object providing ``get_key``, ``new_key`` and ``name``.
        log_file: writable text file-like object for problem reports.
        max_retries: number of retries after the first failed upload.
        retry_delay: seconds to sleep before each retry.
    """
    store_name = obsId[:36]
    destination_collection = db['s3_' + store_name]
    source_collection = db[store_name]
    print('Start to exporting objectStore :%s ' % obsId)
    exported = 0
    skip = 0
    total = source_collection.estimated_document_count()
    cursor = source_collection.find(no_cursor_timeout=True)
    try:
        for item in cursor:
            fs_path = item['fsPath']
            objectId = item['id']
            if destination_collection.find_one({'id': objectId}) is not None:
                # Already migrated by a previous run.
                skip += 1
                continue
            if not os.path.isfile(fs_path):
                log_file.write(
                    'Missing file for objectStoreid: %s ObjectId:%s path: %s\n'
                    % (obsId, objectId, fs_path))
                continue
            exported += 1
            key_name = '%s/%s' % (obsId, objectId)
            # NOTE(review): when the key already exists in the bucket no
            # destination record is written (unchanged from the original);
            # confirm this is the intended resume behaviour.
            if not file_exist(bucket, key_name):
                key = bucket.new_key(key_name)
                if _upload_with_retry(key, fs_path, max_retries, retry_delay):
                    item.pop('_id', None)
                    item.pop('fsPath')
                    item['uri'] = 's3://%s/%s' % (bucket.name, key.name)
                    destination_collection.insert_one(item)
                else:
                    log_file.write(
                        'Upload failed for objectStoreid: %s ObjectId:%s path: %s\n'
                        % (obsId, objectId, fs_path))
            if exported % 100 == 0:
                print("Exported %i/%i   (skipped)%i " % (exported, total, skip))
    finally:
        # A no_cursor_timeout cursor is not reaped by the server when idle,
        # so it has to be closed explicitly.
        cursor.close()
56 54591 sandro.lab
57 54585 sandro.lab
58 54594 sandro.lab
def start_import(metadataCollection, bucket, log_file, done_file, skip_store):
    """Export every object store listed in the metadata collection.

    Connects to MongoDB with the default ``MongoClient()`` settings, opens the
    ``objectStore`` database and iterates the collection named by
    ``metadataCollection``. For each item whose ``obsId`` is not in
    ``skip_store``, an index on ``id`` is ensured on the destination
    collection ``s3_<obsId[:36]>``, the store is exported with
    ``exportItemForMongoCollection`` and ``obsId`` is appended to
    ``done_file``.

    Parameters:
        metadataCollection: name of the collection listing the object stores.
        bucket: destination bucket, passed through to the exporter.
        log_file: writable file-like object for problem reports.
        done_file: writable file-like object receiving one finished obsId
            per line.
        skip_store: iterable of obsId strings to leave out.
    """
    client = MongoClient()
    try:
        db = client['objectStore']
        metadata = db[metadataCollection]
        print(skip_store)
        # Set membership keeps the per-store check O(1).
        already_done = set(skip_store)
        cursor = metadata.find(no_cursor_timeout=True)
        try:
            for item in cursor:
                obsId = item['obsId']
                if obsId in already_done:
                    continue
                destination_collection = db['s3_' + obsId[:36]]
                destination_collection.create_index([('id', pymongo.ASCENDING)])
                exportItemForMongoCollection(obsId, db, bucket, log_file)
                done_file.write('{}\n'.format(obsId))
                # Flush right away so a crash does not lose the record that
                # this store was completed.
                done_file.flush()
        finally:
            # no_cursor_timeout cursors must be closed explicitly.
            cursor.close()
    finally:
        client.close()
70 54585 sandro.lab
71
72
73
74
75
if __name__ == '__main__':
    # Usage: python scriptName.py s3cfgPath objectstoreBucket
    args = sys.argv
    if len(args) != 3:
        print("Error on applying script usage: python scriptName.py s3cfgPath objectstoreBucket")
        sys.exit(1)

    # Parse "name = value" lines of the s3cfg file. Only the first '=' is a
    # separator, so values that themselves contain '=' are kept intact.
    props = {}
    with open(args[1]) as cfg_file:
        for line in cfg_file:
            name, sep, value = line.partition('=')
            if sep:
                props[name.strip()] = value.strip()

    # Stores listed in 'store_done' are skipped; blank lines are ignored.
    skip_store = []
    if os.path.isfile('store_done'):
        with open('store_done') as previous_done:
            skip_store = [line.strip() for line in previous_done if line.strip()]

    bname = args[2]
    conn = boto.connect_s3(
        aws_access_key_id=props['access_key'],
        aws_secret_access_key=props['secret_key'],
        host=props['host_base'],
        calling_format=boto.s3.connection.OrdinaryCallingFormat())
    bucket = conn.get_bucket(bname, validate=True)

    # NOTE(review): finished stores are written to 'store_done_1' while the
    # skip list is read from 'store_done' (file names unchanged from the
    # original) - confirm whether these are meant to be the same file.
    with open('s3_migration.log', 'w') as log_file:
        # Text mode: start_import writes str lines to this file.
        with open('store_done_1', 'w') as done_file:
            start_import('metadataObjectStore', bucket, log_file, done_file, skip_store)