Project

General

Profile

« Previous | Next » 

Revision 54585

added script for s3 migration

View differences:

modules/s3-objectStore-migration/trunk/s3_migration.py
1
import boto
2
import boto.s3.connection
3
import sys
4
from pymongo import MongoClient
5
import os.path
6

  
7

  
8
def exportItemForMongoCollection(obsId, db, bucket, log_file):
    """Upload the files of one object store to S3 and mirror their metadata.

    Every document of the source collection ``db[obsId[:36]]`` whose
    ``fsPath`` names an existing file is uploaded to ``bucket`` under the key
    ``<obsId>/<id>``. The document, stripped of ``_id`` and ``fsPath`` and
    extended with a ``uri`` field (``s3://<bucket name>/<key name>``), is then
    inserted into ``db['s3_' + obsId[:36]]``. Documents whose file is missing
    are skipped and reported, one line each, in ``log_file``.

    Args:
        obsId: object store identifier. Only its first 36 characters are used
            to name the Mongo collections; the full value prefixes S3 keys.
        db: database handle indexable by collection name.
        bucket: S3 bucket object providing ``name`` and ``new_key(key_name)``.
        log_file: writable text file receiving the missing-file report.

    Raises:
        KeyError: if a source document lacks ``fsPath`` or ``id``.
    """
    collection_name = obsId[:36]
    destination_collection = db['s3_' + collection_name]
    source_collection = db[collection_name]
    print('Start to exporting objectStore :%s ' % obsId)
    exported = 0
    total = source_collection.estimated_document_count()
    cursor = source_collection.find(no_cursor_timeout=True)
    try:
        for item in cursor:
            fs_path = item['fsPath']
            object_id = item['id']
            if not os.path.isfile(fs_path):
                # File objects have write(), not writeline(); the newline is
                # added explicitly so every missing file gets its own line.
                log_file.write(
                    'Missing file for objectStoreid: %s ObjectId:%s path: %s\n'
                    % (obsId, object_id, fs_path))
                continue
            key = bucket.new_key('%s/%s' % (obsId, object_id))
            key.set_contents_from_filename(fs_path)
            exported += 1
            # The migrated document gets a fresh _id on insert and points at
            # the S3 object instead of the local file.
            item.pop('_id', None)
            item.pop('fsPath')
            item['uri'] = 's3://%s/%s' % (bucket.name, key.name)
            destination_collection.insert_one(item)
            if exported % 1000 == 0:
                print("Exported %i/%i" % (exported, total))
    finally:
        # The cursor was opened with no_cursor_timeout, so it must be closed
        # explicitly, including when an upload or insert fails.
        cursor.close()
30
	
31

  
32

  
33

  
34

  
35

  
36

  
37

  
38

  
39
def start_import(metadataCollection, bucket, log_file):
    """Migrate every object store listed in a metadata collection.

    Connects to MongoDB with the driver defaults (``MongoClient()`` without
    arguments), opens the ``objectStore`` database and calls
    ``exportItemForMongoCollection`` once for the ``obsId`` of each document
    found in the collection named ``metadataCollection``.

    Args:
        metadataCollection: name of the collection listing the object stores.
        bucket: destination S3 bucket, passed through to the exporter.
        log_file: writable text file, passed through to the exporter.

    Raises:
        KeyError: if a metadata document has no ``obsId`` field.
    """
    client = MongoClient()
    try:
        db = client['objectStore']
        cursor = db[metadataCollection].find(no_cursor_timeout=True)
        try:
            for item in cursor:
                exportItemForMongoCollection(item['obsId'], db, bucket, log_file)
        finally:
            # Cursors opened with no_cursor_timeout are not reaped by the
            # server, so release this one even if an export fails.
            cursor.close()
    finally:
        client.close()
46

  
47

  
48

  
49

  
50

  
51
def _read_s3cfg(path):
    """Parse the ``key = value`` lines of a config file into a dict.

    Lines without ``=`` are ignored. Only the first ``=`` separates key from
    value, so values that themselves contain ``=`` are kept intact.
    """
    props = {}
    with open(path) as cfg:
        for line in cfg:
            name, separator, value = line.partition('=')
            if separator:
                props[name.strip()] = value.strip()
    return props


def main(args):
    """Run the migration from command-line arguments.

    Args:
        args: argument list in ``sys.argv`` form:
            ``[script, s3cfgPath, objectstoreBucket]``.

    Returns:
        0 after a completed run, 2 when the argument count is wrong.

    Raises:
        KeyError: if the config file lacks ``access_key``, ``secret_key`` or
            ``host_base``.
    """
    if len(args) != 3:
        sys.stderr.write("Error on applying script usage: python scriptName.py s3cfgPath objectstoreBucket\n")
        return 2

    props = _read_s3cfg(args[1])
    bname = args[2]
    conn = boto.connect_s3(
        aws_access_key_id=props['access_key'],
        aws_secret_access_key=props['secret_key'],
        host=props['host_base'],
        calling_format=boto.s3.connection.OrdinaryCallingFormat())
    bucket = conn.get_bucket(bname, validate=True)
    # The context manager closes (and flushes) the log even when the
    # migration aborts with an exception.
    with open('s3_migration.log', 'w') as log_file:
        start_import('metadataObjectStore', bucket, log_file)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

Also available in: Unified diff