Files
openstack-swift-backup-rest…/local_to_swift.py

317 lines
20 KiB
Python

__author__ = 'xgiovio'
import authentication
import utility
import os, sys,platform, math, hashlib,time
from utility import read_in_chunks
from time import time
import shutil
def launch(localpath,temp_dir,swift_container,prefix,size_limit_to_segment,size_limit_reading_os,upload,uploadlarge,fail_tries ,md5_compare, encrypted,encrypt_key,excluded_patterns,copy_to_dir):
    """Diff a local directory tree against a Swift container and upload the differences.

    Workflow:
      1. Validate paths/options (exits -2/-3/-4 on configuration errors).
      2. List local files and remote objects, build ``difffiles`` = files that are
         new/changed locally (by size, optionally by MD5, optionally on the
         encrypted name markers when ``encrypted``).
      3. Optionally copy the diff to ``copy_to_dir`` instead of uploading.
      4. If ``upload``: upload each diff file, splitting files larger than
         ``size_limit_to_segment`` into segment objects plus an
         ``X-Object-Manifest`` object (Swift DLO), retrying up to ``fail_tries``
         times and re-authenticating after each failure.

    localpath/temp_dir must end with the platform separator, prefix must end
    with '/', swift_container must have no leading/trailing slash (checked below).
    """
    # NOTE: the module-level 'from time import time' shadows the 'time' module,
    # so bind sleep() explicitly for the retry back-off in the upload loop.
    from time import sleep

    print ("Localpath " + localpath)
    print ("Temppath " + temp_dir)
    print ("Swift container " + swift_container)
    print ("Swift prefix " + prefix)
    print ("Segmentation Limit " + str(size_limit_to_segment))
    print ("Os reading Limit " + str(size_limit_reading_os))
    print ("Upload " + str(upload))
    print ("Upload large " + str(uploadlarge))
    print ("Fail tries " + str(fail_tries))
    print ("MD5 Compare " + str(md5_compare))
    print ("Encrypted " + str(encrypted))
    if encrypted:
        if copy_to_dir != None:
            print("Encryption must be false if you are copying diff files to another dir")
            sys.exit(-4)
        if encrypt_key == None:
            print("Error Bad Encryption Key")
            sys.exit(-3)
        else:
            print ("Encrypted key " + "hidden")
    if copy_to_dir != None:
        print ("Copy to dir " + copy_to_dir)
    print("___________")
    # checks
    utility.check_segments_size (size_limit_reading_os,size_limit_to_segment)
    if utility.check_start_slash(swift_container) or utility.check_start_slash(prefix) or not utility.check_end_slash(localpath) or not utility.check_end_slash(temp_dir) or utility.check_end_slash(swift_container) or not utility.check_end_slash(prefix):
        print ("Errors on local or remote paths. Checks \\ or / at the begin or end of each path")
        sys.exit(-2)
    localfiles = utility.get_local_files (localpath)
    print ("Files locali " + str(len(localfiles)))
    print("___________")
    swift_conn = authentication.set_authentication ()
    swift_conn,objects = utility.get_list(fail_tries,swift_conn,swift_container,prefix)
    byte0real,byte0manifest,swift_conn,remotefiles,remotefiles_md5 = utility.list_compute_correct_size (fail_tries,objects,swift_conn,swift_container,prefix)
    if encrypted:
        # maps "<name>_xg10v10_encrypted" -> full remote name carrying the
        # "_xg10v10_<md5>_xg10v10_<size>_xg10v10_<timestamp>" markers
        remotefiles_encr = utility.list_compute_correct_names_for_enctyption(objects,prefix)
    print ("Files remoti " + str(len(remotefiles)))
    difffiles = {}
    filterskipped = 0
    sizetoupload = 0
    filestoupload = 0
    largefilestoupload = 0
    largesizetoupload = 0
    encrypted_skipped_error = 0
    skipped_large_files = 0
    skipped_old_md5_large_files = 0
    if encrypted and upload:
        import utility_aes
        # encrypted copies are staged in a dedicated temp sub-folder
        if not os.path.isdir(temp_dir + "_xg10v10_encrypted"):
            os.makedirs(temp_dir + "_xg10v10_encrypted")
        temp_dir = temp_dir + "_xg10v10_encrypted" + utility.set_dash()
    print("Comparing local files with remote ones")
    if encrypted and upload:
        print("Encrypting local files for upload")
    for lname in localfiles.keys():
        upload_file = True
        if not uploadlarge and localfiles[lname] > size_limit_to_segment:
            upload_file = False
            # BUG FIX: size_limit_to_segment must be str()-ed before concatenation
            print("Skipped " + lname + " Upload of large files is disabled. Limit " + str(size_limit_to_segment))
            skipped_large_files = skipped_large_files + 1
        # 105 is the maximum string length appended to an encrypted file name : "_xg10v10_fd3b094fd9c48c6ee288f58c991dec9d_xg10v10_999000000000_xg10v10_1449108838_xg10v10_encrypted_0001"
        elif encrypted and (len(utility.file_only_name(lname,utility.set_dash())) + 105 ) > 255 :
            print("Skipped " + lname + " encryption: The name is too long for the filesystem " + str(len(utility.file_only_name(lname,utility.set_dash())) + 105))
            encrypted_skipped_error = encrypted_skipped_error + 1
            upload_file = False
        else:
            for pattern in excluded_patterns:
                if pattern in lname:
                    upload_file = False
                    print("Skipped " + lname + " due to filters : " + pattern )
                    filterskipped = filterskipped + 1
                    break
        if upload_file:
            if encrypted :
                lnameenc = lname + "_xg10v10_encrypted"
                xtime = str(int(time()))
                # new file, or the original/encrypted sizes embedded in the remote name differ
                if lnameenc not in remotefiles_encr.keys() or localfiles[lname] != int((remotefiles_encr[lnameenc]).split("_xg10v10_")[2]) or remotefiles[remotefiles_encr[lnameenc]] != utility.total_size_encrypted(localfiles[lname]) :
                    if upload:
                        with open(localpath + lname, 'rb') as f:
                            if not os.path.isdir(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1]):
                                os.makedirs(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1])
                            with open(temp_dir + lname, 'wb') as d:
                                aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
                                md5 = aes.encrypt()
                        encrypted_name = temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_" + xtime + "_xg10v10_encrypted"
                        if os.path.isfile(encrypted_name):
                            os.remove(encrypted_name)
                        os.rename(temp_dir + lname, encrypted_name)
                        # double check os.rename has removed the old file. maybe a bug
                        if os.path.isfile(temp_dir + lname):
                            os.remove(temp_dir + lname)
                        difffiles[lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_" + xtime + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
                    else :
                        # dry run: encryption pass not executed, so no real md5 is available
                        difffiles[lname + "_xg10v10_" + "md5_not_calculated" + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_" + xtime + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
                    sizetoupload = sizetoupload + utility.total_size_encrypted(localfiles[lname])
                    filestoupload = filestoupload + 1
                    if (localfiles[lname] > size_limit_to_segment):
                        largefilestoupload = largefilestoupload + 1
                        largesizetoupload = largesizetoupload + utility.total_size_encrypted(localfiles[lname])
                elif md5_compare :
                    # sizes match: fall back to comparing the md5 embedded in the remote name
                    local_hash = hashlib.md5()
                    with open(localpath + lname, 'rb') as f:
                        for piece in read_in_chunks(f,size_limit_reading_os):
                            local_hash.update(piece)
                    if local_hash.hexdigest() != (remotefiles_encr[lnameenc]).split("_xg10v10_")[1]:
                        md5 = local_hash.hexdigest()
                        if upload:
                            with open(localpath + lname, 'rb') as f:
                                # guard added for parity with the branch above: without it
                                # open() fails when the temp sub-folder does not exist yet
                                if not os.path.isdir(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1]):
                                    os.makedirs(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1])
                                with open(temp_dir + lname, 'wb') as d:
                                    aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
                                    # BUG FIX: encrypt() was never invoked here, which left the
                                    # staged temp file empty before the rename below
                                    aes.encrypt()
                            encrypted_name = temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_" + xtime + "_xg10v10_encrypted"
                            if os.path.isfile(encrypted_name):
                                os.remove(encrypted_name)
                            os.rename(temp_dir + lname, encrypted_name)
                            # double check os.rename has removed the old file. maybe a bug
                            if os.path.isfile(temp_dir + lname):
                                os.remove(temp_dir + lname)
                        difffiles[lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_" + xtime + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
                        sizetoupload = sizetoupload + utility.total_size_encrypted(localfiles[lname])
                        filestoupload = filestoupload + 1
                        if (localfiles[lname] > size_limit_to_segment):
                            largefilestoupload = largefilestoupload + 1
                            largesizetoupload = largesizetoupload + utility.total_size_encrypted(localfiles[lname])
            elif lname not in remotefiles.keys() or localfiles[lname] != remotefiles[lname]:
                difffiles[lname] = localfiles[lname]
                sizetoupload = sizetoupload + localfiles[lname]
                filestoupload = filestoupload + 1
                if (localfiles[lname] > size_limit_to_segment):
                    largefilestoupload = largefilestoupload + 1
                    largesizetoupload = largesizetoupload + localfiles[lname]
            elif md5_compare :
                # remotefiles_md5[lname] != "0" are md5 correctly calculated remote files
                if remotefiles_md5[lname] != "0":
                    local_hash = hashlib.md5()
                    with open(localpath + lname, 'rb') as f:
                        for piece in read_in_chunks(f,size_limit_reading_os):
                            local_hash.update(piece)
                    if local_hash.hexdigest() != (remotefiles_md5[lname]):
                        difffiles[lname] = localfiles[lname]
                        sizetoupload = sizetoupload + localfiles[lname]
                        filestoupload = filestoupload + 1
                        if (localfiles[lname] > size_limit_to_segment):
                            largefilestoupload = largefilestoupload + 1
                            largesizetoupload = largesizetoupload + localfiles[lname]
                else:
                    skipped_old_md5_large_files = skipped_old_md5_large_files + 1
    print("___________Differenze___________")
    print ("Files locali " + str(len(localfiles)))
    print ("Skipped due to filters " + str(filterskipped))
    if encrypted:
        print ("Skipped due to errors during encryption phase. The name is too long for the filesystem " + str(encrypted_skipped_error))
    if md5_compare:
        print ("Skipped md5 comparison on old large files without md5 embedded in x-object-manifest: " + str(skipped_old_md5_large_files))
    if not uploadlarge :
        print ("Skipped large files because uploadlarge is disabled: " + str(skipped_large_files))
    allskipped = len(localfiles) - filterskipped
    if encrypted:
        allskipped = allskipped - encrypted_skipped_error
    if md5_compare:
        allskipped = allskipped - skipped_old_md5_large_files
    if not uploadlarge:
        allskipped = allskipped - skipped_large_files
    print ("Files locali - skipped " + str(allskipped))
    print ("Files remoti " + str(len(remotefiles)))
    print ("Files remoti 0byte reali " + str(byte0real))
    print ("Files remoti 0byte large " + str(byte0manifest))
    print("___________Files to upload______")
    for files, sizes in difffiles.items():
        print(files,sizes)
    print("___________")
    print ("MBytes to upload " + str(sizetoupload/1000000))
    print ("Files to upload " + str(filestoupload))
    if uploadlarge:
        print ("Large files to upload " + str(largefilestoupload))
    print ("MB of normal files to upload " + str((sizetoupload - largesizetoupload)/1000000 ))
    if uploadlarge:
        print ("MB of large files to upload " + str(largesizetoupload/1000000 ))
    print("___________")

    def format_numbers_for_large_files (input_string, width) :
        # Left-pad input_string with zeros up to 'width' characters so segment
        # suffixes sort lexicographically.
        return input_string.rjust(width, "0")

    if copy_to_dir != None:
        remainingtocopy = sizetoupload
        for fname, fsize in difffiles.items():
            if not os.path.isdir(utility.folder_from_path(copy_to_dir + fname,utility.set_dash())[:-1]):
                os.makedirs(utility.folder_from_path(copy_to_dir + fname,utility.set_dash())[:-1])
            print("Copying : " + localpath + fname)
            with open(localpath + fname, 'rb') as f:
                with open(copy_to_dir + fname, 'wb') as d:
                    for piece in read_in_chunks(f,size_limit_reading_os):
                        d.write(piece)
            remainingtocopy = remainingtocopy - fsize
            print("Remaining to copy : " + str(remainingtocopy / 1000000) + " MB")
        print("Copy to dir terminated")
    if upload :
        remainingtoupload = sizetoupload
        errors_upload = 0
        skipped_uploads = 0
        for fname, fsize in difffiles.items():
            # md5 of the file's folder path, used to namespace its segment objects
            hash_dir = hashlib.md5((utility.folder_from_path(fname,utility.set_dash())[:-1]).encode("utf-8")).hexdigest()
            if encrypted:
                local_path_corrected = temp_dir
            else:
                local_path_corrected = localpath
            with open(local_path_corrected + fname, 'rb') as f:
                # progress flags so retries resume where the previous attempt failed
                large_segments_created = False
                large_segments_uploaded = False
                large_manifest_created = False
                for fail_tries_counter in range (fail_tries) :
                    try:
                        if fsize > size_limit_to_segment:
                            print("Uploading Large File: " + utility.dash_replace(prefix + fname) + " " + str(fsize) )
                            # width of the zero-padded numeric suffix of each segment file
                            pad_width = len(str(math.ceil( (fsize/size_limit_to_segment) * 10 )))
                            if not large_segments_created :
                                local_segments_dict = {}
                                local_segments_to_upload_dict = {}
                                counter = 0
                                seg_hash = hashlib.md5()
                                bytes_written = 0
                                for piece in read_in_chunks(f,size_limit_reading_os):
                                    seg_hash.update(piece)
                                    if bytes_written == 0:
                                        t = open(temp_dir + utility.file_only_name(fname,utility.set_dash()) + "_" + format_numbers_for_large_files(str(counter), pad_width),'wb')
                                    if (bytes_written + len(piece) <= size_limit_to_segment):
                                        # assumes size_limit_to_segment is a multiple of the read
                                        # chunk size (enforced by utility.check_segments_size
                                        # above), so a piece never straddles two segments
                                        t.write(piece)
                                        bytes_written = bytes_written + len(piece)
                                        local_segments_dict[utility.file_only_name(fname,utility.set_dash()) + "_" + format_numbers_for_large_files(str(counter), pad_width)] = bytes_written
                                    if bytes_written == size_limit_to_segment:
                                        bytes_written = 0
                                        counter = counter + 1
                                        t.close()
                                if bytes_written > 0 :
                                    counter = counter + 1
                                    # BUG FIX: flush/close the trailing partial segment before it
                                    # is re-opened for upload below
                                    t.close()
                                seg_hash = seg_hash.hexdigest()
                                large_segments_created = True
                            # remote prefix all segments of this file are stored under
                            seg_prefix = seg_hash + "_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/"
                            # check if there are uploaded segments
                            if not large_segments_uploaded:
                                _headers, remote_segments_list = swift_conn.get_container(swift_container + "_segments", prefix=seg_prefix, full_listing=True)
                                remote_segments_dict = {}
                                for o in remote_segments_list :
                                    # BUG FIX: strip the exact prefix the segments are uploaded
                                    # with (the original omitted hash_dir, so the resume check
                                    # never matched and segments were always re-uploaded)
                                    remote_segments_dict[o["name"].replace(seg_prefix, "")] = o["bytes"]
                                for local_segment_name,local_segment_size in local_segments_dict.items() :
                                    if (local_segment_name) not in remote_segments_dict.keys() or local_segment_size != remote_segments_dict[local_segment_name]:
                                        local_segments_to_upload_dict[local_segment_name] = local_segment_size
                                    else:
                                        print ("Segment " + local_segment_name + " already present " + str(local_segment_size))
                                # end check
                                for local_segments_to_upload_name,local_segments_to_upload_size in local_segments_to_upload_dict.items():
                                    with open(temp_dir + local_segments_to_upload_name,'rb') as t :
                                        print("Uploading Segment: " + local_segments_to_upload_name + " " + str(local_segments_to_upload_size))
                                        swift_conn.put_object(swift_container + "_segments",(seg_prefix + local_segments_to_upload_name),t,chunk_size=size_limit_reading_os)
                                large_segments_uploaded = True
                            if not large_manifest_created:
                                # zero-byte object whose X-Object-Manifest header points at the
                                # segment prefix (Swift Dynamic Large Object)
                                open(temp_dir + utility.file_only_name(fname,utility.set_dash()) + "_manifest",'wb').close()
                                with open(temp_dir + utility.file_only_name(fname,utility.set_dash()) + "_manifest",'rb') as t:
                                    print("Creating Manifest")
                                    swift_conn.put_object(swift_container,utility.dash_replace(prefix + fname),t,headers={"X-Object-Manifest":swift_container + "_segments/" + seg_prefix},chunk_size=size_limit_reading_os)
                                large_manifest_created = True
                            print("Deleting temporary data")
                            for i in range(counter):
                                os.remove(temp_dir + utility.file_only_name(fname,utility.set_dash()) + "_" + format_numbers_for_large_files(str(i), pad_width))
                            os.remove(temp_dir + utility.file_only_name(fname,utility.set_dash()) + "_manifest")
                        else:
                            print("Uploading File: " + utility.dash_replace(prefix + fname) + " " + str(fsize) )
                            swift_conn.put_object(swift_container,utility.dash_replace(prefix + fname),f,chunk_size=size_limit_reading_os)
                        remainingtoupload = remainingtoupload - fsize
                        print("Remaining to upload : " + str(remainingtoupload / 1000000) + " MB")
                    except Exception as e:
                        print("Exception during upload")
                        print(e)
                        # BUG FIX: 'time.sleep' raised AttributeError because the module name
                        # is shadowed by 'from time import time'; use the sleep bound above
                        sleep(1)
                        errors_upload = errors_upload + 1
                        # token may have expired: re-authenticate before retrying
                        swift_conn = authentication.set_authentication ()
                        if fail_tries_counter == fail_tries - 1 :
                            print("Maximum tries reached. Skipping upload of the file")
                            skipped_uploads = skipped_uploads + 1
                    else:
                        break
            if encrypted:
                # the staged encrypted copy is no longer needed
                os.remove(local_path_corrected + fname)
        if encrypted:
            shutil.rmtree(temp_dir)
        print("Upload Terminated : Remaining MB to upload " + str(remainingtoupload))
        print("Errors during upload : " + str(errors_upload))
        print("Skipped files during upload : " + str(skipped_uploads))
    else:
        print("Upload Disabled")
    swift_conn.close()