309 lines
19 KiB
Python
309 lines
19 KiB
Python
__author__ = 'xgiovio'
|
|
|
|
import authentication
|
|
import utility
|
|
import os, sys,platform, math, hashlib,time
|
|
from utility import read_in_chunks
|
|
import shutil
|
|
|
|
def launch(localpath,temp_dir,swift_container,prefix,size_limit_to_segment,size_limit_reading_os,upload,uploadlarge,fail_tries ,md5_compare, encrypted,encrypt_key,excluded_patterns,copy_to_dir):
|
|
print ("Localpath " + localpath)
|
|
print ("Temppath " + temp_dir)
|
|
print ("Swift container " + swift_container)
|
|
print ("Swift prefix " + prefix)
|
|
print ("Segmentation Limit " + str(size_limit_to_segment))
|
|
print ("Os reading Limit " + str(size_limit_reading_os))
|
|
print ("Upload " + str(upload))
|
|
print ("Upload large " + str(uploadlarge))
|
|
print ("Fail tries " + str(fail_tries))
|
|
print ("MD5 Compare " + str(md5_compare))
|
|
print ("Encrypted " + str(encrypted))
|
|
if encrypted:
|
|
if copy_to_dir != None:
|
|
print("Encryption must be false if you are copying diff files to another dir")
|
|
sys.exit(-4)
|
|
if encrypt_key == None:
|
|
print("Error Bad Encryption Key")
|
|
sys.exit(-3)
|
|
else:
|
|
print ("Encrypted key " + "hidden")
|
|
if copy_to_dir != None:
|
|
print ("Copy to dir " + copy_to_dir)
|
|
print("___________")
|
|
|
|
|
|
|
|
#checks
|
|
utility.check_segments_size (size_limit_reading_os,size_limit_to_segment)
|
|
if utility.check_start_slash(swift_container) or utility.check_start_slash(prefix) or not utility.check_end_slash(localpath) or not utility.check_end_slash(temp_dir) or utility.check_end_slash(swift_container) or not utility.check_end_slash(prefix):
|
|
print ("Errors on local or remote paths. Checks \\ or / at the begin or end of each path")
|
|
sys.exit(-2)
|
|
localfiles = utility.get_local_files (localpath)
|
|
print ("Files locali " + str(len(localfiles)))
|
|
print("___________")
|
|
swift_conn = authentication.set_authentication ()
|
|
swift_conn,objects = utility.get_list(fail_tries,swift_conn,swift_container,prefix)
|
|
byte0real,byte0manifest,swift_conn,remotefiles,remotefiles_md5 = utility.list_compute_correct_size (fail_tries,objects,swift_conn,swift_container,prefix)
|
|
if encrypted:
|
|
remotefiles_encr = utility.list_compute_correct_names_for_enctyption(objects,prefix)
|
|
|
|
print ("Files remoti " + str(len(remotefiles)))
|
|
|
|
difffiles = {}
|
|
filterskipped = 0
|
|
sizetoupload=0
|
|
filestoupload = 0
|
|
largefilestoupload = 0
|
|
largesizetoupload= 0
|
|
encrypted_skipped_error = 0
|
|
skipped_large_files = 0
|
|
skipped_old_md5_large_files = 0
|
|
if encrypted and upload :
|
|
import utility_aes
|
|
if not os.path.isdir(temp_dir + "_xg10v10_encrypted"):
|
|
os.makedirs(temp_dir + "_xg10v10_encrypted")
|
|
temp_dir = temp_dir + "_xg10v10_encrypted" + utility.set_dash()
|
|
|
|
print("Comparing local files with remote ones")
|
|
if encrypted and upload:
|
|
print("Encrypting local files for upload")
|
|
for lname in localfiles.keys():
|
|
#print("Comparing " + lname)
|
|
upload_file = True
|
|
#80 is the maximum string length appended to an encrypted file name : "_xg10v10_fd3b094fd9c48c6ee288f58c991dec9d_xg10v10_999000000000_xg10v10_encrypted"
|
|
if not uploadlarge and localfiles[lname] > size_limit_to_segment:
|
|
upload_file = False
|
|
print("Skipped " + lname + " Upload of large files is disabled. Limit " + size_limit_to_segment)
|
|
skipped_large_files = skipped_large_files + 1
|
|
elif encrypted and (len(utility.file_only_name(lname,utility.set_dash())) + 80 ) > 255 :
|
|
print("Skipped " + lname + " encryption: The name is too long for the filesystem " + str(len(utility.file_only_name(lname,utility.set_dash())) + 80))
|
|
encrypted_skipped_error = encrypted_skipped_error + 1
|
|
upload_file = False
|
|
else:
|
|
for pattern in excluded_patterns:
|
|
if pattern in lname:
|
|
upload_file = False
|
|
print("Skipped " + lname + " due to filters : " + pattern )
|
|
filterskipped = filterskipped + 1
|
|
break
|
|
if upload_file:
|
|
if encrypted :
|
|
lnameenc = lname + "_xg10v10_encrypted"
|
|
if lnameenc not in remotefiles_encr.keys() or localfiles[lname] != int((remotefiles_encr[lnameenc]).split("_xg10v10_")[2]) or remotefiles[remotefiles_encr[lnameenc]] != utility.total_size_encrypted(localfiles[lname]) :
|
|
if upload:
|
|
with open(localpath + lname, 'rb') as f:
|
|
if not os.path.isdir(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1]):
|
|
os.makedirs(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1])
|
|
with open(temp_dir + lname, 'wb') as d:
|
|
aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
|
|
md5 = aes.encrypt()
|
|
if os.path.isfile(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"):
|
|
os.remove(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
|
|
os.rename (temp_dir + lname, temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
|
|
difffiles[lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
|
|
else :
|
|
difffiles[lname + "_xg10v10_" + "md5_not_calculated" + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
|
|
sizetoupload = sizetoupload + utility.total_size_encrypted(localfiles[lname])
|
|
filestoupload = filestoupload + 1
|
|
if (localfiles[lname] > size_limit_to_segment):
|
|
largefilestoupload = largefilestoupload + 1
|
|
largesizetoupload = largesizetoupload + utility.total_size_encrypted(localfiles[lname])
|
|
elif md5_compare :
|
|
hash = hashlib.md5()
|
|
with open(localpath + lname, 'rb') as f:
|
|
for piece in read_in_chunks(f,size_limit_reading_os):
|
|
hash.update(piece)
|
|
if hash.hexdigest() != (remotefiles_encr[lnameenc]).split("_xg10v10_")[1]:
|
|
md5 = hash.hexdigest()
|
|
if upload:
|
|
with open(localpath + lname, 'rb') as f:
|
|
with open(temp_dir + lname, 'wb') as d:
|
|
aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
|
|
if os.path.isfile(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"):
|
|
os.remove(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
|
|
os.rename (temp_dir + lname, temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
|
|
difffiles[lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
|
|
sizetoupload = sizetoupload + utility.total_size_encrypted(localfiles[lname])
|
|
filestoupload = filestoupload + 1
|
|
if (localfiles[lname] > size_limit_to_segment):
|
|
largefilestoupload = largefilestoupload + 1
|
|
largesizetoupload = largesizetoupload + utility.total_size_encrypted(localfiles[lname])
|
|
elif lname not in remotefiles.keys() or localfiles[lname] != remotefiles[lname]:
|
|
difffiles[lname] = localfiles[lname]
|
|
sizetoupload = sizetoupload + localfiles[lname]
|
|
filestoupload = filestoupload + 1
|
|
if (localfiles[lname] > size_limit_to_segment):
|
|
largefilestoupload = largefilestoupload + 1
|
|
largesizetoupload = largesizetoupload + localfiles[lname]
|
|
elif md5_compare :
|
|
#remotefiles_md5[lname]!= "0" are md5 correctly calculated remote files
|
|
if remotefiles_md5[lname]!= "0":
|
|
hash = hashlib.md5()
|
|
with open(localpath + lname, 'rb') as f:
|
|
for piece in read_in_chunks(f,size_limit_reading_os):
|
|
hash.update(piece)
|
|
if hash.hexdigest() != (remotefiles_md5[lname]):
|
|
difffiles[lname] = localfiles[lname]
|
|
sizetoupload = sizetoupload + localfiles[lname]
|
|
filestoupload = filestoupload + 1
|
|
if (localfiles[lname] > size_limit_to_segment):
|
|
largefilestoupload = largefilestoupload + 1
|
|
largesizetoupload = largesizetoupload + localfiles[lname]
|
|
else:
|
|
skipped_old_md5_large_files = skipped_old_md5_large_files + 1
|
|
print("___________Differenze___________")
|
|
print ("Files locali " + str(len(localfiles)))
|
|
print ("Skipped due to filters " + str(filterskipped))
|
|
if encrypted:
|
|
print ("Skipped due to errors during encrpytion phase. The name is too long for the filesystem " + str(encrypted_skipped_error))
|
|
if md5_compare:
|
|
print ("Skipped md5 comparison on old large files without md5 embedded in x-object-manifest: " + str(skipped_old_md5_large_files))
|
|
if not uploadlarge :
|
|
print ("Skipped large files because uploadlarge is disabled: " + str(skipped_large_files))
|
|
allskipped = len(localfiles) - filterskipped
|
|
if encrypted:
|
|
allskipped = allskipped - encrypted_skipped_error
|
|
if md5_compare:
|
|
allskipped = allskipped - skipped_old_md5_large_files
|
|
if not uploadlarge:
|
|
allskipped = allskipped - skipped_large_files
|
|
print ("Files locali - skipped " + str(allskipped))
|
|
print ("Files remoti " + str(len(remotefiles)))
|
|
print ("Files remoti 0byte reali " + str(byte0real))
|
|
print ("Files remoti 0byte large " + str(byte0manifest))
|
|
|
|
print("___________Files to upload______")
|
|
for files, sizes in difffiles.items():
|
|
print(files,sizes)
|
|
print("___________")
|
|
print ("MBytes to upload " + str(sizetoupload/1000000))
|
|
print ("Files to upload " + str(filestoupload))
|
|
if uploadlarge:
|
|
print ("Large files to upload " + str(largefilestoupload))
|
|
print ("MB of normal files to upload " + str((sizetoupload - largesizetoupload)/1000000 ))
|
|
if uploadlarge:
|
|
print ("MB of large files to upload " + str(largesizetoupload/1000000 ))
|
|
print("___________")
|
|
|
|
|
|
|
|
def format_numbers_for_large_files (input_string, width) :
|
|
ret = ""
|
|
for i in range(width - len(input_string)):
|
|
ret = ret + "0"
|
|
ret = ret + input_string
|
|
return ret
|
|
|
|
|
|
if copy_to_dir != None:
|
|
remainingtocopy = sizetoupload
|
|
for file, size in difffiles.items():
|
|
if not os.path.isdir(utility.folder_from_path(copy_to_dir + file,utility.set_dash())[:-1]):
|
|
os.makedirs(utility.folder_from_path(copy_to_dir + file,utility.set_dash())[:-1])
|
|
print("Copying : " + localpath + file)
|
|
with open(localpath + file, 'rb') as f:
|
|
with open(copy_to_dir + file, 'wb') as d:
|
|
for piece in read_in_chunks(f,size_limit_reading_os):
|
|
d.write(piece)
|
|
remainingtocopy = remainingtocopy - size
|
|
print("Remaining to copy : " + str(remainingtocopy / 1000000) + " MB")
|
|
print("Copy to dir terminated")
|
|
|
|
if upload :
|
|
remainingtoupload = sizetoupload
|
|
errors_upload = 0
|
|
skipped_uploads = 0
|
|
for file, size in difffiles.items():
|
|
hash_dir = hashlib.md5()
|
|
hash_dir.update((utility.folder_from_path(file,utility.set_dash())[:-1]).encode("utf-8"))
|
|
hash_dir = hash_dir.hexdigest()
|
|
if encrypted:
|
|
local_path_corrected =temp_dir
|
|
else:
|
|
local_path_corrected =localpath
|
|
with open(local_path_corrected + file, 'rb') as f:
|
|
large_segments_created = False
|
|
large_segments_uploaded = False
|
|
large_manifest_created = False
|
|
for fail_tries_counter in range (fail_tries) :
|
|
try:
|
|
if size > size_limit_to_segment:
|
|
print("Uploading Large File: " + utility.dash_replace(prefix + file) + " " + str(size) )
|
|
if not large_segments_created :
|
|
local_segments_dict = {}
|
|
local_segments_to_upload_dict = {}
|
|
counter = 0
|
|
hash = hashlib.md5()
|
|
bytes_written = 0
|
|
for piece in read_in_chunks(f,size_limit_reading_os):
|
|
hash.update(piece)
|
|
if bytes_written == 0:
|
|
t = open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(counter),len(str(math.ceil( (size/size_limit_to_segment) * 10 ))))),'wb')
|
|
if (bytes_written + len(piece) <= size_limit_to_segment):
|
|
t.write(piece)
|
|
bytes_written = bytes_written +len(piece)
|
|
local_segments_dict[utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(counter),len(str(math.ceil( (size/size_limit_to_segment) * 10 )))))] = bytes_written
|
|
if bytes_written == size_limit_to_segment:
|
|
bytes_written = 0
|
|
counter = counter + 1
|
|
t.close()
|
|
if bytes_written > 0 :
|
|
counter = counter + 1
|
|
hash = hash.hexdigest()
|
|
large_segments_created = True
|
|
# check if there are uploaded segments
|
|
if not large_segments_uploaded:
|
|
headers,remote_segments_list = swift_conn.get_container(swift_container + "_segments", prefix =hash + "_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/",full_listing=True )
|
|
remote_segments_dict = {}
|
|
for o in remote_segments_list :
|
|
remote_segments_dict[o["name"].replace(hash +"_xg10v10_" + str(size_limit_to_segment) + "/","")] = o["bytes"]
|
|
for local_segment_name,local_segment_size in local_segments_dict.items() :
|
|
if (local_segment_name) not in remote_segments_dict.keys() or local_segment_size != remote_segments_dict[local_segment_name]:
|
|
local_segments_to_upload_dict[local_segment_name] = local_segment_size
|
|
else:
|
|
print ("Segment " + local_segment_name + " aready present " + str(local_segment_size))
|
|
#end check
|
|
for local_segments_to_upload_name,local_segments_to_upload_size in local_segments_to_upload_dict.items():
|
|
with open(temp_dir + local_segments_to_upload_name,'rb') as t :
|
|
print("Uploading Segment: " + local_segments_to_upload_name + " " + str(local_segments_to_upload_size))
|
|
swift_conn.put_object(swift_container + "_segments",(hash +"_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/" + local_segments_to_upload_name),t,chunk_size=size_limit_reading_os)
|
|
large_segments_uploaded = True
|
|
if not large_manifest_created:
|
|
open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest",'wb').close()
|
|
with open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest",'rb') as t:
|
|
print("Creating Manifest")
|
|
swift_conn.put_object(swift_container,utility.dash_replace(prefix + file),t,headers={"X-Object-Manifest":swift_container + "_segments/" + hash +"_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/"},chunk_size=size_limit_reading_os)
|
|
large_manifest_created = True
|
|
print("Deleting temporary data")
|
|
for i in range(counter):
|
|
os.remove(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(i),len(str(math.ceil( (size/size_limit_to_segment) * 10 ))))))
|
|
os.remove(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest")
|
|
else:
|
|
print("Uploading File: " + utility.dash_replace(prefix + file) + " " + str(size) )
|
|
swift_conn.put_object(swift_container,utility.dash_replace(prefix + file),f,chunk_size=size_limit_reading_os)
|
|
remainingtoupload = remainingtoupload - size
|
|
print("Remaining to upload : " + str(remainingtoupload / 1000000) + " MB")
|
|
except Exception as e:
|
|
print("Exception during upload")
|
|
print(e)
|
|
time.sleep(1)
|
|
errors_upload = errors_upload + 1
|
|
swift_conn = authentication.set_authentication ()
|
|
if fail_tries_counter == fail_tries - 1 :
|
|
print("Maximum tries reached. Skipping upload of the file")
|
|
skipped_uploads = skipped_uploads + 1
|
|
else:
|
|
break
|
|
if encrypted:
|
|
os.remove(local_path_corrected + file)
|
|
if encrypted:
|
|
shutil.rmtree(temp_dir)
|
|
print("Upload Terminated : Remaining MB to upload " + str(remainingtoupload))
|
|
print("Errors during upload : " + str(errors_upload))
|
|
print("Skipped files during upload : " + str(skipped_uploads))
|
|
|
|
else:
|
|
print("Upload Disabled")
|
|
|
|
swift_conn.close()
|