From c87da373cd9865250edaa33152d676eea7a1c39c Mon Sep 17 00:00:00 2001 From: Giovanni Di Grezia Date: Tue, 1 Dec 2015 12:42:50 +0100 Subject: [PATCH] first release. readme will follow up --- authentication.py | 31 +++ generate_temp_url.py | 44 ++++ linux_launch_monitor.py | 87 +++++++ local_to_swift.py | 308 +++++++++++++++++++++++ settings_generate_temp_url.py | 13 + settings_local_to_swift.py | 45 ++++ settings_swift_delete_orphan_segments.py | 9 + settings_swift_to_local.py | 31 +++ swift_delete_orphan_segments.py | 89 +++++++ swift_to_local.py | 160 ++++++++++++ utility.py | 222 ++++++++++++++++ utility_aes.py | 102 ++++++++ 12 files changed, 1141 insertions(+) create mode 100644 authentication.py create mode 100644 generate_temp_url.py create mode 100644 linux_launch_monitor.py create mode 100644 local_to_swift.py create mode 100644 settings_generate_temp_url.py create mode 100644 settings_local_to_swift.py create mode 100644 settings_swift_delete_orphan_segments.py create mode 100644 settings_swift_to_local.py create mode 100644 swift_delete_orphan_segments.py create mode 100644 swift_to_local.py create mode 100644 utility.py create mode 100644 utility_aes.py diff --git a/authentication.py b/authentication.py new file mode 100644 index 0000000..5e0a240 --- /dev/null +++ b/authentication.py @@ -0,0 +1,31 @@ +__author__ = 'xgiovio' + +#######################################authentication +authentication="v1" +#######################################types of authentication +#authentication="v1" +swift_user = "########" +swift_pass = "########" +swift_auth = "https://www.########" +#authentication="pre" +url = "https://########/v1/AUTH_###############" +tok ="########" +#######################################end authentication +timeout = 3 #sec +insecure = False + + +import swiftclient + +def set_authentication (): + if authentication=="v1": + swift_conn = swiftclient.client.Connection(authurl=swift_auth, user=swift_user, key=swift_pass, timeout = timeout,insecure 
= insecure)
+        print("Using v1 authentication")
+    elif authentication=="pre":
+        swift_conn = swiftclient.client.Connection(preauthurl= url,preauthtoken=tok,timeout = timeout,insecure = insecure)
+        print("Using pre authentication")
+    else:
+        #v1 again
+        swift_conn = swiftclient.client.Connection(authurl=swift_auth, user=swift_user, key=swift_pass,timeout = timeout,insecure = insecure)
+        print("Using v1/alternative authentication")
+    return swift_conn
diff --git a/generate_temp_url.py b/generate_temp_url.py
new file mode 100644
index 0000000..f8784f9
--- /dev/null
+++ b/generate_temp_url.py
@@ -0,0 +1,44 @@
+__author__ = 'xgiovio'
+
+import authentication, swiftclient
+import hmac,sys
+from hashlib import sha1
+from time import time, sleep  # 'time' here is the function, so the module name is shadowed; import sleep explicitly
+
+def launch(secretkey,set_secretkey,create_temp_url,duration_in_seconds,objectpath,fail_tries):
+    # Sets or reads the account temp-url secret key (with retry), then optionally
+    # builds a signed Swift TempURL for objectpath valid for duration_in_seconds.
+    swift_conn = authentication.set_authentication ()
+    storageurl,_ = swift_conn.get_auth()
+    #print(storageurl)
+    for fail_tries_counter in range (fail_tries) :
+        try:
+            if set_secretkey:
+                swift_conn.post_account({"x-account-meta-temp-url-key":secretkey})
+            else:
+                headers = swift_conn.head_account()
+                secretkey = headers['x-account-meta-temp-url-key']
+        except Exception as e:
+            print("Exception during setting / getting the secret key.")
+            print(e)
+            sleep(1)  # was time.sleep(1): AttributeError, since 'time' is the imported function, not the module
+            if fail_tries_counter == fail_tries - 1 :
+                print("Maximum tries reached. Exiting.")
+                sys.exit(-1)
+            else:
+                swift_conn = authentication.set_authentication ()
+        else :
+            break
+    print("Secretkey " + secretkey)
+    if create_temp_url :
+        storageurl = storageurl.replace("https://","")
+        method = 'GET'
+        expires = int(time() + duration_in_seconds)
+        path = "/" + storageurl.split("/")[1] + "/" + storageurl.split("/")[2] + "/" + objectpath
+        key = secretkey
+        hmac_body = '%s\n%s\n%s' % (method, expires, path)
+        sig = hmac.new(key.encode('utf-8'), hmac_body.encode('utf-8'), sha1).hexdigest()  # hmac.new requires bytes on Python 3
+        s = 'https://{host}{path}?temp_url_sig={sig}&temp_url_expires={expires}'
+        url = s.format(host=storageurl.split("/")[0], path=path, sig=sig, expires=expires)
+
+        print(url)
+
diff --git a/linux_launch_monitor.py b/linux_launch_monitor.py
new file mode 100644
index 0000000..092840d
--- /dev/null
+++ b/linux_launch_monitor.py
@@ -0,0 +1,87 @@
+
+import os
+os.environ['COLUMNS'] = "1024"
+import time
+import subprocess
+import sys
+
+python3binpath = "/########/python3"
+scriptpath = "/########.py"
+outpath = "out_monitor.txt"
+time_sleep_creation = 10 #sec
+base_time_sleep_active_def = 60 #sec
+time_sleep_inactive = 15 #sec
+max_time_sleep_active = 600 #sec
+time_sleep_exception = 300 #sec
+
+
+def split_and_remove_null_strings (inputstr):
+    # Split on spaces and drop empty tokens (collapses runs of spaces in top(1) rows).
+    to_ret =[]
+    for s in inputstr.split(" "):
+        if s != "":
+            to_ret.append(s)
+    return to_ret
+
+def get_cpu_util(pattern) :
+    # Return [pid, %cpu] of the last 'top -b -n 1' row containing pattern, or [0, 0] if absent.
+    out = subprocess.check_output(["top", "-b","-n", "1"])
+    out = out.decode("utf-8")
+    #print(out)
+
+    out = out.split("\n")
+    row= ""
+    for s in out:
+        if pattern in s:
+            row =s
+    if row == "":
+        return [0, 0]
+    row = split_and_remove_null_strings (row)
+    print (row)
+    pid = row[0]
+    cpu = row[-6]
+    return [int(pid), float(cpu)]
+poll=0
+p=None
+base_time_sleep_active = base_time_sleep_active_def
+try:
+    while True:
+        pid,cpu = get_cpu_util (scriptpath)
+        if pid == 0:
+            print ("Starting " + scriptpath)
+            p = subprocess.Popen(['nohup', python3binpath, "-u", scriptpath], stdout=open(outpath, 'wb'), stderr=subprocess.STDOUT)  # '>'/'&' are shell syntax, not argv; redirect via stdout= instead
+            time.sleep(time_sleep_creation)
+
counter = 0 + while counter < 10: + pid,cpu = get_cpu_util (scriptpath) + if pid == 0 or (p and p.poll()): + print(scriptpath + " completed. Exiting") + poll=1 + sys.exit(0) + if cpu > 0: + print ("Process active" ) + time.sleep(base_time_sleep_active) + base_time_sleep_active = min(base_time_sleep_active + 30, max_time_sleep_active ) + counter = 0 + else: + print ("Process inactive, attempt " + str(counter) ) + time.sleep(time_sleep_inactive) + counter = counter + 1 + base_time_sleep_active = base_time_sleep_active_def + print ("Killing") + out = subprocess.call(["kill", "-9", str(pid)]) +except: + while True : + print("Monitor Exception") + if p and not p.poll() and poll==0: + print("Waiting " + scriptpath + " to finish") + time.sleep(time_sleep_exception) + else: + if not p: + print(scriptpath + " not launched by monitor. Exiting") + else: + print(scriptpath + " completed. Exiting") + sys.exit(-1) + + + + + diff --git a/local_to_swift.py b/local_to_swift.py new file mode 100644 index 0000000..b137d82 --- /dev/null +++ b/local_to_swift.py @@ -0,0 +1,308 @@ +__author__ = 'xgiovio' + +import authentication +import utility +import os, sys,platform, math, hashlib,time +from utility import read_in_chunks +import shutil + +def launch(localpath,temp_dir,swift_container,prefix,size_limit_to_segment,size_limit_reading_os,upload,uploadlarge,fail_tries ,md5_compare, encrypted,encrypt_key,excluded_patterns,copy_to_dir): + print ("Localpath " + localpath) + print ("Temppath " + temp_dir) + print ("Swift container " + swift_container) + print ("Swift prefix " + prefix) + print ("Segmentation Limit " + str(size_limit_to_segment)) + print ("Os reading Limit " + str(size_limit_reading_os)) + print ("Upload " + str(upload)) + print ("Upload large " + str(uploadlarge)) + print ("Fail tries " + str(fail_tries)) + print ("MD5 Compare " + str(md5_compare)) + print ("Encrypted " + str(encrypted)) + if encrypted: + if copy_to_dir != None: + print("Encryption must be false if you are 
copying diff files to another dir") + sys.exit(-4) + if encrypt_key == None: + print("Error Bad Encryption Key") + sys.exit(-3) + else: + print ("Encrypted key " + "hidden") + if copy_to_dir != None: + print ("Copy to dir " + copy_to_dir) + print("___________") + + + + #checks + utility.check_segments_size (size_limit_reading_os,size_limit_to_segment) + if utility.check_start_slash(swift_container) or utility.check_start_slash(prefix) or not utility.check_end_slash(localpath) or not utility.check_end_slash(temp_dir) or utility.check_end_slash(swift_container) or not utility.check_end_slash(prefix): + print ("Errors on local or remote paths. Checks \\ or / at the begin or end of each path") + sys.exit(-2) + localfiles = utility.get_local_files (localpath) + print ("Files locali " + str(len(localfiles))) + print("___________") + swift_conn = authentication.set_authentication () + swift_conn,objects = utility.get_list(fail_tries,swift_conn,swift_container,prefix) + byte0real,byte0manifest,swift_conn,remotefiles,remotefiles_md5 = utility.list_compute_correct_size (fail_tries,objects,swift_conn,swift_container,prefix) + if encrypted: + remotefiles_encr = utility.list_compute_correct_names_for_enctyption(objects,prefix) + + print ("Files remoti " + str(len(remotefiles))) + + difffiles = {} + filterskipped = 0 + sizetoupload=0 + filestoupload = 0 + largefilestoupload = 0 + largesizetoupload= 0 + encrypted_skipped_error = 0 + skipped_large_files = 0 + skipped_old_md5_large_files = 0 + if encrypted and upload : + import utility_aes + if not os.path.isdir(temp_dir + "_xg10v10_encrypted"): + os.makedirs(temp_dir + "_xg10v10_encrypted") + temp_dir = temp_dir + "_xg10v10_encrypted" + utility.set_dash() + + print("Comparing local files with remote ones") + if encrypted and upload: + print("Encrypting local files for upload") + for lname in localfiles.keys(): + #print("Comparing " + lname) + upload_file = True + #80 is the maximum string length appended to an encrypted file name 
: "_xg10v10_fd3b094fd9c48c6ee288f58c991dec9d_xg10v10_999000000000_xg10v10_encrypted"
+        if not uploadlarge and localfiles[lname] > size_limit_to_segment:
+            upload_file = False
+            print("Skipped " + lname + " Upload of large files is disabled. Limit " + str(size_limit_to_segment))  # str() required: int + str raises TypeError
+            skipped_large_files = skipped_large_files + 1
+        elif encrypted and (len(utility.file_only_name(lname,utility.set_dash())) + 80 ) > 255 :
+            print("Skipped " + lname + " encryption: The name is too long for the filesystem " + str(len(utility.file_only_name(lname,utility.set_dash())) + 80))
+            encrypted_skipped_error = encrypted_skipped_error + 1
+            upload_file = False
+        else:
+            for pattern in excluded_patterns:
+                if pattern in lname:
+                    upload_file = False
+                    print("Skipped " + lname + " due to filters : " + pattern )
+                    filterskipped = filterskipped + 1
+                    break
+        if upload_file:
+            if encrypted :
+                lnameenc = lname + "_xg10v10_encrypted"
+                if lnameenc not in remotefiles_encr.keys() or localfiles[lname] != int((remotefiles_encr[lnameenc]).split("_xg10v10_")[2]) or remotefiles[remotefiles_encr[lnameenc]] != utility.total_size_encrypted(localfiles[lname]) :
+                    if upload:
+                        with open(localpath + lname, 'rb') as f:
+                            if not os.path.isdir(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1]):
+                                os.makedirs(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1])
+                            with open(temp_dir + lname, 'wb') as d:
+                                aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
+                                md5 = aes.encrypt()
+                        if os.path.isfile(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"):
+                            os.remove(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
+                        os.rename (temp_dir + lname, temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
+                        difffiles[lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] =
utility.total_size_encrypted(localfiles[lname]) + else : + difffiles[lname + "_xg10v10_" + "md5_not_calculated" + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname]) + sizetoupload = sizetoupload + utility.total_size_encrypted(localfiles[lname]) + filestoupload = filestoupload + 1 + if (localfiles[lname] > size_limit_to_segment): + largefilestoupload = largefilestoupload + 1 + largesizetoupload = largesizetoupload + utility.total_size_encrypted(localfiles[lname]) + elif md5_compare : + hash = hashlib.md5() + with open(localpath + lname, 'rb') as f: + for piece in read_in_chunks(f,size_limit_reading_os): + hash.update(piece) + if hash.hexdigest() != (remotefiles_encr[lnameenc]).split("_xg10v10_")[1]: + md5 = hash.hexdigest() + if upload: + with open(localpath + lname, 'rb') as f: + with open(temp_dir + lname, 'wb') as d: + aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key) + if os.path.isfile(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"): + os.remove(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted") + os.rename (temp_dir + lname, temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted") + difffiles[lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname]) + sizetoupload = sizetoupload + utility.total_size_encrypted(localfiles[lname]) + filestoupload = filestoupload + 1 + if (localfiles[lname] > size_limit_to_segment): + largefilestoupload = largefilestoupload + 1 + largesizetoupload = largesizetoupload + utility.total_size_encrypted(localfiles[lname]) + elif lname not in remotefiles.keys() or localfiles[lname] != remotefiles[lname]: + difffiles[lname] = localfiles[lname] + sizetoupload = sizetoupload + localfiles[lname] + filestoupload = filestoupload + 1 + if 
(localfiles[lname] > size_limit_to_segment): + largefilestoupload = largefilestoupload + 1 + largesizetoupload = largesizetoupload + localfiles[lname] + elif md5_compare : + #remotefiles_md5[lname]!= "0" are md5 correctly calculated remote files + if remotefiles_md5[lname]!= "0": + hash = hashlib.md5() + with open(localpath + lname, 'rb') as f: + for piece in read_in_chunks(f,size_limit_reading_os): + hash.update(piece) + if hash.hexdigest() != (remotefiles_md5[lname]): + difffiles[lname] = localfiles[lname] + sizetoupload = sizetoupload + localfiles[lname] + filestoupload = filestoupload + 1 + if (localfiles[lname] > size_limit_to_segment): + largefilestoupload = largefilestoupload + 1 + largesizetoupload = largesizetoupload + localfiles[lname] + else: + skipped_old_md5_large_files = skipped_old_md5_large_files + 1 + print("___________Differenze___________") + print ("Files locali " + str(len(localfiles))) + print ("Skipped due to filters " + str(filterskipped)) + if encrypted: + print ("Skipped due to errors during encrpytion phase. 
The name is too long for the filesystem " + str(encrypted_skipped_error)) + if md5_compare: + print ("Skipped md5 comparison on old large files without md5 embedded in x-object-manifest: " + str(skipped_old_md5_large_files)) + if not uploadlarge : + print ("Skipped large files because uploadlarge is disabled: " + str(skipped_large_files)) + allskipped = len(localfiles) - filterskipped + if encrypted: + allskipped = allskipped - encrypted_skipped_error + if md5_compare: + allskipped = allskipped - skipped_old_md5_large_files + if not uploadlarge: + allskipped = allskipped - skipped_large_files + print ("Files locali - skipped " + str(allskipped)) + print ("Files remoti " + str(len(remotefiles))) + print ("Files remoti 0byte reali " + str(byte0real)) + print ("Files remoti 0byte large " + str(byte0manifest)) + + print("___________Files to upload______") + for files, sizes in difffiles.items(): + print(files,sizes) + print("___________") + print ("MBytes to upload " + str(sizetoupload/1000000)) + print ("Files to upload " + str(filestoupload)) + if uploadlarge: + print ("Large files to upload " + str(largefilestoupload)) + print ("MB of normal files to upload " + str((sizetoupload - largesizetoupload)/1000000 )) + if uploadlarge: + print ("MB of large files to upload " + str(largesizetoupload/1000000 )) + print("___________") + + + + def format_numbers_for_large_files (input_string, width) : + ret = "" + for i in range(width - len(input_string)): + ret = ret + "0" + ret = ret + input_string + return ret + + + if copy_to_dir != None: + remainingtocopy = sizetoupload + for file, size in difffiles.items(): + if not os.path.isdir(utility.folder_from_path(copy_to_dir + file,utility.set_dash())[:-1]): + os.makedirs(utility.folder_from_path(copy_to_dir + file,utility.set_dash())[:-1]) + print("Copying : " + localpath + file) + with open(localpath + file, 'rb') as f: + with open(copy_to_dir + file, 'wb') as d: + for piece in read_in_chunks(f,size_limit_reading_os): + 
d.write(piece) + remainingtocopy = remainingtocopy - size + print("Remaining to copy : " + str(remainingtocopy / 1000000) + " MB") + print("Copy to dir terminated") + + if upload : + remainingtoupload = sizetoupload + errors_upload = 0 + skipped_uploads = 0 + for file, size in difffiles.items(): + hash_dir = hashlib.md5() + hash_dir.update((utility.folder_from_path(file,utility.set_dash())[:-1]).encode("utf-8")) + hash_dir = hash_dir.hexdigest() + if encrypted: + local_path_corrected =temp_dir + else: + local_path_corrected =localpath + with open(local_path_corrected + file, 'rb') as f: + large_segments_created = False + large_segments_uploaded = False + large_manifest_created = False + for fail_tries_counter in range (fail_tries) : + try: + if size > size_limit_to_segment: + print("Uploading Large File: " + utility.dash_replace(prefix + file) + " " + str(size) ) + if not large_segments_created : + local_segments_dict = {} + local_segments_to_upload_dict = {} + counter = 0 + hash = hashlib.md5() + bytes_written = 0 + for piece in read_in_chunks(f,size_limit_reading_os): + hash.update(piece) + if bytes_written == 0: + t = open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(counter),len(str(math.ceil( (size/size_limit_to_segment) * 10 ))))),'wb') + if (bytes_written + len(piece) <= size_limit_to_segment): + t.write(piece) + bytes_written = bytes_written +len(piece) + local_segments_dict[utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(counter),len(str(math.ceil( (size/size_limit_to_segment) * 10 )))))] = bytes_written + if bytes_written == size_limit_to_segment: + bytes_written = 0 + counter = counter + 1 + t.close() + if bytes_written > 0 : + counter = counter + 1 + hash = hash.hexdigest() + large_segments_created = True + # check if there are uploaded segments + if not large_segments_uploaded: + headers,remote_segments_list = 
swift_conn.get_container(swift_container + "_segments", prefix =hash + "_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/",full_listing=True ) + remote_segments_dict = {} + for o in remote_segments_list : + remote_segments_dict[o["name"].replace(hash +"_xg10v10_" + str(size_limit_to_segment) + "/","")] = o["bytes"] + for local_segment_name,local_segment_size in local_segments_dict.items() : + if (local_segment_name) not in remote_segments_dict.keys() or local_segment_size != remote_segments_dict[local_segment_name]: + local_segments_to_upload_dict[local_segment_name] = local_segment_size + else: + print ("Segment " + local_segment_name + " aready present " + str(local_segment_size)) + #end check + for local_segments_to_upload_name,local_segments_to_upload_size in local_segments_to_upload_dict.items(): + with open(temp_dir + local_segments_to_upload_name,'rb') as t : + print("Uploading Segment: " + local_segments_to_upload_name + " " + str(local_segments_to_upload_size)) + swift_conn.put_object(swift_container + "_segments",(hash +"_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/" + local_segments_to_upload_name),t,chunk_size=size_limit_reading_os) + large_segments_uploaded = True + if not large_manifest_created: + open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest",'wb').close() + with open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest",'rb') as t: + print("Creating Manifest") + swift_conn.put_object(swift_container,utility.dash_replace(prefix + file),t,headers={"X-Object-Manifest":swift_container + "_segments/" + hash +"_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/"},chunk_size=size_limit_reading_os) + large_manifest_created = True + print("Deleting temporary data") + for i in range(counter): + os.remove(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(i),len(str(math.ceil( 
(size/size_limit_to_segment) * 10 )))))) + os.remove(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest") + else: + print("Uploading File: " + utility.dash_replace(prefix + file) + " " + str(size) ) + swift_conn.put_object(swift_container,utility.dash_replace(prefix + file),f,chunk_size=size_limit_reading_os) + remainingtoupload = remainingtoupload - size + print("Remaining to upload : " + str(remainingtoupload / 1000000) + " MB") + except Exception as e: + print("Exception during upload") + print(e) + time.sleep(1) + errors_upload = errors_upload + 1 + swift_conn = authentication.set_authentication () + if fail_tries_counter == fail_tries - 1 : + print("Maximum tries reached. Skipping upload of the file") + skipped_uploads = skipped_uploads + 1 + else: + break + if encrypted: + os.remove(local_path_corrected + file) + if encrypted: + shutil.rmtree(temp_dir) + print("Upload Terminated : Remaining MB to upload " + str(remainingtoupload)) + print("Errors during upload : " + str(errors_upload)) + print("Skipped files during upload : " + str(skipped_uploads)) + + else: + print("Upload Disabled") + + swift_conn.close() diff --git a/settings_generate_temp_url.py b/settings_generate_temp_url.py new file mode 100644 index 0000000..bdc1a92 --- /dev/null +++ b/settings_generate_temp_url.py @@ -0,0 +1,13 @@ +__author__ = 'xgiovio' + +import generate_temp_url + +secretkey ="" +set_secretkey = False +create_temp_url = True +duration_in_seconds = 60*60*24*7 +objectpath = "/########" # container/object +fail_tries = 100 + + +generate_temp_url.launch(secretkey,set_secretkey,create_temp_url,duration_in_seconds,objectpath,fail_tries) \ No newline at end of file diff --git a/settings_local_to_swift.py b/settings_local_to_swift.py new file mode 100644 index 0000000..e24c207 --- /dev/null +++ b/settings_local_to_swift.py @@ -0,0 +1,45 @@ +__author__ = 'xgiovio' + +import local_to_swift +size_limit_to_segment = 2147483648 #must be a power of 2 # def 2147483648 +# 
2MB 2097152 +# 4MB 4194304 +# 8MB 8388608 +# 128MB 134217728 +# 256MB 268435456 +# 512MB 536870912 +# 1GB 1073741824 +# 2GB 2147483648 +# 4GB 4294967296 +size_limit_reading_os = 134217728 #must be a power of 2 and smaller/equal than size_limit_to_segment # def 134217728 +# 64k 65536 +# 128k 131072 +# 256k 262144 +# 512k 524288 +# 1MB 1048576 +# 2MB 2097152 +# 4MB 4194304 +# 8MB 8388608 +# 128MB 134217728 +# 256MB 268435456 +# 512MB 536870912 +# 1GB 1073741824 +# 2GB 2147483648 +# 4GB 4294967296 +upload = False +enableLarge = True +fail_tries = 100 +temp_path = "\\\\?\\" + "c:\\temp\\" +excluded_patterns = ["Thumbs.db",".DS_Store","_gsdata_","__MACOSX", "desktop.ini","@eaDir"] +batch = [ + #source, swift container, swift prefix, md5 comparison enabled?, encrypted?, encryption_key, additional_excluded_patterns,copy_to_dir + ["\\\\?\\" + "c:\\orig\\","default","prefix/",False,False,None,[],None] , + ["\\\\?\\" + "c:\\orig2\\","default","prefix/",False,False,None,[],None] , + #["\\\\?\\" + "c:\\orig3\\","default","prefix/",False,False,None,[],None] , + + ] + +############################################################################################### +for job in batch: + #local folder,temp path, swift container, swift prefix, size to segment, size reading limit os, upload enabled?. upload large enabled? 
, fail tries, md5 comparison enabled?, encrypted?, encryption_key,additional_excluded_patterns,copy_to_dir + local_to_swift.launch(job[0],temp_path,job[1],job[2],size_limit_to_segment,size_limit_reading_os,upload,enableLarge,fail_tries, job[3],job[4],job[5], job[6] + excluded_patterns,job[7]) \ No newline at end of file diff --git a/settings_swift_delete_orphan_segments.py b/settings_swift_delete_orphan_segments.py new file mode 100644 index 0000000..57d5c03 --- /dev/null +++ b/settings_swift_delete_orphan_segments.py @@ -0,0 +1,9 @@ +__author__ = 'xgiovio' + +import swift_delete_orphan_segments +#swift +swift_container = "####" # container +delete = False +fail_tries = 100 + +swift_delete_orphan_segments.launch(swift_container,delete,fail_tries) diff --git a/settings_swift_to_local.py b/settings_swift_to_local.py new file mode 100644 index 0000000..ae037c1 --- /dev/null +++ b/settings_swift_to_local.py @@ -0,0 +1,31 @@ +__author__ = 'xgiovio' + +import swift_to_local +size_limit_reading_os = 134217728 #must be a power of 2 and smaller/equal than size_limit_to_segment # def 134217728 +# 64k 65536 +# 128k 131072 +# 256k 262144 +# 512k 524288 +# 1MB 1048576 +# 2MB 2097152 +# 4MB 4194304 +# 8MB 8388608 +# 128MB 134217728 +# 256MB 268435456 +# 512MB 536870912 +# 1GB 1073741824 +# 2GB 2147483648 +# 4GB 4294967296 +download = True +fail_tries = 1000000 +excluded_patterns = ["Thumbs.db",".DS_Store","_gsdata_","__MACOSX", "desktop.ini","@eaDir"] +batch = [ + #source, swift container, swift prefix, md5 comparison enabled?, encrypted?, encryption_key, additional_excluded_patterns + ["\\\\?\\" + "C:\\test\\","default","test/",False,True,"pass",[]] , + + ] + +############################################################################################### +for job in batch: + #local folder,temp path, swift container, swift prefix, size to segment, size reading limit os, upload enabled?. upload large enabled? 
, fail tries, md5 comparison enabled?, encrypted?, encryption_key + swift_to_local.launch(job[0],job[1],job[2],size_limit_reading_os,download,fail_tries, job[3],job[4],job[5], job[6] + excluded_patterns) \ No newline at end of file diff --git a/swift_delete_orphan_segments.py b/swift_delete_orphan_segments.py new file mode 100644 index 0000000..6f7aff1 --- /dev/null +++ b/swift_delete_orphan_segments.py @@ -0,0 +1,89 @@ +__author__ = 'xgiovio' + + + +import authentication, utility + +def launch(swift_container,delete,fail_tries): + print ("Swift container " + swift_container) + print("___________") + + swift_conn = authentication.set_authentication () + + listfoldermanifest=[] + #all files on the container + swift_conn,container_objects_raw = utility.get_list(fail_tries,swift_conn,swift_container,"") + #get dict name:manifesturl only for large files + byte0manifest,swift_conn,container_objects_manifest = utility.list_compute_manifest (fail_tries,container_objects_raw,swift_conn,swift_container,"") + for name,manifest in container_objects_manifest.items(): + listfoldermanifest.append(manifest) + ''' + for o in listfoldermanifest: + print(o) + ''' + #_____________________________________________________________________________________________________________________ + #______________________________Folders with segments + listfoldersegmentsprefix = [] + + #______________________________Folders with segments : container: swift_container +"_segments" + + #all files on container_segments -> they are only segments + swift_conn,container_segments_objects_raw = utility.get_list(fail_tries,swift_conn,swift_container +"_segments","") + #get dict name:size + container_segments_objects = utility.list (container_segments_objects_raw,"") + #get list only with folders path name from segments + listfolder = [] + for name in container_segments_objects.keys(): + if utility.folder_from_path(name,"/") not in listfolder : + listfolder.append(utility.folder_from_path(name,"/")) + for o 
in listfolder: + listfoldersegmentsprefix.append(swift_container +"_segments/" + o) + + #______________________________Folders with segments : container: swift_container path: @SynologyCloudSync/ + #all files @SynologyCloudSync/ on container -> they are only segments + #get dict name:size + container_segments_objects = utility.filter_list_begin(container_objects_raw,"@SynologyCloudSync/","") + #get list only with folders path name from segments + listfolder = [] + for name in container_segments_objects.keys(): + if utility.folder_from_path(name,"/") not in listfolder : + listfolder.append(utility.folder_from_path(name,"/")) + for o in listfolder: + listfoldersegmentsprefix.append(swift_container +"/" + o) + + + #______________________________Folders with segments "!CB_" + #all files !CB_ of CloudBerry on container -> they are only segments + #get dict name:size + container_segments_objects = utility.search_list(container_objects_raw,"!CB_","") + #get list only with folders path name from segments + listfolder = [] + for name in container_segments_objects.keys(): + if utility.folder_from_path(name,"_") not in listfolder : + listfolder.append(utility.folder_from_path(name,"_")) + for o in listfolder: + listfoldersegmentsprefix.append(swift_container +"/" + o) + + ''' + for o in listfoldersegmentsprefix: + print(o) + ''' + #_____________________________________________________________________________________________________________________ + + segments_to_delete = [] + segments_not_listed = [] + for manifesturlsegments in listfoldersegmentsprefix : + if manifesturlsegments not in listfoldermanifest: + segments_to_delete.append(manifesturlsegments) + + for manifest in segments_to_delete: + print("Segments folder to delete: " + manifest) + + for manifesturl in listfoldermanifest : + if manifesturl not in listfoldersegmentsprefix: + segments_not_listed.append(manifesturl) + + for manifest in segments_not_listed: + print("Segments not present in given segments list : " + 
manifest)

        swift_conn.close()
diff --git a/swift_to_local.py b/swift_to_local.py
new file mode 100644
index 0000000..c7f049c
--- /dev/null
+++ b/swift_to_local.py
__author__ = 'xgiovio'

import authentication
import utility
import os, sys,platform, hashlib,time
from utility import read_in_chunks

def launch(localpath,swift_container,prefix,size_limit_reading_os,download,fail_tries ,md5_compare, encrypted,encrypt_key,excluded_patterns):
    """Mirror a Swift container/prefix down to a local directory.

    Compares the remote listing against the local tree (by size, and
    optionally by MD5), prints a report of the differences, and — when
    ``download`` is true — fetches the missing/changed objects, retrying
    each one up to ``fail_tries`` times.  Objects whose names contain any
    string in ``excluded_patterns`` are skipped.  Files uploaded encrypted
    (suffix ``_xg10v10_encrypted``) are decrypted in place after download
    using ``encrypt_key``.

    Exits the process on bad arguments: -3 for a missing encryption key,
    -2 for malformed paths or an invalid chunk size.
    """
    print ("Localpath " + localpath)
    print ("Swift container " + swift_container)
    print ("Swift prefix " + prefix)
    print ("Os reading Limit " + str(size_limit_reading_os))
    print ("Fail tries " + str(fail_tries))
    print ("Download " + str(download))
    print ("MD5 Compare " + str(md5_compare))
    print ("Encrypted " + str(encrypted))
    if encrypted:
        # NOTE(review): `== None` — PEP 8 prefers `is None`; left unchanged here.
        if encrypt_key == None:
            print("Error Bad Encryption Key")
            sys.exit(-3)
        else:
            # Never echo the key itself.
            print ("Encrypted key " + "hidden")
    print("___________")

    #checks
    # Chunk size must pass the sanity check; local path must end with a
    # separator, remote container/prefix must not start with one, and the
    # prefix must end with one (so `prefix + name` concatenation is valid).
    utility.check_segments_size_single (size_limit_reading_os)
    if utility.check_start_slash(swift_container) or utility.check_start_slash(prefix) or not utility.check_end_slash(localpath) or utility.check_end_slash(swift_container) or not utility.check_end_slash(prefix):
        print ("Errors on local or remote paths. Checks \\ or / at the begin or end of each path")
        sys.exit(-2)
    localfiles = utility.get_local_files (localpath)
    print ("Files locali " + str(len(localfiles)))
    print("___________")
    swift_conn = authentication.set_authentication ()
    # get_list / list_compute_correct_size may re-authenticate internally,
    # so they hand the (possibly new) connection back to us.
    swift_conn,objects = utility.get_list(fail_tries,swift_conn,swift_container,prefix)
    byte0real,byte0manifest,swift_conn,remotefiles,remotefiles_md5 = utility.list_compute_correct_size (fail_tries,objects,swift_conn,swift_container,prefix)

    print ("Files remoti " + str(len(remotefiles)))

    difffiles = {}              # remote name -> size, for objects that must be downloaded
    skipped = 0                 # objects excluded by pattern filters
    sizetodownload=0            # total bytes queued for download
    filestodownload = 0
    skipped_old_md5_large_files = 0   # large files with no embedded md5 to compare
    if encrypted and download :
        # Deferred import: utility_aes pulls in PyCrypto/pyaes, only needed here.
        import utility_aes
    for rname in remotefiles.keys():
        download_file = True
        for pattern in excluded_patterns:
            if pattern in rname:
                download_file = False
                break
        if download_file:
            #print("Comparing " + lname)
            if encrypted and rname.endswith("_xg10v10_encrypted"):
                # Encrypted remote names embed metadata:
                # <plain-name>_xg10v10_<md5>_xg10v10_<size>_xg10v10_encrypted
                # — presumably produced by local_to_swift; TODO confirm format.
                rnamedec = rname.split("_xg10v10_")[0]
                rnamesize = rname.split("_xg10v10_")[2]
                rnamemd5 = rname.split("_xg10v10_")[1]
                # Compare the *plaintext* size embedded in the name, since the
                # remote object size includes the encryption overhead.
                if rnamedec not in localfiles.keys() or localfiles[rnamedec] != int(rnamesize) :
                    difffiles[rname] = remotefiles[rname]
                    sizetodownload = sizetodownload + remotefiles[rname]
                    filestodownload = filestodownload + 1
                elif md5_compare :
                    # Sizes match; fall back to hashing the local plaintext.
                    # NOTE(review): `hash` shadows the builtin.
                    hash = hashlib.md5()
                    with open(localpath + rnamedec, 'rb') as f:
                        for piece in read_in_chunks(f,size_limit_reading_os):
                            hash.update(piece)
                    if hash.hexdigest() != rnamemd5:
                        difffiles[rname] = remotefiles[rname]
                        sizetodownload = sizetodownload + remotefiles[rname]
                        filestodownload = filestodownload + 1
            else:
                # Plain (unencrypted) object: compare by size first.
                if rname not in localfiles.keys() or localfiles[rname] != remotefiles[rname] :
                    difffiles[rname] = remotefiles[rname]
                    sizetodownload = sizetodownload + remotefiles[rname]
                    filestodownload = filestodownload + 1
                elif md5_compare :
                    # "0" marks a large file whose manifest carried no md5 —
                    # nothing to compare against (see list_compute_correct_size).
                    if remotefiles_md5[rname] != "0":
                        hash = hashlib.md5()
                        with open(localpath + rname, 'rb') as f:
                            for piece in read_in_chunks(f,size_limit_reading_os):
                                hash.update(piece)
                        if hash.hexdigest() != remotefiles_md5[rname] :
                            difffiles[rname] = remotefiles[rname]
                            sizetodownload = sizetodownload + remotefiles[rname]
                            filestodownload = filestodownload + 1
                    else:
                        skipped_old_md5_large_files = skipped_old_md5_large_files + 1
        else:
            print("Skipped " + rname + " due to filters" )
            skipped = skipped + 1

    # ---- summary report (log labels are partly Italian; left verbatim) ----
    print("___________Differenze___________")
    print ("Files locali " + str(len(localfiles)))
    print ("Skipped due to filters " + str(skipped))
    if md5_compare:
        print ("Skipped md5 comparison on old large files without md5 embedded in x-object-manifest: " + str(skipped_old_md5_large_files))
    print ("Files remoti " + str(len(remotefiles)))
    print ("Files remoti - skipped " + str(len(remotefiles) - skipped))
    print ("Files remoti 0byte reali " + str(byte0real))
    print ("Files remoti 0byte large " + str(byte0manifest))
    print("___________Files to download______")
    for files, sizes in difffiles.items():
        if encrypted and files.endswith("_xg10v10_encrypted"):
            # Show the decrypted (plain) name in the report.
            print(files.split("_xg10v10_")[0],sizes)
        else:
            print(files,sizes)
    print("___________")
    # NOTE(review): on Python 2 `/1000000` is integer division — MB figure is
    # truncated; on Python 3 it would print a float. Presumably intentional.
    print ("MBytes to download " + str(sizetodownload/1000000))
    print ("Files to download " + str(filestodownload))
    print("___________")

    remainingtodownload = sizetodownload
    if download :
        errors_download = 0
        skipped_downloads = 0
        for file, size in difffiles.items():
            # NOTE(review): `file` shadows the (Python 2) builtin.
            # Per-object retry loop: on failure, re-authenticate and try again,
            # giving up on this object after fail_tries attempts.
            for fail_tries_counter in range (fail_tries) :
                try:
                    print("Downloading File: " + utility.dash_replace(prefix + file) + " " + str(size) )
                    # Streaming download: f is (headers, body-chunk-iterator).
                    f = swift_conn.get_object(swift_container,utility.dash_replace(prefix + file),resp_chunk_size=size_limit_reading_os)
                    # [:-1] drops the trailing separator folder_from_path appends.
                    if not os.path.isdir(utility.folder_from_path(localpath + file,utility.set_dash())[:-1]):
                        os.makedirs(utility.folder_from_path(localpath + file,utility.set_dash())[:-1])
                    with open( localpath + file,'wb') as t:
                        while True:
                            try:
                                # NOTE(review): `.next()` is Python-2-only;
                                # `next(f[1])` would work on both 2.6+ and 3.
                                t.write(f[1].next())
                            except StopIteration:
                                break
                    if encrypted and file.endswith("_xg10v10_encrypted"):
                        # Decrypt the downloaded blob to its plain name, then
                        # remove the encrypted copy.
                        rnamedec = file.split("_xg10v10_")[0]
                        with open(localpath + file, 'rb') as f:
                            with open(localpath + rnamedec, 'wb') as d:
                                aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
                                aes.decrypt()
                        os.remove(localpath + file)
                        print("Decrypted")
                    remainingtodownload = remainingtodownload - size
                    print("Remaining to download : " + str(remainingtodownload / 1000000) + " MB")
                except Exception as e:
                    # Broad catch is deliberate best-effort: log, back off,
                    # rebuild the connection, and retry this object.
                    print("Exception during download")
                    print(e)
                    time.sleep(1)
                    errors_download = errors_download + 1
                    swift_conn = authentication.set_authentication ()
                    if fail_tries_counter == fail_tries - 1 :
                        print("Maximum tries reached. Skipping download of the file")
                        skipped_downloads = skipped_downloads + 1
                else:
                    break
        print("___________")
        print("Download Terminated : Remaining MB to download " + str(remainingtodownload))
        print("Errors during download : " + str(errors_download))
        print("Skipped files during download : " + str(skipped_downloads))

    else:
        print("Download Disabled")

    swift_conn.close()
diff --git a/utility.py b/utility.py
new file mode 100644
index 0000000..9450497
--- /dev/null
+++ b/utility.py
__author__ = 'xgiovio'

import authentication
import sys, platform,os
import time


def get_local_files (localpath):
    """Walk ``localpath`` and return {relative-path: size-in-bytes}."""
    print("Building local files list")
    localfiles = {}
    for root, dirs, files in os.walk(localpath, topdown=True):
        for name in files:
            # Key is the path relative to localpath (prefix stripped).
            localfiles[os.path.join(root.replace(localpath,""), name)]=int(os.stat(os.path.join(root, name)).st_size)

        #skip dirs
        '''
        for name in dirs:
            print(os.path.join(root, name))
        '''
        #end skip
    return localfiles

def get_list (fail_tries,swift_conn,swift_container,prefix) :
    """Fetch the full object listing for container/prefix, with retries.

    Returns [swift_conn, objects]; the connection is returned because a
    failed attempt replaces it via re-authentication.  Exits(-1) after
    fail_tries consecutive failures.
    """
    for fail_tries_counter in range (fail_tries) :
        try:
            print ("Downloading remote list for " + swift_container + " with prefix " +prefix + " ... ")
            headers,objects = swift_conn.get_container(swift_container, prefix =prefix,full_listing=True )
        except Exception as e:
            print("Exception during the download of remote list")
            print(e)
            time.sleep(1)
            if fail_tries_counter == fail_tries - 1 :
                print("Maximum tries reached. Can't download remote list for container " + swift_container + " with prefix " +prefix +". Exiting.")
                sys.exit(-1)
            else:
                swift_conn = authentication.set_authentication ()
        else :
            break
    return [swift_conn,objects]

def filter_list_begin(objects,beginpattern,prefix):
    """Return {name-without-prefix: bytes} for non-directory objects whose
    name starts with ``beginpattern``."""
    filtered = {}
    for o in objects :
        if o["content_type"] != "application/directory":
            if (o["name"]).find(beginpattern)== 0:
                filtered[(o["name"].replace(prefix,""))]=int(o["bytes"])
    return filtered

def search_list (objects,pattern,prefix):
    """Return {name-without-prefix: bytes} for non-directory objects whose
    name contains ``pattern`` anywhere."""
    filtered = {}
    for o in objects :
        if o["content_type"] != "application/directory":
            if pattern in o["name"]:
                filtered[(o["name"].replace(prefix,""))]=int(o["bytes"])
    return filtered


def list_compute_correct_size (fail_tries,objects,swift_conn,swift_container,prefix):
    """Resolve true sizes/md5s for a listing, expanding 0-byte manifests.

    Swift Dynamic-Large-Object manifests list as 0 bytes; for each 0-byte
    object a HEAD request (with retry) distinguishes a real empty file from
    a manifest, whose content-length and embedded md5 are used instead.

    Returns [byte0real, byte0manifest, swift_conn, remotefiles, remotefiles_md5]
    where remotefiles maps local-style names to sizes and remotefiles_md5
    maps them to md5 hex digests ("0" when no md5 is recoverable).
    """
    remotefiles = {}
    remotefiles_md5 = {}
    byte0real = 0        # genuinely empty objects
    byte0manifest = 0    # 0-byte objects that are DLO manifests

    for o in objects :
        if o["content_type"] != "application/directory":
            if int(o["bytes"]) == 0 :
                print ("Requesting metadata for 0byte file " + o["name"] )
                for fail_tries_counter in range (fail_tries) :
                    try:
                        oheaders = swift_conn.head_object(swift_container,o["name"])
                    except Exception as e:
                        print("Exception during the request of metadata")
                        print(e)
                        time.sleep(1)
                        if fail_tries_counter == fail_tries - 1 :
                            print("Maximum tries reached. Can't download sizes for all large files on container " + swift_container + " with prefix: " +prefix +". Exiting.")
                            sys.exit(-1)
                        else:
                            swift_conn = authentication.set_authentication ()
                    else :
                        break
                if "x-object-manifest" in oheaders.keys():
                    print ("0byte file " + o["name"] + " e' un large file" )
                    # Use the assembled size from the HEAD, not the listed 0.
                    o["bytes"] = oheaders["content-length"]
                    if "_xg10v10_" in oheaders["x-object-manifest"]:
                        # Manifest layout appears to be
                        # "<segment-container>/<md5>_xg10v10_..." — the md5 is
                        # the first path component after the container.
                        # TODO confirm against the uploader's manifest format.
                        split1 = oheaders["x-object-manifest"].split("_xg10v10_")[0]
                        split2 = split1.split("/")[1]
                        o["hash"] = split2
                    else:
                        print("Impossible to get remote large file md5. Cause: Not uploaded with xgiovio method (md5 in x-object-manifest)")
                        o["hash"] = "0"
                    byte0manifest = byte0manifest + 1
                else:
                    print ("0byte file " + o["name"] + " e' un file normale" )
                    byte0real = byte0real + 1
            # remote_dash_replace converts names to the local OS separator.
            remotefiles[remote_dash_replace(o["name"].replace(prefix,""))]=int(o["bytes"])
            remotefiles_md5[remote_dash_replace(o["name"].replace(prefix,""))]=o["hash"]


    return [byte0real,byte0manifest,swift_conn,remotefiles,remotefiles_md5]



def list_compute_correct_names_for_enctyption (objects,prefix):
    """Map display names of encrypted objects to their full remote names.

    For names ending in "_xg10v10_encrypted" the embedded md5/size metadata
    is stripped, keeping "<plain>_xg10v10_encrypted"; other names map to
    themselves.  (Function name typo "enctyption" kept: it is the public name.)
    """
    remotefiles = {}

    for o in objects :
        if o["content_type"] != "application/directory":
            full = remote_dash_replace(o["name"].replace(prefix,""))
            if (full.endswith("_xg10v10_encrypted")):
                encrypted_name_only = full.split("_xg10v10_")[0] + "_xg10v10_encrypted"
            else:
                encrypted_name_only = full
            remotefiles[encrypted_name_only] = full

    return remotefiles


def list_compute_manifest (fail_tries,objects,swift_conn,swift_container,prefix):
    """Collect the x-object-manifest value of every 0-byte manifest object.

    Same retrying HEAD pattern as list_compute_correct_size.  Returns
    [byte0manifest, swift_conn, remotefiles] where remotefiles maps
    name-without-prefix to its manifest string (segment prefix).
    """
    remotefiles = {}
    byte0manifest = 0

    for o in objects :
        if o["content_type"] != "application/directory":
            if int(o["bytes"]) == 0 :
                print ("Requesting metadata for 0byte file " + o["name"] )
                for fail_tries_counter in range (fail_tries) :
                    try:
                        oheaders = swift_conn.head_object(swift_container,o["name"])
                    except Exception as e:
                        print("Exception during the request of metadata")
                        print(e)
                        time.sleep(1)
                        if fail_tries_counter == fail_tries - 1 :
                            print("Maximum tries reached. Can't download sizes for all large files on container " + swift_container + " with prefix: " +prefix +". Exiting.")
                            sys.exit(-1)
                        else:
                            swift_conn = authentication.set_authentication ()
                    else :
                        break
                if "x-object-manifest" in oheaders.keys():
                    byte0manifest = byte0manifest + 1
                    remotefiles[(o["name"].replace(prefix,""))]=oheaders["x-object-manifest"]

    return [byte0manifest,swift_conn,remotefiles]

def list (objects,prefix):
    """Return {name-without-prefix: bytes} for non-directory objects.

    NOTE(review): shadows the builtin ``list`` inside this module.
    """
    remotefiles = {}
    for o in objects :
        if o["content_type"] != "application/directory":
            remotefiles[(o["name"].replace(prefix,""))]=int(o["bytes"])

    return remotefiles

def listall (objects,prefix):
    """Like list(), but includes directory placeholder objects too."""
    remotefiles = {}
    for o in objects :
        remotefiles[(o["name"].replace(prefix,""))]=int(o["bytes"])
    return remotefiles

def check_segments_size (size_limit_reading_os,size_limit_to_segment):
    """Exit(-2) unless both sizes are even, >= 16, and read-chunk <= segment.

    NOTE(review): only evenness is enforced, yet AESCipherFile needs chunks
    that are a multiple of 16 for correct CBC block alignment — confirm the
    ``% 2`` here is intentional rather than ``% 16``.
    """
    if size_limit_reading_os%2 != 0 or size_limit_to_segment%2 != 0 or size_limit_reading_os > size_limit_to_segment or size_limit_reading_os < 16:
        print ("Error size segments")
        sys.exit(-2)

def check_segments_size_single (size_limit_reading_os):
    """Exit(-2) unless the read-chunk size is even and >= 16.
    NOTE(review): same ``% 2`` vs ``% 16`` question as check_segments_size.
    """
    if size_limit_reading_os%2 != 0 or size_limit_reading_os < 16:
        print ("Error size segments")
        sys.exit(-2)

def file_only_name (stringa,delimiter):
    """Return the last ``delimiter``-separated component (basename)."""
    return stringa.split(delimiter)[len(stringa.split(delimiter))-1]

def folder_from_path (stringa,delimiter):
    """Return the directory part of a path, INCLUDING a trailing delimiter."""
    string_to_ret =""
    for i in range(len(stringa.split(delimiter)) - 1):
        string_to_ret = string_to_ret + stringa.split(delimiter)[i] + delimiter
    return string_to_ret

def check_end_slash (stringa) :
    """True if the string ends with a path separator ('\\' or '/')."""
    lastchar = stringa[len(stringa) - 1]
    if lastchar == "\\" or lastchar == "/" :
        return True
    return False
def check_start_slash (stringa) :
    """True if the string starts with a path separator ('\\' or '/')."""
    firstchar = stringa[0]
    if firstchar == "\\" or firstchar == "/" :
        return True
    return False

def read_in_chunks(file_object, chunk_size):
    """Yield successive ``chunk_size`` reads from ``file_object`` until EOF."""
    while True:
        data = file_object.read(chunk_size)
        if not data:
            break
        yield data

def total_size_encrypted (original_size) :
    """Size of the AESCipherFile output for a plaintext of ``original_size``:
    plaintext rounded up to a 16-byte block, plus 17 bytes of header
    (1 'perfect multiple' flag byte + 16-byte IV)."""
    if original_size % 16 == 0:
        return original_size + 17
    return original_size + ( 16 - original_size % 16) + 17


def dash_replace (string):
    """Local path -> remote object name: on Windows, '\\' becomes '/'."""
    if platform.system() == "Windows":
        return string.replace("\\","/")
    return string

def remote_dash_replace (string):
    """Remote object name -> local path: on Windows, '/' becomes '\\'."""
    if platform.system() == "Windows":
        return string.replace("/","\\")
    return string

def set_dash():
    """Return the path separator for the current platform."""
    if platform.system() == "Windows":
        return "\\"
    return "/"
diff --git a/utility_aes.py b/utility_aes.py
new file mode 100644
index 0000000..286ecb8
--- /dev/null
+++ b/utility_aes.py
import hashlib,os
from utility import read_in_chunks

# Prefer the C-accelerated PyCrypto implementation; fall back to the
# pure-Python pyaes package when PyCrypto is not installed.
try:
    pycrypto = True
    from Crypto.Cipher import AES
    from Crypto import Random
    print("Using PyCrypto for AES enc/dec")
except ImportError:
    pycrypto = False
    import pyaes
    print("Using Pyaes for AES enc/dec")



class AESCipherFile:
    """Streaming AES-256-CBC file encryption/decryption.

    On-disk format produced by encrypt():
        byte 0      — b"1" if the plaintext length is an exact multiple of
                      16 (no padding present), b"0" otherwise
        bytes 1-16  — random IV
        bytes 17-   — CBC ciphertext, PKCS7-style padded only when needed
    """
    def __init__( self ,f,d, size_limit_reading_os, key ):
        # f: source file object (rb), d: destination file object (wb).
        # The passphrase is stretched to a 32-byte AES-256 key via SHA-256.
        h = hashlib.sha256()
        h.update(key.encode("utf_8"))
        self.key = h.digest()
        self.f = f
        self.d = d
        # chunk size for streaming reads; callers must keep it a multiple
        # of 16 — presumably guaranteed by check_segments_size; TODO confirm.
        self.size_limit_reading_os = size_limit_reading_os

    def pad(self,b):
        """PKCS7-style pad ``b`` up to the next 16-byte boundary."""
        b = bytearray(b)
        fill = 16 - len(b) % 16
        for i in range(fill):
            b.append(fill)
        return bytes(b)

    def unpad(self,b) :
        """Strip PKCS7 padding (last byte gives the pad length)."""
        return b[:-b[len(b)-1]]


    def encrypt( self ):
        """Encrypt self.f into self.d; return the plaintext's md5 hexdigest."""
        if pycrypto:
            iv = Random.new().read( 16 )
            aes_encrypt_state = AES.new( self.key, AES.MODE_CBC, iv )
        else:
            iv = os.urandom(16)
            aes_encrypt_state = pyaes.AESModeOfOperationCBC(self.key, iv = iv)
        #check if the file is a multiple of 16 bytes for padding
        # (seek to end to measure, flag byte tells decrypt whether to unpad)
        self.f.seek(0,2)
        if self.f.tell() % 16 == 0:
            self.d.write(bytes(b"1"))
        else:
            self.d.write(bytes(b"0"))
        self.f.seek(0,0)
        #end check
        self.d.write(iv)
        # NOTE(review): `hash` shadows the builtin.
        hash = hashlib.md5()
        for piece in read_in_chunks(self.f,self.size_limit_reading_os):
            hash.update(piece)
            # Only the final, short chunk can need padding (chunk size is a
            # multiple of 16 by contract).
            if len(piece) % 16 > 0 :
                piece = self.pad(piece)
            steps = int(len(piece) / 16)
            for i in range(steps):
                self.d.write(aes_encrypt_state.encrypt( piece[16 * i : 16 * (i + 1)] ))
        return hash.hexdigest()

    def decrypt( self ):
        """Decrypt self.f (format written by encrypt()) into self.d."""
        # Header: 1 flag byte ("1" = plaintext was an exact multiple of 16,
        # so no padding to strip), then the 16-byte IV.
        perfect = False
        perfectbyte = self.f.read(1)
        if perfectbyte == bytes(b"1"):
            perfect = True
        if pycrypto:
            aes_decrypt_state = AES.new( self.key, AES.MODE_CBC, self.f.read(16) )
        else:
            aes_decrypt_state = pyaes.AESModeOfOperationCBC(self.key, self.f.read(16))
        # Always hold back the most recently decrypted 16-byte block: only
        # after EOF do we know it is the final block and may need unpadding.
        last_16_bytes_in_piece = None
        for piece in read_in_chunks(self.f,self.size_limit_reading_os):
            if last_16_bytes_in_piece != None:
                # Flush the block held over from the previous chunk.
                self.d.write(aes_decrypt_state.decrypt(last_16_bytes_in_piece))
            steps = int(len(piece) / 16)
            if steps > 1 :
                # Decrypt all but the last block of this chunk...
                for i in range(steps - 1):
                    self.d.write(aes_decrypt_state.decrypt( piece[16 * i : 16 * (i + 1)] ))
                i = i + 1
            else:
                i = 0
            # ...and hold the last block back (already decrypted here; CBC
            # order is preserved because it is written before the next chunk).
            last_16_bytes_in_piece = aes_decrypt_state.decrypt( piece[16 * i : 16 * (i + 1)] )
        if last_16_bytes_in_piece != None:
            if perfect:
                self.d.write(last_16_bytes_in_piece)
            else:
                self.d.write(self.unpad(last_16_bytes_in_piece))



''' usage
with open("c:\\test\\file", 'rb') as f:
    with open("c:\\test\\file_enc", 'wb') as d:
        aes = AESCipherFile(f,d,134217728,"chiave")
        aes.encrypt()
with open("c:\\test\\file_enc", 'rb') as f:
    with open("c:\\test\\file", 'wb') as d:
        aes = AESCipherFile(f,d,134217728,"chiave")
        aes.decrypt()
'''