First release. A README will follow.

This commit is contained in:
2015-12-01 12:42:50 +01:00
commit c87da373cd
12 changed files with 1141 additions and 0 deletions

31
authentication.py Normal file
View File

@@ -0,0 +1,31 @@
__author__ = 'xgiovio'
####################################### authentication
# Selects which branch set_authentication() takes: "v1" (user/key) or "pre"
# (pre-authenticated storage URL + token).
authentication="v1"
####################################### types of authentication
# Settings used when authentication == "v1".
# NOTE(review): the "########" values are redacted placeholders — fill in
# real credentials before use.
#authentication="v1"
swift_user = "########"
swift_pass = "########"
swift_auth = "https://www.########"
# Settings used when authentication == "pre".
#authentication="pre"
url = "https://########/v1/AUTH_###############"
tok ="########"
#######################################end authentication
timeout = 3 #sec — per-request timeout handed to swiftclient.Connection
insecure = False  # True would skip TLS certificate verification
import swiftclient
def set_authentication ():
    """Open and return a Swift connection using the module-level settings.

    "pre" uses the pre-authenticated storage URL + token; "v1" and any
    unrecognised value fall back to v1-style user/key authentication.
    """
    if authentication=="pre":
        conn = swiftclient.client.Connection(preauthurl=url, preauthtoken=tok,
                                             timeout=timeout, insecure=insecure)
        print("Using pre authentication")
        return conn
    # v1 credentials serve both the explicit "v1" setting and the fallback.
    conn = swiftclient.client.Connection(authurl=swift_auth, user=swift_user,
                                         key=swift_pass, timeout=timeout,
                                         insecure=insecure)
    if authentication=="v1":
        print("Using v1 authentication")
    else:
        print("Using v1/alternative authentication")
    return conn

44
generate_temp_url.py Normal file
View File

@@ -0,0 +1,44 @@
__author__ = 'xgiovio'
import authentication, swiftclient
import hmac,sys
from hashlib import sha1
from time import time
def launch(secretkey,set_secretkey,create_temp_url,duration_in_seconds,objectpath,fail_tries):
    """Set or fetch the account temp-url secret key, then build a TempURL.

    Retries the key set/get up to *fail_tries* times, re-authenticating on
    failure.  When create_temp_url is true, prints a signed GET URL for
    *objectpath* valid for *duration_in_seconds*.
    """
    # BUG FIX: the module does `from time import time`, so the name `time`
    # is the function — `time.sleep(1)` raised AttributeError.  Bind sleep
    # locally instead.
    from time import sleep
    swift_conn = authentication.set_authentication ()
    storageurl,_ = swift_conn.get_auth()
    for fail_tries_counter in range (fail_tries) :
        try:
            if set_secretkey:
                swift_conn.post_account({"x-account-meta-temp-url-key":secretkey})
            else:
                headers = swift_conn.head_account()
                secretkey = headers['x-account-meta-temp-url-key']
        except Exception as e:
            print("Exception during setting / getting the secret key.")
            print(e)
            sleep(1)
            if fail_tries_counter == fail_tries - 1 :
                print("Maximum tries reached. Exiting.")
                sys.exit(-1)
            else:
                # Re-authenticate before the next attempt.
                swift_conn = authentication.set_authentication ()
        else :
            break
    print("Secretkey " + secretkey)
    if create_temp_url :
        storageurl = storageurl.replace("https://","")
        method = 'GET'
        expires = int(time() + duration_in_seconds)
        # TempURL path: /v1/AUTH_account/container/object
        path = "/" + storageurl.split("/")[1] + "/" + storageurl.split("/")[2] + "/" + objectpath
        key = secretkey
        hmac_body = '%s\n%s\n%s' % (method, expires, path)
        # BUG FIX: Python 3 hmac.new requires bytes for key and message.
        sig = hmac.new(key.encode('utf-8'), hmac_body.encode('utf-8'), sha1).hexdigest()
        s = 'https://{host}{path}?temp_url_sig={sig}&temp_url_expires={expires}'
        url = s.format(host=storageurl.split("/")[0], path=path, sig=sig, expires=expires)
        print(url)

87
linux_launch_monitor.py Normal file
View File

@@ -0,0 +1,87 @@
import os
# Widen the reported terminal so `top -b` does not truncate command lines
# (needed to match the full script path) — presumably; verify on target box.
os.environ['COLUMNS'] = "1024"
import time
import subprocess
import sys
# Paths to the interpreter and the script this monitor babysits
# ("########" are redacted placeholders).
python3binpath = "/########/python3"
scriptpath = "/########.py"
outpath = "out_monitor.txt"
time_sleep_creation = 10 #sec — grace period after (re)starting the script
base_time_sleep_active_def = 60 #sec — initial poll interval while active
time_sleep_inactive = 15 #sec — poll interval while CPU usage is 0
max_time_sleep_active = 600 #sec — cap for the growing active poll interval
time_sleep_exception = 300 #sec — wait loop interval after an exception
def split_and_remove_null_strings (inputstr):
    """Split *inputstr* on single spaces and drop the empty fragments.

    Equivalent to the manual append loop, expressed as a comprehension.
    Note: splits on " " only (not all whitespace), preserving the original
    behaviour for tab-separated input.
    """
    return [s for s in inputstr.split(" ") if s != ""]
def get_cpu_util(pattern) :
    """Return [pid, cpu] for the last `top` row containing *pattern*.

    Runs one batch iteration of `top` and scans its output; returns [0, 0]
    when no row matches.
    """
    out = subprocess.check_output(["top", "-b","-n", "1"])
    out = out.decode("utf-8")
    #print(out)
    out = out.split("\n")
    row= ""
    # Keep the LAST matching row if several match.
    for s in out:
        if pattern in s:
            row =s
    if row == "":
        return [0, 0]
    row = split_and_remove_null_strings (row)
    print (row)
    # NOTE(review): row[0] (PID first) is standard, but row[-6] for %CPU
    # depends on this system's exact `top` column layout and on how many
    # tokens the COMMAND field splits into — confirm on the target host.
    pid = row[0]
    cpu = row[-6]
    return [int(pid), float(cpu)]
# --- monitor main loop -------------------------------------------------
# Restarts `scriptpath` whenever it is not running, backs off the poll
# interval while it is actively using CPU, and kills it after 10
# consecutive idle polls so the outer loop relaunches it.
poll=0   # set to 1 once the monitored script is seen to have completed
p=None   # Popen handle, only if this monitor launched the script itself
base_time_sleep_active = base_time_sleep_active_def
try:
    while True:
        pid,cpu = get_cpu_util (scriptpath)
        if pid == 0:
            print ("Starting " + scriptpath)
            # NOTE(review): ">" + outpath and '&' are passed as argv items,
            # not interpreted by a shell — no redirection or backgrounding
            # actually happens here; confirm intent.
            p = subprocess.Popen(['nohup', python3binpath, "-u", scriptpath,">" + outpath, '&'])
            time.sleep(time_sleep_creation)
        counter = 0
        while counter < 10:
            pid,cpu = get_cpu_util (scriptpath)
            # NOTE(review): p.poll() is None while running and 0 on clean
            # exit — both falsy — so this branch only fires on a NONZERO
            # exit status or when the process vanished from `top`.
            if pid == 0 or (p and p.poll()):
                print(scriptpath + " completed. Exiting")
                poll=1
                # NOTE(review): SystemExit raised here is swallowed by the
                # bare `except:` below — the handler then exits for us.
                sys.exit(0)
            if cpu > 0:
                print ("Process active" )
                time.sleep(base_time_sleep_active)
                # Grow the active poll interval by 30s up to the cap.
                base_time_sleep_active = min(base_time_sleep_active + 30, max_time_sleep_active )
                counter = 0
            else:
                print ("Process inactive, attempt " + str(counter) )
                time.sleep(time_sleep_inactive)
                counter = counter + 1
                base_time_sleep_active = base_time_sleep_active_def
        # 10 consecutive idle polls: kill so the outer loop restarts it.
        print ("Killing")
        out = subprocess.call(["kill", "-9", str(pid)])
except:
    # Bare except (also catches SystemExit/KeyboardInterrupt): wait for a
    # monitor-launched, still-running script before exiting.
    while True :
        print("Monitor Exception")
        if p and not p.poll() and poll==0:
            print("Waiting " + scriptpath + " to finish")
            time.sleep(time_sleep_exception)
        else:
            if not p:
                print(scriptpath + " not launched by monitor. Exiting")
            else:
                print(scriptpath + " completed. Exiting")
            sys.exit(-1)

308
local_to_swift.py Normal file
View File

@@ -0,0 +1,308 @@
__author__ = 'xgiovio'
import authentication
import utility
import os, sys,platform, math, hashlib,time
from utility import read_in_chunks
import shutil
def launch(localpath,temp_dir,swift_container,prefix,size_limit_to_segment,size_limit_reading_os,upload,uploadlarge,fail_tries ,md5_compare, encrypted,encrypt_key,excluded_patterns,copy_to_dir):
    """Mirror a local directory tree into a Swift container.

    Compares local files against the remote listing (by size, optionally by
    MD5) and uploads the differing ones.  Files larger than
    size_limit_to_segment are split into segments uploaded to
    "<swift_container>_segments" plus an X-Object-Manifest object.  When
    *encrypted* is true, files are AES-encrypted into *temp_dir* first.
    When *copy_to_dir* is set, differing files are copied there locally
    instead (incompatible with encryption).
    """
    print ("Localpath " + localpath)
    print ("Temppath " + temp_dir)
    print ("Swift container " + swift_container)
    print ("Swift prefix " + prefix)
    print ("Segmentation Limit " + str(size_limit_to_segment))
    print ("Os reading Limit " + str(size_limit_reading_os))
    print ("Upload " + str(upload))
    print ("Upload large " + str(uploadlarge))
    print ("Fail tries " + str(fail_tries))
    print ("MD5 Compare " + str(md5_compare))
    print ("Encrypted " + str(encrypted))
    if encrypted:
        if copy_to_dir != None:
            print("Encryption must be false if you are copying diff files to another dir")
            sys.exit(-4)
        if encrypt_key == None:
            print("Error Bad Encryption Key")
            sys.exit(-3)
        else:
            print ("Encrypted key " + "hidden")
    if copy_to_dir != None:
        print ("Copy to dir " + copy_to_dir)
    print("___________")
    # checks: segment/read sizes and path slash conventions
    utility.check_segments_size (size_limit_reading_os,size_limit_to_segment)
    if utility.check_start_slash(swift_container) or utility.check_start_slash(prefix) or not utility.check_end_slash(localpath) or not utility.check_end_slash(temp_dir) or utility.check_end_slash(swift_container) or not utility.check_end_slash(prefix):
        print ("Errors on local or remote paths. Checks \\ or / at the begin or end of each path")
        sys.exit(-2)
    localfiles = utility.get_local_files (localpath)
    print ("Files locali " + str(len(localfiles)))
    print("___________")
    swift_conn = authentication.set_authentication ()
    swift_conn,objects = utility.get_list(fail_tries,swift_conn,swift_container,prefix)
    # Resolve true sizes (0-byte manifest objects report 0) and remote md5s.
    byte0real,byte0manifest,swift_conn,remotefiles,remotefiles_md5 = utility.list_compute_correct_size (fail_tries,objects,swift_conn,swift_container,prefix)
    if encrypted:
        remotefiles_encr = utility.list_compute_correct_names_for_enctyption(objects,prefix)
    print ("Files remoti " + str(len(remotefiles)))
    # Accumulators for the diff/report phase.
    difffiles = {}
    filterskipped = 0
    sizetoupload=0
    filestoupload = 0
    largefilestoupload = 0
    largesizetoupload= 0
    encrypted_skipped_error = 0
    skipped_large_files = 0
    skipped_old_md5_large_files = 0
    if encrypted and upload :
        import utility_aes
        # Encrypt into a dedicated subtree of temp_dir.
        if not os.path.isdir(temp_dir + "_xg10v10_encrypted"):
            os.makedirs(temp_dir + "_xg10v10_encrypted")
        temp_dir = temp_dir + "_xg10v10_encrypted" + utility.set_dash()
    print("Comparing local files with remote ones")
    if encrypted and upload:
        print("Encrypting local files for upload")
    for lname in localfiles.keys():
        #print("Comparing " + lname)
        upload_file = True
        # 80 is the maximum string length appended to an encrypted file name:
        # "_xg10v10_fd3b094fd9c48c6ee288f58c991dec9d_xg10v10_999000000000_xg10v10_encrypted"
        if not uploadlarge and localfiles[lname] > size_limit_to_segment:
            upload_file = False
            print("Skipped " + lname + " Upload of large files is disabled. Limit " + size_limit_to_segment)
            skipped_large_files = skipped_large_files + 1
        elif encrypted and (len(utility.file_only_name(lname,utility.set_dash())) + 80 ) > 255 :
            print("Skipped " + lname + " encryption: The name is too long for the filesystem " + str(len(utility.file_only_name(lname,utility.set_dash())) + 80))
            encrypted_skipped_error = encrypted_skipped_error + 1
            upload_file = False
        else:
            for pattern in excluded_patterns:
                if pattern in lname:
                    upload_file = False
                    print("Skipped " + lname + " due to filters : " + pattern )
                    filterskipped = filterskipped + 1
                    break
        if upload_file:
            if encrypted :
                lnameenc = lname + "_xg10v10_encrypted"
                # Differs when: unknown remotely, embedded plain size differs,
                # or remote encrypted size differs from the expected one.
                if lnameenc not in remotefiles_encr.keys() or localfiles[lname] != int((remotefiles_encr[lnameenc]).split("_xg10v10_")[2]) or remotefiles[remotefiles_encr[lnameenc]] != utility.total_size_encrypted(localfiles[lname]) :
                    if upload:
                        with open(localpath + lname, 'rb') as f:
                            if not os.path.isdir(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1]):
                                os.makedirs(utility.folder_from_path(temp_dir + lname,utility.set_dash())[:-1])
                            with open(temp_dir + lname, 'wb') as d:
                                aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
                                md5 = aes.encrypt()
                        # Rename the encrypted temp file to carry md5 + size.
                        if os.path.isfile(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"):
                            os.remove(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
                        os.rename (temp_dir + lname, temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
                        difffiles[lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
                    else :
                        # Dry run: report only; md5 not computed.
                        difffiles[lname + "_xg10v10_" + "md5_not_calculated" + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
                    sizetoupload = sizetoupload + utility.total_size_encrypted(localfiles[lname])
                    filestoupload = filestoupload + 1
                    if (localfiles[lname] > size_limit_to_segment):
                        largefilestoupload = largefilestoupload + 1
                        largesizetoupload = largesizetoupload + utility.total_size_encrypted(localfiles[lname])
                elif md5_compare :
                    # Sizes match: fall back to comparing the plain-file md5
                    # against the md5 embedded in the remote encrypted name.
                    hash = hashlib.md5()
                    with open(localpath + lname, 'rb') as f:
                        for piece in read_in_chunks(f,size_limit_reading_os):
                            hash.update(piece)
                    if hash.hexdigest() != (remotefiles_encr[lnameenc]).split("_xg10v10_")[1]:
                        md5 = hash.hexdigest()
                        if upload:
                            with open(localpath + lname, 'rb') as f:
                                with open(temp_dir + lname, 'wb') as d:
                                    # NOTE(review): aes.encrypt() is never
                                    # called in this branch, so the temp file
                                    # looks left empty before the rename —
                                    # confirm against utility_aes.
                                    aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
                            if os.path.isfile(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"):
                                os.remove(temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
                            os.rename (temp_dir + lname, temp_dir + lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted")
                        difffiles[lname + "_xg10v10_" + md5 + "_xg10v10_" + str(localfiles[lname]) + "_xg10v10_encrypted"] = utility.total_size_encrypted(localfiles[lname])
                        sizetoupload = sizetoupload + utility.total_size_encrypted(localfiles[lname])
                        filestoupload = filestoupload + 1
                        if (localfiles[lname] > size_limit_to_segment):
                            largefilestoupload = largefilestoupload + 1
                            largesizetoupload = largesizetoupload + utility.total_size_encrypted(localfiles[lname])
            elif lname not in remotefiles.keys() or localfiles[lname] != remotefiles[lname]:
                # Unencrypted path: differs by absence or by size.
                difffiles[lname] = localfiles[lname]
                sizetoupload = sizetoupload + localfiles[lname]
                filestoupload = filestoupload + 1
                if (localfiles[lname] > size_limit_to_segment):
                    largefilestoupload = largefilestoupload + 1
                    largesizetoupload = largesizetoupload + localfiles[lname]
            elif md5_compare :
                # remotefiles_md5[lname] != "0" are md5 correctly calculated remote files
                if remotefiles_md5[lname]!= "0":
                    hash = hashlib.md5()
                    with open(localpath + lname, 'rb') as f:
                        for piece in read_in_chunks(f,size_limit_reading_os):
                            hash.update(piece)
                    if hash.hexdigest() != (remotefiles_md5[lname]):
                        difffiles[lname] = localfiles[lname]
                        sizetoupload = sizetoupload + localfiles[lname]
                        filestoupload = filestoupload + 1
                        if (localfiles[lname] > size_limit_to_segment):
                            largefilestoupload = largefilestoupload + 1
                            largesizetoupload = largesizetoupload + localfiles[lname]
                else:
                    skipped_old_md5_large_files = skipped_old_md5_large_files + 1
    # --- diff report ---------------------------------------------------
    print("___________Differenze___________")
    print ("Files locali " + str(len(localfiles)))
    print ("Skipped due to filters " + str(filterskipped))
    if encrypted:
        print ("Skipped due to errors during encrpytion phase. The name is too long for the filesystem " + str(encrypted_skipped_error))
    if md5_compare:
        print ("Skipped md5 comparison on old large files without md5 embedded in x-object-manifest: " + str(skipped_old_md5_large_files))
    if not uploadlarge :
        print ("Skipped large files because uploadlarge is disabled: " + str(skipped_large_files))
    allskipped = len(localfiles) - filterskipped
    if encrypted:
        allskipped = allskipped - encrypted_skipped_error
    if md5_compare:
        allskipped = allskipped - skipped_old_md5_large_files
    if not uploadlarge:
        allskipped = allskipped - skipped_large_files
    print ("Files locali - skipped " + str(allskipped))
    print ("Files remoti " + str(len(remotefiles)))
    print ("Files remoti 0byte reali " + str(byte0real))
    print ("Files remoti 0byte large " + str(byte0manifest))
    print("___________Files to upload______")
    for files, sizes in difffiles.items():
        print(files,sizes)
    print("___________")
    print ("MBytes to upload " + str(sizetoupload/1000000))
    print ("Files to upload " + str(filestoupload))
    if uploadlarge:
        print ("Large files to upload " + str(largefilestoupload))
    print ("MB of normal files to upload " + str((sizetoupload - largesizetoupload)/1000000 ))
    if uploadlarge:
        print ("MB of large files to upload " + str(largesizetoupload/1000000 ))
    print("___________")
    def format_numbers_for_large_files (input_string, width) :
        # Left-pad *input_string* with zeros to *width* so segment names
        # sort lexicographically in upload order.
        ret = ""
        for i in range(width - len(input_string)):
            ret = ret + "0"
        ret = ret + input_string
        return ret
    if copy_to_dir != None:
        # Local copy mode: replicate the differing files under copy_to_dir.
        remainingtocopy = sizetoupload
        for file, size in difffiles.items():
            if not os.path.isdir(utility.folder_from_path(copy_to_dir + file,utility.set_dash())[:-1]):
                os.makedirs(utility.folder_from_path(copy_to_dir + file,utility.set_dash())[:-1])
            print("Copying : " + localpath + file)
            with open(localpath + file, 'rb') as f:
                with open(copy_to_dir + file, 'wb') as d:
                    for piece in read_in_chunks(f,size_limit_reading_os):
                        d.write(piece)
            remainingtocopy = remainingtocopy - size
            print("Remaining to copy : " + str(remainingtocopy / 1000000) + " MB")
        print("Copy to dir terminated")
    if upload :
        remainingtoupload = sizetoupload
        errors_upload = 0
        skipped_uploads = 0
        for file, size in difffiles.items():
            # hash of the containing folder path — namespaces segment dirs.
            hash_dir = hashlib.md5()
            hash_dir.update((utility.folder_from_path(file,utility.set_dash())[:-1]).encode("utf-8"))
            hash_dir = hash_dir.hexdigest()
            if encrypted:
                local_path_corrected =temp_dir
            else:
                local_path_corrected =localpath
            with open(local_path_corrected + file, 'rb') as f:
                # Resume flags so retries skip already-completed phases.
                large_segments_created = False
                large_segments_uploaded = False
                large_manifest_created = False
                for fail_tries_counter in range (fail_tries) :
                    try:
                        if size > size_limit_to_segment:
                            print("Uploading Large File: " + utility.dash_replace(prefix + file) + " " + str(size) )
                            if not large_segments_created :
                                # Phase 1: split the file into local segment
                                # files of size_limit_to_segment bytes.
                                local_segments_dict = {}
                                local_segments_to_upload_dict = {}
                                counter = 0
                                hash = hashlib.md5()
                                bytes_written = 0
                                for piece in read_in_chunks(f,size_limit_reading_os):
                                    hash.update(piece)
                                    if bytes_written == 0:
                                        t = open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(counter),len(str(math.ceil( (size/size_limit_to_segment) * 10 ))))),'wb')
                                    if (bytes_written + len(piece) <= size_limit_to_segment):
                                        t.write(piece)
                                        bytes_written = bytes_written +len(piece)
                                        local_segments_dict[utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(counter),len(str(math.ceil( (size/size_limit_to_segment) * 10 )))))] = bytes_written
                                    if bytes_written == size_limit_to_segment:
                                        bytes_written = 0
                                        counter = counter + 1
                                        t.close()
                                if bytes_written > 0 :
                                    counter = counter + 1
                                hash = hash.hexdigest()
                                large_segments_created = True
                            # check if there are uploaded segments
                            if not large_segments_uploaded:
                                # Phase 2: upload only segments missing or
                                # mismatched on the remote side.
                                headers,remote_segments_list = swift_conn.get_container(swift_container + "_segments", prefix =hash + "_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/",full_listing=True )
                                remote_segments_dict = {}
                                for o in remote_segments_list :
                                    remote_segments_dict[o["name"].replace(hash +"_xg10v10_" + str(size_limit_to_segment) + "/","")] = o["bytes"]
                                for local_segment_name,local_segment_size in local_segments_dict.items() :
                                    if (local_segment_name) not in remote_segments_dict.keys() or local_segment_size != remote_segments_dict[local_segment_name]:
                                        local_segments_to_upload_dict[local_segment_name] = local_segment_size
                                    else:
                                        print ("Segment " + local_segment_name + " aready present " + str(local_segment_size))
                                #end check
                                for local_segments_to_upload_name,local_segments_to_upload_size in local_segments_to_upload_dict.items():
                                    with open(temp_dir + local_segments_to_upload_name,'rb') as t :
                                        print("Uploading Segment: " + local_segments_to_upload_name + " " + str(local_segments_to_upload_size))
                                        swift_conn.put_object(swift_container + "_segments",(hash +"_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/" + local_segments_to_upload_name),t,chunk_size=size_limit_reading_os)
                                large_segments_uploaded = True
                            if not large_manifest_created:
                                # Phase 3: zero-byte manifest object pointing
                                # at the segment prefix (Dynamic Large Object).
                                open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest",'wb').close()
                                with open(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest",'rb') as t:
                                    print("Creating Manifest")
                                    swift_conn.put_object(swift_container,utility.dash_replace(prefix + file),t,headers={"X-Object-Manifest":swift_container + "_segments/" + hash +"_xg10v10_" + hash_dir + "_xg10v10_" + str(size_limit_to_segment) + "/"},chunk_size=size_limit_reading_os)
                                large_manifest_created = True
                            print("Deleting temporary data")
                            for i in range(counter):
                                os.remove(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_" + str(format_numbers_for_large_files(str(i),len(str(math.ceil( (size/size_limit_to_segment) * 10 ))))))
                            os.remove(temp_dir + utility.file_only_name(file,utility.set_dash()) + "_manifest")
                        else:
                            print("Uploading File: " + utility.dash_replace(prefix + file) + " " + str(size) )
                            swift_conn.put_object(swift_container,utility.dash_replace(prefix + file),f,chunk_size=size_limit_reading_os)
                        remainingtoupload = remainingtoupload - size
                        print("Remaining to upload : " + str(remainingtoupload / 1000000) + " MB")
                    except Exception as e:
                        print("Exception during upload")
                        print(e)
                        time.sleep(1)
                        errors_upload = errors_upload + 1
                        swift_conn = authentication.set_authentication ()
                        if fail_tries_counter == fail_tries - 1 :
                            print("Maximum tries reached. Skipping upload of the file")
                            skipped_uploads = skipped_uploads + 1
                    else:
                        break
            if encrypted:
                # Encrypted temp copy is no longer needed after upload.
                os.remove(local_path_corrected + file)
        if encrypted:
            shutil.rmtree(temp_dir)
        print("Upload Terminated : Remaining MB to upload " + str(remainingtoupload))
        print("Errors during upload : " + str(errors_upload))
        print("Skipped files during upload : " + str(skipped_uploads))
    else:
        print("Upload Disabled")
    swift_conn.close()

View File

@@ -0,0 +1,13 @@
__author__ = 'xgiovio'
# Driver script: generate a Swift TempURL using the saved account secret key.
import generate_temp_url
secretkey =""            # only used when set_secretkey is True
set_secretkey = False    # True: store secretkey on the account; False: read it
create_temp_url = True   # build and print the signed URL
duration_in_seconds = 60*60*24*7  # URL validity: one week
objectpath = "/########" # container/object ("########" is a redacted placeholder)
fail_tries = 100         # retries for the key set/get round-trip
generate_temp_url.launch(secretkey,set_secretkey,create_temp_url,duration_in_seconds,objectpath,fail_tries)

View File

@@ -0,0 +1,45 @@
__author__ = 'xgiovio'
# Driver script: batch-upload local folders to Swift via local_to_swift.launch.
import local_to_swift
size_limit_to_segment = 2147483648 #must be a power of 2 # def 2147483648
# 2MB       2097152
# 4MB       4194304
# 8MB       8388608
# 128MB     134217728
# 256MB     268435456
# 512MB     536870912
# 1GB       1073741824
# 2GB       2147483648
# 4GB       4294967296
size_limit_reading_os = 134217728 #must be a power of 2 and smaller/equal than size_limit_to_segment # def 134217728
# 64k       65536
# 128k      131072
# 256k      262144
# 512k      524288
# 1MB       1048576
# 2MB       2097152
# 4MB       4194304
# 8MB       8388608
# 128MB     134217728
# 256MB     268435456
# 512MB     536870912
# 1GB       1073741824
# 2GB       2147483648
# 4GB       4294967296
upload = False       # False = dry run: only report the diff, upload nothing
enableLarge = True   # allow segmented upload of files over the segment limit
fail_tries = 100
# "\\\\?\\" is the Windows long-path prefix (lifts the 260-char MAX_PATH limit).
temp_path = "\\\\?\\" + "c:\\temp\\"
excluded_patterns = ["Thumbs.db",".DS_Store","_gsdata_","__MACOSX", "desktop.ini","@eaDir"]
batch = [
    #source, swift container, swift prefix, md5 comparison enabled?, encrypted?, encryption_key, additional_excluded_patterns,copy_to_dir
    ["\\\\?\\" + "c:\\orig\\","default","prefix/",False,False,None,[],None] ,
    ["\\\\?\\" + "c:\\orig2\\","default","prefix/",False,False,None,[],None] ,
    #["\\\\?\\" + "c:\\orig3\\","default","prefix/",False,False,None,[],None] ,
]
###############################################################################################
for job in batch:
    #local folder,temp path, swift container, swift prefix, size to segment, size reading limit os, upload enabled?. upload large enabled? , fail tries, md5 comparison enabled?, encrypted?, encryption_key,additional_excluded_patterns,copy_to_dir
    local_to_swift.launch(job[0],temp_path,job[1],job[2],size_limit_to_segment,size_limit_reading_os,upload,enableLarge,fail_tries, job[3],job[4],job[5], job[6] + excluded_patterns,job[7])

View File

@@ -0,0 +1,9 @@
__author__ = 'xgiovio'
# Driver script: report (and optionally delete) orphaned segment folders.
import swift_delete_orphan_segments
#swift
swift_container = "####" # container ("####" is a redacted placeholder)
delete = False           # dry run by default
fail_tries = 100
swift_delete_orphan_segments.launch(swift_container,delete,fail_tries)

View File

@@ -0,0 +1,31 @@
__author__ = 'xgiovio'
# Driver script: batch-download Swift containers to local folders.
import swift_to_local
size_limit_reading_os = 134217728 #must be a power of 2 and smaller/equal than size_limit_to_segment # def 134217728
# 64k       65536
# 128k      131072
# 256k      262144
# 512k      524288
# 1MB       1048576
# 2MB       2097152
# 4MB       4194304
# 8MB       8388608
# 128MB     134217728
# 256MB     268435456
# 512MB     536870912
# 1GB       1073741824
# 2GB       2147483648
# 4GB       4294967296
download = True
fail_tries = 1000000
excluded_patterns = ["Thumbs.db",".DS_Store","_gsdata_","__MACOSX", "desktop.ini","@eaDir"]
batch = [
    #source, swift container, swift prefix, md5 comparison enabled?, encrypted?, encryption_key, additional_excluded_patterns
    ["\\\\?\\" + "C:\\test\\","default","test/",False,True,"pass",[]] ,
]
###############################################################################################
for job in batch:
    #local folder,temp path, swift container, swift prefix, size to segment, size reading limit os, upload enabled?. upload large enabled? , fail tries, md5 comparison enabled?, encrypted?, encryption_key
    swift_to_local.launch(job[0],job[1],job[2],size_limit_reading_os,download,fail_tries, job[3],job[4],job[5], job[6] + excluded_patterns)

View File

@@ -0,0 +1,89 @@
__author__ = 'xgiovio'
import authentication, utility
def launch(swift_container,delete,fail_tries):
    """Detect orphaned segment folders for a container.

    Collects every segment-folder prefix referenced by a manifest, collects
    every segment folder that actually exists, and prints the folders that
    exist without a manifest (orphans) and the manifests whose segments are
    missing.

    NOTE(review): the *delete* flag is never used in this function — orphans
    are only printed, nothing is removed.  Confirm whether deletion was
    intended.
    """
    print ("Swift container " + swift_container)
    print("___________")
    swift_conn = authentication.set_authentication ()
    listfoldermanifest=[]
    #all files on the container
    swift_conn,container_objects_raw = utility.get_list(fail_tries,swift_conn,swift_container,"")
    #get dict name:manifesturl only for large files
    byte0manifest,swift_conn,container_objects_manifest = utility.list_compute_manifest (fail_tries,container_objects_raw,swift_conn,swift_container,"")
    for name,manifest in container_objects_manifest.items():
        listfoldermanifest.append(manifest)
    '''
    for o in listfoldermanifest:
        print(o)
    '''
    #_____________________________________________________________________________________________________________________
    #______________________________Folders with segments
    listfoldersegmentsprefix = []
    #______________________________Folders with segments : container: swift_container +"_segments"
    #all files on container_segments -> they are only segments
    swift_conn,container_segments_objects_raw = utility.get_list(fail_tries,swift_conn,swift_container +"_segments","")
    #get dict name:size
    container_segments_objects = utility.list (container_segments_objects_raw,"")
    #get list only with folders path name from segments
    listfolder = []
    for name in container_segments_objects.keys():
        if utility.folder_from_path(name,"/") not in listfolder :
            listfolder.append(utility.folder_from_path(name,"/"))
    for o in listfolder:
        listfoldersegmentsprefix.append(swift_container +"_segments/" + o)
    #______________________________Folders with segments : container: swift_container path: @SynologyCloudSync/
    #all files @SynologyCloudSync/ on container -> they are only segments
    #get dict name:size
    container_segments_objects = utility.filter_list_begin(container_objects_raw,"@SynologyCloudSync/","")
    #get list only with folders path name from segments
    listfolder = []
    for name in container_segments_objects.keys():
        if utility.folder_from_path(name,"/") not in listfolder :
            listfolder.append(utility.folder_from_path(name,"/"))
    for o in listfolder:
        listfoldersegmentsprefix.append(swift_container +"/" + o)
    #______________________________Folders with segments "!CB_"
    #all files !CB_ of CloudBerry on container -> they are only segments
    #get dict name:size
    container_segments_objects = utility.search_list(container_objects_raw,"!CB_","")
    #get list only with folders path name from segments
    listfolder = []
    for name in container_segments_objects.keys():
        if utility.folder_from_path(name,"_") not in listfolder :
            listfolder.append(utility.folder_from_path(name,"_"))
    for o in listfolder:
        listfoldersegmentsprefix.append(swift_container +"/" + o)
    '''
    for o in listfoldersegmentsprefix:
        print(o)
    '''
    #_____________________________________________________________________________________________________________________
    # Segment folders with no manifest pointing at them -> orphans.
    segments_to_delete = []
    # Manifests whose segment folder was not found in the listing.
    segments_not_listed = []
    for manifesturlsegments in listfoldersegmentsprefix :
        if manifesturlsegments not in listfoldermanifest:
            segments_to_delete.append(manifesturlsegments)
    for manifest in segments_to_delete:
        print("Segments folder to delete: " + manifest)
    for manifesturl in listfoldermanifest :
        if manifesturl not in listfoldersegmentsprefix:
            segments_not_listed.append(manifesturl)
    for manifest in segments_not_listed:
        print("Segments not present in given segments list : " + manifest)
    swift_conn.close()

160
swift_to_local.py Normal file
View File

@@ -0,0 +1,160 @@
__author__ = 'xgiovio'
import authentication
import utility
import os, sys,platform, hashlib,time
from utility import read_in_chunks
def launch(localpath,swift_container,prefix,size_limit_reading_os,download,fail_tries ,md5_compare, encrypted,encrypt_key,excluded_patterns):
    """Mirror a Swift container (under *prefix*) into a local directory.

    Compares remote objects against local files (by size, optionally by MD5)
    and downloads the differing ones.  Objects named "*_xg10v10_encrypted"
    are AES-decrypted in place after download when *encrypted* is true.
    """
    print ("Localpath " + localpath)
    print ("Swift container " + swift_container)
    print ("Swift prefix " + prefix)
    print ("Os reading Limit " + str(size_limit_reading_os))
    print ("Fail tries " + str(fail_tries))
    print ("Download " + str(download))
    print ("MD5 Compare " + str(md5_compare))
    print ("Encrypted " + str(encrypted))
    if encrypted:
        if encrypt_key == None:
            print("Error Bad Encryption Key")
            sys.exit(-3)
        else:
            print ("Encrypted key " + "hidden")
    print("___________")
    # checks: chunk size and path slash conventions
    utility.check_segments_size_single (size_limit_reading_os)
    if utility.check_start_slash(swift_container) or utility.check_start_slash(prefix) or not utility.check_end_slash(localpath) or utility.check_end_slash(swift_container) or not utility.check_end_slash(prefix):
        print ("Errors on local or remote paths. Checks \\ or / at the begin or end of each path")
        sys.exit(-2)
    localfiles = utility.get_local_files (localpath)
    print ("Files locali " + str(len(localfiles)))
    print("___________")
    swift_conn = authentication.set_authentication ()
    swift_conn,objects = utility.get_list(fail_tries,swift_conn,swift_container,prefix)
    # Resolve true sizes (0-byte manifest objects report 0) and remote md5s.
    byte0real,byte0manifest,swift_conn,remotefiles,remotefiles_md5 = utility.list_compute_correct_size (fail_tries,objects,swift_conn,swift_container,prefix)
    print ("Files remoti " + str(len(remotefiles)))
    difffiles = {}
    skipped = 0
    sizetodownload=0
    filestodownload = 0
    skipped_old_md5_large_files = 0
    if encrypted and download :
        import utility_aes
    for rname in remotefiles.keys():
        download_file = True
        for pattern in excluded_patterns:
            if pattern in rname:
                download_file = False
                break
        if download_file:
            #print("Comparing " + lname)
            if encrypted and rname.endswith("_xg10v10_encrypted"):
                # Encrypted names embed: plainname_xg10v10_md5_xg10v10_size_xg10v10_encrypted
                rnamedec = rname.split("_xg10v10_")[0]
                rnamesize = rname.split("_xg10v10_")[2]
                rnamemd5 = rname.split("_xg10v10_")[1]
                if rnamedec not in localfiles.keys() or localfiles[rnamedec] != int(rnamesize) :
                    difffiles[rname] = remotefiles[rname]
                    sizetodownload = sizetodownload + remotefiles[rname]
                    filestodownload = filestodownload + 1
                elif md5_compare :
                    # Same size: compare local md5 with the embedded one.
                    hash = hashlib.md5()
                    with open(localpath + rnamedec, 'rb') as f:
                        for piece in read_in_chunks(f,size_limit_reading_os):
                            hash.update(piece)
                    if hash.hexdigest() != rnamemd5:
                        difffiles[rname] = remotefiles[rname]
                        sizetodownload = sizetodownload + remotefiles[rname]
                        filestodownload = filestodownload + 1
            else:
                if rname not in localfiles.keys() or localfiles[rname] != remotefiles[rname] :
                    difffiles[rname] = remotefiles[rname]
                    sizetodownload = sizetodownload + remotefiles[rname]
                    filestodownload = filestodownload + 1
                elif md5_compare :
                    # remotefiles_md5 == "0" marks old large files without a
                    # usable md5 in x-object-manifest — skip comparison.
                    if remotefiles_md5[rname] != "0":
                        hash = hashlib.md5()
                        with open(localpath + rname, 'rb') as f:
                            for piece in read_in_chunks(f,size_limit_reading_os):
                                hash.update(piece)
                        if hash.hexdigest() != remotefiles_md5[rname] :
                            difffiles[rname] = remotefiles[rname]
                            sizetodownload = sizetodownload + remotefiles[rname]
                            filestodownload = filestodownload + 1
                    else:
                        skipped_old_md5_large_files = skipped_old_md5_large_files + 1
        else:
            print("Skipped " + rname + " due to filters" )
            skipped = skipped + 1
    # --- diff report ---------------------------------------------------
    print("___________Differenze___________")
    print ("Files locali " + str(len(localfiles)))
    print ("Skipped due to filters " + str(skipped))
    if md5_compare:
        print ("Skipped md5 comparison on old large files without md5 embedded in x-object-manifest: " + str(skipped_old_md5_large_files))
    print ("Files remoti " + str(len(remotefiles)))
    print ("Files remoti - skipped " + str(len(remotefiles) - skipped))
    print ("Files remoti 0byte reali " + str(byte0real))
    print ("Files remoti 0byte large " + str(byte0manifest))
    print("___________Files to download______")
    for files, sizes in difffiles.items():
        if encrypted and files.endswith("_xg10v10_encrypted"):
            print(files.split("_xg10v10_")[0],sizes)
        else:
            print(files,sizes)
    print("___________")
    print ("MBytes to download " + str(sizetodownload/1000000))
    print ("Files to download " + str(filestodownload))
    print("___________")
    remainingtodownload = sizetodownload
    if download :
        errors_download = 0
        skipped_downloads = 0
        for file, size in difffiles.items():
            for fail_tries_counter in range (fail_tries) :
                try:
                    print("Downloading File: " + utility.dash_replace(prefix + file) + " " + str(size) )
                    # f is (headers, body-chunk-iterator)
                    f = swift_conn.get_object(swift_container,utility.dash_replace(prefix + file),resp_chunk_size=size_limit_reading_os)
                    if not os.path.isdir(utility.folder_from_path(localpath + file,utility.set_dash())[:-1]):
                        os.makedirs(utility.folder_from_path(localpath + file,utility.set_dash())[:-1])
                    with open( localpath + file,'wb') as t:
                        while True:
                            try:
                                # BUG FIX: `.next()` is Python 2 only —
                                # use the builtin next() on the iterator.
                                t.write(next(f[1]))
                            except StopIteration:
                                break
                    if encrypted and file.endswith("_xg10v10_encrypted"):
                        rnamedec = file.split("_xg10v10_")[0]
                        with open(localpath + file, 'rb') as f:
                            with open(localpath + rnamedec, 'wb') as d:
                                aes = utility_aes.AESCipherFile(f,d, size_limit_reading_os, encrypt_key)
                                aes.decrypt()
                        # Remove the encrypted download, keep the plain file.
                        os.remove(localpath + file)
                        print("Decrypted")
                    remainingtodownload = remainingtodownload - size
                    print("Remaining to download : " + str(remainingtodownload / 1000000) + " MB")
                except Exception as e:
                    print("Exception during download")
                    print(e)
                    time.sleep(1)
                    errors_download = errors_download + 1
                    swift_conn = authentication.set_authentication ()
                    if fail_tries_counter == fail_tries - 1 :
                        print("Maximum tries reached. Skipping download of the file")
                        skipped_downloads = skipped_downloads + 1
                else:
                    break
        print("___________")
        print("Download Terminated : Remaining MB to download " + str(remainingtodownload))
        print("Errors during download : " + str(errors_download))
        print("Skipped files during download : " + str(skipped_downloads))
    else:
        print("Download Disabled")
    swift_conn.close()

222
utility.py Normal file
View File

@@ -0,0 +1,222 @@
__author__ = 'xgiovio'
import authentication
import sys, platform,os
import time
def get_local_files (localpath):
    """Walk *localpath* and return {relative_path: size_in_bytes} for every file."""
    print("Building local files list")
    sizes_by_relpath = {}
    for current_dir, _subdirs, filenames in os.walk(localpath, topdown=True):
        # Strip the root so keys are relative to localpath (directories are skipped).
        relative_dir = current_dir.replace(localpath, "")
        for filename in filenames:
            full_path = os.path.join(current_dir, filename)
            sizes_by_relpath[os.path.join(relative_dir, filename)] = int(os.stat(full_path).st_size)
    return sizes_by_relpath
def get_list (fail_tries,swift_conn,swift_container,prefix) :
    """Download the full object listing of *swift_container* filtered by *prefix*.

    Retries up to fail_tries times, re-authenticating after each failure,
    and exits the process with -1 when every attempt fails.  Returns
    [swift_conn, objects]; the connection is returned because it may have
    been replaced during re-authentication.
    """
    for attempt in range(fail_tries):
        last_attempt = attempt == fail_tries - 1
        try:
            print ("Downloading remote list for " + swift_container + " with prefix " +prefix + " ... ")
            headers, objects = swift_conn.get_container(swift_container, prefix=prefix, full_listing=True)
            break
        except Exception as e:
            print("Exception during the download of remote list")
            print(e)
            time.sleep(1)
            if last_attempt:
                print("Maximum tries reached. Can't download remote list for container " + swift_container + " with prefix " +prefix +". Exiting.")
                sys.exit(-1)
            # Stale token is the usual cause: build a fresh connection and retry.
            swift_conn = authentication.set_authentication ()
    return [swift_conn,objects]
def filter_list_begin(objects,beginpattern,prefix):
    """Return {relative_name: size} for non-directory objects whose full name
    starts with *beginpattern*.

    objects: listing entries as returned by swiftclient get_container --
    dicts with "name", "bytes" and "content_type" keys.  *prefix* is removed
    from each name via str.replace (all occurrences, matching the rest of
    this module).
    """
    filtered = {}
    for o in objects:
        # Directory placeholder objects carry no payload; skip them.
        if o["content_type"] == "application/directory":
            continue
        # str.startswith is the idiomatic (and cheaper) form of .find(...) == 0.
        if o["name"].startswith(beginpattern):
            filtered[o["name"].replace(prefix, "")] = int(o["bytes"])
    return filtered
def search_list (objects,pattern,prefix):
    """Return {relative_name: size} for non-directory objects whose name contains *pattern*."""
    return {
        o["name"].replace(prefix, ""): int(o["bytes"])
        for o in objects
        if o["content_type"] != "application/directory" and pattern in o["name"]
    }
def list_compute_correct_size (fail_tries,objects,swift_conn,swift_container,prefix):
    """Build {local_name: size} and {local_name: md5} maps from a container listing,
    resolving the real size of 0-byte manifest objects (Swift dynamic large
    objects) via a HEAD request per object.

    Returns [byte0real, byte0manifest, swift_conn, remotefiles, remotefiles_md5]:
    counts of genuinely-empty files and of manifest objects, the (possibly
    re-created) connection, and the two name-keyed maps.  Exits the process
    with -1 when a HEAD request keeps failing after fail_tries attempts.
    """
    remotefiles = {}
    remotefiles_md5 = {}
    byte0real = 0      # 0-byte objects that really are empty files
    byte0manifest = 0  # 0-byte objects that are large-object manifests
    for o in objects :
        if o["content_type"] != "application/directory":
            if int(o["bytes"]) == 0 :
                # A 0-byte object may be a large-object manifest whose true
                # aggregate size only appears in its headers.
                print ("Requesting metadata for 0byte file " + o["name"] )
                for fail_tries_counter in range (fail_tries) :
                    try:
                        oheaders = swift_conn.head_object(swift_container,o["name"])
                    except Exception as e:
                        print("Exception during the request of metadata")
                        print(e)
                        time.sleep(1)
                        if fail_tries_counter == fail_tries - 1 :
                            print("Maximum tries reached. Can't download sizes for all large files on container " + swift_container + " with prefix: " +prefix +". Exiting.")
                            sys.exit(-1)
                        else:
                            # Re-authenticate before the next attempt.
                            swift_conn = authentication.set_authentication ()
                    else :
                        break
                if "x-object-manifest" in oheaders.keys():
                    # Manifest object: take the real size from content-length
                    # (note: mutates the listing entry in place).
                    print ("0byte file " + o["name"] + " e' un large file" )
                    o["bytes"] = oheaders["content-length"]
                    if "_xg10v10_" in oheaders["x-object-manifest"]:
                        # Uploads made by this tool embed the md5 in the manifest
                        # path as "<container>/<md5>_xg10v10_..." -- recover it.
                        split1 = oheaders["x-object-manifest"].split("_xg10v10_")[0]
                        split2 = split1.split("/")[1]
                        o["hash"] = split2
                    else:
                        print("Impossible to get remote large file md5. Cause: Not uploaded with xgiovio method (md5 in x-object-manifest)")
                        o["hash"] = "0"
                    byte0manifest = byte0manifest + 1
                else:
                    print ("0byte file " + o["name"] + " e' un file normale" )
                    byte0real = byte0real + 1
            remotefiles[remote_dash_replace(o["name"].replace(prefix,""))]=int(o["bytes"])
            remotefiles_md5[remote_dash_replace(o["name"].replace(prefix,""))]=o["hash"]
    return [byte0real,byte0manifest,swift_conn,remotefiles,remotefiles_md5]
def list_compute_correct_names_for_enctyption (objects,prefix):
    """Map the "<md5>_xg10v10_encrypted" alias of each non-directory object to
    its full local-style name; names without the encrypted suffix map to
    themselves.  (The typo in this function's name is kept for callers.)
    """
    remotefiles = {}
    for entry in objects:
        if entry["content_type"] == "application/directory":
            continue
        local_name = remote_dash_replace(entry["name"].replace(prefix, ""))
        if local_name.endswith("_xg10v10_encrypted"):
            alias = local_name.split("_xg10v10_")[0] + "_xg10v10_encrypted"
        else:
            alias = local_name
        remotefiles[alias] = local_name
    return remotefiles
def list_compute_manifest (fail_tries,objects,swift_conn,swift_container,prefix):
    """Collect, for every 0-byte object that is a large-object manifest, the
    value of its x-object-manifest header, keyed by name with *prefix* stripped.

    Returns [byte0manifest, swift_conn, remotefiles]; the connection is
    returned because it may have been re-created during retries.  Exits the
    process with -1 when a HEAD request keeps failing after fail_tries attempts.
    """
    remotefiles = {}
    byte0manifest = 0
    for o in objects :
        if o["content_type"] != "application/directory":
            if int(o["bytes"]) == 0 :
                # Only 0-byte objects can be manifests; fetch their headers.
                print ("Requesting metadata for 0byte file " + o["name"] )
                for fail_tries_counter in range (fail_tries) :
                    try:
                        oheaders = swift_conn.head_object(swift_container,o["name"])
                    except Exception as e:
                        print("Exception during the request of metadata")
                        print(e)
                        time.sleep(1)
                        if fail_tries_counter == fail_tries - 1 :
                            print("Maximum tries reached. Can't download sizes for all large files on container " + swift_container + " with prefix: " +prefix +". Exiting.")
                            sys.exit(-1)
                        else:
                            # Re-authenticate before the next attempt.
                            swift_conn = authentication.set_authentication ()
                    else :
                        break
                if "x-object-manifest" in oheaders.keys():
                    byte0manifest = byte0manifest + 1
                    remotefiles[(o["name"].replace(prefix,""))]=oheaders["x-object-manifest"]
    return [byte0manifest,swift_conn,remotefiles]
def list (objects,prefix):
    """Return {relative_name: size} for every non-directory listing entry.

    NOTE: this function shadows the builtin ``list`` within this module;
    the name is kept because external callers depend on it.
    """
    return {
        o["name"].replace(prefix, ""): int(o["bytes"])
        for o in objects
        if o["content_type"] != "application/directory"
    }
def listall (objects,prefix):
    """Return {relative_name: size} for every listing entry, directories included."""
    return {o["name"].replace(prefix, ""): int(o["bytes"]) for o in objects}
def check_segments_size (size_limit_reading_os,size_limit_to_segment):
    """Abort the process (exit code -2) unless both sizes are even, the read
    chunk is at least 16 bytes and no larger than the segment size."""
    sizes_ok = (
        size_limit_reading_os % 2 == 0
        and size_limit_to_segment % 2 == 0
        and size_limit_reading_os <= size_limit_to_segment
        and size_limit_reading_os >= 16
    )
    if not sizes_ok:
        print ("Error size segments")
        sys.exit(-2)
def check_segments_size_single (size_limit_reading_os):
    """Abort the process (exit code -2) unless the read chunk size is even and at least 16."""
    if size_limit_reading_os % 2 == 0 and size_limit_reading_os >= 16:
        return
    print ("Error size segments")
    sys.exit(-2)
def file_only_name (stringa,delimiter):
    """Return the last delimiter-separated component of *stringa* (the bare file name)."""
    # Negative indexing replaces split(...)[len(split(...)) - 1], which split
    # the string twice just to address its last element.
    return stringa.split(delimiter)[-1]
def folder_from_path (stringa,delimiter):
    """Return the directory part of *stringa* including a trailing delimiter,
    or "" when *stringa* contains no delimiter."""
    # Split once and re-join every component but the last, giving each kept
    # component its delimiter back ("a/b/c" -> "a/b/").  The original re-split
    # the string inside the loop and built the result with quadratic
    # string concatenation.
    parts = stringa.split(delimiter)
    return "".join(part + delimiter for part in parts[:-1])
def check_end_slash (stringa) :
    """Return True when *stringa* ends with a path separator ("/" or "\\").

    Uses endswith with a tuple of candidates; unlike the original
    stringa[len(stringa) - 1] indexing, this is safe for the empty string
    (returns False instead of raising IndexError).
    """
    return stringa.endswith(("\\", "/"))
def check_start_slash (stringa) :
    """Return True when *stringa* starts with a path separator ("/" or "\\").

    Uses startswith with a tuple of candidates; unlike the original
    stringa[0] indexing, this is safe for the empty string (returns False
    instead of raising IndexError).
    """
    return stringa.startswith(("\\", "/"))
def read_in_chunks(file_object, chunk_size):
    """Yield successive reads of up to *chunk_size* from *file_object* until EOF."""
    data = file_object.read(chunk_size)
    while data:
        yield data
        data = file_object.read(chunk_size)
def total_size_encrypted (original_size) :
    """Size on disk of the encrypted form of a file: the payload rounded up to
    a whole 16-byte block, plus 17 bytes of header (1 marker byte + 16-byte IV,
    as written by AESCipherFile.encrypt)."""
    remainder = original_size % 16
    padding = 0 if remainder == 0 else 16 - remainder
    return original_size + padding + 17
def dash_replace (string):
    """On Windows, turn backslashes into forward slashes; elsewhere return *string* unchanged."""
    if platform.system() != "Windows":
        return string
    return string.replace("\\", "/")
def remote_dash_replace (string):
    """On Windows, turn forward slashes into backslashes; elsewhere return *string* unchanged."""
    if platform.system() != "Windows":
        return string
    return string.replace("/", "\\")
def set_dash():
    """Return the local path separator: "\\" on Windows, "/" everywhere else."""
    return "\\" if platform.system() == "Windows" else "/"

102
utility_aes.py Normal file
View File

@@ -0,0 +1,102 @@
import hashlib,os
from utility import read_in_chunks
# Prefer PyCrypto for AES; fall back to the pure-Python pyaes package when
# PyCrypto is not installed.  The module-level flag `pycrypto` records which
# backend is active and is consulted by AESCipherFile below.
try:
    pycrypto = True
    from Crypto.Cipher import AES
    from Crypto import Random
    print("Using PyCrypto for AES enc/dec")
except ImportError:
    pycrypto = False
    import pyaes
    print("Using Pyaes for AES enc/dec")
class AESCipherFile:
    """Streamed AES-256-CBC encryption/decryption between two file objects.

    File layout produced by encrypt():
      byte 0      : b"1" when the plaintext length is a multiple of 16
                    (no padding added), b"0" otherwise
      bytes 1-16  : the CBC initialisation vector
      bytes 17-.. : the ciphertext; when marked b"0" the final block carries
                    PKCS#7-style padding

    The 32-byte AES key is the SHA-256 digest of the passphrase.  PyCrypto is
    used when available, otherwise pyaes (module-level flag `pycrypto`).
    NOTE(review): correct operation assumes size_limit_reading_os is a
    multiple of 16, otherwise intermediate chunks would be padded mid-stream
    -- confirm against callers.
    """

    def __init__( self ,f,d, size_limit_reading_os, key ):
        # Derive a fixed-length 32-byte key from the passphrase.
        h = hashlib.sha256()
        h.update(key.encode("utf_8"))
        self.key = h.digest()
        self.f = f  # source file object (binary mode)
        self.d = d  # destination file object (binary mode)
        self.size_limit_reading_os = size_limit_reading_os  # chunk size in bytes

    def pad(self, b):
        # Pad to the next 16-byte boundary; every added byte holds the pad length.
        b = bytearray(b)
        fill = 16 - len(b) % 16
        b.extend([fill] * fill)
        return bytes(b)

    def unpad(self, b):
        # The last byte tells how many padding bytes to strip.
        return b[:-b[len(b) - 1]]

    def encrypt( self ):
        """Encrypt self.f into self.d; return the md5 hexdigest of the plaintext."""
        if pycrypto:
            iv = Random.new().read( 16 )
            aes_encrypt_state = AES.new( self.key, AES.MODE_CBC, iv )
        else:
            iv = os.urandom(16)
            aes_encrypt_state = pyaes.AESModeOfOperationCBC(self.key, iv = iv)
        # Record whether the plaintext is block-aligned, so decrypt() knows
        # whether padding must be stripped from the final block.
        self.f.seek(0, 2)
        if self.f.tell() % 16 == 0:
            self.d.write(bytes(b"1"))
        else:
            self.d.write(bytes(b"0"))
        self.f.seek(0, 0)
        self.d.write(iv)
        plain_md5 = hashlib.md5()  # md5 of the plaintext (renamed from `hash`, which shadowed the builtin)
        for piece in read_in_chunks(self.f, self.size_limit_reading_os):
            plain_md5.update(piece)
            # Only a trailing partial chunk needs padding (see class note on
            # the chunk-size assumption).
            if len(piece) % 16 > 0:
                piece = self.pad(piece)
            steps = int(len(piece) / 16)
            for i in range(steps):
                self.d.write(aes_encrypt_state.encrypt( piece[16 * i : 16 * (i + 1)] ))
        return plain_md5.hexdigest()

    def decrypt( self ):
        """Decrypt self.f (as produced by encrypt()) into self.d."""
        perfect = self.f.read(1) == bytes(b"1")  # True -> final block has no padding
        if pycrypto:
            aes_decrypt_state = AES.new( self.key, AES.MODE_CBC, self.f.read(16) )
        else:
            aes_decrypt_state = pyaes.AESModeOfOperationCBC(self.key, self.f.read(16))
        # The very last ciphertext block may contain padding, so the final
        # block of each chunk is held back and only flushed once another chunk
        # proves more data follows.  BUGFIX: the held-back block is kept as
        # CIPHERTEXT and decrypted exactly once, in stream order.  The previous
        # version stored the already-decrypted block and ran it through
        # decrypt() a second time, which corrupted both that block and the CBC
        # chain for any file larger than one chunk; single-chunk files are
        # unaffected by this change.
        held_cipher_block = None
        for piece in read_in_chunks(self.f, self.size_limit_reading_os):
            if held_cipher_block is not None:
                self.d.write(aes_decrypt_state.decrypt(held_cipher_block))
            steps = int(len(piece) / 16)
            # Decrypt every block of this chunk except the last, which becomes
            # the new held-back candidate for padding removal.
            for i in range(steps - 1):
                self.d.write(aes_decrypt_state.decrypt( piece[16 * i : 16 * (i + 1)] ))
            held_cipher_block = piece[16 * (steps - 1) : 16 * steps]
        if held_cipher_block is not None:
            last_plain = aes_decrypt_state.decrypt(held_cipher_block)
            if perfect:
                self.d.write(last_plain)
            else:
                self.d.write(self.unpad(last_plain))
''' usage
with open("c:\\test\\file", 'rb') as f:
with open("c:\\test\\file_enc", 'wb') as d:
aes = AESCipherFile(f,d,134217728,"chiave")
aes.encrypt()
with open("c:\\test\\file_enc", 'rb') as f:
with open("c:\\test\\file", 'wb') as d:
aes = AESCipherFile(f,d,134217728,"chiave")
aes.decrypt()
'''