Add files via upload
This commit is contained in:
327
alpha/tool/each_folder.py
Normal file
327
alpha/tool/each_folder.py
Normal file
@@ -0,0 +1,327 @@
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
# This script is based on a script provided by Project Maintainer: Yukata-Sawada. I (markxu98) modified it for my own use. I tested it only on my machines
# with only my own files. Although I believe the structure of my files should reflect many different types of users/organizations, I cannot guarantee this
# script is suitable for ALL. So I suggest use it, understand it, and modify it by yourself to fit.

# The purpose of this script is to generate PAR2 files with (somewhat) "best" storage efficiency with (somewhat) "good" reliability, while spending
# (somewhat) "less" time or using (somewhat) "less" processing power.

# The following "const" values are optimized by me. They should be okay for most cases. Use caution when changing any of them. I added more comments to
# certain ones important to calculation.

# Path of the par2j client executable (relative to the working directory).
client_path = "../par2j64.exe"

# Options passed to par2j on every invocation.
# Don't use /ss, /sn, /sr, or /sm here — those are managed by this script.
cmd_option = "/rr10 /rd1 /rf3"

# How to set slices initially (either /ss, /sn, or /sr).
# The default setting is /sr10 in MultiPar.
init_slice_option = "/sr10"

# Slice size multiplier (used in all commands in this script).
# It is good to set this to the cluster size of the HDD/SSD (4k == 4096 by default).
slice_size_multiplier = 4096

# Max number of slices at searching good efficiency (20000 by default).
# A larger number of slices is good for many files or varied sizes.
# This value can be ignored if the initial slice count is even larger.
# Set this value to 32768 to ignore it completely.
max_slice_count = 20000

# Max percent of slices at searching good efficiency (170% by default).
# If initial_count is less than max_slice_count, the maximum slice count at
# searching is the smaller value of max_slice_count and
# (initial_count * max_slice_rate / 100).
# If initial_count is not less than max_slice_count, max_slice_count is
# ignored, and the maximum slice count at searching is the smaller value of
# 32768 and (initial_count * max_slice_rate / 100).
# This will also be used to calculate the min_slice_size.
max_slice_rate = 170
#max_slice_rate = 0
# If you have good processing power, you can set the above rate to 0; then BOTH
# max_slice_count and max_slice_rate will be ignored, and 32768 will be fixed
# as the max slice count at searching.

# Min number of slices at searching good efficiency (100 by default).
# Normally a smaller number of slices tends to achieve higher efficiency,
# but too few slices is bad against random error.
# If, by very small chance, the calculated maximum slice count is less than
# min_slice_count, no searching is needed and min_slice_count is used to
# generate the PAR2 file.
min_slice_count = 100

# Min percent of slices at searching good efficiency (30% by default).
# Normally a smaller number of slices tends to achieve higher efficiency,
# but too few slices is bad against random error. It may need at least 50%.
# If initial_count is more than min_slice_count, the minimum slice count at
# searching is the greater value of min_slice_count and
# (initial_count * min_slice_rate / 100).
# If initial_count is not more than min_slice_count, min_slice_rate is
# ignored, and the minimum slice count at searching is min_slice_count.
# This will also be used to calculate the max_slice_size.
min_slice_rate = 30
#min_slice_rate = 0
# If you have good processing power, you can set the above rate to 0; then this
# rate will be ignored and min_slice_count is used at searching.

# Caution: You CAN set max_slice_rate = 0 and min_slice_rate = 0 at the same
# time to search (almost) the WHOLE range, from min_slice_count to 32768.

# Min efficiency improvement that will be regarded as "better" (0.3% by default).
# If the efficiency improvement is not significant, it is unreasonable to use a
# larger slice count. This value controls how significant an improvement must
# be to update the best slice count at searching.
min_efficiency_improvement = 0.3
#min_efficiency_improvement = 0
# If you want to achieve the "absolute" best efficiency, you can set the above to 0.
|
||||
# If you want to achieve "absolute" best efficiency, you can set above to 0.
|
||||
|
||||
# Read "Efficiency rate"
|
||||
def read_efficiency(output_text):
|
||||
# Find from the last
|
||||
line_start = output_text.rfind("Efficiency rate\t\t:")
|
||||
if line_start != -1:
|
||||
line_start += 19
|
||||
line_end = output_text.find("%\n", line_start)
|
||||
#print("line_start=", line_start)
|
||||
#print("line_end=", line_end)
|
||||
return float(output_text[line_start:line_end])
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
# Read "Input File Slice count"
|
||||
def read_slice_count(output_text):
|
||||
# Find from the top
|
||||
line_start = output_text.find("Input File Slice count\t:")
|
||||
if line_start != -1:
|
||||
line_start += 25
|
||||
line_end = output_text.find("\n", line_start)
|
||||
return int(output_text[line_start:line_end])
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
# Read "Input File Slice size"
|
||||
def read_slice_size(output_text):
|
||||
# Find from the top
|
||||
line_start = output_text.find("Input File Slice size\t:")
|
||||
if line_start != -1:
|
||||
line_start += 24
|
||||
line_end = output_text.find("\n", line_start)
|
||||
return int(output_text[line_start:line_end])
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
# Search setting of good efficiency
def _run_slice_test(slice_option, par_path):
    """Run par2j in test mode ("t") with one slice option; parse the result.

    Returns (stdout, efficiency) on success, or (None, -1.0) when par2j
    exits non-zero or its output holds no readable "Efficiency rate".
    """
    # Quote both paths because they may contain spaces.
    cmd = ("\"" + client_path + "\" t /uo " + slice_option
           + " /sm" + str(slice_size_multiplier) + " " + cmd_option
           + " \"" + par_path + "\" *")
    res = subprocess.run(cmd, shell=True, capture_output=True, encoding='utf8')
    if res.returncode != 0:
        return None, -1.0
    efficiency = read_efficiency(res.stdout)
    if efficiency < 0:
        return None, -1.0
    return res.stdout, efficiency


def test_efficiency(par_path):
    """Search for the slice size that gives the best PAR2 storage efficiency.

    *par_path* is the PAR2 file path handed to par2j; the protected files
    are the "*" siblings of that path. Returns the best slice size found,
    or 0 on any par2j failure.
    """
    # First run with the initial slice option to get the baseline values.
    out, best_efficiency_at_initial_count = _run_slice_test(init_slice_option, par_path)
    if out is None:
        return 0
    # DON'T take initial_count as the best yet: the search below may find a
    # smaller slice count with the same efficiency, which is preferable.
    initial_count = read_slice_count(out)
    if initial_count <= 0:
        return 0
    initial_size = read_slice_size(out)

    # Maximum slice count at searching (co-related to minimum slice size).
    if max_slice_rate != 0:
        # When initial_count already exceeds max_slice_count, the cap is the
        # hard limit of 32768; otherwise it is max_slice_count.
        cap = 32768 if initial_count > max_slice_count else max_slice_count
        max_count = min(cap, int(initial_count * max_slice_rate / 100))
    else:
        max_count = 32768

    # "/sn" searches around the requested count (from -12.5% to +6.25%);
    # this yields the "real" maximum count par2j will use.
    out, best_efficiency_at_max_count = _run_slice_test("/sn" + str(max_count), par_path)
    if out is None:
        return 0
    # DON'T take max_count as the best yet either: a smaller count with the
    # same efficiency would require less processing power.
    best_count_at_max_count = read_slice_count(out)
    best_size_at_max_count = read_slice_size(out)
    max_count = best_count_at_max_count

    # Minimum slice count at searching (co-related to maximum slice size).
    if min_slice_rate > 0 and (initial_count * min_slice_rate / 100) > min_slice_count:
        min_count = int(initial_count * min_slice_rate / 100)
    else:
        min_count = min_slice_count
    # Again, "/sn" yields the "real" minimum count par2j will use.
    out, best_efficiency = _run_slice_test("/sn" + str(min_count), par_path)
    if out is None:
        return 0
    min_count = read_slice_count(out)
    best_count = min_count
    best_size = read_slice_size(out)

    # If the calculated maximum slice count is too small, no need to search
    # (QUITE UNLIKELY to happen): just probe min_slice_count directly.
    if max_slice_rate > 0 and (initial_count * max_slice_rate / 100) <= min_slice_count:
        out, best_efficiency = _run_slice_test("/sn" + str(min_slice_count), par_path)
        if out is None:
            return 0
        return read_slice_size(out)

    # Try every (stepped) slice count between min_count and max_count.
    probe_count = int((min_count + 1) * 8 / 7)
    while probe_count < max_count:
        out, efficiency = _run_slice_test("/sn" + str(probe_count), par_path)
        if out is None:
            break  # keep the best result found so far
        if efficiency > best_efficiency + min_efficiency_improvement:
            best_count = read_slice_count(out)
            best_size = read_slice_size(out)
            best_efficiency = efficiency
        # The next count must exceed 17/16 of this one: the range up to
        # +6.25% was already covered by the "/sn" search window.
        probe_count = int((int(probe_count * 17 / 16) + 1) * 8 / 7)

    # Prefer the initial count when it is smaller and (nearly) as efficient.
    if initial_count < best_count and best_efficiency_at_initial_count > best_efficiency - min_efficiency_improvement:
        best_count = initial_count
        best_size = initial_size
        best_efficiency = best_efficiency_at_initial_count
    # Take the max-count result only when it is significantly better.
    if best_efficiency_at_max_count > best_efficiency + min_efficiency_improvement:
        best_size = best_size_at_max_count

    # Slice size achieving the best efficiency found.
    return best_size
|
||||
|
||||
# Return sub-process's ExitCode
def command(cmd):
    """Run *cmd* through the shell and hand back its exit code."""
    completed = subprocess.run(cmd, shell=True)
    return completed.returncode
|
||||
|
||||
|
||||
# Return zero for empty folder
def check_empty(path='.'):
    """Return a nonzero byte total as soon as any file data is found under
    *path* (recursing into sub-folders); 0 means the tree holds no data."""
    found_bytes = 0
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir():
                # Recurse; a nonzero sub-total is enough to stop scanning.
                found_bytes += check_empty(entry.path)
            elif entry.is_file():
                found_bytes += entry.stat().st_size
            if found_bytes > 0:
                break  # early exit: we only need to know it is non-empty
    return found_bytes
|
||||
|
||||
|
||||
# Read arguments of command-line: each argument should be a folder to
# protect with PAR2 files. (The unused enumerate index was removed.)
for one_path in sys.argv[1:]:
    one_name = os.path.basename(one_path)

    # Check the folder exists
    if not os.path.isdir(one_path):
        print(one_name + " isn't folder.")
        continue

    # Check empty folder
    if check_empty(one_path) == 0:
        print(one_name + " is empty folder.")
        continue

    print(one_name + " is folder.")

    # Path of creating PAR file (inside the folder, named after it).
    # os.path.join avoids a hard-coded separator and a doubled one when
    # the argument already ends with a separator.
    par_path = os.path.join(one_path, one_name + ".par2")

    # Check the PAR file exists already
    if os.path.exists(par_path):
        print(one_name + " includes PAR file already.")
        continue

    # Test setting for good efficiency
    slice_size = test_efficiency(par_path)
    if slice_size == 0:
        print("Failed to test options.")
        continue

    # Set command-line; cover paths with " for possible spaces.
    # If you want to see the creating result only, use the "t" command
    # instead of "c".
    cmd = "\"" + client_path + "\" c /ss" + str(slice_size) + " " + cmd_option + " \"" + par_path + "\" *"

    # Process the command
    print("Creating PAR files.")
    error_level = command(cmd)

    # Check error: exit the loop when an error occurs.
    if error_level > 0:
        print("Error=", error_level)
        break

# If you don't confirm result, comment out below line.
input('Press [Enter] key to continue . . .')
|
||||
Reference in New Issue
Block a user