Add files via upload
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
[ par2j.exe - version 1.3.3.1 or later ]
|
[ par2j.exe - version 1.3.3.2 or later ]
|
||||||
|
|
||||||
Type "par2j.exe" to see version, test integrity, and show usage below.
|
Type "par2j.exe" to see version, test integrity, and show usage below.
|
||||||
|
|
||||||
@@ -369,14 +369,22 @@ The format is "/lc#" (# is from 1 to 32 as the number of using threads).
|
|||||||
0: It uses the number of physical Cores.
|
0: It uses the number of physical Cores.
|
||||||
255: It tries to use more threads than the number of physical Cores.
|
255: It tries to use more threads than the number of physical Cores.
|
||||||
|
|
||||||
You may set additional combinations;
|
You may set additional combinations for CPU features;
|
||||||
+1024 to disable CLMUL (and use slower SSSE3 code),
|
+1024 to disable CLMUL (and use slower SSSE3 code)
|
||||||
+2048 to disable JIT (for SSE2),
|
+2048 to disable JIT (for SSE2)
|
||||||
+4096 to disable SSSE3,
|
+4096 to disable SSSE3
|
||||||
+8192 to disable AVX2,
|
+8192 to disable AVX2
|
||||||
+256 or +512 (slower device) to enable GPU acceleration.
|
|
||||||
|
|
||||||
for example, /lc1 to use single Core, /lc45 to use half Cores and GPU
|
You may set additional combinations for GPU control;
|
||||||
|
+256 or +512 (slower device) to enable GPU acceleration
|
||||||
|
+65536 for classic method
|
||||||
|
+131072 for 16-byte memory access
|
||||||
|
+262144 for 4-byte memory access and calculate 2 blocks at once
|
||||||
|
+524288 for 16-byte memory access and calculate 2 blocks at once
|
||||||
|
+1048576 for CL_MEM_COPY_HOST_PTR or +2097152 for CL_MEM_USE_HOST_PTR
|
||||||
|
(When you set mutually exclusive bits, the larger value will be used.)
|
||||||
|
|
||||||
|
for example, /lc1 to use a single Core, /lc508 to use half of the Cores and the GPU
|
||||||
|
|
||||||
/m :
|
/m :
|
||||||
Set this, if you want to set memory usage.
|
Set this, if you want to set memory usage.
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// create.c
|
// create.c
|
||||||
// Copyright : 2023-10-22 Yutaka Sawada
|
// Copyright : 2023-12-12 Yutaka Sawada
|
||||||
// License : GPL
|
// License : GPL
|
||||||
|
|
||||||
#ifndef _UNICODE
|
#ifndef _UNICODE
|
||||||
@@ -26,6 +26,11 @@
|
|||||||
|
|
||||||
//#define TIMER // 実験用
|
//#define TIMER // 実験用
|
||||||
|
|
||||||
|
#ifdef TIMER
|
||||||
|
#include <time.h>
|
||||||
|
static double time_sec, time_speed;
|
||||||
|
#endif
|
||||||
|
|
||||||
// ソート時に項目を比較する
|
// ソート時に項目を比較する
|
||||||
static int sort_cmp(const void *elem1, const void *elem2)
|
static int sort_cmp(const void *elem1, const void *elem2)
|
||||||
{
|
{
|
||||||
@@ -196,7 +201,7 @@ int set_common_packet(
|
|||||||
__int64 prog_now = 0;
|
__int64 prog_now = 0;
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_start = GetTickCount();
|
clock_t time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
print_progress_text(0, "Computing file hash");
|
print_progress_text(0, "Computing file hash");
|
||||||
|
|
||||||
@@ -305,14 +310,14 @@ unsigned int time_start = GetTickCount();
|
|||||||
off += (64 + main_packet_size);
|
off += (64 + main_packet_size);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount() - time_start;
|
time_start = clock() - time_start;
|
||||||
printf("hash %d.%03d sec", time_start / 1000, time_start % 1000);
|
time_sec = (double)time_start / CLOCKS_PER_SEC;
|
||||||
if (time_start > 0){
|
if (time_sec > 0){
|
||||||
time_start = (int)((total_file_size * 125) / ((__int64)time_start * 131072));
|
time_speed = (double)total_file_size / (time_sec * 1048576);
|
||||||
printf(", %d MB/s\n", time_start);
|
|
||||||
} else {
|
} else {
|
||||||
printf("\n");
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
|
printf("hash %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
error_end:
|
error_end:
|
||||||
@@ -341,7 +346,7 @@ int set_common_packet_multi(
|
|||||||
FILE_HASH_TH th[MAX_MULTI_READ];
|
FILE_HASH_TH th[MAX_MULTI_READ];
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_start = GetTickCount();
|
clock_t time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memset(hSub, 0, sizeof(HANDLE) * MAX_MULTI_READ);
|
memset(hSub, 0, sizeof(HANDLE) * MAX_MULTI_READ);
|
||||||
@@ -545,14 +550,14 @@ unsigned int time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
print_progress_done(); // 改行して行の先頭に戻しておく
|
print_progress_done(); // 改行して行の先頭に戻しておく
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount() - time_start;
|
time_start = clock() - time_start;
|
||||||
printf("hash %d.%03d sec", time_start / 1000, time_start % 1000);
|
time_sec = (double)time_start / CLOCKS_PER_SEC;
|
||||||
if (time_start > 0){
|
if (time_sec > 0){
|
||||||
time_start = (int)((total_file_size * 125) / ((__int64)time_start * 131072));
|
time_speed = (double)total_file_size / (time_sec * 1048576);
|
||||||
printf(", %d MB/s\n", time_start);
|
|
||||||
} else {
|
} else {
|
||||||
printf("\n");
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
|
printf("hash %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
error_end:
|
error_end:
|
||||||
@@ -700,7 +705,7 @@ int set_common_packet_hash(
|
|||||||
__int64 prog_now = 0;
|
__int64 prog_now = 0;
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_start = GetTickCount();
|
clock_t time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
print_progress_text(0, "Computing file hash");
|
print_progress_text(0, "Computing file hash");
|
||||||
|
|
||||||
@@ -740,8 +745,8 @@ unsigned int time_start = GetTickCount();
|
|||||||
print_progress_done(); // 改行して行の先頭に戻しておく
|
print_progress_done(); // 改行して行の先頭に戻しておく
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount() - time_start;
|
time_start = clock() - time_start;
|
||||||
printf("hash %d.%03d sec\n", time_start / 1000, time_start % 1000);
|
printf("hash %.3f sec\n", (double)time_start / CLOCKS_PER_SEC);
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -1065,7 +1070,7 @@ int create_recovery_file(
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_start = GetTickCount();
|
clock_t time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
print_progress_text(0, "Constructing recovery file");
|
print_progress_text(0, "Constructing recovery file");
|
||||||
time_last = GetTickCount();
|
time_last = GetTickCount();
|
||||||
@@ -1258,8 +1263,8 @@ unsigned int time_start = GetTickCount();
|
|||||||
print_progress_done(); // 改行して行の先頭に戻しておく
|
print_progress_done(); // 改行して行の先頭に戻しておく
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount() - time_start;
|
time_start = clock() - time_start;
|
||||||
printf("write %d.%03d sec\n", time_start / 1000, time_start % 1000);
|
printf("write %.3f sec\n", (double)time_start / CLOCKS_PER_SEC);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
@@ -2795,7 +2795,7 @@ void galois_align_xor(
|
|||||||
void galois_align16_multiply(
|
void galois_align16_multiply(
|
||||||
unsigned char *r1, // Region to multiply (must be aligned by 16)
|
unsigned char *r1, // Region to multiply (must be aligned by 16)
|
||||||
unsigned char *r2, // Products go here
|
unsigned char *r2, // Products go here
|
||||||
unsigned int len, // Byte length (must be multiple of 32)
|
unsigned int len, // Byte length (must be multiple of 16)
|
||||||
int factor) // Number to multiply by
|
int factor) // Number to multiply by
|
||||||
{
|
{
|
||||||
if (factor <= 1){
|
if (factor <= 1){
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
extern unsigned short *galois_log_table;
|
//extern unsigned short *galois_log_table;
|
||||||
extern unsigned int cpu_flag;
|
extern unsigned int cpu_flag;
|
||||||
|
|
||||||
int galois_create_table(void); // Returns 0 on success, -1 on failure
|
int galois_create_table(void); // Returns 0 on success, -1 on failure
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// lib_opencl.c
|
// lib_opencl.c
|
||||||
// Copyright : 2023-11-27 Yutaka Sawada
|
// Copyright : 2023-12-26 Yutaka Sawada
|
||||||
// License : GPL
|
// License : GPL
|
||||||
|
|
||||||
#ifndef _WIN32_WINNT
|
#ifndef _WIN32_WINNT
|
||||||
@@ -84,7 +84,7 @@ cl_command_queue OpenCL_command = NULL;
|
|||||||
cl_kernel OpenCL_kernel = NULL;
|
cl_kernel OpenCL_kernel = NULL;
|
||||||
cl_mem OpenCL_src = NULL, OpenCL_dst = NULL, OpenCL_buf = NULL;
|
cl_mem OpenCL_src = NULL, OpenCL_dst = NULL, OpenCL_buf = NULL;
|
||||||
size_t OpenCL_group_num;
|
size_t OpenCL_group_num;
|
||||||
int OpenCL_method = 0; // 正=速い機器を選ぶ, 負=遅い機器を選ぶ
|
int OpenCL_method = 0; // 標準では GPU を使わず、動作は自動選択される
|
||||||
|
|
||||||
API_clCreateBuffer gfn_clCreateBuffer;
|
API_clCreateBuffer gfn_clCreateBuffer;
|
||||||
API_clReleaseMemObject gfn_clReleaseMemObject;
|
API_clReleaseMemObject gfn_clReleaseMemObject;
|
||||||
@@ -100,7 +100,11 @@ API_clEnqueueNDRangeKernel gfn_clEnqueueNDRangeKernel;
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
入力
|
入力
|
||||||
OpenCL_method : どのデバイスを選ぶか
|
OpenCL_method : どのデバイスや関数を選ぶか
|
||||||
|
0x100 = 速い機器を選ぶ, 0x200 = 遅い機器を選ぶ
|
||||||
|
0x10000 = 4-byte memory access (1 block at a time), 0x20000 = 16-byte memory access (1 block at a time)
|
||||||
|
0x40000 = 4-byte memory access and 2 blocks at once, 0x80000 = 16-byte memory access and 2 blocks at once
|
||||||
|
0x100000 = CL_MEM_COPY_HOST_PTR, 0x200000 = CL_MEM_USE_HOST_PTR
|
||||||
unit_size : ブロックの単位サイズ
|
unit_size : ブロックの単位サイズ
|
||||||
src_max : ソース・ブロック個数
|
src_max : ソース・ブロック個数
|
||||||
|
|
||||||
@@ -111,11 +115,12 @@ OpenCL_method : 動作フラグいろいろ
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
// 0=成功, 1~エラー番号
|
// 0=成功, 1~エラー番号
|
||||||
int init_OpenCL(int unit_size, int *src_max)
|
int init_OpenCL(unsigned int unit_size, int *src_max)
|
||||||
{
|
{
|
||||||
char buf[2048], *p_source;
|
char buf[2048], *p_source;
|
||||||
int err = 0, i, j;
|
int err = 0, i, j;
|
||||||
int gpu_power, count, gpu_flag;
|
int gpu_power, count;
|
||||||
|
int unified_memory; // non zero = Integrated GPU
|
||||||
size_t data_size, alloc_max;
|
size_t data_size, alloc_max;
|
||||||
//FILE *fp;
|
//FILE *fp;
|
||||||
HRSRC res;
|
HRSRC res;
|
||||||
@@ -136,9 +141,10 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
API_clReleaseProgram fn_clReleaseProgram;
|
API_clReleaseProgram fn_clReleaseProgram;
|
||||||
API_clCreateKernel fn_clCreateKernel;
|
API_clCreateKernel fn_clCreateKernel;
|
||||||
API_clGetKernelWorkGroupInfo fn_clGetKernelWorkGroupInfo;
|
API_clGetKernelWorkGroupInfo fn_clGetKernelWorkGroupInfo;
|
||||||
|
API_clReleaseKernel fn_clReleaseKernel;
|
||||||
cl_int ret;
|
cl_int ret;
|
||||||
cl_uint num_platforms = 0, num_devices = 0, num_groups, param_value;
|
cl_uint num_platforms = 0, num_devices = 0, num_groups, param_value;
|
||||||
cl_ulong param_value8, param_value4;
|
cl_ulong param_value8;
|
||||||
cl_platform_id platform_id[MAX_DEVICE], selected_platform; // Intel, AMD, Nvidia などドライバーの提供元
|
cl_platform_id platform_id[MAX_DEVICE], selected_platform; // Intel, AMD, Nvidia などドライバーの提供元
|
||||||
cl_device_id device_id[MAX_DEVICE], selected_device; // CPU や GPU など
|
cl_device_id device_id[MAX_DEVICE], selected_device; // CPU や GPU など
|
||||||
cl_program program;
|
cl_program program;
|
||||||
@@ -215,6 +221,9 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
fn_clGetKernelWorkGroupInfo = (API_clGetKernelWorkGroupInfo)GetProcAddress(hLibOpenCL, "clGetKernelWorkGroupInfo");
|
fn_clGetKernelWorkGroupInfo = (API_clGetKernelWorkGroupInfo)GetProcAddress(hLibOpenCL, "clGetKernelWorkGroupInfo");
|
||||||
if (fn_clGetKernelWorkGroupInfo == NULL)
|
if (fn_clGetKernelWorkGroupInfo == NULL)
|
||||||
return err;
|
return err;
|
||||||
|
fn_clReleaseKernel = (API_clReleaseKernel)GetProcAddress(hLibOpenCL, "clReleaseKernel");
|
||||||
|
if (fn_clReleaseKernel == NULL)
|
||||||
|
return err;
|
||||||
gfn_clFinish = (API_clFinish)GetProcAddress(hLibOpenCL, "clFinish");
|
gfn_clFinish = (API_clFinish)GetProcAddress(hLibOpenCL, "clFinish");
|
||||||
if (gfn_clFinish == NULL)
|
if (gfn_clFinish == NULL)
|
||||||
return err;
|
return err;
|
||||||
@@ -226,12 +235,10 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
ret = fn_clGetPlatformIDs(MAX_DEVICE, platform_id, &num_platforms);
|
ret = fn_clGetPlatformIDs(MAX_DEVICE, platform_id, &num_platforms);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 10;
|
return (ret << 8) | 10;
|
||||||
if (OpenCL_method >= 0){ // 選択する順序と初期値を変える
|
if (OpenCL_method & 0x200){ // 選択する順序と初期値を変える
|
||||||
OpenCL_method = 1;
|
|
||||||
gpu_power = 0;
|
|
||||||
} else {
|
|
||||||
OpenCL_method = -1;
|
|
||||||
gpu_power = INT_MIN;
|
gpu_power = INT_MIN;
|
||||||
|
} else {
|
||||||
|
gpu_power = 0;
|
||||||
}
|
}
|
||||||
alloc_max = 0;
|
alloc_max = 0;
|
||||||
|
|
||||||
@@ -268,20 +275,17 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 取得できなくてもエラーにしない
|
// 取得できなくてもエラーにしない
|
||||||
param_value = 0; // CL_DEVICE_HOST_UNIFIED_MEMORY は OpenCL 2.0 以降で非推奨になった
|
|
||||||
ret = fn_clGetDeviceInfo(device_id[j], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_uint), ¶m_value, NULL);
|
ret = fn_clGetDeviceInfo(device_id[j], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_uint), ¶m_value, NULL);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
if (param_value != 0){
|
||||||
#ifdef DEBUG_OUTPUT
|
#ifdef DEBUG_OUTPUT
|
||||||
if (ret == CL_SUCCESS)
|
|
||||||
printf("HOST_UNIFIED_MEMORY = %d\n", param_value);
|
printf("HOST_UNIFIED_MEMORY = %d\n", param_value);
|
||||||
#endif
|
#endif
|
||||||
if (param_value != 0)
|
|
||||||
param_value = 1;
|
param_value = 1;
|
||||||
param_value4 = 0; // local memory が多い時だけ処理を変える
|
}
|
||||||
ret = fn_clGetDeviceInfo(device_id[j], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), ¶m_value4, NULL);
|
} else { // CL_DEVICE_HOST_UNIFIED_MEMORY は OpenCL 2.0 以降で非推奨になった
|
||||||
#ifdef DEBUG_OUTPUT
|
param_value = 0;
|
||||||
if (ret == CL_SUCCESS)
|
}
|
||||||
printf("LOCAL_MEM_SIZE = %I64d KB\n", param_value4 >> 10);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// 取得できない場合はエラーにする
|
// 取得できない場合はエラーにする
|
||||||
ret = fn_clGetDeviceInfo(device_id[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), ¶m_value8, NULL);
|
ret = fn_clGetDeviceInfo(device_id[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), ¶m_value8, NULL);
|
||||||
@@ -302,7 +306,8 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
#endif
|
#endif
|
||||||
// MAX_COMPUTE_UNITS * MAX_WORK_GROUP_SIZE で計算力を測る、外付けGPUなら値を倍にする
|
// MAX_COMPUTE_UNITS * MAX_WORK_GROUP_SIZE で計算力を測る、外付けGPUなら値を倍にする
|
||||||
count = (2 - param_value) * (int)data_size * num_groups;
|
count = (2 - param_value) * (int)data_size * num_groups;
|
||||||
count *= OpenCL_method; // 符号を変える
|
if (OpenCL_method & 0x200) // Prefer slower device
|
||||||
|
count *= -1; // 符号を変える
|
||||||
//printf("prev = %d, now = %d\n", gpu_power, count);
|
//printf("prev = %d, now = %d\n", gpu_power, count);
|
||||||
if ((count > gpu_power) && (data_size >= 256) && // 256以上ないとテーブルを作れない
|
if ((count > gpu_power) && (data_size >= 256) && // 256以上ないとテーブルを作れない
|
||||||
(param_value8 / 8 > (cl_ulong)unit_size)){ // CL_DEVICE_MAX_MEM_ALLOC_SIZE に収まるか
|
(param_value8 / 8 > (cl_ulong)unit_size)){ // CL_DEVICE_MAX_MEM_ALLOC_SIZE に収まるか
|
||||||
@@ -311,9 +316,7 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
selected_platform = platform_id[i];
|
selected_platform = platform_id[i];
|
||||||
OpenCL_group_num = num_groups; // ワークグループ数は COMPUTE_UNITS 数にする
|
OpenCL_group_num = num_groups; // ワークグループ数は COMPUTE_UNITS 数にする
|
||||||
alloc_max = (size_t)param_value8;
|
alloc_max = (size_t)param_value8;
|
||||||
gpu_flag = param_value; // 0 = discrete GPU, 1 = integrated GPU
|
unified_memory = param_value; // 0 = discrete GPU, 1 = integrated GPU
|
||||||
if (param_value4 >= 32768)
|
|
||||||
gpu_flag |= 2; // local memory が 32KB 以上あるかどうか
|
|
||||||
|
|
||||||
// AMD や Intel の GPU ではメモリー領域が全体の 1/4 とは限らない
|
// AMD や Intel の GPU ではメモリー領域が全体の 1/4 とは限らない
|
||||||
ret = fn_clGetDeviceInfo(device_id[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), ¶m_value8, NULL);
|
ret = fn_clGetDeviceInfo(device_id[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), ¶m_value8, NULL);
|
||||||
@@ -355,49 +358,6 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 12;
|
return (ret << 8) | 12;
|
||||||
|
|
||||||
// 計算方式を選択する
|
|
||||||
if ((((cpu_flag & 0x101) == 1) || ((cpu_flag & 0x110) == 0x10)) && (sse_unit == 32)){
|
|
||||||
if (gpu_flag & 2){
|
|
||||||
OpenCL_method = 3; // local memory が 32KB 以上あれば 16-byte ずつアクセスする
|
|
||||||
} else {
|
|
||||||
OpenCL_method = 2; // SSSE3 & ALTMAP または AVX2 ならデータの並び替え対応版を使う
|
|
||||||
}
|
|
||||||
} else if (((cpu_flag & 128) != 0) && (sse_unit == 256)){
|
|
||||||
OpenCL_method = 4; // JIT(SSE2) は bit ごとに上位から 16バイトずつ並ぶ
|
|
||||||
// ローカルのテーブルサイズが異なることに注意
|
|
||||||
// XOR 方式以外は 2KB (4バイト * 256項目 * 2個) 使う
|
|
||||||
// XOR (JIT) は 64バイト (4バイト * 16項目) 使う
|
|
||||||
} else {
|
|
||||||
OpenCL_method = 1; // 並び替えられてないデータ用
|
|
||||||
}
|
|
||||||
|
|
||||||
// work group 数が必要以上に多い場合は減らす
|
|
||||||
if (OpenCL_method == 2){
|
|
||||||
// work item 一個が 8バイトずつ計算する、256個なら work group ごとに 2KB 担当する
|
|
||||||
data_size = unit_size / 2048;
|
|
||||||
} else if (OpenCL_method == 3){
|
|
||||||
// work item 一個が 32バイトずつ計算する、256個なら work group ごとに 8KB 担当する
|
|
||||||
data_size = unit_size / 8192;
|
|
||||||
} else {
|
|
||||||
// work item 一個が 4バイトずつ計算する、256個なら work group ごとに 1KB 担当する
|
|
||||||
data_size = unit_size / 1024;
|
|
||||||
}
|
|
||||||
if (OpenCL_group_num > data_size){
|
|
||||||
OpenCL_group_num = data_size;
|
|
||||||
printf("Number of work groups is reduced to %zd\n", OpenCL_group_num);
|
|
||||||
}
|
|
||||||
|
|
||||||
// データへのアクセス方法をデバイスによって変える
|
|
||||||
if (gpu_flag & 1){
|
|
||||||
OpenCL_method |= 8; // Integrated GPU なら CL_MEM_USE_HOST_PTR を使う
|
|
||||||
} else { // Discrete GPU なら NVIDIA のだけ flag を変える
|
|
||||||
ret = fn_clGetDeviceInfo(selected_device, CL_DEVICE_VERSION, sizeof(buf), buf, NULL);
|
|
||||||
if (ret == CL_SUCCESS){
|
|
||||||
if (strstr(buf, "CUDA") != NULL)
|
|
||||||
OpenCL_method |= 8; // NVIDIA GPU なら CL_MEM_USE_HOST_PTR を使う
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 最大で何ブロック分のメモリー領域を保持できるのか(ここではまだ確保しない)
|
// 最大で何ブロック分のメモリー領域を保持できるのか(ここではまだ確保しない)
|
||||||
// 後で実際に確保する量はこれよりも少なくなる
|
// 後で実際に確保する量はこれよりも少なくなる
|
||||||
count = (int)(alloc_max / unit_size); // 確保できるメモリー量から逆算する
|
count = (int)(alloc_max / unit_size); // 確保できるメモリー量から逆算する
|
||||||
@@ -409,25 +369,6 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
printf("src buf : %zd KB (%d blocks), possible\n", data_size >> 10, count);
|
printf("src buf : %zd KB (%d blocks), possible\n", data_size >> 10, count);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 出力先は1ブロック分だけあればいい
|
|
||||||
// CL_MEM_ALLOC_HOST_PTRを使えばpinned memoryになるらしい
|
|
||||||
data_size = unit_size;
|
|
||||||
OpenCL_dst = gfn_clCreateBuffer(OpenCL_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, data_size, NULL, &ret);
|
|
||||||
if (ret != CL_SUCCESS)
|
|
||||||
return (ret << 8) | 13;
|
|
||||||
#ifdef DEBUG_OUTPUT
|
|
||||||
printf("dst buf : %zd KB (%zd Bytes), OK\n", data_size >> 10, data_size);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// factor は最大個数分 (src_max個)
|
|
||||||
data_size = sizeof(unsigned short) * (*src_max);
|
|
||||||
OpenCL_buf = gfn_clCreateBuffer(OpenCL_context, CL_MEM_READ_ONLY, data_size, NULL, &ret);
|
|
||||||
if (ret != CL_SUCCESS)
|
|
||||||
return (ret << 8) | 14;
|
|
||||||
#ifdef DEBUG_OUTPUT
|
|
||||||
printf("factor buf : %zd Bytes (%d factors), OK\n", data_size, (*src_max));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// テキスト形式の OpenCL C ソース・コードを読み込む
|
// テキスト形式の OpenCL C ソース・コードを読み込む
|
||||||
err = 4;
|
err = 4;
|
||||||
@@ -528,17 +469,207 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
return (ret << 8) | 21;
|
return (ret << 8) | 21;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 計算方式を選択する
|
||||||
|
if ((((cpu_flag & 0x101) == 1) || ((cpu_flag & 0x110) == 0x10)) && (sse_unit == 32)){
|
||||||
|
int select_method; // SSSE3 & ALTMAP または AVX2 ならデータの並び替え対応版を使う
|
||||||
|
if (OpenCL_method & 0x80000){ // 16-byte and 2 blocks
|
||||||
|
select_method = 12;
|
||||||
|
} else if (OpenCL_method & 0x40000){ // 4-byte and 2 blocks
|
||||||
|
select_method = 10;
|
||||||
|
} else if (OpenCL_method & 0x20000){ // 16-byte
|
||||||
|
select_method = 4;
|
||||||
|
} else if (OpenCL_method & 0x10000){ // 4-byte
|
||||||
|
select_method = 2;
|
||||||
|
} else { // kernel を作って詳細を確かめる
|
||||||
|
size_t item2, item4, item10, item12;
|
||||||
|
cl_kernel kernel2, kernel4, kernel10, kernel12;
|
||||||
|
item2 = item4 = item10 = item12 = 0;
|
||||||
|
// まずは一番重くて速い奴を調べる
|
||||||
|
wsprintfA(buf, "method%d", 12);
|
||||||
|
kernel12 = fn_clCreateKernel(program, buf, &ret);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
ret = fn_clGetKernelWorkGroupInfo(kernel12, selected_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &item12, NULL);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("\nTesting %s\n", buf);
|
||||||
|
printf("PREFERRED_WORK_GROUP_SIZE_MULTIPLE = %zu\n", item12);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (item12 >= 32){ // 32以上あれば余裕で動くとみなす
|
||||||
|
select_method = 12;
|
||||||
|
OpenCL_kernel = kernel12;
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("\nSelected method%d\n", select_method);
|
||||||
|
#endif
|
||||||
|
} else { // 他の奴と比較する
|
||||||
|
wsprintfA(buf, "method%d", 2);
|
||||||
|
kernel2 = fn_clCreateKernel(program, buf, &ret);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
ret = fn_clGetKernelWorkGroupInfo(kernel2, selected_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &item2, NULL);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("\nTesting %s\n", buf);
|
||||||
|
printf("PREFERRED_WORK_GROUP_SIZE_MULTIPLE = %zu\n", item2);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (item12 >= item2){
|
||||||
|
select_method = 12;
|
||||||
|
OpenCL_kernel = kernel12;
|
||||||
|
ret = fn_clReleaseKernel(kernel2);
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
printf("clReleaseKernel : Failed\n");
|
||||||
|
printf("\nSelected method%d\n", select_method);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
ret = fn_clReleaseKernel(kernel12);
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
printf("clReleaseKernel : Failed\n");
|
||||||
|
#endif
|
||||||
|
wsprintfA(buf, "method%d", 10);
|
||||||
|
kernel10 = fn_clCreateKernel(program, buf, &ret);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
ret = fn_clGetKernelWorkGroupInfo(kernel10, selected_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &item10, NULL);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("\nTesting %s\n", buf);
|
||||||
|
printf("PREFERRED_WORK_GROUP_SIZE_MULTIPLE = %zu\n", item10);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (item10 >= item2){
|
||||||
|
select_method = 10;
|
||||||
|
OpenCL_kernel = kernel10;
|
||||||
|
ret = fn_clReleaseKernel(kernel2);
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
printf("clReleaseKernel : Failed\n");
|
||||||
|
printf("\nSelected method%d\n", select_method);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
wsprintfA(buf, "method%d", 4);
|
||||||
|
kernel4 = fn_clCreateKernel(program, buf, &ret);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
ret = fn_clGetKernelWorkGroupInfo(kernel4, selected_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &item4, NULL);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("\nTesting %s\n", buf);
|
||||||
|
printf("PREFERRED_WORK_GROUP_SIZE_MULTIPLE = %zu\n", item4);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (item4 >= item2){
|
||||||
|
select_method = 4;
|
||||||
|
OpenCL_kernel = kernel4;
|
||||||
|
ret = fn_clReleaseKernel(kernel2);
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
printf("clReleaseKernel : Failed\n");
|
||||||
|
printf("\nSelected method%d\n", select_method);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
select_method = 2;
|
||||||
|
OpenCL_kernel = kernel2;
|
||||||
|
ret = fn_clReleaseKernel(kernel4);
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
printf("clReleaseKernel : Failed\n");
|
||||||
|
printf("\nSelected method%d\n", select_method);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OpenCL_method |= select_method;
|
||||||
|
} else if (((cpu_flag & 128) != 0) && (sse_unit == 256)){
|
||||||
|
OpenCL_method |= 16; // JIT(SSE2) は bit ごとに上位から 16バイトずつ並ぶ
|
||||||
|
// ローカルのテーブルサイズが異なることに注意
|
||||||
|
// XOR 方式以外は 2KB (4バイト * 256項目 * 2個) 使う
|
||||||
|
// XOR (JIT) は 64バイト (4バイト * 16項目) 使う
|
||||||
|
} else {
|
||||||
|
int select_method; // 並び替えられてないデータ用
|
||||||
|
if (OpenCL_method & 0x40000){ // 4-byte and 2 blocks
|
||||||
|
select_method = 9;
|
||||||
|
} else if (OpenCL_method & 0x10000){ // 4-byte
|
||||||
|
select_method = 1;
|
||||||
|
} else { // kernel を作って詳細を確かめる
|
||||||
|
size_t item1, item9;
|
||||||
|
cl_kernel kernel1, kernel9;
|
||||||
|
item1 = item9 = 0;
|
||||||
|
// まずは一番重くて速い奴を調べる
|
||||||
|
wsprintfA(buf, "method%d", 9);
|
||||||
|
kernel9 = fn_clCreateKernel(program, buf, &ret);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
ret = fn_clGetKernelWorkGroupInfo(kernel9, selected_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &item9, NULL);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("\nTesting %s\n", buf);
|
||||||
|
printf("PREFERRED_WORK_GROUP_SIZE_MULTIPLE = %zu\n", item9);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (item9 >= 32){ // 32以上あれば余裕で動くとみなす
|
||||||
|
select_method = 9;
|
||||||
|
OpenCL_kernel = kernel9;
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("\nSelected method%d\n", select_method);
|
||||||
|
#endif
|
||||||
|
} else { // 他の奴と比較する
|
||||||
|
wsprintfA(buf, "method%d", 1);
|
||||||
|
kernel1 = fn_clCreateKernel(program, buf, &ret);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
ret = fn_clGetKernelWorkGroupInfo(kernel1, selected_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &item1, NULL);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("\nTesting %s\n", buf);
|
||||||
|
printf("PREFERRED_WORK_GROUP_SIZE_MULTIPLE = %zu\n", item1);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (item9 >= item1){
|
||||||
|
select_method = 9;
|
||||||
|
OpenCL_kernel = kernel9;
|
||||||
|
ret = fn_clReleaseKernel(kernel1);
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
printf("clReleaseKernel : Failed\n");
|
||||||
|
printf("\nSelected method%d\n", select_method);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
select_method = 1;
|
||||||
|
OpenCL_kernel = kernel1;
|
||||||
|
ret = fn_clReleaseKernel(kernel9);
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
printf("clReleaseKernel : Failed\n");
|
||||||
|
printf("\nSelected method%d\n", select_method);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OpenCL_method |= select_method;
|
||||||
|
}
|
||||||
|
|
||||||
// カーネル関数を抽出する
|
// カーネル関数を抽出する
|
||||||
wsprintfA(buf, "method%d", OpenCL_method & 7);
|
if (OpenCL_kernel == NULL){
|
||||||
|
wsprintfA(buf, "method%d", OpenCL_method & 31);
|
||||||
OpenCL_kernel = fn_clCreateKernel(program, buf, &ret);
|
OpenCL_kernel = fn_clCreateKernel(program, buf, &ret);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 22;
|
return (ret << 8) | 22;
|
||||||
#ifdef DEBUG_OUTPUT
|
#ifdef DEBUG_OUTPUT
|
||||||
printf("CreateKernel : %s\n", buf);
|
printf("CreateKernel : %s\n", buf);
|
||||||
|
ret = fn_clGetKernelWorkGroupInfo(OpenCL_kernel, selected_device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &data_size, NULL);
|
||||||
|
if (ret == CL_SUCCESS)
|
||||||
|
printf("PREFERRED_WORK_GROUP_SIZE_MULTIPLE = %zu\n", data_size);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// カーネルが実行できる work item 数を調べる
|
// カーネルが実行できる work item 数を調べる
|
||||||
ret = fn_clGetKernelWorkGroupInfo(OpenCL_kernel, NULL, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &data_size, NULL);
|
ret = fn_clGetKernelWorkGroupInfo(OpenCL_kernel, selected_device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &data_size, NULL);
|
||||||
if ((ret == CL_SUCCESS) && (data_size < 256)){ // 最低でも 256 以上は必要
|
if ((ret == CL_SUCCESS) && (data_size < 256)){ // 最低でも 256 以上は必要
|
||||||
#ifdef DEBUG_OUTPUT
|
#ifdef DEBUG_OUTPUT
|
||||||
printf("KERNEL_WORK_GROUP_SIZE = %zd\n", data_size);
|
printf("KERNEL_WORK_GROUP_SIZE = %zd\n", data_size);
|
||||||
@@ -558,6 +689,60 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
fn_clUnloadCompiler();
|
fn_clUnloadCompiler();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// work group 数が必要以上に多い場合は減らす
|
||||||
|
if (OpenCL_method & 4){
|
||||||
|
// work item 一個が 32バイトずつ計算する、256個なら work group ごとに 8KB 担当する
|
||||||
|
data_size = unit_size / 8192;
|
||||||
|
} else if (OpenCL_method & 2){
|
||||||
|
// work item 一個が 8バイトずつ計算する、256個なら work group ごとに 2KB 担当する
|
||||||
|
data_size = unit_size / 2048;
|
||||||
|
} else {
|
||||||
|
// work item 一個が 4バイトずつ計算する、256個なら work group ごとに 1KB 担当する
|
||||||
|
data_size = unit_size / 1024;
|
||||||
|
}
|
||||||
|
if (OpenCL_group_num > data_size){
|
||||||
|
OpenCL_group_num = data_size;
|
||||||
|
printf("Number of work groups is reduced to %zd\n", OpenCL_group_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
// データへのアクセス方法をデバイスによって変える
|
||||||
|
if (OpenCL_method & 0x200000){
|
||||||
|
OpenCL_method |= 32;
|
||||||
|
} else if ((OpenCL_method & 0x100000) == 0){
|
||||||
|
if (unified_memory){
|
||||||
|
OpenCL_method |= 32; // Integrated GPU なら CL_MEM_USE_HOST_PTR を使う
|
||||||
|
} else { // Discrete GPU でも Nvidia のは動作を変える
|
||||||
|
ret = fn_clGetDeviceInfo(selected_device, CL_DEVICE_VERSION, sizeof(buf), buf, NULL);
|
||||||
|
if (ret == CL_SUCCESS){
|
||||||
|
if (strstr(buf, "CUDA") != NULL)
|
||||||
|
OpenCL_method |= 32; // NVIDIA GPU なら CL_MEM_USE_HOST_PTR を使う
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 出力先は1ブロック分だけあればいい
|
||||||
|
// CL_MEM_ALLOC_HOST_PTRを使えばpinned memoryになるらしい
|
||||||
|
data_size = unit_size;
|
||||||
|
if (OpenCL_method & 8)
|
||||||
|
data_size *= 2; // 2ブロックずつ計算できるように、2倍確保しておく
|
||||||
|
OpenCL_dst = gfn_clCreateBuffer(OpenCL_context, CL_MEM_ALLOC_HOST_PTR, data_size, NULL, &ret);
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
return (ret << 8) | 13;
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("dst buf : %zd KB (%zd Bytes), OK\n", data_size >> 10, data_size);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// factor は最大個数分 (src_max個)
|
||||||
|
data_size = sizeof(unsigned short) * (*src_max);
|
||||||
|
if (OpenCL_method & 8)
|
||||||
|
data_size *= 2; // 2ブロックずつ計算できるように、2倍確保しておく
|
||||||
|
OpenCL_buf = gfn_clCreateBuffer(OpenCL_context, CL_MEM_READ_ONLY, data_size, NULL, &ret);
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
return (ret << 8) | 14;
|
||||||
|
#ifdef DEBUG_OUTPUT
|
||||||
|
printf("factor buf : %zd Bytes (%d factors), OK\n", data_size, (*src_max));
|
||||||
|
#endif
|
||||||
|
|
||||||
// カーネル引数を指定する
|
// カーネル引数を指定する
|
||||||
ret = gfn_clSetKernelArg(OpenCL_kernel, 1, sizeof(cl_mem), &OpenCL_dst);
|
ret = gfn_clSetKernelArg(OpenCL_kernel, 1, sizeof(cl_mem), &OpenCL_dst);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
@@ -565,13 +750,12 @@ int init_OpenCL(int unit_size, int *src_max)
|
|||||||
ret = gfn_clSetKernelArg(OpenCL_kernel, 2, sizeof(cl_mem), &OpenCL_buf);
|
ret = gfn_clSetKernelArg(OpenCL_kernel, 2, sizeof(cl_mem), &OpenCL_buf);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 102;
|
return (ret << 8) | 102;
|
||||||
if (ret != CL_SUCCESS)
|
|
||||||
return (ret << 8) | 103;
|
|
||||||
|
|
||||||
#ifdef DEBUG_OUTPUT
|
#ifdef DEBUG_OUTPUT
|
||||||
// ワークアイテム数
|
// ワークアイテム数
|
||||||
printf("\nMax number of work items = %zd (256 * %zd)\n", OpenCL_group_num * 256, OpenCL_group_num);
|
printf("\nMax number of work items = %zd (256 * %zd)\n", OpenCL_group_num * 256, OpenCL_group_num);
|
||||||
#endif
|
#endif
|
||||||
|
OpenCL_method &= 0xFF; // 最後に選択設定を消去する
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -683,7 +867,7 @@ void info_OpenCL(char *buf, int buf_size)
|
|||||||
// ソース・ブロックをデバイス側にコピーする
|
// ソース・ブロックをデバイス側にコピーする
|
||||||
int gpu_copy_blocks(
|
int gpu_copy_blocks(
|
||||||
unsigned char *data, // ブロックのバッファー (境界は 4096にすること)
|
unsigned char *data, // ブロックのバッファー (境界は 4096にすること)
|
||||||
int unit_size, // 4096の倍数にすること
|
unsigned int unit_size, // 4096の倍数にすること
|
||||||
int src_num) // 何ブロックをコピーするのか
|
int src_num) // 何ブロックをコピーするのか
|
||||||
{
|
{
|
||||||
size_t data_size;
|
size_t data_size;
|
||||||
@@ -692,7 +876,7 @@ int gpu_copy_blocks(
|
|||||||
|
|
||||||
// Integrated GPU と Discrete GPU の違いに関係なく、使う分だけ毎回メモリー領域を確保する
|
// Integrated GPU と Discrete GPU の違いに関係なく、使う分だけ毎回メモリー領域を確保する
|
||||||
data_size = (size_t)unit_size * src_num;
|
data_size = (size_t)unit_size * src_num;
|
||||||
if (OpenCL_method & 8){ // AMD's APU や Integrated GPU なら ZeroCopy する
|
if (OpenCL_method & 32){ // AMD's APU や Integrated GPU なら ZeroCopy する
|
||||||
// 実際に比較してみると GeForce GPU でもメモリー消費量が少なくてコピーが速い
|
// 実際に比較してみると GeForce GPU でもメモリー消費量が少なくてコピーが速い
|
||||||
// NVIDIA GPU は CL_MEM_USE_HOST_PTR でも VRAM 上にキャッシュするので速いらしい
|
// NVIDIA GPU は CL_MEM_USE_HOST_PTR でも VRAM 上にキャッシュするので速いらしい
|
||||||
flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR;
|
flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR;
|
||||||
@@ -719,17 +903,31 @@ int gpu_copy_blocks(
|
|||||||
int gpu_multiply_blocks(
|
int gpu_multiply_blocks(
|
||||||
int src_num, // Number of multiplying source blocks
|
int src_num, // Number of multiplying source blocks
|
||||||
unsigned short *mat, // Matrix of numbers to multiply by
|
unsigned short *mat, // Matrix of numbers to multiply by
|
||||||
|
unsigned short *mat2, // Set to calculate 2 blocks at once
|
||||||
unsigned char *buf, // Products go here
|
unsigned char *buf, // Products go here
|
||||||
int len) // Byte length
|
unsigned int len) // Byte length
|
||||||
{
|
{
|
||||||
unsigned __int64 *vram, *src, *dst;
|
unsigned __int64 *vram, *src, *dst;
|
||||||
size_t global_size, local_size;
|
size_t global_size, local_size;
|
||||||
cl_int ret;
|
cl_int ret;
|
||||||
|
|
||||||
// 倍率の配列をデバイス側に書き込む
|
// 倍率の配列をデバイス側に書き込む
|
||||||
|
if (mat2 == NULL){ // 1ブロック分だけコピーする
|
||||||
ret = gfn_clEnqueueWriteBuffer(OpenCL_command, OpenCL_buf, CL_FALSE, 0, sizeof(short) * src_num, mat, 0, NULL, NULL);
|
ret = gfn_clEnqueueWriteBuffer(OpenCL_command, OpenCL_buf, CL_FALSE, 0, sizeof(short) * src_num, mat, 0, NULL, NULL);
|
||||||
|
} else { // 2ブロックずつ計算する場合は、配列のサイズも2倍になる
|
||||||
|
if ((size_t)mat2 == 1){ // アドレスが 1 になることはあり得ないので、識別できる
|
||||||
|
ret = gfn_clEnqueueWriteBuffer(OpenCL_command, OpenCL_buf, CL_FALSE, 0, sizeof(short) * src_num * 2, mat, 0, NULL, NULL);
|
||||||
|
} else { // 2回コピーする
|
||||||
|
size_t data_size = sizeof(short) * src_num;
|
||||||
|
ret = gfn_clEnqueueWriteBuffer(OpenCL_command, OpenCL_buf, CL_FALSE, 0, data_size, mat, 0, NULL, NULL);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 10;
|
return (ret << 8) | 10;
|
||||||
|
// もう一つの配列は違う場所からコピーする
|
||||||
|
ret = gfn_clEnqueueWriteBuffer(OpenCL_command, OpenCL_buf, CL_FALSE, data_size, data_size, mat2, 0, NULL, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ret != CL_SUCCESS)
|
||||||
|
return (ret << 8) | 11;
|
||||||
|
|
||||||
// 引数を指定する
|
// 引数を指定する
|
||||||
ret = gfn_clSetKernelArg(OpenCL_kernel, 3, sizeof(int), &src_num);
|
ret = gfn_clSetKernelArg(OpenCL_kernel, 3, sizeof(int), &src_num);
|
||||||
@@ -739,15 +937,15 @@ int gpu_multiply_blocks(
|
|||||||
// カーネル並列実行
|
// カーネル並列実行
|
||||||
local_size = 256; // テーブルやキャッシュのため、work item 数は 256 に固定する
|
local_size = 256; // テーブルやキャッシュのため、work item 数は 256 に固定する
|
||||||
global_size = OpenCL_group_num * 256;
|
global_size = OpenCL_group_num * 256;
|
||||||
//printf("group num = %d, global size = %d, local size = 256 \n", OpenCL_group_num, global_size);
|
//printf("group num = %d, global size = %d, local size = %d \n", OpenCL_group_num, global_size, local_size);
|
||||||
ret = gfn_clEnqueueNDRangeKernel(OpenCL_command, OpenCL_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
|
ret = gfn_clEnqueueNDRangeKernel(OpenCL_command, OpenCL_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 11;
|
return (ret << 8) | 12;
|
||||||
|
|
||||||
// 出力内容をホスト側に反映させる
|
// 出力内容をホスト側に反映させる
|
||||||
vram = gfn_clEnqueueMapBuffer(OpenCL_command, OpenCL_dst, CL_TRUE, CL_MAP_READ, 0, len, 0, NULL, NULL, &ret);
|
vram = gfn_clEnqueueMapBuffer(OpenCL_command, OpenCL_dst, CL_TRUE, CL_MAP_READ, 0, len, 0, NULL, NULL, &ret);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 12;
|
return (ret << 8) | 13;
|
||||||
|
|
||||||
// 8バイトごとに XOR する (SSE2 で XOR しても速くならず)
|
// 8バイトごとに XOR する (SSE2 で XOR しても速くならず)
|
||||||
src = vram;
|
src = vram;
|
||||||
@@ -762,7 +960,7 @@ int gpu_multiply_blocks(
|
|||||||
// ホスト側でデータを変更しなくても、clEnqueueMapBufferと対で呼び出さないといけない
|
// ホスト側でデータを変更しなくても、clEnqueueMapBufferと対で呼び出さないといけない
|
||||||
ret = gfn_clEnqueueUnmapMemObject(OpenCL_command, OpenCL_dst, vram, 0, NULL, NULL);
|
ret = gfn_clEnqueueUnmapMemObject(OpenCL_command, OpenCL_dst, vram, 0, NULL, NULL);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 13;
|
return (ret << 8) | 14;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -775,12 +973,12 @@ int gpu_finish(void)
|
|||||||
// 全ての処理が終わるのを待つ
|
// 全ての処理が終わるのを待つ
|
||||||
ret = gfn_clFinish(OpenCL_command);
|
ret = gfn_clFinish(OpenCL_command);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 20;
|
return (ret << 8) | 30;
|
||||||
|
|
||||||
if (OpenCL_src != NULL){ // 確保されてる場合は解除する
|
if (OpenCL_src != NULL){ // 確保されてる場合は解除する
|
||||||
ret = gfn_clReleaseMemObject(OpenCL_src);
|
ret = gfn_clReleaseMemObject(OpenCL_src);
|
||||||
if (ret != CL_SUCCESS)
|
if (ret != CL_SUCCESS)
|
||||||
return (ret << 8) | 21;
|
return (ret << 8) | 31;
|
||||||
OpenCL_src = NULL;
|
OpenCL_src = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,20 +10,21 @@ extern "C" {
|
|||||||
|
|
||||||
extern int OpenCL_method;
|
extern int OpenCL_method;
|
||||||
|
|
||||||
int init_OpenCL(int unit_size, int *src_max);
|
int init_OpenCL(unsigned int unit_size, int *src_max);
|
||||||
int free_OpenCL(void);
|
int free_OpenCL(void);
|
||||||
void info_OpenCL(char *buf, int buf_size);
|
void info_OpenCL(char *buf, int buf_size);
|
||||||
|
|
||||||
int gpu_copy_blocks(
|
int gpu_copy_blocks(
|
||||||
unsigned char *data,
|
unsigned char *data,
|
||||||
int unit_size,
|
unsigned int unit_size,
|
||||||
int src_num);
|
int src_num);
|
||||||
|
|
||||||
int gpu_multiply_blocks(
|
int gpu_multiply_blocks(
|
||||||
int src_num, // Number of multiplying source blocks
|
int src_num, // Number of multiplying source blocks
|
||||||
unsigned short *mat, // Matrix of numbers to multiply by
|
unsigned short *mat, // Matrix of numbers to multiply by
|
||||||
|
unsigned short *mat2, // Set to calculate 2 blocks at once
|
||||||
unsigned char *buf, // Products go here
|
unsigned char *buf, // Products go here
|
||||||
int len); // Byte length
|
unsigned int len); // Byte length
|
||||||
|
|
||||||
int gpu_finish(void);
|
int gpu_finish(void);
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// list.c
|
// list.c
|
||||||
// Copyright : 2023-10-15 Yutaka Sawada
|
// Copyright : 2023-12-12 Yutaka Sawada
|
||||||
// License : GPL
|
// License : GPL
|
||||||
|
|
||||||
#ifndef _UNICODE
|
#ifndef _UNICODE
|
||||||
@@ -26,6 +26,11 @@
|
|||||||
|
|
||||||
//#define TIMER // 実験用
|
//#define TIMER // 実験用
|
||||||
|
|
||||||
|
#ifdef TIMER
|
||||||
|
#include <time.h>
|
||||||
|
static double time_sec, time_speed;
|
||||||
|
#endif
|
||||||
|
|
||||||
// recovery set のファイルのハッシュ値を調べる (空のファイルは除く)
|
// recovery set のファイルのハッシュ値を調べる (空のファイルは除く)
|
||||||
// 0x00 = ファイルが存在して完全である
|
// 0x00 = ファイルが存在して完全である
|
||||||
// 0x01 = ファイルが存在しない
|
// 0x01 = ファイルが存在しない
|
||||||
@@ -296,7 +301,7 @@ int check_file_complete(
|
|||||||
{
|
{
|
||||||
int i, rv;
|
int i, rv;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_start = GetTickCount();
|
clock_t time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
printf("\nVerifying Input File :\n");
|
printf("\nVerifying Input File :\n");
|
||||||
@@ -332,14 +337,14 @@ unsigned int time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount() - time_start;
|
time_start = clock() - time_start;
|
||||||
printf("\n hash %d.%03d sec", time_start / 1000, time_start % 1000);
|
time_sec = (double)time_start / CLOCKS_PER_SEC;
|
||||||
if (time_start > 0){
|
if (time_sec > 0){
|
||||||
time_start = (int)((total_file_size * 125) / ((__int64)time_start * 131072));
|
time_speed = (double)total_file_size / (time_sec * 1048576);
|
||||||
printf(", %d MB/s\n", time_start);
|
|
||||||
} else {
|
} else {
|
||||||
printf("\n");
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
|
printf("\n hash %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -364,7 +369,7 @@ int check_file_complete_multi(
|
|||||||
HANDLE hSub[MAX_READ_NUM];
|
HANDLE hSub[MAX_READ_NUM];
|
||||||
FILE_CHECK_TH th[MAX_READ_NUM];
|
FILE_CHECK_TH th[MAX_READ_NUM];
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_start = GetTickCount();
|
clock_t time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memset(hSub, 0, sizeof(HANDLE) * MAX_READ_NUM);
|
memset(hSub, 0, sizeof(HANDLE) * MAX_READ_NUM);
|
||||||
@@ -630,14 +635,14 @@ unsigned int time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount() - time_start;
|
time_start = clock() - time_start;
|
||||||
printf("\n hash %d.%03d sec", time_start / 1000, time_start % 1000);
|
time_sec = (double)time_start / CLOCKS_PER_SEC;
|
||||||
if (time_start > 0){
|
if (time_sec > 0){
|
||||||
time_start = (int)((total_file_size * 125) / ((__int64)time_start * 131072));
|
time_speed = (double)total_file_size / (time_sec * 1048576);
|
||||||
printf(", %d MB/s\n", time_start);
|
|
||||||
} else {
|
} else {
|
||||||
printf("\n");
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
|
printf("\n hash %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
error_end:
|
error_end:
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// md5_crc.c
|
// md5_crc.c
|
||||||
// Copyright : 2023-10-29 Yutaka Sawada
|
// Copyright : 2023-12-12 Yutaka Sawada
|
||||||
// License : GPL
|
// License : GPL
|
||||||
|
|
||||||
#ifndef _UNICODE
|
#ifndef _UNICODE
|
||||||
@@ -21,7 +21,6 @@
|
|||||||
#include "phmd5.h"
|
#include "phmd5.h"
|
||||||
#include "md5_crc.h"
|
#include "md5_crc.h"
|
||||||
|
|
||||||
|
|
||||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||||
|
|
||||||
// バイト配列の MD5 ハッシュ値を求める
|
// バイト配列の MD5 ハッシュ値を求める
|
||||||
@@ -200,8 +199,10 @@ int file_md5_crc32_block(
|
|||||||
//#define TIMER // 実験用
|
//#define TIMER // 実験用
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
static unsigned int time_start, time1_start;
|
#include <time.h>
|
||||||
static unsigned int time_total = 0, time2_total = 0, time3_total = 0;
|
static double time_sec, time_speed;
|
||||||
|
static clock_t time_start, time1_start;
|
||||||
|
static clock_t time_total = 0, time2_total = 0, time3_total = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MAX_BUF_SIZE 2097152 // ヒープ領域を使う場合の最大サイズ
|
#define MAX_BUF_SIZE 2097152 // ヒープ領域を使う場合の最大サイズ
|
||||||
@@ -224,7 +225,7 @@ int file_hash_crc(
|
|||||||
HANDLE hFile;
|
HANDLE hFile;
|
||||||
OVERLAPPED ol;
|
OVERLAPPED ol;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time1_start = GetTickCount();
|
time1_start = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ソース・ファイルを開く
|
// ソース・ファイルを開く
|
||||||
@@ -251,11 +252,11 @@ time1_start = GetTickCount();
|
|||||||
if (file_left < IO_SIZE)
|
if (file_left < IO_SIZE)
|
||||||
read_size = (unsigned int)file_left;
|
read_size = (unsigned int)file_left;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -281,11 +282,11 @@ time2_total += GetTickCount() - time_start;
|
|||||||
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
||||||
file_off += IO_SIZE;
|
file_off += IO_SIZE;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -301,7 +302,7 @@ time2_total += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = 0; // チェックサム計算
|
off = 0; // チェックサム計算
|
||||||
if (block_left > 0){ // 前回足りなかった分を追加する
|
if (block_left > 0){ // 前回足りなかった分を追加する
|
||||||
@@ -338,7 +339,7 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time3_total += GetTickCount() - time_start;
|
time3_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 経過表示
|
// 経過表示
|
||||||
@@ -369,16 +370,17 @@ error_end:
|
|||||||
CloseHandle(ol.hEvent);
|
CloseHandle(ol.hEvent);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_total += GetTickCount() - time1_start;
|
time_total += clock() - time1_start;
|
||||||
if (*prog_now == total_file_size){
|
if (*prog_now == total_file_size){
|
||||||
printf("\nread %d.%03d sec\n", time2_total / 1000, time2_total % 1000);
|
printf("\nread %.3f sec\n", (double)time2_total / CLOCKS_PER_SEC);
|
||||||
printf("main %d.%03d sec\n", time3_total / 1000, time3_total % 1000);
|
printf("main %.3f sec\n", (double)time3_total / CLOCKS_PER_SEC);
|
||||||
if (time_total > 0){
|
time_sec = (double)time_total / CLOCKS_PER_SEC;
|
||||||
time_start = (int)((total_file_size * 125) / ((__int64)time_total * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = (double)total_file_size / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
time_start = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf("total %d.%03d sec, %d MB/s\n", time_total / 1000, time_total % 1000, time_start);
|
printf("total %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return err;
|
return err;
|
||||||
@@ -403,7 +405,7 @@ int file_hash_crc(
|
|||||||
HANDLE hFile;
|
HANDLE hFile;
|
||||||
OVERLAPPED ol;
|
OVERLAPPED ol;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time1_start = GetTickCount();
|
time1_start = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ソース・ファイルを開く
|
// ソース・ファイルを開く
|
||||||
@@ -442,11 +444,11 @@ error_retry_read:
|
|||||||
if (file_left < IO_SIZE)
|
if (file_left < IO_SIZE)
|
||||||
read_size = (unsigned int)file_left;
|
read_size = (unsigned int)file_left;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -536,11 +538,11 @@ error_retry_pause:
|
|||||||
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
||||||
file_off += IO_SIZE;
|
file_off += IO_SIZE;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -557,7 +559,7 @@ time2_total += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = 0; // チェックサム計算
|
off = 0; // チェックサム計算
|
||||||
if (block_left > 0){ // 前回足りなかった分を追加する
|
if (block_left > 0){ // 前回足りなかった分を追加する
|
||||||
@@ -594,7 +596,7 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time3_total += GetTickCount() - time_start;
|
time3_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 経過表示
|
// 経過表示
|
||||||
@@ -625,16 +627,17 @@ error_end:
|
|||||||
CloseHandle(ol.hEvent);
|
CloseHandle(ol.hEvent);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_total += GetTickCount() - time1_start;
|
time_total += clock() - time1_start;
|
||||||
if (*prog_now == total_file_size){
|
if (*prog_now == total_file_size){
|
||||||
printf("\nread %d.%03d sec\n", time2_total / 1000, time2_total % 1000);
|
printf("\nread %.3f sec\n", (double)time2_total / CLOCKS_PER_SEC);
|
||||||
printf("main %d.%03d sec\n", time3_total / 1000, time3_total % 1000);
|
printf("main %.3f sec\n", (double)time3_total / CLOCKS_PER_SEC);
|
||||||
if (time_total > 0){
|
time_sec = (double)time_total / CLOCKS_PER_SEC;
|
||||||
time_start = (int)((total_file_size * 125) / ((__int64)time_total * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = (double)total_file_size / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
time_start = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf("total %d.%03d sec, %d MB/s\n", time_total / 1000, time_total % 1000, time_start);
|
printf("total %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return err;
|
return err;
|
||||||
@@ -660,7 +663,7 @@ int file_hash_crc(
|
|||||||
HANDLE hFile;
|
HANDLE hFile;
|
||||||
OVERLAPPED ol;
|
OVERLAPPED ol;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time1_start = GetTickCount();
|
time1_start = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ソース・ファイルを開く
|
// ソース・ファイルを開く
|
||||||
@@ -699,11 +702,11 @@ time1_start = GetTickCount();
|
|||||||
if (file_left < io_size)
|
if (file_left < io_size)
|
||||||
read_size = (unsigned int)file_left;
|
read_size = (unsigned int)file_left;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -729,11 +732,11 @@ time2_total += GetTickCount() - time_start;
|
|||||||
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
||||||
file_off += io_size;
|
file_off += io_size;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -749,7 +752,7 @@ time2_total += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = 0; // チェックサム計算
|
off = 0; // チェックサム計算
|
||||||
if (block_left > 0){ // 前回足りなかった分を追加する
|
if (block_left > 0){ // 前回足りなかった分を追加する
|
||||||
@@ -786,7 +789,7 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time3_total += GetTickCount() - time_start;
|
time3_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 経過表示
|
// 経過表示
|
||||||
@@ -819,16 +822,17 @@ error_end:
|
|||||||
_aligned_free(buf1);
|
_aligned_free(buf1);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_total += GetTickCount() - time1_start;
|
time_total += clock() - time1_start;
|
||||||
if (*prog_now == total_file_size){
|
if (*prog_now == total_file_size){
|
||||||
printf("\nread %d.%03d sec\n", time2_total / 1000, time2_total % 1000);
|
printf("\nread %.3f sec\n", (double)time2_total / CLOCKS_PER_SEC);
|
||||||
printf("main %d.%03d sec\n", time3_total / 1000, time3_total % 1000);
|
printf("main %.3f sec\n", (double)time3_total / CLOCKS_PER_SEC);
|
||||||
if (time_total > 0){
|
time_sec = (double)time_total / CLOCKS_PER_SEC;
|
||||||
time_start = (int)((total_file_size * 125) / ((__int64)time_total * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = (double)total_file_size / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
time_start = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf("total %d.%03d sec, %d MB/s\n", time_total / 1000, time_total % 1000, time_start);
|
printf("total %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return err;
|
return err;
|
||||||
@@ -1038,7 +1042,7 @@ int file_hash_check(
|
|||||||
PHMD5 hash_ctx, block_ctx;
|
PHMD5 hash_ctx, block_ctx;
|
||||||
OVERLAPPED ol;
|
OVERLAPPED ol;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time1_start = GetTickCount();
|
time1_start = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
prog_last = -1; // 検証中のファイル名を毎回表示する
|
prog_last = -1; // 検証中のファイル名を毎回表示する
|
||||||
@@ -1062,11 +1066,11 @@ time1_start = GetTickCount();
|
|||||||
file_left = file_size - 16384; // 本来のファイル・サイズまでしか検査しない
|
file_left = file_size - 16384; // 本来のファイル・サイズまでしか検査しない
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf, len, NULL, &ol);
|
off = ReadFile(hFile, buf, len, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -1141,11 +1145,11 @@ time2_total += GetTickCount() - time_start;
|
|||||||
if (file_left < IO_SIZE)
|
if (file_left < IO_SIZE)
|
||||||
read_size = (unsigned int)file_left;
|
read_size = (unsigned int)file_left;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -1168,11 +1172,11 @@ time2_total += GetTickCount() - time_start;
|
|||||||
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
||||||
file_off += IO_SIZE;
|
file_off += IO_SIZE;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -1187,7 +1191,7 @@ time2_total += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
if (s_blk != NULL){
|
if (s_blk != NULL){
|
||||||
off = 0;
|
off = 0;
|
||||||
@@ -1230,7 +1234,7 @@ time_start = GetTickCount();
|
|||||||
Phmd5Process(&hash_ctx, buf, len); // MD5 計算
|
Phmd5Process(&hash_ctx, buf, len); // MD5 計算
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time3_total += GetTickCount() - time_start;
|
time3_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 経過表示
|
// 経過表示
|
||||||
@@ -1267,15 +1271,16 @@ error_end:
|
|||||||
CloseHandle(ol.hEvent);
|
CloseHandle(ol.hEvent);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_total += GetTickCount() - time1_start;
|
time_total += clock() - time1_start;
|
||||||
printf("\nread %d.%03d sec\n", time2_total / 1000, time2_total % 1000);
|
printf("\nread %.3f sec\n", (double)time2_total / CLOCKS_PER_SEC);
|
||||||
printf("main %d.%03d sec\n", time3_total / 1000, time3_total % 1000);
|
printf("main %.3f sec\n", (double)time3_total / CLOCKS_PER_SEC);
|
||||||
if (time_total > 0){
|
time_sec = (double)time_total / CLOCKS_PER_SEC;
|
||||||
time_start = (int)((file_size * 125) / ((__int64)time_total * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = (double)file_size / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
time_start = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf("total %d.%03d sec, %d MB/s\n", time_total / 1000, time_total % 1000, time_start);
|
printf("total %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
#endif
|
#endif
|
||||||
return comp_num;
|
return comp_num;
|
||||||
}
|
}
|
||||||
@@ -1536,7 +1541,7 @@ int file_hash_direct(
|
|||||||
HANDLE hFile;
|
HANDLE hFile;
|
||||||
OVERLAPPED ol;
|
OVERLAPPED ol;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time1_start = GetTickCount();
|
time1_start = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
prog_last = -1; // 検証中のファイル名を毎回表示する
|
prog_last = -1; // 検証中のファイル名を毎回表示する
|
||||||
@@ -1592,11 +1597,11 @@ time1_start = GetTickCount();
|
|||||||
file_left = file_size - 16384; // 本来のファイル・サイズまでしか検査しない
|
file_left = file_size - 16384; // 本来のファイル・サイズまでしか検査しない
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
comp_num = -1;
|
comp_num = -1;
|
||||||
@@ -1679,11 +1684,11 @@ time2_total += GetTickCount() - time_start;
|
|||||||
read_size = (read_size + 4095) & ~4095; // 4KB の倍数にする
|
read_size = (read_size + 4095) & ~4095; // 4KB の倍数にする
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf1, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -1710,11 +1715,11 @@ time2_total += GetTickCount() - time_start;
|
|||||||
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
ol.OffsetHigh = (unsigned int)(file_off >> 32);
|
||||||
file_off += IO_SIZE;
|
file_off += IO_SIZE;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
off = ReadFile(hFile, buf, read_size, NULL, &ol);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time2_total += GetTickCount() - time_start;
|
time2_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
if ((off == 0) && (GetLastError() != ERROR_IO_PENDING)){
|
||||||
print_win32_err();
|
print_win32_err();
|
||||||
@@ -1729,7 +1734,7 @@ time2_total += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
if (s_blk != NULL){
|
if (s_blk != NULL){
|
||||||
off = 0;
|
off = 0;
|
||||||
@@ -1771,7 +1776,7 @@ time_start = GetTickCount();
|
|||||||
Phmd5Process(&hash_ctx, buf, len); // MD5 計算
|
Phmd5Process(&hash_ctx, buf, len); // MD5 計算
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time3_total += GetTickCount() - time_start;
|
time3_total += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 経過表示
|
// 経過表示
|
||||||
@@ -1812,10 +1817,16 @@ error_end:
|
|||||||
_aligned_free(buf1);
|
_aligned_free(buf1);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_total += GetTickCount() - time1_start;
|
time_total += clock() - time1_start;
|
||||||
printf("\nread %d.%03d sec\n", time2_total / 1000, time2_total % 1000);
|
printf("\nread %.3f sec\n", (double)time2_total / CLOCKS_PER_SEC);
|
||||||
printf("main %d.%03d sec\n", time3_total / 1000, time3_total % 1000);
|
printf("main %.3f sec\n", (double)time3_total / CLOCKS_PER_SEC);
|
||||||
printf("total %d.%03d sec\n", time_total / 1000, time_total % 1000);
|
time_sec = (double)time_total / CLOCKS_PER_SEC;
|
||||||
|
if (time_sec > 0){
|
||||||
|
time_speed = (double)file_size / (time_sec * 1048576);
|
||||||
|
} else {
|
||||||
|
time_speed = 0;
|
||||||
|
}
|
||||||
|
printf("total %.3f sec, %.0f MB/s\n", time_sec, time_speed);
|
||||||
#endif
|
#endif
|
||||||
return comp_num;
|
return comp_num;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// par2_cmd.c
|
// par2_cmd.c
|
||||||
// Copyright : 2023-10-15 Yutaka Sawada
|
// Copyright : 2023-12-09 Yutaka Sawada
|
||||||
// License : GPL
|
// License : GPL
|
||||||
|
|
||||||
#ifndef _UNICODE
|
#ifndef _UNICODE
|
||||||
@@ -1479,14 +1479,12 @@ ri= switch_set & 0x00040000
|
|||||||
} else if (wcsncmp(tmp_p, L"lc", 2) == 0){
|
} else if (wcsncmp(tmp_p, L"lc", 2) == 0){
|
||||||
k = 0;
|
k = 0;
|
||||||
j = 2;
|
j = 2;
|
||||||
while ((j < 2 + 5) && (tmp_p[j] >= '0') && (tmp_p[j] <= '9')){
|
while ((j < 2 + 7) && (tmp_p[j] >= '0') && (tmp_p[j] <= '9')){
|
||||||
k = (k * 10) + (tmp_p[j] - '0');
|
k = (k * 10) + (tmp_p[j] - '0');
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
if (k & 256){ // GPU を使う
|
if (k & 0x300){ // GPU を使う
|
||||||
OpenCL_method = 1; // Faster GPU
|
OpenCL_method = k & 0x003F0300;
|
||||||
} else if (k & 512){
|
|
||||||
OpenCL_method = -1; // Slower GPU
|
|
||||||
}
|
}
|
||||||
if (k & 1024) // CLMUL と ALTMAP を使わない
|
if (k & 1024) // CLMUL と ALTMAP を使わない
|
||||||
cpu_flag = (cpu_flag & 0xFFFFFFF7) | 256;
|
cpu_flag = (cpu_flag & 0xFFFFFFF7) | 256;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// reedsolomon.c
|
// reedsolomon.c
|
||||||
// Copyright : 2023-10-26 Yutaka Sawada
|
// Copyright : 2023-12-12 Yutaka Sawada
|
||||||
// License : GPL
|
// License : GPL
|
||||||
|
|
||||||
#ifndef _UNICODE
|
#ifndef _UNICODE
|
||||||
@@ -27,6 +27,9 @@
|
|||||||
#include "rs_decode.h"
|
#include "rs_decode.h"
|
||||||
#include "reedsolomon.h"
|
#include "reedsolomon.h"
|
||||||
|
|
||||||
|
#ifdef TIMER
|
||||||
|
#include <time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
// GPU を使う最小データサイズ (MB 単位)
|
// GPU を使う最小データサイズ (MB 単位)
|
||||||
// GPU の起動には時間がかかるので、データが小さすぎると逆に遅くなる
|
// GPU の起動には時間がかかるので、データが小さすぎると逆に遅くなる
|
||||||
@@ -739,7 +742,7 @@ int rs_encode(
|
|||||||
int err = 0;
|
int err = 0;
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_total = GetTickCount();
|
clock_t time_total = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (galois_create_table()){
|
if (galois_create_table()){
|
||||||
@@ -755,7 +758,7 @@ unsigned int time_total = GetTickCount();
|
|||||||
// パリティ計算用の行列演算の準備をする
|
// パリティ計算用の行列演算の準備をする
|
||||||
len = sizeof(unsigned short) * source_num;
|
len = sizeof(unsigned short) * source_num;
|
||||||
if (OpenCL_method != 0)
|
if (OpenCL_method != 0)
|
||||||
len *= 2; // GPU の作業領域も確保しておく
|
len *= 3; // GPU の作業領域も確保しておく
|
||||||
constant = malloc(len);
|
constant = malloc(len);
|
||||||
if (constant == NULL){
|
if (constant == NULL){
|
||||||
printf("malloc, %d\n", len);
|
printf("malloc, %d\n", len);
|
||||||
@@ -799,8 +802,8 @@ unsigned int time_total = GetTickCount();
|
|||||||
err = encode_method2(file_path, header_buf, rcv_hFile, files, s_blk, p_blk, constant);
|
err = encode_method2(file_path, header_buf, rcv_hFile, files, s_blk, p_blk, constant);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
if (err != 1){
|
if (err != 1){
|
||||||
time_total = GetTickCount() - time_total;
|
time_total = clock() - time_total;
|
||||||
printf("total %d.%03d sec\n", time_total / 1000, time_total % 1000);
|
printf("total %.3f sec\n", (double)time_total / CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -830,7 +833,7 @@ int rs_encode_1pass(
|
|||||||
int err = 0;
|
int err = 0;
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_total = GetTickCount();
|
clock_t time_total = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (galois_create_table()){
|
if (galois_create_table()){
|
||||||
@@ -841,7 +844,7 @@ unsigned int time_total = GetTickCount();
|
|||||||
// パリティ計算用の行列演算の準備をする
|
// パリティ計算用の行列演算の準備をする
|
||||||
len = sizeof(unsigned short) * source_num;
|
len = sizeof(unsigned short) * source_num;
|
||||||
if (OpenCL_method != 0)
|
if (OpenCL_method != 0)
|
||||||
len *= 2; // GPU の作業領域も確保しておく
|
len *= 3; // GPU の作業領域も確保しておく
|
||||||
constant = malloc(len);
|
constant = malloc(len);
|
||||||
if (constant == NULL){
|
if (constant == NULL){
|
||||||
printf("malloc, %d\n", len);
|
printf("malloc, %d\n", len);
|
||||||
@@ -888,8 +891,8 @@ unsigned int time_total = GetTickCount();
|
|||||||
if (err < 0){
|
if (err < 0){
|
||||||
printf("switching to 2-pass processing, %d\n", err);
|
printf("switching to 2-pass processing, %d\n", err);
|
||||||
} else if (err != 1){
|
} else if (err != 1){
|
||||||
time_total = GetTickCount() - time_total;
|
time_total = clock() - time_total;
|
||||||
printf("total %d.%03d sec\n", time_total / 1000, time_total % 1000);
|
printf("total %.3f sec\n", (double)time_total / CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -913,7 +916,7 @@ int rs_decode(
|
|||||||
int err = 0, i, j, k;
|
int err = 0, i, j, k;
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_matrix = 0, time_total = GetTickCount();
|
clock_t time_matrix = 0, time_total = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (galois_create_table()){
|
if (galois_create_table()){
|
||||||
@@ -948,7 +951,7 @@ unsigned int time_matrix = 0, time_total = GetTickCount();
|
|||||||
id = mat + (block_lost * source_num);
|
id = mat + (block_lost * source_num);
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_matrix = GetTickCount();
|
time_matrix = clock();
|
||||||
#endif
|
#endif
|
||||||
// 復元用の行列を計算する
|
// 復元用の行列を計算する
|
||||||
print_progress_text(0, "Computing matrix");
|
print_progress_text(0, "Computing matrix");
|
||||||
@@ -989,7 +992,7 @@ time_matrix = GetTickCount();
|
|||||||
//for (i = 0; i < block_lost; i++)
|
//for (i = 0; i < block_lost; i++)
|
||||||
// printf("id[%d] = %d\n", i, id[i]);
|
// printf("id[%d] = %d\n", i, id[i]);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_matrix = GetTickCount() - time_matrix;
|
time_matrix = clock() - time_matrix;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
@@ -1032,9 +1035,9 @@ time_matrix = GetTickCount() - time_matrix;
|
|||||||
err = decode_method2(file_path, block_lost, rcv_hFile, files, s_blk, p_blk, mat);
|
err = decode_method2(file_path, block_lost, rcv_hFile, files, s_blk, p_blk, mat);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
if (err != 1){
|
if (err != 1){
|
||||||
time_total = GetTickCount() - time_total;
|
time_total = clock() - time_total;
|
||||||
printf("total %d.%03d sec\n", time_total / 1000, time_total % 1000);
|
printf("total %.3f sec\n", (double)time_total / CLOCKS_PER_SEC);
|
||||||
printf("matrix %d.%03d sec\n", time_matrix / 1000, time_matrix % 1000);
|
printf("matrix %.3f sec\n", (double)time_matrix / CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// rs_decode.c
|
// rs_decode.c
|
||||||
// Copyright : 2023-11-27 Yutaka Sawada
|
// Copyright : 2023-12-13 Yutaka Sawada
|
||||||
// License : GPL
|
// License : GPL
|
||||||
|
|
||||||
#ifndef _UNICODE
|
#ifndef _UNICODE
|
||||||
@@ -28,7 +28,9 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
static unsigned int time_start, time_read = 0, time_write = 0, time_calc = 0;
|
#include <time.h>
|
||||||
|
static double time_sec, time_speed;
|
||||||
|
static clock_t time_start, time_read = 0, time_write = 0, time_calc = 0;
|
||||||
static unsigned int read_count, write_count = 0, skip_count;
|
static unsigned int read_count, write_count = 0, skip_count;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -60,7 +62,7 @@ static DWORD WINAPI thread_decode2(LPVOID lpParameter)
|
|||||||
RS_TH *th;
|
RS_TH *th;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int loop_count2a = 0, loop_count2b = 0;
|
unsigned int loop_count2a = 0, loop_count2b = 0;
|
||||||
unsigned int time_start2, time_encode2a = 0, time_encode2b = 0;
|
clock_t time_start2, time_encode2a = 0, time_encode2b = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
th = (RS_TH *)lpParameter;
|
th = (RS_TH *)lpParameter;
|
||||||
@@ -78,7 +80,7 @@ unsigned int time_start2, time_encode2a = 0, time_encode2b = 0;
|
|||||||
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
||||||
while (th->now < INT_MAX / 2){
|
while (th->now < INT_MAX / 2){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start2 = GetTickCount();
|
time_start2 = clock();
|
||||||
#endif
|
#endif
|
||||||
s_buf = th->buf;
|
s_buf = th->buf;
|
||||||
factor = th->mat;
|
factor = th->mat;
|
||||||
@@ -95,7 +97,7 @@ loop_count2a++;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2a += GetTickCount() - time_start2;
|
time_encode2a += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
} else { // 消失ブロックを部分的に保持する場合
|
} else { // 消失ブロックを部分的に保持する場合
|
||||||
// スレッドごとに復元する消失ブロックの chunk を変える
|
// スレッドごとに復元する消失ブロックの chunk を変える
|
||||||
@@ -136,7 +138,7 @@ loop_count2b += src_num;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2b += GetTickCount() - time_start2;
|
time_encode2b += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
//_mm_sfence(); // メモリーへの書き込みを完了する
|
//_mm_sfence(); // メモリーへの書き込みを完了する
|
||||||
@@ -146,19 +148,21 @@ time_encode2b += GetTickCount() - time_start2;
|
|||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
loop_count2b /= chunk_num; // chunk数で割ってブロック数にする
|
loop_count2b /= chunk_num; // chunk数で割ってブロック数にする
|
||||||
printf("sub-thread : total loop = %d\n", loop_count2a + loop_count2b);
|
printf("sub-thread : total loop = %d\n", loop_count2a + loop_count2b);
|
||||||
if (time_encode2a > 0){
|
time_sec = (double)time_encode2a / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2a * unit_size * 125 / ((__int64)time_encode2a * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2a * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
if (loop_count2a > 0)
|
if (loop_count2a > 0)
|
||||||
printf(" 1st decode %d.%03d sec, %d loop, %d MB/s\n", time_encode2a / 1000, time_encode2a % 1000, loop_count2a, i);
|
printf(" 1st decode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2a, time_speed);
|
||||||
if (time_encode2b > 0){
|
time_sec = (double)time_encode2b / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2b * unit_size * 125 / ((__int64)time_encode2b * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2b * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf(" 2nd decode %d.%03d sec, %d loop, %d MB/s\n", time_encode2b / 1000, time_encode2b % 1000, loop_count2b, i);
|
printf(" 2nd decode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2b, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 終了処理
|
// 終了処理
|
||||||
@@ -178,7 +182,7 @@ static DWORD WINAPI thread_decode3(LPVOID lpParameter)
|
|||||||
RS_TH *th;
|
RS_TH *th;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int loop_count2a = 0, loop_count2b = 0;
|
unsigned int loop_count2a = 0, loop_count2b = 0;
|
||||||
unsigned int time_start2, time_encode2a = 0, time_encode2b = 0;
|
clock_t time_start2, time_encode2a = 0, time_encode2b = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
th = (RS_TH *)lpParameter;
|
th = (RS_TH *)lpParameter;
|
||||||
@@ -197,7 +201,7 @@ unsigned int time_start2, time_encode2a = 0, time_encode2b = 0;
|
|||||||
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
||||||
while (th->now < INT_MAX / 2){
|
while (th->now < INT_MAX / 2){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start2 = GetTickCount();
|
time_start2 = clock();
|
||||||
#endif
|
#endif
|
||||||
s_buf = th->buf;
|
s_buf = th->buf;
|
||||||
factor = th->mat;
|
factor = th->mat;
|
||||||
@@ -214,7 +218,7 @@ loop_count2a++;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2a += GetTickCount() - time_start2;
|
time_encode2a += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
} else { // 全ての消失ブロックを保持する場合
|
} else { // 全ての消失ブロックを保持する場合
|
||||||
// スレッドごとに復元する消失ブロックの chunk を変える
|
// スレッドごとに復元する消失ブロックの chunk を変える
|
||||||
@@ -250,7 +254,7 @@ loop_count2b += src_num;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2b += GetTickCount() - time_start2;
|
time_encode2b += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
//_mm_sfence(); // メモリーへの書き込みを完了する
|
//_mm_sfence(); // メモリーへの書き込みを完了する
|
||||||
@@ -260,19 +264,21 @@ time_encode2b += GetTickCount() - time_start2;
|
|||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
loop_count2b /= chunk_num; // chunk数で割ってブロック数にする
|
loop_count2b /= chunk_num; // chunk数で割ってブロック数にする
|
||||||
printf("sub-thread : total loop = %d\n", loop_count2a + loop_count2b);
|
printf("sub-thread : total loop = %d\n", loop_count2a + loop_count2b);
|
||||||
if (time_encode2a > 0){
|
time_sec = (double)time_encode2a / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2a * unit_size * 125 / ((__int64)time_encode2a * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2a * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
if (loop_count2a > 0)
|
if (loop_count2a > 0)
|
||||||
printf(" 1st decode %d.%03d sec, %d loop, %d MB/s\n", time_encode2a / 1000, time_encode2a % 1000, loop_count2a, i);
|
printf(" 1st decode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2a, time_speed);
|
||||||
if (time_encode2b > 0){
|
time_sec = (double)time_encode2b / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2b * unit_size * 125 / ((__int64)time_encode2b * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2b * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf(" 2nd decode %d.%03d sec, %d loop, %d MB/s\n", time_encode2b / 1000, time_encode2b % 1000, loop_count2b, i);
|
printf(" 2nd decode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2b, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 終了処理
|
// 終了処理
|
||||||
@@ -292,7 +298,8 @@ static DWORD WINAPI thread_decode_gpu(LPVOID lpParameter)
|
|||||||
HANDLE hRun, hEnd;
|
HANDLE hRun, hEnd;
|
||||||
RS_TH *th;
|
RS_TH *th;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_start2, time_encode2 = 0, loop_count2 = 0;
|
unsigned int loop_count2 = 0;
|
||||||
|
clock_t time_start2, time_encode2 = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
th = (RS_TH *)lpParameter;
|
th = (RS_TH *)lpParameter;
|
||||||
@@ -307,7 +314,7 @@ unsigned int time_start2, time_encode2 = 0, loop_count2 = 0;
|
|||||||
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
||||||
while (th->now < INT_MAX / 2){
|
while (th->now < INT_MAX / 2){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start2 = GetTickCount();
|
time_start2 = clock();
|
||||||
#endif
|
#endif
|
||||||
// GPUはソース・ブロック読み込み中に呼ばれない
|
// GPUはソース・ブロック読み込み中に呼ばれない
|
||||||
s_buf = th->buf;
|
s_buf = th->buf;
|
||||||
@@ -321,10 +328,14 @@ time_start2 = GetTickCount();
|
|||||||
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
||||||
}
|
}
|
||||||
|
|
||||||
// スレッドごとに復元する消失ブロックを変える
|
// 一つの GPUスレッドが全ての消失ブロックを処理する
|
||||||
while ((j = InterlockedIncrement(&(th->now))) < block_lost){ // j = ++th_now
|
if (OpenCL_method & 8){ // 2ブロックずつ計算する
|
||||||
|
// 消失ブロック数が奇数なら、最初の一個だけ別に計算する
|
||||||
|
if (block_lost & 1){
|
||||||
|
InterlockedIncrement(&(th->now)); // 常に j = 0 となる
|
||||||
|
|
||||||
// 倍率は逆行列から部分的にコピーする
|
// 倍率は逆行列から部分的にコピーする
|
||||||
i = gpu_multiply_blocks(src_num, factor + source_num * j, g_buf + (size_t)unit_size * j, unit_size);
|
i = gpu_multiply_blocks(src_num, factor, NULL, g_buf, unit_size);
|
||||||
if (i != 0){
|
if (i != 0){
|
||||||
th->len = i;
|
th->len = i;
|
||||||
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
||||||
@@ -335,8 +346,40 @@ time_start2 = GetTickCount();
|
|||||||
loop_count2 += src_num;
|
loop_count2 += src_num;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 残りのブロックは二個ずつ計算する
|
||||||
|
while ((j = InterlockedAdd(&(th->now), 2)) < block_lost){ // th_now += 2, j = th_now
|
||||||
|
j--; // +2 してるから、最初のブロックは -1 する
|
||||||
|
|
||||||
|
// 倍率は逆行列から部分的に2回コピーする
|
||||||
|
i = gpu_multiply_blocks(src_num, factor + source_num * j, factor + source_num * (j + 1), g_buf + (size_t)unit_size * j, unit_size * 2);
|
||||||
|
if (i != 0){
|
||||||
|
th->len = i;
|
||||||
|
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
||||||
|
break;
|
||||||
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2 += GetTickCount() - time_start2;
|
loop_count2 += src_num * 2;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
} else { // 以前からの1ブロックずつ計算する方式
|
||||||
|
while ((j = InterlockedIncrement(&(th->now))) < block_lost){ // j = ++th_now
|
||||||
|
// 倍率は逆行列から部分的にコピーする(2ブロックずつの場合はブロック数をマイナスにする)
|
||||||
|
i = gpu_multiply_blocks(src_num, factor + source_num * j, NULL, g_buf + (size_t)unit_size * j, unit_size);
|
||||||
|
if (i != 0){
|
||||||
|
th->len = i;
|
||||||
|
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef TIMER
|
||||||
|
loop_count2 += src_num;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef TIMER
|
||||||
|
time_encode2 += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
// 最後にVRAMを解放する
|
// 最後にVRAMを解放する
|
||||||
i = gpu_finish();
|
i = gpu_finish();
|
||||||
@@ -349,12 +392,13 @@ time_encode2 += GetTickCount() - time_start2;
|
|||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("gpu-thread :\n");
|
printf("gpu-thread :\n");
|
||||||
if (time_encode2 > 0){
|
time_sec = (double)time_encode2 / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2 * unit_size * 125 / ((__int64)time_encode2 * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2 * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf(" 2nd decode %d.%03d sec, %d loop, %d MB/s\n", time_encode2 / 1000, time_encode2 % 1000, loop_count2, i);
|
printf(" 2nd decode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 終了処理
|
// 終了処理
|
||||||
@@ -430,7 +474,7 @@ int decode_method1( // ソース・ブロックが一個だけの場合
|
|||||||
block_off = 0;
|
block_off = 0;
|
||||||
while (block_off < block_size){
|
while (block_off < block_size){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// パリティ・ブロックを読み込む
|
// パリティ・ブロックを読み込む
|
||||||
len = block_size - block_off;
|
len = block_size - block_off;
|
||||||
@@ -447,18 +491,18 @@ time_start = GetTickCount();
|
|||||||
// パリティ・ブロックのチェックサムを計算する
|
// パリティ・ブロックのチェックサムを計算する
|
||||||
checksum16_altmap(buf, buf + io_size, io_size);
|
checksum16_altmap(buf, buf + io_size, io_size);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 失われたソース・ブロックを復元する
|
// 失われたソース・ブロックを復元する
|
||||||
memset(work_buf, 0, unit_size);
|
memset(work_buf, 0, unit_size);
|
||||||
// factor で割ると元に戻る
|
// factor で割ると元に戻る
|
||||||
galois_align_multiply(buf, work_buf, unit_size, galois_divide(1, galois_power(2, id)));
|
galois_align_multiply(buf, work_buf, unit_size, galois_divide(1, galois_power(2, id)));
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_calc += GetTickCount() - time_start;
|
time_calc += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 経過表示
|
// 経過表示
|
||||||
@@ -472,7 +516,7 @@ time_calc += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 復元されたソース・ブロックのチェックサムを検証する
|
// 復元されたソース・ブロックのチェックサムを検証する
|
||||||
checksum16_return(work_buf, hash, io_size);
|
checksum16_return(work_buf, hash, io_size);
|
||||||
@@ -491,7 +535,7 @@ time_start = GetTickCount();
|
|||||||
goto error_end;
|
goto error_end;
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
block_off += io_size;
|
block_off += io_size;
|
||||||
@@ -499,9 +543,9 @@ time_write += GetTickCount() - time_start;
|
|||||||
print_progress_done(); // 末尾ブロックの断片化によっては 100% で完了するとは限らない
|
print_progress_done(); // 末尾ブロックの断片化によっては 100% で完了するとは限らない
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
printf("write %d.%03d sec\n", time_write / 1000, time_write % 1000);
|
printf("write %.3f sec\n", (double)time_write / CLOCKS_PER_SEC);
|
||||||
printf("decode %d.%03d sec\n", time_calc / 1000, time_calc % 1000);
|
printf("decode %.3f sec\n", (double)time_calc / CLOCKS_PER_SEC);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
error_end:
|
error_end:
|
||||||
@@ -623,7 +667,7 @@ int decode_method2( // ソース・データを全て読み込む場合
|
|||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
read_count = 0;
|
read_count = 0;
|
||||||
skip_count = 0;
|
skip_count = 0;
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
last_file = -1;
|
last_file = -1;
|
||||||
recv_now = 0; // 何番目の代替ブロックか
|
recv_now = 0; // 何番目の代替ブロックか
|
||||||
@@ -760,7 +804,7 @@ skip_count++;
|
|||||||
hFile = NULL;
|
hFile = NULL;
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
WaitForMultipleObjects(cpu_num1, hEnd, TRUE, INFINITE); // サブ・スレッドの計算終了の合図を待つ
|
WaitForMultipleObjects(cpu_num1, hEnd, TRUE, INFINITE); // サブ・スレッドの計算終了の合図を待つ
|
||||||
@@ -845,7 +889,7 @@ skip_count++;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 復元されたブロックを書き込む
|
// 復元されたブロックを書き込む
|
||||||
work_buf = p_buf;
|
work_buf = p_buf;
|
||||||
@@ -916,7 +960,7 @@ write_count++;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
part_off += part_num; // 次の消失ブロック位置にする
|
part_off += part_num; // 次の消失ブロック位置にする
|
||||||
@@ -930,9 +974,9 @@ time_write += GetTickCount() - time_start;
|
|||||||
print_progress_done();
|
print_progress_done();
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
j = ((block_size + io_size - 1) / io_size) * block_lost;
|
j = ((block_size + io_size - 1) / io_size) * block_lost;
|
||||||
printf("write %d.%03d sec, count = %d/%d\n", time_write / 1000, time_write % 1000, write_count, j);
|
printf("write %.3f sec, count = %d/%d\n", (double)time_write / CLOCKS_PER_SEC, write_count, j);
|
||||||
if (prog_num != prog_base)
|
if (prog_num != prog_base)
|
||||||
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
||||||
#endif
|
#endif
|
||||||
@@ -1063,7 +1107,7 @@ int decode_method3( // 復元するブロックを全て保持できる場合
|
|||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
read_count = 0;
|
read_count = 0;
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
last_file = -1;
|
last_file = -1;
|
||||||
for (i = 0; i < read_num; i++){ // スライスを一個ずつ読み込んでメモリー上に配置していく
|
for (i = 0; i < read_num; i++){ // スライスを一個ずつ読み込んでメモリー上に配置していく
|
||||||
@@ -1173,7 +1217,7 @@ read_count++;
|
|||||||
hFile = NULL;
|
hFile = NULL;
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
WaitForMultipleObjects(cpu_num1, hEnd, TRUE, INFINITE); // サブ・スレッドの計算終了の合図を待つ
|
WaitForMultipleObjects(cpu_num1, hEnd, TRUE, INFINITE); // サブ・スレッドの計算終了の合図を待つ
|
||||||
@@ -1238,7 +1282,7 @@ time_read += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 復元されたブロックを書き込む
|
// 復元されたブロックを書き込む
|
||||||
work_buf = p_buf;
|
work_buf = p_buf;
|
||||||
@@ -1297,7 +1341,7 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
// 最後の書き込みファイルを閉じる
|
// 最後の書き込みファイルを閉じる
|
||||||
CloseHandle(hFile);
|
CloseHandle(hFile);
|
||||||
@@ -1305,8 +1349,8 @@ time_write += GetTickCount() - time_start;
|
|||||||
print_progress_done();
|
print_progress_done();
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
printf("write %d.%03d sec\n", time_write / 1000, time_write % 1000);
|
printf("write %.3f sec\n", (double)time_write / CLOCKS_PER_SEC);
|
||||||
if (prog_num != prog_base)
|
if (prog_num != prog_base)
|
||||||
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
||||||
#endif
|
#endif
|
||||||
@@ -1463,7 +1507,7 @@ int decode_method4( // 全てのブロックを断片的に保持する場合 (G
|
|||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
read_count = 0;
|
read_count = 0;
|
||||||
skip_count = 0;
|
skip_count = 0;
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
last_file = -1;
|
last_file = -1;
|
||||||
recv_now = 0; // 何番目の代替ブロックか
|
recv_now = 0; // 何番目の代替ブロックか
|
||||||
@@ -1600,7 +1644,7 @@ skip_count++;
|
|||||||
hFile = NULL;
|
hFile = NULL;
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memset(g_buf, 0, (size_t)unit_size * block_lost); // 待機中に GPU用の領域をゼロ埋めしておく
|
memset(g_buf, 0, (size_t)unit_size * block_lost); // 待機中に GPU用の領域をゼロ埋めしておく
|
||||||
@@ -1845,7 +1889,7 @@ skip_count++;
|
|||||||
prog_num += th->size * block_lost;
|
prog_num += th->size * block_lost;
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 復元されたブロックを書き込む
|
// 復元されたブロックを書き込む
|
||||||
work_buf = p_buf;
|
work_buf = p_buf;
|
||||||
@@ -1918,7 +1962,7 @@ write_count++;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
block_off += io_size;
|
block_off += io_size;
|
||||||
@@ -1929,9 +1973,9 @@ time_write += GetTickCount() - time_start;
|
|||||||
print_progress_done();
|
print_progress_done();
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
j = ((block_size + io_size - 1) / io_size) * block_lost;
|
j = ((block_size + io_size - 1) / io_size) * block_lost;
|
||||||
printf("write %d.%03d sec, count = %d/%d\n", time_write / 1000, time_write % 1000, write_count, j);
|
printf("write %.3f sec, count = %d/%d\n", (double)time_write / CLOCKS_PER_SEC, write_count, j);
|
||||||
if (prog_num != prog_base)
|
if (prog_num != prog_base)
|
||||||
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
||||||
#endif
|
#endif
|
||||||
@@ -2096,7 +2140,7 @@ int decode_method5( // 復元するブロックだけ保持する場合 (GPU対
|
|||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
read_count = 0;
|
read_count = 0;
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
last_file = -1;
|
last_file = -1;
|
||||||
for (i = 0; i < read_num; i++){ // スライスを一個ずつ読み込んでメモリー上に配置していく
|
for (i = 0; i < read_num; i++){ // スライスを一個ずつ読み込んでメモリー上に配置していく
|
||||||
@@ -2206,7 +2250,7 @@ read_count++;
|
|||||||
hFile = NULL;
|
hFile = NULL;
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (source_off == 0)
|
if (source_off == 0)
|
||||||
@@ -2446,7 +2490,7 @@ time_read += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 復元されたブロックを書き込む
|
// 復元されたブロックを書き込む
|
||||||
work_buf = p_buf;
|
work_buf = p_buf;
|
||||||
@@ -2507,7 +2551,7 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
// 最後の書き込みファイルを閉じる
|
// 最後の書き込みファイルを閉じる
|
||||||
CloseHandle(hFile);
|
CloseHandle(hFile);
|
||||||
@@ -2515,8 +2559,8 @@ time_write += GetTickCount() - time_start;
|
|||||||
print_progress_done();
|
print_progress_done();
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
printf("write %d.%03d sec\n", time_write / 1000, time_write % 1000);
|
printf("write %.3f sec\n", (double)time_write / CLOCKS_PER_SEC);
|
||||||
if (prog_num != prog_base)
|
if (prog_num != prog_base)
|
||||||
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// rs_encode.c
|
// rs_encode.c
|
||||||
// Copyright : 2023-11-25 Yutaka Sawada
|
// Copyright : 2023-12-18 Yutaka Sawada
|
||||||
// License : GPL
|
// License : GPL
|
||||||
|
|
||||||
#ifndef _UNICODE
|
#ifndef _UNICODE
|
||||||
@@ -29,7 +29,9 @@
|
|||||||
|
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
static unsigned int time_start, time_read = 0, time_write = 0, time_calc = 0;
|
#include <time.h>
|
||||||
|
static double time_sec, time_speed;
|
||||||
|
static clock_t time_start, time_read = 0, time_write = 0, time_calc = 0;
|
||||||
static unsigned int read_count, skip_count;
|
static unsigned int read_count, skip_count;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -61,7 +63,7 @@ static DWORD WINAPI thread_encode2(LPVOID lpParameter)
|
|||||||
RS_TH *th;
|
RS_TH *th;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int loop_count2a = 0, loop_count2b = 0;
|
unsigned int loop_count2a = 0, loop_count2b = 0;
|
||||||
unsigned int time_start2, time_encode2a = 0, time_encode2b = 0;
|
clock_t time_start2, time_encode2a = 0, time_encode2b = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
th = (RS_TH *)lpParameter;
|
th = (RS_TH *)lpParameter;
|
||||||
@@ -80,7 +82,7 @@ unsigned int time_start2, time_encode2a = 0, time_encode2b = 0;
|
|||||||
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
||||||
while (th->now < INT_MAX / 2){
|
while (th->now < INT_MAX / 2){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start2 = GetTickCount();
|
time_start2 = clock();
|
||||||
#endif
|
#endif
|
||||||
s_buf = th->buf;
|
s_buf = th->buf;
|
||||||
src_off = th->off; // ソース・ブロック番号
|
src_off = th->off; // ソース・ブロック番号
|
||||||
@@ -98,7 +100,7 @@ loop_count2a++;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2a += GetTickCount() - time_start2;
|
time_encode2a += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
} else { // パリティ・ブロックを部分的に保持する場合
|
} else { // パリティ・ブロックを部分的に保持する場合
|
||||||
// スレッドごとに作成するパリティ・ブロックの chunk を変える
|
// スレッドごとに作成するパリティ・ブロックの chunk を変える
|
||||||
@@ -143,7 +145,7 @@ loop_count2b += src_num;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2b += GetTickCount() - time_start2;
|
time_encode2b += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
//_mm_sfence(); // メモリーへの書き込みを完了する
|
//_mm_sfence(); // メモリーへの書き込みを完了する
|
||||||
@@ -153,19 +155,21 @@ time_encode2b += GetTickCount() - time_start2;
|
|||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
loop_count2b /= chunk_num; // chunk数で割ってブロック数にする
|
loop_count2b /= chunk_num; // chunk数で割ってブロック数にする
|
||||||
printf("sub-thread : total loop = %d\n", loop_count2a + loop_count2b);
|
printf("sub-thread : total loop = %d\n", loop_count2a + loop_count2b);
|
||||||
if (time_encode2a > 0){
|
time_sec = (double)time_encode2a / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2a * unit_size * 125 / ((__int64)time_encode2a * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2a * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
if (loop_count2a > 0)
|
if (loop_count2a > 0)
|
||||||
printf(" 1st encode %d.%03d sec, %d loop, %d MB/s\n", time_encode2a / 1000, time_encode2a % 1000, loop_count2a, i);
|
printf(" 1st encode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2a, time_speed);
|
||||||
if (time_encode2b > 0){
|
time_sec = (double)time_encode2b / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2b * unit_size * 125 / ((__int64)time_encode2b * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2b * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf(" 2nd encode %d.%03d sec, %d loop, %d MB/s\n", time_encode2b / 1000, time_encode2b % 1000, loop_count2b, i);
|
printf(" 2nd encode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2b, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 終了処理
|
// 終了処理
|
||||||
@@ -185,7 +189,7 @@ static DWORD WINAPI thread_encode3(LPVOID lpParameter)
|
|||||||
RS_TH *th;
|
RS_TH *th;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int loop_count2a = 0, loop_count2b = 0;
|
unsigned int loop_count2a = 0, loop_count2b = 0;
|
||||||
unsigned int time_start2, time_encode2a = 0, time_encode2b = 0;
|
clock_t time_start2, time_encode2a = 0, time_encode2b = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
th = (RS_TH *)lpParameter;
|
th = (RS_TH *)lpParameter;
|
||||||
@@ -204,7 +208,7 @@ unsigned int time_start2, time_encode2a = 0, time_encode2b = 0;
|
|||||||
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
||||||
while (th->now < INT_MAX / 2){
|
while (th->now < INT_MAX / 2){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start2 = GetTickCount();
|
time_start2 = clock();
|
||||||
#endif
|
#endif
|
||||||
s_buf = th->buf;
|
s_buf = th->buf;
|
||||||
src_off = th->off; // ソース・ブロック番号
|
src_off = th->off; // ソース・ブロック番号
|
||||||
@@ -221,7 +225,7 @@ loop_count2a++;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2a += GetTickCount() - time_start2;
|
time_encode2a += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
} else { // 全てのパリティ・ブロックを保持する場合
|
} else { // 全てのパリティ・ブロックを保持する場合
|
||||||
// スレッドごとに作成するパリティ・ブロックの chunk を変える
|
// スレッドごとに作成するパリティ・ブロックの chunk を変える
|
||||||
@@ -261,7 +265,7 @@ loop_count2b += src_num;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2b += GetTickCount() - time_start2;
|
time_encode2b += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
//_mm_sfence(); // メモリーへの書き込みを完了する
|
//_mm_sfence(); // メモリーへの書き込みを完了する
|
||||||
@@ -271,19 +275,21 @@ time_encode2b += GetTickCount() - time_start2;
|
|||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
loop_count2b /= chunk_num; // chunk数で割ってブロック数にする
|
loop_count2b /= chunk_num; // chunk数で割ってブロック数にする
|
||||||
printf("sub-thread : total loop = %d\n", loop_count2a + loop_count2b);
|
printf("sub-thread : total loop = %d\n", loop_count2a + loop_count2b);
|
||||||
if (time_encode2a > 0){
|
time_sec = (double)time_encode2a / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2a * unit_size * 125 / ((__int64)time_encode2a * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2a * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
if (loop_count2a > 0)
|
if (loop_count2a > 0)
|
||||||
printf(" 1st encode %d.%03d sec, %d loop, %d MB/s\n", time_encode2a / 1000, time_encode2a % 1000, loop_count2a, i);
|
printf(" 1st encode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2a, time_speed);
|
||||||
if (time_encode2b > 0){
|
time_sec = (double)time_encode2b / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2b * unit_size * 125 / ((__int64)time_encode2b * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2b * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf(" 2nd encode %d.%03d sec, %d loop, %d MB/s\n", time_encode2b / 1000, time_encode2b % 1000, loop_count2b, i);
|
printf(" 2nd encode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2b, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 終了処理
|
// 終了処理
|
||||||
@@ -303,7 +309,8 @@ static DWORD WINAPI thread_encode_gpu(LPVOID lpParameter)
|
|||||||
HANDLE hRun, hEnd;
|
HANDLE hRun, hEnd;
|
||||||
RS_TH *th;
|
RS_TH *th;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
unsigned int time_start2, time_encode2 = 0, loop_count2 = 0;
|
unsigned int loop_count2 = 0;
|
||||||
|
clock_t time_start2, time_encode2 = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
th = (RS_TH *)lpParameter;
|
th = (RS_TH *)lpParameter;
|
||||||
@@ -320,7 +327,7 @@ unsigned int time_start2, time_encode2 = 0, loop_count2 = 0;
|
|||||||
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
WaitForSingleObject(hRun, INFINITE); // 計算開始の合図を待つ
|
||||||
while (th->now < INT_MAX / 2){
|
while (th->now < INT_MAX / 2){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start2 = GetTickCount();
|
time_start2 = clock();
|
||||||
#endif
|
#endif
|
||||||
// GPUはソース・ブロック読み込み中に呼ばれない
|
// GPUはソース・ブロック読み込み中に呼ばれない
|
||||||
s_buf = th->buf;
|
s_buf = th->buf;
|
||||||
@@ -335,13 +342,17 @@ time_start2 = GetTickCount();
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 一つの GPUスレッドが全てのパリティ・ブロックを処理する
|
// 一つの GPUスレッドが全てのパリティ・ブロックを処理する
|
||||||
while ((j = InterlockedIncrement(&(th->now))) < parity_num){ // j = ++th_now
|
if (OpenCL_method & 8){ // 2ブロックずつ計算する
|
||||||
|
// パリティ・ブロック数が奇数なら、最初の一個だけ別に計算する
|
||||||
|
if (parity_num & 1){
|
||||||
|
InterlockedIncrement(&(th->now)); // 常に j = 0 となる
|
||||||
|
|
||||||
// factor は定数行列の乗数になる
|
// factor は定数行列の乗数になる
|
||||||
for (i = 0; i < src_num; i++)
|
for (i = 0; i < src_num; i++)
|
||||||
factor[i] = galois_power(constant[src_off + i], first_num + j);
|
factor[i] = galois_power(constant[src_off + i], first_num);
|
||||||
|
|
||||||
// VRAM上のソース・ブロックごとにパリティを追加していく
|
// VRAM上のソース・ブロックごとにパリティを追加していく
|
||||||
i = gpu_multiply_blocks(src_num, factor, g_buf + (size_t)unit_size * j, unit_size);
|
i = gpu_multiply_blocks(src_num, factor, NULL, g_buf, unit_size);
|
||||||
if (i != 0){
|
if (i != 0){
|
||||||
th->len = i;
|
th->len = i;
|
||||||
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
||||||
@@ -351,8 +362,51 @@ time_start2 = GetTickCount();
|
|||||||
loop_count2 += src_num;
|
loop_count2 += src_num;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 残りのブロックは二個ずつ計算する
|
||||||
|
while ((j = InterlockedAdd(&(th->now), 2)) < parity_num){ // th_now += 2, j = th_now
|
||||||
|
j--; // +2 してるから、最初のブロックは -1 する
|
||||||
|
|
||||||
|
// factor は定数行列の乗数になる
|
||||||
|
for (i = 0; i < src_num; i++){
|
||||||
|
int c = constant[src_off + i]; // 同じ定数だけど、何乗するかが異なる
|
||||||
|
factor[i] = galois_power(c, first_num + j);
|
||||||
|
factor[src_num + i] = galois_power(c, first_num + j + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// VRAM上のソース・ブロックごとにパリティを追加していく
|
||||||
|
i = gpu_multiply_blocks(src_num, factor, (void *)1, g_buf + (size_t)unit_size * j, unit_size * 2);
|
||||||
|
if (i != 0){
|
||||||
|
th->len = i;
|
||||||
|
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
||||||
|
break;
|
||||||
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_encode2 += GetTickCount() - time_start2;
|
loop_count2 += src_num * 2;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
} else { // 以前からの1ブロックずつ計算する方式
|
||||||
|
while ((j = InterlockedIncrement(&(th->now))) < parity_num){ // j = ++th_now
|
||||||
|
// factor は定数行列の乗数になる
|
||||||
|
for (i = 0; i < src_num; i++)
|
||||||
|
factor[i] = galois_power(constant[src_off + i], first_num + j);
|
||||||
|
|
||||||
|
// VRAM上のソース・ブロックごとにパリティを追加していく
|
||||||
|
i = gpu_multiply_blocks(src_num, factor, NULL, g_buf + (size_t)unit_size * j, unit_size);
|
||||||
|
if (i != 0){
|
||||||
|
th->len = i;
|
||||||
|
InterlockedExchange(&(th->now), INT_MAX / 3); // サブ・スレッドの計算を中断する
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#ifdef TIMER
|
||||||
|
loop_count2 += src_num;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef TIMER
|
||||||
|
time_encode2 += clock() - time_start2;
|
||||||
#endif
|
#endif
|
||||||
// 最後にVRAMを解放する
|
// 最後にVRAMを解放する
|
||||||
i = gpu_finish();
|
i = gpu_finish();
|
||||||
@@ -365,12 +419,13 @@ time_encode2 += GetTickCount() - time_start2;
|
|||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("gpu-thread :\n");
|
printf("gpu-thread :\n");
|
||||||
if (time_encode2 > 0){
|
time_sec = (double)time_encode2 / CLOCKS_PER_SEC;
|
||||||
i = (int)((__int64)loop_count2 * unit_size * 125 / ((__int64)time_encode2 * 131072));
|
if (time_sec > 0){
|
||||||
|
time_speed = ((double)loop_count2 * unit_size) / (time_sec * 1048576);
|
||||||
} else {
|
} else {
|
||||||
i = 0;
|
time_speed = 0;
|
||||||
}
|
}
|
||||||
printf(" 2nd encode %d.%03d sec, %d loop, %d MB/s\n", time_encode2 / 1000, time_encode2 % 1000, loop_count2, i);
|
printf(" 2nd encode %.3f sec, %d loop, %.0f MB/s\n", time_sec, loop_count2, time_speed);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 終了処理
|
// 終了処理
|
||||||
@@ -452,7 +507,7 @@ int encode_method1( // ソース・ブロックが一個だけの場合
|
|||||||
block_off = 0;
|
block_off = 0;
|
||||||
while (block_off < block_size){
|
while (block_off < block_size){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// ソース・ブロックを読み込む
|
// ソース・ブロックを読み込む
|
||||||
len = s_blk[0].size - block_off;
|
len = s_blk[0].size - block_off;
|
||||||
@@ -469,7 +524,7 @@ time_start = GetTickCount();
|
|||||||
s_blk[0].crc = crc_update(s_blk[0].crc, buf, len); // without pad
|
s_blk[0].crc = crc_update(s_blk[0].crc, buf, len); // without pad
|
||||||
checksum16_altmap(buf, buf + io_size, io_size);
|
checksum16_altmap(buf, buf + io_size, io_size);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// リカバリ・ファイルに書き込むサイズ
|
// リカバリ・ファイルに書き込むサイズ
|
||||||
@@ -482,13 +537,13 @@ time_read += GetTickCount() - time_start;
|
|||||||
// パリティ・ブロックごとに
|
// パリティ・ブロックごとに
|
||||||
for (i = 0; i < parity_num; i++){
|
for (i = 0; i < parity_num; i++){
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
memset(work_buf, 0, unit_size);
|
memset(work_buf, 0, unit_size);
|
||||||
// factor は 2の乗数になる
|
// factor は 2の乗数になる
|
||||||
galois_align_multiply(buf, work_buf, unit_size, galois_power(2, first_num + i));
|
galois_align_multiply(buf, work_buf, unit_size, galois_power(2, first_num + i));
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_calc += GetTickCount() - time_start;
|
time_calc += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 経過表示
|
// 経過表示
|
||||||
@@ -502,7 +557,7 @@ time_calc += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// パリティ・ブロックのチェックサムを検証する
|
// パリティ・ブロックのチェックサムを検証する
|
||||||
checksum16_return(work_buf, hash, io_size);
|
checksum16_return(work_buf, hash, io_size);
|
||||||
@@ -535,7 +590,7 @@ time_start = GetTickCount();
|
|||||||
goto error_end;
|
goto error_end;
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -565,7 +620,7 @@ time_write += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 最後に Recovery Slice packet のヘッダーを書き込む
|
// 最後に Recovery Slice packet のヘッダーを書き込む
|
||||||
for (i = 0; i < parity_num; i++){
|
for (i = 0; i < parity_num; i++){
|
||||||
@@ -581,14 +636,14 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
printf("write %d.%03d sec\n", time_write / 1000, time_write % 1000);
|
printf("write %.3f sec\n", (double)time_write / CLOCKS_PER_SEC);
|
||||||
printf("encode %d.%03d sec\n", time_calc / 1000, time_calc % 1000);
|
printf("encode %.3f sec\n", (double)time_calc / CLOCKS_PER_SEC);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
error_end:
|
error_end:
|
||||||
@@ -729,7 +784,7 @@ int encode_method2( // ソース・データを全て読み込む場合
|
|||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
read_count = 0;
|
read_count = 0;
|
||||||
skip_count = 0;
|
skip_count = 0;
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
last_file = -1;
|
last_file = -1;
|
||||||
for (i = 0; i < source_num; i++){
|
for (i = 0; i < source_num; i++){
|
||||||
@@ -830,7 +885,7 @@ skip_count++;
|
|||||||
CloseHandle(hFile);
|
CloseHandle(hFile);
|
||||||
hFile = NULL;
|
hFile = NULL;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
WaitForMultipleObjects(cpu_num1, hEnd, TRUE, INFINITE); // サブ・スレッドの計算終了の合図を待つ
|
WaitForMultipleObjects(cpu_num1, hEnd, TRUE, INFINITE); // サブ・スレッドの計算終了の合図を待つ
|
||||||
@@ -930,7 +985,7 @@ skip_count++;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// パリティ・ブロックを書き込む
|
// パリティ・ブロックを書き込む
|
||||||
work_buf = p_buf;
|
work_buf = p_buf;
|
||||||
@@ -979,7 +1034,7 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
part_off += part_num; // 次のパリティ位置にする
|
part_off += part_num; // 次のパリティ位置にする
|
||||||
@@ -1025,7 +1080,7 @@ time_write += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 最後に Recovery Slice packet のヘッダーを書き込む
|
// 最後に Recovery Slice packet のヘッダーを書き込む
|
||||||
for (i = 0; i < parity_num; i++){
|
for (i = 0; i < parity_num; i++){
|
||||||
@@ -1041,13 +1096,13 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
printf("write %d.%03d sec\n", time_write / 1000, time_write % 1000);
|
printf("write %.3f sec\n", (double)time_write / CLOCKS_PER_SEC);
|
||||||
if (prog_num != prog_base)
|
if (prog_num != prog_base)
|
||||||
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
||||||
#endif
|
#endif
|
||||||
@@ -1186,7 +1241,7 @@ int encode_method3( // パリティ・ブロックを全て保持して、一度
|
|||||||
src_off = source_off - 1; // まだ計算して無い印
|
src_off = source_off - 1; // まだ計算して無い印
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < read_num; i++){ // スライスを一個ずつ読み込んでメモリー上に配置していく
|
for (i = 0; i < read_num; i++){ // スライスを一個ずつ読み込んでメモリー上に配置していく
|
||||||
// ソース・ブロックを読み込む
|
// ソース・ブロックを読み込む
|
||||||
@@ -1318,7 +1373,7 @@ time_start = GetTickCount();
|
|||||||
memcpy(common_buf + packet_off + 16, file_md_ctx.hash, 16);
|
memcpy(common_buf + packet_off + 16, file_md_ctx.hash, 16);
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
WaitForMultipleObjects(cpu_num1, hEnd, TRUE, INFINITE); // サブ・スレッドの計算終了の合図を待つ
|
WaitForMultipleObjects(cpu_num1, hEnd, TRUE, INFINITE); // サブ・スレッドの計算終了の合図を待つ
|
||||||
@@ -1393,19 +1448,19 @@ time_read += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
memcpy(common_buf + common_size, common_buf, common_size); // 後の半分に前半のをコピーする
|
memcpy(common_buf + common_size, common_buf, common_size); // 後の半分に前半のをコピーする
|
||||||
// 最後にパリティ・ブロックのチェックサムを検証して、リカバリ・ファイルに書き込む
|
// 最後にパリティ・ブロックのチェックサムを検証して、リカバリ・ファイルに書き込む
|
||||||
err = create_recovery_file_1pass(file_path, recovery_path, packet_limit, block_distri,
|
err = create_recovery_file_1pass(file_path, recovery_path, packet_limit, block_distri,
|
||||||
packet_num, common_buf, common_size, footer_buf, footer_size, rcv_hFile, p_buf, NULL, unit_size);
|
packet_num, common_buf, common_size, footer_buf, footer_size, rcv_hFile, p_buf, NULL, unit_size);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write = GetTickCount() - time_start;
|
time_write = clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
printf("write %d.%03d sec\n", time_write / 1000, time_write % 1000);
|
printf("write %.3f sec\n", (double)time_write / CLOCKS_PER_SEC);
|
||||||
if (prog_num != prog_base - prog_write * parity_num)
|
if (prog_num != prog_base - prog_write * parity_num)
|
||||||
printf(" prog_num = %I64d != %I64d\n", prog_num, prog_base - prog_write * parity_num);
|
printf(" prog_num = %I64d != %I64d\n", prog_num, prog_base - prog_write * parity_num);
|
||||||
#endif
|
#endif
|
||||||
@@ -1577,7 +1632,7 @@ int encode_method4( // 全てのブロックを断片的に保持する場合 (G
|
|||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
read_count = 0;
|
read_count = 0;
|
||||||
skip_count = 0;
|
skip_count = 0;
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
last_file = -1;
|
last_file = -1;
|
||||||
for (i = 0; i < source_num; i++){
|
for (i = 0; i < source_num; i++){
|
||||||
@@ -1678,7 +1733,7 @@ skip_count++;
|
|||||||
CloseHandle(hFile);
|
CloseHandle(hFile);
|
||||||
hFile = NULL;
|
hFile = NULL;
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
memset(g_buf, 0, (size_t)unit_size * parity_num); // 待機中に GPU用の領域をゼロ埋めしておく
|
memset(g_buf, 0, (size_t)unit_size * parity_num); // 待機中に GPU用の領域をゼロ埋めしておく
|
||||||
@@ -1931,7 +1986,7 @@ skip_count++;
|
|||||||
prog_num += th->size * parity_num;
|
prog_num += th->size * parity_num;
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// パリティ・ブロックを書き込む
|
// パリティ・ブロックを書き込む
|
||||||
work_buf = p_buf;
|
work_buf = p_buf;
|
||||||
@@ -1982,7 +2037,7 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
block_off += io_size;
|
block_off += io_size;
|
||||||
@@ -2025,7 +2080,7 @@ time_write += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
// 最後に Recovery Slice packet のヘッダーを書き込む
|
// 最後に Recovery Slice packet のヘッダーを書き込む
|
||||||
for (i = 0; i < parity_num; i++){
|
for (i = 0; i < parity_num; i++){
|
||||||
@@ -2041,13 +2096,13 @@ time_start = GetTickCount();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write += GetTickCount() - time_start;
|
time_write += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
printf("write %d.%03d sec\n", time_write / 1000, time_write % 1000);
|
printf("write %.3f sec\n", (double)time_write / CLOCKS_PER_SEC);
|
||||||
if (prog_num != prog_base)
|
if (prog_num != prog_base)
|
||||||
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
printf(" prog_num = %I64d, prog_base = %I64d\n", prog_num, prog_base);
|
||||||
#endif
|
#endif
|
||||||
@@ -2220,7 +2275,7 @@ int encode_method5( // ソース・ブロックの一部とパリティ・ブロ
|
|||||||
src_off = source_off - 1; // まだ計算して無い印
|
src_off = source_off - 1; // まだ計算して無い印
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
for (i = 0; i < read_num; i++){ // スライスを一個ずつ読み込んでメモリー上に配置していく
|
for (i = 0; i < read_num; i++){ // スライスを一個ずつ読み込んでメモリー上に配置していく
|
||||||
// ソース・ブロックを読み込む
|
// ソース・ブロックを読み込む
|
||||||
@@ -2351,7 +2406,7 @@ time_start = GetTickCount();
|
|||||||
memcpy(common_buf + packet_off + 16, file_md_ctx.hash, 16);
|
memcpy(common_buf + packet_off + 16, file_md_ctx.hash, 16);
|
||||||
}
|
}
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_read += GetTickCount() - time_start;
|
time_read += clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (source_off == 0)
|
if (source_off == 0)
|
||||||
@@ -2589,19 +2644,19 @@ time_read += GetTickCount() - time_start;
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_start = GetTickCount();
|
time_start = clock();
|
||||||
#endif
|
#endif
|
||||||
memcpy(common_buf + common_size, common_buf, common_size); // 後の半分に前半のをコピーする
|
memcpy(common_buf + common_size, common_buf, common_size); // 後の半分に前半のをコピーする
|
||||||
// 最後にパリティ・ブロックのチェックサムを検証して、リカバリ・ファイルに書き込む
|
// 最後にパリティ・ブロックのチェックサムを検証して、リカバリ・ファイルに書き込む
|
||||||
err = create_recovery_file_1pass(file_path, recovery_path, packet_limit, block_distri,
|
err = create_recovery_file_1pass(file_path, recovery_path, packet_limit, block_distri,
|
||||||
packet_num, common_buf, common_size, footer_buf, footer_size, rcv_hFile, p_buf, g_buf, unit_size);
|
packet_num, common_buf, common_size, footer_buf, footer_size, rcv_hFile, p_buf, g_buf, unit_size);
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
time_write = GetTickCount() - time_start;
|
time_write = clock() - time_start;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef TIMER
|
#ifdef TIMER
|
||||||
printf("read %d.%03d sec\n", time_read / 1000, time_read % 1000);
|
printf("read %.3f sec\n", (double)time_read / CLOCKS_PER_SEC);
|
||||||
printf("write %d.%03d sec\n", time_write / 1000, time_write % 1000);
|
printf("write %.3f sec\n", (double)time_write / CLOCKS_PER_SEC);
|
||||||
if (prog_num != prog_base - prog_write * parity_num)
|
if (prog_num != prog_base - prog_write * parity_num)
|
||||||
printf(" prog_num = %I64d != %I64d\n", prog_num, prog_base - prog_write * parity_num);
|
printf(" prog_num = %I64d != %I64d\n", prog_num, prog_base - prog_write * parity_num);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,10 +1,14 @@
|
|||||||
void calc_table(__local uint *mtab, int id, int factor)
|
void calc_table(__local uint *mtab, int id, int factor)
|
||||||
{
|
{
|
||||||
int i, sum = 0;
|
int i, sum, mask;
|
||||||
|
|
||||||
for (i = 0; i < 8; i++){
|
mask = (id & 1) ? 0xFFFF : 0;
|
||||||
sum = (id & (1 << i)) ? (sum ^ factor) : sum;
|
sum = mask & factor;
|
||||||
factor = (factor & 0x8000) ? ((factor << 1) ^ 0x1100B) : (factor << 1);
|
for (i = 1; i < 8; i++){
|
||||||
|
mask = (factor & 0x8000) ? 0x1100B : 0;
|
||||||
|
factor = (factor << 1) ^ mask;
|
||||||
|
mask = (id & (1 << i)) ? 0xFFFF : 0;
|
||||||
|
sum ^= mask & factor;
|
||||||
}
|
}
|
||||||
mtab[id] = sum;
|
mtab[id] = sum;
|
||||||
|
|
||||||
@@ -14,6 +18,32 @@ void calc_table(__local uint *mtab, int id, int factor)
|
|||||||
mtab[id + 256] = sum;
|
mtab[id + 256] = sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void calc_table2(__local uint *mtab, int id, int factor, int factor2)
|
||||||
|
{
|
||||||
|
int i, sum, sum2, mask;
|
||||||
|
|
||||||
|
mask = (id & 1) ? 0xFFFF : 0;
|
||||||
|
sum = mask & factor;
|
||||||
|
sum2 = mask & factor2;
|
||||||
|
for (i = 1; i < 8; i++){
|
||||||
|
mask = (factor & 0x8000) ? 0x1100B : 0;
|
||||||
|
factor = (factor << 1) ^ mask;
|
||||||
|
mask = (factor2 & 0x8000) ? 0x1100B : 0;
|
||||||
|
factor2 = (factor2 << 1) ^ mask;
|
||||||
|
mask = (id & (1 << i)) ? 0xFFFF : 0;
|
||||||
|
sum ^= mask & factor;
|
||||||
|
sum2 ^= mask & factor2;
|
||||||
|
}
|
||||||
|
mtab[id] = sum | (sum2 << 16);
|
||||||
|
|
||||||
|
sum = (sum << 4) ^ (((sum << 16) >> 31) & 0x88058) ^ (((sum << 17) >> 31) & 0x4402C) ^ (((sum << 18) >> 31) & 0x22016) ^ (((sum << 19) >> 31) & 0x1100B);
|
||||||
|
sum = (sum << 4) ^ (((sum << 16) >> 31) & 0x88058) ^ (((sum << 17) >> 31) & 0x4402C) ^ (((sum << 18) >> 31) & 0x22016) ^ (((sum << 19) >> 31) & 0x1100B);
|
||||||
|
sum2 = (sum2 << 4) ^ (((sum2 << 16) >> 31) & 0x88058) ^ (((sum2 << 17) >> 31) & 0x4402C) ^ (((sum2 << 18) >> 31) & 0x22016) ^ (((sum2 << 19) >> 31) & 0x1100B);
|
||||||
|
sum2 = (sum2 << 4) ^ (((sum2 << 16) >> 31) & 0x88058) ^ (((sum2 << 17) >> 31) & 0x4402C) ^ (((sum2 << 18) >> 31) & 0x22016) ^ (((sum2 << 19) >> 31) & 0x1100B);
|
||||||
|
|
||||||
|
mtab[id + 256] = sum | (sum2 << 16);
|
||||||
|
}
|
||||||
|
|
||||||
__kernel void method1(
|
__kernel void method1(
|
||||||
__global uint *src,
|
__global uint *src,
|
||||||
__global uint *dst,
|
__global uint *dst,
|
||||||
@@ -31,6 +61,7 @@ __kernel void method1(
|
|||||||
dst[i] = 0;
|
dst[i] = 0;
|
||||||
|
|
||||||
for (blk = 0; blk < blk_num; blk++){
|
for (blk = 0; blk < blk_num; blk++){
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
calc_table(mtab, table_id, factors[blk]);
|
calc_table(mtab, table_id, factors[blk]);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -42,7 +73,6 @@ __kernel void method1(
|
|||||||
dst[i] ^= sum;
|
dst[i] ^= sum;
|
||||||
}
|
}
|
||||||
src += BLK_SIZE;
|
src += BLK_SIZE;
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -65,6 +95,7 @@ __kernel void method2(
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (blk = 0; blk < blk_num; blk++){
|
for (blk = 0; blk < blk_num; blk++){
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
calc_table(mtab, table_id, factors[blk]);
|
calc_table(mtab, table_id, factors[blk]);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -82,11 +113,10 @@ __kernel void method2(
|
|||||||
dst[pos + 4] ^= ((sum1 & 0xFF00FF00) >> 8) | (sum2 & 0xFF00FF00);
|
dst[pos + 4] ^= ((sum1 & 0xFF00FF00) >> 8) | (sum2 & 0xFF00FF00);
|
||||||
}
|
}
|
||||||
src += BLK_SIZE;
|
src += BLK_SIZE;
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void method3(
|
__kernel void method4(
|
||||||
__global uint4 *src,
|
__global uint4 *src,
|
||||||
__global uint4 *dst,
|
__global uint4 *dst,
|
||||||
__global ushort *factors,
|
__global ushort *factors,
|
||||||
@@ -106,6 +136,7 @@ __kernel void method3(
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (blk = 0; blk < blk_num; blk++){
|
for (blk = 0; blk < blk_num; blk++){
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
calc_table(mtab, table_id, factors[blk]);
|
calc_table(mtab, table_id, factors[blk]);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
@@ -124,11 +155,140 @@ __kernel void method3(
|
|||||||
dst[i + 1] ^= as_uint4((uchar16)(r0.y, r0.w, r1.y, r1.w, r2.y, r2.w, r3.y, r3.w, r4.y, r4.w, r5.y, r5.w, r6.y, r6.w, r7.y, r7.w));
|
dst[i + 1] ^= as_uint4((uchar16)(r0.y, r0.w, r1.y, r1.w, r2.y, r2.w, r3.y, r3.w, r4.y, r4.w, r5.y, r5.w, r6.y, r6.w, r7.y, r7.w));
|
||||||
}
|
}
|
||||||
src += BLK_SIZE / 4;
|
src += BLK_SIZE / 4;
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void method4(
|
// method9: for each of blk_num source blocks, looks up both 16-bit halves of every
// 32-bit source word in the local table filled by calc_table2() and XOR-accumulates
// the results into two output regions, dst[0 .. BLK_SIZE) and dst[BLK_SIZE .. 2*BLK_SIZE).
//   src     - input words; the pointer is advanced by BLK_SIZE uints after each block
//   dst     - output words; cleared first, then updated with ^=
//   factors - factors[blk] and factors[blk_num + blk] are handed to calc_table2()
//   blk_num - number of source blocks to process
// calc_table2() and BLK_SIZE are defined outside this excerpt. Judging from how the
// lookups are split below, each table entry presumably carries the result for the
// first factor in its low 16 bits and for the second factor in its high 16 bits
// - TODO confirm against calc_table2().
__kernel void method9(
|
||||||
|
__global uint *src,
|
||||||
|
__global uint *dst,
|
||||||
|
__global ushort *factors,
|
||||||
|
int blk_num)
|
||||||
|
{
|
||||||
|
// 512-entry lookup table in work-group local memory, refilled for every block
__local uint mtab[512];
|
||||||
|
int i, blk;
|
||||||
|
uint v, sum, sum2;
|
||||||
|
const int work_id = get_global_id(0);
|
||||||
|
const int work_size = get_global_size(0);
|
||||||
|
// local id is passed to calc_table2(); presumably selects the entries this work-item fills
const int table_id = get_local_id(0);
|
||||||
|
|
||||||
|
// clear both output regions; this work-item handles work_id, work_id + work_size, ...
for (i = work_id; i < BLK_SIZE; i += work_size){
|
||||||
|
dst[i] = 0;
|
||||||
|
dst[i + BLK_SIZE] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (blk = 0; blk < blk_num; blk++){
|
||||||
|
// presumably keeps the table from being overwritten while other work-items still read it
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
calc_table2(mtab, table_id, factors[blk], factors[blk_num + blk]);
|
||||||
|
// the whole work-group reaches this point before any lookup below is made
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
// same index pattern as the clearing loop, so each element is updated by the work-item that cleared it
for (i = work_id; i < BLK_SIZE; i += work_size){
|
||||||
|
v = src[i];
|
||||||
|
// low half-word of v: byte 0 indexes entries 0..255, byte 1 indexes entries 256..511
sum = mtab[(uchar)v] ^ mtab[256 + (uchar)(v >> 8)];
|
||||||
|
// high half-word of v: byte 2 and byte 3, same two-part lookup
sum2 = mtab[(uchar)(v >> 16)] ^ mtab[256 + (v >> 24)];
|
||||||
|
// low 16 bits of both lookups go to the first output region
dst[i] ^= (sum & 0xFFFF) | (sum2 << 16);
|
||||||
|
// high 16 bits of both lookups go to the second output region
dst[i + BLK_SIZE] ^= (sum >> 16) | (sum2 & 0xFFFF0000);
|
||||||
|
}
|
||||||
|
// step to the next source block
src += BLK_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// method10: same two-output accumulation as the other calc_table2()-based kernels,
// but for a source layout in which, inside every group of 8 uints, the word at
// offset k (k = 0..3) supplies the bytes that index table entries 0..255 and the
// word at offset k + 4 supplies the bytes that index entries 256..511.
//   src     - input words; the pointer is advanced by BLK_SIZE uints after each block
//   dst     - output words; two regions of BLK_SIZE uints, cleared first, then updated with ^=
//   factors - factors[blk] and factors[blk_num + blk] are handed to calc_table2()
//   blk_num - number of source blocks to process
// calc_table2() and BLK_SIZE are defined outside this excerpt. Presumably the source
// stores low and high bytes of 16-bit values in separate 16-byte halves - TODO confirm.
__kernel void method10(
|
||||||
|
__global uint *src,
|
||||||
|
__global uint *dst,
|
||||||
|
__global ushort *factors,
|
||||||
|
int blk_num)
|
||||||
|
{
|
||||||
|
// 512-entry lookup table in work-group local memory, refilled for every block
__local uint mtab[512];
|
||||||
|
int i, blk, pos;
|
||||||
|
uint lo, hi, sum1, sum2, sum3, sum4;
|
||||||
|
// indices are doubled: every loop step of a work-item covers two uints per output region
const int work_id = get_global_id(0) * 2;
|
||||||
|
const int work_size = get_global_size(0) * 2;
|
||||||
|
const int table_id = get_local_id(0);
|
||||||
|
|
||||||
|
// clear both output regions, two adjacent uints at a time
for (i = work_id; i < BLK_SIZE; i += work_size){
|
||||||
|
dst[i ] = 0;
|
||||||
|
dst[i + 1] = 0;
|
||||||
|
dst[i + BLK_SIZE ] = 0;
|
||||||
|
dst[i + BLK_SIZE + 1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (blk = 0; blk < blk_num; blk++){
|
||||||
|
// presumably keeps the table from being overwritten while other work-items still read it
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
calc_table2(mtab, table_id, factors[blk], factors[blk_num + blk]);
|
||||||
|
// the whole work-group reaches this point before any lookup below is made
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
// NOTE(review): the elements updated below (pos, pos + 4) are generally not the ones
// this work-item cleared above (i, i + 1); the only synchronisation in between is
// barrier(CLK_LOCAL_MEM_FENCE) - confirm that it orders the global-memory clears done
// by neighbouring work-items before these updates.
for (i = work_id; i < BLK_SIZE; i += work_size){
|
||||||
|
// i is even; within its group of 8 uints the offsets 0,2,4,6 map to 0,1,2,3
pos = (i & ~7) + ((i & 7) >> 1);
|
||||||
|
lo = src[pos ];
|
||||||
|
hi = src[pos + 4];
|
||||||
|
// one lookup pair per byte lane: lane n of lo indexes entries 0..255, lane n of hi entries 256..511
sum1 = mtab[(uchar)lo] ^ mtab[256 + (uchar)hi];
|
||||||
|
sum2 = mtab[(uchar)(lo >> 8)] ^ mtab[256 + (uchar)(hi >> 8)];
|
||||||
|
sum3 = mtab[(uchar)(lo >> 16)] ^ mtab[256 + (uchar)(hi >> 16)];
|
||||||
|
sum4 = mtab[lo >> 24] ^ mtab[256 + (hi >> 24)];
|
||||||
|
// byte 0 (bits 0-7) of sum1..sum4, packed into lanes 0..3
dst[pos ] ^= (sum1 & 0xFF) | ((sum2 & 0xFF) << 8) | ((sum3 & 0xFF) << 16) | (sum4 << 24);
|
||||||
|
// byte 1 (bits 8-15) of sum1..sum4
dst[pos + 4] ^= ((sum1 >> 8) & 0xFF) | (sum2 & 0xFF00) | ((sum3 & 0xFF00) << 8) | ((sum4 & 0xFF00) << 16);
|
||||||
|
// byte 2 (bits 16-23) of sum1..sum4, written to the second output region
dst[pos + BLK_SIZE ] ^= ((sum1 >> 16) & 0xFF) | ((sum2 >> 8) & 0xFF00) | (sum3 & 0xFF0000) | ((sum4 & 0xFF0000) << 8);
|
||||||
|
// byte 3 (bits 24-31) of sum1..sum4, written to the second output region
dst[pos + BLK_SIZE + 4] ^= (sum1 >> 24) | ((sum2 >> 16) & 0xFF00) | ((sum3 >> 8) & 0xFF0000) | (sum4 & 0xFF000000);
|
||||||
|
}
|
||||||
|
// step to the next source block
src += BLK_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// method12: 16-byte-access variant. Each loop step reads two uint4 values, uses the
// 16 bytes of the first to index table entries 0..255 and the 16 bytes of the second
// to index entries 256..511, and XOR-accumulates the four bytes of every combined
// table entry into four uint4 outputs (two per output region).
//   src     - input, addressed as uint4; advanced by BLK_SIZE / 4 elements after each block
//   dst     - output, addressed as uint4; two regions of BLK_SIZE / 4 elements each,
//             cleared first, then updated with ^=
//   factors - factors[blk] and factors[blk_num + blk] are handed to calc_table2()
//   blk_num - number of source blocks to process
// calc_table2() and BLK_SIZE are defined outside this excerpt. Presumably src[i] holds
// the low bytes and src[i + 1] the high bytes of sixteen 16-bit values - TODO confirm.
__kernel void method12(
|
||||||
|
__global uint4 *src,
|
||||||
|
__global uint4 *dst,
|
||||||
|
__global ushort *factors,
|
||||||
|
int blk_num)
|
||||||
|
{
|
||||||
|
// 512-entry lookup table in work-group local memory, refilled for every block
__local uint mtab[512];
|
||||||
|
int i, blk;
|
||||||
|
// one reinterpreted table result per byte lane (16 lanes)
uchar4 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, rA, rB, rC, rD, rE, rF;
|
||||||
|
uchar16 lo, hi;
|
||||||
|
// indices are doubled: every loop step of a work-item covers two uint4 per output region
const int work_id = get_global_id(0) * 2;
|
||||||
|
const int work_size = get_global_size(0) * 2;
|
||||||
|
const int table_id = get_local_id(0);
|
||||||
|
|
||||||
|
// clear both output regions, two adjacent uint4 at a time
for (i = work_id; i < BLK_SIZE / 4; i += work_size){
|
||||||
|
dst[i ] = 0;
|
||||||
|
dst[i + 1] = 0;
|
||||||
|
dst[i + BLK_SIZE / 4 ] = 0;
|
||||||
|
dst[i + BLK_SIZE / 4 + 1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (blk = 0; blk < blk_num; blk++){
|
||||||
|
// presumably keeps the table from being overwritten while other work-items still read it
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
calc_table2(mtab, table_id, factors[blk], factors[blk_num + blk]);
|
||||||
|
// the whole work-group reaches this point before any lookup below is made
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
// same index pattern as the clearing loop, so each element is updated by the work-item that cleared it
for (i = work_id; i < BLK_SIZE / 4; i += work_size){
|
||||||
|
// view the two 16-byte loads as individual bytes
lo = as_uchar16(src[i ]);
|
||||||
|
hi = as_uchar16(src[i + 1]);
|
||||||
|
// per lane n: entry selected by lo.sN XOR entry 256 + hi.sN, reinterpreted as four bytes
r0 = as_uchar4(mtab[lo.s0] ^ mtab[256 + hi.s0]);
|
||||||
|
r1 = as_uchar4(mtab[lo.s1] ^ mtab[256 + hi.s1]);
|
||||||
|
r2 = as_uchar4(mtab[lo.s2] ^ mtab[256 + hi.s2]);
|
||||||
|
r3 = as_uchar4(mtab[lo.s3] ^ mtab[256 + hi.s3]);
|
||||||
|
r4 = as_uchar4(mtab[lo.s4] ^ mtab[256 + hi.s4]);
|
||||||
|
r5 = as_uchar4(mtab[lo.s5] ^ mtab[256 + hi.s5]);
|
||||||
|
r6 = as_uchar4(mtab[lo.s6] ^ mtab[256 + hi.s6]);
|
||||||
|
r7 = as_uchar4(mtab[lo.s7] ^ mtab[256 + hi.s7]);
|
||||||
|
r8 = as_uchar4(mtab[lo.s8] ^ mtab[256 + hi.s8]);
|
||||||
|
r9 = as_uchar4(mtab[lo.s9] ^ mtab[256 + hi.s9]);
|
||||||
|
rA = as_uchar4(mtab[lo.sa] ^ mtab[256 + hi.sa]);
|
||||||
|
rB = as_uchar4(mtab[lo.sb] ^ mtab[256 + hi.sb]);
|
||||||
|
rC = as_uchar4(mtab[lo.sc] ^ mtab[256 + hi.sc]);
|
||||||
|
rD = as_uchar4(mtab[lo.sd] ^ mtab[256 + hi.sd]);
|
||||||
|
rE = as_uchar4(mtab[lo.se] ^ mtab[256 + hi.se]);
|
||||||
|
rF = as_uchar4(mtab[lo.sf] ^ mtab[256 + hi.sf]);
|
||||||
|
// gather the .x components of all 16 lanes, in lane order
dst[i ] ^= as_uint4((uchar16)(r0.x, r1.x, r2.x, r3.x, r4.x, r5.x, r6.x, r7.x, r8.x, r9.x, rA.x, rB.x, rC.x, rD.x, rE.x, rF.x));
|
||||||
|
// gather the .y components
dst[i + 1] ^= as_uint4((uchar16)(r0.y, r1.y, r2.y, r3.y, r4.y, r5.y, r6.y, r7.y, r8.y, r9.y, rA.y, rB.y, rC.y, rD.y, rE.y, rF.y));
|
||||||
|
// gather the .z components into the second output region
dst[i + BLK_SIZE / 4 ] ^= as_uint4((uchar16)(r0.z, r1.z, r2.z, r3.z, r4.z, r5.z, r6.z, r7.z, r8.z, r9.z, rA.z, rB.z, rC.z, rD.z, rE.z, rF.z));
|
||||||
|
// gather the .w components into the second output region
dst[i + BLK_SIZE / 4 + 1] ^= as_uint4((uchar16)(r0.w, r1.w, r2.w, r3.w, r4.w, r5.w, r6.w, r7.w, r8.w, r9.w, rA.w, rB.w, rC.w, rD.w, rE.w, rF.w));
|
||||||
|
}
|
||||||
|
// step to the next source block (BLK_SIZE / 4 uint4 elements)
src += BLK_SIZE / 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void method16(
|
||||||
__global uint *src,
|
__global uint *src,
|
||||||
__global uint *dst,
|
__global uint *dst,
|
||||||
__global ushort *factors,
|
__global ushort *factors,
|
||||||
@@ -136,7 +296,7 @@ __kernel void method4(
|
|||||||
{
|
{
|
||||||
__local int table[16];
|
__local int table[16];
|
||||||
__local uint cache[256];
|
__local uint cache[256];
|
||||||
int i, j, blk, pos, sht, mask;
|
int i, j, blk, pos, mask, tmp;
|
||||||
uint sum;
|
uint sum;
|
||||||
const int work_id = get_global_id(0);
|
const int work_id = get_global_id(0);
|
||||||
const int work_size = get_global_size(0);
|
const int work_size = get_global_size(0);
|
||||||
@@ -146,11 +306,12 @@ __kernel void method4(
|
|||||||
|
|
||||||
for (blk = 0; blk < blk_num; blk++){
|
for (blk = 0; blk < blk_num; blk++){
|
||||||
if (get_local_id(0) == 0){
|
if (get_local_id(0) == 0){
|
||||||
pos = factors[blk] << 16;
|
tmp = factors[blk];
|
||||||
table[0] = pos;
|
table[0] = tmp;
|
||||||
for (j = 1; j < 16; j++){
|
for (j = 1; j < 16; j++){
|
||||||
pos = (pos << 1) ^ ((pos >> 31) & 0x100B0000);
|
mask = (tmp & 0x8000) ? 0x1100B : 0;
|
||||||
table[j] = pos;
|
tmp = (tmp << 1) ^ mask;
|
||||||
|
table[j] = tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
@@ -161,10 +322,11 @@ __kernel void method4(
|
|||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
sht = (i & 60) >> 2;
|
tmp = (i & 60) >> 2;
|
||||||
|
tmp = 0x8000 >> tmp;
|
||||||
pos &= ~60;
|
pos &= ~60;
|
||||||
for (j = 15; j >= 0; j--){
|
for (j = 15; j >= 0; j--){
|
||||||
mask = (table[j] << sht) >> 31;
|
mask = (table[j] & tmp) ? 0xFFFFFFFF : 0;
|
||||||
sum ^= mask & cache[pos];
|
sum ^= mask & cache[pos];
|
||||||
pos += 4;
|
pos += 4;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user