Change factors for CPU cache optimization

This commit is contained in:
Yutaka Sawada
2024-08-04 21:41:24 +09:00
committed by GitHub
parent ad90e2db8d
commit 3ac6c9336a
6 changed files with 107 additions and 54 deletions

View File

@@ -1,4 +1,4 @@
[ par2j.exe - version 1.3.3.2 or later ]
[ par2j.exe - version 1.3.3.4 or later ]
Type "par2j.exe" to see version, test integrity, and show usage below.
@@ -386,6 +386,25 @@ The format is "/lc#" (# is from 1 to 32 as the number of using threads).
for example, /lc1 to use single Core, /lc508 to use half Cores and GPU
/lcb :
This is for Cache Blocking. (CPU cache optimization)
By default, this value is set-associative size of CPU L2 cache.
Maximum value is 32767. It will be multiplied by 64 KB.
To disable cache optimization, set "/lcb0".
/lcs :
This is for the large buffer size used when calculating hashes of multiple files.
By default, this value is set-associative size of CPU L3 cache.
Maximum value is 65535. It will be multiplied by 64 KB.
When you set "/lcs0", large buffer size will become 2 MB by default.
/lcm :
This is for max number of chunks to calculate at once. (CPU shared cache optimization)
By default, this value may be the ratio of L3 cache size to L2 cache size.
The value may be adjusted by some factors based on experimental results.
Maximum value is 32768. Values lower than 8 will be treated the same as 32768.
To disable cache optimization, set "/lcm0".
/m :
Set this, if you want to set memory usage.
If too much memory is allocated, the system will use the swap file.

View File

@@ -1,5 +1,5 @@
// common2.c
// Copyright : 2023-10-13 Yutaka Sawada
// Copyright : 2024-07-25 Yutaka Sawada
// License : GPL
#ifndef _UNICODE
@@ -2008,8 +2008,11 @@ void check_cpu(void)
if (cache3_size > 0){
//printf("L3 cache: %d KB (%d way)\n", cache3_size >> 10 , cache3_way);
cpu_cache = cache3_size / cache3_way; // set-associative のサイズにする
if (cpu_cache < 131072)
if (cpu_cache < 131072){
cpu_cache = 128 << 10; // 128 KB 以上にする
} else {
cpu_cache = (cpu_cache + 0xFFFF) & 0xFFFF0000; // 64 KB の倍数にする
}
}
if (cache2_size > 0){
//printf("L2 cache: %d KB (%d way)\n", cache2_size >> 10, cache2_way);
@@ -2030,7 +2033,9 @@ void check_cpu(void)
returnLength += returnLength / 2;
}
}
cpu_cache |= returnLength & 0x1FFFF;
if (returnLength > 0x8000)
returnLength = 0x8000;
cpu_cache |= returnLength & 0xFFFF;
}
}
@@ -2038,7 +2043,7 @@ void check_cpu(void)
limit_size = 128 << 10;
//printf("Limit size of Cache Blocking: %d KB\n", limit_size >> 10);
// cpu_flag の上位 16-bit にキャッシュの制限サイズを置く
cpu_flag |= limit_size & 0xFFFF0000; // 64 KB 未満は無視する
cpu_flag |= (limit_size + 0xFFFF) & 0xFFFF0000; // 64 KB の倍数にする
if (core_count == 0){ // 物理コア数が不明なら、論理コア数と同じにする
core_count = cpu_num;

View File

@@ -1,5 +1,5 @@
// md5_crc.c
// Copyright : 2023-12-12 Yutaka Sawada
// Copyright : 2024-07-24 Yutaka Sawada
// License : GPL
#ifndef _UNICODE
@@ -677,7 +677,7 @@ time1_start = clock();
// バッファー・サイズが大きいのでヒープ領域を使う
for (io_size = IO_SIZE; io_size <= MAX_BUF_SIZE; io_size += IO_SIZE){ // IO_SIZE の倍数にする
if ((io_size + IO_SIZE > (cpu_cache & 0xFFFE0000)) || ((__int64)(io_size + IO_SIZE) * 4 > file_left))
if ((io_size + IO_SIZE > (cpu_cache & 0xFFFF0000)) || ((__int64)(io_size + IO_SIZE) * 4 > file_left))
break;
}
buf1 = _aligned_malloc(io_size * 2, 64);
@@ -873,7 +873,7 @@ DWORD WINAPI file_hash_crc2(LPVOID lpParameter)
// バッファー・サイズが大きいのでヒープ領域を使う
prog_tick = 1;
for (io_size = IO_SIZE; io_size <= MAX_BUF_SIZE; io_size += IO_SIZE){ // IO_SIZE の倍数にする
if ((io_size + IO_SIZE > (cpu_cache & 0xFFFE0000)) || ((__int64)(io_size + IO_SIZE) * 4 > file_left))
if ((io_size + IO_SIZE > (cpu_cache & 0xFFFF0000)) || ((__int64)(io_size + IO_SIZE) * 4 > file_left))
break;
prog_tick++;
}
@@ -1311,7 +1311,7 @@ DWORD WINAPI file_hash_background(LPVOID lpParameter)
// バッファー・サイズが大きいのでヒープ領域を使う
for (io_size = IO_SIZE; io_size <= MAX_BUF_SIZE; io_size += IO_SIZE){ // IO_SIZE の倍数にする
if ((io_size + IO_SIZE > (cpu_cache & 0xFFFE0000)) || ((__int64)(io_size + IO_SIZE) * 4 > file_size))
if ((io_size + IO_SIZE > (cpu_cache & 0xFFFF0000)) || ((__int64)(io_size + IO_SIZE) * 4 > file_size))
break;
}
//printf("\n io_size = %d\n", io_size);

View File

@@ -1,5 +1,5 @@
// par2_cmd.c
// Copyright : 2023-12-09 Yutaka Sawada
// Copyright : 2024-07-28 Yutaka Sawada
// License : GPL
#ifndef _UNICODE
@@ -86,7 +86,7 @@ static void print_environment(void)
printf("CPU thread\t: %d / %d\n", cpu_num & 0xFFFF, cpu_num >> 24);
cpu_num &= 0xFFFF; // 利用するコア数だけにしておく
printf("CPU cache limit : %d KB, %d KB\n", (cpu_flag & 0xFFFF0000) >> 10, (cpu_cache & 0xFFFE0000) >> 10);
printf("CPU cache limit : %d KB, %d KB (%d)\n", (cpu_flag & 0xFFFF0000) >> 10, (cpu_cache & 0xFFFF0000) >> 10, cpu_cache & 0xFFFF);
#ifndef _WIN64 // 32-bit 版は MMX, SSE2, SSSE3, AVX2 のどれかを表示する
printf("CPU extra\t:");
if (((cpu_flag & 16) != 0) && ((cpu_flag & 256) == 0)){
@@ -1477,6 +1477,34 @@ ri= switch_set & 0x00040000
switch_v |= j;
// 共通のオプション (数値)
} else if (wcsncmp(tmp_p, L"lc", 2) == 0){
if (tmp_p[2] == 'b'){ // Size of Cache Blocking (CPU's L2 cache optimization)
k = 0;
j = 3;
while ((j < 3 + 5) && (tmp_p[j] >= '0') && (tmp_p[j] <= '9')){
k = (k * 10) + (tmp_p[j] - '0');
j++;
}
if (k <= 0x7FFF) // 上位 16-bit に上書きする
cpu_flag = (cpu_flag & 0xFFFF) | (k << 16);
} else if (tmp_p[2] == 's'){ // Size of Shared Cache
k = 0;
j = 3;
while ((j < 3 + 5) && (tmp_p[j] >= '0') && (tmp_p[j] <= '9')){
k = (k * 10) + (tmp_p[j] - '0');
j++;
}
if (k <= 0xFFFF) // 上位 16-bit に上書きする
cpu_cache = (cpu_cache & 0xFFFF) | (k << 16);
} else if (tmp_p[2] == 'm'){ // Max number of chunks (CPU's shared L3 cache optimization)
k = 0;
j = 3;
while ((j < 3 + 5) && (tmp_p[j] >= '0') && (tmp_p[j] <= '9')){
k = (k * 10) + (tmp_p[j] - '0');
j++;
}
if (k <= 0x8000) // CACHE_MIN_NUM 未満なら 0x8000 になる
cpu_cache = (cpu_cache & 0xFFFF0000) | k; // 下位 16-bit に上書きする
} else { // Extra と GPU も別にしてもいいかも?
k = 0;
j = 2;
while ((j < 2 + 7) && (tmp_p[j] >= '0') && (tmp_p[j] <= '9')){
@@ -1520,6 +1548,7 @@ ri= switch_set & 0x00040000
}
cpu_num = (cpu_num & 0xFFFF0000) | k; // 指定されたコア数を下位に配置する
}
}
} else if (wcsncmp(tmp_p, L"m", 1) == 0){
memory_use = 0;
j = 1; // メモリー使用量だけでなく、モード切替用としても使う、2桁まで

View File

@@ -1,7 +1,7 @@
1 RT_STRING ".\\source.cl"
1 VERSIONINFO
FILEVERSION 1,3,3,3
FILEVERSION 1,3,3,4
PRODUCTVERSION 1,3,3,0
FILEOS 0x40004
FILETYPE 0x1
@@ -13,7 +13,7 @@ BLOCK "StringFileInfo"
VALUE "FileDescription", "PAR2 client"
VALUE "LegalCopyright", "Copyright (C) 2024 Yutaka Sawada"
VALUE "ProductName", "par2j"
VALUE "FileVersion", "1.3.3.3"
VALUE "FileVersion", "1.3.3.4"
VALUE "ProductVersion", "1.3.3.0"
}
}

View File

@@ -1,2 +1,2 @@
#define FILE_VERSION "1.3.3.3" // ファイルのバージョン番号
#define FILE_VERSION "1.3.3.4" // ファイルのバージョン番号
#define PRODUCT_VERSION "1.3.3" // 製品のバージョン番号