Add files via upload

2023-03-12 11:02:25 +09:00
parent dcd8047d73
commit dd85bf7e4f
23 changed files with 15389 additions and 0 deletions
--- a/source/par2j/par2.c
+++ b/source/par2j/par2.c
--- a/source/par2j/par2.h
+++ b/source/par2j/par2.h
@@ -0,0 +1,42 @@
+#ifndef _PAR2_H_
+#define _PAR2_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// Create parity (recovery) data
+int par2_create(
+	wchar_t *uni_buf,	// work buffer; holds the comment that was entered
+	int packet_limit,	// limit on how many times packets are repeated in a recovery file
+	int block_distri,	// how parity blocks are distributed
+	int switch_p);		// do not create an index file, also record Unicode file names (presumably bit flags -- confirm in par2.c)
+
+// Estimate (trial-calculate) the layout of the recovery files
+int par2_trial(
+	wchar_t *uni_buf,	// work buffer; holds the comment that was entered
+	int packet_limit,	// limit on how many times packets are repeated in a recovery file
+	int block_distri,	// how parity blocks are distributed
+	int switch_p);		// do not create an index file, also record Unicode file names (presumably bit flags -- confirm in par2.c)
+
+// Check the source files for damage or loss
+int par2_verify(wchar_t *uni_buf);	// work buffer
+
+// Repair damaged or missing source files
+int par2_repair(wchar_t *uni_buf);	// work buffer
+
+// Display the list of source files
+int par2_list(
+	wchar_t *uni_buf,	// work buffer
+	int switch_h);		// also display hash values
+
+// Detect corruption of itself (the program) using a CRC-32 checksum
+int par2_checksum(wchar_t *uni_buf);	// work buffer
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/source/par2j/par2_cmd.c
+++ b/source/par2j/par2_cmd.c
--- a/source/par2j/par2j.vcxproj
+++ b/source/par2j/par2j.vcxproj
@@ -0,0 +1,165 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{3DD6B39E-7178-4E46-A3DE-17DE984DF86B}</ProjectGuid>
+    <RootNamespace>par2j</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <PlatformToolset>v143</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <SpectreMitigation>false</SpectreMitigation>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>16.0.31025.104</_ProjectFileVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <OutDir>$(SolutionDir)$(Configuration)\</OutDir>
+    <IntDir>$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>$(Platform)\$(Configuration)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+    <GenerateManifest>false</GenerateManifest>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
+      <RuntimeTypeInfo>false</RuntimeTypeInfo>
+      <PrecompiledHeader />
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat />
+      <AdditionalIncludeDirectories>$(ProjectDir)/OpenCL</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>imagehlp.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>false</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <LargeAddressAware>true</LargeAddressAware>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <SetChecksum>true</SetChecksum>
+      <TargetMachine>MachineX86</TargetMachine>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TargetEnvironment>X64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <BufferSecurityCheck>false</BufferSecurityCheck>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <RuntimeTypeInfo>false</RuntimeTypeInfo>
+      <PrecompiledHeader />
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat />
+      <AdditionalIncludeDirectories>$(ProjectDir)/OpenCL</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>imagehlp.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <GenerateDebugInformation>false</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <LargeAddressAware>true</LargeAddressAware>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <SetChecksum>true</SetChecksum>
+      <TargetMachine>MachineX64</TargetMachine>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="com.cpp" />
+    <ClCompile Include="common2.c" />
+    <ClCompile Include="crc.c" />
+    <ClCompile Include="create.c" />
+    <ClCompile Include="gf16.c" />
+    <ClCompile Include="ini.c" />
+    <ClCompile Include="json.c" />
+    <ClCompile Include="lib_opencl.c" />
+    <ClCompile Include="list.c" />
+    <ClCompile Include="md5_crc.c" />
+    <ClCompile Include="par2.c" />
+    <ClCompile Include="par2_cmd.c" />
+    <ClCompile Include="phmd5.c" />
+    <ClCompile Include="phmd5a.c" />
+    <ClCompile Include="phmd5s.c" />
+    <ClCompile Include="reedsolomon.c" />
+    <ClCompile Include="repair.c" />
+    <ClCompile Include="rs_decode.c" />
+    <ClCompile Include="rs_encode.c" />
+    <ClCompile Include="search.c" />
+    <ClCompile Include="verify.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="common2.h" />
+    <ClInclude Include="crc.h" />
+    <ClInclude Include="create.h" />
+    <ClInclude Include="gf16.h" />
+    <ClInclude Include="gf_jit.h" />
+    <ClInclude Include="ini.h" />
+    <ClInclude Include="json.h" />
+    <ClInclude Include="lib_opencl.h" />
+    <ClInclude Include="list.h" />
+    <ClInclude Include="md5_crc.h" />
+    <ClInclude Include="par2.h" />
+    <ClInclude Include="phmd5.h" />
+    <ClInclude Include="reedsolomon.h" />
+    <ClInclude Include="repair.h" />
+    <ClInclude Include="rs_decode.h" />
+    <ClInclude Include="rs_encode.h" />
+    <ClInclude Include="search.h" />
+    <ClInclude Include="verify.h" />
+    <ClInclude Include="version.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="res_par2j.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
--- a/source/par2j/phmd5.c
+++ b/source/par2j/phmd5.c
@@ -0,0 +1,109 @@
+/*----------------------------------------------------------------------------
+;
+; MD5 hash generator -- Paul Houle (paulhoule.com) 11/13/2017
+;
+; Non-time critical C logic.  All API entry points are here.
+; See phmd5.h for documentation.
+;
+;---------------------------------------------------------------------------*/
+
+#include <string.h>
+#include "phmd5.h"
+
+// First call -- resets *pmd5 so a new hash can be accumulated.
+void Phmd5Begin(PHMD5 *pmd5) {
+	unsigned __int32 *state = (unsigned __int32 *) pmd5->hash;
+
+	pmd5->totbyt = 0;					// no data bytes processed yet
+
+	state[0] = 0x67452301;				// initial hash words per rfc1321
+	state[1] = 0xEFCDAB89;
+	state[2] = 0x98BADCFE;
+	state[3] = 0x10325476;
+}
+
+// Last call -- pads the message (0x80, zeros, 64-bit bit count); afterwards pmd5->hash holds the final MD5 hash.
+void Phmd5End(PHMD5 *pmd5) {
+	char pad[72];						// pad buffer (worst case is 72 bytes)
+	unsigned padc;						// size of needed pad (9-72 bytes)
+	unsigned __int64 bitcnt = pmd5->totbyt << 3;	// input data length in bits
+
+	padc = 64 - ((unsigned) pmd5->totbyt & 63); // pad to 64-byte boundary
+	if (padc < 9) padc += 64;			// add a block if we need more room
+	memset(pad, 0, padc);				// clear entire pad area
+	pad[0] = (char) 0x80;				// place input stream terminator
+	memcpy(&pad[padc - 8], &bitcnt, sizeof bitcnt);	// bit count in native (little-endian) order; memcpy because this address may be unaligned
+	Phmd5Process(pmd5, pad, padc);		// process the pad
+}
+
+// Work done here -- call as many times as needed to feed all input data.
+// pdata points to the input data, bytecnt is pdata size (0..n bytes).
+// See phmd5.h regarding how to use this optimally.
+void Phmd5Process(PHMD5 *pmd5, char *pdata, size_t bytecnt) {
+	unsigned resid = (unsigned) pmd5->totbyt;
+
+	pmd5->totbyt += bytecnt;			// update total bytes processed
+
+	resid &= 63;						// count of bytes now in pmd5->buf
+
+	// This block handles the case of residual data in pmd5->buf.
+	// After this block pmd5->buf is empty (except perhaps on exit).
+
+	if (resid) {						// if residual exists,
+		unsigned cb = 64 - resid;		// room left in pmd5->buf
+		if (cb > bytecnt) cb = (unsigned) bytecnt;
+		memcpy(pmd5->buf + resid, pdata, cb);
+		pdata += cb;
+		bytecnt -= cb;
+		if (resid + cb < 64) return;	// block still incomplete: keep it buffered
+		Phmd5DoBlocks(pmd5->hash, pmd5->buf, 64);
+	}
+
+	// This block processes input data in-place, if the data is dword
+	// aligned and in 64-byte chunks.
+	// bytecnt is tested as a full size_t: a 32-bit truncation would drop inputs of 4 GiB or more.
+	if ((bytecnt & ~(size_t) 63) != 0 && ((size_t) pdata & 3) == 0) {
+		Phmd5DoBlocks(pmd5->hash, pdata, bytecnt & ~(size_t) 63);
+		pdata += bytecnt & ~(size_t) 63;
+		bytecnt &= 63;
+	}
+
+	while (bytecnt) {					// handle residual/non-aligned data
+		unsigned cb = bytecnt < 64 ? (unsigned) bytecnt : 64;
+		memcpy(pmd5->buf, pdata, cb);
+		pdata += cb;
+		bytecnt -= cb;
+		if (cb < 64) return;			// partial block stays in pmd5->buf for the next call
+		Phmd5DoBlocks(pmd5->hash, pmd5->buf, 64);
+	}
+}
+
+// Feeds bytecnt zero bytes into the hash (added by Yutaka Sawada for PAR2's null padding at the end of each file).
+void Phmd5ProcessZero(PHMD5 *pmd5, size_t bytecnt) {
+	const unsigned used = (unsigned) pmd5->totbyt & 63;	// bytes already waiting in pmd5->buf
+	size_t whole;						// zero bytes that form complete 64-byte blocks
+
+	pmd5->totbyt += bytecnt;			// the zeros count as processed bytes
+
+	// First top up a partially filled pmd5->buf with zeros; once it reaches
+	// 64 bytes it is hashed, otherwise the zeros simply stay buffered.
+
+	if (used != 0) {
+		unsigned fill = 64 - used;
+		if (fill > bytecnt) fill = (unsigned) bytecnt;
+		memset(pmd5->buf + used, 0, fill);
+		bytecnt -= fill;
+		if (used + fill < 64) return;
+		Phmd5DoBlocks(pmd5->hash, pmd5->buf, 64);
+	}
+
+	// Complete 64-byte blocks of zeros are hashed without any input buffer.
+
+	whole = bytecnt & ~63;
+	if (whole != 0) {
+		Phmd5DoBlocksZero(pmd5->hash, whole);
+		bytecnt -= whole;
+	}
+
+	if (bytecnt != 0) memset(pmd5->buf, 0, 64);	// leftover zeros remain as residual data
+}
--- a/source/par2j/phmd5.h
+++ b/source/par2j/phmd5.h
@@ -0,0 +1,66 @@
+/*----------------------------------------------------------------------------
+;
+; MD5 hash generator -- Paul Houle (paulhoule.com) 11/13/2017
+;
+; This code is in the public domain.  Please attribute the author.
+;
+; There are a lot of MD5 generators; here's another.  This one targets a
+; little-endian memory architecture only (eg X86).  The benefit of this
+; is speed -- bytes within larger elements never need to be reversed,
+; which means the source data can be processed in-place.
+;
+; Though other compilers might be usable, this was developed using
+; Microsoft 32/64-bit C 12.0 [Version 18.00.30723].  Vendor specific
+; definitions (eg. _rotl, __int32, __int64) are used.
+; Build commands:
+;
+;	cl /c /Ox phmd5.c
+;	cl /c /Ox phmd5a.c
+;
+; Link the resulting .obj's into your executable and #include "phmd5.h"
+;
+; How to call the routines to generate a hash:
+;
+;	(1) Allocate a PHMD5 type struct -- it's small, can be static or local.
+;		A pointer to this struct is the first argument to all functions.
+;
+;	(2) Call Phmd5Begin() once -- this initializes the PHMD5 struct.
+;
+;	(3) Call Phmd5Process() as many times as necessary for all data
+;		to be included in the MD5 hash.
+;
+;	(4) Call Phmd5End() once.  The final 16-byte MD5 hash will then be
+;		available in PHMD5->hash.  Note the finished hash is a simple array
+;		of bytes, and must be treated/displayed/copied/etc that way.
+;
+; For best performance the Phmd5Process() "pdata" pointer should be 32-bit
+; aligned (a multiple of 4) and "bytecnt" should be a multiple of 64.
+; As long as both of these conditions continue to be met the input data is
+; processed in-place; otherwise, some speed (10-15%) is lost as the data
+; is copied to an internal blocking buffer before being processed.
+;
+;---------------------------------------------------------------------------*/
+
+#ifndef _PHMD5_DEFINED					// include guard
+#define _PHMD5_DEFINED
+
+#include <stddef.h>						// size_t
+typedef struct {
+	unsigned char hash[16];				// MD5 state while hashing; final 16-byte hash winds up here
+	unsigned __int64 totbyt;			// processed byte count
+	char buf[64];						// input blocking buffer (holds a partial 64-byte block between calls)
+} PHMD5;
+
+void Phmd5Begin(PHMD5 *pmd5);			// first call: initialize *pmd5
+void Phmd5Process(PHMD5 *pmd5, char *pdata, size_t bytecnt);	// add bytecnt bytes at pdata to the hash
+void Phmd5End(PHMD5 *pmd5);				// last call: afterwards pmd5->hash holds the final MD5 hash
+void Phmd5DoBlocks(unsigned char *hash, char *pdata, size_t bytecnt);	// block worker: bytecnt must be a nonzero multiple of 64
+
+// added by Yutaka Sawada for PAR2: hash bytecnt zero bytes without an input buffer
+void Phmd5ProcessZero(PHMD5 *pmd5, size_t bytecnt);
+void Phmd5DoBlocksZero(unsigned char *hash, size_t bytecnt);	// block worker: bytecnt must be a nonzero multiple of 64
+
+// calculate two MD5 at once for PAR2 (definition not shown here; presumably in phmd5s.c -- confirm)
+void Phmd5Process2(PHMD5 *pmd5, PHMD5 *pmd52, char *pdata, size_t bytecnt);
+
+#endif
--- a/source/par2j/phmd5a.c
+++ b/source/par2j/phmd5a.c
@@ -0,0 +1,480 @@
+/*----------------------------------------------------------------------------
+;
+; MD5 hash generator -- Paul Houle (paulhoule.com) 11/13/2017
+;
+; Called only from phmd5.c -- see phmd5.h for overview
+;
+; This is the same logic in phmd5a.asm, implemented (after the fact) in C.
+; The C compiler must support the "_rotl()" function generated inline to
+; achieve maximal performance.
+;
+; It turns out the MSFT C compiler, coupled with newer processor, results
+; in timings comparable to the 32-bit only phmd5a.asm hand-written assembly.
+; Therefore this "C" implementation is now used.
+; Avoiding assembly allows the code to be compiled either 32 or 64 bit.
+;
+; Note that a "little-endian" memory architecture is assumed.
+;
+; The Fx() and MD5STEP() macros were written by Colin Plumb in 1993.
+; MD5STEP() was changed slightly to match how phmd5a.asm operates.
+;
+;---------------------------------------------------------------------------*/
+
+/*
+#include <stdlib.h>						// for _rotl()
+#include "phmd5.h"
+
+
+// MD5 Optimisation Tricks by Anime Tosho
+// https://github.com/animetosho/md5-optimisation#optimisation-tricks-single-buffer
+// Dependency shortcut in G function
+#define F1(x, y, z) ((x & y) + (~x & z))
+
+//#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+#define MD5STEP(f, w, x, y, z, ix, s, sc) \
+	w = _rotl(w + f(x, y, z) + ((unsigned *) pdata)[ix] + sc, s) + x
+
+void Phmd5DoBlocks(
+	unsigned char *hash,
+	char *pdata,
+	size_t bytecnt
+) {
+	unsigned __int32 a = *(unsigned __int32 *) &hash[ 0];
+	unsigned __int32 b = *(unsigned __int32 *) &hash[ 4];
+	unsigned __int32 c = *(unsigned __int32 *) &hash[ 8];
+	unsigned __int32 d = *(unsigned __int32 *) &hash[12];
+
+	do {
+		MD5STEP(F1, a, b, c, d,  0,  7, 0xd76aa478);
+		MD5STEP(F1, d, a, b, c,  1, 12, 0xe8c7b756);
+		MD5STEP(F1, c, d, a, b,  2, 17, 0x242070db);
+		MD5STEP(F1, b, c, d, a,  3, 22, 0xc1bdceee);
+		MD5STEP(F1, a, b, c, d,  4,  7, 0xf57c0faf);
+		MD5STEP(F1, d, a, b, c,  5, 12, 0x4787c62a);
+		MD5STEP(F1, c, d, a, b,  6, 17, 0xa8304613);
+		MD5STEP(F1, b, c, d, a,  7, 22, 0xfd469501);
+		MD5STEP(F1, a, b, c, d,  8,  7, 0x698098d8);
+		MD5STEP(F1, d, a, b, c,  9, 12, 0x8b44f7af);
+		MD5STEP(F1, c, d, a, b, 10, 17, 0xffff5bb1);
+		MD5STEP(F1, b, c, d, a, 11, 22, 0x895cd7be);
+		MD5STEP(F1, a, b, c, d, 12,  7, 0x6b901122);
+		MD5STEP(F1, d, a, b, c, 13, 12, 0xfd987193);
+		MD5STEP(F1, c, d, a, b, 14, 17, 0xa679438e);
+		MD5STEP(F1, b, c, d, a, 15, 22, 0x49b40821);
+
+		MD5STEP(F2, a, b, c, d,  1,  5, 0xf61e2562);
+		MD5STEP(F2, d, a, b, c,  6,  9, 0xc040b340);
+		MD5STEP(F2, c, d, a, b, 11, 14, 0x265e5a51);
+		MD5STEP(F2, b, c, d, a,  0, 20, 0xe9b6c7aa);
+		MD5STEP(F2, a, b, c, d,  5,  5, 0xd62f105d);
+		MD5STEP(F2, d, a, b, c, 10,  9, 0x02441453);
+		MD5STEP(F2, c, d, a, b, 15, 14, 0xd8a1e681);
+		MD5STEP(F2, b, c, d, a,  4, 20, 0xe7d3fbc8);
+		MD5STEP(F2, a, b, c, d,  9,  5, 0x21e1cde6);
+		MD5STEP(F2, d, a, b, c, 14,  9, 0xc33707d6);
+		MD5STEP(F2, c, d, a, b,  3, 14, 0xf4d50d87);
+		MD5STEP(F2, b, c, d, a,  8, 20, 0x455a14ed);
+		MD5STEP(F2, a, b, c, d, 13,  5, 0xa9e3e905);
+		MD5STEP(F2, d, a, b, c,  2,  9, 0xfcefa3f8);
+		MD5STEP(F2, c, d, a, b,  7, 14, 0x676f02d9);
+		MD5STEP(F2, b, c, d, a, 12, 20, 0x8d2a4c8a);
+
+		MD5STEP(F3, a, b, c, d,  5,  4, 0xfffa3942);
+		MD5STEP(F3, d, a, b, c,  8, 11, 0x8771f681);
+		MD5STEP(F3, c, d, a, b, 11, 16, 0x6d9d6122);
+		MD5STEP(F3, b, c, d, a, 14, 23, 0xfde5380c);
+		MD5STEP(F3, a, b, c, d,  1,  4, 0xa4beea44);
+		MD5STEP(F3, d, a, b, c,  4, 11, 0x4bdecfa9);
+		MD5STEP(F3, c, d, a, b,  7, 16, 0xf6bb4b60);
+		MD5STEP(F3, b, c, d, a, 10, 23, 0xbebfbc70);
+		MD5STEP(F3, a, b, c, d, 13,  4, 0x289b7ec6);
+		MD5STEP(F3, d, a, b, c,  0, 11, 0xeaa127fa);
+		MD5STEP(F3, c, d, a, b,  3, 16, 0xd4ef3085);
+		MD5STEP(F3, b, c, d, a,  6, 23, 0x04881d05);
+		MD5STEP(F3, a, b, c, d,  9,  4, 0xd9d4d039);
+		MD5STEP(F3, d, a, b, c, 12, 11, 0xe6db99e5);
+		MD5STEP(F3, c, d, a, b, 15, 16, 0x1fa27cf8);
+		MD5STEP(F3, b, c, d, a,  2, 23, 0xc4ac5665);
+
+		MD5STEP(F4, a, b, c, d,  0,  6, 0xf4292244);
+		MD5STEP(F4, d, a, b, c,  7, 10, 0x432aff97);
+		MD5STEP(F4, c, d, a, b, 14, 15, 0xab9423a7);
+		MD5STEP(F4, b, c, d, a,  5, 21, 0xfc93a039);
+		MD5STEP(F4, a, b, c, d, 12,  6, 0x655b59c3);
+		MD5STEP(F4, d, a, b, c,  3, 10, 0x8f0ccc92);
+		MD5STEP(F4, c, d, a, b, 10, 15, 0xffeff47d);
+		MD5STEP(F4, b, c, d, a,  1, 21, 0x85845dd1);
+		MD5STEP(F4, a, b, c, d,  8,  6, 0x6fa87e4f);
+		MD5STEP(F4, d, a, b, c, 15, 10, 0xfe2ce6e0);
+		MD5STEP(F4, c, d, a, b,  6, 15, 0xa3014314);
+		MD5STEP(F4, b, c, d, a, 13, 21, 0x4e0811a1);
+		MD5STEP(F4, a, b, c, d,  4,  6, 0xf7537e82);
+		MD5STEP(F4, d, a, b, c, 11, 10, 0xbd3af235);
+		MD5STEP(F4, c, d, a, b,  2, 15, 0x2ad7d2bb);
+		MD5STEP(F4, b, c, d, a,  9, 21, 0xeb86d391);
+
+		a += *(unsigned __int32 *) &hash[ 0];
+		b += *(unsigned __int32 *) &hash[ 4];
+		c += *(unsigned __int32 *) &hash[ 8];
+		d += *(unsigned __int32 *) &hash[12];
+
+		*(unsigned __int32 *) &hash[ 0] = a;
+		*(unsigned __int32 *) &hash[ 4] = b;
+		*(unsigned __int32 *) &hash[ 8] = c;
+		*(unsigned __int32 *) &hash[12] = d;
+
+		pdata += 64;
+	} while (bytecnt -= 64);
+}
+
+#undef MD5STEP
+
+// for update with null bytes
+#define MD5STEP(f, w, x, y, z, ix, s, sc) \
+	w = _rotl(w + f(x, y, z) + sc, s) + x
+
+void Phmd5DoBlocksZero(
+	unsigned char *hash,
+	size_t bytecnt
+) {
+	unsigned __int32 a = *(unsigned __int32 *) &hash[ 0];
+	unsigned __int32 b = *(unsigned __int32 *) &hash[ 4];
+	unsigned __int32 c = *(unsigned __int32 *) &hash[ 8];
+	unsigned __int32 d = *(unsigned __int32 *) &hash[12];
+
+	do {
+		MD5STEP(F1, a, b, c, d,  0,  7, 0xd76aa478);
+		MD5STEP(F1, d, a, b, c,  1, 12, 0xe8c7b756);
+		MD5STEP(F1, c, d, a, b,  2, 17, 0x242070db);
+		MD5STEP(F1, b, c, d, a,  3, 22, 0xc1bdceee);
+		MD5STEP(F1, a, b, c, d,  4,  7, 0xf57c0faf);
+		MD5STEP(F1, d, a, b, c,  5, 12, 0x4787c62a);
+		MD5STEP(F1, c, d, a, b,  6, 17, 0xa8304613);
+		MD5STEP(F1, b, c, d, a,  7, 22, 0xfd469501);
+		MD5STEP(F1, a, b, c, d,  8,  7, 0x698098d8);
+		MD5STEP(F1, d, a, b, c,  9, 12, 0x8b44f7af);
+		MD5STEP(F1, c, d, a, b, 10, 17, 0xffff5bb1);
+		MD5STEP(F1, b, c, d, a, 11, 22, 0x895cd7be);
+		MD5STEP(F1, a, b, c, d, 12,  7, 0x6b901122);
+		MD5STEP(F1, d, a, b, c, 13, 12, 0xfd987193);
+		MD5STEP(F1, c, d, a, b, 14, 17, 0xa679438e);
+		MD5STEP(F1, b, c, d, a, 15, 22, 0x49b40821);
+
+		MD5STEP(F2, a, b, c, d,  1,  5, 0xf61e2562);
+		MD5STEP(F2, d, a, b, c,  6,  9, 0xc040b340);
+		MD5STEP(F2, c, d, a, b, 11, 14, 0x265e5a51);
+		MD5STEP(F2, b, c, d, a,  0, 20, 0xe9b6c7aa);
+		MD5STEP(F2, a, b, c, d,  5,  5, 0xd62f105d);
+		MD5STEP(F2, d, a, b, c, 10,  9, 0x02441453);
+		MD5STEP(F2, c, d, a, b, 15, 14, 0xd8a1e681);
+		MD5STEP(F2, b, c, d, a,  4, 20, 0xe7d3fbc8);
+		MD5STEP(F2, a, b, c, d,  9,  5, 0x21e1cde6);
+		MD5STEP(F2, d, a, b, c, 14,  9, 0xc33707d6);
+		MD5STEP(F2, c, d, a, b,  3, 14, 0xf4d50d87);
+		MD5STEP(F2, b, c, d, a,  8, 20, 0x455a14ed);
+		MD5STEP(F2, a, b, c, d, 13,  5, 0xa9e3e905);
+		MD5STEP(F2, d, a, b, c,  2,  9, 0xfcefa3f8);
+		MD5STEP(F2, c, d, a, b,  7, 14, 0x676f02d9);
+		MD5STEP(F2, b, c, d, a, 12, 20, 0x8d2a4c8a);
+
+		MD5STEP(F3, a, b, c, d,  5,  4, 0xfffa3942);
+		MD5STEP(F3, d, a, b, c,  8, 11, 0x8771f681);
+		MD5STEP(F3, c, d, a, b, 11, 16, 0x6d9d6122);
+		MD5STEP(F3, b, c, d, a, 14, 23, 0xfde5380c);
+		MD5STEP(F3, a, b, c, d,  1,  4, 0xa4beea44);
+		MD5STEP(F3, d, a, b, c,  4, 11, 0x4bdecfa9);
+		MD5STEP(F3, c, d, a, b,  7, 16, 0xf6bb4b60);
+		MD5STEP(F3, b, c, d, a, 10, 23, 0xbebfbc70);
+		MD5STEP(F3, a, b, c, d, 13,  4, 0x289b7ec6);
+		MD5STEP(F3, d, a, b, c,  0, 11, 0xeaa127fa);
+		MD5STEP(F3, c, d, a, b,  3, 16, 0xd4ef3085);
+		MD5STEP(F3, b, c, d, a,  6, 23, 0x04881d05);
+		MD5STEP(F3, a, b, c, d,  9,  4, 0xd9d4d039);
+		MD5STEP(F3, d, a, b, c, 12, 11, 0xe6db99e5);
+		MD5STEP(F3, c, d, a, b, 15, 16, 0x1fa27cf8);
+		MD5STEP(F3, b, c, d, a,  2, 23, 0xc4ac5665);
+
+		MD5STEP(F4, a, b, c, d,  0,  6, 0xf4292244);
+		MD5STEP(F4, d, a, b, c,  7, 10, 0x432aff97);
+		MD5STEP(F4, c, d, a, b, 14, 15, 0xab9423a7);
+		MD5STEP(F4, b, c, d, a,  5, 21, 0xfc93a039);
+		MD5STEP(F4, a, b, c, d, 12,  6, 0x655b59c3);
+		MD5STEP(F4, d, a, b, c,  3, 10, 0x8f0ccc92);
+		MD5STEP(F4, c, d, a, b, 10, 15, 0xffeff47d);
+		MD5STEP(F4, b, c, d, a,  1, 21, 0x85845dd1);
+		MD5STEP(F4, a, b, c, d,  8,  6, 0x6fa87e4f);
+		MD5STEP(F4, d, a, b, c, 15, 10, 0xfe2ce6e0);
+		MD5STEP(F4, c, d, a, b,  6, 15, 0xa3014314);
+		MD5STEP(F4, b, c, d, a, 13, 21, 0x4e0811a1);
+		MD5STEP(F4, a, b, c, d,  4,  6, 0xf7537e82);
+		MD5STEP(F4, d, a, b, c, 11, 10, 0xbd3af235);
+		MD5STEP(F4, c, d, a, b,  2, 15, 0x2ad7d2bb);
+		MD5STEP(F4, b, c, d, a,  9, 21, 0xeb86d391);
+
+		a += *(unsigned __int32 *) &hash[ 0];
+		b += *(unsigned __int32 *) &hash[ 4];
+		c += *(unsigned __int32 *) &hash[ 8];
+		d += *(unsigned __int32 *) &hash[12];
+
+		*(unsigned __int32 *) &hash[ 0] = a;
+		*(unsigned __int32 *) &hash[ 4] = b;
+		*(unsigned __int32 *) &hash[ 8] = c;
+		*(unsigned __int32 *) &hash[12] = d;
+
+	} while (bytecnt -= 64);
+}
+
+#undef MD5STEP
+
+*/
+
+
+// SIMD function by Yutaka Sawada 2021-02-04
+
+#include <string.h>
+#include <emmintrin.h>	// for MMX ~ SSE2
+#include "phmd5.h"
+
+
+// MD5 round functions on __m128i values ("delays `x` dependency" trick by Anime Tosho)
+
+// scalar equivalent: F1(x, y, z) = (((y ^ z) & x) ^ z)
+#define F1(x, y, z) _mm_xor_si128(_mm_and_si128(_mm_xor_si128(y, z), x), z)
+
+// scalar equivalent: F2(x, y, z) = ((z & x) + (~z & y)); the two terms share no set bits, so OR is used here
+#define F2(x, y, z) _mm_or_si128(_mm_and_si128(x, z), _mm_andnot_si128(z, y))
+
+// scalar equivalent: F3(x, y, z) = (x ^ y ^ z)
+#define F3(x, y, z) _mm_xor_si128(x, _mm_xor_si128(y, z))
+
+// scalar equivalent: F4(x, y, z) = (y ^ (x | ~z)); ~z is built as z ^ all-ones, with all-ones from cmpeq(z, z)
+#define F4(x, y, z) _mm_xor_si128(y, _mm_or_si128(x, _mm_xor_si128(z, _mm_cmpeq_epi32(z, z))))
+// One step: w = rotl(w + f(x, y, z) + 32-bit word ix of pdata + sc, s) + x; the meaningful result is in 32-bit lane 0.
+#define MD5STEP(f, w, x, y, z, ix, s, sc) w = _mm_add_epi32(_mm_srli_epi64(_mm_shuffle_epi32(_mm_add_epi32(_mm_add_epi32(w, _mm_set1_epi32(((unsigned __int32 *) pdata)[ix] + sc)), f(x, y, z)), _MM_SHUFFLE(0, 0, 0, 0)), 32 - s), x)
+/* The same step one operation at a time: lane 0 is broadcast, then a 64-bit right shift by (32 - s) leaves rotl(v, s) in lane 0.
+#define MD5STEP(f, w, x, y, z, ix, s, sc) { \
+	w = _mm_add_epi32(_mm_add_epi32(w, _mm_set1_epi32(((unsigned __int32 *) pdata)[ix] + sc)), f(x, y, z)); \
+	w = _mm_shuffle_epi32(w, _MM_SHUFFLE(0, 0, 0, 0)); \
+	w = _mm_srli_epi64(w, 32 - s); \
+	w = _mm_add_epi32(w, x); \
+}
+*/
+
+// SSE2 version: folds bytecnt bytes at pdata into the 16-byte MD5 state at hash, one 64-byte block per loop pass.
+void Phmd5DoBlocks(
+	unsigned char *hash,
+	char *pdata,
+	size_t bytecnt
+) {
+	__m128i h0, h1, h2, h3;					// chaining state; only 32-bit lane 0 of each is written back to hash
+	__m128i a, b, c, d;						// working values for the current block
+
+	h0 = _mm_loadu_si128((__m128i *) hash);	// h0 = [a0, a1, a2, a3] (little endian)
+	h1 = _mm_srli_si128(h0,  4);			// h1 = [a1, a2, a3,  0]
+	h2 = _mm_srli_si128(h0,  8);			// h2 = [a2, a3,  0,  0]
+	h3 = _mm_srli_si128(h0, 12);			// h3 = [a3,  0,  0,  0]
+	_mm_store_si128(&a, h0);				// working values start from the current state
+	_mm_store_si128(&b, h1);
+	_mm_store_si128(&c, h2);
+	_mm_store_si128(&d, h3);
+
+	do {									// steps using F1
+		MD5STEP(F1, a, b, c, d,  0,  7, 0xd76aa478);
+		MD5STEP(F1, d, a, b, c,  1, 12, 0xe8c7b756);
+		MD5STEP(F1, c, d, a, b,  2, 17, 0x242070db);
+		MD5STEP(F1, b, c, d, a,  3, 22, 0xc1bdceee);
+		MD5STEP(F1, a, b, c, d,  4,  7, 0xf57c0faf);
+		MD5STEP(F1, d, a, b, c,  5, 12, 0x4787c62a);
+		MD5STEP(F1, c, d, a, b,  6, 17, 0xa8304613);
+		MD5STEP(F1, b, c, d, a,  7, 22, 0xfd469501);
+		MD5STEP(F1, a, b, c, d,  8,  7, 0x698098d8);
+		MD5STEP(F1, d, a, b, c,  9, 12, 0x8b44f7af);
+		MD5STEP(F1, c, d, a, b, 10, 17, 0xffff5bb1);
+		MD5STEP(F1, b, c, d, a, 11, 22, 0x895cd7be);
+		MD5STEP(F1, a, b, c, d, 12,  7, 0x6b901122);
+		MD5STEP(F1, d, a, b, c, 13, 12, 0xfd987193);
+		MD5STEP(F1, c, d, a, b, 14, 17, 0xa679438e);
+		MD5STEP(F1, b, c, d, a, 15, 22, 0x49b40821);
+		// steps using F2
+		MD5STEP(F2, a, b, c, d,  1,  5, 0xf61e2562);
+		MD5STEP(F2, d, a, b, c,  6,  9, 0xc040b340);
+		MD5STEP(F2, c, d, a, b, 11, 14, 0x265e5a51);
+		MD5STEP(F2, b, c, d, a,  0, 20, 0xe9b6c7aa);
+		MD5STEP(F2, a, b, c, d,  5,  5, 0xd62f105d);
+		MD5STEP(F2, d, a, b, c, 10,  9, 0x02441453);
+		MD5STEP(F2, c, d, a, b, 15, 14, 0xd8a1e681);
+		MD5STEP(F2, b, c, d, a,  4, 20, 0xe7d3fbc8);
+		MD5STEP(F2, a, b, c, d,  9,  5, 0x21e1cde6);
+		MD5STEP(F2, d, a, b, c, 14,  9, 0xc33707d6);
+		MD5STEP(F2, c, d, a, b,  3, 14, 0xf4d50d87);
+		MD5STEP(F2, b, c, d, a,  8, 20, 0x455a14ed);
+		MD5STEP(F2, a, b, c, d, 13,  5, 0xa9e3e905);
+		MD5STEP(F2, d, a, b, c,  2,  9, 0xfcefa3f8);
+		MD5STEP(F2, c, d, a, b,  7, 14, 0x676f02d9);
+		MD5STEP(F2, b, c, d, a, 12, 20, 0x8d2a4c8a);
+		// steps using F3
+		MD5STEP(F3, a, b, c, d,  5,  4, 0xfffa3942);
+		MD5STEP(F3, d, a, b, c,  8, 11, 0x8771f681);
+		MD5STEP(F3, c, d, a, b, 11, 16, 0x6d9d6122);
+		MD5STEP(F3, b, c, d, a, 14, 23, 0xfde5380c);
+		MD5STEP(F3, a, b, c, d,  1,  4, 0xa4beea44);
+		MD5STEP(F3, d, a, b, c,  4, 11, 0x4bdecfa9);
+		MD5STEP(F3, c, d, a, b,  7, 16, 0xf6bb4b60);
+		MD5STEP(F3, b, c, d, a, 10, 23, 0xbebfbc70);
+		MD5STEP(F3, a, b, c, d, 13,  4, 0x289b7ec6);
+		MD5STEP(F3, d, a, b, c,  0, 11, 0xeaa127fa);
+		MD5STEP(F3, c, d, a, b,  3, 16, 0xd4ef3085);
+		MD5STEP(F3, b, c, d, a,  6, 23, 0x04881d05);
+		MD5STEP(F3, a, b, c, d,  9,  4, 0xd9d4d039);
+		MD5STEP(F3, d, a, b, c, 12, 11, 0xe6db99e5);
+		MD5STEP(F3, c, d, a, b, 15, 16, 0x1fa27cf8);
+		MD5STEP(F3, b, c, d, a,  2, 23, 0xc4ac5665);
+		// steps using F4
+		MD5STEP(F4, a, b, c, d,  0,  6, 0xf4292244);
+		MD5STEP(F4, d, a, b, c,  7, 10, 0x432aff97);
+		MD5STEP(F4, c, d, a, b, 14, 15, 0xab9423a7);
+		MD5STEP(F4, b, c, d, a,  5, 21, 0xfc93a039);
+		MD5STEP(F4, a, b, c, d, 12,  6, 0x655b59c3);
+		MD5STEP(F4, d, a, b, c,  3, 10, 0x8f0ccc92);
+		MD5STEP(F4, c, d, a, b, 10, 15, 0xffeff47d);
+		MD5STEP(F4, b, c, d, a,  1, 21, 0x85845dd1);
+		MD5STEP(F4, a, b, c, d,  8,  6, 0x6fa87e4f);
+		MD5STEP(F4, d, a, b, c, 15, 10, 0xfe2ce6e0);
+		MD5STEP(F4, c, d, a, b,  6, 15, 0xa3014314);
+		MD5STEP(F4, b, c, d, a, 13, 21, 0x4e0811a1);
+		MD5STEP(F4, a, b, c, d,  4,  6, 0xf7537e82);
+		MD5STEP(F4, d, a, b, c, 11, 10, 0xbd3af235);
+		MD5STEP(F4, c, d, a, b,  2, 15, 0x2ad7d2bb);
+		MD5STEP(F4, b, c, d, a,  9, 21, 0xeb86d391);
+
+		a = _mm_add_epi32(a, h0);			// add the state from before this block
+		b = _mm_add_epi32(b, h1);
+		c = _mm_add_epi32(c, h2);
+		d = _mm_add_epi32(d, h3);
+
+		_mm_store_si128(&h0, a);			// the sums become the new chaining state
+		_mm_store_si128(&h1, b);
+		_mm_store_si128(&h2, c);
+		_mm_store_si128(&h3, d);
+
+		pdata += 64;
+	} while (bytecnt -= 64);				// ends only when bytecnt reaches exactly 0: it must be a nonzero multiple of 64
+
+	*(unsigned __int32 *) &hash[ 0] = _mm_cvtsi128_si32(h0);	// lane 0 of each register is written back
+	*(unsigned __int32 *) &hash[ 4] = _mm_cvtsi128_si32(h1);
+	*(unsigned __int32 *) &hash[ 8] = _mm_cvtsi128_si32(h2);
+	*(unsigned __int32 *) &hash[12] = _mm_cvtsi128_si32(h3);
+}
+
+
+// Step macro for updating with null bytes: no data word is read, so only the constant sc is added before the rotate.
+#define MD5STEP0(f, w, x, y, z, s, sc) w = _mm_add_epi32(_mm_srli_epi64(_mm_shuffle_epi32(_mm_add_epi32(_mm_add_epi32(w, _mm_set1_epi32(sc)), f(x, y, z)), _MM_SHUFFLE(0, 0, 0, 0)), 32 - s), x)
+
+// Zero fill version: updates the 16-byte MD5 state at hash for bytecnt bytes of zeros, without reading any input buffer.
+void Phmd5DoBlocksZero(
+	unsigned char *hash,
+	size_t bytecnt
+) {
+	__m128i h0, h1, h2, h3;					// chaining state; only 32-bit lane 0 of each is written back to hash
+	__m128i a, b, c, d;						// working values for the current block
+
+	h0 = _mm_loadu_si128((__m128i *) hash);	// h0 = [a0, a1, a2, a3] (little endian)
+	h1 = _mm_srli_si128(h0,  4);			// h1 = [a1, a2, a3,  0]
+	h2 = _mm_srli_si128(h0,  8);			// h2 = [a2, a3,  0,  0]
+	h3 = _mm_srli_si128(h0, 12);			// h3 = [a3,  0,  0,  0]
+	_mm_store_si128(&a, h0);				// working values start from the current state
+	_mm_store_si128(&b, h1);
+	_mm_store_si128(&c, h2);
+	_mm_store_si128(&d, h3);
+
+	do {									// steps using F1
+		MD5STEP0(F1, a, b, c, d,  7, 0xd76aa478);
+		MD5STEP0(F1, d, a, b, c, 12, 0xe8c7b756);
+		MD5STEP0(F1, c, d, a, b, 17, 0x242070db);
+		MD5STEP0(F1, b, c, d, a, 22, 0xc1bdceee);
+		MD5STEP0(F1, a, b, c, d,  7, 0xf57c0faf);
+		MD5STEP0(F1, d, a, b, c, 12, 0x4787c62a);
+		MD5STEP0(F1, c, d, a, b, 17, 0xa8304613);
+		MD5STEP0(F1, b, c, d, a, 22, 0xfd469501);
+		MD5STEP0(F1, a, b, c, d,  7, 0x698098d8);
+		MD5STEP0(F1, d, a, b, c, 12, 0x8b44f7af);
+		MD5STEP0(F1, c, d, a, b, 17, 0xffff5bb1);
+		MD5STEP0(F1, b, c, d, a, 22, 0x895cd7be);
+		MD5STEP0(F1, a, b, c, d,  7, 0x6b901122);
+		MD5STEP0(F1, d, a, b, c, 12, 0xfd987193);
+		MD5STEP0(F1, c, d, a, b, 17, 0xa679438e);
+		MD5STEP0(F1, b, c, d, a, 22, 0x49b40821);
+		// steps using F2
+		MD5STEP0(F2, a, b, c, d,  5, 0xf61e2562);
+		MD5STEP0(F2, d, a, b, c,  9, 0xc040b340);
+		MD5STEP0(F2, c, d, a, b, 14, 0x265e5a51);
+		MD5STEP0(F2, b, c, d, a, 20, 0xe9b6c7aa);
+		MD5STEP0(F2, a, b, c, d,  5, 0xd62f105d);
+		MD5STEP0(F2, d, a, b, c,  9, 0x02441453);
+		MD5STEP0(F2, c, d, a, b, 14, 0xd8a1e681);
+		MD5STEP0(F2, b, c, d, a, 20, 0xe7d3fbc8);
+		MD5STEP0(F2, a, b, c, d,  5, 0x21e1cde6);
+		MD5STEP0(F2, d, a, b, c,  9, 0xc33707d6);
+		MD5STEP0(F2, c, d, a, b, 14, 0xf4d50d87);
+		MD5STEP0(F2, b, c, d, a, 20, 0x455a14ed);
+		MD5STEP0(F2, a, b, c, d,  5, 0xa9e3e905);
+		MD5STEP0(F2, d, a, b, c,  9, 0xfcefa3f8);
+		MD5STEP0(F2, c, d, a, b, 14, 0x676f02d9);
+		MD5STEP0(F2, b, c, d, a, 20, 0x8d2a4c8a);
+		// steps using F3
+		MD5STEP0(F3, a, b, c, d,  4, 0xfffa3942);
+		MD5STEP0(F3, d, a, b, c, 11, 0x8771f681);
+		MD5STEP0(F3, c, d, a, b, 16, 0x6d9d6122);
+		MD5STEP0(F3, b, c, d, a, 23, 0xfde5380c);
+		MD5STEP0(F3, a, b, c, d,  4, 0xa4beea44);
+		MD5STEP0(F3, d, a, b, c, 11, 0x4bdecfa9);
+		MD5STEP0(F3, c, d, a, b, 16, 0xf6bb4b60);
+		MD5STEP0(F3, b, c, d, a, 23, 0xbebfbc70);
+		MD5STEP0(F3, a, b, c, d,  4, 0x289b7ec6);
+		MD5STEP0(F3, d, a, b, c, 11, 0xeaa127fa);
+		MD5STEP0(F3, c, d, a, b, 16, 0xd4ef3085);
+		MD5STEP0(F3, b, c, d, a, 23, 0x04881d05);
+		MD5STEP0(F3, a, b, c, d,  4, 0xd9d4d039);
+		MD5STEP0(F3, d, a, b, c, 11, 0xe6db99e5);
+		MD5STEP0(F3, c, d, a, b, 16, 0x1fa27cf8);
+		MD5STEP0(F3, b, c, d, a, 23, 0xc4ac5665);
+		// steps using F4
+		MD5STEP0(F4, a, b, c, d,  6, 0xf4292244);
+		MD5STEP0(F4, d, a, b, c, 10, 0x432aff97);
+		MD5STEP0(F4, c, d, a, b, 15, 0xab9423a7);
+		MD5STEP0(F4, b, c, d, a, 21, 0xfc93a039);
+		MD5STEP0(F4, a, b, c, d,  6, 0x655b59c3);
+		MD5STEP0(F4, d, a, b, c, 10, 0x8f0ccc92);
+		MD5STEP0(F4, c, d, a, b, 15, 0xffeff47d);
+		MD5STEP0(F4, b, c, d, a, 21, 0x85845dd1);
+		MD5STEP0(F4, a, b, c, d,  6, 0x6fa87e4f);
+		MD5STEP0(F4, d, a, b, c, 10, 0xfe2ce6e0);
+		MD5STEP0(F4, c, d, a, b, 15, 0xa3014314);
+		MD5STEP0(F4, b, c, d, a, 21, 0x4e0811a1);
+		MD5STEP0(F4, a, b, c, d,  6, 0xf7537e82);
+		MD5STEP0(F4, d, a, b, c, 10, 0xbd3af235);
+		MD5STEP0(F4, c, d, a, b, 15, 0x2ad7d2bb);
+		MD5STEP0(F4, b, c, d, a, 21, 0xeb86d391);
+
+		a = _mm_add_epi32(a, h0);			// add the state from before this block
+		b = _mm_add_epi32(b, h1);
+		c = _mm_add_epi32(c, h2);
+		d = _mm_add_epi32(d, h3);
+
+		_mm_store_si128(&h0, a);			// the sums become the new chaining state
+		_mm_store_si128(&h1, b);
+		_mm_store_si128(&h2, c);
+		_mm_store_si128(&h3, d);
+
+	} while (bytecnt -= 64);				// ends only when bytecnt reaches exactly 0: it must be a nonzero multiple of 64
+
+	*(unsigned __int32 *) &hash[ 0] = _mm_cvtsi128_si32(h0);	// lane 0 of each register is written back
+	*(unsigned __int32 *) &hash[ 4] = _mm_cvtsi128_si32(h1);
+	*(unsigned __int32 *) &hash[ 8] = _mm_cvtsi128_si32(h2);
+	*(unsigned __int32 *) &hash[12] = _mm_cvtsi128_si32(h3);
+}
+
--- a/source/par2j/phmd5s.c
+++ b/source/par2j/phmd5s.c
@@ -0,0 +1,277 @@
+// SIMD function by Yutaka Sawada 2021-02-05
+
+#include <string.h>
+#include <emmintrin.h>	// MMX ~ SSE2 命令セットを使用する場合インクルード
+#include "phmd5.h"
+
+
+// MD5 round functions F1..F4 written with SSE2 intrinsics. Each one works
+// bitwise on whole __m128i values, so all 32-bit lanes are processed at once.
+// The commented-out line above each macro is the scalar expression it mirrors.
+
+//#define F1(x, y, z) (((y ^ z) & x) ^ z)
+#define F1(x, y, z) _mm_xor_si128(_mm_and_si128(_mm_xor_si128(y, z), x), z)
+
+// The scalar form adds two terms; one is masked by z and the other by ~z, so
+// they never share a set bit and the bitwise OR used here gives the same value.
+//#define F2(x, y, z) ((z & x) + (~z & y))
+#define F2(x, y, z) _mm_or_si128(_mm_and_si128(x, z), _mm_andnot_si128(z, y))
+
+//#define F3(x, y, z) (x ^ y ^ z)
+#define F3(x, y, z) _mm_xor_si128(x, _mm_xor_si128(y, z))
+
+// ~z is formed by XOR-ing z with the all-ones value produced by
+// _mm_cmpeq_epi32(z, z) (every lane compares equal to itself).
+//#define F4(x, y, z) (y ^ (x | ~z))
+#define F4(x, y, z) _mm_xor_si128(y, _mm_or_si128(x, _mm_xor_si128(z, _mm_cmpeq_epi32(z, z))))
+// Alternative that uses an explicit all-ones constant:
+//#define F4(x, y, z) _mm_xor_si128(y, _mm_or_si128(x, _mm_xor_si128(z, _mm_set1_epi32(0xffffffff))))
+
+
+// One MD5 step: w = x + rotate_left(w + XX<ix> + sc + f(x, y, z), s).
+// The bit rotation is replaced by a shuffle instruction: the shuffle copies
+// 32-bit lane 0 into lanes 0-1 and lane 2 into lanes 2-3, then the 64-bit right
+// shift by (32 - s) leaves the s-bit left rotation in lanes 0 and 2.
+//   f  : round function macro (F1..F4)
+//   ix : index of the message word variable XX<ix>
+//   s  : rotate count
+//   sc : additive constant of the step
+#define MD5STEP(f, w, x, y, z, ix, s, sc) w = _mm_add_epi32(_mm_srli_epi64(_mm_shuffle_epi32(_mm_add_epi32(_mm_add_epi32(w, _mm_add_epi32(XX##ix, _mm_set1_epi32(sc))), f(x, y, z)), _MM_SHUFFLE(2, 2, 0, 0)), 32 - s), x)
+
+// The same macro expanded into separate statements (kept for reference)
+/*
+#define MD5STEP(f, w, x, y, z, ix, s, sc) { \
+	w = _mm_add_epi32(_mm_add_epi32(w, _mm_add_epi32(XX##ix, _mm_set1_epi32(sc))), f(x, y, z)); \
+	w = _mm_shuffle_epi32(w, _MM_SHUFFLE(2, 2, 0, 0)); \
+	w = _mm_srli_epi64(w, 32 - s); \
+	w = _mm_add_epi32(w, x); \
+}
+*/
+
+// Read two 32-bit integers twice
+// Loads 8 bytes at byte offset x from pdata and from pdata2, then spreads them
+// into the message-word variables XX<a> and XX<b> with each word duplicated in
+// two adjacent lanes (first stream in lanes 0-1, second stream in lanes 2-3).
+// XX##a = [a0, a1,  0,  0] read 8-bytes each (little endian)
+// XX##b = [b0, b1,  0,  0]
+// XX##a = [a0, b0, a1, b1] after _mm_unpacklo_epi32(XX##a, XX##b)
+// XX##b = [a1, a1, b1, b1] after _mm_unpackhi_epi32(XX##a, XX##a)
+// XX##a = [a0, a0, b0, b0] after _mm_unpacklo_epi32(XX##a, XX##a)
+#define READ2(a, b, x) { \
+	XX##a = _mm_loadl_epi64((__m128i *) (pdata  + x)); \
+	XX##b = _mm_loadl_epi64((__m128i *) (pdata2 + x)); \
+	XX##a = _mm_unpacklo_epi32(XX##a, XX##b); \
+	XX##b = _mm_unpackhi_epi32(XX##a, XX##a); \
+	XX##a = _mm_unpacklo_epi32(XX##a, XX##a); \
+}
+
+// Read four 32-bit integers twice
+// Loads 16 bytes (unaligned load) at byte offset x from pdata and from pdata2
+// and spreads them into four message-word variables, using the same lane
+// layout as READ2.
+// XX##a = [a0, a1, a2, a3] read 16-bytes each (little endian)
+// XX##b = [b0, b1, b2, b3]
+// XX##c = [a2, b2, a3, b3] after _mm_unpackhi_epi32(XX##a, XX##b)
+// XX##a = [a0, b0, a1, b1] after _mm_unpacklo_epi32(XX##a, XX##b)
+// XX##b = [a1, a1, b1, b1] after _mm_unpackhi_epi32(XX##a, XX##a)
+// XX##a = [a0, a0, b0, b0] after _mm_unpacklo_epi32(XX##a, XX##a)
+// XX##d = [a3, a3, b3, b3] after _mm_unpackhi_epi32(XX##c, XX##c)
+// XX##c = [a2, a2, b2, b2] after _mm_unpacklo_epi32(XX##c, XX##c)
+#define READ4(a, b, c, d, x) { \
+	XX##a = _mm_loadu_si128((__m128i *) (pdata  + x)); \
+	XX##b = _mm_loadu_si128((__m128i *) (pdata2 + x)); \
+	XX##c = _mm_unpackhi_epi32(XX##a, XX##b); \
+	XX##a = _mm_unpacklo_epi32(XX##a, XX##b); \
+	XX##b = _mm_unpackhi_epi32(XX##a, XX##a); \
+	XX##a = _mm_unpacklo_epi32(XX##a, XX##a); \
+	XX##d = _mm_unpackhi_epi32(XX##c, XX##c); \
+	XX##c = _mm_unpacklo_epi32(XX##c, XX##c); \
+}
+
+// Runs the MD5 block transform on two independent data streams at once.
+// Both 16-byte states are read at entry and written back at exit.
+// The do-while loop consumes 64 bytes from each stream per pass and stops when
+// bytecnt reaches zero, so bytecnt has to be a non-zero multiple of 64.
+void Phmd5DoBlocks2(
+	unsigned char *hash,	// state of the first MD5 (16 bytes, updated in place)
+	unsigned char *hash2,	// state of the second MD5 (16 bytes, updated in place)
+	char *pdata,			// input data for the first state
+	char *pdata2,			// input data for the second state
+	size_t bytecnt			// bytes to consume from each stream
+) {
+	__m128i h0, h1, h2, h3;
+	__m128i a, b, c, d;
+	__m128i XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7;
+	__m128i XX8, XX9, XX10, XX11, XX12, XX13, XX14, XX15;
+
+	// Interleave the two states so that each h<n> holds word n of the first
+	// state in lanes 0-1 and word n of the second state in lanes 2-3.
+	// same method as READ4
+	h0 = _mm_loadu_si128((__m128i *) hash);
+	h1 = _mm_loadu_si128((__m128i *) hash2);
+	h2 = _mm_unpackhi_epi32(h0, h1);
+	h0 = _mm_unpacklo_epi32(h0, h1);
+	h1 = _mm_unpackhi_epi32(h0, h0);
+	h0 = _mm_unpacklo_epi32(h0, h0);
+	h3 = _mm_unpackhi_epi32(h2, h2);
+	h2 = _mm_unpacklo_epi32(h2, h2);
+//	h0 = _mm_set_epi32(0, *(unsigned __int32 *) &hash2[ 0], 0, *(unsigned __int32 *) &hash[ 0] );
+//	h1 = _mm_set_epi32(0, *(unsigned __int32 *) &hash2[ 4], 0, *(unsigned __int32 *) &hash[ 4] );
+//	h2 = _mm_set_epi32(0, *(unsigned __int32 *) &hash2[ 8], 0, *(unsigned __int32 *) &hash[ 8] );
+//	h3 = _mm_set_epi32(0, *(unsigned __int32 *) &hash2[12], 0, *(unsigned __int32 *) &hash[12] );
+	// working copies of the state for the first block
+	_mm_store_si128(&a, h0);
+	_mm_store_si128(&b, h1);
+	_mm_store_si128(&c, h2);
+	_mm_store_si128(&d, h3);
+
+	do {
+		// Round 1 (F1): each pair of message words is loaded right before its first use
+//		READ4( 0,  1,  2,  3,  0);
+		READ2( 0,  1,  0);
+		MD5STEP(F1, a, b, c, d,  0,  7, 0xd76aa478);
+		MD5STEP(F1, d, a, b, c,  1, 12, 0xe8c7b756);
+		READ2( 2,  3,  8);
+		MD5STEP(F1, c, d, a, b,  2, 17, 0x242070db);
+		MD5STEP(F1, b, c, d, a,  3, 22, 0xc1bdceee);
+//		READ4( 4,  5,  6,  7, 16);
+		READ2( 4,  5, 16);
+		MD5STEP(F1, a, b, c, d,  4,  7, 0xf57c0faf);
+		MD5STEP(F1, d, a, b, c,  5, 12, 0x4787c62a);
+		READ2( 6,  7, 24);
+		MD5STEP(F1, c, d, a, b,  6, 17, 0xa8304613);
+		MD5STEP(F1, b, c, d, a,  7, 22, 0xfd469501);
+//		READ4( 8,  9, 10, 11, 32);
+		READ2( 8,  9, 32);
+		MD5STEP(F1, a, b, c, d,  8,  7, 0x698098d8);
+		MD5STEP(F1, d, a, b, c,  9, 12, 0x8b44f7af);
+		READ2(10, 11, 40);
+		MD5STEP(F1, c, d, a, b, 10, 17, 0xffff5bb1);
+		MD5STEP(F1, b, c, d, a, 11, 22, 0x895cd7be);
+//		READ4(12, 13, 14, 15, 48);
+		READ2(12, 13, 48);
+		MD5STEP(F1, a, b, c, d, 12,  7, 0x6b901122);
+		MD5STEP(F1, d, a, b, c, 13, 12, 0xfd987193);
+		READ2(14, 15, 56);
+		MD5STEP(F1, c, d, a, b, 14, 17, 0xa679438e);
+		MD5STEP(F1, b, c, d, a, 15, 22, 0x49b40821);
+
+		// Round 2 (F2)
+		MD5STEP(F2, a, b, c, d,  1,  5, 0xf61e2562);
+		MD5STEP(F2, d, a, b, c,  6,  9, 0xc040b340);
+		MD5STEP(F2, c, d, a, b, 11, 14, 0x265e5a51);
+		MD5STEP(F2, b, c, d, a,  0, 20, 0xe9b6c7aa);
+		MD5STEP(F2, a, b, c, d,  5,  5, 0xd62f105d);
+		MD5STEP(F2, d, a, b, c, 10,  9, 0x02441453);
+		MD5STEP(F2, c, d, a, b, 15, 14, 0xd8a1e681);
+		MD5STEP(F2, b, c, d, a,  4, 20, 0xe7d3fbc8);
+		MD5STEP(F2, a, b, c, d,  9,  5, 0x21e1cde6);
+		MD5STEP(F2, d, a, b, c, 14,  9, 0xc33707d6);
+		MD5STEP(F2, c, d, a, b,  3, 14, 0xf4d50d87);
+		MD5STEP(F2, b, c, d, a,  8, 20, 0x455a14ed);
+		MD5STEP(F2, a, b, c, d, 13,  5, 0xa9e3e905);
+		MD5STEP(F2, d, a, b, c,  2,  9, 0xfcefa3f8);
+		MD5STEP(F2, c, d, a, b,  7, 14, 0x676f02d9);
+		MD5STEP(F2, b, c, d, a, 12, 20, 0x8d2a4c8a);
+
+		// Round 3 (F3)
+		MD5STEP(F3, a, b, c, d,  5,  4, 0xfffa3942);
+		MD5STEP(F3, d, a, b, c,  8, 11, 0x8771f681);
+		MD5STEP(F3, c, d, a, b, 11, 16, 0x6d9d6122);
+		MD5STEP(F3, b, c, d, a, 14, 23, 0xfde5380c);
+		MD5STEP(F3, a, b, c, d,  1,  4, 0xa4beea44);
+		MD5STEP(F3, d, a, b, c,  4, 11, 0x4bdecfa9);
+		MD5STEP(F3, c, d, a, b,  7, 16, 0xf6bb4b60);
+		MD5STEP(F3, b, c, d, a, 10, 23, 0xbebfbc70);
+		MD5STEP(F3, a, b, c, d, 13,  4, 0x289b7ec6);
+		MD5STEP(F3, d, a, b, c,  0, 11, 0xeaa127fa);
+		MD5STEP(F3, c, d, a, b,  3, 16, 0xd4ef3085);
+		MD5STEP(F3, b, c, d, a,  6, 23, 0x04881d05);
+		MD5STEP(F3, a, b, c, d,  9,  4, 0xd9d4d039);
+		MD5STEP(F3, d, a, b, c, 12, 11, 0xe6db99e5);
+		MD5STEP(F3, c, d, a, b, 15, 16, 0x1fa27cf8);
+		MD5STEP(F3, b, c, d, a,  2, 23, 0xc4ac5665);
+
+		// Round 4 (F4)
+		MD5STEP(F4, a, b, c, d,  0,  6, 0xf4292244);
+		MD5STEP(F4, d, a, b, c,  7, 10, 0x432aff97);
+		MD5STEP(F4, c, d, a, b, 14, 15, 0xab9423a7);
+		MD5STEP(F4, b, c, d, a,  5, 21, 0xfc93a039);
+		MD5STEP(F4, a, b, c, d, 12,  6, 0x655b59c3);
+		MD5STEP(F4, d, a, b, c,  3, 10, 0x8f0ccc92);
+		MD5STEP(F4, c, d, a, b, 10, 15, 0xffeff47d);
+		MD5STEP(F4, b, c, d, a,  1, 21, 0x85845dd1);
+		MD5STEP(F4, a, b, c, d,  8,  6, 0x6fa87e4f);
+		MD5STEP(F4, d, a, b, c, 15, 10, 0xfe2ce6e0);
+		MD5STEP(F4, c, d, a, b,  6, 15, 0xa3014314);
+		MD5STEP(F4, b, c, d, a, 13, 21, 0x4e0811a1);
+		MD5STEP(F4, a, b, c, d,  4,  6, 0xf7537e82);
+		MD5STEP(F4, d, a, b, c, 11, 10, 0xbd3af235);
+		MD5STEP(F4, c, d, a, b,  2, 15, 0x2ad7d2bb);
+		MD5STEP(F4, b, c, d, a,  9, 21, 0xeb86d391);
+
+		// add the state from before this block to the result
+		a = _mm_add_epi32(a, h0);
+		b = _mm_add_epi32(b, h1);
+		c = _mm_add_epi32(c, h2);
+		d = _mm_add_epi32(d, h3);
+
+		// keep the new state for the next block
+		_mm_store_si128(&h0, a);
+		_mm_store_si128(&h1, b);
+		_mm_store_si128(&h2, c);
+		_mm_store_si128(&h3, d);
+
+		pdata += 64;
+		pdata2 += 64;
+	} while (bytecnt -= 64);
+
+	// lane 0 of h0..h3 holds the first state
+	*(unsigned __int32 *) &hash[ 0] = _mm_cvtsi128_si32(h0);
+	*(unsigned __int32 *) &hash[ 4] = _mm_cvtsi128_si32(h1);
+	*(unsigned __int32 *) &hash[ 8] = _mm_cvtsi128_si32(h2);
+	*(unsigned __int32 *) &hash[12] = _mm_cvtsi128_si32(h3);
+	// lane 2 holds the second state; move it down to lane 0 before extracting
+	h0 = _mm_srli_si128(h0, 8);	// right shift 8-bytes
+	h1 = _mm_srli_si128(h1, 8);
+	h2 = _mm_srli_si128(h2, 8);
+	h3 = _mm_srli_si128(h3, 8);
+	*(unsigned __int32 *) &hash2[ 0] = _mm_cvtsi128_si32(h0);
+	*(unsigned __int32 *) &hash2[ 4] = _mm_cvtsi128_si32(h1);
+	*(unsigned __int32 *) &hash2[ 8] = _mm_cvtsi128_si32(h2);
+	*(unsigned __int32 *) &hash2[12] = _mm_cvtsi128_si32(h3);
+}
+
+// SIMD version: feeds the same input buffer to two MD5 contexts in one call.
+// The data must be dword (4-bytes) aligned.
+// Each context first tops up its own partially filled buffer, then the span of
+// whole 64-byte blocks available to both is hashed with Phmd5DoBlocks2, and
+// whatever is left is handled per context.
+void Phmd5Process2(PHMD5 *pmd5, PHMD5 *pmd52, char *pdata, size_t bytecnt) {
+	char *src1 = pdata;
+	char *src2 = pdata;
+	size_t left1 = bytecnt;
+	size_t left2 = bytecnt;
+	size_t paired;
+	unsigned fill1, fill2, take;
+
+	// bytes already waiting in each buffer (taken before the totals are updated)
+	fill1 = (unsigned) pmd5->totbyt & 63;
+	fill2 = (unsigned) pmd52->totbyt & 63;
+	pmd5->totbyt += bytecnt;
+	pmd52->totbyt += bytecnt;
+
+	// complete the first context's pending block, if it has one
+	if (fill1 != 0) {
+		take = 64 - fill1;
+		if (take > left1)
+			take = (unsigned) left1;
+		memcpy(pmd5->buf + fill1, src1, take);
+		src1 += take;
+		left1 -= take;
+		if (fill1 + take == 64)
+			Phmd5DoBlocks(pmd5->hash, pmd5->buf, 64);
+	}
+	paired = left1 - (left1 & 63);	// whole blocks left for the first context
+
+	// same for the second context
+	if (fill2 != 0) {
+		take = 64 - fill2;
+		if (take > left2)
+			take = (unsigned) left2;
+		memcpy(pmd52->buf + fill2, src2, take);
+		src2 += take;
+		left2 -= take;
+		if (left2 < paired)
+			paired = left2 - (left2 & 63);	// the shorter one limits the paired span
+		if (fill2 + take == 64)
+			Phmd5DoBlocks(pmd52->hash, pmd52->buf, 64);
+	}
+
+	// hash the whole blocks of both contexts together, directly from the input
+	if (paired != 0) {
+		Phmd5DoBlocks2(pmd5->hash, pmd52->hash, src1, src2, paired);
+		src1 += paired;
+		src2 += paired;
+		left1 -= paired;
+		left2 -= paired;
+	}
+
+	// leftovers of the first context: full blocks are hashed, a short tail stays in buf
+	while (left1 != 0) {
+		take = (unsigned) left1;
+		if (take > 64)
+			take = 64;
+		memcpy(pmd5->buf, src1, take);
+		src1 += take;
+		left1 -= take;
+		if (take != 64)
+			break;
+		Phmd5DoBlocks(pmd5->hash, pmd5->buf, 64);
+	}
+
+	// leftovers of the second context
+	while (left2 != 0) {
+		take = (unsigned) left2;
+		if (take > 64)
+			take = 64;
+		memcpy(pmd52->buf, src2, take);
+		src2 += take;
+		left2 -= take;
+		if (take != 64)
+			break;
+		Phmd5DoBlocks(pmd52->hash, pmd52->buf, 64);
+	}
+}
+
--- a/source/par2j/reedsolomon.c
+++ b/source/par2j/reedsolomon.c
@@ -0,0 +1,816 @@
+// reedsolomon.c
+// Copyright : 2022-10-08 Yutaka Sawada
+// License : GPL
+
+#ifndef _UNICODE
+#define _UNICODE
+#endif
+#ifndef UNICODE
+#define UNICODE
+#endif
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x0600	// Windows Vista or later
+#endif
+
+#include <malloc.h>
+#include <process.h>
+#include <stdio.h>
+
+#include <windows.h>
+
+#include "common2.h"
+#include "crc.h"
+#include "gf16.h"
+#include "phmd5.h"
+#include "lib_opencl.h"
+#include "rs_encode.h"
+#include "rs_decode.h"
+#include "reedsolomon.h"
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Cache blocking: picks a chunk size that fits in the CPU cache, which makes
+// the calculation faster.
+// Returns unit_size unchanged when no cache size is available in cpu_flag;
+// otherwise unit_size divided into the fewest equal chunks (each rounded up to
+// a multiple of sse_unit) that fit the cache limit.
+int try_cache_blocking(int unit_size)
+{
+	int cache_budget, pad, parts, size;
+
+	// how much of the CPU cache may be used (at least 32KB when present)
+	cache_budget = cpu_flag & 0x7FFF8000;
+	if (cache_budget == 0)	// cache size unknown: no optimisation
+		return unit_size;
+
+	// difference between the cache line size and the data alignment
+	pad = 64 - sse_unit;
+	if (pad < 0)
+		pad = 0;
+
+	// unit_size is already a multiple of sse_unit; keep adding one more
+	// division until a chunk fits
+	size = unit_size;
+	for (parts = 2; size + pad > cache_budget; parts++){
+		size = (unit_size + parts - 1) / parts;
+		size = (size + (sse_unit - 1)) & ~(sse_unit - 1);	// multiple of sse_unit
+	}
+
+	return size;
+}
+
+// Calculates the buffer size for file access from the amount of free memory.
+// Note that io_size = unit_size - HASH_SIZE (alloc_unit >= HASH_SIZE).
+// Returns the number of bytes of a block handled per pass. When the block has
+// to be split and part_num is given, *part_num may be rewritten with a value
+// that is never larger than the one passed in.
+unsigned int get_io_size(
+	unsigned int buf_num,	// how many blocks' worth of space is reserved
+	unsigned int *part_num,	// work area for partial encoding (NULL = no adjustment)
+	size_t trial_alloc,		// whether to check that the memory can be allocated
+	int alloc_unit)			// memory alignment unit (sse_unit or MEM_UNIT)
+{
+	unsigned int unit_size, io_size, part_max, part_min;
+	size_t mem_size, io_size64;
+
+	if (part_num == NULL){	// nothing given, so nothing is adjusted
+		part_max = 0;
+		part_min = 0;
+	} else {
+		part_max = *part_num;	// the incoming value is the maximum
+		part_min = source_num >> PART_MIN_RATE;
+		part_min = (part_min / cpu_num) * cpu_num;	// multiple of cpu_num (rounded down)
+		if ((int)part_min < cpu_num * 2)
+			part_min = cpu_num * 2;	// double buffering needs at least twice cpu_num
+		if (part_min > part_max)
+			part_min = part_max;
+#ifdef TIMER
+		printf("get_io_size: part_min = %d, part_max = %d\n", part_min, part_max);
+#endif
+	}
+	// round up to a multiple of alloc_unit
+	unit_size = (block_size + HASH_SIZE + (alloc_unit - 1)) & ~(alloc_unit - 1);
+
+	if (trial_alloc){
+		__int64 possible_size;
+		possible_size = (__int64)unit_size * (buf_num + part_max);
+#ifndef _WIN64	// 32-bit build
+		if (possible_size > MAX_MEM_SIZE)	// cap the allocation (2GB according to the original comment)
+			possible_size = MAX_MEM_SIZE;
+		if (check_OS64() == 0){	// limit further on a 32-bit OS
+			if (possible_size > MAX_MEM_SIZE32)
+				possible_size = MAX_MEM_SIZE32;
+		}
+#endif
+		trial_alloc = (size_t)possible_size;
+		trial_alloc = (trial_alloc + 0xFFFF) & ~0xFFFF;	// round up to a multiple of 64KB
+	}
+	mem_size = get_mem_size(trial_alloc);
+	// NOTE(review): size_t arithmetic - if the quotient is smaller than HASH_SIZE
+	// the subtraction wraps to a huge value; confirm get_mem_size() rules that out.
+	io_size64 = mem_size / (buf_num + part_max) - HASH_SIZE;	// share of memory per block
+
+	// a whole block fits, or the block size itself is small
+	if ((io_size64 >= (size_t)block_size) || (block_size <= 1024)){
+		io_size = unit_size - HASH_SIZE;	// padded unit size minus HASH_SIZE
+
+	} else {	// split the block into equal parts
+		unsigned int num, num2;
+		io_size = (unsigned int)io_size64;
+		num = (block_size + io_size - 1) / io_size;	// number of pieces per block
+		if (part_min < part_max){	// the amount to keep may vary
+			io_size64 = mem_size / (buf_num + part_min) - HASH_SIZE;	// share per block when keeping the minimum
+			if (io_size64 >= (size_t)block_size){
+				num2 = 1;
+			} else {
+				io_size = (unsigned int)io_size64;
+				num2 = (block_size + io_size - 1) / io_size;
+			}
+		} else {
+			num2 = num;
+		}
+		if (num > num2){	// keeping fewer blocks lowers the number of pieces
+			io_size = (block_size + num2 - 1) / num2;
+			if (io_size < 1024)
+				io_size = 1024;
+			num = (unsigned int)(mem_size / (io_size + HASH_SIZE)) - buf_num;
+			if (num < part_max){	// the calculation is done in several passes
+				num2 = (parity_num + num - 1) / num;	// number of passes
+				num = (parity_num + num2 - 1) / num2;
+				num = ((num + cpu_num - 1) / cpu_num) * cpu_num;	// multiple of cpu_num (rounded up)
+				if (num < part_min)
+					num = part_min;
+			}
+			if (num > part_max)
+				num = part_max;
+			*part_num = num;
+		} else {
+			io_size = (block_size + num - 1) / num;
+			if (io_size < 1024)
+				io_size = 1024;	// original note: even when fragmenting, many blocks still use 32768 KB
+		}
+		io_size = ((io_size + HASH_SIZE + (alloc_unit - 1)) & ~(alloc_unit - 1)) - HASH_SIZE;	// multiple of alloc_unit, minus HASH_SIZE
+	}
+
+	return io_size;
+}
+
+// Calculates from the amount of free memory how many source blocks are read
+// from file at a time.
+// Returns source_num when everything fits, 0 as the sign of insufficient
+// memory, otherwise the size of one of several equal read groups.
+int read_block_num(
+	int keep_num,			// number of parity blocks kept in memory
+	int add_num,			// number of extra blocks to allow for
+	size_t trial_alloc,		// whether to check that the memory can be allocated
+	int alloc_unit)			// memory alignment unit (sse_unit or MEM_UNIT)
+{
+	int reserve, least_read, room, passes;
+	unsigned int unit_size;
+	size_t blocks_fit;
+
+	reserve = keep_num + add_num;	// blocks needed besides the source blocks
+
+	// smallest acceptable number of source blocks per read
+	least_read = keep_num >> READ_MIN_RATE;
+	if (least_read < READ_MIN_NUM)
+		least_read = READ_MIN_NUM;
+	if (least_read > source_num)
+		least_read = source_num;
+
+	// size of one block slot, rounded up to a multiple of alloc_unit
+	unit_size = (block_size + HASH_SIZE + (alloc_unit - 1)) & ~(alloc_unit - 1);
+
+	if (trial_alloc){
+		__int64 possible_size = (__int64)unit_size * (source_num + reserve);
+#ifndef _WIN64	// 32-bit build
+		if (possible_size > MAX_MEM_SIZE)	// cap the allocation size
+			possible_size = MAX_MEM_SIZE;
+		if ((check_OS64() == 0) && (possible_size > MAX_MEM_SIZE32))	// limit further on a 32-bit OS
+			possible_size = MAX_MEM_SIZE32;
+#endif
+		trial_alloc = ((size_t)possible_size + 0xFFFF) & ~0xFFFF;	// multiple of 64KB
+	}
+	blocks_fit = get_mem_size(trial_alloc) / unit_size;	// how many slots can be allocated
+
+	if (blocks_fit >= (size_t)(source_num + reserve))	// room for the maximum
+		return source_num;
+	if ((int)blocks_fit < least_read + reserve)	// too few
+		return 0;	// sign of insufficient memory
+
+	// split the source blocks into equal groups
+	room = (int)blocks_fit - reserve;
+	passes = (source_num + room - 1) / room;	// number of separate reads
+	return (source_num + passes - 1) / passes;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+// Based on the matrix inversion method in Hayato Togawa's book
+// "Enshu to Oyo FORTRAN77": Gaussian Elimination, slightly modified so that
+// a single matrix is enough.
+
+// Computes the inverse with half the memory (only the part for the parity
+// blocks in use), on the calling thread.
+// Returns 0 on success, 2 when print_progress() returns non-zero, or
+// (0x00010000 | column) when the pivot element of a row is zero.
+static int invert_matrix_st(unsigned short *mat,
+	int rows,				// number of rows (matrix height): lost source blocks = parity blocks in use
+	int cols,				// number of columns (matrix width): original number of source blocks
+	source_ctx_r *s_blk)	// information on each source block
+{
+	unsigned short *pivot_row, *other_row;
+	int r, k, col, coef;
+	unsigned int tick = GetTickCount();
+
+	// Gaussian Elimination with 1 matrix
+	col = 0;
+	pivot_row = mat;	// start of the row being normalised
+	for (r = 0; r < rows; r++, col++, pivot_row += cols){
+		// progress display
+		if (GetTickCount() - tick >= UPDATE_TIME){
+			if (print_progress((r * 1000) / rows))
+				return 2;
+			tick = GetTickCount();
+		}
+
+		// find which source block this row (parity block) substitutes for
+		for (; (col < cols) && (s_blk[col].exist != 0); col++)
+			;
+
+		// Divide the row by element r,col
+		coef = pivot_row[col];
+		if (coef == 0)	// a zero pivot means the inverse cannot be computed
+			return (0x00010000 | col);	// tells the caller which source block failed
+		if (coef != 1){	// scale the row so that the pivot becomes 1
+			pivot_row[col] = 1;	// this is the trick that makes one matrix enough
+			galois_region_divide(pivot_row, cols, coef);
+		}
+
+		// clear the pivot column in every other row by XOR-ing in a multiple of row r
+		for (k = rows - 1; k >= 0; k--){
+			if (k != r){
+				other_row = mat + cols * k;
+				coef = other_row[col];	// the pivot of row r is 1, so this value is the multiplier
+				other_row[col] = 0;	// this is the trick that makes one matrix enough
+				galois_region_multiply(pivot_row, other_row, cols, coef);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+// Multi-processor support
+
+// State shared between invert_matrix_mt() and its helper thread.
+typedef struct {	// RS threading control struct
+	unsigned short *mat;	// the matrix
+	int cols;	// length of one row
+	volatile int start;	// offset of the first element of the row that is multiplied
+	volatile int pivot;	// column of the value used as the multiplier
+	volatile int skip;	// row to skip; a negative value tells the thread to exit
+	volatile int now;	// row to eliminate next (decremented by both threads)
+	HANDLE h;	// handle of the helper thread
+	HANDLE run;	// event signalled to start a round of work
+	HANDLE end;	// event signalled by the helper thread when its round is done
+} INV_TH;
+
+// Helper thread for invert_matrix_mt().
+// Waits on th->run, then takes row numbers from th->now with
+// InterlockedDecrement and eliminates the pivot column of each, until the
+// counter drops below zero; signals th->end and waits again.
+// Exits when th->skip is negative after a wake-up, and closes both events.
+static DWORD WINAPI thread_func(LPVOID lpParameter)
+{
+	unsigned short *mat;
+	int j, cols, row_start2, factor;
+	INV_TH *th;
+
+	th = (INV_TH *)lpParameter;
+	mat = th->mat;
+	cols = th->cols;
+
+	WaitForSingleObject(th->run, INFINITE);	// wait for the signal to start
+	while (th->skip >= 0){
+		while ((j = InterlockedDecrement(&(th->now))) >= 0){	// j = --th_now
+			if (j == th->skip)
+				continue;
+			row_start2 = cols * j;	// start of that row
+			factor = mat[row_start2 + th->pivot];	// value at the pivot column of row j
+			mat[row_start2 + th->pivot] = 0;	// this is the trick that makes one matrix enough
+			// the pivot-column value of the pivot row was made 1 beforehand, so this factor is the multiplier
+			galois_region_multiply(mat + th->start, mat + row_start2, cols, factor);
+		}
+		//_mm_sfence();	// complete the writes to memory
+		SetEvent(th->end);	// report that the calculation is finished
+		WaitForSingleObject(th->run, INFINITE);	// wait for the signal to start
+	}
+
+	// clean up
+	CloseHandle(th->run);
+	CloseHandle(th->end);
+	return 0;
+}
+
+// Computes the inverse matrix with the help of one extra thread (only the part
+// for the parity blocks in use).
+// Returns 0 on success, 1 when an event or the thread cannot be created,
+// 2 when print_progress() returns non-zero, or (0x00010000 | column) when the
+// pivot element of a row is zero.
+static int invert_matrix_mt(unsigned short *mat,
+	int rows,				// number of rows (matrix height): lost source blocks = parity blocks in use
+	int cols,				// number of columns (matrix width): original number of source blocks
+	source_ctx_r *s_blk)	// information on each source block
+{
+	int j, row_start2, factor;
+	unsigned int time_last = GetTickCount();
+	INV_TH th[1];
+
+	memset(th, 0, sizeof(INV_TH));
+
+	// create the events
+	th->run = CreateEvent(NULL, FALSE, FALSE, NULL);	// both events are auto-reset
+	if (th->run == NULL){
+		print_win32_err();
+		printf("error, inv-thread\n");
+		return 1;
+	}
+	th->end = CreateEvent(NULL, FALSE, FALSE, NULL);
+	if (th->end == NULL){
+		print_win32_err();
+		CloseHandle(th->run);
+		printf("error, inv-thread\n");
+		return 1;
+	}
+	// start the helper thread
+	th->mat = mat;
+	th->cols = cols;
+	//_mm_sfence();	// complete the writes to memory before starting the thread
+	th->h = (HANDLE)_beginthreadex(NULL, STACK_SIZE, thread_func, (LPVOID)th, 0, NULL);
+	if (th->h == NULL){
+		print_win32_err();
+		CloseHandle(th->run);
+		CloseHandle(th->end);
+		printf("error, inv-thread\n");
+		return 1;
+	}
+
+	// Gaussian Elimination with 1 matrix
+	th->pivot = 0;
+	th->start = 0;	// start of the current row
+	for (th->skip = 0; th->skip < rows; th->skip++){
+		// progress display
+		if (GetTickCount() - time_last >= UPDATE_TIME){
+			if (print_progress((th->skip * 1000) / rows)){
+				th->skip = -1;	// tell the thread to exit
+				//_mm_sfence();
+				SetEvent(th->run);
+				WaitForSingleObject(th->h, INFINITE);
+				CloseHandle(th->h);
+				return 2;
+			}
+			time_last = GetTickCount();
+		}
+
+		// find which source block this row (parity block) substitutes for
+		while ((th->pivot < cols) && (s_blk[th->pivot].exist != 0))
+			th->pivot++;
+
+		// Divide the row by element i,pivot
+		factor = mat[th->start + th->pivot];
+		if (factor > 1){
+			mat[th->start + th->pivot] = 1;	// this is the trick that makes one matrix enough
+			galois_region_divide(mat + th->start, cols, factor);
+		} else if (factor == 0){	// with factor = 0 the inverse cannot be computed
+			th->skip = -1;	// tell the thread to exit
+			//_mm_sfence();
+			SetEvent(th->run);
+			WaitForSingleObject(th->h, INFINITE);
+			CloseHandle(th->h);
+			return (0x00010000 | th->pivot);	// tells the caller which source block failed
+		}
+
+		// Where another row has a non-zero value in the same pivot column,
+		// XOR a multiple of the current row into it to make that value 0.
+		th->now = rows;	// initial value + 1
+		//_mm_sfence();	// complete the writes to memory before resuming the thread
+		SetEvent(th->run);	// let the helper thread start calculating
+		while ((j = InterlockedDecrement(&(th->now))) >= 0){	// j = --th_now
+			if (j == th->skip)	// skip the current row itself
+				continue;
+			row_start2 = cols * j;	// start of that row
+			factor = mat[row_start2 + th->pivot];	// value at the pivot column of row j
+			mat[row_start2 + th->pivot] = 0;	// this is the trick that makes one matrix enough
+			// the pivot-column value of the current row was made 1 above, so this factor is the multiplier
+			galois_region_multiply(mat + th->start, mat + row_start2, cols, factor);
+		}
+
+		WaitForSingleObject(th->end, INFINITE);	// wait until the helper thread reports it is done
+		th->start += cols;
+		th->pivot++;
+	}
+
+	// shut down the helper thread
+	th->skip = -1;	// tell the thread to exit
+	//_mm_sfence();
+	SetEvent(th->run);
+	WaitForSingleObject(th->h, INFINITE);
+	CloseHandle(th->h);
+	return 0;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+/*
+gflib の行列作成用関数や行列の逆変換用の関数を元にして、
+計算のやり方を PAR 2.0 用に修正する。
+
+par-v1.1.tar.gz に含まれる rs.doc
+Dummies guide to Reed-Solomon coding. を参考にする
+*/
+
+/*
+5 * 5 なら
+ 1   1    1     1     1     constant の 0乗
+ 2   4   16   128   256  <- この行の値を constant とする
+ 4  16  256 16384  4107     constant の 2乗
+ 8  64 4096  8566  7099     constant の 3乗
+16 256 4107 43963  7166     constant の 4乗
+
+par2-specifications.pdf によると、constant は 2の乗数で、
+その指数は (n%3 != 0 && n%5 != 0 && n%17 != 0 && n%257 != 0) になる。
+*/
+
+// The PAR 2.0 parity check matrix is generated on the fly while encoding.
+// It is expressed by two vectors, constant and factor; the factor is updated
+// for each parity block as factor *= constant.
+// This function fills constant[0 .. source_num-1].
+static void make_encode_constant(
+	unsigned short *constant)	// array that receives the constants
+{
+	unsigned short value = 1;
+	int exponent = 0;
+	int idx;
+
+	// The constants are powers of 2 whose exponent is not a multiple of
+	// 3, 5, 17 or 257: 2, 4, 16, 128, 256, 2048, 8192, ...
+	for (idx = 0; idx < source_num; idx++){
+		while (exponent <= 65535){
+			value = galois_multiply_fix(value, 1);	// same as galois_multiply(value, 2) per the original note
+			exponent++;
+			if ((exponent % 3 == 0) || (exponent % 5 == 0) || (exponent % 17 == 0) || (exponent % 257 == 0))
+				continue;	// excluded exponent, keep going
+			break;
+		}
+		constant[idx] = value;
+	}
+}
+
+// Builds the matrix used for recovery; enough parity blocks are required.
+// Returns 1 when fewer than block_lost usable parity blocks exist, otherwise
+// the result of the matrix inversion.
+static int make_decode_matrix(
+	unsigned short *mat,	// matrix for recovery
+	int block_lost,			// rows (matrix height): lost source blocks = parity blocks needed
+	source_ctx_r *s_blk,	// information on each source block
+	parity_ctx_r *p_blk)	// information on each parity block
+{
+	unsigned short *id;		// which parity block stands in for each lost source block
+	unsigned short base;
+	int col, row, src, par, picked, exponent;
+
+	// choose the parity blocks that replace the lost source blocks;
+	// the list is stored right behind the matrix body
+	id = mat + (block_lost * source_num);
+	picked = 0;
+	par = 0;
+	while ((par < parity_num) && (picked < block_lost)){
+		if (p_blk[par].exist == 1){	// blocks marked as unusable are ignored
+			id[picked] = (unsigned short)par;
+			picked++;
+		}
+		par++;
+	}
+	if (picked < block_lost){	// not enough parity blocks
+		printf("need more recovery slice\n");
+		return 1;
+	}
+
+	// build a matrix from only the parity blocks that exist and are used
+	exponent = 0;
+	base = 1;
+	for (col = 0; col < source_num; col++){	// values are set one column at a time
+		while (exponent <= 65535){
+			base = galois_multiply_fix(base, 1);	// same as galois_multiply(base, 2) per the original note
+			exponent++;
+			if ((exponent % 3 == 0) || (exponent % 5 == 0) || (exponent % 17 == 0) || (exponent % 257 == 0))
+				continue;	// excluded exponent, keep going
+			break;
+		}
+
+		row = 0;
+		for (src = 0; src < source_num; src++){
+			if (s_blk[src].exist != 0)
+				continue;
+			// this source block is covered by a parity block
+			mat[source_num * row + col] = galois_power(base, id[row]);
+			row++;
+		}
+	}
+
+	// matrices that are too small are not worth multi-threading
+	if ((cpu_num != 1) && (source_num >= 10) && (block_lost >= 4))
+		return invert_matrix_mt(mat, block_lost, source_num, s_blk);
+	return invert_matrix_st(mat, block_lost, source_num, s_blk);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Encodes with Reed-Solomon codes.
+// Creates the Galois Field tables, prepares the constant vector and hands the
+// work to encode_method1 (single source block), encode_method4 (GPU) or
+// encode_method2.
+// Returns 1 when the tables or the constant buffer cannot be set up,
+// otherwise the value returned by the encode_method* function that ran last.
+int rs_encode(
+	wchar_t *file_path,
+	unsigned char *header_buf,	// packet header of the Recovery Slice packet
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk,		// source block information
+	parity_ctx_c *p_blk)		// parity block information
+{
+	unsigned short *constant = NULL;
+	int err = 0;
+	unsigned int len;
+#ifdef TIMER
+unsigned int time_total = GetTickCount();
+#endif
+
+	if (galois_create_table()){
+		printf("galois_create_table\n");
+		return 1;
+	}
+
+	if (source_num == 1){	// only one source block
+		err = encode_method1(file_path, header_buf, rcv_hFile, files, s_blk, p_blk);
+		goto error_end;
+	}
+
+	// prepare the buffer for the matrix arithmetic of the parity calculation
+	if (parity_num > source_num){
+		len = sizeof(unsigned short) * (source_num + parity_num);
+	} else {
+		len = sizeof(unsigned short) * source_num * 2;
+	}
+	constant = malloc(len);
+	if (constant == NULL){
+		printf("malloc, %u\n", len);	// len is unsigned
+		err = 1;
+		goto error_end;
+	}
+#ifdef TIMER
+	// the decimal digit is tenths of a KB, taken from the low 10 bits of len
+	printf("\nmatrix size = %u.%u KB\n", len >> 10, ((len & 0x3FF) * 10) >> 10);
+#endif
+	// constants that the parity check matrix is built from
+	make_encode_constant(constant);
+//	for (len = 0; (int)len < source_num; len++)
+//		printf("constant[%5d] = %5d\n", len, constant[len]);
+
+#ifdef TIMER
+	err = 0;	// IO method : 0=Auto, -2=Read all, -4=GPU read all
+	if (err == 0){
+#endif
+	// original note: an HDD would use the 1-pass & Read some method;
+	// with little memory or an SSD, Read all is used and blocks are fragmented
+	if ((OpenCL_method != 0) && (block_size >= 65536) && (source_num >= 256) && (parity_num >= 32) &&
+			((source_num + parity_num) * (__int64)block_size > 1048576 * 512)){
+		// with many blocks, a thread is assigned per block (uses the GPU)
+		err = -4;	// 2-pass & GPU read all
+	} else {
+		err = -2;	// 2-pass & Read all
+	}
+#ifdef TIMER
+	}
+#endif
+
+	// try the GPU first; when that is not possible move on to the next method
+	if (err == -4)
+		err = encode_method4(file_path, header_buf, rcv_hFile, files, s_blk, p_blk, constant);
+	if (err == -2)	// read all of the source data
+		err = encode_method2(file_path, header_buf, rcv_hFile, files, s_blk, p_blk, constant);
+#ifdef TIMER
+	if (err != 1){
+		time_total = GetTickCount() - time_total;
+		printf("total  %u.%03u sec\n", time_total / 1000, time_total % 1000);
+	}
+#endif
+
+error_end:
+	free(constant);	// free(NULL) is a no-op
+	galois_free_table();	// release the Galois Field tables
+	return err;
+}
+
+// Keeps the parity blocks in memory and reads / writes everything in one pass.
+// Creates the Galois Field tables, prepares the constant vector and hands the
+// work to encode_method5 (GPU) or encode_method3.
+// Returns 1 when the tables or the constant buffer cannot be set up,
+// otherwise the value returned by the encode_method* function that ran last
+// (a negative value is reported as a switch to 2-pass processing when TIMER
+// is defined).
+int rs_encode_1pass(
+	wchar_t *file_path,
+	wchar_t *recovery_path,		// work buffer
+	int packet_limit,			// limit on packet repetition in the recovery files
+	int block_distri,			// how parity blocks are distributed (the 3rd bit is the numbering scheme)
+	int packet_num,				// number of common packets
+	unsigned char *common_buf,	// buffer of the common packets
+	int common_size,			// size of the common packet buffer
+	unsigned char *footer_buf,	// buffer of the trailing packets
+	int footer_size,			// size of the trailing packet buffer
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk)		// source block information
+{
+	unsigned short *constant = NULL;
+	int err = 0;
+	unsigned int len;
+#ifdef TIMER
+unsigned int time_total = GetTickCount();
+#endif
+
+	if (galois_create_table()){
+		printf("galois_create_table\n");
+		return 1;
+	}
+
+	// prepare the buffer for the matrix arithmetic of the parity calculation
+	if (parity_num > source_num){
+		len = sizeof(unsigned short) * (source_num + parity_num);
+	} else {
+		len = sizeof(unsigned short) * source_num * 2;
+	}
+	constant = malloc(len);
+	if (constant == NULL){
+		printf("malloc, %u\n", len);	// len is unsigned
+		err = 1;
+		goto error_end;
+	}
+#ifdef TIMER
+	// the decimal digit is tenths of a KB, taken from the low 10 bits of len
+	printf("\nmatrix size = %u.%u KB\n", len >> 10, ((len & 0x3FF) * 10) >> 10);
+#endif
+	// constants that the parity check matrix is built from
+	make_encode_constant(constant);
+//	for (len = 0; (int)len < source_num; len++)
+//		printf("constant[%5d] = %5d\n", len, constant[len]);
+
+#ifdef TIMER
+	err = 0;	// IO method : 0=Auto, -3=Read some, -5=GPU read some, -? = Goto 2pass
+	if (err == 0){
+#endif
+	// the 1-pass method is only used when there is enough memory
+	if ((OpenCL_method != 0) && (block_size >= 65536) && (source_num >= 256) && (parity_num >= 32) &&
+			((source_num + parity_num) * (__int64)block_size > 1048576 * 512)){
+		err = -5;	// 1-pass & GPU read some
+	} else {
+		err = -3;	// 1-pass & Read some
+	}
+#ifdef TIMER
+	}
+#endif
+
+	// try the GPU first; when that is not possible move on to the next method
+	if (err == -5)
+		err = encode_method5(file_path, recovery_path, packet_limit, block_distri, packet_num,
+				common_buf, common_size, footer_buf, footer_size, rcv_hFile, files, s_blk, constant);
+	if (err == -3)	// read some of the source data at a time
+		err = encode_method3(file_path, recovery_path, packet_limit, block_distri, packet_num,
+				common_buf, common_size, footer_buf, footer_size, rcv_hFile, files, s_blk, constant);
+
+#ifdef TIMER
+	if (err < 0){
+		printf("switching to 2-pass processing, %d\n", err);
+	} else if (err != 1){
+		time_total = GetTickCount() - time_total;
+		printf("total  %u.%03u sec\n", time_total / 1000, time_total % 1000);
+	}
+#endif
+
+error_end:
+	free(constant);	// free(NULL) is a no-op
+	galois_free_table();	// release the Galois Field tables
+	return err;
+}
+
+// Decodes with Reed-Solomon codes.
+// Creates the Galois Field tables, builds and inverts the recovery matrix
+// (disabling a parity block and retrying when the inversion fails on it),
+// then hands the work to one of decode_method1 .. decode_method5.
+// Returns 1 when the tables or the matrix cannot be set up or too few usable
+// parity blocks remain; other non-zero values from make_decode_matrix are
+// passed through; otherwise the value returned by the decode_method* function
+// that ran last.
+int rs_decode(
+	wchar_t *file_path,
+	int block_lost,			// number of lost source blocks
+	HANDLE *rcv_hFile,		// handles of the recovery files
+	file_ctx_r *files,		// source file information
+	source_ctx_r *s_blk,	// source block information
+	parity_ctx_r *p_blk)	// parity block information
+{
+	unsigned short *mat = NULL, *id;
+	int err = 0, i, j, k;
+	unsigned int len;
+#ifdef TIMER
+unsigned int time_matrix = 0, time_total = GetTickCount();
+#endif
+
+	if (galois_create_table()){
+		printf("galois_create_table\n");
+		return 1;
+	}
+
+	if (source_num == 1){	// only one source block
+		err = decode_method1(file_path, rcv_hFile, files, s_blk, p_blk);
+		goto error_end;
+	}
+
+	// prepare the matrix used for recovery
+	len = sizeof(unsigned short) * block_lost * (source_num + 1);
+	mat = malloc(len);
+	if (mat == NULL){
+		// len is unsigned; a large matrix may exceed INT_MAX and would show up
+		// as a negative number with %d
+		printf("malloc, %u\n", len);
+		printf("matrix for recovery is too large\n");
+		err = 1;
+		goto error_end;
+	}
+#ifdef TIMER
+	// the decimal digit is tenths of the unit, taken from the bits below it
+	if (len & 0xFFF00000){
+		printf("\nmatrix size = %u.%u MB\n", len >> 20, ((len & 0xFFFFF) * 10) >> 20);
+	} else {
+		printf("\nmatrix size = %u.%u KB\n", len >> 10, ((len & 0x3FF) * 10) >> 10);
+	}
+#endif
+	// which parity block substitutes for the n-th lost source block
+	id = mat + (block_lost * source_num);
+
+#ifdef TIMER
+time_matrix = GetTickCount();
+#endif
+	// compute the recovery matrix
+	print_progress_text(0, "Computing matrix");
+	err = make_decode_matrix(mat, block_lost, s_blk, p_blk);
+	while (err >= 0x00010000){	// the inverse could not be computed (implemented after a fix proposed by Petr Matas)
+		printf("\n");
+		err ^= 0x00010000;	// row where the error happened (source block number)
+		printf("fail at input slice %d\n", err);
+		k = 0;
+		for (i = 0; i < err; i++){
+			if (s_blk[i].exist == 0)
+				k++;
+		}
+		// id[k] is the parity block that belongs to the failing row
+		p_blk[id[k]].exist = 0x100;	// do not use this parity block any more
+		printf("disable recovery slice %d\n", id[k]);
+		j = 0;
+		for (i = 0; i < parity_num; i++){
+			if (p_blk[i].exist == 1)
+				j++;	// number of usable parity blocks
+		}
+		if (j >= block_lost){	// still at least as many usable parity blocks as lost blocks
+			print_progress_text(0, "Computing matrix");
+			err = make_decode_matrix(mat, block_lost, s_blk, p_blk);
+		} else {	// not enough parity blocks left to substitute
+			printf("fail at recovery slice");
+			for (i = 0; i < parity_num; i++){
+				if (p_blk[i].exist == 0x100)
+					printf(" %d", i);
+			}
+			printf("\n");
+			err = 1;
+		}
+	}
+	if (err)	// any other error
+		goto error_end;
+	print_progress_done();	// line break, back to the start of the line
+	//for (i = 0; i < block_lost; i++)
+	//	printf("id[%d] = %d\n", i, id[i]);
+#ifdef TIMER
+time_matrix = GetTickCount() - time_matrix;
+#endif
+
+#ifdef TIMER
+	err = 0;	// IO method : 0=Auto, -2=Read all, -3=Read some, -4=GPU all, -5=GPU some
+	if (err == 0){
+#endif
+	if ((OpenCL_method != 0) && (block_size >= 65536) && (source_num >= 256) && (block_lost >= 32) &&
+			((source_num + block_lost) * (__int64)block_size > 1048576 * 512)){
+		// with many blocks, a thread is assigned per block (uses the GPU)
+		if (memory_use & 16){
+			err = -4;	// original note: on an SSD, Read all is fast even when blocks are fragmented
+		} else if (read_block_num(block_lost, 2, 0, MEM_UNIT) != 0){
+			err = -5;	// original note: on an HDD with enough memory, use Read some
+		} else {
+			err = -4;	// not enough memory: use Read all and fragment the blocks
+		}
+	} else {
+		// decide between reading every source block in fragments and reading some blocks whole
+		if (memory_use & 16){
+			err = -2;	// original note: on an SSD, Read all is fast even when blocks are fragmented
+		} else if (read_block_num(block_lost, cpu_num - 1, 0, sse_unit) != 0){
+			err = -3;	// original note: on an HDD with enough memory, use Read some
+		} else {
+			err = -2;	// not enough memory: use Read all and fragment the blocks
+		}
+	}
+#ifdef TIMER
+	}
+#endif
+
+	// branch on the file access method; a method may return the code of the next one to fall back to
+	if (err == -5)
+		err = decode_method5(file_path, block_lost, rcv_hFile, files, s_blk, p_blk, mat);
+	if (err == -4)
+		err = decode_method4(file_path, block_lost, rcv_hFile, files, s_blk, p_blk, mat);
+	if (err == -3)	// read some of the source data at a time
+		err = decode_method3(file_path, block_lost, rcv_hFile, files, s_blk, p_blk, mat);
+	if (err == -2)	// read all of the source data
+		err = decode_method2(file_path, block_lost, rcv_hFile, files, s_blk, p_blk, mat);
+#ifdef TIMER
+	if (err != 1){
+		time_total = GetTickCount() - time_total;
+		printf("total  %u.%03u sec\n", time_total / 1000, time_total % 1000);
+		printf("matrix %u.%03u sec\n", time_matrix / 1000, time_matrix % 1000);
+	}
+#endif
+
+error_end:
+	free(mat);	// free(NULL) is a no-op
+	galois_free_table();	// release the Galois Field tables
+	return err;
+}
+
--- a/source/par2j/reedsolomon.h
+++ b/source/par2j/reedsolomon.h
@@ -0,0 +1,80 @@
+#ifndef _REEDSOLOMON_H_
+#define _REEDSOLOMON_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+//#define TIMER // for experiments
+
+// "Read all source & Keep some parity" method
+// Minimum number of blocks for which partial encoding is performed
+#define PART_MAX_RATE	1	// 1/2  of the number of source blocks = 50%
+#define PART_MIN_RATE	5	// 1/32 of the number of source blocks = 3.1%
+
+// "Read some source & Keep all parity" method
+// Minimum number of blocks to read at one time
+#define READ_MIN_RATE	1	// 1/2 of the number of kept blocks = 50%
+#define READ_MIN_NUM	16
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Try cache blocking
+int try_cache_blocking(int unit_size);
+
+// Calculate the file-access buffer size from the amount of free memory
+unsigned int get_io_size(
+	unsigned int buf_num,	// how many blocks' worth of space to allocate
+	unsigned int *part_num,	// work area for partial encoding
+	size_t trial_alloc,		// whether to check that the allocation is possible
+	int alloc_unit);		// memory unit boundary (sse_unit or MEM_UNIT)
+
+// Calculate from the amount of free memory how many blocks to read from files at once
+int read_block_num(
+	int keep_num,			// number of parity blocks to keep
+	int add_num,			// number of extra blocks to allow as margin
+	size_t trial_alloc,		// whether to check that the allocation is possible
+	int alloc_unit);		// memory unit boundary (sse_unit or MEM_UNIT)
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Encode using Reed-Solomon codes
+int rs_encode(
+	wchar_t *file_path,
+	unsigned char *header_buf,	// packet header of the Recovery Slice packet
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk,		// source block information
+	parity_ctx_c *p_blk);		// parity block information
+
+// Keep the parity blocks in memory and read/write in a single pass
+int rs_encode_1pass(
+	wchar_t *file_path,
+	wchar_t *recovery_path,		// work buffer
+	int packet_limit,			// limit on packet repetition in recovery files
+	int block_distri,			// how parity blocks are distributed (the 3rd bit selects the numbering scheme)
+	int packet_num,				// number of common packets
+	unsigned char *common_buf,	// buffer of common packets
+	int common_size,			// size of the common packet buffer
+	unsigned char *footer_buf,	// buffer of trailing (footer) packets
+	int footer_size,			// size of the trailing packet buffer
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk);		// source block information
+
+// Decode using Reed-Solomon codes
+int rs_decode(
+	wchar_t *file_path,
+	int block_lost,				// number of lost source blocks
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_r *files,			// source file information
+	source_ctx_r *s_blk,		// source block information
+	parity_ctx_r *p_blk);		// parity block information
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/source/par2j/repair.c
+++ b/source/par2j/repair.c
@@ -0,0 +1,879 @@
+// repair.c
+// Copyright : 2022-10-14 Yutaka Sawada
+// License : GPL
+
+#ifndef _UNICODE
+#define _UNICODE
+#endif
+#ifndef UNICODE
+#define UNICODE
+#endif
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x0600	// Windows Vista or later
+#endif
+
+#include <stdio.h>
+
+#include <windows.h>
+
+#include "common2.h"
+#include "crc.h"
+#include "md5_crc.h"
+#include "ini.h"
+#include "json.h"
+#include "repair.h"
+
+
+// Print the list of input files (size, slice count, name) followed by the totals.
+//   ascii_buf : work buffer that receives each file name converted with cp_output
+//   files     : source file information; file_num entries are read
+void print_file_list(
+	char *ascii_buf,
+	file_ctx_r *files)
+{
+	file_ctx_r *entry;
+	int idx;
+
+	printf("\nInput File list      :\n");
+	printf("         Size  Slice :  Filename\n");
+	fflush(stdout);
+
+	for (idx = 0; idx < file_num; idx++){
+		entry = files + idx;
+
+		// A negative name offset means the File Description packet is missing.
+		if (entry->name < 0){
+			printf("            ?      ? : Unknown\n");
+			continue;
+		}
+
+		if (entry->name != 0){
+			utf16_to_cp(list_buf + entry->name, ascii_buf, cp_output);
+		} else {	// only the file name is unknown
+			ascii_buf[0] = 0;
+		}
+
+		if ((idx >= entity_num) || (entry->size <= 0)){
+			// empty file, folder, or member of the non-recovery set: no slices
+			printf("%13I64d      0 : \"%s\"\n", entry->size, ascii_buf);
+		} else {
+			// size and number of blocks of this file
+			printf("%13I64d %6d : \"%s\"\n", entry->size, entry->b_num, ascii_buf);
+		}
+	}
+
+	printf("\nInput File total size\t: %I64d\n", total_file_size);
+	printf("Input File Slice count\t: %d\n", source_num);
+}
+
+// Return the index of the first directory separator ('\\') in s,
+// or -1 when s contains none.
+static int wcschr_dir(wchar_t *s)
+{
+	int pos;
+
+	for (pos = 0; s[pos] != 0; pos++){
+		if (s[pos] == '\\')
+			return pos;
+	}
+
+	return -1;
+}
+
+// qsort() comparison callback for file_ctx_r entries: orders by relative path,
+// one path component at a time, placing entries that still have a sub-directory
+// before plain files. Components are compared with CompareStringEx() using the
+// user's locale; the result (1/2/3, or 0 on failure) is returned minus 2.
+// NOTE(review): flag 0x00000008 looks like SORT_DIGITSASNUMBERS from the Windows SDK,
+// presumably written as a literal because of the _WIN32_WINNT 0x0600 target - confirm.
+// (QuickPar orders by plain wcscmp() instead.)
+static int name_cmp(const void *elem1, const void *elem2)
+{
+	wchar_t *left = list_buf + ((file_ctx_r *)elem1)->name;
+	wchar_t *right = list_buf + ((file_ctx_r *)elem2)->name;
+	int dir_left, dir_right;
+	int result = 2;	// "equal" until a component differs
+
+	for (;;){
+		if (left[0] + right[0] == 0)
+			break;	// both paths are exhausted
+
+		dir_left = wcschr_dir(left);
+		dir_right = wcschr_dir(right);
+
+		// folders come first
+		if ((dir_left >= 0) && (dir_right < 0))
+			return -1;	// only the first path has a sub-directory
+		if ((dir_left < 0) && (dir_right >= 0))
+			return 1;	// only the second path has a sub-directory
+
+		// the order is decided by the user's language settings
+		result = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0x00000008, left, dir_left, right, dir_right, NULL, NULL, 0);
+		if ((result != 2) || (dir_left == -1) || (dir_right == -1))
+			break;
+
+		// identical directory component: continue with the next one
+		left += dir_left + 1;
+		right += dir_right + 1;
+	}
+
+	return result - 2;
+}
+
+// Check the source file information, assign block offsets/counts to every file,
+// accumulate total_file_size and source_num, sort the files by name and print the list.
+//   ascii_buf : work buffer used while printing file names
+//   files     : source file information (updated in place and reordered)
+// Returns 0 on success, 1 when a File Description packet is missing,
+// a file name is unknown, or a file path is too long.
+int set_file_data(
+	char *ascii_buf,
+	file_ctx_r *files)
+{
+	file_ctx_r *cur;
+	int idx;
+	int problems = 0;	// bit 1: packet missing, bit 2: name unknown, bit 4: path too long
+
+	// basic information of the source files
+	total_file_size = 0;
+	source_num = 0;
+	for (idx = 0; idx < file_num; idx++){
+		cur = &files[idx];
+		if (cur->name < 0){	// File Description packet is missing
+			problems |= 1;
+			continue;
+		}
+
+		if (cur->name == 0){	// only the file name is unknown
+			problems |= 2;
+		} else if (base_len + wcslen(list_buf + cur->name) >= MAX_LEN - ADD_LEN){
+			problems |= 4;	// the file path is too long
+		}
+
+		// number of blocks and first block index of each file
+		if ((idx >= entity_num) || (cur->size <= 0)){
+			// empty file, folder, or member of the non-recovery set
+			cur->b_off = 0;
+			cur->b_num = 0;
+			cur->state = 0;	// no checksum is needed
+		} else {
+			cur->b_off = source_num;
+			cur->b_num = (int)((cur->size + (__int64)block_size - 1) / (__int64)block_size);
+			source_num += cur->b_num;
+			cur->state = 0x80;	// checksum is required but has not been read yet
+		}
+		total_file_size += cur->size;
+	}
+
+	if (problems != 0){
+		print_file_list(ascii_buf, files);	// show the file list before giving up
+		if (problems & 1){	// verification cannot continue without the packet
+			printf("\nFile Description packet is missing\n");
+		} else if (problems & 2){
+			printf("\nfilename is unknown\n");
+		} else if (problems & 4){
+			printf("\nfilename is too long\n");
+		}
+		return 1;
+	}
+
+	// The PAR2 spec limits source blocks to 32768, but out-of-spec recovery data
+	// may exist (bug of YencPowerPost A&A v11b); recovery blocks are disabled then.
+	// Otherwise any number of recovery blocks can be made, but only 65535 distinct slices.
+	parity_num = (source_num > MAX_SOURCE_NUM) ? 0 : MAX_PARITY_NUM;
+
+	// If you want to see original order (sorted by File ID), comment out below lines.
+	// When files are reordered, the file index of source blocks must follow.
+	if (entity_num > 1){	// sort recovery-set files by name
+		qsort(files, entity_num, sizeof(file_ctx_r), name_cmp);
+	}
+	if (file_num - entity_num > 1){	// sort non-recovery-set files by name
+		qsort(&(files[entity_num]), file_num - entity_num, sizeof(file_ctx_r), name_cmp);
+	}
+
+	print_file_list(ascii_buf, files);	// show the sorted file list
+
+	return 0;
+}
+
+// Tally the verification results of the source files, print a summary and decide how they can be repaired; stores need_repair in *result and returns a bit mask (4 = repair needed, 8 = slices short or inversion may fail, 32 = simple repair, 64 = rejoin, 128 = block repair, 256 = incomplete PAR files)
+int result_file_state(
+	char *ascii_buf,	// work buffer for file names converted with cp_output
+	int *result,	// receives need_repair (low 28 bits: simple-repair count, high bits: flags)
+	int parity_now,	// compared against the number of lost slices below
+	int recovery_lost,	// printed as the number of incomplete PAR files when > 0
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk)	// information about each source block
+{
+	int i, num, find_num, b_last;
+	int lost_num, need_repair, rejoin_num, repair_num, incomp_num;
+
+	find_num = -1;	// marks that the table heading has not been printed yet
+	need_repair = 0;
+	rejoin_num = 0;
+	repair_num = 0;
+	incomp_num = 0;
+	for (num = 0; num < entity_num; num++){
+		if (files[num].size == 0){	// folder or empty file
+			if (files[num].state & 0x3F)	// anything other than "empty file / folder exists" (0x40)
+				need_repair++;
+		} else {	// source file
+			if (files[num].state & 0x03){	// missing (0x01) or damaged (0x02): show the slice detection result
+				if (find_num < 0){	// heading not printed yet
+					printf("\nCounting available slice:\n");
+					printf(" Avail /  Slice :  Filename\n");	// tallied number of detected slices
+				}
+				utf16_to_cp(list_buf + files[num].name, ascii_buf, cp_output);
+				find_num = 0;
+				if ((files[num].state & 0x80) == 0){	// checksums exist
+					b_last = files[num].b_off + files[num].b_num;
+					for (i = files[num].b_off; i < b_last; i++){
+						if (s_blk[i].exist != 0)
+							find_num++;
+					}
+				}
+				printf("%6d / %6d : \"%s\"\n", find_num, files[num].b_num, ascii_buf);
+				if (find_num == files[num].b_num){
+					need_repair |= 0x20000000;	// all slices are available, so the file can be rebuilt
+					rejoin_num++;
+				} else {
+					need_repair |= 0x10000000;	// slices are missing
+					repair_num++;
+				}
+			} else if (files[num].state & 0x30){	// appended (0x10), renamed/moved (0x20, 0x28)
+				need_repair++;
+			}
+		}
+	}
+	for (num = entity_num; num < file_num; num++){
+		if (files[num].state & 0x30){	// appended (0x10), renamed/moved (0x20), folder moved (0x60)
+			need_repair++;
+		} else if (files[num].state & 0x03){	// missing (0x01), damaged (0x02), folder missing (0x41)
+			if (files[num].size == 0){	// size 0 can be restored immediately
+				need_repair++;
+			} else {
+				need_repair |= 0x40000000;	// a non-recovery-set file is damaged and cannot be repaired
+				incomp_num++;
+			}
+		}
+	}
+	json_file_state(files);	// record in the JSON file
+
+	// number of available source blocks
+	printf("\nInput File Slice avail\t: %d\n", first_num);
+	// number of lost source blocks
+	lost_num = source_num - first_num;
+	printf("Input File Slice lost\t: %d\n\n", lost_num);
+	if (need_repair == 0)
+		printf("All Files Complete\n");
+	if (recovery_lost > 0){	// there are incomplete recovery files
+		i = 256;
+		printf("%d PAR File(s) Incomplete\n", recovery_lost);
+	} else {
+		i = 0;
+	}
+
+	// whether repair is needed
+	if (need_repair != 0){
+		i |= 4;
+		if (need_repair & 0x0FFFFFFF){	// simple repair is possible
+			printf("Ready to rename %d file(s)\n", need_repair & 0x0FFFFFFF);
+			i |= 32;
+		}
+		if (need_repair & 0x20000000){	// rebuilding (rejoin) is possible
+			printf("Ready to rejoin %d file(s)\n", rejoin_num);
+			i |= 64;
+		}
+		if (need_repair & 0x10000000){	// source blocks have to be recovered
+			if (lost_num > parity_now){
+				printf("Need %d more slice(s) to repair %d file(s)\n", lost_num - parity_now, repair_num);
+				i |= 8;
+			} else if ((lost_num == parity_now) && (lost_num >= 2)){	// the matrix inversion may fail
+				printf("Try to repair %d file(s)\n", repair_num);
+				i |= 128 | 8;
+			} else {
+				printf("Ready to repair %d file(s)\n", repair_num);
+				i |= 128;
+			}
+		}
+		if (need_repair & 0x40000000)	// non-recovery-set files cannot be repaired
+			printf("Cannot repair %d file(s)\n", incomp_num);
+	}
+	fflush(stdout);
+
+	*result = need_repair;
+	return i;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Print the outcome line for one file and flush stdout.
+// Returns 1 when the file was restored (failed == 0), otherwise 0.
+static int report_simple_result(const char *name, int failed)
+{
+	if (failed){
+		printf(" Failed   : \"%s\"\n", name);
+	} else {
+		printf(" Restored : \"%s\"\n", name);
+	}
+	fflush(stdout);
+
+	return failed ? 0 : 1;
+}
+
+// Create an empty file at path (truncating an existing one); when the path is not
+// found, the intermediate folders are created with make_dir() and the call is retried.
+// Returns 0 on success, 1 on failure.
+static int repair_make_empty_file(wchar_t *path)
+{
+	HANDLE hFile;
+
+	hFile = CreateFile(path, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+	if ((hFile == INVALID_HANDLE_VALUE) && (GetLastError() == ERROR_PATH_NOT_FOUND)){	// Path not found (3)
+		make_dir(path);	// create the missing intermediate folders
+		hFile = CreateFile(path, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+	}
+	if (hFile == INVALID_HANDLE_VALUE)
+		return 1;
+
+	CloseHandle(hFile);
+	return 0;
+}
+
+// Re-create a missing folder at path (a trailing '\\' is removed from path in place).
+// Returns 0 when the folder exists afterwards, 1 on failure.
+// Unlike the previous inline code, an unexpected CreateDirectory() error
+// (e.g. access denied) or a failed attribute query is reported as a failure
+// instead of being mistaken for success.
+static int repair_make_folder(wchar_t *path)
+{
+	DWORD err, attr;
+	size_t len;
+
+	len = wcslen(path);
+	if ((len > 0) && (path[len - 1] == '\\'))
+		path[len - 1] = 0;
+
+	if (CreateDirectory(path, NULL) != 0)
+		return 0;
+
+	err = GetLastError();
+	if (err == ERROR_PATH_NOT_FOUND){	// Path not found (3)
+		make_dir(path);	// create the missing intermediate folders
+		return (CreateDirectory(path, NULL) != 0) ? 0 : 1;
+	}
+	if (err == ERROR_ALREADY_EXISTS){	// Destination already exists (183)
+		attr = GetFileAttributes(path);
+		if (attr == INVALID_FILE_ATTRIBUTES)
+			return 1;	// cannot tell what occupies the name
+		if ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0)
+			return 0;	// the folder is already there
+		// a file with the same name exists: move it out of the way
+		move_away_file(path);
+		return (CreateDirectory(path, NULL) != 0) ? 0 : 1;
+	}
+
+	return 1;	// any other error means the folder was not created
+}
+
+// Perform the simple repairs (re-create empty files and folders, truncate appended
+// data, rename/move files back) and print one status line per handled file.
+//   ascii_buf   : work buffer for file names converted with cp_output
+//   need_repair : number of files that only need a simple repair (printed as heading)
+//   files       : information about each source file; state is updated on success
+// Returns the number of recovery-set files that still need their blocks recovered.
+int simple_repair(
+	char *ascii_buf,
+	int need_repair,
+	file_ctx_r *files)		// information about each source file
+{
+	wchar_t file_path[MAX_LEN], old_path[MAX_LEN];
+	int num, repaired_num, failed;
+
+	if (need_repair){	// number of files for which a simple repair is enough
+		printf("\nCorrecting file : %d\n", need_repair);
+		printf(" Status   :  Filename\n");
+		fflush(stdout);
+		need_repair = 0x10000000;	// remember that the heading was printed
+	}
+	repaired_num = 0;
+	wcscpy(file_path, base_dir);
+	wcscpy(old_path, base_dir);
+
+	// files of the recovery set
+	for (num = 0; num < entity_num; num++){
+		utf16_to_cp(list_buf + files[num].name, ascii_buf, cp_output);
+		wcscpy(file_path + base_len, list_buf + files[num].name);
+		if (files[num].size == 0){	// re-create a folder or an empty file
+			switch (files[num].state){
+			case 1:		// a missing file of size 0 can be restored immediately
+				failed = repair_make_empty_file(file_path);
+				if (!failed)
+					files[num].state = 0;
+				repaired_num += report_simple_result(ascii_buf, failed);
+				break;
+			case 16:	// a file with content is treated as "appended", not damaged
+				failed = (shorten_file(file_path, 0) != 0);
+				if (!failed)
+					files[num].state = 0;
+				repaired_num += report_simple_result(ascii_buf, failed);
+				break;
+			case 32:	// move back to the original location
+			case 96:	// move a folder back to the original location
+				wcscpy(old_path + base_len, list_buf + files[num].name2);
+				failed = (replace_file(file_path, old_path) != 0);
+				if (!failed)
+					files[num].state &= 0x40;	// becomes 0 or 64
+				repaired_num += report_simple_result(ascii_buf, failed);
+				break;
+			case 65:	// the folder is missing
+				failed = repair_make_folder(file_path);
+				if (!failed)
+					files[num].state = 64;
+				repaired_num += report_simple_result(ascii_buf, failed);
+				break;
+			}
+		} else {
+			switch (files[num].state & 0x7F){	// correctable with or without checksums
+			case 1:		// lost blocks are recovered later
+			case 2:
+			case 6:
+				need_repair++;
+				break;
+			case 16:	// remove the garbage at the end
+				failed = (shorten_file(file_path, files[num].size) != 0);
+				if (!failed){
+					write_ini_complete(num, file_path);
+					files[num].state &= 0x80;
+				}
+				repaired_num += report_simple_result(ascii_buf, failed);
+				break;
+			case 32:	// correct the file name
+			case 40:
+				wcscpy(old_path + base_len, list_buf + files[num].name2);
+				failed = (replace_file(file_path, old_path) != 0);
+				if (!failed)
+					files[num].state &= 0x80;
+				repaired_num += report_simple_result(ascii_buf, failed);
+				break;
+			}
+		}
+	}
+
+	// files of the non-recovery set
+	for (num = entity_num; num < file_num; num++){
+		utf16_to_cp(list_buf + files[num].name, ascii_buf, cp_output);
+		wcscpy(file_path + base_len, list_buf + files[num].name);
+		switch (files[num].state){
+		case 1:		// missing
+		case 2:		// damaged
+			if (files[num].size == 0){	// size 0 can be restored immediately
+				failed = repair_make_empty_file(file_path);
+				if (!failed)
+					files[num].state = 0;
+				repaired_num += report_simple_result(ascii_buf, failed);
+			}
+			break;
+		case 16:	// appended
+			failed = (shorten_file(file_path, files[num].size) != 0);
+			if (!failed)
+				files[num].state = 0;
+			repaired_num += report_simple_result(ascii_buf, failed);
+			break;
+		case 32:	// move back to the original location, or correct the file name
+		case 40:
+		case 96:	// move a folder back to the original location
+			wcscpy(old_path + base_len, list_buf + files[num].name2);
+			failed = (replace_file(file_path, old_path) != 0);
+			if (!failed)
+				files[num].state &= 0x40;	// becomes 0 or 64
+			repaired_num += report_simple_result(ascii_buf, failed);
+			break;
+		case 65:	// the folder is missing
+			failed = repair_make_folder(file_path);
+			if (!failed)
+				files[num].state = 64;
+			repaired_num += report_simple_result(ascii_buf, failed);
+			break;
+		}
+	}
+
+	if (need_repair & 0x10000000)	// number of files that could be restored
+		printf("\nRestored file count\t: %d\n", repaired_num);
+
+	return need_repair & 0x0FFFFFFF;
+}
+
+// Reverse-calculate 4-byte source blocks from their CRC-32 and write them to the source files.
+//   file_path : work buffer; receives base_dir plus the name of the file being written
+//   files     : information about each source file
+//   s_blk     : information about each source block (crc and size are read)
+// Returns 0 on success, 1 on a file error, 2 when print_progress() reports a cancel.
+int restore_block4(
+	wchar_t *file_path,
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk)	// information about each source block
+{
+	int i, num, b_last;
+	unsigned int data;
+	unsigned int time_last = 0;
+	DWORD written;	// WriteFile() requires a DWORD pointer for the byte count
+	HANDLE hFile;
+
+	print_progress_text(0, "Restoring slice");
+	wcscpy(file_path, base_dir);
+	for (num = 0; num < entity_num; num++){
+		// progress display
+		if (GetTickCount() - time_last >= UPDATE_TIME){
+			if (print_progress((num * 1000) / entity_num))
+				return 2;
+			time_last = GetTickCount();
+		}
+
+		if ((files[num].size > 0) && ((files[num].state & 0x80) == 0) &&
+				((files[num].state & 3) != 0)){	// an incomplete file that has checksums
+			if (files[num].state & 4){	// the damaged file is overwritten in place
+				// re-create the source file (all of its old data is lost)
+				wcscpy(file_path + base_len, list_buf + files[num].name);
+				hFile = CreateFile(file_path, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL);
+			} else {
+				// open the working file
+				hFile = handle_temp_file(list_buf + files[num].name, file_path);
+			}
+			if (hFile == INVALID_HANDLE_VALUE)
+				return 1;
+
+			// write the reverse-calculated source blocks one after another
+			b_last = files[num].b_off + files[num].b_num;
+			for (i = files[num].b_off; i < b_last; i++){
+				data = crc_reverse_zero(s_blk[i].crc, 4);	// derive the block content from its CRC-32
+				if (!WriteFile(hFile, &data, s_blk[i].size, &written, NULL)){
+					print_win32_err();
+					CloseHandle(hFile);
+					return 1;
+				}
+			}
+			CloseHandle(hFile);
+		}
+	}
+	print_progress_done();	// break the line and return to its start
+
+	return 0;
+}
+
+// Restore source blocks that need no parity: blocks that are all zero (exist == 3),
+// blocks identical to another available block (exist == 4, copied from it), and
+// blocks whose content can be derived from the CRC-32 (exist == 5).
+//   file_path : work buffer; receives base_dir plus the name of the file in use
+//   reuse_num : number of reusable source blocks (denominator of the progress display)
+//   files     : information about each source file
+//   s_blk     : information about each source block; .file is set to the owning file
+// Returns 0 on success, 1 on a file error, 2 when print_progress() reports a cancel.
+// All file handles opened here are closed on every return path.
+int restore_block(
+	wchar_t *file_path,
+	int reuse_num,			// number of reusable source blocks
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk)	// information about each source block
+{
+	int i, num, src_blk, src_file;
+	unsigned int data;
+	unsigned int time_last = 0, prog_num = 0;
+	__int64 file_off;
+	HANDLE hFile, hFile_src;
+
+	print_progress_text(0, "Restoring slice");
+	wcscpy(file_path, base_dir);
+	for (num = 0; num < entity_num; num++){
+		if ((files[num].size > 0) && ((files[num].state & 0x80) == 0) &&
+				((files[num].state & 3) != 0)){	// checksums and a working file exist
+			hFile = NULL;
+
+			// copy the available source blocks one after another
+			i = files[num].b_off;
+			for (file_off = 0; file_off < files[num].size; file_off += block_size){
+				if ((s_blk[i].exist >= 3) && (s_blk[i].exist <= 5)){
+					if (hFile == NULL){	// the destination file is not open yet
+						if (files[num].state & 4){	// the damaged file is overwritten in place
+							// open the source file for overwriting
+							hFile = handle_write_file(list_buf + files[num].name, file_path, files[num].size);
+						} else {
+							// open the working file
+							hFile = handle_temp_file(list_buf + files[num].name, file_path);
+						}
+						if (hFile == INVALID_HANDLE_VALUE)
+							return 1;
+					}
+
+					switch (s_blk[i].exist){
+					case 3:	// block whose content is all zero
+						// fill with zero
+						if (file_fill_data(hFile, file_off, 0, s_blk[i].size)){
+							CloseHandle(hFile);
+							printf("file_fill_data, %d\n", i);
+							return 1;
+						}
+						break;
+					case 4:	// identical block that exists in the same or another file
+						src_blk = s_blk[i].file;	// s_blk[i].file holds the index of that block
+						src_file = s_blk[src_blk].file;
+						if (files[src_file].state & 7){	// the source is a missing or damaged file
+							if (files[src_file].state & 4){	// read from the damaged file being overwritten
+								wcscpy(file_path + base_len, list_buf + files[src_file].name);
+							} else {	// read from the rebuilt working file
+								get_temp_name(list_buf + files[src_file].name, file_path + base_len);
+							}
+							hFile_src = CreateFile(file_path, GENERIC_READ, FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
+						} else {
+							if (files[src_file].state & 0x20){	// name correction failed: read from the misnamed file
+								wcscpy(file_path + base_len, list_buf + files[src_file].name2);
+							} else {	// read from the complete source file (also when removing appended data failed)
+								wcscpy(file_path + base_len, list_buf + files[src_file].name);
+							}
+							hFile_src = CreateFile(file_path, GENERIC_READ, 0, NULL, OPEN_EXISTING, 0, NULL);
+						}
+						if (hFile_src == INVALID_HANDLE_VALUE){
+							print_win32_err();
+							CloseHandle(hFile);
+							printf_cp("cannot open file, %s\n", file_path);
+							return 1;
+						}
+						// copy the block
+						if (file_copy_data(hFile_src, (__int64)(src_blk - files[src_file].b_off) * (__int64)block_size,
+								hFile, file_off, s_blk[i].size)){
+							print_win32_err();	// report before the handles are closed
+							CloseHandle(hFile_src);	// was leaked on this path before
+							CloseHandle(hFile);
+							printf("file_copy_data, %d\n", i);
+							return 1;
+						}
+						CloseHandle(hFile_src);
+						break;
+					case 5:	// block whose content can be reverse-calculated
+						data = crc_reverse_zero(s_blk[i].crc, block_size);	// derive the block content from its CRC-32
+						if (file_write_data(hFile, file_off, (unsigned char *)(&data), s_blk[i].size)){
+							CloseHandle(hFile);
+							printf("file_write_data, %d\n", i);
+							return 1;
+						}
+						break;
+					}
+					s_blk[i].file = num;
+
+					// progress display
+					prog_num++;
+					if (GetTickCount() - time_last >= UPDATE_TIME){
+						if (print_progress((prog_num * 1000) / reuse_num)){
+							CloseHandle(hFile);	// do not leave the destination open on cancel
+							return 2;
+						}
+						time_last = GetTickCount();
+					}
+				}
+				i++;
+			}
+			if (hFile)
+				CloseHandle(hFile);
+		}
+	}
+	print_progress_done();	// break the line and return to its start
+
+	return 0;
+}
+
+// Check whether each repaired file is now correct, print the result, and put verified working files in place; returns 0 normally and 2 when the check was cancelled
+int verify_repair(
+	wchar_t *file_path,	// work buffer; receives base_dir plus the file name
+	char *ascii_buf,	// work buffer for file names converted with cp_output
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk)	// information about each source block
+{
+	wchar_t temp_path[MAX_LEN];
+	int i, num, b_last, bad_flag, repaired_num;
+
+	repaired_num = 0;
+	wcscpy(file_path, base_dir);
+	for (num = 0; num < entity_num; num++){
+		if (files[num].size == 0)
+			continue;	// empty files are not verified
+
+		if (files[num].state & 4){	// the damaged file was repaired by overwriting it
+			bad_flag = 0;
+			// open it again
+			wcscpy(file_path + base_len, list_buf + files[num].name);
+			if (files[num].state & 0x80){	// source file whose checksums are missing
+				i = file_hash_direct(num, file_path, list_buf + files[num].name, files, NULL);
+			} else {
+				i = file_hash_direct(num, file_path, list_buf + files[num].name, files, s_blk);
+			}
+			if (i == -2)
+				return 2;	// cancelled during verification
+			if (i == -4){
+				bad_flag = 1;	// Missing
+			} else if (i != -3){	// -3 is the only result treated as success here
+				bad_flag = 2;	// Failed
+			}
+
+			// show the result
+			utf16_to_cp(list_buf + files[num].name, ascii_buf, cp_output);
+			if (bad_flag){	// failure
+				printf(" Failed   : \"%s\"\n", ascii_buf);
+			} else {	// repaired successfully
+				write_ini_complete(num, file_path);
+				files[num].state &= 0x80;
+				b_last = files[num].b_off + files[num].b_num;
+				for (i = files[num].b_off; i < b_last; i++){
+					if (s_blk[i].exist == 0)
+						first_num++;	// count of restored blocks
+					s_blk[i].exist = 1;
+				}
+				printf(" Repaired : \"%s\"\n", ascii_buf);
+				repaired_num++;
+			}
+			fflush(stdout);
+
+		} else if (files[num].state & 3){	// the source file was newly rebuilt as a working file
+			bad_flag = 0;
+			// open it again
+			wcscpy(file_path + base_len, list_buf + files[num].name);
+			get_temp_name(file_path, temp_path);
+			if (files[num].state & 0x80){	// source file whose checksums are missing
+				i = file_hash_direct(num, temp_path, list_buf + files[num].name, files, NULL);
+			} else {
+				i = file_hash_direct(num, temp_path, list_buf + files[num].name, files, s_blk);
+			}
+			if (i == -2)
+				return 2;	// cancelled during verification
+			if (i == -4){
+				bad_flag = 1;	// Missing
+			} else if (i != -3){	// -3 is the only result treated as success here
+				bad_flag = 2;	// Failed
+			}
+
+			// show the result
+			utf16_to_cp(list_buf + files[num].name, ascii_buf, cp_output);
+			if (bad_flag){	// failure
+				if (((files[num].state & 0x80) == 0) &&
+						(bad_flag == 2) && ((switch_b & 4) != 0)){	// replace the original file even though the repair failed
+					// check whether any complete block is included
+					b_last = files[num].b_off + files[num].b_num;
+					for (i = files[num].b_off; i < b_last; i++){
+						if (s_blk[i].exist != 0){
+							i = -1;	// the loop index doubles as a "found" flag
+							break;
+						}
+					}
+					if (i < 0){	// substitute a missing file, or replace a damaged file
+						if (replace_file(file_path, temp_path) == 0){
+							files[num].state = 2;	// mark as a damaged file
+							printf(" Replaced : \"%s\"\n", ascii_buf);
+							fflush(stdout);
+							continue;
+						}
+					}
+				}
+				printf(" Failed   : \"%s\"\n", ascii_buf);
+			} else {	// repaired successfully
+				if (replace_file(file_path, temp_path) == 0){	// put the repaired file back
+					write_ini_complete(num, file_path);
+					files[num].state &= 0x80;
+					b_last = files[num].b_off + files[num].b_num;
+					for (i = files[num].b_off; i < b_last; i++){
+						if (s_blk[i].exist == 0)
+							first_num++;	// count of restored blocks
+						s_blk[i].exist = 1;
+					}
+					printf(" Repaired : \"%s\"\n", ascii_buf);
+					repaired_num++;
+				} else {	// could not be put back: treat it as renamed and keep the repaired file
+					files[num].state = (files[num].state & 0x80) | 0x20;
+					printf(" Locked   : \"%s\"\n", ascii_buf);
+				}
+			}
+			fflush(stdout);
+		}
+	}
+
+	// number of repaired files and number of available blocks after the repair
+	printf("\nRepaired file count\t: %d\n", repaired_num);
+	printf("Input File Slice avail\t: %d\n", first_num);
+
+	return 0;
+}
+
+// Delete the working (temporary) source files.
+//   file_path : work buffer; receives base_dir plus the temporary file name
+//   files     : information about each source file
+// A working file is deleted for every non-empty recovery-set file whose state has
+// bit 1 or 2 set and bit 4 clear. Failures are ignored, except that a sharing
+// violation is retried once after a short wait.
+void delete_work_file(
+	wchar_t *file_path,
+	file_ctx_r *files)		// information about each source file
+{
+	int idx;
+
+	wcscpy(file_path, base_dir);
+	for (idx = 0; idx < entity_num; idx++){
+		if (files[idx].size == 0)
+			continue;
+		if (((files[idx].state & 3) == 0) || ((files[idx].state & 4) != 0))
+			continue;	// no working file exists for this entry
+
+		get_temp_name(list_buf + files[idx].name, file_path + base_len);
+		if (DeleteFile(file_path) != 0)
+			continue;	// deleted
+
+		// Anti-virus software may scan and lock a file right after it was written.
+		if (GetLastError() == ERROR_SHARING_VIOLATION){
+			Sleep(100);	// wait a little and try once more
+			DeleteFile(file_path);
+		}
+	}
+}
+
+// Even when block-level recovery was not possible, put the rebuilt (still
+// incomplete) working file in place of the missing or damaged source file.
+//   file_path : work buffer; receives base_dir plus the file name
+//   ascii_buf : work buffer for file names converted with cp_output
+//   files     : information about each source file; state becomes 2 when replaced
+//   s_blk     : information about each source block
+void replace_incomplete(
+	wchar_t *file_path,
+	char *ascii_buf,
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk)	// information about each source block
+{
+	wchar_t temp_path[MAX_LEN];
+	int blk, num, b_last, has_block;
+	int heading_shown = 0;
+
+	wcscpy(file_path, base_dir);
+	for (num = 0; num < entity_num; num++){
+		// only files that have checksums and a working file
+		if (files[num].size <= 0)
+			continue;
+		if ((files[num].state & 0x80) != 0)
+			continue;
+		if ((files[num].state & 3) == 0)
+			continue;
+
+		// is there at least one complete block?
+		has_block = 0;
+		b_last = files[num].b_off + files[num].b_num;
+		for (blk = files[num].b_off; blk < b_last; blk++){
+			if (s_blk[blk].exist != 0){
+				has_block = 1;
+				break;
+			}
+		}
+		if (!has_block)
+			continue;	// not a single complete block was found
+
+		// substitute a missing file, or replace a damaged file, with the working file
+		wcscpy(file_path + base_len, list_buf + files[num].name);
+		get_temp_name(file_path, temp_path);
+		if (replace_file(file_path, temp_path) != 0)
+			continue;
+
+		files[num].state = 2;	// mark as a damaged file
+		if (!heading_shown){
+			printf("\nPutting incomplete file :\n");
+			printf(" Status   :  Filename\n");
+			heading_shown = 1;
+		}
+		utf16_to_cp(list_buf + files[num].name, ascii_buf, cp_output);
+		printf(" Replaced : \"%s\"\n", ascii_buf);
+	}
+}
+
+// Delete the recovery files listed in recv2_buf (ones that were in Useless state are ignored).
+// recv2_buf is walked as a sequence of zero-terminated names, recv2_len elements long.
+// Prints the number of deleted files and returns the number of failed deletions.
+int purge_recovery_file(void)
+{
+	int deleted = 0, failed = 0;
+	int pos = 0;
+
+	while (pos < recv2_len){
+		// tries the recycle bin first; falls back to a normal delete on failure
+		if (delete_file_recycle(recv2_buf + pos) != 0){
+			failed++;
+		} else {
+			deleted++;
+		}
+
+		// advance to the start of the next name
+		for (; recv2_buf[pos] != 0; pos++)
+			;
+		pos++;
+	}
+	printf("%d PAR File(s) Deleted\n", deleted);
+
+	return failed;
+}
+
--- a/source/par2j/repair.h
+++ b/source/par2j/repair.h
@@ -0,0 +1,71 @@
+#ifndef _REPAIR_H_
+#define _REPAIR_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// Check the source file information and tally it
+int set_file_data(
+	char *ascii_buf,	// work buffer
+	file_ctx_r *files);
+
+// Tally the verification results of the source files and decide how to repair them
+int result_file_state(
+	char *ascii_buf,
+	int *result,
+	int parity_now,
+	int recovery_lost,
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk);	// information about each source block
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Perform the simple repairs
+int simple_repair(
+	char *ascii_buf,
+	int need_repair,
+	file_ctx_r *files);		// information about each source file
+
+// Reverse-calculate 4-byte source blocks and write them to the source files
+int restore_block4(
+	wchar_t *file_path,
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk);	// information about each source block
+
+// Reuse or reverse-calculate source blocks and write them to the source files
+int restore_block(
+	wchar_t *file_path,
+	int reuse_num,			// number of reusable source blocks
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk);	// information about each source block
+
+// Check whether the repair succeeded and display the result
+int verify_repair(
+	wchar_t *file_path,
+	char *ascii_buf,
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk);	// information about each source block
+
+// Delete the working (temporary) source files
+void delete_work_file(
+	wchar_t *file_path,
+	file_ctx_r *files);		// information about each source file
+
+// Replace with the rebuilt file even when block-level recovery was not possible
+void replace_incomplete(
+	wchar_t *file_path,
+	char *ascii_buf,
+	file_ctx_r *files,		// information about each source file
+	source_ctx_r *s_blk);	// information about each source block
+
+// Delete the recovery files (ones that were in Useless state are ignored)
+int purge_recovery_file(void);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/source/par2j/res_par2j.rc
+++ b/source/par2j/res_par2j.rc
@@ -0,0 +1,26 @@
+1 RT_STRING ".\\source.cl"
+
+1 VERSIONINFO
+FILEVERSION 1,3,2,8
+PRODUCTVERSION 1,3,2,0
+FILEOS 0x40004
+FILETYPE 0x1
+{
+BLOCK "StringFileInfo"
+{
+	BLOCK "040904B0"
+	{
+		VALUE "FileDescription", "PAR2 client"
+		VALUE "LegalCopyright", "Copyright (C) 2023 Yutaka Sawada"
+		VALUE "ProductName", "par2j"
+		VALUE "FileVersion", "1.3.2.8"
+		VALUE "ProductVersion", "1.3.2.0"
+	}
+}
+
+BLOCK "VarFileInfo"
+{
+	VALUE "Translation", 0x0409 0x04B0
+}
+}
+
--- a/source/par2j/rs_decode.c
+++ b/source/par2j/rs_decode.c
--- a/source/par2j/rs_decode.h
+++ b/source/par2j/rs_decode.h
@@ -0,0 +1,57 @@
+#ifndef _RS_DECODE_H_
+#define _RS_DECODE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+int decode_method1(	// when there is only one source block
+	wchar_t *file_path,
+	HANDLE *rcv_hFile,		// handles of the recovery files
+	file_ctx_r *files,		// source file information
+	source_ctx_r *s_blk,	// source block information
+	parity_ctx_r *p_blk);	// parity block information
+
+int decode_method2(	// when all source data is read
+	wchar_t *file_path,
+	int block_lost,			// number of lost source blocks
+	HANDLE *rcv_hFile,		// handles of the recovery files
+	file_ctx_r *files,		// source file information
+	source_ctx_r *s_blk,	// source block information
+	parity_ctx_r *p_blk,		// parity block information
+	unsigned short *mat);
+
+int decode_method3(	// when all blocks to be restored can be kept
+	wchar_t *file_path,
+	int block_lost,			// number of lost source blocks
+	HANDLE *rcv_hFile,		// handles of the recovery files
+	file_ctx_r *files,		// source file information
+	source_ctx_r *s_blk,	// source block information
+	parity_ctx_r *p_blk,	// parity block information
+	unsigned short *mat);
+
+int decode_method4(	// when all blocks are kept in fragments (GPU support)
+	wchar_t *file_path,
+	int block_lost,			// number of lost source blocks
+	HANDLE *rcv_hFile,		// handles of the recovery files
+	file_ctx_r *files,		// source file information
+	source_ctx_r *s_blk,	// source block information
+	parity_ctx_r *p_blk,		// parity block information
+	unsigned short *mat);
+
+int decode_method5(	// when only the blocks to be restored are kept (GPU support)
+	wchar_t *file_path,
+	int block_lost,			// number of lost source blocks
+	HANDLE *rcv_hFile,		// handles of the recovery files
+	file_ctx_r *files,		// source file information
+	source_ctx_r *s_blk,	// source block information
+	parity_ctx_r *p_blk,	// parity block information
+	unsigned short *mat);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/source/par2j/rs_encode.c
+++ b/source/par2j/rs_encode.c
--- a/source/par2j/rs_encode.h
+++ b/source/par2j/rs_encode.h
@@ -0,0 +1,72 @@
+#ifndef _RS_ENCODE_H_
+#define _RS_ENCODE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+int encode_method1(	// used when there is only one source block
+	wchar_t *file_path,
+	unsigned char *header_buf,	// packet header of the Recovery Slice packet
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk,		// source block information
+	parity_ctx_c *p_blk);		// parity block information
+
+
+int encode_method2(	// used when all source data is read in
+	wchar_t *file_path,
+	unsigned char *header_buf,	// packet header of the Recovery Slice packet
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk,		// source block information
+	parity_ctx_c *p_blk,		// parity block information
+	unsigned short *constant);
+
+int encode_method3(	// used when all parity blocks are held and written at once
+	wchar_t *file_path,
+	wchar_t *recovery_path,		// working buffer
+	int packet_limit,			// limit on packet repetition in the recovery files
+	int block_distri,			// how parity blocks are distributed (the 3rd bit selects the numbering scheme)
+	int packet_num,				// number of common packets
+	unsigned char *common_buf,	// buffer of the common packets
+	int common_size,			// size of the common packet buffer
+	unsigned char *footer_buf,	// buffer of the trailing (footer) packets
+	int footer_size,			// size of the footer packet buffer
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk,		// source block information
+	unsigned short *constant);
+
+
+int encode_method4(	// used when all blocks are held in fragments (GPU supported)
+	wchar_t *file_path,
+	unsigned char *header_buf,	// packet header of the Recovery Slice packet
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk,		// source block information
+	parity_ctx_c *p_blk,		// parity block information
+	unsigned short *constant);	// open question from the author: reserve space for several blocks?
+
+int encode_method5(	// used when part of the source blocks and the parity blocks are held (GPU supported)
+	wchar_t *file_path,
+	wchar_t *recovery_path,		// working buffer
+	int packet_limit,			// limit on packet repetition in the recovery files
+	int block_distri,			// how parity blocks are distributed (the 3rd bit selects the numbering scheme)
+	int packet_num,				// number of common packets
+	unsigned char *common_buf,	// buffer of the common packets
+	int common_size,			// size of the common packet buffer
+	unsigned char *footer_buf,	// buffer of the trailing (footer) packets
+	int footer_size,			// size of the footer packet buffer
+	HANDLE *rcv_hFile,			// handles of the recovery files
+	file_ctx_c *files,			// source file information
+	source_ctx_c *s_blk,		// source block information
+	unsigned short *constant);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/source/par2j/search.c
+++ b/source/par2j/search.c
--- a/source/par2j/search.h
+++ b/source/par2j/search.h
@@ -0,0 +1,42 @@
+#ifndef _SEARCH_H_
+#define _SEARCH_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// Search for recovery files and add them to the file list
+int search_recovery_files(void);
+
+// Search for the Main packet, scanning backwards from the end
+int search_main_packet(
+	unsigned char *buf,		// working buffer; the File IDs are returned in it
+	unsigned char *set_id);	// receives the Recovery Set ID
+
+// Search for the file information packets
+int search_file_packet(
+	char *ascii_buf,
+	unsigned char *buf,		// working buffer
+	wchar_t *par_commentU,	// receives the Unicode comment
+	unsigned char *set_id,	// Recovery Set ID to check against
+	int flag_sanitize,		// non-zero = sanitize file names
+	file_ctx_r *files);		// information on each source file
+
+// Search for the packets used for repair
+int search_recovery_packet(
+	char *ascii_buf,
+	unsigned char *buf,		// working buffer
+	wchar_t *uni_buf,
+	unsigned char *set_id,	// Recovery Set ID to check against
+	HANDLE *rcv_hFile,		// handle of each recovery file (NULL when verifying)
+	file_ctx_r *files,		// information on each source file
+	source_ctx_r *s_blk,	// information on each source block
+	parity_ctx_r *p_blk);	// information on each parity block
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/source/par2j/source.cl
+++ b/source/par2j/source.cl
@@ -0,0 +1,187 @@
+void calc_table(__local uint *mtab, int id, int factor)	// fills the two table entries mtab[id] and mtab[id + 256] for one byte value id
+{
+	int i, sum = 0;
+
+	for (i = 0; i < 8; i++){	// one step per bit of id (only the low 8 bits of id are used here)
+		sum = (id & (1 << i)) ? (sum ^ factor) : sum;	// XOR in the current factor when bit i of id is set
+		factor = (factor & 0x8000) ? ((factor << 1) ^ 0x1100B) : (factor << 1);	// double factor; when bit 15 was set, XOR with 0x1100B cancels the bit shifted into position 16
+	}
+	mtab[id] = sum;	// presumably the GF(2^16) product factor * id with polynomial 0x1100B - confirm against the PAR2 spec
+
+	sum = (sum << 4) ^ (((sum << 16) >> 31) & 0x88058) ^ (((sum << 17) >> 31) & 0x4402C) ^ (((sum << 18) >> 31) & 0x22016) ^ (((sum << 19) >> 31) & 0x1100B);	// shift left by 4; bits 15..12 of the old sum select 0x1100B << 3, << 2, << 1, << 0 to cancel bits 19..16 (relies on sign-filling right shift of int)
+	sum = (sum << 4) ^ (((sum << 16) >> 31) & 0x88058) ^ (((sum << 17) >> 31) & 0x4402C) ^ (((sum << 18) >> 31) & 0x22016) ^ (((sum << 19) >> 31) & 0x1100B);	// same step again: in total the value has been shifted by 8 bits and reduced
+
+	mtab[id + 256] = sum;	// entry used by the kernels for the high byte of a 16-bit word
+}
+
+__kernel void method0(	// dst = XOR over all blocks of (table-based product of factors[blk] with each 16-bit word of block blk)
+	__global uint *src,	// read as blk_num consecutive blocks of BLK_SIZE uints
+	__global uint *dst,	// BLK_SIZE uints; cleared first, then accumulated into
+	__global ushort *factors,	// one 16-bit factor per block
+	int blk_num)	// number of blocks to process
+{
+	__local uint mtab[512];	// lookup table in local memory, rebuilt for every block
+	int i, blk;
+	uint v, sum;
+	const int work_id = get_global_id(0);
+	const int work_size = get_global_size(0);
+	const int table_id = get_local_id(0);	// table index this work-item fills in calc_table
+
+	for (i = work_id; i < BLK_SIZE; i += work_size)	// BLK_SIZE is not defined in this file; presumably supplied at build time
+		dst[i] = 0;
+
+	for (blk = 0; blk < blk_num; blk++){
+		calc_table(mtab, table_id, factors[blk]);	// NOTE(review): each work-item fills only entries table_id and table_id + 256, so the local size presumably has to be 256 - confirm on the host side
+		barrier(CLK_LOCAL_MEM_FENCE);	// the table must be complete before any lookup
+
+		for (i = work_id; i < BLK_SIZE; i += work_size){
+			v = src[i];	// two 16-bit words packed in one uint
+			sum = mtab[(uchar)(v >> 16)] ^ mtab[256 + (v >> 24)];	// upper word: low byte via mtab[0..255], high byte via mtab[256..511]
+			sum <<= 16;
+			sum ^= mtab[(uchar)v] ^ mtab[256 + (uchar)(v >> 8)];	// lower word, same two lookups
+			dst[i] ^= sum;
+		}
+		src += BLK_SIZE;	// advance to the next block
+		barrier(CLK_LOCAL_MEM_FENCE);	// all lookups must finish before the table is rebuilt
+	}
+}
+
+__kernel void method2(	// same accumulation as a per-word table multiply, but on data whose low and high bytes are stored in separate uints
+	__global uint *src,	// read as blk_num consecutive blocks of BLK_SIZE uints
+	__global uint *dst,	// BLK_SIZE uints; cleared first, then accumulated into
+	__global ushort *factors,	// one 16-bit factor per block
+	int blk_num)	// number of blocks to process
+{
+	__local uint mtab[512];	// lookup table in local memory, rebuilt for every block
+	int i, blk, pos;
+	uint lo, hi, sum1, sum2;
+	const int work_id = get_global_id(0) * 2;	// each work-item covers two uints per step
+	const int work_size = get_global_size(0) * 2;
+	const int table_id = get_local_id(0);	// table index this work-item fills in calc_table
+
+	for (i = work_id; i < BLK_SIZE; i += work_size){	// presumably BLK_SIZE is even, since dst[i + 1] is written unchecked - confirm
+		dst[i    ] = 0;
+		dst[i + 1] = 0;
+	}
+
+	for (blk = 0; blk < blk_num; blk++){
+		calc_table(mtab, table_id, factors[blk]);	// NOTE(review): presumably requires a local size of 256 so that all 512 entries get filled - confirm
+		barrier(CLK_LOCAL_MEM_FENCE);	// NOTE(review): dst[pos] / dst[pos + 4] below may have been zeroed by a different work-item; only a local fence separates the two - confirm this is sufficient
+
+		for (i = work_id; i < BLK_SIZE; i += work_size){
+			pos = (i & ~7) + ((i & 7) >> 1);	// i is even, so pos is one of the first 4 uints of the 8-uint group containing i
+			lo = src[pos    ];	// supplies the bytes looked up in mtab[0..255]
+			hi = src[pos + 4];	// supplies the bytes looked up in mtab[256..511]
+			sum1 = mtab[(uchar)(lo >> 16)] ^ mtab[256 + (uchar)(hi >> 16)];	// word built from byte 2 of lo and hi
+			sum2 = mtab[lo >> 24] ^ mtab[256 + (hi >> 24)];	// word built from byte 3 of lo and hi
+			sum1 <<= 16;
+			sum2 <<= 16;
+			sum1 ^= mtab[(uchar)lo] ^ mtab[256 + (uchar)hi];	// word built from byte 0
+			sum2 ^= mtab[(uchar)(lo >> 8)] ^ mtab[256 + (uchar)(hi >> 8)];	// word built from byte 1
+			dst[pos    ] ^= (sum1 & 0x00FF00FF) | ((sum2 & 0x00FF00FF) << 8);	// low bytes of the four results, in byte order 0..3
+			dst[pos + 4] ^= ((sum1 & 0xFF00FF00) >> 8) | (sum2 & 0xFF00FF00);	// high bytes of the four results, in byte order 0..3
+		}
+		src += BLK_SIZE;	// advance to the next block
+		barrier(CLK_LOCAL_MEM_FENCE);	// all lookups must finish before the table is rebuilt
+	}
+}
+
+__kernel void method3(	// split-byte table multiply-accumulate, done chunk by chunk (CHK_SIZE uints at a time) over all blocks
+	__global uint *src,	// read as blk_num consecutive blocks of BLK_SIZE uints
+	__global uint *dst,	// BLK_SIZE uints; each chunk is cleared, then accumulated into
+	__global ushort *factors,	// one 16-bit factor per block
+	int blk_num)	// number of blocks to process
+{
+	__global uint *blk_src;	// walks the same chunk position through successive blocks
+	__local uint mtab[512];	// lookup table in local memory, rebuilt for every block of every chunk
+	int i, blk, chk_size, remain, pos;
+	uint lo, hi, sum1, sum2;
+	const int work_id = get_global_id(0) * 2;	// each work-item covers two uints per step
+	const int work_size = get_global_size(0) * 2;
+	const int table_id = get_local_id(0);	// table index this work-item fills in calc_table
+
+	remain = BLK_SIZE;	// uints of the block still to be processed
+	chk_size = CHK_SIZE;	// CHK_SIZE is not defined in this file; presumably supplied at build time
+	while (remain > 0){
+		if (chk_size > remain)	// the last chunk may be shorter
+			chk_size = remain;
+
+		for (i = work_id; i < chk_size; i += work_size){	// clear this chunk of dst
+			dst[i    ] = 0;
+			dst[i + 1] = 0;
+		}
+
+		blk_src = src;
+		for (blk = 0; blk < blk_num; blk++){
+			calc_table(mtab, table_id, factors[blk]);	// NOTE(review): presumably requires a local size of 256 so that all 512 entries get filled - confirm
+			barrier(CLK_LOCAL_MEM_FENCE);	// NOTE(review): dst[pos] / dst[pos + 4] below may have been zeroed by a different work-item; only a local fence separates the two - confirm this is sufficient
+
+			for (i = work_id; i < chk_size; i += work_size){
+				pos = (i & ~7) + ((i & 7) >> 1);	// i is even, so pos is one of the first 4 uints of the 8-uint group containing i
+				lo = blk_src[pos    ];	// supplies the bytes looked up in mtab[0..255]
+				hi = blk_src[pos + 4];	// supplies the bytes looked up in mtab[256..511]
+				sum1 = mtab[(uchar)(lo >> 16)] ^ mtab[256 + (uchar)(hi >> 16)];	// word built from byte 2 of lo and hi
+				sum2 = mtab[lo >> 24] ^ mtab[256 + (hi >> 24)];	// word built from byte 3 of lo and hi
+				sum1 <<= 16;
+				sum2 <<= 16;
+				sum1 ^= mtab[(uchar)lo] ^ mtab[256 + (uchar)hi];	// word built from byte 0
+				sum2 ^= mtab[(uchar)(lo >> 8)] ^ mtab[256 + (uchar)(hi >> 8)];	// word built from byte 1
+				dst[pos    ] ^= (sum1 & 0x00FF00FF) | ((sum2 & 0x00FF00FF) << 8);	// low bytes of the four results, in byte order 0..3
+				dst[pos + 4] ^= ((sum1 & 0xFF00FF00) >> 8) | (sum2 & 0xFF00FF00);	// high bytes of the four results, in byte order 0..3
+			}
+			blk_src += BLK_SIZE;	// same chunk position in the next block
+			barrier(CLK_LOCAL_MEM_FENCE);	// all lookups must finish before the table is rebuilt
+		}
+
+		src += CHK_SIZE;	// move on to the next chunk
+		dst += CHK_SIZE;
+		remain -= CHK_SIZE;
+	}
+}
+
+__kernel void method4(	// table-free variant: accumulates into dst by masking and XORing 16 cached source uints per output uint
+	__global uint *src,	// read as blk_num consecutive blocks of BLK_SIZE uints
+	__global uint *dst,	// BLK_SIZE uints; cleared first, then accumulated into
+	__global ushort *factors,	// one 16-bit factor per block
+	int blk_num)	// number of blocks to process
+{
+	__local int table[16];	// factor and its 15 successive doublings, kept in the upper 16 bits
+	__local uint cache[256];	// local copy of the source uints currently being combined
+	int i, j, blk, pos, sht, mask;
+	uint sum;
+	const int work_id = get_global_id(0);
+	const int work_size = get_global_size(0);
+
+	for (i = work_id; i < BLK_SIZE; i += work_size)	// clear dst
+		dst[i] = 0;
+
+	for (blk = 0; blk < blk_num; blk++){
+		if (get_local_id(0) == 0){	// a single work-item per group builds the table
+			pos = factors[blk] << 16;	// factor placed in the upper 16 bits so the sign bit is its top bit
+			table[0] = pos;
+			for (j = 1; j < 16; j++){
+				pos = (pos << 1) ^ ((pos >> 31) & 0x100B0000);	// double; when the top bit was set, XOR with 0x100B << 16 (low 16 bits of 0x1100B)
+				table[j] = pos;
+			}
+		}
+		barrier(CLK_LOCAL_MEM_FENCE);	// table must be visible to the whole group
+
+		for (i = work_id; i < BLK_SIZE; i += work_size){	// NOTE(review): barriers inside this loop need every work-item of a group to run the same number of iterations - confirm BLK_SIZE / work size guarantee that
+			pos = i & 255;
+			cache[pos] = src[i];	// presumably the local size is 256 so each work-item owns one cache slot - confirm
+			barrier(CLK_LOCAL_MEM_FENCE);	// cache must be filled before it is read
+
+			sum = 0;
+			sht = (i & 60) >> 2;	// bits 2..5 of i: which bit of each table entry to test (0..15)
+			pos &= ~60;	// clear bits 2..5: first of 16 cache slots spaced 4 apart
+			for (j = 15; j >= 0; j--){
+				mask = (table[j] << sht) >> 31;	// all ones when bit (31 - sht) of table[j] is set, else zero
+				sum ^= mask & cache[pos];
+				pos += 4;
+			}
+			dst[i] ^= sum;
+			barrier(CLK_LOCAL_MEM_FENCE);	// all reads of cache finish before it is overwritten
+		}
+		src += BLK_SIZE;	// advance to the next block
+	}
+}
--- a/source/par2j/verify.c
+++ b/source/par2j/verify.c
--- a/source/par2j/verify.h
+++ b/source/par2j/verify.h
@@ -0,0 +1,63 @@
+#ifndef _VERIFY_H_
+#define _VERIFY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// Prepare for slice verification
+int init_verification(
+	file_ctx_r *files,		// information on each source file
+	source_ctx_r *s_blk,	// information on each source block
+	slice_ctx *sc);
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Search for the slices of a source file (split files too)
+int check_file_slice(
+	char *ascii_buf,		// working buffer
+	wchar_t *file_path,		// working buffer
+	int num,				// file number within file_ctx
+	file_ctx_r *files,		// information on each source file
+	source_ctx_r *s_blk,	// information on each source block
+	slice_ctx *sc);			// information used for slice verification
+
+// Search for slices contained in split files and similarly named files
+int search_file_split(
+	char *ascii_buf,		// working buffer
+	wchar_t *file_path,		// working buffer
+	int num,				// file number within file_ctx
+	file_ctx_r *files,		// information on each source file
+	source_ctx_r *s_blk,	// information on each source block
+	slice_ctx *sc);			// information used for slice verification
+
+// Verify the specified external files
+int check_external_file(
+	char *ascii_buf,		// working buffer
+	wchar_t *file_path,		// working buffer
+	file_ctx_r *files,		// information on each source file
+	source_ctx_r *s_blk,	// information on each source block
+	slice_ctx *sc);			// information used for slice verification
+
+// Search the base directory for source files whose names differ
+int search_additional_file(
+	char *ascii_buf,		// working buffer
+	wchar_t *find_path,		// working buffer
+	file_ctx_r *files,		// information on each source file
+	source_ctx_r *s_blk,	// information on each source block
+	slice_ctx *sc);			// information used for slice verification
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Back-calculate slices, or search for slices that are shared in common
+int search_calculable_slice(
+	file_ctx_r *files,		// information on each source file
+	source_ctx_r *s_blk);	// information on each source block
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/source/par2j/version.h
+++ b/source/par2j/version.h
@@ -0,0 +1,2 @@
+#define FILE_VERSION "1.3.2.8"	// version number of the file
+#define PRODUCT_VERSION "1.3.2"	// version number of the product