diff --git a/tests/generator_scripts/source/CudaKernels.harness.cpp b/tests/generator_scripts/source/CudaKernels.harness.cpp index b86a7c2146d9bd2cb30a8b91b03b7dd738208e33..e691129c347a9cfe5b45cae62953658654abcfeb 100644 --- a/tests/generator_scripts/source/CudaKernels.harness.cpp +++ b/tests/generator_scripts/source/CudaKernels.harness.cpp @@ -42,7 +42,8 @@ int main(void) std::mt19937 gen{rd()}; std::uniform_real_distribution<double> distrib{-1.0, 1.0}; - auto check = [&](std::function< void () > invoke) { + auto check = [&](std::function<void()> invoke) + { for (size_t i = 0; i < items; ++i) { data_src[i] = distrib(gen); @@ -62,24 +63,33 @@ int main(void) } }; - check([&]() { + check([&]() + { /* Linear3D Dynamic */ dim3 blockSize{64, 8, 1}; cudaStream_t stream; checkCudaError(cudaStreamCreate(&stream)); gen::linear3d::scaleKernel(blockSize, dst, src, stream); - checkCudaError(cudaStreamSynchronize(stream)); - }); + checkCudaError(cudaStreamSynchronize(stream)); }); + + check([&]() + { + /* Linear3D Automatic */ + cudaStream_t stream; + checkCudaError(cudaStreamCreate(&stream)); + gen::linear3d::scaleKernel(dst, src, stream); + checkCudaError(cudaStreamSynchronize(stream)); }); - check([&]() { + check([&]() + { /* Blockwise4D Automatic */ cudaStream_t stream; checkCudaError(cudaStreamCreate(&stream)); gen::blockwise4d::scaleKernel(dst, src, stream); - checkCudaError(cudaStreamSynchronize(stream)); - }); + checkCudaError(cudaStreamSynchronize(stream)); }); - check([&]() { + check([&]() + { /* Linear3D Manual */ dim3 blockSize{32, 8, 1}; dim3 gridSize{5, 4, 23}; @@ -87,18 +97,17 @@ int main(void) cudaStream_t stream; checkCudaError(cudaStreamCreate(&stream)); gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream); - checkCudaError(cudaStreamSynchronize(stream)); - }); + checkCudaError(cudaStreamSynchronize(stream)); }); - check([&]() { + check([&]() + { /* Blockwise4D Manual */ dim3 blockSize{132, 1, 1}; dim3 gridSize{25, 23, 1}; cudaStream_t stream; checkCudaError(cudaStreamCreate(&stream)); gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream); - checkCudaError(cudaStreamSynchronize(stream)); - }); + checkCudaError(cudaStreamSynchronize(stream)); }); checkCudaError(cudaFree(data_src)); checkCudaError(cudaFree(data_dst)); diff --git a/tests/generator_scripts/source/CudaKernels.py b/tests/generator_scripts/source/CudaKernels.py index dc7e643bab0d33aa957d3d7957f679c0d9729086..8572782bba7f1d924726394d4326489154c13235 100644 --- a/tests/generator_scripts/source/CudaKernels.py +++ b/tests/generator_scripts/source/CudaKernels.py @@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg: sfg.gpu_invoke(khandle, block_size=block_size, stream=stream), ) + with sfg.namespace("linear3d_automatic"): + cfg = base_config.copy() + cfg.gpu.indexing_scheme = "linear3d" + khandle = sfg.kernels.create(asm, "scale", cfg) + + sfg.function("scaleKernel")( + sfg.map_field( + src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right") + ), + sfg.map_field( + dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") + ), + sfg.gpu_invoke(khandle, stream=stream), + ) + with sfg.namespace("blockwise4d"): cfg = base_config.copy() cfg.gpu.indexing_scheme = "blockwise4d" @@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg: sfg.map_field( dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") ), - sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream), + sfg.gpu_invoke( + khandle, block_size=block_size, grid_size=grid_size, stream=stream + ), ) with sfg.namespace("blockwise4d_manual"): @@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg: sfg.map_field( dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") ), - sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream), + sfg.gpu_invoke( + khandle, block_size=block_size, grid_size=grid_size, stream=stream + ), ) diff --git a/tests/generator_scripts/source/HipKernels.harness.cpp b/tests/generator_scripts/source/HipKernels.harness.cpp index b6d2d2dd6a47671ca50a082f8357fe8f96b88b64..2bf7b83c1dae274c16b5f5e286e3edd6024280e5 100644 --- a/tests/generator_scripts/source/HipKernels.harness.cpp +++ b/tests/generator_scripts/source/HipKernels.harness.cpp @@ -42,7 +42,8 @@ int main(void) std::mt19937 gen{rd()}; std::uniform_real_distribution<double> distrib{-1.0, 1.0}; - auto check = [&](std::function< void () > invoke) { + auto check = [&](std::function<void()> invoke) + { for (size_t i = 0; i < items; ++i) { data_src[i] = distrib(gen); @@ -62,24 +63,33 @@ int main(void) } }; - check([&]() { + check([&]() + { /* Linear3D Dynamic */ dim3 blockSize{64, 8, 1}; hipStream_t stream; checkHipError(hipStreamCreate(&stream)); gen::linear3d::scaleKernel(blockSize, dst, src, stream); - checkHipError(hipStreamSynchronize(stream)); - }); + checkHipError(hipStreamSynchronize(stream)); }); + + check([&]() + { + /* Linear3D Automatic */ + hipStream_t stream; + checkHipError(hipStreamCreate(&stream)); + gen::linear3d_automatic::scaleKernel(dst, src, stream); + checkHipError(hipStreamSynchronize(stream)); }); - check([&]() { + check([&]() + { /* Blockwise4D Automatic */ hipStream_t stream; checkHipError(hipStreamCreate(&stream)); gen::blockwise4d::scaleKernel(dst, src, stream); - checkHipError(hipStreamSynchronize(stream)); - }); + checkHipError(hipStreamSynchronize(stream)); }); - check([&]() { + check([&]() + { /* Linear3D Manual */ dim3 blockSize{32, 8, 1}; dim3 gridSize{5, 4, 23}; @@ -87,18 +97,17 @@ int main(void) hipStream_t stream; checkHipError(hipStreamCreate(&stream)); gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream); - checkHipError(hipStreamSynchronize(stream)); - }); + checkHipError(hipStreamSynchronize(stream)); }); - check([&]() { + check([&]() + { /* Blockwise4D Manual */ dim3 blockSize{132, 1, 1}; dim3 gridSize{25, 23, 1}; hipStream_t stream; checkHipError(hipStreamCreate(&stream)); gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream); - checkHipError(hipStreamSynchronize(stream)); - }); + checkHipError(hipStreamSynchronize(stream)); }); checkHipError(hipFree(data_src)); checkHipError(hipFree(data_dst)); diff --git a/tests/generator_scripts/source/HipKernels.py b/tests/generator_scripts/source/HipKernels.py index 35315b8eeda88560a07f7bd54dc4639f7b7eb13d..78464b5be1e4b24863a8cab62d96fd55ba60d544 100644 --- a/tests/generator_scripts/source/HipKernels.py +++ b/tests/generator_scripts/source/HipKernels.py @@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg: sfg.gpu_invoke(khandle, block_size=block_size, stream=stream), ) + with sfg.namespace("linear3d_automatic"): + cfg = base_config.copy() + cfg.gpu.indexing_scheme = "linear3d" + khandle = sfg.kernels.create(asm, "scale", cfg) + + sfg.function("scaleKernel")( + sfg.map_field( + src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right") + ), + sfg.map_field( + dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") + ), + sfg.gpu_invoke(khandle, stream=stream), + ) + with sfg.namespace("blockwise4d"): cfg = base_config.copy() cfg.gpu.indexing_scheme = "blockwise4d" @@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg: sfg.map_field( dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") ), - sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream), + sfg.gpu_invoke( + khandle, block_size=block_size, grid_size=grid_size, stream=stream + ), ) with sfg.namespace("blockwise4d_manual"): @@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg: sfg.map_field( dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") ), - sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream), + sfg.gpu_invoke( + khandle, block_size=block_size, grid_size=grid_size, stream=stream + ), )