Skip to content
Snippets Groups Projects
Commit 8fea4416 authored by Frederik Hennig
Browse files

test automatic block size in linear3d

parent 455b455e
Branches
No related tags found
1 merge request: !24 "Extend Support for CUDA and HIP kernel invocations"
Pipeline #75638 canceled
...@@ -42,7 +42,8 @@ int main(void) ...@@ -42,7 +42,8 @@ int main(void)
std::mt19937 gen{rd()}; std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0}; std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function< void () > invoke) { auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i) for (size_t i = 0; i < items; ++i)
{ {
data_src[i] = distrib(gen); data_src[i] = distrib(gen);
...@@ -62,24 +63,33 @@ int main(void) ...@@ -62,24 +63,33 @@ int main(void)
} }
}; };
check([&]() { check([&]()
{
/* Linear3D Dynamic */ /* Linear3D Dynamic */
dim3 blockSize{64, 8, 1}; dim3 blockSize{64, 8, 1};
cudaStream_t stream; cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream)); checkCudaError(cudaStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream); gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); checkCudaError(cudaStreamSynchronize(stream)); });
});
check([&]()
{
    /* Linear3D Automatic: no block size is passed; the generated wrapper is
     * invoked with only the fields and the stream. */
    cudaStream_t stream;
    checkCudaError(cudaStreamCreate(&stream));
    // The wrapper without a block-size parameter is generated into the
    // `linear3d_automatic` namespace; `gen::linear3d::scaleKernel` is called
    // with a leading block size elsewhere in this test.
    gen::linear3d_automatic::scaleKernel(dst, src, stream);
    checkCudaError(cudaStreamSynchronize(stream));
    // Release the stream created above so it is not leaked.
    checkCudaError(cudaStreamDestroy(stream));
});
check([&]() { check([&]()
{
/* Blockwise4D Automatic */ /* Blockwise4D Automatic */
cudaStream_t stream; cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream)); checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream); gen::blockwise4d::scaleKernel(dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); checkCudaError(cudaStreamSynchronize(stream)); });
});
check([&]() { check([&]()
{
/* Linear3D Manual */ /* Linear3D Manual */
dim3 blockSize{32, 8, 1}; dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23}; dim3 gridSize{5, 4, 23};
...@@ -87,18 +97,17 @@ int main(void) ...@@ -87,18 +97,17 @@ int main(void)
cudaStream_t stream; cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream)); checkCudaError(cudaStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream); gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); checkCudaError(cudaStreamSynchronize(stream)); });
});
check([&]() { check([&]()
{
/* Blockwise4D Manual */ /* Blockwise4D Manual */
dim3 blockSize{132, 1, 1}; dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1}; dim3 gridSize{25, 23, 1};
cudaStream_t stream; cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream)); checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream); gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); checkCudaError(cudaStreamSynchronize(stream)); });
});
checkCudaError(cudaFree(data_src)); checkCudaError(cudaFree(data_src));
checkCudaError(cudaFree(data_dst)); checkCudaError(cudaFree(data_dst));
......
...@@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg: ...@@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg:
sfg.gpu_invoke(khandle, block_size=block_size, stream=stream), sfg.gpu_invoke(khandle, block_size=block_size, stream=stream),
) )
# Emit the `linear3d_automatic` namespace: a `scaleKernel` wrapper that is
# invoked without an explicit block size (only the stream is passed).
with sfg.namespace("linear3d_automatic"):
# Start from a copy of the shared base configuration and select linear3d indexing.
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "linear3d"
# Create the kernel named "scale" from `asm` with this configuration.
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
# Map the `src` field to an mdspan reference with layout_right.
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
# Map the `dst` field to an mdspan reference with layout_right.
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
# No block_size/grid_size arguments here, unlike the other invocations in this script.
sfg.gpu_invoke(khandle, stream=stream),
)
with sfg.namespace("blockwise4d"): with sfg.namespace("blockwise4d"):
cfg = base_config.copy() cfg = base_config.copy()
cfg.gpu.indexing_scheme = "blockwise4d" cfg.gpu.indexing_scheme = "blockwise4d"
...@@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg: ...@@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg:
sfg.map_field( sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
), ),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream), sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
) )
with sfg.namespace("blockwise4d_manual"): with sfg.namespace("blockwise4d_manual"):
...@@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg: ...@@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg:
sfg.map_field( sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
), ),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream), sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
) )
...@@ -42,7 +42,8 @@ int main(void) ...@@ -42,7 +42,8 @@ int main(void)
std::mt19937 gen{rd()}; std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0}; std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function< void () > invoke) { auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i) for (size_t i = 0; i < items; ++i)
{ {
data_src[i] = distrib(gen); data_src[i] = distrib(gen);
...@@ -62,24 +63,33 @@ int main(void) ...@@ -62,24 +63,33 @@ int main(void)
} }
}; };
check([&]() { check([&]()
{
/* Linear3D Dynamic */ /* Linear3D Dynamic */
dim3 blockSize{64, 8, 1}; dim3 blockSize{64, 8, 1};
hipStream_t stream; hipStream_t stream;
checkHipError(hipStreamCreate(&stream)); checkHipError(hipStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream); gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkHipError(hipStreamSynchronize(stream)); checkHipError(hipStreamSynchronize(stream)); });
});
check([&]()
{
    /* Linear3D Automatic: no block size is passed; the generated wrapper is
     * invoked with only the fields and the stream. */
    hipStream_t stream;
    checkHipError(hipStreamCreate(&stream));
    gen::linear3d_automatic::scaleKernel(dst, src, stream);
    checkHipError(hipStreamSynchronize(stream));
    // Release the stream created above so it is not leaked.
    checkHipError(hipStreamDestroy(stream));
});
check([&]() { check([&]()
{
/* Blockwise4D Automatic */ /* Blockwise4D Automatic */
hipStream_t stream; hipStream_t stream;
checkHipError(hipStreamCreate(&stream)); checkHipError(hipStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream); gen::blockwise4d::scaleKernel(dst, src, stream);
checkHipError(hipStreamSynchronize(stream)); checkHipError(hipStreamSynchronize(stream)); });
});
check([&]() { check([&]()
{
/* Linear3D Manual */ /* Linear3D Manual */
dim3 blockSize{32, 8, 1}; dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23}; dim3 gridSize{5, 4, 23};
...@@ -87,18 +97,17 @@ int main(void) ...@@ -87,18 +97,17 @@ int main(void)
hipStream_t stream; hipStream_t stream;
checkHipError(hipStreamCreate(&stream)); checkHipError(hipStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream); gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkHipError(hipStreamSynchronize(stream)); checkHipError(hipStreamSynchronize(stream)); });
});
check([&]() { check([&]()
{
/* Blockwise4D Manual */ /* Blockwise4D Manual */
dim3 blockSize{132, 1, 1}; dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1}; dim3 gridSize{25, 23, 1};
hipStream_t stream; hipStream_t stream;
checkHipError(hipStreamCreate(&stream)); checkHipError(hipStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream); gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkHipError(hipStreamSynchronize(stream)); checkHipError(hipStreamSynchronize(stream)); });
});
checkHipError(hipFree(data_src)); checkHipError(hipFree(data_src));
checkHipError(hipFree(data_dst)); checkHipError(hipFree(data_dst));
......
...@@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg: ...@@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg:
sfg.gpu_invoke(khandle, block_size=block_size, stream=stream), sfg.gpu_invoke(khandle, block_size=block_size, stream=stream),
) )
# Emit the `linear3d_automatic` namespace: a `scaleKernel` wrapper that is
# invoked without an explicit block size (only the stream is passed).
with sfg.namespace("linear3d_automatic"):
# Start from a copy of the shared base configuration and select linear3d indexing.
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "linear3d"
# Create the kernel named "scale" from `asm` with this configuration.
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
# Map the `src` field to an mdspan reference with layout_right.
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
# Map the `dst` field to an mdspan reference with layout_right.
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
# No block_size/grid_size arguments here, unlike the other invocations in this script.
sfg.gpu_invoke(khandle, stream=stream),
)
with sfg.namespace("blockwise4d"): with sfg.namespace("blockwise4d"):
cfg = base_config.copy() cfg = base_config.copy()
cfg.gpu.indexing_scheme = "blockwise4d" cfg.gpu.indexing_scheme = "blockwise4d"
...@@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg: ...@@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg:
sfg.map_field( sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
), ),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream), sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
) )
with sfg.namespace("blockwise4d_manual"): with sfg.namespace("blockwise4d_manual"):
...@@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg: ...@@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg:
sfg.map_field( sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right") dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
), ),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream), sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
) )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment