Skip to content
Snippets Groups Projects
Commit 8fea4416 authored by Frederik Hennig
Browse files

test automatic block size in linear3d

parent 455b455e
No related branches found
No related tags found
1 merge request: !24 Extend Support for CUDA and HIP kernel invocations
Pipeline #75638 canceled
......@@ -42,7 +42,8 @@ int main(void)
std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function< void () > invoke) {
auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i)
{
data_src[i] = distrib(gen);
......@@ -62,24 +63,33 @@ int main(void)
}
};
check([&]() {
check([&]()
{
/* Linear3D Dynamic */
dim3 blockSize{64, 8, 1};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]()
{
    /* Linear3D Automatic
     *
     * Calls the wrapper generated in namespace `linear3d_automatic`, which
     * takes no block size argument (only dst, src and the stream). The
     * `linear3d` namespace holds the "Dynamic" variant that requires an
     * explicit blockSize, so it must not be used here.
     */
    cudaStream_t stream;
    checkCudaError(cudaStreamCreate(&stream));
    gen::linear3d_automatic::scaleKernel(dst, src, stream);
    checkCudaError(cudaStreamSynchronize(stream));
    /* Release the stream once the kernel has finished so that it is not leaked. */
    checkCudaError(cudaStreamDestroy(stream));
});
check([&]() {
check([&]()
{
/* Blockwise4D Automatic */
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Linear3D Manual */
dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23};
......@@ -87,18 +97,17 @@ int main(void)
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Blockwise4D Manual */
dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
checkCudaError(cudaFree(data_src));
checkCudaError(cudaFree(data_dst));
......
......@@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg:
sfg.gpu_invoke(khandle, block_size=block_size, stream=stream),
)
# Namespace `linear3d_automatic`: emits a `scaleKernel` wrapper around the
# "scale" kernel created with the "linear3d" GPU indexing scheme.
# Unlike the wrappers that pass `block_size` (and `grid_size`) to `gpu_invoke`,
# this one passes only the stream.
# NOTE(review): presumably the launch configuration is then chosen
# automatically by the generated code -- confirm against the `gpu_invoke` docs.
with sfg.namespace("linear3d_automatic"):
# Work on a copy so the shared base configuration is left untouched.
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "linear3d"
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
# Map both fields onto `std::mdspan` references with `layout_right`.
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
# No block_size / grid_size given here: only the stream is forwarded.
sfg.gpu_invoke(khandle, stream=stream),
)
with sfg.namespace("blockwise4d"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "blockwise4d"
......@@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg:
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
)
with sfg.namespace("blockwise4d_manual"):
......@@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg:
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
)
......@@ -42,7 +42,8 @@ int main(void)
std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function< void () > invoke) {
auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i)
{
data_src[i] = distrib(gen);
......@@ -62,24 +63,33 @@ int main(void)
}
};
check([&]() {
check([&]()
{
/* Linear3D Dynamic */
dim3 blockSize{64, 8, 1};
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkHipError(hipStreamSynchronize(stream));
});
checkHipError(hipStreamSynchronize(stream)); });
check([&]()
{
    /* Linear3D Automatic
     *
     * Calls the wrapper generated in namespace `linear3d_automatic`, which
     * takes no block size argument (only dst, src and the stream).
     */
    hipStream_t stream;
    checkHipError(hipStreamCreate(&stream));
    gen::linear3d_automatic::scaleKernel(dst, src, stream);
    checkHipError(hipStreamSynchronize(stream));
    /* Release the stream once the kernel has finished so that it is not leaked. */
    checkHipError(hipStreamDestroy(stream));
});
check([&]() {
check([&]()
{
/* Blockwise4D Automatic */
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream);
checkHipError(hipStreamSynchronize(stream));
});
checkHipError(hipStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Linear3D Manual */
dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23};
......@@ -87,18 +97,17 @@ int main(void)
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkHipError(hipStreamSynchronize(stream));
});
checkHipError(hipStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Blockwise4D Manual */
dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1};
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkHipError(hipStreamSynchronize(stream));
});
checkHipError(hipStreamSynchronize(stream)); });
checkHipError(hipFree(data_src));
checkHipError(hipFree(data_dst));
......
......@@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg:
sfg.gpu_invoke(khandle, block_size=block_size, stream=stream),
)
# Namespace `linear3d_automatic`: emits a `scaleKernel` wrapper around the
# "scale" kernel created with the "linear3d" GPU indexing scheme.
# Unlike the wrappers that pass `block_size` (and `grid_size`) to `gpu_invoke`,
# this one passes only the stream.
# NOTE(review): presumably the launch configuration is then chosen
# automatically by the generated code -- confirm against the `gpu_invoke` docs.
with sfg.namespace("linear3d_automatic"):
# Work on a copy so the shared base configuration is left untouched.
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "linear3d"
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
# Map both fields onto `std::mdspan` references with `layout_right`.
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
# No block_size / grid_size given here: only the stream is forwarded.
sfg.gpu_invoke(khandle, stream=stream),
)
with sfg.namespace("blockwise4d"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "blockwise4d"
......@@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg:
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
)
with sfg.namespace("blockwise4d_manual"):
......@@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg:
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment