Skip to content
Snippets Groups Projects
Commit 8fea4416 authored by Frederik Hennig
Browse files

test automatic block size in linear3d

parent 455b455e
1 merge request: !24 Extend Support for CUDA and HIP kernel invocations
Pipeline #75638 canceled with stages
in 29 seconds
......@@ -42,7 +42,8 @@ int main(void)
std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function< void () > invoke) {
auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i)
{
data_src[i] = distrib(gen);
......@@ -62,24 +63,33 @@ int main(void)
}
};
check([&]() {
check([&]()
{
/* Linear3D Dynamic */
dim3 blockSize{64, 8, 1};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]()
{
    /* Linear3D Automatic
     *
     * Invokes the linear3d kernel wrapper that takes no block size argument:
     * only the destination field, the source field and the stream are passed.
     */
    cudaStream_t stream;
    checkCudaError(cudaStreamCreate(&stream));
    // The wrapper without a block size parameter is generated in namespace
    // `linear3d_automatic`; `gen::linear3d::scaleKernel` is the variant that
    // is called with a leading `blockSize` argument.
    gen::linear3d_automatic::scaleKernel(dst, src, stream);
    // Wait for the invocation on this stream to finish before `check` continues.
    checkCudaError(cudaStreamSynchronize(stream));
});
check([&]() {
check([&]()
{
/* Blockwise4D Automatic */
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Linear3D Manual */
dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23};
......@@ -87,18 +97,17 @@ int main(void)
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Blockwise4D Manual */
dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
checkCudaError(cudaFree(data_src));
checkCudaError(cudaFree(data_dst));
......
......@@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg:
sfg.gpu_invoke(khandle, block_size=block_size, stream=stream),
)
with sfg.namespace("linear3d_automatic"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "linear3d"
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, stream=stream),
)
with sfg.namespace("blockwise4d"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "blockwise4d"
......@@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg:
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
)
with sfg.namespace("blockwise4d_manual"):
......@@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg:
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
)
......@@ -42,7 +42,8 @@ int main(void)
std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function< void () > invoke) {
auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i)
{
data_src[i] = distrib(gen);
......@@ -62,24 +63,33 @@ int main(void)
}
};
check([&]() {
check([&]()
{
/* Linear3D Dynamic */
dim3 blockSize{64, 8, 1};
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkHipError(hipStreamSynchronize(stream));
});
checkHipError(hipStreamSynchronize(stream)); });
check([&]()
{
/* Linear3D Automatic */
// Passed to `check` as the `invoke` callback: creates a fresh HIP stream,
// calls the `linear3d_automatic` wrapper with only dst, src and the stream
// (no block size argument), then waits for the stream to finish.
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::linear3d_automatic::scaleKernel(dst, src, stream);
checkHipError(hipStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Blockwise4D Automatic */
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream);
checkHipError(hipStreamSynchronize(stream));
});
checkHipError(hipStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Linear3D Manual */
dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23};
......@@ -87,18 +97,17 @@ int main(void)
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkHipError(hipStreamSynchronize(stream));
});
checkHipError(hipStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Blockwise4D Manual */
dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1};
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkHipError(hipStreamSynchronize(stream));
});
checkHipError(hipStreamSynchronize(stream)); });
checkHipError(hipFree(data_src));
checkHipError(hipFree(data_dst));
......
......@@ -35,6 +35,21 @@ with SourceFileGenerator() as sfg:
sfg.gpu_invoke(khandle, block_size=block_size, stream=stream),
)
with sfg.namespace("linear3d_automatic"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "linear3d"
khandle = sfg.kernels.create(asm, "scale", cfg)
sfg.function("scaleKernel")(
sfg.map_field(
src, std.mdspan.from_field(src, ref=True, layout_policy="layout_right")
),
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, stream=stream),
)
with sfg.namespace("blockwise4d"):
cfg = base_config.copy()
cfg.gpu.indexing_scheme = "blockwise4d"
......@@ -63,7 +78,9 @@ with SourceFileGenerator() as sfg:
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
)
with sfg.namespace("blockwise4d_manual"):
......@@ -79,5 +96,7 @@ with SourceFileGenerator() as sfg:
sfg.map_field(
dst, std.mdspan.from_field(dst, ref=True, layout_policy="layout_right")
),
sfg.gpu_invoke(khandle, block_size=block_size, grid_size=grid_size, stream=stream),
sfg.gpu_invoke(
khandle, block_size=block_size, grid_size=grid_size, stream=stream
),
)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment