Skip to content
Snippets Groups Projects

Extend Support for CUDA and HIP kernel invocations

Merged Frederik Hennig requested to merge fhennig/cuda-invoke into master
Compare and Show latest version
4 files
+ 87
31
Preferences
Compare changes
Files
4
#include "CudaKernels.hpp"
#include <cuda/cuda_runtime.h>
#include <cuda_runtime.h>
#include <experimental/mdspan>
#include <random>
@@ -42,7 +42,8 @@ int main(void)
std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function< void () > invoke) {
auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i)
{
data_src[i] = distrib(gen);
@@ -62,24 +63,33 @@ int main(void)
}
};
check([&]() {
check([&]()
{
/* Linear3D Dynamic */
dim3 blockSize{64, 8, 1};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]()
{
    /* Linear3D Automatic */
    // Runs the generated kernel wrapper on a freshly created stream; only
    // dst, src and the stream are passed, no explicit block or grid size.
    cudaStream_t stream;
    checkCudaError(cudaStreamCreate(&stream));
    gen::linear3d_automatic::scaleKernel(dst, src, stream);
    // Block until all work submitted to this stream has finished.
    checkCudaError(cudaStreamSynchronize(stream));
    // Release the stream: it is local to this lambda, so nothing else can
    // destroy it once the lambda returns.
    checkCudaError(cudaStreamDestroy(stream));
});
check([&]() {
check([&]()
{
/* Blockwise4D Automatic */
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Linear3D Manual */
dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23};
@@ -87,18 +97,17 @@ int main(void)
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]() {
check([&]()
{
/* Blockwise4D Manual */
dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream));
});
checkCudaError(cudaStreamSynchronize(stream)); });
checkCudaError(cudaFree(data_src));
checkCudaError(cudaFree(data_dst));