diff --git a/src/pystencils/include/gpu_defines.h b/src/pystencils/include/gpu_defines.h
index 5525bbc69188fa143ba9146d286a93d392f34bf2..34cff79dea2f14399622a0026e362a4832bd739c 100644
--- a/src/pystencils/include/gpu_defines.h
+++ b/src/pystencils/include/gpu_defines.h
@@ -13,10 +13,11 @@ typedef __hip_uint16_t uint16_t;
 typedef __hip_int16_t int16_t;
 #endif
 
-#ifdef __CUDA_ARCH__
-// No direct implementation of atomic multiplication, minimum and maximum available
+// No direct implementation for all atomic operations available
 // -> add support by custom implementations using a CAS mechanism
 
+#if defined(__CUDA_ARCH__) || defined(__HIPCC_RTC__)
+
 // - atomicMul (double/float)
 // see https://stackoverflow.com/questions/43354798/atomic-multiplication-and-division
 __device__ double atomicMul(double* address, double val) {
@@ -43,6 +44,10 @@ __device__ float atomicMul(float* address, float val) {
   return __int_as_float(old);
 }
+#endif
+
+#ifdef __CUDA_ARCH__
+
 // - atomicMin (double/float)
 // see https://stackoverflow.com/questions/17399119/how-do-i-use-atomicmax-on-floating-point-values-in-cuda
 __device__ __forceinline__ double atomicMin(double *address, double val)
 {
@@ -94,4 +99,5 @@ __device__ __forceinline__ float atomicMax(float *address, float val)
     }
     return __int_as_float(ret);
 }
+
 #endif
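
For context: the hunks above reference the CAS-based fallbacks named in the comments but elide the function bodies. The sketch below is illustrative only, not a copy of gpu_defines.h; it follows the pattern from the Stack Overflow answer linked in the header, and the identifier names (`address_as_ull`, `assumed`, `old`) are placeholders chosen here.

```cuda
// Illustrative sketch of the CAS retry loop behind the custom atomics
// (pattern from the linked Stack Overflow answer; not copied from gpu_defines.h).
__device__ double atomicMul(double *address, double val) {
  // Reinterpret the 64-bit double as an integer so atomicCAS can operate on it.
  unsigned long long int *address_as_ull = (unsigned long long int *)address;
  unsigned long long int old = *address_as_ull, assumed;
  do {
    assumed = old;
    // Try to publish assumed * val; atomicCAS returns the value it actually found,
    // so the loop retries whenever another thread updated *address in between.
    old = atomicCAS(address_as_ull, assumed,
                    __double_as_longlong(val * __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
}
```

The guard split in the diff suggests the design intent: atomicMul is needed by both the CUDA device path and HIP RTC, so it moves under `#if defined(__CUDA_ARCH__) || defined(__HIPCC_RTC__)`, while the atomicMin/atomicMax fallbacks stay under `#ifdef __CUDA_ARCH__`, presumably because the HIP toolchain already provides suitable overloads there.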