Skip to content
Snippets Groups Projects
Commit c7f1518e authored by Richard Angersbach's avatar Richard Angersbach
Browse files

Move manual atomic op implementations to new header

parent 3202d8b8
No related branches found
No related tags found
1 merge request!438Reduction Support
Pipeline #75829 failed
......@@ -197,7 +197,10 @@ class CudaPlatform(GenericGpu):
@property
def required_headers(self) -> set[str]:
return {'"pystencils_runtime/hip.h"'} # TODO: move to HipPlatform once it is introduced
return {
'"pystencils_runtime/hip.h"', # TODO: move to HipPlatform once it is introduced
'"gpu_atomics.h'
}
def materialize_iteration_space(
self, body: PsBlock, ispace: IterationSpace
......
#pragma once
// No direct implementation for all atomic operations available
// -> add support by custom implementations using a CAS mechanism
#if defined(__CUDA_ARCH__) || defined(__HIPCC_RTC__)
// - atomicMul (double/float)
// see https://stackoverflow.com/questions/43354798/atomic-multiplication-and-division
__device__ double atomicMul(double* address, double val) {
unsigned long long int* address_as_ull = (unsigned long long int*)address;
unsigned long long int oldValue = *address_as_ull, assumed;
do {
assumed = oldValue;
oldValue = atomicCAS(address_as_ull, assumed, __double_as_longlong(val *
__longlong_as_double(assumed)));
} while (assumed != oldValue);
return __longlong_as_double(oldValue);
}
__device__ float atomicMul(float* address, float val) {
int* address_as_int = (int*)address;
int old = *address_as_int;
int assumed;
do {
assumed = old;
old = atomicCAS(address_as_int, assumed, __float_as_int(val * __int_as_float(assumed)));
} while (assumed != old);
return __int_as_float(old);
}
#endif
#ifdef __CUDA_ARCH__
// - atomicMin (double/float)
// see https://stackoverflow.com/questions/17399119/how-do-i-use-atomicmax-on-floating-point-values-in-cuda
__device__ __forceinline__ double atomicMin(double *address, double val)
{
unsigned long long ret = __double_as_longlong(*address);
while(val < __longlong_as_double(ret))
{
unsigned long long old = ret;
if((ret = atomicCAS((unsigned long long *)address, old, __double_as_longlong(val))) == old)
break;
}
return __longlong_as_double(ret);
}
__device__ __forceinline__ float atomicMin(float *address, float val)
{
int ret = __float_as_int(*address);
while(val < __int_as_float(ret))
{
int old = ret;
if((ret = atomicCAS((int *)address, old, __float_as_int(val))) == old)
break;
}
return __int_as_float(ret);
}
// - atomicMax (double/float)
// see https://stackoverflow.com/questions/17399119/how-do-i-use-atomicmax-on-floating-point-values-in-cuda
__device__ __forceinline__ double atomicMax(double *address, double val)
{
unsigned long long ret = __double_as_longlong(*address);
while(val > __longlong_as_double(ret))
{
unsigned long long old = ret;
if((ret = atomicCAS((unsigned long long *)address, old, __double_as_longlong(val))) == old)
break;
}
return __longlong_as_double(ret);
}
__device__ __forceinline__ float atomicMax(float *address, float val)
{
int ret = __float_as_int(*address);
while(val > __int_as_float(ret))
{
int old = ret;
if((ret = atomicCAS((int *)address, old, __float_as_int(val))) == old)
break;
}
return __int_as_float(ret);
}
#endif
\ No newline at end of file
......@@ -6,92 +6,3 @@ typedef __hip_int8_t int8_t;
typedef __hip_uint16_t uint16_t;
typedef __hip_int16_t int16_t;
#endif
// No direct implementation for all atomic operations available
// -> add support by custom implementations using a CAS mechanism
#if defined(__CUDA_ARCH__) || defined(__HIPCC_RTC__)
// - atomicMul (double/float)
// see https://stackoverflow.com/questions/43354798/atomic-multiplication-and-division
__device__ double atomicMul(double* address, double val) {
unsigned long long int* address_as_ull = (unsigned long long int*)address;
unsigned long long int oldValue = *address_as_ull, assumed;
do {
assumed = oldValue;
oldValue = atomicCAS(address_as_ull, assumed, __double_as_longlong(val *
__longlong_as_double(assumed)));
} while (assumed != oldValue);
return __longlong_as_double(oldValue);
}
__device__ float atomicMul(float* address, float val) {
int* address_as_int = (int*)address;
int old = *address_as_int;
int assumed;
do {
assumed = old;
old = atomicCAS(address_as_int, assumed, __float_as_int(val * __int_as_float(assumed)));
} while (assumed != old);
return __int_as_float(old);
}
#endif
#ifdef __CUDA_ARCH__
// - atomicMin (double/float)
// see https://stackoverflow.com/questions/17399119/how-do-i-use-atomicmax-on-floating-point-values-in-cuda
__device__ __forceinline__ double atomicMin(double *address, double val)
{
unsigned long long ret = __double_as_longlong(*address);
while(val < __longlong_as_double(ret))
{
unsigned long long old = ret;
if((ret = atomicCAS((unsigned long long *)address, old, __double_as_longlong(val))) == old)
break;
}
return __longlong_as_double(ret);
}
__device__ __forceinline__ float atomicMin(float *address, float val)
{
int ret = __float_as_int(*address);
while(val < __int_as_float(ret))
{
int old = ret;
if((ret = atomicCAS((int *)address, old, __float_as_int(val))) == old)
break;
}
return __int_as_float(ret);
}
// - atomicMax (double/float)
// see https://stackoverflow.com/questions/17399119/how-do-i-use-atomicmax-on-floating-point-values-in-cuda
__device__ __forceinline__ double atomicMax(double *address, double val)
{
unsigned long long ret = __double_as_longlong(*address);
while(val > __longlong_as_double(ret))
{
unsigned long long old = ret;
if((ret = atomicCAS((unsigned long long *)address, old, __double_as_longlong(val))) == old)
break;
}
return __longlong_as_double(ret);
}
__device__ __forceinline__ float atomicMax(float *address, float val)
{
int ret = __float_as_int(*address);
while(val > __int_as_float(ret))
{
int old = ret;
if((ret = atomicCAS((int *)address, old, __float_as_int(val))) == old)
break;
}
return __int_as_float(ret);
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment