From e20d47da5f269e38052645f510727f323f2d6be9 Mon Sep 17 00:00:00 2001 From: Michael Kuron <m.kuron@gmx.de> Date: Thu, 29 Jun 2023 20:19:07 +0200 Subject: [PATCH] Make AMD GPU support compatible with both hipcc and hiprtc --- pystencils/gpu/gpujit.py | 3 +++ pystencils/include/gpu_defines.h | 7 +++++++ pystencils/include/philox_rand.h | 10 ++++++---- pystencils_tests/test_custom_backends.py | 2 +- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pystencils/gpu/gpujit.py b/pystencils/gpu/gpujit.py index e29f85d43..efa5af826 100644 --- a/pystencils/gpu/gpujit.py +++ b/pystencils/gpu/gpujit.py @@ -42,6 +42,9 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen headers = get_headers(kernel_function_node) if cp.cuda.runtime.is_hip: headers.add('"gpu_defines.h"') + for field in kernel_function_node.fields_accessed: + if isinstance(field.dtype, BasicType) and field.dtype.is_half(): + headers.add('<hip/hip_fp16.h>') else: headers.update({'"gpu_defines.h"', '<cstdint>'}) for field in kernel_function_node.fields_accessed: diff --git a/pystencils/include/gpu_defines.h b/pystencils/include/gpu_defines.h index 2d9875db2..67e7722e9 100644 --- a/pystencils/include/gpu_defines.h +++ b/pystencils/include/gpu_defines.h @@ -3,3 +3,10 @@ #define POS_INFINITY __int_as_float(0x7f800000) #define INFINITY POS_INFINITY #define NEG_INFINITY __int_as_float(0xff800000) + +#ifdef __HIPCC_RTC__ +typedef __hip_uint8_t uint8_t; +typedef __hip_int8_t int8_t; +typedef __hip_uint16_t uint16_t; +typedef __hip_int16_t int16_t; +#endif diff --git a/pystencils/include/philox_rand.h b/pystencils/include/philox_rand.h index fab941468..cb91b53b9 100644 --- a/pystencils/include/philox_rand.h +++ b/pystencils/include/philox_rand.h @@ -1,4 +1,4 @@ -#ifndef __OPENCL_VERSION__ +#if !defined(__OPENCL_VERSION__) && !defined(__HIPCC_RTC__) #if defined(__SSE2__) || (defined(_MSC_VER) && !defined(_M_ARM64)) #include <emmintrin.h> // SSE2 #endif @@ -38,7 +38,7 @@ #endif #endif -#ifdef __CUDA_ARCH__ +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) #define QUALIFIERS static __forceinline__ __device__ #elif defined(__OPENCL_VERSION__) #define QUALIFIERS static inline @@ -59,7 +59,9 @@ typedef uint32_t uint32; typedef uint64_t uint64; #else +#ifndef __HIPCC_RTC__ #include <cstdint> +#endif typedef std::uint32_t uint32; typedef std::uint64_t uint64; #endif @@ -75,7 +77,7 @@ typedef svfloat64_t svfloat64_st; QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip) { -#ifndef __CUDA_ARCH__ +#if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) // host code #if defined(__powerpc__) && (!defined(__clang__) || defined(__xlC__)) *hip = __mulhwu(a,b); @@ -186,7 +188,7 @@ QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3 #endif } -#if !defined(__CUDA_ARCH__) && !defined(__OPENCL_VERSION__) +#if !defined(__CUDA_ARCH__) && !defined(__OPENCL_VERSION__) && !defined(__HIP_DEVICE_COMPILE__) #if defined(__SSE4_1__) || (defined(_MSC_VER) && !defined(_M_ARM64)) QUALIFIERS void _philox4x32round(__m128i* ctr, __m128i* key) { diff --git a/pystencils_tests/test_custom_backends.py b/pystencils_tests/test_custom_backends.py index 9b625f8f9..c7bf7fe24 100644 --- a/pystencils_tests/test_custom_backends.py +++ b/pystencils_tests/test_custom_backends.py @@ -47,5 +47,5 @@ def test_custom_backends_gpu(): ast = pystencils.create_kernel(normal_assignments, target=Target.GPU) pystencils.show_code(ast, ScreamingGpuBackend()) - with pytest.raises(cupy.cuda.compiler.JitifyException): + with pytest.raises((cupy.cuda.compiler.JitifyException, cupy.cuda.compiler.CompileException)): pystencils.gpu.gpujit.make_python_function(ast, custom_backend=ScreamingGpuBackend()) -- GitLab