Skip to content
Snippets Groups Projects
Commit e20d47da authored by Michael Kuron's avatar Michael Kuron :mortar_board:
Browse files

Make AMD GPU support compatible with both hipcc and hiprtc

parent 89e6b0f2
No related tags found
No related merge requests found
...@@ -42,6 +42,9 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen ...@@ -42,6 +42,9 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
headers = get_headers(kernel_function_node) headers = get_headers(kernel_function_node)
if cp.cuda.runtime.is_hip: if cp.cuda.runtime.is_hip:
headers.add('"gpu_defines.h"') headers.add('"gpu_defines.h"')
for field in kernel_function_node.fields_accessed:
if isinstance(field.dtype, BasicType) and field.dtype.is_half():
headers.add('<hip/hip_fp16.h>')
else: else:
headers.update({'"gpu_defines.h"', '<cstdint>'}) headers.update({'"gpu_defines.h"', '<cstdint>'})
for field in kernel_function_node.fields_accessed: for field in kernel_function_node.fields_accessed:
......
...@@ -3,3 +3,10 @@ ...@@ -3,3 +3,10 @@
#define POS_INFINITY __int_as_float(0x7f800000) #define POS_INFINITY __int_as_float(0x7f800000)
#define INFINITY POS_INFINITY #define INFINITY POS_INFINITY
#define NEG_INFINITY __int_as_float(0xff800000) #define NEG_INFINITY __int_as_float(0xff800000)
#ifdef __HIPCC_RTC__
typedef __hip_uint8_t uint8_t;
typedef __hip_int8_t int8_t;
typedef __hip_uint16_t uint16_t;
typedef __hip_int16_t int16_t;
#endif
#ifndef __OPENCL_VERSION__ #if !defined(__OPENCL_VERSION__) && !defined(__HIPCC_RTC__)
#if defined(__SSE2__) || (defined(_MSC_VER) && !defined(_M_ARM64)) #if defined(__SSE2__) || (defined(_MSC_VER) && !defined(_M_ARM64))
#include <emmintrin.h> // SSE2 #include <emmintrin.h> // SSE2
#endif #endif
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#endif #endif
#endif #endif
#ifdef __CUDA_ARCH__ #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
#define QUALIFIERS static __forceinline__ __device__ #define QUALIFIERS static __forceinline__ __device__
#elif defined(__OPENCL_VERSION__) #elif defined(__OPENCL_VERSION__)
#define QUALIFIERS static inline #define QUALIFIERS static inline
...@@ -59,7 +59,9 @@ ...@@ -59,7 +59,9 @@
typedef uint32_t uint32; typedef uint32_t uint32;
typedef uint64_t uint64; typedef uint64_t uint64;
#else #else
#ifndef __HIPCC_RTC__
#include <cstdint> #include <cstdint>
#endif
typedef std::uint32_t uint32; typedef std::uint32_t uint32;
typedef std::uint64_t uint64; typedef std::uint64_t uint64;
#endif #endif
...@@ -75,7 +77,7 @@ typedef svfloat64_t svfloat64_st; ...@@ -75,7 +77,7 @@ typedef svfloat64_t svfloat64_st;
QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip) QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip)
{ {
#ifndef __CUDA_ARCH__ #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__)
// host code // host code
#if defined(__powerpc__) && (!defined(__clang__) || defined(__xlC__)) #if defined(__powerpc__) && (!defined(__clang__) || defined(__xlC__))
*hip = __mulhwu(a,b); *hip = __mulhwu(a,b);
...@@ -186,7 +188,7 @@ QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3 ...@@ -186,7 +188,7 @@ QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3
#endif #endif
} }
#if !defined(__CUDA_ARCH__) && !defined(__OPENCL_VERSION__) #if !defined(__CUDA_ARCH__) && !defined(__OPENCL_VERSION__) && !defined(__HIP_DEVICE_COMPILE__)
#if defined(__SSE4_1__) || (defined(_MSC_VER) && !defined(_M_ARM64)) #if defined(__SSE4_1__) || (defined(_MSC_VER) && !defined(_M_ARM64))
QUALIFIERS void _philox4x32round(__m128i* ctr, __m128i* key) QUALIFIERS void _philox4x32round(__m128i* ctr, __m128i* key)
{ {
......
...@@ -47,5 +47,5 @@ def test_custom_backends_gpu(): ...@@ -47,5 +47,5 @@ def test_custom_backends_gpu():
ast = pystencils.create_kernel(normal_assignments, target=Target.GPU) ast = pystencils.create_kernel(normal_assignments, target=Target.GPU)
pystencils.show_code(ast, ScreamingGpuBackend()) pystencils.show_code(ast, ScreamingGpuBackend())
with pytest.raises(cupy.cuda.compiler.JitifyException): with pytest.raises((cupy.cuda.compiler.JitifyException, cupy.cuda.compiler.CompileException)):
pystencils.gpu.gpujit.make_python_function(ast, custom_backend=ScreamingGpuBackend()) pystencils.gpu.gpujit.make_python_function(ast, custom_backend=ScreamingGpuBackend())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment