diff --git a/pystencils/gpu/gpujit.py b/pystencils/gpu/gpujit.py
index e29f85d4369b7dfc38bba9259a39bfc666dc0977..efa5af826df117b22d26a8f766a0c8b1730ee3f4 100644
--- a/pystencils/gpu/gpujit.py
+++ b/pystencils/gpu/gpujit.py
@@ -42,6 +42,9 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
     headers = get_headers(kernel_function_node)
     if cp.cuda.runtime.is_hip:
         headers.add('"gpu_defines.h"')
+        for field in kernel_function_node.fields_accessed:
+            if isinstance(field.dtype, BasicType) and field.dtype.is_half():
+                headers.add('<hip/hip_fp16.h>')
     else:
         headers.update({'"gpu_defines.h"', '<cstdint>'})
         for field in kernel_function_node.fields_accessed:
diff --git a/pystencils/include/gpu_defines.h b/pystencils/include/gpu_defines.h
index 2d9875db2e0ea0360cccb17ac249f7bc79a2cb66..67e7722e9a01b217584dce14f0dcec16d2025c80 100644
--- a/pystencils/include/gpu_defines.h
+++ b/pystencils/include/gpu_defines.h
@@ -3,3 +3,10 @@
 #define POS_INFINITY __int_as_float(0x7f800000)
 #define INFINITY POS_INFINITY
 #define NEG_INFINITY __int_as_float(0xff800000)
+
+#ifdef __HIPCC_RTC__ /* only when __HIPCC_RTC__ is defined: alias the 8/16-bit fixed-width integer names to the __hip_* types (presumably the hipRTC runtime-compilation path, where <cstdint> is not included -- confirm) */
+typedef __hip_uint8_t uint8_t;
+typedef __hip_int8_t int8_t;
+typedef __hip_uint16_t uint16_t;
+typedef __hip_int16_t int16_t;
+#endif /* __HIPCC_RTC__ */
diff --git a/pystencils/include/philox_rand.h b/pystencils/include/philox_rand.h
index fab94146889a854f09537b0395cbee5607355c1e..cb91b53b96c487b575d9d318a5f58d1460ed59ed 100644
--- a/pystencils/include/philox_rand.h
+++ b/pystencils/include/philox_rand.h
@@ -1,4 +1,4 @@
-#ifndef __OPENCL_VERSION__
+#if !defined(__OPENCL_VERSION__) && !defined(__HIPCC_RTC__)
 #if defined(__SSE2__) || (defined(_MSC_VER) && !defined(_M_ARM64))
 #include <emmintrin.h> // SSE2
 #endif
@@ -38,7 +38,7 @@
 #endif
 #endif
 
-#ifdef __CUDA_ARCH__
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
 #define QUALIFIERS static __forceinline__ __device__
 #elif defined(__OPENCL_VERSION__)
 #define QUALIFIERS static inline
@@ -59,7 +59,9 @@
 typedef uint32_t uint32;
 typedef uint64_t uint64;
 #else
+#ifndef __HIPCC_RTC__
 #include <cstdint>
+#endif
 typedef std::uint32_t uint32;
 typedef std::uint64_t uint64;
 #endif
@@ -75,7 +77,7 @@ typedef svfloat64_t svfloat64_st;
 
 QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip)
 {
-#ifndef __CUDA_ARCH__
+#if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__)
     // host code
 #if defined(__powerpc__) && (!defined(__clang__) || defined(__xlC__))
     *hip = __mulhwu(a,b);
@@ -186,7 +188,7 @@ QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3
 #endif
 }
 
-#if !defined(__CUDA_ARCH__) && !defined(__OPENCL_VERSION__)
+#if !defined(__CUDA_ARCH__) && !defined(__OPENCL_VERSION__) && !defined(__HIP_DEVICE_COMPILE__)
 #if defined(__SSE4_1__) || (defined(_MSC_VER) && !defined(_M_ARM64))
 QUALIFIERS void _philox4x32round(__m128i* ctr, __m128i* key)
 {
diff --git a/pystencils_tests/test_custom_backends.py b/pystencils_tests/test_custom_backends.py
index 9b625f8f9020e42d6528e4fdf64da7e89d777bae..c7bf7fe243dc6a9df08ba4152cf56ef6ea588bee 100644
--- a/pystencils_tests/test_custom_backends.py
+++ b/pystencils_tests/test_custom_backends.py
@@ -47,5 +47,5 @@ def test_custom_backends_gpu():
 
     ast = pystencils.create_kernel(normal_assignments, target=Target.GPU)
     pystencils.show_code(ast, ScreamingGpuBackend())
-    with pytest.raises(cupy.cuda.compiler.JitifyException):
+    with pytest.raises((cupy.cuda.compiler.JitifyException, cupy.cuda.compiler.CompileException)):
         pystencils.gpu.gpujit.make_python_function(ast, custom_backend=ScreamingGpuBackend())