Skip to content
Snippets Groups Projects
Commit de1dc39e authored by Frederik Hennig
Browse files

start fixing JIT for HIP

parent 17d9fac8
No related branches found
No related tags found
1 merge request: !458 "HIP Target and Platform"
Pipeline #76040 failed
...@@ -586,6 +586,8 @@ class CreateKernelConfig(ConfigBase): ...@@ -586,6 +586,8 @@ class CreateKernelConfig(ConfigBase):
match t: match t:
case Target.CurrentCPU: case Target.CurrentCPU:
return Target.auto_cpu() return Target.auto_cpu()
case Target.CurrentGPU:
return Target.auto_gpu()
case _: case _:
return t return t
...@@ -600,7 +602,7 @@ class CreateKernelConfig(ConfigBase): ...@@ -600,7 +602,7 @@ class CreateKernelConfig(ConfigBase):
from ..jit import LegacyCpuJit from ..jit import LegacyCpuJit
return LegacyCpuJit() return LegacyCpuJit()
elif target == Target.CUDA: elif target == Target.CUDA or target == Target.HIP:
try: try:
from ..jit.gpu_cupy import CupyJit from ..jit.gpu_cupy import CupyJit
...@@ -611,7 +613,7 @@ class CreateKernelConfig(ConfigBase): ...@@ -611,7 +613,7 @@ class CreateKernelConfig(ConfigBase):
return no_jit return no_jit
elif target == Target.SYCL or target == Target.HIP: elif target == Target.SYCL:
from ..jit import no_jit from ..jit import no_jit
return no_jit return no_jit
......
...@@ -126,6 +126,18 @@ class Target(Flag): ...@@ -126,6 +126,18 @@ class Target(Flag):
else: else:
return Target.GenericCPU return Target.GenericCPU
@staticmethod
def auto_gpu() -> Target:
try:
import cupy
if cupy.cuda.runtime.is_hip:
return Target.HIP
else:
return Target.CUDA
except ImportError:
raise RuntimeError("Cannot infer GPU target since cupy is not installed.")
@staticmethod @staticmethod
def available_targets() -> list[Target]: def available_targets() -> list[Target]:
targets = [Target.GenericCPU] targets = [Target.GenericCPU]
......
...@@ -208,7 +208,11 @@ class CupyKernelWrapper(KernelWrapper): ...@@ -208,7 +208,11 @@ class CupyKernelWrapper(KernelWrapper):
class CupyJit(JitBase): class CupyJit(JitBase):
def __init__(self, default_block_size: Sequence[int] = (128, 2, 1)): def __init__(self, default_block_size: Sequence[int] = (128, 2, 1)):
self._runtime_headers = {"<cstdint>"} self._runtime_headers: set[str]
if cp.cuda.runtime.is_hip:
self._runtime_headers = set()
else:
self._runtime_headers = {"<cstdint>"}
if len(default_block_size) > 3: if len(default_block_size) > 3:
raise ValueError( raise ValueError(
...@@ -226,9 +230,9 @@ class CupyJit(JitBase): ...@@ -226,9 +230,9 @@ class CupyJit(JitBase):
"`cupy` is not installed: just-in-time-compilation of CUDA kernels is unavailable." "`cupy` is not installed: just-in-time-compilation of CUDA kernels is unavailable."
) )
if not isinstance(kernel, GpuKernel) or kernel.target != Target.CUDA: if not isinstance(kernel, GpuKernel):
raise ValueError( raise ValueError(
"The CupyJit just-in-time compiler only accepts kernels generated for CUDA or HIP" "The CupyJit just-in-time compiler only accepts GPU kernels generated for CUDA or HIP"
) )
options = self._compiler_options() options = self._compiler_options()
......
...@@ -23,7 +23,10 @@ AVAILABLE_TARGETS = [ps.Target.GenericCPU] ...@@ -23,7 +23,10 @@ AVAILABLE_TARGETS = [ps.Target.GenericCPU]
try: try:
import cupy import cupy
AVAILABLE_TARGETS += [ps.Target.CUDA] if cupy.cuda.runtime.is_hip:
AVAILABLE_TARGETS += [ps.Target.HIP]
else:
AVAILABLE_TARGETS += [ps.Target.CUDA]
except ImportError: except ImportError:
pass pass
......
...@@ -58,7 +58,7 @@ def test_full_scalar_field(): ...@@ -58,7 +58,7 @@ def test_full_scalar_field():
pack_eqs = [Assignment(buffer.center(), src_field.center())] pack_eqs = [Assignment(buffer.center(), src_field.center())]
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
pack_ast = create_kernel(pack_eqs, config=config) pack_ast = create_kernel(pack_eqs, config=config)
pack_kernel = pack_ast.compile() pack_kernel = pack_ast.compile()
...@@ -66,7 +66,7 @@ def test_full_scalar_field(): ...@@ -66,7 +66,7 @@ def test_full_scalar_field():
unpack_eqs = [Assignment(dst_field.center(), buffer.center())] unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
unpack_ast = create_kernel(unpack_eqs, config=config) unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile() unpack_kernel = unpack_ast.compile()
...@@ -94,7 +94,7 @@ def test_field_slice(): ...@@ -94,7 +94,7 @@ def test_field_slice():
pack_eqs = [Assignment(buffer.center(), src_field.center())] pack_eqs = [Assignment(buffer.center(), src_field.center())]
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
pack_ast = create_kernel(pack_eqs, config=config) pack_ast = create_kernel(pack_eqs, config=config)
pack_kernel = pack_ast.compile() pack_kernel = pack_ast.compile()
...@@ -103,7 +103,7 @@ def test_field_slice(): ...@@ -103,7 +103,7 @@ def test_field_slice():
# Unpack into ghost layer of dst_field in N direction # Unpack into ghost layer of dst_field in N direction
unpack_eqs = [Assignment(dst_field.center(), buffer.center())] unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
unpack_ast = create_kernel(unpack_eqs, config=config) unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile() unpack_kernel = unpack_ast.compile()
...@@ -131,7 +131,7 @@ def test_all_cell_values(): ...@@ -131,7 +131,7 @@ def test_all_cell_values():
eq = Assignment(buffer(idx), src_field(idx)) eq = Assignment(buffer(idx), src_field(idx))
pack_eqs.append(eq) pack_eqs.append(eq)
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
pack_code = create_kernel(pack_eqs, config=config) pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile() pack_kernel = pack_code.compile()
...@@ -143,7 +143,7 @@ def test_all_cell_values(): ...@@ -143,7 +143,7 @@ def test_all_cell_values():
eq = Assignment(dst_field(idx), buffer(idx)) eq = Assignment(dst_field(idx), buffer(idx))
unpack_eqs.append(eq) unpack_eqs.append(eq)
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
unpack_ast = create_kernel(unpack_eqs, config=config) unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile() unpack_kernel = unpack_ast.compile()
unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr) unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)
...@@ -173,7 +173,7 @@ def test_subset_cell_values(): ...@@ -173,7 +173,7 @@ def test_subset_cell_values():
pack_eqs.append(eq) pack_eqs.append(eq)
pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype} pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
pack_ast = create_kernel(pack_eqs, config=config) pack_ast = create_kernel(pack_eqs, config=config)
pack_kernel = pack_ast.compile() pack_kernel = pack_ast.compile()
pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr) pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)
...@@ -185,7 +185,7 @@ def test_subset_cell_values(): ...@@ -185,7 +185,7 @@ def test_subset_cell_values():
unpack_eqs.append(eq) unpack_eqs.append(eq)
unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype} unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
unpack_ast = create_kernel(unpack_eqs, config=config) unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile() unpack_kernel = unpack_ast.compile()
...@@ -215,7 +215,7 @@ def test_field_layouts(): ...@@ -215,7 +215,7 @@ def test_field_layouts():
pack_eqs.append(eq) pack_eqs.append(eq)
pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype} pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
pack_ast = create_kernel(pack_eqs, config=config) pack_ast = create_kernel(pack_eqs, config=config)
pack_kernel = pack_ast.compile() pack_kernel = pack_ast.compile()
...@@ -228,7 +228,7 @@ def test_field_layouts(): ...@@ -228,7 +228,7 @@ def test_field_layouts():
unpack_eqs.append(eq) unpack_eqs.append(eq)
unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype} unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
config = CreateKernelConfig(target=pystencils.Target.GPU) config = CreateKernelConfig(target=pystencils.Target.CurrentGPU)
unpack_ast = create_kernel(unpack_eqs, config=config) unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile() unpack_kernel = unpack_ast.compile()
...@@ -299,7 +299,7 @@ def test_iteration_slices(gpu_indexing): ...@@ -299,7 +299,7 @@ def test_iteration_slices(gpu_indexing):
gpu_src_arr.set(src_arr) gpu_src_arr.set(src_arr)
gpu_dst_arr.fill(0) gpu_dst_arr.fill(0)
config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice) config = CreateKernelConfig(target=Target.CurrentGPU, iteration_slice=pack_slice)
pack_code = create_kernel(pack_eqs, config=config) pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile() pack_kernel = pack_code.compile()
...@@ -311,7 +311,7 @@ def test_iteration_slices(gpu_indexing): ...@@ -311,7 +311,7 @@ def test_iteration_slices(gpu_indexing):
eq = Assignment(dst_field(idx), buffer(idx)) eq = Assignment(dst_field(idx), buffer(idx))
unpack_eqs.append(eq) unpack_eqs.append(eq)
config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice) config = CreateKernelConfig(target=Target.CurrentGPU, iteration_slice=pack_slice)
unpack_code = create_kernel(unpack_eqs, config=config) unpack_code = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_code.compile() unpack_kernel = unpack_code.compile()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment