diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 64b0b38e1dcc84ecdb4c05b994f430e8f926e0ec..495c0a059c77e6723ca340358aeaf71f74e06056 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -59,8 +59,8 @@ def generate_c(ast_node: Node, signature_only: bool = False, dialect='c', custom
         from pystencils.backends.cuda_backend import CudaBackend
         printer = CudaBackend(signature_only=signature_only)
     elif dialect == 'opencl':
-        from pystencils.backends.opencl_backend import OpenCLBackend
-        printer = OpenCLBackend(signature_only=signature_only)
+        from pystencils.backends.opencl_backend import OpenClBackend
+        printer = OpenClBackend(signature_only=signature_only)
     else:
         raise ValueError("Unknown dialect: " + str(dialect))
     code = printer(ast_node)
@@ -174,8 +174,11 @@ class CBackend:
             return getattr(self, method_name)(node)
         raise NotImplementedError(self.__class__.__name__ + " does not support node of type " + node.__class__.__name__)

+    def _print_Type(self, node):
+        return str(node)
+
     def _print_KernelFunction(self, node):
-        function_arguments = ["%s %s" % (str(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()]
+        function_arguments = ["%s %s" % (self._print(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()]
         launch_bounds = ""
         if self._dialect == 'cuda':
             max_threads = node.indexing.max_threads_per_block()
@@ -210,7 +213,7 @@ class CBackend:

     def _print_SympyAssignment(self, node):
         if node.is_declaration:
-            data_type = "const " + str(node.lhs.dtype) + " " if node.is_const else str(node.lhs.dtype) + " "
+            data_type = "const " + self._print(node.lhs.dtype) + " " if node.is_const else self._print(node.lhs.dtype) + " "
             return "%s%s = %s;" % (data_type, self.sympy_printer.doprint(node.lhs),
                                    self.sympy_printer.doprint(node.rhs))
         else:
diff --git a/pystencils/backends/opencl_backend.py b/pystencils/backends/opencl_backend.py
index 92431fe79c1ba6fd35b36d181b5f5efe5e66ef38..b3a89ccc6d034921f30182ca10c81f508fc18e82 100644
--- a/pystencils/backends/opencl_backend.py
+++ b/pystencils/backends/opencl_backend.py
@@ -1,6 +1,7 @@
-from pystencils.backends.cuda_backend import CudaBackend
+from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
 from pystencils.backends.cbackend import generate_c
 from pystencils.astnodes import Node
+import pystencils.data_types

 def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
     """Prints an abstract syntax tree node as CUDA code.
@@ -15,5 +16,50 @@ def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
     return generate_c(astnode, signature_only, dialect='opencl')


-class OpenCLBackend(CudaBackend):
-    pass
\ No newline at end of file
+class OpenClBackend(CudaBackend):
+
+    def __init__(self, sympy_printer=None,
+                 signature_only=False):
+        if not sympy_printer:
+            sympy_printer = OpenClSympyPrinter()
+
+        super().__init__(sympy_printer, signature_only)
+        self._dialect = 'opencl'
+
+    # def _print_SympyAssignment(self, node):
+    #     code = super()._print_SympyAssignment(node)
+    #     if node.is_declaration and isinstance(node.lhs.dtype, pystencils.data_types.PointerType):
+    #         return "__global " + code
+    #     else:
+    #         return code
+
+    def _print_Type(self, node):
+        code = super()._print_Type(node)
+        if isinstance(node, pystencils.data_types.PointerType):
+            return "__global " + code
+        else:
+            return code
+
+
+class OpenClSympyPrinter(CudaSympyPrinter):
+    language = "OpenCL"
+
+    DIMENSION_MAPPING = {
+        'x': '0',
+        'y': '1',
+        'z': '2'
+    }
+    INDEXING_FUNCTION_MAPPING = {
+        'blockIdx': 'get_group_id',
+        'threadIdx': 'get_local_id',
+        'blockDim': 'get_local_size',
+        'gridDim': 'get_global_size'
+    }
+
+    def _print_ThreadIndexingSymbol(self, node):
+        symbol_name: str = node.name
+        function_name, dimension = tuple(symbol_name.split("."))
+        dimension = self.DIMENSION_MAPPING[dimension]
+        function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
+        return f"{function_name}({dimension})"
+
diff --git a/pystencils/gpucuda/indexing.py b/pystencils/gpucuda/indexing.py
index f6f1fbe80c3da6c1dea78cf687fe96dc6810ec10..4c8701b2599dd9b4570c7d7f8875e0e6b58bef36 100644
--- a/pystencils/gpucuda/indexing.py
+++ b/pystencils/gpucuda/indexing.py
@@ -1,8 +1,8 @@
 import abc
 from functools import partial
-from typing import Tuple  # noqa

 import sympy as sp
+from sympy.core.cache import cacheit

 from pystencils.astnodes import Block, Conditional
 from pystencils.data_types import TypedSymbol, create_type
@@ -10,10 +10,24 @@ from pystencils.integer_functions import div_ceil, div_floor
 from pystencils.slicing import normalize_slice
 from pystencils.sympyextensions import is_integer_sequence, prod

-BLOCK_IDX = [TypedSymbol("blockIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-THREAD_IDX = [TypedSymbol("threadIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-BLOCK_DIM = [TypedSymbol("blockDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-GRID_DIM = [TypedSymbol("gridDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+
+class ThreadIndexingSymbol(TypedSymbol):
+    def __new__(cls, *args, **kwds):
+        obj = ThreadIndexingSymbol.__xnew_cached_(cls, *args, **kwds)
+        return obj
+
+    def __new_stage2__(cls, name, dtype, *args, **kwargs):
+        obj = super(ThreadIndexingSymbol, cls).__xnew__(cls, name, dtype, *args, **kwargs)
+        return obj
+
+    __xnew__ = staticmethod(__new_stage2__)
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+
+BLOCK_IDX = [ThreadIndexingSymbol("blockIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+THREAD_IDX = [ThreadIndexingSymbol("threadIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+BLOCK_DIM = [ThreadIndexingSymbol("blockDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+GRID_DIM = [ThreadIndexingSymbol("gridDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]


 class AbstractIndexing(abc.ABC):
@@ -69,6 +83,7 @@ class AbstractIndexing(abc.ABC):
     def symbolic_parameters(self):
         """Set of symbols required in call_parameters code"""
+


 # -------------------------------------------- Implementations ---------------------------------------------------------

@@ -82,6 +97,7 @@ class BlockIndexing(AbstractIndexing):
                                                 gets the largest amount of threads
         compile_time_block_size: compile in concrete block size, otherwise the cuda variable 'blockDim' is used
     """
+
     def __init__(self, field, iteration_slice,
                  block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
                  maximum_block_size=(1024, 1024, 64)):
diff --git a/pystencils_tests/test_opencl.py b/pystencils_tests/test_opencl.py
index 71a97ce779c194e9ef85fb58754ec2d068f31d53..00d84215cea7d4032326a048a07935a9e8221554 100644
--- a/pystencils_tests/test_opencl.py
+++ b/pystencils_tests/test_opencl.py
@@ -2,7 +2,7 @@ import sympy as sp

 import pystencils
 from pystencils.backends.cuda_backend import CudaBackend
-from pystencils.backends.opencl_backend import OpenCLBackend
+from pystencils.backends.opencl_backend import OpenClBackend


 def test_opencl_backend():
@@ -21,7 +21,7 @@ def test_opencl_backend():
     code = pystencils.show_code(ast, custom_backend=CudaBackend())
     print(code)

-    opencl_code = pystencils.show_code(ast, custom_backend=OpenCLBackend())
+    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
     print(opencl_code)
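
For illustration only, not part of the patch above: a minimal standalone sketch of the CUDA-to-OpenCL index translation that the new OpenClSympyPrinter._print_ThreadIndexingSymbol performs, reusing the mapping tables from the diff. The helper name translate_indexing_symbol is hypothetical and does not exist in pystencils.

# Standalone sketch of the symbol translation done by OpenClSympyPrinter.
# DIMENSION_MAPPING / INDEXING_FUNCTION_MAPPING mirror the tables in the diff above;
# `translate_indexing_symbol` is an illustrative helper, not a pystencils API.
DIMENSION_MAPPING = {'x': '0', 'y': '1', 'z': '2'}
INDEXING_FUNCTION_MAPPING = {
    'blockIdx': 'get_group_id',
    'threadIdx': 'get_local_id',
    'blockDim': 'get_local_size',
    'gridDim': 'get_global_size',
}


def translate_indexing_symbol(symbol_name: str) -> str:
    """Map a CUDA indexing symbol such as 'threadIdx.x' to the OpenCL call it prints as."""
    function_name, dimension = symbol_name.split(".")
    return f"{INDEXING_FUNCTION_MAPPING[function_name]}({DIMENSION_MAPPING[dimension]})"


assert translate_indexing_symbol("threadIdx.x") == "get_local_id(0)"
assert translate_indexing_symbol("blockIdx.z") == "get_group_id(2)"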