diff --git a/src/pystencils_autodiff/backends/astnodes.py b/src/pystencils_autodiff/backends/astnodes.py
index 51e378afa3ab55cd6ae5c21353a1e8473463e8d2..343dd1036e671447386d6eac660bde584080da49 100644
--- a/src/pystencils_autodiff/backends/astnodes.py
+++ b/src/pystencils_autodiff/backends/astnodes.py
@@ -15,6 +15,7 @@ from os.path import dirname, exists, join
 
 from pystencils.astnodes import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol
 from pystencils.cpu.cpujit import get_cache_config, get_compiler_config
+from pystencils.gpucuda.cudajit import get_cubic_interpolation_include_paths
 from pystencils.include import get_pycuda_include_path, get_pystencils_include_path
 from pystencils_autodiff._file_io import read_template_from_file, write_file
 from pystencils_autodiff.backends.python_bindings import (
@@ -100,6 +101,10 @@ class TorchModule(JinjaCppFile):
 
         super().__init__(ast_dict)
 
+    @property
+    def kernel_wrappers(self):
+        return self.ast_dict['kernel_wrappers']
+
     @classmethod
     def generate_wrapper_function(cls, kernel_ast):
         return WrapperFunction(cls.DESTRUCTURING_CLASS(generate_kernel_call(kernel_ast)),
@@ -129,7 +134,8 @@ class TorchModule(JinjaCppFile):
                                        extra_cuda_cflags=['-std=c++14', '-ccbin', get_compiler_config()['command']],
                                        build_directory=build_dir,
                                        extra_include_paths=[get_pycuda_include_path(),
-                                                            get_pystencils_include_path()])
+                                                            get_pystencils_include_path(),
+                                                            *get_cubic_interpolation_include_paths()])
 
         return torch_extension
 
diff --git a/src/pystencils_autodiff/framework_integration/printer.py b/src/pystencils_autodiff/framework_integration/printer.py
index 173104e1e6ddf3090faabb071c72ae6e07f68d8a..afa694eb20186127fe884f098a562f66f54e6b94 100644
--- a/src/pystencils_autodiff/framework_integration/printer.py
+++ b/src/pystencils_autodiff/framework_integration/printer.py
@@ -90,3 +90,12 @@ class FrameworkIntegrationPrinter(pystencils.backends.cbackend.CBackend):
             "\n" + self._indent + \
             ("\n" + self._indent).join(self._print(node.body).splitlines()) + \
             "\n}"
+
+    def _print_Timeloop(self, node):
+        children_string = '\n    '.join(self._print(c) for c in node.children)
+        return f"""for ({node.loop_symbol.dtype} {node.loop_symbol} = {node.loop_start}; {node.loop_symbol} <= {node.loop_end}; {node.loop_symbol} += {node.loop_increment}) {{
+    {children_string}
+}}"""  # noqa
+
+    def _print_SwapBuffer(self, node):
+        return f'std::swap({node.first_array}, {node.second_array});'
diff --git a/src/pystencils_autodiff/graph_datahandling.py b/src/pystencils_autodiff/graph_datahandling.py
new file mode 100644
index 0000000000000000000000000000000000000000..2125ac229ccda3b3989714638b9e9bdfbbc9c9da
--- /dev/null
+++ b/src/pystencils_autodiff/graph_datahandling.py
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+ +""" + +""" +from enum import Enum + +import numpy as np + +import pystencils.datahandling +import pystencils.kernel_wrapper +import pystencils.timeloop +from pystencils.field import FieldType + + +class DataTransferKind(str, Enum): + UNKNOWN = None + HOST_ALLOC = 'HOST_ALLOC' + DEVICE_ALLOC = 'DEVICE_ALLOC' + HOST_TO_DEVICE = 'HOST_TO_DEVICE' + DEVICE_TO_HOST = 'DEVICE_TO_HOST' + HOST_COMMUNICATION = 'HOST_COMMUNICATION' + DEVICE_COMMUNICATION = 'DEVICE_COMMUNICATION' + HOST_SWAP = 'HOST_SWAP' + DEVICE_SWAP = 'DEVICE_SWAP' + HOST_GATHER = 'HOST_GATHER' + DEVICE_GATHER = 'DEVICE_GATHER' + + def is_alloc(self): + return self in [self.HOST_ALLOC, self.DEVICE_ALLOC] + + def is_transfer(self): + return self in [self.HOST_TO_DEVICE, self.DEVICE_TO_HOST, self.SWAP] + + +class DataTransfer: + def __init__(self, field: pystencils.Field, kind: DataTransferKind): + self.field = field + self.kind = kind + + def __str__(self): + return f'DataTransferKind: {self.kind} with {self.field}' + + +class Swap(DataTransfer): + def __init__(self, source, destination, gpu): + self.kind = DataTransferKind.DEVICE_SWAP if gpu else DataTransferKind.HOST_SWAP + self.field = source + self.destination = destination + + def __str__(self): + return f'Swap: {self.field} with {self.destination}' + + +class Communication(DataTransfer): + def __init__(self, field, stencil, gpu): + self.kind = DataTransferKind.DEVICE_COMMUNICATION if gpu else DataTransferKind.HOST_COMMUNICATION + self.field = field + self.stencil = stencil + + +class KernelCall: + def __init__(self, kernel: pystencils.kernel_wrapper.KernelWrapper, kwargs): + self.kernel = kernel + self.kwargs = kwargs + + def __str__(self): + return "Call " + str(self.kernel.ast.function_name) + + +class TimeloopRun: + def __init__(self, timeloop, time_steps): + self.timeloop = timeloop + self.time_steps = time_steps + + def __str__(self): + return (f'Timeloop:' + + '\nPre:\n' + + '\n '.join(str(f) for f in self.timeloop._pre_run_functions) + + f'\n{self.time_steps} time steps:\n' + + '\n '.join(str(f) for f in self.timeloop._single_step_asts) + + '\nPost:\n' + + '\n '.join(str(f) for f in self.timeloop._post_run_functions)) + + @property + def asts(self): + return self.timeloop._single_step_asts + + +class GraphDataHandling(pystencils.datahandling.SerialDataHandling): + """Docstring for GraphDataHandling. 
""" + + class TimeLoop(pystencils.timeloop.TimeLoop): + def __init__(self, parent, *args, **kwargs): + self.parent = parent + self._single_step_asts = [] + super().__init__(*args, **kwargs) + + def add_pre_run_function(self, f): + self._pre_run_functions.append(f) + + def add_post_run_function(self, f): + self._post_run_functions.append(f) + + def add_single_step_function(self, f): + self._single_step_functions.append(f) + + def add_call(self, functor, argument_list): + for argument_dict in argument_list: + self._single_step_asts.append((functor, argument_dict) if not hasattr(functor, 'ast') else functor.ast) + + if hasattr(functor, 'kernel'): + functor = functor.kernel + if not isinstance(argument_list, list): + argument_list = [argument_list] + + def run(self, time_steps=1): + self.parent.call_queue.append(TimeloopRun(self, time_steps)) + super().run(time_steps) + + def __init__(self, *args, **kwargs): + + self.call_queue = [] + super().__init__(*args, **kwargs) + + def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None, layout=None, + cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC): + + super().add_array(name, + values_per_cell, + dtype, + latex_name, + ghost_layers, + layout, + cpu, + gpu, + alignment, + field_type) + if cpu: + self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.HOST_ALLOC)) + if gpu: + self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.DEVICE_ALLOC)) + + def add_custom_data(self, name, cpu_creation_function, + gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None): + + self.call_queue.append('custom data. WTF?') + super().add_custom_data(name, cpu_creation_function, + gpu_creation_function, cpu_to_gpu_transfer_func, gpu_to_cpu_transfer_func) + + def gather_array(self, name, slice_obj=None, ghost_layers=False, **kwargs): + + self.call_queue.append('gather_array') + super().gather_array(name, slice_obj, ghost_layers, **kwargs) + + def swap(self, name1, name2, gpu=None): + + self.call_queue.append(Swap(self._fields[name1], self._fields[name2], gpu)) + super().swap(name1, name2, gpu) + + def run_kernel(self, kernel_function, **kwargs): + self.call_queue.append(KernelCall(kernel_function, kwargs)) + super().run_kernel(kernel_function, **kwargs) + # skip calling super + + def to_cpu(self, name): + super().to_cpu(name) + self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.HOST_TO_DEVICE)) + + def to_gpu(self, name): + super().to_gpu(name) + if name in self._custom_data_transfer_functions: + self.call_queue.append('Custom Tranfer Function') + else: + self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.DEVICE_TO_HOST)) + + def synchronization_function(self, names, stencil=None, target=None, **_): + for name in names: + gpu = target == 'gpu' + self.call_queue.append(Communication(self._fields[name], stencil, gpu)) + super().synchronization_function(names, stencil=None, target=None, **_) + + def __str__(self): + return '\n'.join(str(c) for c in self.call_queue) + + def create_timeloop(self, *args, **kwargs): + return self.TimeLoop(self, *args, **kwargs) + + def fill(self, array_name: str, val, value_idx=None, + slice_obj=None, ghost_layers=False, inner_ghost_layers=False) -> None: + self.call_queue.append('Fill ' + array_name) + super().fill(array_name, val, value_idx, slice_obj, ghost_layers, inner_ghost_layers) + + # TODO + # def reduce_float_sequence(self, sequence, operation, all_reduce=False) -> 
+    # def reduce_float_sequence(self, sequence, operation, all_reduce=False) -> np.array:
+    #     return np.array(sequence)
+
+    # def reduce_int_sequence(self, sequence, operation, all_reduce=False) -> np.array:
+    #     return np.array(sequence)
+
+    # def create_vtk_writer(self, file_name, data_names, ghost_layers=False):
+    #     pass
+
+    # def create_vtk_writer_for_flag_array(self, file_name, data_name, masks_to_name, ghost_layers=False):
+    #     pass
diff --git a/src/pystencils_autodiff/timeloop_astnodes.py b/src/pystencils_autodiff/timeloop_astnodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..81518e0da3170facdabfb0fc29e59628f80edc57
--- /dev/null
+++ b/src/pystencils_autodiff/timeloop_astnodes.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2020 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+AST nodes for time loops and buffer swaps in generated C++ code.
+"""
+
+from typing import Iterable
+
+import jinja2
+import sympy as sp
+
+from pystencils.astnodes import Node
+from pystencils_autodiff.framework_integration.astnodes import FunctionCall, JinjaCppFile
+
+
+class CustomNode(Node):
+    """Custom base class for the AST nodes defined in this module."""
+
+    def __init__(self, children):
+        self.children = children
+
+    @property
+    def args(self):
+        """Returns all arguments/children of this node."""
+        return self.children
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        undefined = []
+
+        for c in self.args:
+            if hasattr(c, 'undefined_symbols'):
+                undefined.extend(c.undefined_symbols)
+            else:
+                try:
+                    c = sp.sympify(c)
+                    undefined.extend(c.free_symbols)
+                except Exception:
+                    pass
+
+        return set(undefined) - self.symbols_defined
+
+
+class Namespace(CustomNode):
+    """AST node that groups its children under a common name."""
+
+    def __init__(self, name, children):
+        self.name = name
+        self.children = children
+
+    @property
+    def undefined_symbols(self):
+        undefined = []
+
+        for c in self.children:
+            if hasattr(c, 'undefined_symbols'):
+                undefined.extend(c.undefined_symbols)
+
+        return set(undefined)
+
+
+class Timeloop(CustomNode):
+
+    def __init__(self, loop_symbol, loop_start, loop_end, loop_increment,
+                 children: Iterable[FunctionCall], kernel_asts=None):
+        self.children = children
+        self.loop_symbol = loop_symbol
+        self.loop_start = loop_start
+        self.loop_end = loop_end
+        self.loop_increment = loop_increment
+        self.headers = ["<iostream>"]
+        # Avoid a mutable default argument.
+        self.kernel_asts = kernel_asts if kernel_asts is not None else []
+
+    @property
+    def args(self):
+        """Returns all arguments/children of this node."""
+        return (*self.children,
+                self.loop_symbol,
+                self.loop_start,
+                self.loop_end,
+                self.loop_increment
+                )
+
+    def symbolic_loop_args(self) -> set:
+        """Returns the loop arguments that are still unresolved symbols."""
+        loop_args = [self.loop_start,
+                     self.loop_end,
+                     self.loop_increment]
+        return {arg for arg in loop_args if isinstance(arg, sp.Symbol)}
+
+    @property
+    def symbols_defined(self):
+        return {self.loop_symbol}
+
+    def atoms(self, arg_type):
+        """Returns a set of all descendants recursively, which are an instance of the given type."""
+        result = set()
+        for arg in self.args:
+            if isinstance(arg, arg_type):
+                result.add(arg)
+            if hasattr(arg, 'atoms'):
+                result.update(arg.atoms(arg_type))
+        return result
+
+
+class CppNamespace(JinjaCppFile):
+
+    TEMPLATE = jinja2.Template("""
+namespace {{ name }} {
+{% for c in children %}
+   {{ c | indent(3) }}
+{% endfor %}
+}""")
+
+    def __init__(self, name, children):
+
+        ast_dict = {
+            'name': name,
+            'children': children
+        }
+
+        super().__init__(ast_dict)
+
+
+class SwapBuffer(CustomNode):
+    def __init__(self, first_array, second_array):
+        self.first_array = first_array
+        self.second_array = second_array
+        self.headers = ["<utility>"]
+
+    @property
+    def args(self):
+        return (self.first_array, self.second_array)
diff --git a/tests/test_graph_datahandling.py b/tests/test_graph_datahandling.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b22070b8231b9ecca3e31b106ae93de690b3e48
--- /dev/null
+++ b/tests/test_graph_datahandling.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
+#
+# Distributed under terms of the GPLv3 license.
+
+"""
+Checks that GraphDataHandling records the call queue of a lid driven cavity run.
+"""
+
+import pytest
+
+try:
+    from lbmpy.boundaries import UBB, NoSlip
+    from lbmpy.lbstep import LatticeBoltzmannStep
+    from pystencils_autodiff.graph_datahandling import GraphDataHandling
+    from pystencils.slicing import slice_from_direction
+except ImportError:
+    pass
+
+pytest.importorskip('lbmpy')
+
+
+def create_lid_driven_cavity(domain_size=None, lid_velocity=0.005, lbm_kernel=None,
+                             data_handling=None, **kwargs):
+    """Creates a lid driven cavity scenario.
+
+    Args:
+        domain_size: tuple specifying the number of cells in each dimension
+        lid_velocity: x velocity of the lid in lattice coordinates
+        lbm_kernel: an LBM kernel; if not given, one is created automatically
+        data_handling: see documentation of :func:`create_fully_periodic_flow`
+        kwargs: all other parameters are passed on to :mod:`lbmpy.creationfunctions`
+    Returns:
+        instance of :class:`LatticeBoltzmannStep`
+    """
+    assert domain_size is not None or data_handling is not None
+    if data_handling is None:
+        optimization = kwargs.get('optimization', None)
+        target = optimization.get('target', None) if optimization else None
+        data_handling = GraphDataHandling(domain_size,
+                                          periodicity=False,
+                                          default_ghost_layers=1,
+                                          default_target=target)
+    step = LatticeBoltzmannStep(data_handling=data_handling,
+                                lbm_kernel=lbm_kernel,
+                                name="ldc",
+                                timeloop_creation_function=data_handling.create_timeloop,
+                                **kwargs)
+
+    my_ubb = UBB(velocity=[lid_velocity, 0, 0][:step.method.dim])
+    step.boundary_handling.set_boundary(my_ubb, slice_from_direction('N', step.dim))
+    for direction in ('W', 'E', 'S') if step.dim == 2 else ('W', 'E', 'S', 'T', 'B'):
+        step.boundary_handling.set_boundary(NoSlip(), slice_from_direction(direction, step.dim))
+
+    return step
+
+
+def ldc_setup(**kwargs):
+    ldc = create_lid_driven_cavity(relaxation_rate=1.7, **kwargs)
+    ldc.run(50)
+    return ldc
+
+
+def test_graph_datahandling():
+    print("--- LDC 2D test ---")
+
+    opt_params = {'target': 'gpu', 'gpu_indexing_params': {'block_size': (8, 4, 2)}}
+    lbm_step: LatticeBoltzmannStep = ldc_setup(domain_size=(10, 15), optimization=opt_params)
+    print(lbm_step._data_handling)
+    print(lbm_step._data_handling.call_queue)
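
Usage sketch (not part of the patch): a minimal, hypothetical example of inspecting the recorded call queue without lbmpy, assuming a standard pystencils installation. The field names and the copy kernel are illustrative only; create_kernel, compile, fields, fill, run_kernel and swap are the regular pystencils/SerialDataHandling APIs that GraphDataHandling wraps.

import pystencils as ps

from pystencils_autodiff.graph_datahandling import GraphDataHandling

# Every call below is appended to dh.call_queue before being delegated
# to SerialDataHandling, which still performs the actual work.
dh = GraphDataHandling((32, 32))
dh.add_array('src')   # recorded as a HOST_ALLOC DataTransfer
dh.add_array('dst')   # recorded as a HOST_ALLOC DataTransfer

# A trivial copy kernel over the handled fields.
copy_kernel = ps.create_kernel(
    [ps.Assignment(dh.fields['dst'].center, dh.fields['src'].center)]
).compile()

dh.fill('src', 1.0)          # recorded as 'Fill src'
dh.run_kernel(copy_kernel)   # recorded as a KernelCall
dh.swap('src', 'dst')        # recorded as a HOST_SWAP Swap

print(dh)  # prints one recorded operation per line

The recorded queue is what the Timeloop and SwapBuffer AST nodes from timeloop_astnodes.py are meant to consume, e.g. to re-emit a whole simulation run as a C++ loop via FrameworkIntegrationPrinter.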