diff --git a/__init__.py b/__init__.py
index dba04d0b3a546be8ae58e1f67cd32f66bee145a3..5d8aab2a4562ffd1c67cc69bda395f6875d38839 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,4 +1,5 @@
-from .sweep import Sweep
-from .generate_packinfo import generate_pack_info_from_kernel, generate_pack_info_for_field
+from .codegen import generate_sweep, generate_pack_info_from_kernel, generate_pack_info_for_field, generate_pack_info
+from .cmake_integration import CodeGeneration
 
-__all__ = ['Sweep', 'generate_pack_info_from_kernel', 'generate_pack_info_for_field']
+__all__ = ['CodeGeneration',
+           'generate_sweep', 'generate_pack_info_from_kernel', 'generate_pack_info_for_field', 'generate_pack_info']
diff --git a/cmake_integration.py b/cmake_integration.py
index d3a69feb2bb41e1c22a621fd54ab0a02fe782310..2ce021dff210ecf0f1e8248b55e75c3715ccd0ec 100644
--- a/cmake_integration.py
+++ b/cmake_integration.py
@@ -9,71 +9,70 @@ Usage example:
     codegen.register(['MyClass.h', 'MyClass.cpp'], functionReturningTwoStringsForHeaderAndCpp)
 """
-import atexit
-from argparse import ArgumentParser
+import json
+import sys
+import os
+import warnings
+
+__all__ = ['CodeGeneration']
 
 
-class CodeGeneratorCMakeIntegration:
-
+class CodeGeneration:
     def __init__(self):
-        self._registeredGenerators = []
-
-    def register(self, files, generation_function):
-        """
-        Register function that generates on or more source files
-        :param files: paths of files to generate
-        :param generation_function: function that returns a tuple of string with the file contents
-                                    returned tuple has to have as many entries as files
-        """
-        self._registeredGenerators.append((files, generation_function))
-
-    @property
-    def generated_files(self):
-        return sum((e[0] for e in self._registeredGenerators), [])
-
-    def get_generators(self):
-        """Returns a dict mapping filename to function that generates code for this file"""
-        return {tuple(a): b for a, b in self._registeredGenerators}
-
-    def generate(self):
-        for paths, generator_function in self._registeredGenerators:
-            files = generator_function()
-            assert len(files) == len(paths), "Registered generator function does not return expected amount of files"
-            for path, file in zip(paths, files):
-                with open(path, 'w') as f:
-                    f.write(file)
-
-
-codegen = CodeGeneratorCMakeIntegration()
-
-
-def main():
-    from pystencils.gpucuda.indexing import AUTO_BLOCK_SIZE_LIMITING
-
-    # prevent automatic block size detection of CUDA generation module
-    # this would import pycuda, which might not be available, and if it is available problems occur
-    # since we use atexit and pycuda does as well, leading to a tear-down problem
-    previous_block_size_limiting_state = AUTO_BLOCK_SIZE_LIMITING
-    AUTO_BLOCKSIZE_LIMITING = False
-
-    parser = ArgumentParser()
-    parser.add_argument("-l", "--list-output-files", action='store_true', default=False,
-                        help="Prints a list of files this script generates instead of generating them")
-    parser.add_argument("-g", "--generate", action='store_true', default=False,
-                        help="Generates the files")
-
-    args = parser.parse_args()
-    if args.list_output_files:
-        print(";".join(codegen.generated_files))
-    elif args.generate:
-        codegen.generate()
+        expected_files, cmake_vars = parse_json_args()
+        self.context = CodeGenerationContext(cmake_vars)
+        self.expected_files = expected_files
+
+    def __enter__(self):
+        return self.context
+
+    def __exit__(self, *args):
+        if self.expected_files and (set(self.context.files_written) != set(self.expected_files)):
+            only_in_cmake = set(self.expected_files) - set(self.context.files_written)
+            only_generated = set(self.context.files_written) - set(self.expected_files)
+            error_message = "Generated files specified not correctly in cmake with 'waLBerla_python_file_generates'\n"
+            if only_in_cmake:
+                error_message += "Files only specified in CMake {}\n".format([os.path.basename(p) for p in only_in_cmake])
+            if only_generated:
+                error_message += "Unexpected generated files {}\n".format([os.path.basename(p) for p in only_generated])
+            raise ValueError(error_message)
+
+
+def parse_json_args():
+    default = {'EXPECTED_FILES': [],
+               'CMAKE_VARS': {'WALBERLA_BUILD_WITH_OPENMP': False,
+                              'WALBERLA_OPTIMIZE_FOR_LOCALHOST': False,
+                              'WALBERLA_DOUBLE_ACCURACY': True,
+                              'WALBERLA_BUILD_WITH_MPI': True}
+               }
+
+    if len(sys.argv) == 2:
+        try:
+            parsed = json.loads(sys.argv[1])
+        except json.JSONDecodeError:
+            warnings.warn("Could not parse JSON arguments")
+            parsed = default
     else:
-        parser.print_help()
-    AUTO_BLOCKSIZE_LIMITING = previous_block_size_limiting_state
-
-
-def do_not_run_generation_at_exit():
-    atexit.unregister(main)
-
-
-atexit.register(main)
+        parsed = default
+    expected_files = parsed['EXPECTED_FILES']
+    cmake_vars = {}
+    for key, value in parsed['CMAKE_VARS'].items():
+        if value in ("ON", "1", "YES"):
+            value = True
+        elif value in ("OFF", "0", "NO"):
+            value = False
+        cmake_vars[key] = value
+    return expected_files, cmake_vars
+
+
+class CodeGenerationContext:
+    def __init__(self, cmake_vars):
+        self.files_written = []
+        self.openmp = cmake_vars['WALBERLA_BUILD_WITH_OPENMP']
+        self.optimize_for_localhost = cmake_vars['WALBERLA_OPTIMIZE_FOR_LOCALHOST']
+        self.mpi = cmake_vars['WALBERLA_BUILD_WITH_MPI']
+        self.double_accuracy = cmake_vars['WALBERLA_DOUBLE_ACCURACY']
+
+    def write_file(self, name, content):
+        self.files_written.append(os.path.abspath(name))
+        with open(name, 'w') as f:
+            f.write(content)
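Note (not part of the patch): with the new interface, the build system drives a generation script by passing a single JSON argument, which parse_json_args() above turns into EXPECTED_FILES and boolean CMAKE_VARS. A minimal sketch of a script using the CodeGeneration context manager directly; the file name MyClass.h is only illustrative, and the script also runs standalone with the defaults:

    # Illustrative sketch only. CMake is expected to invoke the script with one JSON argv entry
    # of the form parsed by parse_json_args() above, e.g.
    #   {"EXPECTED_FILES": [...], "CMAKE_VARS": {"WALBERLA_BUILD_WITH_OPENMP": "ON", ...}}
    from pystencils_walberla import CodeGeneration

    with CodeGeneration() as ctx:
        # ctx exposes the parsed CMake toggles: ctx.openmp, ctx.mpi,
        # ctx.double_accuracy and ctx.optimize_for_localhost
        precision = "double" if ctx.double_accuracy else "float"
        ctx.write_file("MyClass.h", "// generated for {} precision\n".format(precision))
    # __exit__ raises ValueError if the written files do not match EXPECTED_FILES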
diff --git a/codegen.py b/codegen.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bad99bd4d7dbe35ad92f4c61c3032b9de506a70
--- /dev/null
+++ b/codegen.py
@@ -0,0 +1,209 @@
+from jinja2 import Environment, PackageLoader
+from collections import OrderedDict, defaultdict
+from itertools import product
+from typing import Dict, Sequence, Tuple, Optional
+
+from pystencils import create_staggered_kernel, Field, create_kernel, Assignment, FieldType
+from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
+from pystencils.stencils import offset_to_direction_string, inverse_direction
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+
+
+__all__ = ['generate_sweep', 'generate_pack_info', 'generate_pack_info_for_field', 'generate_pack_info_from_kernel',
+           'default_create_kernel_parameters', 'KernelInfo']
+
+
+def generate_sweep(generation_context, class_name, assignments,
+                   namespace='pystencils', field_swaps=(), staggered=False, varying_parameters=(),
+                   inner_outer_split=False,
+                   **create_kernel_params):
+    if hasattr(assignments, 'all_assignments'):
+        assignments = assignments.all_assignments
+
+    create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
+
+    if not staggered:
+        ast = create_kernel(assignments, **create_kernel_params)
+    else:
+        ast = create_staggered_kernel(*assignments, **create_kernel_params)
+
+    def to_name(f):
+        return f.name if isinstance(f, Field) else f
+
+    field_swaps = tuple((to_name(e[0]), to_name(e[1])) for e in field_swaps)
+    temporary_fields = tuple(e[1] for e in field_swaps)
+
+    ast.function_name = class_name.lower()
+
+    env = Environment(loader=PackageLoader('pystencils_walberla'))
+    add_pystencils_filters_to_jinja_env(env)
+
+    if inner_outer_split is False:
+        jinja_context = {
+            'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters),
+            'namespace': namespace,
+            'class_name': class_name,
+            'target': create_kernel_params.get("target", "cpu"),
+        }
+        header = env.get_template("Sweep.tmpl.h").render(**jinja_context)
+        source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context)
+    else:
+        main_kernel_info = KernelInfo(ast, temporary_fields, field_swaps, varying_parameters)
+        representative_field = {p.field_name for p in main_kernel_info.parameters if p.is_field_parameter}
+        representative_field = sorted(representative_field)[0]
+
+        jinja_context = {
+            'kernel': main_kernel_info,
+            'namespace': namespace,
+            'class_name': class_name,
+            'target': create_kernel_params.get("target", "cpu"),
+            'field': representative_field,
+        }
+        header = env.get_template("SweepInnerOuter.tmpl.h").render(**jinja_context)
+        source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**jinja_context)
+
+    source_extension = "cpp" if create_kernel_params.get("target", "cpu") == "cpu" else "cu"
+    generation_context.write_file("{}.h".format(class_name), header)
+    generation_context.write_file("{}.{}".format(class_name, source_extension), source)
+
+
+def generate_pack_info_for_field(generation_context, class_name: str, field: Field,
+                                 direction_subset: Optional[Tuple[Tuple[int, int, int]]] = None,
+                                 **create_kernel_params):
+    if not direction_subset:
+        direction_subset = tuple((i, j, k) for i, j, k in product(*[(-1, 0, 1)] * 3))
+
+    all_index_accesses = [field(*ind) for ind in product(*[range(s) for s in field.index_shape])]
+    return generate_pack_info(generation_context, class_name, {direction_subset: all_index_accesses},
+                              **create_kernel_params)
+
+
+def generate_pack_info_from_kernel(generation_context, class_name: str, assignments: Sequence[Assignment],
+                                   **create_kernel_params):
+    reads = set()
+    for a in assignments:
+        reads.update(a.rhs.atoms(Field.Access))
+    spec = defaultdict(set)
+    for fa in reads:
+        assert all(abs(e) <= 1 for e in fa.offsets)
+        for comm_dir in comm_directions(fa.offsets):
+            spec[(comm_dir,)].add(fa.field.center(*fa.index))
+    return generate_pack_info(generation_context, class_name, spec, **create_kernel_params)
+
+
+def generate_pack_info(generation_context, class_name: str,
+                       directions_to_pack_terms: Dict[Tuple[Tuple], Sequence[Field.Access]],
+                       namespace='pystencils',
+                       **create_kernel_params):
+    items = [(e[0], sorted(e[1], key=lambda x: str(x))) for e in directions_to_pack_terms.items()]
+    items = sorted(items, key=lambda e: e[0])
+    directions_to_pack_terms = OrderedDict(items)
+
+    create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
+    target = create_kernel_params.get('target', 'cpu')
+
+    fields_accessed = set()
+    for terms in directions_to_pack_terms.values():
+        for term in terms:
+            assert isinstance(term, Field.Access) and all(e == 0 for e in term.offsets)
+            fields_accessed.add(term)
+
+    field_names = {fa.field.name for fa in fields_accessed}
+
+    data_types = {fa.field.dtype for fa in fields_accessed}
+    if len(data_types) != 1:
+        raise NotImplementedError("Fields of different data types are used - this is not supported")
+    dtype = data_types.pop()
+
+    pack_kernels = OrderedDict()
+    unpack_kernels = OrderedDict()
+    all_accesses = set()
+    elements_per_cell = OrderedDict()
+    for direction_set, terms in directions_to_pack_terms.items():
+        for d in direction_set:
+            if not all(abs(i) <= 1 for i in d):
+                raise NotImplementedError("Only first neighborhood supported")
+
+        buffer = Field.create_generic('buffer', spatial_dimensions=1, field_type=FieldType.BUFFER,
+                                      dtype=dtype.numpy_dtype, index_shape=(len(terms),))
+
+        direction_strings = tuple(offset_to_direction_string(d) for d in direction_set)
+        inv_direction_string = tuple(offset_to_direction_string(inverse_direction(d)) for d in direction_set)
+        all_accesses.update(terms)
+
+        pack_ast = create_kernel([Assignment(buffer(i), term) for i, term in enumerate(terms)],
+                                 **create_kernel_params)
+        pack_ast.function_name = 'pack_{}'.format("_".join(direction_strings))
+        unpack_ast = create_kernel([Assignment(term, buffer(i)) for i, term in enumerate(terms)],
+                                   **create_kernel_params)
+        unpack_ast.function_name = 'unpack_{}'.format("_".join(inv_direction_string))
+
+        pack_kernels[direction_strings] = KernelInfo(pack_ast)
+        unpack_kernels[inv_direction_string] = KernelInfo(unpack_ast)
+        elements_per_cell[direction_strings] = len(terms)
+
+    fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], **create_kernel_params)
+
+    jinja_context = {
+        'class_name': class_name,
+        'pack_kernels': pack_kernels,
+        'unpack_kernels': unpack_kernels,
+        'fused_kernel': KernelInfo(fused_kernel),
+        'elements_per_cell': elements_per_cell,
+        'target': target,
+        'dtype': dtype,
+        'field_name': field_names.pop(),
+        'namespace': namespace,
+    }
+
+    env = Environment(loader=PackageLoader('pystencils_walberla'))
+    add_pystencils_filters_to_jinja_env(env)
+    header = env.get_template("GpuPackInfo.tmpl.h").render(**jinja_context)
+    source = env.get_template("GpuPackInfo.tmpl.cpp").render(**jinja_context)
+
+    source_extension = "cpp" if create_kernel_params.get("target", "cpu") == "cpu" else "cu"
+    generation_context.write_file("{}.h".format(class_name), header)
+    generation_context.write_file("{}.{}".format(class_name, source_extension), source)
+
+
+# ---------------------------------- Internal ----------------------------------------------------------------------
+
+
+class KernelInfo:
+    def __init__(self, ast, temporary_fields=(), field_swaps=(), varying_parameters=()):
+        self.ast = ast
+        self.temporary_fields = tuple(temporary_fields)
+        self.field_swaps = tuple(field_swaps)
+        self.varying_parameters = tuple(varying_parameters)
+        self.parameters = ast.get_parameters()  # cache parameters here
+
+
+def default_create_kernel_parameters(generation_context, params):
+    default_dtype = "float64" if generation_context.double_accuracy else 'float32'
+
+    if generation_context.optimize_for_localhost:
+        default_vec_is = get_supported_instruction_sets()[-1]
+    else:
+        default_vec_is = None
+
+    params['target'] = params.get('target', 'cpu')
+    params['data_type'] = params.get('data_type', default_dtype)
+    params['cpu_openmp'] = params.get('cpu_openmp', generation_context.openmp)
+    params['cpu_vectorize_info'] = params.get('cpu_vectorize_info', {})
+
+    vec = params['cpu_vectorize_info']
+    vec['instruction_set'] = vec.get('instruction_set', default_vec_is)
+    vec['assume_aligned'] = vec.get('assume_aligned', False)
+    vec['nontemporal'] = vec.get('nontemporal', False)
+    return params
+
+
+def comm_directions(direction):
+    direction = inverse_direction(direction)
+    yield direction
+    for i in range(len(direction)):
+        if direction[i] != 0:
+            dir_as_list = list(direction)
+            dir_as_list[i] = 0
+            if not all(e == 0 for e in dir_as_list):
+                yield tuple(dir_as_list)
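Note (not part of the patch): generate_sweep and the pack-info generators above replace the generate_packinfo and sweep modules removed below. A hypothetical Jacobi-style generation script, assuming the usual pystencils field-description syntax, could look like:

    # Illustrative sketch only; field and class names are made up.
    import pystencils as ps
    from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel

    with CodeGeneration() as ctx:
        src, dst = ps.fields("src, dst: double[3D]")
        update = [ps.Assignment(dst.center,
                                (src[1, 0, 0] + src[-1, 0, 0] + src[0, 1, 0] +
                                 src[0, -1, 0] + src[0, 0, 1] + src[0, 0, -1]) / 6)]
        # writes JacobiSweep.h / JacobiSweep.cpp through ctx.write_file
        generate_sweep(ctx, 'JacobiSweep', update, field_swaps=[(src, dst)])
        # derives the communication pattern from the kernel's neighbor accesses
        generate_pack_info_from_kernel(ctx, 'JacobiPackInfo', update)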
diff --git a/generate_packinfo.py b/generate_packinfo.py
deleted file mode 100644
index 6da4056fa96eb70f137356ac55a2cf65c15c3dc8..0000000000000000000000000000000000000000
--- a/generate_packinfo.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from itertools import product
-from collections import defaultdict, OrderedDict
-from typing import Dict, Sequence, Tuple, Optional
-from jinja2 import Environment, PackageLoader
-from pystencils import Field, FieldType, Assignment, create_kernel
-from pystencils.stencils import offset_to_direction_string, inverse_direction
-from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
-from pystencils_walberla.sweep import KernelInfo
-
-
-def comm_directions(direction):
-    direction = inverse_direction(direction)
-    yield direction
-    for i in range(len(direction)):
-        if direction[i] != 0:
-            dir_as_list = list(direction)
-            dir_as_list[i] = 0
-            if not all(e == 0 for e in dir_as_list):
-                yield tuple(dir_as_list)
-
-
-def generate_pack_info_for_field(class_name: str, field: Field,
-                                 direction_subset: Optional[Tuple[Tuple[int, int, int]]] = None,
-                                 **create_kernel_params):
-    if not direction_subset:
-        direction_subset = tuple((i, j, k) for i, j, k in product(*[(-1, 0, 1)] * 3))
-
-    all_index_accesses = [field(*ind) for ind in product(*[range(s) for s in field.index_shape])]
-    return generate_pack_info(class_name, {direction_subset: all_index_accesses}, **create_kernel_params)
-
-
-def generate_pack_info_from_kernel(class_name: str, assignments: Sequence[Assignment], **create_kernel_params):
-    reads = set()
-    for a in assignments:
-        reads.update(a.rhs.atoms(Field.Access))
-    spec = defaultdict(set)
-    for fa in reads:
-        assert all(abs(e) <= 1 for e in fa.offsets)
-        for comm_dir in comm_directions(fa.offsets):
-            spec[(comm_dir,)].add(fa.field.center(*fa.index))
-    return generate_pack_info(class_name, spec, **create_kernel_params)
-
-
-def generate_pack_info(class_name: str,
-                       directions_to_pack_terms: Dict[Tuple[Tuple], Sequence[Field.Access]],
-                       namespace='pystencils',
-                       **create_kernel_params):
-
-    items = [(e[0], sorted(e[1], key=lambda x: str(x))) for e in directions_to_pack_terms.items()]
-    items = sorted(items, key=lambda e: e[0])
-    directions_to_pack_terms = OrderedDict(items)
-    target = create_kernel_params.get('target', 'cpu')
-
-    fields_accessed = set()
-    for terms in directions_to_pack_terms.values():
-        for term in terms:
-            assert isinstance(term, Field.Access) and all(e == 0 for e in term.offsets)
-            fields_accessed.add(term)
-
-    field_names = {fa.field.name for fa in fields_accessed}
-
-    data_types = {fa.field.dtype for fa in fields_accessed}
-    if len(data_types) != 1:
-        raise NotImplementedError("Fields of different data types are used - this is not supported")
-    dtype = data_types.pop()
-
-    pack_kernels = OrderedDict()
-    unpack_kernels = OrderedDict()
-    all_accesses = set()
-    elements_per_cell = OrderedDict()
-    for direction_set, terms in directions_to_pack_terms.items():
-        for d in direction_set:
-            if not all(abs(i) <= 1 for i in d):
-                raise NotImplementedError("Only first neighborhood supported")
-
-        buffer = Field.create_generic('buffer', spatial_dimensions=1, field_type=FieldType.BUFFER,
-                                      dtype=dtype.numpy_dtype, index_shape=(len(terms),))
-
-        direction_strings = tuple(offset_to_direction_string(d) for d in direction_set)
-        inv_direction_string = tuple(offset_to_direction_string(inverse_direction(d)) for d in direction_set)
-        all_accesses.update(terms)
-
-        pack_ast = create_kernel([Assignment(buffer(i), term) for i, term in enumerate(terms)],
-                                 **create_kernel_params)
-        pack_ast.function_name = 'pack_{}'.format("_".join(direction_strings))
-        unpack_ast = create_kernel([Assignment(term, buffer(i)) for i, term in enumerate(terms)],
-                                   **create_kernel_params)
-        unpack_ast.function_name = 'unpack_{}'.format("_".join(inv_direction_string))
-
-        pack_kernels[direction_strings] = KernelInfo(pack_ast)
-        unpack_kernels[inv_direction_string] = KernelInfo(unpack_ast)
-        elements_per_cell[direction_strings] = len(terms)
-
-    fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], **create_kernel_params)
-
-    context = {
-        'class_name': class_name,
-        'pack_kernels': pack_kernels,
-        'unpack_kernels': unpack_kernels,
-        'fused_kernel': KernelInfo(fused_kernel),
-        'elements_per_cell': elements_per_cell,
-        'target': target,
-        'dtype': dtype,
-        'field_name': field_names.pop(),
-        'namespace': namespace,
-    }
-
-    env = Environment(loader=PackageLoader('pystencils_walberla'))
-    add_pystencils_filters_to_jinja_env(env)
-    header = env.get_template("GpuPackInfo.tmpl.h").render(**context)
-    source = env.get_template("GpuPackInfo.tmpl.cpp").render(**context)
-    return header, source
diff --git a/jinja_filters.py b/jinja_filters.py
index 0dac492aa6eadb4e9bdadf2cb6b19213f3f2ea95..b96ba4bd15ed06e864a89cfca64030ec0f8eea36 100644
--- a/jinja_filters.py
+++ b/jinja_filters.py
@@ -209,11 +209,11 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
         if field.field_type == FieldType.BUFFER:
             kernel_call_lines.append("%s %s = %s;" % (param.symbol.dtype, param.symbol.name, param.field_name))
         else:
-            coordinates = set(get_start_coordinates(field))
-            coordinates = sorted(coordinates, key=lambda e: str(e))
+            coordinates = get_start_coordinates(field)
             actual_gls = "int_c(%s->nrOfGhostLayers())" % (param.field_name, )
-
-            for c in coordinates:
+            coord_set = set(coordinates)
+            coord_set = sorted(coord_set, key=lambda e: str(e))
+            for c in coord_set:
                 kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, -%s);" % (c, actual_gls))
 
             while len(coordinates) < 4:
diff --git a/sweep.py b/sweep.py
deleted file mode 100644
index 1720b3bb9ab94f15b30e7845951310483efc72af..0000000000000000000000000000000000000000
--- a/sweep.py
+++ /dev/null
@@ -1,150 +0,0 @@
-import sympy as sp
-import functools
-from jinja2 import Environment, PackageLoader
-
-from pystencils import kernel as kernel_decorator, create_staggered_kernel
-from pystencils import Field, SymbolCreator, create_kernel
-from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
-
-
-class KernelInfo:
-    def __init__(self, ast, temporary_fields=(), field_swaps=(), varying_parameters=()):
-        self.ast = ast
-        self.temporary_fields = tuple(temporary_fields)
-        self.field_swaps = tuple(field_swaps)
-        self.varying_parameters = tuple(varying_parameters)
-        self.parameters = ast.get_parameters()  # cache parameters here
-
-
-class Sweep:
-    const = SymbolCreator()
-
-    def __init__(self, dim=3, f_size=None):
-        self.dim = dim
-        self.f_size = f_size
-        self._field_swaps = []
-        self._temporary_fields = []
-
-    @staticmethod
-    def constant(name):
-        """Create a symbolic constant that is passed to the sweep as a parameter"""
-        return sp.Symbol(name)
-
-    def field(self, name, f_size=None, dtype='float64'):
-        """Create a symbolic field that is passed to the sweep as BlockDataID"""
-        # layout does not matter, since it is only used to determine order of spatial loops i.e. zyx, which is
-        # always the same in walberla
-        if self.dim is None:
-            raise ValueError("Set the dimension of the sweep first, e.g. sweep.dim=3")
-        return Field.create_generic(name, dtype=dtype, spatial_dimensions=self.dim, index_dimensions=1 if f_size else 0,
-                                    layout='fzyx', index_shape=(f_size,) if f_size else None)
-
-    def temporary_field(self, field, tmp_field_name=None, dtype='float64'):
-        """Creates a temporary field as clone of field, which is swapped at the end of the sweep"""
-        if tmp_field_name is None:
-            tmp_field_name = field.name + "_tmp"
-        self._temporary_fields.append(tmp_field_name)
-        self._field_swaps.append((tmp_field_name, field.name))
-        return Field.create_generic(tmp_field_name, dtype=dtype, spatial_dimensions=field.spatial_dimensions,
-                                    index_dimensions=field.index_dimensions, layout=field.layout,
-                                    index_shape=field.index_shape)
-
-    @staticmethod
-    def generate(name, sweep_function, namespace='pystencils', target='cpu',
-                 dim=None, f_size=None, optimization={},):
-        from pystencils_walberla.cmake_integration import codegen
-        sweep = Sweep(dim, f_size)
-
-        func = functools.partial(kernel_decorator, sweep_function, sweep=sweep)
-        cb = functools.partial(Sweep._generate_header_and_source, func, name, target,
-                               namespace, sweep._temporary_fields, sweep._field_swaps, optimization=optimization,
-                               staggered=False, varying_parameters=[])
-
-        file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
-        codegen.register(file_names, cb)
-
-    @staticmethod
-    def generate_from_equations(name, function_returning_assignments, temporary_fields=[], field_swaps=[],
-                                namespace="pystencils", target='cpu', optimization={},
-                                staggered=False, varying_parameters=[], **kwargs):
-
-        cb = functools.partial(Sweep._generate_header_and_source, function_returning_assignments, name, target,
-                               namespace, temporary_fields, field_swaps,
-                               optimization=optimization, staggered=staggered,
-                               varying_parameters=varying_parameters, **kwargs)
-
-        file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
-        from pystencils_walberla.cmake_integration import codegen
-        codegen.register(file_names, cb)
-
-    @staticmethod
-    def generate_inner_outer_kernel(name, function_returning_assignments, temporary_fields=[], field_swaps=[],
-                                    namespace="pystencils", target='cpu', optimization={}, outer_optimization={},
-                                    varying_parameters=[], **kwargs):
-        def generate_callback():
-            eqs = function_returning_assignments(**kwargs)
-
-            ast = create_kernel(eqs, target=target, **optimization)
-            ast.function_name = name
-
-            env = Environment(loader=PackageLoader('pystencils_walberla'))
-            add_pystencils_filters_to_jinja_env(env)
-
-            main_kernel_info = KernelInfo(ast, temporary_fields, field_swaps, varying_parameters)
-            representative_field = {p.field_name for p in main_kernel_info.parameters if p.is_field_parameter}
-            representative_field = sorted(representative_field)[0]
-
-            context = {
-                'kernel': main_kernel_info,
-                'namespace': namespace,
-                'class_name': ast.function_name[0].upper() + ast.function_name[1:],
-                'target': target,
-                'field': representative_field,
-            }
-
-            header = env.get_template("SweepInnerOuter.tmpl.h").render(**context)
-            source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**context)
-            return header, source
-
-        file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
-        from pystencils_walberla.cmake_integration import codegen
-        codegen.register(file_names, generate_callback)
-
-    @staticmethod
-    def generate_pack_info(name, function_returning_assignments, target='gpu', **kwargs):
-        from pystencils_walberla.cmake_integration import codegen
-
-        def callback():
-            from pystencils_walberla.generate_packinfo import generate_pack_info_from_kernel
-            assignments = function_returning_assignments()
-            return generate_pack_info_from_kernel(name, assignments, target=target, **kwargs)
-
-        file_names = [name + ".h", name + ('.cpp' if target == 'cpu' else '.cu')]
-        codegen.register(file_names, callback)
-
-    @staticmethod
-    def _generate_header_and_source(function_returning_assignments, name, target, namespace,
-                                    temporary_fields, field_swaps, optimization, staggered,
-                                    varying_parameters, **kwargs):
-        eqs = function_returning_assignments(**kwargs)
-
-        if not staggered:
-            ast = create_kernel(eqs, target=target, **optimization)
-        else:
-            ast = create_staggered_kernel(*eqs, target=target, **optimization)
-        ast.function_name = name
-
-        env = Environment(loader=PackageLoader('pystencils_walberla'))
-        add_pystencils_filters_to_jinja_env(env)
-
-        context = {
-            'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters),
-            'namespace': namespace,
-            'class_name': ast.function_name[0].upper() + ast.function_name[1:],
-            'target': target,
-        }
-
-        header = env.get_template("Sweep.tmpl.h").render(**context)
-        source = env.get_template("Sweep.tmpl.cpp").render(**context)
-        return header, source
-
diff --git a/templates/Sweep.tmpl.h b/templates/Sweep.tmpl.h
index bda958e8fff1fcebcd4d723bee17ecfb727c7f3d..af3879c12681ca35b16137913e0bdd0f9c3c116d 100644
--- a/templates/Sweep.tmpl.h
+++ b/templates/Sweep.tmpl.h
@@ -51,7 +51,7 @@ class {{class_name}}
 {
 public:
     {{class_name}}( {{kernel|generate_constructor_parameters}}{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %})
-        : {{ kernel|generate_constructor_initializer_list }}, stream_(stream)
+        : {{ kernel|generate_constructor_initializer_list }}{%if target is equalto 'gpu'%}, stream_(stream) {%endif %}
     {};
 
     {{ kernel| generate_destructor(class_name) |indent(4) }}
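Note (not part of the patch): a pack info can also be generated for a whole field without a kernel; with target='gpu' the source file gets a .cu extension, matching the GpuPackInfo templates rendered by generate_pack_info above. Field and class names here are again made up:

    # Illustrative sketch only, assuming a GPU-capable pystencils installation.
    import pystencils as ps
    from pystencils_walberla import CodeGeneration, generate_pack_info_for_field

    with CodeGeneration() as ctx:
        pdfs = ps.fields("pdfs(19): double[3D]")
        # packs every index entry of the field for all first-neighborhood directions,
        # emitting PdfPackInfo.h and PdfPackInfo.cu
        generate_pack_info_for_field(ctx, 'PdfPackInfo', pdfs, target='gpu')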