diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py index 5abbdef5616ad0aa277dbb3c40d9d3d177d11514..4a55c4d7bd8116e457d76894fa5947b5521e6eb6 100644 --- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py +++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py @@ -94,7 +94,7 @@ const bool infoCsePdfs = {cse_pdfs}; # DEFAULTS optimize = True - + with CodeGeneration() as ctx: openmp = True if ctx.openmp else False field_type = "float64" if ctx.double_accuracy else "float32" diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py index 7c395af38f8f7488dfdaaa77fe63c719788c97dd..1293f063085bd67d11fe6ff9fbe2f0ab131a3aac 100644 --- a/python/lbmpy_walberla/sweep_collection.py +++ b/python/lbmpy_walberla/sweep_collection.py @@ -112,8 +112,9 @@ def lbm_kernel_family(class_name, kernel_name, def lbm_kernel(field_accessor, lb_stencil): return create_lbm_kernel(collision_rule, src_field, dst_field, field_accessor) advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"} - field_swaps.append(('pdfs', 'pdfs_tmp')) - temporary_fields.append('pdfs_tmp') + if not is_inplace(streaming_pattern): + field_swaps.append(('pdfs', 'pdfs_tmp')) + temporary_fields.append('pdfs_tmp') field_swaps.sort() temporary_fields.sort() elif kernel_name == "collide": @@ -126,14 +127,22 @@ def lbm_kernel_family(class_name, kernel_name, def lbm_kernel(field_accessor, lb_stencil): return create_stream_only_kernel(lb_stencil, src_field, dst_field, field_accessor) advance_timestep = {"field_name": src_field.name, "function": "advanceTimestep"} - field_swaps = [('pdfs', 'pdfs_tmp')] - temporary_fields = ['pdfs_tmp'] + if is_inplace(streaming_pattern): + field_swaps = () + temporary_fields = () + else: + field_swaps = [('pdfs', 'pdfs_tmp')] + temporary_fields = ['pdfs_tmp'] elif kernel_name == "streamOnlyNoAdvancement": def lbm_kernel(field_accessor, lb_stencil): return create_stream_only_kernel(lb_stencil, src_field, dst_field, field_accessor) advance_timestep = {"field_name": src_field.name, "function": "getTimestepPlusOne"} - field_swaps = () - temporary_fields = ['pdfs_tmp'] + if is_inplace(streaming_pattern): + field_swaps = () + temporary_fields = () + else: + field_swaps = () + temporary_fields = ['pdfs_tmp'] else: raise ValueError(f"kernel name: {kernel_name} is not valid") @@ -153,7 +162,8 @@ def lbm_kernel_family(class_name, kernel_name, nodes.append(KernelCallNode(ast)) tree = EvenIntegerCondition('timestep', nodes[0], nodes[1], parameter_dtype=np.uint8) - family = KernelFamily(tree, class_name, field_timestep=advance_timestep) + family = KernelFamily(tree, class_name, field_timestep=advance_timestep, + temporary_fields=temporary_fields, field_swaps=field_swaps) else: timestep = Timestep.BOTH accessor = get_accessor(streaming_pattern, timestep)