Skip to content
Snippets Groups Projects
Commit aae404b1 authored by Stephan Seitz
Browse files

Save current (broken?) state of work

parent ff23a9d9
No related branches found
No related tags found
No related merge requests found
Pipeline #21567 failed
......@@ -7,6 +7,7 @@
"""
"""
from copy import copy
from enum import Enum
import numpy as np
......@@ -37,6 +38,17 @@ class DataTransferKind(str, Enum):
return self in [self.HOST_TO_DEVICE, self.DEVICE_TO_HOST, self.SWAP]
# class BoundaryHandling:
# def __init__(self, field: pystencils.Field):
# self.field = field
# def __str__(self):
# return f'BoundaryHandling on {self.field}'
# def __repr__(self):
# return self.__str__()
class DataTransfer:
def __init__(self, field: pystencils.Field, kind: DataTransferKind):
self.field = field
......@@ -67,9 +79,10 @@ class Communication(DataTransfer):
class KernelCall:
def __init__(self, kernel: pystencils.kernel_wrapper.KernelWrapper, kwargs):
def __init__(self, kernel: pystencils.kernel_wrapper.KernelWrapper, kwargs, tmp_field_swaps=None):
    """Record a single kernel invocation.

    Args:
        kernel: wrapper of the kernel that is called.
        kwargs: keyword arguments stored alongside the call.
        tmp_field_swaps: optional list of ``(field, destination)`` pairs
            attached to this call. A fresh list is created when omitted.
    """
    self.kernel = kernel
    self.kwargs = kwargs
    # A literal ``[]`` default would be one list shared by every instance;
    # merge_swaps_with_kernel_calls appends to this attribute in place, so
    # each call needs its own list.
    self.tmp_field_swaps = [] if tmp_field_swaps is None else tmp_field_swaps
def __str__(self):
return "Call " + str(self.kernel.ast.function_name)
......@@ -91,7 +104,7 @@ class TimeloopRun:
@property
def asts(self):
return self.timeloop._single_step_asts
return [s.kernel.ast for s in self.timeloop._single_step_asts if hasattr(s, 'kernel')]
class GraphDataHandling(pystencils.datahandling.SerialDataHandling):
......@@ -113,17 +126,22 @@ class GraphDataHandling(pystencils.datahandling.SerialDataHandling):
self._single_step_functions.append(f)
def add_call(self, functor, argument_list):
for argument_dict in argument_list:
self._single_step_asts.append((functor, argument_dict) if not hasattr(functor, 'ast') else functor.ast)
if hasattr(functor, 'kernel'):
functor = functor.kernel
if not isinstance(argument_list, list):
argument_list = [argument_list]
if hasattr(functor, 'ast'):
self._single_step_asts.append(KernelCall(functor, {}))
else:
former_queue = self.parent.call_queue
self.parent.call_queue = []
#
functor(*argument_list)
self._single_step_asts.extend(self.parent.call_queue)
self.parent.call_queue = former_queue
def run(self, time_steps=1):
self.parent.call_queue.append(TimeloopRun(self, time_steps))
former_call_queue = copy(self.parent.call_queue)
self.parent.call_queue = []
super().run(time_steps)
self.parent.call_queue = former_call_queue
former_call_queue.append(TimeloopRun(self, time_steps))
def swap(self, src, dst, is_gpu):
if isinstance(src, str):
......@@ -135,10 +153,20 @@ class GraphDataHandling(pystencils.datahandling.SerialDataHandling):
def __init__(self, *args, **kwargs):
self.call_queue = []
self._timeloop_record = None
super().__init__(*args, **kwargs)
def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None, layout=None,
cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
def add_array(self,
name,
values_per_cell=1,
dtype=np.float64,
latex_name=None,
ghost_layers=None,
layout=None,
cpu=True,
gpu=None,
alignment=False,
field_type=FieldType.GENERIC):
super().add_array(name,
values_per_cell,
......@@ -189,10 +217,17 @@ class GraphDataHandling(pystencils.datahandling.SerialDataHandling):
self.call_queue.append(DataTransfer(self._fields[name], DataTransferKind.HOST_TO_DEVICE))
def synchronization_function(self, names, stencil=None, target=None, **_):
for name in names:
gpu = target == 'gpu'
self.call_queue.append(Communication(self._fields[name], stencil, gpu))
super().synchronization_function(names, stencil=None, target=None, **_)
def func():
for name in names:
gpu = target == 'gpu'
self.call_queue.append(Communication(self._fields[name], stencil, gpu))
pystencils.datahandling.SerialDataHandling.synchronization_function(self,
names,
stencil=None,
target=None,
**_)
return func
def __str__(self):
return '\n'.join(str(c) for c in self.call_queue)
......@@ -205,6 +240,23 @@ class GraphDataHandling(pystencils.datahandling.SerialDataHandling):
self.call_queue.append('Fill ' + array_name)
super().fill(array_name, val, value_idx, slice_obj, ghost_layers, inner_ghost_layers)
def merge_swaps_with_kernel_calls(self, call_queue=None):
    """Fold ``Swap`` entries into the ``KernelCall`` directly before them.

    Every ``Swap`` whose immediate predecessor in the queue is a
    ``KernelCall`` is removed from the queue, and its
    ``(field, destination)`` pair is recorded in that kernel call's
    ``tmp_field_swaps`` (duplicates are not added twice). The queue is
    modified in place. The step queues of all ``TimeloopRun`` entries are
    processed recursively.

    Args:
        call_queue: list to process; defaults to ``self.call_queue``.
    """
    # TODO(seitz): should be moved to ComputationGraph
    queue = self.call_queue if call_queue is None else call_queue

    # Collect all candidate pairs first so that removing entries below
    # does not disturb the neighbour detection.
    mergeable = []
    for previous, current in zip(queue[:-1], queue[1:]):
        if isinstance(current, Swap) and isinstance(previous, KernelCall):
            mergeable.append((current, previous))

    for swap, kernel_call in mergeable:
        queue.remove(swap)
        entry = (swap.field, swap.destination)
        if entry not in kernel_call.tmp_field_swaps:
            kernel_call.tmp_field_swaps.append(entry)

    for item in queue:
        if isinstance(item, TimeloopRun):
            self.merge_swaps_with_kernel_calls(item.timeloop._single_step_asts)
# TODO
# def reduce_float_sequence(self, sequence, operation, all_reduce=False) -> np.array:
# return np.array(sequence)
......
......@@ -17,20 +17,25 @@ from stringcase import camelcase, pascalcase
import pystencils
from pystencils.astnodes import SympyAssignment
from pystencils.data_types import TypedSymbol
from pystencils.data_types import TypedSymbol, create_type
from pystencils_autodiff._file_io import read_template_from_file
from pystencils_autodiff.framework_integration.astnodes import JinjaCppFile
def _make_field_type(field, on_gpu):
    """Build the field type for ``field`` via ``pystencils_walberla``.

    Args:
        field: field whose base data type and f-size are looked up.
        on_gpu: forwarded unchanged to ``make_field_type``.

    Returns:
        Whatever ``pystencils_walberla.jinja_filters.make_field_type``
        returns (presumably the C++ type name -- confirm against that
        library).
    """
    # Imported lazily, as in the rest of this module.
    from pystencils_walberla.jinja_filters import make_field_type, get_field_fsize

    base_type = pystencils.data_types.get_base_type(field.dtype)
    return make_field_type(base_type, get_field_fsize(field), on_gpu)
class FieldType(JinjaCppFile):
TEMPLATE = jinja2.Template("{{ field_type }}")
def __init__(self, field: pystencils.Field, on_gpu: bool):
from pystencils_walberla.jinja_filters import make_field_type, get_field_fsize
f_size = get_field_fsize(field)
field_type = make_field_type(pystencils.data_types.get_base_type(field.dtype), f_size, on_gpu)
field_type = _make_field_type(field, on_gpu)
ast_dict = {'on_gpu': on_gpu,
'field_type': field_type
......@@ -393,16 +398,22 @@ class FillFromFlagField(JinjaCppFile):
class LbCommunicationSetup(JinjaCppFile):
TEMPLATE = jinja2.Template("""blockforest::communication::UniformBufferedScheme<lbm::{{ lb_model_type }}::CommunicationStencil> {{ communication }}( blocks );
{{ communication }}.addPackInfo( make_shared< lbm::PdfFieldPackInfo<lbm::{{ lb_model_type }}> >( {{ pdf_id }} ) );
TEMPLATE = jinja2.Template("""
{{CommonicationScheme}}<lbm::{{ lb_model_type }}::CommunicationStencil> {{ communication }}( blocks );
{{ communication }}.addPackInfo( make_shared< {{ packinfo_class }} >( {{ pdf_id }} ) );
""") # noqa
def __init__(self, lb_model_type, pdf_id):
def __init__(self, lb_model_type, pdf_id, packinfo_class, gpu):
self._symbol = TypedSymbol('communication', 'auto')
self.gpu = gpu
ast_dict = {
'lb_model_type': lb_model_type,
'pdf_id': pdf_id,
'communication': self._symbol,
'packinfo_class': packinfo_class,
'CommonicationScheme': ('cuda::communication::UniformGPUScheme'
if gpu else 'blockforest::communication::UniformBufferedScheme')
}
super().__init__(ast_dict)
......@@ -414,7 +425,14 @@ class LbCommunicationSetup(JinjaCppFile):
def symbols_defined(self):
return {self.symbol}
headers = ['"blockforest/communication/UniformBufferedScheme.h"', '"lbm/communication/PdfFieldPackInfo.h"']
@property
def headers(self):
    """Return the include directives needed by the communication setup.

    The first entry depends on ``self.gpu``; the two pack-info headers are
    the same for both variants. A new list is returned on every access.
    """
    if self.gpu:
        scheme_header = '"cuda/communication/UniformGPUScheme.h"'
    else:
        scheme_header = '"blockforest/communication/UniformBufferedScheme.h"'
    return [scheme_header, '"lbm/communication/PdfFieldPackInfo.h"', '"PackInfo.h"']
class BeforeFunction(JinjaCppFile):
......@@ -490,6 +508,40 @@ class TimeLoop(JinjaCppFile):
headers = ['"timeloop/all.h"']
class ForLoop(JinjaCppFile):
    """Node that renders a C++ ``for`` loop around a list of children.

    The template declares the loop symbol with its own dtype, starts at
    ``loop_start``, runs while the symbol is ``<= loop_end`` (inclusive
    upper bound) and advances by ``loop_increment``.
    """

    TEMPLATE = jinja2.Template("""
for( {{node.loop_symbol.dtype}} {{node.loop_symbol}} = {{node.loop_start}}; {{node.loop_symbol}} <= {{node.loop_end}}; {{node.loop_symbol}}+= {{node.loop_increment}} ) {
{%- for c in children %}
{{ c | indent(3) }}
{%- endfor -%}
}
""")  # noqa

    def __init__(self,
                 loop_start,
                 loop_end,
                 children,
                 loop_symbol=TypedSymbol('t_', create_type('int64')),
                 loop_increment=1):
        """Store the loop settings in the node's ``ast_dict``.

        Args:
            loop_start: initial value of the loop symbol.
            loop_end: last value for which the body is executed.
            children: nodes rendered inside the loop body.
            loop_symbol: loop counter; defaults to an ``int64`` symbol ``t_``.
            loop_increment: amount added to the counter per iteration.
        """
        super().__init__(dict(loop_symbol=loop_symbol,
                              loop_start=loop_start,
                              loop_end=loop_end,
                              children=children,
                              loop_increment=loop_increment))

    @property
    def loop_symbol(self):
        """The loop counter symbol stored in ``ast_dict``."""
        # NOTE(review): relies on ``ast_dict`` supporting attribute access;
        # confirm against JinjaCppFile.
        return self.ast_dict.loop_symbol

    @property
    def symbols_defined(self):
        """The loop counter is the only symbol this node defines."""
        return {self.loop_symbol}
class U_Rho_Adaptor(JinjaCppFile):
"""Docstring for U_Rho_Adaptor. """
......@@ -683,7 +735,13 @@ class InitBoundaryHandling(JinjaCppFile):
{% endfor %}
""") # noqa
def __init__(self, block_forest, flag_field_id, pdf_field_id, boundary_conditions, boundary_kernel: dict, field_allocations):
def __init__(self,
block_forest,
flag_field_id,
pdf_field_id,
boundary_conditions,
boundary_kernel: dict,
field_allocations):
self.fluid = FlagUidDefinition("fluid")
ast_dict = {'fluid_uid_definition': self.fluid,
'geometry_initialization': BoundaryHandlingFromConfig(block_forest,
......@@ -768,7 +826,11 @@ class GeneratedBoundaryInitialization(JinjaCppFile):
class SweepCreation(JinjaCppFile):
TEMPLATE = jinja2.Template("""{{ sweep_class_name }}( {{ parameter_str }} )""") # noqa
def __init__(self, sweep_class_name: str, field_allocation: AllocateAllFields, ast, parameters_to_ignore=None):
def __init__(self,
sweep_class_name: str,
field_allocation: AllocateAllFields,
ast,
parameters_to_ignore=[]):
def resolve_parameter(p):
if ast.target == 'cpu':
dict = field_allocation._cpu_allocations
......@@ -780,7 +842,8 @@ class SweepCreation(JinjaCppFile):
parameters = ast.get_parameters()
parameter_ids = [resolve_parameter(p)
for p in parameters
if p.is_field_pointer or not p.is_field_parameter]
if (p.is_field_pointer and p.symbol.field_name not in parameters_to_ignore)
or not p.is_field_parameter]
ast_dict = {'sweep_class_name': sweep_class_name,
'parameter_ids': parameter_ids,
......@@ -806,3 +869,52 @@ for( auto& block : *{{block_forest}}) {{sweep_class_name | lower() }}(&block);""
'sweep_class_name': functor.ast_dict.sweep_class_name,
'block_forest': block_forest}
super().__init__(ast_dict)
class FieldCopy(JinjaCppFile):
    """Node that renders a ``cuda::fieldCpy`` call between two field ids.

    The template arguments of the call are the field types computed for
    the source and destination field by ``_make_field_type``.
    """

    TEMPLATE = jinja2.Template("""cuda::fieldCpy<{{ src_type }}, {{ dst_type }}>({{ block_forest }}, {{ src_id }}, {{ dst_id }});""")  # noqa

    headers = ['"cuda/FieldCopy.h"']

    def __init__(self, block_forest, src_id, src_field, src_gpu, dst_id, dst_field, dst_gpu):
        """Collect ids and field types for the copy.

        Args:
            block_forest: expression rendered as first call argument.
            src_id: id of the field that is copied from.
            src_field: field used to derive the source field type.
            src_gpu: passed as ``on_gpu`` for the source field type.
            dst_id: id of the field that is copied to.
            dst_field: field used to derive the destination field type.
            dst_gpu: passed as ``on_gpu`` for the destination field type.
        """
        super().__init__({
            'src_id': src_id,
            'dst_id': dst_id,
            'src_type': _make_field_type(src_field, src_gpu),
            'dst_type': _make_field_type(dst_field, dst_gpu),
            'block_forest': block_forest,
        })
class SwapFields(JinjaCppFile):
    """Node that renders ``std::swap`` on two field ids.

    .. warning::

        Prefer temporary fields in sweeps over this class! Two full fields have higher memory usage.
    """

    TEMPLATE = jinja2.Template("""std::swap({{field_id0}}, {{field_id1}});""")

    headers = ["<algorithm>"]

    def __init__(self, field_id0, field_id1):
        """Store the two ids that are exchanged by the rendered statement."""
        super().__init__({'field_id0': field_id0, 'field_id1': field_id1})
class Communication(JinjaCppFile):
    """Node that renders a bare ``communication()`` call.

    The ``gpu`` flag is stored in ``ast_dict`` but the template does not
    use it.
    """

    TEMPLATE = jinja2.Template("""communication()""")

    # NOTE(review): same header list as SwapFields; looks copied -- confirm
    # whether <algorithm> is really what this node needs.
    headers = ["<algorithm>"]

    def __init__(self, gpu):
        """Store ``gpu`` as the only entry of the node's ``ast_dict``."""
        super().__init__({'gpu': gpu})
......@@ -7,19 +7,20 @@
"""
import itertools
from typing import Dict
import sympy as sp
from stringcase import pascalcase
from stringcase import camelcase, pascalcase
import lbmpy_walberla
import pystencils
import pystencils_walberla.codegen
from pystencils.astnodes import Block, EmptyLine
from pystencils_autodiff.walberla import (
AllocateAllFields, CMakeLists, DefinitionsHeader, InitBoundaryHandling, LbCommunicationSetup,
ResolveUndefinedSymbols, RunTimeLoop, SweepCreation, SweepOverAllBlocks, TimeLoop,
UniformBlockforestFromConfig, WalberlaMain, WalberlaModule)
AllocateAllFields, CMakeLists, DefinitionsHeader, FieldCopy, InitBoundaryHandling,
LbCommunicationSetup, ResolveUndefinedSymbols, RunTimeLoop, SwapFields, SweepCreation,
SweepOverAllBlocks, TimeLoop, UniformBlockforestFromConfig, WalberlaMain, WalberlaModule)
class WaldUndWiesenSimulation():
......@@ -35,7 +36,8 @@ class WaldUndWiesenSimulation():
codegen_context,
boundary_handling: pystencils.boundaries.BoundaryHandling = None,
lb_rule=None,
refinement_scaling=None):
refinement_scaling=None,
boundary_handling_target='gpu'):
self._data_handling = graph_data_handling
self._lb_rule = lb_rule
self._refinement_scaling = refinement_scaling
......@@ -49,6 +51,17 @@ class WaldUndWiesenSimulation():
self._with_gui = False
self._with_gui_default = False
self._boundary_kernels = {}
self._boundary_handling_target = boundary_handling_target
pystencils_walberla.codegen.generate_pack_info_for_field(
self._codegen_context,
'PackInfo',
pystencils.Field.create_generic(graph_data_handling.fields['ldc_pdf'].name,
graph_data_handling.fields['ldc_pdf'].spatial_dimensions,
graph_data_handling.fields['ldc_pdf'].dtype.numpy_dtype,
graph_data_handling.fields['ldc_pdf'].index_dimensions,
index_shape=graph_data_handling.fields['ldc_pdf'].index_shape,),
target=self._boundary_handling_target)
self._packinfo_class = 'PackInfo'
def _create_helper_files(self) -> Dict[str, str]:
if self._lb_rule:
......@@ -58,7 +71,13 @@ class WaldUndWiesenSimulation():
if self._boundary_handling:
for bc in self.boundary_conditions:
self._boundary_kernels.update({bc.name: lbmpy_walberla.generate_boundary(
self._codegen_context, pascalcase(bc.name), bc, self._lb_rule.method)})
self._codegen_context,
pascalcase(bc.name),
bc,
self._lb_rule.method,
target=self._boundary_handling_target)},
)
self._bh_cycler = itertools.cycle(self._boundary_kernels.keys())
def _create_module(self):
if self._lb_rule:
......@@ -77,10 +96,11 @@ class WaldUndWiesenSimulation():
if self._lb_rule:
pdf_field_id = field_allocations._gpu_allocations.get(
'ldc_pdfSrc', field_allocations._cpu_allocations['ldc_pdfSrc']).symbol
'ldc_pdf', field_allocations._cpu_allocations['ldc_pdf']).symbol
else:
pdf_field_id = None
self._data_handling.merge_swaps_with_kernel_calls()
call_nodes = filter(lambda x: x, [self._graph_to_sweep(c) for c in self._data_handling.call_queue])
module = WalberlaModule(WalberlaMain(Block([
......@@ -96,7 +116,9 @@ class WaldUndWiesenSimulation():
self._field_allocations)
if self._boundary_handling else EmptyLine(),
LbCommunicationSetup(self._lb_model_name,
pdf_field_id)
pdf_field_id,
self._packinfo_class,
self._boundary_handling_target)
if self._lb_rule else EmptyLine(),
*call_nodes
]), self.parameter_config_block)
......@@ -130,28 +152,79 @@ class WaldUndWiesenSimulation():
return self._boundary_handling._boundary_object_to_boundary_info.keys()
def _graph_to_sweep(self, c):
from pystencils_autodiff.graph_datahandling import KernelCall, TimeloopRun
from pystencils_autodiff.graph_datahandling import KernelCall, TimeloopRun, DataTransferKind, DataTransfer
if isinstance(c, KernelCall):
sweep_class_name = next(self._kernel_class_generator)
pystencils_walberla.codegen.generate_sweep(
self._codegen_context, sweep_class_name, c.kernel.ast)
rtn = SweepOverAllBlocks(SweepCreation(sweep_class_name, self._field_allocations,
c.kernel.ast), self._block_forest.blocks)
rtn = SweepOverAllBlocks(SweepCreation(sweep_class_name,
self._field_allocations,
c.kernel.ast,
parameters_to_ignore=[s[1].name for s in c.tmp_field_swaps]),
self._block_forest.blocks)
elif isinstance(c, TimeloopRun):
sweeps = []
for a in c.timeloop._single_step_asts:
if 'indexField' in [f.name for f in a.fields_accessed]:
continue
sweep_class_name = next(self._kernel_class_generator)
pystencils_walberla.codegen.generate_sweep(
self._codegen_context, sweep_class_name, a)
sweeps.append(SweepCreation(sweep_class_name, self._field_allocations, a))
if isinstance(a, KernelCall):
if 'indexField' in [f.name for f in a.kernel.ast.fields_accessed]:
bh = next(self._bh_cycler)
sweeps.append(camelcase(bh))
continue
sweep_class_name = next(self._kernel_class_generator)
pystencils_walberla.codegen.generate_sweep(
self._codegen_context, sweep_class_name, a.kernel.ast, field_swaps=a.tmp_field_swaps)
sweeps.append(SweepCreation(sweep_class_name,
self._field_allocations,
a.kernel.ast,
parameters_to_ignore=[s[1].name for s in a.tmp_field_swaps]))
elif isinstance(c, DataTransfer):
if c.kind in (DataTransferKind.HOST_COMMUNICATION, DataTransferKind.DEVICE_COMMUNICATION):
src = self._field_allocations._cpu_allocations[c.field.name].symbol
dst = self._field_allocations._cpu_allocations[c.destination.name].symbol
rtn = SwapFields(src, dst)
elif c.kind == DataTransferKind.DEVICE_SWAP:
src = self._field_allocations._gpu_allocations[c.field.name].symbol
dst = self._field_allocations._gpu_allocations[c.destination.name].symbol
rtn = SwapFields(src, dst)
elif c.kind == DataTransferKind.HOST_TO_DEVICE:
src = self._field_allocations._cpu_allocations[c.field.name].symbol
dst = self._field_allocations._gpu_allocations[c.field.name].symbol
rtn = FieldCopy(self._block_forest.blocks, src, c.field, False, dst, c.field, True)
elif c.kind == DataTransferKind.DEVICE_TO_HOST:
src = self._field_allocations._gpu_allocations[c.field.name].symbol
dst = self._field_allocations._cpu_allocations[c.field.name].symbol
rtn = FieldCopy(self._block_forest.blocks, src, c.field, True, dst, c.field, False)
else:
rtn = None
else:
print(f'time {c}')
loop = TimeLoop(self._block_forest.blocks, [], sweeps, [], sp.S(c.time_steps))
rtn = Block([loop, RunTimeLoop(self._block_forest.blocks, loop, self._with_gui, self._with_gui_default)])
elif isinstance(c, DataTransfer):
if c.kind == DataTransferKind.HOST_SWAP:
src = self._field_allocations._cpu_allocations[c.field.name].symbol
dst = self._field_allocations._cpu_allocations[c.destination.name].symbol
rtn = SwapFields(src, dst)
elif c.kind == DataTransferKind.DEVICE_SWAP:
src = self._field_allocations._gpu_allocations[c.field.name].symbol
dst = self._field_allocations._gpu_allocations[c.destination.name].symbol
rtn = SwapFields(src, dst)
elif c.kind == DataTransferKind.HOST_TO_DEVICE:
src = self._field_allocations._cpu_allocations[c.field.name].symbol
dst = self._field_allocations._gpu_allocations[c.field.name].symbol
rtn = FieldCopy(self._block_forest.blocks, src, c.field, False, dst, c.field, True)
elif c.kind == DataTransferKind.DEVICE_TO_HOST:
src = self._field_allocations._gpu_allocations[c.field.name].symbol
dst = self._field_allocations._cpu_allocations[c.field.name].symbol
rtn = FieldCopy(self._block_forest.blocks, src, c.field, True, dst, c.field, False)
else:
rtn = None
else:
return None
rtn = None
return rtn
......@@ -12,9 +12,8 @@ from os.path import dirname, join
import numpy as np
import lbmpy_walberla
import pystencils
from lbmpy.creationfunctions import create_lb_collision_rule, create_lbm_kernel
from lbmpy.creationfunctions import create_lb_collision_rule
from pystencils.astnodes import Block, EmptyLine, SympyAssignment
from pystencils.data_types import TypedSymbol, create_type
from pystencils_autodiff._file_io import write_file
......@@ -83,6 +82,8 @@ def test_wald_wiesen_lbm():
lbm_step = ldc_setup(domain_size=(30, 30), optimization=opt_params,
fixed_loop_sizes=False, lid_velocity=lid_velocity)
# del lbm_step.data_handling.gpu_arrays.ldc_pdf_tmp
sim = WaldUndWiesenSimulation(lbm_step.data_handling,
ctx,
lbm_step.boundary_handling,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment