Skip to content
Snippets Groups Projects
Commit 8398ad5e authored by Martin Bauer
Browse files

Generation of CPU PackInfos

parent 9069cc00
Branches
No related tags found
No related merge requests found
...@@ -32,7 +32,8 @@ class CodeGeneration: ...@@ -32,7 +32,8 @@ class CodeGeneration:
only_generated = set(self.context.files_written) - set(self.expected_files) only_generated = set(self.context.files_written) - set(self.expected_files)
error_message = "Generated files specified not correctly in cmake with 'waLBerla_python_file_generates'\n" error_message = "Generated files specified not correctly in cmake with 'waLBerla_python_file_generates'\n"
if only_in_cmake: if only_in_cmake:
error_message += "Files only specified in CMake {}\n".format([os.path.basename(p) for p in only_in_cmake]) error_message += "Files only specified in CMake {}\n".format(
[os.path.basename(p) for p in only_in_cmake])
if only_generated: if only_generated:
error_message += "Unexpected generated files {}\n".format([os.path.basename(p) for p in only_generated]) error_message += "Unexpected generated files {}\n".format([os.path.basename(p) for p in only_generated])
raise ValueError(error_message) raise ValueError(error_message)
...@@ -88,6 +89,7 @@ class ManualCodeGenerationContext: ...@@ -88,6 +89,7 @@ class ManualCodeGenerationContext:
Environment parameters like if OpenMP, MPI or CPU-specific optimization should be used can be explicitly passed Environment parameters like if OpenMP, MPI or CPU-specific optimization should be used can be explicitly passed
to constructor instead of getting them from CMake to constructor instead of getting them from CMake
""" """
def __init__(self, openmp=False, optimize_for_localhost=False, mpi=True, double_accuracy=True): def __init__(self, openmp=False, optimize_for_localhost=False, mpi=True, double_accuracy=True):
self.openmp = openmp self.openmp = openmp
self.optimize_for_localhost = optimize_for_localhost self.optimize_for_localhost = optimize_for_localhost
...@@ -96,6 +98,7 @@ class ManualCodeGenerationContext: ...@@ -96,6 +98,7 @@ class ManualCodeGenerationContext:
self.files = dict() self.files = dict()
self.cuda = False self.cuda = False
self.config = "" self.config = ""
def write_file(self, name, content): def write_file(self, name, content):
self.files[name] = content self.files[name] = content
......
...@@ -44,9 +44,6 @@ def generate_sweep(generation_context, class_name, assignments, ...@@ -44,9 +44,6 @@ def generate_sweep(generation_context, class_name, assignments,
to allow for communication hiding. to allow for communication hiding.
**create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel` **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel`
""" """
if hasattr(assignments, 'all_assignments'):
assignments = assignments.all_assignments
create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
if not generation_context.cuda and create_kernel_params['target'] == 'gpu': if not generation_context.cuda and create_kernel_params['target'] == 'gpu':
...@@ -184,8 +181,7 @@ def generate_pack_info(generation_context, class_name: str, ...@@ -184,8 +181,7 @@ def generate_pack_info(generation_context, class_name: str,
create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
target = create_kernel_params.get('target', 'cpu') target = create_kernel_params.get('target', 'cpu')
if not generation_context.cuda and target == 'gpu': template_name = "CpuPackInfo.tmpl" if target == 'cpu' else 'GpuPackInfo.tmpl'
return
fields_accessed = set() fields_accessed = set()
for terms in directions_to_pack_terms.values(): for terms in directions_to_pack_terms.values():
...@@ -237,18 +233,18 @@ def generate_pack_info(generation_context, class_name: str, ...@@ -237,18 +233,18 @@ def generate_pack_info(generation_context, class_name: str,
'unpack_kernels': unpack_kernels, 'unpack_kernels': unpack_kernels,
'fused_kernel': KernelInfo(fused_kernel), 'fused_kernel': KernelInfo(fused_kernel),
'elements_per_cell': elements_per_cell, 'elements_per_cell': elements_per_cell,
'headers': get_headers(fused_kernel),
'target': target, 'target': target,
'dtype': dtype, 'dtype': dtype,
'field_name': field_names.pop(), 'field_name': field_names.pop(),
'namespace': namespace, 'namespace': namespace,
} }
env = Environment(loader=PackageLoader('pystencils_walberla')) env = Environment(loader=PackageLoader('pystencils_walberla'))
add_pystencils_filters_to_jinja_env(env) add_pystencils_filters_to_jinja_env(env)
header = env.get_template("GpuPackInfo.tmpl.h").render(**jinja_context) header = env.get_template(template_name + ".h").render(**jinja_context)
source = env.get_template("GpuPackInfo.tmpl.cpp").render(**jinja_context) source = env.get_template(template_name + ".cpp").render(**jinja_context)
source_extension = "cpp" if create_kernel_params.get("target", "cpu") == "cpu" else "cu" source_extension = "cpp" if target == "cpu" else "cu"
generation_context.write_file("{}.h".format(class_name), header) generation_context.write_file("{}.h".format(class_name), header)
generation_context.write_file("{}.{}".format(class_name, source_extension), source) generation_context.write_file("{}.{}".format(class_name, source_extension), source)
......
#include "stencil/Directions.h"
#include "core/cell/CellInterval.h"
#include "cuda/GPUField.h"
#include "core/DataTypes.h"
#include "{{class_name}}.h"
{% for header in headers %}
#include {{header}}
{% endfor %}
namespace walberla {
namespace {{namespace}} {
using walberla::cell::CellInterval;
using walberla::stencil::Direction;
{% for kernel in pack_kernels.values() %}
{{kernel|generate_definition(target)}}
{% endfor %}
{% for kernel in unpack_kernels.values() %}
{{kernel|generate_definition(target)}}
{% endfor %}
// Packs the data to be sent in direction `dir` into `byte_buffer`.
//
// The raw byte buffer is reinterpreted as an array of the template's dtype and
// exposed to the generated kernel calls under the name `buffer`. The pack kernel
// registered for `dir` is then invoked on the cell interval `ci`.
// Directions without a registered pack kernel end up in the default branch and
// trip WALBERLA_ASSERT.
void {{class_name}}::pack(Direction dir, unsigned char * byte_buffer, IBlock * block) const
{
{{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(byte_buffer);
// Code-generator output; presumably retrieves the fields the kernels access from
// `block` (the field named by field_name is used right below) - TODO confirm
// against the generate_block_data_to_field_extraction filter.
{{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}}
CellInterval ci;
// `ci` is set by the field for the given direction; judging by the method name this
// is the slice of cells adjacent to the ghost layer - NOTE(review): confirm the
// meaning of the arguments `1` and `false` against the field API.
{{field_name}}->getSliceBeforeGhostLayer(dir, ci, 1, false);
// One case group per set of directions that share the same pack kernel.
switch( dir )
{
{%- for direction_set, kernel in pack_kernels.items() %}
{%- for dir in direction_set %}
case stencil::{{dir}}:
{%- endfor %}
{
{{kernel|generate_call(cell_interval="ci")|indent(12)}}
break;
}
{% endfor %}
// No pack kernel was generated for this direction.
default:
WALBERLA_ASSERT(false);
}
}
// Unpacks the data stored in `byte_buffer` into `block` for direction `dir`.
//
// The byte buffer is viewed as an array of the template's dtype (named `buffer`
// for the generated kernel calls). The unpack kernels are selected by the inverse
// of `dir`, and the chosen kernel runs on the cell interval `ci` obtained from
// getGhostRegion. Directions without a registered unpack kernel trip WALBERLA_ASSERT.
void {{class_name}}::unpack(Direction dir, unsigned char * byte_buffer, IBlock * block) const
{
    {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(byte_buffer);
    {{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}}

    CellInterval ci;
    {{field_name}}->getGhostRegion(dir, ci, 1, false);

    // The kernel table is indexed by the inverse of the direction passed in.
    auto communicationDirection = stencil::inverseDir[dir];

    switch( communicationDirection )
    {
        {%- for direction_set, kernel in unpack_kernels.items() %}
        {%- for dir in direction_set %}
        case stencil::{{dir}}:
        {%- endfor %}
        {
            {{kernel|generate_call(cell_interval="ci")|indent(12)}}
            break;
        }
        {% endfor %}
        default:
            WALBERLA_ASSERT(false);
    }
}
// Returns the number of bytes exchanged for direction `dir`:
//   ci.numCells() * elementsPerCell * sizeof(dtype)
// where `ci` is the cell interval obtained from getGhostRegion for `dir` and
// elementsPerCell is the per-direction count baked in by the code generator.
// Directions that are not listed in the switch yield elementsPerCell == 0 and
// therefore a size of 0.
uint_t {{class_name}}::size(stencil::Direction dir, const IBlock * block) const
{
// Code-generator output; presumably retrieves the field named by field_name from
// `block` (it is dereferenced right below) - TODO confirm against the filter.
{{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}}
CellInterval ci;
{{field_name}}->getGhostRegion(dir, ci, 1, false);
uint_t elementsPerCell = 0;
// One case group per set of directions that share the same element count.
switch( dir )
{
{%- for direction_set, elements in elements_per_cell.items() %}
{%- for dir in direction_set %}
case stencil::{{dir}}:
{%- endfor %}
elementsPerCell = {{elements}};
break;
{% endfor %}
// Nothing is communicated in this direction.
default:
elementsPerCell = 0;
}
return ci.numCells() * elementsPerCell * sizeof( {{dtype}} );
}
} // namespace {{namespace}}
} // namespace walberla
\ No newline at end of file
#pragma once
#include "stencil/Directions.h"
#include "core/cell/CellInterval.h"
#include "core/DataTypes.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/IBlock.h"
#include "communication/UniformPackInfo.h"
#define FUNC_PREFIX
#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
namespace walberla {
namespace {{namespace}} {
// Generated pack info deriving from walberla::communication::UniformPackInfo.
//
// Constructor parameters, the initializer list and the data members are all emitted
// by the code generator from the parameters of the fused kernel, with the parameter
// named 'buffer' left out. The pack/unpack/size member functions are only declared
// here; their definitions are not part of this header.
class {{class_name}} : public ::walberla::communication::UniformPackInfo
{
public:
{{class_name}}( {{fused_kernel|generate_constructor_parameters(parameters_to_ignore=['buffer'])}} )
: {{ fused_kernel|generate_constructor_initializer_list(parameters_to_ignore=['buffer']) }}
{};
virtual ~{{class_name}}() {}
// Both properties are reported as true unconditionally.
bool constantDataExchange() const { return true; }
bool threadsafeReceiving() const { return true; }
// Determines the byte count for `dir` via size() and hands the pointer returned by
// buffer.skip(dataSize) to unpack(); presumably skip() also advances the read
// position of the receive buffer by that many bytes - TODO confirm against mpi::RecvBuffer.
void unpackData(IBlock * receiver, stencil::Direction dir, mpi::RecvBuffer & buffer) {
const auto dataSize = size(dir, receiver);
unpack(dir, buffer.skip(dataSize), receiver);
}
// Exchange between two blocks through a temporary buffer: the sender's data is
// packed into a send buffer, a receive buffer is constructed from it, and the data
// is unpacked on the receiver using the inverse direction.
void communicateLocal(const IBlock * sender, IBlock * receiver, stencil::Direction dir) {
//TODO: optimize by generating kernel for this case
mpi::SendBuffer sBuffer;
packData( sender, dir, sBuffer );
mpi::RecvBuffer rBuffer( sBuffer );
unpackData( receiver, stencil::inverseDir[dir], rBuffer );
}
private:
// Determines the byte count for `dir` via size() and passes the pointer returned by
// outBuffer.forward(dataSize) to pack(). The const_cast is needed because pack()
// takes a non-const IBlock pointer while the sender is handed in as const.
void packDataImpl(const IBlock * sender, stencil::Direction dir, mpi::SendBuffer & outBuffer) const {
const auto dataSize = size(dir, sender);
pack(dir, outBuffer.forward(dataSize), const_cast<IBlock*>(sender));
}
// Raw-pointer workers used by the buffer-based functions above.
void pack (stencil::Direction dir, unsigned char * buffer, IBlock * block) const;
void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block) const;
uint_t size (stencil::Direction dir, const IBlock * block) const;
// Data members emitted by the code generator (kernel parameters except 'buffer').
{{fused_kernel|generate_members(parameters_to_ignore=['buffer'])|indent(4)}}
};
} // namespace {{namespace}}
} // namespace walberla
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment