From 9da89cd1cb7646d663361737ecd5ae8e964fb478 Mon Sep 17 00:00:00 2001 From: Julian Hammer <julian.hammer@fau.de> Date: Fri, 6 Nov 2020 15:44:34 +0100 Subject: [PATCH] Updated Kerncraft Coupling --- .../kerncraft_coupling/kerncraft_interface.py | 222 +++++++++++++++++- 1 file changed, 214 insertions(+), 8 deletions(-) diff --git a/pystencils/pystencils/kerncraft_coupling/kerncraft_interface.py b/pystencils/pystencils/kerncraft_coupling/kerncraft_interface.py index 0aa4dcd24..0f156a01b 100644 --- a/pystencils/pystencils/kerncraft_coupling/kerncraft_interface.py +++ b/pystencils/pystencils/kerncraft_coupling/kerncraft_interface.py @@ -1,4 +1,8 @@ from tempfile import TemporaryDirectory +import fcntl +import textwrap +from copy import deepcopy +import warnings import sympy as sp from collections import defaultdict @@ -6,14 +10,18 @@ import kerncraft import kerncraft.kernel from typing import Optional from kerncraft.machinemodel import MachineModel +from jinja2 import Template from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark -from pystencils.astnodes import LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess, KernelFunction +from pystencils.backends.cbackend import generate_c, get_headers +from pystencils.astnodes import \ + LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess, KernelFunction from pystencils.field import get_layout_from_strides from pystencils.sympyextensions import count_operations_in_ast from pystencils.transformations import filtered_tree_iteration from pystencils.utils import DotDict -import warnings +from pystencils.astnodes import PragmaBlock + class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode): @@ -39,6 +47,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode): # Initialize state self.asm_block = None self._filename = filename + self._keep_intermediates = False self.kernel_ast = ast self.temporary_dir = TemporaryDirectory() @@ -94,7 +103,7 @@ class 
PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode): for field in fields_accessed: layout = get_layout_tuple(field) permuted_shape = list(field.shape[i] for i in layout) - self.set_variable(field.name, str(field.dtype), tuple(permuted_shape)) + self.set_variable(field.name, (str(field.dtype),), tuple(permuted_shape)) # Scalars may be safely ignored # for param in ast.get_parameters(): @@ -127,14 +136,211 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode): print("----------------------------- FLOPS -------------------------------") pprint(self._flops) - def as_code(self, type_='iaca', openmp=False): + def get_kernel_header(self, name='kernel'): + """ + Generate and store kernel.h + + :return: tuple of filename of header and file pointer of lockfile + """ + file_name = 'kernel.h' + file_path = self.get_intermediate_location( + file_name, machine_and_compiler_dependent=False) + lock_mode, lock_fp = self.lock_intermediate(file_path) + if lock_mode == fcntl.LOCK_SH: + # use cache + with open(file_path) as f: + code = f.read() + else: # lock_mode == fcntl.LOCK_EX + # needs update + code = generate_c(self.kernel_ast, signature_only=True) + with open(file_path, 'w') as f: + f.write(code) + self.release_exclusive_lock(lock_fp) # degrade to shared lock + + return file_name, lock_fp + + def get_kernel_code(self, openmp=False, name='kernel'): + """ + Generate and return compilable source code with kernel function from AST. 
+ + :param openmp: include openmp pragmas (or strip them) + :param name: name of kernel function + """ + assert not openmp, "openmp is currently not support by pystencils" + filename = 'kernel' + if openmp: + filename += '-omp' + filename += '.c' + file_path = self.get_intermediate_location( + filename, machine_and_compiler_dependent=False) + lock_mode, lock_fp = self.lock_intermediate(file_path) + + if lock_mode == fcntl.LOCK_SH: + # use cache + with open(file_path) as f: + code = f.read() + else: # lock_mode == fcntl.LOCK_EX + # needs update + + kernel_template =Template(textwrap.dedent(""" + #include <stdlib.h> + #include <stdint.h> + #include <stdbool.h> + #include <math.h> + + #define RESTRICT __restrict__ + #define FUNC_PREFIX + + {{ includes }} + + {{kernel_code}} + """)) + header_list = get_headers(self.kernel_ast) + includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list]) + + ast = deepcopy(self.kernel_ast) + # Strip "#pragma omp parallel" from within kernel, because main function takes care of + # that + if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock): + ast.body.args[0].pragma_line = '' + + code = kernel_template.render( + kernel_code=generate_c(self.kernel_ast, dialect='c'), + includes=includes) + + # Store to file + with open(file_path, 'w') as f: + f.write(code) + print(code) + self.release_exclusive_lock(lock_fp) # degrade to shared lock + + return file_path, lock_fp + + CODE_TEMPLATE = textwrap.dedent(""" + #include <likwid.h> + #include <stdlib.h> + #include <stdint.h> + #include <stdbool.h> + #include <math.h> + #include "kerncraft.h" + #include "kernel.h" + + #define RESTRICT __restrict__ + #define FUNC_PREFIX + void dummy(void *); + extern int var_false; + + int main(int argc, char **argv) { + {%- for constantName, dataType in constants %} + // Constant {{constantName}} + {{dataType}} {{constantName}}; + {{constantName}} = 0.23; + {%- endfor %} + + // Declaring arrays + {%- for field_name,
dataType, size in fields %} + + // Initialization {{field_name}} + double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64); + // TODO initialize in parallel context in same order as they are touched + for (unsigned long long i = 0; i < {{size}}; ++i) + {{field_name}}[i] = 0.23; + {%- endfor %} + + likwid_markerInit(); + #pragma omp parallel + { + likwid_markerRegisterRegion("loop"); + #pragma omp barrier + + // Initializing arrays in same order as touched in kernel loop nest + //INIT_ARRAYS; + + // Dummy call + {%- for field_name, dataType, size in fields %} + if(var_false) dummy({{field_name}}); + {%- endfor %} + {%- for constantName, dataType in constants %} + if(var_false) dummy(&{{constantName}}); + {%- endfor %} + + for(int warmup = 1; warmup >= 0; --warmup) { + int repeat = 2; + if(warmup == 0) { + repeat = atoi(argv[1]); + likwid_markerStartRegion("loop"); + } + + for(; repeat > 0; --repeat) { + {{kernelName}}({{call_argument_list}}); + + {%- for field_name, dataType, size in fields %} + if(var_false) dummy({{field_name}}); + {%- endfor %} + {%- for constantName, dataType in constants %} + if(var_false) dummy(&{{constantName}}); + {%- endfor %} + } + + } + likwid_markerStopRegion("loop"); + } + likwid_markerClose(); + return 0; + } + """) + + def get_main_code(self, kernel_function_name='kernel'): """ - Generate and return compilable source code. + Generate and return compilable source code from AST. - :param type: can be iaca or likwid. - :param openmp: if true, openmp code will be generated + :return: tuple of filename and shared lock file pointer """ - return generate_benchmark(self.kernel_ast, likwid=type_ == 'likwid', openmp=openmp) + # TODO produce nicer code, including help text and other "comfort features". + assert self.kernel_ast is not None, "AST does not exist, this could be due to running " \ + "based on a kernel description rather than code." 
+ + file_path = self.get_intermediate_location('main.c', machine_and_compiler_dependent=False) + lock_mode, lock_fp = self.lock_intermediate(file_path) + + if lock_mode == fcntl.LOCK_SH: + # use cache + with open(file_path) as f: + code = f.read() + else: # lock_mode == fcntl.LOCK_EX + # needs update + accessed_fields = {f.name: f for f in ast.fields_accessed} + constants = [] + fields = [] + call_parameters = [] + for p in ast.get_parameters(): + if not p.is_field_parameter: + constants.append((p.symbol.name, str(p.symbol.dtype))) + call_parameters.append(p.symbol.name) + else: + assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size" + field = accessed_fields[p.field_name] + dtype = str(get_base_type(p.symbol.dtype)) + fields.append((p.field_name, dtype, prod(field.shape))) + call_parameters.append(p.field_name) + + header_list = get_headers(ast) + includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list]) + + # Generate code + code = benchmark_template.render( + kernelName=ast.function_name, + fields=fields, + constants=constants, + call_agument_list=','.join(call_parameters), + includes=includes) + + # Store to file + with open(file_path, 'w') as f: + f.write(code) + self.release_exclusive_lock(lock_fp) # degrade to shared lock + + return file_path, lock_fp class KerncraftParameters(DotDict): -- GitLab