Skip to content
Snippets Groups Projects
Commit 9da89cd1 authored by Julian Hammer
Browse files

Updated Kerncraft Coupling

parent 622aaa6c
No related branches found
No related tags found
1 merge request: !183 Updated Kerncraft Coupling
from tempfile import TemporaryDirectory
import fcntl
import textwrap
from copy import deepcopy
import warnings
import sympy as sp
from collections import defaultdict
......@@ -6,14 +10,18 @@ import kerncraft
import kerncraft.kernel
from typing import Optional
from kerncraft.machinemodel import MachineModel
from jinja2 import Template
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
from pystencils.astnodes import LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess, KernelFunction
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.astnodes import \
LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess, KernelFunction
from pystencils.field import get_layout_from_strides
from pystencils.sympyextensions import count_operations_in_ast
from pystencils.transformations import filtered_tree_iteration
from pystencils.utils import DotDict
import warnings
from pystencils.astnodes import PragmaBlock
class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
......@@ -39,6 +47,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
# Initialize state
self.asm_block = None
self._filename = filename
self._keep_intermediates = False
self.kernel_ast = ast
self.temporary_dir = TemporaryDirectory()
......@@ -94,7 +103,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
for field in fields_accessed:
layout = get_layout_tuple(field)
permuted_shape = list(field.shape[i] for i in layout)
self.set_variable(field.name, str(field.dtype), tuple(permuted_shape))
self.set_variable(field.name, (str(field.dtype),), tuple(permuted_shape))
# Scalars may be safely ignored
# for param in ast.get_parameters():
......@@ -127,14 +136,211 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
print("----------------------------- FLOPS -------------------------------")
pprint(self._flops)
def as_code(self, type_='iaca', openmp=False):
def get_kernel_header(self, name='kernel'):
    """
    Generate and store kernel.h (the kernel function's C signature).

    The header is written only when ``lock_intermediate`` hands out an
    exclusive lock; with a shared lock the file already on disk is reused.

    :param name: accepted for interface compatibility; not referenced here
    :return: tuple of filename of header and file pointer of lockfile
    """
    header_name = 'kernel.h'
    header_path = self.get_intermediate_location(
        header_name, machine_and_compiler_dependent=False)
    mode, lock_file = self.lock_intermediate(header_path)

    if mode != fcntl.LOCK_SH:
        # Exclusive lock: the intermediate needs to be (re)generated.
        signature = generate_c(self.kernel_ast, signature_only=True)
        with open(header_path, 'w') as out:
            out.write(signature)
        # Degrade to a shared lock now that the file is complete.
        self.release_exclusive_lock(lock_file)
    else:
        # Shared lock: use the cached file (read it, as the original did).
        with open(header_path) as cached:
            cached.read()

    # Note: the bare file name is returned, not the full path.
    return header_name, lock_file
def get_kernel_code(self, openmp=False, name='kernel'):
    """
    Generate and store compilable source code with the kernel function from the AST.

    The file is written only when ``lock_intermediate`` hands out an exclusive
    lock; with a shared lock the file already on disk is reused.

    :param openmp: include openmp pragmas (or strip them); must currently be
                   False, otherwise the assertion below fails
    :param name: name of kernel function; accepted for interface
                 compatibility, not referenced in this method
    :return: tuple of path to the generated C file and file pointer of lockfile
    """
    assert not openmp, "openmp is currently not support by pystencils"
    filename = 'kernel'
    if openmp:
        filename += '-omp'
    filename += '.c'
    file_path = self.get_intermediate_location(
        filename, machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    if lock_mode == fcntl.LOCK_SH:
        # use cache
        with open(file_path) as f:
            code = f.read()
    else:  # lock_mode == fcntl.LOCK_EX
        # needs update
        kernel_template = Template(textwrap.dedent("""
            #include <stdlib.h>
            #include <stdint.h>
            #include <stdbool.h>
            #include <math.h>
            #define RESTRICT __restrict__
            #define FUNC_PREFIX
            {{ includes }}
            {{kernel_code}}
            """))
        header_list = get_headers(self.kernel_ast)
        includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

        # Work on a copy so that self.kernel_ast is left untouched.
        ast = deepcopy(self.kernel_ast)
        # Strip "#pragma omp parallel" from within kernel, because main function takes care of
        # that
        if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
            ast.body.args[0].pragma_line = ''

        # Render from the stripped copy; rendering self.kernel_ast here would
        # silently discard the pragma removal above.
        code = kernel_template.render(
            kernel_code=generate_c(ast, dialect='c'),
            includes=includes)

        # Store to file
        with open(file_path, 'w') as f:
            f.write(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock

    return file_path, lock_fp
# Jinja-syntax source of the benchmark's C main program, kept as a plain
# (dedented) string. Placeholders used below:
#   constants           - iterable of (constantName, dataType) pairs
#   fields              - iterable of (field_name, dataType, size) triples
#   kernelName          - name of the kernel function to call
#   call_argument_list  - already-joined argument list for the kernel call
# Each field is declared with its own dataType so that pointer type, cast and
# sizeof() agree; a hard-coded double pointer would overrun the allocation in
# the initialization loop for narrower element types.
CODE_TEMPLATE = textwrap.dedent("""
#include <likwid.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include "kerncraft.h"
#include "kernel.h"
#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(void *);
extern int var_false;
int main(int argc, char **argv) {
{%- for constantName, dataType in constants %}
// Constant {{constantName}}
{{dataType}} {{constantName}};
{{constantName}} = 0.23;
{%- endfor %}
// Declaring arrays
{%- for field_name, dataType, size in fields %}
// Initialization {{field_name}}
{{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
// TODO initialize in parallel context in same order as they are touched
for (unsigned long long i = 0; i < {{size}}; ++i)
{{field_name}}[i] = 0.23;
{%- endfor %}
likwid_markerInit();
#pragma omp parallel
{
likwid_markerRegisterRegion("loop");
#pragma omp barrier
// Initializing arrays in same order as touched in kernel loop nest
//INIT_ARRAYS;
// Dummy call
{%- for field_name, dataType, size in fields %}
if(var_false) dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
if(var_false) dummy(&{{constantName}});
{%- endfor %}
for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2;
if(warmup == 0) {
repeat = atoi(argv[1]);
likwid_markerStartRegion("loop");
}
for(; repeat > 0; --repeat) {
{{kernelName}}({{call_argument_list}});
{%- for field_name, dataType, size in fields %}
if(var_false) dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
if(var_false) dummy(&{{constantName}});
{%- endfor %}
}
}
likwid_markerStopRegion("loop");
}
likwid_markerClose();
return 0;
}
""")
def get_main_code(self, kernel_function_name='kernel'):
    """
    Generate and store the compilable benchmark main program (main.c) from the AST.

    The file is written only when ``lock_intermediate`` hands out an exclusive
    lock; with a shared lock the file already on disk is reused.

    :param kernel_function_name: accepted for interface compatibility; the
                                 call in main.c uses the AST's function_name
    :return: tuple of filename and shared lock file pointer
    """
    # Imported locally: neither helper appears among this module's visible
    # top-level imports.
    from pystencils.data_types import get_base_type
    from pystencils.sympyextensions import prod

    # TODO produce nicer code, including help text and other "comfort features".
    assert self.kernel_ast is not None, "AST does not exist, this could be due to running " \
                                        "based on a kernel description rather than code."

    file_path = self.get_intermediate_location('main.c', machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    if lock_mode == fcntl.LOCK_SH:
        # use cache
        with open(file_path) as f:
            code = f.read()
    else:  # lock_mode == fcntl.LOCK_EX
        # needs update
        ast = self.kernel_ast
        accessed_fields = {f.name: f for f in ast.fields_accessed}
        constants = []
        fields = []
        call_parameters = []
        for p in ast.get_parameters():
            if not p.is_field_parameter:
                # Scalar kernel argument: declared as a constant in main().
                constants.append((p.symbol.name, str(p.symbol.dtype)))
                call_parameters.append(p.symbol.name)
            else:
                assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
                field = accessed_fields[p.field_name]
                dtype = str(get_base_type(p.symbol.dtype))
                # Size passed to the template is the product of the field's shape.
                fields.append((p.field_name, dtype, prod(field.shape)))
                call_parameters.append(p.field_name)

        header_list = get_headers(ast)
        includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

        # Generate code; keyword names must match the placeholders in
        # CODE_TEMPLATE (note: call_argument_list). 'includes' is passed along
        # although the template currently has no placeholder for it.
        code = Template(self.CODE_TEMPLATE).render(
            kernelName=ast.function_name,
            fields=fields,
            constants=constants,
            call_argument_list=','.join(call_parameters),
            includes=includes)

        # Store to file
        with open(file_path, 'w') as f:
            f.write(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock

    return file_path, lock_fp
class KerncraftParameters(DotDict):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment