Skip to content
Snippets Groups Projects
Commit 85ed4a03 authored by Markus Holzer's avatar Markus Holzer
Browse files

Minor fixes and benchmark test case

parent 4759ffe3
No related branches found
No related tags found
1 merge request!165Enable osaca usage
import os
import subprocess
import warnings
import tempfile
from jinja2 import Environment, PackageLoader, StrictUndefined
......@@ -64,13 +66,14 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
return env.get_template('benchmark.c').render(**jinja_context)
def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None):
"""Runs the given kernel with outer loop in C
Args:
ast:
ast: pystencils ast which is used to compile the benchmark file
inner_iterations: timings are recorded around this many iterations
outer_iterations: number of timings recorded
path: path where the benchmark file is stored. If None a tmp folder is created
Returns:
list of times per iterations for each outer iteration
......@@ -78,7 +81,11 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
import kerncraft
benchmark_code = generate_benchmark(ast, timing=True)
with open('bench.c', 'w') as f:
if path is None:
path = tempfile.mkdtemp()
with open(os.path.join(path, 'bench.c'), 'w') as f:
f.write(benchmark_code)
kerncraft_path = os.path.dirname(kerncraft.__file__)
......@@ -91,13 +98,20 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
compile_cmd += [*extra_flags,
os.path.join(kerncraft_path, 'headers', 'timing.c'),
os.path.join(kerncraft_path, 'headers', 'dummy.c'),
'bench.c',
'-o', 'bench',
os.path.join(path, 'bench.c'),
'-o', os.path.join(path, 'bench'),
]
run_compile_step(compile_cmd)
time_pre_estimation_per_iteration = float(subprocess.check_output([os.path.join('./', path, 'bench'), str(10)]))
benchmark_time_limit = 20
if benchmark_time_limit / time_pre_estimation_per_iteration < inner_iterations:
warn = (f"A benchmark run with {inner_iterations} inner_iterations will probably take longer than "
f"{benchmark_time_limit} seconds for this kernel")
warnings.warn(warn)
results = []
for _ in range(outer_iterations):
benchmark_time = float(subprocess.check_output(['./bench', str(inner_iterations)]))
benchmark_time = float(subprocess.check_output([os.path.join('./', path, 'bench'), str(inner_iterations)]))
results.append(benchmark_time)
return results
......@@ -6,19 +6,17 @@ from typing import Optional
from jinja2 import Environment, PackageLoader, StrictUndefined
import kerncraft
import sympy as sp
from kerncraft.kerncraft import KernelCode
from kerncraft.machinemodel import MachineModel
from pystencils.astnodes import (
KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment)
from pystencils.astnodes import (KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment)
from pystencils.field import get_layout_from_strides
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
from pystencils.sympyextensions import count_operations_in_ast
from pystencils.transformations import filtered_tree_iteration
from pystencils.utils import DotDict
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.cpu.kernelcreation import add_openmp
class PyStencilsKerncraftKernel(KernelCode):
......@@ -38,8 +36,10 @@ class PyStencilsKerncraftKernel(KernelCode):
assumed_layout: either 'SoA' or 'AoS' - if fields have symbolic sizes the layout of the index
coordinates is not known. In this case either a structures of array (SoA) or
array of structures (AoS) layout is assumed
debug_print: print debug information
filename: used for caching
"""
kerncraft.kernel.Kernel.__init__(self, machine)
super(KernelCode, self).__init__(machine=machine)
# Initialize state
self.asm_block = None
......@@ -138,11 +138,7 @@ class PyStencilsKerncraftKernel(KernelCode):
file_path = self.get_intermediate_location(file_name, machine_and_compiler_dependent=False)
lock_mode, lock_fp = self.lock_intermediate(file_path)
if lock_mode == fcntl.LOCK_SH:
# use cache
with open(file_path) as f:
code = f.read()
else: # lock_mode == fcntl.LOCK_EX
if lock_mode == fcntl.LOCK_EX:
function_signature = generate_c(self.kernel_ast, dialect='c', signature_only=True)
jinja_context = {
......@@ -163,9 +159,8 @@ class PyStencilsKerncraftKernel(KernelCode):
Generate and return compilable source code.
Args:
type_: can be iaca or likwid.
openmp: if true, openmp code will be generated
as_filename: writes a file with the name as_filename
name: kernel name
"""
filename = 'pystencils_kernl'
if openmp:
......@@ -174,14 +169,13 @@ class PyStencilsKerncraftKernel(KernelCode):
file_path = self.get_intermediate_location(filename, machine_and_compiler_dependent=False)
lock_mode, lock_fp = self.lock_intermediate(file_path)
if lock_mode == fcntl.LOCK_SH:
# use cache
with open(file_path) as f:
code = f.read()
else: # lock_mode == fcntl.LOCK_EX
if lock_mode == fcntl.LOCK_EX:
header_list = get_headers(self.kernel_ast)
includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
if openmp:
add_openmp(self.kernel_ast)
kernel_code = generate_c(self.kernel_ast, dialect='c')
jinja_context = {
......
......@@ -90,7 +90,7 @@ int main(int argc, char **argv)
{%- if timing %}
timing(&wcEndTime, &cpuEndTime);
if( warmup == 0)
printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
printf("%e\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
{%- endif %}
}
......
import os
import numpy as np
import pytest
import sympy as sp
import kerncraft
from kerncraft.kernel import KernelCode
from kerncraft.machinemodel import MachineModel
from kerncraft.models import ECM, ECMData, Benchmark
from pystencils import Assignment, Field
from pystencils.cpu import create_kernel
from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark
from pystencils.timeloop import TimeLoop
SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__))
INPUT_FOLDER = os.path.join(SCRIPT_FOLDER, "kerncraft_inputs")
......@@ -45,28 +46,28 @@ def analysis(kernel, model='ecmdata'):
machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml")
machine = MachineModel(path_to_yaml=machine_file_path)
if model == 'ecmdata':
model = kerncraft.models.ECMData(kernel, machine, KerncraftParameters())
model = ECMData(kernel, machine, KerncraftParameters())
elif model == 'ecm':
model = kerncraft.models.ECM(kernel, machine, KerncraftParameters())
model = ECM(kernel, machine, KerncraftParameters())
# model.analyze()
# model.plot()
elif model == 'benchmark':
model = kerncraft.models.Benchmark(kernel, machine, KerncraftParameters())
model = Benchmark(kernel, machine, KerncraftParameters())
else:
model = kerncraft.models.ECM(kernel, machine, KerncraftParameters())
model = ECM(kernel, machine, KerncraftParameters())
model.analyze()
return model
@pytest.mark.kerncraft
def test_3d_7pt_OSACA():
# Make sure you use the intel compiler
def test_3d_7pt_osaca():
size = [20, 200, 200]
kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml")
machine = MachineModel(path_to_yaml=machine_file_path)
machine_model = MachineModel(path_to_yaml=machine_file_path)
with open(kernel_file_path) as kernel_file:
reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
reference_kernel = KernelCode(kernel_file.read(), machine=machine_model, filename=kernel_file_path)
reference_kernel.set_constant('M', size[0])
reference_kernel.set_constant('N', size[1])
assert size[1] == size[2]
......@@ -80,7 +81,7 @@ def test_3d_7pt_OSACA():
update_rule = Assignment(b[0, 0, 0], s * rhs)
ast = create_kernel([update_rule])
k = PyStencilsKerncraftKernel(ast, machine)
k = PyStencilsKerncraftKernel(ast, machine=machine_model)
analysis(k, model='ecm')
assert reference_kernel._flops == k._flops
# assert reference.results['cl throughput'] == analysis.results['cl throughput']
......@@ -132,3 +133,29 @@ def test_3d_7pt():
for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
assert e1 == e2
@pytest.mark.kerncraft
def test_benchmark():
size = [30, 50, 50]
arr = np.zeros(size)
a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
s = sp.Symbol("s")
rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
update_rule = Assignment(b[0, 0, 0], s * rhs)
ast = create_kernel([update_rule])
c_benchmark_run = run_c_benchmark(ast, inner_iterations=1000, outer_iterations=1)
kernel = ast.compile()
a = np.full(size, fill_value=0.23)
b = np.full(size, fill_value=0.23)
timeloop = TimeLoop(steps=1)
timeloop.add_call(kernel, {'a': a, 'b': b, 's': 0.23})
timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)
np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=5)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment