generate_benchmark.py
    import subprocess
    import warnings
    import tempfile
    from pathlib import Path
    
    from jinja2 import Environment, PackageLoader, StrictUndefined
    
    from pystencils.astnodes import PragmaBlock
    from pystencils.backends.cbackend import generate_c, get_headers
    from pystencils.cpu.cpujit import get_compiler_config, run_compile_step
    from pystencils.data_types import get_base_type
    from pystencils.include import get_pystencils_include_path
    from pystencils.integer_functions import modulo_ceil
    from pystencils.sympyextensions import prod
    
    import numpy as np
    
    
    def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
        """Return C code of a benchmark program for the given kernel.
    
        Args:
            ast: the pystencils AST object as returned by create_kernel
            likwid: if True, likwid markers are added to the code
            openmp: relevant only if likwid=True, to generate correct likwid initialization code
            timing: add timing output to the code, prints time per iteration to stdout
    
        Returns:
            C code as string
        """
        accessed_fields = {f.name: f for f in ast.fields_accessed}
        constants = []
        fields = []
        call_parameters = []
        for p in ast.get_parameters():
            if not p.is_field_parameter:
                constants.append((p.symbol.name, str(p.symbol.dtype)))
                call_parameters.append(p.symbol.name)
            else:
                assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
                field = accessed_fields[p.field_name]
                dtype = str(get_base_type(p.symbol.dtype))
                np_dtype = np.dtype(dtype)
    
                dim0_size = field.shape[-1]
                dim1_size = np.prod(field.shape[:-1])
    
                size_data_type = np_dtype.itemsize
                elements = prod(field.shape)
                align = ast.instruction_set['width'] * size_data_type
                # pad the innermost dimension up to the next multiple of the SIMD vector width
                padding_elements = modulo_ceil(dim0_size, ast.instruction_set['width']) - dim0_size
                padding_bytes = padding_elements * size_data_type
                ghost_layers = max(max(ast.ghost_layers))
    
                size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
    
                assert align % np_dtype.itemsize == 0
                offset = ((dim0_size + padding_elements + ghost_layers) % ast.instruction_set['width']) * size_data_type
    
                fields.append((p.field_name, dtype, elements, size, offset, align))
                call_parameters.append(p.field_name)
    
        header_list = get_headers(ast)
        includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
    
        # Strip "#pragma omp parallel" from within kernel, because main function takes care of that
        # when likwid and openmp are enabled
        if likwid and openmp:
            if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
                ast.body.args[0].pragma_line = ''
    
        jinja_context = {
            'likwid': likwid,
            'openmp': openmp,
            'kernel_code': generate_c(ast, dialect='c'),
            'kernelName': ast.function_name,
            'fields': fields,
            'constants': constants,
            'call_argument_list': ",".join(call_parameters),
            'includes': includes,
            'timing': timing,
        }
    
        env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
    
        return env.get_template('benchmark.c').render(**jinja_context)
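
    # Usage sketch (illustrative only): assumes a vectorized kernel built from
    # fixed-shape fields, e.g.
    #
    #     arr = np.zeros((64, 64))
    #     src, dst = pystencils.fields(src=arr, dst=arr)
    #     update = pystencils.Assignment(dst.center, 2 * src.center)
    #     kernel_ast = pystencils.create_kernel([update], cpu_vectorize_info={'instruction_set': 'avx'})
    #     print(generate_benchmark(kernel_ast, timing=True))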
    
    
    def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None):
        """Runs the given kernel with outer loop in C
    
        Args:
            ast: pystencils ast which is used to compile the benchmark file
            inner_iterations: each recorded timing is taken around this many kernel iterations
            outer_iterations: number of timings recorded
            path: path where the benchmark file is stored. If None, a temporary folder is created
    
        Returns:
            list of times per iteration, one for each outer iteration
        """
        import kerncraft
    
        benchmark_code = generate_benchmark(ast, timing=True)
    
        if path is None:
            path = tempfile.mkdtemp()
    
        if isinstance(path, str):
            path = Path(path)
    
        with open(path / 'bench.c', 'w') as f:
            f.write(benchmark_code)
    
        kerncraft_path = Path(kerncraft.__file__).parent
    
        extra_flags = ['-I' + get_pystencils_include_path(),
                       '-I' + str(kerncraft_path / 'headers')]
    
        compiler_config = get_compiler_config()
        compile_cmd = [compiler_config['command']] + compiler_config['flags'].split()
        compile_cmd += [*extra_flags,
                        str(kerncraft_path / 'headers' / 'timing.c'),
                        str(kerncraft_path / 'headers' / 'dummy.c'),
                        str(path / 'bench.c'),
                        '-o', str(path / 'bench'),
                        ]
        run_compile_step(compile_cmd)
    
        # short pre-run (10 inner iterations) to estimate the time per iteration
        time_pre_estimation_per_iteration = float(subprocess.check_output([str(path / 'bench'), str(10)]))
        benchmark_time_limit = 20
        if benchmark_time_limit / time_pre_estimation_per_iteration < inner_iterations:
            warn = (f"A benchmark run with {inner_iterations} inner_iterations will probably take longer than "
                    f"{benchmark_time_limit} seconds for this kernel")
            warnings.warn(warn)
    
        results = []
        for _ in range(outer_iterations):
            benchmark_time = float(subprocess.check_output([str(path / 'bench'), str(inner_iterations)]))
            results.append(benchmark_time)
        return results
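

    # Example driver (illustrative sketch): assumes a configured C compiler, an installed
    # kerncraft, AVX support, and fixed-shape fields; names and sizes are placeholders.
    if __name__ == '__main__':
        import pystencils as ps

        arr = np.zeros((64, 64))
        src, dst = ps.fields(src=arr, dst=arr)  # concrete shapes -> fixed loop size
        update = ps.Assignment(dst.center, (src[1, 0] + src[-1, 0]) * 0.5)
        kernel_ast = ps.create_kernel([update], cpu_vectorize_info={'instruction_set': 'avx'})

        times = run_c_benchmark(kernel_ast, inner_iterations=10000, outer_iterations=3)
        print(min(times))  # best observed time per iteration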