def kernel_main(kernel: KernelFunction, timing: bool = False) -> str:
    """
    Return C code of a benchmark program for the given kernel.

    Every non-field kernel parameter is handed to the template as a
    ``(name, dtype)`` constant. Every field pointer becomes a
    ``(name, dtype, elements, size, offset, alignment)`` allocation record,
    where ``alignment == 0`` means "no aligned allocation requested".
    The result is rendered from the ``main.c`` template of the
    ``pystencils_benchmark`` package.

    Args:
        kernel: the pystencils AST object as returned by create_kernel
        timing: add timing output to the code, prints time per iteration to stdout
    Returns:
        C code as string
    Raises:
        NotImplementedError: if the kernel has a field parameter that is not a
            field pointer (the benchmark only supports fixed loop sizes).
        KeyError: if a field parameter names a field that is not among
            ``kernel.fields_accessed``.
    """
    kernel_name = kernel.function_name
    accessed_fields = {f.name: f for f in kernel.fields_accessed}
    instruction_set = kernel.instruction_set

    constants = []
    fields = []
    call_parameters = []
    for p in kernel.get_parameters():
        if not p.is_field_parameter:
            constants.append((p.symbol.name, str(p.symbol.dtype)))
            call_parameters.append(p.symbol.name)
            continue

        # Explicit raise instead of `assert`: the check must survive `python -O`.
        if not p.is_field_pointer:
            raise NotImplementedError("Benchmark implemented only for kernels with fixed loop size")

        field = accessed_fields[p.field_name]
        base_type = get_base_type(p.symbol.dtype)
        dtype = str(base_type)
        size_data_type = base_type.numpy_dtype.itemsize
        elements = prod(field.shape)

        if instruction_set:
            width = instruction_set['width']
            align = width * size_data_type

            dim0_size = field.shape[-1]
            # Integer product on purpose: np.prod(()) is the float 1.0, which
            # would turn the byte count of a 1-D field into a float literal.
            # NOTE(review): assumes `prod` returns the integer 1 for an empty
            # sequence - confirm against pystencils.sympyextensions.
            dim1_size = prod(field.shape[:-1])

            # Pad the last dimension up to a multiple of the vector width.
            padding_elements = modulo_ceil(dim0_size, width) - dim0_size
            padding_bytes = padding_elements * size_data_type
            # Overall maximum; max(max(...)) would compare the entries
            # lexicographically and only inspect the "largest" one.
            ghost_layers = max(max(layers) for layers in kernel.ghost_layers)

            size = dim1_size * padding_bytes + elements * size_data_type
            offset = ((dim0_size + padding_elements + ghost_layers) % width) * size_data_type
        else:
            size = elements * size_data_type
            offset = 0
            align = 0

        fields.append((p.field_name, dtype, elements, size, offset, align))
        call_parameters.append(p.field_name)

    # generate_benchmark writes the kernel header to include/<kernel_name>.h,
    # so the include directive needs the ".h" suffix to resolve.
    includes = f'#include "{kernel_name}.h"\n'
    jinja_context = {
        # NOTE(review): the main.c template no longer seems to reference
        # kernel_code - confirm, then drop this entry and the generate_c call.
        'kernel_code': generate_c(kernel, dialect=Backend.C),
        'kernel_name': kernel_name,
        'fields': fields,
        'constants': constants,
        'call_argument_list': ",".join(call_parameters),
        'includes': includes,
        'timing': timing,
    }

    env = Environment(loader=PackageLoader('pystencils_benchmark'), undefined=StrictUndefined)
    return env.get_template('main.c').render(**jinja_context)
likwid_markerStartRegion("loop"); - {%- endif %} } {%- if timing %} @@ -117,7 +96,7 @@ int main(int argc, char **argv) for (; repeat > 0; --repeat) { - {{kernelName}}({{call_argument_list}}); + {{kernel_name}}({{call_argument_list}}); // Dummy calls {%- for field_name, dataType, elements, size, offset, alignment in fields %} @@ -135,17 +114,6 @@ int main(int argc, char **argv) } - {%- if likwid %} - likwid_markerStopRegion("loop"); - {%- if openmp %} - } - {%- endif %} - {%- endif %} - - {%- if likwid %} - likwid_markerClose(); - {%- endif %} - {%- for field_name, dataType, elements, size, offset, alignment in fields %} {%- if alignment > 0 %} aligned_free({{field_name}}); diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d1459d8c64d29a373a9e7545d2b66b347baf69ba --- /dev/null +++ b/test.ipynb @@ -0,0 +1,144 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pystencils as ps\n", + "from pystencils_benchmark import kernel_header, kernel_source, generate_benchmark\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [ + "config = ps.CreateKernelConfig()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [], + "source": [ + "a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [], + "source": [ + "@ps.kernel\n", + "def vadd():\n", + " a[0] @= b[0] + c[0]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "data": { + "text/plain": 
"<IPython.core.display.HTML object>", + "text/html": "<style>pre { line-height: 125%; }\ntd.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\nspan.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\ntd.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\nspan.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n.highlight .hll { background-color: #ffffcc }\n.highlight { background: #f8f8f8; }\n.highlight .c { color: #408080; font-style: italic } /* Comment */\n.highlight .err { border: 1px solid #FF0000 } /* Error */\n.highlight .k { color: #008000; font-weight: bold } /* Keyword */\n.highlight .o { color: #666666 } /* Operator */\n.highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */\n.highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */\n.highlight .cp { color: #BC7A00 } /* Comment.Preproc */\n.highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */\n.highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */\n.highlight .cs { color: #408080; font-style: italic } /* Comment.Special */\n.highlight .gd { color: #A00000 } /* Generic.Deleted */\n.highlight .ge { font-style: italic } /* Generic.Emph */\n.highlight .gr { color: #FF0000 } /* Generic.Error */\n.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n.highlight .gi { color: #00A000 } /* Generic.Inserted */\n.highlight .go { color: #888888 } /* Generic.Output */\n.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n.highlight .gs { font-weight: bold } /* Generic.Strong */\n.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n.highlight .gt { color: #0044DD } /* Generic.Traceback */\n.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant 
*/\n.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n.highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n.highlight .kp { color: #008000 } /* Keyword.Pseudo */\n.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n.highlight .kt { color: #B00040 } /* Keyword.Type */\n.highlight .m { color: #666666 } /* Literal.Number */\n.highlight .s { color: #BA2121 } /* Literal.String */\n.highlight .na { color: #7D9029 } /* Name.Attribute */\n.highlight .nb { color: #008000 } /* Name.Builtin */\n.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n.highlight .no { color: #880000 } /* Name.Constant */\n.highlight .nd { color: #AA22FF } /* Name.Decorator */\n.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */\n.highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */\n.highlight .nf { color: #0000FF } /* Name.Function */\n.highlight .nl { color: #A0A000 } /* Name.Label */\n.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */\n.highlight .nv { color: #19177C } /* Name.Variable */\n.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n.highlight .w { color: #bbbbbb } /* Text.Whitespace */\n.highlight .mb { color: #666666 } /* Literal.Number.Bin */\n.highlight .mf { color: #666666 } /* Literal.Number.Float */\n.highlight .mh { color: #666666 } /* Literal.Number.Hex */\n.highlight .mi { color: #666666 } /* Literal.Number.Integer */\n.highlight .mo { color: #666666 } /* Literal.Number.Oct */\n.highlight .sa { color: #BA2121 } /* Literal.String.Affix */\n.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */\n.highlight .sc { color: #BA2121 } /* Literal.String.Char */\n.highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */\n.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n.highlight .s2 { color: 
#BA2121 } /* Literal.String.Double */\n.highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */\n.highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n.highlight .sx { color: #008000 } /* Literal.String.Other */\n.highlight .sr { color: #BB6688 } /* Literal.String.Regex */\n.highlight .s1 { color: #BA2121 } /* Literal.String.Single */\n.highlight .ss { color: #19177C } /* Literal.String.Symbol */\n.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */\n.highlight .fm { color: #0000FF } /* Name.Function.Magic */\n.highlight .vc { color: #19177C } /* Name.Variable.Class */\n.highlight .vg { color: #19177C } /* Name.Variable.Global */\n.highlight .vi { color: #19177C } /* Name.Variable.Instance */\n.highlight .vm { color: #19177C } /* Name.Variable.Magic */\n.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style>" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "FUNC_PREFIX void kernel(double * RESTRICT _data_a, double * RESTRICT const _data_b, double * RESTRICT const _data_c)\n{\n for (int64_t ctr_0 = 0; ctr_0 < 4000000; ctr_0 += 1)\n {\n _data_a[ctr_0] = _data_b[ctr_0] + _data_c[ctr_0];\n }\n}", + "text/html": "<div class=\"highlight\"><pre><span></span><span class=\"n\">FUNC_PREFIX</span><span class=\"w\"> </span><span class=\"kt\">void</span><span class=\"w\"> </span><span class=\"n\">kernel</span><span class=\"p\">(</span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"n\">_data_a</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span 
class=\"w\"> </span><span class=\"n\">_data_b</span><span class=\"p\">,</span><span class=\"w\"> </span><span class=\"kt\">double</span><span class=\"w\"> </span><span class=\"o\">*</span><span class=\"w\"> </span><span class=\"n\">RESTRICT</span><span class=\"w\"> </span><span class=\"k\">const</span><span class=\"w\"> </span><span class=\"n\">_data_c</span><span class=\"p\">)</span><span class=\"w\"></span>\n<span class=\"p\">{</span><span class=\"w\"></span>\n<span class=\"w\"> </span><span class=\"k\">for</span><span class=\"w\"> </span><span class=\"p\">(</span><span class=\"kt\">int64_t</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"mi\">0</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\"><</span><span class=\"w\"> </span><span class=\"mi\">4000000</span><span class=\"p\">;</span><span class=\"w\"> </span><span class=\"n\">ctr_0</span><span class=\"w\"> </span><span class=\"o\">+=</span><span class=\"w\"> </span><span class=\"mi\">1</span><span class=\"p\">)</span><span class=\"w\"></span>\n<span class=\"w\"> </span><span class=\"p\">{</span><span class=\"w\"></span>\n<span class=\"w\"> </span><span class=\"n\">_data_a</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">=</span><span class=\"w\"> </span><span class=\"n\">_data_b</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"p\">]</span><span class=\"w\"> </span><span class=\"o\">+</span><span class=\"w\"> </span><span class=\"n\">_data_c</span><span class=\"p\">[</span><span class=\"n\">ctr_0</span><span class=\"p\">];</span><span class=\"w\"></span>\n<span class=\"w\"> </span><span class=\"p\">}</span><span class=\"w\"></span>\n<span class=\"p\">}</span><span class=\"w\"></span>\n</pre></div>\n" + }, + "metadata": 
{}, + "output_type": "display_data" + } + ], + "source": [ + "kernel_vadd = ps.create_kernel(vadd, config=config)\n", + "ps.show_code(kernel_vadd)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'list' object has no attribute 'add'", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)", + "\u001B[0;32m/tmp/ipykernel_28091/2748650420.py\u001B[0m in \u001B[0;36m<module>\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0mexample_path\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mPath\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcwd\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0;34m'example'\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mgenerate_benchmark\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkernel_vadd\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mexample_path\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m", + "\u001B[0;32m~/git/pystencils-benchmark/pystencils_benchmark/benchmark.py\u001B[0m in \u001B[0;36mgenerate_benchmark\u001B[0;34m(kernel_ast, path, dialect)\u001B[0m\n\u001B[1;32m 31\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mwrite\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mheader\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 32\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 33\u001B[0;31m \u001B[0msource\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mkernel_source\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkernel_ast\u001B[0m\u001B[0;34m,\u001B[0m 
\u001B[0mdialect\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 34\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mopen\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msrc_path\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0;34mf'{kernel_name}.c'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'w+'\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 35\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mwrite\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msource\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/git/pystencils-benchmark/pystencils_benchmark/benchmark.py\u001B[0m in \u001B[0;36mkernel_source\u001B[0;34m(kernel_ast, dialect)\u001B[0m\n\u001B[1;32m 119\u001B[0m \u001B[0mfunction_source\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mgenerate_c\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkernel_ast\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdialect\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mdialect\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 120\u001B[0m \u001B[0mheaders\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mget_headers\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkernel_ast\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 121\u001B[0;31m \u001B[0mheaders\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0madd\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34mf'\"{kernel_name}.h\"'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 122\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 123\u001B[0m jinja_context = {\n", + "\u001B[0;31mAttributeError\u001B[0m: 'list' object has no attribute 'add'" + ] + } + ], + "source": [ + "example_path = Path.cwd() / 'example'\n", + "generate_benchmark(kernel_vadd, example_path)" + ], + "metadata": { + 
"collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file