diff --git a/doc/notebooks/demo_ve.ipynb b/doc/notebooks/demo_ve.ipynb
index 0604b21c227ed4331ac2887dbe9f996f242df638..1fe7846ade5f67ccd0d9f286525928c4c320da16 100644
--- a/doc/notebooks/demo_ve.ipynb
+++ b/doc/notebooks/demo_ve.ipynb
@@ -43,7 +43,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 13,
    "id": "5d56a49c-5963-4f86-a8d8-0b753a971e52",
    "metadata": {},
    "outputs": [],
@@ -54,7 +54,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 14,
    "id": "9c204a47-a39b-450b-9026-9bb63cf54e83",
    "metadata": {},
    "outputs": [],
@@ -64,7 +64,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 15,
    "id": "5bbc4118-e1b7-412b-9dc8-7d7d524786df",
    "metadata": {},
    "outputs": [],
@@ -75,7 +75,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 16,
    "id": "85641913-f00a-4f95-af69-c7646ad5b3d0",
    "metadata": {},
    "outputs": [],
@@ -87,7 +87,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 17,
    "id": "33097862-9f60-4624-a1f2-0fdfd82831bc",
    "metadata": {},
    "outputs": [
@@ -115,7 +115,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 18,
    "id": "99aee06b-1704-4d06-b261-cc07a6a7f9a3",
    "metadata": {},
    "outputs": [],
@@ -127,7 +127,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 19,
    "id": "e79ab7bb-128e-47ee-9d53-8779c4f55d7e",
    "metadata": {},
    "outputs": [
@@ -155,7 +155,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 20,
    "id": "4bb14872-11de-42cc-b5fc-4f995c7a6725",
    "metadata": {},
    "outputs": [],
@@ -167,7 +167,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 21,
    "id": "457cc1fe-1aef-44bd-9fd1-f914e019c933",
    "metadata": {},
    "outputs": [
@@ -195,7 +195,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 22,
    "id": "ae05beb9-745f-4885-ad4f-667aae867f05",
    "metadata": {},
    "outputs": [],
@@ -206,7 +206,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 23,
    "id": "7dfa86e1-8962-47df-900c-3133883a19c7",
    "metadata": {},
    "outputs": [],
@@ -216,30 +216,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
-   "id": "dd39502f-d935-4b88-b8c8-896b1a294bf9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "generate_benchmark([kernel_vadd, kernel_daxpy, kernel_daxpy_one_off], example_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 24,
    "id": "f084548f-3a8e-4d45-8c7e-5845013b222e",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "TemplateNotFound",
+     "evalue": "main.h",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mTemplateNotFound\u001B[0m                          Traceback (most recent call last)",
+      "\u001B[0;32m/tmp/ipykernel_28036/2725825833.py\u001B[0m in \u001B[0;36m<module>\u001B[0;34m\u001B[0m\n\u001B[1;32m      1\u001B[0m \u001B[0;31m# Examples:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 2\u001B[0;31m \u001B[0mgenerate_benchmark\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkernel_daxpy\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mexample_path\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m      3\u001B[0m \u001B[0;31m# generate_benchmark(kernel_vadd, example_path)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m      4\u001B[0m \u001B[0;31m# generate_benchmark(kernel_daxpy_one_off, example_path)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m      5\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;32m~/git/pystencils-benchmark/pystencils_benchmark/benchmark.py\u001B[0m in \u001B[0;36mgenerate_benchmark\u001B[0;34m(kernel_ast, path, dialect)\u001B[0m\n\u001B[1;32m     36\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     37\u001B[0m     \u001B[0;32mwith\u001B[0m \u001B[0mopen\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msrc_path\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0;34m'main.c'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'w+'\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 38\u001B[0;31m         \u001B[0mf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mwrite\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkernel_main\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mkernel_ast\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m     39\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     40\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;32m~/git/pystencils-benchmark/pystencils_benchmark/benchmark.py\u001B[0m in \u001B[0;36mkernel_main\u001B[0;34m(kernel, timing)\u001B[0m\n\u001B[1;32m     99\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    100\u001B[0m     \u001B[0menv\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mEnvironment\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mloader\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mPackageLoader\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'pystencils_benchmark'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mundefined\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mStrictUndefined\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 101\u001B[0;31m     \u001B[0mmain\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0menv\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mget_template\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'main.c'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mrender\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m**\u001B[0m\u001B[0mjinja_context\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    102\u001B[0m     \u001B[0;32mreturn\u001B[0m \u001B[0mmain\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    103\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;32m~/git/pystencils/venv/lib/python3.9/site-packages/jinja2/environment.py\u001B[0m in \u001B[0;36mget_template\u001B[0;34m(self, name, parent, globals)\u001B[0m\n\u001B[1;32m    995\u001B[0m             \u001B[0mname\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mjoin_path\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mname\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mparent\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    996\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 997\u001B[0;31m         \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_load_template\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mname\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mglobals\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    998\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    999\u001B[0m     \u001B[0;34m@\u001B[0m\u001B[0minternalcode\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;32m~/git/pystencils/venv/lib/python3.9/site-packages/jinja2/environment.py\u001B[0m in \u001B[0;36m_load_template\u001B[0;34m(self, name, globals)\u001B[0m\n\u001B[1;32m    956\u001B[0m                 \u001B[0;32mreturn\u001B[0m \u001B[0mtemplate\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    957\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 958\u001B[0;31m         \u001B[0mtemplate\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mloader\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mload\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mname\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmake_globals\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mglobals\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    959\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    960\u001B[0m         \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcache\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;32m~/git/pystencils/venv/lib/python3.9/site-packages/jinja2/loaders.py\u001B[0m in \u001B[0;36mload\u001B[0;34m(self, environment, name, globals)\u001B[0m\n\u001B[1;32m    123\u001B[0m         \u001B[0;31m# first we try to get the source for this template together\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    124\u001B[0m         \u001B[0;31m# with the filename and the uptodate function.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 125\u001B[0;31m         \u001B[0msource\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mfilename\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0muptodate\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mget_source\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0menvironment\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mname\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    126\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    127\u001B[0m         \u001B[0;31m# try to load the code from the bytecode cache if there is a\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;32m~/git/pystencils/venv/lib/python3.9/site-packages/jinja2/loaders.py\u001B[0m in \u001B[0;36mget_source\u001B[0;34m(self, environment, template)\u001B[0m\n\u001B[1;32m    325\u001B[0m             \u001B[0;31m# Package is a directory.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    326\u001B[0m             \u001B[0;32mif\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0mos\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mpath\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0misfile\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mp\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 327\u001B[0;31m                 \u001B[0;32mraise\u001B[0m \u001B[0mTemplateNotFound\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mtemplate\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    328\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    329\u001B[0m             \u001B[0;32mwith\u001B[0m \u001B[0mopen\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mp\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m\"rb\"\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;31mTemplateNotFound\u001B[0m: main.h"
+     ]
+    }
+   ],
    "source": [
     "# Examples:\n",
-    "# generate_benchmark(kernel_daxpy, example_path)\n",
+    "generate_benchmark(kernel_daxpy, example_path)\n",
     "# generate_benchmark(kernel_vadd, example_path)\n",
     "# generate_benchmark(kernel_daxpy_one_off, example_path)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "954b5ec7-a88a-4723-bd7f-227a3919b32d",
    "metadata": {},
    "outputs": [],
@@ -247,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "f51f2aff-f232-4de9-a0c4-94d3682eff86",
    "metadata": {},
    "outputs": [],