diff --git a/binder/environment.yml b/binder/environment.yml
index 2df28e8726bb78e60584279bb376bd579a16a0d0..b4acd2e2a083295c272c4082c80ed7f46d2ca498 100644
--- a/binder/environment.yml
+++ b/binder/environment.yml
@@ -7,7 +7,8 @@
 #     conda env create -f conda_environment_user.yml
 #     . activate pystencils
 #
-# If you have CUDA installed and want to use your GPU, uncomment the last line to install pycuda
+# If you have CUDA or ROCm installed and want to use your GPU, uncomment the last line to install cupy
+# Be careful to install the cupy build that matches your installed CUDA or ROCm version.
 #
 # ----------------------------------------------------------------------------------------------------------------------
 
@@ -33,4 +34,4 @@ dependencies:
       - pyevtk # VTK output for serial simulations
       - blitzdb # file-based No-SQL database to store simulation results
       - pystencils
-      #- pycuda # add this if you have CUDA installed
+      #- cupy # add this if you have CUDA or ROCm installed
diff --git a/conftest.py b/conftest.py
index 9f9eac6300f08d0854bb9e5bb67c068cb0dabd96..e71dff56416d4fa2a5c01eab39e1853b90fe1969 100644
--- a/conftest.py
+++ b/conftest.py
@@ -46,7 +46,7 @@ add_path_to_ignore('pystencils_tests/benchmark')
 add_path_to_ignore('_local_tmp')
 
 try:
-    import pycuda
+    import cupy
 except ImportError:
     collect_ignore += [os.path.join(SCRIPT_FOLDER, "lbmpy_tests/test_cpu_gpu_equivalence.py")]
 
diff --git a/doc/notebooks/00_tutorial_lbmpy_walberla_overview.ipynb b/doc/notebooks/00_tutorial_lbmpy_walberla_overview.ipynb
index 2ce352c1aa47351d04d9ad426caa183392c543e4..d1181427c738344f5f937bcaecdd693c66525fc4 100644
--- a/doc/notebooks/00_tutorial_lbmpy_walberla_overview.ipynb
+++ b/doc/notebooks/00_tutorial_lbmpy_walberla_overview.ipynb
@@ -17,14 +17,14 @@
    "outputs": [],
    "source": [
     "try:\n",
-    "    import pycuda\n",
+    "    import cupy\n",
     "except ImportError:\n",
-    "    pycuda = None\n",
+    "    cupy = None\n",
     "    gpu = False\n",
     "    target = ps.Target.CPU\n",
-    "    print('No pycuda installed')\n",
+    "    print('No cupy installed')\n",
     "\n",
-    "if pycuda:\n",
+    "if cupy:\n",
     "    gpu = True\n",
     "    target = ps.Target.GPU"
    ]
@@ -136,7 +136,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Note that this code has the relaxation rate and array sizes inserted as numeric constants. This additional information helps the C compiler to generate faster code. Also, having the code in symbolic form makes it easy to generate code for different platforms as well: C(++) for CPUs, optionally with platform specific SIMD instrinsics or CUDA for Nvidia GPUs. To run the lid driven cavity on GPUs all it takes are the following changes:"
+    "Note that this code has the relaxation rate and array sizes inserted as numeric constants. This additional information helps the C compiler to generate faster code. Also, having the code in symbolic form makes it easy to generate code for different platforms as well: C(++) for CPUs, optionally with platform-specific SIMD intrinsics, or GPUs with CUDA. To run the lid driven cavity on GPUs all it takes are the following changes:"
    ]
   },
   {
@@ -603,7 +603,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.2"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/doc/notebooks/01_tutorial_predefinedScenarios.ipynb b/doc/notebooks/01_tutorial_predefinedScenarios.ipynb
index 40483f9cd720e6d35df6828890651f5b19dbd8e7..c7f0f93981506ee952e2345766b496294e9466b4 100644
--- a/doc/notebooks/01_tutorial_predefinedScenarios.ipynb
+++ b/doc/notebooks/01_tutorial_predefinedScenarios.ipynb
@@ -8,7 +8,7 @@
     {
      "data": {
       "text/plain": [
-       "<module 'pycuda' from '/home/markus/miniconda3/envs/pystencils/lib/python3.8/site-packages/pycuda/__init__.py'>"
+       "<module 'cupy' from '/home/markus/.local/lib/python3.11/site-packages/cupy/__init__.py'>"
       ]
      },
      "execution_count": 1,
@@ -18,7 +18,7 @@
    ],
    "source": [
     "import pytest\n",
-    "pytest.importorskip('pycuda')"
+    "pytest.importorskip('cupy')"
    ]
   },
   {
@@ -660,7 +660,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -674,7 +674,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.2"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/doc/notebooks/10_tutorial_conservative_allen_cahn_two_phase.ipynb b/doc/notebooks/10_tutorial_conservative_allen_cahn_two_phase.ipynb
index 236ee1b9f3cf6c3cb4ac391289ea008693308b86..ded1b3321281441b8573c78e911c536b9ceaeef4 100644
--- a/doc/notebooks/10_tutorial_conservative_allen_cahn_two_phase.ipynb
+++ b/doc/notebooks/10_tutorial_conservative_allen_cahn_two_phase.ipynb
@@ -30,32 +30,24 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If `pycuda` is installed the simulation automatically runs on GPU"
+    "If `cupy` is installed the simulation automatically runs on GPU"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No pycuda installed\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "try:\n",
-    "    import pycuda\n",
+    "    import cupy\n",
     "except ImportError:\n",
-    "    pycuda = None\n",
+    "    cupy = None\n",
     "    gpu = False\n",
     "    target = ps.Target.CPU\n",
-    "    print('No pycuda installed')\n",
+    "    print('No cupy installed')\n",
     "\n",
-    "if pycuda:\n",
+    "if cupy:\n",
     "    gpu = True\n",
     "    target = ps.Target.GPU"
    ]
@@ -925,7 +917,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/lbmpy/advanced_streaming/communication.py b/lbmpy/advanced_streaming/communication.py
index 786c60092d85ffbf28d5dfa9880f6d005ca8da4c..9c7dc4ca16af5ddb71dc0e147015af3c9bbde411 100644
--- a/lbmpy/advanced_streaming/communication.py
+++ b/lbmpy/advanced_streaming/communication.py
@@ -12,15 +12,15 @@ class LBMPeriodicityHandling:
 
     def __init__(self, stencil, data_handling, pdf_field_name,
                  streaming_pattern='pull', ghost_layers=1,
-                 pycuda_direct_copy=True):
+                 cupy_direct_copy=True):
         """
             Periodicity Handling for Lattice Boltzmann Streaming.
 
             **On the usage with cuda:**
-            - pycuda allows the copying of sliced arrays within device memory using the numpy syntax,
+            - cupy allows the copying of sliced arrays within device memory using the numpy syntax,
             e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
-            handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and
-            compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying,
+            handling. Alternatively, if you set `cupy_direct_copy=False`, GPU kernels are generated and
+            compiled. The compiled kernels are almost twice as fast in execution as cupy array copying,
             but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
             Choose your weapon depending on your use case.
         """
@@ -40,7 +40,7 @@ class LBMPeriodicityHandling:
         self.inplace_pattern = is_inplace(streaming_pattern)
 
         self.cpu = self.target == Target.CPU
-        self.pycuda_direct_copy = self.target == Target.GPU and pycuda_direct_copy
+        self.cupy_direct_copy = self.target == Target.GPU and cupy_direct_copy
 
         def is_copy_direction(direction):
             s = 0
@@ -63,7 +63,7 @@ class LBMPeriodicityHandling:
                                                            ghost_layers=ghost_layers)
             self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
 
-        if self.target == Target.GPU and not pycuda_direct_copy:
+        if self.target == Target.GPU and not cupy_direct_copy:
             self.device_copy_kernels = list()
             for timestep in timesteps:
                 self.device_copy_kernels.append(self._compile_copy_kernels(timestep))
@@ -90,7 +90,7 @@ class LBMPeriodicityHandling:
 
     def _periodicity_handling_gpu(self, prev_timestep):
         arr = self.dh.gpu_arrays[self.pdf_field_name]
-        if self.pycuda_direct_copy:
+        if self.cupy_direct_copy:
             for src, dst in self.comm_slices[prev_timestep.idx]:
                 arr[dst] = arr[src]
         else:
diff --git a/lbmpy/max_domain_size_info.py b/lbmpy/max_domain_size_info.py
index 65fa50f97a7401dd37e2d38c037871817e796f56..fcd2ed0984cdcb54600bc63c1d29651a28f3357c 100644
--- a/lbmpy/max_domain_size_info.py
+++ b/lbmpy/max_domain_size_info.py
@@ -26,16 +26,19 @@ Examples:
 import warnings
 
 import numpy as np
+import pystencils
 
-# Optional packages cpuinfo, pycuda and psutil for hardware queries
+# Optional packages cpuinfo, cupy and psutil for hardware queries
 try:
     from cpuinfo import get_cpu_info
 except ImportError:
     get_cpu_info = None
 
 try:
-    from pycuda.autoinit import device
+    import cupy
+    device = cupy.cuda.Device(pystencils.GPU_DEVICE)
 except ImportError:
+    cupy = None
     device = None
 
 try:
@@ -114,7 +117,7 @@ def memory_sizes_of_current_machine():
             result['L3'] = cpu_info['l3_cache_size']
 
     if device:
-        size = device.total_memory() / (1024 * 1024)
+        size = device.mem_info[1] / (1024 * 1024)
         result['GPU'] = "{0:.0f} MB".format(size)
 
     if virtual_memory:
@@ -124,7 +127,7 @@ def memory_sizes_of_current_machine():
 
     if not result:
         warnings.warn("Couldn't query for any local memory size."
-                      "Install py-cpuinfo to get cache sizes, psutil for RAM size and pycuda for GPU memory size.")
+                      "Install py-cpuinfo to get cache sizes, psutil for RAM size and cupy for GPU memory size.")
 
     return result
 
diff --git a/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py b/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py
index 0c37cfc37c52506d1a373815fa9d1d849bf03cd5..413aaa07320391093bd7c202cc023251aa2f9b67 100644
--- a/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py
+++ b/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py
@@ -20,7 +20,7 @@ all_results = dict()
 targets = [Target.CPU]
 
 try:
-    import pycuda.autoinit
+    import cupy
     targets += [Target.GPU]
 except Exception:
     pass
diff --git a/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py b/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py
index 42b6671a35f61167e63cba0171b12e08dd022b7b..2dc18be870be2a38843b9d673e8e32da4957f5c2 100644
--- a/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py
+++ b/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py
@@ -21,7 +21,7 @@ all_results = dict()
 targets = [Target.CPU]
 
 try:
-    import pycuda.autoinit
+    import cupy
     targets += [Target.GPU]
 except Exception:
     pass
diff --git a/lbmpy_tests/cumulantmethod/test_flow_around_sphere.py b/lbmpy_tests/cumulantmethod/test_flow_around_sphere.py
index 9be652aa70386ad68030ce34fec43d92d5fafb43..6db3fa9fbd3c91127b305cfda4864464763b2d64 100644
--- a/lbmpy_tests/cumulantmethod/test_flow_around_sphere.py
+++ b/lbmpy_tests/cumulantmethod/test_flow_around_sphere.py
@@ -136,7 +136,7 @@ def flow_around_sphere(stencil, galilean_correction, L_LU, total_steps):
 @pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27])
 @pytest.mark.parametrize('galilean_correction', [False, True])
 def test_flow_around_sphere_short(stencil, galilean_correction):
-    pytest.importorskip('pycuda')
+    pytest.importorskip('cupy')
     flow_around_sphere(LBStencil(stencil), galilean_correction, 5, 200)
 
 
@@ -144,5 +144,5 @@ def test_flow_around_sphere_short(stencil, galilean_correction):
 @pytest.mark.parametrize('galilean_correction', [False, True])
 @pytest.mark.longrun
 def test_flow_around_sphere_long(stencil, galilean_correction):
-    pytest.importorskip('pycuda')
+    pytest.importorskip('cupy')
     flow_around_sphere(LBStencil(stencil), galilean_correction, 20, 3000)
diff --git a/lbmpy_tests/cumulantmethod/test_periodic_pipe_flow.ipynb b/lbmpy_tests/cumulantmethod/test_periodic_pipe_flow.ipynb
index b984e24d80049db2d4357c23bc9d2e4974120972..501abf4ceeb695021beea82452c46ed571183f77 100644
--- a/lbmpy_tests/cumulantmethod/test_periodic_pipe_flow.ipynb
+++ b/lbmpy_tests/cumulantmethod/test_periodic_pipe_flow.ipynb
@@ -33,25 +33,16 @@
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No pycuda installed\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "try:\n",
-    "    import pycuda\n",
-    "    import pycuda.gpuarray as gpuarray\n",
+    "    import cupy\n",
     "except ImportError:\n",
-    "    pycuda = None\n",
+    "    cupy = None\n",
     "    target = ps.Target.CPU\n",
-    "    print('No pycuda installed')\n",
+    "    print('No cupy installed')\n",
     "\n",
-    "if pycuda:\n",
+    "if cupy:\n",
     "    target = ps.Target.GPU"
    ]
   },
@@ -238,7 +229,28 @@
    "cell_type": "code",
    "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'tuple' object has no attribute 'items'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[6], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m srt_config \u001b[38;5;241m=\u001b[39m LBMConfig(stencil\u001b[38;5;241m=\u001b[39mstencil, method\u001b[38;5;241m=\u001b[39mMethod\u001b[38;5;241m.\u001b[39mSRT, relaxation_rate\u001b[38;5;241m=\u001b[39mviscous_rr,\n\u001b[1;32m      2\u001b[0m                        force_model\u001b[38;5;241m=\u001b[39mForceModel\u001b[38;5;241m.\u001b[39mSIMPLE, force\u001b[38;5;241m=\u001b[39mforce, streaming_pattern\u001b[38;5;241m=\u001b[39mstreaming_pattern)\n\u001b[0;32m----> 4\u001b[0m srt_flow \u001b[38;5;241m=\u001b[39m \u001b[43mPeriodicPipeFlow\u001b[49m\u001b[43m(\u001b[49m\u001b[43msrt_config\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mLBMOptimisation\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      5\u001b[0m srt_flow\u001b[38;5;241m.\u001b[39minit()\n\u001b[1;32m      6\u001b[0m srt_flow\u001b[38;5;241m.\u001b[39mrun(\u001b[38;5;241m400\u001b[39m)\n",
+      "Cell \u001b[0;32mIn[4], line 47\u001b[0m, in \u001b[0;36mPeriodicPipeFlow.__init__\u001b[0;34m(self, lbm_config, lbm_optimisation, config)\u001b[0m\n\u001b[1;32m     45\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m t \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimesteps:\n\u001b[1;32m     46\u001b[0m     lbm_config \u001b[38;5;241m=\u001b[39m replace(lbm_config, timestep\u001b[38;5;241m=\u001b[39mt, collision_rule\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlb_collision)\n\u001b[0;32m---> 47\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlb_kernels\u001b[38;5;241m.\u001b[39mappend(\u001b[43mcreate_lb_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlbm_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlbm_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     48\u001b[0m \u001b[43m                                              \u001b[49m\u001b[43mlbm_optimisation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlbm_optimisation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     49\u001b[0m \u001b[43m                                              \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m     51\u001b[0m \u001b[38;5;66;03m#   Macroscopic Values\u001b[39;00m\n\u001b[1;32m     52\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdensity \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1.0\u001b[39m\n",
+      "File \u001b[0;32m~/pystencils/lbmpy/lbmpy/creationfunctions.py:505\u001b[0m, in \u001b[0;36mcreate_lb_function\u001b[0;34m(ast, lbm_config, lbm_optimisation, config, optimization, **kwargs)\u001b[0m\n\u001b[1;32m    502\u001b[0m     ast \u001b[38;5;241m=\u001b[39m lbm_config\u001b[38;5;241m.\u001b[39mast\n\u001b[1;32m    504\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ast \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 505\u001b[0m     ast \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_lb_ast\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlbm_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupdate_rule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlbm_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlbm_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    506\u001b[0m \u001b[43m                        \u001b[49m\u001b[43mlbm_optimisation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlbm_optimisation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    508\u001b[0m res \u001b[38;5;241m=\u001b[39m ast\u001b[38;5;241m.\u001b[39mcompile()\n\u001b[1;32m    510\u001b[0m res\u001b[38;5;241m.\u001b[39mmethod \u001b[38;5;241m=\u001b[39m ast\u001b[38;5;241m.\u001b[39mmethod\n",
+      "File \u001b[0;32m~/pystencils/lbmpy/lbmpy/creationfunctions.py:530\u001b[0m, in \u001b[0;36mcreate_lb_ast\u001b[0;34m(update_rule, lbm_config, lbm_optimisation, config, optimization, **kwargs)\u001b[0m\n\u001b[1;32m    525\u001b[0m     update_rule \u001b[38;5;241m=\u001b[39m create_lb_update_rule(lbm_config\u001b[38;5;241m.\u001b[39mcollision_rule, lbm_config\u001b[38;5;241m=\u001b[39mlbm_config,\n\u001b[1;32m    526\u001b[0m                                         lbm_optimisation\u001b[38;5;241m=\u001b[39mlbm_optimisation, config\u001b[38;5;241m=\u001b[39mconfig)\n\u001b[1;32m    528\u001b[0m field_types \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(fa\u001b[38;5;241m.\u001b[39mfield\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;28;01mfor\u001b[39;00m fa \u001b[38;5;129;01min\u001b[39;00m update_rule\u001b[38;5;241m.\u001b[39mdefined_symbols \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(fa, Field\u001b[38;5;241m.\u001b[39mAccess))\n\u001b[0;32m--> 530\u001b[0m config \u001b[38;5;241m=\u001b[39m \u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollate_types\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfield_types\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mghost_layers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    531\u001b[0m ast \u001b[38;5;241m=\u001b[39m create_kernel(update_rule, config\u001b[38;5;241m=\u001b[39mconfig)\n\u001b[1;32m    533\u001b[0m ast\u001b[38;5;241m.\u001b[39mmethod \u001b[38;5;241m=\u001b[39m update_rule\u001b[38;5;241m.\u001b[39mmethod\n",
+      "File \u001b[0;32m/usr/lib/python3.11/dataclasses.py:1492\u001b[0m, in \u001b[0;36mreplace\u001b[0;34m(obj, **changes)\u001b[0m\n\u001b[1;32m   1485\u001b[0m         changes[f\u001b[38;5;241m.\u001b[39mname] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(obj, f\u001b[38;5;241m.\u001b[39mname)\n\u001b[1;32m   1487\u001b[0m \u001b[38;5;66;03m# Create the new object, which calls __init__() and\u001b[39;00m\n\u001b[1;32m   1488\u001b[0m \u001b[38;5;66;03m# __post_init__() (if defined), using all of the init fields we've\u001b[39;00m\n\u001b[1;32m   1489\u001b[0m \u001b[38;5;66;03m# added and/or left in 'changes'.  If there are values supplied in\u001b[39;00m\n\u001b[1;32m   1490\u001b[0m \u001b[38;5;66;03m# changes that aren't fields, this will correctly raise a\u001b[39;00m\n\u001b[1;32m   1491\u001b[0m \u001b[38;5;66;03m# TypeError.\u001b[39;00m\n\u001b[0;32m-> 1492\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__class__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mchanges\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m<string>:24\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, target, backend, function_name, data_type, default_number_float, default_number_int, iteration_slice, ghost_layers, cpu_openmp, cpu_vectorize_info, cpu_blocking, omp_single_loop, gpu_indexing, gpu_indexing_params, default_assignment_simplifications, cpu_prepend_optimizations, use_auto_for_assignments, index_fields, coordinate_names, allow_double_writes, skip_independence_check)\u001b[0m\n",
+      "File \u001b[0;32m~/pystencils/pystencils/pystencils/config.py:177\u001b[0m, in \u001b[0;36mCreateKernelConfig.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    174\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_type(dtype)\n\u001b[1;32m    176\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_type, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 177\u001b[0m     dt \u001b[38;5;241m=\u001b[39m \u001b[43mcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_type\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# The copy is necessary because BasicType has sympy shinanigans\u001b[39;00m\n\u001b[1;32m    178\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_type \u001b[38;5;241m=\u001b[39m defaultdict(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mDataTypeFactory(dt))\n\u001b[1;32m    180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_type, \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_type, defaultdict):\n",
+      "File \u001b[0;32m/usr/lib/python3.11/copy.py:102\u001b[0m, in \u001b[0;36mcopy\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m    100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(rv, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m    101\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m x\n\u001b[0;32m--> 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/usr/lib/python3.11/copy.py:273\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    271\u001b[0m     state \u001b[38;5;241m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m    272\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[0;32m--> 273\u001b[0m     \u001b[43my\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__setstate__\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    274\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    275\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(state, \u001b[38;5;28mtuple\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(state) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m:\n",
+      "File \u001b[0;32m~/.local/lib/python3.11/site-packages/sympy/core/basic.py:144\u001b[0m, in \u001b[0;36mBasic.__setstate__\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m    143\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__setstate__\u001b[39m(\u001b[38;5;28mself\u001b[39m, state):\n\u001b[0;32m--> 144\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m name, value \u001b[38;5;129;01min\u001b[39;00m \u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m():\n\u001b[1;32m    145\u001b[0m         \u001b[38;5;28msetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, name, value)\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'items'"
+     ]
+    }
+   ],
    "source": [
     "srt_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=viscous_rr,\n",
     "                       force_model=ForceModel.SIMPLE, force=force, streaming_pattern=streaming_pattern)\n",
@@ -250,32 +262,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<matplotlib.colorbar.Colorbar at 0x127d43bb0>"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "\n",
-      "text/plain": [
-       "<Figure size 1152x432 with 2 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "srt_u = srt_flow.get_trimmed_velocity_array()\n",
     "ps.plot.vector_field_magnitude(srt_u[30,:,:,:])\n",
@@ -291,7 +280,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -306,7 +295,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -317,7 +306,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -327,32 +316,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<matplotlib.colorbar.Colorbar at 0x127d1df10>"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "\n",
-      "text/plain": [
-       "<Figure size 1152x432 with 2 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "cm_impl_f_u = cm_impl_f_flow.get_trimmed_velocity_array()\n",
     "ps.plot.vector_field_magnitude(cm_impl_f_u[30,:,:,:])\n",
@@ -361,7 +327,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -377,7 +343,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -394,7 +360,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -405,7 +371,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -415,32 +381,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<matplotlib.colorbar.Colorbar at 0x1272c5760>"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "\n",
-      "text/plain": [
-       "<Figure size 1152x432 with 2 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "cm_expl_f_u = cm_expl_f_flow.get_trimmed_velocity_array()\n",
     "ps.plot.vector_field_magnitude(cm_expl_f_u[30,:,:,:])\n",
@@ -449,7 +392,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -474,7 +417,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.11.0rc1"
   },
   "vscode": {
    "interpreter": {
diff --git a/lbmpy_tests/full_scenarios/phasefield_allen_cahn/phasefield-capillary-wave.ipynb b/lbmpy_tests/full_scenarios/phasefield_allen_cahn/phasefield-capillary-wave.ipynb
index e8339d83477a4f7b2a953d945bcb89c24b0700e7..bbfa78d9843a65ca964b61614a820f66f16bf39a 100644
--- a/lbmpy_tests/full_scenarios/phasefield_allen_cahn/phasefield-capillary-wave.ipynb
+++ b/lbmpy_tests/full_scenarios/phasefield_allen_cahn/phasefield-capillary-wave.ipynb
@@ -33,32 +33,24 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If `pycuda` is installed the simulation automatically runs on GPU"
+    "If `cupy` is installed the simulation automatically runs on GPU"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No pycuda installed\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "try:\n",
-    "    import pycuda\n",
+    "    import cupy\n",
     "except ImportError:\n",
-    "    pycuda = None\n",
+    "    cupy = None\n",
     "    gpu = False\n",
     "    target = ps.Target.CPU\n",
-    "    print('No pycuda installed')\n",
+    "    print('No cupy installed')\n",
     "\n",
-    "if pycuda:\n",
+    "if cupy:\n",
     "    gpu = True\n",
     "    target = ps.Target.GPU"
    ]
@@ -1123,7 +1115,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/lbmpy_tests/full_scenarios/phasefield_allen_cahn/phasefield-gravity-wave.ipynb b/lbmpy_tests/full_scenarios/phasefield_allen_cahn/phasefield-gravity-wave.ipynb
index 451c8872115a524fa05f3df49a0025267c3aea76..c7fa30260906c06776a6fbcea8846a4489d3cf01 100644
--- a/lbmpy_tests/full_scenarios/phasefield_allen_cahn/phasefield-gravity-wave.ipynb
+++ b/lbmpy_tests/full_scenarios/phasefield_allen_cahn/phasefield-gravity-wave.ipynb
@@ -32,32 +32,24 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If `pycuda` is installed the simulation automatically runs on GPU"
+    "If `cupy` is installed, the simulation automatically runs on the GPU."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No pycuda installed\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "try:\n",
-    "    import pycuda\n",
+    "    import cupy\n",
     "except ImportError:\n",
-    "    pycuda = None\n",
+    "    cupy = None\n",
     "    gpu = False\n",
     "    target = ps.Target.CPU\n",
-    "    print('No pycuda installed')\n",
+    "    print('No cupy installed')\n",
     "\n",
-    "if pycuda:\n",
+    "if cupy:\n",
     "    gpu = True\n",
     "    target = ps.Target.GPU"
    ]
@@ -1208,7 +1200,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/lbmpy_tests/full_scenarios/shear_wave/scenario_shear_wave.py b/lbmpy_tests/full_scenarios/shear_wave/scenario_shear_wave.py
index 9005e9687e9e6bf57d88f91557597812ad105036..cd5e73b7953216c3dd22c79490ecf29c178dc944 100644
--- a/lbmpy_tests/full_scenarios/shear_wave/scenario_shear_wave.py
+++ b/lbmpy_tests/full_scenarios/shear_wave/scenario_shear_wave.py
@@ -215,7 +215,7 @@ def create_full_parameter_study():
 
 
 def test_shear_wave():
-    pytest.importorskip('pycuda')
+    pytest.importorskip('cupy')
     params = {
         'l_0': 32,
         'u_0': 0.096,
diff --git a/lbmpy_tests/phasefield/test_n_phase_boyer_analytical.ipynb b/lbmpy_tests/phasefield/test_n_phase_boyer_analytical.ipynb
index bfb151700a55e22373bf0e1088eed44d46c628d9..36b2e7dfd24afdcdcb97ba1d50cbd2a9d5a77ed4 100644
--- a/lbmpy_tests/phasefield/test_n_phase_boyer_analytical.ipynb
+++ b/lbmpy_tests/phasefield/test_n_phase_boyer_analytical.ipynb
@@ -415,7 +415,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -429,7 +429,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.2"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/lbmpy_tests/phasefield/test_n_phase_boyer_noncoupled.ipynb b/lbmpy_tests/phasefield/test_n_phase_boyer_noncoupled.ipynb
index 6958e868e7fc0dfae36db8d5cc67a58a19cb98fb..4cb099e864b12e962d1badd898e25e38c32390ae 100644
--- a/lbmpy_tests/phasefield/test_n_phase_boyer_noncoupled.ipynb
+++ b/lbmpy_tests/phasefield/test_n_phase_boyer_noncoupled.ipynb
@@ -6,21 +6,19 @@
    "metadata": {},
    "outputs": [
     {
-     "ename": "Skipped",
-     "evalue": "could not import 'pycuda': No module named 'pycuda'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mSkipped\u001b[0m                                   Traceback (most recent call last)",
-      "\u001b[0;32m/var/folders/07/0d7kq8fd0sx24cs53zz90_qc0000gp/T/ipykernel_16968/622163826.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpytest\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpytest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimportorskip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'pycuda'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/opt/local/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/_pytest/outcomes.py\u001b[0m in \u001b[0;36mimportorskip\u001b[0;34m(modname, minversion, reason)\u001b[0m\n\u001b[1;32m    210\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    211\u001b[0m                 \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"could not import {modname!r}: {exc}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mSkipped\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreason\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallow_module_level\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    213\u001b[0m     \u001b[0mmod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodules\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmodname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    214\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mminversion\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mSkipped\u001b[0m: could not import 'pycuda': No module named 'pycuda'"
-     ]
+     "data": {
+      "text/plain": [
+       "<module 'cupy' from '/home/markus/.local/lib/python3.11/site-packages/cupy/__init__.py'>"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "import pytest\n",
-    "pytest.importorskip('pycuda')"
+    "pytest.importorskip('cupy')"
    ]
   },
   {
@@ -315,7 +313,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/lbmpy_tests/phasefield/test_numerical_1D_nphase_model.ipynb b/lbmpy_tests/phasefield/test_numerical_1D_nphase_model.ipynb
index 695b4e8414d1c367e410678fe0e213cc59bf3d62..3f46dd27315ceaef81da34e8a6442abf4c573d6b 100644
--- a/lbmpy_tests/phasefield/test_numerical_1D_nphase_model.ipynb
+++ b/lbmpy_tests/phasefield/test_numerical_1D_nphase_model.ipynb
@@ -8,7 +8,7 @@
     {
      "data": {
       "text/plain": [
-       "<module 'pycuda' from '/home/markus/miniconda3/envs/pystencils/lib/python3.8/site-packages/pycuda/__init__.py'>"
+       "<module 'cupy' from '/home/markus/.local/lib/python3.11/site-packages/cupy/__init__.py'>"
       ]
      },
      "execution_count": 1,
@@ -18,7 +18,7 @@
    ],
    "source": [
     "import pytest\n",
-    "pytest.importorskip('pycuda')"
+    "pytest.importorskip('cupy')"
    ]
   },
   {
@@ -435,7 +435,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -449,7 +449,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.2"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/lbmpy_tests/test_boundary_handling.py b/lbmpy_tests/test_boundary_handling.py
index 23900e0c645b9b72cc906975ee224009ca491fe8..e174adf54745b5f9d170a0d92840011ae88309ca 100644
--- a/lbmpy_tests/test_boundary_handling.py
+++ b/lbmpy_tests/test_boundary_handling.py
@@ -26,7 +26,7 @@ def mirror_stencil(direction, mirror_axis):
 def test_simple(target):
     if target == Target.GPU:
         import pytest
-        pytest.importorskip('pycuda')
+        pytest.importorskip('cupy')
 
     dh = create_data_handling((4, 4), parallel=False, default_target=target)
     dh.add_array('pdfs', values_per_cell=9, cpu=True, gpu=target != Target.CPU)
diff --git a/lbmpy_tests/test_cpu_gpu_equivalence.py b/lbmpy_tests/test_cpu_gpu_equivalence.py
index 7683153ba871c804c68385e187884ad473614d56..f519d3f5746582da49e2406eb9ff31848d05be8c 100644
--- a/lbmpy_tests/test_cpu_gpu_equivalence.py
+++ b/lbmpy_tests/test_cpu_gpu_equivalence.py
@@ -31,7 +31,7 @@ def run_equivalence_test(domain_size, lbm_config, lbm_opt, config, time_steps=13
                                       ((18, 20), Method.MRT, True, (4, 2), 'zyxf'),
                                       ((7, 11, 18), Method.TRT, False, False, 'numpy')])
 def test_force_driven_channel_short(scenario):
-    pytest.importorskip("pycuda")
+    pytest.importorskip("cupy")
     ds = scenario[0]
     method = scenario[1]
     compressible = scenario[2]
diff --git a/lbmpy_tests/test_diffusion.py b/lbmpy_tests/test_diffusion.py
index 646531241f1fbece7274b552dda3917ced6e518f..4fce37d1ae79c7becb63827c91868497d962f4b3 100644
--- a/lbmpy_tests/test_diffusion.py
+++ b/lbmpy_tests/test_diffusion.py
@@ -77,7 +77,7 @@ def test_diffusion():
 
       The hydrodynamic field is not simulated, instead a constant velocity is assumed.
     """
-    pytest.importorskip("pycuda")
+    pytest.importorskip("cupy")
     # Parameters
     domain_size = (1600, 160)
     omega = 1.38
diff --git a/lbmpy_tests/test_gpu_block_size_limiting.py b/lbmpy_tests/test_gpu_block_size_limiting.py
index e619b53b8b0bcfd2c8f6ce49183c5be7f64da7d9..f3bfc805e65cdb3f3df0ea34aff7aaaa4a86c3b6 100644
--- a/lbmpy_tests/test_gpu_block_size_limiting.py
+++ b/lbmpy_tests/test_gpu_block_size_limiting.py
@@ -6,7 +6,7 @@ from pystencils import Target, CreateKernelConfig
 
 
 def test_gpu_block_size_limiting():
-    pytest.importorskip("pycuda")
+    pytest.importorskip("cupy")
     too_large = 2048*2048
     lbm_config = LBMConfig(method=Method.CUMULANT, stencil=LBStencil(Stencil.D3Q19),
                            relaxation_rate=1.8, compressible=True)
diff --git a/lbmpy_tests/test_lbstep.py b/lbmpy_tests/test_lbstep.py
index f5184b37a64f039bd75d278e967d4d6e7c63ece8..629d106e3379a9c0996374f47bf46d5d817d569e 100644
--- a/lbmpy_tests/test_lbstep.py
+++ b/lbmpy_tests/test_lbstep.py
@@ -6,7 +6,7 @@ from pystencils import Target, CreateKernelConfig
 from lbmpy.scenarios import create_fully_periodic_flow, create_lid_driven_cavity
 
 try:
-    import pycuda.driver
+    import cupy
     gpu_available = True
 except ImportError:
     gpu_available = False
diff --git a/lbmpy_tests/test_poisuille_channel.py b/lbmpy_tests/test_poisuille_channel.py
index e0dec156bad1aafb8158ccb84f14846ed31cba44..264d75ff2d0af94f3326fd7636fff75732dc491d 100644
--- a/lbmpy_tests/test_poisuille_channel.py
+++ b/lbmpy_tests/test_poisuille_channel.py
@@ -18,7 +18,7 @@ def test_poiseuille_channel(target, stencil_name, zero_centered, moment_space_co
     # Cuda
     if target == ps.Target.GPU:
         import pytest
-        pytest.importorskip("pycuda")
+        pytest.importorskip("cupy")
 
     cspace_info = CollisionSpace.RAW_MOMENTS if moment_space_collision else CollisionSpace.POPULATIONS
     poiseuille_channel(target=target, stencil_name=stencil_name, zero_centered=zero_centered, collision_space_info=cspace_info)
diff --git a/lbmpy_tests/test_shear_flow.py b/lbmpy_tests/test_shear_flow.py
index 1ab7ca2137893aadd2eac5c4732d4cb80da2d890..9b22d0a5b0a8cd36ce53b0e32c649428959ecf9a 100644
--- a/lbmpy_tests/test_shear_flow.py
+++ b/lbmpy_tests/test_shear_flow.py
@@ -67,7 +67,7 @@ def test_shear_flow(target, stencil_name, zero_centered):
 
     # Cuda
     if target == ps.Target.GPU:
-        pytest.importorskip("pycuda")
+        pytest.importorskip("cupy")
 
     # LB parameters
     stencil = LBStencil(stencil_name)
diff --git a/lbmpy_tests/test_simple_equilibrium_conservation.py b/lbmpy_tests/test_simple_equilibrium_conservation.py
index 0c39825cd195810aab5b10bb8df8ea770926576e..d5692e7d06b42e4173edc6c3c3231be06182f960 100644
--- a/lbmpy_tests/test_simple_equilibrium_conservation.py
+++ b/lbmpy_tests/test_simple_equilibrium_conservation.py
@@ -13,7 +13,7 @@ import pytest
 @pytest.mark.parametrize('delta_equilibrium', [False, True])
 def test_simple_equilibrium_conservation(setup, method, compressible, delta_equilibrium):
     if setup[0] == Target.GPU:
-        pytest.importorskip("pycuda")
+        pytest.importorskip("cupy")
 
     if method == Method.SRT and not delta_equilibrium:
         pytest.skip()
@@ -30,11 +30,11 @@ def test_simple_equilibrium_conservation(setup, method, compressible, delta_equi
     func = create_lb_function(lbm_config=lbm_config, config=config)
 
     if setup[0] == Target.GPU:
-        import pycuda.gpuarray as gpuarray
-        gpu_src, gpu_dst = gpuarray.to_gpu(src), gpuarray.to_gpu(dst)
+        import cupy
+        gpu_src, gpu_dst = cupy.asarray(src), cupy.asarray(dst)
         func(src=gpu_src, dst=gpu_dst)
-        gpu_src.get(src)
-        gpu_dst.get(dst)
+        src[:] = gpu_src.get()
+        dst[:] = gpu_dst.get()
     else:
         func(src=src, dst=dst)
 
diff --git a/lbmpy_tests/test_sparse_lbm.ipynb b/lbmpy_tests/test_sparse_lbm.ipynb
index da2914ba88f9cd90e70a22f47885c3ebbb7283f4..00af223b14b729e99c9eebd6b6f3d794f2428f22 100644
--- a/lbmpy_tests/test_sparse_lbm.ipynb
+++ b/lbmpy_tests/test_sparse_lbm.ipynb
@@ -17,25 +17,16 @@
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No pycuda installed\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "try:\n",
-    "    import pycuda\n",
-    "    import pycuda.gpuarray as gpuarray\n",
+    "    import cupy\n",
     "except ImportError:\n",
-    "    pycuda = None\n",
+    "    cupy = None\n",
     "    target = ps.Target.CPU\n",
-    "    print('No pycuda installed')\n",
+    "    print('No cupy installed')\n",
     "\n",
-    "if pycuda:\n",
+    "if cupy:\n",
     "    target = ps.Target.GPU"
    ]
   },
@@ -79,14 +70,12 @@
    "outputs": [
     {
      "data": {
-      "image/png": "\n",
+      "image/png": "",
       "text/plain": [
-       "<Figure size 1152x432 with 2 Axes>"
+       "<Figure size 1600x600 with 2 Axes>"
       ]
      },
-     "metadata": {
-      "needs_background": "light"
-     },
+     "metadata": {},
      "output_type": "display_data"
     }
    ],
@@ -168,16 +157,7 @@
    "cell_type": "code",
    "execution_count": 7,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:root:Using Nodes is experimental and not fully tested. Double check your generated code!\n",
-      "WARNING:root:Using Nodes is experimental and not fully tested. Double check your generated code!\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "cqc = method.conserved_quantity_computation\n",
     "inp_eqs = cqc.equilibrium_input_equations_from_init_values(force_substitution=False)\n",
@@ -221,13 +201,13 @@
    "outputs": [
     {
      "data": {
-      "image/png": "\n",
+      "image/png": "",
       "text/latex": [
        "$\\displaystyle \\left\\{ d_{0} : {d}_{0}^{0}, \\  d_{1} : {d}_{0}^{1}, \\  d_{2} : {d}_{0}^{2}, \\  d_{3} : {d}_{0}^{3}, \\  d_{4} : {d}_{0}^{4}, \\  d_{5} : {d}_{0}^{5}, \\  d_{6} : {d}_{0}^{6}, \\  d_{7} : {d}_{0}^{7}, \\  d_{8} : {d}_{0}^{8}, \\  f_{0} : {f}_{0}^{0}, \\  f_{1} : {f}_{\\mathbf{idx}_{0}^{0}}^{0}, \\  f_{2} : {f}_{\\mathbf{idx}_{0}^{1}}^{0}, \\  f_{3} : {f}_{\\mathbf{idx}_{0}^{2}}^{0}, \\  f_{4} : {f}_{\\mathbf{idx}_{0}^{3}}^{0}, \\  f_{5} : {f}_{\\mathbf{idx}_{0}^{4}}^{0}, \\  f_{6} : {f}_{\\mathbf{idx}_{0}^{5}}^{0}, \\  f_{7} : {f}_{\\mathbf{idx}_{0}^{6}}^{0}, \\  f_{8} : {f}_{\\mathbf{idx}_{0}^{7}}^{0}\\right\\}$"
       ],
       "text/plain": [
-       "{d₀: d_C__0, d₁: d_C__1, d₂: d_C__2, d₃: d_C__3, d₄: d_C__4, d₅: d_C__5, d₆: d_C__6, d₇: d_C__7, d₈: d_C__8, f₀: f_C__0, f₁: f_26b74c363b15, f₂: f_e111196926c6, f₃: f_e3f72afe7d66, f₄: f_0b929cb3f1da, f₅: f_3f75acc2de6d, \n",
-       "f₆: f_3e20adce708c, f₇: f_3a33f411da6b, f₈: f_68da5b60e7d8}"
+       "{d₀: d_C__0, d₁: d_C__1, d₂: d_C__2, d₃: d_C__3, d₄: d_C__4, d₅: d_C__5, d₆: d_C__6, d₇: d_C__7, d₈: d_C__8, f₀: f_C__0, f₁: f_9d0b5fd58d5c, f₂: f_c7175adc2ede, f₃: f_f2d66aa0a9c\n",
+       "3, f₄: f_de2a090a38e9, f₅: f_4eafba53d499, f₆: f_92d378c6bf21, f₇: f_bc4af68ef6a9, f₈: f_13012fb559c4}"
       ]
      },
      "execution_count": 8,
@@ -255,15 +235,7 @@
    "cell_type": "code",
    "execution_count": 9,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:root:Using Nodes is experimental and not fully tested. Double check your generated code!\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "collision_rule = method.get_collision_rule()\n",
     "update_rule = collision_rule.new_with_substitutions(symbol_subs)\n",
@@ -320,17 +292,17 @@
     "    global pdf_arr, pdf_arr_tmp, index_arr\n",
     "    handle_ubb()\n",
     "    if target == ps.Target.GPU:\n",
-    "        gpu_pdf_arr = gpuarray.to_gpu(pdf_arr)\n",
-    "        gpu_pdf_arr_tmp = gpuarray.to_gpu(pdf_arr_tmp)\n",
-    "        gpu_index_arr = gpuarray.to_gpu(index_arr)\n",
+    "        gpu_pdf_arr = cupy.asarray(pdf_arr)\n",
+    "        gpu_pdf_arr_tmp = cupy.asarray(pdf_arr_tmp)\n",
+    "        gpu_index_arr = cupy.asarray(index_arr)\n",
     "        \n",
     "        kernel_stream_collide(f=gpu_pdf_arr[:mapping.num_fluid_cells], \n",
     "                          d=gpu_pdf_arr_tmp[:mapping.num_fluid_cells], \n",
     "                          idx=gpu_index_arr)\n",
     "    \n",
-    "        pdf_arr = gpu_pdf_arr.get()\n",
-    "        pdf_arr_tmp = gpu_pdf_arr_tmp.get()\n",
-    "        index_arr = gpu_index_arr.get()\n",
+    "        pdf_arr[:] = gpu_pdf_arr.get()\n",
+    "        pdf_arr_tmp[:] = gpu_pdf_arr_tmp.get()\n",
+    "        index_arr[:] = gpu_index_arr.get()\n",
     "    else:\n",
     "        kernel_stream_collide(f=pdf_arr[:mapping.num_fluid_cells], \n",
     "                              d=pdf_arr_tmp[:mapping.num_fluid_cells], \n",
@@ -359,14 +331,12 @@
    "outputs": [
     {
      "data": {
-      "image/png": "\n",
+      "image/png": "",
       "text/plain": [
-       "<Figure size 1152x432 with 1 Axes>"
+       "<Figure size 1600x600 with 1 Axes>"
       ]
      },
-     "metadata": {
-      "needs_background": "light"
-     },
+     "metadata": {},
      "output_type": "display_data"
     }
    ],
@@ -389,13 +359,24 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:root:Using Nodes is experimental and not fully tested. Double check your generated code!\n",
-      "WARNING:root:Using Nodes is experimental and not fully tested. Double check your generated code!\n",
-      "WARNING:root:Using Nodes is experimental and not fully tested. Double check your generated code!\n",
-      "WARNING:root:Lhs\"dir of type \"int64_t\" is assigned with a different datatype rhs: \"indexField[0](dir)\" of type \"int32_t\".\n"
+     "ename": "AttributeError",
+     "evalue": "'tuple' object has no attribute 'items'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[14], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m channel:\n\u001b[0;32m----> 2\u001b[0m     reference \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_channel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdomain_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlb_method\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m      4\u001b[0m     reference \u001b[38;5;241m=\u001b[39m create_lid_driven_cavity(domain_size, relaxation_rate\u001b[38;5;241m=\u001b[39momega, lid_velocity\u001b[38;5;241m=\u001b[39mlid_velocity,\n\u001b[1;32m      5\u001b[0m                                          compressible\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
+      "File \u001b[0;32m~/pystencils/lbmpy/lbmpy/scenarios.py:142\u001b[0m, in \u001b[0;36mcreate_channel\u001b[0;34m(domain_size, force, pressure_difference, u_max, diameter_callback, duct, wall_boundary, parallel, data_handling, **kwargs)\u001b[0m\n\u001b[1;32m    140\u001b[0m     kwargs[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mforce\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtuple\u001b[39m([force, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m0\u001b[39m][:dim])\n\u001b[1;32m    141\u001b[0m     \u001b[38;5;28;01massert\u001b[39;00m data_handling\u001b[38;5;241m.\u001b[39mperiodicity[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m--> 142\u001b[0m     step \u001b[38;5;241m=\u001b[39m \u001b[43mLatticeBoltzmannStep\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_handling\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_handling\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mforce_driven_channel\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    143\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m pressure_difference:\n\u001b[1;32m    144\u001b[0m     inflow \u001b[38;5;241m=\u001b[39m FixedDensity(\u001b[38;5;241m1.0\u001b[39m \u001b[38;5;241m+\u001b[39m pressure_difference)\n",
+      "File \u001b[0;32m~/pystencils/lbmpy/lbmpy/lbstep.py:122\u001b[0m, in \u001b[0;36mLatticeBoltzmannStep.__init__\u001b[0;34m(self, domain_size, lbm_kernel, periodicity, kernel_params, data_handling, name, optimization, velocity_data_name, density_data_name, density_data_index, compute_velocity_in_every_step, compute_density_in_every_step, velocity_input_array_name, time_step_order, flag_interface, alignment_if_vectorized, fixed_loop_sizes, timeloop_creation_function, lbm_config, lbm_optimisation, config, **method_parameters)\u001b[0m\n\u001b[1;32m    119\u001b[0m lbm_config \u001b[38;5;241m=\u001b[39m replace(lbm_config, temporary_field_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tmp_arr_name)\n\u001b[1;32m    121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m time_step_order \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstream_collide\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[0;32m--> 122\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lbmKernels \u001b[38;5;241m=\u001b[39m [\u001b[43mcreate_lb_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlbm_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlbm_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    123\u001b[0m \u001b[43m                                           \u001b[49m\u001b[43mlbm_optimisation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlbm_optimisation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    124\u001b[0m \u001b[43m                                           \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m]\n\u001b[1;32m    125\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m time_step_order \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcollide_stream\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[1;32m    126\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lbmKernels 
\u001b[38;5;241m=\u001b[39m [create_lb_function(lbm_config\u001b[38;5;241m=\u001b[39mlbm_config,\n\u001b[1;32m    127\u001b[0m                                            lbm_optimisation\u001b[38;5;241m=\u001b[39mlbm_optimisation,\n\u001b[1;32m    128\u001b[0m                                            config\u001b[38;5;241m=\u001b[39mconfig,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    132\u001b[0m                                            config\u001b[38;5;241m=\u001b[39mconfig,\n\u001b[1;32m    133\u001b[0m                                            kernel_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstream_pull_only\u001b[39m\u001b[38;5;124m'\u001b[39m)]\n",
+      "File \u001b[0;32m~/pystencils/lbmpy/lbmpy/creationfunctions.py:505\u001b[0m, in \u001b[0;36mcreate_lb_function\u001b[0;34m(ast, lbm_config, lbm_optimisation, config, optimization, **kwargs)\u001b[0m\n\u001b[1;32m    502\u001b[0m     ast \u001b[38;5;241m=\u001b[39m lbm_config\u001b[38;5;241m.\u001b[39mast\n\u001b[1;32m    504\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ast \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 505\u001b[0m     ast \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_lb_ast\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlbm_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupdate_rule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlbm_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlbm_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    506\u001b[0m \u001b[43m                        \u001b[49m\u001b[43mlbm_optimisation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlbm_optimisation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    508\u001b[0m res \u001b[38;5;241m=\u001b[39m ast\u001b[38;5;241m.\u001b[39mcompile()\n\u001b[1;32m    510\u001b[0m res\u001b[38;5;241m.\u001b[39mmethod \u001b[38;5;241m=\u001b[39m ast\u001b[38;5;241m.\u001b[39mmethod\n",
+      "File \u001b[0;32m~/pystencils/lbmpy/lbmpy/creationfunctions.py:530\u001b[0m, in \u001b[0;36mcreate_lb_ast\u001b[0;34m(update_rule, lbm_config, lbm_optimisation, config, optimization, **kwargs)\u001b[0m\n\u001b[1;32m    525\u001b[0m     update_rule \u001b[38;5;241m=\u001b[39m create_lb_update_rule(lbm_config\u001b[38;5;241m.\u001b[39mcollision_rule, lbm_config\u001b[38;5;241m=\u001b[39mlbm_config,\n\u001b[1;32m    526\u001b[0m                                         lbm_optimisation\u001b[38;5;241m=\u001b[39mlbm_optimisation, config\u001b[38;5;241m=\u001b[39mconfig)\n\u001b[1;32m    528\u001b[0m field_types \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(fa\u001b[38;5;241m.\u001b[39mfield\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;28;01mfor\u001b[39;00m fa \u001b[38;5;129;01min\u001b[39;00m update_rule\u001b[38;5;241m.\u001b[39mdefined_symbols \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(fa, Field\u001b[38;5;241m.\u001b[39mAccess))\n\u001b[0;32m--> 530\u001b[0m config \u001b[38;5;241m=\u001b[39m \u001b[43mreplace\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollate_types\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfield_types\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mghost_layers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    531\u001b[0m ast \u001b[38;5;241m=\u001b[39m create_kernel(update_rule, config\u001b[38;5;241m=\u001b[39mconfig)\n\u001b[1;32m    533\u001b[0m ast\u001b[38;5;241m.\u001b[39mmethod \u001b[38;5;241m=\u001b[39m update_rule\u001b[38;5;241m.\u001b[39mmethod\n",
+      "File \u001b[0;32m/usr/lib/python3.11/dataclasses.py:1492\u001b[0m, in \u001b[0;36mreplace\u001b[0;34m(obj, **changes)\u001b[0m\n\u001b[1;32m   1485\u001b[0m         changes[f\u001b[38;5;241m.\u001b[39mname] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(obj, f\u001b[38;5;241m.\u001b[39mname)\n\u001b[1;32m   1487\u001b[0m \u001b[38;5;66;03m# Create the new object, which calls __init__() and\u001b[39;00m\n\u001b[1;32m   1488\u001b[0m \u001b[38;5;66;03m# __post_init__() (if defined), using all of the init fields we've\u001b[39;00m\n\u001b[1;32m   1489\u001b[0m \u001b[38;5;66;03m# added and/or left in 'changes'.  If there are values supplied in\u001b[39;00m\n\u001b[1;32m   1490\u001b[0m \u001b[38;5;66;03m# changes that aren't fields, this will correctly raise a\u001b[39;00m\n\u001b[1;32m   1491\u001b[0m \u001b[38;5;66;03m# TypeError.\u001b[39;00m\n\u001b[0;32m-> 1492\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__class__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mchanges\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m<string>:24\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, target, backend, function_name, data_type, default_number_float, default_number_int, iteration_slice, ghost_layers, cpu_openmp, cpu_vectorize_info, cpu_blocking, omp_single_loop, gpu_indexing, gpu_indexing_params, default_assignment_simplifications, cpu_prepend_optimizations, use_auto_for_assignments, index_fields, coordinate_names, allow_double_writes, skip_independence_check)\u001b[0m\n",
+      "File \u001b[0;32m~/pystencils/pystencils/pystencils/config.py:177\u001b[0m, in \u001b[0;36mCreateKernelConfig.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    174\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_type(dtype)\n\u001b[1;32m    176\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_type, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 177\u001b[0m     dt \u001b[38;5;241m=\u001b[39m \u001b[43mcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_type\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# The copy is necessary because BasicType has sympy shinanigans\u001b[39;00m\n\u001b[1;32m    178\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_type \u001b[38;5;241m=\u001b[39m defaultdict(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mDataTypeFactory(dt))\n\u001b[1;32m    180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_type, \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_type, defaultdict):\n",
+      "File \u001b[0;32m/usr/lib/python3.11/copy.py:102\u001b[0m, in \u001b[0;36mcopy\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m    100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(rv, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m    101\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m x\n\u001b[0;32m--> 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_reconstruct\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrv\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/usr/lib/python3.11/copy.py:273\u001b[0m, in \u001b[0;36m_reconstruct\u001b[0;34m(x, memo, func, args, state, listiter, dictiter, deepcopy)\u001b[0m\n\u001b[1;32m    271\u001b[0m     state \u001b[38;5;241m=\u001b[39m deepcopy(state, memo)\n\u001b[1;32m    272\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(y, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__setstate__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[0;32m--> 273\u001b[0m     \u001b[43my\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__setstate__\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    274\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    275\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(state, \u001b[38;5;28mtuple\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(state) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m:\n",
+      "File \u001b[0;32m~/.local/lib/python3.11/site-packages/sympy/core/basic.py:144\u001b[0m, in \u001b[0;36mBasic.__setstate__\u001b[0;34m(self, state)\u001b[0m\n\u001b[1;32m    143\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__setstate__\u001b[39m(\u001b[38;5;28mself\u001b[39m, state):\n\u001b[0;32m--> 144\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m name, value \u001b[38;5;129;01min\u001b[39;00m \u001b[43mstate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m():\n\u001b[1;32m    145\u001b[0m         \u001b[38;5;28msetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, name, value)\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'items'"
      ]
     }
    ],
@@ -410,7 +391,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -437,7 +418,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.11.0rc1"
   }
  },
  "nbformat": 4,
diff --git a/setup.py b/setup.py
index 1d055a76b2bf5266440618eb5a22fefc2a437c27..87ca1837bbb20d6cdd74106d3ac909fa7135a473 100644
--- a/setup.py
+++ b/setup.py
@@ -107,7 +107,7 @@ setup(name='lbmpy',
           "Source Code": "https://i10git.cs.fau.de/pycodegen/lbmpy",
       },
       extras_require={
-          'gpu': ['pycuda'],
+          'gpu': ['cupy'],
           'opencl': ['pyopencl'],
           'alltrafos': ['islpy', 'py-cpuinfo'],
           'interactive': ['scipy', 'scikit-image', 'cython', 'matplotlib',