diff --git a/boundaries/boundaryhandling.py b/boundaries/boundaryhandling.py
index 4cfa82d51fee33a3b4dc8182ec07c23c06c82a53..5ca40d6ea7bdf2da1641d1dfa5f83e784c669407 100644
--- a/boundaries/boundaryhandling.py
+++ b/boundaries/boundaryhandling.py
@@ -127,9 +127,9 @@ class LbmMethodInfo(CustomCppCode):
             inverseDir = tuple([-i for i in direction])
             invDirs.append(str(stencil.index(inverseDir)))
 
-        code += "static const int %s [] = { %s };\n" % (INV_DIR_SYMBOL.name, ", ".join(invDirs))
+        code += "const int %s [] = { %s };\n" % (INV_DIR_SYMBOL.name, ", ".join(invDirs))
         weights = [str(w.evalf()) for w in lbMethod.weights]
-        code += "static const double %s [] = { %s };\n" % (WEIGHTS_SYMBOL.name, ",".join(weights))
+        code += "const double %s [] = { %s };\n" % (WEIGHTS_SYMBOL.name, ",".join(weights))
         super(LbmMethodInfo, self).__init__(code, symbolsRead=set(), symbolsDefined=symbolsDefined)
 
 
diff --git a/creationfunctions.py b/creationfunctions.py
index 173c5c6ac22f3acaedef5d69bd2fb106e4a55e6c..f2df502c881bf4809aa0f6e74a1ee88f5e4c0efd 100644
--- a/creationfunctions.py
+++ b/creationfunctions.py
@@ -12,7 +12,6 @@ from lbmpy.updatekernels import createStreamPullKernel, createPdfArray
 
 def _getParams(params, optParams):
     defaultMethodDescription = {
-        'target': 'cpu',
         'stencil': 'D2Q9',
         'method': 'srt',  # can be srt, trt or mrt
         'relaxationRates': sp.symbols("omega_:10"),
@@ -34,6 +33,7 @@ def _getParams(params, optParams):
         'fieldSize': None,
         'fieldLayout': 'reverseNumpy',  # can be 'numpy' (='c'), 'reverseNumpy' (='f'), 'fzyx', 'zyxf'
 
+        'target': 'cpu',
         'openMP': True,
         'pdfArr': None,
     }
@@ -58,7 +58,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
         params['optimizationParams'] = optParams
         ast = createLatticeBoltzmannAst(**params)
 
-    if params['target'] == 'cpu':
+    if optParams['target'] == 'cpu':
         from pystencils.cpu import makePythonFunction as makePythonCpuFunction, addOpenMP
         if 'openMP' in optParams:
             if isinstance(optParams['openMP'], bool) and optParams['openMP']:
@@ -66,7 +66,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
             elif isinstance(optParams['openMP'], int):
                 addOpenMP(ast, numThreads=optParams['openMP'])
         res = makePythonCpuFunction(ast)
-    elif params['target'] == 'gpu':
+    elif optParams['target'] == 'gpu':
         from pystencils.gpucuda import makePythonFunction as makePythonGpuFunction
         res = makePythonGpuFunction(ast)
     else:
@@ -84,7 +84,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
         params['optimizationParams'] = optimizationParams
         updateRule = createLatticeBoltzmannUpdateRule(**params)
 
-    if params['target'] == 'cpu':
+    if optParams['target'] == 'cpu':
         from pystencils.cpu import createKernel
         if 'splitGroups' in updateRule.simplificationHints:
             print("splitting!")
@@ -92,7 +92,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
         else:
             splitGroups = ()
         res = createKernel(updateRule.allEquations, splitGroups=splitGroups)
-    elif params['target'] == 'gpu':
+    elif optParams['target'] == 'gpu':
         from pystencils.gpucuda import createCUDAKernel
         res = createCUDAKernel(updateRule.allEquations)
     else:
diff --git a/moments.py b/moments.py
index 6c7ce3c9c91cac28c1ec1f105fbba6cdbf0f4df3..b2209ec20854424a4bc08ce1aa42da385a0f5958 100644
--- a/moments.py
+++ b/moments.py
@@ -385,10 +385,10 @@ def getDefaultMomentSetForStencil(stencil):
     if stencilsHaveSameEntries(stencil, getStencil("D3Q15")):
         x, y, z = MOMENT_SYMBOLS
         nonMatchedMoments = [(1, 2, 0), (2, 2, 0), (1, 1, 2), (1, 2, 2), (2, 2, 2)]
-        additionalMoments = (x**2 * y**2 + x**2 * z**2 + y**2 * z**2,
-                             x * (y**2 + z**2),
-                             y * (x**2 + z**2),
-                             z * (x**2 + y**2))
+        additionalMoments = (6 * (x**2 * y**2 + x**2 * z**2 + y**2 * z**2),
+                             3 * (x * (y**2 + z**2)),
+                             3 * (y * (x**2 + z**2)),
+                             3 * (z * (x**2 + y**2)))
         toRemove = set(extendMomentsWithPermutations(nonMatchedMoments))
         return toPoly(set(all27Moments) - toRemove) + additionalMoments
 
diff --git a/serialscenario.py b/serialscenario.py
index 77b26c49a10857cfeedbb7d8856824c8e2c63b86..ca4a93fb45a9ade3028a6f3bcc3cc653cc150a34 100644
--- a/serialscenario.py
+++ b/serialscenario.py
@@ -10,6 +10,9 @@ from lbmpy.stencils import getStencil
 
 def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizationParameters, lbmKernel=None,
                 initialVelocity=None, preUpdateFunctions=[]):
+    if 'target' not in optimizationParameters:
+        optimizationParameters['target'] = 'cpu'
+
     ghostLayers = 1
     domainSizeWithGhostLayer = tuple([s + 2 * ghostLayers for s in domainSize])
     D = len(domainSize)
@@ -32,7 +35,8 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio
     # Boundary setup
     if boundarySetupFunction is not None:
         symPdfField = Field.createFromNumpyArray('pdfs', pdfArrays[0], indexDimensions=1)
-        boundaryHandling = BoundaryHandling(symPdfField, domainSize, lbmKernel.method)
+        boundaryHandling = BoundaryHandling(symPdfField, domainSize, lbmKernel.method,
+                                            target=optimizationParameters['target'])
         boundarySetupFunction(boundaryHandling=boundaryHandling, method=method)
         boundaryHandling.prepare()
     else:
@@ -41,16 +45,21 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio
     # Macroscopic value input/output
     densityArr = [np.zeros(domainSizeWithGhostLayer)]
     velocityArr = [np.zeros(domainSizeWithGhostLayer + (D,))]
-    getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0])
+    getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0], target='cpu')
 
     if initialVelocity is None:
         initialVelocity = [0] * D
     setMacroscopic = compileMacroscopicValuesSetter(method, {'density': 1.0, 'velocity': initialVelocity},
-                                                    pdfArr=pdfArrays[0])
+                                                    pdfArr=pdfArrays[0], target='cpu')
     setMacroscopic(pdfs=pdfArrays[0])
 
-    # Run simulation
-    def timeLoop(timeSteps):
+    if optimizationParameters['target'] == 'gpu':
+        import pycuda.gpuarray as gpuarray
+        pdfGpuArrays = [gpuarray.to_gpu(a) for a in pdfArrays]
+    else:
+        pdfGpuArrays = []
+
+    def cpuTimeLoop(timeSteps):
         for t in range(timeSteps):
             for f in preUpdateFunctions:
                 f(pdfArrays[0])
@@ -62,9 +71,31 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio
         getMacroscopic(pdfs=pdfArrays[0], density=densityArr[0], velocity=velocityArr[0])
         return pdfArrays[0], densityArr[0], velocityArr[0]
 
-    timeLoop.kernel = lbmKernel
+    def gpuTimeLoop(timeSteps):
+        # Transfer data to gpu
+        for cpuArr, gpuArr in zip(pdfArrays, pdfGpuArrays):
+            gpuArr.set(cpuArr)
+
+        for t in range(timeSteps):
+            for f in preUpdateFunctions:
+                f(pdfGpuArrays[0])
+            if boundaryHandling is not None:
+                boundaryHandling(pdfs=pdfGpuArrays[0])
+            lbmKernel(src=pdfGpuArrays[0], dst=pdfGpuArrays[1])
+
+            pdfGpuArrays[0], pdfGpuArrays[1] = pdfGpuArrays[1], pdfGpuArrays[0]
+
+        # Transfer data from gpu to cpu
+        for cpuArr, gpuArr in zip(pdfArrays, pdfGpuArrays):
+            gpuArr.get(cpuArr)
+
+        getMacroscopic(pdfs=pdfArrays[0], density=densityArr[0], velocity=velocityArr[0])
+        return pdfArrays[0], densityArr[0], velocityArr[0]
+
+    cpuTimeLoop.kernel = lbmKernel
+    gpuTimeLoop.kernel = lbmKernel
 
-    return timeLoop
+    return gpuTimeLoop if optimizationParameters['target'] == 'gpu' else cpuTimeLoop
 
 
 def runLidDrivenCavity(domainSize, lidVelocity=0.005, optimizationParameters={}, lbmKernel=None, **kwargs):