From ff9e8f48b88574e6caadaa24bb50960f9ca6f699 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Victor=20Tozatti=20Risso?=
 <joaovictortr@protonmail.com>
Date: Wed, 13 Dec 2017 12:36:13 +0100
Subject: [PATCH] Code generation for field serialization into buffers

Concept: Generate code involving the (un)packing of fields (from)to linear
(1D) arrays, i.e. (de)serialization of the field values for buffered
communication.

A linear index is generated for the buffer, by inferring the strides and
variables of the loops over fields in the AST. In the CPU, this information is
obtained through the makeLoopOverDomain function, in
pystencils/transformations/transformations.py. On CUDA, the strides of
the fields (excluding buffers) are combined with the indexing variables to infer
the indexing of the buffer.

What is supported:
    - code generation for both CPU and GPU
    - (un)packing of fields with all the memory layouts supported by
    pystencils
    - (un)packing slices of fields (from)into the buffer
    - (un)packing subsets of cell values from the fields (from)into the buffer

Limitations:

- assumes that only one buffer and one field are being operated within
each kernel, however multiple equations involving the buffer and the
field are supported.

- (un)packing multiple cell values (from)into the buffer is supported,
however it is limited to the fields with indexDimensions=1. The same
applies to (un)packing subset of cell values of each cell.

Changes in this commit:

- add the FieldType enumeration to pystencils/field.py, to mark fields
of various types. This is replaces and is a generalization of the
isIndexedField boolean flag of the Field class. For now, the types
supported are: generic, indexed and buffer fields.

- add the fieldType property to the Field class, which indicates the
type of the field. Modifications were also performed to the member
functions of the Field class to add this property.

- add resolveBufferAccesses function, which replaces the fields marked
as buffers with the actual field access in the AST traversal.

Miscelaneous changes:

- add blockDim and gridDim variables as CUDA indexing variables.
---
 pystencils/__init__.py                        |   2 +-
 pystencils/cpu/cpujit.py                      |   5 +-
 pystencils/cpu/kernelcreation.py              |  30 ++-
 pystencils/field.py                           |  49 +++-
 pystencils/gpucuda/cudajit.py                 |   5 +-
 pystencils/gpucuda/indexing.py                |   6 +-
 pystencils/gpucuda/kernelcreation.py          |  33 ++-
 pystencils/llvm/kernelcreation.py             |   6 +-
 pystencils/transformations/transformations.py |  75 +++++-
 pystencils_tests/test_buffer.py               | 195 ++++++++++++++++
 pystencils_tests/test_buffer_gpu.py           | 217 ++++++++++++++++++
 pystencils_tests/test_jacobi_cbackend.py      |   4 +-
 pystencils_walberla/jinja_filters.py          |   3 +-
 setup_lbmpy_walberla.py                       |   2 +-
 14 files changed, 588 insertions(+), 44 deletions(-)
 create mode 100644 pystencils_tests/test_buffer.py
 create mode 100644 pystencils_tests/test_buffer_gpu.py

diff --git a/pystencils/__init__.py b/pystencils/__init__.py
index b78f17d6..27b0b97f 100644
--- a/pystencils/__init__.py
+++ b/pystencils/__init__.py
@@ -1,4 +1,4 @@
-from pystencils.field import Field, extractCommonSubexpressions
+from pystencils.field import Field, FieldType, extractCommonSubexpressions
 from pystencils.data_types import TypedSymbol
 from pystencils.slicing import makeSlice
 from pystencils.kernelcreation import createKernel, createIndexedKernel
diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py
index 69c3d8cd..28184d6f 100644
--- a/pystencils/cpu/cpujit.py
+++ b/pystencils/cpu/cpujit.py
@@ -74,6 +74,7 @@ from pystencils.backends.cbackend import generateC, getHeaders
 from collections import OrderedDict, Mapping
 from pystencils.transformations import symbolNameToVariableName
 from pystencils.data_types import toCtypes, getBaseType, StructType
+from pystencils.field import FieldType
 
 
 def makePythonFunction(kernelFunctionNode, argumentDict={}):
@@ -387,9 +388,9 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict):
                         raise ValueError("Passed array '%s' has strides %s which does not match expected strides %s" %
                                          (arg.fieldName, str(fieldArr.strides), str(symbolicFieldStrides)))
 
-                if symbolicField.isIndexField:
+                if FieldType.isIndexed(symbolicField):
                     indexArrShapes.add(fieldArr.shape[:symbolicField.spatialDimensions])
-                else:
+                elif not FieldType.isBuffer(symbolicField):
                     arrayShapes.add(fieldArr.shape[:symbolicField.spatialDimensions])
 
             elif arg.isFieldShapeArgument:
diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py
index 298adea4..91b6dc3e 100644
--- a/pystencils/cpu/kernelcreation.py
+++ b/pystencils/cpu/kernelcreation.py
@@ -4,11 +4,11 @@ from functools import partial
 from collections import defaultdict
 
 from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction
-from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, \
+from pystencils.transformations import resolveBufferAccesses, resolveFieldAccesses, makeLoopOverDomain, \
     typeAllEquations, getOptimalLoopOrdering, parseBasePointerInfo, moveConstantsBeforeLoop, splitInnerLoop, \
     substituteArrayAccessesWithConstants
 from pystencils.data_types import TypedSymbol, BasicType, StructType, createType
-from pystencils.field import Field
+from pystencils.field import Field, FieldType
 import pystencils.astnodes as ast
 from pystencils.cpu.cpujit import makePythonFunction
 
@@ -51,10 +51,13 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol='double',
     allFields = fieldsRead.union(fieldsWritten)
     readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
 
+    buffers = set([f for f in allFields if FieldType.isBuffer(f)])
+    fieldsWithoutBuffers = allFields - buffers
+
     body = ast.Block(assignments)
-    loopOrder = getOptimalLoopOrdering(allFields)
-    code = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice,
-                              ghostLayers=ghostLayers, loopOrder=loopOrder)
+    loopOrder = getOptimalLoopOrdering(fieldsWithoutBuffers)
+    code, loopStrides, loopVars = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice,
+                                                     ghostLayers=ghostLayers, loopOrder=loopOrder)
     code.target = 'cpu'
 
     if splitGroups:
@@ -62,8 +65,20 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol='double',
         splitInnerLoop(code, typedSplitGroups)
 
     basePointerInfo = [['spatialInner0'], ['spatialInner1']] if len(loopOrder) >= 2 else [['spatialInner0']]
-    basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) for field in allFields}
+    basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field)
+                        for field in fieldsWithoutBuffers}
+
+    bufferBasePointerInfos = {field.name: parseBasePointerInfo([['spatialInner0']], [0], field) for field in buffers}
+    basePointerInfos.update(bufferBasePointerInfos)
+
+    baseBufferIndex = loopVars[0]
+    stride = 1
+    for idx, var in enumerate(loopVars[1:]):
+        curStride = loopStrides[idx]
+        stride *= int(curStride) if isinstance(curStride, float) else curStride
+        baseBufferIndex += var * stride
 
+    resolveBufferAccesses(code, baseBufferIndex, readOnlyFields)
     resolveFieldAccesses(code, readOnlyFields, fieldToBasePointerInfo=basePointerInfos)
     substituteArrayAccessesWithConstants(code)
     moveConstantsBeforeLoop(code)
@@ -93,7 +108,8 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ
     allFields = fieldsRead.union(fieldsWritten)
 
     for indexField in indexFields:
-        indexField.isIndexField = True
+        indexField.fieldType = FieldType.INDEXED
+        assert FieldType.isIndexed(indexField)
         assert indexField.spatialDimensions == 1, "Index fields have to be 1D"
 
     nonIndexFields = [f for f in allFields if f not in indexFields]
diff --git a/pystencils/field.py b/pystencils/field.py
index d545d15c..51d08df9 100644
--- a/pystencils/field.py
+++ b/pystencils/field.py
@@ -1,3 +1,4 @@
+from enum import Enum
 from itertools import chain
 import numpy as np
 import sympy as sp
@@ -7,6 +8,31 @@ from pystencils.data_types import TypedSymbol, createType, createCompositeTypeFr
 from pystencils.sympyextensions import isIntegerSequence
 
 
+class FieldType(Enum):
+    # generic fields
+    GENERIC = 0
+    # index fields are currently only used for boundary handling
+    # the coordinates are not the loop counters in that case, but are read from this index field
+    INDEXED = 1
+    # communication buffer, used for (un)packing data in communication.
+    BUFFER = 2
+
+    @staticmethod
+    def isGeneric(field):
+        assert isinstance(field, Field)
+        return field.fieldType == FieldType.GENERIC
+
+    @staticmethod
+    def isIndexed(field):
+        assert isinstance(field, Field)
+        return field.fieldType == FieldType.INDEXED
+
+    @staticmethod
+    def isBuffer(field):
+        assert isinstance(field, Field)
+        return field.fieldType == FieldType.BUFFER
+
+
 class Field(object):
     """
     With fields one can formulate stencil-like update rules on structured grids.
@@ -51,7 +77,7 @@ class Field(object):
 
     @staticmethod
     def createGeneric(fieldName, spatialDimensions, dtype=np.float64, indexDimensions=0, layout='numpy',
-                      indexShape=None):
+                      indexShape=None, fieldType=FieldType.GENERIC):
         """
         Creates a generic field where the field size is not fixed i.e. can be called with arrays of different sizes
 
@@ -85,7 +111,7 @@ class Field(object):
             shape += (1,)
             strides += (1,)
 
-        return Field(fieldName, dtype, layout, shape, strides)
+        return Field(fieldName, fieldType, dtype, layout, shape, strides)
 
     @staticmethod
     def createFromNumpyArray(fieldName, npArray, indexDimensions=0):
@@ -114,7 +140,7 @@ class Field(object):
             shape += (1,)
             strides += (1,)
 
-        return Field(fieldName, npArray.dtype, spatialLayout, shape, strides)
+        return Field(fieldName, FieldType.GENERIC, npArray.dtype, spatialLayout, shape, strides)
 
     @staticmethod
     def createFixedSize(fieldName, shape, indexDimensions=0, dtype=np.float64, layout='numpy'):
@@ -146,21 +172,20 @@ class Field(object):
         spatialLayout = list(layout)
         for i in range(spatialDimensions, len(layout)):
             spatialLayout.remove(i)
-        return Field(fieldName, dtype, tuple(spatialLayout), shape, strides)
+        return Field(fieldName, FieldType.GENERIC, dtype, tuple(spatialLayout), shape, strides)
 
-    def __init__(self, fieldName, dtype, layout, shape, strides):
+    def __init__(self, fieldName, fieldType, dtype, layout, shape, strides):
         """Do not use directly. Use static create* methods"""
         self._fieldName = fieldName
+        assert isinstance(fieldType, FieldType)
+        self.fieldType = fieldType
         self._dtype = createType(dtype)
         self._layout = normalizeLayout(layout)
         self.shape = shape
         self.strides = strides
-        # index fields are currently only used for boundary handling
-        # the coordinates are not the loop counters in that case, but are read from this index field
-        self.isIndexField = False
 
     def newFieldWithDifferentName(self, newName):
-        return Field(newName, self._dtype, self._layout, self.shape, self.strides)
+        return Field(newName, self.fieldType, self._dtype, self._layout, self.shape, self.strides)
 
     @property
     def spatialDimensions(self):
@@ -243,11 +268,11 @@ class Field(object):
         return Field.Access(self, center)(*args, **kwargs)
 
     def __hash__(self):
-        return hash((self._layout, self.shape, self.strides, self._dtype, self._fieldName))
+        return hash((self._layout, self.shape, self.strides, self._dtype, self.fieldType, self._fieldName))
 
     def __eq__(self, other):
-        selfTuple = (self.shape, self.strides, self.name, self.dtype)
-        otherTuple = (other.shape, other.strides, other.name, other.dtype)
+        selfTuple = (self.shape, self.strides, self.name, self.dtype, self.fieldType)
+        otherTuple = (other.shape, other.strides, other.name, other.dtype, other.fieldType)
         return selfTuple == otherTuple
 
     PREFIX = "f"
diff --git a/pystencils/gpucuda/cudajit.py b/pystencils/gpucuda/cudajit.py
index b815f5c3..3a0fe85f 100644
--- a/pystencils/gpucuda/cudajit.py
+++ b/pystencils/gpucuda/cudajit.py
@@ -2,6 +2,7 @@ import numpy as np
 from pystencils.backends.cbackend import generateC
 from pystencils.transformations import symbolNameToVariableName
 from pystencils.data_types import StructType, getBaseType
+from pystencils.field import FieldType
 
 
 def makePythonFunction(kernelFunctionNode, argumentDict={}):
@@ -119,9 +120,9 @@ def _checkArguments(parameterSpecification, argumentDict):
                         raise ValueError("Passed array '%s' has strides %s which does not match expected strides %s" %
                                          (arg.fieldName, str(fieldArr.strides), str(symbolicFieldStrides)))
 
-                if symbolicField.isIndexField:
+                if FieldType.isIndexed(symbolicField):
                     indexArrShapes.add(fieldArr.shape[:symbolicField.spatialDimensions])
-                else:
+                elif not FieldType.isBuffer(symbolicField):
                     arrayShapes.add(fieldArr.shape[:symbolicField.spatialDimensions])
 
     if len(arrayShapes) > 1:
diff --git a/pystencils/gpucuda/indexing.py b/pystencils/gpucuda/indexing.py
index 99db1efc..aa05078e 100644
--- a/pystencils/gpucuda/indexing.py
+++ b/pystencils/gpucuda/indexing.py
@@ -11,6 +11,8 @@ AUTO_BLOCKSIZE_LIMITING = True
 
 BLOCK_IDX = [TypedSymbol("blockIdx." + coord, createType("int")) for coord in ('x', 'y', 'z')]
 THREAD_IDX = [TypedSymbol("threadIdx." + coord, createType("int")) for coord in ('x', 'y', 'z')]
+BLOCK_DIM = [TypedSymbol("blockDim." + coord, createType("int")) for coord in ('x', 'y', 'z')]
+GRID_DIM = [TypedSymbol("gridDim." + coord, createType("int")) for coord in ('x', 'y', 'z')]
 
 
 class AbstractIndexing(abc.ABCMeta('ABC', (object,), {})):
@@ -28,8 +30,8 @@ class AbstractIndexing(abc.ABCMeta('ABC', (object,), {})):
 
     @property
     def indexVariables(self):
-        """Sympy symbols for CUDA's block and thread indices"""
-        return BLOCK_IDX + THREAD_IDX
+        """Sympy symbols for CUDA's block and thread indices, and block and grid dimensions. """
+        return BLOCK_IDX + THREAD_IDX + BLOCK_DIM + GRID_DIM
 
     @abc.abstractmethod
     def getCallParameters(self, arrShape):
diff --git a/pystencils/gpucuda/kernelcreation.py b/pystencils/gpucuda/kernelcreation.py
index 73870936..5bd2f5d7 100644
--- a/pystencils/gpucuda/kernelcreation.py
+++ b/pystencils/gpucuda/kernelcreation.py
@@ -2,10 +2,10 @@ from functools import partial
 
 from pystencils.gpucuda.indexing import BlockIndexing
 from pystencils.transformations import resolveFieldAccesses, typeAllEquations, parseBasePointerInfo, getCommonShape, \
-    substituteArrayAccessesWithConstants
+    substituteArrayAccessesWithConstants, resolveBufferAccesses
 from pystencils.astnodes import Block, KernelFunction, SympyAssignment, LoopOverCoordinate
 from pystencils.data_types import TypedSymbol, BasicType, StructType
-from pystencils import Field
+from pystencils import Field, FieldType
 from pystencils.gpucuda.cudajit import makePythonFunction
 
 
@@ -15,11 +15,18 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None,
     allFields = fieldsRead.union(fieldsWritten)
     readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
 
+    buffers = set([f for f in allFields if FieldType.isBuffer(f)])
+    fieldsWithoutBuffers = allFields - buffers
+
     fieldAccesses = set()
+    numBufferAccesses = 0
     for eq in listOfEquations:
         fieldAccesses.update(eq.atoms(Field.Access))
 
-    commonShape = getCommonShape(allFields)
+        numBufferAccesses += sum([1 for access in eq.atoms(Field.Access) if FieldType.isBuffer(access.field)])
+
+    commonShape = getCommonShape(fieldsWithoutBuffers)
+
     if iterationSlice is None:
         # determine iteration slice from ghost layers
         if ghostLayers is None:
@@ -34,7 +41,7 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None,
             for i in range(len(commonShape)):
                 iterationSlice.append(slice(ghostLayers[i][0], -ghostLayers[i][1] if ghostLayers[i][1] > 0 else None))
 
-    indexing = indexingCreator(field=list(allFields)[0], iterationSlice=iterationSlice)
+    indexing = indexingCreator(field=list(fieldsWithoutBuffers)[0], iterationSlice=iterationSlice)
 
     block = Block(assignments)
     block = indexing.guard(block, commonShape)
@@ -46,8 +53,19 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None,
     basePointerInfos = {f.name: parseBasePointerInfo(basePointerInfo, [2, 1, 0], f) for f in allFields}
 
     coordMapping = {f.name: coordMapping for f in allFields}
-    resolveFieldAccesses(ast, readOnlyFields, fieldToFixedCoordinates=coordMapping,
-                         fieldToBasePointerInfo=basePointerInfos)
+
+    loopVars = [numBufferAccesses * i for i in indexing.coordinates]
+    loopStrides = list(fieldsWithoutBuffers)[0].shape
+
+    baseBufferIndex = loopVars[0]
+    stride = 1
+    for idx, var in enumerate(loopVars[1:]):
+        stride *= loopStrides[idx]
+        baseBufferIndex += var * stride
+
+    resolveBufferAccesses(ast, baseBufferIndex, readOnlyFields)
+    resolveFieldAccesses(ast, readOnlyFields, fieldToBasePointerInfo=basePointerInfos,
+                         fieldToFixedCoordinates=coordMapping)
 
     substituteArrayAccessesWithConstants(ast)
 
@@ -73,7 +91,8 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel"
     readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
 
     for indexField in indexFields:
-        indexField.isIndexField = True
+        indexField.fieldType = FieldType.INDEXED
+        assert FieldType.isIndexed(indexField)
         assert indexField.spatialDimensions == 1, "Index fields have to be 1D"
 
     nonIndexFields = [f for f in allFields if f not in indexFields]
diff --git a/pystencils/llvm/kernelcreation.py b/pystencils/llvm/kernelcreation.py
index 78cd1166..121dcdd9 100644
--- a/pystencils/llvm/kernelcreation.py
+++ b/pystencils/llvm/kernelcreation.py
@@ -1,7 +1,8 @@
 from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction
-from pystencils.transformations import resolveFieldAccesses, \
+from pystencils.transformations import resolveFieldAccesses, resolveBufferAccesses, \
     typeAllEquations, moveConstantsBeforeLoop, insertCasts
 from pystencils.data_types import TypedSymbol, BasicType, StructType
+from pystencils.field import Field, FieldType
 from functools import partial
 from pystencils.llvm.llvmjit import makePythonFunction
 
@@ -57,7 +58,8 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ
     allFields = fieldsRead.union(fieldsWritten)
 
     for indexField in indexFields:
-        indexField.isIndexField = True
+        indexField.fieldType = FieldType.INDEXED
+        assert FieldType.isIndexed(indexField)
         assert indexField.spatialDimensions == 1, "Index fields have to be 1D"
 
     nonIndexFields = [f for f in allFields if f not in indexFields]
diff --git a/pystencils/transformations/transformations.py b/pystencils/transformations/transformations.py
index d41cadc0..bf9426f3 100644
--- a/pystencils/transformations/transformations.py
+++ b/pystencils/transformations/transformations.py
@@ -6,7 +6,7 @@ import sympy as sp
 from sympy.logic.boolalg import Boolean
 from sympy.tensor import IndexedBase
 
-from pystencils.field import Field, offsetComponentToDirectionString
+from pystencils.field import Field, FieldType, offsetComponentToDirectionString
 from pystencils.data_types import TypedSymbol, createType, PointerType, StructType, getBaseType, castFunc
 from pystencils.slicing import normalizeSlice
 import pystencils.astnodes as ast
@@ -71,13 +71,15 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None
     """
     # find correct ordering by inspecting participating FieldAccesses
     fieldAccesses = body.atoms(Field.Access)
-    fieldList = [e.field for e in fieldAccesses]
+    # exclude accesses to buffers from fieldList, because buffers are treated separately
+    fieldList = [e.field for e in fieldAccesses if not FieldType.isBuffer(e.field)]
     fields = set(fieldList)
+    numBufferAccesses = len(fieldAccesses) - len(fieldList)
 
     if loopOrder is None:
         loopOrder = getOptimalLoopOrdering(fields)
 
-    shape = getCommonShape(fields)
+    shape = getCommonShape(list(fields))
 
     if iterationSlice is not None:
         iterationSlice = normalizeSlice(iterationSlice, shape)
@@ -88,6 +90,11 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None
     if isinstance(ghostLayers, int):
         ghostLayers = [(ghostLayers, ghostLayers)] * len(loopOrder)
 
+    def getLoopStride(begin, end, step):
+        return (end - begin) / step
+
+    loopStrides = []
+    loopVars = []
     currentBody = body
     lastLoop = None
     for i, loopCoordinate in enumerate(reversed(loopOrder)):
@@ -97,6 +104,8 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None
             newLoop = ast.LoopOverCoordinate(currentBody, loopCoordinate, begin, end, 1)
             lastLoop = newLoop
             currentBody = ast.Block([lastLoop])
+            loopStrides.append(getLoopStride(begin, end, 1))
+            loopVars.append(newLoop.loopCounterSymbol)
         else:
             sliceComponent = iterationSlice[loopCoordinate]
             if type(sliceComponent) is slice:
@@ -104,11 +113,16 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None
                 newLoop = ast.LoopOverCoordinate(currentBody, loopCoordinate, sc.start, sc.stop, sc.step)
                 lastLoop = newLoop
                 currentBody = ast.Block([lastLoop])
+                loopStrides.append(getLoopStride(sc.start, sc.stop, sc.step))
+                loopVars.append(newLoop.loopCounterSymbol)
             else:
                 assignment = ast.SympyAssignment(ast.LoopOverCoordinate.getLoopCounterSymbol(loopCoordinate),
                                                  sp.sympify(sliceComponent))
                 currentBody.insertFront(assignment)
-    return ast.KernelFunction(currentBody, ghostLayers=ghostLayers, functionName=functionName)
+
+    loopVars = [numBufferAccesses * var for var in loopVars]
+    astNode = ast.KernelFunction(currentBody, ghostLayers=ghostLayers, functionName=functionName)
+    return (astNode, loopStrides, loopVars)
 
 
 def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr):
@@ -133,7 +147,6 @@ def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr):
         (ptr_E_2S, x*fstride_myfield[0] + y*fstride_myfield[1] + fstride_myfield[0] - 2*fstride_myfield[1])
     """
     field = fieldAccess.field
-
     offset = 0
     name = ""
     listToHash = []
@@ -158,6 +171,7 @@ def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr):
         name += "%0.6X" % (abs(hash(tuple(listToHash))))
 
     newPtr = TypedSymbol(previousPtr.name + name, previousPtr.dtype)
+
     return newPtr, offset
 
 
@@ -223,6 +237,7 @@ def parseBasePointerInfo(basePointerSpecification, loopOrder, field):
     rest = allCoordinates - specifiedCoordinates
     if rest:
         result.append(list(rest))
+
     return result
 
 
@@ -278,6 +293,52 @@ def substituteArrayAccessesWithConstants(astNode):
         for a in astNode.args:
             substituteArrayAccessesWithConstants(a)
 
+def resolveBufferAccesses(astNode, baseBufferIndex, readOnlyFieldNames=set()):
+    def visitSympyExpr(expr, enclosingBlock, sympyAssignment):
+        if isinstance(expr, Field.Access):
+            fieldAccess = expr
+
+            # Do not apply transformation if field is not a buffer
+            if not FieldType.isBuffer(fieldAccess.field):
+                return expr
+
+            buffer = fieldAccess.field
+
+            dtype = PointerType(buffer.dtype, const=buffer.name in readOnlyFieldNames, restrict=True)
+            fieldPtr = TypedSymbol("%s%s" % (Field.DATA_PREFIX, symbolNameToVariableName(buffer.name)), dtype)
+
+            bufferIndex = baseBufferIndex
+            if len(fieldAccess.index) > 1:
+                raise RuntimeError('Only indexing dimensions up to 1 are currently supported in buffers!')
+
+            if len(fieldAccess.index) > 0:
+                cellIndex = fieldAccess.index[0]
+                bufferIndex += cellIndex
+
+            result = ast.ResolvedFieldAccess(fieldPtr, bufferIndex, fieldAccess.field, fieldAccess.offsets,
+                                             fieldAccess.index)
+
+            return visitSympyExpr(result, enclosingBlock, sympyAssignment)
+        else:
+            if isinstance(expr, ast.ResolvedFieldAccess):
+                return expr
+
+            newArgs = [visitSympyExpr(e, enclosingBlock, sympyAssignment) for e in expr.args]
+            kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {}
+            return expr.func(*newArgs, **kwargs) if newArgs else expr
+
+    def visitNode(subAst):
+        if isinstance(subAst, ast.SympyAssignment):
+            enclosingBlock = subAst.parent
+            assert type(enclosingBlock) is ast.Block
+            subAst.lhs = visitSympyExpr(subAst.lhs, enclosingBlock, subAst)
+            subAst.rhs = visitSympyExpr(subAst.rhs, enclosingBlock, subAst)
+        else:
+            for i, a in enumerate(subAst.args):
+                visitNode(a)
+
+    return visitNode(astNode)
+
 
 def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerInfo={}, fieldToFixedCoordinates={}):
     """
@@ -298,6 +359,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
         if isinstance(expr, Field.Access):
             fieldAccess = expr
             field = fieldAccess.field
+
             if field.name in fieldToBasePointerInfo:
                 basePointerInfo = fieldToBasePointerInfo[field.name]
             else:
@@ -324,6 +386,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
                             coordDict[e] = field.dtype.getElementOffset(accessedFieldName)
                         else:
                             coordDict[e] = fieldAccess.index[e - field.spatialDimensions]
+
                 return coordDict
 
             lastPointer = fieldPtr
@@ -337,6 +400,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
                 lastPointer = newPtr
 
             coordDict = createCoordinateDict(basePointerInfo[0])
+
             _, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer)
             result = ast.ResolvedFieldAccess(lastPointer, offset, fieldAccess.field,
                                              fieldAccess.offsets, fieldAccess.index)
@@ -344,6 +408,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
             if isinstance(getBaseType(fieldAccess.field.dtype), StructType):
                 newType = fieldAccess.field.dtype.getElementType(fieldAccess.index[0])
                 result = castFunc(result, newType)
+
             return visitSympyExpr(result, enclosingBlock, sympyAssignment)
         else:
             if isinstance(expr, ast.ResolvedFieldAccess):
diff --git a/pystencils_tests/test_buffer.py b/pystencils_tests/test_buffer.py
new file mode 100644
index 00000000..600f7625
--- /dev/null
+++ b/pystencils_tests/test_buffer.py
@@ -0,0 +1,195 @@
+import unittest
+import numpy as np
+import sympy as sp
+from pystencils import Field, FieldType
+from pystencils.field import directionStringToOffset, layoutStringToTuple, createNumpyArrayWithLayout
+from pystencils.cpu import makePythonFunction
+from pystencils.cpu.kernelcreation import createKernel
+from pystencils.slicing import addGhostLayers, getSliceBeforeGhostLayer, getGhostRegionSlice
+
+
+class TestBuffer(unittest.TestCase):
+    """
+    Tests for the (un)packing (from)to buffers.
+    """
+    FIELD_SIZES = [(32, 10), (10, 8, 6)]
+
+    def _generate_fields(self, dt=np.uint64, numStencilDirections=1, layout='numpy'):
+        fieldSizes = self.FIELD_SIZES
+        if numStencilDirections > 1:
+            fieldSizes = [s + (numStencilDirections,) for s in fieldSizes]
+
+        fields = []
+        for size in fieldSizes:
+            fieldLayout = layoutStringToTuple(layout, len(size))
+            srcArr = createNumpyArrayWithLayout(size, fieldLayout)
+
+            arrayData = np.reshape(np.arange(1, np.prod(size)+1), size)
+            # Use flat iterator to input data into the array
+            srcArr.flat = addGhostLayers(arrayData, indexDimensions=1 if numStencilDirections > 1 else 0).astype(dt).flat
+            dstArr = np.zeros(srcArr.shape, dtype=dt)
+            bufferArr = np.zeros(np.prod(srcArr.shape), dtype=dt)
+            fields.append((srcArr, dstArr, bufferArr))
+        return fields
+
+    def test_full_scalar_field(self):
+        """
+        Tests fully (un)packing a scalar field (from)to a buffer.
+        """
+        fields = self._generate_fields()
+        for (srcArr, dstArr, bufferArr) in fields:
+            srcField = Field.createFromNumpyArray("srcField", srcArr)
+            dstField = Field.createFromNumpyArray("dstField", dstArr)
+            buffer = Field.createGeneric("buffer", spatialDimensions=1,
+                                         fieldType=FieldType.BUFFER, dtype=srcArr.dtype)
+
+            packEqs = [sp.Eq(buffer.center(), srcField.center())]
+            packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype})
+
+            packKernel = makePythonFunction(packCode)
+            packKernel(buffer=bufferArr, srcField=srcArr)
+
+            unpackEqs = [sp.Eq(dstField.center(), buffer.center())]
+            unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype})
+
+            unpackKernel = makePythonFunction(unpackCode)
+            unpackKernel(dstField=dstArr, buffer=bufferArr)
+
+            np.testing.assert_equal(srcArr, dstArr)
+
+    def test_field_slice(self):
+        """
+        Tests (un)packing slices of a scalar field (from)to a buffer.
+        """
+        fields = self._generate_fields()
+        for dir in ['N', 'S', 'NW', 'SW', 'TNW', 'B']:
+            for (srcArr, dstArr, bufferArr) in fields:
+                # Extract slice from N direction of the field
+                sliceDir = directionStringToOffset(dir, dim=len(srcArr.shape))
+                packSlice = getSliceBeforeGhostLayer(sliceDir)
+                unpackSlice = getGhostRegionSlice(sliceDir)
+
+                srcField = Field.createFromNumpyArray("srcField", srcArr[packSlice])
+                dstField = Field.createFromNumpyArray("dstField", dstArr[unpackSlice])
+                buffer = Field.createGeneric("buffer", spatialDimensions=1,
+                                             fieldType=FieldType.BUFFER, dtype=srcArr.dtype)
+
+                packEqs = [sp.Eq(buffer.center(), srcField.center())]
+                packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype})
+
+                packKernel = makePythonFunction(packCode)
+                packKernel(buffer=bufferArr, srcField=srcArr[packSlice])
+
+                # Unpack into ghost layer of dstField in N direction
+                unpackEqs = [sp.Eq(dstField.center(), buffer.center())]
+                unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype})
+
+                unpackKernel = makePythonFunction(unpackCode)
+                unpackKernel(buffer=bufferArr, dstField=dstArr[unpackSlice])
+
+                np.testing.assert_equal(srcArr[packSlice], dstArr[unpackSlice])
+
+    def test_all_cell_values(self):
+        """
+        Tests (un)packing all cell values of the a field (from)to a buffer.
+        """
+        numCellValues=19
+        fields = self._generate_fields(numStencilDirections=numCellValues)
+        for (srcArr, dstArr, bufferArr) in fields:
+            srcField = Field.createFromNumpyArray("srcField", srcArr, indexDimensions=1)
+            dstField = Field.createFromNumpyArray("dstField", dstArr, indexDimensions=1)
+            buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1,
+                                         fieldType=FieldType.BUFFER, dtype=srcArr.dtype)
+
+            packEqs = []
+            # Since we are packing all cell values for all cells, then
+            # the buffer index is equivalent to the field index
+            for idx in range(numCellValues):
+                eq = sp.Eq(buffer(idx), srcField(idx))
+                packEqs.append(eq)
+
+            packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype})
+            packKernel = makePythonFunction(packCode)
+            packKernel(buffer=bufferArr, srcField=srcArr)
+
+            unpackEqs = []
+
+            for idx in range(numCellValues):
+                eq = sp.Eq(dstField(idx), buffer(idx))
+                unpackEqs.append(eq)
+
+            unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype})
+            unpackKernel = makePythonFunction(unpackCode)
+            unpackKernel(buffer=bufferArr, dstField=dstArr)
+
+            np.testing.assert_equal(srcArr, dstArr)
+
+    def test_subset_cell_values(self):
+        """
+        Tests (un)packing a subset of cell values of the a field (from)to a buffer.
+        """
+        numCellValues=19
+        # Cell indices of the field to be (un)packed (from)to the buffer
+        cellIndices = [1, 5, 7, 8, 10, 12, 13]
+        fields = self._generate_fields(numStencilDirections=numCellValues)
+        for (srcArr, dstArr, bufferArr) in fields:
+            srcField = Field.createFromNumpyArray("srcField", srcArr, indexDimensions=1)
+            dstField = Field.createFromNumpyArray("dstField", dstArr, indexDimensions=1)
+            buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1,
+                                         fieldType=FieldType.BUFFER, dtype=srcArr.dtype)
+
+            packEqs = []
+            # Since we are packing all cell values for all cells, then
+            # the buffer index is equivalent to the field index
+            for bufferIdx, cellIdx in enumerate(cellIndices):
+                eq = sp.Eq(buffer(bufferIdx), srcField(cellIdx))
+                packEqs.append(eq)
+
+            packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype})
+            packKernel = makePythonFunction(packCode)
+            packKernel(buffer=bufferArr, srcField=srcArr)
+
+            unpackEqs = []
+
+            for bufferIdx, cellIdx in enumerate(cellIndices):
+                eq = sp.Eq(dstField(cellIdx), buffer(bufferIdx))
+                unpackEqs.append(eq)
+
+            unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype})
+            unpackKernel = makePythonFunction(unpackCode)
+            unpackKernel(buffer=bufferArr, dstField=dstArr)
+
+            maskArr = np.ma.masked_where((srcArr - dstArr) != 0, srcArr)
+            np.testing.assert_equal(dstArr, maskArr.filled(int(0)))
+
+
+    def test_field_layouts(self):
+        numCellValues = 27
+        for layoutStr in ['numpy', 'fzyx', 'zyxf', 'reversenumpy']:
+            fields = self._generate_fields(numStencilDirections=numCellValues, layout=layoutStr)
+            for (srcArr, dstArr, bufferArr) in fields:
+                srcField = Field.createFromNumpyArray("srcField", srcArr, indexDimensions=1)
+                dstField = Field.createFromNumpyArray("dstField", dstArr, indexDimensions=1)
+                buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1,
+                                             fieldType=FieldType.BUFFER, dtype=srcArr.dtype)
+
+                packEqs = []
+                # Since we are packing all cell values for all cells, then
+                # the buffer index is equivalent to the field index
+                for idx in range(numCellValues):
+                    eq = sp.Eq(buffer(idx), srcField(idx))
+                    packEqs.append(eq)
+
+                packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype})
+                packKernel = makePythonFunction(packCode)
+                packKernel(buffer=bufferArr, srcField=srcArr)
+
+                unpackEqs = []
+
+                for idx in range(numCellValues):
+                    eq = sp.Eq(dstField(idx), buffer(idx))
+                    unpackEqs.append(eq)
+
+                unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype})
+                unpackKernel = makePythonFunction(unpackCode)
+                unpackKernel(buffer=bufferArr, dstField=dstArr)
diff --git a/pystencils_tests/test_buffer_gpu.py b/pystencils_tests/test_buffer_gpu.py
new file mode 100644
index 00000000..d6d9275f
--- /dev/null
+++ b/pystencils_tests/test_buffer_gpu.py
@@ -0,0 +1,217 @@
+import unittest
+import numpy as np
+import sympy as sp
+from pystencils import Field, FieldType
+from pystencils.field import directionStringToOffset, layoutStringToTuple, createNumpyArrayWithLayout
+from pystencils.gpucuda import makePythonFunction, createCUDAKernel
+from pystencils.slicing import addGhostLayers, getSliceBeforeGhostLayer, getGhostRegionSlice
+import pycuda.autoinit
+import pycuda.gpuarray as gpuarray
+from pystencils.backends.cbackend import generateC
+
+
+class TestBufferGPU(unittest.TestCase):
+    """
+    Tests for the (un)packing (from)to buffers on a CUDA GPU.
+    """
+    FIELD_SIZES = [(4, 3), (9, 3, 7)]
+
+    def _generate_fields(self, dt=np.uint8, numStencilDirections=1, layout='numpy'):
+        fieldSizes = self.FIELD_SIZES
+        if numStencilDirections > 1:
+            fieldSizes = [s + (numStencilDirections,) for s in fieldSizes]
+
+        fields = []
+        for size in fieldSizes:
+            fieldLayout = layoutStringToTuple(layout, len(size))
+            srcArr = createNumpyArrayWithLayout(size, fieldLayout).astype(dt)
+
+            arrayData = np.reshape(np.arange(1, np.prod(size)+1), size)
+            # Use flat iterator to input data into the array
+            srcArr.flat = addGhostLayers(arrayData, indexDimensions=1 if numStencilDirections > 1 else 0).astype(dt).flat
+
+            gpuSrcArr = gpuarray.to_gpu(srcArr)
+            gpuDstArr = gpuarray.zeros_like(gpuSrcArr)
+            gpuBufferArr = gpuarray.zeros(np.prod(srcArr.shape), dtype=dt)
+
+            fields.append((srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr))
+        return fields
+
+    def test_full_scalar_field(self):
+        """
+        Tests fully (un)packing a scalar field (from)to a GPU buffer.
+        """
+        fields = self._generate_fields()
+        for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields:
+            srcField = Field.createFromNumpyArray("srcField", srcArr)
+            dstField = Field.createFromNumpyArray("dstField", srcArr)
+            buffer = Field.createGeneric("buffer", spatialDimensions=1,
+                                         fieldType=FieldType.BUFFER, dtype=srcArr.dtype)
+
+            packEqs = [sp.Eq(buffer.center(), srcField.center())]
+            packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype}
+            packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes)
+
+            packKernel = makePythonFunction(packCode)
+            packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr)
+
+            unpackEqs = [sp.Eq(dstField.center(), buffer.center())]
+            unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype}
+            unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes)
+
+            unpackKernel = makePythonFunction(unpackCode)
+            unpackKernel(dstField=gpuDstArr, buffer=gpuBufferArr)
+
+            dstArr = gpuDstArr.get()
+
+            np.testing.assert_equal(srcArr, dstArr)
+
+    def test_field_slice(self):
+        """
+        Tests (un)packing slices of a scalar field (from)to a buffer.
+        """
+        fields = self._generate_fields()
+        for dir in ['N', 'S', 'NW', 'SW', 'TNW', 'B']:
+            for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields:
+                # Extract slice from N direction of the field
+                sliceDir = directionStringToOffset(dir, dim=len(srcArr.shape))
+                packSlice = getSliceBeforeGhostLayer(sliceDir)
+                unpackSlice = getGhostRegionSlice(sliceDir)
+
+                srcField = Field.createFromNumpyArray("srcField", srcArr[packSlice])
+                dstField = Field.createFromNumpyArray("dstField", srcArr[unpackSlice])
+                buffer = Field.createGeneric("buffer", spatialDimensions=1,
+                                             fieldType=FieldType.BUFFER, dtype=srcArr.dtype)
+
+                packEqs = [sp.Eq(buffer.center(), srcField.center())]
+                packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype}
+                packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes)
+
+                packKernel = makePythonFunction(packCode)
+                packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr[packSlice])
+
+                # Unpack into ghost layer of dstField in N direction
+                unpackEqs = [sp.Eq(dstField.center(), buffer.center())]
+                unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype}
+                unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes)
+
+                unpackKernel = makePythonFunction(unpackCode)
+                unpackKernel(buffer=gpuBufferArr, dstField=gpuDstArr[unpackSlice])
+
+                dstArr = gpuDstArr.get()
+
+                np.testing.assert_equal(srcArr[packSlice], dstArr[unpackSlice])
+
+    def test_all_cell_values(self):
+        """
+        Tests (un)packing all cell values of the a field (from)to a buffer.
+        """
+        numCellValues=7
+        fields = self._generate_fields(numStencilDirections=numCellValues)
+        for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields:
+            srcField = Field.createFromNumpyArray("srcField", gpuSrcArr, indexDimensions=1)
+            dstField = Field.createFromNumpyArray("dstField", gpuSrcArr, indexDimensions=1)
+            buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1,
+                                         fieldType=FieldType.BUFFER, dtype=gpuSrcArr.dtype)
+
+            packEqs = []
+            # Since we are packing all cell values for all cells, then
+            # the buffer index is equivalent to the field index
+            for idx in range(numCellValues):
+                eq = sp.Eq(buffer(idx), srcField(idx))
+                packEqs.append(eq)
+
+            packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype}
+            packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes)
+            packKernel = makePythonFunction(packCode)
+            packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr)
+
+            unpackEqs = []
+
+            for idx in range(numCellValues):
+                eq = sp.Eq(dstField(idx), buffer(idx))
+                unpackEqs.append(eq)
+
+            unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype}
+            unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes)
+            unpackKernel = makePythonFunction(unpackCode)
+            unpackKernel(buffer=gpuBufferArr, dstField=gpuDstArr)
+
+            dstArr = gpuDstArr.get()
+
+            np.testing.assert_equal(srcArr, dstArr)
+
+    def test_subset_cell_values(self):
+        """
+        Tests (un)packing a subset of cell values of the a field (from)to a buffer.
+        """
+        numCellValues = 7
+        # Cell indices of the field to be (un)packed (from)to the buffer
+        cellIndices = [1, 3, 5, 6]
+        fields = self._generate_fields(numStencilDirections=numCellValues)
+        for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields:
+            srcField = Field.createFromNumpyArray("srcField", gpuSrcArr, indexDimensions=1)
+            dstField = Field.createFromNumpyArray("dstField", gpuSrcArr, indexDimensions=1)
+            buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1,
+                                         fieldType=FieldType.BUFFER, dtype=gpuSrcArr.dtype)
+
+            packEqs = []
+            # Since we are packing all cell values for all cells, then
+            # the buffer index is equivalent to the field index
+            for bufferIdx, cellIdx in enumerate(cellIndices):
+                eq = sp.Eq(buffer(bufferIdx), srcField(cellIdx))
+                packEqs.append(eq)
+
+            packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype}
+            packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes)
+            packKernel = makePythonFunction(packCode)
+            packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr)
+
+            unpackEqs = []
+
+            for bufferIdx, cellIdx in enumerate(cellIndices):
+                eq = sp.Eq(dstField(cellIdx), buffer(bufferIdx))
+                unpackEqs.append(eq)
+
+            unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype}
+            unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes)
+            unpackKernel = makePythonFunction(unpackCode)
+            unpackKernel(buffer=gpuBufferArr, dstField=gpuDstArr)
+
+            dstArr = gpuDstArr.get()
+
+            maskArr = np.ma.masked_where((srcArr - dstArr) != 0, srcArr)
+            np.testing.assert_equal(dstArr, maskArr.filled(int(0)))
+
+    def test_field_layouts(self):
+        numCellValues = 7
+        for layoutStr in ['numpy', 'fzyx', 'zyxf', 'reversenumpy']:
+            fields = self._generate_fields(numStencilDirections=numCellValues, layout=layoutStr)
+            for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields:
+                srcField = Field.createFromNumpyArray("srcField", gpuSrcArr, indexDimensions=1)
+                dstField = Field.createFromNumpyArray("dstField", gpuSrcArr, indexDimensions=1)
+                buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1,
+                                             fieldType=FieldType.BUFFER, dtype=srcArr.dtype)
+
+                packEqs = []
+                # Since we are packing all cell values for all cells, then
+                # the buffer index is equivalent to the field index
+                for idx in range(numCellValues):
+                    eq = sp.Eq(buffer(idx), srcField(idx))
+                    packEqs.append(eq)
+
+                packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype}
+                packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes)
+                packKernel = makePythonFunction(packCode)
+                packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr)
+
+                unpackEqs = []
+
+                for idx in range(numCellValues):
+                    eq = sp.Eq(dstField(idx), buffer(idx))
+                    unpackEqs.append(eq)
+
+                unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype}
+                unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes)
+                unpackKernel = makePythonFunction(unpackCode)
+                unpackKernel(buffer=gpuBufferArr, dstField=gpuDstArr)
\ No newline at end of file
diff --git a/pystencils_tests/test_jacobi_cbackend.py b/pystencils_tests/test_jacobi_cbackend.py
index 8e08a83e..dfe45ac5 100644
--- a/pystencils_tests/test_jacobi_cbackend.py
+++ b/pystencils_tests/test_jacobi_cbackend.py
@@ -22,7 +22,7 @@ class TestGeneratorModule(unittest.TestCase):
 
         jacobi = SympyAssignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
         body = Block([jacobi])
-        code = makeLoopOverDomain(body, "kernel")
+        code, _, _ = makeLoopOverDomain(body, "kernel")
         resolveFieldAccesses(code)
         substituteArrayAccessesWithConstants(code)
         moveConstantsBeforeLoop(code)
@@ -43,7 +43,7 @@ class TestGeneratorModule(unittest.TestCase):
         d = Field.createGeneric("d", 3)
         jacobi = SympyAssignment(d[0, 0, 0], (f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]) / 4)
         body = Block([jacobi])
-        code = makeLoopOverDomain(body, "kernel")
+        code, _, _ = makeLoopOverDomain(body, "kernel")
         resolveFieldAccesses(code)
         substituteArrayAccessesWithConstants(code)
         moveConstantsBeforeLoop(code)
diff --git a/pystencils_walberla/jinja_filters.py b/pystencils_walberla/jinja_filters.py
index 142bd4cb..e24c7930 100644
--- a/pystencils_walberla/jinja_filters.py
+++ b/pystencils_walberla/jinja_filters.py
@@ -5,6 +5,7 @@ from pystencils.astnodes import ResolvedFieldAccess
 from pystencils.backends.cbackend import CustomSympyPrinter
 from pystencils.data_types import getBaseType
 from pystencils.cpu import generateC
+from pystencils.field import FieldType
 
 
 temporaryFieldTemplate = """
@@ -167,7 +168,7 @@ def generateCall(ctx, kernelInfo, ghostLayersToInclude=0):
     spatialShapeSymbols = []
 
     for param in ast.parameters:
-        if param.isFieldArgument and fields[param.fieldName].isIndexField:
+        if param.isFieldArgument and FieldType.isIndexed(fields[param.fieldName]):
             continue
 
         if param.isFieldPtrArgument:
diff --git a/setup_lbmpy_walberla.py b/setup_lbmpy_walberla.py
index 4528945e..0f6649f0 100644
--- a/setup_lbmpy_walberla.py
+++ b/setup_lbmpy_walberla.py
@@ -10,5 +10,5 @@ setup(name='lbmpy_walberla',
       packages=['lbmpy_walberla'] + ['lbmpy_walberla.' + s for s in find_packages('lbmpy_walberla')],
       install_requires=['lbmpy'],
       package_dir={'lbmpy_walberla': 'lbmpy_walberla'},
-      package_data={ 'lbmpy_walberla': ['templates/*'] },
+      package_data={'lbmpy_walberla': ['templates/*']},
       )
-- 
GitLab