diff --git a/pystencils/__init__.py b/pystencils/__init__.py index b78f17d66f89640855bc6950f7ddd752d2bdf838..27b0b97f8ba261d4d85ad9ace8876a92acf0eb9e 100644 --- a/pystencils/__init__.py +++ b/pystencils/__init__.py @@ -1,4 +1,4 @@ -from pystencils.field import Field, extractCommonSubexpressions +from pystencils.field import Field, FieldType, extractCommonSubexpressions from pystencils.data_types import TypedSymbol from pystencils.slicing import makeSlice from pystencils.kernelcreation import createKernel, createIndexedKernel diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py index 69c3d8cd84b991ba86719733dfc44e80b813771f..28184d6f366d20904eb3d958e3c98af18339a8e5 100644 --- a/pystencils/cpu/cpujit.py +++ b/pystencils/cpu/cpujit.py @@ -74,6 +74,7 @@ from pystencils.backends.cbackend import generateC, getHeaders from collections import OrderedDict, Mapping from pystencils.transformations import symbolNameToVariableName from pystencils.data_types import toCtypes, getBaseType, StructType +from pystencils.field import FieldType def makePythonFunction(kernelFunctionNode, argumentDict={}): @@ -387,9 +388,9 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict): raise ValueError("Passed array '%s' has strides %s which does not match expected strides %s" % (arg.fieldName, str(fieldArr.strides), str(symbolicFieldStrides))) - if symbolicField.isIndexField: + if FieldType.isIndexed(symbolicField): indexArrShapes.add(fieldArr.shape[:symbolicField.spatialDimensions]) - else: + elif not FieldType.isBuffer(symbolicField): arrayShapes.add(fieldArr.shape[:symbolicField.spatialDimensions]) elif arg.isFieldShapeArgument: diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py index 298adea4d43b0428de94d74aaed6c8dac3891982..91b6dc3ebc296c3ba1ccd240a4081a0edc1019b6 100644 --- a/pystencils/cpu/kernelcreation.py +++ b/pystencils/cpu/kernelcreation.py @@ -4,11 +4,11 @@ from functools import partial from collections import defaultdict from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction -from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, \ +from pystencils.transformations import resolveBufferAccesses, resolveFieldAccesses, makeLoopOverDomain, \ typeAllEquations, getOptimalLoopOrdering, parseBasePointerInfo, moveConstantsBeforeLoop, splitInnerLoop, \ substituteArrayAccessesWithConstants from pystencils.data_types import TypedSymbol, BasicType, StructType, createType -from pystencils.field import Field +from pystencils.field import Field, FieldType import pystencils.astnodes as ast from pystencils.cpu.cpujit import makePythonFunction @@ -51,10 +51,13 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol='double', allFields = fieldsRead.union(fieldsWritten) readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten]) + buffers = set([f for f in allFields if FieldType.isBuffer(f)]) + fieldsWithoutBuffers = allFields - buffers + body = ast.Block(assignments) - loopOrder = getOptimalLoopOrdering(allFields) - code = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice, - ghostLayers=ghostLayers, loopOrder=loopOrder) + loopOrder = getOptimalLoopOrdering(fieldsWithoutBuffers) + code, loopStrides, loopVars = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice, + ghostLayers=ghostLayers, loopOrder=loopOrder) code.target = 'cpu' if splitGroups: @@ -62,8 +65,20 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol='double', splitInnerLoop(code, typedSplitGroups) basePointerInfo = [['spatialInner0'], ['spatialInner1']] if len(loopOrder) >= 2 else [['spatialInner0']] - basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) for field in allFields} + basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) + for field in fieldsWithoutBuffers} + + bufferBasePointerInfos = {field.name: parseBasePointerInfo([['spatialInner0']], [0], field) for field in buffers} + basePointerInfos.update(bufferBasePointerInfos) + + baseBufferIndex = loopVars[0] + stride = 1 + for idx, var in enumerate(loopVars[1:]): + curStride = loopStrides[idx] + stride *= int(curStride) if isinstance(curStride, float) else curStride + baseBufferIndex += var * stride + resolveBufferAccesses(code, baseBufferIndex, readOnlyFields) resolveFieldAccesses(code, readOnlyFields, fieldToBasePointerInfo=basePointerInfos) substituteArrayAccessesWithConstants(code) moveConstantsBeforeLoop(code) @@ -93,7 +108,8 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ allFields = fieldsRead.union(fieldsWritten) for indexField in indexFields: - indexField.isIndexField = True + indexField.fieldType = FieldType.INDEXED + assert FieldType.isIndexed(indexField) assert indexField.spatialDimensions == 1, "Index fields have to be 1D" nonIndexFields = [f for f in allFields if f not in indexFields] diff --git a/pystencils/field.py b/pystencils/field.py index d545d15c8eb4852365a2a9ce88197f4dc26e0182..51d08df97501a4dd89fc641778ce5a845f5e7d49 100644 --- a/pystencils/field.py +++ b/pystencils/field.py @@ -1,3 +1,4 @@ +from enum import Enum from itertools import chain import numpy as np import sympy as sp @@ -7,6 +8,31 @@ from pystencils.data_types import TypedSymbol, createType, createCompositeTypeFr from pystencils.sympyextensions import isIntegerSequence +class FieldType(Enum): + # generic fields + GENERIC = 0 + # index fields are currently only used for boundary handling + # the coordinates are not the loop counters in that case, but are read from this index field + INDEXED = 1 + # communication buffer, used for (un)packing data in communication. + BUFFER = 2 + + @staticmethod + def isGeneric(field): + assert isinstance(field, Field) + return field.fieldType == FieldType.GENERIC + + @staticmethod + def isIndexed(field): + assert isinstance(field, Field) + return field.fieldType == FieldType.INDEXED + + @staticmethod + def isBuffer(field): + assert isinstance(field, Field) + return field.fieldType == FieldType.BUFFER + + class Field(object): """ With fields one can formulate stencil-like update rules on structured grids. @@ -51,7 +77,7 @@ class Field(object): @staticmethod def createGeneric(fieldName, spatialDimensions, dtype=np.float64, indexDimensions=0, layout='numpy', - indexShape=None): + indexShape=None, fieldType=FieldType.GENERIC): """ Creates a generic field where the field size is not fixed i.e. can be called with arrays of different sizes @@ -85,7 +111,7 @@ class Field(object): shape += (1,) strides += (1,) - return Field(fieldName, dtype, layout, shape, strides) + return Field(fieldName, fieldType, dtype, layout, shape, strides) @staticmethod def createFromNumpyArray(fieldName, npArray, indexDimensions=0): @@ -114,7 +140,7 @@ class Field(object): shape += (1,) strides += (1,) - return Field(fieldName, npArray.dtype, spatialLayout, shape, strides) + return Field(fieldName, FieldType.GENERIC, npArray.dtype, spatialLayout, shape, strides) @staticmethod def createFixedSize(fieldName, shape, indexDimensions=0, dtype=np.float64, layout='numpy'): @@ -146,21 +172,20 @@ class Field(object): spatialLayout = list(layout) for i in range(spatialDimensions, len(layout)): spatialLayout.remove(i) - return Field(fieldName, dtype, tuple(spatialLayout), shape, strides) + return Field(fieldName, FieldType.GENERIC, dtype, tuple(spatialLayout), shape, strides) - def __init__(self, fieldName, dtype, layout, shape, strides): + def __init__(self, fieldName, fieldType, dtype, layout, shape, strides): """Do not use directly. Use static create* methods""" self._fieldName = fieldName + assert isinstance(fieldType, FieldType) + self.fieldType = fieldType self._dtype = createType(dtype) self._layout = normalizeLayout(layout) self.shape = shape self.strides = strides - # index fields are currently only used for boundary handling - # the coordinates are not the loop counters in that case, but are read from this index field - self.isIndexField = False def newFieldWithDifferentName(self, newName): - return Field(newName, self._dtype, self._layout, self.shape, self.strides) + return Field(newName, self.fieldType, self._dtype, self._layout, self.shape, self.strides) @property def spatialDimensions(self): @@ -243,11 +268,11 @@ class Field(object): return Field.Access(self, center)(*args, **kwargs) def __hash__(self): - return hash((self._layout, self.shape, self.strides, self._dtype, self._fieldName)) + return hash((self._layout, self.shape, self.strides, self._dtype, self.fieldType, self._fieldName)) def __eq__(self, other): - selfTuple = (self.shape, self.strides, self.name, self.dtype) - otherTuple = (other.shape, other.strides, other.name, other.dtype) + selfTuple = (self.shape, self.strides, self.name, self.dtype, self.fieldType) + otherTuple = (other.shape, other.strides, other.name, other.dtype, other.fieldType) return selfTuple == otherTuple PREFIX = "f" diff --git a/pystencils/gpucuda/cudajit.py b/pystencils/gpucuda/cudajit.py index b815f5c33ef2f1f24ac0fb8c32cab68c964d18e6..3a0fe85f4ceec904438381b4104b29fcfb2f1d30 100644 --- a/pystencils/gpucuda/cudajit.py +++ b/pystencils/gpucuda/cudajit.py @@ -2,6 +2,7 @@ import numpy as np from pystencils.backends.cbackend import generateC from pystencils.transformations import symbolNameToVariableName from pystencils.data_types import StructType, getBaseType +from pystencils.field import FieldType def makePythonFunction(kernelFunctionNode, argumentDict={}): @@ -119,9 +120,9 @@ def _checkArguments(parameterSpecification, argumentDict): raise ValueError("Passed array '%s' has strides %s which does not match expected strides %s" % (arg.fieldName, str(fieldArr.strides), str(symbolicFieldStrides))) - if symbolicField.isIndexField: + if FieldType.isIndexed(symbolicField): indexArrShapes.add(fieldArr.shape[:symbolicField.spatialDimensions]) - else: + elif not FieldType.isBuffer(symbolicField): arrayShapes.add(fieldArr.shape[:symbolicField.spatialDimensions]) if len(arrayShapes) > 1: diff --git a/pystencils/gpucuda/indexing.py b/pystencils/gpucuda/indexing.py index 99db1efc974c11b0d3a5f911bf86bdf51094a4cc..aa05078e884b325a7c59a42aedb2d96745c3629d 100644 --- a/pystencils/gpucuda/indexing.py +++ b/pystencils/gpucuda/indexing.py @@ -11,6 +11,8 @@ AUTO_BLOCKSIZE_LIMITING = True BLOCK_IDX = [TypedSymbol("blockIdx." + coord, createType("int")) for coord in ('x', 'y', 'z')] THREAD_IDX = [TypedSymbol("threadIdx." + coord, createType("int")) for coord in ('x', 'y', 'z')] +BLOCK_DIM = [TypedSymbol("blockDim." + coord, createType("int")) for coord in ('x', 'y', 'z')] +GRID_DIM = [TypedSymbol("gridDim." + coord, createType("int")) for coord in ('x', 'y', 'z')] class AbstractIndexing(abc.ABCMeta('ABC', (object,), {})): @@ -28,8 +30,8 @@ class AbstractIndexing(abc.ABCMeta('ABC', (object,), {})): @property def indexVariables(self): - """Sympy symbols for CUDA's block and thread indices""" - return BLOCK_IDX + THREAD_IDX + """Sympy symbols for CUDA's block and thread indices, and block and grid dimensions. """ + return BLOCK_IDX + THREAD_IDX + BLOCK_DIM + GRID_DIM @abc.abstractmethod def getCallParameters(self, arrShape): diff --git a/pystencils/gpucuda/kernelcreation.py b/pystencils/gpucuda/kernelcreation.py index 7387093620757f4966f1c77f2e758834eba185ac..5bd2f5d7d96efda99bf4122483a50325cde7f2d3 100644 --- a/pystencils/gpucuda/kernelcreation.py +++ b/pystencils/gpucuda/kernelcreation.py @@ -2,10 +2,10 @@ from functools import partial from pystencils.gpucuda.indexing import BlockIndexing from pystencils.transformations import resolveFieldAccesses, typeAllEquations, parseBasePointerInfo, getCommonShape, \ - substituteArrayAccessesWithConstants + substituteArrayAccessesWithConstants, resolveBufferAccesses from pystencils.astnodes import Block, KernelFunction, SympyAssignment, LoopOverCoordinate from pystencils.data_types import TypedSymbol, BasicType, StructType -from pystencils import Field +from pystencils import Field, FieldType from pystencils.gpucuda.cudajit import makePythonFunction @@ -15,11 +15,18 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, allFields = fieldsRead.union(fieldsWritten) readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten]) + buffers = set([f for f in allFields if FieldType.isBuffer(f)]) + fieldsWithoutBuffers = allFields - buffers + fieldAccesses = set() + numBufferAccesses = 0 for eq in listOfEquations: fieldAccesses.update(eq.atoms(Field.Access)) - commonShape = getCommonShape(allFields) + numBufferAccesses += sum([1 for access in eq.atoms(Field.Access) if FieldType.isBuffer(access.field)]) + + commonShape = getCommonShape(fieldsWithoutBuffers) + if iterationSlice is None: # determine iteration slice from ghost layers if ghostLayers is None: @@ -34,7 +41,7 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, for i in range(len(commonShape)): iterationSlice.append(slice(ghostLayers[i][0], -ghostLayers[i][1] if ghostLayers[i][1] > 0 else None)) - indexing = indexingCreator(field=list(allFields)[0], iterationSlice=iterationSlice) + indexing = indexingCreator(field=list(fieldsWithoutBuffers)[0], iterationSlice=iterationSlice) block = Block(assignments) block = indexing.guard(block, commonShape) @@ -46,8 +53,19 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, basePointerInfos = {f.name: parseBasePointerInfo(basePointerInfo, [2, 1, 0], f) for f in allFields} coordMapping = {f.name: coordMapping for f in allFields} - resolveFieldAccesses(ast, readOnlyFields, fieldToFixedCoordinates=coordMapping, - fieldToBasePointerInfo=basePointerInfos) + + loopVars = [numBufferAccesses * i for i in indexing.coordinates] + loopStrides = list(fieldsWithoutBuffers)[0].shape + + baseBufferIndex = loopVars[0] + stride = 1 + for idx, var in enumerate(loopVars[1:]): + stride *= loopStrides[idx] + baseBufferIndex += var * stride + + resolveBufferAccesses(ast, baseBufferIndex, readOnlyFields) + resolveFieldAccesses(ast, readOnlyFields, fieldToBasePointerInfo=basePointerInfos, + fieldToFixedCoordinates=coordMapping) substituteArrayAccessesWithConstants(ast) @@ -73,7 +91,8 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel" readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten]) for indexField in indexFields: - indexField.isIndexField = True + indexField.fieldType = FieldType.INDEXED + assert FieldType.isIndexed(indexField) assert indexField.spatialDimensions == 1, "Index fields have to be 1D" nonIndexFields = [f for f in allFields if f not in indexFields] diff --git a/pystencils/llvm/kernelcreation.py b/pystencils/llvm/kernelcreation.py index 78cd1166a3f4daa9a025784a9b109127079fc661..121dcdd924a84f90d94ca557f5ac19cf5a299802 100644 --- a/pystencils/llvm/kernelcreation.py +++ b/pystencils/llvm/kernelcreation.py @@ -1,7 +1,8 @@ from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction -from pystencils.transformations import resolveFieldAccesses, \ +from pystencils.transformations import resolveFieldAccesses, resolveBufferAccesses, \ typeAllEquations, moveConstantsBeforeLoop, insertCasts from pystencils.data_types import TypedSymbol, BasicType, StructType +from pystencils.field import Field, FieldType from functools import partial from pystencils.llvm.llvmjit import makePythonFunction @@ -57,7 +58,8 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ allFields = fieldsRead.union(fieldsWritten) for indexField in indexFields: - indexField.isIndexField = True + indexField.fieldType = FieldType.INDEXED + assert FieldType.isIndexed(indexField) assert indexField.spatialDimensions == 1, "Index fields have to be 1D" nonIndexFields = [f for f in allFields if f not in indexFields] diff --git a/pystencils/transformations/transformations.py b/pystencils/transformations/transformations.py index d41cadc013a7ec630bc5e11a6fed02ddf90335ba..bf9426f326150b5d996305c3afc7b729f9bdd80e 100644 --- a/pystencils/transformations/transformations.py +++ b/pystencils/transformations/transformations.py @@ -6,7 +6,7 @@ import sympy as sp from sympy.logic.boolalg import Boolean from sympy.tensor import IndexedBase -from pystencils.field import Field, offsetComponentToDirectionString +from pystencils.field import Field, FieldType, offsetComponentToDirectionString from pystencils.data_types import TypedSymbol, createType, PointerType, StructType, getBaseType, castFunc from pystencils.slicing import normalizeSlice import pystencils.astnodes as ast @@ -71,13 +71,15 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None """ # find correct ordering by inspecting participating FieldAccesses fieldAccesses = body.atoms(Field.Access) - fieldList = [e.field for e in fieldAccesses] + # exclude accesses to buffers from fieldList, because buffers are treated separately + fieldList = [e.field for e in fieldAccesses if not FieldType.isBuffer(e.field)] fields = set(fieldList) + numBufferAccesses = len(fieldAccesses) - len(fieldList) if loopOrder is None: loopOrder = getOptimalLoopOrdering(fields) - shape = getCommonShape(fields) + shape = getCommonShape(list(fields)) if iterationSlice is not None: iterationSlice = normalizeSlice(iterationSlice, shape) @@ -88,6 +90,11 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None if isinstance(ghostLayers, int): ghostLayers = [(ghostLayers, ghostLayers)] * len(loopOrder) + def getLoopStride(begin, end, step): + return (end - begin) / step + + loopStrides = [] + loopVars = [] currentBody = body lastLoop = None for i, loopCoordinate in enumerate(reversed(loopOrder)): @@ -97,6 +104,8 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None newLoop = ast.LoopOverCoordinate(currentBody, loopCoordinate, begin, end, 1) lastLoop = newLoop currentBody = ast.Block([lastLoop]) + loopStrides.append(getLoopStride(begin, end, 1)) + loopVars.append(newLoop.loopCounterSymbol) else: sliceComponent = iterationSlice[loopCoordinate] if type(sliceComponent) is slice: @@ -104,11 +113,16 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None newLoop = ast.LoopOverCoordinate(currentBody, loopCoordinate, sc.start, sc.stop, sc.step) lastLoop = newLoop currentBody = ast.Block([lastLoop]) + loopStrides.append(getLoopStride(sc.start, sc.stop, sc.step)) + loopVars.append(newLoop.loopCounterSymbol) else: assignment = ast.SympyAssignment(ast.LoopOverCoordinate.getLoopCounterSymbol(loopCoordinate), sp.sympify(sliceComponent)) currentBody.insertFront(assignment) - return ast.KernelFunction(currentBody, ghostLayers=ghostLayers, functionName=functionName) + + loopVars = [numBufferAccesses * var for var in loopVars] + astNode = ast.KernelFunction(currentBody, ghostLayers=ghostLayers, functionName=functionName) + return (astNode, loopStrides, loopVars) def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr): @@ -133,7 +147,6 @@ def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr): (ptr_E_2S, x*fstride_myfield[0] + y*fstride_myfield[1] + fstride_myfield[0] - 2*fstride_myfield[1]) """ field = fieldAccess.field - offset = 0 name = "" listToHash = [] @@ -158,6 +171,7 @@ def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr): name += "%0.6X" % (abs(hash(tuple(listToHash)))) newPtr = TypedSymbol(previousPtr.name + name, previousPtr.dtype) + return newPtr, offset @@ -223,6 +237,7 @@ def parseBasePointerInfo(basePointerSpecification, loopOrder, field): rest = allCoordinates - specifiedCoordinates if rest: result.append(list(rest)) + return result @@ -278,6 +293,52 @@ def substituteArrayAccessesWithConstants(astNode): for a in astNode.args: substituteArrayAccessesWithConstants(a) +def resolveBufferAccesses(astNode, baseBufferIndex, readOnlyFieldNames=set()): + def visitSympyExpr(expr, enclosingBlock, sympyAssignment): + if isinstance(expr, Field.Access): + fieldAccess = expr + + # Do not apply transformation if field is not a buffer + if not FieldType.isBuffer(fieldAccess.field): + return expr + + buffer = fieldAccess.field + + dtype = PointerType(buffer.dtype, const=buffer.name in readOnlyFieldNames, restrict=True) + fieldPtr = TypedSymbol("%s%s" % (Field.DATA_PREFIX, symbolNameToVariableName(buffer.name)), dtype) + + bufferIndex = baseBufferIndex + if len(fieldAccess.index) > 1: + raise RuntimeError('Only indexing dimensions up to 1 are currently supported in buffers!') + + if len(fieldAccess.index) > 0: + cellIndex = fieldAccess.index[0] + bufferIndex += cellIndex + + result = ast.ResolvedFieldAccess(fieldPtr, bufferIndex, fieldAccess.field, fieldAccess.offsets, + fieldAccess.index) + + return visitSympyExpr(result, enclosingBlock, sympyAssignment) + else: + if isinstance(expr, ast.ResolvedFieldAccess): + return expr + + newArgs = [visitSympyExpr(e, enclosingBlock, sympyAssignment) for e in expr.args] + kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {} + return expr.func(*newArgs, **kwargs) if newArgs else expr + + def visitNode(subAst): + if isinstance(subAst, ast.SympyAssignment): + enclosingBlock = subAst.parent + assert type(enclosingBlock) is ast.Block + subAst.lhs = visitSympyExpr(subAst.lhs, enclosingBlock, subAst) + subAst.rhs = visitSympyExpr(subAst.rhs, enclosingBlock, subAst) + else: + for i, a in enumerate(subAst.args): + visitNode(a) + + return visitNode(astNode) + def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerInfo={}, fieldToFixedCoordinates={}): """ @@ -298,6 +359,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn if isinstance(expr, Field.Access): fieldAccess = expr field = fieldAccess.field + if field.name in fieldToBasePointerInfo: basePointerInfo = fieldToBasePointerInfo[field.name] else: @@ -324,6 +386,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn coordDict[e] = field.dtype.getElementOffset(accessedFieldName) else: coordDict[e] = fieldAccess.index[e - field.spatialDimensions] + return coordDict lastPointer = fieldPtr @@ -337,6 +400,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn lastPointer = newPtr coordDict = createCoordinateDict(basePointerInfo[0]) + _, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer) result = ast.ResolvedFieldAccess(lastPointer, offset, fieldAccess.field, fieldAccess.offsets, fieldAccess.index) @@ -344,6 +408,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn if isinstance(getBaseType(fieldAccess.field.dtype), StructType): newType = fieldAccess.field.dtype.getElementType(fieldAccess.index[0]) result = castFunc(result, newType) + return visitSympyExpr(result, enclosingBlock, sympyAssignment) else: if isinstance(expr, ast.ResolvedFieldAccess): diff --git a/pystencils_tests/test_buffer.py b/pystencils_tests/test_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..600f76254805cc6df7a3e0e9ea043bc40d845061 --- /dev/null +++ b/pystencils_tests/test_buffer.py @@ -0,0 +1,195 @@ +import unittest +import numpy as np +import sympy as sp +from pystencils import Field, FieldType +from pystencils.field import directionStringToOffset, layoutStringToTuple, createNumpyArrayWithLayout +from pystencils.cpu import makePythonFunction +from pystencils.cpu.kernelcreation import createKernel +from pystencils.slicing import addGhostLayers, getSliceBeforeGhostLayer, getGhostRegionSlice + + +class TestBuffer(unittest.TestCase): + """ + Tests for the (un)packing (from)to buffers. + """ + FIELD_SIZES = [(32, 10), (10, 8, 6)] + + def _generate_fields(self, dt=np.uint64, numStencilDirections=1, layout='numpy'): + fieldSizes = self.FIELD_SIZES + if numStencilDirections > 1: + fieldSizes = [s + (numStencilDirections,) for s in fieldSizes] + + fields = [] + for size in fieldSizes: + fieldLayout = layoutStringToTuple(layout, len(size)) + srcArr = createNumpyArrayWithLayout(size, fieldLayout) + + arrayData = np.reshape(np.arange(1, np.prod(size)+1), size) + # Use flat iterator to input data into the array + srcArr.flat = addGhostLayers(arrayData, indexDimensions=1 if numStencilDirections > 1 else 0).astype(dt).flat + dstArr = np.zeros(srcArr.shape, dtype=dt) + bufferArr = np.zeros(np.prod(srcArr.shape), dtype=dt) + fields.append((srcArr, dstArr, bufferArr)) + return fields + + def test_full_scalar_field(self): + """ + Tests fully (un)packing a scalar field (from)to a buffer. + """ + fields = self._generate_fields() + for (srcArr, dstArr, bufferArr) in fields: + srcField = Field.createFromNumpyArray("srcField", srcArr) + dstField = Field.createFromNumpyArray("dstField", dstArr) + buffer = Field.createGeneric("buffer", spatialDimensions=1, + fieldType=FieldType.BUFFER, dtype=srcArr.dtype) + + packEqs = [sp.Eq(buffer.center(), srcField.center())] + packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype}) + + packKernel = makePythonFunction(packCode) + packKernel(buffer=bufferArr, srcField=srcArr) + + unpackEqs = [sp.Eq(dstField.center(), buffer.center())] + unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype}) + + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(dstField=dstArr, buffer=bufferArr) + + np.testing.assert_equal(srcArr, dstArr) + + def test_field_slice(self): + """ + Tests (un)packing slices of a scalar field (from)to a buffer. + """ + fields = self._generate_fields() + for dir in ['N', 'S', 'NW', 'SW', 'TNW', 'B']: + for (srcArr, dstArr, bufferArr) in fields: + # Extract slice from N direction of the field + sliceDir = directionStringToOffset(dir, dim=len(srcArr.shape)) + packSlice = getSliceBeforeGhostLayer(sliceDir) + unpackSlice = getGhostRegionSlice(sliceDir) + + srcField = Field.createFromNumpyArray("srcField", srcArr[packSlice]) + dstField = Field.createFromNumpyArray("dstField", dstArr[unpackSlice]) + buffer = Field.createGeneric("buffer", spatialDimensions=1, + fieldType=FieldType.BUFFER, dtype=srcArr.dtype) + + packEqs = [sp.Eq(buffer.center(), srcField.center())] + packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype}) + + packKernel = makePythonFunction(packCode) + packKernel(buffer=bufferArr, srcField=srcArr[packSlice]) + + # Unpack into ghost layer of dstField in N direction + unpackEqs = [sp.Eq(dstField.center(), buffer.center())] + unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype}) + + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(buffer=bufferArr, dstField=dstArr[unpackSlice]) + + np.testing.assert_equal(srcArr[packSlice], dstArr[unpackSlice]) + + def test_all_cell_values(self): + """ + Tests (un)packing all cell values of the a field (from)to a buffer. + """ + numCellValues=19 + fields = self._generate_fields(numStencilDirections=numCellValues) + for (srcArr, dstArr, bufferArr) in fields: + srcField = Field.createFromNumpyArray("srcField", srcArr, indexDimensions=1) + dstField = Field.createFromNumpyArray("dstField", dstArr, indexDimensions=1) + buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1, + fieldType=FieldType.BUFFER, dtype=srcArr.dtype) + + packEqs = [] + # Since we are packing all cell values for all cells, then + # the buffer index is equivalent to the field index + for idx in range(numCellValues): + eq = sp.Eq(buffer(idx), srcField(idx)) + packEqs.append(eq) + + packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype}) + packKernel = makePythonFunction(packCode) + packKernel(buffer=bufferArr, srcField=srcArr) + + unpackEqs = [] + + for idx in range(numCellValues): + eq = sp.Eq(dstField(idx), buffer(idx)) + unpackEqs.append(eq) + + unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype}) + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(buffer=bufferArr, dstField=dstArr) + + np.testing.assert_equal(srcArr, dstArr) + + def test_subset_cell_values(self): + """ + Tests (un)packing a subset of cell values of the a field (from)to a buffer. + """ + numCellValues=19 + # Cell indices of the field to be (un)packed (from)to the buffer + cellIndices = [1, 5, 7, 8, 10, 12, 13] + fields = self._generate_fields(numStencilDirections=numCellValues) + for (srcArr, dstArr, bufferArr) in fields: + srcField = Field.createFromNumpyArray("srcField", srcArr, indexDimensions=1) + dstField = Field.createFromNumpyArray("dstField", dstArr, indexDimensions=1) + buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1, + fieldType=FieldType.BUFFER, dtype=srcArr.dtype) + + packEqs = [] + # Since we are packing all cell values for all cells, then + # the buffer index is equivalent to the field index + for bufferIdx, cellIdx in enumerate(cellIndices): + eq = sp.Eq(buffer(bufferIdx), srcField(cellIdx)) + packEqs.append(eq) + + packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype}) + packKernel = makePythonFunction(packCode) + packKernel(buffer=bufferArr, srcField=srcArr) + + unpackEqs = [] + + for bufferIdx, cellIdx in enumerate(cellIndices): + eq = sp.Eq(dstField(cellIdx), buffer(bufferIdx)) + unpackEqs.append(eq) + + unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype}) + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(buffer=bufferArr, dstField=dstArr) + + maskArr = np.ma.masked_where((srcArr - dstArr) != 0, srcArr) + np.testing.assert_equal(dstArr, maskArr.filled(int(0))) + + + def test_field_layouts(self): + numCellValues = 27 + for layoutStr in ['numpy', 'fzyx', 'zyxf', 'reversenumpy']: + fields = self._generate_fields(numStencilDirections=numCellValues, layout=layoutStr) + for (srcArr, dstArr, bufferArr) in fields: + srcField = Field.createFromNumpyArray("srcField", srcArr, indexDimensions=1) + dstField = Field.createFromNumpyArray("dstField", dstArr, indexDimensions=1) + buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1, + fieldType=FieldType.BUFFER, dtype=srcArr.dtype) + + packEqs = [] + # Since we are packing all cell values for all cells, then + # the buffer index is equivalent to the field index + for idx in range(numCellValues): + eq = sp.Eq(buffer(idx), srcField(idx)) + packEqs.append(eq) + + packCode = createKernel(packEqs, typeForSymbol={'srcField': srcArr.dtype, 'buffer': buffer.dtype}) + packKernel = makePythonFunction(packCode) + packKernel(buffer=bufferArr, srcField=srcArr) + + unpackEqs = [] + + for idx in range(numCellValues): + eq = sp.Eq(dstField(idx), buffer(idx)) + unpackEqs.append(eq) + + unpackCode = createKernel(unpackEqs, typeForSymbol={'dstField': dstArr.dtype, 'buffer': buffer.dtype}) + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(buffer=bufferArr, dstField=dstArr) diff --git a/pystencils_tests/test_buffer_gpu.py b/pystencils_tests/test_buffer_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..d6d9275f03a9f558ccd034f33739fd4b10c1790f --- /dev/null +++ b/pystencils_tests/test_buffer_gpu.py @@ -0,0 +1,217 @@ +import unittest +import numpy as np +import sympy as sp +from pystencils import Field, FieldType +from pystencils.field import directionStringToOffset, layoutStringToTuple, createNumpyArrayWithLayout +from pystencils.gpucuda import makePythonFunction, createCUDAKernel +from pystencils.slicing import addGhostLayers, getSliceBeforeGhostLayer, getGhostRegionSlice +import pycuda.autoinit +import pycuda.gpuarray as gpuarray +from pystencils.backends.cbackend import generateC + + +class TestBufferGPU(unittest.TestCase): + """ + Tests for the (un)packing (from)to buffers on a CUDA GPU. + """ + FIELD_SIZES = [(4, 3), (9, 3, 7)] + + def _generate_fields(self, dt=np.uint8, numStencilDirections=1, layout='numpy'): + fieldSizes = self.FIELD_SIZES + if numStencilDirections > 1: + fieldSizes = [s + (numStencilDirections,) for s in fieldSizes] + + fields = [] + for size in fieldSizes: + fieldLayout = layoutStringToTuple(layout, len(size)) + srcArr = createNumpyArrayWithLayout(size, fieldLayout).astype(dt) + + arrayData = np.reshape(np.arange(1, np.prod(size)+1), size) + # Use flat iterator to input data into the array + srcArr.flat = addGhostLayers(arrayData, indexDimensions=1 if numStencilDirections > 1 else 0).astype(dt).flat + + gpuSrcArr = gpuarray.to_gpu(srcArr) + gpuDstArr = gpuarray.zeros_like(gpuSrcArr) + gpuBufferArr = gpuarray.zeros(np.prod(srcArr.shape), dtype=dt) + + fields.append((srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr)) + return fields + + def test_full_scalar_field(self): + """ + Tests fully (un)packing a scalar field (from)to a GPU buffer. + """ + fields = self._generate_fields() + for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields: + srcField = Field.createFromNumpyArray("srcField", srcArr) + dstField = Field.createFromNumpyArray("dstField", srcArr) + buffer = Field.createGeneric("buffer", spatialDimensions=1, + fieldType=FieldType.BUFFER, dtype=srcArr.dtype) + + packEqs = [sp.Eq(buffer.center(), srcField.center())] + packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype} + packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes) + + packKernel = makePythonFunction(packCode) + packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr) + + unpackEqs = [sp.Eq(dstField.center(), buffer.center())] + unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype} + unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes) + + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(dstField=gpuDstArr, buffer=gpuBufferArr) + + dstArr = gpuDstArr.get() + + np.testing.assert_equal(srcArr, dstArr) + + def test_field_slice(self): + """ + Tests (un)packing slices of a scalar field (from)to a buffer. + """ + fields = self._generate_fields() + for dir in ['N', 'S', 'NW', 'SW', 'TNW', 'B']: + for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields: + # Extract slice from N direction of the field + sliceDir = directionStringToOffset(dir, dim=len(srcArr.shape)) + packSlice = getSliceBeforeGhostLayer(sliceDir) + unpackSlice = getGhostRegionSlice(sliceDir) + + srcField = Field.createFromNumpyArray("srcField", srcArr[packSlice]) + dstField = Field.createFromNumpyArray("dstField", srcArr[unpackSlice]) + buffer = Field.createGeneric("buffer", spatialDimensions=1, + fieldType=FieldType.BUFFER, dtype=srcArr.dtype) + + packEqs = [sp.Eq(buffer.center(), srcField.center())] + packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype} + packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes) + + packKernel = makePythonFunction(packCode) + packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr[packSlice]) + + # Unpack into ghost layer of dstField in N direction + unpackEqs = [sp.Eq(dstField.center(), buffer.center())] + unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype} + unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes) + + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(buffer=gpuBufferArr, dstField=gpuDstArr[unpackSlice]) + + dstArr = gpuDstArr.get() + + np.testing.assert_equal(srcArr[packSlice], dstArr[unpackSlice]) + + def test_all_cell_values(self): + """ + Tests (un)packing all cell values of the a field (from)to a buffer. + """ + numCellValues=7 + fields = self._generate_fields(numStencilDirections=numCellValues) + for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields: + srcField = Field.createFromNumpyArray("srcField", gpuSrcArr, indexDimensions=1) + dstField = Field.createFromNumpyArray("dstField", gpuSrcArr, indexDimensions=1) + buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1, + fieldType=FieldType.BUFFER, dtype=gpuSrcArr.dtype) + + packEqs = [] + # Since we are packing all cell values for all cells, then + # the buffer index is equivalent to the field index + for idx in range(numCellValues): + eq = sp.Eq(buffer(idx), srcField(idx)) + packEqs.append(eq) + + packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype} + packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes) + packKernel = makePythonFunction(packCode) + packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr) + + unpackEqs = [] + + for idx in range(numCellValues): + eq = sp.Eq(dstField(idx), buffer(idx)) + unpackEqs.append(eq) + + unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype} + unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes) + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(buffer=gpuBufferArr, dstField=gpuDstArr) + + dstArr = gpuDstArr.get() + + np.testing.assert_equal(srcArr, dstArr) + + def test_subset_cell_values(self): + """ + Tests (un)packing a subset of cell values of the a field (from)to a buffer. + """ + numCellValues = 7 + # Cell indices of the field to be (un)packed (from)to the buffer + cellIndices = [1, 3, 5, 6] + fields = self._generate_fields(numStencilDirections=numCellValues) + for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields: + srcField = Field.createFromNumpyArray("srcField", gpuSrcArr, indexDimensions=1) + dstField = Field.createFromNumpyArray("dstField", gpuSrcArr, indexDimensions=1) + buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1, + fieldType=FieldType.BUFFER, dtype=gpuSrcArr.dtype) + + packEqs = [] + # Since we are packing all cell values for all cells, then + # the buffer index is equivalent to the field index + for bufferIdx, cellIdx in enumerate(cellIndices): + eq = sp.Eq(buffer(bufferIdx), srcField(cellIdx)) + packEqs.append(eq) + + packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype} + packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes) + packKernel = makePythonFunction(packCode) + packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr) + + unpackEqs = [] + + for bufferIdx, cellIdx in enumerate(cellIndices): + eq = sp.Eq(dstField(cellIdx), buffer(bufferIdx)) + unpackEqs.append(eq) + + unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype} + unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes) + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(buffer=gpuBufferArr, dstField=gpuDstArr) + + dstArr = gpuDstArr.get() + + maskArr = np.ma.masked_where((srcArr - dstArr) != 0, srcArr) + np.testing.assert_equal(dstArr, maskArr.filled(int(0))) + + def test_field_layouts(self): + numCellValues = 7 + for layoutStr in ['numpy', 'fzyx', 'zyxf', 'reversenumpy']: + fields = self._generate_fields(numStencilDirections=numCellValues, layout=layoutStr) + for (srcArr, gpuSrcArr, gpuDstArr, gpuBufferArr) in fields: + srcField = Field.createFromNumpyArray("srcField", gpuSrcArr, indexDimensions=1) + dstField = Field.createFromNumpyArray("dstField", gpuSrcArr, indexDimensions=1) + buffer = Field.createGeneric("buffer", spatialDimensions=1, indexDimensions=1, + fieldType=FieldType.BUFFER, dtype=srcArr.dtype) + + packEqs = [] + # Since we are packing all cell values for all cells, then + # the buffer index is equivalent to the field index + for idx in range(numCellValues): + eq = sp.Eq(buffer(idx), srcField(idx)) + packEqs.append(eq) + + packTypes = {'srcField': gpuSrcArr.dtype, 'buffer': gpuBufferArr.dtype} + packCode = createCUDAKernel(packEqs, typeForSymbol=packTypes) + packKernel = makePythonFunction(packCode) + packKernel(buffer=gpuBufferArr, srcField=gpuSrcArr) + + unpackEqs = [] + + for idx in range(numCellValues): + eq = sp.Eq(dstField(idx), buffer(idx)) + unpackEqs.append(eq) + + unpackTypes = {'dstField': gpuDstArr.dtype, 'buffer': gpuBufferArr.dtype} + unpackCode = createCUDAKernel(unpackEqs, typeForSymbol=unpackTypes) + unpackKernel = makePythonFunction(unpackCode) + unpackKernel(buffer=gpuBufferArr, dstField=gpuDstArr) \ No newline at end of file diff --git a/pystencils_tests/test_jacobi_cbackend.py b/pystencils_tests/test_jacobi_cbackend.py index 8e08a83ee2371dbf49cf4c515fcbcfcc0e59a72a..dfe45ac5e28d95f56766058481eec059a52a66b6 100644 --- a/pystencils_tests/test_jacobi_cbackend.py +++ b/pystencils_tests/test_jacobi_cbackend.py @@ -22,7 +22,7 @@ class TestGeneratorModule(unittest.TestCase): jacobi = SympyAssignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4) body = Block([jacobi]) - code = makeLoopOverDomain(body, "kernel") + code, _, _ = makeLoopOverDomain(body, "kernel") resolveFieldAccesses(code) substituteArrayAccessesWithConstants(code) moveConstantsBeforeLoop(code) @@ -43,7 +43,7 @@ class TestGeneratorModule(unittest.TestCase): d = Field.createGeneric("d", 3) jacobi = SympyAssignment(d[0, 0, 0], (f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]) / 4) body = Block([jacobi]) - code = makeLoopOverDomain(body, "kernel") + code, _, _ = makeLoopOverDomain(body, "kernel") resolveFieldAccesses(code) substituteArrayAccessesWithConstants(code) moveConstantsBeforeLoop(code) diff --git a/pystencils_walberla/jinja_filters.py b/pystencils_walberla/jinja_filters.py index 142bd4cb85b4ad509910bee26e489904afbe95ee..e24c79300c5bc87f5f21d78572345028c10cb56c 100644 --- a/pystencils_walberla/jinja_filters.py +++ b/pystencils_walberla/jinja_filters.py @@ -5,6 +5,7 @@ from pystencils.astnodes import ResolvedFieldAccess from pystencils.backends.cbackend import CustomSympyPrinter from pystencils.data_types import getBaseType from pystencils.cpu import generateC +from pystencils.field import FieldType temporaryFieldTemplate = """ @@ -167,7 +168,7 @@ def generateCall(ctx, kernelInfo, ghostLayersToInclude=0): spatialShapeSymbols = [] for param in ast.parameters: - if param.isFieldArgument and fields[param.fieldName].isIndexField: + if param.isFieldArgument and FieldType.isIndexed(fields[param.fieldName]): continue if param.isFieldPtrArgument: diff --git a/setup_lbmpy_walberla.py b/setup_lbmpy_walberla.py index 4528945ecd155a1903941462b276f360b4ee9061..0f6649f0643640c65ee17d3054045403acb3a209 100644 --- a/setup_lbmpy_walberla.py +++ b/setup_lbmpy_walberla.py @@ -10,5 +10,5 @@ setup(name='lbmpy_walberla', packages=['lbmpy_walberla'] + ['lbmpy_walberla.' + s for s in find_packages('lbmpy_walberla')], install_requires=['lbmpy'], package_dir={'lbmpy_walberla': 'lbmpy_walberla'}, - package_data={ 'lbmpy_walberla': ['templates/*'] }, + package_data={'lbmpy_walberla': ['templates/*']}, )