Skip to content
Snippets Groups Projects
Commit e48ecca9 authored by Martin Bauer
Browse files

Caching for jitted cpu and gpu kernels (big speedup for small work sizes)

parent 3f5a9f52
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,7 @@ Factory functions for standard LBM methods
"""
import sympy as sp
from copy import copy
from functools import partial
from lbmpy.methods.creationfunctions import createKBCTypeTRT, createRawMRT, createThreeRelaxationRateMRT
from lbmpy.methods.entropic import addIterativeEntropyCondition, addEntropyCondition
......@@ -13,7 +14,7 @@ from lbmpy.simplificationfactory import createSimplificationStrategy
from lbmpy.updatekernels import createStreamPullKernel, createPdfArray
def _getParams(params, optParams):
def updateWithDefaultParameters(params, optParams):
defaultMethodDescription = {
'stencil': 'D2Q9',
'method': 'srt', # can be srt, trt or mrt
......@@ -43,6 +44,10 @@ def _getParams(params, optParams):
'target': 'cpu',
'openMP': True,
'pdfArr': None,
'doublePrecision': True,
'gpuIndexing': 'block',
'gpuIndexingParams': {},
}
unknownParams = [k for k in params.keys() if k not in defaultMethodDescription]
unknownOptParams = [k for k in optParams.keys() if k not in defaultOptimizationDescription]
......@@ -59,7 +64,7 @@ def _getParams(params, optParams):
def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
params, optParams = _getParams(kwargs, optimizationParams)
params, optParams = updateWithDefaultParameters(kwargs, optimizationParams)
if ast is None:
params['optimizationParams'] = optParams
......@@ -86,7 +91,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
params, optParams = _getParams(kwargs, optimizationParams)
params, optParams = updateWithDefaultParameters(kwargs, optimizationParams)
if updateRule is None:
params['optimizationParams'] = optimizationParams
......@@ -95,14 +100,21 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
if optParams['target'] == 'cpu':
from pystencils.cpu import createKernel
if 'splitGroups' in updateRule.simplificationHints:
print("splitting!")
splitGroups = updateRule.simplificationHints['splitGroups']
else:
splitGroups = ()
res = createKernel(updateRule.allEquations, splitGroups=splitGroups)
res = createKernel(updateRule.allEquations, splitGroups=splitGroups,
typeForSymbol='double' if optParams['doublePrecision'] else 'float')
elif optParams['target'] == 'gpu':
from pystencils.gpucuda import createCUDAKernel
res = createCUDAKernel(updateRule.allEquations)
from pystencils.gpucuda.indexing import LineIndexing, BlockIndexing
assert optParams['gpuIndexing'] in ('line', 'block')
indexingCreator = LineIndexing if optParams['gpuIndexing'] == 'line' else BlockIndexing
if optParams['gpuIndexingParams']:
indexingCreator = partial(indexingCreator, **optParams['gpuIndexingParams'])
res = createCUDAKernel(updateRule.allEquations,
typeForSymbol='double' if optParams['doublePrecision'] else 'float',
indexingCreator=indexingCreator)
else:
return ValueError("'target' has to be either 'cpu' or 'gpu'")
......@@ -112,7 +124,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
def createLatticeBoltzmannUpdateRule(lbMethod=None, optimizationParams={}, **kwargs):
params, optParams = _getParams(kwargs, optimizationParams)
params, optParams = updateWithDefaultParameters(kwargs, optimizationParams)
stencil = getStencil(params['stencil'])
if lbMethod is None:
......@@ -154,7 +166,7 @@ def createLatticeBoltzmannUpdateRule(lbMethod=None, optimizationParams={}, **kwa
def createLatticeBoltzmannMethod(**params):
params, _ = _getParams(params, {})
params, _ = updateWithDefaultParameters(params, {})
stencil = getStencil(params['stencil'])
dim = len(stencil[0])
......
from copy import deepcopy
from pystencils.field import Field, getLayoutFromNumpyArray
from pystencils.field import Field, getLayoutOfArray
from lbmpy.simplificationfactory import createSimplificationStrategy
......@@ -42,9 +42,9 @@ def compileMacroscopicValuesGetter(lbMethod, outputQuantities, pdfArr=None, fiel
outputFieldShape = pdfArr.shape[:-1]
if indDims > 0:
outputFieldShape += (numberOfElements,)
fieldLayout = getLayoutFromNumpyArray(pdfArr)
fieldLayout = getLayoutOfArray(pdfArr)
else:
fieldLayout = getLayoutFromNumpyArray(pdfArr, indexDimensionIds=[len(pdfField.shape) - 1])
fieldLayout = getLayoutOfArray(pdfArr, indexDimensionIds=[len(pdfField.shape) - 1])
outputField = Field.createFixedSize(outputQuantity, outputFieldShape, indDims, pdfArr.dtype, fieldLayout)
outputMapping[outputQuantity] = [outputField(i) for i in range(numberOfElements)]
......
......@@ -12,6 +12,7 @@ class LbmCollisionRule(EquationCollection):
super(LbmCollisionRule, self).__init__(*args, **kwargs)
self.method = lbmMethod
class AbstractLbMethod(abc.ABCMeta('ABC', (object,), {})):
"""
Abstract base class for all LBM methods
......
from functools import partial
import numpy as np
from pystencils import Field
from pystencils.field import getLayoutOfArray, createNumpyArrayWithLayout
from pystencils.slicing import sliceFromDirection, addGhostLayers, getPeriodicBoundaryFunctor
from lbmpy.creationfunctions import createLatticeBoltzmannFunction
from lbmpy.creationfunctions import createLatticeBoltzmannFunction, updateWithDefaultParameters
from lbmpy.macroscopic_value_kernels import compileMacroscopicValuesGetter, compileMacroscopicValuesSetter
from lbmpy.boundaries import BoundaryHandling, noSlip, ubb, fixedDensity
from lbmpy.stencils import getStencil
from lbmpy.updatekernels import createPdfArray
def createScenario(domainSize, boundarySetupFunction, methodParameters, optimizationParams, lbmKernel=None,
initialVelocity=None, preUpdateFunctions=[], kernelParams={}):
if 'target' not in optimizationParams:
optimizationParams['target'] = 'cpu'
ghostLayers = 1
domainSizeWithGhostLayer = tuple([s + 2 * ghostLayers for s in domainSize])
D = len(domainSize)
if 'stencil' not in methodParameters:
methodParameters['stencil'] = 'D2Q9' if D == 2 else 'D3Q27'
methodParameters, optimizationParams = updateWithDefaultParameters(methodParameters, optimizationParams)
Q = len(getStencil(methodParameters['stencil']))
pdfArrays = [np.zeros(domainSizeWithGhostLayer + (Q,)),
np.zeros(domainSizeWithGhostLayer + (Q,))]
pdfArrays = [createPdfArray(domainSize, Q, layout=optimizationParams['fieldLayout']),
createPdfArray(domainSize, Q, layout=optimizationParams['fieldLayout'])]
# Create kernel
if lbmKernel is None:
......@@ -43,8 +43,10 @@ def createScenario(domainSize, boundarySetupFunction, methodParameters, optimiza
boundaryHandling = None
# Macroscopic value input/output
densityArr = [np.zeros(domainSizeWithGhostLayer)]
velocityArr = [np.zeros(domainSizeWithGhostLayer + (D,))]
pdfArrLayout = getLayoutOfArray(pdfArrays[0])
pdfArrLayoutNoIdx = getLayoutOfArray(pdfArrays[0], indexDimensionIds=[D])
densityArr = [createNumpyArrayWithLayout(domainSizeWithGhostLayer, layout=pdfArrLayoutNoIdx)]
velocityArr = [createNumpyArrayWithLayout(list(domainSizeWithGhostLayer) + [D], layout=pdfArrLayout)]
getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0], target='cpu')
if initialVelocity is None:
......
import numpy as np
import sympy as sp
from pystencils import Field
from pystencils.field import createNumpyArrayWithLayout
from pystencils.sympyextensions import fastSubs
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
......@@ -73,6 +74,7 @@ def createStreamPullKernel(collisionRule, numpyField=None, srcFieldName="src", d
# ---------------------------------- Pdf array creation for various layouts --------------------------------------------
def createPdfArray(size, numDirections, ghostLayers=1, layout='fzyx'):
"""
Creates an empty numpy array for a pdf field with the specified memory layout.
......@@ -88,17 +90,14 @@ def createPdfArray(size, numDirections, ghostLayers=1, layout='fzyx'):
(72, 360, 8)
"""
sizeWithGl = [s + 2 * ghostLayers for s in size]
dim = len(size)
if layout == "fzyx" or layout == 'f' or layout == 'reverseNumpy':
return np.empty(sizeWithGl + [numDirections], order='f')
layout = tuple(reversed(range(dim+1)))
elif layout == 'c' or layout == 'numpy':
return np.empty(sizeWithGl + [numDirections], order='c')
layout = tuple(range(dim+1))
elif layout == 'zyxf':
res = np.empty(list(reversed(sizeWithGl)) + [numDirections], order='c')
res = res.swapaxes(0, 1)
if len(size) == 3:
res = res.swapaxes(1, 2)
res = res.swapaxes(0, 1)
return res
layout = tuple(reversed(range(dim))) + (dim,)
return createNumpyArrayWithLayout(sizeWithGl + [numDirections], layout)
# ------------------------------------------- Add output fields to kernel ----------------------------------------------
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment