Skip to content
Snippets Groups Projects
Commit e48ecca9 authored by Martin Bauer
Browse files

Caching for jitted cpu and gpu kernels (big speedup for small work sizes)

parent 3f5a9f52
Branches
Tags
No related merge requests found
...@@ -3,6 +3,7 @@ Factory functions for standard LBM methods ...@@ -3,6 +3,7 @@ Factory functions for standard LBM methods
""" """
import sympy as sp import sympy as sp
from copy import copy from copy import copy
from functools import partial
from lbmpy.methods.creationfunctions import createKBCTypeTRT, createRawMRT, createThreeRelaxationRateMRT from lbmpy.methods.creationfunctions import createKBCTypeTRT, createRawMRT, createThreeRelaxationRateMRT
from lbmpy.methods.entropic import addIterativeEntropyCondition, addEntropyCondition from lbmpy.methods.entropic import addIterativeEntropyCondition, addEntropyCondition
...@@ -13,7 +14,7 @@ from lbmpy.simplificationfactory import createSimplificationStrategy ...@@ -13,7 +14,7 @@ from lbmpy.simplificationfactory import createSimplificationStrategy
from lbmpy.updatekernels import createStreamPullKernel, createPdfArray from lbmpy.updatekernels import createStreamPullKernel, createPdfArray
def _getParams(params, optParams): def updateWithDefaultParameters(params, optParams):
defaultMethodDescription = { defaultMethodDescription = {
'stencil': 'D2Q9', 'stencil': 'D2Q9',
'method': 'srt', # can be srt, trt or mrt 'method': 'srt', # can be srt, trt or mrt
...@@ -43,6 +44,10 @@ def _getParams(params, optParams): ...@@ -43,6 +44,10 @@ def _getParams(params, optParams):
'target': 'cpu', 'target': 'cpu',
'openMP': True, 'openMP': True,
'pdfArr': None, 'pdfArr': None,
'doublePrecision': True,
'gpuIndexing': 'block',
'gpuIndexingParams': {},
} }
unknownParams = [k for k in params.keys() if k not in defaultMethodDescription] unknownParams = [k for k in params.keys() if k not in defaultMethodDescription]
unknownOptParams = [k for k in optParams.keys() if k not in defaultOptimizationDescription] unknownOptParams = [k for k in optParams.keys() if k not in defaultOptimizationDescription]
...@@ -59,7 +64,7 @@ def _getParams(params, optParams): ...@@ -59,7 +64,7 @@ def _getParams(params, optParams):
def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs): def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
params, optParams = _getParams(kwargs, optimizationParams) params, optParams = updateWithDefaultParameters(kwargs, optimizationParams)
if ast is None: if ast is None:
params['optimizationParams'] = optParams params['optimizationParams'] = optParams
...@@ -86,7 +91,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs): ...@@ -86,7 +91,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs): def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
params, optParams = _getParams(kwargs, optimizationParams) params, optParams = updateWithDefaultParameters(kwargs, optimizationParams)
if updateRule is None: if updateRule is None:
params['optimizationParams'] = optimizationParams params['optimizationParams'] = optimizationParams
...@@ -95,14 +100,21 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs): ...@@ -95,14 +100,21 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
if optParams['target'] == 'cpu': if optParams['target'] == 'cpu':
from pystencils.cpu import createKernel from pystencils.cpu import createKernel
if 'splitGroups' in updateRule.simplificationHints: if 'splitGroups' in updateRule.simplificationHints:
print("splitting!")
splitGroups = updateRule.simplificationHints['splitGroups'] splitGroups = updateRule.simplificationHints['splitGroups']
else: else:
splitGroups = () splitGroups = ()
res = createKernel(updateRule.allEquations, splitGroups=splitGroups) res = createKernel(updateRule.allEquations, splitGroups=splitGroups,
typeForSymbol='double' if optParams['doublePrecision'] else 'float')
elif optParams['target'] == 'gpu': elif optParams['target'] == 'gpu':
from pystencils.gpucuda import createCUDAKernel from pystencils.gpucuda import createCUDAKernel
res = createCUDAKernel(updateRule.allEquations) from pystencils.gpucuda.indexing import LineIndexing, BlockIndexing
assert optParams['gpuIndexing'] in ('line', 'block')
indexingCreator = LineIndexing if optParams['gpuIndexing'] == 'line' else BlockIndexing
if optParams['gpuIndexingParams']:
indexingCreator = partial(indexingCreator, **optParams['gpuIndexingParams'])
res = createCUDAKernel(updateRule.allEquations,
typeForSymbol='double' if optParams['doublePrecision'] else 'float',
indexingCreator=indexingCreator)
else: else:
return ValueError("'target' has to be either 'cpu' or 'gpu'") return ValueError("'target' has to be either 'cpu' or 'gpu'")
...@@ -112,7 +124,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs): ...@@ -112,7 +124,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
def createLatticeBoltzmannUpdateRule(lbMethod=None, optimizationParams={}, **kwargs): def createLatticeBoltzmannUpdateRule(lbMethod=None, optimizationParams={}, **kwargs):
params, optParams = _getParams(kwargs, optimizationParams) params, optParams = updateWithDefaultParameters(kwargs, optimizationParams)
stencil = getStencil(params['stencil']) stencil = getStencil(params['stencil'])
if lbMethod is None: if lbMethod is None:
...@@ -154,7 +166,7 @@ def createLatticeBoltzmannUpdateRule(lbMethod=None, optimizationParams={}, **kwa ...@@ -154,7 +166,7 @@ def createLatticeBoltzmannUpdateRule(lbMethod=None, optimizationParams={}, **kwa
def createLatticeBoltzmannMethod(**params): def createLatticeBoltzmannMethod(**params):
params, _ = _getParams(params, {}) params, _ = updateWithDefaultParameters(params, {})
stencil = getStencil(params['stencil']) stencil = getStencil(params['stencil'])
dim = len(stencil[0]) dim = len(stencil[0])
......
from copy import deepcopy from copy import deepcopy
from pystencils.field import Field, getLayoutFromNumpyArray from pystencils.field import Field, getLayoutOfArray
from lbmpy.simplificationfactory import createSimplificationStrategy from lbmpy.simplificationfactory import createSimplificationStrategy
...@@ -42,9 +42,9 @@ def compileMacroscopicValuesGetter(lbMethod, outputQuantities, pdfArr=None, fiel ...@@ -42,9 +42,9 @@ def compileMacroscopicValuesGetter(lbMethod, outputQuantities, pdfArr=None, fiel
outputFieldShape = pdfArr.shape[:-1] outputFieldShape = pdfArr.shape[:-1]
if indDims > 0: if indDims > 0:
outputFieldShape += (numberOfElements,) outputFieldShape += (numberOfElements,)
fieldLayout = getLayoutFromNumpyArray(pdfArr) fieldLayout = getLayoutOfArray(pdfArr)
else: else:
fieldLayout = getLayoutFromNumpyArray(pdfArr, indexDimensionIds=[len(pdfField.shape) - 1]) fieldLayout = getLayoutOfArray(pdfArr, indexDimensionIds=[len(pdfField.shape) - 1])
outputField = Field.createFixedSize(outputQuantity, outputFieldShape, indDims, pdfArr.dtype, fieldLayout) outputField = Field.createFixedSize(outputQuantity, outputFieldShape, indDims, pdfArr.dtype, fieldLayout)
outputMapping[outputQuantity] = [outputField(i) for i in range(numberOfElements)] outputMapping[outputQuantity] = [outputField(i) for i in range(numberOfElements)]
......
...@@ -12,6 +12,7 @@ class LbmCollisionRule(EquationCollection): ...@@ -12,6 +12,7 @@ class LbmCollisionRule(EquationCollection):
super(LbmCollisionRule, self).__init__(*args, **kwargs) super(LbmCollisionRule, self).__init__(*args, **kwargs)
self.method = lbmMethod self.method = lbmMethod
class AbstractLbMethod(abc.ABCMeta('ABC', (object,), {})): class AbstractLbMethod(abc.ABCMeta('ABC', (object,), {})):
""" """
Abstract base class for all LBM methods Abstract base class for all LBM methods
......
from functools import partial from functools import partial
import numpy as np import numpy as np
from pystencils import Field from pystencils import Field
from pystencils.field import getLayoutOfArray, createNumpyArrayWithLayout
from pystencils.slicing import sliceFromDirection, addGhostLayers, getPeriodicBoundaryFunctor from pystencils.slicing import sliceFromDirection, addGhostLayers, getPeriodicBoundaryFunctor
from lbmpy.creationfunctions import createLatticeBoltzmannFunction from lbmpy.creationfunctions import createLatticeBoltzmannFunction, updateWithDefaultParameters
from lbmpy.macroscopic_value_kernels import compileMacroscopicValuesGetter, compileMacroscopicValuesSetter from lbmpy.macroscopic_value_kernels import compileMacroscopicValuesGetter, compileMacroscopicValuesSetter
from lbmpy.boundaries import BoundaryHandling, noSlip, ubb, fixedDensity from lbmpy.boundaries import BoundaryHandling, noSlip, ubb, fixedDensity
from lbmpy.stencils import getStencil from lbmpy.stencils import getStencil
from lbmpy.updatekernels import createPdfArray
def createScenario(domainSize, boundarySetupFunction, methodParameters, optimizationParams, lbmKernel=None, def createScenario(domainSize, boundarySetupFunction, methodParameters, optimizationParams, lbmKernel=None,
initialVelocity=None, preUpdateFunctions=[], kernelParams={}): initialVelocity=None, preUpdateFunctions=[], kernelParams={}):
if 'target' not in optimizationParams:
optimizationParams['target'] = 'cpu'
ghostLayers = 1 ghostLayers = 1
domainSizeWithGhostLayer = tuple([s + 2 * ghostLayers for s in domainSize]) domainSizeWithGhostLayer = tuple([s + 2 * ghostLayers for s in domainSize])
D = len(domainSize) D = len(domainSize)
if 'stencil' not in methodParameters: if 'stencil' not in methodParameters:
methodParameters['stencil'] = 'D2Q9' if D == 2 else 'D3Q27' methodParameters['stencil'] = 'D2Q9' if D == 2 else 'D3Q27'
methodParameters, optimizationParams = updateWithDefaultParameters(methodParameters, optimizationParams)
Q = len(getStencil(methodParameters['stencil'])) Q = len(getStencil(methodParameters['stencil']))
pdfArrays = [np.zeros(domainSizeWithGhostLayer + (Q,)), pdfArrays = [createPdfArray(domainSize, Q, layout=optimizationParams['fieldLayout']),
np.zeros(domainSizeWithGhostLayer + (Q,))] createPdfArray(domainSize, Q, layout=optimizationParams['fieldLayout'])]
# Create kernel # Create kernel
if lbmKernel is None: if lbmKernel is None:
...@@ -43,8 +43,10 @@ def createScenario(domainSize, boundarySetupFunction, methodParameters, optimiza ...@@ -43,8 +43,10 @@ def createScenario(domainSize, boundarySetupFunction, methodParameters, optimiza
boundaryHandling = None boundaryHandling = None
# Macroscopic value input/output # Macroscopic value input/output
densityArr = [np.zeros(domainSizeWithGhostLayer)] pdfArrLayout = getLayoutOfArray(pdfArrays[0])
velocityArr = [np.zeros(domainSizeWithGhostLayer + (D,))] pdfArrLayoutNoIdx = getLayoutOfArray(pdfArrays[0], indexDimensionIds=[D])
densityArr = [createNumpyArrayWithLayout(domainSizeWithGhostLayer, layout=pdfArrLayoutNoIdx)]
velocityArr = [createNumpyArrayWithLayout(list(domainSizeWithGhostLayer) + [D], layout=pdfArrLayout)]
getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0], target='cpu') getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0], target='cpu')
if initialVelocity is None: if initialVelocity is None:
......
import numpy as np import numpy as np
import sympy as sp import sympy as sp
from pystencils import Field from pystencils import Field
from pystencils.field import createNumpyArrayWithLayout
from pystencils.sympyextensions import fastSubs from pystencils.sympyextensions import fastSubs
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
...@@ -73,6 +74,7 @@ def createStreamPullKernel(collisionRule, numpyField=None, srcFieldName="src", d ...@@ -73,6 +74,7 @@ def createStreamPullKernel(collisionRule, numpyField=None, srcFieldName="src", d
# ---------------------------------- Pdf array creation for various layouts -------------------------------------------- # ---------------------------------- Pdf array creation for various layouts --------------------------------------------
def createPdfArray(size, numDirections, ghostLayers=1, layout='fzyx'): def createPdfArray(size, numDirections, ghostLayers=1, layout='fzyx'):
""" """
Creates an empty numpy array for a pdf field with the specified memory layout. Creates an empty numpy array for a pdf field with the specified memory layout.
...@@ -88,17 +90,14 @@ def createPdfArray(size, numDirections, ghostLayers=1, layout='fzyx'): ...@@ -88,17 +90,14 @@ def createPdfArray(size, numDirections, ghostLayers=1, layout='fzyx'):
(72, 360, 8) (72, 360, 8)
""" """
sizeWithGl = [s + 2 * ghostLayers for s in size] sizeWithGl = [s + 2 * ghostLayers for s in size]
dim = len(size)
if layout == "fzyx" or layout == 'f' or layout == 'reverseNumpy': if layout == "fzyx" or layout == 'f' or layout == 'reverseNumpy':
return np.empty(sizeWithGl + [numDirections], order='f') layout = tuple(reversed(range(dim+1)))
elif layout == 'c' or layout == 'numpy': elif layout == 'c' or layout == 'numpy':
return np.empty(sizeWithGl + [numDirections], order='c') layout = tuple(range(dim+1))
elif layout == 'zyxf': elif layout == 'zyxf':
res = np.empty(list(reversed(sizeWithGl)) + [numDirections], order='c') layout = tuple(reversed(range(dim))) + (dim,)
res = res.swapaxes(0, 1) return createNumpyArrayWithLayout(sizeWithGl + [numDirections], layout)
if len(size) == 3:
res = res.swapaxes(1, 2)
res = res.swapaxes(0, 1)
return res
# ------------------------------------------- Add output fields to kernel ---------------------------------------------- # ------------------------------------------- Add output fields to kernel ----------------------------------------------
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment