Skip to content
Snippets Groups Projects
Commit f50516b6 authored by Martin Bauer's avatar Martin Bauer
Browse files

lbmpy: cuda & square channel scenario

parent 677c42f9
Branches
Tags
No related merge requests found
...@@ -127,9 +127,9 @@ class LbmMethodInfo(CustomCppCode): ...@@ -127,9 +127,9 @@ class LbmMethodInfo(CustomCppCode):
inverseDir = tuple([-i for i in direction]) inverseDir = tuple([-i for i in direction])
invDirs.append(str(stencil.index(inverseDir))) invDirs.append(str(stencil.index(inverseDir)))
code += "static const int %s [] = { %s };\n" % (INV_DIR_SYMBOL.name, ", ".join(invDirs)) code += "const int %s [] = { %s };\n" % (INV_DIR_SYMBOL.name, ", ".join(invDirs))
weights = [str(w.evalf()) for w in lbMethod.weights] weights = [str(w.evalf()) for w in lbMethod.weights]
code += "static const double %s [] = { %s };\n" % (WEIGHTS_SYMBOL.name, ",".join(weights)) code += "const double %s [] = { %s };\n" % (WEIGHTS_SYMBOL.name, ",".join(weights))
super(LbmMethodInfo, self).__init__(code, symbolsRead=set(), symbolsDefined=symbolsDefined) super(LbmMethodInfo, self).__init__(code, symbolsRead=set(), symbolsDefined=symbolsDefined)
......
...@@ -12,7 +12,6 @@ from lbmpy.updatekernels import createStreamPullKernel, createPdfArray ...@@ -12,7 +12,6 @@ from lbmpy.updatekernels import createStreamPullKernel, createPdfArray
def _getParams(params, optParams): def _getParams(params, optParams):
defaultMethodDescription = { defaultMethodDescription = {
'target': 'cpu',
'stencil': 'D2Q9', 'stencil': 'D2Q9',
'method': 'srt', # can be srt, trt or mrt 'method': 'srt', # can be srt, trt or mrt
'relaxationRates': sp.symbols("omega_:10"), 'relaxationRates': sp.symbols("omega_:10"),
...@@ -34,6 +33,7 @@ def _getParams(params, optParams): ...@@ -34,6 +33,7 @@ def _getParams(params, optParams):
'fieldSize': None, 'fieldSize': None,
'fieldLayout': 'reverseNumpy', # can be 'numpy' (='c'), 'reverseNumpy' (='f'), 'fzyx', 'zyxf' 'fieldLayout': 'reverseNumpy', # can be 'numpy' (='c'), 'reverseNumpy' (='f'), 'fzyx', 'zyxf'
'target': 'cpu',
'openMP': True, 'openMP': True,
'pdfArr': None, 'pdfArr': None,
} }
...@@ -58,7 +58,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs): ...@@ -58,7 +58,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
params['optimizationParams'] = optParams params['optimizationParams'] = optParams
ast = createLatticeBoltzmannAst(**params) ast = createLatticeBoltzmannAst(**params)
if params['target'] == 'cpu': if optParams['target'] == 'cpu':
from pystencils.cpu import makePythonFunction as makePythonCpuFunction, addOpenMP from pystencils.cpu import makePythonFunction as makePythonCpuFunction, addOpenMP
if 'openMP' in optParams: if 'openMP' in optParams:
if isinstance(optParams['openMP'], bool) and optParams['openMP']: if isinstance(optParams['openMP'], bool) and optParams['openMP']:
...@@ -66,7 +66,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs): ...@@ -66,7 +66,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
elif isinstance(optParams['openMP'], int): elif isinstance(optParams['openMP'], int):
addOpenMP(ast, numThreads=optParams['openMP']) addOpenMP(ast, numThreads=optParams['openMP'])
res = makePythonCpuFunction(ast) res = makePythonCpuFunction(ast)
elif params['target'] == 'gpu': elif optParams['target'] == 'gpu':
from pystencils.gpucuda import makePythonFunction as makePythonGpuFunction from pystencils.gpucuda import makePythonFunction as makePythonGpuFunction
res = makePythonGpuFunction(ast) res = makePythonGpuFunction(ast)
else: else:
...@@ -84,7 +84,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs): ...@@ -84,7 +84,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
params['optimizationParams'] = optimizationParams params['optimizationParams'] = optimizationParams
updateRule = createLatticeBoltzmannUpdateRule(**params) updateRule = createLatticeBoltzmannUpdateRule(**params)
if params['target'] == 'cpu': if optParams['target'] == 'cpu':
from pystencils.cpu import createKernel from pystencils.cpu import createKernel
if 'splitGroups' in updateRule.simplificationHints: if 'splitGroups' in updateRule.simplificationHints:
print("splitting!") print("splitting!")
...@@ -92,7 +92,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs): ...@@ -92,7 +92,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs):
else: else:
splitGroups = () splitGroups = ()
res = createKernel(updateRule.allEquations, splitGroups=splitGroups) res = createKernel(updateRule.allEquations, splitGroups=splitGroups)
elif params['target'] == 'gpu': elif optParams['target'] == 'gpu':
from pystencils.gpucuda import createCUDAKernel from pystencils.gpucuda import createCUDAKernel
res = createCUDAKernel(updateRule.allEquations) res = createCUDAKernel(updateRule.allEquations)
else: else:
......
...@@ -385,10 +385,10 @@ def getDefaultMomentSetForStencil(stencil): ...@@ -385,10 +385,10 @@ def getDefaultMomentSetForStencil(stencil):
if stencilsHaveSameEntries(stencil, getStencil("D3Q15")): if stencilsHaveSameEntries(stencil, getStencil("D3Q15")):
x, y, z = MOMENT_SYMBOLS x, y, z = MOMENT_SYMBOLS
nonMatchedMoments = [(1, 2, 0), (2, 2, 0), (1, 1, 2), (1, 2, 2), (2, 2, 2)] nonMatchedMoments = [(1, 2, 0), (2, 2, 0), (1, 1, 2), (1, 2, 2), (2, 2, 2)]
additionalMoments = (x**2 * y**2 + x**2 * z**2 + y**2 * z**2, additionalMoments = (6 * (x**2 * y**2 + x**2 * z**2 + y**2 * z**2),
x * (y**2 + z**2), 3 * (x * (y**2 + z**2)),
y * (x**2 + z**2), 3 * (y * (x**2 + z**2)),
z * (x**2 + y**2)) 3 * (z * (x**2 + y**2)))
toRemove = set(extendMomentsWithPermutations(nonMatchedMoments)) toRemove = set(extendMomentsWithPermutations(nonMatchedMoments))
return toPoly(set(all27Moments) - toRemove) + additionalMoments return toPoly(set(all27Moments) - toRemove) + additionalMoments
......
...@@ -10,6 +10,9 @@ from lbmpy.stencils import getStencil ...@@ -10,6 +10,9 @@ from lbmpy.stencils import getStencil
def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizationParameters, lbmKernel=None, def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizationParameters, lbmKernel=None,
initialVelocity=None, preUpdateFunctions=[]): initialVelocity=None, preUpdateFunctions=[]):
if 'target' not in optimizationParameters:
optimizationParameters['target'] = 'cpu'
ghostLayers = 1 ghostLayers = 1
domainSizeWithGhostLayer = tuple([s + 2 * ghostLayers for s in domainSize]) domainSizeWithGhostLayer = tuple([s + 2 * ghostLayers for s in domainSize])
D = len(domainSize) D = len(domainSize)
...@@ -32,7 +35,8 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio ...@@ -32,7 +35,8 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio
# Boundary setup # Boundary setup
if boundarySetupFunction is not None: if boundarySetupFunction is not None:
symPdfField = Field.createFromNumpyArray('pdfs', pdfArrays[0], indexDimensions=1) symPdfField = Field.createFromNumpyArray('pdfs', pdfArrays[0], indexDimensions=1)
boundaryHandling = BoundaryHandling(symPdfField, domainSize, lbmKernel.method) boundaryHandling = BoundaryHandling(symPdfField, domainSize, lbmKernel.method,
target=optimizationParameters['target'])
boundarySetupFunction(boundaryHandling=boundaryHandling, method=method) boundarySetupFunction(boundaryHandling=boundaryHandling, method=method)
boundaryHandling.prepare() boundaryHandling.prepare()
else: else:
...@@ -41,16 +45,21 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio ...@@ -41,16 +45,21 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio
# Macroscopic value input/output # Macroscopic value input/output
densityArr = [np.zeros(domainSizeWithGhostLayer)] densityArr = [np.zeros(domainSizeWithGhostLayer)]
velocityArr = [np.zeros(domainSizeWithGhostLayer + (D,))] velocityArr = [np.zeros(domainSizeWithGhostLayer + (D,))]
getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0]) getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0], target='cpu')
if initialVelocity is None: if initialVelocity is None:
initialVelocity = [0] * D initialVelocity = [0] * D
setMacroscopic = compileMacroscopicValuesSetter(method, {'density': 1.0, 'velocity': initialVelocity}, setMacroscopic = compileMacroscopicValuesSetter(method, {'density': 1.0, 'velocity': initialVelocity},
pdfArr=pdfArrays[0]) pdfArr=pdfArrays[0], target='cpu')
setMacroscopic(pdfs=pdfArrays[0]) setMacroscopic(pdfs=pdfArrays[0])
# Run simulation if optimizationParameters['target'] == 'gpu':
def timeLoop(timeSteps): import pycuda.gpuarray as gpuarray
pdfGpuArrays = [gpuarray.to_gpu(a) for a in pdfArrays]
else:
pdfGpuArrays = []
def cpuTimeLoop(timeSteps):
for t in range(timeSteps): for t in range(timeSteps):
for f in preUpdateFunctions: for f in preUpdateFunctions:
f(pdfArrays[0]) f(pdfArrays[0])
...@@ -62,9 +71,31 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio ...@@ -62,9 +71,31 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio
getMacroscopic(pdfs=pdfArrays[0], density=densityArr[0], velocity=velocityArr[0]) getMacroscopic(pdfs=pdfArrays[0], density=densityArr[0], velocity=velocityArr[0])
return pdfArrays[0], densityArr[0], velocityArr[0] return pdfArrays[0], densityArr[0], velocityArr[0]
timeLoop.kernel = lbmKernel def gpuTimeLoop(timeSteps):
# Transfer data to gpu
for cpuArr, gpuArr in zip(pdfArrays, pdfGpuArrays):
gpuArr.set(cpuArr)
for t in range(timeSteps):
for f in preUpdateFunctions:
f(pdfGpuArrays[0])
if boundaryHandling is not None:
boundaryHandling(pdfs=pdfGpuArrays[0])
lbmKernel(src=pdfGpuArrays[0], dst=pdfGpuArrays[1])
pdfGpuArrays[0], pdfGpuArrays[1] = pdfGpuArrays[1], pdfGpuArrays[0]
# Transfer data from gpu to cpu
for cpuArr, gpuArr in zip(pdfArrays, pdfGpuArrays):
gpuArr.get(cpuArr)
getMacroscopic(pdfs=pdfArrays[0], density=densityArr[0], velocity=velocityArr[0])
return pdfArrays[0], densityArr[0], velocityArr[0]
cpuTimeLoop.kernel = lbmKernel
gpuTimeLoop.kernel = lbmKernel
return timeLoop return gpuTimeLoop if optimizationParameters['target'] == 'gpu' else cpuTimeLoop
def runLidDrivenCavity(domainSize, lidVelocity=0.005, optimizationParameters={}, lbmKernel=None, **kwargs): def runLidDrivenCavity(domainSize, lidVelocity=0.005, optimizationParameters={}, lbmKernel=None, **kwargs):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment