diff --git a/boundaries/boundaryhandling.py b/boundaries/boundaryhandling.py index 4cfa82d51fee33a3b4dc8182ec07c23c06c82a53..5ca40d6ea7bdf2da1641d1dfa5f83e784c669407 100644 --- a/boundaries/boundaryhandling.py +++ b/boundaries/boundaryhandling.py @@ -127,9 +127,9 @@ class LbmMethodInfo(CustomCppCode): inverseDir = tuple([-i for i in direction]) invDirs.append(str(stencil.index(inverseDir))) - code += "static const int %s [] = { %s };\n" % (INV_DIR_SYMBOL.name, ", ".join(invDirs)) + code += "const int %s [] = { %s };\n" % (INV_DIR_SYMBOL.name, ", ".join(invDirs)) weights = [str(w.evalf()) for w in lbMethod.weights] - code += "static const double %s [] = { %s };\n" % (WEIGHTS_SYMBOL.name, ",".join(weights)) + code += "const double %s [] = { %s };\n" % (WEIGHTS_SYMBOL.name, ",".join(weights)) super(LbmMethodInfo, self).__init__(code, symbolsRead=set(), symbolsDefined=symbolsDefined) diff --git a/creationfunctions.py b/creationfunctions.py index 173c5c6ac22f3acaedef5d69bd2fb106e4a55e6c..f2df502c881bf4809aa0f6e74a1ee88f5e4c0efd 100644 --- a/creationfunctions.py +++ b/creationfunctions.py @@ -12,7 +12,6 @@ from lbmpy.updatekernels import createStreamPullKernel, createPdfArray def _getParams(params, optParams): defaultMethodDescription = { - 'target': 'cpu', 'stencil': 'D2Q9', 'method': 'srt', # can be srt, trt or mrt 'relaxationRates': sp.symbols("omega_:10"), @@ -34,6 +33,7 @@ def _getParams(params, optParams): 'fieldSize': None, 'fieldLayout': 'reverseNumpy', # can be 'numpy' (='c'), 'reverseNumpy' (='f'), 'fzyx', 'zyxf' + 'target': 'cpu', 'openMP': True, 'pdfArr': None, } @@ -58,7 +58,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs): params['optimizationParams'] = optParams ast = createLatticeBoltzmannAst(**params) - if params['target'] == 'cpu': + if optParams['target'] == 'cpu': from pystencils.cpu import makePythonFunction as makePythonCpuFunction, addOpenMP if 'openMP' in optParams: if isinstance(optParams['openMP'], bool) and optParams['openMP']: @@ -66,7 +66,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs): elif isinstance(optParams['openMP'], int): addOpenMP(ast, numThreads=optParams['openMP']) res = makePythonCpuFunction(ast) - elif params['target'] == 'gpu': + elif optParams['target'] == 'gpu': from pystencils.gpucuda import makePythonFunction as makePythonGpuFunction res = makePythonGpuFunction(ast) else: @@ -84,7 +84,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs): params['optimizationParams'] = optimizationParams updateRule = createLatticeBoltzmannUpdateRule(**params) - if params['target'] == 'cpu': + if optParams['target'] == 'cpu': from pystencils.cpu import createKernel if 'splitGroups' in updateRule.simplificationHints: print("splitting!") @@ -92,7 +92,7 @@ def createLatticeBoltzmannAst(updateRule=None, optimizationParams={}, **kwargs): else: splitGroups = () res = createKernel(updateRule.allEquations, splitGroups=splitGroups) - elif params['target'] == 'gpu': + elif optParams['target'] == 'gpu': from pystencils.gpucuda import createCUDAKernel res = createCUDAKernel(updateRule.allEquations) else: diff --git a/moments.py b/moments.py index 6c7ce3c9c91cac28c1ec1f105fbba6cdbf0f4df3..b2209ec20854424a4bc08ce1aa42da385a0f5958 100644 --- a/moments.py +++ b/moments.py @@ -385,10 +385,10 @@ def getDefaultMomentSetForStencil(stencil): if stencilsHaveSameEntries(stencil, getStencil("D3Q15")): x, y, z = MOMENT_SYMBOLS nonMatchedMoments = [(1, 2, 0), (2, 2, 0), (1, 1, 2), (1, 2, 2), (2, 2, 2)] - additionalMoments = (x**2 * y**2 + x**2 * z**2 + y**2 * z**2, - x * (y**2 + z**2), - y * (x**2 + z**2), - z * (x**2 + y**2)) + additionalMoments = (6 * (x**2 * y**2 + x**2 * z**2 + y**2 * z**2), + 3 * (x * (y**2 + z**2)), + 3 * (y * (x**2 + z**2)), + 3 * (z * (x**2 + y**2))) toRemove = set(extendMomentsWithPermutations(nonMatchedMoments)) return toPoly(set(all27Moments) - toRemove) + additionalMoments diff --git a/serialscenario.py b/serialscenario.py index 77b26c49a10857cfeedbb7d8856824c8e2c63b86..ca4a93fb45a9ade3028a6f3bcc3cc653cc150a34 100644 --- a/serialscenario.py +++ b/serialscenario.py @@ -10,6 +10,9 @@ from lbmpy.stencils import getStencil def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizationParameters, lbmKernel=None, initialVelocity=None, preUpdateFunctions=[]): + if 'target' not in optimizationParameters: + optimizationParameters['target'] = 'cpu' + ghostLayers = 1 domainSizeWithGhostLayer = tuple([s + 2 * ghostLayers for s in domainSize]) D = len(domainSize) @@ -32,7 +35,8 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio # Boundary setup if boundarySetupFunction is not None: symPdfField = Field.createFromNumpyArray('pdfs', pdfArrays[0], indexDimensions=1) - boundaryHandling = BoundaryHandling(symPdfField, domainSize, lbmKernel.method) + boundaryHandling = BoundaryHandling(symPdfField, domainSize, lbmKernel.method, + target=optimizationParameters['target']) boundarySetupFunction(boundaryHandling=boundaryHandling, method=method) boundaryHandling.prepare() else: @@ -41,16 +45,21 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio # Macroscopic value input/output densityArr = [np.zeros(domainSizeWithGhostLayer)] velocityArr = [np.zeros(domainSizeWithGhostLayer + (D,))] - getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0]) + getMacroscopic = compileMacroscopicValuesGetter(method, ['density', 'velocity'], pdfArr=pdfArrays[0], target='cpu') if initialVelocity is None: initialVelocity = [0] * D setMacroscopic = compileMacroscopicValuesSetter(method, {'density': 1.0, 'velocity': initialVelocity}, - pdfArr=pdfArrays[0]) + pdfArr=pdfArrays[0], target='cpu') setMacroscopic(pdfs=pdfArrays[0]) - # Run simulation - def timeLoop(timeSteps): + if optimizationParameters['target'] == 'gpu': + import pycuda.gpuarray as gpuarray + pdfGpuArrays = [gpuarray.to_gpu(a) for a in pdfArrays] + else: + pdfGpuArrays = [] + + def cpuTimeLoop(timeSteps): for t in range(timeSteps): for f in preUpdateFunctions: f(pdfArrays[0]) @@ -62,9 +71,31 @@ def runScenario(domainSize, boundarySetupFunction, methodParameters, optimizatio getMacroscopic(pdfs=pdfArrays[0], density=densityArr[0], velocity=velocityArr[0]) return pdfArrays[0], densityArr[0], velocityArr[0] - timeLoop.kernel = lbmKernel + def gpuTimeLoop(timeSteps): + # Transfer data to gpu + for cpuArr, gpuArr in zip(pdfArrays, pdfGpuArrays): + gpuArr.set(cpuArr) + + for t in range(timeSteps): + for f in preUpdateFunctions: + f(pdfGpuArrays[0]) + if boundaryHandling is not None: + boundaryHandling(pdfs=pdfGpuArrays[0]) + lbmKernel(src=pdfGpuArrays[0], dst=pdfGpuArrays[1]) + + pdfGpuArrays[0], pdfGpuArrays[1] = pdfGpuArrays[1], pdfGpuArrays[0] + + # Transfer data from gpu to cpu + for cpuArr, gpuArr in zip(pdfArrays, pdfGpuArrays): + gpuArr.get(cpuArr) + + getMacroscopic(pdfs=pdfArrays[0], density=densityArr[0], velocity=velocityArr[0]) + return pdfArrays[0], densityArr[0], velocityArr[0] + + cpuTimeLoop.kernel = lbmKernel + gpuTimeLoop.kernel = lbmKernel - return timeLoop + return gpuTimeLoop if optimizationParameters['target'] == 'gpu' else cpuTimeLoop def runLidDrivenCavity(domainSize, lidVelocity=0.005, optimizationParameters={}, lbmKernel=None, **kwargs):