From 843ef07b7415efb81f1dd753385e0ab554d972cc Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Fri, 21 Apr 2017 13:34:52 +0200
Subject: [PATCH] Simpler OpenMP handling & OpenMP parallel boundaries

- periodic kernels not yet OpenMP parallel
---
 boundaries/boundaryhandling.py       | 11 ++++++++---
 boundaries/createindexlist.py        |  3 ++-
 boundaries/createindexlistcython.pyx | 11 +++++++----
 creationfunctions.py                 |  6 +-----
 scenarios.py                         |  4 ++--
 5 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/boundaries/boundaryhandling.py b/boundaries/boundaryhandling.py
index a9248395..14812066 100644
--- a/boundaries/boundaryhandling.py
+++ b/boundaries/boundaryhandling.py
@@ -20,7 +20,7 @@ class BoundaryHandling(object):
             self.kernel = kernel
             self.ast = ast
 
-    def __init__(self, pdfField, domainShape, lbMethod, ghostLayers=1, target='cpu'):
+    def __init__(self, pdfField, domainShape, lbMethod, ghostLayers=1, target='cpu', openMP=True):
         """
         Class for managing boundary kernels
 
@@ -52,6 +52,7 @@ class BoundaryHandling(object):
         self._dirty = False
         self._periodicity = [False, False, False]
         self._target = target
+        self.openMP = openMP
         if target not in ('cpu', 'gpu'):
             raise ValueError("Invalid target '%s' . Allowed values: 'cpu' or 'gpu'" % (target,))
 
@@ -75,6 +76,9 @@ class BoundaryHandling(object):
 
     def setPeriodicity(self, x=False, y=False, z=False):
         """Enable periodic boundary conditions at the border of the domain"""
+        for d in (x, y, z):
+            assert isinstance(d, bool)
+
         self._periodicity = [x, y, z]
         self._compilePeriodicityKernels()
 
@@ -160,7 +164,7 @@ class BoundaryHandling(object):
 
     def clear(self):
         """Removes all boundaries and fills the domain with fluid"""
-        np.fill(self._fluidFlag)
+        self.flagField.fill(self._fluidFlag)
         self._dirty = False
         self._boundaryInfos = []
         self._nameToBoundary = {}
@@ -176,7 +180,8 @@ class BoundaryHandling(object):
                                            target=self._target)
             boundary.ast = ast
             if self._target == 'cpu':
-                from pystencils.cpu import makePythonFunction as makePythonCpuFunction
+                from pystencils.cpu import makePythonFunction as makePythonCpuFunction, addOpenMP
+                addOpenMP(ast, numThreads=self.openMP)
                 boundary.kernel = makePythonCpuFunction(ast, {'indexField': idxField})
             elif self._target == 'gpu':
                 from pystencils.gpucuda import makePythonFunction as makePythonGpuFunction
diff --git a/boundaries/createindexlist.py b/boundaries/createindexlist.py
index f81e43f2..dfb66599 100644
--- a/boundaries/createindexlist.py
+++ b/boundaries/createindexlist.py
@@ -18,7 +18,8 @@ def _createBoundaryIndexListPython(flagFieldArr, nrOfGhostLayers, boundaryMask,
 
     result = []
     gl = nrOfGhostLayers
-    for cell in itertools.product(*[range(gl, i-gl) for i in flagFieldArr.shape]):
+    for cell in itertools.product(*reversed([range(gl, i-gl) for i in flagFieldArr.shape])):
+        cell = cell[::-1]
         if not flagFieldArr[cell] & fluidMask:
             continue
         for dirIdx, direction in enumerate(stencil):
diff --git a/boundaries/createindexlistcython.pyx b/boundaries/createindexlistcython.pyx
index 94acf552..039ba817 100644
--- a/boundaries/createindexlistcython.pyx
+++ b/boundaries/createindexlistcython.pyx
@@ -21,8 +21,8 @@ def createBoundaryIndexList2D(object[IntegerType, ndim=2] flagField,
     boundaryIndexList = []
     numDirections = stencil.shape[0]
 
-    for x in range(nrOfGhostLayers,xs-nrOfGhostLayers):
-        for y in range(nrOfGhostLayers,ys-nrOfGhostLayers):
+    for y in range(nrOfGhostLayers,ys-nrOfGhostLayers):
+        for x in range(nrOfGhostLayers,xs-nrOfGhostLayers):
             if flagField[x,y] & fluidMask:
                 for dirIdx in range(1, numDirections):
                     dx = stencil[dirIdx,0]
@@ -44,9 +44,12 @@ def createBoundaryIndexList3D(object[IntegerType, ndim=3] flagField,
     boundaryIndexList = []
     numDirections = stencil.shape[0]
 
-    for x in range(nrOfGhostLayers,xs-nrOfGhostLayers):
+    #for x in range(nrOfGhostLayers,xs-nrOfGhostLayers):
+    #    for y in range(nrOfGhostLayers,ys-nrOfGhostLayers):
+    #        for z in range(nrOfGhostLayers, zs-nrOfGhostLayers):
+    for z in range(nrOfGhostLayers, zs-nrOfGhostLayers):
         for y in range(nrOfGhostLayers,ys-nrOfGhostLayers):
-            for z in range(nrOfGhostLayers, zs-nrOfGhostLayers):
+            for x in range(nrOfGhostLayers,xs-nrOfGhostLayers):
                 if flagField[x, y, z] & fluidMask:
                     for dirIdx in range(1, numDirections):
                         dx = stencil[dirIdx,0]
diff --git a/creationfunctions.py b/creationfunctions.py
index de01d181..948b4aa0 100644
--- a/creationfunctions.py
+++ b/creationfunctions.py
@@ -213,11 +213,7 @@ def createLatticeBoltzmannFunction(ast=None, optimizationParams={}, **kwargs):
 
     if optParams['target'] == 'cpu':
         from pystencils.cpu import makePythonFunction as makePythonCpuFunction, addOpenMP
-        if 'openMP' in optParams:
-            if isinstance(optParams['openMP'], bool) and optParams['openMP']:
-                addOpenMP(ast)
-            elif not isinstance(optParams['openMP'], bool) and isinstance(optParams['openMP'], int):
-                addOpenMP(ast, numThreads=optParams['openMP'])
+        addOpenMP(ast, numThreads=optParams['openMP'])
         res = makePythonCpuFunction(ast)
     elif optParams['target'] == 'gpu':
         from pystencils.gpucuda import makePythonFunction as makePythonGpuFunction
diff --git a/scenarios.py b/scenarios.py
index e095af0e..90c45077 100644
--- a/scenarios.py
+++ b/scenarios.py
@@ -251,7 +251,6 @@ class Scenario(object):
         if 'stencil' not in methodParameters:
             methodParameters['stencil'] = 'D2Q9' if D == 2 else 'D3Q27'
 
-
         methodParameters, optimizationParams = updateWithDefaultParameters(methodParameters, optimizationParams)
 
         Q = len(getStencil(methodParameters['stencil']))
@@ -280,7 +279,8 @@ class Scenario(object):
                                                               pdfArr=self._pdfArrays[0], target='cpu')
 
         self._boundaryHandling = BoundaryHandling(self._pdfArrays[0], domainSize, self.method,
-                                                  target=optimizationParams['target'])
+                                                  target=optimizationParams['target'],
+                                                  openMP=optimizationParams['openMP'])
 
         self._preUpdateFunctions = preUpdateFunctions
         self.kernelParams = kernelParams
-- 
GitLab