From 68f909eed553bc9fe7923d024371f4945a87b740 Mon Sep 17 00:00:00 2001
From: Michael Kuron <m.kuron@gmx.de>
Date: Thu, 27 May 2021 13:41:10 +0200
Subject: [PATCH 1/2] Check if fluctuating LB compiles when vectorized

---
 lbmpy_tests/test_fluctuating_lb.py | 49 ++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/lbmpy_tests/test_fluctuating_lb.py b/lbmpy_tests/test_fluctuating_lb.py
index 4055b0e2..76f4bdf9 100644
--- a/lbmpy_tests/test_fluctuating_lb.py
+++ b/lbmpy_tests/test_fluctuating_lb.py
@@ -6,6 +6,10 @@ from lbmpy.creationfunctions import *
 from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
 import numpy as np
 from lbmpy.moments import is_bulk_moment, is_shear_moment, get_order
+from pystencils.rng import PhiloxTwoDoubles
+
+import pytest
+from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
 
 
 def single_component_maxwell(x1, x2, kT, mass):
@@ -54,7 +58,7 @@ def add_pressure_output_to_collision_rule(collision_rule, pressure_field):
     collision_rule.main_assignments = collision_rule.main_assignments + pressure_ouput
 
 
-def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None):
+def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None, cpu_vectorize_info=None):
 
     # Parameters
     stencil = get_stencil('D3Q19')
@@ -81,7 +85,8 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om
     collision_rule = create_lb_collision_rule(
         method,
         fluctuating={
-            'temperature': kT
+            'temperature': kT,
+            'rng_node': PhiloxTwoDoubles,
         },
         optimization={'cse_global': True}
     )
@@ -98,7 +103,7 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om
                                                    {'density': rho, 'velocity': u})
 
     opts = {'cpu_openmp': True,
-            'cpu_vectorize_info': None,
+            'cpu_vectorize_info': cpu_vectorize_info,
             'target': dh.default_target}
 
     # Compile kernels
@@ -242,3 +247,41 @@ def test_point_force(target="cpu"):
             momentum, introduced_momentum + 0.5 * point_force, atol=1E-10)
         dh.cpu_arrays["force"][force_pos[0],
                                force_pos[1], force_pos[2]] = np.zeros(3)
+
+@pytest.mark.skipif(not get_supported_instruction_sets(), reason="No vector instruction sets supported")
+@pytest.mark.parametrize('assume_aligned', (True, False))
+@pytest.mark.parametrize('assume_inner_stride_one', (True, False))
+@pytest.mark.parametrize('assume_sufficient_line_padding', (True, False))
+def test_vectorization(assume_aligned, assume_inner_stride_one, assume_sufficient_line_padding):
+    method = create_mrt_orthogonal(
+        stencil=get_stencil('D2Q9'),
+        compressible=True,
+        weighted=True,
+        relaxation_rate_getter=rr_getter)
+    collision_rule = create_lb_collision_rule(
+        method,
+        fluctuating={
+            'temperature': sp.Symbol("kT"),
+            'rng_node': PhiloxTwoDoubles,
+            'block_offsets': (0, 0),
+        },
+        optimization={'cse_global': True}
+    )
+
+    collision = create_lb_update_rule(collision_rule=collision_rule,
+                                      stencil=method.stencil,
+                                      method=method,
+                                      compressible=True,
+                                      kernel_type='collide_only')
+
+    opts = {'cpu_openmp': False,
+            'cpu_vectorize_info': {
+                'instruction_set': get_supported_instruction_sets()[0],
+                'assume_aligned': assume_aligned,
+                'assume_inner_stride_one': assume_inner_stride_one,
+                'assume_sufficient_line_padding': assume_sufficient_line_padding,
+            },
+            'target': 'cpu'}
+
+    code = ps.create_kernel(collision, **opts)
+    code.compile()
-- 
GitLab


From 982feddfc73f0df56a574bb54be8d37f85798976 Mon Sep 17 00:00:00 2001
From: Michael Kuron <m.kuron@gmx.de>
Date: Thu, 27 May 2021 15:29:32 +0200
Subject: [PATCH 2/2] Check for warning when vectorizing without stride-one

---
 lbmpy_tests/test_fluctuating_lb.py | 32 +++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/lbmpy_tests/test_fluctuating_lb.py b/lbmpy_tests/test_fluctuating_lb.py
index 76f4bdf9..3ad3958e 100644
--- a/lbmpy_tests/test_fluctuating_lb.py
+++ b/lbmpy_tests/test_fluctuating_lb.py
@@ -9,7 +9,8 @@ from lbmpy.moments import is_bulk_moment, is_shear_moment, get_order
 from pystencils.rng import PhiloxTwoDoubles
 
 import pytest
-from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
+from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
+from pystencils.cpu.cpujit import get_compiler_config
 
 
 def single_component_maxwell(x1, x2, kT, mass):
@@ -58,7 +59,7 @@ def add_pressure_output_to_collision_rule(collision_rule, pressure_field):
     collision_rule.main_assignments = collision_rule.main_assignments + pressure_ouput
 
 
-def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None, cpu_vectorize_info=None):
+def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None):
 
     # Parameters
     stencil = get_stencil('D3Q19')
@@ -85,8 +86,7 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om
     collision_rule = create_lb_collision_rule(
         method,
         fluctuating={
-            'temperature': kT,
-            'rng_node': PhiloxTwoDoubles,
+            'temperature': kT
         },
         optimization={'cse_global': True}
     )
@@ -103,7 +103,7 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om
                                                    {'density': rho, 'velocity': u})
 
     opts = {'cpu_openmp': True,
-            'cpu_vectorize_info': cpu_vectorize_info,
+            'cpu_vectorize_info': None,
             'target': dh.default_target}
 
     # Compile kernels
@@ -248,6 +248,7 @@ def test_point_force(target="cpu"):
         dh.cpu_arrays["force"][force_pos[0],
                                force_pos[1], force_pos[2]] = np.zeros(3)
 
+
 @pytest.mark.skipif(not get_supported_instruction_sets(), reason="No vector instruction sets supported")
 @pytest.mark.parametrize('assume_aligned', (True, False))
 @pytest.mark.parametrize('assume_inner_stride_one', (True, False))
@@ -274,14 +275,31 @@ def test_vectorization(assume_aligned, assume_inner_stride_one, assume_sufficien
                                       compressible=True,
                                       kernel_type='collide_only')
 
+    instruction_sets = get_supported_instruction_sets()
+    if get_compiler_config()['os'] == 'windows':
+        # skip instruction sets supported by the CPU but not by the compiler
+        if 'avx' in instruction_sets and ('/arch:avx2' not in get_compiler_config()['flags'].lower()
+                                          and '/arch:avx512' not in get_compiler_config()['flags'].lower()):
+            instruction_sets.remove('avx')
+        if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower():
+            instruction_sets.remove('avx512')
+    instruction_set = instruction_sets[-1]
+
     opts = {'cpu_openmp': False,
             'cpu_vectorize_info': {
-                'instruction_set': get_supported_instruction_sets()[0],
+                'instruction_set': instruction_set,
                 'assume_aligned': assume_aligned,
                 'assume_inner_stride_one': assume_inner_stride_one,
                 'assume_sufficient_line_padding': assume_sufficient_line_padding,
             },
             'target': 'cpu'}
 
-    code = ps.create_kernel(collision, **opts)
+    if not assume_inner_stride_one and 'storeS' not in get_vector_instruction_set('double', instruction_set):
+        with pytest.warns(UserWarning, match='Could not vectorize loop'):  # a vectorization warning must be raised here
+            code = ps.create_kernel(collision, **opts)
+    else:
+        import warnings  # local import: keeps this hunk's line count (and the import hunk) unchanged
+        with warnings.catch_warnings():  # replaces pytest.warns(None), which was removed in pytest 8
+            warnings.simplefilter('error')  # any warning raised during kernel creation fails the test
+            code = ps.create_kernel(collision, **opts)
     code.compile()
-- 
GitLab