From 68f909eed553bc9fe7923d024371f4945a87b740 Mon Sep 17 00:00:00 2001 From: Michael Kuron <m.kuron@gmx.de> Date: Thu, 27 May 2021 13:41:10 +0200 Subject: [PATCH 1/2] Check if fluctuating LB compiles when vectorized --- lbmpy_tests/test_fluctuating_lb.py | 49 ++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/lbmpy_tests/test_fluctuating_lb.py b/lbmpy_tests/test_fluctuating_lb.py index 4055b0e2..76f4bdf9 100644 --- a/lbmpy_tests/test_fluctuating_lb.py +++ b/lbmpy_tests/test_fluctuating_lb.py @@ -6,6 +6,10 @@ from lbmpy.creationfunctions import * from lbmpy.macroscopic_value_kernels import macroscopic_values_setter import numpy as np from lbmpy.moments import is_bulk_moment, is_shear_moment, get_order +from pystencils.rng import PhiloxTwoDoubles + +import pytest +from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets def single_component_maxwell(x1, x2, kT, mass): @@ -54,7 +58,7 @@ def add_pressure_output_to_collision_rule(collision_rule, pressure_field): collision_rule.main_assignments = collision_rule.main_assignments + pressure_ouput -def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None): +def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None, cpu_vectorize_info=None): # Parameters stencil = get_stencil('D3Q19') @@ -81,7 +85,8 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om collision_rule = create_lb_collision_rule( method, fluctuating={ - 'temperature': kT + 'temperature': kT, + 'rng_node': PhiloxTwoDoubles, }, optimization={'cse_global': True} ) @@ -98,7 +103,7 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om {'density': rho, 'velocity': u}) opts = {'cpu_openmp': True, - 'cpu_vectorize_info': None, + 'cpu_vectorize_info': cpu_vectorize_info, 'target': dh.default_target} # Compile kernels @@ -242,3 +247,41 @@ def test_point_force(target="cpu"): momentum, introduced_momentum + 0.5 * point_force, atol=1E-10) dh.cpu_arrays["force"][force_pos[0], force_pos[1], force_pos[2]] = np.zeros(3) + +@pytest.mark.skipif(not get_supported_instruction_sets(), reason="No vector instruction sets supported") +@pytest.mark.parametrize('assume_aligned', (True, False)) +@pytest.mark.parametrize('assume_inner_stride_one', (True, False)) +@pytest.mark.parametrize('assume_sufficient_line_padding', (True, False)) +def test_vectorization(assume_aligned, assume_inner_stride_one, assume_sufficient_line_padding): + method = create_mrt_orthogonal( + stencil=get_stencil('D2Q9'), + compressible=True, + weighted=True, + relaxation_rate_getter=rr_getter) + collision_rule = create_lb_collision_rule( + method, + fluctuating={ + 'temperature': sp.Symbol("kT"), + 'rng_node': PhiloxTwoDoubles, + 'block_offsets': (0, 0), + }, + optimization={'cse_global': True} + ) + + collision = create_lb_update_rule(collision_rule=collision_rule, + stencil=method.stencil, + method=method, + compressible=True, + kernel_type='collide_only') + + opts = {'cpu_openmp': False, + 'cpu_vectorize_info': { + 'instruction_set': get_supported_instruction_sets()[0], + 'assume_aligned': assume_aligned, + 'assume_inner_stride_one': assume_inner_stride_one, + 'assume_sufficient_line_padding': assume_sufficient_line_padding, + }, + 'target': 'cpu'} + + code = ps.create_kernel(collision, **opts) + code.compile() -- GitLab From 982feddfc73f0df56a574bb54be8d37f85798976 Mon Sep 17 00:00:00 2001 From: Michael Kuron <m.kuron@gmx.de> Date: Thu, 27 May 2021 15:29:32 +0200 Subject: [PATCH 2/2] check for warning when vectorizing without stride-one --- lbmpy_tests/test_fluctuating_lb.py | 32 +++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/lbmpy_tests/test_fluctuating_lb.py b/lbmpy_tests/test_fluctuating_lb.py index 76f4bdf9..3ad3958e 100644 --- a/lbmpy_tests/test_fluctuating_lb.py +++ b/lbmpy_tests/test_fluctuating_lb.py @@ -9,7 +9,8 @@ from lbmpy.moments import is_bulk_moment, is_shear_moment, get_order from pystencils.rng import PhiloxTwoDoubles import pytest -from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets +from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set +from pystencils.cpu.cpujit import get_compiler_config def single_component_maxwell(x1, x2, kT, mass): @@ -58,7 +59,7 @@ def add_pressure_output_to_collision_rule(collision_rule, pressure_field): collision_rule.main_assignments = collision_rule.main_assignments + pressure_ouput -def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None, cpu_vectorize_info=None): +def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None): # Parameters stencil = get_stencil('D3Q19') @@ -85,8 +86,7 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om collision_rule = create_lb_collision_rule( method, fluctuating={ - 'temperature': kT, - 'rng_node': PhiloxTwoDoubles, + 'temperature': kT }, optimization={'cse_global': True} ) @@ -103,7 +103,7 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om {'density': rho, 'velocity': u}) opts = {'cpu_openmp': True, - 'cpu_vectorize_info': cpu_vectorize_info, + 'cpu_vectorize_info': None, 'target': dh.default_target} # Compile kernels @@ -248,6 +248,7 @@ def test_point_force(target="cpu"): dh.cpu_arrays["force"][force_pos[0], force_pos[1], force_pos[2]] = np.zeros(3) + @pytest.mark.skipif(not get_supported_instruction_sets(), reason="No vector instruction sets supported") @pytest.mark.parametrize('assume_aligned', (True, False)) @pytest.mark.parametrize('assume_inner_stride_one', (True, False)) @@ -274,14 +275,31 @@ def test_vectorization(assume_aligned, assume_inner_stride_one, assume_sufficien compressible=True, kernel_type='collide_only') + instruction_sets = get_supported_instruction_sets() + if get_compiler_config()['os'] == 'windows': + # skip instruction sets supported by the CPU but not by the compiler + if 'avx' in instruction_sets and ('/arch:avx2' not in get_compiler_config()['flags'].lower() + and '/arch:avx512' not in get_compiler_config()['flags'].lower()): + instruction_sets.remove('avx') + if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower(): + instruction_sets.remove('avx512') + instruction_set = instruction_sets[-1] + opts = {'cpu_openmp': False, 'cpu_vectorize_info': { - 'instruction_set': get_supported_instruction_sets()[0], + 'instruction_set': instruction_set, 'assume_aligned': assume_aligned, 'assume_inner_stride_one': assume_inner_stride_one, 'assume_sufficient_line_padding': assume_sufficient_line_padding, }, 'target': 'cpu'} - code = ps.create_kernel(collision, **opts) + if not assume_inner_stride_one and 'storeS' not in get_vector_instruction_set('double', instruction_set): + with pytest.warns(UserWarning) as warn: + code = ps.create_kernel(collision, **opts) + assert 'Could not vectorize loop' in warn[0].message.args[0] + else: + with pytest.warns(None) as warn: + code = ps.create_kernel(collision, **opts) + assert len(warn) == 0 code.compile() -- GitLab