diff --git a/lbmpy_tests/test_fluctuating_lb.py b/lbmpy_tests/test_fluctuating_lb.py index 76f4bdf9afa78e26b9fee10bb7e41ead5c234ed0..3ad3958e4691342d5d97e7f137c3103bcdecbf30 100644 --- a/lbmpy_tests/test_fluctuating_lb.py +++ b/lbmpy_tests/test_fluctuating_lb.py @@ -9,7 +9,8 @@ from lbmpy.moments import is_bulk_moment, is_shear_moment, get_order from pystencils.rng import PhiloxTwoDoubles import pytest -from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets +from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set +from pystencils.cpu.cpujit import get_compiler_config def single_component_maxwell(x1, x2, kT, mass): @@ -58,7 +59,7 @@ def add_pressure_output_to_collision_rule(collision_rule, pressure_field): collision_rule.main_assignments = collision_rule.main_assignments + pressure_ouput -def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None, cpu_vectorize_info=None): +def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, omega_odd=None, omega_even=None, rho_0=None, target=None): # Parameters stencil = get_stencil('D3Q19') @@ -85,8 +86,7 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om collision_rule = create_lb_collision_rule( method, fluctuating={ - 'temperature': kT, - 'rng_node': PhiloxTwoDoubles, + 'temperature': kT }, optimization={'cse_global': True} ) @@ -103,7 +103,7 @@ def get_fluctuating_lb(size=None, kT=None, omega_shear=None, omega_bulk=None, om {'density': rho, 'velocity': u}) opts = {'cpu_openmp': True, - 'cpu_vectorize_info': cpu_vectorize_info, + 'cpu_vectorize_info': None, 'target': dh.default_target} # Compile kernels @@ -248,6 +248,7 @@ def test_point_force(target="cpu"): dh.cpu_arrays["force"][force_pos[0], force_pos[1], force_pos[2]] = np.zeros(3) + @pytest.mark.skipif(not get_supported_instruction_sets(), reason="No vector instruction sets supported") @pytest.mark.parametrize('assume_aligned', (True, False)) @pytest.mark.parametrize('assume_inner_stride_one', (True, False)) @@ -274,14 +275,31 @@ def test_vectorization(assume_aligned, assume_inner_stride_one, assume_sufficien compressible=True, kernel_type='collide_only') + instruction_sets = get_supported_instruction_sets() + if get_compiler_config()['os'] == 'windows': + # skip instruction sets supported by the CPU but not by the compiler + if 'avx' in instruction_sets and ('/arch:avx2' not in get_compiler_config()['flags'].lower() + and '/arch:avx512' not in get_compiler_config()['flags'].lower()): + instruction_sets.remove('avx') + if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower(): + instruction_sets.remove('avx512') + instruction_set = instruction_sets[-1] + opts = {'cpu_openmp': False, 'cpu_vectorize_info': { - 'instruction_set': get_supported_instruction_sets()[0], + 'instruction_set': instruction_set, 'assume_aligned': assume_aligned, 'assume_inner_stride_one': assume_inner_stride_one, 'assume_sufficient_line_padding': assume_sufficient_line_padding, }, 'target': 'cpu'} - code = ps.create_kernel(collision, **opts) + if not assume_inner_stride_one and 'storeS' not in get_vector_instruction_set('double', instruction_set): + with pytest.warns(UserWarning) as warn: + code = ps.create_kernel(collision, **opts) + assert 'Could not vectorize loop' in warn[0].message.args[0] + else: + with pytest.warns(None) as warn: + code = ps.create_kernel(collision, **opts) + assert len(warn) == 0 code.compile()