From a4acc9ffa4766c287e34e27de310c5588bf66d66 Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Wed, 29 Jan 2025 10:23:01 +0100 Subject: [PATCH 1/2] Add sqrt; fix domains in function testsuite; update freeze of sp.Pow - Add the `sqrt` function to the IR, as well as to CPU, CUDA, SYCL, and x86-vector targets - Rewrite case for `sp.Power` in freeze to generate `sqrt` calls where appropriate - Adapt function test suite: Test on values within the correct domain for all functions - Untrack some testsuite output files - Remove some deprecated imports from the testsuite --- src/pystencils/backend/functions.py | 1 + .../backend/kernelcreation/freeze.py | 73 ++++++++------ src/pystencils/backend/platforms/cuda.py | 1 + .../backend/platforms/generic_cpu.py | 3 +- src/pystencils/backend/platforms/sycl.py | 1 + src/pystencils/backend/platforms/x86.py | 3 + tests/kernelcreation/test_functions.py | 92 ++++++++++++++++-- tests/kernelcreation/test_staggered_kernel.py | 2 +- tests/nbackend/kernelcreation/test_freeze.py | 84 ++++++++++++++-- tests/runtime/test_boundary.py | 2 +- tests/runtime/test_data/.gitignore | 1 + .../datahandling_parallel_save_test/dst.dat | Bin 304 -> 0 bytes .../datahandling_parallel_save_test/src.dat | Bin 304 -> 0 bytes .../test_data/datahandling_save_test.npz | Bin 428 -> 0 bytes tests/runtime/test_datahandling.py | 2 +- 15 files changed, 212 insertions(+), 53 deletions(-) create mode 100644 tests/runtime/test_data/.gitignore delete mode 100644 tests/runtime/test_data/datahandling_parallel_save_test/dst.dat delete mode 100644 tests/runtime/test_data/datahandling_parallel_save_test/src.dat delete mode 100644 tests/runtime/test_data/datahandling_save_test.npz diff --git a/src/pystencils/backend/functions.py b/src/pystencils/backend/functions.py index 388160f30..f3d18f349 100644 --- a/src/pystencils/backend/functions.py +++ b/src/pystencils/backend/functions.py @@ -78,6 +78,7 @@ class MathFunctions(Enum): ASin = ("asin", 1) ACos = ("acos", 1) ATan = ("atan", 1) + Sqrt = ("sqrt", 1) Abs = ("abs", 1) Floor = ("floor", 1) diff --git a/src/pystencils/backend/kernelcreation/freeze.py b/src/pystencils/backend/kernelcreation/freeze.py index 44ee17077..c38fcbc97 100644 --- a/src/pystencils/backend/kernelcreation/freeze.py +++ b/src/pystencils/backend/kernelcreation/freeze.py @@ -212,40 +212,49 @@ class FreezeExpressions: base = expr.args[0] exponent = expr.args[1] - base_frozen = self.visit_expr(base) - reciprocal = False - expand_product = False - - if exponent.is_Integer: - if exponent == 0: - return PsExpression.make(PsConstant(1)) - - if exponent.is_negative: - reciprocal = True - exponent = -exponent - - if exponent <= sp.Integer( - 5 - ): # TODO: is this a sensible limit? maybe make this configurable. - expand_product = True - - if expand_product: - frozen_expr = reduce( - mul, - [base_frozen] - + [base_frozen.clone() for _ in range(0, int(exponent) - 1)], - ) - else: - exponent_frozen = self.visit_expr(exponent) - frozen_expr = PsMathFunction(MathFunctions.Pow)( - base_frozen, exponent_frozen - ) + expr_frozen = self.visit_expr(base) + + if isinstance(exponent, sp.Rational): + # Decompose rational exponent + num: int = exponent.numerator + denom: int = exponent.denominator - if reciprocal: - one = PsExpression.make(PsConstant(1)) - frozen_expr = one / frozen_expr + if denom <= 2 and abs(num) <= 8: + # At most a square root, and at most eight factors - return frozen_expr + reciprocal = False + + if num < 0: + reciprocal = True + num = -num + + if denom == 2: + expr_frozen = PsMathFunction(MathFunctions.Sqrt)(expr_frozen) + denom = 1 + + assert denom == 1 + + # Pairwise multiplication for logarithmic runtime + factors = [expr_frozen] + [expr_frozen.clone() for _ in range(num - 1)] + while len(factors) > 1: + combined = [x * y for x, y in zip(factors[::2], factors[1::2])] + if len(factors) % 2 == 1: + combined.append(factors[-1]) + factors = combined + + expr_frozen = factors.pop() + + if reciprocal: + one = PsExpression.make(PsConstant(1)) + expr_frozen = one / expr_frozen + + return expr_frozen + + # If we got this far, use pow + exponent_frozen = self.visit_expr(exponent) + expr_frozen = PsMathFunction(MathFunctions.Pow)(expr_frozen, exponent_frozen) + + return expr_frozen def map_Integer(self, expr: sp.Integer) -> PsConstantExpr: value = int(expr) diff --git a/src/pystencils/backend/platforms/cuda.py b/src/pystencils/backend/platforms/cuda.py index f146cfbfd..d9cd9352e 100644 --- a/src/pystencils/backend/platforms/cuda.py +++ b/src/pystencils/backend/platforms/cuda.py @@ -89,6 +89,7 @@ class CudaPlatform(GenericGpu): | MathFunctions.Log | MathFunctions.Sin | MathFunctions.Cos + | MathFunctions.Sqrt | MathFunctions.Ceil | MathFunctions.Floor ) if dtype.width in (16, 32, 64): diff --git a/src/pystencils/backend/platforms/generic_cpu.py b/src/pystencils/backend/platforms/generic_cpu.py index b6d7dd551..fa8e54002 100644 --- a/src/pystencils/backend/platforms/generic_cpu.py +++ b/src/pystencils/backend/platforms/generic_cpu.py @@ -43,7 +43,7 @@ class GenericCpu(Platform): @property def required_headers(self) -> set[str]: - return {"<math.h>"} + return {"<cmath>"} def materialize_iteration_space( self, body: PsBlock, ispace: IterationSpace @@ -78,6 +78,7 @@ class GenericCpu(Platform): | MathFunctions.ATan | MathFunctions.ATan2 | MathFunctions.Pow + | MathFunctions.Sqrt | MathFunctions.Floor | MathFunctions.Ceil ): diff --git a/src/pystencils/backend/platforms/sycl.py b/src/pystencils/backend/platforms/sycl.py index 9c04d6074..92ff68e21 100644 --- a/src/pystencils/backend/platforms/sycl.py +++ b/src/pystencils/backend/platforms/sycl.py @@ -80,6 +80,7 @@ class SyclPlatform(GenericGpu): | MathFunctions.ATan | MathFunctions.ATan2 | MathFunctions.Pow + | MathFunctions.Sqrt | MathFunctions.Floor | MathFunctions.Ceil ): diff --git a/src/pystencils/backend/platforms/x86.py b/src/pystencils/backend/platforms/x86.py index 7d2fe650f..fff143300 100644 --- a/src/pystencils/backend/platforms/x86.py +++ b/src/pystencils/backend/platforms/x86.py @@ -202,6 +202,9 @@ class X86VectorCpu(GenericVectorCpu): opstr = expr.function.func.function_name if vtype.width > 256: raise MaterializationError("512bit ceil/floor require SVML.") + + case MathFunctions.Sqrt if vtype.is_float(): + opstr = expr.function.name case MathFunctions.Min | MathFunctions.Max: opstr = expr.function.func.function_name diff --git a/tests/kernelcreation/test_functions.py b/tests/kernelcreation/test_functions.py index 9b7dd2852..6f82f5aa0 100644 --- a/tests/kernelcreation/test_functions.py +++ b/tests/kernelcreation/test_functions.py @@ -28,6 +28,7 @@ def unary_function(name, xp): "asin": (sp.asin, xp.arcsin), "acos": (sp.acos, xp.arccos), "atan": (sp.atan, xp.arctan), + "sqrt": (sp.sqrt, xp.sqrt), "abs": (sp.Abs, xp.abs), "floor": (sp.floor, xp.floor), "ceil": (sp.ceiling, xp.ceil), @@ -46,6 +47,82 @@ def binary_function(name, xp): AVAIL_TARGETS = Target.available_targets() +@pytest.fixture +def function_domain(function_name, dtype): + eps = 1e-6 + rng = np.random.default_rng() + + match function_name: + case ( + "exp" | "sin" | "cos" | "sinh" | "cosh" | "atan" | "abs" | "floor" | "ceil" + ): + return np.concat( + [ + [0.0, -1.0, 1.0], + rng.uniform(-0.1, 0.1, 5), + rng.uniform(-5.0, 5.0, 8), + rng.uniform(-10, 10, 8), + ] + ).astype(dtype) + case "tan": + return np.concat( + [ + [0.0, -1.0, 1.0], + rng.uniform(-np.pi / 2.0 + eps, np.pi / 2.0 - eps, 13), + ] + ).astype(dtype) + case "asin" | "acos": + return np.concat( + [ + [0.0, 1.0, -1.0], + rng.uniform(-1.0, 1.0, 13), + ] + ).astype(dtype) + case "log" | "sqrt": + return np.concat( + [ + [1.0], + rng.uniform(eps, 0.1, 7), + rng.uniform(eps, 1.0, 8), + rng.uniform(eps, 1e6, 8), + ] + ).astype(dtype) + case "min" | "max" | "atan2": + return np.concat( + [ + rng.uniform(-0.1, 0.1, 8), + rng.uniform(-5.0, 5.0, 8), + rng.uniform(-10, 10, 8), + ] + ).astype(dtype), np.concat( + [ + rng.uniform(-0.1, 0.1, 8), + rng.uniform(-5.0, 5.0, 8), + rng.uniform(-10, 10, 8), + ] + ).astype( + dtype + ) + case "pow": + return np.concat( + [ + [0., 1., 1.], + rng.uniform(-1., 1., 8), + rng.uniform(0., 5., 8), + ] + ).astype(dtype), np.concat( + [ + [1., 0., 2.], + np.arange(2., 10., 1.), + rng.uniform(-2.0, 2.0, 8), + ] + ).astype( + dtype + ) + case _: + assert False, "I don't know the domain of that function" + + @pytest.mark.parametrize( "function_name, target", list( @@ -70,15 +147,15 @@ AVAIL_TARGETS = Target.available_targets() ["floor", "ceil"], [t for t in AVAIL_TARGETS if Target._AVX512 not in t] ) ) - + list(product(["abs"], AVAIL_TARGETS)), + + list(product(["sqrt", "abs"], AVAIL_TARGETS)), ) @pytest.mark.parametrize("dtype", (np.float32, np.float64)) -def test_unary_functions(gen_config, xp, function_name, dtype): +def test_unary_functions(gen_config, xp, function_name, dtype, function_domain): sp_func, xp_func = unary_function(function_name, xp) resolution = np.finfo(dtype).resolution # Array size should be larger than eight, such that vectorized kernels don't just run their remainder loop - inp = xp.array([0.1, 0.2, 0.0, -0.8, -1.6, -12.592, xp.pi, xp.e, -0.3], dtype=dtype) + inp = xp.array(function_domain) outp = xp.zeros_like(inp) reference = xp_func(inp) @@ -104,15 +181,12 @@ def test_unary_functions(gen_config, xp, function_name, dtype): ), ) @pytest.mark.parametrize("dtype", (np.float32, np.float64)) -def test_binary_functions(gen_config, xp, function_name, dtype): +def test_binary_functions(gen_config, xp, function_name, dtype, function_domain): sp_func, xp_func = binary_function(function_name, xp) resolution: dtype = np.finfo(dtype).resolution - inp = xp.array([0.1, 0.2, 0.3, -0.8, -1.6, -12.592, xp.pi, xp.e, 0.0], dtype=dtype) - inp2 = xp.array( - [3.1, -0.5, 21.409, 11.0, 1.0, -14e3, 2.0 * xp.pi, -xp.e, 0.0], - dtype=dtype, - ) + inp = xp.array(function_domain[0]) + inp2 = xp.array(function_domain[1]) outp = xp.zeros_like(inp) reference = xp_func(inp, inp2) diff --git a/tests/kernelcreation/test_staggered_kernel.py b/tests/kernelcreation/test_staggered_kernel.py index 9bc9e71af..99b61d07f 100644 --- a/tests/kernelcreation/test_staggered_kernel.py +++ b/tests/kernelcreation/test_staggered_kernel.py @@ -5,7 +5,7 @@ import pytest import pystencils as ps from pystencils import x_staggered_vector, TypedSymbol -from pystencils.enums import Target +from pystencils import Target class TestStaggeredDiffusion: diff --git a/tests/nbackend/kernelcreation/test_freeze.py b/tests/nbackend/kernelcreation/test_freeze.py index ce4f61785..fccec7114 100644 --- a/tests/nbackend/kernelcreation/test_freeze.py +++ b/tests/nbackend/kernelcreation/test_freeze.py @@ -113,14 +113,8 @@ def test_freeze_fields(): zero = PsExpression.make(PsConstant(0)) - lhs = PsBufferAcc( - f_arr.base_pointer, - (PsExpression.make(counter) + zero, zero) - ) - rhs = PsBufferAcc( - g_arr.base_pointer, - (PsExpression.make(counter) + zero, zero) - ) + lhs = PsBufferAcc(f_arr.base_pointer, (PsExpression.make(counter) + zero, zero)) + rhs = PsBufferAcc(g_arr.base_pointer, (PsExpression.make(counter) + zero, zero)) should = PsAssignment(lhs, rhs) @@ -357,6 +351,80 @@ def test_add_sub(): assert expr.structurally_equal(PsAdd(x2, PsMul(minus_two, y2))) +def test_powers(): + ctx = KernelCreationContext() + freeze = FreezeExpressions(ctx) + + x, y, z = sp.symbols("x, y, z") + + x2 = PsExpression.make(ctx.get_symbol("x")) + y2 = PsExpression.make(ctx.get_symbol("y")) + + # Integer powers + expr = freeze(x**2) + assert expr.structurally_equal(x2 * x2) + + expr = freeze(x**3) + assert expr.structurally_equal(x2 * x2 * x2) + + expr = freeze(x**4) + assert expr.structurally_equal((x2 * x2) * (x2 * x2)) + + expr = freeze(x**5) + assert expr.structurally_equal((x2 * x2) * (x2 * x2) * x2) + + # Negative integer powers + one = PsExpression.make(PsConstant(1)) + + expr = freeze(x**-2) + assert expr.structurally_equal(one / (x2 * x2)) + + expr = freeze(x**-3) + assert expr.structurally_equal(one / (x2 * x2 * x2)) + + expr = freeze(x**-4) + assert expr.structurally_equal(one / ((x2 * x2) * (x2 * x2))) + + expr = freeze(x**-5) + assert expr.structurally_equal(one / ((x2 * x2) * (x2 * x2) * x2)) + + # Integer powers of the square root + sqrt = PsMathFunction(MathFunctions.Sqrt) + + expr = freeze(x ** sp.Rational(1, 2)) + assert expr.structurally_equal(sqrt(x2)) + + expr = freeze(x ** sp.Rational(2, 2)) + assert expr.structurally_equal(x2) + + expr = freeze(x ** sp.Rational(3, 2)) + assert expr.structurally_equal(sqrt(x2) * sqrt(x2) * sqrt(x2)) + + expr = freeze(x ** sp.Rational(4, 2)) + assert expr.structurally_equal(x2 * x2) + + expr = freeze(x ** sp.Rational(5, 2)) + assert expr.structurally_equal( + (sqrt(x2) * sqrt(x2)) * (sqrt(x2) * sqrt(x2)) * sqrt(x2) + ) + + # Negative integer powers of sqrt + expr = freeze(x ** sp.Rational(-1, 2)) + assert expr.structurally_equal(one / sqrt(x2)) + + expr = freeze(x ** sp.Rational(-3, 2)) + assert expr.structurally_equal(one / (sqrt(x2) * sqrt(x2) * sqrt(x2))) + + # Cube root + pow = PsMathFunction(MathFunctions.Pow) + expr = freeze(x ** sp.Rational(1, 3)) + assert expr.structurally_equal(pow(x2, freeze(sp.Rational(1, 3)))) + + # Unknown exponent + expr = freeze(x**y) + assert expr.structurally_equal(pow(x2, y2)) + + def test_tuple_array_literals(): ctx = KernelCreationContext() freeze = FreezeExpressions(ctx) diff --git a/tests/runtime/test_boundary.py b/tests/runtime/test_boundary.py index a94d37820..fb8f827e8 100644 --- a/tests/runtime/test_boundary.py +++ b/tests/runtime/test_boundary.py @@ -8,7 +8,7 @@ import pystencils from pystencils import Assignment, create_kernel from pystencils.boundaries import BoundaryHandling, Dirichlet, Neumann, add_neumann_boundary from pystencils.datahandling import SerialDataHandling -from pystencils.enums import Target +from pystencils import Target from pystencils.slicing import slice_from_direction from pystencils.timeloop import TimeLoop diff --git a/tests/runtime/test_data/.gitignore b/tests/runtime/test_data/.gitignore new file mode 100644 index 000000000..278bd1136 --- /dev/null +++ b/tests/runtime/test_data/.gitignore @@ -0,0 +1 @@ +datahandling_save_test* \ No newline at end of file diff --git a/tests/runtime/test_data/datahandling_parallel_save_test/dst.dat b/tests/runtime/test_data/datahandling_parallel_save_test/dst.dat deleted file mode 100644 index 204552486f77c485a4dd333a10eff82f9d44aa9f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 304 YcmWe&fB*$XC<9D=u!rzQY4mUa0M%S7&Hw-a diff --git a/tests/runtime/test_data/datahandling_parallel_save_test/src.dat b/tests/runtime/test_data/datahandling_parallel_save_test/src.dat deleted file mode 100644 index 204552486f77c485a4dd333a10eff82f9d44aa9f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 304 YcmWe&fB*$XC<9D=u!rzQY4mUa0M%S7&Hw-a diff --git a/tests/runtime/test_data/datahandling_save_test.npz b/tests/runtime/test_data/datahandling_save_test.npz deleted file mode 100644 index 22202358a4fa1d1cea4db89c0889f5bca636598b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 428 zcmWIWW@Zs#U|`??Vnv4TVm_%@Ad7*Ofq|VtgrT@7Sud}kl953GECiAPO9ScIZ^U0o z3!FR=a4cZ$yh%}WVwU7BU6409ZQ;7b3+7FW4+)wwLwtVxlu2Ad{F++6R|5m|&w1#0 zgl-SIU~n-Ih&Eig61XB%LAb!M;UN3(z$z0Nx621J>=G6f+c7W%;B{3>amfI=ijhf# q8CPh50tW&b7(p~N>;k-r)ie#L3F@@~Z&o&t8B9Q!1*CPrCISFllx#cz diff --git a/tests/runtime/test_datahandling.py b/tests/runtime/test_datahandling.py index 29e639c88..62ba64056 100644 --- a/tests/runtime/test_datahandling.py +++ b/tests/runtime/test_datahandling.py @@ -7,7 +7,7 @@ import numpy as np import pystencils as ps from pystencils import create_data_handling, create_kernel from pystencils.gpu.gpu_array_handler import GPUArrayHandler -from pystencils.enums import Target +from pystencils import Target try: import pytest -- GitLab From ecbbdf72fe3338c223124453d01bd2fb92864fa6 Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Wed, 29 Jan 2025 10:35:44 +0100 Subject: [PATCH 2/2] use np.concatenate --- tests/kernelcreation/test_functions.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/kernelcreation/test_functions.py b/tests/kernelcreation/test_functions.py index 6f82f5aa0..a4d154d4b 100644 --- a/tests/kernelcreation/test_functions.py +++ b/tests/kernelcreation/test_functions.py @@ -56,7 +56,7 @@ def function_domain(function_name, dtype): case ( "exp" | "sin" | "cos" | "sinh" | "cosh" | "atan" | "abs" | "floor" | "ceil" ): - return np.concat( + return np.concatenate( [ [0.0, -1.0, 1.0], rng.uniform(-0.1, 0.1, 5), @@ -65,21 +65,21 @@ def function_domain(function_name, dtype): ] ).astype(dtype) case "tan": - return np.concat( + return np.concatenate( [ [0.0, -1.0, 1.0], rng.uniform(-np.pi / 2.0 + eps, np.pi / 2.0 - eps, 13), ] ).astype(dtype) case "asin" | "acos": - return np.concat( + return np.concatenate( [ [0.0, 1.0, -1.0], rng.uniform(-1.0, 1.0, 13), ] ).astype(dtype) case "log" | "sqrt": - return np.concat( + return np.concatenate( [ [1.0], rng.uniform(eps, 0.1, 7), @@ -88,13 +88,13 @@ def function_domain(function_name, dtype): ] ).astype(dtype) case "min" | "max" | "atan2": - return np.concat( + return np.concatenate( [ rng.uniform(-0.1, 0.1, 8), rng.uniform(-5.0, 5.0, 8), rng.uniform(-10, 10, 8), ] - ).astype(dtype), np.concat( + ).astype(dtype), np.concatenate( [ rng.uniform(-0.1, 0.1, 8), rng.uniform(-5.0, 5.0, 8), @@ -104,13 +104,13 @@ def function_domain(function_name, dtype): dtype ) case "pow": - return np.concat( + return np.concatenate( [ [0., 1., 1.], rng.uniform(-1., 1., 8), rng.uniform(0., 5., 8), ] - ).astype(dtype), np.concat( + ).astype(dtype), np.concatenate( [ [1., 0., 2.], np.arange(2., 10., 1.), -- GitLab