Welford algorithm extension

b76ac202 · Helen Schottenhamml · Markus Holzer · 9b29a46d · b76ac202 · b76ac202
Commit b76ac202 authored 9 months ago by Helen Schottenhamml Committed by Markus Holzer 9 months ago
--- a/src/lbmpy/flow_statistics.py
+++ b/src/lbmpy/flow_statistics.py
@@ -4,17 +4,18 @@ import pystencils as ps
 from pystencils.field import Field


-def welford_assignments(field, mean_field, sum_of_products_field=None):
+def welford_assignments(field, mean_field, sum_of_squares_field=None, sum_of_cubes_field=None):
    r"""
    Creates the assignments for the kernel creation of Welford's algorithm
    (https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm).
    This algorithm is an online algorithm for the mean and variance calculation of sample data, here implemented for
-    the execution on vector fields, e.g., velocity.
-    The calculation of the variance is optional and only performed if a field for the sum of products is given.
+    the execution on scalar or vector fields, e.g., velocity.
+    The calculation of the variance / the third-order central moments is optional and only performed if a field for
+    the sum of squares / sum of cubes is given.

    The mean value is directly updated in the mean vector field.
-    The variance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the sum of
-    products after the first :math `n` samples. According to Welford the biased sample variance
+    The variance / covariance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the
+    sum of squares after the first :math `n` samples. According to Welford the biased sample variance
    :math `\sigma_n^2` and the unbiased sample variance :math `s_n^2` are given by

    .. math ::
@@ -22,6 +23,9 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
        s_n^2 = \frac{M_{2,n}}{n-1},

    respectively.
+
+    Liekwise, to the 3rd-order central moment(s) of the (vector) field, the sum of cubes field must be divided
+    by :math `n` in a post-processing step.
    """

    if isinstance(field, Field):
@@ -40,22 +44,41 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
    else:
        raise ValueError("Mean vector field has to be a pystencils Field or Field.Access")

-    if sum_of_products_field is None:
-        # sum of products will not be calculated, i.e., the variance is not retrievable
-        welford_sum_of_products_field = None
+    if sum_of_squares_field is None:
+        # sum of products will not be calculated, i.e., the covariance matrix is not retrievable
+        welford_sum_of_squares_field = None
+    else:
+        if isinstance(sum_of_squares_field, Field):
+            welford_sum_of_squares_field = sum_of_squares_field.center
+        elif isinstance(sum_of_squares_field, Field.Access):
+            welford_sum_of_squares_field = sum_of_squares_field
+        else:
+            raise ValueError("Sum of squares field has to be a pystencils Field or Field.Access")
+
+        assert welford_sum_of_squares_field.field.values_per_cell() == dim ** 2, \
+            (f"Sum of squares field does not have the right layout. "
+             f"Index dimension: {welford_sum_of_squares_field.field.values_per_cell()}, expected: {dim ** 2}")
+
+    if sum_of_cubes_field is None:
+        # sum of cubes will not be calculated, i.e., the 3rd-order central moments are not retrievable
+        welford_sum_of_cubes_field = None
    else:
-        if isinstance(sum_of_products_field, Field):
-            welford_sum_of_products_field = sum_of_products_field.center
-            assert sum_of_products_field.values_per_cell() == dim**2, \
-                (f"Sum of products field does not have the right layout. "
-                 f"Index dimension: {sum_of_products_field.values_per_cell()}, expected: {dim**2}")
-        elif isinstance(sum_of_products_field, Field.Access):
-            welford_sum_of_products_field = sum_of_products_field
-            assert sum_of_products_field.field.values_per_cell() == dim**2, \
-                (f"Sum of products field does not have the right layout. "
-                 f"Index dimension: {sum_of_products_field.field.values_per_cell()}, expected: {dim**2}")
+        if isinstance(sum_of_cubes_field, Field):
+            welford_sum_of_cubes_field = sum_of_cubes_field.center
+        elif isinstance(sum_of_cubes_field, Field.Access):
+            welford_sum_of_cubes_field = sum_of_cubes_field
        else:
-            raise ValueError("Variance vector field has to be a pystencils Field or Field.Access")
+            raise ValueError("Sum of cubes field has to be a pystencils Field or Field.Access")
+
+        assert welford_sum_of_cubes_field.field.values_per_cell() == dim ** 3, \
+            (f"Sum of cubes field does not have the right layout. "
+             f"Index dimension: {welford_sum_of_cubes_field.field.values_per_cell()}, expected: {dim ** 3}")
+
+    # for the calculation of the thrid-order moments, the variance must also be calculated
+    if welford_sum_of_cubes_field is not None:
+        assert welford_sum_of_squares_field is not None
+
+    # actual assignments

    counter = sp.Symbol('counter')
    delta = sp.symbols(f"delta_:{dim}")
@@ -67,7 +90,7 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
        main_assignments.append(ps.Assignment(welford_mean_field.at_index(i),
                                              welford_mean_field.at_index(i) + delta[i] / counter))

-    if sum_of_products_field is not None:
+    if sum_of_squares_field is not None:
        delta2 = sp.symbols(f"delta2_:{dim}")
        for i in range(dim):
            main_assignments.append(
@@ -76,7 +99,21 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
            for j in range(dim):
                idx = i * dim + j
                main_assignments.append(ps.Assignment(
-                    welford_sum_of_products_field.at_index(idx),
-                    welford_sum_of_products_field.at_index(idx) + delta[i] * delta2[j]))
+                    welford_sum_of_squares_field.at_index(idx),
+                    welford_sum_of_squares_field.at_index(idx) + delta[i] * delta2[j]))
+
+        if sum_of_cubes_field is not None:
+            for i in range(dim):
+                for j in range(dim):
+                    for k in range(dim):
+                        idx = (i * dim + j) * dim + k
+                        main_assignments.append(ps.Assignment(
+                            welford_sum_of_cubes_field.at_index(idx),
+                            welford_sum_of_cubes_field.at_index(idx)
+                            - delta[k] / counter * welford_sum_of_squares_field(i * dim + j)
+                            - delta[j] / counter * welford_sum_of_squares_field(k * dim + i)
+                            - delta[i] / counter * welford_sum_of_squares_field(j * dim + k)
+                            + delta2[i] * (2 * delta[j] - delta2[j]) * delta[k]
+                        ))

    return main_assignments
--- a/tests/test_welford.py
+++ b/tests/test_welford.py
+import pytest
+import numpy as np
+import pystencils as ps
+from lbmpy.flow_statistics import welford_assignments
+
+
+@pytest.mark.parametrize('order', [1, 2, 3])
+@pytest.mark.parametrize('dim', [1, 2, 3])
+def test_welford(order, dim):
+
+    target = ps.Target.CPU
+
+    # create data handling and fields
+    dh = ps.create_data_handling((1, 1, 1), periodicity=False, default_target=target)
+
+    vector_field = dh.add_array('vector', values_per_cell=dim)
+    dh.fill(vector_field.name, 0.0, ghost_layers=False)
+
+    mean_field = dh.add_array('mean', values_per_cell=dim)
+    dh.fill(mean_field.name, 0.0, ghost_layers=False)
+
+    if order >= 2:
+        sos = dh.add_array('sos', values_per_cell=dim**2)
+        dh.fill(sos.name, 0.0, ghost_layers=False)
+
+        if order == 3:
+            soc = dh.add_array('soc', values_per_cell=dim**3)
+            dh.fill(soc.name, 0.0, ghost_layers=False)
+        else:
+            soc = None
+    else:
+        sos = None
+        soc = None
+
+    welford = welford_assignments(field=vector_field, mean_field=mean_field,
+                                  sum_of_squares_field=sos, sum_of_cubes_field=soc)
+
+    welford_kernel = ps.create_kernel(welford).compile()
+
+    # set random seed
+    np.random.seed(42)
+    n = int(1e4)
+    x = np.random.normal(size=n * dim).reshape(n, dim)
+
+    analytical_mean = np.zeros(dim)
+    analytical_covariance = np.zeros(dim**2)
+    analytical_third_order_moments = np.zeros(dim**3)
+
+    # calculate analytical mean
+    for i in range(dim):
+        analytical_mean[i] = np.mean(x[:, i])
+
+    # calculate analytical covariances
+    for i in range(dim):
+        for j in range(dim):
+            analytical_covariance[i * dim + j] \
+                = (np.sum((x[:, i] - analytical_mean[i]) * (x[:, j] - analytical_mean[j]))) / n
+
+    # calculate analytical third-order central moments
+    for i in range(dim):
+        for j in range(dim):
+            for k in range(dim):
+                analytical_third_order_moments[(i * dim + j) * dim + k] \
+                    = (np.sum((x[:, i] - analytical_mean[i])
+                              * (x[:, j] - analytical_mean[j])
+                              * (x[:, k] - analytical_mean[k]))) / n
+
+    # Time loop
+    counter = 1
+    for i in range(n):
+        for d in range(dim):
+            new_value = x[i, d]
+            if dim > 1:
+                dh.fill(array_name=vector_field.name, val=new_value, value_idx=d, ghost_layers=False)
+            else:
+                dh.fill(array_name=vector_field.name, val=new_value, ghost_layers=False)
+        dh.run_kernel(welford_kernel, counter=counter)
+        counter += 1
+
+    welford_mean = dh.gather_array(mean_field.name).flatten()
+    np.testing.assert_allclose(welford_mean, analytical_mean)
+
+    if order >= 2:
+        welford_covariance = dh.gather_array(sos.name).flatten() / n
+        np.testing.assert_allclose(welford_covariance, analytical_covariance)
+        if order == 3:
+            welford_third_order_moments = dh.gather_array(soc.name).flatten() / n
+            np.testing.assert_allclose(welford_third_order_moments, analytical_third_order_moments)
+
+
+if __name__ == '__main__':
+    test_welford(1, 1)