From b76ac202b0b6add2a2e3a52fcb438f480314b080 Mon Sep 17 00:00:00 2001
From: Helen Schottenhamml <helen.schottenhamml@fau.de>
Date: Tue, 17 Sep 2024 20:45:51 +0200
Subject: [PATCH] Welford algorithm extension

---
 src/lbmpy/flow_statistics.py | 81 ++++++++++++++++++++++---------
 tests/test_welford.py        | 92 ++++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+), 22 deletions(-)
 create mode 100644 tests/test_welford.py

diff --git a/src/lbmpy/flow_statistics.py b/src/lbmpy/flow_statistics.py
index 3e5b5f5f..d4c7e308 100644
--- a/src/lbmpy/flow_statistics.py
+++ b/src/lbmpy/flow_statistics.py
@@ -4,17 +4,18 @@ import pystencils as ps
 from pystencils.field import Field
 
 
-def welford_assignments(field, mean_field, sum_of_products_field=None):
+def welford_assignments(field, mean_field, sum_of_squares_field=None, sum_of_cubes_field=None):
     r"""
     Creates the assignments for the kernel creation of Welford's algorithm
     (https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm).
     This algorithm is an online algorithm for the mean and variance calculation of sample data, here implemented for
-    the execution on vector fields, e.g., velocity.
-    The calculation of the variance is optional and only performed if a field for the sum of products is given.
+    the execution on scalar or vector fields, e.g., velocity.
+    The calculation of the variance / the third-order central moments is optional and only performed if a field for
+    the sum of squares / sum of cubes is given.
 
     The mean value is directly updated in the mean vector field.
-    The variance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the sum of
-    products after the first :math `n` samples. According to Welford the biased sample variance
+    The variance / covariance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the
+    sum of squares after the first :math `n` samples. According to Welford the biased sample variance
     :math `\sigma_n^2` and the unbiased sample variance :math `s_n^2` are given by
 
     .. math ::
@@ -22,6 +23,9 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
         s_n^2 = \frac{M_{2,n}}{n-1},
 
     respectively.
+
+    Liekwise, to the 3rd-order central moment(s) of the (vector) field, the sum of cubes field must be divided
+    by :math `n` in a post-processing step.
     """
 
     if isinstance(field, Field):
@@ -40,22 +44,41 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
     else:
         raise ValueError("Mean vector field has to be a pystencils Field or Field.Access")
 
-    if sum_of_products_field is None:
-        # sum of products will not be calculated, i.e., the variance is not retrievable
-        welford_sum_of_products_field = None
+    if sum_of_squares_field is None:
+        # sum of products will not be calculated, i.e., the covariance matrix is not retrievable
+        welford_sum_of_squares_field = None
+    else:
+        if isinstance(sum_of_squares_field, Field):
+            welford_sum_of_squares_field = sum_of_squares_field.center
+        elif isinstance(sum_of_squares_field, Field.Access):
+            welford_sum_of_squares_field = sum_of_squares_field
+        else:
+            raise ValueError("Sum of squares field has to be a pystencils Field or Field.Access")
+
+        assert welford_sum_of_squares_field.field.values_per_cell() == dim ** 2, \
+            (f"Sum of squares field does not have the right layout. "
+             f"Index dimension: {welford_sum_of_squares_field.field.values_per_cell()}, expected: {dim ** 2}")
+
+    if sum_of_cubes_field is None:
+        # sum of cubes will not be calculated, i.e., the 3rd-order central moments are not retrievable
+        welford_sum_of_cubes_field = None
     else:
-        if isinstance(sum_of_products_field, Field):
-            welford_sum_of_products_field = sum_of_products_field.center
-            assert sum_of_products_field.values_per_cell() == dim**2, \
-                (f"Sum of products field does not have the right layout. "
-                 f"Index dimension: {sum_of_products_field.values_per_cell()}, expected: {dim**2}")
-        elif isinstance(sum_of_products_field, Field.Access):
-            welford_sum_of_products_field = sum_of_products_field
-            assert sum_of_products_field.field.values_per_cell() == dim**2, \
-                (f"Sum of products field does not have the right layout. "
-                 f"Index dimension: {sum_of_products_field.field.values_per_cell()}, expected: {dim**2}")
+        if isinstance(sum_of_cubes_field, Field):
+            welford_sum_of_cubes_field = sum_of_cubes_field.center
+        elif isinstance(sum_of_cubes_field, Field.Access):
+            welford_sum_of_cubes_field = sum_of_cubes_field
         else:
-            raise ValueError("Variance vector field has to be a pystencils Field or Field.Access")
+            raise ValueError("Sum of cubes field has to be a pystencils Field or Field.Access")
+
+        assert welford_sum_of_cubes_field.field.values_per_cell() == dim ** 3, \
+            (f"Sum of cubes field does not have the right layout. "
+             f"Index dimension: {welford_sum_of_cubes_field.field.values_per_cell()}, expected: {dim ** 3}")
+
+    # for the calculation of the thrid-order moments, the variance must also be calculated
+    if welford_sum_of_cubes_field is not None:
+        assert welford_sum_of_squares_field is not None
+
+    # actual assignments
 
     counter = sp.Symbol('counter')
     delta = sp.symbols(f"delta_:{dim}")
@@ -67,7 +90,7 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
         main_assignments.append(ps.Assignment(welford_mean_field.at_index(i),
                                               welford_mean_field.at_index(i) + delta[i] / counter))
 
-    if sum_of_products_field is not None:
+    if sum_of_squares_field is not None:
         delta2 = sp.symbols(f"delta2_:{dim}")
         for i in range(dim):
             main_assignments.append(
@@ -76,7 +99,21 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
             for j in range(dim):
                 idx = i * dim + j
                 main_assignments.append(ps.Assignment(
-                    welford_sum_of_products_field.at_index(idx),
-                    welford_sum_of_products_field.at_index(idx) + delta[i] * delta2[j]))
+                    welford_sum_of_squares_field.at_index(idx),
+                    welford_sum_of_squares_field.at_index(idx) + delta[i] * delta2[j]))
+
+        if sum_of_cubes_field is not None:
+            for i in range(dim):
+                for j in range(dim):
+                    for k in range(dim):
+                        idx = (i * dim + j) * dim + k
+                        main_assignments.append(ps.Assignment(
+                            welford_sum_of_cubes_field.at_index(idx),
+                            welford_sum_of_cubes_field.at_index(idx)
+                            - delta[k] / counter * welford_sum_of_squares_field(i * dim + j)
+                            - delta[j] / counter * welford_sum_of_squares_field(k * dim + i)
+                            - delta[i] / counter * welford_sum_of_squares_field(j * dim + k)
+                            + delta2[i] * (2 * delta[j] - delta2[j]) * delta[k]
+                        ))
 
     return main_assignments
diff --git a/tests/test_welford.py b/tests/test_welford.py
new file mode 100644
index 00000000..f374c727
--- /dev/null
+++ b/tests/test_welford.py
@@ -0,0 +1,92 @@
+import pytest
+import numpy as np
+import pystencils as ps
+from lbmpy.flow_statistics import welford_assignments
+
+
+@pytest.mark.parametrize('order', [1, 2, 3])
+@pytest.mark.parametrize('dim', [1, 2, 3])
+def test_welford(order, dim):
+
+    target = ps.Target.CPU
+
+    # create data handling and fields
+    dh = ps.create_data_handling((1, 1, 1), periodicity=False, default_target=target)
+
+    vector_field = dh.add_array('vector', values_per_cell=dim)
+    dh.fill(vector_field.name, 0.0, ghost_layers=False)
+
+    mean_field = dh.add_array('mean', values_per_cell=dim)
+    dh.fill(mean_field.name, 0.0, ghost_layers=False)
+
+    if order >= 2:
+        sos = dh.add_array('sos', values_per_cell=dim**2)
+        dh.fill(sos.name, 0.0, ghost_layers=False)
+
+        if order == 3:
+            soc = dh.add_array('soc', values_per_cell=dim**3)
+            dh.fill(soc.name, 0.0, ghost_layers=False)
+        else:
+            soc = None
+    else:
+        sos = None
+        soc = None
+
+    welford = welford_assignments(field=vector_field, mean_field=mean_field,
+                                  sum_of_squares_field=sos, sum_of_cubes_field=soc)
+
+    welford_kernel = ps.create_kernel(welford).compile()
+
+    # set random seed
+    np.random.seed(42)
+    n = int(1e4)
+    x = np.random.normal(size=n * dim).reshape(n, dim)
+
+    analytical_mean = np.zeros(dim)
+    analytical_covariance = np.zeros(dim**2)
+    analytical_third_order_moments = np.zeros(dim**3)
+
+    # calculate analytical mean
+    for i in range(dim):
+        analytical_mean[i] = np.mean(x[:, i])
+
+    # calculate analytical covariances
+    for i in range(dim):
+        for j in range(dim):
+            analytical_covariance[i * dim + j] \
+                = (np.sum((x[:, i] - analytical_mean[i]) * (x[:, j] - analytical_mean[j]))) / n
+
+    # calculate analytical third-order central moments
+    for i in range(dim):
+        for j in range(dim):
+            for k in range(dim):
+                analytical_third_order_moments[(i * dim + j) * dim + k] \
+                    = (np.sum((x[:, i] - analytical_mean[i])
+                              * (x[:, j] - analytical_mean[j])
+                              * (x[:, k] - analytical_mean[k]))) / n
+
+    # Time loop
+    counter = 1
+    for i in range(n):
+        for d in range(dim):
+            new_value = x[i, d]
+            if dim > 1:
+                dh.fill(array_name=vector_field.name, val=new_value, value_idx=d, ghost_layers=False)
+            else:
+                dh.fill(array_name=vector_field.name, val=new_value, ghost_layers=False)
+        dh.run_kernel(welford_kernel, counter=counter)
+        counter += 1
+
+    welford_mean = dh.gather_array(mean_field.name).flatten()
+    np.testing.assert_allclose(welford_mean, analytical_mean)
+
+    if order >= 2:
+        welford_covariance = dh.gather_array(sos.name).flatten() / n
+        np.testing.assert_allclose(welford_covariance, analytical_covariance)
+        if order == 3:
+            welford_third_order_moments = dh.gather_array(soc.name).flatten() / n
+            np.testing.assert_allclose(welford_third_order_moments, analytical_third_order_moments)
+
+
+if __name__ == '__main__':
+    test_welford(1, 1)
-- 
GitLab