From b76ac202b0b6add2a2e3a52fcb438f480314b080 Mon Sep 17 00:00:00 2001 From: Helen Schottenhamml <helen.schottenhamml@fau.de> Date: Tue, 17 Sep 2024 20:45:51 +0200 Subject: [PATCH] Welford algorithm extension --- src/lbmpy/flow_statistics.py | 81 ++++++++++++++++++++++--------- tests/test_welford.py | 92 ++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 22 deletions(-) create mode 100644 tests/test_welford.py diff --git a/src/lbmpy/flow_statistics.py b/src/lbmpy/flow_statistics.py index 3e5b5f5f..d4c7e308 100644 --- a/src/lbmpy/flow_statistics.py +++ b/src/lbmpy/flow_statistics.py @@ -4,17 +4,18 @@ import pystencils as ps from pystencils.field import Field -def welford_assignments(field, mean_field, sum_of_products_field=None): +def welford_assignments(field, mean_field, sum_of_squares_field=None, sum_of_cubes_field=None): r""" Creates the assignments for the kernel creation of Welford's algorithm (https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm). This algorithm is an online algorithm for the mean and variance calculation of sample data, here implemented for - the execution on vector fields, e.g., velocity. - The calculation of the variance is optional and only performed if a field for the sum of products is given. + the execution on scalar or vector fields, e.g., velocity. + The calculation of the variance / the third-order central moments is optional and only performed if a field for + the sum of squares / sum of cubes is given. The mean value is directly updated in the mean vector field. - The variance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the sum of - products after the first :math `n` samples. According to Welford the biased sample variance + The variance / covariance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the + sum of squares after the first :math `n` samples. According to Welford the biased sample variance :math `\sigma_n^2` and the unbiased sample variance :math `s_n^2` are given by .. math :: @@ -22,6 +23,9 @@ def welford_assignments(field, mean_field, sum_of_products_field=None): s_n^2 = \frac{M_{2,n}}{n-1}, respectively. + + Liekwise, to the 3rd-order central moment(s) of the (vector) field, the sum of cubes field must be divided + by :math `n` in a post-processing step. """ if isinstance(field, Field): @@ -40,22 +44,41 @@ def welford_assignments(field, mean_field, sum_of_products_field=None): else: raise ValueError("Mean vector field has to be a pystencils Field or Field.Access") - if sum_of_products_field is None: - # sum of products will not be calculated, i.e., the variance is not retrievable - welford_sum_of_products_field = None + if sum_of_squares_field is None: + # sum of products will not be calculated, i.e., the covariance matrix is not retrievable + welford_sum_of_squares_field = None + else: + if isinstance(sum_of_squares_field, Field): + welford_sum_of_squares_field = sum_of_squares_field.center + elif isinstance(sum_of_squares_field, Field.Access): + welford_sum_of_squares_field = sum_of_squares_field + else: + raise ValueError("Sum of squares field has to be a pystencils Field or Field.Access") + + assert welford_sum_of_squares_field.field.values_per_cell() == dim ** 2, \ + (f"Sum of squares field does not have the right layout. " + f"Index dimension: {welford_sum_of_squares_field.field.values_per_cell()}, expected: {dim ** 2}") + + if sum_of_cubes_field is None: + # sum of cubes will not be calculated, i.e., the 3rd-order central moments are not retrievable + welford_sum_of_cubes_field = None else: - if isinstance(sum_of_products_field, Field): - welford_sum_of_products_field = sum_of_products_field.center - assert sum_of_products_field.values_per_cell() == dim**2, \ - (f"Sum of products field does not have the right layout. " - f"Index dimension: {sum_of_products_field.values_per_cell()}, expected: {dim**2}") - elif isinstance(sum_of_products_field, Field.Access): - welford_sum_of_products_field = sum_of_products_field - assert sum_of_products_field.field.values_per_cell() == dim**2, \ - (f"Sum of products field does not have the right layout. " - f"Index dimension: {sum_of_products_field.field.values_per_cell()}, expected: {dim**2}") + if isinstance(sum_of_cubes_field, Field): + welford_sum_of_cubes_field = sum_of_cubes_field.center + elif isinstance(sum_of_cubes_field, Field.Access): + welford_sum_of_cubes_field = sum_of_cubes_field else: - raise ValueError("Variance vector field has to be a pystencils Field or Field.Access") + raise ValueError("Sum of cubes field has to be a pystencils Field or Field.Access") + + assert welford_sum_of_cubes_field.field.values_per_cell() == dim ** 3, \ + (f"Sum of cubes field does not have the right layout. " + f"Index dimension: {welford_sum_of_cubes_field.field.values_per_cell()}, expected: {dim ** 3}") + + # for the calculation of the thrid-order moments, the variance must also be calculated + if welford_sum_of_cubes_field is not None: + assert welford_sum_of_squares_field is not None + + # actual assignments counter = sp.Symbol('counter') delta = sp.symbols(f"delta_:{dim}") @@ -67,7 +90,7 @@ def welford_assignments(field, mean_field, sum_of_products_field=None): main_assignments.append(ps.Assignment(welford_mean_field.at_index(i), welford_mean_field.at_index(i) + delta[i] / counter)) - if sum_of_products_field is not None: + if sum_of_squares_field is not None: delta2 = sp.symbols(f"delta2_:{dim}") for i in range(dim): main_assignments.append( @@ -76,7 +99,21 @@ def welford_assignments(field, mean_field, sum_of_products_field=None): for j in range(dim): idx = i * dim + j main_assignments.append(ps.Assignment( - welford_sum_of_products_field.at_index(idx), - welford_sum_of_products_field.at_index(idx) + delta[i] * delta2[j])) + welford_sum_of_squares_field.at_index(idx), + welford_sum_of_squares_field.at_index(idx) + delta[i] * delta2[j])) + + if sum_of_cubes_field is not None: + for i in range(dim): + for j in range(dim): + for k in range(dim): + idx = (i * dim + j) * dim + k + main_assignments.append(ps.Assignment( + welford_sum_of_cubes_field.at_index(idx), + welford_sum_of_cubes_field.at_index(idx) + - delta[k] / counter * welford_sum_of_squares_field(i * dim + j) + - delta[j] / counter * welford_sum_of_squares_field(k * dim + i) + - delta[i] / counter * welford_sum_of_squares_field(j * dim + k) + + delta2[i] * (2 * delta[j] - delta2[j]) * delta[k] + )) return main_assignments diff --git a/tests/test_welford.py b/tests/test_welford.py new file mode 100644 index 00000000..f374c727 --- /dev/null +++ b/tests/test_welford.py @@ -0,0 +1,92 @@ +import pytest +import numpy as np +import pystencils as ps +from lbmpy.flow_statistics import welford_assignments + + +@pytest.mark.parametrize('order', [1, 2, 3]) +@pytest.mark.parametrize('dim', [1, 2, 3]) +def test_welford(order, dim): + + target = ps.Target.CPU + + # create data handling and fields + dh = ps.create_data_handling((1, 1, 1), periodicity=False, default_target=target) + + vector_field = dh.add_array('vector', values_per_cell=dim) + dh.fill(vector_field.name, 0.0, ghost_layers=False) + + mean_field = dh.add_array('mean', values_per_cell=dim) + dh.fill(mean_field.name, 0.0, ghost_layers=False) + + if order >= 2: + sos = dh.add_array('sos', values_per_cell=dim**2) + dh.fill(sos.name, 0.0, ghost_layers=False) + + if order == 3: + soc = dh.add_array('soc', values_per_cell=dim**3) + dh.fill(soc.name, 0.0, ghost_layers=False) + else: + soc = None + else: + sos = None + soc = None + + welford = welford_assignments(field=vector_field, mean_field=mean_field, + sum_of_squares_field=sos, sum_of_cubes_field=soc) + + welford_kernel = ps.create_kernel(welford).compile() + + # set random seed + np.random.seed(42) + n = int(1e4) + x = np.random.normal(size=n * dim).reshape(n, dim) + + analytical_mean = np.zeros(dim) + analytical_covariance = np.zeros(dim**2) + analytical_third_order_moments = np.zeros(dim**3) + + # calculate analytical mean + for i in range(dim): + analytical_mean[i] = np.mean(x[:, i]) + + # calculate analytical covariances + for i in range(dim): + for j in range(dim): + analytical_covariance[i * dim + j] \ + = (np.sum((x[:, i] - analytical_mean[i]) * (x[:, j] - analytical_mean[j]))) / n + + # calculate analytical third-order central moments + for i in range(dim): + for j in range(dim): + for k in range(dim): + analytical_third_order_moments[(i * dim + j) * dim + k] \ + = (np.sum((x[:, i] - analytical_mean[i]) + * (x[:, j] - analytical_mean[j]) + * (x[:, k] - analytical_mean[k]))) / n + + # Time loop + counter = 1 + for i in range(n): + for d in range(dim): + new_value = x[i, d] + if dim > 1: + dh.fill(array_name=vector_field.name, val=new_value, value_idx=d, ghost_layers=False) + else: + dh.fill(array_name=vector_field.name, val=new_value, ghost_layers=False) + dh.run_kernel(welford_kernel, counter=counter) + counter += 1 + + welford_mean = dh.gather_array(mean_field.name).flatten() + np.testing.assert_allclose(welford_mean, analytical_mean) + + if order >= 2: + welford_covariance = dh.gather_array(sos.name).flatten() / n + np.testing.assert_allclose(welford_covariance, analytical_covariance) + if order == 3: + welford_third_order_moments = dh.gather_array(soc.name).flatten() / n + np.testing.assert_allclose(welford_third_order_moments, analytical_third_order_moments) + + +if __name__ == '__main__': + test_welford(1, 1) -- GitLab