Skip to content
Snippets Groups Projects
Commit c6fc87d8 authored by Markus Holzer's avatar Markus Holzer
Browse files

Merge branch 'welford_extension' into 'master'

Welford algorithm extension

See merge request pycodegen/lbmpy!175
parents 9b29a46d b76ac202
1 merge request!175Welford algorithm extension
Pipeline #69136 failed with stages
in 10 minutes and 13 seconds
...@@ -4,17 +4,18 @@ import pystencils as ps ...@@ -4,17 +4,18 @@ import pystencils as ps
from pystencils.field import Field from pystencils.field import Field
def welford_assignments(field, mean_field, sum_of_products_field=None): def welford_assignments(field, mean_field, sum_of_squares_field=None, sum_of_cubes_field=None):
r""" r"""
Creates the assignments for the kernel creation of Welford's algorithm Creates the assignments for the kernel creation of Welford's algorithm
(https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm). (https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm).
This algorithm is an online algorithm for the mean and variance calculation of sample data, here implemented for This algorithm is an online algorithm for the mean and variance calculation of sample data, here implemented for
the execution on vector fields, e.g., velocity. the execution on scalar or vector fields, e.g., velocity.
The calculation of the variance is optional and only performed if a field for the sum of products is given. The calculation of the variance / the third-order central moments is optional and only performed if a field for
the sum of squares / sum of cubes is given.
The mean value is directly updated in the mean vector field. The mean value is directly updated in the mean vector field.
The variance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the sum of The variance / covariance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the
products after the first :math `n` samples. According to Welford the biased sample variance sum of squares after the first :math `n` samples. According to Welford the biased sample variance
:math `\sigma_n^2` and the unbiased sample variance :math `s_n^2` are given by :math `\sigma_n^2` and the unbiased sample variance :math `s_n^2` are given by
.. math :: .. math ::
...@@ -22,6 +23,9 @@ def welford_assignments(field, mean_field, sum_of_products_field=None): ...@@ -22,6 +23,9 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
s_n^2 = \frac{M_{2,n}}{n-1}, s_n^2 = \frac{M_{2,n}}{n-1},
respectively. respectively.
Liekwise, to the 3rd-order central moment(s) of the (vector) field, the sum of cubes field must be divided
by :math `n` in a post-processing step.
""" """
if isinstance(field, Field): if isinstance(field, Field):
...@@ -40,22 +44,41 @@ def welford_assignments(field, mean_field, sum_of_products_field=None): ...@@ -40,22 +44,41 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
else: else:
raise ValueError("Mean vector field has to be a pystencils Field or Field.Access") raise ValueError("Mean vector field has to be a pystencils Field or Field.Access")
if sum_of_products_field is None: if sum_of_squares_field is None:
# sum of products will not be calculated, i.e., the variance is not retrievable # sum of products will not be calculated, i.e., the covariance matrix is not retrievable
welford_sum_of_products_field = None welford_sum_of_squares_field = None
else:
if isinstance(sum_of_squares_field, Field):
welford_sum_of_squares_field = sum_of_squares_field.center
elif isinstance(sum_of_squares_field, Field.Access):
welford_sum_of_squares_field = sum_of_squares_field
else:
raise ValueError("Sum of squares field has to be a pystencils Field or Field.Access")
assert welford_sum_of_squares_field.field.values_per_cell() == dim ** 2, \
(f"Sum of squares field does not have the right layout. "
f"Index dimension: {welford_sum_of_squares_field.field.values_per_cell()}, expected: {dim ** 2}")
if sum_of_cubes_field is None:
# sum of cubes will not be calculated, i.e., the 3rd-order central moments are not retrievable
welford_sum_of_cubes_field = None
else: else:
if isinstance(sum_of_products_field, Field): if isinstance(sum_of_cubes_field, Field):
welford_sum_of_products_field = sum_of_products_field.center welford_sum_of_cubes_field = sum_of_cubes_field.center
assert sum_of_products_field.values_per_cell() == dim**2, \ elif isinstance(sum_of_cubes_field, Field.Access):
(f"Sum of products field does not have the right layout. " welford_sum_of_cubes_field = sum_of_cubes_field
f"Index dimension: {sum_of_products_field.values_per_cell()}, expected: {dim**2}")
elif isinstance(sum_of_products_field, Field.Access):
welford_sum_of_products_field = sum_of_products_field
assert sum_of_products_field.field.values_per_cell() == dim**2, \
(f"Sum of products field does not have the right layout. "
f"Index dimension: {sum_of_products_field.field.values_per_cell()}, expected: {dim**2}")
else: else:
raise ValueError("Variance vector field has to be a pystencils Field or Field.Access") raise ValueError("Sum of cubes field has to be a pystencils Field or Field.Access")
assert welford_sum_of_cubes_field.field.values_per_cell() == dim ** 3, \
(f"Sum of cubes field does not have the right layout. "
f"Index dimension: {welford_sum_of_cubes_field.field.values_per_cell()}, expected: {dim ** 3}")
# for the calculation of the thrid-order moments, the variance must also be calculated
if welford_sum_of_cubes_field is not None:
assert welford_sum_of_squares_field is not None
# actual assignments
counter = sp.Symbol('counter') counter = sp.Symbol('counter')
delta = sp.symbols(f"delta_:{dim}") delta = sp.symbols(f"delta_:{dim}")
...@@ -67,7 +90,7 @@ def welford_assignments(field, mean_field, sum_of_products_field=None): ...@@ -67,7 +90,7 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
main_assignments.append(ps.Assignment(welford_mean_field.at_index(i), main_assignments.append(ps.Assignment(welford_mean_field.at_index(i),
welford_mean_field.at_index(i) + delta[i] / counter)) welford_mean_field.at_index(i) + delta[i] / counter))
if sum_of_products_field is not None: if sum_of_squares_field is not None:
delta2 = sp.symbols(f"delta2_:{dim}") delta2 = sp.symbols(f"delta2_:{dim}")
for i in range(dim): for i in range(dim):
main_assignments.append( main_assignments.append(
...@@ -76,7 +99,21 @@ def welford_assignments(field, mean_field, sum_of_products_field=None): ...@@ -76,7 +99,21 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
for j in range(dim): for j in range(dim):
idx = i * dim + j idx = i * dim + j
main_assignments.append(ps.Assignment( main_assignments.append(ps.Assignment(
welford_sum_of_products_field.at_index(idx), welford_sum_of_squares_field.at_index(idx),
welford_sum_of_products_field.at_index(idx) + delta[i] * delta2[j])) welford_sum_of_squares_field.at_index(idx) + delta[i] * delta2[j]))
if sum_of_cubes_field is not None:
for i in range(dim):
for j in range(dim):
for k in range(dim):
idx = (i * dim + j) * dim + k
main_assignments.append(ps.Assignment(
welford_sum_of_cubes_field.at_index(idx),
welford_sum_of_cubes_field.at_index(idx)
- delta[k] / counter * welford_sum_of_squares_field(i * dim + j)
- delta[j] / counter * welford_sum_of_squares_field(k * dim + i)
- delta[i] / counter * welford_sum_of_squares_field(j * dim + k)
+ delta2[i] * (2 * delta[j] - delta2[j]) * delta[k]
))
return main_assignments return main_assignments
import pytest
import numpy as np
import pystencils as ps
from lbmpy.flow_statistics import welford_assignments
@pytest.mark.parametrize('order', [1, 2, 3])
@pytest.mark.parametrize('dim', [1, 2, 3])
def test_welford(order, dim):
target = ps.Target.CPU
# create data handling and fields
dh = ps.create_data_handling((1, 1, 1), periodicity=False, default_target=target)
vector_field = dh.add_array('vector', values_per_cell=dim)
dh.fill(vector_field.name, 0.0, ghost_layers=False)
mean_field = dh.add_array('mean', values_per_cell=dim)
dh.fill(mean_field.name, 0.0, ghost_layers=False)
if order >= 2:
sos = dh.add_array('sos', values_per_cell=dim**2)
dh.fill(sos.name, 0.0, ghost_layers=False)
if order == 3:
soc = dh.add_array('soc', values_per_cell=dim**3)
dh.fill(soc.name, 0.0, ghost_layers=False)
else:
soc = None
else:
sos = None
soc = None
welford = welford_assignments(field=vector_field, mean_field=mean_field,
sum_of_squares_field=sos, sum_of_cubes_field=soc)
welford_kernel = ps.create_kernel(welford).compile()
# set random seed
np.random.seed(42)
n = int(1e4)
x = np.random.normal(size=n * dim).reshape(n, dim)
analytical_mean = np.zeros(dim)
analytical_covariance = np.zeros(dim**2)
analytical_third_order_moments = np.zeros(dim**3)
# calculate analytical mean
for i in range(dim):
analytical_mean[i] = np.mean(x[:, i])
# calculate analytical covariances
for i in range(dim):
for j in range(dim):
analytical_covariance[i * dim + j] \
= (np.sum((x[:, i] - analytical_mean[i]) * (x[:, j] - analytical_mean[j]))) / n
# calculate analytical third-order central moments
for i in range(dim):
for j in range(dim):
for k in range(dim):
analytical_third_order_moments[(i * dim + j) * dim + k] \
= (np.sum((x[:, i] - analytical_mean[i])
* (x[:, j] - analytical_mean[j])
* (x[:, k] - analytical_mean[k]))) / n
# Time loop
counter = 1
for i in range(n):
for d in range(dim):
new_value = x[i, d]
if dim > 1:
dh.fill(array_name=vector_field.name, val=new_value, value_idx=d, ghost_layers=False)
else:
dh.fill(array_name=vector_field.name, val=new_value, ghost_layers=False)
dh.run_kernel(welford_kernel, counter=counter)
counter += 1
welford_mean = dh.gather_array(mean_field.name).flatten()
np.testing.assert_allclose(welford_mean, analytical_mean)
if order >= 2:
welford_covariance = dh.gather_array(sos.name).flatten() / n
np.testing.assert_allclose(welford_covariance, analytical_covariance)
if order == 3:
welford_third_order_moments = dh.gather_array(soc.name).flatten() / n
np.testing.assert_allclose(welford_third_order_moments, analytical_third_order_moments)
if __name__ == '__main__':
test_welford(1, 1)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment