Skip to content
Snippets Groups Projects

Welford algorithm extension

Merged Helen Schottenhamml requested to merge welford_extension into master
Compare and
2 files
+ 151
22
Preferences
Compare changes
Files
2
+ 59
22
@@ -4,17 +4,18 @@ import pystencils as ps
from pystencils.field import Field
def welford_assignments(field, mean_field, sum_of_products_field=None):
def welford_assignments(field, mean_field, sum_of_squares_field=None, sum_of_cubes_field=None):
r"""
Creates the assignments for the kernel creation of Welford's algorithm
(https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm).
This algorithm is an online algorithm for the mean and variance calculation of sample data, here implemented for
the execution on vector fields, e.g., velocity.
The calculation of the variance is optional and only performed if a field for the sum of products is given.
the execution on scalar or vector fields, e.g., velocity.
The calculation of the variance / the third-order central moments is optional and only performed if a field for
the sum of squares / sum of cubes is given.
The mean value is directly updated in the mean vector field.
The variance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the sum of
products after the first :math `n` samples. According to Welford the biased sample variance
The variance / covariance must be retrieved in a post-processing step. Let :math `M_{2,n}` denote the value of the
sum of squares after the first :math `n` samples. According to Welford the biased sample variance
:math `\sigma_n^2` and the unbiased sample variance :math `s_n^2` are given by
.. math ::
@@ -22,6 +23,9 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
s_n^2 = \frac{M_{2,n}}{n-1},
respectively.
Liekwise, to the 3rd-order central moment(s) of the (vector) field, the sum of cubes field must be divided
by :math `n` in a post-processing step.
"""
if isinstance(field, Field):
@@ -40,22 +44,41 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
else:
raise ValueError("Mean vector field has to be a pystencils Field or Field.Access")
if sum_of_products_field is None:
# sum of products will not be calculated, i.e., the variance is not retrievable
welford_sum_of_products_field = None
if sum_of_squares_field is None:
# sum of products will not be calculated, i.e., the covariance matrix is not retrievable
welford_sum_of_squares_field = None
else:
if isinstance(sum_of_squares_field, Field):
welford_sum_of_squares_field = sum_of_squares_field.center
elif isinstance(sum_of_squares_field, Field.Access):
welford_sum_of_squares_field = sum_of_squares_field
else:
raise ValueError("Sum of squares field has to be a pystencils Field or Field.Access")
assert welford_sum_of_squares_field.field.values_per_cell() == dim ** 2, \
(f"Sum of squares field does not have the right layout. "
f"Index dimension: {welford_sum_of_squares_field.field.values_per_cell()}, expected: {dim ** 2}")
if sum_of_cubes_field is None:
# sum of cubes will not be calculated, i.e., the 3rd-order central moments are not retrievable
welford_sum_of_cubes_field = None
else:
if isinstance(sum_of_products_field, Field):
welford_sum_of_products_field = sum_of_products_field.center
assert sum_of_products_field.values_per_cell() == dim**2, \
(f"Sum of products field does not have the right layout. "
f"Index dimension: {sum_of_products_field.values_per_cell()}, expected: {dim**2}")
elif isinstance(sum_of_products_field, Field.Access):
welford_sum_of_products_field = sum_of_products_field
assert sum_of_products_field.field.values_per_cell() == dim**2, \
(f"Sum of products field does not have the right layout. "
f"Index dimension: {sum_of_products_field.field.values_per_cell()}, expected: {dim**2}")
if isinstance(sum_of_cubes_field, Field):
welford_sum_of_cubes_field = sum_of_cubes_field.center
elif isinstance(sum_of_cubes_field, Field.Access):
welford_sum_of_cubes_field = sum_of_cubes_field
else:
raise ValueError("Variance vector field has to be a pystencils Field or Field.Access")
raise ValueError("Sum of cubes field has to be a pystencils Field or Field.Access")
assert welford_sum_of_cubes_field.field.values_per_cell() == dim ** 3, \
(f"Sum of cubes field does not have the right layout. "
f"Index dimension: {welford_sum_of_cubes_field.field.values_per_cell()}, expected: {dim ** 3}")
# for the calculation of the thrid-order moments, the variance must also be calculated
if welford_sum_of_cubes_field is not None:
assert welford_sum_of_squares_field is not None
# actual assignments
counter = sp.Symbol('counter')
delta = sp.symbols(f"delta_:{dim}")
@@ -67,7 +90,7 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
main_assignments.append(ps.Assignment(welford_mean_field.at_index(i),
welford_mean_field.at_index(i) + delta[i] / counter))
if sum_of_products_field is not None:
if sum_of_squares_field is not None:
delta2 = sp.symbols(f"delta2_:{dim}")
for i in range(dim):
main_assignments.append(
@@ -76,7 +99,21 @@ def welford_assignments(field, mean_field, sum_of_products_field=None):
for j in range(dim):
idx = i * dim + j
main_assignments.append(ps.Assignment(
welford_sum_of_products_field.at_index(idx),
welford_sum_of_products_field.at_index(idx) + delta[i] * delta2[j]))
welford_sum_of_squares_field.at_index(idx),
welford_sum_of_squares_field.at_index(idx) + delta[i] * delta2[j]))
if sum_of_cubes_field is not None:
for i in range(dim):
for j in range(dim):
for k in range(dim):
idx = (i * dim + j) * dim + k
main_assignments.append(ps.Assignment(
welford_sum_of_cubes_field.at_index(idx),
welford_sum_of_cubes_field.at_index(idx)
- delta[k] / counter * welford_sum_of_squares_field(i * dim + j)
- delta[j] / counter * welford_sum_of_squares_field(k * dim + i)
- delta[i] / counter * welford_sum_of_squares_field(j * dim + k)
+ delta2[i] * (2 * delta[j] - delta2[j]) * delta[k]
))
return main_assignments