# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.7
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---
# %%
from pystencils.session import *
# %% [markdown]
# # Demo: Working with derivatives
#
#
# ## Overview
# This notebook demonstrates how to formulate continuous differential operators in *pystencils* and automatically derive finite difference stencils from them.
#
# Instead of using the built-in derivatives in *sympy*, *pystencils* comes with its own derivative objects. They represent spatial derivatives of pystencils fields.
# %%
f = ps.fields("f: [2D]")
first_derivative_x = ps.fd.diff(f, 0)
first_derivative_x
# %% [markdown]
# This object is the derivative of the field $f$ with respect to the first spatial coordinate $x$. To get a finite difference approximation a discretization strategy is required:
# %%
discretize_2nd_order = ps.fd.Discretization2ndOrder(dx=sp.symbols("h"))
discretize_2nd_order(first_derivative_x)
# %% [markdown]
# Strictly speaking, derivative objects act on *field accesses*, not *fields*, which is why there is a $(0,0)$ index on the field:
# %%
first_derivative_x
# %% [markdown]
# Sometimes it might be useful to specify derivatives at an offset, e.g.
# %%
derivative_offset = ps.fd.diff(f[0, 1], 0)
derivative_offset, discretize_2nd_order(derivative_offset)
# %% [markdown]
# Another example with second order derivatives:
# %%
laplacian = ps.fd.diff(f, 0, 0) + ps.fd.diff(f, 1, 1)
laplacian
# %%
discretize_2nd_order(laplacian)
# %% [markdown]
# ## Working with derivative terms
#
# No automatic simplifications are applied to derivative terms, i.e. linearity relations and the product rule are not applied automatically.
# %%
f, g = ps.fields("f, g: [2D]")
c = sp.symbols("c")
δ = ps.fd.diff
expr = δ(δ(f, 0) + δ(g, 0) + c + 5, 0)
expr
# %% [markdown]
# This nested term cannot be discretized automatically.
# %%
try:
discretize_2nd_order(expr)
except ValueError as e:
print(e)
# %% [markdown]
# ### Linearity
# The following function expands all derivatives exploiting linearity:
# %%
ps.fd.expand_diff_linear(expr)
# %% [markdown]
# The symbol $c$ that was included is interpreted as a function by default.
# We can control the simplification behaviour by specifying all functions or all constants:
# %%
ps.fd.expand_diff_linear(expr, functions=(f[0,0], g[0, 0]))
# %%
ps.fd.expand_diff_linear(expr, constants=[c])
# %% [markdown]
# The expanded term can then be discretized:
# %%
discretize_2nd_order(ps.fd.expand_diff_linear(expr, constants=[c]))
# %% [markdown]
# ### Product rule
#
# The next cells show how to apply the product rule and its reverse:
# %%
expr = δ(f[0, 0] * g[0, 0], 0)
expr
# %%
expanded_expr = ps.fd.expand_diff_products(expr)
expanded_expr
# %%
recombined_expr = ps.fd.combine_diff_products(expanded_expr)
recombined_expr
# %%
assert recombined_expr == expr
# %% [markdown]
# ### Evaluate derivatives
#
# The arguments of a derivative need not be fields; they only have to be fields when the term is to be discretized.
# The next cells show how to transform such terms to *sympy* derivatives and evaluate them.
# %%
k = sp.symbols("k")
expr = δ(k**3 + 2 * k, 0)
expr
# %%
ps.fd.evaluate_diffs(expr, var=k)
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.7
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---
# %%
from pystencils.session import *
import shutil
# %%
try:
from scipy.ndimage import gaussian_filter
scipy_avail = True
except ImportError:
scipy_avail = False
# %% [markdown]
# # Plotting and Animation
#
#
# ## 1) Stencils
#
# This section shows how to visualize stencils, i.e. lists of directions
# %%
stencil_5pt = [(0, 0), (1, 0), (-1, 0), (0, 1), (0, -1)]
plt.figure(figsize=(2, 2))
ps.stencil.plot(stencil_5pt)
# %% [markdown]
# By default, the indices of the directions are shown at the arrow endpoints.
# Custom data can also be plotted:
# %%
plt.figure(figsize=(2, 2))
ps.stencil.plot(stencil_5pt, data=[2.0, 5, 'test', 42, 1])
# %% [markdown]
# The stencil can also be given as an expression in a single field:
# %%
f = ps.fields("f: [2D]")
c = sp.symbols("c")
expr = f[0, 0] + 2 * f[1, 0] + c * f[-1, 0] + 4 * f[0, 1] - c**2 * f[0, -1]
plt.figure(figsize=(2, 2))
ps.stencil.plot_expression(expr)
# %% [markdown]
# There is also a function to simply plot sympy expressions depending on one variable only:
# %%
x = sp.symbols("x")
plt.figure(figsize=(4, 4))
plt.sympy_function(x**2 - 1, x_values=np.linspace(-2, 2));
# %% [markdown]
# Three dimensional stencils can be visualized in two different ways
# %%
stencil_7pt = [(0, 0, 0), (1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
ps.stencil.plot(stencil_7pt, data=[i for i in range(7)])
# %% [markdown]
# This kind of plot works well for small stencils, for larger stencils this gets messy:
# %%
stencil_27pt = [(i, j, k) for i in (-1, 0, 1) for j in (-1, 0, 1) for k in (-1, 0, 1)]
ps.stencil.plot(stencil_27pt, data=[i for i in range(27)])
# %% [markdown]
# Adding `slice=True` shows the three projections of the stencils instead
# %%
ps.stencil.plot(stencil_27pt, data=[i for i in range(27)], slice=True)
# %% [markdown]
# ## 2) Scalar fields
#
# *pystencils* also comes with helper functions to plot simulation data.
# The plotting functions are simple but useful wrappers around basic Matplotlib functionality.
# *pystencils* imports all matplotlib functions as well, such that one can use
#
# ```import pystencils.plot as plt```
#
# instead of
#
# ```import matplotlib.pyplot as plt```
#
# The session import at the top of the notebook does this already, and also sets up inline plotting for notebooks.
#
# This section demonstrates how to plot 2D scalar fields. Internally, `imshow` from matplotlib is used; however, the coordinate system is changed such that $(0,0)$ is in the lower left corner, the $x$-axis points to the right, and the $y$-axis upward.
#
# The next function returns a scalar field for demonstration
# %%
def example_scalar_field(t=0):
x, y = np.meshgrid(np.linspace(0, 2 * np.pi, 100), np.linspace(0, 2 * np.pi, 100))
z = np.cos(x + 0.1 * t) * np.sin(y + 0.1 * t) + 0.1 * x * y
return z
# %% [markdown]
# To show a single scalar field use `plt.scalar_field`:
# %%
arr = example_scalar_field()
plt.scalar_field(arr)
plt.colorbar()
# %% [markdown]
# A similar wrapper exists around `contour` from matplotlib:
# %%
plt.scalar_field_contour(arr);
# %% [markdown]
# A 3D surface plot is also possible:
# %%
plt.scalar_field_surface(arr)
# %% [markdown]
# For simulation results one often needs to visualize time-dependent data. To show an evolving scalar field, an animation can be created as shown in the next cell:
# %%
t = 0
def run_func():
global t
t += 1
return example_scalar_field(t)
# %%
if shutil.which("ffmpeg") is not None:
plt.figure()
animation = plt.scalar_field_animation(run_func, frames=60)
ps.jupyter.display_as_html_video(animation)
else:
print("No ffmpeg installed")
# %% [markdown]
# Another way to display animations is as an image sequence. While `run_func`, i.e. the simulation, is running, the current state is displayed as an HTML image. Use this method when your `run_func` takes a long time and you want to see the status while the simulation runs. The `frames` parameter specifies how often the run function will be called.
# %%
plt.figure()
animation = plt.scalar_field_animation(run_func, frames=30)
ps.jupyter.display_as_html_image(animation)
# %% [markdown]
# For surface plots there is also an animated version:
# %%
if shutil.which("ffmpeg") is not None:
animation = plt.surface_plot_animation(run_func, frames=60)
ps.jupyter.display_as_html_video(animation)
else:
print("No ffmpeg installed")
# %% [markdown]
# ## 3) Vector Fields
#
# *pystencils* provides another set of plotting functions for vector fields, e.g. velocity fields.
# Such arrays have three dimensions, where the last coordinate has to have two entries, representing the $x$ and $y$ components of the vector.
# %%
def example_vector_field(t=0, shape=(40, 40)):
result = np.empty(shape + (2,))
x, y = np.meshgrid(np.linspace(0, 2 * np.pi, shape[0]), np.linspace(0, 2 * np.pi, shape[1]))
result[..., 0] = np.cos(x + 0.1 * t) * np.sin(y + 0.1 * t) + 0.01 * x * y
result[..., 1] = np.cos(0.001 * y)
return result
# %% [markdown]
# Vector fields can either be plotted using quivers (arrows)
# %%
plt.figure()
data = example_vector_field()
plt.vector_field(data, step=1);
# %% [markdown]
# The `step` parameter can be used to display only every $n$'th arrow:
# %%
plt.figure()
plt.vector_field(data, step=2);
# %% [markdown]
# or by displaying their magnitude in a colormap
# %%
plt.figure()
plt.vector_field_magnitude(data);
# %% [markdown]
# There are also functions to animate both variants, quiver plots...
# %%
t = 0
def run_func():
global t
t += 1
return example_vector_field(t)
# %%
if shutil.which("ffmpeg") is not None:
plt.figure()
animation = plt.vector_field_animation(run_func, frames=60)
ps.jupyter.display_as_html_video(animation)
else:
print("No ffmpeg installed")
# %% [markdown]
# ...and magnitude plots
# %%
if shutil.which("ffmpeg") is not None:
animation = plt.vector_field_magnitude_animation(run_func, frames=60)
ps.jupyter.display_as_html_video(animation)
else:
print("No ffmpeg installed")
# %% [markdown]
# ## 4) Phase Fields
#
# A third group of plotting functions helps to display arrays as they occur in phase-field simulations. However, these functions may also be useful for other kinds of simulations.
# They expect arrays where the last coordinate indicates the fraction of a certain component, i.e. `arr[x, y, 2]` should be a value between $0$ and $1$, specifying the fraction of the third phase at $(x, y)$. The plotting functions expect that the sum over the last coordinate gives $1$ for all cells.
#
# Let's again generate some example data:
# %%
def example_phase_field():
if scipy_avail:
from scipy.ndimage import gaussian_filter
shape=(80, 80)
result = np.zeros(shape + (4,))
result[20:40, 20:40, 0] = 1
if scipy_avail:
gaussian_filter(result[..., 0], sigma=3, output=result[..., 0])
result[50:70, 30:50, 1] = 1
if scipy_avail:
gaussian_filter(result[..., 1], sigma=3, output=result[..., 1])
result[:, :10, 2] = 1
result[:, :, 3] = 1 - np.sum(result, axis=2)
return result
data = example_phase_field()
# %% [markdown]
# The `scalar_field_alpha_value` function uses the last entry, i.e. the value between 0 and 1, as the alpha value of the specified colour to show where the phase is located. This visualization makes it easy to distinguish between smooth and sharp transitions. Note: if `scipy` is not installed, the phase field has a sharp interface, because no Gaussian filter was applied above. This is not realistic, but it still demonstrates the visualization functions.
# %%
plt.scalar_field_alpha_value(data[..., 0], color='b')
plt.scalar_field_alpha_value(data[..., 1], color='r')
plt.scalar_field_alpha_value(data[..., 2], color='k')
# %% [markdown]
# To see all phases at once, the `phase_plot` function applies this alpha-value representation to every phase:
# %%
plt.phase_plot(data)
# %% [markdown]
# Another option is to display each field separately in a subplot:
# %%
plt.multiple_scalar_fields(data)
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.7
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---
# %% nbsphinx="hidden"
import psutil
from pystencils.session import *
import shutil
# %% [markdown]
# # Demo: Finite differences - 2D wave equation
#
# In this tutorial we show how to use the finite difference module of *pystencils* to solve a 2D wave equation. The time derivative is discretized by a simple forward Euler method.
# %% [markdown]
# $$ \frac{\partial^2 u}{\partial t^2} = \mbox{div} \left( q(x,y) \nabla u \right)$$
# %% [markdown]
# We begin by creating three *numpy* arrays for the current, the previous and the next timestep. Actually we will see later that two fields are enough, but let's keep it simple. From these *numpy* arrays we create *pystencils* fields to formulate our update rule.
# %%
size = (60, 70) # domain size
u_arrays = [np.zeros(size), np.zeros(size), np.zeros(size)]
u_fields = [ps.Field.create_from_numpy_array("u%s" % (name,), arr)
for name, arr in zip(["0", "1", "2"], u_arrays)]
# Nicer display for fields
for i, field in enumerate(u_fields):
field.latex_name = "u^{(%d)}" % (i,)
# %% [markdown]
# *pystencils* already contains simple rules to discretize a diffusion term. The time discretization is done manually.
# %%
discretize = ps.fd.Discretization2ndOrder()
def central2nd_time_derivative(fields):
f_next, f_current, f_last = fields
return (f_next[0, 0] - 2 * f_current[0, 0] + f_last[0, 0]) / discretize.dt**2
rhs = ps.fd.diffusion(u_fields[1], 1)
wave_eq = sp.Eq(central2nd_time_derivative(u_fields), discretize(rhs))
wave_eq = sp.simplify(wave_eq)
wave_eq
# %% [markdown]
# The explicit Euler scheme is now obtained by solving the above equation for $u_C^{next}$.
# %%
u_next_C = u_fields[-1][0, 0]
update_rule = ps.Assignment(u_next_C, sp.solve(wave_eq, u_next_C)[0])
update_rule
# %% [markdown]
# Before creating the kernel, we substitute numeric values for $dx$ and $dt$.
# Then a kernel is created just like in the last tutorial.
# %%
update_rule = update_rule.subs({discretize.dx: 0.1, discretize.dt: 0.05})
ast = ps.create_kernel(update_rule)
kernel = ast.compile()
ps.show_code(ast)
# %%
ast.get_parameters()
# %%
ast.fields_accessed
# %% [markdown]
# To run the simulation, a suitable initial condition and boundary treatment are required. We choose an initial condition which is zero at the borders of the domain. The outermost layer is not changed by the update kernel, so we have an implicit homogeneous Dirichlet boundary condition.
# %%
X, Y = np.meshgrid(np.linspace(0, 1, size[1]), np.linspace(0, 1, size[0]))
Z = np.sin(2*X*np.pi) * np.sin(2*Y*np.pi)
# Initialize the previous and current values with the initial function
np.copyto(u_arrays[0], Z)
np.copyto(u_arrays[1], Z)
# The values for the next timesteps do not matter, since they are overwritten
u_arrays[2][:, :] = 0
# %% [markdown]
# One timestep now consists of applying the kernel once, then shifting the arrays.
# %%
def run(timesteps=1):
for t in range(timesteps):
kernel(u0=u_arrays[0], u1=u_arrays[1], u2=u_arrays[2])
u_arrays[0], u_arrays[1], u_arrays[2] = u_arrays[1], u_arrays[2], u_arrays[0]
return u_arrays[2]
# %% [markdown]
# Let's create an animation of the solution:
# %%
if shutil.which("ffmpeg") is not None:
ani = plt.surface_plot_animation(run, zlim=(-1, 1))
ps.jupyter.display_as_html_video(ani)
else:
print("No ffmpeg installed")
# %%
assert np.isfinite(np.max(u_arrays[2]))
# %% [markdown]
# __Running on GPU__
#
# We can also run the same kernel on the GPU by using the ``cupy`` package.
# %%
try:
import cupy
except ImportError:
cupy=None
print('No cupy installed')
res = None
if cupy:
gpu_ast = ps.create_kernel(update_rule, target=ps.Target.GPU)
gpu_kernel = gpu_ast.compile()
res = ps.show_code(gpu_ast)
res
# %% [markdown]
# The run function now has to be changed slightly: the data has to be transferred to the GPU first, then the kernel can be executed, and in the end the data has to be transferred back.
# %%
if cupy:
def run_on_gpu(timesteps=1):
# Transfer arrays to GPU
gpuArrs = [cupy.asarray(cpu_array) for cpu_array in u_arrays]
for t in range(timesteps):
gpu_kernel(u0=gpuArrs[0], u1=gpuArrs[1], u2=gpuArrs[2])
gpuArrs[0], gpuArrs[1], gpuArrs[2] = gpuArrs[1], gpuArrs[2], gpuArrs[0]
# Transfer arrays to CPU
for gpuArr, cpuArr in zip(gpuArrs, u_arrays):
cpuArr[:] = gpuArr.get()
assert np.isfinite(np.max(u_arrays[2]))
# %%
if cupy:
run_on_gpu(400)
.. _page_tutorials:
Tutorials
=========
@@ -8,14 +10,14 @@ It is a good idea to download them and run them directly to be able to play around
.. toctree::
:maxdepth: 1
/notebooks/01_tutorial_getting_started.ipynb
/notebooks/02_tutorial_basic_kernels.ipynb
/notebooks/03_tutorial_datahandling.ipynb
/notebooks/04_tutorial_advection_diffusion.ipynb
/notebooks/05_tutorial_phasefield_spinodal_decomposition.ipynb
/notebooks/06_tutorial_phasefield_dentritic_growth.ipynb
/notebooks/demo_assignment_collection.ipynb
/notebooks/demo_plotting_and_animation.ipynb
/notebooks/demo_derivatives.ipynb
/notebooks/demo_benchmark.ipynb
/notebooks/demo_wave_equation.ipynb
01_tutorial_getting_started.ipynb
02_tutorial_basic_kernels.ipynb
03_tutorial_datahandling.ipynb
04_tutorial_advection_diffusion.ipynb
05_tutorial_phasefield_spinodal_decomposition.ipynb
06_tutorial_phasefield_dentritic_growth.ipynb
demo_assignment_collection.ipynb
demo_plotting_and_animation.ipynb
demo_derivatives.ipynb
demo_benchmark.ipynb
demo_wave_equation.ipynb
---
file_format: mystnb
kernelspec:
display_name: Python 3 (ipykernel)
language: python
name: python3
mystnb:
execution_mode: cache
---
# Working with Data Types
This guide will demonstrate the various options that exist to customize the data types
in generated kernels.
Data types can be modified on different levels of granularity:
individual fields and symbols,
single subexpressions,
or the entire kernel.
```{code-cell} ipython3
:tags: [remove-cell]
import pystencils as ps
import sympy as sp
```
## Changing the Default Data Types
The pystencils code generator defines two default data types:
- The default *numeric type*, which is applied to all numerical computations that are not
otherwise explicitly typed; the default is `float64`.
- The default *index type*, which is used for all loop and field index calculations; the default is `int64`.
These can be modified by setting the
{any}`default_dtype <CreateKernelConfig.default_dtype>` and
{any}`index_dtype <CreateKernelConfig.index_dtype>`
options of the code generator configuration:
```{code-cell} ipython3
cfg = ps.CreateKernelConfig()
cfg.default_dtype = "float32"
cfg.index_dtype = "int32"
```
Modifying these will change the way types for [untyped symbols](#untyped-symbols)
and [dynamically typed expressions](#dynamic-typing) are computed.
## Setting the Types of Fields and Symbols
(untyped-symbols)=
### Untyped Symbols
Symbols used inside a kernel are most commonly created using
{any}`sp.symbols <sympy.core.symbol.symbols>` or
{any}`sp.Symbol <sympy.core.symbol.Symbol>`.
These symbols are *untyped*; they will receive a type during code generation
according to these rules:
- Free untyped symbols (i.e. symbols not defined by an assignment inside the kernel) receive the
{any}`default data type <CreateKernelConfig.default_dtype>` specified in the code generator configuration.
- Bound untyped symbols (i.e. symbols that *are* defined in an assignment)
receive the data type that was computed for the right-hand side expression of their defining assignment.
If you are working on kernels with homogeneous data types, using untyped symbols will mostly be enough.
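
As a brief illustration, consider a kernel with one bound and one free untyped symbol.
This is a minimal sketch under the default configuration (the field and symbol names are chosen for this example only; we read the inferred types off the kernel's parameter list):

```{code-cell} ipython3
u = ps.fields("u: float64[2D]")
a, b = sp.symbols("a, b")

assignments = [
    # `a` is bound: it receives the type inferred for its defining RHS (float64, via `u`)
    ps.Assignment(a, u(0) + 1),
    # `b` is free: it receives the configured default numeric type
    ps.Assignment(u(0), a * b),
]

kernel = ps.create_kernel(assignments)
[(p.name, str(p.dtype)) for p in kernel.parameters]
```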
### Explicitly Typed Symbols and Fields
If you need more control over the data types in (parts of) your kernel,
you will have to explicitly specify them.
To set an explicit data type for a symbol, use the {any}`TypedSymbol` class of pystencils:
```{code-cell} ipython3
x_typed = ps.TypedSymbol("x", "uint32")
x_typed, str(x_typed.dtype)
```
You can set a `TypedSymbol` to any data type provided by [the type system](#page_type_system),
which will then be enforced by the code generator.
The same holds for fields:
When creating fields through the {any}`fields <pystencils.field.fields>` function,
add the type to the descriptor string; for instance:
```{code-cell} ipython3
f, g = ps.fields("f(1), g(3): float32[3D]")
str(f.dtype), str(g.dtype)
```
When using `Field.create_generic` or `Field.create_fixed_size`, on the other hand,
you can set the data type via the `dtype` keyword argument.
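
For instance, a short sketch using the `dtype` keyword of `Field.create_fixed_size`:

```{code-cell} ipython3
h_field = ps.Field.create_fixed_size("h", (32, 32), dtype="float16")
str(h_field.dtype)
```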
(dynamic-typing)=
### Dynamically Typed Symbols and Fields
Apart from explicitly setting data types,
`TypedSymbol`s and fields can also receive a *dynamic data type* (see {any}`DynamicType`).
There are two options:
- Symbols or fields annotated with {any}`DynamicType.NUMERIC_TYPE` will always receive
the {any}`default numeric type <CreateKernelConfig.default_dtype>` configured for the
code generator.
This is the default setting for fields
created through `fields`, `Field.create_generic` or `Field.create_fixed_size`.
- When annotated with {any}`DynamicType.INDEX_TYPE`, on the other hand, they will receive
the {any}`index data type <CreateKernelConfig.index_dtype>` configured for the kernel.
Using dynamic typing, you can ensure that symbols receive either the standard numeric or
index type without explicitly stating it, such that your kernel definition becomes
independent of the code generator configuration.
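
For example, a relaxation rate could be pinned to the numeric type and a cell count to the index type (a sketch; the symbol names are illustrative):

```{code-cell} ipython3
omega = ps.TypedSymbol("omega", ps.DynamicType.NUMERIC_TYPE)
n_cells = ps.TypedSymbol("n_cells", ps.DynamicType.INDEX_TYPE)
omega.dtype, n_cells.dtype
```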
## Mixing Types Inside Expressions
Pystencils enforces that all symbols, constants, and fields occurring inside an expression
have the same data type.
The code generator will never introduce implicit casts -- if any type conflicts arise, it will terminate with an error.
Still, there are cases where you want to combine subexpressions of different types;
maybe you need to compute geometric information from loop counters or other integers,
or you are doing mixed-precision numerical computations.
In these cases, you might have to introduce explicit type casts when values move from one type context to another.
<!-- 2. Annotate expressions with a specific data type to ensure computations are performed in that type.
TODO: See #97 (https://i10git.cs.fau.de/pycodegen/pystencils/-/issues/97)
-->
(type_casts)=
### Type Casts
Type casts can be introduced into kernels using the {any}`tcast` symbolic function.
It takes an expression and a data type, which is either an explicit type (see [the type system](#page_type_system))
or a dynamic type ({any}`DynamicType`):
```{code-cell} ipython3
x, y = sp.symbols("x, y")
expr1 = ps.tcast(x, "float32")
expr2 = ps.tcast(3 + y, ps.DynamicType.INDEX_TYPE)
str(expr1.dtype), str(expr2.dtype)
```
When a type cast occurs, pystencils will compute the type of its argument independently
and then introduce a runtime cast to the target type.
That target type must comply with the type computed for the outer expression,
which the cast is embedded in.
## Understanding the pystencils Type Inference System
To correctly apply varying data types to pystencils kernels, it is important to understand
how pystencils computes and propagates the data types of symbols and expressions.
Type inference happens on the level of assignments.
For each assignment $x := \mathrm{calc}(y_1, \dots, y_n)$,
the system first attempts to compute a *unique* type for the right-hand side (RHS) $\mathrm{calc}(y_1, \dots, y_n)$.
It searches for any subexpression inside the RHS for which a type is already known --
these might be typed symbols
(whose types are either set explicitly by the user,
or have been determined from their defining assignment),
field accesses,
or explicitly typed expressions.
It then attempts to apply that data type to the entire expression.
If type conflicts occur, the process fails and the code generator raises an error.
Otherwise, the resulting type is assigned to the left-hand side symbol $x$.
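
As a miniature example of these rules (a sketch, not a new API): in the assignment below, the access to the `float32` field is the only subexpression with a known type, so that type is applied to the constant `3` and finally to the bound symbol `x`.

```{code-cell} ipython3
f32 = ps.fields("f32: float32[2D]")
x = sp.Symbol("x")

# f32(0) is known to be float32; during code generation this type is
# propagated to the entire RHS and then assigned to `x`.
ps.Assignment(x, f32(0) + 3)
```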
:::{admonition} Developer's To Do
It would be great to illustrate this using a GraphViz-plot of an AST,
with nodes colored according to their data types
:::
---
jupytext:
formats: md:myst
text_representation:
extension: .md
format_name: myst
format_version: 0.13
jupytext_version: 1.16.4
kernelspec:
display_name: Python 3 (ipykernel)
language: python
name: python3
mystnb:
execution_mode: cache
---
```{code-cell} ipython3
:tags: [remove-cell]
import sympy as sp
import pystencils as ps
import numpy as np
import matplotlib.pyplot as plt
```
(guide_gpukernels)=
# Pystencils for GPUs
Pystencils offers code generation for Nvidia GPUs using the CUDA programming model,
as well as just-in-time compilation and execution of CUDA kernels from within Python
based on the [cupy] library.
This section's objective is to give a detailed introduction to the creation of
GPU kernels with pystencils.
## Generate, Compile and Run CUDA Kernels
In order to obtain a CUDA implementation of a symbolic kernel, naught more is required
than setting the {any}`target <CreateKernelConfig.target>` code generator option to
{any}`Target.CUDA`:
```{code-cell} ipython3
f, g = ps.fields("f, g: float64[3D]")
update = ps.Assignment(f.center(), 2 * g.center())
cfg = ps.CreateKernelConfig(target=ps.Target.CUDA)
kernel = ps.create_kernel(update, cfg)
ps.inspect(kernel)
```
The `kernel` object returned by the code generator in the above snippet is an instance
of the {py:class}`GpuKernel` class.
It extends {py:class}`Kernel` with some GPU-specific information.
In particular, it defines the {any}`threads_range <GpuKernel.threads_range>`
property, which tells us how many threads the kernel is expecting to be executed with:
```{code-cell} ipython3
kernel.threads_range
```
If a GPU is available and [CuPy][cupy] is installed in the current environment,
the kernel can be compiled and run immediately.
To execute the kernel, a {any}`cupy.ndarray` has to be passed for each field.
:::{note}
[CuPy][cupy] is a Python library for numerical computations on GPU arrays,
which operates much in the same way that [NumPy][numpy] works on CPU arrays.
Cupy and NumPy expose nearly the same APIs for array operations;
the difference being that CuPy allocates all its arrays on the GPU
and performs its operations as CUDA kernels.
Also, CuPy exposes a just-in-time compiler for GPU kernels, which internally calls [nvcc].
In pystencils, we use CuPy both to compile and provide executable kernels on-demand from within Python code,
and to allocate and manage the data these kernels can be executed on.
For more information on CuPy, refer to [their documentation][cupy-docs].
:::
```{code-cell} ipython3
:tags: [raises-exception]
import cupy as cp
rng = cp.random.default_rng(seed=42)
f_arr = rng.random((16, 16, 16))
g_arr = cp.zeros_like(f_arr)
kfunc = kernel.compile()
kfunc(f=f_arr, g=g_arr)
```
### Modifying the Launch Grid
The `kernel.compile()` invocation in the above code produces a {any}`CupyKernelWrapper` callable object.
This object holds the kernel's launch grid configuration
(i.e. the number of thread blocks, and the number of threads per block.)
Pystencils specifies a default value for the block size and, if possible,
the number of blocks is automatically inferred in order to cover the entire iteration space.
In addition, the wrapper's interface allows us to customize the GPU launch grid,
by manually setting both the number of threads per block, and the number of blocks on the grid:
```{code-cell} ipython3
kfunc.block_size = (16, 8, 8)
kfunc.num_blocks = (1, 2, 2)
```
For most kernels, setting only the `block_size` is sufficient since pystencils will
automatically compute the number of blocks;
for exceptions to this, see [](#manual_launch_grids).
If `num_blocks` is set manually and the launch grid thus specified is too small, only
a part of the iteration space will be traversed by the kernel;
similarly, if it is too large, any threads working outside of the iteration bounds will idle.
(manual_launch_grids)=
### Manual Launch Grids and Non-Cuboid Iteration Patterns
In some cases, it will be unavoidable to set the launch grid size manually;
especially if the code generator is unable to automatically determine the size of the
iteration space.
An example for this is the triangular iteration previously described in the [Kernel Creation Guide](#example_triangular_iteration).
Let's set it up once more:
```{code-cell} ipython3
:tags: [remove-cell]
def _draw_ispace(f_arr):
n, m = f_arr.shape
fig, ax = plt.subplots()
ax.set_xticks(np.arange(0, m, 4))
ax.set_yticks(np.arange(0, n, 4))
# ax.set_xticklabels([])
# ax.set_yticklabels([])
ax.set_xticks(np.arange(-.5, m, 1), minor=True)
ax.set_yticks(np.arange(-.5, n, 1), minor=True)
ax.grid(which="minor", linewidth=2)
ax.tick_params(which='minor', bottom=False, left=False)
ax.imshow(f_arr, interpolation="none", aspect="equal", origin="lower")
```
```{code-cell} ipython3
:tags: [remove-cell]
f = ps.fields("f: float64[2D]")
assignments = [
ps.Assignment(f(0), 1)
]
```
```{code-cell} ipython3
y = ps.DEFAULTS.spatial_counters[0]
cfg = ps.CreateKernelConfig(
target=ps.Target.CUDA,
iteration_slice=ps.make_slice[:, y:]
)
kernel = ps.create_kernel(assignments, cfg).compile()
```
This warns us that the threads range could not be determined automatically.
We can disable this warning by setting `manual_launch_grid` in the GPU option category:
```{code-cell}
cfg.gpu.manual_launch_grid = True
```
Now, to execute our kernel, we have to manually specify its launch grid:
```{code-cell} ipython3
kernel.block_size = (8, 8)
kernel.num_blocks = (2, 2)
```
This way the kernel will cover this iteration space:
```{code-cell} ipython3
:tags: [remove-input]
f_arr = cp.zeros((16, 16))
kernel(f=f_arr)
_draw_ispace(cp.asnumpy(f_arr))
```
We can also observe the effect of decreasing the launch grid size:
```{code-cell} ipython3
kernel.block_size = (4, 4)
kernel.num_blocks = (2, 3)
```
```{code-cell} ipython3
:tags: [remove-input]
f_arr = cp.zeros((16, 16))
kernel(f=f_arr)
_draw_ispace(cp.asnumpy(f_arr))
```
Here, since there are only eight threads operating in $x$-direction,
and twelve threads in $y$-direction,
only a part of the triangle is being processed.
## API Reference
```{eval-rst}
.. autosummary::
:nosignatures:
pystencils.codegen.GpuKernel
pystencils.jit.gpu_cupy.CupyKernelWrapper
```
:::{admonition} Developers To Do:
- Fast approximation functions
- Fp16 on GPU
:::
[cupy]: https://cupy.dev "CuPy Homepage"
[numpy]: https://numpy.org "NumPy Homepage"
[nvcc]: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html "NVIDIA CUDA Compiler Driver"
[cupy-docs]: https://docs.cupy.dev/en/stable/overview.html "CuPy Documentation"
---
jupytext:
formats: md:myst
text_representation:
extension: .md
format_name: myst
format_version: 0.13
jupytext_version: 1.16.4
kernelspec:
display_name: Python 3 (ipykernel)
language: python
name: python3
mystnb:
execution_mode: cache
---
```{code-cell} ipython3
:tags: [remove-cell]
import sympy as sp
import pystencils as ps
import numpy as np
import matplotlib.pyplot as plt
```
(guide_kernelcreation)=
# Kernel Creation
Once a kernel's assignments are fully assembled, they need to be passed through pystencils' code
generation engine in order to produce the kernel's executable code.
The goal of this chapter is to shed light on pystencils' main code generation pipeline.
Here, we show how to invoke the code generator and discuss its various configuration options
and their effects on the generated kernel.
## Running the Code Generator
The primary way to invoke the code generation engine is through the {any}`create_kernel` function.
It takes two arguments:
- the list of assignments that make up the kernel (optionally wrapped as an ``AssignmentCollection``),
- and a configuration object, an instance of {any}`CreateKernelConfig <pystencils.codegen.config.CreateKernelConfig>`.
```{eval-rst}
.. currentmodule:: pystencils.codegen
.. autosummary::
:nosignatures:
create_kernel
CreateKernelConfig
```
For a simple kernel, an invocation of the code generator might look like this:
```{code-cell} ipython3
# Symbol and field definitions
u_src, u_dst, f = ps.fields("u_src, u_dst, f: float32[2D]")
h = sp.Symbol("h")
# Kernel definition
update = [
ps.Assignment(
u_dst[0,0], (h**2 * f[0, 0] + u_src[1, 0] + u_src[-1, 0] + u_src[0, 1] + u_src[0, -1]) / 4
)
]
# Code Generator Configuration
cfg = ps.CreateKernelConfig(
target=ps.Target.CUDA,
default_dtype="float32",
ghost_layers=1
)
kernel = ps.create_kernel(update, cfg)
```
The above snippet defines a five-point-stencil Jacobi update. A few noteworthy things are going on:
- The target data type of the kernel is to be `float32`.
  This is explicitly specified for the three fields `u_src`, `u_dst` and `f`.
  For the symbol `h`, this is left implicit; `h` therefore learns its data type from the `default_dtype` configuration option.
- The target hardware for this kernel are Nvidia GPUs; this is reflected by the `target` property being set to `Target.CUDA`.
- As the five-point stencil reads data from neighbors offset by one cell, it cannot legally execute on the outermost
  layer of nodes of the fields' 2D arrays. Here, we ensure that these outer layers are excluded by setting `ghost_layers=1`.
This is not strictly necessary, since the code generator could infer that information by itself.
## Inspecting the Generated Code
The object returned by the code generator, here named `kernel`, is an instance of the {any}`Kernel` class.
This object stores the kernel's name, its list of parameters, the set of fields it operates on, and its hardware target.
Also, it of course holds the kernel itself, in the form of an [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST).
This tree can be printed out as compilable code in the target language (C++ or, in this case, CUDA),
but holds much more information than the printed-out code string.
When working in a Jupyter notebook, we can interactively inspect the kernel using pystencils' `inspect` function.
This reveals a widget that allows us to investigate various details about the kernel:
- its general properties, such as name, parameters, fields, target, ...;
- its code, represented in the *pystencils IR syntax*;
- the same code, in native C++/CUDA syntax;
- and a visual representation of its abstract syntax tree.
```{code-cell} ipython3
ps.inspect(kernel)
```
## Configuring the Code Generator
The code generation engine can be configured using a wide range of options.
This section aims at explaining the majority of these options,
their interaction and effects, use cases and caveats.
### Target Specification
Pystencils supports code generation for a variety of CPU and GPU hardware.
```{eval-rst}
.. currentmodule:: pystencils.codegen
.. autosummary::
:nosignatures:
CreateKernelConfig.target
Target
```
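
For instance, a configuration may select one of these targets like so (a short sketch reusing the targets that appear elsewhere in this guide):

```{code-cell} ipython3
cpu_cfg = ps.CreateKernelConfig(target=ps.Target.X86_AVX512)
gpu_cfg = ps.CreateKernelConfig(target=ps.Target.CUDA)
```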
### Data Types
To produce valid output code, the code generator has to figure out the data types of each
symbol, expression, and assignment occurring inside a kernel.
This happens roughly according to the following rules:
- **Field Accesses:** Each field has a fixed data type set at its creation, which is also applied to
each access to that field.
- **Symbols:** Symbols obtain their data types from two sources.
  A symbol occurring first on the left-hand side of an assignment receives the data type that
  was inferred for the right-hand side expression of that assignment.
  Symbols occurring first inside some expression on the right-hand side of an assignment, on the other
  hand, receive the {any}`default_dtype <CreateKernelConfig.default_dtype>` set in the {any}`CreateKernelConfig`.
We can observe this behavior by setting up a kernel including several fields with different data types:
```{code-cell} ipython3
from pystencils.sympyextensions import tcast
f = ps.fields("f: float32[2D]")
g = ps.fields("g: float16[2D]")
x, y, z = sp.symbols("x, y, z")
assignments = [
ps.Assignment(x, 42),
ps.Assignment(y, f(0) + x),
ps.Assignment(z, g(0))
]
cfg = ps.CreateKernelConfig(
default_dtype="float32",
index_dtype="int32"
)
kernel = ps.create_kernel(assignments, cfg)
```
We can take a look at the result produced by the code generator after parsing the above kernel.
Inspecting the internal representation of the kernel's body and loop nest,
we see that `x` has received the `float32` type,
which was specified via the {py:data}`default_dtype <CreateKernelConfig.default_dtype>` option.
The symbol `z`, on the other hand, has inherited its data type `float16` from the access to the field `g`
on its declaration's right-hand side.
Also, we can observe that the loop counters and symbols related to the field's memory layout
are using the `int32` data type, as specified in {py:data}`index_dtype <CreateKernelConfig.index_dtype>`:
```{code-cell} ipython3
:tags: [remove-input]
driver = ps.codegen.get_driver(cfg, retain_intermediates=True)
kernel = driver(assignments)
ps.inspect(driver.intermediates.materialized_ispace, show_cpp=False)
```
:::{note}
To learn more about inspecting code after different stages of the code generator, refer to [this section](#section_codegen_stages).
:::
```{eval-rst}
.. currentmodule:: pystencils.codegen
.. autosummary::
:nosignatures:
CreateKernelConfig.default_dtype
CreateKernelConfig.index_dtype
```
### The Iteration Space
The *domain fields* a kernel operates on are understood to reside on a common,
one-, two- or three-dimensional computational grid.
The grid points may be understood as vertices or cells, depending on the application.
When executed, the kernel performs a computation and updates values on all, or a specific subset
of, these grid points.
The set of points the kernel actually operates on is defined by its *iteration space*.
There are three distinct options to control the iteration space in the code generator,
only one of which can be specified at a time:
- The ``ghost_layers`` option allows us to specify a number of layers of grid points on all
  domain borders that should be excluded from iteration;
- The ``iteration_slice`` option allows us to describe the iteration space using Pythonic slice objects;
- The ``index_field`` option can be used to realize a sparse list-based iteration by passing a special
  *index field* which holds a list of all points that should be processed.
:::{note}
The points within a kernel's iteration space are understood to be processed concurrently and in
no particular order;
the output of any kernel that relies on some specific iteration order is therefore undefined.
(When running on a GPU, all grid points might in fact be processed in perfect simultaneity!)
:::
```{eval-rst}
.. currentmodule:: pystencils.codegen
.. autosummary::
:nosignatures:
CreateKernelConfig.ghost_layers
CreateKernelConfig.iteration_slice
CreateKernelConfig.index_field
```
```{code-cell} ipython3
:tags: [remove-cell]
def _draw_ispace(f_arr):
n, m = f_arr.shape
fig, ax = plt.subplots()
ax.set_xticks(np.arange(0, m, 4))
ax.set_yticks(np.arange(0, n, 4))
# ax.set_xticklabels([])
# ax.set_yticklabels([])
ax.set_xticks(np.arange(-.5, m, 1), minor=True)
ax.set_yticks(np.arange(-.5, n, 1), minor=True)
ax.grid(which="minor", linewidth=2)
ax.tick_params(which='minor', bottom=False, left=False)
ax.imshow(f_arr, interpolation="none", aspect="equal", origin="lower")
```
#### Specifying Ghost Layers
One way to alter the iteration space is by introducing ghost layers on the domain borders.
These layers of grid points are stripped from the iterations, and can be used to hold
boundary values or exchange data in MPI-parallel simulations.
##### Automatic Ghost Layers
The easiest way to define an iteration space with ghost layers
is to set `ghost_layers=ps.AUTO`, which is also the default
when no iteration space options are specified.
In this case, the code generator will examine the kernel to find the maximum range
of its stencil -- that is, the maximum neighbor offset encountered in any field access.
If, for instance, a neighbor node in $x$-direction with offset $k$ is accessed by the kernel,
it cannot legally execute on the outermost $k$ layers of nodes in that direction since it would
access memory out-of-bounds.
Therefore, an automatic number of $k$ ghost layers at each domain border is inferred.
As we can see in the example below, the number of inferred ghost layers at each domain border will be set to the maximum required in any dimension.
```{code-cell} ipython3
:tags: [remove-cell]
u, v = ps.fields("u, v: [2D]")
```
To illustrate, the following kernel accesses neighbor nodes with a maximum offset of two:
```{code-cell} ipython3
ranged_update = ps.Assignment(u.center(), v[-2, -1] + v[2, 1])
cfg = ps.CreateKernelConfig(ghost_layers=ps.AUTO)
kernel = ps.create_kernel(ranged_update, cfg)
```
With `ghost_layers=ps.AUTO`, its iteration space will look like this (yellow cells are included, purple cells excluded).
```{code-cell} ipython3
:tags: [remove-input]
f = ps.fields("f: float64[2D]")
assignments = [
ranged_update,
ps.Assignment(f(0), 1)
]
kernel = ps.create_kernel(assignments).compile()
f_arr = np.zeros((16, 16))
u_arr = np.zeros_like(f_arr)
v_arr = np.zeros_like(f_arr)
kernel(f=f_arr, u=u_arr, v=v_arr)
_draw_ispace(f_arr)
```
##### Uniform and Nonuniform Ghost Layers
```{code-cell} ipython3
:tags: [remove-cell]
def _show_ispace(cfg):
f = ps.fields("f: float64[2D]")
assignments = [
ps.Assignment(f(0), 1)
]
kernel = ps.create_kernel(assignments, cfg).compile()
f_arr = np.zeros((16, 16))
kernel(f=f_arr)
_draw_ispace(f_arr)
```
Setting `ghost_layers` to a number will remove that many layers from the iteration space in each dimension:
```{code-cell} ipython3
cfg = ps.CreateKernelConfig(
ghost_layers=1
)
```
```{code-cell} ipython3
:tags: [remove-input]
_show_ispace(cfg)
```
Ghost layers can also be specified individually for each dimension and lower/upper borders,
by passing a sequence with either a single integer or a pair of integers per dimension:
```{code-cell} ipython3
cfg = ps.CreateKernelConfig(
ghost_layers=[(2, 1), 3]
)
```
```{code-cell} ipython3
:tags: [remove-input]
_show_ispace(cfg)
```
#### Iteration Slices
Using the `iteration_slice` option, we can exert much finer control over the kernel's iteration space
by specifying it as a sequence of Python {py:class}`slice` objects.
We can quickly create those with `ps.make_slice`, using the `start:stop:step` slice notation.
The easiest case is to set the iteration space with fixed numerical limits:
```{code-cell} ipython3
cfg = ps.CreateKernelConfig(
iteration_slice=ps.make_slice[3:-4, 9:14]
)
```
```{code-cell} ipython3
:tags: [remove-input]
_show_ispace(cfg)
```
##### Strided Iteration
It is also possible to set up a strided iteration that skips over a fixed number of elements.
The following example processes only every second line in $y$-direction, using the slice `::2`:
```{code-cell} ipython3
cfg = ps.CreateKernelConfig(
iteration_slice=ps.make_slice[::2, 3:-3]
)
```
```{code-cell} ipython3
:tags: [remove-input]
_show_ispace(cfg)
```
(example_triangular_iteration)=
##### Triangular Iteration
Iteration slices are not limited to constant numerical values; they can be arbitrarily complex
*SymPy* expressions.
By using the counter symbol for the first dimension to define the iteration limits of the second,
we can produce a triangular iteration pattern:
```{code-cell} ipython3
y = ps.DEFAULTS.spatial_counters[0]
cfg = ps.CreateKernelConfig(
iteration_slice=ps.make_slice[:, y:]
)
```
:::{warning}
This type of dependency is restricted by the ordering of the iteration space dimensions:
The limits of a dimension can only depend on the counters of dimensions that are *slower*
than itself.
The ordering of dimensions is determined by the memory layout of the kernels' fields;
see also the [section on memory layouts](#section_memory_layout).
:::
```{code-cell} ipython3
:tags: [remove-input]
_show_ispace(cfg)
```
##### Red-Black Iteration
Using a case distinction for the second dimension's start index, we can even produce
a checkerboard pattern, as required for e.g. red-black Gauss-Seidel-type smoothers.
We use the integer remainder ({any}`int_rem`) to distinguish between even- and odd-numbered rows,
set the start value accordingly using {any}`sp.Piecewise <sympy.functions.elementary.piecewise.Piecewise>`,
and use a step size of two:
$$
start(y)=
\begin{cases}
0 & \quad \text{if } y \; \mathrm{rem} \; 2 = 0 \\
1 & \quad \text{otherwise}
\end{cases}
$$
```{code-cell} ipython3
from pystencils.sympyextensions.integer_functions import int_rem
y = ps.DEFAULTS.spatial_counters[0]
start = sp.Piecewise(
(0, sp.Eq(int_rem(y, 2), 0)),
(1, True)
)
cfg = ps.CreateKernelConfig(
iteration_slice=ps.make_slice[:, start::2]
)
```
:::{warning}
The restrictions on dimension ordering of the triangular iteration example apply
to the checkerboard-iteration as well.
:::
```{code-cell} ipython3
:tags: [remove-input]
_show_ispace(cfg)
```
(section_memory_layout)=
## Memory Layout and Dimension Ordering
:::{admonition} Developer To Do
Briefly explain about field memory layouts, cache locality, coalesced memory accesses (on GPU and vector CPUs),
and the need for correct ordering of the dimensions (loop order on CPU, thread indexing order on GPU).
:::
(section_codegen_stages)=
## Advanced: Understanding the Stages of the Code Generator
While translating a set of symbolic definitions to a kernel, the code generator of pystencils
goes through a number of stages, gradually extending and transforming the AST.
Pystencils allows you to retrieve and inspect the intermediate results produced by the
code generator, in order to better understand the process of kernel translation.
This can be immensely helpful when tracking down bugs or trying to explain unexpected
output code.
To get access to the intermediate results, the code generator has to be invoked in a slightly different way.
Instead of just calling `create_kernel`, we directly create the so-called *driver* and instruct it to
store its intermediate ASTs:
```{code-cell} ipython3
:tags: [remove-cell]
u_src, u_dst, f = ps.fields("u_src, u_dst, f: float32[2D]")
h = sp.Symbol("h")
cfg = ps.CreateKernelConfig(
target=ps.Target.X86_AVX512,
default_dtype="float32",
)
cfg.cpu.openmp.enable = True
cfg.cpu.vectorize.enable = True
cfg.cpu.vectorize.assume_inner_stride_one = True
assignments = [
ps.Assignment(
u_dst[0,0], (h**2 * f[0, 0] + u_src[1, 0] + u_src[-1, 0] + u_src[0, 1] + u_src[0, -1]) / 4
)
]
```
```{code-cell} ipython3
driver = ps.codegen.get_driver(cfg, retain_intermediates=True)
kernel = driver(assignments)
ps.inspect(driver.intermediates)
```
.. _page_symbolic_language:
*****************
Symbolic Language
*****************
Pystencils allows you to define near-arbitrarily complex numerical kernels in its symbolic
language, which is based on the computer algebra system `SymPy <https://www.sympy.org>`_.
The pystencils code generator is able to parse and translate a large portion of SymPy's
symbolic expression toolkit, and furthermore extends it with its own features.
Among the supported SymPy features are: symbols, constants, arithmetic and logical expressions,
trigonometric and most transcendental functions, as well as piecewise definitions.
Symbols
=======
Mathematical variables are generally represented using `sympy.Symbol <sympy.core.symbol.Symbol>`.
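
For example:

.. code-block:: Python

    import sympy as sp

    x, y = sp.symbols("x, y")
    expr = x**2 + sp.sin(y)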
Fields
======
The most important extension to SymPy brought by pystencils is the *field*.
Fields are a symbolic representation of multidimensional Cartesian numerical arrays,
as used in many stencil algorithms.
They are represented by the `Field` class.
Fields can be created from a textual description using the `fields <pystencils.field.fields>` function,
or, more concisely, using the factory methods `Field.create_generic` and `Field.create_fixed_size`.
It is also possible to create a field representing an existing numpy array,
including its shape, data type, and memory layout, using `Field.create_from_numpy_array`.
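
A few creation examples (a brief sketch of the factory functions named above):

.. code-block:: Python

    import numpy as np
    import pystencils as ps

    # From a textual description
    u, v = ps.fields("u, v: float64[2D]")

    # From an existing numpy array, adopting its shape, dtype and memory layout
    arr = np.zeros((16, 16))
    w = ps.Field.create_from_numpy_array("w", arr)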
.. autosummary::
:nosignatures:
pystencils.Field
Assignments and Assignment Collections
======================================
Pystencils relies heavily on SymPy's `Assignment <sympy.codegen.ast.Assignment>` class.
Assignments are the fundamental components of pystencils kernels;
they are used both for assigning expressions to symbols
and for writing values to fields.
Assignments are combined and structured inside `assignment collections <pystencils.AssignmentCollection>`.
An assignment collection contains two separate lists of assignments:
- The **subexpressions** list contains assignments to symbols which can be reused in all subsequent assignments.
These are typically used to structure computations into parts
  by precomputing (common) subexpressions.
- The **main assignments** represent the actual effect of the kernel by storing the computation's results
into fields.
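
For illustration, a small collection might be assembled like this
(a sketch; the keyword arguments mirror the two lists described above):

.. code-block:: Python

    import sympy as sp
    import pystencils as ps

    f, g = ps.fields("f, g: float64[2D]")
    tmp = sp.Symbol("tmp")

    ac = ps.AssignmentCollection(
        subexpressions=[ps.Assignment(tmp, f[0, 0] + f[1, 0])],
        main_assignments=[ps.Assignment(g[0, 0], 2 * tmp)],
    )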
.. autosummary::
:nosignatures:
pystencils.Assignment
pystencils.AssignmentCollection
Restrictions on SymPy Expressions
=================================
In order to produce valid C code, the *pystencils* code generator places some restrictions
on the SymPy expressions it consumes.
Piecewise Definitions
---------------------
Pystencils can parse and translate piecewise function definitions using
`sympy.Piecewise <sympy.functions.elementary.piecewise.Piecewise>`
*only if* they have a default case.
Any incomplete piecewise definition, such as the following, will result in an error from pystencils:
.. code-block:: Python
sp.Piecewise((0, x < 0), (1, sp.And(x >= 0, x < 1)))
To avoid this, you may explicitly mark the final branch as the default case by
setting its condition to ``True``:
.. code-block:: Python
sp.Piecewise((0, x < 0), (1, True))
This is not always necessary; if SymPy can prove that the range of possible values is covered completely,
it might simplify the final condition to ``True`` automatically.
Integer Operations
==================
Division and Remainder
----------------------
Care has to be taken when working with integer division operations in pystencils.
The Python operators ``//`` and ``%`` work differently from their counterparts in the C family of languages.
Where in C, integer division always rounds toward zero, ``//`` performs a floor division,
which rounds toward negative infinity.
These two operations differ whenever one of the operands is negative.
Accordingly, in Python ``a % b`` returns the *floored modulus*, which takes the sign of ``b``,
while C ``a % b`` computes the *remainder* of division.
For positive ``b``, the floored modulus is always nonnegative, while the remainder, if nonzero, always has the same sign as ``a``.
When ``//`` and ``%`` occur in symbolic expressions given to pystencils, they are interpreted the Python way.
This can lead to inefficient generated code, since Pythonic integer division does not map directly to the corresponding C
operators.
To achieve C behaviour (and efficient code), you can use
`pystencils.symb.int_div <pystencils.sympyextensions.integer_functions.int_div>` and
`pystencils.symb.int_rem <pystencils.sympyextensions.integer_functions.int_rem>`
which translate to C ``/`` and ``%``, respectively.
When expressions are translated in an integer type context, the Python ``/`` operator
will also be converted to C-style ``/`` integer division.
Still, use of ``/`` for integers is discouraged, as it is designed to return a floating-point value in Python.
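
The following sketch contrasts the two behaviours on a symbolic integer ``a``:

.. code-block:: Python

    import sympy as sp
    from pystencils.sympyextensions.integer_functions import int_div, int_rem

    a = sp.Symbol("a", integer=True)

    # Python semantics: for a = -7, a // 2 == -4 and a % 2 == 1
    python_style = a // 2 + a % 2

    # C semantics: for a = -7, int_div(a, 2) == -3 and int_rem(a, 2) == -1;
    # these translate directly to C's `/` and `%`
    c_style = int_div(a, 2) + int_rem(a, 2)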
[mypy]
python_version = 3.10
exclude = "src/pystencils/old"
[mypy-pystencils.*]
ignore_errors=true
[mypy-pystencils.backend.*]
ignore_errors = False
[mypy-pystencils.types.*]
ignore_errors = False
[mypy-pystencils.codegen.*]
ignore_errors = False
[mypy-pystencils.jit.*]
ignore_errors = False
[mypy-pystencils.field]
ignore_errors = False
[mypy-pystencils.sympyextensions.typed_sympy]
ignore_errors = False
[mypy-setuptools.*]
ignore_missing_imports=true
[mypy-appdirs.*]
ignore_missing_imports=true
[mypy-islpy.*]
ignore_missing_imports=true
[mypy-cupy.*]
ignore_missing_imports=true
[mypy-cpuinfo.*]
ignore_missing_imports=true
[mypy-fasteners.*]
ignore_missing_imports=true
from __future__ import annotations
from typing import Sequence
import os
import nox
import subprocess
import re
nox.options.sessions = ["lint", "typecheck", "testsuite"]
def get_cuda_version(session: nox.Session) -> None | tuple[int, ...]:
query_args = ["nvcc", "--version"]
try:
query_result = subprocess.run(query_args, capture_output=True)
except FileNotFoundError:
return None
matches = re.findall(r"release \d+\.\d+", str(query_result.stdout))
if matches:
match = matches[0]
version_string = match.split()[-1]
try:
return tuple(int(v) for v in version_string.split("."))
except ValueError:
pass
session.warn("nvcc was found, but I am unable to determine the CUDA version.")
return None
def install_cupy(
session: nox.Session, cupy_version: str, skip_if_no_cuda: bool = False
):
if cupy_version is not None:
cuda_version = get_cuda_version(session)
if cuda_version is None or cuda_version[0] not in (11, 12):
if skip_if_no_cuda:
session.skip(
"No compatible installation of CUDA found - Need either CUDA 11 or 12"
)
else:
session.warn(
"Running without cupy: no compatbile installation of CUDA found. Need either CUDA 11 or 12."
)
return
cuda_major = cuda_version[0]
cupy_package = f"cupy-cuda{cuda_major}x=={cupy_version}"
session.install(cupy_package)
def check_external_doc_dependencies(session: nox.Session):
dot_args = ["dot", "--version"]
try:
_ = subprocess.run(dot_args, capture_output=True)
except FileNotFoundError:
session.error(
"Unable to build documentation: "
"Command `dot` from the `graphviz` package (https://www.graphviz.org/) is not available"
)
def editable_install(session: nox.Session, opts: Sequence[str] = ()):
if opts:
opts_str = "[" + ",".join(opts) + "]"
else:
opts_str = ""
session.install("-e", f".{opts_str}")
@nox.session(python="3.10", tags=["qa", "code-quality"])
def lint(session: nox.Session):
"""Lint code using flake8"""
session.install("flake8")
session.run("flake8", "src/pystencils")
@nox.session(python="3.10", tags=["qa", "code-quality"])
def typecheck(session: nox.Session):
"""Run MyPy for static type checking"""
editable_install(session)
session.install("mypy")
session.run("mypy", "src/pystencils")
@nox.session(python=["3.10", "3.11", "3.12", "3.13"], tags=["test"])
@nox.parametrize("cupy_version", [None, "12", "13"], ids=["cpu", "cupy12", "cupy13"])
def testsuite(session: nox.Session, cupy_version: str | None):
"""Run the pystencils test suite.
**Positional Arguments:** Any positional arguments passed to nox after `--`
are propagated to pytest.
"""
if cupy_version is not None:
install_cupy(session, cupy_version, skip_if_no_cuda=True)
editable_install(session, ["alltrafos", "use_cython", "interactive", "testsuite"])
num_cores = os.cpu_count()
session.run(
"pytest",
"-v",
"-n",
str(num_cores),
"--cov-report=term",
"--cov=.",
"-m",
"not longrun",
"--html",
"test-report/index.html",
"--junitxml=report.xml",
*session.posargs
)
session.run("coverage", "html")
session.run("coverage", "xml")
@nox.session(python=["3.10"], tags=["docs"])
def docs(session: nox.Session):
"""Build the documentation pages"""
check_external_doc_dependencies(session)
install_cupy(session, "12.3")
editable_install(session, ["doc"])
env = {}
session_args = session.posargs
if "--fail-on-warnings" in session_args:
env["SPHINXOPTS"] = "-W --keep-going"
session.chdir("docs")
if "--clean" in session_args:
session.run("make", "clean", external=True)
session.run("make", "html", external=True, env=env)
@@ -12,7 +12,7 @@ authors = [
]
license = { file = "COPYING.txt" }
requires-python = ">=3.10"
dependencies = ["sympy>=1.6,<=1.11.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml"]
dependencies = ["sympy>=1.9,<=1.12.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml", "pybind11", "fasteners"]
classifiers = [
"Development Status :: 4 - Beta",
"Framework :: Jupyter",
@@ -44,34 +44,45 @@ interactive = [
use_cython = [
'Cython'
]
dev = [
"flake8",
"mypy",
"black",
]
doc = [
'sphinx',
'sphinx_rtd_theme',
'nbsphinx',
'pydata-sphinx-theme==0.15.4',
'sphinx-book-theme==1.1.3', # workaround for https://github.com/executablebooks/sphinx-book-theme/issues/865
'sphinxcontrib-bibtex',
'sphinx_autodoc_typehints',
'pandoc',
'sphinx_design',
'myst-nb',
'matplotlib',
'ipywidgets',
'graphviz',
]
tests = [
testsuite = [
'pytest',
'pytest-cov',
'pytest-html',
'ansi2html',
'pytest-xdist',
'flake8',
'mypy>=1.8',
'nbformat',
'nbconvert',
'ipython',
'matplotlib',
'py-cpuinfo',
'randomgen>=1.18',
'scipy'
]
[build-system]
requires = [
"setuptools>=61",
"versioneer>=0.29",
"tomli; python_version < '3.11'",
"versioneer[toml]>=0.29",
# 'Cython'
]
build-backend = "setuptools.build_meta"
@@ -79,6 +90,7 @@ build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
pystencils = [
"include/*.h",
"jit/cpu/*.tmpl.cpp",
"boundaries/createindexlistcython.pyx"
]
[pytest]
testpaths = src tests doc/notebooks
testpaths = src tests docs/source/tutorials
pythonpath = src
python_files = test_*.py *_test.py scenario_*.py
norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
addopts = --doctest-modules --durations=20 --cov-config pytest.ini
addopts =
--doctest-modules
--durations=20
--cov-config pytest.ini
--ignore=src/pystencils/old
--ignore=tests/_old
--ignore=tests/_todo
markers =
longrun: tests only run at night since they have large execution time
notebook: mark for notebooks
@@ -17,6 +23,7 @@ filterwarnings =
ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning
ignore:Animation was deleted without rendering anything:UserWarning
# Coverage Configuration
[run]
branch = True
source = src/pystencils
@@ -25,6 +32,7 @@ source = src/pystencils
omit = doc/*
tests/*
setup.py
noxfile.py
quicktest.py
conftest.py
versioneer.py
@@ -34,6 +42,8 @@ omit = doc/*
src/pystencils/cache.py
src/pystencils/pacxx/benchmark.py
src/pystencils/_version.py
src/pystencils/_deprecation.py
src/pystencils/old
venv/
[report]
@@ -55,8 +65,11 @@ exclude_lines =
if False:
if __name__ == .__main__.:
# Don't cover type checking imports
if TYPE_CHECKING:
skip_covered = True
fail_under = 85
fail_under = 80
[html]
directory = coverage_report
"""Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
from .enums import Backend, Target
from .codegen import (
Target,
CreateKernelConfig,
AUTO
)
from .defaults import DEFAULTS
from . import fd
from . import stencil as stencil
from .assignment import Assignment, AddAugmentedAssignment, assignment_from_stencil
from .typing.typed_sympy import TypedSymbol
from .display_utils import get_code_obj, get_code_str, show_code, to_dot
from .inspection import inspect
from .field import Field, FieldType, fields
from .config import CreateKernelConfig
from .types import create_type, create_numeric_type
from .cache import clear_cache
from .kernel_decorator import kernel, kernel_config
from .kernelcreation import create_kernel, create_staggered_kernel
from .simp import AssignmentCollection
from .codegen.driver import create_kernel, create_staggered_kernel
from .codegen import Kernel
from .jit import no_jit
from .backend.exceptions import KernelConstraintsError
from .slicing import make_slice
from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered
from .sympyextensions import SymbolCreator
from .spatial_coordinates import (
x_,
x_staggered,
x_staggered_vector,
x_vector,
y_,
y_staggered,
z_,
z_staggered,
)
from .assignment import Assignment, AddAugmentedAssignment, assignment_from_stencil
from .simp import AssignmentCollection
from .sympyextensions.typed_sympy import TypedSymbol, DynamicType
from .sympyextensions import SymbolCreator, tcast
from .datahandling import create_data_handling
__all__ = ['Field', 'FieldType', 'fields',
'TypedSymbol',
'make_slice',
'CreateKernelConfig',
'create_kernel', 'create_staggered_kernel',
'Target', 'Backend',
'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
'AssignmentCollection',
'Assignment', 'AddAugmentedAssignment',
'assignment_from_stencil',
'SymbolCreator',
'create_data_handling',
'clear_cache',
'kernel', 'kernel_config',
'x_', 'y_', 'z_',
'x_staggered', 'y_staggered', 'z_staggered',
'x_vector', 'x_staggered_vector',
'fd',
'stencil']
from ._version import get_versions
__all__ = [
"Field",
"FieldType",
"fields",
"DEFAULTS",
"TypedSymbol",
"DynamicType",
"create_type",
"create_numeric_type",
"make_slice",
"CreateKernelConfig",
"AUTO",
"create_kernel",
"create_staggered_kernel",
"Kernel",
"KernelConstraintsError",
"Target",
"no_jit",
"show_code",
"to_dot",
"get_code_obj",
"get_code_str",
"inspect",
"AssignmentCollection",
"Assignment",
"AddAugmentedAssignment",
"assignment_from_stencil",
"SymbolCreator",
"create_data_handling",
"clear_cache",
"kernel",
"kernel_config",
"x_",
"y_",
"z_",
"x_staggered",
"y_staggered",
"z_staggered",
"x_vector",
"x_staggered_vector",
"fd",
"stencil",
"tcast",
]
__version__ = get_versions()['version']
del get_versions
from . import _version
__version__ = _version.get_versions()['version']
def _deprecated(feature, instead, version="2.1"):
from warnings import warn
warn(
f"{feature} is deprecated and will be removed in pystencils {version}."
f"Use {instead} instead.",
DeprecationWarning,
stacklevel=2
)
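# Usage sketch (hypothetical arguments): _deprecated("old_api", "new_api") warns:
# "old_api is deprecated and will be removed in pystencils 2.1. Use new_api instead."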
@@ -5,8 +5,9 @@
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain. Generated by
# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer)
# This file is released into the public domain.
# Generated by versioneer-0.29
# https://github.com/python-versioneer/python-versioneer
"""Git implementation of _version.py."""
@@ -15,9 +16,11 @@ import os
import re
import subprocess
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple
import functools
def get_keywords():
def get_keywords() -> Dict[str, str]:
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
@@ -33,8 +36,15 @@ def get_keywords():
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
VCS: str
style: str
tag_prefix: str
parentdir_prefix: str
versionfile_source: str
verbose: bool
def get_config():
def get_config() -> VersioneerConfig:
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
@@ -43,7 +53,7 @@ def get_config():
cfg.style = "pep440"
cfg.tag_prefix = "release/"
cfg.parentdir_prefix = "pystencils-"
cfg.versionfile_source = "pystencils/_version.py"
cfg.versionfile_source = "src/pystencils/_version.py"
cfg.verbose = False
return cfg
@@ -52,13 +62,13 @@ class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""
LONG_VERSION_PY = {}
HANDLERS = {}
LONG_VERSION_PY: Dict[str, str] = {}
HANDLERS: Dict[str, Dict[str, Callable]] = {}
def register_vcs_handler(vcs, method): # decorator
def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator
"""Create decorator to mark a method as the handler of a VCS."""
def decorate(f):
def decorate(f: Callable) -> Callable:
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
@@ -67,22 +77,35 @@ def register_vcs_handler(vcs, method):  # decorator
return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
env=None):
def run_command(
commands: List[str],
args: List[str],
cwd: Optional[str] = None,
verbose: bool = False,
hide_stderr: bool = False,
env: Optional[Dict[str, str]] = None,
) -> Tuple[Optional[str], Optional[int]]:
"""Call the given command(s)."""
assert isinstance(commands, list)
p = None
for c in commands:
process = None
popen_kwargs: Dict[str, Any] = {}
if sys.platform == "win32":
# This hides the console window if pythonw.exe is used
startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
popen_kwargs["startupinfo"] = startupinfo
for command in commands:
try:
dispcmd = str([c] + args)
dispcmd = str([command] + args)
# remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen([c] + args, cwd=cwd, env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr
else None))
process = subprocess.Popen([command] + args, cwd=cwd, env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr
else None), **popen_kwargs)
break
except EnvironmentError:
e = sys.exc_info()[1]
except OSError as e:
if e.errno == errno.ENOENT:
continue
if verbose:
@@ -93,16 +116,20 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
if verbose:
print("unable to find command, tried %s" % (commands,))
return None, None
stdout = p.communicate()[0].strip().decode()
if p.returncode != 0:
stdout = process.communicate()[0].strip().decode()
if process.returncode != 0:
if verbose:
print("unable to run %s (error)" % dispcmd)
print("stdout was %s" % stdout)
return None, p.returncode
return stdout, p.returncode
return None, process.returncode
return stdout, process.returncode
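# Behaviour sketch: run_command(["git"], ["rev-parse", "HEAD"]) returns a
# (stdout, returncode) pair such as ("1a2b3c...", 0); (None, None) signals that
# none of the candidate executables could be launched at all.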
def versions_from_parentdir(parentdir_prefix, root, verbose):
def versions_from_parentdir(
parentdir_prefix: str,
root: str,
verbose: bool,
) -> Dict[str, Any]:
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both
@@ -111,15 +138,14 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
"""
rootdirs = []
for i in range(3):
for _ in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
else:
rootdirs.append(root)
root = os.path.dirname(root) # up a level
rootdirs.append(root)
root = os.path.dirname(root) # up a level
if verbose:
print("Tried directories %s but none started with prefix %s" %
@@ -128,39 +154,42 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
keywords = {}
keywords: Dict[str, str] = {}
try:
f = open(versionfile_abs, "r")
for line in f.readlines():
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["full"] = mo.group(1)
if line.strip().startswith("git_date ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["date"] = mo.group(1)
f.close()
except EnvironmentError:
with open(versionfile_abs, "r") as fobj:
for line in fobj:
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["full"] = mo.group(1)
if line.strip().startswith("git_date ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["date"] = mo.group(1)
except OSError:
pass
return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
def git_versions_from_keywords(
keywords: Dict[str, str],
tag_prefix: str,
verbose: bool,
) -> Dict[str, Any]:
"""Get version information from git keywords."""
if not keywords:
raise NotThisMethod("no keywords at all, weird")
if "refnames" not in keywords:
raise NotThisMethod("Short version file found")
date = keywords.get("date")
if date is not None:
# Use only the last line. Previous lines may contain GPG signature
......@@ -179,11 +208,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
......@@ -192,7 +221,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
@@ -201,6 +230,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
# Filter out refs that exactly match prefix or that don't start
# with a number once the prefix is stripped (mostly a concern
# when prefix is '')
if not re.match(r'\d', r):
continue
if verbose:
print("picking %s" % r)
return {"version": r,
@@ -216,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
def git_pieces_from_vcs(
tag_prefix: str,
root: str,
verbose: bool,
runner: Callable = run_command
) -> Dict[str, Any]:
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
@@ -227,8 +266,15 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
hide_stderr=True)
# GIT_DIR can interfere with correct operation of Versioneer.
# It may be intended to be passed to the Versioneer-versioned project,
# but that should not change where we get our version from.
env = os.environ.copy()
env.pop("GIT_DIR", None)
runner = functools.partial(runner, env=env)
_, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
hide_stderr=not verbose)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
@@ -236,24 +282,57 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
"--always", "--long",
"--match", "%s*" % tag_prefix],
cwd=root)
describe_out, rc = runner(GITS, [
"describe", "--tags", "--dirty", "--always", "--long",
"--match", f"{tag_prefix}[[:digit:]]*"
], cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
pieces = {}
pieces: Dict[str, Any] = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
cwd=root)
# --abbrev-ref was added in git-1.6.3
if rc != 0 or branch_name is None:
raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
branch_name = branch_name.strip()
if branch_name == "HEAD":
# If we aren't exactly on a branch, pick a branch which represents
# the current commit. If all else fails, we are on a branchless
# commit.
branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
# --contains was added in git-1.5.4
if rc != 0 or branches is None:
raise NotThisMethod("'git branch --contains' returned error")
branches = branches.split("\n")
# Remove the first line if we're running detached
if "(" in branches[0]:
branches.pop(0)
# Strip off the leading "* " from the list of branches.
branches = [branch[2:] for branch in branches]
if "master" in branches:
branch_name = "master"
elif not branches:
branch_name = None
else:
# Pick the first branch that is returned. Good or bad.
branch_name = branches[0]
pieces["branch"] = branch_name
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
@@ -270,7 +349,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
# unparsable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out)
return pieces
@@ -295,13 +374,11 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
else:
# HEX: no tags
pieces["closest-tag"] = None
count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
cwd=root)
pieces["distance"] = int(count_out) # total number of commits
out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
pieces["distance"] = len(out.split()) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
cwd=root)[0].strip()
date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
@@ -310,14 +387,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
return pieces
def plus_or_dot(pieces):
def plus_or_dot(pieces: Dict[str, Any]) -> str:
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
def render_pep440(pieces):
def render_pep440(pieces: Dict[str, Any]) -> str:
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
@@ -342,23 +419,71 @@ def render_pep440(pieces):
return rendered
def render_pep440_pre(pieces):
"""TAG[.post0.devDISTANCE] -- No -dirty.
def render_pep440_branch(pieces: Dict[str, Any]) -> str:
"""TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
The ".dev0" means not master branch. Note that .dev0 sorts backwards
(a feature branch will appear "older" than the master branch).
Exceptions:
1: no tags. 0.post0.devDISTANCE
1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0"
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += "+untagged.%d.g%s" % (pieces["distance"],
pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
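# Worked example (values assumed): for pieces {"closest-tag": "1.3", "distance": 4,
# "short": "abc1234", "branch": "feature", "dirty": False} this renders
# "1.3.dev0+4.gabc1234".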
def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
"""Split pep440 version string at the post-release segment.
Returns the release segments before the post-release and the
post-release version number (or -1 if no post-release segment is present).
"""
vc = str.split(ver, ".post")
return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
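# Per the implementation above: pep440_split_post("1.2.post3") == ("1.2", 3),
# pep440_split_post("1.2") == ("1.2", None).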
def render_pep440_pre(pieces: Dict[str, Any]) -> str:
"""TAG[.postN.devDISTANCE] -- No -dirty.
Exceptions:
1: no tags. 0.post0.devDISTANCE
"""
if pieces["closest-tag"]:
if pieces["distance"]:
rendered += ".post0.dev%d" % pieces["distance"]
# update the post release segment
tag_version, post_version = pep440_split_post(pieces["closest-tag"])
rendered = tag_version
if post_version is not None:
rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
else:
rendered += ".post0.dev%d" % (pieces["distance"])
else:
# no commits, use the tag as the version
rendered = pieces["closest-tag"]
else:
# exception #1
rendered = "0.post0.dev%d" % pieces["distance"]
return rendered
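# Worked example (values assumed): closest-tag "1.2.post3" at distance 2 renders
# "1.2.post4.dev2"; closest-tag "1.2" at distance 2 renders "1.2.post0.dev2".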
def render_pep440_post(pieces):
def render_pep440_post(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
@@ -385,7 +510,36 @@ def render_pep440_post(pieces):
return rendered
def render_pep440_old(pieces):
def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
The ".dev0" means not master branch.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "g%s" % pieces["short"]
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += "+g%s" % pieces["short"]
if pieces["dirty"]:
rendered += ".dirty"
return rendered
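# Worked example (values assumed): closest-tag "1.3", distance 2, branch "feature",
# short "abc1234" and a clean tree render "1.3.post2.dev0+gabc1234".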
def render_pep440_old(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
@@ -407,7 +561,7 @@ def render_pep440_old(pieces):
return rendered
def render_git_describe(pieces):
def render_git_describe(pieces: Dict[str, Any]) -> str:
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
@@ -427,7 +581,7 @@ def render_git_describe(pieces):
return rendered
def render_git_describe_long(pieces):
def render_git_describe_long(pieces: Dict[str, Any]) -> str:
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always -long'.
@@ -447,7 +601,7 @@ def render_git_describe_long(pieces):
return rendered
def render(pieces, style):
def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
@@ -461,10 +615,14 @@ def render(pieces, style):
if style == "pep440":
rendered = render_pep440(pieces)
elif style == "pep440-branch":
rendered = render_pep440_branch(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
elif style == "pep440-post-branch":
rendered = render_pep440_post_branch(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
@@ -479,7 +637,7 @@ def render(pieces, style):
"date": pieces.get("date")}
def get_versions():
def get_versions() -> Dict[str, Any]:
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
@@ -500,7 +658,7 @@ def get_versions():
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for i in cfg.versionfile_source.split('/'):
for _ in cfg.versionfile_source.split('/'):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
......
# flake8: noqa
import numpy as np
@@ -17,10 +18,12 @@ def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, o
        align_inner_coordinate: if True, the start of each innermost coordinate line is aligned as well
"""
if byte_alignment is True or byte_alignment == 'cacheline':
from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets, get_cacheline_size,
get_vector_instruction_set)
# from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets, get_cacheline_size,
# get_vector_instruction_set)
instruction_sets = get_supported_instruction_sets()
# instruction_sets = get_supported_instruction_sets()
# TODO `get_supported_instruction_sets` has to be reimplemented
instruction_sets = None
if instruction_sets is None:
byte_alignment = 64
elif byte_alignment == 'cacheline':
......
import numpy as np
import sympy as sp
from sympy.codegen.ast import Assignment, AugmentedAssignment, AddAugmentedAssignment
from sympy.codegen.ast import Assignment, AugmentedAssignment
from sympy.codegen.ast import AddAugmentedAssignment as SpAddAugAssignment
from sympy.printing.latex import LatexPrinter
__all__ = ['Assignment', 'AugmentedAssignment', 'AddAugmentedAssignment', 'assignment_from_stencil']
@@ -39,6 +40,9 @@ LatexPrinter._print_AugmentedAssignment = print_assignment_latex
sp.MutableDenseMatrix.__hash__ = lambda self: hash(tuple(self))
# Re-Export
AddAugmentedAssignment = SpAddAugAssignment
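# Sketch of the re-exported semantics: AddAugmentedAssignment(x, 2) represents the
# augmented statement `x += 2`, as defined by sympy.codegen.ast.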
def assignment_from_stencil(stencil_array, input_field, output_field,
normalization_factor=None, order='visual') -> Assignment:
......
import collections.abc
import itertools
import uuid
from typing import Any, List, Optional, Sequence, Set, Union
import sympy as sp
import pystencils
from pystencils.typing.utilities import create_type, get_next_parent_of_type
from pystencils.enums import Target, Backend
from pystencils.field import Field
from pystencils.typing.typed_sympy import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol, TypedSymbol
from pystencils.sympyextensions import fast_subs
NodeOrExpr = Union['Node', sp.Expr]
class Node:
"""Base class for all AST nodes."""
def __init__(self, parent: Optional['Node'] = None):
self.parent = parent
@property
def args(self) -> List[NodeOrExpr]:
"""Returns all arguments/children of this node."""
raise NotImplementedError()
@property
def symbols_defined(self) -> Set[sp.Symbol]:
"""Set of symbols which are defined by this node."""
raise NotImplementedError()
@property
def undefined_symbols(self) -> Set[sp.Symbol]:
"""Symbols which are used but are not defined inside this node."""
raise NotImplementedError()
def subs(self, subs_dict) -> None:
"""Inplace! Substitute, similar to sympy's but modifies the AST inplace."""
for i, a in enumerate(self.args):
result = a.subs(subs_dict)
if isinstance(a, sp.Expr): # sympy expressions' subs is out-of-place
self.args[i] = result
else: # all other should be in-place
assert result is None
@property
def func(self):
return self.__class__
def atoms(self, arg_type) -> Set[Any]:
"""Returns a set of all descendants recursively, which are an instance of the given type."""
result = set()
for arg in self.args:
if isinstance(arg, arg_type):
result.add(arg)
result.update(arg.atoms(arg_type))
return result
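    # Usage sketch (names from this module): for an AST node `ast`,
    # ast.atoms(LoopOverCoordinate) collects every loop node in the subtree and
    # ast.atoms(ResolvedFieldAccess) every resolved field access.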
class Conditional(Node):
"""Conditional that maps to a 'if' statement in C/C++.
Try to avoid using this node inside of loops, since currently this construction can not be vectorized.
Consider using assignments with sympy.Piecewise in this case.
Args:
condition_expr: sympy relational expression
true_block: block which is run if conditional is true
false_block: optional block which is run if conditional is false
"""
def __init__(self, condition_expr: sp.Basic, true_block: Union['Block', 'SympyAssignment'],
false_block: Optional['Block'] = None) -> None:
super(Conditional, self).__init__(parent=None)
self.condition_expr = condition_expr
def handle_child(c):
if c is None:
return None
if not isinstance(c, Block):
c = Block([c])
c.parent = self
return c
self.true_block = handle_child(true_block)
self.false_block = handle_child(false_block)
def subs(self, subs_dict):
self.true_block.subs(subs_dict)
if self.false_block:
self.false_block.subs(subs_dict)
self.condition_expr = self.condition_expr.subs(subs_dict)
@property
def args(self):
result = [self.condition_expr, self.true_block]
if self.false_block:
result.append(self.false_block)
return result
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
result = self.true_block.undefined_symbols
if self.false_block:
result.update(self.false_block.undefined_symbols)
if hasattr(self.condition_expr, 'atoms'):
result.update(self.condition_expr.atoms(sp.Symbol))
return result
def __str__(self):
return self.__repr__()
def __repr__(self):
result = f'if:({self.condition_expr!r}) '
if self.true_block:
result += f'\n\t{self.true_block}) '
if self.false_block:
            result += 'else: '
result += f'\n\t{self.false_block} '
return result
def replace_by_true_block(self):
"""Replaces the conditional by its True block"""
self.parent.replace(self, [self.true_block])
def replace_by_false_block(self):
"""Replaces the conditional by its False block"""
self.parent.replace(self, [self.false_block] if self.false_block else [])
class KernelFunction(Node):
class Parameter:
"""Function parameter.
Each undefined symbol in a `KernelFunction` node becomes a parameter to the function.
Parameters are either symbols introduced by the user that never occur on the left hand side of an
Assignment, or are related to fields/arrays passed to the function.
A parameter consists of the typed symbol (symbol property). For field related parameters this is a symbol
defined in pystencils.kernelparameters.
If the parameter is related to one or multiple fields, these fields are referenced in the fields property.
"""
def __init__(self, symbol, fields):
self.symbol = symbol # type: TypedSymbol
self.fields = fields # type: Sequence[Field]
def __repr__(self):
return repr(self.symbol)
@property
def is_field_stride(self):
return isinstance(self.symbol, FieldStrideSymbol)
@property
def is_field_shape(self):
return isinstance(self.symbol, FieldShapeSymbol)
@property
def is_field_pointer(self):
return isinstance(self.symbol, FieldPointerSymbol)
@property
def is_field_parameter(self):
return self.is_field_pointer or self.is_field_shape or self.is_field_stride
@property
def field_name(self):
return self.fields[0].name
def __init__(self, body, target: Target, backend: Backend, compile_function, ghost_layers,
function_name: str = "kernel",
assignments=None):
super(KernelFunction, self).__init__()
self._body = body
body.parent = self
self.function_name = function_name
self._body.parent = self
self.ghost_layers = ghost_layers
self._target = target
self._backend = backend
# these variables are assumed to be global, so no automatic parameter is generated for them
self.global_variables = set()
        self.instruction_set = None  # used in the `vectorize` function to tell the backend which instruction set (SSE, AVX) to use
# function that compiles the node to a Python callable, is set by the backends
self._compile_function = compile_function
self.assignments = assignments
        # If nontemporal stores are activated together with the Neon instruction set, cacheline zeroing is used.
        # Cacheline zeroing requires the size of each field, so in this case all field sizes become
        # kernel parameters instead of just the common field size used for the loops.
self.use_all_written_field_sizes = False
@property
def target(self):
"""See pystencils.Target"""
return self._target
@property
def backend(self):
"""Backend for generating the code: `Backend`"""
return self._backend
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
@property
def body(self):
return self._body
@body.setter
def body(self, value):
self._body = value
self._body.parent = self
@property
def args(self):
return self._body,
@property
def fields_accessed(self) -> Set[Field]:
"""Set of Field instances: fields which are accessed inside this kernel function"""
return set(o.field for o in itertools.chain(self.atoms(ResolvedFieldAccess)))
@property
def fields_written(self) -> Set[Field]:
assignments = self.atoms(SympyAssignment)
return set().union(itertools.chain.from_iterable([f.field for f in a.lhs.free_symbols if hasattr(f, 'field')]
for a in assignments))
@property
def fields_read(self) -> Set[Field]:
assignments = self.atoms(SympyAssignment)
return set().union(itertools.chain.from_iterable([f.field for f in a.rhs.free_symbols if hasattr(f, 'field')]
for a in assignments))
def get_parameters(self) -> Sequence['KernelFunction.Parameter']:
"""Returns list of parameters for this function.
        This function is expensive; cache the result where possible!
"""
field_map = {f.name: f for f in self.fields_accessed}
sizes = set()
if self.use_all_written_field_sizes:
sizes = set().union(*(a.shape[:a.spatial_dimensions] for a in self.fields_written))
sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
def get_fields(symbol):
if hasattr(symbol, 'field_name'):
return field_map[symbol.field_name],
elif hasattr(symbol, 'field_names'):
return tuple(field_map[fn] for fn in symbol.field_names)
return ()
argument_symbols = self._body.undefined_symbols - self.global_variables
argument_symbols.update(sizes)
parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
if hasattr(self, 'indexing'):
parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
parameters.sort(key=lambda p: p.symbol.name)
return parameters
def __str__(self):
params = [p.symbol for p in self.get_parameters()]
return '{0} {1}({2})\n{3}'.format(type(self).__name__, self.function_name, params,
("\t" + "\t".join(str(self.body).splitlines(True))))
def __repr__(self):
params = [p.symbol for p in self.get_parameters()]
return f'{type(self).__name__} {self.function_name}({params})'
def compile(self, *args, **kwargs):
if self._compile_function is None:
raise ValueError("No compile-function provided for this KernelFunction node")
return self._compile_function(self, *args, **kwargs)
class SkipIteration(Node):
@property
def args(self):
return []
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
class Block(Node):
def __init__(self, nodes: Union[Node, List[Node]]):
super(Block, self).__init__()
if not isinstance(nodes, list):
nodes = [nodes]
self._nodes = nodes
self.parent = None
for n in self._nodes:
try:
n.parent = self
except AttributeError:
pass
@property
def args(self):
return self._nodes
def subs(self, subs_dict) -> None:
for a in self.args:
a.subs(subs_dict)
def fast_subs(self, subs_dict, skip=None):
self._nodes = [fast_subs(a, subs_dict, skip) for a in self._nodes]
return self
def insert_front(self, node, if_not_exists=False):
if if_not_exists and len(self._nodes) > 0 and self._nodes[0] == node:
return
if isinstance(node, collections.abc.Iterable):
node = list(node)
for n in node:
n.parent = self
self._nodes = node + self._nodes
else:
node.parent = self
self._nodes.insert(0, node)
def insert_before(self, new_node, insert_before, if_not_exists=False):
new_node.parent = self
assert self._nodes.count(insert_before) == 1
idx = self._nodes.index(insert_before)
if not if_not_exists or self._nodes[idx] != new_node:
self._nodes.insert(idx, new_node)
def insert_after(self, new_node, insert_after, if_not_exists=False):
new_node.parent = self
assert self._nodes.count(insert_after) == 1
idx = self._nodes.index(insert_after) + 1
if not if_not_exists or not (self._nodes[idx - 1] == new_node
or (idx < len(self._nodes) and self._nodes[idx] == new_node)):
self._nodes.insert(idx, new_node)
def append(self, node):
if isinstance(node, list) or isinstance(node, tuple):
for n in node:
n.parent = self
self._nodes.append(n)
else:
node.parent = self
self._nodes.append(node)
def take_child_nodes(self):
tmp = self._nodes
self._nodes = []
return tmp
def replace(self, child, replacements):
assert self._nodes.count(child) == 1
idx = self._nodes.index(child)
del self._nodes[idx]
if type(replacements) is list:
for e in replacements:
e.parent = self
self._nodes = self._nodes[:idx] + replacements + self._nodes[idx:]
else:
replacements.parent = self
self._nodes.insert(idx, replacements)
@property
def symbols_defined(self):
result = set()
for a in self.args:
if isinstance(a, pystencils.Assignment):
result.update(a.free_symbols)
else:
result.update(a.symbols_defined)
return result
@property
def undefined_symbols(self):
result = set()
defined_symbols = set()
for a in self.args:
if isinstance(a, pystencils.Assignment):
result.update(a.free_symbols)
defined_symbols.update({a.lhs})
else:
result.update(a.undefined_symbols)
defined_symbols.update(a.symbols_defined)
return result - defined_symbols
def __str__(self):
return "Block " + ''.join('{!s}\n'.format(node) for node in self._nodes)
def __repr__(self):
return "Block"
class PragmaBlock(Block):
def __init__(self, pragma_line, nodes):
super(PragmaBlock, self).__init__(nodes)
self.pragma_line = pragma_line
for n in nodes:
n.parent = self
def __repr__(self):
return self.pragma_line
class LoopOverCoordinate(Node):
LOOP_COUNTER_NAME_PREFIX = "ctr"
BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False, custom_loop_ctr=None):
super(LoopOverCoordinate, self).__init__(parent=None)
self.body = body
body.parent = self
self.coordinate_to_loop_over = coordinate_to_loop_over
self.start = start
self.stop = stop
self.step = step
self.body.parent = self
self.prefix_lines = []
self.is_block_loop = is_block_loop
self.custom_loop_ctr = custom_loop_ctr
def new_loop_with_different_body(self, new_body):
result = LoopOverCoordinate(new_body, self.coordinate_to_loop_over, self.start, self.stop,
self.step, self.is_block_loop, self.custom_loop_ctr)
result.prefix_lines = [prefix_line for prefix_line in self.prefix_lines]
return result
def subs(self, subs_dict):
self.body.subs(subs_dict)
if hasattr(self.start, "subs"):
self.start = self.start.subs(subs_dict)
if hasattr(self.stop, "subs"):
self.stop = self.stop.subs(subs_dict)
if hasattr(self.step, "subs"):
self.step = self.step.subs(subs_dict)
def fast_subs(self, subs_dict, skip=None):
self.body = fast_subs(self.body, subs_dict, skip)
if isinstance(self.start, sp.Basic):
self.start = fast_subs(self.start, subs_dict, skip)
if isinstance(self.stop, sp.Basic):
self.stop = fast_subs(self.stop, subs_dict, skip)
if isinstance(self.step, sp.Basic):
self.step = fast_subs(self.step, subs_dict, skip)
return self
@property
def args(self):
result = [self.body]
for e in [self.start, self.stop, self.step]:
if hasattr(e, "args"):
result.append(e)
return result
def replace(self, child, replacement):
if child == self.body:
self.body = replacement
elif child == self.start:
self.start = replacement
elif child == self.step:
self.step = replacement
elif child == self.stop:
self.stop = replacement
@property
def symbols_defined(self):
return {self.loop_counter_symbol}
@property
def undefined_symbols(self):
result = self.body.undefined_symbols
for possible_symbol in [self.start, self.stop, self.step]:
if isinstance(possible_symbol, Node) or isinstance(possible_symbol, sp.Basic):
result.update(possible_symbol.atoms(sp.Symbol))
return result - {self.loop_counter_symbol}
@staticmethod
def get_loop_counter_name(coordinate_to_loop_over):
return f"{LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
@staticmethod
def get_block_loop_counter_name(coordinate_to_loop_over):
return f"{LoopOverCoordinate.BLOCK_LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
@property
def loop_counter_name(self):
if self.custom_loop_ctr:
return self.custom_loop_ctr.name
else:
if self.is_block_loop:
return LoopOverCoordinate.get_block_loop_counter_name(self.coordinate_to_loop_over)
else:
return LoopOverCoordinate.get_loop_counter_name(self.coordinate_to_loop_over)
@staticmethod
def is_loop_counter_symbol(symbol):
prefix = LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX
if not symbol.name.startswith(prefix):
return None
if symbol.dtype != create_type('int'):
return None
coordinate = int(symbol.name[len(prefix) + 1:])
return coordinate
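    # Example per the logic above: an integer TypedSymbol named "ctr_1" yields
    # coordinate 1; symbols without the "ctr" prefix or with a non-int dtype yield None.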
@staticmethod
def get_loop_counter_symbol(coordinate_to_loop_over):
return TypedSymbol(LoopOverCoordinate.get_loop_counter_name(coordinate_to_loop_over), 'int', nonnegative=True)
@staticmethod
def get_block_loop_counter_symbol(coordinate_to_loop_over):
return TypedSymbol(LoopOverCoordinate.get_block_loop_counter_name(coordinate_to_loop_over),
'int',
nonnegative=True)
@property
def loop_counter_symbol(self):
if self.custom_loop_ctr:
return self.custom_loop_ctr
else:
if self.is_block_loop:
return self.get_block_loop_counter_symbol(self.coordinate_to_loop_over)
else:
return self.get_loop_counter_symbol(self.coordinate_to_loop_over)
@property
def is_outermost_loop(self):
return get_next_parent_of_type(self, LoopOverCoordinate) is None
@property
def is_innermost_loop(self):
return len(self.atoms(LoopOverCoordinate)) == 0
def __str__(self):
return 'for({!s}={!s}; {!s}<{!s}; {!s}+={!s})\n{!s}'.format(self.loop_counter_name, self.start,
self.loop_counter_name, self.stop,
self.loop_counter_name, self.step,
("\t" + "\t".join(str(self.body).splitlines(True))))
def __repr__(self):
return 'for({!s}={!s}; {!s}<{!s}; {!s}+={!s})'.format(self.loop_counter_name, self.start,
self.loop_counter_name, self.stop,
self.loop_counter_name, self.step)
class SympyAssignment(Node):
def __init__(self, lhs_symbol, rhs_expr, is_const=True, use_auto=False):
super(SympyAssignment, self).__init__(parent=None)
self._lhs_symbol = sp.sympify(lhs_symbol)
self._rhs = sp.sympify(rhs_expr)
self._is_const = is_const
self._is_declaration = self.__is_declaration()
self._use_auto = use_auto
def __is_declaration(self):
from pystencils.typing import CastFunc
if isinstance(self._lhs_symbol, CastFunc):
return False
if any(isinstance(self._lhs_symbol, c) for c in (Field.Access, sp.Indexed, TemporaryMemoryAllocation)):
return False
return True
@property
def lhs(self):
return self._lhs_symbol
@property
def rhs(self):
return self._rhs
@lhs.setter
def lhs(self, new_value):
self._lhs_symbol = new_value
self._is_declaration = self.__is_declaration()
@rhs.setter
def rhs(self, new_rhs_expr):
self._rhs = new_rhs_expr
def subs(self, subs_dict):
self.lhs = fast_subs(self.lhs, subs_dict)
self.rhs = fast_subs(self.rhs, subs_dict)
def fast_subs(self, subs_dict, skip=None):
self.lhs = fast_subs(self.lhs, subs_dict, skip)
self.rhs = fast_subs(self.rhs, subs_dict, skip)
return self
def optimize(self, optimizations):
try:
from sympy.codegen.rewriting import optimize
self.rhs = optimize(self.rhs, optimizations)
except Exception:
pass
@property
def args(self):
return [self._lhs_symbol, self.rhs]
@property
def symbols_defined(self):
if not self._is_declaration:
return set()
return {self._lhs_symbol}
@property
def undefined_symbols(self):
result = {s for s in self.rhs.free_symbols if not isinstance(s, sp.Indexed)}
        # Add loop counters if there are field accesses
loop_counters = set()
for symbol in result:
if isinstance(symbol, Field.Access):
for i in range(len(symbol.offsets)):
loop_counters.add(LoopOverCoordinate.get_loop_counter_symbol(i))
result.update(loop_counters)
result.update(self._lhs_symbol.atoms(sp.Symbol))
return result
@property
def is_declaration(self):
return self._is_declaration
@property
def is_const(self):
return self._is_const
@property
def use_auto(self):
return self._use_auto
def replace(self, child, replacement):
if child == self.lhs:
replacement.parent = self
self.lhs = replacement
elif child == self.rhs:
replacement.parent = self
self.rhs = replacement
else:
raise ValueError(f'{replacement} is not in args of {self.__class__}')
def __repr__(self):
return repr(self.lhs) + "" + repr(self.rhs)
def _repr_html_(self):
printed_lhs = sp.latex(self.lhs)
printed_rhs = sp.latex(self.rhs)
return f"${printed_lhs} \\leftarrow {printed_rhs}$"
def __hash__(self):
return hash((self.lhs, self.rhs))
def __eq__(self, other):
return type(self) is type(other) and (self.lhs, self.rhs) == (other.lhs, other.rhs)
class ResolvedFieldAccess(sp.Indexed):
def __new__(cls, base, linearized_index, field, offsets, idx_coordinate_values):
if not isinstance(base, sp.IndexedBase):
assert isinstance(base, TypedSymbol)
base = sp.IndexedBase(base, shape=(1,))
assert isinstance(base.label, TypedSymbol)
obj = super(ResolvedFieldAccess, cls).__new__(cls, base, linearized_index)
obj.field = field
obj.offsets = offsets
obj.idx_coordinate_values = idx_coordinate_values
return obj
def _eval_subs(self, old, new):
return ResolvedFieldAccess(self.args[0],
self.args[1].subs(old, new),
self.field, self.offsets, self.idx_coordinate_values)
def fast_subs(self, substitutions, skip=None):
if self in substitutions:
return substitutions[self]
return ResolvedFieldAccess(self.args[0].subs(substitutions),
self.args[1].subs(substitutions),
self.field, self.offsets, self.idx_coordinate_values)
def _hashable_content(self):
super_class_contents = super(ResolvedFieldAccess, self)._hashable_content()
return super_class_contents + tuple(self.offsets) + (repr(self.idx_coordinate_values), hash(self.field))
@property
def typed_symbol(self):
return self.base.label
def __str__(self):
top = super(ResolvedFieldAccess, self).__str__()
return f"{top} ({self.typed_symbol.dtype})"
def __getnewargs__(self):
return self.base, self.indices[0], self.field, self.offsets, self.idx_coordinate_values
def __getnewargs_ex__(self):
return (self.base, self.indices[0], self.field, self.offsets, self.idx_coordinate_values), {}
class TemporaryMemoryAllocation(Node):
"""Node for temporary memory buffer allocation.
Always allocates aligned memory.
Args:
typed_symbol: symbol used as pointer (has to be typed)
size: number of elements to allocate
align_offset: the align_offset's element is aligned
"""
def __init__(self, typed_symbol: TypedSymbol, size, align_offset):
super(TemporaryMemoryAllocation, self).__init__(parent=None)
self.symbol = typed_symbol
self.size = size
self.headers = ['<stdlib.h>']
self._align_offset = align_offset
@property
def symbols_defined(self):
return {self.symbol}
@property
def undefined_symbols(self):
if isinstance(self.size, sp.Basic):
return self.size.atoms(sp.Symbol)
else:
return set()
@property
def args(self):
return [self.symbol]
def offset(self, byte_alignment):
"""Number of ELEMENTS to skip for a pointer that is aligned to byte_alignment."""
np_dtype = self.symbol.dtype.base_type.numpy_dtype
assert byte_alignment % np_dtype.itemsize == 0
return -self._align_offset % (byte_alignment / np_dtype.itemsize)
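        # Worked example: for float64 elements (8 bytes), byte_alignment=32 and
        # align_offset=1 this returns -1 % (32 / 8) == 3.0, i.e. three elements are
        # skipped so that the element at index 1 lands on the 32-byte boundary.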
class TemporaryMemoryFree(Node):
def __init__(self, alloc_node):
super(TemporaryMemoryFree, self).__init__(parent=None)
self.alloc_node = alloc_node
@property
def symbol(self):
return self.alloc_node.symbol
def offset(self, byte_alignment):
return self.alloc_node.offset(byte_alignment)
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
@property
def args(self):
return []
def early_out(condition):
from pystencils.cpu.vectorization import vec_all
return Conditional(vec_all(condition), Block([SkipIteration()]))
def get_dummy_symbol(dtype='bool'):
return TypedSymbol(f'dummy{uuid.uuid4().hex}', create_type(dtype))
class SourceCodeComment(Node):
def __init__(self, text):
self.text = text
@property
def args(self):
return []
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
def __str__(self):
return "/* " + self.text + " */"
def __repr__(self):
return self.__str__()
class EmptyLine(Node):
def __init__(self):
pass
@property
def args(self):
return []
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
def __str__(self):
return ""
def __repr__(self):
return self.__str__()
class ConditionalFieldAccess(sp.Function):
"""
:class:`pystencils.Field.Access` that is only executed if a certain condition is met.
    Can be used, for instance, for out-of-bounds checks.
"""
def __new__(cls, field_access, outofbounds_condition, outofbounds_value=0):
return sp.Function.__new__(cls, field_access, outofbounds_condition, sp.S(outofbounds_value))
@property
def access(self):
return self.args[0]
@property
def outofbounds_condition(self):
return self.args[1]
@property
def outofbounds_value(self):
return self.args[2]
def __getnewargs__(self):
return self.access, self.outofbounds_condition, self.outofbounds_value
def __getnewargs_ex__(self):
return (self.access, self.outofbounds_condition, self.outofbounds_value), {}
from .astnode import PsAstNode
from .iteration import dfs_preorder, dfs_postorder
__all__ = [
"PsAstNode",
"dfs_preorder",
"dfs_postorder",
]