Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (4)
Showing
with 223 additions and 1028 deletions
__pycache__
.ipynb_checkpoints
.coverage
.coverage*
*.pyc
*.vti
/build
......@@ -18,4 +18,4 @@ pystencils/boundaries/createindexlistcython.c
pystencils/boundaries/createindexlistcython.*.so
pystencils_tests/tmp
pystencils_tests/kerncraft_inputs/.2d-5pt.c_kerncraft/
pystencils_tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
\ No newline at end of file
pystencils_tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
......@@ -14,7 +14,7 @@
name: pystencils
dependencies:
# Basic dependencies:
- python >= 3.6
- python >= 3.8
- numpy
- sympy >= 1.1
- appdirs # to find default cache directory on each platform
......
......@@ -51,7 +51,7 @@ nbsphinx_execute = 'never'
nbsphinx_codecell_lexer = 'python3'
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'python': ('https://docs.python.org/3.6', None),
intersphinx_mapping = {'python': ('https://docs.python.org/3.8', None),
'numpy': ('https://docs.scipy.org/doc/numpy/', None),
'matplotlib': ('https://matplotlib.org/', None),
'sympy': ('https://docs.sympy.org/latest/', None),
......
This source diff could not be displayed because it is too large. You can view the blob instead.
%% Cell type:code id: tags:
 
``` python
from pystencils.session import *
import pystencils as ps
sp.init_printing()
frac = sp.Rational
```
 
%% Cell type:markdown id: tags:
 
# Tutorial 06: Phase-field simulation of dentritic solidification
 
This is the second tutorial on phase field methods with pystencils. Make sure to read the previous tutorial first.
 
In this tutorial we again implement a model described in **Programming Phase-Field Modelling** by S. Bulent Biner.
This time we implement the model from chapter 4.7 that describes dentritic growth. So get ready for some beautiful snowflake pictures.
 
We start again by adding all required arrays fields. This time we explicitly store the change of the phase variable φ in time, since the dynamics is calculated using an Allen-Cahn formulation where a term $\partial_t \phi$ occurs.
 
%% Cell type:code id: tags:
 
``` python
dh = ps.create_data_handling(domain_size=(300, 300), periodicity=True,
default_target='cpu')
default_target=ps.Target.CPU)
φ_field = dh.add_array('phi', latex_name='φ')
φ_field_tmp = dh.add_array('phi_temp', latex_name='φ_temp')
φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
t_field = dh.add_array('T')
```
 
%% Cell type:markdown id: tags:
 
This model has a lot of parameters that are created here in a symbolic fashion.
 
%% Cell type:code id: tags:
 
``` python
ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
εb = sp.Symbol("\\bar{\\epsilon}")
 
φ = φ_field.center
φ_tmp = φ_field_tmp.center
T = t_field.center
 
def f(φ, m):
return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
 
free_energy_density = ε**2 / 2 * (ps.fd.Diff(φ,0)**2 + ps.fd.Diff(φ,1)**2 ) + f(φ, m)
free_energy_density
```
 
%% Output
 
$\displaystyle \frac{{{φ}_{(0,0)}}^{4}}{4} - {{φ}_{(0,0)}}^{3} \left(\frac{1}{2} - \frac{m}{3}\right) + {{φ}_{(0,0)}}^{2} \left(\frac{1}{4} - \frac{m}{2}\right) + \frac{ε^{2} \left({\partial_{0} {{φ}_{(0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0)}}}^{2}\right)}{2}$
4 2 ⎛ 2 2⎞
φ_C 3 ⎛1 m⎞ 2 ⎛1 m⎞ ε ⋅⎝D(φ[0,0]) + D(φ[0,0]) ⎠
──── - φ_C ⋅⎜─ - ─⎟ + φ_C ⋅⎜─ - ─⎟ + ────────────────────────────
4 ⎝2 3⎠ ⎝4 2⎠ 2
 
%% Cell type:markdown id: tags:
 
The free energy is again composed of a bulk and interface part.
 
%% Cell type:code id: tags:
 
``` python
plt.figure(figsize=(7,4))
plt.sympy_function(f(φ, m=1), x_values=(-1.05, 1.5) )
plt.xlabel("φ")
plt.title("Bulk free energy");
```
 
%% Output
 
 
%% Cell type:markdown id: tags:
 
Compared to last tutorial, this bulk free energy has also two minima, but at different values.
 
Another complexity of the model is its anisotropy. The gradient parameter $\epsilon$ depends on the interface normal.
 
%% Cell type:code id: tags:
 
``` python
def σ(θ):
return 1 + δ * sp.cos(j * (θ - θzero))
 
θ = sp.atan2(ps.fd.Diff(φ, 1), ps.fd.Diff(φ, 0))
 
ε_val = εb * σ(θ)
ε_val
```
 
%% Output
 
$\displaystyle \bar{\epsilon} \left(δ \cos{\left(j \left(- θ_{0} + \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)}\right) \right)} + 1\right)$
\bar{\epsilon}⋅(δ⋅cos(j⋅(-θ₀ + atan2(D(φ[0,0]), D(φ[0,0])))) + 1)
 
%% Cell type:code id: tags:
 
``` python
def m_func(T):
return (α / sp.pi) * sp.atan(γ * (Teq - T))
```
 
%% Cell type:markdown id: tags:
 
However, we just insert these parameters into the free energy formulation before doing the functional derivative, to make the dependence of $\epsilon$ on $\nabla \phi$ explicit.
 
%% Cell type:code id: tags:
 
``` python
fe = free_energy_density.subs({
m: m_func(T),
ε: εb * σ(θ),
})
 
dF_dφ = ps.fd.functional_derivative(fe, φ)
dF_dφ = ps.fd.expand_diff_full(dF_dφ, functions=[φ])
dF_dφ
```
 
%% Output
 
$\displaystyle {{φ}_{(0,0)}}^{3} - \frac{{{φ}_{(0,0)}}^{2} α \operatorname{atan}{\left({{T}_{(0,0)}} γ - T_{eq} γ \right)}}{\pi} - \frac{3 {{φ}_{(0,0)}}^{2}}{2} + \frac{{{φ}_{(0,0)}} α \operatorname{atan}{\left({{T}_{(0,0)}} γ - T_{eq} γ \right)}}{\pi} + \frac{{{φ}_{(0,0)}}}{2} - \bar{\epsilon}^{2} δ^{2} \cos^{2}{\left(j θ_{0} - j \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)} \right)} {\partial_{0} {\partial_{0} {{φ}_{(0,0)}}}} - \bar{\epsilon}^{2} δ^{2} \cos^{2}{\left(j θ_{0} - j \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)} \right)} {\partial_{1} {\partial_{1} {{φ}_{(0,0)}}}} - 2 \bar{\epsilon}^{2} δ \cos{\left(j θ_{0} - j \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)} \right)} {\partial_{0} {\partial_{0} {{φ}_{(0,0)}}}} - 2 \bar{\epsilon}^{2} δ \cos{\left(j θ_{0} - j \operatorname{atan_{2}}{\left({\partial_{1} {{φ}_{(0,0)}}},{\partial_{0} {{φ}_{(0,0)}}} \right)} \right)} {\partial_{1} {\partial_{1} {{φ}_{(0,0)}}}} - \bar{\epsilon}^{2} {\partial_{0} {\partial_{0} {{φ}_{(0,0)}}}} - \bar{\epsilon}^{2} {\partial_{1} {\partial_{1} {{φ}_{(0,0)}}}}$
2 2
3 φ_C ⋅α⋅atan(T_C⋅γ - T_eq⋅γ) 3⋅φ_C φ_C⋅α⋅atan(T_C⋅γ - T_eq⋅γ) φ_C
φ_C - ─────────────────────────── - ────── + ────────────────────────── + ───
π 2 π 2
2 2 2
- \bar{\epsilon} ⋅δ ⋅cos (j⋅θ₀ - j⋅atan2(D(φ[0,0]), D(φ[0,0])))⋅D(D(φ[0,0]))
2 2 2
- \bar{\epsilon} ⋅δ ⋅cos (j⋅θ₀ - j⋅atan2(D(φ[0,0]), D(φ[0,0])))⋅D(D(φ[0,0])) -
2
2⋅\bar{\epsilon} ⋅δ⋅cos(j⋅θ₀ - j⋅atan2(D(φ[0,0]), D(φ[0,0])))⋅D(D(φ[0,0])) -
2
2⋅\bar{\epsilon} ⋅δ⋅cos(j⋅θ₀ - j⋅atan2(D(φ[0,0]), D(φ[0,0])))⋅D(D(φ[0,0])) - \
2 2
bar{\epsilon} ⋅D(D(φ[0,0])) - \bar{\epsilon} ⋅D(D(φ[0,0]))
 
%% Cell type:markdown id: tags:
 
Then we insert all the numeric parameters and discretize:
 
%% Cell type:code id: tags:
 
``` python
discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
parameters = {
τ: 0.0003,
κ: 1.8,
εb: 0.01,
δ: 0.02,
γ: 10,
j: 6,
α: 0.9,
Teq: 1.0,
θzero: 0.2,
sp.pi: sp.pi.evalf()
}
parameters
```
 
%% Output
 
$\displaystyle \left\{ \pi : 3.14159265358979, \ T_{eq} : 1.0, \ \bar{\epsilon} : 0.01, \ j : 6, \ α : 0.9, \ γ : 10, \ δ : 0.02, \ θ_{0} : 0.2, \ κ : 1.8, \ τ : 0.0003\right\}$
{π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
0.02, θ₀: 0.2, κ: 1.8, τ: 0.0003}
 
%% Cell type:code id: tags:
 
``` python
dφ_dt = - dF_dφ / τ
φ_eqs = ps.simp.sympy_cse_on_assignment_list([ps.Assignment(φ_delta_field.center,
discretize(dφ_dt.subs(parameters)))])
φ_eqs.append(ps.Assignment(φ_tmp, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
 
temperature_evolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
temperature_eqs = [
ps.Assignment(T, discretize(temperature_evolution.subs(parameters)))
]
```
 
%% Cell type:markdown id: tags:
 
When creating the kernels we pass as target (which may be 'cpu' or 'gpu') the default target of the target handling. This enables to switch to a GPU simulation just by changing the parameter of the data handling.
When creating the kernels we pass as target (which may be 'Target.CPU' or 'Target.GPU') the default target of the target handling. This enables to switch to a GPU simulation just by changing the parameter of the data handling.
 
The rest is similar to the previous tutorial.
 
%% Cell type:code id: tags:
 
``` python
φ_kernel = ps.create_kernel(φ_eqs, cpu_openmp=4, target=dh.default_target).compile()
temperature_kernel = ps.create_kernel(temperature_eqs, target=dh.default_target).compile()
```
 
%% Cell type:code id: tags:
 
``` python
def timeloop(steps=200):
φ_sync = dh.synchronization_function(['phi'])
temperature_sync = dh.synchronization_function(['T'])
dh.all_to_gpu() # this does nothing when running on CPU
for t in range(steps):
φ_sync()
dh.run_kernel(φ_kernel)
dh.swap('phi', 'phi_temp')
temperature_sync()
dh.run_kernel(temperature_kernel)
dh.all_to_cpu()
return dh.gather_array('phi')
 
def init(nucleus_size=np.sqrt(5)):
for b in dh.iterate():
x, y = b.cell_index_arrays
x, y = x-b.shape[0]//2, y-b.shape[0]//2
bArr = (x**2 + y**2) < nucleus_size**2
b['phi'].fill(0)
b['phi'][(x**2 + y**2) < nucleus_size**2] = 1.0
b['T'].fill(0.0)
 
def plot():
plt.subplot(1,3,1)
plt.scalar_field(dh.gather_array('phi'))
plt.title("φ")
plt.colorbar()
plt.subplot(1,3,2)
plt.title("T")
plt.scalar_field(dh.gather_array('T'))
plt.colorbar()
plt.subplot(1,3,3)
plt.title("∂φ")
plt.scalar_field(dh.gather_array('phidelta'))
plt.colorbar()
```
 
%% Cell type:code id: tags:
 
``` python
timeloop(10)
init()
plot()
print(dh)
```
 
%% Output
 
Name| Inner (min/max)| WithGl (min/max)
----------------------------------------------------
T| ( 0, 0)| ( 0, 0)
phi| ( 0, 1)| ( 0, 1)
phi_temp| ( 0, 0)| ( 0, 0)
phidelta| ( 0, 0)| ( 0, 0)
 
 
%% Cell type:code id: tags:
 
``` python
result = None
if 'is_test_run' not in globals():
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=600)
result = ps.jupyter.display_as_html_video(ani)
result
```
 
%% Output
 
<IPython.core.display.HTML object>
 
%% Cell type:code id: tags:
 
``` python
assert np.isfinite(dh.max('phi'))
assert np.isfinite(dh.max('T'))
```
......
%% Cell type:code id: tags:
``` python
import psutil
from pystencils.session import *
import shutil
```
%% Cell type:markdown id: tags:
# Demo: Finite differences - 2D wave equation
In this tutorial we show how to use the finite difference module of *pystencils* to solve a 2D wave equations. The time derivative is discretized by a simple forward Euler method.
%% Cell type:markdown id: tags:
$$ \frac{\partial^2 u}{\partial t^2} = \mbox{div} \left( q(x,y) \nabla u \right)$$
%% Cell type:markdown id: tags:
We begin by creating three *numpy* arrays for the current, the previous and the next timestep. Actually we will see later that two fields are enough, but let's keep it simple. From these *numpy* arrays we create *pystencils* fields to formulate our update rule.
%% Cell type:code id: tags:
``` python
size = (60, 70) # domain size
u_arrays = [np.zeros(size), np.zeros(size), np.zeros(size)]
u_fields = [ps.Field.create_from_numpy_array("u%s" % (name,), arr)
for name, arr in zip(["0", "1", "2"], u_arrays)]
# Nicer display for fields
for i, field in enumerate(u_fields):
field.latex_name = "u^{(%d)}" % (i,)
```
%% Cell type:markdown id: tags:
*pystencils* contains already simple rules to discretize the a diffusion term. The time discretization is done manually.
%% Cell type:code id: tags:
``` python
discretize = ps.fd.Discretization2ndOrder()
def central2nd_time_derivative(fields):
f_next, f_current, f_last = fields
return (f_next[0, 0] - 2 * f_current[0, 0] + f_last[0, 0]) / discretize.dt**2
rhs = ps.fd.diffusion(u_fields[1], 1)
wave_eq = sp.Eq(central2nd_time_derivative(u_fields), discretize(rhs))
wave_eq = sp.simplify(wave_eq)
wave_eq
```
%% Output
$$\frac{{{u^{(0)}}_{(0,0)}} - 2 {{u^{(1)}}_{(0,0)}} + {{u^{(2)}}_{(0,0)}}}{dt^{2}} = \frac{{{u^{(1)}}_{(-1,0)}} + {{u^{(1)}}_{(0,-1)}} - 4 {{u^{(1)}}_{(0,0)}} + {{u^{(1)}}_{(0,1)}} + {{u^{(1)}}_{(1,0)}}}{dx^{2}}$$
u_0_C - 2⋅u_1_C + u_2_C u_1_W + u_1_S - 4⋅u_1_C + u_1_N + u_1_E
─────────────────────── = ───────────────────────────────────────
2 2
dt dx
%% Cell type:markdown id: tags:
The explicit Euler scheme is now obtained by solving above equation with respect to $u_C^{next}$.
%% Cell type:code id: tags:
``` python
u_next_C = u_fields[-1][0, 0]
update_rule = ps.Assignment(u_next_C, sp.solve(wave_eq, u_next_C)[0])
update_rule
```
%% Output
$${{u^{(2)}}_{(0,0)}} \leftarrow \frac{{{u^{(1)}}_{(-1,0)}} dt^{2} + {{u^{(1)}}_{(0,-1)}} dt^{2} - 4 {{u^{(1)}}_{(0,0)}} dt^{2} + {{u^{(1)}}_{(0,1)}} dt^{2} + {{u^{(1)}}_{(1,0)}} dt^{2} + dx^{2} \left(- {{u^{(0)}}_{(0,0)}} + 2 {{u^{(1)}}_{(0,0)}}\right)}{dx^{2}}$$
2 2 2 2 2 2
u_1_W⋅dt + u_1_S⋅dt - 4⋅u_1_C⋅dt + u_1_N⋅dt + u_1_E⋅dt + dx ⋅(-u
u_2_C := ─────────────────────────────────────────────────────────────────────
2
dx
_0_C + 2⋅u_1_C)
───────────────
%% Cell type:markdown id: tags:
Before creating the kernel, we substitute numeric values for $dx$ and $dt$.
Then a kernel is created just like in the last tutorial.
%% Cell type:code id: tags:
``` python
update_rule = update_rule.subs({discretize.dx: 0.1, discretize.dt: 0.05})
ast = ps.create_kernel(update_rule)
kernel = ast.compile()
ps.show_code(ast)
```
%% Output
FUNC_PREFIX void kernel(double * RESTRICT const _data_u0, double * RESTRICT const _data_u1, double * RESTRICT _data_u2)
{
for (int ctr_0 = 1; ctr_0 < 59; ctr_0 += 1)
{
double * RESTRICT _data_u2_00 = _data_u2 + 70*ctr_0;
double * RESTRICT const _data_u1_00 = _data_u1 + 70*ctr_0;
double * RESTRICT const _data_u0_00 = _data_u0 + 70*ctr_0;
double * RESTRICT const _data_u1_01 = _data_u1 + 70*ctr_0 + 70;
double * RESTRICT const _data_u1_0m1 = _data_u1 + 70*ctr_0 - 70;
for (int ctr_1 = 1; ctr_1 < 69; ctr_1 += 1)
{
_data_u2_00[ctr_1] = 0.25*_data_u1_00[ctr_1 + 1] + 0.25*_data_u1_00[ctr_1 - 1] + 0.25*_data_u1_01[ctr_1] + 0.25*_data_u1_0m1[ctr_1] - 1.0*_data_u0_00[ctr_1] + 1.0*_data_u1_00[ctr_1];
}
}
}
%% Cell type:code id: tags:
``` python
ast.get_parameters()
```
%% Output
[_data_u0, _data_u1, _data_u2]
%% Cell type:code id: tags:
``` python
ast.fields_accessed
```
%% Output
{u0, u1, u2}
%% Cell type:markdown id: tags:
To run simulation a suitable initial condition and boundary treatment is required. We chose an initial condition which is zero at the borders of the domain. The outermost layer is not changed by the update kernel, so we have an implicit homogenous Dirichlet boundary condition.
%% Cell type:code id: tags:
``` python
X,Y = np.meshgrid( np.linspace(0, 1, size[1]), np.linspace(0,1, size[0]))
Z = np.sin(2*X*np.pi) * np.sin(2*Y*np.pi)
# Initialize the previous and current values with the initial function
np.copyto(u_arrays[0], Z)
np.copyto(u_arrays[1], Z)
# The values for the next timesteps do not matter, since they are overwritten
u_arrays[2][:, :] = 0
```
%% Cell type:markdown id: tags:
One timestep now consists of applying the kernel once, then shifting the arrays.
%% Cell type:code id: tags:
``` python
def run(timesteps=1):
for t in range(timesteps):
kernel(u0=u_arrays[0], u1=u_arrays[1], u2=u_arrays[2])
u_arrays[0], u_arrays[1], u_arrays[2] = u_arrays[1], u_arrays[2], u_arrays[0]
return u_arrays[2]
```
%% Cell type:markdown id: tags:
Lets create an animation of the solution:
%% Cell type:code id: tags:
``` python
if shutil.which("ffmpeg") is not None:
ani = plt.surface_plot_animation(run, zlim=(-1, 1))
ps.jupyter.display_as_html_video(ani)
else:
print("No ffmpeg installed")
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:code id: tags:
``` python
assert np.isfinite(np.max(u_arrays[2]))
```
%% Cell type:markdown id: tags:
__LLVM backend__
The next cell demonstrates how to run the same simulation with the LLVM backend. The only difference is that in the `create_kernel` function the `target` is set to llvm.
%% Cell type:code id: tags:
``` python
try:
import llvmlite
except ImportError:
llvmlite=None
print('No llvmlite installed')
if llvmlite:
kernel = ps.create_kernel(update_rule, target='llvm').compile()
kernel = ps.create_kernel(update_rule, backend=ps.Backend.LLVM).compile()
X,Y = np.meshgrid( np.linspace(0, 1, size[1]), np.linspace(0,1, size[0]))
Z = np.sin(2*X*np.pi) * np.sin(2*Y*np.pi)
# Initialize the previous and current values with the initial function
np.copyto(u_arrays[0], Z)
np.copyto(u_arrays[1], Z)
# The values for the next timesteps do not matter, since they are overwritten
u_arrays[2][:, :] = 0
def run_LLVM(timesteps=1):
for t in range(timesteps):
kernel(u0=u_arrays[0], u1=u_arrays[1], u2=u_arrays[2])
u_arrays[0], u_arrays[1], u_arrays[2] = u_arrays[1], u_arrays[2], u_arrays[0]
return u_arrays[2]
assert np.isfinite(np.max(u_arrays[2]))
if shutil.which("ffmpeg") is not None:
ani = plt.surface_plot_animation(run_LLVM, zlim=(-1, 1))
ps.jupyter.display_as_html_video(ani)
else:
print("No ffmpeg installed")
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:markdown id: tags:
__Runing on GPU__
We can also run the same kernel on the GPU, by using the ``pycuda`` package.
%% Cell type:code id: tags:
``` python
try:
import pycuda
except ImportError:
pycuda=None
print('No pycuda installed')
res = None
if pycuda:
gpu_ast = ps.create_kernel(update_rule, target='gpu')
gpu_ast = ps.create_kernel(update_rule, target=ps.Target.GPU)
gpu_kernel = gpu_ast.compile()
res = ps.show_code(gpu_ast)
res
```
%% Output
FUNC_PREFIX __launch_bounds__(256) void kernel(double * RESTRICT const _data_u0, double * RESTRICT const _data_u1, double * RESTRICT _data_u2)
{
if (blockDim.x*blockIdx.x + threadIdx.x + 1 < 59 && blockDim.y*blockIdx.y + threadIdx.y + 1 < 69)
{
const int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x + 1;
const int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y + 1;
double * RESTRICT _data_u2_10 = _data_u2 + ctr_1;
double * RESTRICT const _data_u1_10 = _data_u1 + ctr_1;
double * RESTRICT const _data_u0_10 = _data_u0 + ctr_1;
double * RESTRICT const _data_u1_11 = _data_u1 + ctr_1 + 1;
double * RESTRICT const _data_u1_1m1 = _data_u1 + ctr_1 - 1;
_data_u2_10[70*ctr_0] = 0.25*_data_u1_10[70*ctr_0 + 70] + 0.25*_data_u1_10[70*ctr_0 - 70] + 0.25*_data_u1_11[70*ctr_0] + 0.25*_data_u1_1m1[70*ctr_0] - 1.0*_data_u0_10[70*ctr_0] + 1.0*_data_u1_10[70*ctr_0];
}
}
%% Cell type:markdown id: tags:
The run function has to be changed now slightly, since the data has to be transfered to the GPU first, then the kernel can be executed, and in the end the data has to be transfered back
%% Cell type:code id: tags:
``` python
if pycuda:
import pycuda.gpuarray as gpuarray
def run_on_gpu(timesteps=1):
# Transfer arrays to GPU
gpuArrs = [gpuarray.to_gpu(a) for a in u_arrays]
for t in range(timesteps):
gpu_kernel(u0=gpuArrs[0], u1=gpuArrs[1], u2=gpuArrs[2])
gpuArrs[0], gpuArrs[1], gpuArrs[2] = gpuArrs[1], gpuArrs[2], gpuArrs[0]
# Transfer arrays to CPU
for gpuArr, cpuArr in zip(gpuArrs, u_arrays):
gpuArr.get(cpuArr)
assert np.isfinite(np.max(u_arrays[2]))
```
%% Cell type:code id: tags:
``` python
if pycuda:
run_on_gpu(400)
```
......
......@@ -8,6 +8,10 @@ Creating kernels
.. autofunction:: pystencils.create_kernel
.. autofunction:: pystencils.CreateKernelConfig
.. autofunction:: pystencils.create_domain_kernel
.. autofunction:: pystencils.create_indexed_kernel
.. autofunction:: pystencils.create_staggered_kernel
......
"""Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
from .enums import Backend, Target
from . import fd
from . import stencil as stencil
from .assignment import Assignment, assignment_from_stencil
from .data_types import TypedSymbol
from .datahandling import create_data_handling
from .display_utils import show_code, get_code_obj, get_code_str, to_dot
from .display_utils import get_code_obj, get_code_str, show_code, to_dot
from .field import Field, FieldType, fields
from .kernel_decorator import kernel
from .kernelcreation import create_indexed_kernel, create_kernel, create_staggered_kernel
from .kernelcreation import (
CreateKernelConfig, create_domain_kernel, create_indexed_kernel, create_kernel, create_staggered_kernel)
from .simp import AssignmentCollection
from .slicing import make_slice
from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered
from .sympyextensions import SymbolCreator
from .spatial_coordinates import (x_, x_staggered, x_staggered_vector, x_vector,
y_, y_staggered, z_, z_staggered)
try:
import pystencils_autodiff
autodiff = pystencils_autodiff
except ImportError:
pass
__all__ = ['Field', 'FieldType', 'fields',
'TypedSymbol',
'make_slice',
'create_kernel', 'create_indexed_kernel', 'create_staggered_kernel',
'create_kernel', 'create_domain_kernel', 'create_indexed_kernel', 'create_staggered_kernel',
'CreateKernelConfig',
'Target', 'Backend',
'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
'AssignmentCollection',
'Assignment',
......@@ -39,5 +42,6 @@ __all__ = ['Field', 'FieldType', 'fields',
'stencil']
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
......@@ -7,6 +7,7 @@ import sympy as sp
import pystencils
from pystencils.data_types import TypedImaginaryUnit, TypedSymbol, cast_func, create_type
from pystencils.enums import Target, Backend
from pystencils.field import Field
from pystencils.kernelparameters import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol
from pystencils.sympyextensions import fast_subs
......@@ -136,7 +137,6 @@ class Conditional(Node):
class KernelFunction(Node):
class Parameter:
"""Function parameter.
......@@ -176,7 +176,9 @@ class KernelFunction(Node):
def field_name(self):
return self.fields[0].name
def __init__(self, body, target, backend, compile_function, ghost_layers, function_name="kernel", assignments=None):
def __init__(self, body, target: Target, backend: Backend, compile_function, ghost_layers,
function_name: str = "kernel",
assignments=None):
super(KernelFunction, self).__init__()
self._body = body
body.parent = self
......@@ -194,12 +196,12 @@ class KernelFunction(Node):
@property
def target(self):
"""Currently either 'cpu' or 'gpu' """
"""See pystencils.Target"""
return self._target
@property
def backend(self):
"""Backend for generating the code e.g. 'llvm', 'c', 'cuda' """
"""Backend for generating the code: `Backend`"""
return self._backend
@property
......
......@@ -14,6 +14,7 @@ from pystencils.cpu.vectorization import vec_all, vec_any, CachelineSize
from pystencils.data_types import (
PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression,
reinterpret_cast_func, vector_memory_access, BasicType, TypedSymbol)
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.integer_functions import (
bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor,
......@@ -34,7 +35,7 @@ KERNCRAFT_NO_TERNARY_MODE = False
def generate_c(ast_node: Node,
signature_only: bool = False,
dialect='c',
dialect: Backend = Backend.C,
custom_backend=None,
with_globals=True) -> str:
"""Prints an abstract syntax tree node as C or CUDA code.
......@@ -46,7 +47,7 @@ def generate_c(ast_node: Node,
Args:
ast_node: ast representation of kernel
signature_only: generate signature without function body
dialect: 'c', 'cuda' or opencl
dialect: `Backend`: 'C', 'CUDA' or 'OPENCL'
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
......@@ -60,21 +61,21 @@ def generate_c(ast_node: Node,
ast_node.global_variables = d.symbols_defined
if custom_backend:
printer = custom_backend
elif dialect == 'c':
elif dialect == Backend.C:
try:
instruction_set = ast_node.instruction_set
except Exception:
instruction_set = None
printer = CBackend(signature_only=signature_only,
vector_instruction_set=instruction_set)
elif dialect == 'cuda':
elif dialect == Backend.CUDA:
from pystencils.backends.cuda_backend import CudaBackend
printer = CudaBackend(signature_only=signature_only)
elif dialect == 'opencl':
elif dialect == Backend.OPENCL:
from pystencils.backends.opencl_backend import OpenClBackend
printer = OpenClBackend(signature_only=signature_only)
else:
raise ValueError("Unknown dialect: " + str(dialect))
raise ValueError(f'Unknown {dialect=}')
code = printer(ast_node)
if not signature_only and isinstance(ast_node, KernelFunction):
if with_globals and global_declarations:
......@@ -189,7 +190,7 @@ class CFunction(TypedSymbol):
# noinspection PyPep8Naming
class CBackend:
def __init__(self, sympy_printer=None, signature_only=False, vector_instruction_set=None, dialect='c'):
def __init__(self, sympy_printer=None, signature_only=False, vector_instruction_set=None, dialect=Backend.C):
if sympy_printer is None:
if vector_instruction_set is not None:
self.sympy_printer = VectorizedCustomSympyPrinter(vector_instruction_set)
......@@ -228,7 +229,7 @@ class CBackend:
function_arguments = [f"{self._print(s.symbol.dtype)} {s.symbol.name}" for s in node.get_parameters()
if not type(s.symbol) is CFunction]
launch_bounds = ""
if self._dialect == 'cuda':
if self._dialect == Backend.CUDA:
max_threads = node.indexing.max_threads_per_block()
if max_threads:
launch_bounds = f"__launch_bounds__({max_threads}) "
......
......@@ -2,6 +2,7 @@ from os.path import dirname, join
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.interpolation_astnodes import DiffInterpolatorAccess, InterpolationMode
......@@ -22,7 +23,7 @@ def generate_cuda(ast_node: Node, signature_only: bool = False, custom_backend=N
Returns:
CUDA code for the ast node and its descendants
"""
return generate_c(ast_node, signature_only, dialect='cuda',
return generate_c(ast_node, signature_only, dialect=Backend.CUDA,
custom_backend=custom_backend, with_globals=with_globals)
......@@ -33,7 +34,7 @@ class CudaBackend(CBackend):
if not sympy_printer:
sympy_printer = CudaSympyPrinter()
super().__init__(sympy_printer, signature_only, dialect='cuda')
super().__init__(sympy_printer, signature_only, dialect=Backend.CUDA)
def _print_SharedMemoryAllocation(self, node):
dtype = node.symbol.dtype
......
......@@ -4,6 +4,7 @@ import pystencils.data_types
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
......@@ -12,7 +13,7 @@ with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str:
"""Prints an abstract syntax tree node (made for target 'gpu') as OpenCL code.
"""Prints an abstract syntax tree node (made for `Target` 'GPU') as OpenCL code. # TODO Backend instead of Target?
Args:
ast_node: ast representation of kernel
......@@ -23,7 +24,7 @@ def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend
Returns:
OpenCL code for the ast node and its descendants
"""
return generate_c(ast_node, signature_only, dialect='opencl',
return generate_c(ast_node, signature_only, dialect=Backend.OPENCL,
custom_backend=custom_backend, with_globals=with_globals)
......@@ -36,7 +37,7 @@ class OpenClBackend(CudaBackend):
sympy_printer = OpenClSympyPrinter()
super().__init__(sympy_printer, signature_only)
self._dialect = 'opencl'
self._dialect = Backend.OPENCL
def _print_Type(self, node):
code = super()._print_Type(node)
......
import numpy as np
import sympy as sp
from pystencils import create_indexed_kernel
from pystencils import create_kernel, CreateKernelConfig, Target
from pystencils.assignment import Assignment
from pystencils.backends.cbackend import CustomCodeNode
from pystencils.boundaries.createindexlist import (
......@@ -84,7 +84,7 @@ class FlagInterface:
class BoundaryHandling:
def __init__(self, data_handling, field_name, stencil, name="boundary_handling", flag_interface=None,
target='cpu', openmp=True):
target: Target = Target.CPU, openmp=True):
assert data_handling.has_data(field_name)
assert data_handling.dim == len(stencil[0]), "Dimension of stencil and data handling do not match"
self._data_handling = data_handling
......@@ -442,9 +442,10 @@ class BoundaryOffsetInfo(CustomCodeNode):
INV_DIR_SYMBOL = TypedSymbol("invdir", np.int64)
def create_boundary_kernel(field, index_field, stencil, boundary_functor, target='cpu', **kernel_creation_args):
def create_boundary_kernel(field, index_field, stencil, boundary_functor, target=Target.CPU, **kernel_creation_args):
elements = [BoundaryOffsetInfo(stencil)]
dir_symbol = TypedSymbol("dir", np.int64)
elements += [Assignment(dir_symbol, index_field[0]('dir'))]
elements += boundary_functor(field, direction_symbol=dir_symbol, index_field=index_field)
return create_indexed_kernel(elements, [index_field], target=target, **kernel_creation_args)
config = CreateKernelConfig(index_fields=[index_field], target=target, **kernel_creation_args)
return create_kernel(elements, config=config)
......@@ -5,6 +5,7 @@ import numpy as np
import pystencils.astnodes as ast
from pystencils.assignment import Assignment
from pystencils.enums import Target, Backend
from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
from pystencils.cpu.cpujit import make_python_function
from pystencils.data_types import StructType, TypedSymbol, create_type
......@@ -70,7 +71,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
loop_order = get_optimal_loop_ordering(fields_without_buffers)
loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
ghost_layers=ghost_layers, loop_order=loop_order)
ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function,
ast_node = KernelFunction(loop_node, Target.CPU, Backend.C, compile_function=make_python_function,
ghost_layers=ghost_layer_info, function_name=function_name, assignments=assignments)
implement_interpolations(body)
......@@ -151,7 +152,7 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
loop_body.append(assignment)
function_body = Block([loop_node])
ast_node = KernelFunction(function_body, "cpu", "c", make_python_function,
ast_node = KernelFunction(function_body, Target.CPU, Backend.C, make_python_function,
ghost_layers=None, function_name=function_name, assignments=assignments)
fixed_coordinate_mapping = {f.name: coordinate_typed_symbols for f in non_index_fields}
......
import warnings
from typing import Tuple, Union
from .datahandling_interface import DataHandling
from ..enums import Target
from .serial_datahandling import SerialDataHandling
try:
......@@ -18,7 +21,7 @@ except ImportError:
def create_data_handling(domain_size: Tuple[int, ...],
periodicity: Union[bool, Tuple[bool, ...]] = False,
default_layout: str = 'SoA',
default_target: str = 'cpu',
default_target: Target = Target.CPU,
parallel: bool = False,
default_ghost_layers: int = 1,
opencl_queue=None) -> DataHandling:
......@@ -29,10 +32,16 @@ def create_data_handling(domain_size: Tuple[int, ...],
periodicity: either True, False for full or no periodicity or a tuple of booleans indicating periodicity
for each coordinate
default_layout: default array layout, that is used if not explicitly specified in 'add_array'
default_target: either 'cpu' or 'gpu'
default_target: `Target`
parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain
default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
"""
if isinstance(default_target, str):
new_target = Target[default_target.upper()]
warnings.warn(f'Target "{default_target}" as str is deprecated. Use {new_target} instead',
category=DeprecationWarning)
default_target = new_target
if parallel:
assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
if wlb is None:
......
......@@ -3,6 +3,7 @@ from typing import Callable, Dict, Iterable, Optional, Sequence, Tuple, Union
import numpy as np
from pystencils.enums import Target, Backend
from pystencils.field import Field, FieldType
......@@ -16,8 +17,8 @@ class DataHandling(ABC):
'gather' function that has collects (parts of the) distributed data on a single process.
"""
_GPU_LIKE_TARGETS = ['gpu', 'opencl']
_GPU_LIKE_BACKENDS = ['gpucuda', 'opencl']
_GPU_LIKE_TARGETS = [Target.GPU, Target.OPENCL]
_GPU_LIKE_BACKENDS = [Backend.CUDA, Backend.OPENCL]
# ---------------------------- Adding and accessing data -----------------------------------------------------------
......@@ -56,7 +57,7 @@ class DataHandling(ABC):
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'GPU'
alignment: either False for no alignment, or the number of bytes to align to
Returns:
pystencils field, that can be used to formulate symbolic kernels
......@@ -91,7 +92,7 @@ class DataHandling(ABC):
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'GPU'
alignment: either False for no alignment, or the number of bytes to align to
Returns:
Fields representing the just created arrays
......@@ -280,7 +281,7 @@ class DataHandling(ABC):
names: what data to synchronize: name of array or sequence of names
stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19'
if None, a full synchronization (i.e. D2Q9 or D3Q27) is done
target: either 'cpu' or 'gpu
target: `Target` either 'CPU' or 'GPU'
kwargs: implementation specific, optional optimization parameters for communication
Returns:
......
......@@ -7,16 +7,18 @@ import waLBerla as wlb
from pystencils.datahandling.blockiteration import block_iteration, sliced_block_iteration
from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.enums import Backend
from pystencils.field import Field, FieldType
from pystencils.kernelparameters import FieldPointerSymbol
from pystencils.utils import DotDict
from pystencils import Target
class ParallelDataHandling(DataHandling):
GPU_DATA_PREFIX = "gpu_"
VTK_COUNTER = 0
def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, default_target='cpu'):
def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, default_target=Target.CPU):
"""
Creates data handling based on walberla block storage
......@@ -27,8 +29,9 @@ class ParallelDataHandling(DataHandling):
dim: dimension of scenario,
walberla always uses three dimensions, so if dim=2 the extend of the
z coordinate of blocks has to be 1
default_target: either 'cpu' or 'gpu' . If set to 'gpu' for each array also a GPU version is allocated
if not overwritten in add_array, and synchronization functions are for the GPU by default
default_target: `Target`, either 'CPU' or 'GPU' . If set to 'GPU' for each array also a GPU version is
allocated if not overwritten in add_array, and synchronization functions are for the GPU by
default
"""
super(ParallelDataHandling, self).__init__()
assert dim in (2, 3)
......@@ -94,7 +97,7 @@ class ParallelDataHandling(DataHandling):
if ghost_layers is None:
ghost_layers = self.default_ghost_layers
if gpu is None:
gpu = self.default_target == 'gpu'
gpu = self.default_target == Target.GPU
if layout is None:
layout = self.default_layout
if len(self.blocks) == 0:
......@@ -230,7 +233,7 @@ class ParallelDataHandling(DataHandling):
kernel_function(**arg_dict)
def get_kernel_kwargs(self, kernel_function, **kwargs):
if kernel_function.ast.backend == 'gpucuda':
if kernel_function.ast.backend == Backend.CUDA:
name_map = self._field_name_to_gpu_data_name
to_array = wlb.cuda.toGpuArray
else:
......@@ -283,10 +286,10 @@ class ParallelDataHandling(DataHandling):
self.to_gpu(name)
def synchronization_function_cpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
return self.synchronization_function(names, stencil, 'cpu', buffered, stencil_restricted)
return self.synchronization_function(names, stencil, Target.CPU, buffered, stencil_restricted)
def synchronization_function_gpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
return self.synchronization_function(names, stencil, 'gpu', buffered, stencil_restricted)
return self.synchronization_function(names, stencil, Target.GPU, buffered, stencil_restricted)
def synchronization_function(self, names, stencil=None, target=None, buffered=True, stencil_restricted=False):
if target is None:
......@@ -299,12 +302,12 @@ class ParallelDataHandling(DataHandling):
names = [names]
create_scheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme
if target == 'cpu':
if target == Target.CPU:
create_packing = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo
if buffered and stencil_restricted:
create_packing = wlb.field.createStencilRestrictedPackInfo
else:
assert target == 'gpu'
assert target == Target.GPU
create_packing = wlb.cuda.createPackInfo if buffered else wlb.cuda.createMPIDatatypeInfo
names = [self.GPU_DATA_PREFIX + name for name in names]
......
......@@ -8,6 +8,7 @@ from pystencils.datahandling.blockiteration import SerialBlock
from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.datahandling.pycuda import PyCudaArrayHandler, PyCudaNotAvailableHandler
from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
from pystencils.enums import Target
from pystencils.field import (
Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple,
spatial_layout_string_to_tuple)
......@@ -22,7 +23,7 @@ class SerialDataHandling(DataHandling):
default_ghost_layers: int = 1,
default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False,
default_target: str = 'cpu',
default_target: Target = Target.CPU,
opencl_queue=None,
opencl_ctx=None,
array_handler=None) -> None:
......@@ -33,8 +34,9 @@ class SerialDataHandling(DataHandling):
domain_size: size of the spatial domain as tuple
default_ghost_layers: default number of ghost layers used, if not overridden in add_array() method
default_layout: default layout used, if not overridden in add_array() method
default_target: either 'cpu' or 'gpu' . If set to 'gpu' for each array also a GPU version is allocated
if not overwritten in add_array, and synchronization functions are for the GPU by default
default_target: `Target` either 'CPU' or 'GPU'. If set to 'GPU' for each array also a GPU version is
allocated if not overwritten in add_array, and synchronization functions are for the GPU by
default
"""
super(SerialDataHandling, self).__init__()
self._domainSize = tuple(domain_size)
......@@ -55,7 +57,7 @@ class SerialDataHandling(DataHandling):
except Exception:
self.array_handler = PyCudaNotAvailableHandler()
if default_target == 'opencl' or opencl_queue:
if default_target == Target.OPENCL or opencl_queue:
self.array_handler = PyOpenClArrayHandler(opencl_queue)
else:
self.array_handler = array_handler
......@@ -107,7 +109,7 @@ class SerialDataHandling(DataHandling):
}
if not hasattr(values_per_cell, '__len__'):
values_per_cell = (values_per_cell, )
values_per_cell = (values_per_cell,)
if len(values_per_cell) == 1 and values_per_cell[0] == 1:
values_per_cell = ()
......@@ -266,17 +268,17 @@ class SerialDataHandling(DataHandling):
return name in self.gpu_arrays
def synchronization_function_cpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, target='cpu')
return self.synchronization_function(names, stencil_name, target=Target.CPU)
def synchronization_function_gpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, target='gpu')
return self.synchronization_function(names, stencil_name, target=Target.GPU)
def synchronization_function(self, names, stencil=None, target=None, functor=None, **_):
if target is None:
target = self.default_target
if target == 'opencl':
target = 'gpu'
assert target in ('cpu', 'gpu')
if target == Target.OPENCL: # TODO potential misuse between Target and Backend
target = Target.GPU
assert target in (Target.CPU, Target.GPU)
if not hasattr(names, '__len__') or type(names) is str:
names = [names]
......@@ -305,12 +307,12 @@ class SerialDataHandling(DataHandling):
gls = self._field_information[name]['ghost_layers']
values_per_cell = self._field_information[name]['values_per_cell']
if values_per_cell == ():
values_per_cell = (1, )
values_per_cell = (1,)
if len(values_per_cell) == 1:
values_per_cell = values_per_cell[0]
if len(filtered_stencil) > 0:
if target == 'cpu':
if target == Target.CPU:
if functor is None:
from pystencils.slicing import get_periodic_boundary_functor
functor = get_periodic_boundary_functor
......@@ -318,7 +320,8 @@ class SerialDataHandling(DataHandling):
else:
if functor is None:
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as functor
target = 'gpu' if not isinstance(self.array_handler, PyOpenClArrayHandler) else 'opencl'
target = Target.GPU if not isinstance(self.array_handler,
PyOpenClArrayHandler) else Target.OPENCL
result.append(functor(filtered_stencil, self._domainSize,
index_dimensions=self.fields[name].index_dimensions,
index_dim_shape=values_per_cell,
......@@ -328,7 +331,7 @@ class SerialDataHandling(DataHandling):
opencl_queue=self._opencl_queue,
opencl_ctx=self._opencl_ctx))
if target == 'cpu':
if target == Target.CPU:
def result_functor():
for arr_name, func in zip(names, result):
func(pdfs=self.cpu_arrays[arr_name])
......@@ -379,6 +382,7 @@ class SerialDataHandling(DataHandling):
raise NotImplementedError("VTK export for fields with more than one index "
"coordinate not implemented")
image_to_vtk(full_file_name, cell_data=cell_data)
return writer
def create_vtk_writer_for_flag_array(self, file_name, data_name, masks_to_name, ghost_layers=False):
......
......@@ -3,6 +3,7 @@ from typing import Any, Dict, Optional, Union
import sympy as sp
from pystencils.astnodes import KernelFunction
from pystencils.enums import Backend
from pystencils.kernel_wrapper import KernelWrapper
......@@ -45,12 +46,9 @@ def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None)
if isinstance(ast, KernelWrapper):
ast = ast.ast
if ast.backend == 'gpucuda':
dialect = 'cuda'
elif ast.backend == 'opencl':
dialect = 'opencl'
else:
dialect = 'c'
if ast.backend not in {Backend.C, Backend.CUDA, Backend.OPENCL}:
raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}')
dialect = ast.backend
class CodeDisplay:
def __init__(self, ast_input):
......