Skip to content
Snippets Groups Projects
Commit 4b2bd4d3 authored by Markus Holzer
Browse files

Fixed second stage pipeline

parent 1597456b
Branches
Tags
No related merge requests found
Source diff could not be displayed: it is too large. Options to address this: view the blob.
...@@ -11,11 +11,11 @@ Creating kernels ...@@ -11,11 +11,11 @@ Creating kernels
.. autoclass:: pystencils.CreateKernelConfig .. autoclass:: pystencils.CreateKernelConfig
:members: :members:
.. autofunction:: pystencils.create_domain_kernel .. autofunction:: pystencils.kernelcreation.create_domain_kernel
.. autofunction:: pystencils.create_indexed_kernel .. autofunction:: pystencils.kernelcreation.create_indexed_kernel
.. autofunction:: pystencils.create_staggered_kernel .. autofunction:: pystencils.kernelcreation.create_staggered_kernel
Code printing Code printing
......
...@@ -495,8 +495,8 @@ class CustomSympyPrinter(CCodePrinter): ...@@ -495,8 +495,8 @@ class CustomSympyPrinter(CCodePrinter):
known = self.known_functions[arg.__class__.__name__.lower()] known = self.known_functions[arg.__class__.__name__.lower()]
code = self._print(arg) code = self._print(arg)
return code.replace(known, f"{known}f") return code.replace(known, f"{known}f")
elif isinstance(arg, sp.Pow) and data_type == BasicType('float32'): elif isinstance(arg, (sp.Pow, sp.exp)) and data_type == BasicType('float32'):
known = ['sqrt', 'cbrt', 'pow'] known = ['sqrt', 'cbrt', 'pow', 'exp']
code = self._print(arg) code = self._print(arg)
for k in known: for k in known:
if k in code: if k in code:
...@@ -673,8 +673,11 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter): ...@@ -673,8 +673,11 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU'] instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU']
return instruction.format(f"& {self._print(arg)}", **self._kwargs) return instruction.format(f"& {self._print(arg)}", **self._kwargs)
elif expr.func == DivFunc: elif expr.func == DivFunc:
return self.instruction_set['/'].format(self._print(expr.divisor), self._print(expr.dividend), result = self._scalarFallback('_print_Function', expr)
if not result:
result = self.instruction_set['/'].format(self._print(expr.divisor), self._print(expr.dividend),
**self._kwargs) **self._kwargs)
return result
elif expr.func == fast_division: elif expr.func == fast_division:
result = self._scalarFallback('_print_Function', expr) result = self._scalarFallback('_print_Function', expr)
if not result: if not result:
......
import graphviz import graphviz
from graphviz import Digraph, lang try:
from graphviz import Digraph
import graphviz.quoting as quote
except ImportError:
from graphviz import Digraph
import graphviz.lang as quote
from sympy.printing.printer import Printer from sympy.printing.printer import Printer
...@@ -12,7 +17,7 @@ class DotPrinter(Printer): ...@@ -12,7 +17,7 @@ class DotPrinter(Printer):
super(DotPrinter, self).__init__() super(DotPrinter, self).__init__()
self._node_to_str_function = node_to_str_function self._node_to_str_function = node_to_str_function
self.dot = Digraph(**kwargs) self.dot = Digraph(**kwargs)
self.dot.quote_edge = lang.quote self.dot.quote_edge = quote.quote
def _print_KernelFunction(self, func): def _print_KernelFunction(self, func):
self.dot.node(str(id(func)), style='filled', fillcolor='#a056db', label=self._node_to_str_function(func)) self.dot.node(str(id(func)), style='filled', fillcolor='#a056db', label=self._node_to_str_function(func))
......
...@@ -75,20 +75,22 @@ class CreateKernelConfig: ...@@ -75,20 +75,22 @@ class CreateKernelConfig:
""" """
gpu_indexing: str = 'block' gpu_indexing: str = 'block'
""" """
Either 'block' or 'line', or custom indexing class, see `AbstractIndexing` Either 'block' or 'line', or custom indexing class, see `pystencils.gpucuda.AbstractIndexing`
""" """
gpu_indexing_params: MappingProxyType = field(default=MappingProxyType({})) gpu_indexing_params: MappingProxyType = field(default=MappingProxyType({}))
""" """
Dict with indexing parameters (constructor parameters of indexing class) Dict with indexing parameters (constructor parameters of indexing class)
e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'. e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'.
""" """
# TODO rework this docstring
default_assignment_simplifications: bool = False default_assignment_simplifications: bool = False
""" """
If `True` default simplifications are first performed on the Assignments. If problems occur during the If `True` default simplifications are first performed on the Assignments. If problems occur during the
simplification a warning will be thrown. simplification a warning will be thrown.
Furthermore, it is essential to know that this is a two-stage process. The first stage of the process acts Furthermore, it is essential to know that this is a two-stage process. The first stage of the process acts
on the level of the `AssignmentCollection`. In this part, `create_simplification_strategy` on the level of the `pystencils.AssignmentCollection`. In this part,
from pystencils.simplificationfactory will be used to apply optimisations like insertion of constants to `pystencils.simp.create_simplification_strategy` from pystencils.simplificationfactory will be used to
apply optimisations like insertion of constants to
remove pressure from the registers. Thus the first part of the optimisations can only be executed if remove pressure from the registers. Thus the first part of the optimisations can only be executed if
an `AssignmentCollection` is passed. The second part of the optimisation acts on the level of each Assignment an `AssignmentCollection` is passed. The second part of the optimisation acts on the level of each Assignment
individually. In this stage, all optimisations from `sympy.codegen.rewriting.optims_c99` are applied individually. In this stage, all optimisations from `sympy.codegen.rewriting.optims_c99` are applied
......
...@@ -10,7 +10,12 @@ from pystencils.kernel_wrapper import KernelWrapper ...@@ -10,7 +10,12 @@ from pystencils.kernel_wrapper import KernelWrapper
def to_dot(expr: sp.Expr, graph_style: Optional[Dict[str, Any]] = None, short=True): def to_dot(expr: sp.Expr, graph_style: Optional[Dict[str, Any]] = None, short=True):
"""Show a sympy or pystencils AST as dot graph""" """Show a sympy or pystencils AST as dot graph"""
from pystencils.astnodes import Node from pystencils.astnodes import Node
try:
import graphviz import graphviz
except ImportError:
print("graphviz is not installed. Visualizing the AST is not available")
return
graph_style = {} if graph_style is None else graph_style graph_style = {} if graph_style is None else graph_style
if isinstance(expr, Node): if isinstance(expr, Node):
......
...@@ -216,7 +216,8 @@ class TypeAdder: ...@@ -216,7 +216,8 @@ class TypeAdder:
else: else:
new_args.append(a) new_args.append(a)
return expr.func(*new_args) if new_args else expr, collated_type return expr.func(*new_args) if new_args else expr, collated_type
elif isinstance(expr, (sp.Pow, InverseTrigonometricFunction, TrigonometricFunction, HyperbolicFunction)): elif isinstance(expr, (sp.Pow, sp.exp, InverseTrigonometricFunction, TrigonometricFunction,
HyperbolicFunction)):
args_types = [self.figure_out_type(arg) for arg in expr.args] args_types = [self.figure_out_type(arg) for arg in expr.args]
collated_type = collate_types([t for _, t in args_types]) collated_type = collate_types([t for _, t in args_types])
new_args = [a if t.dtype_eq(collated_type) else CastFunc(a, collated_type) for a, t in args_types] new_args = [a if t.dtype_eq(collated_type) else CastFunc(a, collated_type) for a, t in args_types]
......
...@@ -33,7 +33,8 @@ def test_two_arguments(dtype, func, target): ...@@ -33,7 +33,8 @@ def test_two_arguments(dtype, func, target):
dh.run_kernel(kernel) dh.run_kernel(kernel)
dh.all_to_cpu() dh.all_to_cpu()
np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0, 2.0).evalf())) np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0, 2.0).evalf()),
13 if dtype == 'float64' else 5)
@pytest.mark.parametrize('dtype', ["float64", "float32"]) @pytest.mark.parametrize('dtype', ["float64", "float32"])
......
...@@ -4,6 +4,7 @@ import pytest ...@@ -4,6 +4,7 @@ import pytest
import pystencils.config import pystencils.config
import sympy as sp import sympy as sp
import pystencils as ps import pystencils as ps
import numpy as np
from pystencils.simp import subexpression_substitution_in_main_assignments from pystencils.simp import subexpression_substitution_in_main_assignments
from pystencils.simp import add_subexpressions_for_divisions from pystencils.simp import add_subexpressions_for_divisions
...@@ -143,29 +144,27 @@ def test_add_subexpressions_for_field_reads(): ...@@ -143,29 +144,27 @@ def test_add_subexpressions_for_field_reads():
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU)) @pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('simplification', (True, False)) @pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason") @pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
def test_sympy_optimizations(target, simplification): def test_sympy_optimizations(target, dtype):
if target == ps.Target.GPU: if target == ps.Target.GPU:
pytest.importorskip("pycuda") pytest.importorskip("pycuda")
src, dst = ps.fields('src, dst: float32[2d]') src, dst = ps.fields(f'src, dst: {dtype}[2d]')
# Triggers Sympy's expm1 optimization
# Sympy's expm1 optimization is tedious to use and the behaviour is highly dependent on the sympy version. In
# some cases the exp expression has to be encapsulated in brackets or multiplied with 1 or 1.0
# for sympy to work properly ...
assignments = ps.AssignmentCollection({ assignments = ps.AssignmentCollection({
src[0, 0]: 1.0 * (sp.exp(dst[0, 0]) - 1) src[0, 0]: 1.0 * (sp.exp(dst[0, 0]) - 1)
}) })
config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification) config = pystencils.config.CreateKernelConfig(target=target, default_number_float=dtype)
ast = ps.create_kernel(assignments, config=config) ast = ps.create_kernel(assignments, config=config)
ps.show_code(ast)
code = ps.get_code_str(ast) code = ps.get_code_str(ast)
if simplification: if dtype == 'float32':
assert 'expm1(' in code assert 'expf(' in code
else: elif dtype == 'float64':
assert 'expm1(' not in code assert 'exp(' in code
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU)) @pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
...@@ -176,7 +175,7 @@ def test_evaluate_constant_terms(target, simplification): ...@@ -176,7 +175,7 @@ def test_evaluate_constant_terms(target, simplification):
pytest.importorskip("pycuda") pytest.importorskip("pycuda")
src, dst = ps.fields('src, dst: float32[2d]') src, dst = ps.fields('src, dst: float32[2d]')
# Triggers Sympy's cos optimization # cos of a number will always be simplified
assignments = ps.AssignmentCollection({ assignments = ps.AssignmentCollection({
src[0, 0]: -sp.cos(1) + dst[0, 0] src[0, 0]: -sp.cos(1) + dst[0, 0]
}) })
...@@ -184,8 +183,4 @@ def test_evaluate_constant_terms(target, simplification): ...@@ -184,8 +183,4 @@ def test_evaluate_constant_terms(target, simplification):
config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification) config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
ast = ps.create_kernel(assignments, config=config) ast = ps.create_kernel(assignments, config=config)
code = ps.get_code_str(ast) code = ps.get_code_str(ast)
if simplification:
assert 'cos(' not in code assert 'cos(' not in code
else:
assert 'cos(' in code
print(code)
...@@ -195,9 +195,9 @@ def test_piecewise3(instruction_set=instruction_set): ...@@ -195,9 +195,9 @@ def test_piecewise3(instruction_set=instruction_set):
g[0, 0] @= 1.0 / (s.b + s.k) if f[0, 0] > 0.0 else 1.0 g[0, 0] @= 1.0 / (s.b + s.k) if f[0, 0] > 0.0 else 1.0
ast = ps.create_kernel(test_kernel) ast = ps.create_kernel(test_kernel)
ps.show_code(ast) # ps.show_code(ast)
vectorize(ast, instruction_set=instruction_set) vectorize(ast, instruction_set=instruction_set)
ps.show_code(ast) # ps.show_code(ast)
ast.compile() ast.compile()
......
...@@ -61,24 +61,29 @@ def test_vectorized_abs(instruction_set, dtype): ...@@ -61,24 +61,29 @@ def test_vectorized_abs(instruction_set, dtype):
@pytest.mark.parametrize('dtype', ('float', 'double')) @pytest.mark.parametrize('dtype', ('float', 'double'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_strided(instruction_set, dtype): def test_strided(instruction_set, dtype):
npdtype = np.float64 if dtype == 'double' else np.float32
f, g = ps.fields(f"f, g : float{64 if dtype=='double' else 32}[2D]") f, g = ps.fields(f"f, g : float{64 if dtype=='double' else 32}[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)] update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) and not instruction_set in ['avx512', 'rvv'] and not instruction_set.startswith('sve'): if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) and instruction_set not in ['avx512', 'rvv'] and not instruction_set.startswith('sve'):
with pytest.warns(UserWarning) as warn: with pytest.warns(UserWarning) as warn:
config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}) config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
default_number_float=npdtype)
ast = ps.create_kernel(update_rule, config=config) ast = ps.create_kernel(update_rule, config=config)
assert 'Could not vectorize loop' in warn[0].message.args[0] assert 'Could not vectorize loop' in warn[0].message.args[0]
else: else:
with pytest.warns(None) as warn: with pytest.warns(None) as warn:
config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set}) config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
default_number_float=npdtype)
ast = ps.create_kernel(update_rule, config=config) ast = ps.create_kernel(update_rule, config=config)
assert len(warn) == 0 assert len(warn) == 0
# ps.show_code(ast)
func = ast.compile() func = ast.compile()
ref_func = ps.create_kernel(update_rule).compile() ref_func = ps.create_kernel(update_rule).compile()
arr = np.random.random((23 + 2, 17 + 2)).astype(np.float64 if dtype == 'double' else np.float32) arr = np.random.random((23 + 2, 17 + 2)).astype(npdtype)
dst = np.zeros_like(arr, dtype=np.float64 if dtype == 'double' else np.float32) dst = np.zeros_like(arr, dtype=npdtype)
ref = np.zeros_like(arr, dtype=np.float64 if dtype == 'double' else np.float32) ref = np.zeros_like(arr, dtype=npdtype)
func(g=dst, f=arr) func(g=dst, f=arr)
ref_func(g=ref, f=arr) ref_func(g=ref, f=arr)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment