Fixed second stage pipeline

4b2bd4d3 · Markus Holzer · 1597456b · 4b2bd4d3 · 4b2bd4d3 · 4b2bd4d3
Commit 4b2bd4d3 authored Feb 9, 2022 by Markus Holzer
--- a/doc/notebooks/01_tutorial_getting_started.ipynb
+++ b/doc/notebooks/01_tutorial_getting_started.ipynb
--- a/doc/sphinx/kernel_compile_and_call.rst
+++ b/doc/sphinx/kernel_compile_and_call.rst
@@ -11,11 +11,11 @@ Creating kernels
 .. autoclass:: pystencils.CreateKernelConfig
    :members:
-.. autofunction:: pystencils.create_domain_kernel
+.. autofunction:: pystencils.kernelcreation.create_domain_kernel
-.. autofunction:: pystencils.create_indexed_kernel
+.. autofunction:: pystencils.kernelcreation.create_indexed_kernel
-.. autofunction:: pystencils.create_staggered_kernel
+.. autofunction:: pystencils.kernelcreation.create_staggered_kernel
 Code printing

--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -495,8 +495,8 @@ class CustomSympyPrinter(CCodePrinter):
                known = self.known_functions[arg.__class__.__name__.lower()]
                code = self._print(arg)
                return code.replace(known, f"{known}f")
-            elif isinstance(arg, sp.Pow) and data_type == BasicType('float32'):
+            elif isinstance(arg, (sp.Pow, sp.exp)) and data_type == BasicType('float32'):
-                known = ['sqrt', 'cbrt', 'pow']
+                known = ['sqrt', 'cbrt', 'pow', 'exp']
                code = self._print(arg)
                for k in known:
                    if k in code:
@@ -673,8 +673,11 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
            instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU']
            return instruction.format(f"& {self._print(arg)}", **self._kwargs)
        elif expr.func == DivFunc:
-            return self.instruction_set['/'].format(self._print(expr.divisor), self._print(expr.dividend),
+            result = self._scalarFallback('_print_Function', expr)
+            if not result:
+                result = self.instruction_set['/'].format(self._print(expr.divisor), self._print(expr.dividend),
                                                          **self._kwargs)
+            return result
        elif expr.func == fast_division:
            result = self._scalarFallback('_print_Function', expr)
            if not result:

--- a/pystencils/backends/dot.py
+++ b/pystencils/backends/dot.py
 import graphviz
-from graphviz import Digraph, lang
+try:
+    from graphviz import Digraph
+    import graphviz.quoting as quote
+except ImportError:
+    from graphviz import Digraph
+    import graphviz.lang as quote
 from sympy.printing.printer import Printer
@@ -12,7 +17,7 @@ class DotPrinter(Printer):
        super(DotPrinter, self).__init__()
        self._node_to_str_function = node_to_str_function
        self.dot = Digraph(**kwargs)
-        self.dot.quote_edge = lang.quote
+        self.dot.quote_edge = quote.quote
    def _print_KernelFunction(self, func):
        self.dot.node(str(id(func)), style='filled', fillcolor='#a056db', label=self._node_to_str_function(func))

--- a/pystencils/config.py
+++ b/pystencils/config.py
@@ -75,20 +75,22 @@ class CreateKernelConfig:
    """
    gpu_indexing: str = 'block'
    """
-    Either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
+    Either 'block' or 'line', or a custom indexing class, see `pystencils.gpucuda.AbstractIndexing`
    """
    gpu_indexing_params: MappingProxyType = field(default=MappingProxyType({}))
    """
    Dict with indexing parameters (constructor parameters of indexing class)
    e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'.
    """
+    # TODO rework this docstring
    default_assignment_simplifications: bool = False
    """
    If `True` default simplifications are first performed on the Assignments. If problems occur during the
    simplification a warning will be thrown.
    Furthermore, it is essential to know that this is a two-stage process. The first stage of the process acts
-    on the level of the `AssignmentCollection`.  In this part, `create_simplification_strategy` 
+    on the level of the `pystencils.AssignmentCollection`.  In this part,
-    from pystencils.simplificationfactory will be used to apply optimisations like insertion of constants to 
+    `pystencils.simp.create_simplification_strategy` from pystencils.simplificationfactory will be used to
+    apply optimisations like insertion of constants to
    remove pressure from the registers. Thus the first part of the optimisations can only be executed if
    an `AssignmentCollection` is passed. The second part of the optimisation acts on the level of each Assignment
    individually. In this stage, all optimisations from `sympy.codegen.rewriting.optims_c99` are applied

--- a/pystencils/display_utils.py
+++ b/pystencils/display_utils.py
@@ -10,7 +10,12 @@ from pystencils.kernel_wrapper import KernelWrapper
 def to_dot(expr: sp.Expr, graph_style: Optional[Dict[str, Any]] = None, short=True):
    """Show a sympy or pystencils AST as dot graph"""
    from pystencils.astnodes import Node
+    try:
        import graphviz
+    except ImportError:
+        print("graphviz is not installed. Visualizing the AST is not available")
+        return
    graph_style = {} if graph_style is None else graph_style
    if isinstance(expr, Node):

--- a/pystencils/typing/leaf_typing.py
+++ b/pystencils/typing/leaf_typing.py
@@ -216,7 +216,8 @@ class TypeAdder:
                else:
                    new_args.append(a)
            return expr.func(*new_args) if new_args else expr, collated_type
-        elif isinstance(expr, (sp.Pow, InverseTrigonometricFunction, TrigonometricFunction, HyperbolicFunction)):
+        elif isinstance(expr, (sp.Pow, sp.exp, InverseTrigonometricFunction, TrigonometricFunction,
+                               HyperbolicFunction)):
            args_types = [self.figure_out_type(arg) for arg in expr.args]
            collated_type = collate_types([t for _, t in args_types])
            new_args = [a if t.dtype_eq(collated_type) else CastFunc(a, collated_type) for a, t in args_types]

--- a/pystencils_tests/test_math_functions.py
+++ b/pystencils_tests/test_math_functions.py
@@ -33,7 +33,8 @@ def test_two_arguments(dtype, func, target):
    dh.run_kernel(kernel)
    dh.all_to_cpu()
-    np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0, 2.0).evalf()))
+    np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0, 2.0).evalf()),
+                               13 if dtype == 'float64' else 5)
 @pytest.mark.parametrize('dtype', ["float64", "float32"])

--- a/pystencils_tests/test_simplifications.py
+++ b/pystencils_tests/test_simplifications.py
@@ -4,6 +4,7 @@ import pytest
 import pystencils.config
 import sympy as sp
 import pystencils as ps
+import numpy as np
 from pystencils.simp import subexpression_substitution_in_main_assignments
 from pystencils.simp import add_subexpressions_for_divisions
@@ -143,29 +144,27 @@ def test_add_subexpressions_for_field_reads():
 @pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
-@pytest.mark.parametrize('simplification', (True, False))
+@pytest.mark.parametrize('dtype', ('float32', 'float64'))
 @pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
-def test_sympy_optimizations(target, simplification):
+def test_sympy_optimizations(target, dtype):
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")
-    src, dst = ps.fields('src, dst:  float32[2d]')
+    src, dst = ps.fields(f'src, dst:  {dtype}[2d]')
-    # Triggers Sympy's expm1 optimization
-    # Sympy's expm1 optimization is tedious to use and the behaviour is highly depended on the sympy version. In
-    # some cases the exp expression has to be encapsulated in brackets or multiplied with 1 or 1.0
-    # for sympy to work properly ...
    assignments = ps.AssignmentCollection({
        src[0, 0]: 1.0 * (sp.exp(dst[0, 0]) - 1)
    })
-    config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
+    config = pystencils.config.CreateKernelConfig(target=target, default_number_float=dtype)
    ast = ps.create_kernel(assignments, config=config)
+    ps.show_code(ast)
    code = ps.get_code_str(ast)
-    if simplification:
+    if dtype == 'float32':
-        assert 'expm1(' in code
+        assert 'expf(' in code
-    else:
+    elif dtype == 'float64':
-        assert 'expm1(' not in code
+        assert 'exp(' in code
 @pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@@ -176,7 +175,7 @@ def test_evaluate_constant_terms(target, simplification):
        pytest.importorskip("pycuda")
    src, dst = ps.fields('src, dst:  float32[2d]')
-    # Triggers Sympy's cos optimization
+    # cos of a number will always be simplified
    assignments = ps.AssignmentCollection({
        src[0, 0]: -sp.cos(1) + dst[0, 0]
    })
@@ -184,8 +183,4 @@ def test_evaluate_constant_terms(target, simplification):
    config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
    ast = ps.create_kernel(assignments, config=config)
    code = ps.get_code_str(ast)
-    if simplification:
    assert 'cos(' not in code
-    else:
-        assert 'cos(' in code
-    print(code)
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -195,9 +195,9 @@ def test_piecewise3(instruction_set=instruction_set):
        g[0, 0] @= 1.0 / (s.b + s.k) if f[0, 0] > 0.0 else 1.0
    ast = ps.create_kernel(test_kernel)
-    ps.show_code(ast)
+    # ps.show_code(ast)
    vectorize(ast, instruction_set=instruction_set)
-    ps.show_code(ast)
+    # ps.show_code(ast)
    ast.compile()

--- a/pystencils_tests/test_vectorization_specific.py
+++ b/pystencils_tests/test_vectorization_specific.py
@@ -61,24 +61,29 @@ def test_vectorized_abs(instruction_set, dtype):
 @pytest.mark.parametrize('dtype', ('float', 'double'))
 @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
 def test_strided(instruction_set, dtype):
+    npdtype = np.float64 if dtype == 'double' else np.float32
    f, g = ps.fields(f"f, g : float{64 if dtype=='double' else 32}[2D]")
    update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
-    if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) and not instruction_set in ['avx512', 'rvv'] and not instruction_set.startswith('sve'):
+    if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) and instruction_set not in ['avx512', 'rvv'] and not instruction_set.startswith('sve'):
        with pytest.warns(UserWarning) as warn:
-            config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
+            config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
+                                                          default_number_float=npdtype)
            ast = ps.create_kernel(update_rule, config=config)
            assert 'Could not vectorize loop' in warn[0].message.args[0]
    else:
        with pytest.warns(None) as warn:
-            config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
+            config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
+                                                          default_number_float=npdtype)
            ast = ps.create_kernel(update_rule, config=config)
            assert len(warn) == 0
+    # ps.show_code(ast)
    func = ast.compile()
    ref_func = ps.create_kernel(update_rule).compile()
-    arr = np.random.random((23 + 2, 17 + 2)).astype(np.float64 if dtype == 'double' else np.float32)
+    arr = np.random.random((23 + 2, 17 + 2)).astype(npdtype)
-    dst = np.zeros_like(arr, dtype=np.float64 if dtype == 'double' else np.float32)
+    dst = np.zeros_like(arr, dtype=npdtype)
-    ref = np.zeros_like(arr, dtype=np.float64 if dtype == 'double' else np.float32)
+    ref = np.zeros_like(arr, dtype=npdtype)
    func(g=dst, f=arr)
    ref_func(g=ref, f=arr)