From 5ee715d0df2651e745ff5de0524abfe24d48c968 Mon Sep 17 00:00:00 2001
From: zy69guqi <richard.angersbach@fau.de>
Date: Thu, 20 Mar 2025 17:04:47 +0100
Subject: [PATCH] Reformat adapted files [skip ci]

---
 src/pystencils/__init__.py                    |  9 +-
 src/pystencils/backend/ast/vector.py          | 46 +++++----
 src/pystencils/backend/emission/ir_printer.py |  4 +-
 .../backend/kernelcreation/context.py         |  6 +-
 .../backend/kernelcreation/freeze.py          | 18 ++--
 .../backend/kernelcreation/typification.py    |  7 +-
 .../backend/platforms/generic_cpu.py          | 46 +++++++--
 .../backend/platforms/generic_gpu.py          | 98 ++++++++++++++-----
 src/pystencils/backend/platforms/platform.py  |  2 +-
 src/pystencils/backend/platforms/sycl.py      |  4 +-
 src/pystencils/backend/platforms/x86.py       | 10 +-
 .../backend/transformations/add_pragmas.py    |  8 +-
 .../transformations/loop_vectorizer.py        | 36 ++++---
 .../transformations/select_functions.py       |  8 +-
 .../transformations/select_intrinsics.py      |  4 +-
 src/pystencils/codegen/driver.py              | 38 +++++--
 src/pystencils/compound_op_mapping.py         | 15 ++-
 src/pystencils/jit/cpu_extension_module.py    |  5 +-
 src/pystencils/sympyextensions/__init__.py    |  4 +-
 src/pystencils/sympyextensions/reduction.py   | 11 ++-
 tests/kernelcreation/test_reduction.py        | 23 +++--
 21 files changed, 283 insertions(+), 119 deletions(-)

diff --git a/src/pystencils/__init__.py b/src/pystencils/__init__.py
index a7bf33aa6..329f61d32 100644
--- a/src/pystencils/__init__.py
+++ b/src/pystencils/__init__.py
@@ -1,10 +1,6 @@
 """Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
 
-from .codegen import (
-    Target,
-    CreateKernelConfig,
-    AUTO
-)
+from .codegen import Target, CreateKernelConfig, AUTO
 from .defaults import DEFAULTS
 from . import fd
 from . import stencil as stencil
@@ -93,4 +89,5 @@ __all__ = [
 ]
 
 from . import _version
-__version__ = _version.get_versions()['version']
+
+__version__ = _version.get_versions()["version"]
diff --git a/src/pystencils/backend/ast/vector.py b/src/pystencils/backend/ast/vector.py
index 4f5224133..4141b0296 100644
--- a/src/pystencils/backend/ast/vector.py
+++ b/src/pystencils/backend/ast/vector.py
@@ -18,7 +18,7 @@ class PsVecBroadcast(PsUnOp, PsVectorOp):
     """Broadcast a scalar value to N vector lanes."""
 
     __match_args__ = ("lanes", "operand")
-    
+
     def __init__(self, lanes: int, operand: PsExpression):
         super().__init__(operand)
         self._lanes = lanes
@@ -26,21 +26,18 @@ class PsVecBroadcast(PsUnOp, PsVectorOp):
     @property
     def lanes(self) -> int:
         return self._lanes
-    
+
     @lanes.setter
     def lanes(self, n: int):
         self._lanes = n
 
     def _clone_expr(self) -> PsVecBroadcast:
         return PsVecBroadcast(self._lanes, self._operand.clone())
-    
+
     def structurally_equal(self, other: PsAstNode) -> bool:
         if not isinstance(other, PsVecBroadcast):
             return False
-        return (
-            super().structurally_equal(other)
-            and self._lanes == other._lanes
-        )
+        return super().structurally_equal(other) and self._lanes == other._lanes
 
 
 class PsVecHorizontal(PsBinOp, PsVectorOp):
@@ -48,8 +45,13 @@ class PsVecHorizontal(PsBinOp, PsVectorOp):
 
     __match_args__ = ("lanes", "scalar_operand", "vector_operand", "reduction_op")
 
-    def __init__(self, lanes: int, scalar_operand: PsExpression, vector_operand: PsExpression,
-                 reduction_op: ReductionOp):
+    def __init__(
+        self,
+        lanes: int,
+        scalar_operand: PsExpression,
+        vector_operand: PsExpression,
+        reduction_op: ReductionOp,
+    ):
         super().__init__(scalar_operand, vector_operand)
         self._lanes = lanes
         self._reduction_op = reduction_op
@@ -87,19 +89,23 @@ class PsVecHorizontal(PsBinOp, PsVectorOp):
         self._reduction_op = op
 
     def _clone_expr(self) -> PsVecHorizontal:
-        return PsVecHorizontal(self._lanes, self._op1.clone(), self._op2.clone(), self._reduction_op)
+        return PsVecHorizontal(
+            self._lanes, self._op1.clone(), self._op2.clone(), self._reduction_op
+        )
 
     def structurally_equal(self, other: PsAstNode) -> bool:
         if not isinstance(other, PsVecHorizontal):
             return False
-        return (super().structurally_equal(other)
-                and self._lanes == other._lanes
-                and self._reduction_op == other._reduction_op)
+        return (
+            super().structurally_equal(other)
+            and self._lanes == other._lanes
+            and self._reduction_op == other._reduction_op
+        )
 
 
 class PsVecMemAcc(PsExpression, PsLvalue, PsVectorOp):
     """Pointer-based vectorized memory access.
-    
+
     Args:
         base_ptr: Pointer identifying the accessed memory region
         offset: Offset inside the memory region
@@ -150,7 +156,7 @@ class PsVecMemAcc(PsExpression, PsLvalue, PsVectorOp):
     @property
     def stride(self) -> PsExpression | None:
         return self._stride
-    
+
     @stride.setter
     def stride(self, expr: PsExpression | None):
         self._stride = expr
@@ -161,10 +167,12 @@ class PsVecMemAcc(PsExpression, PsLvalue, PsVectorOp):
 
     def get_vector_type(self) -> PsVectorType:
         return cast(PsVectorType, self._dtype)
-    
+
     def get_children(self) -> tuple[PsAstNode, ...]:
-        return (self._ptr, self._offset) + (() if self._stride is None else (self._stride,))
-    
+        return (self._ptr, self._offset) + (
+            () if self._stride is None else (self._stride,)
+        )
+
     def set_child(self, idx: int, c: PsAstNode):
         idx = [0, 1, 2][idx]
         match idx:
@@ -193,7 +201,7 @@ class PsVecMemAcc(PsExpression, PsLvalue, PsVectorOp):
             and self._vector_entries == other._vector_entries
             and self._aligned == other._aligned
         )
-    
+
     def __repr__(self) -> str:
         return (
             f"PsVecMemAcc({repr(self._ptr)}, {repr(self._offset)}, {repr(self._vector_entries)}, "
diff --git a/src/pystencils/backend/emission/ir_printer.py b/src/pystencils/backend/emission/ir_printer.py
index 1508e6d94..22ae2f91a 100644
--- a/src/pystencils/backend/emission/ir_printer.py
+++ b/src/pystencils/backend/emission/ir_printer.py
@@ -24,7 +24,7 @@ def emit_ir(ir: PsAstNode | Kernel):
 
 class IRAstPrinter(BasePrinter):
     """Print the IR AST as pseudo-code.
-    
+
     This printer produces a complete pseudocode representation of a pystencils AST.
     Other than the `CAstPrinter`, the `IRAstPrinter` is capable of emitting code for
     each node defined in `ast <pystencils.backend.ast>`.
@@ -85,7 +85,7 @@ class IRAstPrinter(BasePrinter):
 
                 return pc.parenthesize(
                     f"vec_horizontal_{reduction_op.name.lower()}<{lanes}>({scalar_operand_code, vector_operand_code})",
-                    Ops.Weakest
+                    Ops.Weakest,
                 )
 
             case _:
diff --git a/src/pystencils/backend/kernelcreation/context.py b/src/pystencils/backend/kernelcreation/context.py
index 827be45a5..536c73c7f 100644
--- a/src/pystencils/backend/kernelcreation/context.py
+++ b/src/pystencils/backend/kernelcreation/context.py
@@ -106,7 +106,7 @@ class KernelCreationContext:
     def index_dtype(self) -> PsIntegerType:
         """Data type used by default for index expressions"""
         return self._index_dtype
-    
+
     def resolve_dynamic_type(self, dtype: DynamicType | PsType) -> PsType:
         """Selects the appropriate data type for `DynamicType` instances, and returns all other types as they are."""
         match dtype:
@@ -191,7 +191,9 @@ class KernelCreationContext:
 
         self._symbols[old.name] = new
 
-    def add_symbol_reduction_info(self, local_symb: PsSymbol, reduction_info: ReductionInfo):
+    def add_symbol_reduction_info(
+        self, local_symb: PsSymbol, reduction_info: ReductionInfo
+    ):
         """Adds entry for a symbol and its reduction info to its corresponding lookup table.
 
         The symbol ``symbol`` shall not exist in the symbol table already.
diff --git a/src/pystencils/backend/kernelcreation/freeze.py b/src/pystencils/backend/kernelcreation/freeze.py
index df6bfbd1f..63e9ea5b1 100644
--- a/src/pystencils/backend/kernelcreation/freeze.py
+++ b/src/pystencils/backend/kernelcreation/freeze.py
@@ -57,7 +57,7 @@ from ..ast.expressions import (
     PsAnd,
     PsOr,
     PsNot,
-    PsMemAcc
+    PsMemAcc,
 )
 from ..ast.vector import PsVecMemAcc
 
@@ -110,7 +110,9 @@ class FreezeExpressions:
 
     def __call__(self, obj: AssignmentCollection | sp.Basic) -> PsAstNode:
         if isinstance(obj, AssignmentCollection):
-            return PsBlock([cast(PsStructuralNode, self.visit(asm)) for asm in obj.all_assignments])
+            return PsBlock(
+                [cast(PsStructuralNode, self.visit(asm)) for asm in obj.all_assignments]
+            )
         elif isinstance(obj, AssignmentBase):
             return cast(PsAssignment, self.visit(obj))
         elif isinstance(obj, _ExprLike):
@@ -179,7 +181,9 @@ class FreezeExpressions:
             "/=": ReductionOp.Div,
         }
 
-        return PsAssignment(lhs, compound_op_to_expr(_str_to_compound_op[expr.op], lhs.clone(), rhs))
+        return PsAssignment(
+            lhs, compound_op_to_expr(_str_to_compound_op[expr.op], lhs.clone(), rhs)
+        )
 
     def map_ReductionAssignment(self, expr: ReductionAssignment):
         assert isinstance(expr.lhs, TypedSymbol)
@@ -327,22 +331,22 @@ class FreezeExpressions:
             raise FreezeError("Cannot translate an empty tuple.")
 
         items = [self.visit_expr(item) for item in expr]
-        
+
         if any(isinstance(i, PsArrayInitList) for i in items):
             #  base case: have nested arrays
             if not all(isinstance(i, PsArrayInitList) for i in items):
                 raise FreezeError(
                     f"Cannot translate nested arrays of non-uniform shape: {expr}"
                 )
-            
+
             subarrays = cast(list[PsArrayInitList], items)
             shape_tail = subarrays[0].shape
-            
+
             if not all(s.shape == shape_tail for s in subarrays[1:]):
                 raise FreezeError(
                     f"Cannot translate nested arrays of non-uniform shape: {expr}"
                 )
-            
+
             return PsArrayInitList([s.items_grid for s in subarrays])  # type: ignore
         else:
             #  base case: no nested arrays
diff --git a/src/pystencils/backend/kernelcreation/typification.py b/src/pystencils/backend/kernelcreation/typification.py
index 3ca0a16e2..b457f39a0 100644
--- a/src/pystencils/backend/kernelcreation/typification.py
+++ b/src/pystencils/backend/kernelcreation/typification.py
@@ -194,9 +194,10 @@ class TypeContext:
                             f"    Target type: {self._target_type}"
                         )
 
-                case PsNumericOpTrait() if not isinstance(
-                    self._target_type, PsNumericType
-                ) or self._target_type.is_bool():
+                case PsNumericOpTrait() if (
+                    not isinstance(self._target_type, PsNumericType)
+                    or self._target_type.is_bool()
+                ):
                     #   FIXME: PsBoolType derives from PsNumericType, but is not numeric
                     raise TypificationError(
                         f"Numerical operation encountered in non-numerical type context:\n"
diff --git a/src/pystencils/backend/platforms/generic_cpu.py b/src/pystencils/backend/platforms/generic_cpu.py
index 43b048184..ccef61817 100644
--- a/src/pystencils/backend/platforms/generic_cpu.py
+++ b/src/pystencils/backend/platforms/generic_cpu.py
@@ -4,8 +4,14 @@ from typing import Sequence
 from ..ast.expressions import PsCall, PsMemAcc, PsConstantExpr
 
 from ..ast import PsAstNode
-from ..functions import CFunction, MathFunctions, NumericLimitsFunctions, ReductionFunctions, PsMathFunction, \
-    PsReductionFunction
+from ..functions import (
+    CFunction,
+    MathFunctions,
+    NumericLimitsFunctions,
+    ReductionFunctions,
+    PsMathFunction,
+    PsReductionFunction,
+)
 from ..literals import PsLiteral
 from ...compound_op_mapping import compound_op_to_expr
 from ...sympyextensions import ReductionOp
@@ -30,7 +36,8 @@ from ..ast.expressions import (
     PsLookup,
     PsGe,
     PsLe,
-    PsTernary, PsLiteralExpr,
+    PsTernary,
+    PsLiteralExpr,
 )
 from ..ast.vector import PsVecMemAcc
 from ...types import PsVectorType, PsCustomType
@@ -60,20 +67,31 @@ class GenericCpu(Platform):
         else:
             raise MaterializationError(f"Unknown type of iteration space: {ispace}")
 
-    def select_function(self, call: PsCall) -> PsExpression | tuple[tuple[PsStructuralNode, ...], PsAstNode]:
+    def select_function(
+        self, call: PsCall
+    ) -> PsExpression | tuple[tuple[PsStructuralNode, ...], PsAstNode]:
         call_func = call.function
         assert isinstance(call_func, PsReductionFunction | PsMathFunction)
 
         func = call_func.func
 
-        if isinstance(call_func, PsReductionFunction) and func is ReductionFunctions.WriteBackToPtr:
+        if (
+            isinstance(call_func, PsReductionFunction)
+            and func is ReductionFunctions.WriteBackToPtr
+        ):
             ptr_expr, symbol_expr = call.args
             op = call_func.reduction_op
 
-            assert isinstance(ptr_expr, PsSymbolExpr) and isinstance(ptr_expr.dtype, PsPointerType)
-            assert isinstance(symbol_expr, PsSymbolExpr) and isinstance(symbol_expr.dtype, PsScalarType)
+            assert isinstance(ptr_expr, PsSymbolExpr) and isinstance(
+                ptr_expr.dtype, PsPointerType
+            )
+            assert isinstance(symbol_expr, PsSymbolExpr) and isinstance(
+                symbol_expr.dtype, PsScalarType
+            )
 
-            ptr_access = PsMemAcc(ptr_expr, PsConstantExpr(PsConstant(0, self._ctx.index_dtype)))
+            ptr_access = PsMemAcc(
+                ptr_expr, PsConstantExpr(PsConstant(0, self._ctx.index_dtype))
+            )
 
             # inspired by OpenMP: local reduction variable (negative sign) is added at the end
             actual_op = ReductionOp.Add if op is ReductionOp.Sub else op
@@ -89,8 +107,16 @@ class GenericCpu(Platform):
         dtype = call.get_dtype()
         arg_types = (dtype,) * func.num_args
 
-        if isinstance(dtype, PsScalarType) and func in (NumericLimitsFunctions.Min, NumericLimitsFunctions.Max):
-            return PsLiteralExpr(PsLiteral(f"std::numeric_limits<{dtype.c_string()}>::{func.function_name}()", dtype))
+        if isinstance(dtype, PsScalarType) and func in (
+            NumericLimitsFunctions.Min,
+            NumericLimitsFunctions.Max,
+        ):
+            return PsLiteralExpr(
+                PsLiteral(
+                    f"std::numeric_limits<{dtype.c_string()}>::{func.function_name}()",
+                    dtype,
+                )
+            )
 
         if isinstance(dtype, PsIeeeFloatType) and dtype.width in (32, 64):
             cfunc: CFunction
diff --git a/src/pystencils/backend/platforms/generic_gpu.py b/src/pystencils/backend/platforms/generic_gpu.py
index 9b21457be..2a12d6b7b 100644
--- a/src/pystencils/backend/platforms/generic_gpu.py
+++ b/src/pystencils/backend/platforms/generic_gpu.py
@@ -24,7 +24,14 @@ from ..kernelcreation import (
 )
 
 from ..kernelcreation.context import KernelCreationContext
-from ..ast.structural import PsBlock, PsConditional, PsDeclaration, PsStatement, PsAssignment, PsStructuralNode
+from ..ast.structural import (
+    PsBlock,
+    PsConditional,
+    PsDeclaration,
+    PsStatement,
+    PsAssignment,
+    PsStructuralNode,
+)
 from ..ast.expressions import (
     PsExpression,
     PsLiteralExpr,
@@ -36,13 +43,19 @@ from ..ast.expressions import (
     PsConstantExpr,
     PsAdd,
     PsRem,
-    PsEq
+    PsEq,
 )
 from ..ast.expressions import PsLt, PsAnd
 from ...types import PsSignedIntegerType, PsIeeeFloatType
 from ..literals import PsLiteral
-from ..functions import MathFunctions, CFunction, ReductionFunctions, NumericLimitsFunctions, PsReductionFunction, \
-    PsMathFunction
+from ..functions import (
+    MathFunctions,
+    CFunction,
+    ReductionFunctions,
+    NumericLimitsFunctions,
+    PsReductionFunction,
+    PsMathFunction,
+)
 
 int32 = PsSignedIntegerType(width=32, const=False)
 
@@ -131,7 +144,7 @@ class Blockwise4DMapping(ThreadMapping):
         THREAD_IDX[0],
         BLOCK_IDX[0],
         BLOCK_IDX[1],
-        BLOCK_IDX[2]
+        BLOCK_IDX[2],
     ]
 
     def __call__(self, ispace: IterationSpace) -> dict[PsSymbol, PsExpression]:
@@ -177,7 +190,7 @@ class Blockwise4DMapping(ThreadMapping):
 
 class GenericGpu(Platform):
     """Common base platform for CUDA- and HIP-type GPU targets.
-    
+
     Args:
         ctx: The kernel creation context
         omit_range_check: If `True`, generated index translation code will not check if the point identified
@@ -238,23 +251,34 @@ class GenericGpu(Platform):
         else:
             raise MaterializationError(f"Unknown type of iteration space: {ispace}")
 
-    def select_function(self, call: PsCall) -> PsExpression | tuple[tuple[PsStructuralNode, ...], PsAstNode]:
+    def select_function(
+        self, call: PsCall
+    ) -> PsExpression | tuple[tuple[PsStructuralNode, ...], PsAstNode]:
         call_func = call.function
         assert isinstance(call_func, PsReductionFunction | PsMathFunction)
 
         func = call_func.func
 
-        if isinstance(call_func, PsReductionFunction) and func is ReductionFunctions.WriteBackToPtr:
+        if (
+            isinstance(call_func, PsReductionFunction)
+            and func is ReductionFunctions.WriteBackToPtr
+        ):
             ptr_expr, symbol_expr = call.args
             op = call_func.reduction_op
             stype = symbol_expr.dtype
             ptrtype = ptr_expr.dtype
 
-            assert isinstance(ptr_expr, PsSymbolExpr) and isinstance(ptrtype, PsPointerType)
-            assert isinstance(symbol_expr, PsSymbolExpr) and isinstance(stype, PsScalarType)
+            assert isinstance(ptr_expr, PsSymbolExpr) and isinstance(
+                ptrtype, PsPointerType
+            )
+            assert isinstance(symbol_expr, PsSymbolExpr) and isinstance(
+                stype, PsScalarType
+            )
 
             if not isinstance(stype, PsIeeeFloatType) or stype.width not in (32, 64):
-                NotImplementedError("atomic operations are only available for float32/64 datatypes")
+                raise NotImplementedError(
+                    "atomic operations are only available for float32/64 datatypes"
+                )
 
             # workaround for subtractions -> use additions for reducing intermediate results
             # similar to OpenMP reductions: local copies (negative sign) are added at the end
@@ -274,36 +298,60 @@ class GenericGpu(Platform):
                 # perform local warp reductions
                 def gen_shuffle_instr(offset: int):
                     full_mask = PsLiteralExpr(PsLiteral("0xffffffff", UInt(32)))
-                    return PsCall(CFunction("__shfl_xor_sync", [UInt(32), stype, SInt(32)], stype),
-                                  [full_mask, symbol_expr, PsConstantExpr(PsConstant(offset, SInt(32)))])
+                    return PsCall(
+                        CFunction(
+                            "__shfl_xor_sync", [UInt(32), stype, SInt(32)], stype
+                        ),
+                        [
+                            full_mask,
+                            symbol_expr,
+                            PsConstantExpr(PsConstant(offset, SInt(32))),
+                        ],
+                    )
 
                 # set up shuffle instructions for warp-level reduction
                 num_shuffles = math.frexp(self._warp_size)[1]
-                shuffles = tuple(PsAssignment(symbol_expr,
-                                              compound_op_to_expr(actual_op,
-                                                                  symbol_expr, gen_shuffle_instr(pow(2, i - 1))))
-                                 for i in reversed(range(1, num_shuffles)))
+                shuffles = tuple(
+                    PsAssignment(
+                        symbol_expr,
+                        compound_op_to_expr(
+                            actual_op, symbol_expr, gen_shuffle_instr(pow(2, i - 1))
+                        ),
+                    )
+                    for i in reversed(range(1, num_shuffles))
+                )
 
                 # find first thread in warp
                 thread_indices_per_dim = [
-                    idx * PsConstantExpr(PsConstant(reduce(operator.mul, BLOCK_DIM[:i], 1), SInt(32)))
-                    for i, idx in enumerate(THREAD_IDX[:ispace.rank])
+                    idx
+                    * PsConstantExpr(
+                        PsConstant(reduce(operator.mul, BLOCK_DIM[:i], 1), SInt(32))
+                    )
+                    for i, idx in enumerate(THREAD_IDX[: ispace.rank])
                 ]
                 tid: PsExpression = thread_indices_per_dim[0]
                 for t in thread_indices_per_dim[1:]:
                     tid = PsAdd(tid, t)
-                first_thread_in_warp = PsEq(PsRem(tid, PsConstantExpr(PsConstant(self._warp_size, SInt(32)))),
-                                            PsConstantExpr(PsConstant(0, SInt(32))))
+                first_thread_in_warp = PsEq(
+                    PsRem(tid, PsConstantExpr(PsConstant(self._warp_size, SInt(32)))),
+                    PsConstantExpr(PsConstant(0, SInt(32))),
+                )
 
                 # set condition to only execute atomic operation on first valid thread in warp
-                cond = PsAnd(is_valid_thread, first_thread_in_warp) if is_valid_thread else first_thread_in_warp
+                cond = (
+                    PsAnd(is_valid_thread, first_thread_in_warp)
+                    if is_valid_thread
+                    else first_thread_in_warp
+                )
             else:
                 # no optimization: only execute atomic add on valid thread
                 shuffles = ()
                 cond = is_valid_thread
 
             # use atomic operation
-            call.function = CFunction(f"atomic{actual_op.name}", [ptrtype, stype], PsCustomType("void"))
+            call.function = CFunction(
+                f"atomic{actual_op.name}", [ptrtype, stype], PsCustomType("void")
+            )
             call.args = (ptr_expr, symbol_expr)
 
             # assemble warp reduction
@@ -321,7 +369,9 @@ class GenericGpu(Platform):
                 case NumericLimitsFunctions.Max:
                     define = "POS_INFINITY"
                 case _:
-                    raise MaterializationError(f"Cannot materialize call to function {func}")
+                    raise MaterializationError(
+                        f"Cannot materialize call to function {func}"
+                    )
 
             return PsLiteralExpr(PsLiteral(define, dtype))
 
diff --git a/src/pystencils/backend/platforms/platform.py b/src/pystencils/backend/platforms/platform.py
index 4f738dd5d..7b81865ae 100644
--- a/src/pystencils/backend/platforms/platform.py
+++ b/src/pystencils/backend/platforms/platform.py
@@ -12,7 +12,7 @@ class Platform(ABC):
     """Abstract base class for all supported platforms.
 
     The platform performs all target-dependent tasks during code generation:
-    
+
     - Translation of the iteration space to an index source (loop nest, GPU indexing, ...)
     - Platform-specific optimizations (e.g. vectorization, OpenMP)
     """
diff --git a/src/pystencils/backend/platforms/sycl.py b/src/pystencils/backend/platforms/sycl.py
index 78af01b2f..22d60f9b0 100644
--- a/src/pystencils/backend/platforms/sycl.py
+++ b/src/pystencils/backend/platforms/sycl.py
@@ -56,7 +56,9 @@ class SyclPlatform(Platform):
         else:
             raise MaterializationError(f"Unknown type of iteration space: {ispace}")
 
-    def select_function(self, call: PsCall) -> PsExpression | tuple[tuple[PsStructuralNode, ...], PsAstNode]:
+    def select_function(
+        self, call: PsCall
+    ) -> PsExpression | tuple[tuple[PsStructuralNode, ...], PsAstNode]:
         assert isinstance(call.function, PsMathFunction)
 
         func = call.function.func
diff --git a/src/pystencils/backend/platforms/x86.py b/src/pystencils/backend/platforms/x86.py
index df0945006..add38cfe4 100644
--- a/src/pystencils/backend/platforms/x86.py
+++ b/src/pystencils/backend/platforms/x86.py
@@ -359,7 +359,11 @@ def _x86_op_intrin(
             atype = vtype.scalar_type
         case PsVecHorizontal():
             # horizontal add instead of sub avoids double inversion of sign
-            actual_op = ReductionOp.Add if op.reduction_op == ReductionOp.Sub else op.reduction_op
+            actual_op = (
+                ReductionOp.Add
+                if op.reduction_op == ReductionOp.Sub
+                else op.reduction_op
+            )
             opstr = f"horizontal_{actual_op.name.lower()}"
             rtype = vtype.scalar_type
             atypes = (vtype.scalar_type, vtype)
@@ -409,7 +413,9 @@ def _x86_op_intrin(
                 case (SInt(64), Fp()) | (
                     Fp(),
                     SInt(64),
-                ) if varch < X86VectorArch.AVX512:
+                ) if (
+                    varch < X86VectorArch.AVX512
+                ):
                     panic()
                 # AVX512 only: cvtepiA_epiT if A > T
                 case (SInt(a), SInt(t)) if a > t and varch < X86VectorArch.AVX512:
diff --git a/src/pystencils/backend/transformations/add_pragmas.py b/src/pystencils/backend/transformations/add_pragmas.py
index c9e8b3994..fa466e495 100644
--- a/src/pystencils/backend/transformations/add_pragmas.py
+++ b/src/pystencils/backend/transformations/add_pragmas.py
@@ -126,9 +126,13 @@ class AddOpenMP:
         if bool(ctx.symbols_reduction_info):
             for symbol, reduction_info in ctx.symbols_reduction_info.items():
                 if isinstance(symbol.dtype, PsScalarType):
-                    pragma_text += f" reduction({reduction_info.op.value}: {symbol.name})"
+                    pragma_text += (
+                        f" reduction({reduction_info.op.value}: {symbol.name})"
+                    )
                 else:
-                    NotImplementedError("OMP: Reductions for non-scalar data types are not supported yet.")
+                    raise NotImplementedError(
+                        "OMP: Reductions for non-scalar data types are not supported yet."
+                    )
 
         if collapse is not None:
             if collapse <= 0:
diff --git a/src/pystencils/backend/transformations/loop_vectorizer.py b/src/pystencils/backend/transformations/loop_vectorizer.py
index a96c6af4b..04d7d20f0 100644
--- a/src/pystencils/backend/transformations/loop_vectorizer.py
+++ b/src/pystencils/backend/transformations/loop_vectorizer.py
@@ -7,7 +7,13 @@ from ...types import PsVectorType, PsScalarType
 from ..kernelcreation import KernelCreationContext
 from ..constants import PsConstant
 from ..ast import PsAstNode
-from ..ast.structural import PsLoop, PsBlock, PsDeclaration, PsAssignment, PsStructuralNode
+from ..ast.structural import (
+    PsLoop,
+    PsBlock,
+    PsDeclaration,
+    PsAssignment,
+    PsStructuralNode,
+)
 from ..ast.expressions import PsExpression, PsTernary, PsGt, PsSymbolExpr
 from ..ast.vector import PsVecBroadcast, PsVecHorizontal
 from ..ast.analysis import collect_undefined_symbols
@@ -142,13 +148,25 @@ class LoopVectorizer:
             vector_symb = vc.vectorize_symbol(symb)
 
             # Declare and init vector
-            simd_init_local_reduction_vars += [PsDeclaration(
-                PsSymbolExpr(vector_symb), PsVecBroadcast(self._lanes, PsSymbolExpr(symb)))]
+            simd_init_local_reduction_vars += [
+                PsDeclaration(
+                    PsSymbolExpr(vector_symb),
+                    PsVecBroadcast(self._lanes, PsSymbolExpr(symb)),
+                )
+            ]
 
             # Write back vectorization result
-            simd_writeback_local_reduction_vars += [PsAssignment(
-                PsSymbolExpr(symb), PsVecHorizontal(self._lanes, PsSymbolExpr(symb), PsSymbolExpr(vector_symb),
-                                                    reduction_info.op))]
+            simd_writeback_local_reduction_vars += [
+                PsAssignment(
+                    PsSymbolExpr(symb),
+                    PsVecHorizontal(
+                        self._lanes,
+                        PsSymbolExpr(symb),
+                        PsSymbolExpr(vector_symb),
+                        reduction_info.op,
+                    ),
+                )
+            ]
 
         #   Generate vectorized loop body
         simd_body = self._vectorize_ast(loop.body, vc)
@@ -241,11 +259,7 @@ class LoopVectorizer:
 
                 return PsBlock(
                     simd_init_local_reduction_vars
-                    + [
-                        simd_stop_decl,
-                        simd_step_decl,
-                        simd_loop
-                    ]
+                    + [simd_stop_decl, simd_step_decl, simd_loop]
                     + simd_writeback_local_reduction_vars
                     + [
                         trailing_start_decl,
diff --git a/src/pystencils/backend/transformations/select_functions.py b/src/pystencils/backend/transformations/select_functions.py
index 576cebad1..9ce404693 100644
--- a/src/pystencils/backend/transformations/select_functions.py
+++ b/src/pystencils/backend/transformations/select_functions.py
@@ -21,7 +21,9 @@ class SelectFunctions:
 
         if isinstance(node, PsAssignment):
             rhs = node.rhs
-            if isinstance(rhs, PsCall) and isinstance(rhs.function, PsReductionFunction):
+            if isinstance(rhs, PsCall) and isinstance(
+                rhs.function, PsReductionFunction
+            ):
                 resolved_func = self._platform.select_function(rhs)
 
                 match resolved_func:
@@ -30,7 +32,9 @@ class SelectFunctions:
 
                         match new_rhs:
                             case PsExpression():
-                                return PsBlock(prepend + (PsAssignment(node.lhs, new_rhs),))
+                                return PsBlock(
+                                    prepend + (PsAssignment(node.lhs, new_rhs),)
+                                )
                             case PsStructuralNode():
                                 # special case: produces structural with atomic operation writing value back to ptr
                                 return PsBlock(prepend + (new_rhs,))
diff --git a/src/pystencils/backend/transformations/select_intrinsics.py b/src/pystencils/backend/transformations/select_intrinsics.py
index 49fb9bb08..b20614393 100644
--- a/src/pystencils/backend/transformations/select_intrinsics.py
+++ b/src/pystencils/backend/transformations/select_intrinsics.py
@@ -101,7 +101,9 @@ class SelectIntrinsics:
             if isinstance(expr, PsVecHorizontal):
                 scalar_op = expr.scalar_operand
                 vector_op_to_scalar = self.visit_expr(expr.vector_operand, sc)
-                return self._platform.op_intrinsic(expr, [scalar_op, vector_op_to_scalar])
+                return self._platform.op_intrinsic(
+                    expr, [scalar_op, vector_op_to_scalar]
+                )
             else:
                 return expr
 
diff --git a/src/pystencils/codegen/driver.py b/src/pystencils/codegen/driver.py
index 3962c316b..c285dd7bf 100644
--- a/src/pystencils/codegen/driver.py
+++ b/src/pystencils/codegen/driver.py
@@ -26,7 +26,13 @@ from ..types import PsIntegerType, PsScalarType
 from ..backend.memory import PsSymbol
 from ..backend.ast import PsAstNode
 from ..backend.functions import PsReductionFunction, ReductionFunctions
-from ..backend.ast.expressions import PsExpression, PsSymbolExpr, PsCall, PsMemAcc, PsConstantExpr
+from ..backend.ast.expressions import (
+    PsExpression,
+    PsSymbolExpr,
+    PsCall,
+    PsMemAcc,
+    PsConstantExpr,
+)
 from ..backend.ast.structural import PsBlock, PsLoop, PsDeclaration, PsAssignment
 from ..backend.ast.analysis import collect_undefined_symbols, collect_required_headers
 from ..backend.kernelcreation import (
@@ -191,12 +197,20 @@ class DefaultKernelCreationDriver:
             ptr_symbol_expr = typify(PsSymbolExpr(reduction_info.ptr_symbol))
             init_val = typify(reduction_info.init_val)
 
-            ptr_access = PsMemAcc(ptr_symbol_expr, PsConstantExpr(PsConstant(0, self._ctx.index_dtype)))
-            write_back_ptr = PsCall(PsReductionFunction(ReductionFunctions.WriteBackToPtr, reduction_info.op),
-                                    [ptr_symbol_expr, symbol_expr])
+            ptr_access = PsMemAcc(
+                ptr_symbol_expr, PsConstantExpr(PsConstant(0, self._ctx.index_dtype))
+            )
+            write_back_ptr = PsCall(
+                PsReductionFunction(
+                    ReductionFunctions.WriteBackToPtr, reduction_info.op
+                ),
+                [ptr_symbol_expr, symbol_expr],
+            )
 
-            prepend_ast = [PsDeclaration(symbol_expr, init_val)]     # declare and init local copy with neutral element
-            append_ast = [PsAssignment(ptr_access, write_back_ptr)]  # write back result to reduction target variable
+            # declare and init local copy with neutral element
+            prepend_ast = [PsDeclaration(symbol_expr, init_val)]
+            # write back result to reduction target variable
+            append_ast = [PsAssignment(ptr_access, write_back_ptr)]
 
             kernel_ast.statements = prepend_ast + kernel_ast.statements
             kernel_ast.statements += append_ast
@@ -423,14 +437,18 @@ class DefaultKernelCreationDriver:
 
         idx_scheme: GpuIndexingScheme = self._cfg.gpu.get_option("indexing_scheme")
         manual_launch_grid: bool = self._cfg.gpu.get_option("manual_launch_grid")
-        assume_warp_aligned_block_size: bool = self._cfg.gpu.get_option("assume_warp_aligned_block_size")
+        assume_warp_aligned_block_size: bool = self._cfg.gpu.get_option(
+            "assume_warp_aligned_block_size"
+        )
         warp_size: int | None = self._cfg.gpu.get_option("warp_size")
 
         if warp_size is None:
             warp_size = GpuOptions.default_warp_size(self._target)
 
         if warp_size is None and assume_warp_aligned_block_size:
-            warn("GPU warp size is unknown - ignoring assumption `assume_warp_aligned_block_size`.")
+            warn(
+                "GPU warp size is unknown - ignoring assumption `assume_warp_aligned_block_size`."
+            )
 
         return GpuIndexing(
             self._ctx,
@@ -475,7 +493,9 @@ class DefaultKernelCreationDriver:
                 else None
             )
 
-            assume_warp_aligned_block_size: bool = self._cfg.gpu.get_option("assume_warp_aligned_block_size")
+            assume_warp_aligned_block_size: bool = self._cfg.gpu.get_option(
+                "assume_warp_aligned_block_size"
+            )
             warp_size: int | None = self._cfg.gpu.get_option("warp_size")
 
             GpuPlatform: type
diff --git a/src/pystencils/compound_op_mapping.py b/src/pystencils/compound_op_mapping.py
index f256369f9..193b308d0 100644
--- a/src/pystencils/compound_op_mapping.py
+++ b/src/pystencils/compound_op_mapping.py
@@ -3,7 +3,12 @@ from .backend.exceptions import FreezeError
 from .backend.functions import PsMathFunction, MathFunctions
 from .sympyextensions.reduction import ReductionOp
 
-_available_operator_interface: set[ReductionOp] = {ReductionOp.Add, ReductionOp.Sub, ReductionOp.Mul, ReductionOp.Div}
+_available_operator_interface: set[ReductionOp] = {
+    ReductionOp.Add,
+    ReductionOp.Sub,
+    ReductionOp.Mul,
+    ReductionOp.Div,
+}
 
 
 def compound_op_to_expr(op: ReductionOp, op1, op2) -> PsExpression:
@@ -18,7 +23,9 @@ def compound_op_to_expr(op: ReductionOp, op1, op2) -> PsExpression:
             case ReductionOp.Div:
                 operator = PsDiv
             case _:
-                raise FreezeError(f"Found unsupported operation type for compound assignments: {op}.")
+                raise FreezeError(
+                    f"Found unsupported operation type for compound assignments: {op}."
+                )
         return operator(op1, op2)
     else:
         match op:
@@ -27,4 +34,6 @@ def compound_op_to_expr(op: ReductionOp, op1, op2) -> PsExpression:
             case ReductionOp.Max:
                 return PsCall(PsMathFunction(MathFunctions.Max), [op1, op2])
             case _:
-                raise FreezeError(f"Found unsupported operation type for compound assignments: {op}.")
+                raise FreezeError(
+                    f"Found unsupported operation type for compound assignments: {op}."
+                )
diff --git a/src/pystencils/jit/cpu_extension_module.py b/src/pystencils/jit/cpu_extension_module.py
index 03260f649..4d76ea9ca 100644
--- a/src/pystencils/jit/cpu_extension_module.py
+++ b/src/pystencils/jit/cpu_extension_module.py
@@ -92,6 +92,7 @@ class PsKernelExtensioNModule:
 
         #   Kernels and call wrappers
         from ..backend.emission import CAstPrinter
+
         printer = CAstPrinter(func_prefix="FUNC_PREFIX")
 
         for name, kernel in self._kernels.items():
@@ -293,7 +294,9 @@ if( !kwargs || !PyDict_Check(kwargs) ) {{
             self._buffer_types[ptr] = ptr_dtype.base_type
             self.extract_buffer(ptr, param.name)
             buffer = self.get_buffer(param.name)
-            code = f"{param.dtype.c_string()} {param.name} = ({param.dtype}) {buffer}.buf;"
+            code = (
+                f"{param.dtype.c_string()} {param.name} = ({param.dtype}) {buffer}.buf;"
+            )
 
             assert code is not None
 
diff --git a/src/pystencils/sympyextensions/__init__.py b/src/pystencils/sympyextensions/__init__.py
index 71f9a049a..bd0fa1fe9 100644
--- a/src/pystencils/sympyextensions/__init__.py
+++ b/src/pystencils/sympyextensions/__init__.py
@@ -28,7 +28,7 @@ from .math import (
     count_operations_in_ast,
     common_denominator,
     get_symmetric_part,
-    SymbolCreator
+    SymbolCreator,
 )
 
 
@@ -67,5 +67,5 @@ __all__ = [
     "common_denominator",
     "get_symmetric_part",
     "SymbolCreator",
-    "DynamicType"
+    "DynamicType",
 ]
diff --git a/src/pystencils/sympyextensions/reduction.py b/src/pystencils/sympyextensions/reduction.py
index cebfcb2f7..e95e37c24 100644
--- a/src/pystencils/sympyextensions/reduction.py
+++ b/src/pystencils/sympyextensions/reduction.py
@@ -22,6 +22,7 @@ class ReductionAssignment(AssignmentBase):
     reduction_op : ReductionOp
        Enum for binary operation being applied in the assignment, such as "Add" for "+", "Sub" for "-", etc.
     """
+
     _reduction_op = None  # type: ReductionOp
 
     @property
@@ -55,9 +56,13 @@ class MaxReductionAssignment(ReductionAssignment):
 
 # Mapping from ReductionOp enum to ReductionAssigment classes
 _reduction_assignment_classes = {
-    cls.reduction_op: cls for cls in [
-        AddReductionAssignment, SubReductionAssignment, MulReductionAssignment,
-        MinReductionAssignment, MaxReductionAssignment
+    cls.reduction_op: cls
+    for cls in [
+        AddReductionAssignment,
+        SubReductionAssignment,
+        MulReductionAssignment,
+        MinReductionAssignment,
+        MaxReductionAssignment,
     ]
 }
 
diff --git a/tests/kernelcreation/test_reduction.py b/tests/kernelcreation/test_reduction.py
index c3775964b..6e2b2f3fe 100644
--- a/tests/kernelcreation/test_reduction.py
+++ b/tests/kernelcreation/test_reduction.py
@@ -10,7 +10,7 @@ SIZE = 15
 SOLUTION = {
     "+": INIT_W + INIT_ARR * SIZE,
     "-": INIT_W - INIT_ARR * SIZE,
-    "*": INIT_W * INIT_ARR ** SIZE,
+    "*": INIT_W * INIT_ARR**SIZE,
     "min": min(INIT_W, INIT_ARR),
     "max": max(INIT_W, INIT_ARR),
 }
@@ -18,7 +18,7 @@ SOLUTION = {
 
 # get AST for kernel with reduction assignment
 def get_reduction_assign_ast(dtype, op, config):
-    x = ps.fields(f'x: {dtype}[1d]')
+    x = ps.fields(f"x: {dtype}[1d]")
     w = ps.TypedSymbol("w", dtype)
 
     red_assign = reduction_assignment_from_str(w, op, x.center())
@@ -26,13 +26,18 @@ def get_reduction_assign_ast(dtype, op, config):
     return ps.create_kernel([red_assign], config, default_dtype=dtype)
 
 
-@pytest.mark.parametrize('instruction_set', ['sse', 'avx'])
-@pytest.mark.parametrize('dtype', ["float64", "float32"])
+@pytest.mark.parametrize("instruction_set", ["sse", "avx"])
+@pytest.mark.parametrize("dtype", ["float64", "float32"])
 @pytest.mark.parametrize("op", ["+", "-", "*", "min", "max"])
 def test_reduction_cpu(instruction_set, dtype, op):
-    vectorize_info = {'instruction_set': instruction_set, 'assume_inner_stride_one': True}
+    vectorize_info = {
+        "instruction_set": instruction_set,
+        "assume_inner_stride_one": True,
+    }
 
-    config = ps.CreateKernelConfig(target=ps.Target.CPU, cpu_openmp=True, cpu_vectorize_info=vectorize_info)
+    config = ps.CreateKernelConfig(
+        target=ps.Target.CPU, cpu_openmp=True, cpu_vectorize_info=vectorize_info
+    )
 
     ast_reduction = get_reduction_assign_ast(dtype, op, config)
     ps.show_code(ast_reduction)
@@ -45,7 +50,7 @@ def test_reduction_cpu(instruction_set, dtype, op):
     assert np.allclose(reduction_array, SOLUTION[op])
 
 
-@pytest.mark.parametrize('dtype', ["float64", "float32"])
+@pytest.mark.parametrize("dtype", ["float64", "float32"])
 @pytest.mark.parametrize("op", ["+", "-", "*", "min", "max"])
 def test_reduction_gpu(dtype, op):
     try:
@@ -57,7 +62,9 @@ def test_reduction_gpu(dtype, op):
     except ImportError:
         pytest.skip(reason="CuPy is not available", allow_module_level=True)
     except CUDARuntimeError:
-        pytest.skip(reason="No CUDA capable device is detected", allow_module_level=True)
+        pytest.skip(
+            reason="No CUDA capable device is detected", allow_module_level=True
+        )
 
     config = ps.CreateKernelConfig(target=ps.Target.GPU)
 
-- 
GitLab