diff --git a/generate_all_operators.py b/generate_all_operators.py index a3a4bf0a71117e8ea8d7c01d0d3afb1f5351f74d..2f81aeefce7ebb8c46278109dcb12d4b29bb5e3b 100644 --- a/generate_all_operators.py +++ b/generate_all_operators.py @@ -53,10 +53,10 @@ from hog.function_space import ( ) from hog.logger import get_logger, TimedLogger from hog.operator_generation.kernel_types import ( - Apply, - Assemble, - AssembleDiagonal, - KernelType, + ApplyWrapper, + AssembleWrapper, + AssembleDiagonalWrapper, + KernelWrapperType, ) from hog.operator_generation.loop_strategies import ( LoopStrategy, @@ -354,7 +354,7 @@ def main(): args.quad_degree if args.quad_rule is None else args.quad_rule, ) } - + enabled_geometries: Set[TriangleElement | TetrahedronElement] = set() if 2 in args.dimensions: enabled_geometries.add(TriangleElement()) @@ -488,7 +488,7 @@ class OperatorInfo: sp.Matrix, ] type_descriptor: HOGType - kernel_types: List[KernelType] = None # type: ignore[assignment] # will definitely be initialized in __post_init__ + kernel_types: List[KernelWrapperType] = None # type: ignore[assignment] # will definitely be initialized in __post_init__ geometries: Sequence[ElementGeometry] = field( default_factory=lambda: [TriangleElement(), TetrahedronElement()] ) @@ -507,7 +507,7 @@ class OperatorInfo: if self.kernel_types is None: dims = [g.dimensions for g in self.geometries] self.kernel_types = [ - Apply( + ApplyWrapper( self.test_space, self.trial_space, type_descriptor=self.type_descriptor, @@ -518,7 +518,7 @@ class OperatorInfo: all_opts = set().union(*[o for (o, _, _) in self.opts]) if not ({Opts.VECTORIZE, Opts.VECTORIZE512}.intersection(all_opts)): self.kernel_types.append( - Assemble( + AssembleWrapper( self.test_space, self.trial_space, type_descriptor=self.type_descriptor, @@ -528,8 +528,10 @@ class OperatorInfo: if self.test_space == self.trial_space: self.kernel_types.append( - AssembleDiagonal( - self.test_space, type_descriptor=self.type_descriptor, dims=dims + AssembleDiagonalWrapper( + self.test_space, + type_descriptor=self.type_descriptor, + dims=dims, ) ) @@ -591,7 +593,7 @@ def all_operators( ops.append(OperatorInfo(mapping="P2", name="SUPGDiffusion", trial_space=P2, test_space=P2, form=partial(supg_diffusion, velocity_function_space=P2, diffusivityXdelta_function_space=P2), type_descriptor=type_descriptor, geometries=list(geometries), opts=opts, blending=blending)) - + # fmt: on p2vec_epsilon = partial( @@ -703,7 +705,7 @@ def generate_elementwise_op( name, symbolizer, opts=optimizations, - kernel_types=op_info.kernel_types, + kernel_wrapper_types=op_info.kernel_types, type_descriptor=type_descriptor, ) @@ -725,19 +727,20 @@ def generate_elementwise_op( blending=blending, # type: ignore[call-arg] # kw-args are not supported by Callable ) - operator.set_element_matrix( + operator.add_integral( + name="".join(name.split()), dim=geometry.dimensions, geometry=geometry, integration_domain=MacroIntegrationDomain.VOLUME, quad=quad, blending=blending, form=form, + loop_strategy=loop_strategy, ) dir_path = os.path.join(args.output, op_info.name.split("_")[0]) operator.generate_class_code( dir_path, - loop_strategy=loop_strategy, clang_format_binary=args.clang_format_binary, ) diff --git a/hog/operator_generation/kernel_types.py b/hog/operator_generation/kernel_types.py index 778077cd48eff87879dc4bccebc868293eefa71d..9050623c9d65c5a87c88ce944bf9e01fbc615d87 100644 --- a/hog/operator_generation/kernel_types.py +++ b/hog/operator_generation/kernel_types.py @@ -42,16 +42,54 @@ from hog.operator_generation.function_space_impls import FunctionSpaceImpl from hog.operator_generation.indexing import FaceType, CellType from hog.operator_generation.pystencils_extensions import create_generic_fields from hog.operator_generation.types import HOGPrecision, HOGType, hyteg_type +from hog.operator_generation.loop_strategies import LoopStrategy, SAWTOOTH class KernelType(ABC): - name: str - src_fields: List[FunctionSpaceImpl] - dst_fields: List[FunctionSpaceImpl] - _template: Template - - """Type to specify the type of kernel. - E.g. a GEMv kernel defines multiple source fields and scalar parameters.""" + """ + A HyTeG operator may implement multiple types of methods that perform some kind of operation on a function that + is inferred from the bilinear form. For instance, a (matrix-free) matrix-vector multiplication or the assembly + of its diagonal. + + Certain operations such as setting boundary data to zero or communication have to be executed before the actual + kernel can be executed. + + E.g.: + + ``` + void apply() { + communication() // "pre-processing" + kernel() // actual kernel + post_communication() // "post-processing" + } + ``` + + For some applications, it might be necessary or at least comfortable to execute multiple kernels inside such a + method. For instance, if boundary conditions are applied: + + ``` + // form is sum of volume and boundary integral: + // ∫ ... dΩ + ∫ ... dS + void assemble() { + communication() // "pre-processing" + kernel() // volume kernel (∫ ... dΩ) + kernel_boundary() // boundary kernel (∫ ... dS | additive execution) + post_communication() // "post-processing" + } + ``` + + This class (KernelType) describes the "action" of the kernel (matvec, assembly, ...). + Another class (KernelWrapperType) then describes what kind of pre- and post-processing is required. + + ``` + void assemble() { // from KernelTypeWrapper + communication() // from KernelTypeWrapper + kernel() // from KernelType + IntegrationInfo 1 + kernel_boundary() // from KernelType + IntegrationInfo 2 + post_communication() // from KernelTypeWrapper + } + ``` + """ @abstractmethod def kernel_operation( @@ -90,27 +128,243 @@ class KernelType(ABC): """ ... + +class Apply(KernelType): + def __init__(self): + self.result_prefix = "elMatVec_" + + def kernel_operation( + self, + src_vecs: List[sp.MatrixBase], + dst_vecs: List[sp.MatrixBase], + mat: sp.MatrixBase, + rows: int, + ) -> List[SympyAssignment]: + kernel_ops = mat * src_vecs[0] + + tmp_symbols = sp.numbered_symbols(self.result_prefix) + + kernel_op_assignments = [ + SympyAssignment(tmp, kernel_op) + for tmp, kernel_op in zip(tmp_symbols, kernel_ops) + ] + + return kernel_op_assignments + + def kernel_post_operation( + self, + geometry: ElementGeometry, + element_index: Tuple[int, int, int], + element_type: Union[FaceType, CellType], + src_vecs_accesses: List[List[Field.Access]], + dst_vecs_accesses: List[List[Field.Access]], + ) -> List[ps.astnodes.Node]: + tmp_symbols = sp.numbered_symbols(self.result_prefix) + + # Add and store result to destination. + store_dst_vecs = [ + SympyAssignment(a, a + s) for a, s in zip(dst_vecs_accesses[0], tmp_symbols) + ] + return store_dst_vecs + + +class AssembleDiagonal(KernelType): + def __init__(self): + self.result_prefix = "elMatDiag_" + + def kernel_operation( + self, + src_vecs: List[sp.MatrixBase], + dst_vecs: List[sp.MatrixBase], + mat: sp.MatrixBase, + rows: int, + ) -> List[SympyAssignment]: + kernel_ops = mat.diagonal() + + tmp_symbols = sp.numbered_symbols(self.result_prefix) + + kernel_op_assignments = [ + SympyAssignment(tmp, kernel_op) + for tmp, kernel_op in zip(tmp_symbols, kernel_ops) + ] + + return kernel_op_assignments + + def kernel_post_operation( + self, + geometry: ElementGeometry, + element_index: Tuple[int, int, int], + element_type: Union[FaceType, CellType], + src_vecs_accesses: List[List[Field.Access]], + dst_vecs_accesses: List[List[Field.Access]], + ) -> List[ps.astnodes.Node]: + tmp_symbols = sp.numbered_symbols(self.result_prefix) + + # Add and store result to destination. + store_dst_vecs = [ + SympyAssignment(a, a + s) for a, s in zip(dst_vecs_accesses[0], tmp_symbols) + ] + return store_dst_vecs + + +class Assemble(KernelType): + def __init__( + self, + src_space: FunctionSpace, + dst_space: FunctionSpace, + ): + self.result_prefix = "elMat_" + idx_t = HOGType("idx_t", np.int64) + + self.src: FunctionSpaceImpl = FunctionSpaceImpl.create_impl( + src_space, "src", idx_t + ) + self.dst: FunctionSpaceImpl = FunctionSpaceImpl.create_impl( + dst_space, "dst", idx_t + ) + + def kernel_operation( + self, + src_vecs: List[sp.MatrixBase], + dst_vecs: List[sp.MatrixBase], + mat: sp.MatrixBase, + rows: int, + ) -> List[SympyAssignment]: + return [ + SympyAssignment(sp.Symbol(f"{self.result_prefix}{r}_{c}"), mat[r, c]) + for r in range(mat.shape[0]) + for c in range(mat.shape[1]) + ] + + def kernel_post_operation( + self, + geometry: ElementGeometry, + element_index: Tuple[int, int, int], + element_type: Union[FaceType, CellType], + src_vecs_accesses: List[List[Field.Access]], + dst_vecs_accesses: List[List[Field.Access]], + ) -> List[ps.astnodes.Node]: + src, dst = dst_vecs_accesses + el_mat = sp.Matrix( + [ + [sp.Symbol(f"{self.result_prefix}{r}_{c}") for c in range(len(src))] + for r in range(len(dst)) + ] + ) + + # apply basis/dof transformations + transform_src_code, transform_src_mat = self.src.dof_transformation( + geometry, element_index, element_type + ) + transform_dst_code, transform_dst_mat = self.dst.dof_transformation( + geometry, element_index, element_type + ) + transformed_el_mat = transform_dst_mat.T * el_mat * transform_src_mat + + nr = len(dst) + nc = len(src) + mat_size = nr * nc + + rowIdx, colIdx = create_generic_fields(["rowIdx", "colIdx"], dtype=np.uint64) + # NOTE: The type is 'hyteg_type().pystencils_type', i.e. `real_t` + # (not `self._type_descriptor.pystencils_type`) + # because this function is implemented manually in HyTeG with + # this signature. Passing `np.float64` is not ideal (if `real_t != + # double`) but it makes sure that casts are inserted if necessary + # (though some might be superfluous). + mat = create_generic_fields( + ["mat"], dtype=hyteg_type(HOGPrecision.REAL_T).pystencils_type + )[0] + rowIdxSymb = FieldPointerSymbol(rowIdx.name, rowIdx.dtype, False) + colIdxSymb = FieldPointerSymbol(colIdx.name, colIdx.dtype, False) + matSymb = FieldPointerSymbol(mat.name, mat.dtype, False) + + body: List[ps.astnodes.Node] = [ + CustomCodeNode( + f"std::vector< uint_t > {rowIdxSymb.name}( {nr} );\n" + f"std::vector< uint_t > {colIdxSymb.name}( {nc} );\n" + f"std::vector< {mat.dtype} > {matSymb.name}( {mat_size} );", + [], + [rowIdxSymb, colIdxSymb, matSymb], + ), + ps.astnodes.EmptyLine(), + ] + body += [ + SympyAssignment( + rowIdx.absolute_access((k,), (0,)), CastFunc(dst_access, np.uint64) + ) + for k, dst_access in enumerate(dst) + ] + body += [ + SympyAssignment( + colIdx.absolute_access((k,), (0,)), CastFunc(src_access, np.uint64) + ) + for k, src_access in enumerate(src) + ] + + body += [ + ps.astnodes.EmptyLine(), + ps.astnodes.SourceCodeComment("Apply basis transformation"), + *sorted({transform_src_code, transform_dst_code}, key=lambda n: n._code), + ps.astnodes.EmptyLine(), + ] + + body += [ + SympyAssignment( + mat.absolute_access((r * nc + c,), (0,)), + CastFunc(transformed_el_mat[r, c], mat.dtype), + ) + for r in range(nr) + for c in range(nc) + ] + + body += [ + ps.astnodes.EmptyLine(), + CustomCodeNode( + f"mat->addValues( {rowIdxSymb.name}, {colIdxSymb.name}, {matSymb.name} );", + [ + TypedSymbol("mat", "std::shared_ptr< SparseMatrixProxy >"), + rowIdxSymb, + colIdxSymb, + matSymb, + ], + [], + ), + ] + + return body + + +class KernelWrapperType(ABC): + name: str + src_fields: List[FunctionSpaceImpl] + dst_fields: List[FunctionSpaceImpl] + _template: Template + """ + See documentation of class KernelType. + """ + + @property @abstractmethod - def includes(self) -> Set[str]: - ... + def kernel_type(self) -> KernelType: ... @abstractmethod - def base_classes(self) -> List[str]: - ... + def includes(self) -> Set[str]: ... @abstractmethod - def wrapper_methods(self) -> List[CppMethod]: - ... + def base_classes(self) -> List[str]: ... @abstractmethod - def member_variables(self) -> List[CppMemberVariable]: - ... + def wrapper_methods(self) -> List[CppMethod]: ... + + @abstractmethod + def member_variables(self) -> List[CppMemberVariable]: ... def substitute(self, subs: Mapping[str, object]) -> None: self._template = Template(self._template.safe_substitute(subs)) -class Apply(KernelType): +class ApplyWrapper(KernelWrapperType): def __init__( self, src_space: FunctionSpace, @@ -224,39 +478,9 @@ class Apply(KernelType): f'this->stopTiming( "{self.name}" );' ) - def kernel_operation( - self, - src_vecs: List[sp.MatrixBase], - dst_vecs: List[sp.MatrixBase], - mat: sp.MatrixBase, - rows: int, - ) -> List[SympyAssignment]: - kernel_ops = mat * src_vecs[0] - - tmp_symbols = sp.numbered_symbols(self.result_prefix) - - kernel_op_assignments = [ - SympyAssignment(tmp, kernel_op) - for tmp, kernel_op in zip(tmp_symbols, kernel_ops) - ] - - return kernel_op_assignments - - def kernel_post_operation( - self, - geometry: ElementGeometry, - element_index: Tuple[int, int, int], - element_type: Union[FaceType, CellType], - src_vecs_accesses: List[List[Field.Access]], - dst_vecs_accesses: List[List[Field.Access]], - ) -> List[ps.astnodes.Node]: - tmp_symbols = sp.numbered_symbols(self.result_prefix) - - # Add and store result to destination. - store_dst_vecs = [ - SympyAssignment(a, a + s) for a, s in zip(dst_vecs_accesses[0], tmp_symbols) - ] - return store_dst_vecs + @property + def kernel_type(self) -> KernelType: + return Apply() def includes(self) -> Set[str]: return ( @@ -302,7 +526,7 @@ class Apply(KernelType): return [] -class GEMV(KernelType): +class GEMVWrapper(KernelWrapperType): def __init__( self, type_descriptor: HOGType, @@ -325,7 +549,7 @@ class GEMV(KernelType): raise HOGException("Not implemented") -class AssembleDiagonal(KernelType): +class AssembleDiagonalWrapper(KernelWrapperType): def __init__( self, fe_space: FunctionSpace, @@ -340,7 +564,6 @@ class AssembleDiagonal(KernelType): self.src_fields = [] self.dst_fields = [self.dst] self.dims = dims - self.result_prefix = "elMatDiag_" def macro_loop(dim: int) -> str: Macro = {2: "Face", 3: "Cell"}[dim] @@ -413,39 +636,9 @@ class AssembleDiagonal(KernelType): f'this->stopTiming( "{self.name}" );' ) - def kernel_operation( - self, - src_vecs: List[sp.MatrixBase], - dst_vecs: List[sp.MatrixBase], - mat: sp.MatrixBase, - rows: int, - ) -> List[SympyAssignment]: - kernel_ops = mat.diagonal() - - tmp_symbols = sp.numbered_symbols(self.result_prefix) - - kernel_op_assignments = [ - SympyAssignment(tmp, kernel_op) - for tmp, kernel_op in zip(tmp_symbols, kernel_ops) - ] - - return kernel_op_assignments - - def kernel_post_operation( - self, - geometry: ElementGeometry, - element_index: Tuple[int, int, int], - element_type: Union[FaceType, CellType], - src_vecs_accesses: List[List[Field.Access]], - dst_vecs_accesses: List[List[Field.Access]], - ) -> List[ps.astnodes.Node]: - tmp_symbols = sp.numbered_symbols(self.result_prefix) - - # Add and store result to destination. - store_dst_vecs = [ - SympyAssignment(a, a + s) for a, s in zip(dst_vecs_accesses[0], tmp_symbols) - ] - return store_dst_vecs + @property + def kernel_type(self) -> KernelType: + return AssembleDiagonal() def includes(self) -> Set[str]: return {"hyteg/solvers/Smoothables.hpp"} | self.dst.includes() @@ -483,7 +676,7 @@ class AssembleDiagonal(KernelType): ] -class Assemble(KernelType): +class AssembleWrapper(KernelWrapperType): def __init__( self, src_space: FunctionSpace, @@ -508,7 +701,6 @@ class Assemble(KernelType): self.type_descriptor = type_descriptor self.dims = dims - self.result_prefix = "elMat_" def macro_loop(dim: int) -> str: Macro = {2: "Face", 3: "Cell"}[dim] @@ -563,116 +755,9 @@ class Assemble(KernelType): f'this->stopTiming( "{self.name}" );' ) - def kernel_operation( - self, - src_vecs: List[sp.MatrixBase], - dst_vecs: List[sp.MatrixBase], - mat: sp.MatrixBase, - rows: int, - ) -> List[SympyAssignment]: - return [ - SympyAssignment(sp.Symbol(f"{self.result_prefix}{r}_{c}"), mat[r, c]) - for r in range(mat.shape[0]) - for c in range(mat.shape[1]) - ] - - def kernel_post_operation( - self, - geometry: ElementGeometry, - element_index: Tuple[int, int, int], - element_type: Union[FaceType, CellType], - src_vecs_accesses: List[List[Field.Access]], - dst_vecs_accesses: List[List[Field.Access]], - ) -> List[ps.astnodes.Node]: - src, dst = dst_vecs_accesses - el_mat = sp.Matrix( - [ - [sp.Symbol(f"{self.result_prefix}{r}_{c}") for c in range(len(src))] - for r in range(len(dst)) - ] - ) - - # apply basis/dof transformations - transform_src_code, transform_src_mat = self.src.dof_transformation( - geometry, element_index, element_type - ) - transform_dst_code, transform_dst_mat = self.dst.dof_transformation( - geometry, element_index, element_type - ) - transformed_el_mat = transform_dst_mat.T * el_mat * transform_src_mat - - nr = len(dst) - nc = len(src) - mat_size = nr * nc - - rowIdx, colIdx = create_generic_fields(["rowIdx", "colIdx"], dtype=np.uint64) - # NOTE: The type is 'hyteg_type().pystencils_type', i.e. `real_t` - # (not `self._type_descriptor.pystencils_type`) - # because this function is implemented manually in HyTeG with - # this signature. Passing `np.float64` is not ideal (if `real_t != - # double`) but it makes sure that casts are inserted if necessary - # (though some might be superfluous). - mat = create_generic_fields( - ["mat"], dtype=hyteg_type(HOGPrecision.REAL_T).pystencils_type - )[0] - rowIdxSymb = FieldPointerSymbol(rowIdx.name, rowIdx.dtype, False) - colIdxSymb = FieldPointerSymbol(colIdx.name, colIdx.dtype, False) - matSymb = FieldPointerSymbol(mat.name, mat.dtype, False) - - body: List[ps.astnodes.Node] = [ - CustomCodeNode( - f"std::vector< uint_t > {rowIdxSymb.name}( {nr} );\n" - f"std::vector< uint_t > {colIdxSymb.name}( {nc} );\n" - f"std::vector< {mat.dtype} > {matSymb.name}( {mat_size} );", - [], - [rowIdxSymb, colIdxSymb, matSymb], - ), - ps.astnodes.EmptyLine(), - ] - body += [ - SympyAssignment( - rowIdx.absolute_access((k,), (0,)), CastFunc(dst_access, np.uint64) - ) - for k, dst_access in enumerate(dst) - ] - body += [ - SympyAssignment( - colIdx.absolute_access((k,), (0,)), CastFunc(src_access, np.uint64) - ) - for k, src_access in enumerate(src) - ] - - body += [ - ps.astnodes.EmptyLine(), - ps.astnodes.SourceCodeComment("Apply basis transformation"), - *sorted({transform_src_code, transform_dst_code}, key=lambda n: n._code), - ps.astnodes.EmptyLine(), - ] - - body += [ - SympyAssignment( - mat.absolute_access((r * nc + c,), (0,)), - CastFunc(transformed_el_mat[r, c], mat.dtype), - ) - for r in range(nr) - for c in range(nc) - ] - - body += [ - ps.astnodes.EmptyLine(), - CustomCodeNode( - f"mat->addValues( {rowIdxSymb.name}, {colIdxSymb.name}, {matSymb.name} );", - [ - TypedSymbol("mat", "std::shared_ptr< SparseMatrixProxy >"), - rowIdxSymb, - colIdxSymb, - matSymb, - ], - [], - ), - ] - - return body + @property + def kernel_type(self) -> KernelType: + return Assemble(self.src.fe_space, self.dst.fe_space) def includes(self) -> Set[str]: return ( diff --git a/hog/operator_generation/operators.py b/hog/operator_generation/operators.py index 34b28661fe21af58c4e2168cab54a6680308f009..2eb16fc268b28e80f33601fe340d2b5fc87eba76 100644 --- a/hog/operator_generation/operators.py +++ b/hog/operator_generation/operators.py @@ -83,8 +83,8 @@ from hog.operator_generation.indexing import ( element_vertex_coordinates, IndexingInfo, ) -from hog.operator_generation.kernel_types import Assemble, KernelType -from hog.operator_generation.loop_strategies import LoopStrategy, SAWTOOTH +from hog.operator_generation.kernel_types import KernelWrapperType, KernelType, Assemble +from hog.operator_generation.loop_strategies import LoopStrategy, SAWTOOTH, CUBES from hog.operator_generation.optimizer import Optimizer, Opts from hog.quadrature import QuadLoop, Quadrature from hog.symbolizer import Symbolizer @@ -100,6 +100,8 @@ class MacroIntegrationDomain(Enum): @dataclass class IntegrationInfo: + """Data associated with one integral term and the corresponding loop pattern.""" + geometry: ElementGeometry # geometry of the element, e.g. tetrahedron integration_domain: ( MacroIntegrationDomain # entity of geometry to integrate over, e.g. facet @@ -111,22 +113,27 @@ class IntegrationInfo: quad_loop: Optional[QuadLoop] mat: sp.MatrixBase + loop_strategy: LoopStrategy + + name: str = "unknown_integral" docstring: str = "" def _str_(self): - return f"Integration Info: {self.geometry}, {self.integration_domain}, mat shape {self.mat.shape}, quad degree {self.quad.degree}, blending {self.blending}" + return f"Integration Info: {self.name}, {self.geometry}, {self.integration_domain}, mat shape {self.mat.shape}, quad degree {self.quad.degree}, blending {self.blending}" def _repr_(self): return str(self) @dataclass -class Kernel: - kernel_type: KernelType - kernel_function: KernelFunction - platform_dependent_funcs: Dict[str, KernelFunction] - operation_count: str - integration_info: IntegrationInfo +class OperatorMethod: + """Collection of kernels and metadata required for each method of an operator.""" + + kernel_wrapper_type: KernelWrapperType + kernel_functions: List[KernelFunction] + platform_dependent_funcs: List[Dict[str, KernelFunction]] + operation_counts: List[str] + integration_infos: List[IntegrationInfo] class CppClassFiles(Enum): @@ -163,16 +170,16 @@ class HyTeGElementwiseOperator: self, name: str, symbolizer: Symbolizer, - kernel_types: List[KernelType], + kernel_wrapper_types: List[KernelWrapperType], opts: Set[Opts], type_descriptor: HOGType, ): self.name = name self.symbolizer = symbolizer - self.kernel_types = kernel_types # type of kernel: e.g. GEMV + self.kernel_wrapper_types = kernel_wrapper_types # type of kernel: e.g. GEMV - # Holds one element matrix and quad scheme for each ElementGeometry. - self.element_matrices: Dict[int, IntegrationInfo] = {} + # Each IntegrationInfo object represents one integral of the weak formulation. + self.integration_infos: Dict[int, List[IntegrationInfo]] = {} self._optimizer = Optimizer(opts) self._optimizer_no_vec = Optimizer(opts - {Opts.VECTORIZE, Opts.VECTORIZE512}) @@ -183,30 +190,40 @@ class HyTeGElementwiseOperator: # coefficients self.coeffs: Dict[str, FunctionSpaceImpl] = {} # implementations for each kernel, generated at a later stage - self.kernels: List[Kernel] = [] + self.operator_methods: List[OperatorMethod] = [] - def set_element_matrix( + def add_integral( self, + name: str, dim: int, geometry: ElementGeometry, integration_domain: MacroIntegrationDomain, quad: Quadrature, blending: GeometryMap, form: Form, + loop_strategy: LoopStrategy, ) -> None: """ - Use this method to add element matrices to the operator. + Use this method to add integrals to the operator. + :param name: some name for this integral (no spaces please) :param dim: the dimensionality of the domain, i.e. the volume - it may be that this does not match the geometry of the element, e.g., when facet integrals are required, note that only one routine per dim is created :param geometry: geometry that shall be integrated over :param integration_domain: where to integrate - see MacroIntegrationDomain - :param mat: the local element matrix :param quad: the employed quadrature scheme - should match what has been used to integrate the weak form :param blending: the same geometry map that has been passed to the form + :param form: the integrand + :param loop_strategy: loop pattern over the refined macro-volume - must somehow be compatible with the + integration domain """ + if "".join(name.split()) != name: + raise HOGException( + "Please give the integral an identifier without white space." + ) + if dim not in [2, 3]: raise HOGException("Only supporting 2D and 3D. Dim should be in [2, 3]") @@ -215,11 +232,10 @@ class HyTeGElementwiseOperator: if dim != geometry.dimensions: raise HOGException("Only volume integrals supported as of now.") - if dim in self.element_matrices: - raise HOGException( - "You are trying to overwrite an already specified integration routine by calling this method twice with " - "the same dim argument." - ) + if integration_domain == MacroIntegrationDomain.VOLUME and not ( + isinstance(loop_strategy, SAWTOOTH) or isinstance(loop_strategy, CUBES) + ): + raise HOGException("Invalid loop strategy for volume integrals.") tables = [] quad_loop = None @@ -256,15 +272,22 @@ class HyTeGElementwiseOperator: mat[row, col], self.symbolizer, blending ) - self.element_matrices[dim] = IntegrationInfo( - geometry=geometry, - integration_domain=integration_domain, - quad=quad, - blending=blending, - tables=tables, - quad_loop=quad_loop, - mat=mat, - docstring=form.docstring, + if dim not in self.integration_infos: + self.integration_infos[dim] = [] + + self.integration_infos[dim].append( + IntegrationInfo( + name=name, + geometry=geometry, + integration_domain=integration_domain, + quad=quad, + blending=blending, + tables=tables, + quad_loop=quad_loop, + mat=mat, + docstring=form.docstring, + loop_strategy=loop_strategy, + ) ) def coefficients(self) -> List[FunctionSpaceImpl]: @@ -282,7 +305,6 @@ class HyTeGElementwiseOperator: def generate_class_code( self, dir_path: str, - loop_strategy: LoopStrategy = SAWTOOTH(), class_files: CppClassFiles = CppClassFiles.HEADER_AND_IMPL, clang_format_binary: Optional[str] = None, ) -> None: @@ -297,29 +319,30 @@ class HyTeGElementwiseOperator: """ # Asking the optimizer if optimizations are valid. - self._optimizer.check_opts_validity(loop_strategy) + self._optimizer.check_opts_validity() # Generate each kernel type (apply, gemv, ...). - self.generate_kernels(loop_strategy) + self.generate_kernels() # Setting up the final C++ class. operator_cpp_class = CppClass( name=self.name, base_classes=sorted( - {base for kt in self.kernel_types for base in kt.base_classes()} + {base for kt in self.kernel_wrapper_types for base in kt.base_classes()} ), ) # Adding form docstring to C++ class form_docstrings = set() - for d, io in self.element_matrices.items(): - form_docstrings.add(io.docstring) + for d, ios in self.integration_infos.items(): + for io in ios: + form_docstrings.add(io.docstring) for form_docstring in form_docstrings: form_docstring_with_slashes = "/// ".join(form_docstring.splitlines(True)) operator_cpp_class.add(CppComment(form_docstring_with_slashes, where="all")) - for kernel_type in self.kernel_types: + for kernel_wrapper_type in self.kernel_wrapper_types: # Setting up communication. - kernel_type.substitute( + kernel_wrapper_type.substitute( { "comm_fe_functions_2D": "\n".join( coeff.pre_communication(2) for coeff in self.coefficients() @@ -331,48 +354,79 @@ class HyTeGElementwiseOperator: ) # Wrapper methods ("hand-crafted") - for kernel_wrapper_cpp_method in kernel_type.wrapper_methods(): + for kernel_wrapper_cpp_method in kernel_wrapper_type.wrapper_methods(): operator_cpp_class.add(kernel_wrapper_cpp_method) # Member variables - for member in kernel_type.member_variables(): + for member in kernel_wrapper_type.member_variables(): operator_cpp_class.add(member) # Add all kernels to the class. - for kernel in self.kernels: - kernel_op_count = "\n".join( - [ - f"Kernel type: {kernel.kernel_type.name}", - f"- quadrature rule: {kernel.integration_info.quad}", - f"- operations per element:", - kernel.operation_count, - ] - ) + for operator_method in self.operator_methods: - if class_files == CppClassFiles.HEADER_IMPL_AND_VARIANTS: - operator_cpp_class.add( - CppMethodWithVariants( - { - platform: CppMethod.from_kernel_function( - kernel, - is_const=True, - visibility="private", - docstring=indent(kernel_op_count, "/// "), - ) - for platform, kernel in kernel.platform_dependent_funcs.items() - } - ) + num_integrals = len(operator_method.integration_infos) + + if num_integrals != len( + operator_method.kernel_functions + ) or num_integrals != len(operator_method.operation_counts): + raise HOGException( + "There should be as many IntegrationInfo (aka integrals) as KernelFunctions (aka kernels)." ) - else: - operator_cpp_class.add( - CppMethod.from_kernel_function( - kernel.kernel_function, - is_const=True, - visibility="private", - docstring=indent(kernel_op_count, "/// "), - ) + + for ( + integration_info, + sub_kernel, + operation_count, + platform_dependent_funcs, + ) in zip( + operator_method.integration_infos, + operator_method.kernel_functions, + operator_method.operation_counts, + operator_method.platform_dependent_funcs, + ): + kernel_docstring = "\n".join( + [ + f"\nIntegral: {integration_info.name}", + f"- volume element: {integration_info.geometry}", + f"- kernel type: {operator_method.kernel_wrapper_type.name}", + f"- loop strategy: {integration_info.loop_strategy}", + f"- quadrature rule: {integration_info.quad}", + f"- blending map: {integration_info.blending}", + f"- operations per element:", + operation_count, + ] ) + if class_files == CppClassFiles.HEADER_IMPL_AND_VARIANTS: + operator_cpp_class.add( + CppMethodWithVariants( + { + platform: CppMethod.from_kernel_function( + plat_dep_kernel, + is_const=True, + visibility="private", + docstring=indent( + kernel_docstring, + "/// ", + ), + ) + for platform, plat_dep_kernel in platform_dependent_funcs.items() + } + ) + ) + else: + operator_cpp_class.add( + CppMethod.from_kernel_function( + sub_kernel, + is_const=True, + visibility="private", + docstring=indent( + kernel_docstring, + "/// ", + ), + ) + ) + # Finally we know what fields we need and can build the constructors, member variables, and includes. # Constructors ... @@ -423,10 +477,25 @@ class HyTeGElementwiseOperator: func_space_includes = set().union( *[coeff.includes() for coeff in self.coefficients()] ) - kernel_includes = set().union(*[kt.includes() for kt in self.kernel_types]) + kernel_includes = set().union( + *[kt.includes() for kt in self.kernel_wrapper_types] + ) blending_includes = set() - for dim, integration_info in self.element_matrices.items(): - for inc in integration_info.blending.coupling_includes(): + for dim, integration_infos in self.integration_infos.items(): + + if not all( + [ + integration_infos[0].blending.coupling_includes() + == io.blending.coupling_includes() + for io in integration_infos + ] + ): + raise HOGException( + "Seems that there are different blending functions in one bilinear form (likely in two different " + "integrals). This is not supported yet. :(" + ) + + for inc in integration_infos[0].blending.coupling_includes(): blending_includes.add(inc) all_includes = ( self.INCLUDES | func_space_includes | kernel_includes | blending_includes @@ -665,8 +734,9 @@ class HyTeGElementwiseOperator: self, dim: int, integration_info: IntegrationInfo, - loop_strategy: LoopStrategy, kernel_type: KernelType, + src_fields: List[FunctionSpaceImpl], + dst_fields: List[FunctionSpaceImpl], ) -> Tuple[ps.astnodes.Block, str]: """ This method generates an AST that represents the passed kernel type. @@ -675,7 +745,6 @@ class HyTeGElementwiseOperator: :param integration_info: IntegrationInfo object holding the symbolic element matrix, quadrature rule, element geometry, etc. - :param loop_strategy: defines the iteration pattern :param kernel_type: specifies the kernel to execute - this could be e.g., a matrix-vector multiplication :returns: tuple (pre_loop_stmts, loop, operations_table) @@ -708,14 +777,14 @@ class HyTeGElementwiseOperator: self.symbolizer.dof_symbols_as_vector( src_field.fe_space.num_dofs(geometry), src_field.name ) - for src_field in kernel_type.src_fields + for src_field in src_fields ] dst_vecs_symbols = [ self.symbolizer.dof_symbols_as_vector( dst_field.fe_space.num_dofs(geometry), dst_field.name ) - for dst_field in kernel_type.dst_fields + for dst_field in dst_fields ] # Do the kernel operation. @@ -785,7 +854,7 @@ class HyTeGElementwiseOperator: element_index = [ LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim) ] - loop = loop_strategy.create_loop( + loop = integration_info.loop_strategy.create_loop( dim, element_index, indexing_info.micro_edges_per_macro_edge ) @@ -802,7 +871,7 @@ class HyTeGElementwiseOperator: element_type, indexing_info, ) - for src_field in kernel_type.src_fields + for src_field in src_fields ] dst_vecs_accesses = [ dst_field.local_dofs( @@ -811,7 +880,7 @@ class HyTeGElementwiseOperator: element_type, indexing_info, ) - for dst_field in kernel_type.dst_fields + for dst_field in dst_fields ] # Load source DoFs. @@ -908,16 +977,14 @@ class HyTeGElementwiseOperator: ) ) - blending_assignments += ( - hog.code_generation.hessian_matrix_assignments( - mat, - integration_info.tables + quad_loop, - geometry, - self.symbolizer, - affine_points=element_vertex_coordinates_symbols, - blending=integration_info.blending, - quad_info=integration_info.quad, - ) + blending_assignments += hog.code_generation.hessian_matrix_assignments( + mat, + integration_info.tables + quad_loop, + geometry, + self.symbolizer, + affine_points=element_vertex_coordinates_symbols, + blending=integration_info.blending, + quad_info=integration_info.quad, ) with TimedLogger("cse on blending operation", logging.DEBUG): @@ -972,7 +1039,7 @@ class HyTeGElementwiseOperator: body = add_types(body, kernel_config) # add the created loop body of statements to the loop according to the loop strategy - loop_strategy.add_body_to_loop(loop, body, element_type) + integration_info.loop_strategy.add_body_to_loop(loop, body, element_type) # This actually replaces the abstract field access instances with # array accesses that contain the index computations. @@ -1020,7 +1087,9 @@ class HyTeGElementwiseOperator: "renaming loop bodies and preloop stmts for element types", logging.DEBUG ): for element_type, preloop in preloop_stmts.items(): - loop = loop_strategy.add_preloop_for_loop(loop, preloop, element_type) + loop = integration_info.loop_strategy.add_preloop_for_loop( + loop, preloop, element_type + ) # Add quadrature points and weights array declarations, but only those # which are actually needed. @@ -1033,7 +1102,7 @@ class HyTeGElementwiseOperator: return (Block(body), ops.to_table()) - def generate_kernels(self, loop_strategy: LoopStrategy) -> None: + def generate_kernels(self) -> None: """ TODO: Split this up in a meaningful way. Currently does a lot of stuff and modifies the kernel_types. @@ -1051,58 +1120,82 @@ class HyTeGElementwiseOperator: This information is gathered here, and written to the kernel type object, """ - for kernel_type in self.kernel_types: - for dim, integration_info in self.element_matrices.items(): - geometry = integration_info.geometry + for kernel_wrapper_type in self.kernel_wrapper_types: + for dim, integration_infos in self.integration_infos.items(): + + kernel_functions = [] + kernel_op_counts = [] + platform_dep_kernels = [] + macro_type = {2: "face", 3: "cell"} - # generate AST of kernel loop - with TimedLogger( - f"Generating kernel: {kernel_type.name} in {dim}D", logging.INFO - ): - ( - function_body, - kernel_op_count, - ) = self._generate_kernel( - dim, integration_info, loop_strategy, kernel_type + geometry = integration_infos[0].geometry + if not all([geometry == io.geometry for io in integration_infos]): + raise HOGException( + "All element geometries should be the same. Regardless of whether you integrate over their " + "boundary or volume. Dev note: this information seems to be redundant and we should only have " + "it in one place." ) - kernel_function = KernelFunction( - function_body, - Target.CPU, - Backend.C, - make_python_function, - ghost_layers=None, - function_name=f"{kernel_type.name}_macro_{geometry.dimensions}D", - assignments=None, - ) + for integration_info in integration_infos: + + # generate AST of kernel loop + with TimedLogger( + f"Generating kernel {integration_info.name} (wrapper: {kernel_wrapper_type.name} in {dim}D", + logging.INFO, + ): + + ( + function_body, + kernel_op_count, + ) = self._generate_kernel( + dim, + integration_info, + kernel_wrapper_type.kernel_type, + kernel_wrapper_type.src_fields, + kernel_wrapper_type.dst_fields, + ) - # optimizer applies optimizations - with TimedLogger( - f"Optimizing kernel: {kernel_type.name} in {dim}D", logging.INFO - ): - optimizer = ( - self._optimizer - if not isinstance(kernel_type, Assemble) - else self._optimizer_no_vec - ) - platform_dep_kernels = optimizer.apply_to_kernel( - kernel_function, dim, loop_strategy - ) + kernel_function = KernelFunction( + function_body, + Target.CPU, + Backend.C, + make_python_function, + ghost_layers=None, + function_name=f"{kernel_wrapper_type.name}_{integration_info.name}_macro_{geometry.dimensions}D", + assignments=None, + ) - kernel_parameters = kernel_function.get_parameters() + kernel_functions.append(kernel_function) + kernel_op_counts.append(kernel_op_count) + + # optimizer applies optimizations + with TimedLogger( + f"Optimizing kernel: {kernel_function.function_name} in {dim}D", + logging.INFO, + ): + optimizer = ( + self._optimizer + if not isinstance(kernel_wrapper_type.kernel_type, Assemble) + else self._optimizer_no_vec + ) + platform_dep_kernels.append( + optimizer.apply_to_kernel( + kernel_function, dim, integration_info.loop_strategy + ) + ) - # setup kernel string and op count table + # Setup kernel wrapper string and op count table # - # in the following, we insert the sub strings of the final kernel string: + # In the following, we insert the sub strings of the final kernel string: # coefficients (retrieving pointers), setup of scalar parameters, kernel function call # This is done as follows: # - the kernel type has the skeleton string in which the sub string must be substituted - # - the function space impl knows from which function type a src/dst/coefficient field is and can return the - # corresponding sub string + # - the function space impl knows from which function type a src/dst/coefficient field is and can return + # the corresponding sub string # Retrieve coefficient pointers - kernel_type.substitute( + kernel_wrapper_type.substitute( { f"pointer_retrieval_{dim}D": "\n".join( coeff.pointer_retrieval(dim) @@ -1126,32 +1219,56 @@ class HyTeGElementwiseOperator: ] ) - scalar_parameter_setup += ( - integration_info.blending.parameter_coupling_code() - ) + blending_parameter_coupling_code = integration_infos[ + 0 + ].blending.parameter_coupling_code() + if not all( + [ + blending_parameter_coupling_code + == io.blending.parameter_coupling_code() + for io in integration_infos + ] + ): + raise HOGException( + "It seems you specified different blending maps for one bilinear form. " + "This may be desired, but it is certainly not supported :)." + ) + + scalar_parameter_setup += blending_parameter_coupling_code - kernel_type.substitute( + kernel_wrapper_type.substitute( {f"scalar_parameter_setup_{dim}D": scalar_parameter_setup} ) - # Kernel function call. - kernel_function_call_parameter_string = ",\n".join( - [str(prm) for prm in kernel_parameters] - ) - kernel_type.substitute( + # Kernel function call(s). + kernel_function_call_strings = [] + for kernel_function in kernel_functions: + kernel_parameters = kernel_function.get_parameters() + + kernel_function_call_parameter_string = ",\n".join( + [str(prm) for prm in kernel_parameters] + ) + + kernel_function_call_strings.append( + f""" + {kernel_function.function_name}(\n + {kernel_function_call_parameter_string});""" + ) + + kernel_wrapper_type.substitute( { - f"kernel_function_call_{dim}D": f""" - {kernel_function.function_name}(\n - {kernel_function_call_parameter_string});""" + f"kernel_function_call_{dim}D": "\n".join( + kernel_function_call_strings + ) } ) # Collect all information - kernel = Kernel( - kernel_type, - kernel_function, + kernel = OperatorMethod( + kernel_wrapper_type, + kernel_functions, platform_dep_kernels, - kernel_op_count, - integration_info, + kernel_op_counts, + integration_infos, ) - self.kernels.append(kernel) + self.operator_methods.append(kernel) diff --git a/hog/operator_generation/optimizer.py b/hog/operator_generation/optimizer.py index a64fc0ae499c63f805a051e30e5ab599c444960d..6d433c4b027553fd681954c9411f47fdc450c75d 100644 --- a/hog/operator_generation/optimizer.py +++ b/hog/operator_generation/optimizer.py @@ -108,8 +108,8 @@ class Optimizer: def __getitem__(self, opt): return opt in self._opts - def check_opts_validity(self, loop_strategy: LoopStrategy) -> None: - """Checks if the desired optimizations are valid for the given loop strategy.""" + def check_opts_validity(self) -> None: + """Checks if the desired optimizations are valid.""" if Opts.VECTORIZE512 in self._opts and not Opts.VECTORIZE in self._opts: raise HOGException("Optimization VECTORIZE512 requires VECTORIZE.") @@ -191,6 +191,7 @@ class Optimizer: with TimedLogger("simplifying conditionals", logging.DEBUG): simplify_conditionals(loop, loop_counter_simplification=True) + if self[Opts.MOVECONSTANTS]: with TimedLogger("moving constants out of loop", logging.DEBUG): # This has to be done twice because sometimes constants are not moved completely to the surrounding block but diff --git a/hog_tests/operator_generation/test_opgen_smoke.py b/hog_tests/operator_generation/test_opgen_smoke.py new file mode 100644 index 0000000000000000000000000000000000000000..b328de9311ecd64a9ad32fb435e2b0d599212314 --- /dev/null +++ b/hog_tests/operator_generation/test_opgen_smoke.py @@ -0,0 +1,114 @@ +# HyTeG Operator Generator +# Copyright (C) 2024 HyTeG Team +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +from sympy.core.cache import clear_cache + +from hog.blending import IdentityMap +from hog.operator_generation.loop_strategies import CUBES +from hog.operator_generation.optimizer import Opts +from hog.element_geometry import TriangleElement +from hog.function_space import LagrangianFunctionSpace +from hog.operator_generation.operators import ( + HyTeGElementwiseOperator, + MacroIntegrationDomain, +) +from hog.symbolizer import Symbolizer +from hog.quadrature import Quadrature, select_quadrule +from hog.forms import div_k_grad, mass +from hog.operator_generation.kernel_types import ApplyWrapper +from hog.operator_generation.types import hyteg_type +from hog.blending import AnnulusMap + + +def test_opgen_smoke(): + """ + Just a simple smoke test to check that an operator can be generated. + + If something is really broken, this will make the CI fail early. + + We are generating a matvec method here for + + ∫ k ∇u · ∇v dx + ∫ uv dx + + with either integral being evaluated in their own kernel. + + That may not be reasonable but tests some features. + """ + clear_cache() + + symbolizer = Symbolizer() + volume_geometry = TriangleElement() + + name = f"P2DivKGradBlendingPlusMass" + + trial = LagrangianFunctionSpace(2, symbolizer) + test = LagrangianFunctionSpace(2, symbolizer) + coeff = LagrangianFunctionSpace(2, symbolizer) + quad = Quadrature(select_quadrule(2, volume_geometry), volume_geometry) + + divkgrad = div_k_grad(trial, test, volume_geometry, symbolizer, AnnulusMap(), coeff) + m = mass(trial, test, volume_geometry, symbolizer, AnnulusMap()) + + type_descriptor = hyteg_type() + + kernel_types = [ + ApplyWrapper( + test, + trial, + type_descriptor=type_descriptor, + dims=[2], + ) + ] + + opts = {Opts.MOVECONSTANTS, Opts.VECTORIZE, Opts.TABULATE, Opts.QUADLOOPS} + + operator = HyTeGElementwiseOperator( + name, + symbolizer=symbolizer, + kernel_wrapper_types=kernel_types, + opts=opts, + type_descriptor=type_descriptor, + ) + + operator.add_integral( + name="div_k_grad", + dim=volume_geometry.dimensions, + geometry=volume_geometry, + integration_domain=MacroIntegrationDomain.VOLUME, + quad=quad, + blending=AnnulusMap(), + form=divkgrad, + loop_strategy=CUBES(), + ) + + operator.add_integral( + name="mass", + dim=volume_geometry.dimensions, + geometry=volume_geometry, + integration_domain=MacroIntegrationDomain.VOLUME, + quad=quad, + blending=AnnulusMap(), + form=m, + loop_strategy=CUBES(), + ) + + operator.generate_class_code( + ".", + ) + + +if __name__ == "__main__": + test_opgen_smoke()