Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 572 additions and 385 deletions
import time
from pairs.analysis.expressions import DetermineExpressionsTerminals, ResetInPlaceOperations, DetermineInPlaceOperations, ListDeclaredExpressions
from pairs.analysis.blocks import DiscoverBlockVariants, DetermineExpressionsOwnership, DetermineParentBlocks
from pairs.analysis.devices import FetchKernelReferences, MarkCandidateLoops
from pairs.analysis.devices import FetchKernelReferences, MarkCandidateLoops, FetchDeviceCopies
from pairs.analysis.modules import FetchModulesReferences, InferModulesReturnTypes
......@@ -53,4 +53,7 @@ class Analysis:
self.apply(MarkCandidateLoops())
def infer_modules_return_types(self):
self.apply(InferModulesReturnTypes())
\ No newline at end of file
self.apply(InferModulesReturnTypes())
def fetch_device_copies(self):
self.apply(FetchDeviceCopies())
\ No newline at end of file
......@@ -9,6 +9,31 @@ from pairs.ir.visitor import Visitor
from pairs.ir.vectors import VectorOp
class FetchDeviceCopies(Visitor):
def __init__(self, ast=None):
super().__init__(ast)
self.module_stack = []
def visit_Module(self, ast_node):
self.module_stack.append(ast_node)
self.visit_children(ast_node)
self.module_stack.pop()
def visit_CopyArray(self, ast_node):
self.module_stack[-1].add_device_copy(ast_node.array())
def visit_CopyProperty(self, ast_node):
self.module_stack[-1].add_device_copy(ast_node.prop())
def visit_CopyFeatureProperty(self, ast_node):
self.module_stack[-1].add_device_copy(ast_node.prop())
def visit_CopyContactProperty(self, ast_node):
self.module_stack[-1].add_device_copy(ast_node.contact_prop())
def visit_CopyVar(self, ast_node):
self.module_stack[-1].add_device_copy(ast_node.variable())
class MarkCandidateLoops(Visitor):
def __init__(self, ast=None):
super().__init__(ast)
......
......@@ -253,7 +253,7 @@ class PairsAcessor:
self.print(f"{self.host_device_attr}{tkw} get{funcname}({params}) const{{")
if self.target.is_gpu():
if self.target.is_gpu() and prop.device_flag:
self.ifdef_else("__CUDA_ARCH__", self.getter_body, [prop, True], self.getter_body, [prop, False])
else:
self.getter_body(prop, False)
......
......@@ -31,8 +31,6 @@ from pairs.ir.variables import Var, DeclareVariable, Deref
from pairs.ir.parameters import Parameter
from pairs.ir.vectors import Vector, VectorAccess, VectorOp, ZeroVector
from pairs.ir.ret import Return
from pairs.sim.domain_partitioners import DomainPartitioners
from pairs.sim.timestep import Timestep
from pairs.code_gen.printer import Printer
from pairs.code_gen.accessor import PairsAcessor
......@@ -45,6 +43,7 @@ class CGen:
self.target = None
self.print = None
self.kernel_context = False
self.loop_scope = False
self.generate_full_object_names = False
self.ref = ref
self.debug = debug
......@@ -58,15 +57,6 @@ class CGen:
def real_type(self):
return Types.c_keyword(self.sim, Types.Real)
# def generate_cmake_config_file(self):
# self.print = Printer("pairs_cmake_params.txt")
# self.print.start()
# self.print(f"PAIRS_TARGET={self.ref}")
# self.print(f"GENERATE_WHOLE_PROGRAM={'ON' if self.sim._generate_whole_program else 'OFF'}")
# self.print(f"USE_WALBERLA={'ON' if self.sim._partitioner == DomainPartitioners.BlockForest else 'OFF'}")
# # self.print(f"COMPILE_CUDA={'ON' if self.target.is_gpu() else 'OFF'}")
# self.print.end()
def generate_object_reference(self, obj, device=False, index=None):
if device and (not self.target.is_gpu() or not obj.device_flag):
# Ideally this should never be called
......@@ -150,8 +140,6 @@ class CGen:
self.print("}")
def generate_preamble(self):
# self.print(f"#define APPLICATION_REFERENCE \"{self.ref}\"")
if self.target.is_gpu():
self.print("#include <math_constants.h>")
......@@ -166,15 +154,15 @@ class CGen:
self.print("#include <stdlib.h>")
self.print("//---")
self.print("#include \"likwid-marker.h\"")
self.print("#include \"copper_fcc_lattice.hpp\"")
self.print("#include \"create_body.hpp\"")
self.print("#include \"dem_sc_grid.hpp\"")
self.print("#include \"pairs.hpp\"")
self.print("#include \"read_from_file.hpp\"")
self.print("#include \"stats.hpp\"")
self.print("#include \"timing.hpp\"")
self.print("#include \"thermo.hpp\"")
self.print("#include \"vtk.hpp\"")
self.print("#include \"utility/copper_fcc_lattice.hpp\"")
self.print("#include \"utility/create_body.hpp\"")
self.print("#include \"utility/dem_sc_grid.hpp\"")
self.print("#include \"utility/read_from_file.hpp\"")
self.print("#include \"utility/stats.hpp\"")
self.print("#include \"utility/timing.hpp\"")
self.print("#include \"utility/thermo.hpp\"")
self.print("#include \"utility/vtk.hpp\"")
self.print("")
self.print("using namespace pairs;")
self.print("")
......@@ -185,12 +173,6 @@ class CGen:
if not module.interface:
module_params += ["PairsRuntime *pairs_runtime", "struct PairsObjects *pobj"]
if module.name=="initialize" and self.sim.create_domain_at_initialization:
module_params += ["int argc", "char **argv"]
if module.name=="set_domain":
module_params += ["int argc", "char **argv"]
module_params += [f"{Types.c_keyword(self.sim, param.type())} {param.name()}" for param in module.parameters()]
print_params = ", ".join(module_params)
......@@ -203,9 +185,8 @@ class CGen:
self.print("namespace pairs::internal {")
self.print.add_indent(4)
# All modules except the interface ones are declared in the pairs::internal scope
for module in self.sim.modules() + self.sim.udf_modules():
assert not module.interface
# All internal modules are declared in the pairs::internal scope
for module in self.sim.modules():
self.generate_module_header(module, definition=False)
self.print.add_indent(-4)
......@@ -214,7 +195,7 @@ class CGen:
def generate_pairs_object_structure(self):
self.print("")
externkw = "" if self.sim._generate_whole_program else "extern "
externkw = "extern "
if self.target.is_gpu():
for array in self.sim.arrays.statics():
if array.device_flag:
......@@ -290,34 +271,6 @@ class CGen:
self.print("};")
self.print("")
def generate_program(self, ast_node):
self.generate_interfaces()
ext = ".cu" if self.target.is_gpu() else ".cpp"
self.print = Printer(self.ref + ext)
self.print.start()
self.generate_preamble()
self.generate_pairs_object_structure()
self.generate_module_decls()
self.print("namespace pairs::internal {")
self.print.add_indent(4)
for kernel in self.sim.kernels():
self.generate_kernel(kernel)
for module in self.sim.modules():
if module.name!='main':
self.generate_module(module)
self.print.add_indent(-4)
self.print("}")
for module in self.sim.modules():
if module.name=='main':
self.generate_main(module)
self.print.end()
def generate_library(self):
self.generate_interfaces()
# Generate CUDA/CPP file with modules
......@@ -351,9 +304,8 @@ class CGen:
for kernel in self.sim.kernels():
self.generate_kernel(kernel)
# All modules except the interface ones are defined in the pairs::internal scope
for module in self.sim.modules() + self.sim.udf_modules():
assert not module.interface
# All internal modules are defined in the pairs::internal scope
for module in self.sim.modules():
self.generate_module(module)
self.print.add_indent(-4)
......@@ -452,42 +404,12 @@ class CGen:
if feature_prop in module.host_references():
self.print(f"{type_kw} *{feature_prop.name()}_h = pobj->{feature_prop.name()};")
def generate_main(self, module):
assert module.name=='main'
ndims = module.sim.ndims()
nprops = module.sim.properties.nprops()
ncontactprops = module.sim.contact_properties.nprops()
narrays = module.sim.arrays.narrays()
part = DomainPartitioners.c_keyword(module.sim.partitioner())
self.generate_full_object_names = True
self.print("int main(int argc, char **argv) {")
self.print(f" PairsRuntime *pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});")
self.print(f" struct PairsObjects *pobj = new PairsObjects();")
if module.sim._enable_profiler:
self.print(" LIKWID_MARKER_INIT;")
self.generate_statement(module.block)
if module.sim._enable_profiler:
self.print(" LIKWID_MARKER_CLOSE;")
self.print(" pairs::print_timers(pairs_runtime);")
self.print(" pairs::print_stats(pairs_runtime, pobj->nlocal, pobj->nghost);")
self.print(" delete pobj;")
self.print(" delete pairs_runtime;")
self.print(" return 0;")
self.print("}")
self.generate_full_object_names = False
def generate_module(self, module):
self.generate_module_header(module, definition=True)
self.print.add_indent(4)
if self.debug:
self.print(f"PAIRS_DEBUG(\"\\n{module.name}\\n\");")
# if self.debug:
# self.print(f"PAIRS_DEBUG(\"\\n{module.name}\\n\");")
if not module.interface:
self.generate_module_declerations(module)
......@@ -603,9 +525,7 @@ class CGen:
if ast_node.check_for_resize():
resize = self.generate_expression(ast_node.resize)
capacity = self.generate_expression(ast_node.capacity)
# self.print(f"printf (\" %d -- before AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});")
self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});")
# self.print(f"printf (\" %d -- after AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});")
else:
self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});")
......@@ -617,7 +537,10 @@ class CGen:
self.print.add_indent(-4)
if isinstance(ast_node, Continue):
self.print("continue;")
if self.loop_scope:
self.print("continue;")
else:
self.print("return;")
# TODO: Why there are Decls for other types?
if isinstance(ast_node, Decl):
......@@ -755,7 +678,7 @@ class CGen:
for i in matrix_op.indexes_to_generate():
lhs = self.generate_expression(matrix_op.lhs, matrix_op.mem, index=i)
rhs = self.generate_expression(matrix_op.rhs, index=i)
operator = vector_op.operator()
operator = matrix_op.operator()
if operator.is_unary():
self.print(f"const {self.real_type()} {matrix_op.name()}_{dim} = {operator.symbol()}({lhs});")
......@@ -806,9 +729,7 @@ class CGen:
self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}, {size}); // {array_name}")
else:
# self.print(f"std::cout<< \"{Printer.line_id} -- before {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;")
self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}); // {array_name}")
# self.print(f"std::cout<< \"{Printer.line_id} -- after {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;")
if isinstance(ast_node, CopyContactProperty):
prop_id = ast_node.contact_prop().id()
......@@ -848,7 +769,9 @@ class CGen:
self.print("#pragma omp parallel for")
self.print(f"for(int {iterator} = {lower_range}; {iterator} < {upper_range}; {iterator}++) {{")
self.loop_scope = True
self.generate_statement(ast_node.block)
self.loop_scope = False
self.print("}")
......@@ -917,9 +840,6 @@ class CGen:
if isinstance(ast_node, ModuleCall):
module_params = ["pairs_runtime", "pobj"]
if ast_node.module.name=="init_domain":
module_params += ["argc", "argv"]
module_params += [f"{param.name()}" for param in ast_node.module.parameters()]
print_params = ", ".join(module_params)
......@@ -1015,9 +935,6 @@ class CGen:
if self.target.is_gpu() and fp.device_flag:
self.print(f"pairs_runtime->copyFeaturePropertyToDevice({fp.id()}); // {fp.name()}")
if isinstance(ast_node, Timestep):
self.generate_statement(ast_node.block)
if isinstance(ast_node, ReallocProperty):
p = ast_node.property()
ptr_addr = self.generate_object_address(p)
......@@ -1060,7 +977,9 @@ class CGen:
if isinstance(ast_node, While):
cond = self.generate_expression(ast_node.cond)
self.print(f"while({cond}) {{")
self.loop_scope = True
self.generate_statement(ast_node.block)
self.loop_scope = False
self.print("}")
if isinstance(ast_node, Return):
......@@ -1098,9 +1017,6 @@ class CGen:
if ast_node.name().startswith("pairs::"):
extra_params += ["pairs_runtime"]
if ast_node.name() == "pairs_runtime->initDomain":
extra_params += ["&argc", "&argv"]
params = ", ".join(extra_params + [str(self.generate_expression(p)) for p in ast_node.parameters()])
return f"{ast_node.name()}({params})"
......
......@@ -20,7 +20,6 @@ from pairs.sim.domain_partitioners import DomainPartitioners
from pairs.ir.print import PrintCode
from pairs.ir.assign import Assign
from pairs.sim.contact_history import BuildContactHistory, ClearUnusedContactHistory, ResetContactHistoryUsageStatus
from pairs.sim.thermo import ComputeThermo
class InterfaceModules:
def __init__(self, sim):
......@@ -28,7 +27,7 @@ class InterfaceModules:
def create_all(self):
self.initialize()
self.setup_sim()
self.setup_cells()
self.update_domain()
self.update_cells(self.sim.reneighbor_frequency)
self.communicate(self.sim.reneighbor_frequency)
......@@ -40,9 +39,6 @@ class InterfaceModules:
self.build_contact_history(self.sim.reneighbor_frequency)
self.reset_contact_history()
if self.sim._compute_thermo != 0:
self.compute_thermo(self.sim._compute_thermo)
self.rank()
self.nlocal()
self.nghost()
......@@ -60,26 +56,27 @@ class InterfaceModules:
PrintCode(self.sim, f"pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});")
PrintCode(self.sim, f"pobj = new PairsObjects();")
if self.sim.grid is None:
self.sim.grid = MutableGrid(self.sim, self.sim.dims)
inits = Block.from_list(self.sim, [
RegisterTimers(self.sim),
RegisterMarkers(self.sim),
DeclareVariables(self.sim),
DeclareArrays(self.sim),
AllocateProperties(self.sim),
AllocateContactProperties(self.sim),
AllocateFeatureProperties(self.sim),
RegisterTimers(self.sim),
RegisterMarkers(self.sim)
])
if self.sim.create_domain_at_initialization:
self.sim.add_statement(Block.merge_blocks(inits, self.sim.create_domain))
else:
assert self.sim.grid is None, "A grid already exists"
self.sim.grid = MutableGrid(self.sim, self.sim.dims)
self.sim.add_statement(inits)
if self.sim._enable_profiler:
PrintCode(self.sim, "LIKWID_MARKER_INIT;")
self.sim.add_statement(inits)
@pairs_interface_block
def setup_sim(self):
self.sim.module_name('setup_sim')
def setup_cells(self):
self.sim.module_name('setup_cells')
if self.sim.cell_lists.runtime_spacing:
for d in range(self.sim.dims):
......@@ -88,7 +85,6 @@ class InterfaceModules:
if self.sim.cell_lists.runtime_cutoff_radius:
Assign(self.sim, self.sim.cell_lists.cutoff_radius, Parameter(self.sim, 'cutoff_radius', Types.Real))
self.sim.add_statement(self.sim.setup_particles)
# This update assumes all particles have been created exactly in the rank that contains them
self.sim.add_statement(UpdateDomain(self.sim))
self.sim.add_statement(BuildCellListsStencil(self.sim, self.sim.cell_lists))
......@@ -164,11 +160,6 @@ class InterfaceModules:
self.sim.add_statement(ResetContactHistoryUsageStatus(self.sim, self.sim._contact_history))
self.sim.add_statement(ClearUnusedContactHistory(self.sim, self.sim._contact_history))
@pairs_interface_block
def compute_thermo(self):
self.sim.module_name('compute_thermo')
self.sim.add_statement(ComputeThermo(self.sim))
@pairs_interface_block
def rank(self):
self.sim.module_name('rank')
......@@ -189,63 +180,15 @@ class InterfaceModules:
self.sim.module_name('size')
Return(self.sim, ScalarOp.inline(self.sim.nlocal + self.sim.nghost))
@pairs_interface_block
def create_sphere(self):
self.sim.module_name('create_sphere')
x = Parameter(self.sim, 'x', Types.Real)
y = Parameter(self.sim, 'y', Types.Real)
z = Parameter(self.sim, 'z', Types.Real)
vx = Parameter(self.sim, 'vx', Types.Real)
vy = Parameter(self.sim, 'vy', Types.Real)
vz = Parameter(self.sim, 'vz', Types.Real)
density = Parameter(self.sim, 'density', Types.Real)
radius = Parameter(self.sim, 'radius', Types.Real)
ptype = Parameter(self.sim, 'type', Types.Real)
flag = Parameter(self.sim, 'flag', Types.Real)
Return(self.sim, Call(self.sim, "pairs::create_sphere",
[x, y, z, vx, vy, vz,
density, radius, ptype, flag], Types.UInt64))
@pairs_interface_block
def create_halfspace(self):
self.sim.module_name('create_halfspace')
x = Parameter(self.sim, 'x', Types.Real)
y = Parameter(self.sim, 'y', Types.Real)
z = Parameter(self.sim, 'z', Types.Real)
nx = Parameter(self.sim, 'nx', Types.Real)
ny = Parameter(self.sim, 'ny', Types.Real)
nz = Parameter(self.sim, 'nz', Types.Real)
ptype = Parameter(self.sim, 'type', Types.Real)
flag = Parameter(self.sim, 'flag', Types.Real)
Return(self.sim, Call(self.sim, "pairs::create_halfspace",
[x, y, z, nx, ny, nz, ptype, flag], Types.UInt64))
@pairs_interface_block
def dem_sc_grid(self):
self.sim.module_name('dem_sc_grid')
xmax = Parameter(self.sim, 'xmax', Types.Real)
ymax = Parameter(self.sim, 'ymax', Types.Real)
zmax = Parameter(self.sim, 'zmax', Types.Real)
spacing = Parameter(self.sim, 'spacing', Types.Real)
diameter = Parameter(self.sim, 'diameter', Types.Real)
min_diameter = Parameter(self.sim, 'min_diameter', Types.Real)
max_diameter = Parameter(self.sim, 'max_diameter', Types.Real)
initial_velocity = Parameter(self.sim, 'initial_velocity', Types.Real)
particle_density = Parameter(self.sim, 'particle_density', Types.Real)
ntypes = Parameter(self.sim, 'ntypes', Types.Int32)
Assign(self.sim, self.sim.nlocal,
Call_Int(self.sim, "pairs::dem_sc_grid",
[xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter,
initial_velocity, particle_density, ntypes]))
Return(self.sim, self.sim.nlocal)
@pairs_interface_block
def end(self):
self.sim.module_name('end')
if self.sim._enable_profiler:
PrintCode(self.sim, "LIKWID_MARKER_CLOSE;")
Call_Void(self.sim, "pairs::print_timers", [])
Call_Void(self.sim, "pairs::log_timers", [])
Call_Void(self.sim, "pairs::print_stats", [self.sim.nlocal, self.sim.nghost])
PrintCode(self.sim, "delete pobj;")
PrintCode(self.sim, "delete pairs_runtime;")
......@@ -12,9 +12,20 @@ def pairs_inline(func):
return inner
def pairs_block(func):
"""This decorator needs the owning class of the method being decorated to have a 'run_on_device' member.
If the variable doesn't exist it defaults to False here, hence pairs_host_block gets used."""
def wrapper(*args, **kwargs):
self = args[0]
decorator = pairs_device_block if getattr(self, "run_on_device", False) else pairs_host_block
return decorator(func)(*args, **kwargs)
return wrapper
def pairs_host_block(func):
def inner(*args, **kwargs):
sim = args[0].sim # self.sim
profile = getattr(args[0], "profile", False)
sim.init_block()
func(*args, **kwargs)
return Module(sim,
......@@ -22,7 +33,8 @@ def pairs_host_block(func):
block=Block(sim, sim._block),
resizes_to_check=sim._resizes_to_check,
check_properties_resize=sim._check_properties_resize,
run_on_device=False)
run_on_device=False,
profile=profile)
return inner
......@@ -30,6 +42,7 @@ def pairs_host_block(func):
def pairs_device_block(func):
def inner(*args, **kwargs):
sim = args[0].sim # self.sim
profile = getattr(args[0], "profile", False)
sim.init_block()
func(*args, **kwargs)
return Module(sim,
......@@ -37,7 +50,8 @@ def pairs_device_block(func):
block=Block(sim, sim._block),
resizes_to_check=sim._resizes_to_check,
check_properties_resize=sim._check_properties_resize,
run_on_device=True)
run_on_device=True,
profile=profile)
return inner
......
......@@ -88,8 +88,8 @@ class For(ASTNode):
class ParticleFor(For):
def __init__(self, sim, block=None, local_only=True):
super().__init__(sim, 0, sim.nlocal if local_only else sim.nlocal + sim.nghost, block)
def __init__(self, sim, block=None, local_only=True, not_kernel=False):
super().__init__(sim, 0, sim.nlocal if local_only else sim.nlocal + sim.nghost, block, not_kernel)
self.local_only = local_only
def __str__(self):
......
......@@ -83,7 +83,48 @@ class Abs(MathFunction):
def type(self):
return self._params[0].type()
class Min(MathFunction):
def __init__(self, sim, a, b):
super().__init__(sim)
self._params = [a, b]
def __str__(self):
return f"Min<{self._params}>"
def function_name(self):
return "MIN"
def type(self):
return self._params[0].type()
class Max(MathFunction):
def __init__(self, sim, a, b):
super().__init__(sim)
self._params = [a, b]
def __str__(self):
return f"Max<{self._params}>"
def function_name(self):
return "MAX"
def type(self):
return self._params[0].type()
class Sign(MathFunction):
def __init__(self, sim, expr):
super().__init__(sim)
self._params = [expr]
def __str__(self):
return f"Sign<{self._params}>"
def function_name(self):
return "SIGN"
def type(self):
return self._params[0].type()
class Sin(MathFunction):
def __init__(self, sim, expr):
super().__init__(sim)
......
......@@ -16,8 +16,8 @@ class Module(ASTNode):
block=None,
resizes_to_check={},
check_properties_resize=False,
run_on_device=False,
user_defined=False,
run_on_device=False,
profile=False,
interface=False):
super().__init__(sim)
self._id = Module.last_module
......@@ -29,24 +29,22 @@ class Module(ASTNode):
self._contact_properties = {}
self._feature_properties = {}
self._host_references = set()
self._device_copies = set()
self._block = block
self._resizes_to_check = resizes_to_check
self._check_properties_resize = check_properties_resize
self._run_on_device = run_on_device
self._user_defined = user_defined
self._interface = interface
self._return_type = Types.Void
self._profile = False
self._profile = profile
if profile:
self.sim.enable_profiler()
if user_defined:
assert not interface, ("User-defined modules can't be part of the interface directly."
"Wrap them inside seperate interface modules.")
sim.add_udf_module(self)
if interface:
sim.add_interface_module(self)
else:
if interface:
sim.add_interface_module(self)
else:
sim.add_module(self)
sim.add_module(self)
Module.last_module += 1
......@@ -69,10 +67,6 @@ class Module(ASTNode):
def run_on_device(self):
return self._run_on_device
@property
def user_defined(self):
return self._user_defined
@property
def interface(self):
return self._interface
......@@ -115,6 +109,9 @@ class Module(ASTNode):
def host_references(self):
return self._host_references
def device_copies(self):
return self._device_copies
def add_array(self, array, write=False):
array_list = array if isinstance(array, list) else [array]
new_op = 'w' if write else 'r'
......@@ -185,6 +182,9 @@ class Module(ASTNode):
def add_host_reference(self, elem):
self._host_references.add(elem)
def add_device_copy(self, elem):
self._device_copies.add(elem)
def children(self):
return [self._block]
......
......@@ -7,16 +7,20 @@ from pairs.ir.types import Types
class Symbol(ASTTerm):
def __init__(self, sim, sym_type):
def __init__(self, sim, sym_type, name=None):
super().__init__(sim, OperatorClass.from_type(sym_type))
self.sym_type = sym_type
self.assign_to = None
self.name = name
def __str__(self):
return f"Symbol<{Types.c_keyword(self.sim, self.sym_type)}>"
def assign(self, node):
self.assign_to = node
def get_assigned_node(self):
return self.assign_to
def type(self):
return self.sym_type
......
class Timers:
Invalid = -1
All = 0
Communication = 1
DeviceTransfers = 2
Offset = 3
Communication = 0
DeviceTransfers = 1
Offset = 2
def name(timer):
return "all" if timer == Timers.All else \
"mpi" if timer == Timers.Communication else \
"transfers" if timer == Timers.DeviceTransfers else \
"invalid"
return "MARKERS::MPI" if timer == Timers.Communication else \
"MARKERS::DEVICE_TRANSFERS" if timer == Timers.DeviceTransfers else \
"INVALID"
......@@ -4,6 +4,8 @@ from pairs.ir.assign import Assign
from pairs.ir.scalars import ScalarOp
from pairs.ir.lit import Lit
from pairs.ir.operator_class import OperatorClass
from pairs.ir.types import Types
from pairs.ir.accessor_class import AccessorClass
class Variables:
......@@ -23,10 +25,10 @@ class Variables:
self.vars.append(var)
return var
def add_temp(self, init):
def add_temp(self, init, type):
lit = Lit.cvt(self.sim, init)
tmp_id = Variables.new_temp_id()
tmp_var = Var(self.sim, f"tmp{tmp_id}", lit.type(), temp=True)
tmp_var = Var(self.sim, f"tmp{tmp_id}", lit.type() if type is None else type, temp=True)
Assign(self.sim, tmp_var, lit)
return tmp_var
......@@ -57,6 +59,11 @@ class Var(ASTTerm):
def __str__(self):
return f"Var<{self.var_name}>"
def __getitem__(self, index):
assert not Types.is_scalar(self.var_type)
_acc_class = AccessorClass.from_type(self.var_type)
return _acc_class(self.sim, self, Lit.cvt(self.sim, index))
def copy(self, deep=False):
# Terminal copies are just themselves
return self
......
......@@ -13,7 +13,10 @@ from pairs.ir.types import Types
from pairs.mapping.keywords import Keywords
from pairs.sim.flags import Flags
from pairs.sim.interaction import ParticleInteraction
from pairs.sim.global_interaction import GlobalLocalInteraction, GlobalGlobalInteraction, GlobalReduction, SortGlobals, PackGlobals, ResetReductionProps, ReduceGlobals, UnpackGlobals
from pairs.ir.module import Module
from pairs.ir.block import Block, pairs_block
from pairs.sim.lowerable import Lowerable
class UndefinedSymbol():
def __init__(self, symbol_id):
......@@ -284,12 +287,30 @@ class BuildParticleIR(ast.NodeVisitor):
op_class = OperatorClass.from_type(operand.type())
return op_class(self.sim, operand, None, BuildParticleIR.get_unary_op(node.op))
def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=False, skip_first=False):
if sim._generate_whole_program:
assert not parameters, "Compute functions can't take custom parameters when generating whole program."
class OneBodyKernel(Lowerable):
def __init__(self, sim, module_name, run_on_device, profile):
super().__init__(sim)
self.block = Block(sim, [])
self.module_name = module_name
self.run_on_device = run_on_device
self.profile = profile
def add_statement(self, stmt):
self.block.add_statement(stmt)
def __iter__(self):
self.sim.add_statement(self)
self.sim.enter(self)
for i in ParticleFor(self.sim):
yield i
self.sim.leave()
@pairs_block
def lower(self):
self.sim.module_name(self.module_name)
self.sim.add_statement(self.block)
def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False, run_on_device=True, profile=False):
src = inspect.getsource(func)
tree = ast.parse(src, mode='exec')
#print(ast.dump(ast.parse(src, mode='exec')))
......@@ -311,14 +332,15 @@ def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=F
sim.module_name(func.__name__)
if nparams == 1:
for i in ParticleFor(sim):
for _ in Filter(sim, ScalarOp.cmp(sim.particle_flags[i] & Flags.Fixed, 0)):
ir = BuildParticleIR(sim, symbols, parameters)
ir.add_symbols({params[0]: i})
ir.visit(tree)
for i in OneBodyKernel(sim, func.__name__, run_on_device=run_on_device, profile=profile):
ir = BuildParticleIR(sim, symbols, parameters)
ir.add_symbols({params[0]: i})
ir.visit(tree)
else:
for interaction_data in ParticleInteraction(sim, nparams, cutoff_radius):
# Compute local-local and local-global interactions
particle_interaction = ParticleInteraction(sim, func.__name__, nparams, cutoff_radius, run_on_device=run_on_device, profile=profile)
for interaction_data in particle_interaction:
# Start building IR
ir = BuildParticleIR(sim, symbols, parameters)
ir.add_symbols({
......@@ -332,43 +354,59 @@ def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, pre_step=F
'__contact_point__': interaction_data.contact_point(),
'__contact_normal__': interaction_data.contact_normal()
})
ir.visit(tree)
if sim._generate_whole_program:
if pre_step:
sim.build_pre_step_module_with_statements(skip_first=skip_first, profile=True)
else:
sim.build_module_with_statements(skip_first=skip_first, profile=True)
else:
sim.build_user_defined_function()
def setup(sim, func, symbols={}):
src = inspect.getsource(func)
tree = ast.parse(src, mode='exec')
# Fetch function info
info = FetchParticleFuncInfo()
info.visit(tree)
params = info.params()
nparams = info.nparams()
# Compute functions must have parameters
assert nparams == 1, "Number of parameters from setup functions must be one!"
if compute_globals:
# If compute_globals is enabled, global interactions and reductions become
# part of the same module as local interactions
global_reduction = GlobalReduction(sim, func.__name__, particle_interaction)
SortGlobals(global_reduction) # Sort global bodies with respect to their uid
PackGlobals(global_reduction) # Pack reduction properties in uid order in an intermediate buffer
ResetReductionProps(global_reduction) # Reset reduction properties to be prepared for local reduction
# Compute local contirbutions on global bodies
global_local_interactions = GlobalLocalInteraction(sim, func.__name__, nparams)
for interaction_data in global_local_interactions:
ir = BuildParticleIR(sim, symbols, parameters)
ir.add_symbols({
params[0]: interaction_data.i(),
params[1]: interaction_data.j(),
'__i__': interaction_data.i(),
'__j__': interaction_data.j(),
'__delta__': interaction_data.delta(),
'__squared_distance__': interaction_data.squared_distance(),
'__penetration_depth__': interaction_data.penetration_depth(),
'__contact_point__': interaction_data.contact_point(),
'__contact_normal__': interaction_data.contact_normal()
})
ir.visit(tree)
# Convert literal symbols
symbols = {symbol: Lit.cvt(sim, value) for symbol, value in symbols.items()}
sim.init_block()
sim.module_name(func.__name__)
PackGlobals(global_reduction, False) # Pack local contributions in reduction buffer in uid order
ReduceGlobals(global_reduction) # Inplace reduce local contributions over global bodies in the reduction buffer
UnpackGlobals(global_reduction) # Add the reduced properties with the intermediate buffer and unpack into global bodies
for i in ParticleFor(sim):
ir = BuildParticleIR(sim, symbols)
ir.add_symbols({params[0]: i})
ir.visit(tree)
# Compute global-global interactions
global_global_interactions = GlobalGlobalInteraction(sim, func.__name__, nparams)
for interaction_data in global_global_interactions:
ir = BuildParticleIR(sim, symbols, parameters)
ir.add_symbols({
params[0]: interaction_data.i(),
params[1]: interaction_data.j(),
'__i__': interaction_data.i(),
'__j__': interaction_data.j(),
'__delta__': interaction_data.delta(),
'__squared_distance__': interaction_data.squared_distance(),
'__penetration_depth__': interaction_data.penetration_depth(),
'__contact_point__': interaction_data.contact_point(),
'__contact_normal__': interaction_data.contact_normal()
})
ir.visit(tree)
if sim._generate_whole_program:
sim.build_setup_module_with_statements()
else:
sim.build_user_defined_function()
# User defined functions are wrapped inside seperate interface modules here.
# The udf's have the same name as their interface module but they get implemented in the pairs::internal scope.
sim.build_interface_module_with_statements(run_on_device)
......@@ -13,7 +13,7 @@ from pairs.ir.types import Types
from pairs.ir.print import Print
from pairs.ir.vectors import Vector, ZeroVector
from pairs.sim.shapes import Shapes
from pairs.sim.flags import Flags
class Keywords:
def __init__(self, sim):
......@@ -47,12 +47,42 @@ class Keywords:
assert particle_id.type() == Types.Int32, "Particle ID must be an integer."
return ScalarOp.cmp(self.sim.particle_shape[particle_id], Shapes.Sphere)
def keyword_is_box(self, args):
assert len(args) == 1, "is_box() keyword requires one parameter."
particle_id = args[0]
assert particle_id.type() == Types.Int32, "Particle ID must be an integer."
return ScalarOp.cmp(self.sim.particle_shape[particle_id], Shapes.Box)
def keyword_is_halfspace(self, args):
assert len(args) == 1, "is_sphere() keyword requires one parameter."
particle_id = args[0]
assert particle_id.type() == Types.Int32, "Particle ID must be an integer."
return ScalarOp.cmp(self.sim.particle_shape[particle_id], Shapes.Halfspace)
def keyword_is_infinite(self, args):
assert len(args) == 1, "is_infinite() keyword requires one parameter."
particle_id = args[0]
assert particle_id.type() == Types.Int32, "Particle ID must be an integer."
return self.sim.particle_flags[particle_id] & Flags.Infinite
def keyword_is_ghost(self, args):
assert len(args) == 1, "is_ghost() keyword requires one parameter."
particle_id = args[0]
assert particle_id.type() == Types.Int32, "Particle ID must be an integer."
return self.sim.particle_flags[particle_id] & Flags.Ghost
def keyword_is_fixed(self, args):
assert len(args) == 1, "is_fixed() keyword requires one parameter."
particle_id = args[0]
assert particle_id.type() == Types.Int32, "Particle ID must be an integer."
return self.sim.particle_flags[particle_id] & Flags.Fixed
def keyword_is_global(self, args):
assert len(args) == 1, "is_global() keyword requires one parameter."
particle_id = args[0]
assert particle_id.type() == Types.Int32, "Particle ID must be an integer."
return self.sim.particle_flags[particle_id] & Flags.Global
def keyword_select(self, args):
assert len(args) == 3, "select() keyword requires three parameters!"
return Select(self.sim, args[0], args[1], args[2])
......@@ -125,6 +155,10 @@ class Keywords:
assert len(args) == 0, "zero_vector() keyword requires no parameter."
return ZeroVector(self.sim)
def keyword_vector(self, args):
assert len(args) == self.sim.ndims(), "vector()"
return Vector(self.sim, [args[i] for i in range(self.sim.ndims())])
def keyword_transposed(self, args):
assert len(args) == 1, "transposed() keyword requires one parameter."
matrix = args[0]
......@@ -151,13 +185,18 @@ class Keywords:
inv_det * ((matrix[3] * matrix[7]) - (matrix[4] * matrix[6])),
inv_det * ((matrix[6] * matrix[1]) - (matrix[7] * matrix[0])),
inv_det * ((matrix[0] * matrix[4]) - (matrix[1] * matrix[3])) ])
def keyword_diagonal_matrix(self, args):
assert len(args) == 1, "diagonal_matrix() keyword requires one parameter!"
value = args[0]
nelems = Types.number_of_elements(self.sim, Types.Matrix)
return Matrix(self.sim, [value if i % (self.sim.ndims() + 1) == 0 else 0.0 \
for i in range(nelems)])
assert len(args) == 1 or len(args) == self.sim.ndims(), f"diagonal_matrix() keyword requires 1 or {self.sim.ndims()} parameters!"
if len(args) == 1:
value = args[0]
nelems = Types.number_of_elements(self.sim, Types.Matrix)
return Matrix(self.sim, [value if i % (self.sim.ndims() + 1) == 0 else 0.0 \
for i in range(nelems)])
elif len(args) == self.sim.ndims():
nelems = Types.number_of_elements(self.sim, Types.Matrix)
return Matrix(self.sim, [args[i % self.sim.ndims()] if i % (self.sim.ndims() + 1) == 0 else 0.0 \
for i in range(nelems)])
def keyword_matrix_multiplication(self, args):
assert len(args) == 2, "matrix_multiplication() keyword requires two parameters!"
......
......@@ -27,7 +27,6 @@ class CellLists:
self.spacing = spacing if isinstance(spacing, list) else [spacing for d in range(sim.ndims())]
self.runtime_spacing = False
else:
assert self.sim._generate_whole_program==False, "Cell spacing needs to be defined when generating whole program."
self.spacing = self.sim.add_array('spacing', self.sim.ndims(), Types.Real)
self.runtime_spacing = True
......@@ -35,7 +34,6 @@ class CellLists:
self.cutoff_radius = cutoff_radius
self.runtime_cutoff_radius = False
else:
assert self.sim._generate_whole_program==False, "cutoff_radius needs to be defined when generating whole program."
self.cutoff_radius = self.sim.add_var('cutoff_radius', Types.Real)
self.runtime_cutoff_radius = True
......@@ -132,7 +130,7 @@ class BuildCellLists(Lowerable):
for i in ParticleFor(self.sim, local_only=False):
flat_index = self.sim.add_temp_var(0)
for _ in Filter(self.sim, ASTTerm.not_op(particle_flags[i] & Flags.Infinite)):
for _ in Filter(self.sim, ASTTerm.not_op(particle_flags[i] & (Flags.Infinite | Flags.Global))):
cell_index = [
Cast.int(self.sim,
(positions[i][dim] - (dom_part.min(dim) - spacing[dim])) / spacing[dim]) \
......
from pairs.ir.assign import Assign
from pairs.ir.block import pairs_inline
from pairs.ir.functions import Call_Int, Call_Void
from pairs.ir.particle_attributes import ParticleAttributeList
from pairs.ir.types import Types
from pairs.sim.grid import Grid3D
from pairs.sim.lowerable import Lowerable
class CopperFCCLattice(Lowerable):
def __init__(self, sim, nx, ny, nz, rho, temperature, ntypes):
super().__init__(sim)
self._nx = nx
self._ny = ny
self._nz = nz
self._rho = rho
self._temperature = temperature
self._ntypes = ntypes
lattice = pow((4.0 / rho), (1.0 / 3.0))
self._xprd = nx * lattice
self._yprd = ny * lattice
self._zprd = nz * lattice
sim.set_domain([0.0, 0.0, 0.0, self._xprd, self._yprd, self._zprd])
@pairs_inline
def lower(self):
Assign(self.sim, self.sim.nlocal,
Call_Int(self.sim, "pairs::copper_fcc_lattice",
[self._nx, self._ny, self._nz,
self._xprd, self._yprd, self._zprd,
self._rho, self._ntypes]))
Call_Void(self.sim, "pairs::adjust_thermo",
[self.sim.nlocal, self._xprd, self._yprd, self._zprd, self._temperature])
from pairs.ir.assign import Assign
from pairs.ir.block import pairs_inline
from pairs.ir.functions import Call_Int
from pairs.sim.lowerable import Lowerable
class DEMSCGrid(Lowerable):
def __init__(
self, sim, xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter, initial_velocity, particle_density, ntypes):
super().__init__(sim)
self._xmax = xmax
self._ymax = ymax
self._zmax = zmax
self._spacing = spacing
self._diameter = diameter
self._min_diameter = min_diameter
self._max_diameter = max_diameter
self._initial_velocity = initial_velocity
self._particle_density = particle_density
self._ntypes = ntypes
#sim.set_domain([0.0, 0.0, 0.0, self._xprd, self._yprd, self._zprd])
@pairs_inline
def lower(self):
Assign(self.sim, self.sim.nlocal,
Call_Int(self.sim, "pairs::dem_sc_grid",
[self._xmax, self._ymax, self._zmax, self._spacing, self._diameter, self._min_diameter, self._max_diameter,
self._initial_velocity, self._particle_density, self._ntypes]))
from pairs.ir.block import pairs_inline
from pairs.sim.lowerable import Lowerable
class InitializeDomain(Lowerable):
def __init__(self, sim):
super().__init__(sim)
@pairs_inline
def lower(self):
self.sim.domain_partitioning().initialize()
class UpdateDomain(Lowerable):
def __init__(self, sim):
super().__init__(sim)
......
......@@ -11,8 +11,7 @@ from pairs.sim.grid import MutableGrid
from pairs.ir.device import CopyArray
from pairs.ir.contexts import Contexts
from pairs.ir.actions import Actions
from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms
from pairs.ir.print import PrintCode
class DimensionRanges:
def __init__(self, sim):
self.sim = sim
......@@ -45,10 +44,6 @@ class DimensionRanges:
def reduce_sum_step_indexes(self, step, array):
return sum([array[i] for i in self.step_indexes(step)])
def initialize(self):
grid_array = [self.sim.grid.min(d) for d in range(self.sim.ndims())] + [self.sim.grid.max(d) for d in range(self.sim.ndims())]
Call_Void(self.sim, "pairs_runtime->initDomain", grid_array)
def update(self):
Call_Void(self.sim, "pairs_runtime->updateDomain", [])
Assign(self.sim, self.rank, Call_Int(self.sim, "pairs_runtime->getDomainPartitioner()->getRank", []))
......@@ -140,48 +135,33 @@ class BlockForest:
return self.reduce_step
def initialize(self):
grid_array = [self.sim.grid.min(d) for d in range(self.sim.ndims())] + [self.sim.grid.max(d) for d in range(self.sim.ndims())]
Call_Void(self.sim, "pairs_runtime->initDomain",
grid_array + self.sim._pbc + ([True] if self.load_balancer is not None else []))
if self.load_balancer is not None:
PrintCode(self.sim, "pairs_runtime->getDomainPartitioner()->initWorkloadBalancer"
f"({LoadBalancingAlgorithms.c_keyword(self.load_balancer)}, {self.regrid_min}, {self.regrid_max});")
# Call_Void(self.sim, "pairs_runtime->getDomainPartitioner()->initWorkloadBalancer",
# [self.load_balancer, self.regrid_min, self.regrid_max])
def update(self):
Call_Void(self.sim, "pairs_runtime->updateDomain", [])
Assign(self.sim, self.rank, Call_Int(self.sim, "pairs_runtime->getDomainPartitioner()->getRank", []))
Assign(self.sim, self.nranks, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborRanks", []))
Assign(self.sim, self.ntotal_aabbs, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborAABBs", []))
for _ in Filter(self.sim, ScalarOp.neq(self.nranks, 0)):
Assign(self.sim, self.ntotal_aabbs, Call_Int(self.sim, "pairs_runtime->getNumberOfNeighborAABBs", []))
for _ in Filter(self.sim, self.nranks_capacity < self.nranks):
Assign(self.sim, self.nranks_capacity, self.nranks + 10)
self.ranks.realloc()
self.naabbs.realloc()
self.aabb_offsets.realloc()
for _ in Filter(self.sim, self.nranks_capacity < self.nranks):
Assign(self.sim, self.nranks_capacity, self.nranks + 10)
self.ranks.realloc()
self.naabbs.realloc()
self.aabb_offsets.realloc()
for _ in Filter(self.sim, self.aabb_capacity < self.ntotal_aabbs):
Assign(self.sim, self.aabb_capacity, self.ntotal_aabbs + 20)
self.aabbs.realloc()
CopyArray(self.sim, self.ranks, Contexts.Host, Actions.WriteOnly, self.nranks)
CopyArray(self.sim, self.naabbs, Contexts.Host, Actions.WriteOnly, self.nranks)
CopyArray(self.sim, self.aabb_offsets, Contexts.Host, Actions.WriteOnly, self.nranks)
CopyArray(self.sim, self.aabbs, Contexts.Host, Actions.WriteOnly, self.ntotal_aabbs * 6)
CopyArray(self.sim, self.subdom, Contexts.Host, Actions.WriteOnly)
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['ranks', self.ranks, self.nranks])
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks])
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabb_offsets', self.aabb_offsets, self.nranks])
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabbs', self.aabbs, self.ntotal_aabbs * 6])
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2])
for _ in Filter(self.sim, self.aabb_capacity < self.ntotal_aabbs):
Assign(self.sim, self.aabb_capacity, self.ntotal_aabbs + 20)
self.aabbs.realloc()
CopyArray(self.sim, self.ranks, Contexts.Host, Actions.WriteOnly, self.nranks)
CopyArray(self.sim, self.naabbs, Contexts.Host, Actions.WriteOnly, self.nranks)
CopyArray(self.sim, self.aabb_offsets, Contexts.Host, Actions.WriteOnly, self.nranks)
CopyArray(self.sim, self.aabbs, Contexts.Host, Actions.WriteOnly, self.ntotal_aabbs * 6)
CopyArray(self.sim, self.subdom, Contexts.Host, Actions.WriteOnly)
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['ranks', self.ranks, self.nranks])
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['naabbs', self.naabbs, self.nranks])
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabb_offsets', self.aabb_offsets, self.nranks])
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['aabbs', self.aabbs, self.ntotal_aabbs * 6])
Call_Void(self.sim, "pairs_runtime->copyRuntimeArray", ['subdom', self.subdom, self.sim.ndims() * 2])
if isinstance(self.sim.grid, MutableGrid):
for d in range(self.sim.dims):
......@@ -229,7 +209,7 @@ class BlockForest:
for _ in Filter(self.sim, full_cond):
yield i, r, self.ranks[r], pbc_shifts
for _ in Filter(self.sim, ScalarOp.cmp(self.ranks[r] , self.rank)): # if my neighbor is me (cuz I'm the only rank in a dimension that has pbc)
for _ in Filter(self.sim, ScalarOp.cmp(self.ranks[r] , self.rank)): # if my neighbor is me
pbc_shifts = []
isghost = Lit(self.sim, 0)
......
from pairs.ir.assign import Assign
from pairs.ir.scalars import ScalarOp
from pairs.ir.block import pairs_inline, pairs_device_block, pairs_host_block
from pairs.ir.branches import Filter
from pairs.ir.loops import For, ParticleFor
from pairs.ir.types import Types
from pairs.ir.device import CopyArray
from pairs.ir.contexts import Contexts
from pairs.ir.actions import Actions
from pairs.ir.sizeof import Sizeof
from pairs.ir.functions import Call_Void
from pairs.ir.cast import Cast
from pairs.sim.flags import Flags
from pairs.sim.lowerable import Lowerable
from pairs.sim.interaction import ParticleInteraction
class GlobalLocalInteraction(ParticleInteraction):
def __init__(self, sim, module_name, nbody, cutoff_radius=None, use_cell_lists=False):
super().__init__(sim, module_name, nbody, cutoff_radius, use_cell_lists)
@pairs_device_block
def lower(self):
self.sim.module_name(f"{self.module_name}_global_local_interactions")
if self.sim._target.is_gpu():
first_cell_bytes = self.sim.add_temp_var(0)
Assign(self.sim, first_cell_bytes, self.cell_lists.cell_capacity * Sizeof(self.sim, Types.Int32))
CopyArray(self.sim, self.cell_lists.cell_sizes, Contexts.Host, Actions.ReadOnly, first_cell_bytes)
for ishape in range(self.maxs):
if self.include_shape(ishape):
# Loop over the global cell
for p in For(self.sim, 0, self.cell_lists.cell_sizes[0]):
i = self.cell_lists.cell_particles[0][p]
# TODO: Skip if the bounding box of the global body doesn't intersect the subdom of this rank
for _ in Filter(self.sim, ScalarOp.and_op(
ScalarOp.cmp(self.sim.particle_shape[i], self.sim.get_shape_id(ishape)),
self.sim.particle_flags[i] & (Flags.Infinite | Flags.Global))):
for jshape in range(self.maxs):
if self.include_interaction(ishape, jshape):
# Globals are presenet in all ranks so they should not interact with ghosts
# TODO: Make this loop the kernel candiate and reduce forces on the global body
for j in ParticleFor(self.sim):
# Here we make make sure not to interact with other global bodies, otherwise
# their contributions will get reduced again over all ranks
for _ in Filter(self.sim, ScalarOp.and_op(
ScalarOp.cmp(self.sim.particle_shape[j], self.sim.get_shape_id(jshape)),
ScalarOp.not_op(self.sim.particle_flags[j] & (Flags.Infinite | Flags.Global)))):
for _ in Filter(self.sim, ScalarOp.neq(i, j)):
self.compute_interaction(i, j, ishape, jshape)
class GlobalGlobalInteraction(ParticleInteraction):
def __init__(self, sim, module_name, nbody, cutoff_radius=None, use_cell_lists=False):
super().__init__(sim, module_name, nbody, cutoff_radius, use_cell_lists)
@pairs_device_block
def lower(self):
self.sim.module_name(f"{self.module_name}_global_global_interactions")
if self.sim._target.is_gpu():
first_cell_bytes = self.sim.add_temp_var(0)
Assign(self.sim, first_cell_bytes, self.cell_lists.cell_capacity * Sizeof(self.sim, Types.Int32))
CopyArray(self.sim, self.cell_lists.cell_sizes, Contexts.Host, Actions.ReadOnly, first_cell_bytes)
for ishape in range(self.maxs):
if self.include_shape(ishape):
# Loop over the global cell
for p in For(self.sim, 0, self.cell_lists.cell_sizes[0]):
i = self.cell_lists.cell_particles[0][p]
for _ in Filter(self.sim, ScalarOp.and_op(
ScalarOp.cmp(self.sim.particle_shape[i], self.sim.get_shape_id(ishape)),
self.sim.particle_flags[i] & (Flags.Infinite | Flags.Global))):
for jshape in range(self.maxs):
if self.include_interaction(ishape, jshape):
# Loop over the global cell
for q in For(self.sim, 0, self.cell_lists.cell_sizes[0]):
j = self.cell_lists.cell_particles[0][q]
# Here we only compute interactions with other global bodies
for _ in Filter(self.sim, ScalarOp.and_op(
ScalarOp.cmp(self.sim.particle_shape[j], self.sim.get_shape_id(jshape)),
(self.sim.particle_flags[j] & (Flags.Infinite | Flags.Global)))):
for _ in Filter(self.sim, ScalarOp.neq(i, j)):
self.compute_interaction(i, j, ishape, jshape)
class GlobalReduction:
def __init__(self, sim, module_name, particle_interaction):
self.sim = sim
self.module_name = module_name
self.particle_interaction = particle_interaction
self.nglobal_red = sim.add_var('nglobal_red', Types.Int32) # Number of global particles that need reduction
self.nglobal_capacity = sim.add_var('nglobal_capacity', Types.Int32, 64)
self.global_elem_capacity = sim.add_var('global_elem_capacity', Types.Int32, 100)
self.red_buffer = sim.add_array('red_buffer', [self.nglobal_capacity, self.global_elem_capacity], Types.Real, arr_sync=False)
self.intermediate_buffer = sim.add_array('intermediate_buffer', [self.nglobal_capacity, self.global_elem_capacity], Types.Real, arr_sync=False)
self.sorted_idx = sim.add_array('sorted_idx', [self.nglobal_capacity], Types.Int32, arr_sync=False)
self.unsorted_idx = sim.add_array('unsorted_idx', [self.nglobal_capacity], Types.Int32, arr_sync=False)
self.removed_idx = sim.add_array('removed_idx', [self.nglobal_capacity], Types.Boolean, arr_sync=False)
self.red_props = set()
for ishape in range(self.sim.max_shapes()):
for jshape in range(self.sim.max_shapes()):
if self.particle_interaction.include_interaction(ishape, jshape):
for app in self.particle_interaction.apply_list[ishape*self.sim.max_shapes() + jshape]:
self.red_props.add(app.prop())
def global_particles(self):
for p in For(self.sim, 0, self.sim.cell_lists.cell_sizes[0]):
i = self.sim.cell_lists.cell_particles[0][p]
for ishape in range(self.sim.max_shapes()):
if self.particle_interaction.include_shape(ishape):
for _ in Filter(self.sim, ScalarOp.and_op(
ScalarOp.cmp(self.sim.particle_shape[i], self.sim.get_shape_id(ishape)),
self.sim.particle_flags[i] & (Flags.Infinite | Flags.Global))):
yield i
def get_elems_per_particle(self):
return sum([Types.number_of_elements(self.sim, p.type()) for p in self.red_props])
class SortGlobals(Lowerable):
def __init__(self, global_reduction):
super().__init__(global_reduction.sim)
self.global_reduction = global_reduction
self.sim.add_statement(self)
@pairs_host_block
def lower(self):
self.sim.module_name(f"{self.global_reduction.module_name}_sort_globals")
nglobal_capacity = self.global_reduction.nglobal_capacity
nglobal_red = self.global_reduction.nglobal_red
unsorted_idx = self.global_reduction.unsorted_idx
sorted_idx = self.global_reduction.sorted_idx
removed_idx = self.global_reduction.removed_idx
uid = self.sim.particle_uid
self.sim.check_resize(nglobal_capacity, nglobal_red)
Assign(self.sim, nglobal_red, 0)
for i in self.global_reduction.global_particles():
Assign(self.sim, unsorted_idx[nglobal_red], i)
Assign(self.sim, sorted_idx[nglobal_red], 0)
Assign(self.sim, removed_idx[nglobal_red], 0)
Assign(self.sim, nglobal_red, nglobal_red +1)
min_uid = self.sim.add_temp_var(0, Types.UInt64)
min_idx = self.sim.add_temp_var(0)
# Here we sort indices of global bodies with respect to their uid's.
# The sorted uid's will be in identical order on all ranks. This ensures that the
# reduced properties are mapped correctly to each global body during inplace reduction.
for i in For(self.sim, 0, nglobal_red):
Assign(self.sim, min_uid, -1) # TODO: Lit max: UINT64_MAX
Assign(self.sim, min_idx, 0)
for j in For(self.sim, 0, nglobal_red):
for _ in Filter(self.sim, ScalarOp.and_op(uid[unsorted_idx[j]] < min_uid,
ScalarOp.not_op(removed_idx[j]))):
Assign(self.sim, min_uid, uid[unsorted_idx[j]])
Assign(self.sim, min_idx, j)
Assign(self.sim, sorted_idx[i], unsorted_idx[min_idx])
Assign(self.sim, removed_idx[min_idx], 1)
class PackGlobals(Lowerable):
def __init__(self, global_reduction, save_state=True):
super().__init__(global_reduction.sim)
self.global_reduction = global_reduction
self.save_state = save_state
self.buffer = global_reduction.intermediate_buffer if save_state else global_reduction.red_buffer
self.sim.add_statement(self)
@pairs_device_block
def lower(self):
self.sim.module_name(f"{self.global_reduction.module_name}_pack_globals_{'intermediate' if self.save_state else 'reduce'}")
nglobal_red = self.global_reduction.nglobal_red
sorted_idx = self.global_reduction.sorted_idx
nelems_per_particle = self.global_reduction.get_elems_per_particle()
self.buffer.set_stride(1, nelems_per_particle)
for buffer_idx in For(self.sim, 0, nglobal_red):
i = sorted_idx[buffer_idx]
p_offset = 0
for p in self.global_reduction.red_props:
if not Types.is_scalar(p.type()):
nelems = Types.number_of_elements(self.sim, p.type())
for e in range(nelems):
Assign(self.sim, self.buffer[buffer_idx][p_offset + e], p[i][e])
p_offset += nelems
else:
cast_fn = lambda x: Cast(self.sim, x, Types.Real) if p.type() != Types.Real else x
Assign(self.sim, self.buffer[buffer_idx][p_offset], cast_fn(p[i]))
p_offset += 1
class ResetReductionProps(Lowerable):
def __init__(self, global_reduction):
super().__init__(global_reduction.sim)
self.global_reduction = global_reduction
self.sim.add_statement(self)
@pairs_device_block
def lower(self):
self.sim.module_name(f"{self.global_reduction.module_name}_reset_globals")
nglobal_red = self.global_reduction.nglobal_red
sorted_idx = self.global_reduction.sorted_idx
for buffer_idx in For(self.sim, 0, nglobal_red):
i = sorted_idx[buffer_idx]
for p in self.global_reduction.red_props:
Assign(self.sim, p[i], 0.0)
class ReduceGlobals(Lowerable):
def __init__(self, global_reduction):
super().__init__(global_reduction.sim)
self.global_reduction = global_reduction
self.sim.add_statement(self)
@pairs_inline
def lower(self):
nelems_total = self.global_reduction.nglobal_red * self.global_reduction.get_elems_per_particle()
Call_Void( self.sim, "pairs_runtime->allReduceInplaceSum", [self.global_reduction.red_buffer, nelems_total])
class UnpackGlobals(Lowerable):
def __init__(self, global_reduction):
super().__init__(global_reduction.sim)
self.global_reduction = global_reduction
self.sim.add_statement(self)
@pairs_device_block
def lower(self):
self.sim.module_name(f"{self.global_reduction.module_name}_unpack_globals")
nglobal_red = self.global_reduction.nglobal_red
sorted_idx = self.global_reduction.sorted_idx
red_buffer = self.global_reduction.red_buffer
intermediate_buffer = self.global_reduction.intermediate_buffer
for buffer_idx in For(self.sim, 0, nglobal_red):
i = sorted_idx[buffer_idx]
p_offset = 0
for p in self.global_reduction.red_props:
if not Types.is_scalar(p.type()):
nelems = Types.number_of_elements(self.sim, p.type())
for e in range(nelems):
Assign(self.sim, p[i][e], red_buffer[buffer_idx][p_offset + e] + intermediate_buffer[buffer_idx][p_offset + e])
p_offset += nelems
else:
cast_fn = lambda x: Cast(self.sim, x, p.type()) if p.type() != Types.Real else x
Assign(self.sim, p[i], cast_fn(red_buffer[buffer_idx][p_offset] + intermediate_buffer[buffer_idx][p_offset + e]))
p_offset += 1
\ No newline at end of file