From 84b762f595809f48d35322a228df7743dd848a94 Mon Sep 17 00:00:00 2001 From: Rafael Ravedutti <rafaelravedutti@gmail.com> Date: Thu, 30 Nov 2023 03:57:52 +0100 Subject: [PATCH] Run determine ghost/exchange particles kernels in GPU Signed-off-by: Rafael Ravedutti <rafaelravedutti@gmail.com> --- src/pairs/analysis/blocks.py | 10 ++++++++ src/pairs/analysis/devices.py | 10 ++++++++ src/pairs/analysis/modules.py | 14 ++++++++++++ src/pairs/code_gen/cgen.py | 14 +++++++++++- src/pairs/ir/atomic.py | 30 ++++++++++++++++++++++-- src/pairs/ir/mutator.py | 10 ++++++++ src/pairs/sim/comm.py | 34 ++++++++++++++++++++-------- src/pairs/transformations/modules.py | 18 +++++++++++++++ 8 files changed, 127 insertions(+), 13 deletions(-) diff --git a/src/pairs/analysis/blocks.py b/src/pairs/analysis/blocks.py index 0d2b061..a3ef7a8 100644 --- a/src/pairs/analysis/blocks.py +++ b/src/pairs/analysis/blocks.py @@ -34,6 +34,16 @@ class DiscoverBlockVariants(Visitor): self.visit(ast_node.resize) self.visit(ast_node.capacity) + def visit_AtomicInc(self, ast_node): + self.in_assignment = ast_node + self.visit(ast_node.elem) + self.in_assignment = None + self.visit(ast_node.value) + + if ast_node.check_for_resize(): + self.visit(ast_node.resize) + self.visit(ast_node.capacity) + def visit_For(self, ast_node): self.push_variant(ast_node.iterator) ast_node.block.add_variant(ast_node.iterator.name()) diff --git a/src/pairs/analysis/devices.py b/src/pairs/analysis/devices.py index 07f3a17..996a99e 100644 --- a/src/pairs/analysis/devices.py +++ b/src/pairs/analysis/devices.py @@ -64,6 +64,16 @@ class FetchKernelReferences(Visitor): self.visit(ast_node.resize) self.visit(ast_node.capacity) + def visit_AtomicInc(self, ast_node): + self.writing = True + self.visit(ast_node.elem) + self.writing = False + self.visit(ast_node.value) + + if ast_node.resize is not None: + self.visit(ast_node.resize) + self.visit(ast_node.capacity) + def visit_Kernel(self, ast_node): kernel_id = ast_node.kernel_id self.kernel_decls[kernel_id] = [] diff --git a/src/pairs/analysis/modules.py b/src/pairs/analysis/modules.py index 5b97619..4cf0bf8 100644 --- a/src/pairs/analysis/modules.py +++ b/src/pairs/analysis/modules.py @@ -38,6 +38,20 @@ class FetchModulesReferences(Visitor): self.visit(ast_node.resize) self.visit(ast_node.capacity) + def visit_AtomicInc(self, ast_node): + self.writing = True + self.visit(ast_node.elem) + self.writing = False + self.visit(ast_node.value) + + for m in self.module_stack: + if m.run_on_device: + ast_node.device_flag = True + + if ast_node.resize is not None: + self.visit(ast_node.resize) + self.visit(ast_node.capacity) + def visit_Module(self, ast_node): self.module_stack.append(ast_node) self.visit_children(ast_node) diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index b8dcd29..80716a8 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -1,7 +1,7 @@ import math from pairs.ir.actions import Actions from pairs.ir.assign import Assign -from pairs.ir.atomic import AtomicAdd +from pairs.ir.atomic import AtomicAdd, AtomicInc from pairs.ir.arrays import Array, ArrayAccess, DeclareStaticArray, RegisterArray, ReallocArray from pairs.ir.block import Block from pairs.ir.branches import Branch @@ -269,6 +269,18 @@ class CGen: src = self.generate_expression(ast_node._src) self.print(f"{dest} = {src};") + if isinstance(ast_node, AtomicInc): + elem = self.generate_expression(ast_node.elem, mem=True) + value = self.generate_expression(ast_node.value) + prefix = "" if ast_node.device_flag else "host_" + + if ast_node.check_for_resize(): + resize = self.generate_expression(ast_node.resize) + capacity = self.generate_expression(ast_node.capacity) + self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});") + else: + self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});") + if isinstance(ast_node, Block): self.print.add_indent(4) for stmt in ast_node.statements(): diff --git a/src/pairs/ir/atomic.py b/src/pairs/ir/atomic.py index 88c54b6..8937db6 100644 --- a/src/pairs/ir/atomic.py +++ b/src/pairs/ir/atomic.py @@ -1,3 +1,4 @@ +from pairs.ir.ast_node import ASTNode from pairs.ir.ast_term import ASTTerm from pairs.ir.scalars import ScalarOp from pairs.ir.lit import Lit @@ -39,5 +40,30 @@ class AtomicAdd(ASTTerm): return self.elem.type() def children(self): - return [self.elem, self.value] + \ - ([self.resize, self.capacity] if self.resize is not None else []) + return [self.elem, self.value] + \ + ([self.resize, self.capacity] if self.resize is not None else []) + + +class AtomicInc(ASTNode): + def __init__(self, sim, elem, value): + super().__init__(sim) + self.elem = ScalarOp.inline(elem) + self.value = Lit.cvt(sim, value) + self.resize = None + self.capacity = None + self.device_flag = False + sim.add_statement(self) + + def __str__(self): + return f"AtomicInc<{self.elem, self.value}>" + + def add_resize_check(self, resize, capacity): + self.resize = ScalarOp.inline(resize) + self.capacity = capacity + + def check_for_resize(self): + return self.resize is not None + + def children(self): + return [self.elem, self.value] + \ + ([self.resize, self.capacity] if self.resize is not None else []) diff --git a/src/pairs/ir/mutator.py b/src/pairs/ir/mutator.py index 42a5564..bbe06f9 100644 --- a/src/pairs/ir/mutator.py +++ b/src/pairs/ir/mutator.py @@ -79,6 +79,16 @@ class Mutator: return ast_node + def mutate_AtomicInc(self, ast_node): + ast_node.elem = self.mutate(ast_node.elem) + ast_node.value = self.mutate(ast_node.value) + + if ast_node.check_for_resize(): + ast_node.resize = self.mutate(ast_node.resize) + ast_node.capacity = self.mutate(ast_node.capacity) + + return ast_node + def mutate_Block(self, ast_node): ast_node.stmts = [self.mutate(s) for s in ast_node.stmts] return ast_node diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py index 5152eef..83db6e0 100644 --- a/src/pairs/sim/comm.py +++ b/src/pairs/sim/comm.py @@ -1,9 +1,12 @@ +from pairs.ir.actions import Actions from pairs.ir.assign import Assign -from pairs.ir.atomic import AtomicAdd +from pairs.ir.atomic import AtomicAdd, AtomicInc from pairs.ir.scalars import ScalarOp from pairs.ir.block import pairs_device_block, pairs_host_block, pairs_inline from pairs.ir.branches import Branch, Filter from pairs.ir.cast import Cast +from pairs.ir.contexts import Contexts +from pairs.ir.device import CopyArray from pairs.ir.functions import Call_Void from pairs.ir.loops import For, ParticleFor, While from pairs.ir.utils import Print @@ -54,6 +57,18 @@ class Comm: Assign(self.sim, self.sim.nghost, 0) for step in range(self.dom_part.number_of_steps()): + if self.sim._target.is_gpu(): + CopyArray(self.sim, self.nsend, Contexts.Host, Actions.Ignore) + CopyArray(self.sim, self.nrecv, Contexts.Host, Actions.Ignore) + + for j in self.dom_part.step_indexes(step): + Assign(self.sim, self.nsend[j], 0) + Assign(self.sim, self.nrecv[j], 0) + + if self.sim._target.is_gpu(): + CopyArray(self.sim, self.nsend, Contexts.Device, Actions.Ignore) + CopyArray(self.sim, self.nrecv, Contexts.Device, Actions.Ignore) + DetermineGhostParticles(self, step, self.sim.cell_spacing()) CommunicateSizes(self, step) SetCommunicationOffsets(self, step) @@ -70,13 +85,17 @@ class Comm: Assign(self.sim, self.nsend_all, 0) Assign(self.sim, self.sim.nghost, 0) - for s in range(step): + for s in range(step + 1): for j in self.dom_part.step_indexes(s): Assign(self.sim, self.nsend[j], 0) Assign(self.sim, self.nrecv[j], 0) Assign(self.sim, self.send_offsets[j], 0) Assign(self.sim, self.recv_offsets[j], 0) + if self.sim._target.is_gpu(): + CopyArray(self.sim, self.nsend, Contexts.Device, Actions.Ignore) + CopyArray(self.sim, self.nrecv, Contexts.Device, Actions.Ignore) + DetermineGhostParticles(self, step, 0.0) CommunicateSizes(self, step) SetCommunicationOffsets(self, step) @@ -124,12 +143,10 @@ class DetermineGhostParticles(Lowerable): self.spacing = spacing self.sim.add_statement(self) - #@pairs_device_block - @pairs_host_block + @pairs_device_block def lower(self): nsend_all = self.comm.nsend_all nsend = self.comm.nsend - nrecv = self.comm.nrecv send_map = self.comm.send_map send_mult = self.comm.send_mult exchg_flag = self.comm.exchg_flag @@ -139,10 +156,6 @@ class DetermineGhostParticles(Lowerable): self.sim.check_resize(self.comm.send_capacity, nsend) #self.sim.check_resize(self.comm.send_capacity, nsend_all) - for j in self.comm.dom_part.step_indexes(self.step): - Assign(self.sim, nsend[j], 0) - Assign(self.sim, nrecv[j], 0) - if is_exchange: for i in ParticleFor(self.sim): Assign(self.sim, exchg_flag[i], 0) @@ -157,7 +170,8 @@ class DetermineGhostParticles(Lowerable): for d in range(self.sim.ndims()): Assign(self.sim, send_mult[next_idx][d], pbc[d]) - Assign(self.sim, nsend[j], nsend[j] + 1) + #Assign(self.sim, nsend[j], nsend[j] + 1) + AtomicInc(self.sim, nsend[j], 1) class SetCommunicationOffsets(Lowerable): diff --git a/src/pairs/transformations/modules.py b/src/pairs/transformations/modules.py index 2024905..1ee2c9b 100644 --- a/src/pairs/transformations/modules.py +++ b/src/pairs/transformations/modules.py @@ -107,6 +107,24 @@ class AddResizeLogic(Mutator): return ast_node + def mutate_AtomicInc(self, ast_node): + dest = ast_node.elem + src = dest + ast_node.value + match_capacity = None + + if isinstance(dest, (ArrayAccess, Var)): + match_capacity = self.lookup_capacity([dest]) + + # Resize var is used in index, this statement should be checked for safety + if match_capacity is not None: + module = self.module_stack[-1] + resizes = list(self.module_resizes[module].keys()) + capacities = list(self.module_resizes[module].values()) + resize_id = resizes[capacities.index(match_capacity)] + ast_node.add_resize_check(ast_node.sim.resizes[resize_id], match_capacity) + + return ast_node + def mutate_Block(self, ast_node): self.block_stack.append(ast_node) ast_node.stmts = [self.mutate(s) for s in ast_node.stmts] -- GitLab