diff --git a/src/pairs/analysis/blocks.py b/src/pairs/analysis/blocks.py index 0d2b06157b87a54330fab9894814ecc5226c14ac..a3ef7a871c62b03cd914abcff0a87282967640ae 100644 --- a/src/pairs/analysis/blocks.py +++ b/src/pairs/analysis/blocks.py @@ -34,6 +34,16 @@ class DiscoverBlockVariants(Visitor): self.visit(ast_node.resize) self.visit(ast_node.capacity) + def visit_AtomicInc(self, ast_node): + self.in_assignment = ast_node + self.visit(ast_node.elem) + self.in_assignment = None + self.visit(ast_node.value) + + if ast_node.check_for_resize(): + self.visit(ast_node.resize) + self.visit(ast_node.capacity) + def visit_For(self, ast_node): self.push_variant(ast_node.iterator) ast_node.block.add_variant(ast_node.iterator.name()) diff --git a/src/pairs/analysis/devices.py b/src/pairs/analysis/devices.py index 07f3a173dee0cafe2011be467236931fc2ba6687..996a99ef93b4f499b9555af266573b999f4a1f56 100644 --- a/src/pairs/analysis/devices.py +++ b/src/pairs/analysis/devices.py @@ -64,6 +64,16 @@ class FetchKernelReferences(Visitor): self.visit(ast_node.resize) self.visit(ast_node.capacity) + def visit_AtomicInc(self, ast_node): + self.writing = True + self.visit(ast_node.elem) + self.writing = False + self.visit(ast_node.value) + + if ast_node.resize is not None: + self.visit(ast_node.resize) + self.visit(ast_node.capacity) + def visit_Kernel(self, ast_node): kernel_id = ast_node.kernel_id self.kernel_decls[kernel_id] = [] diff --git a/src/pairs/analysis/modules.py b/src/pairs/analysis/modules.py index 5b976196924b26f477499d82e2d7fcb6590dcae6..4cf0bf8002af97ad66ff2057301235c7756ab5ad 100644 --- a/src/pairs/analysis/modules.py +++ b/src/pairs/analysis/modules.py @@ -38,6 +38,20 @@ class FetchModulesReferences(Visitor): self.visit(ast_node.resize) self.visit(ast_node.capacity) + def visit_AtomicInc(self, ast_node): + self.writing = True + self.visit(ast_node.elem) + self.writing = False + self.visit(ast_node.value) + + for m in self.module_stack: + if m.run_on_device: + ast_node.device_flag = True + + if ast_node.resize is not None: + self.visit(ast_node.resize) + self.visit(ast_node.capacity) + def visit_Module(self, ast_node): self.module_stack.append(ast_node) self.visit_children(ast_node) diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index b8dcd293708b0b7dcd70cb2c2bc034db768e50c5..80716a8383a26aaebc8ee58e769b8c7adc1be2f9 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -1,7 +1,7 @@ import math from pairs.ir.actions import Actions from pairs.ir.assign import Assign -from pairs.ir.atomic import AtomicAdd +from pairs.ir.atomic import AtomicAdd, AtomicInc from pairs.ir.arrays import Array, ArrayAccess, DeclareStaticArray, RegisterArray, ReallocArray from pairs.ir.block import Block from pairs.ir.branches import Branch @@ -269,6 +269,18 @@ class CGen: src = self.generate_expression(ast_node._src) self.print(f"{dest} = {src};") + if isinstance(ast_node, AtomicInc): + elem = self.generate_expression(ast_node.elem, mem=True) + value = self.generate_expression(ast_node.value) + prefix = "" if ast_node.device_flag else "host_" + + if ast_node.check_for_resize(): + resize = self.generate_expression(ast_node.resize) + capacity = self.generate_expression(ast_node.capacity) + self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});") + else: + self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});") + if isinstance(ast_node, Block): self.print.add_indent(4) for stmt in ast_node.statements(): diff --git a/src/pairs/ir/atomic.py b/src/pairs/ir/atomic.py index 88c54b6180f3fce3d24aa0a2678c546667cf377f..8937db6473ac2034f23f809b314a3f96199127c9 100644 --- a/src/pairs/ir/atomic.py +++ b/src/pairs/ir/atomic.py @@ -1,3 +1,4 @@ +from pairs.ir.ast_node import ASTNode from pairs.ir.ast_term import ASTTerm from pairs.ir.scalars import ScalarOp from pairs.ir.lit import Lit @@ -39,5 +40,30 @@ class AtomicAdd(ASTTerm): return self.elem.type() def children(self): - return [self.elem, self.value] + \ - ([self.resize, self.capacity] if self.resize is not None else []) + return [self.elem, self.value] + \ + ([self.resize, self.capacity] if self.resize is not None else []) + + +class AtomicInc(ASTNode): + def __init__(self, sim, elem, value): + super().__init__(sim) + self.elem = ScalarOp.inline(elem) + self.value = Lit.cvt(sim, value) + self.resize = None + self.capacity = None + self.device_flag = False + sim.add_statement(self) + + def __str__(self): + return f"AtomicInc<{self.elem, self.value}>" + + def add_resize_check(self, resize, capacity): + self.resize = ScalarOp.inline(resize) + self.capacity = capacity + + def check_for_resize(self): + return self.resize is not None + + def children(self): + return [self.elem, self.value] + \ + ([self.resize, self.capacity] if self.resize is not None else []) diff --git a/src/pairs/ir/mutator.py b/src/pairs/ir/mutator.py index 42a55640f8dbc8979eb1f42dd198e82938d63567..bbe06f9ce371d818111cf597ab36e99f65fe5137 100644 --- a/src/pairs/ir/mutator.py +++ b/src/pairs/ir/mutator.py @@ -79,6 +79,16 @@ class Mutator: return ast_node + def mutate_AtomicInc(self, ast_node): + ast_node.elem = self.mutate(ast_node.elem) + ast_node.value = self.mutate(ast_node.value) + + if ast_node.check_for_resize(): + ast_node.resize = self.mutate(ast_node.resize) + ast_node.capacity = self.mutate(ast_node.capacity) + + return ast_node + def mutate_Block(self, ast_node): ast_node.stmts = [self.mutate(s) for s in ast_node.stmts] return ast_node diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py index 5152eef50829cc685ef4d297742022b0ff92465e..83db6e06db2717bdaf545a02b252ba729a6edc11 100644 --- a/src/pairs/sim/comm.py +++ b/src/pairs/sim/comm.py @@ -1,9 +1,12 @@ +from pairs.ir.actions import Actions from pairs.ir.assign import Assign -from pairs.ir.atomic import AtomicAdd +from pairs.ir.atomic import AtomicAdd, AtomicInc from pairs.ir.scalars import ScalarOp from pairs.ir.block import pairs_device_block, pairs_host_block, pairs_inline from pairs.ir.branches import Branch, Filter from pairs.ir.cast import Cast +from pairs.ir.contexts import Contexts +from pairs.ir.device import CopyArray from pairs.ir.functions import Call_Void from pairs.ir.loops import For, ParticleFor, While from pairs.ir.utils import Print @@ -54,6 +57,18 @@ class Comm: Assign(self.sim, self.sim.nghost, 0) for step in range(self.dom_part.number_of_steps()): + if self.sim._target.is_gpu(): + CopyArray(self.sim, self.nsend, Contexts.Host, Actions.Ignore) + CopyArray(self.sim, self.nrecv, Contexts.Host, Actions.Ignore) + + for j in self.dom_part.step_indexes(step): + Assign(self.sim, self.nsend[j], 0) + Assign(self.sim, self.nrecv[j], 0) + + if self.sim._target.is_gpu(): + CopyArray(self.sim, self.nsend, Contexts.Device, Actions.Ignore) + CopyArray(self.sim, self.nrecv, Contexts.Device, Actions.Ignore) + DetermineGhostParticles(self, step, self.sim.cell_spacing()) CommunicateSizes(self, step) SetCommunicationOffsets(self, step) @@ -70,13 +85,17 @@ class Comm: Assign(self.sim, self.nsend_all, 0) Assign(self.sim, self.sim.nghost, 0) - for s in range(step): + for s in range(step + 1): for j in self.dom_part.step_indexes(s): Assign(self.sim, self.nsend[j], 0) Assign(self.sim, self.nrecv[j], 0) Assign(self.sim, self.send_offsets[j], 0) Assign(self.sim, self.recv_offsets[j], 0) + if self.sim._target.is_gpu(): + CopyArray(self.sim, self.nsend, Contexts.Device, Actions.Ignore) + CopyArray(self.sim, self.nrecv, Contexts.Device, Actions.Ignore) + DetermineGhostParticles(self, step, 0.0) CommunicateSizes(self, step) SetCommunicationOffsets(self, step) @@ -124,12 +143,10 @@ class DetermineGhostParticles(Lowerable): self.spacing = spacing self.sim.add_statement(self) - #@pairs_device_block - @pairs_host_block + @pairs_device_block def lower(self): nsend_all = self.comm.nsend_all nsend = self.comm.nsend - nrecv = self.comm.nrecv send_map = self.comm.send_map send_mult = self.comm.send_mult exchg_flag = self.comm.exchg_flag @@ -139,10 +156,6 @@ class DetermineGhostParticles(Lowerable): self.sim.check_resize(self.comm.send_capacity, nsend) #self.sim.check_resize(self.comm.send_capacity, nsend_all) - for j in self.comm.dom_part.step_indexes(self.step): - Assign(self.sim, nsend[j], 0) - Assign(self.sim, nrecv[j], 0) - if is_exchange: for i in ParticleFor(self.sim): Assign(self.sim, exchg_flag[i], 0) @@ -157,7 +170,8 @@ class DetermineGhostParticles(Lowerable): for d in range(self.sim.ndims()): Assign(self.sim, send_mult[next_idx][d], pbc[d]) - Assign(self.sim, nsend[j], nsend[j] + 1) + #Assign(self.sim, nsend[j], nsend[j] + 1) + AtomicInc(self.sim, nsend[j], 1) class SetCommunicationOffsets(Lowerable): diff --git a/src/pairs/transformations/modules.py b/src/pairs/transformations/modules.py index 20249056728d5c76acb306ecb96ba03f8574d658..1ee2c9b15f09caa2e51dafc02cdf3405f7722e5c 100644 --- a/src/pairs/transformations/modules.py +++ b/src/pairs/transformations/modules.py @@ -107,6 +107,24 @@ class AddResizeLogic(Mutator): return ast_node + def mutate_AtomicInc(self, ast_node): + dest = ast_node.elem + src = dest + ast_node.value + match_capacity = None + + if isinstance(dest, (ArrayAccess, Var)): + match_capacity = self.lookup_capacity([dest]) + + # Resize var is used in index, this statement should be checked for safety + if match_capacity is not None: + module = self.module_stack[-1] + resizes = list(self.module_resizes[module].keys()) + capacities = list(self.module_resizes[module].values()) + resize_id = resizes[capacities.index(match_capacity)] + ast_node.add_resize_check(ast_node.sim.resizes[resize_id], match_capacity) + + return ast_node + def mutate_Block(self, ast_node): self.block_stack.append(ast_node) ast_node.stmts = [self.mutate(s) for s in ast_node.stmts]