Skip to content
Snippets Groups Projects
Commit 1a991fff authored by Markus Holzer
Browse files

Blocking for partial directions

parent 74bb2c23
Branches
Tags
No related merge requests found
......@@ -404,7 +404,7 @@ class PragmaBlock(Block):
class LoopOverCoordinate(Node):
LOOP_COUNTER_NAME_PREFIX = "ctr"
BlOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False):
super(LoopOverCoordinate, self).__init__(parent=None)
......@@ -479,7 +479,7 @@ class LoopOverCoordinate(Node):
@staticmethod
def get_block_loop_counter_name(coordinate_to_loop_over):
return f"{LoopOverCoordinate.BlOCK_LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
return f"{LoopOverCoordinate.BLOCK_LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
@property
def loop_counter_name(self):
......
......@@ -1258,7 +1258,8 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
Args:
ast_node: kernel function node before vectorization transformation has been applied
block_size: sequence defining block size in x, y, (z) direction
block_size: sequence defining block size in x, y, (z) direction.
If chosen as zero the direction will not be used for blocking.
Returns:
number of dimensions blocked
......@@ -1270,8 +1271,10 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
body = ast_node.body
coordinates = []
coordinates_taken_into_account = 0
loop_starts = {}
loop_stops = {}
for loop in loops:
coord = loop.coordinate_to_loop_over
if coord not in coordinates:
......@@ -1285,6 +1288,9 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
# Create the outer loops that iterate over the blocks
outer_loop = None
for coord in reversed(coordinates):
if block_size[coord] == 0:
continue
coordinates_taken_into_account += 1
body = ast.Block([outer_loop]) if outer_loop else body
outer_loop = ast.LoopOverCoordinate(body,
coord,
......@@ -1298,6 +1304,8 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
# modify the existing loops to only iterate within one block
for inner_loop in loops:
coord = inner_loop.coordinate_to_loop_over
if block_size[coord] == 0:
continue
block_ctr = ast.LoopOverCoordinate.get_block_loop_counter_symbol(coord)
loop_range = inner_loop.stop - inner_loop.start
if sp.sympify(
......@@ -1307,7 +1315,7 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
stop = sp.Min(inner_loop.stop, block_ctr + block_size[coord])
inner_loop.start = block_ctr
inner_loop.stop = stop
return len(coordinates)
return coordinates_taken_into_account
def implement_interpolations(ast_node: ast.Node,
......
......@@ -18,14 +18,20 @@ def check_equivalence(assignments, src_arr):
for vectorization in [False, {'assume_inner_stride_one': True}]:
with_blocking = ps.create_kernel(assignments, cpu_blocking=(8, 16, 4), cpu_openmp=openmp,
cpu_vectorize_info=vectorization).compile()
with_blocking_only_over_y = ps.create_kernel(assignments, cpu_blocking=(0, 16, 0), cpu_openmp=openmp,
cpu_vectorize_info=vectorization).compile()
without_blocking = ps.create_kernel(assignments).compile()
print(f" openmp {openmp}, vectorization {vectorization}")
dst_arr = np.zeros_like(src_arr)
dst2_arr = np.zeros_like(src_arr)
ref_arr = np.zeros_like(src_arr)
np.copyto(src_arr, np.random.rand(*src_arr.shape))
with_blocking(src=src_arr, dst=dst_arr)
with_blocking_only_over_y(src=src_arr, dst=dst2_arr)
without_blocking(src=src_arr, dst=ref_arr)
np.testing.assert_almost_equal(ref_arr, dst_arr)
np.testing.assert_almost_equal(ref_arr, dst2_arr)
def test_jacobi3d_var_size():
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment