From 9748ab4346095df5c4611391308b9e48b0aace60 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Wed, 24 Apr 2019 13:03:26 +0200
Subject: [PATCH] Improvements for GPU code generation

- turned on restrict keyword by default (makes large difference on GPUs)
- smarter block indexing: changing block size depending on domain size
  Example: previously there were (1,1,1) blocks when requested
  block size was (64, 1, 1) and domain size (1, 512, 512), now the
  block size is changed automatically to (1, 64, 1) in this case
- added __launch_bounds__ to kernels to allow better optimizations from
  the CUDA compiler
---
 lbmpy_tests/test_code_hashequivalence.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lbmpy_tests/test_code_hashequivalence.py b/lbmpy_tests/test_code_hashequivalence.py
index 39ba4d78..8dca527c 100644
--- a/lbmpy_tests/test_code_hashequivalence.py
+++ b/lbmpy_tests/test_code_hashequivalence.py
@@ -10,7 +10,7 @@ def test_hash_equivalence():
     exactly the same code (not only functionally equivalent code) should be produced.
     Due to undefined order in sets and dicts this may no be the case.
     """
-    ref_value = "461f0ced7afa3d0499d5bd90d87fcdb0cfc6a5f56ee9fa4f13386c15b8484ca2"
+    ref_value = "5dfbb90b02e4940f05dcca11b43e1bb885d5655566735b52ad8c64f511848420"
     ast = create_lb_ast(stencil='D3Q19', method='srt', optimization={'openmp': False})
     code = generate_c(ast)
     hash_value = sha256(code.encode()).hexdigest()
-- 
GitLab