From 0cdd23d8123ba12917ce5a6c63e4a81ffc866b69 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Wed, 24 Apr 2019 13:03:26 +0200 Subject: [PATCH] Improvements for GPU code generation - turned on restrict keyword by default (makes a large difference on GPUs) - smarter block indexing: changing block size depending on domain size Example: previously there were (1,1,1) blocks when requested block size was (64, 1, 1) and domain size (1, 512, 512), now the block size is changed automatically to (1, 64, 1) in this case - added __launch_bounds__ to kernels to allow better optimizations from the CUDA compiler --- pystencils_walberla/templates/GpuPackInfo.tmpl.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pystencils_walberla/templates/GpuPackInfo.tmpl.h b/pystencils_walberla/templates/GpuPackInfo.tmpl.h index 19d68f5..8b70e1c 100644 --- a/pystencils_walberla/templates/GpuPackInfo.tmpl.h +++ b/pystencils_walberla/templates/GpuPackInfo.tmpl.h @@ -13,6 +13,13 @@ #define FUNC_PREFIX __global__ {%- endif %} +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif namespace walberla { namespace {{namespace}} { -- GitLab