Skip to content
Snippets Groups Projects

Add experimental half precision support

Merged Markus Holzer requested to merge holzer/pystencils:halfP into master
3 files
+ 44
2
Compare changes
  • Side-by-side
  • Inline
Files
3
+ 7
2
@@ -39,17 +39,22 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
@@ -39,17 +39,22 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
if argument_dict is None:
if argument_dict is None:
argument_dict = {}
argument_dict = {}
 
half = True in (field.dtype.is_half() for field in kernel_function_node.fields_accessed)
 
if cp.cuda.runtime.is_hip:
if cp.cuda.runtime.is_hip:
header_list = ['"gpu_defines.h"'] + list(get_headers(kernel_function_node))
header_list = ['"gpu_defines.h"'] + list(get_headers(kernel_function_node))
 
if half:
 
header_list += ['<hip_fp16.h>', ]
else:
else:
header_list = ['"gpu_defines.h"', '<cstdint>'] + list(get_headers(kernel_function_node))
header_list = ['"gpu_defines.h"', '<cstdint>'] + list(get_headers(kernel_function_node))
 
if half:
 
header_list += ['<cuda_fp16.h>', ]
includes = "\n".join([f"#include {include_file}" for include_file in header_list])
includes = "\n".join([f"#include {include_file}" for include_file in header_list])
code = includes + "\n"
code = includes + "\n"
code += "#define FUNC_PREFIX __global__\n"
code += "#define FUNC_PREFIX __global__\n"
code += "#define RESTRICT __restrict__\n\n"
code += "#define RESTRICT __restrict__\n\n"
code += str(generate_cuda(kernel_function_node, custom_backend=custom_backend))
code += 'extern "C" {\n%s\n}\n' % str(generate_cuda(kernel_function_node, custom_backend=custom_backend))
code = 'extern "C" {\n%s\n}\n' % code
options = ["-w", "-std=c++11"]
options = ["-w", "-std=c++11"]
if USE_FAST_MATH:
if USE_FAST_MATH:
Loading