diff --git a/pystencils_benchmark/templates/gpu/main.c b/pystencils_benchmark/templates/gpu/main.c index b2f35718df6cb4b0abcbf827845a858fee68fa42..13730482d3a9880095451f4cd717c5e37be2ba6c 100644 --- a/pystencils_benchmark/templates/gpu/main.c +++ b/pystencils_benchmark/templates/gpu/main.c @@ -32,6 +32,12 @@ int main(int argc, char **argv) cudaMemset({{field_name}}, 0.23, {{elements}}); {% endfor %} + {% for constantName, dataType in kernel.constants %} + // Constant {{constantName}} + {{dataType}} {{constantName}}; + {{constantName}} = 0.23; + {% endfor %} + dim3 blocks({{kernel.blocks[0]}}, {{kernel.blocks[1]}}, {{kernel.blocks[2]}}); dim3 grid({{kernel.grid[0]}}, {{kernel.grid[1]}}, {{kernel.grid[2]}}); diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 62881eccc8f9a7f7f6232bce9a2f17b939313c86..fe3946be744dab8c3d281bc7cbdb520a3399d05b 100755 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -48,14 +48,20 @@ gpu_kwargs = ({}, {'launch_bounds': (256,)}, {'launch_bounds': (256, 2)}) def test_generate_gpu(kwargs): compiler = Compiler.NVCC a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000)) + alpha = sp.symbols('alpha') @ps.kernel_config(ps.CreateKernelConfig(target=ps.Target.GPU)) def vadd(): a[0] @= b[0] + c[0] kernel_vadd = ps.create_kernel(**vadd) + @ps.kernel_config(ps.CreateKernelConfig(target=ps.Target.GPU)) + def daxpy(): + b[0] @= alpha * a[0] + b[0] + kernel_daxpy = ps.create_kernel(**daxpy) + with tempfile.TemporaryDirectory(dir=Path.cwd()) as temp_dir: temp_dir = Path(temp_dir) - pb.gpu.generate_benchmark(kernel_vadd, temp_dir, compiler=compiler, **kwargs) + pb.gpu.generate_benchmark([kernel_vadd, kernel_daxpy], temp_dir, compiler=compiler, **kwargs) subprocess.run(['make', '-C', f'{temp_dir}'], check=True) subprocess.run([f'{temp_dir}/benchmark-{compiler.name}', '10'], check=True)