diff --git a/src/pystencils_autodiff/backends/torch_native_cpu.tmpl.cpp b/src/pystencils_autodiff/backends/torch_native_cpu.tmpl.cpp index 0f02323e317bdd5f69e404237123896848371bc7..d7ee774e03061e4d878d93cc1dfca8f7306beb4b 100644 --- a/src/pystencils_autodiff/backends/torch_native_cpu.tmpl.cpp +++ b/src/pystencils_autodiff/backends/torch_native_cpu.tmpl.cpp @@ -7,6 +7,7 @@ using namespace pybind11::literals; using scalar_t = {{ dtype }}; +#define RESTRICT __restrict std::vector<at::Tensor> {{ kernel_name }}_forward( {%- for tensor in forward_tensors -%} diff --git a/tests/backends/test_torch_native_compilation.py b/tests/backends/test_torch_native_compilation.py index bf72109a294879c22bd83caab3c22a0dc5233157..f23fd21db6c9b61b2f52cdb16f197dd1769fbbc9 100644 --- a/tests/backends/test_torch_native_compilation.py +++ b/tests/backends/test_torch_native_compilation.py @@ -132,10 +132,9 @@ def test_torch_native_compilation(): print(output) -def test_generate_torch(): +def test_generate_torch_gpu(): x, y = pystencils.fields('x, y: float32[2d]') - os.environ['CUDA_HOME'] = "/usr/local/cuda-10.0" assignments = pystencils.AssignmentCollection({ y.center(): x.center()**2 }, {}) @@ -143,6 +142,16 @@ def test_generate_torch(): op_cuda = generate_torch(appdirs.user_cache_dir('pystencils'), autodiff, is_cuda=True, dtype=np.float32) assert op_cuda is not None + + +def test_generate_torch_cpu(): + x, y = pystencils.fields('x, y: float32[2d]') + + assignments = pystencils.AssignmentCollection({ + y.center(): x.center()**2 + }, {}) + autodiff = pystencils_autodiff.AutoDiffOp(assignments) + op_cpp = generate_torch(appdirs.user_cache_dir('pystencils'), autodiff, is_cuda=False, dtype=np.float32) assert op_cpp is not None @@ -165,7 +174,6 @@ def test_execute_torch(): @pytest.mark.skipif('NO_GPU_EXECUTION' in os.environ, reason='Skip GPU execution tests') - def test_execute_torch_gpu(): x, y = pystencils.fields('x, y: float64[32,32]')