diff --git a/generated_files/cone_backprojector_3D_CudaKernel.cu b/generated_files/cone_backprojector_3D_CudaKernel.cu
index 4b899de4c1b8d8b0feb284dd6feea16612f4ebd1..dc92f4b0370e68544299b33ba486b7068c87667f 100644
--- a/generated_files/cone_backprojector_3D_CudaKernel.cu
+++ b/generated_files/cone_backprojector_3D_CudaKernel.cu
@@ -43,7 +43,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line,
   }
 }
 
-inline __device__ float3 map(float3 coordinates, float *d_projection_matrices,
+inline __device__ float3 map(float3 coordinates, const float *d_projection_matrices,
                              int n) {
   const float *matrix = &(d_projection_matrices[n * 12]);
 
@@ -99,7 +99,7 @@ inline __device__ float interp2D(const float *const_volume_ptr,
 }
 
 __global__ void backproject_3Dcone_beam_kernel(
-    const float *sinogram_ptr, float *vol, float *d_projection_matrices,
+    const float *sinogram_ptr, float *vol, const float *d_projection_matrices,
     const int number_of_projections, const uint3 volume_size,
     const float3 volume_spacing, const float3 volume_origin,
     const uint2 detector_size, const uint3 pointer_offsets,
@@ -144,10 +144,6 @@ void Cone_Backprojection3D_Kernel_Launcher(
     const float projection_multiplier) {
   // COPY matrix to graphics card as float array
   auto matrices_size_b = number_of_projections * 12 * sizeof(float);
-  float *d_projection_matrices;
-  gpuErrchk(cudaMalloc(&d_projection_matrices, matrices_size_b));
-  gpuErrchk(cudaMemcpy(d_projection_matrices, projection_matrices,
-                       matrices_size_b, cudaMemcpyHostToDevice));
 
   uint3 volume_size = make_uint3(volume_width, volume_height, volume_depth);
   float3 volume_spacing =
@@ -168,10 +164,9 @@ void Cone_Backprojection3D_Kernel_Launcher(
   const dim3 block = dim3(BLOCKSIZE_X, BLOCKSIZE_Y, BLOCKSIZE_Z);
 
   backproject_3Dcone_beam_kernel<<<grid, block>>>(
-      sinogram_ptr, out, d_projection_matrices, number_of_projections,
+      sinogram_ptr, out, projection_matrices, number_of_projections,
       volume_size, volume_spacing, volume_origin, detector_size,
       pointer_offsets, projection_multiplier);
 
   gpuErrchk(cudaUnbindTexture(sinogram_as_texture));
-  gpuErrchk(cudaFree(d_projection_matrices));
 }
diff --git a/generated_files/cone_backprojector_3D_CudaKernel_hardware_interp.cu b/generated_files/cone_backprojector_3D_CudaKernel_hardware_interp.cu
index 0e528b1a956abebf1e43a9f18a9abba06e60e72a..34682bbec5024c574de6bf5028bb5557155023ad 100644
--- a/generated_files/cone_backprojector_3D_CudaKernel_hardware_interp.cu
+++ b/generated_files/cone_backprojector_3D_CudaKernel_hardware_interp.cu
@@ -43,7 +43,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line,
   }
 }
 
-inline __device__ float3 map(float3 coordinates, float *d_projection_matrices,
+inline __device__ float3 map(float3 coordinates, const float *d_projection_matrices,
                              int n) {
   const float *matrix = &(d_projection_matrices[n * 12]);
 
@@ -56,7 +56,7 @@ inline __device__ float3 map(float3 coordinates, float *d_projection_matrices,
 }
 
 __global__ void backproject_3Dcone_beam_kernel_tex_interp(
-    float *vol, float *d_projection_matrices, const int number_of_projections,
+    float *vol, const float *d_projection_matrices, const int number_of_projections,
     const uint3 volume_size, const float3 volume_spacing,
     const float3 volume_origin, const float projection_multiplier) {
   const int i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -114,10 +114,6 @@ void Cone_Backprojection3D_Kernel_Tex_Interp_Launcher(
     const float projection_multiplier) {
   // COPY matrix to graphics card as float array
   auto matrices_size_b = number_of_projections * 12 * sizeof(float);
-  float *d_projection_matrices;
-  gpuErrchk(cudaMalloc(&d_projection_matrices, matrices_size_b));
-  gpuErrchk(cudaMemcpy(d_projection_matrices, projection_matrices,
-                       matrices_size_b, cudaMemcpyHostToDevice));
 
   uint3 volume_size = make_uint3(volume_width, volume_height, volume_depth);
   float3 volume_spacing =
@@ -169,10 +165,9 @@ void Cone_Backprojection3D_Kernel_Tex_Interp_Launcher(
   const dim3 block = dim3(BLOCKSIZE_X, BLOCKSIZE_Y, BLOCKSIZE_Z);
 
   backproject_3Dcone_beam_kernel_tex_interp<<<grid, block>>>(
-      out, d_projection_matrices, number_of_projections, volume_size,
+      out, projection_matrices, number_of_projections, volume_size,
       volume_spacing, volume_origin, projection_multiplier);
 
   gpuErrchk(cudaUnbindTexture(sinogram_as_texture));
   gpuErrchk(cudaFreeArray(projArray));
-  gpuErrchk(cudaFree(d_projection_matrices));
 }
diff --git a/generated_files/cone_projector_3D_CudaKernel.cu b/generated_files/cone_projector_3D_CudaKernel.cu
index 70813e9cbc26d360d8c10c8c6c20b972e9cd7b4d..f5608f6c400d9db730260029b7a1113a597a9fb8 100644
--- a/generated_files/cone_projector_3D_CudaKernel.cu
+++ b/generated_files/cone_projector_3D_CudaKernel.cu
@@ -225,16 +225,9 @@ void Cone_Projection_Kernel_Launcher(const float* volume_ptr, float *out, const
 {
     //COPY inv AR matrix to graphics card as float array
     auto matrices_size_b = number_of_projections * 9 * sizeof(float);
-    float *d_inv_AR_matrices;
-    gpuErrchk(cudaMalloc(&d_inv_AR_matrices, matrices_size_b));
-    gpuErrchk(cudaMemcpy(d_inv_AR_matrices, inv_AR_matrix, matrices_size_b, cudaMemcpyHostToDevice));
     //COPY source points to graphics card as float3
     auto src_points_size_b = number_of_projections * sizeof(float3);
 
-    float3 *d_src_points;
-    gpuErrchk(cudaMalloc(&d_src_points, src_points_size_b));
-    gpuErrchk(cudaMemcpy(d_src_points, src_points, src_points_size_b, cudaMemcpyHostToDevice));
-
     uint3 volume_size = make_uint3(volume_width, volume_height, volume_depth);
     float3 volume_spacing = make_float3(volume_spacing_x, volume_spacing_y, volume_spacing_z);
 
@@ -244,14 +237,12 @@ void Cone_Projection_Kernel_Launcher(const float* volume_ptr, float *out, const
     const dim3 blocksize = dim3( BLOCKSIZE_X, BLOCKSIZE_Y, 1 );
     const dim3 gridsize = dim3( detector_size.x / blocksize.x + 1, detector_size.y / blocksize.y + 1 , number_of_projections+1);
 
-    project_3Dcone_beam_kernel<<<gridsize, blocksize>>>(volume_ptr, out, d_inv_AR_matrices, d_src_points, step_size,
+    project_3Dcone_beam_kernel<<<gridsize, blocksize>>>(volume_ptr, out, inv_AR_matrix, reinterpret_cast<const float3*>(src_points), step_size,
                                         volume_size,volume_spacing, detector_size,number_of_projections,pointer_offsets);
 
     cudaDeviceSynchronize();
 
     // check for errors
     gpuErrchk( cudaPeekAtLastError() );
-    gpuErrchk(cudaFree(d_inv_AR_matrices));
-    gpuErrchk(cudaFree(d_src_points));
 }
 
diff --git a/src/pyronn_torch/PYRO-NN-Layers b/src/pyronn_torch/PYRO-NN-Layers
index 1d10af309ff19d637bf4e991f4cb0aa58b561840..2cf5bdbf36e606eedf51ffb2ae84a7e3a4bb4492 160000
--- a/src/pyronn_torch/PYRO-NN-Layers
+++ b/src/pyronn_torch/PYRO-NN-Layers
@@ -1 +1 @@
-Subproject commit 1d10af309ff19d637bf4e991f4cb0aa58b561840
+Subproject commit 2cf5bdbf36e606eedf51ffb2ae84a7e3a4bb4492
diff --git a/src/pyronn_torch/codegen.py b/src/pyronn_torch/codegen.py
index 97b37632e755efd5ba3ce0ae6b9db16bfb97449b..602937ae456c6f23e16c6c0ef00ed63d0cc1920d 100644
--- a/src/pyronn_torch/codegen.py
+++ b/src/pyronn_torch/codegen.py
@@ -192,6 +192,7 @@ def generate_shared_object(output_folder=None,
     copytree(join(dirname(__file__), 'PYRO-NN-Layers', 'helper_headers'),
              join(object_cache, module_name, 'helper_headers'))
     if update_repo_files:
+        rmtree(join(output_folder, 'helper_headers'), ignore_errors=True)
         copytree(join(dirname(__file__), 'PYRO-NN-Layers', 'helper_headers'),
                  join(output_folder, 'helper_headers'))