Skip to content
Snippets Groups Projects
Commit 3d2c1f56 authored by Stephan Seitz's avatar Stephan Seitz
Browse files

Move NativeTextureBinding to framework_integration.texture_astnodes

parent 05e213a1
Branches
Tags
No related merge requests found
Pipeline #19188 failed
...@@ -11,17 +11,17 @@ waLBerla currently uses `pystencils-walberla <https://pypi.org/project/pystencil ...@@ -11,17 +11,17 @@ waLBerla currently uses `pystencils-walberla <https://pypi.org/project/pystencil
""" """
import itertools import itertools
from collections.abc import Iterable from collections.abc import Iterable
from functools import reduce
from typing import Any, List, Set from typing import Any, List, Set
import jinja2 import jinja2
import sympy as sp
import pystencils import pystencils
import sympy as sp
from pystencils.astnodes import KernelFunction, Node, NodeOrExpr, ResolvedFieldAccess from pystencils.astnodes import KernelFunction, Node, NodeOrExpr, ResolvedFieldAccess
from pystencils.data_types import TypedSymbol from pystencils.data_types import TypedSymbol
from pystencils.kernelparameters import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol from pystencils.kernelparameters import FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol
from pystencils_autodiff.framework_integration.printer import FrameworkIntegrationPrinter from pystencils_autodiff.framework_integration.printer import FrameworkIntegrationPrinter
from pystencils_autodiff.framework_integration.texture_astnodes import NativeTextureBinding
class DestructuringBindingsForFieldClass(Node): class DestructuringBindingsForFieldClass(Node):
...@@ -88,201 +88,6 @@ class DestructuringBindingsForFieldClass(Node): ...@@ -88,201 +88,6 @@ class DestructuringBindingsForFieldClass(Node):
return self.body.atoms(arg_type) | {s for s in self.symbols_defined if isinstance(s, arg_type)} return self.body.atoms(arg_type) | {s for s in self.symbols_defined if isinstance(s, arg_type)}
class NativeTextureBinding(pystencils.backends.cbackend.CustomCodeNode):
"""
Bind texture to CUDA device pointer
Recommended read: https://devblogs.nvidia.com/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
The definition from cudaResourceDesc and cudaTextureDesc
.. code:: c
/**
* CUDA resource descriptor
*/
struct __device_builtin__ cudaResourceDesc {
enum cudaResourceType resType; /**< Resource type */
union {
struct {
cudaArray_t array; /**< CUDA array */
} array;
struct {
cudaMipmappedArray_t mipmap; /**< CUDA mipmapped array */
} mipmap;
struct {
void *devPtr; /**< Device pointer */
struct cudaChannelFormatDesc desc; /**< Channel descriptor */
size_t sizeInBytes; /**< Size in bytes */
} linear;
struct {
void *devPtr; /**< Device pointer */
struct cudaChannelFormatDesc desc; /**< Channel descriptor */
size_t width; /**< Width of the array in elements */
size_t height; /**< Height of the array in elements */
size_t pitchInBytes; /**< Pitch between two rows in bytes */
} pitch2D;
} res;
};
.. code:: c
/**
* CUDA texture descriptor
*/
struct __device_builtin__ cudaTextureDesc
{
/**
* Texture address mode for up to 3 dimensions
*/
enum cudaTextureAddressMode addressMode[3];
/**
* Texture filter mode
*/
enum cudaTextureFilterMode filterMode;
/**
* Texture read mode
*/
enum cudaTextureReadMode readMode;
/**
* Perform sRGB->linear conversion during texture read
*/
int sRGB;
/**
* Texture Border Color
*/
float borderColor[4];
/**
* Indicates whether texture reads are normalized or not
*/
int normalizedCoords;
/**
* Limit to the anisotropy ratio
*/
unsigned int maxAnisotropy;
/**
* Mipmap filter mode
*/
enum cudaTextureFilterMode mipmapFilterMode;
/**
* Offset applied to the supplied mipmap level
*/
float mipmapLevelBias;
/**
* Lower end of the mipmap level range to clamp access to
*/
float minMipmapLevelClamp;
/**
* Upper end of the mipmap level range to clamp access to
*/
float maxMipmapLevelClamp;
};
""" # noqa
CODE_TEMPLATE_LINEAR = jinja2.Template("""cudaResourceDesc {{resource_desc}}{};
{{resource_desc}}.resType = cudaResourceTypeLinear;
{{resource_desc}}.res.linear.devPtr = {{device_ptr}};
{{resource_desc}}.res.linear.desc.f = {{cuda_channel_format}};
{{resource_desc}}.res.linear.desc.x = {{bits_per_channel}}; // bits per channel
{{resource_desc}}.res.linear.sizeInBytes = {{total_size}};
cudaTextureDesc {{texture_desc}}{};
cudaTextureObject_t {{texture_object}}=0;
cudaCreateTextureObject(&{{texture_object}}, &{{resource_desc}}, &{{texture_desc}}, nullptr);
{{texture_desc}}.readMode = cudaReadModeElementType;
auto {{texture_object}}Destroyer = std::unique_ptr(nullptr, [&](){
cudaDestroyTextureObject({{texture_object}});
});
""")
CODE_TEMPLATE_PITCHED2D = jinja2.Template(""" !!! TODO!!! """)
CODE_TEMPLATE_CUDA_ARRAY = jinja2.Template("""
auto channel_desc_{{texture_name}} = {{channel_desc}};
{{ create_array }}
{{ copy_array }}
{{ texture_name }}.addressMode[0] = cudaAddressModeBorder;
{{ texture_name }}.addressMode[1] = cudaAddressModeBorder;
{{ texture_name }}.addressMode[2] = cudaAddressModeBorder;
{{ texture_name }}.filterMode = cudaFilterModeLinear;
{{ texture_name }}.normalized = false;
cudaBindTextureToArray(&{{texture_name}}, {{array}}, &channel_desc_{{texture_name}});
std::shared_ptr<void> {{array}}Destroyer(nullptr, [&](...){
cudaFreeArray({{array}});
cudaUnbindTexture({{texture_name}});
});
""")
def __init__(self, texture, device_data_ptr, use_texture_objects=True):
self._texture = texture
self._device_ptr = device_data_ptr
self._dtype = self._device_ptr.dtype.base_type
self._shape = tuple(sp.S(s) for s in self._texture.field.shape)
self._ndim = texture.field.ndim
assert use_texture_objects, "without texture objects is not implemented"
super().__init__(self.get_code(dialect='c', vector_instruction_set=None),
symbols_read={device_data_ptr,
*[s for s in self._shape if isinstance(s, sp.Symbol)]},
symbols_defined={})
self.headers = ['<memory>', '<cuda.h>', '<cuda_runtime_api.h>']
def get_code(self, dialect, vector_instruction_set):
texture_name = self._texture.symbol.name
code = self.CODE_TEMPLATE_CUDA_ARRAY.render(
resource_desc='resDesc_' + texture_name,
texture_desc='texDesc_' + texture_name,
channel_desc=f'cudaCreateChannelDesc<{self._dtype}>()', # noqa
texture_object='tex_' + texture_name,
array='array_' + texture_name,
texture_name=texture_name,
ndim=self._ndim,
device_ptr=self._device_ptr,
create_array=self._get_create_array_call(),
copy_array=self._get_copy_array_call(),
dtype=self._dtype,
bits_per_channel=self._dtype.numpy_dtype.itemsize * 8,
total_size=self._dtype.numpy_dtype.itemsize * reduce(lambda x, y: x * y, self._shape, 1))
return code
def _get_create_array_call(self):
texture_name = self._texture.symbol.name
ndim = '' if self._ndim <= 2 else f'{self._ndim}D'
array = 'array_' + texture_name
return f"""cudaArray * {array};
cudaMalloc{ndim}Array(&{array}, &channel_desc_{texture_name}, """ + (
(f'{{{", ".join(str(s) for s in reversed(self._shape))}}});'
if self._ndim == 3
else f'{", ".join(str(s) for s in reversed(self._shape))});'))
def _get_copy_array_call(self):
texture_name = self._texture.symbol.name
array = 'array_' + texture_name
if self._texture.field.ndim == 3:
copy_params = f'cpy_{texture_name}_params'
return f"""cudaMemcpy3DParms {copy_params}{{}};
{copy_params}.srcPtr = {{{self._device_ptr},
{self._texture.field.strides[-1] * self._texture.field.shape[-1] * self._dtype.numpy_dtype.itemsize},
{self._texture.field.shape[-1]},
{self._texture.field.shape[-2]}}};
{copy_params}.dstArray = {array};
{copy_params}.extent = {{{", ".join(str(s) for s in reversed(self._shape))}}};
{copy_params}.kind = cudaMemcpyDeviceToDevice;
cudaMemcpy3D(&{copy_params});""" # noqa
elif self._texture.field.ndim == 2:
# noqa: cudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind);
return f"""cudaMemcpy2DToArray({array},
0u,
0u,
{self._device_ptr},
{self._texture.field.strides[-1] * self._texture.field.shape[-1] * self._dtype.numpy_dtype.itemsize},
{self._texture.field.shape[-1]},
{self._texture.field.shape[-2]},
cudaMemcpyDeviceToDevice);
""" # noqa
else:
raise NotImplementedError()
class KernelFunctionCall(Node): class KernelFunctionCall(Node):
""" """
AST nodes representing a call of a :class:`pystencils.astnodes.KernelFunction` AST nodes representing a call of a :class:`pystencils.astnodes.KernelFunction`
......
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
from functools import reduce
import jinja2
import sympy as sp
import pystencils.backends
class NativeTextureBinding(pystencils.backends.cbackend.CustomCodeNode):
"""
Bind texture to CUDA device pointer
Recommended read: https://devblogs.nvidia.com/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
The definition from cudaResourceDesc and cudaTextureDesc
.. code:: c
/**
* CUDA resource descriptor
*/
struct __device_builtin__ cudaResourceDesc {
enum cudaResourceType resType; /**< Resource type */
union {
struct {
cudaArray_t array; /**< CUDA array */
} array;
struct {
cudaMipmappedArray_t mipmap; /**< CUDA mipmapped array */
} mipmap;
struct {
void *devPtr; /**< Device pointer */
struct cudaChannelFormatDesc desc; /**< Channel descriptor */
size_t sizeInBytes; /**< Size in bytes */
} linear;
struct {
void *devPtr; /**< Device pointer */
struct cudaChannelFormatDesc desc; /**< Channel descriptor */
size_t width; /**< Width of the array in elements */
size_t height; /**< Height of the array in elements */
size_t pitchInBytes; /**< Pitch between two rows in bytes */
} pitch2D;
} res;
};
.. code:: c
/**
* CUDA texture descriptor
*/
struct __device_builtin__ cudaTextureDesc
{
/**
* Texture address mode for up to 3 dimensions
*/
enum cudaTextureAddressMode addressMode[3];
/**
* Texture filter mode
*/
enum cudaTextureFilterMode filterMode;
/**
* Texture read mode
*/
enum cudaTextureReadMode readMode;
/**
* Perform sRGB->linear conversion during texture read
*/
int sRGB;
/**
* Texture Border Color
*/
float borderColor[4];
/**
* Indicates whether texture reads are normalized or not
*/
int normalizedCoords;
/**
* Limit to the anisotropy ratio
*/
unsigned int maxAnisotropy;
/**
* Mipmap filter mode
*/
enum cudaTextureFilterMode mipmapFilterMode;
/**
* Offset applied to the supplied mipmap level
*/
float mipmapLevelBias;
/**
* Lower end of the mipmap level range to clamp access to
*/
float minMipmapLevelClamp;
/**
* Upper end of the mipmap level range to clamp access to
*/
float maxMipmapLevelClamp;
};
""" # noqa
CODE_TEMPLATE_LINEAR = jinja2.Template("""cudaResourceDesc {{resource_desc}}{};
{{resource_desc}}.resType = cudaResourceTypeLinear;
{{resource_desc}}.res.linear.devPtr = {{device_ptr}};
{{resource_desc}}.res.linear.desc.f = {{cuda_channel_format}};
{{resource_desc}}.res.linear.desc.x = {{bits_per_channel}}; // bits per channel
{{resource_desc}}.res.linear.sizeInBytes = {{total_size}};
cudaTextureDesc {{texture_desc}}{};
cudaTextureObject_t {{texture_object}}=0;
cudaCreateTextureObject(&{{texture_object}}, &{{resource_desc}}, &{{texture_desc}}, nullptr);
{{texture_desc}}.readMode = cudaReadModeElementType;
auto {{texture_object}}Destroyer = std::unique_ptr(nullptr, [&](){
cudaDestroyTextureObject({{texture_object}});
});
""")
CODE_TEMPLATE_PITCHED2D = jinja2.Template(""" !!! TODO!!! """)
CODE_TEMPLATE_CUDA_ARRAY = jinja2.Template("""
auto channel_desc_{{texture_name}} = {{channel_desc}};
{{ create_array }}
{{ copy_array }}
{{ texture_name }}.addressMode[0] = cudaAddressModeBorder;
{{ texture_name }}.addressMode[1] = cudaAddressModeBorder;
{{ texture_name }}.addressMode[2] = cudaAddressModeBorder;
{{ texture_name }}.filterMode = cudaFilterModeLinear;
{{ texture_name }}.normalized = false;
cudaBindTextureToArray(&{{texture_name}}, {{array}}, &channel_desc_{{texture_name}});
std::shared_ptr<void> {{array}}Destroyer(nullptr, [&](...){
cudaFreeArray({{array}});
cudaUnbindTexture({{texture_name}});
});
""")
def __init__(self, texture, device_data_ptr, use_texture_objects=True):
self._texture = texture
self._device_ptr = device_data_ptr
self._dtype = self._device_ptr.dtype.base_type
self._shape = tuple(sp.S(s) for s in self._texture.field.shape)
self._ndim = texture.field.ndim
assert use_texture_objects, "without texture objects is not implemented"
super().__init__(self.get_code(dialect='c', vector_instruction_set=None),
symbols_read={device_data_ptr,
*[s for s in self._shape if isinstance(s, sp.Symbol)]},
symbols_defined={})
self.headers = ['<memory>', '<cuda.h>', '<cuda_runtime_api.h>']
def get_code(self, dialect, vector_instruction_set):
texture_name = self._texture.symbol.name
code = self.CODE_TEMPLATE_CUDA_ARRAY.render(
resource_desc='resDesc_' + texture_name,
texture_desc='texDesc_' + texture_name,
channel_desc=f'cudaCreateChannelDesc<{self._dtype}>()', # noqa
texture_object='tex_' + texture_name,
array='array_' + texture_name,
texture_name=texture_name,
ndim=self._ndim,
device_ptr=self._device_ptr,
create_array=self._get_create_array_call(),
copy_array=self._get_copy_array_call(),
dtype=self._dtype,
bits_per_channel=self._dtype.numpy_dtype.itemsize * 8,
total_size=self._dtype.numpy_dtype.itemsize * reduce(lambda x, y: x * y, self._shape, 1))
return code
def _get_create_array_call(self):
texture_name = self._texture.symbol.name
ndim = '' if self._ndim <= 2 else f'{self._ndim}D'
array = 'array_' + texture_name
return f"""cudaArray * {array};
cudaMalloc{ndim}Array(&{array}, &channel_desc_{texture_name}, """ + (
(f'{{{", ".join(str(s) for s in reversed(self._shape))}}});'
if self._ndim == 3
else f'{", ".join(str(s) for s in reversed(self._shape))});'))
def _get_copy_array_call(self):
texture_name = self._texture.symbol.name
array = 'array_' + texture_name
if self._texture.field.ndim == 3:
copy_params = f'cpy_{texture_name}_params'
return f"""cudaMemcpy3DParms {copy_params}{{}};
{copy_params}.srcPtr = {{{self._device_ptr},
{self._texture.field.strides[-1] * self._texture.field.shape[-1] * self._dtype.numpy_dtype.itemsize},
{self._texture.field.shape[-1]},
{self._texture.field.shape[-2]}}};
{copy_params}.dstArray = {array};
{copy_params}.extent = {{{", ".join(str(s) for s in reversed(self._shape))}}};
{copy_params}.kind = cudaMemcpyDeviceToDevice;
cudaMemcpy3D(&{copy_params});""" # noqa
elif self._texture.field.ndim == 2:
# noqa: cudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind);
return f"""cudaMemcpy2DToArray({array},
0u,
0u,
{self._device_ptr},
{self._texture.field.strides[-1] * self._texture.field.shape[-1] * self._dtype.numpy_dtype.itemsize},
{self._texture.field.shape[-1]},
{self._texture.field.shape[-2]},
cudaMemcpyDeviceToDevice);
""" # noqa
else:
raise NotImplementedError()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment