Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • fhennig/devel
  • master
  • rangersbach/c-interfacing
  • v0.1a1
  • v0.1a2
  • v0.1a3
  • v0.1a4
7 results

Target

Select target project
  • ob28imeq/pystencils-sfg
  • brendan-waters/pystencils-sfg
  • pycodegen/pystencils-sfg
3 results
Select Git revision
  • frontend-cleanup
  • lbwelding-features
  • master
  • refactor-indexing-params
  • unit_tests
  • v0.1a1
  • v0.1a2
  • v0.1a3
  • v0.1a4
9 results
Show changes
Showing
with 1053 additions and 0 deletions
from pystencilssfg import SourceFileGenerator
# Generator script: emits a recursive constexpr function, two structs with
# static methods, and a small virtual class hierarchy.
with SourceFileGenerator() as sfg:

    def recursive_branch(base_case: str, recursive_case: str):
        """Builds `if (k == 0) <base_case> else <recursive_case>`."""
        return sfg.branch("k == 0")(base_case)(recursive_case)

    def compute_method(body: str, *, overriding: bool):
        """Builds `int compute() const`, either `virtual` or `override`."""
        method = sfg.method("compute").returns("int")
        method = method.override() if overriding else method.virtual()
        return method.const()(body)

    # Free function: recursive, inline, constexpr factorial.
    sfg.function("factorial").params(
        sfg.var("n", "uint64")
    ).returns("uint64").inline().constexpr()(
        sfg.branch("n == 0")("return 1;")("return n * factorial(n - 1);")
    )

    # Parameters shared by the struct methods below.
    q = sfg.var("q", "double")
    k = sfg.var("k", "uint64_t")
    x = sfg.var("x", "double")

    # std::pow is used inside Series::geometric.
    sfg.include("<cmath>")

    # Struct with a single static, [[nodiscard]] method.
    sfg.struct("Series")(
        sfg.method("geometric")
        .static()
        .attr("nodiscard")
        .params(q, k)
        .returns("double")(
            recursive_branch(
                "return 1.0;",
                "return Series::geometric(q, k - 1) + std::pow(q, k);",
            )
        )
    )

    # Struct whose methods are all static, constexpr and inline.
    sfg.struct("ConstexprMath")(
        sfg.method("abs")
        .static()
        .constexpr()
        .inline()
        .params(x)
        .returns("double")("if (x >= 0.0) return x; else return -x;"),
        sfg.method("geometric")
        .static()
        .constexpr()
        .inline()
        .params(q, k)
        .returns("double")(
            recursive_branch(
                "return 1.0;",
                "return 1 + q * ConstexprMath::geometric(q, k - 1);",
            )
        ),
    )

    # Virtual dispatch: Child overrides Parent::compute.
    with sfg.namespace("inheritance_test"):
        sfg.klass("Parent")(
            sfg.public(compute_method("return 24;", overriding=False))
        )

        sfg.klass("Child", bases=["public Parent"])(
            sfg.public(compute_method("return 31;", overriding=True))
        )
#include "ComposerHeaderOnly.hpp"
#include <vector>
#undef NDEBUG
#include <cassert>
int main(void) {
assert( twice(13) == 26 );
{
std::vector< int64_t > arr { 1, 2, 3, 4, 5, 6 };
twiceKernel(arr);
std::vector< int64_t > expected { 2, 4, 6, 8, 10, 12 };
assert ( arr == expected );
}
{
std::vector< int64_t > arr { 1, 2, 3, 4, 5, 6 };
ScaleKernel ker { 3 };
ker( arr );
std::vector< int64_t > expected { 3, 6, 9, 12, 15, 18 };
assert ( arr == expected );
}
return 0;
}
from pystencilssfg import SourceFileGenerator, SfgConfig
from pystencilssfg.lang.cpp import std
import pystencils as ps
# Header-only mode: all generated entities end up in the header file.
cfg = SfgConfig(header_only=True)

with SourceFileGenerator(cfg) as sfg:

    def scale_invocation(factor_expr: str):
        """Nodes that bind `arr` to the vector, set `c`, and call the kernel."""
        return (
            sfg.map_field(arr, vec),
            sfg.set_param(c, factor_expr),
            sfg.call(khandle),
        )

    n = sfg.var("n", "int32")

    #   Should be automatically marked inline
    sfg.function("twice").returns("int32")(sfg.expr("return 2 * {};", n))

    #   Inline kernel: arr <- c * arr
    arr = ps.fields("arr: int64[1D]")
    vec = std.vector.from_field(arr)
    c = ps.TypedSymbol("c", "int64")

    asm = ps.Assignment(arr(0), c * arr(0))
    khandle = sfg.kernels.create(asm)

    sfg.function("twiceKernel")(*scale_invocation("2"))

    #   Inline class members: the factor is stored in the object
    sfg.klass("ScaleKernel")(
        sfg.private(c),
        sfg.public(
            sfg.constructor(c).init(c)(c),
            sfg.method("operator()")(*scale_invocation("this->c")),
        ),
    )
from pystencilssfg import SourceFileGenerator
from pystencils.types import PsCustomType
# Generator script exercising `switch` and `branch` code generation.
with SourceFileGenerator() as sfg:
    sfg.namespace("gen")

    sfg.include("<iostream>")
    sfg.code(r"enum class Noodles { RIGATONI, RAMEN, SPAETZLE, SPAGHETTI };")

    noodle = sfg.var("noodle", PsCustomType("Noodles"))

    def rating_switch():
        """Switch over `noodle` with one returning case per enumerator."""
        cases = (
            ("Noodles::RIGATONI", "return 13;"),
            ("Noodles::RAMEN", "return 41;"),
            ("Noodles::SPAETZLE", "return 43;"),
            ("Noodles::SPAGHETTI", "return 15;"),
        )
        # autobreak is disabled here; each case body is a return statement.
        switch = sfg.switch(noodle, autobreak=False)
        for label, statement in cases:
            switch = switch.case(label)(statement)
        return switch

    # Switch with a default branch.
    sfg.function("printOpinion")(
        sfg.switch(noodle)
        .case("Noodles::RAMEN")('std::cout << "Great!" << std::endl;')
        .case("Noodles::SPAETZLE")('std::cout << "Amazing!" << std::endl;')
        .default('std::cout << "Okay, I guess..." << std::endl;')
    )

    # Switch followed by a trailing return after the switch statement.
    sfg.function("getRating").returns("int32")(
        rating_switch(),
        "return 0;",
    )

    # Branch whose condition is built from an expression on `noodle`.
    sfg.function("isItalian").returns("bool")(
        sfg.branch(
            sfg.expr(
                "{0} == Noodles::RIGATONI || {0} == Noodles::SPAGHETTI", noodle
            )
        )("return true;")("return false;")
    )
#include "CudaKernels.hpp"
#include <cuda_runtime.h>
#include <experimental/mdspan>
#include <random>
#include <iostream>
#include <functional>
#undef NDEBUG
#include <cassert>
namespace stdex = std::experimental;
using extents_t = stdex::dextents<uint64_t, 3>;
using field_t = stdex::mdspan<double, extents_t, stdex::layout_right>;
/**
 * Aborts the test if a CUDA runtime call failed.
 *
 * Prints the numeric error code together with the runtime's description to
 * stderr and terminates the process with exit status 2.
 *
 * @param err Status returned by a CUDA runtime API call.
 */
void checkCudaError(cudaError_t err)
{
    if (err != cudaSuccess)
    {
        // This is the CUDA harness; label the diagnostic accordingly.
        std::cerr << "CUDA Error: " << err << " (" << cudaGetErrorString(err) << ")" << std::endl;
        exit(2);
    }
}
int main(void)
{
extents_t extents{23, 25, 132};
size_t items{extents.extent(0) * extents.extent(1) * extents.extent(2)};
double *data_src;
checkCudaError(cudaMallocManaged<double>(&data_src, sizeof(double) * items));
field_t src{data_src, extents};
double *data_dst;
checkCudaError(cudaMallocManaged<double>(&data_dst, sizeof(double) * items));
field_t dst{data_dst, extents};
std::random_device rd;
std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i)
{
data_src[i] = distrib(gen);
data_dst[i] = NAN;
}
invoke();
for (size_t i = 0; i < items; ++i)
{
const double desired = 2.0 * data_src[i];
if (std::abs(desired - data_dst[i]) >= 1e-12)
{
std::cerr << "Mismatch at element " << i << "; Desired: " << desired << "; Actual: " << data_dst[i] << std::endl;
exit(EXIT_FAILURE);
}
}
};
check([&]()
{
/* Linear3D Dynamic */
dim3 blockSize{64, 8, 1};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]()
{
/* Linear3D Automatic */
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d_automatic::scaleKernel(dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]()
{
/* Blockwise4D Automatic */
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]()
{
/* Linear3D Manual */
dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); });
check([&]()
{
/* Blockwise4D Manual */
dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1};
cudaStream_t stream;
checkCudaError(cudaStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkCudaError(cudaStreamSynchronize(stream)); });
checkCudaError(cudaFree(data_src));
checkCudaError(cudaFree(data_dst));
return EXIT_SUCCESS;
}
from pystencilssfg import SourceFileGenerator
from pystencilssfg.lang.cpp import std
from pystencilssfg.lang.gpu import cuda
import pystencils as ps
# Generator script: emits one `scaleKernel` wrapper per GPU launch variant,
# each in its own sub-namespace of `gen`.
std.mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")

src, dst = ps.fields("src, dst: double[3D]", layout="c")
asm = ps.Assignment(dst(0), 2 * src(0))

with SourceFileGenerator() as sfg:
    sfg.namespace("gen")

    base_config = ps.CreateKernelConfig(target=ps.Target.CUDA)

    block_size = cuda.dim3().var("blockSize")
    grid_size = cuda.dim3().var("gridSize")
    stream = cuda.stream_t().var("stream")

    #   (namespace, indexing scheme, manual launch grid?, gpu_invoke arguments)
    variants = (
        ("linear3d", "linear3d", False,
         {"block_size": block_size, "stream": stream}),
        ("linear3d_automatic", "linear3d", False,
         {"stream": stream}),
        ("blockwise4d", "blockwise4d", False,
         {"stream": stream}),
        ("linear3d_manual", "linear3d", True,
         {"block_size": block_size, "grid_size": grid_size, "stream": stream}),
        ("blockwise4d_manual", "blockwise4d", True,
         {"block_size": block_size, "grid_size": grid_size, "stream": stream}),
    )

    for ns_name, scheme, manual_grid, launch_args in variants:
        with sfg.namespace(ns_name):
            cfg = base_config.copy()
            cfg.gpu.indexing_scheme = scheme
            #   Only the manual variants touch this option
            if manual_grid:
                cfg.gpu.manual_launch_grid = True

            khandle = sfg.kernels.create(asm, "scale", cfg)

            sfg.function("scaleKernel")(
                #   Map src first, then dst, each to a fresh mdspan reference
                *(
                    sfg.map_field(
                        fld,
                        std.mdspan.from_field(
                            fld, ref=True, layout_policy="layout_right"
                        ),
                    )
                    for fld in (src, dst)
                ),
                sfg.gpu_invoke(khandle, **launch_args),
            )
#include "HipKernels.hpp"
#include <hip/hip_runtime.h>
#include <experimental/mdspan>
#include <random>
#include <iostream>
#include <functional>
#undef NDEBUG
#include <cassert>
namespace stdex = std::experimental;
using extents_t = stdex::dextents<uint64_t, 3>;
using field_t = stdex::mdspan<double, extents_t, stdex::layout_right>;
/**
 * Aborts the test if a HIP runtime call failed.
 *
 * Prints the error code to stderr and terminates with exit status 2.
 *
 * @param err Status returned by a HIP runtime API call.
 */
void checkHipError(hipError_t err)
{
    // Nothing to do on success.
    if (err == hipSuccess)
    {
        return;
    }

    std::cerr << "HIP Error: " << err << std::endl;
    exit(2);
}
int main(void)
{
extents_t extents{23, 25, 132};
size_t items{extents.extent(0) * extents.extent(1) * extents.extent(2)};
double *data_src;
checkHipError(hipMallocManaged<double>(&data_src, sizeof(double) * items));
field_t src{data_src, extents};
double *data_dst;
checkHipError(hipMallocManaged<double>(&data_dst, sizeof(double) * items));
field_t dst{data_dst, extents};
std::random_device rd;
std::mt19937 gen{rd()};
std::uniform_real_distribution<double> distrib{-1.0, 1.0};
auto check = [&](std::function<void()> invoke)
{
for (size_t i = 0; i < items; ++i)
{
data_src[i] = distrib(gen);
data_dst[i] = NAN;
}
invoke();
for (size_t i = 0; i < items; ++i)
{
const double desired = 2.0 * data_src[i];
if (std::abs(desired - data_dst[i]) >= 1e-12)
{
std::cerr << "Mismatch at element " << i << "; Desired: " << desired << "; Actual: " << data_dst[i] << std::endl;
exit(EXIT_FAILURE);
}
}
};
check([&]()
{
/* Linear3D Dynamic */
dim3 blockSize{64, 8, 1};
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::linear3d::scaleKernel(blockSize, dst, src, stream);
checkHipError(hipStreamSynchronize(stream)); });
check([&]()
{
/* Linear3D Automatic */
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::linear3d_automatic::scaleKernel(dst, src, stream);
checkHipError(hipStreamSynchronize(stream)); });
check([&]()
{
/* Blockwise4D Automatic */
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::blockwise4d::scaleKernel(dst, src, stream);
checkHipError(hipStreamSynchronize(stream)); });
check([&]()
{
/* Linear3D Manual */
dim3 blockSize{32, 8, 1};
dim3 gridSize{5, 4, 23};
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::linear3d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkHipError(hipStreamSynchronize(stream)); });
check([&]()
{
/* Blockwise4D Manual */
dim3 blockSize{132, 1, 1};
dim3 gridSize{25, 23, 1};
hipStream_t stream;
checkHipError(hipStreamCreate(&stream));
gen::blockwise4d_manual::scaleKernel(blockSize, dst, gridSize, src, stream);
checkHipError(hipStreamSynchronize(stream)); });
checkHipError(hipFree(data_src));
checkHipError(hipFree(data_dst));
return EXIT_SUCCESS;
}
from pystencilssfg import SourceFileGenerator
from pystencilssfg.lang.cpp import std
from pystencilssfg.lang.gpu import hip
import pystencils as ps
# Generator script: emits one `scaleKernel` wrapper per GPU launch variant,
# each in its own sub-namespace of `gen`.
std.mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")

src, dst = ps.fields("src, dst: double[3D]", layout="c")
asm = ps.Assignment(dst(0), 2 * src(0))

with SourceFileGenerator() as sfg:
    sfg.namespace("gen")

    base_config = ps.CreateKernelConfig(target=ps.Target.HIP)

    block_size = hip.dim3().var("blockSize")
    grid_size = hip.dim3().var("gridSize")
    stream = hip.stream_t().var("stream")

    #   (namespace, indexing scheme, manual launch grid?, gpu_invoke arguments)
    variants = (
        ("linear3d", "linear3d", False,
         {"block_size": block_size, "stream": stream}),
        ("linear3d_automatic", "linear3d", False,
         {"stream": stream}),
        ("blockwise4d", "blockwise4d", False,
         {"stream": stream}),
        ("linear3d_manual", "linear3d", True,
         {"block_size": block_size, "grid_size": grid_size, "stream": stream}),
        ("blockwise4d_manual", "blockwise4d", True,
         {"block_size": block_size, "grid_size": grid_size, "stream": stream}),
    )

    for ns_name, scheme, manual_grid, launch_args in variants:
        with sfg.namespace(ns_name):
            cfg = base_config.copy()
            cfg.gpu.indexing_scheme = scheme
            #   Only the manual variants touch this option
            if manual_grid:
                cfg.gpu.manual_launch_grid = True

            khandle = sfg.kernels.create(asm, "scale", cfg)

            sfg.function("scaleKernel")(
                #   Map src first, then dst, each to a fresh mdspan reference
                *(
                    sfg.map_field(
                        fld,
                        std.mdspan.from_field(
                            fld, ref=True, layout_policy="layout_right"
                        ),
                    )
                    for fld in (src, dst)
                ),
                sfg.gpu_invoke(khandle, **launch_args),
            )
#include "JacobiMdspan.hpp"
#include <experimental/mdspan>
#include <memory>
namespace stdex = std::experimental;
using field_t = stdex::mdspan<double, stdex::extents<int64_t, std::dynamic_extent, std::dynamic_extent>, stdex::layout_left>;
using scalar_field_t = stdex::mdspan<double, stdex::extents<int64_t, std::dynamic_extent, std::dynamic_extent, 1>, stdex::layout_left>;
/**
 * Smoke test for the generated Jacobi smoother: allocates three 8x8 buffers,
 * wraps them in mdspans and invokes `gen::jacobi_smooth` once.
 * No results are checked; the test only has to compile and run.
 */
int main(void)
{
    constexpr int cells_per_dim = 8;
    constexpr int num_cells = cells_per_dim * cells_per_dim;

    // Right-hand side; its third extent is the static extent 1.
    auto rhs_storage = std::make_unique<double[]>(num_cells);
    scalar_field_t f{rhs_storage.get(), cells_per_dim, cells_per_dim};

    // Current iterate.
    auto u_storage = std::make_unique<double[]>(num_cells);
    field_t u{u_storage.get(), cells_per_dim, cells_per_dim};

    // Buffer receiving the smoothed values.
    auto u_tmp_storage = std::make_unique<double[]>(num_cells);
    field_t u_tmp{u_tmp_storage.get(), cells_per_dim, cells_per_dim};

    double h{1.0 / 7.0};

    gen::jacobi_smooth(f, h, u_tmp, u);
}
import sympy as sp
from pystencils import fields, kernel
from pystencilssfg import SourceFileGenerator
from pystencilssfg.lang.cpp.std import mdspan
# Generator script: a 2D Jacobi smoother for the Poisson equation whose
# fields are passed in as layout_left mdspans.
mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")

with SourceFileGenerator() as sfg:
    sfg.namespace("gen")

    u_src, u_dst, f = fields("u_src, u_dst, f(1) : double[2D]", layout="fzyx")
    h = sp.Symbol("h")

    @kernel
    def poisson_jacobi():
        #   Average of the four neighbours plus the scaled right-hand side
        u_dst[0, 0] @= (
            h**2 * f[0, 0] + u_src[1, 0] + u_src[-1, 0] + u_src[0, 1] + u_src[0, -1]
        ) / 4

    poisson_kernel = sfg.kernels.create(poisson_jacobi)

    #   Every field is bound to an mdspan of the same layout policy
    field_mappings = [
        sfg.map_field(fld, mdspan.from_field(fld, layout_policy="layout_left"))
        for fld in (u_src, u_dst, f)
    ]

    sfg.function("jacobi_smooth")(
        *field_mappings,
        sfg.call(poisson_kernel),
    )
#include "MdSpanLayouts.hpp"
#include <concepts>
#include <experimental/mdspan>
namespace stdex = std::experimental;
// Compile-time checks on the generated mdspan aliases: each alias must use
// the expected layout policy ...
static_assert( std::is_same_v< gen::field_soa::layout_type, stdex::layout_left > );
static_assert( std::is_same_v< gen::field_aos::layout_type, stdex::layout_stride > );
static_assert( std::is_same_v< gen::field_c::layout_type, stdex::layout_right > );
// ... and field_soa must carry the static extents 17 x 19 x 32 x 9.
static_assert( gen::field_soa::static_extent(0) == 17 );
static_assert( gen::field_soa::static_extent(1) == 19 );
static_assert( gen::field_soa::static_extent(2) == 32 );
static_assert( gen::field_soa::static_extent(3) == 9 );
int main(void) {
gen::field_soa f_soa { nullptr };
gen::checkLayoutSoa(f_soa);
gen::field_aos::extents_type f_aos_extents { };
std::array< uint64_t, 4 > strides_aos {
/* stride(x) */ f_aos_extents.extent(3),
/* stride(y) */ f_aos_extents.extent(3) * f_aos_extents.extent(0),
/* stride(z) */ f_aos_extents.extent(3) * f_aos_extents.extent(0) * f_aos_extents.extent(1),
/* stride(f) */ 1
};
gen::field_aos::mapping_type f_aos_mapping { f_aos_extents, strides_aos };
gen::field_aos f_aos { nullptr, f_aos_mapping };
gen::checkLayoutAos(f_aos);
gen::field_c f_c { nullptr };
gen::checkLayoutC(f_c);
}
import pystencils as ps
from pystencilssfg import SourceFileGenerator
from pystencilssfg.lang.cpp import std
from pystencilssfg.lang import strip_ptr_ref
# Generator script: for three memory layouts, emits an mdspan alias and a
# function asserting that the mdspan's shape and strides match the field's.
std.mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")

with SourceFileGenerator() as sfg:
    sfg.namespace("gen")
    sfg.include("<cassert>")

    def check_layout(field: ps.Field, mdspan: std.mdspan):
        """Returns one shape assert and one stride assert per field coordinate."""
        checks = []
        rank = field.spatial_dimensions + field.index_dimensions
        for coord in range(rank):
            checks.append(
                sfg.expr(
                    'assert({} == {} && "Shape mismatch at coordinate {}");',
                    mdspan.extent(coord),
                    field.shape[coord],
                    coord,
                )
            )
            checks.append(
                sfg.expr(
                    'assert({} == {} && "Stride mismatch at coordinate {}");',
                    mdspan._extract_stride(coord),
                    field.strides[coord],
                    coord,
                )
            )
        return checks

    #   (field spec, field layout, alias name, check function, extra mdspan args)
    #   The AoS variant passes no layout policy.
    variants = (
        ("f_soa(9): double[17, 19, 32]", "soa", "field_soa", "checkLayoutSoa",
         {"layout_policy": "layout_left"}),
        ("f_aos(9): double[17, 19, 32]", "aos", "field_aos", "checkLayoutAos",
         {}),
        ("f_c(9): double[17, 19, 32]", "c", "field_c", "checkLayoutC",
         {"layout_policy": "layout_right"}),
    )

    for spec, layout, alias, checker, policy_args in variants:
        field = ps.fields(spec, layout=layout)
        field_mdspan = std.mdspan.from_field(field, ref=True, **policy_args)
        sfg.code(f"using {alias} = {strip_ptr_ref(field_mdspan.dtype)};")
        sfg.function(checker)(*check_layout(field, field_mdspan))
#include "MdSpanLbStreaming.hpp"
#include <concepts>
#include <experimental/mdspan>
#include <array>
#include <cassert>
#include <memory>
#include <span>
namespace stdex = std::experimental;
// Each generated field alias must use the layout policy matching its name.
static_assert(std::is_same_v<gen::field_fzyx::layout_type, stdex::layout_left>);
static_assert(std::is_same_v<gen::field_zyxf::layout_type, stdex::layout_stride>);
static_assert(std::is_same_v<gen::field_c::layout_type, stdex::layout_right>);
// Extents: three dynamic coordinates plus a static fourth extent of 6,
// indexed with int64_t.
using shape_type = stdex::extents< int64_t, std::dynamic_extent, std::dynamic_extent, std::dynamic_extent, 6 >;
static_assert(std::is_same_v<gen::field_fzyx::extents_type, shape_type >);
// Dynamic extents used by every test case: 16 x 15 x 14.
constexpr shape_type field_shape { 16l, 15l, 14l };
// Sub-box populated by test_streaming: slice[0] holds the inclusive lower
// and slice[1] the exclusive upper (x, y, z) loop bounds.
constexpr std::array<std::array<int64_t, 3>, 2> slice{
{{3, 4, 5},
{7, 10, 12}}};
/**
 * Runs one streaming step and checks that populations moved as expected.
 *
 * Both fields are zeroed via `kernel.setZero`, every entry `(x, y, z, i)` of
 * `src_field` inside `slice` is set to `i`, the kernel is applied, and the
 * value `i` is then expected in `dst_field` at the cell shifted by
 * `gen::STENCIL[i]`.
 *
 * @param kernel    Object providing `setZero(field)` and `operator()(dst, src)`.
 * @param src_field Field read by the streaming kernel.
 * @param dst_field Field written by the streaming kernel.
 */
template <typename Kernel, typename PdfField>
void test_streaming(Kernel &kernel, PdfField &src_field, PdfField &dst_field)
{
    const std::array<int64_t, 3> &lower = slice[0];
    const std::array<int64_t, 3> &upper = slice[1];
    const int64_t num_directions = int64_t(gen::STENCIL.size());

    // Calls visit(x, y, z, i) for every slice entry; z is the outermost
    // loop and the direction index i the innermost.
    auto for_each_slice_entry = [&](auto &&visit)
    {
        for (int64_t z = lower[2]; z < upper[2]; ++z)
            for (int64_t y = lower[1]; y < upper[1]; ++y)
                for (int64_t x = lower[0]; x < upper[0]; ++x)
                    for (int64_t i = 0; i < num_directions; ++i)
                    {
                        visit(x, y, z, i);
                    }
    };

    kernel.setZero(src_field);
    kernel.setZero(dst_field);

    for_each_slice_entry([&](int64_t x, int64_t y, int64_t z, int64_t i)
                         { src_field(x, y, z, i) = double(i); });

    kernel(dst_field, src_field);

    for_each_slice_entry([&](int64_t x, int64_t y, int64_t z, int64_t i)
                         {
        const std::array<int64_t, 3> &offsets = gen::STENCIL[i];
        assert((dst_field(x + offsets[0], y + offsets[1], z + offsets[2], i) == double(i))); });
}
int main(void)
{
constexpr size_t num_items { (size_t) field_shape.extent(0) * field_shape.extent(1) * field_shape.extent(2) * field_shape.extent(3) };
auto src_data = std::make_unique< double [] >( num_items );
auto dst_data = std::make_unique< double [] >( num_items );
// Structure-of-Arrays
{
gen::Kernel_fzyx kernel;
gen::field_fzyx src_arr { src_data.get(), field_shape };
gen::field_fzyx dst_arr { dst_data.get(), field_shape };
test_streaming(kernel, src_arr, dst_arr );
}
// Array-of-Structures
{
gen::Kernel_zyxf kernel;
std::array< uint64_t, 4 > strides_xyzf {
/* stride(x) */ field_shape.extent(3),
/* stride(y) */ field_shape.extent(3) * field_shape.extent(0),
/* stride(z) */ field_shape.extent(3) * field_shape.extent(0) * field_shape.extent(1),
/* stride(f) */ 1
};
gen::field_zyxf::mapping_type zyxf_mapping { field_shape, strides_xyzf };
gen::field_zyxf src_arr { src_data.get(), zyxf_mapping };
gen::field_zyxf dst_arr { dst_data.get(), zyxf_mapping };
test_streaming(kernel, src_arr, dst_arr );
}
// C Row-Major
{
gen::Kernel_c kernel;
gen::field_c src_arr { src_data.get(), field_shape };
gen::field_c dst_arr { dst_data.get(), field_shape };
test_streaming(kernel, src_arr, dst_arr );
}
}
import numpy as np
import pystencils as ps
from pystencilssfg import SourceFileGenerator, SfgComposer
from pystencilssfg.lang.cpp import std
from pystencilssfg.lang import strip_ptr_ref
std.mdspan.configure(namespace="std::experimental", header="<experimental/mdspan>")

#   Six axis-aligned directions; entry i is the offset of population i.
stencil = ((-1, 0, 0), (1, 0, 0), (0, -1, 0), (0, 1, 0), (0, 0, 1), (0, 0, -1))


def lbm_stream(sfg: SfgComposer, field_layout: str, layout_policy: str):
    """Emits a field alias and a kernel class for one memory layout.

    Generates `field_<field_layout>` (the mdspan type of the source field) and
    `Kernel_<field_layout>` with two methods: `operator()`, which pulls each
    population from the neighbour opposite to its stencil direction, and
    `setZero`, which writes zero to every population of its argument.

    :param sfg: Composer that receives the generated code.
    :param field_layout: pystencils layout name, also used in generated names.
    :param layout_policy: mdspan layout policy for both fields.
    """
    src, dst = ps.fields("src(6), dst(6): double[3D]", layout=field_layout)

    def as_mdspan(field):
        return std.mdspan.from_field(
            field, layout_policy=layout_policy, extents_type="int64", ref=True
        )

    src_mdspan = as_mdspan(src)
    dst_mdspan = as_mdspan(dst)

    #   dst(x, i) = src(x - c_i, i)
    stream_asms = [
        ps.Assignment(dst.center(i), src[-np.array(direction)](i))
        for i, direction in enumerate(stencil)
    ]
    zero_asms = [ps.Assignment(dst.center(i), 0) for i in range(len(stencil))]

    khandle = sfg.kernels.create(stream_asms, f"stream_{field_layout}")
    khandle_zero = sfg.kernels.create(zero_asms, f"zero_{field_layout}")

    sfg.code(f"using field_{field_layout} = {strip_ptr_ref(src_mdspan.get_dtype())};")

    stream_method = sfg.method("operator()")(
        sfg.map_field(src, src_mdspan),
        sfg.map_field(dst, dst_mdspan),
        sfg.call(khandle),
    )
    zero_method = sfg.method("setZero")(
        sfg.map_field(dst, dst_mdspan),
        sfg.call(khandle_zero),
    )

    sfg.klass(f"Kernel_{field_layout}")(sfg.public(stream_method, zero_method))
# Script body: emits the STENCIL constant, then one alias/kernel-class pair
# per memory layout.
with SourceFileGenerator() as sfg:
    sfg.namespace("gen")
    sfg.include("<cassert>")
    sfg.include("<array>")

    #   One brace-enclosed row per direction; the rows are wrapped in double
    #   braces as the initializer of the nested std::array.
    rows = ["{" + ", ".join(str(ci) for ci in c) + "}" for c in stencil]
    stencil_code = "{{" + ", ".join(rows) + "}}"

    sfg.code(
        f"constexpr std::array< std::array< int64_t, 3 >, 6 > STENCIL = {stencil_code};"
    )

    layouts = (
        ("fzyx", "layout_left"),
        ("c", "layout_right"),
        ("zyxf", "layout_stride"),
    )
    for field_layout, layout_policy in layouts:
        lbm_stream(sfg, field_layout, layout_policy)
#include "NestedNamespaces.hpp"
// Compile-time checks: each generated constant must be reachable under its
// expected (possibly nested) namespace and hold the expected value.
static_assert( outer::X == 13 );
static_assert( outer::inner::Y == 52 );
static_assert( outer::Z == 41 );
static_assert( outer::second_inner::W == 91 );
static_assert( outer::inner::innermost::V == 29 );
// GLOBAL is checked at global scope, outside any namespace.
static_assert( GLOBAL == 42 );
// All checks above happen at compile time; main has nothing to do at run time.
int main() {
return 0;
}
from pystencilssfg import SourceFileGenerator
# Generator script: places integer constants into nested, re-opened and
# qualified namespaces, and one at global scope.
with SourceFileGenerator() as sfg:

    def emit_constant(name: str, value: int):
        """Emits `constexpr int <name> = <value>;` into the active namespace."""
        sfg.code(f"constexpr int {name} = {value};")

    with sfg.namespace("outer"):
        emit_constant("X", 13)

        with sfg.namespace("inner"):
            emit_constant("Y", 52)

        #   Back in `outer` after the nested block has closed
        emit_constant("Z", 41)

    #   Qualified names open the nested namespaces in one step
    with sfg.namespace("outer::second_inner"):
        emit_constant("W", 91)

    with sfg.namespace("outer::inner::innermost"):
        emit_constant("V", 29)

    emit_constant("GLOBAL", 42)
#include "ScaleKernel.hpp"
#include <vector>
// Assertions are the only checks in this test, so they must stay active in
// every build type: NDEBUG has to be undefined (not defined) before <cassert>.
#undef NDEBUG
#include <cassert>

/**
 * Harness for the generated `gen::Scale` functor: fills `src` with ones,
 * applies the functor with factor `alpha`, and expects every element of
 * `dst` to equal `alpha` afterwards.
 */
int main(void){
    std::vector< float > src;
    src.resize(gen::N);

    std::vector< float > dst;
    dst.resize(gen::N);

    for(int i = 0; i < gen::N; ++i){
        src[i] = 1.0f;
    }

    float alpha = 2.5f;

    gen::Scale scale{ alpha };
    scale(dst.data(), src.data());

    // 1.0f * alpha is exact, so comparing with == is safe here.
    for(int i = 0; i < gen::N; ++i){
        assert( dst[i] == alpha );
    }
}
from pystencils import TypedSymbol, fields, kernel
from pystencilssfg import SourceFileGenerator
# Generator script: emits `gen::N` and the functor class `gen::Scale`, which
# stores a factor `alpha` and computes dst = alpha * src on N elements.
with SourceFileGenerator() as sfg:
    sfg.namespace("gen")

    N = 10
    α = TypedSymbol("alpha", "float32")
    src, dst = fields(f"src, dst: float32[{N}]")

    @kernel
    def scale():
        #   Write to dst and read from src: the test harness fills `src`,
        #   passes (dst, src) and checks `dst`. The previous assignment had
        #   the two fields swapped and overwrote the input instead.
        dst[0] @= α * src.center()

    khandle = sfg.kernels.create(scale)

    sfg.code(f"constexpr int N = {N};")

    sfg.klass("Scale")(
        sfg.private(α),
        sfg.public(
            sfg.constructor(α).init(α)(α),
            #   Bind the kernel parameter `alpha` to the stored member
            sfg.method("operator()")(sfg.init(α)(f"this->{α}"), sfg.call(khandle)),
        ),
    )
#include "SimpleClasses.hpp"
#define NDEBUG
#include <cassert>
int main(void){
Point p { 3, 1, -4 };
assert(p.getX() == 3);
SpecialPoint q { 0, 1, 2 };
assert(q.getY() == 1);
}
from pystencilssfg import SourceFileGenerator
# Generator script: a `Point` class with three protected members and a
# derived `SpecialPoint` that takes over Point's constructors.
with SourceFileGenerator() as sfg:

    def int64_vars(*names: str):
        """Creates one int64_t variable per name, in the given order."""
        return tuple(sfg.var(name, "int64_t") for name in names)

    def getter(name: str, member: str):
        """Builds an inline const method returning `this-><member>`."""
        return sfg.method(name).returns("const int64_t").const().inline()(
            f"return this->{member};"
        )

    #   Members first, then the constructor parameters
    x_, y_, z_ = int64_vars("x_", "y_", "z_")
    x, y, z = int64_vars("x", "y", "z")

    sfg.klass("Point")(
        sfg.public(
            sfg.constructor(x, y, z).init(x_)(x).init(y_)(y).init(z_)(z),
            getter("getX", "x_"),
        ),
        sfg.protected(x_, y_, z_),
    )

    sfg.klass("SpecialPoint", bases=["public Point"])(
        sfg.public(
            "using Point::Point;",
            getter("getY", "y_"),
        )
    )