Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing with 986 additions and 420 deletions
from typing import Any, Sequence, cast
from dataclasses import dataclass

try:
    import cupy as cp

    HAVE_CUPY = True
except ImportError:
    HAVE_CUPY = False

from ..codegen import Target
from ..field import FieldType
from ..types import PsType
from .jit import JitBase, JitError, KernelWrapper

from ..codegen import (
    Kernel,
    GpuKernel,
    Parameter,
)
from ..codegen.properties import FieldShape, FieldStride, FieldBasePtr
from ..types import PsStructType, PsPointerType

from ..include import get_pystencils_include_path


@dataclass
class LaunchGrid:
    grid: tuple[int, int, int]
    block: tuple[int, int, int]


class CupyKernelWrapper(KernelWrapper):
    def __init__(
        self,
        kfunc: GpuKernel,
        raw_kernel: Any,
        block_size: tuple[int, int, int],
    ):
        self._kfunc: GpuKernel = kfunc
        self._raw_kernel = raw_kernel
        self._block_size = block_size
        self._num_blocks: tuple[int, int, int] | None = None
        self._args_cache: dict[Any, tuple] = dict()

    @property
    def kernel_function(self) -> GpuKernel:
        return self._kfunc

    @property
    def raw_kernel(self):
        return self._raw_kernel

    @property
    def block_size(self) -> tuple[int, int, int]:
        return self._block_size

    @block_size.setter
    def block_size(self, bs: tuple[int, int, int]):
        self._block_size = bs

    @property
    def num_blocks(self) -> tuple[int, int, int] | None:
        return self._num_blocks

    @num_blocks.setter
    def num_blocks(self, nb: tuple[int, int, int] | None):
        self._num_blocks = nb

    def __call__(self, **kwargs: Any):
        kernel_args, launch_grid = self._get_cached_args(**kwargs)
        device = self._get_device(kernel_args)
        with cp.cuda.Device(device):
            self._raw_kernel(launch_grid.grid, launch_grid.block, kernel_args)

    def _get_device(self, kernel_args):
        devices = set(a.device.id for a in kernel_args if type(a) is cp.ndarray)
        if len(devices) != 1:
            raise JitError("Could not determine CUDA device to execute on")
        return devices.pop()

    def _get_cached_args(self, **kwargs):
        key = (self._block_size, self._num_blocks) + tuple(
            (k, id(v)) for k, v in kwargs.items()
        )

        if key not in self._args_cache:
            args = self._get_args(**kwargs)
            self._args_cache[key] = args
            return args
        else:
            return self._args_cache[key]

    def _get_args(self, **kwargs) -> tuple[tuple, LaunchGrid]:
        args = []
        valuation: dict[str, Any] = dict()

        def add_arg(name: str, arg: Any, dtype: PsType):
            nptype = dtype.numpy_dtype
            assert nptype is not None
            typecast = nptype.type
            arg = typecast(arg)
            args.append(arg)
            valuation[name] = arg

        field_shapes = set()
        index_shapes = set()

        def check_shape(field_ptr: Parameter, arr: cp.ndarray):
            field = field_ptr.fields[0]

            if field.has_fixed_shape:
                expected_shape = tuple(int(s) for s in field.shape)
                if isinstance(field.dtype, PsStructType):
                    assert expected_shape[-1] == 1
                    expected_shape = expected_shape[:-1]

                actual_shape = arr.shape
                if expected_shape != actual_shape:
                    raise ValueError(
                        f"Array kernel argument {field.name} had unexpected shape:\n"
                        f"   Expected {expected_shape}, but got {actual_shape}"
                    )

                expected_strides = tuple(int(s) for s in field.strides)
                if isinstance(field.dtype, PsStructType):
                    assert expected_strides[-1] == 1
                    expected_strides = expected_strides[:-1]

                actual_strides = tuple(s // arr.dtype.itemsize for s in arr.strides)
                if expected_strides != actual_strides:
                    raise ValueError(
                        f"Array kernel argument {field.name} had unexpected strides:\n"
                        f"   Expected {expected_strides}, but got {actual_strides}"
                    )

            match field.field_type:
                case FieldType.GENERIC:
                    field_shapes.add(arr.shape[: field.spatial_dimensions])

                    if len(field_shapes) > 1:
                        raise ValueError(
                            "Incompatible array shapes: "
                            "All arrays passed for generic fields to a kernel must have the same shape."
                        )

                case FieldType.INDEXED:
                    index_shapes.add(arr.shape)

                    if len(index_shapes) > 1:
                        raise ValueError(
                            "Incompatible array shapes: "
                            "All arrays passed for index fields to a kernel must have the same shape."
                        )

        #   Collect parameter values
        arr: cp.ndarray

        for kparam in self._kfunc.parameters:
            if kparam.is_field_parameter:
                #   Determine field-associated data to pass in
                for prop in kparam.properties:
                    match prop:
                        case FieldBasePtr(field):
                            elem_dtype: PsType

                            from .. import DynamicType

                            if isinstance(field.dtype, DynamicType):
                                assert isinstance(kparam.dtype, PsPointerType)
                                elem_dtype = kparam.dtype.base_type
                            else:
                                elem_dtype = field.dtype

                            arr = kwargs[field.name]
                            if arr.dtype != elem_dtype.numpy_dtype:
                                raise JitError(
                                    f"Data type mismatch at array argument {field.name}: "
                                    f"Expected {field.dtype}, got {arr.dtype}"
                                )
                            check_shape(kparam, arr)
                            args.append(arr)
                            break

                        case FieldShape(field, coord):
                            arr = kwargs[field.name]
                            add_arg(kparam.name, arr.shape[coord], kparam.dtype)
                            break

                        case FieldStride(field, coord):
                            arr = kwargs[field.name]
                            add_arg(
                                kparam.name,
                                arr.strides[coord] // arr.dtype.itemsize,
                                kparam.dtype,
                            )
                            break
            else:
                #   Scalar parameter
                val: Any = kwargs[kparam.name]
                add_arg(kparam.name, val, kparam.dtype)

        #   Determine launch grid
        from ..backend.ast.expressions import evaluate_expression

        symbolic_threads_range = self._kfunc.threads_range

        if self._num_blocks is not None:
            launch_grid = LaunchGrid(self._num_blocks, self._block_size)

        elif symbolic_threads_range is not None:
            threads_range: list[int] = [
                evaluate_expression(expr, valuation)
                for expr in symbolic_threads_range.num_work_items
            ]

            if symbolic_threads_range.dim < 3:
                threads_range += [1] * (3 - symbolic_threads_range.dim)

            def div_ceil(a, b):
                return a // b if a % b == 0 else a // b + 1

            #   TODO: Refine this?
            num_blocks = tuple(
                div_ceil(threads, tpb)
                for threads, tpb in zip(threads_range, self._block_size)
            )
            assert len(num_blocks) == 3

            launch_grid = LaunchGrid(num_blocks, self._block_size)

        else:
            raise JitError(
                "Unable to determine launch grid for GPU kernel invocation: "
                "No manual grid size was specified, and the number of threads could not "
                "be determined automatically."
            )

        return tuple(args), launch_grid
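
A quick worked instance of the launch-grid arithmetic above (numbers are illustrative; the block size is the `CupyJit` default):

    def div_ceil(a: int, b: int) -> int:
        return a // b if a % b == 0 else a // b + 1

    threads = [64, 64, 1]   # illustrative iteration-space extent per axis
    block = (128, 2, 1)     # default CupyJit block size
    blocks = tuple(div_ceil(t, b) for t, b in zip(threads, block))
    assert blocks == (1, 32, 1)   # one block in x, 32 in y, 1 in z
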
class CupyJit(JitBase):

    def __init__(self, default_block_size: Sequence[int] = (128, 2, 1)):
        self._runtime_headers = {"<cstdint>"}

        if len(default_block_size) > 3:
            raise ValueError(
                f"Invalid block size: {default_block_size}. Must be at most three-dimensional."
            )

        self._default_block_size: tuple[int, int, int] = cast(
            tuple[int, int, int],
            tuple(default_block_size) + (1,) * (3 - len(default_block_size)),
        )

    def compile(self, kfunc: Kernel) -> KernelWrapper:
        if not HAVE_CUPY:
            raise JitError(
                "`cupy` is not installed: just-in-time-compilation of CUDA kernels is unavailable."
            )

        if not isinstance(kfunc, GpuKernel) or kfunc.target != Target.CUDA:
            raise ValueError(
                "The CupyJit just-in-time compiler only accepts kernels generated for CUDA or HIP"
            )

        options = self._compiler_options()
        prelude = self._prelude(kfunc)
        kernel_code = self._kernel_code(kfunc)
        code = prelude + kernel_code

        raw_kernel = cp.RawKernel(
            code, kfunc.name, options=options, backend="nvrtc", jitify=True
        )
        return CupyKernelWrapper(kfunc, raw_kernel, self._default_block_size)

    def _compiler_options(self) -> tuple[str, ...]:
        options = ["-w", "-std=c++11"]
        options.append("-I" + get_pystencils_include_path())
        return tuple(options)

    def _prelude(self, kfunc: GpuKernel) -> str:
        headers = self._runtime_headers
        headers |= kfunc.required_headers

        if '"half_precision.h"' in headers:
            headers.remove('"half_precision.h"')
            if cp.cuda.runtime.is_hip:
                headers.add("<hip/hip_fp16.h>")
            else:
                headers.add("<cuda_fp16.h>")

        code = "\n".join(f"#include {header}" for header in headers)
        code += "\n\n#define RESTRICT __restrict__\n\n"
        return code

    def _kernel_code(self, kfunc: GpuKernel) -> str:
        kernel_code = kfunc.get_c_code()
        return f'extern "C" {kernel_code}'
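
A usage sketch for `CupyJit` (hedged: the `ps.fields`, `ps.Assignment`, and `ps.create_kernel` calls are assumed from the public pystencils API; array sizes and block sizes are illustrative):

    import cupy as cp
    import pystencils as ps

    f, g = ps.fields("f, g: float64[2D]")
    update = ps.Assignment(g[0, 0], 2 * f[0, 0])
    kernel = ps.create_kernel(update, target=ps.Target.CUDA)  # assumed entry point

    jit = CupyJit(default_block_size=(64, 4, 1))
    kfunc = jit.compile(kernel)
    kfunc.block_size = (128, 2, 1)     # the launch configuration can be overridden later

    f_arr = cp.ones((32, 32))
    g_arr = cp.zeros_like(f_arr)
    kfunc(f=f_arr, g=g_arr)            # field names become keyword arguments
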
from __future__ import annotations

from typing import Sequence, TYPE_CHECKING
from abc import ABC, abstractmethod

if TYPE_CHECKING:
    from ..codegen import Kernel, Parameter, Target


class JitError(Exception):
    """Indicates an error during just-in-time compilation"""


class KernelWrapper(ABC):
    """Wrapper around a compiled and executable pystencils kernel."""

    def __init__(self, kfunc: Kernel) -> None:
        self._kfunc = kfunc

    @abstractmethod
    def __call__(self, **kwargs) -> None:
        pass

    @property
    def kernel_function(self) -> Kernel:
        return self._kfunc

    @property
    def ast(self) -> Kernel:
        return self._kfunc

    @property
    def target(self) -> Target:
        return self._kfunc.target

    @property
    def parameters(self) -> Sequence[Parameter]:
        return self._kfunc.parameters

    @property
    def code(self) -> str:
        from pystencils.display_utils import get_code_str

        return get_code_str(self._kfunc)


class JitBase(ABC):
    """Base class for just-in-time compilation interfaces implemented in pystencils."""

    @abstractmethod
    def compile(self, kernel: Kernel) -> KernelWrapper:
        """Compile a kernel function and return a callable object which invokes the kernel."""


class NoJit(JitBase):
    """Not a JIT compiler: Used to explicitly disable JIT compilation on an AST."""

    def compile(self, kernel: Kernel) -> KernelWrapper:
        raise JitError(
            "Just-in-time compilation of this kernel was explicitly disabled."
        )
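
To illustrate the contract, a minimal hypothetical backend that satisfies `JitBase` without doing any real compilation; `EchoJit` and `_EchoWrapper` are names invented for this sketch:

    class _EchoWrapper(KernelWrapper):
        # Prints the generated code instead of executing the kernel
        def __call__(self, **kwargs) -> None:
            print(self.code)   # `code` is inherited from KernelWrapper above


    class EchoJit(JitBase):
        # "Compiles" a kernel into a printing stub
        def compile(self, kernel: Kernel) -> KernelWrapper:
            return _EchoWrapper(kernel)
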
# mypy: ignore-errors
r"""
*pystencils* automatically searches for a compiler, so in most cases no explicit configuration is required.
On Linux make sure that 'gcc' and 'g++' are installed and in your path.
On Windows a recent Visual Studio installation is required.
In case anything does not work as expected or a special compiler should be used, changes can be specified
in a configuration file.
*pystencils* looks for a configuration file in JSON format at the following locations in the listed order.
1. at the path specified in the environment variable ``PYSTENCILS_CONFIG``
2. in the current working directory, in a file named ``pystencils.json``
3. in your home directory at ``~/.config/pystencils/config.json`` (Linux) or
   ``%HOMEPATH%\.pystencils\config.json`` (Windows)
If no configuration file is found, a default configuration is created at the above-mentioned location in your home
directory. So run *pystencils* once, then edit the created configuration file.
Compiler Config (Linux)
-----------------------
- **'os'**: should be detected automatically as 'linux'
- **'command'**: path to C++ compiler (defaults to 'g++')
- **'flags'**: space separated list of compiler flags. Make sure to activate OpenMP in your compiler
- **'restrict_qualifier'**: the 'restrict' qualifier is not standardized across compilers.
For most Linux compilers the qualifier is ``__restrict__``
Compiler Config (Windows)
-------------------------
*pystencils* uses the mechanism of *setuptools.msvc* to search for a compilation environment.
Then 'cl.exe' is used to compile.
- **'os'**: should be detected automatically as 'windows'
- **'msvc_version'**: a version number, a year number, 'auto' or 'latest' (for automatic detection of the latest
                      installed version), or 'setuptools' for setuptools-based detection. Alternatively, the path to
                      the folder where Visual Studio is installed; this path has to contain a file called 'vcvarsall.bat'
- **'arch'**: 'x86' or 'x64'
- **'flags'**: flags passed to 'cl.exe', make sure OpenMP is activated
- **'restrict_qualifier'**: the 'restrict' qualifier is not standardized across compilers.
For Windows compilers the qualifier should be ``__restrict``
"""
from appdirs import user_cache_dir, user_config_dir
from collections import OrderedDict
from typing import Callable
import importlib.util
import json
import os
import platform
import shutil
import subprocess
import sysconfig
import tempfile
import time
import warnings
from ..codegen import Kernel
from .jit import JitBase, KernelWrapper
from .cpu_extension_module import PsKernelExtensioNModule
from .msvc_detection import get_environment
from pystencils.include import get_pystencils_include_path
from pystencils.utils import atomic_file_write, recursive_dict_update
class CpuKernelWrapper(KernelWrapper):
    def __init__(self, kfunc: Kernel, compiled_kernel: Callable[..., None]) -> None:
        super().__init__(kfunc)
        self._compiled_kernel = compiled_kernel

    def __call__(self, **kwargs) -> None:
        self._compiled_kernel(**kwargs)

    @property
    def kernel(self) -> Callable[..., None]:
        return self._compiled_kernel


class LegacyCpuJit(JitBase):
    """Wrapper around ``pystencils.cpu.cpujit``"""

    def compile(self, kernel: Kernel) -> KernelWrapper:
        return compile_and_load(kernel)
def make_python_function(kernel_function_node, custom_backend=None):
    """
    Creates C code from the abstract syntax tree, compiles it, and makes it accessible as a Python function.

    The parameters of the kernel are:
        - numpy arrays for each field used in the kernel. The keyword argument name is the name of the field
        - all symbols which are not defined in the kernel itself are expected as parameters

    :param kernel_function_node: the abstract syntax tree
    :param custom_backend: use your own custom printer for code generation
    :return: kernel functor
    """
    result = compile_and_load(kernel_function_node, custom_backend)
    return result
def set_config(config):
    """
    Override the configuration provided in the config file.

    Configuration of compiler parameters:
    If this function is not called, the configuration is taken from a config file in JSON format which
    is searched for in the following locations in the order specified:
        - at the location provided in the environment variable ``PYSTENCILS_CONFIG`` (if this variable exists)
        - a file called ``pystencils.json`` in the current working directory
        - ``~/.config/pystencils/config.json`` in your home directory

    If none of these files exist, a default configuration file is created at the last location.

    An example JSON file with all possible keys. If not all keys are specified, default values are used.

    ``
    {
        "compiler": {
            "command": "/software/intel/2017/bin/icpc",
            "flags": "-Ofast -DNDEBUG -fPIC -march=native -fopenmp",
            "env": {
                "LM_PROJECT": "iwia"
            }
        }
    }
    ``
    """
    global _config
    _config = config.copy()
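
A sketch of overriding the configuration programmatically via `set_config` (values are illustrative; since the passed dict replaces the whole configuration, both the ``compiler`` and ``cache`` sections should be present):

    set_config({
        "compiler": {
            "os": "linux",
            "command": "clang++",
            "flags": "-O3 -DNDEBUG -fPIC -march=native -fopenmp -std=c++11",
            "restrict_qualifier": "__restrict__",
        },
        "cache": {"object_cache": False},   # skip the on-disk object cache entirely
    })
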
def get_configuration_file_path():
    config_path_in_home = os.path.join(user_config_dir("pystencils"), "config.json")

    # 1) Read path from environment variable if found
    if "PYSTENCILS_CONFIG" in os.environ:
        return os.environ["PYSTENCILS_CONFIG"], True
    # 2) Look in current directory for pystencils.json
    elif os.path.exists("pystencils.json"):
        return "pystencils.json", True
    # 3) Try ~/.config/pystencils/config.json
    elif os.path.exists(config_path_in_home):
        return config_path_in_home, True
    else:
        return config_path_in_home, False
def create_folder(path, is_file):
    if is_file:
        path = os.path.split(path)[0]
    try:
        os.makedirs(path)
    except os.error:
        pass
def read_config():
    if platform.system().lower() == "linux":
        default_compiler_config = OrderedDict(
            [
                ("os", "linux"),
                ("command", "g++"),
                ("flags", "-Ofast -DNDEBUG -fPIC -march=native -fopenmp -std=c++11"),
                ("restrict_qualifier", "__restrict__"),
            ]
        )
        if platform.machine().startswith("ppc64") or platform.machine() == "arm64":
            default_compiler_config["flags"] = default_compiler_config["flags"].replace(
                "-march=native", "-mcpu=native"
            )
    elif platform.system().lower() == "windows":
        default_compiler_config = OrderedDict(
            [
                ("os", "windows"),
                ("msvc_version", "latest"),
                ("arch", "x64"),
                ("flags", "/Ox /fp:fast /OpenMP /arch:avx"),
                ("restrict_qualifier", "__restrict"),
            ]
        )
        if platform.machine() == "ARM64":
            default_compiler_config["arch"] = "ARM64"
            default_compiler_config["flags"] = default_compiler_config["flags"].replace(
                " /arch:avx", ""
            )
    elif platform.system().lower() == "darwin":
        default_compiler_config = OrderedDict(
            [
                ("os", "darwin"),
                ("command", "clang++"),
                (
                    "flags",
                    "-Ofast -DNDEBUG -fPIC -march=native -Xclang -fopenmp -std=c++11",
                ),
                ("restrict_qualifier", "__restrict__"),
            ]
        )
        if platform.machine() == "arm64":
            default_compiler_config["flags"] = default_compiler_config["flags"].replace(
                "-march=native ", ""
            )
        for libomp in [
            "/opt/local/lib/libomp/libomp.dylib",
            "/usr/local/lib/libomp.dylib",
            "/opt/homebrew/lib/libomp.dylib",
        ]:
            if os.path.exists(libomp):
                default_compiler_config["flags"] += " " + libomp
                break
    else:
        raise NotImplementedError(
            "Generation of default compiler flags for %s is not implemented"
            % (platform.system(),)
        )

    default_cache_config = OrderedDict(
        [
            ("object_cache", os.path.join(user_cache_dir("pystencils"), "objectcache")),
            ("clear_cache_on_start", False),
        ]
    )

    default_config = OrderedDict(
        [("compiler", default_compiler_config), ("cache", default_cache_config)]
    )

    config_path, config_exists = get_configuration_file_path()
    config = default_config.copy()
    if config_exists:
        with open(config_path, "r") as json_config_file:
            loaded_config = json.load(json_config_file)
        config = recursive_dict_update(config, loaded_config)
    else:
        create_folder(config_path, True)
        with open(config_path, "w") as f:
            json.dump(config, f, indent=4)

    if config["cache"]["object_cache"] is not False:
        config["cache"]["object_cache"] = os.path.expanduser(
            config["cache"]["object_cache"]
        ).format(pid=os.getpid())

        clear_cache_on_start = False
        cache_status_file = os.path.join(
            config["cache"]["object_cache"], "last_config.json"
        )
        if os.path.exists(cache_status_file):
            # check if compiler config has changed
            last_config = json.load(open(cache_status_file, "r"))
            if set(last_config.items()) != set(config["compiler"].items()):
                clear_cache_on_start = True
            else:
                for key in last_config.keys():
                    if last_config[key] != config["compiler"][key]:
                        clear_cache_on_start = True

        if config["cache"]["clear_cache_on_start"] or clear_cache_on_start:
            shutil.rmtree(config["cache"]["object_cache"], ignore_errors=True)

        create_folder(config["cache"]["object_cache"], False)
        with tempfile.NamedTemporaryFile(
            "w", dir=os.path.dirname(cache_status_file), delete=False
        ) as f:
            json.dump(config["compiler"], f, indent=4)
        os.replace(f.name, cache_status_file)

    if config["compiler"]["os"] == "windows":
        msvc_env = get_environment(
            config["compiler"]["msvc_version"], config["compiler"]["arch"]
        )
        if "env" not in config["compiler"]:
            config["compiler"]["env"] = {}
        config["compiler"]["env"].update(msvc_env)

    return config

_config = read_config()

def get_compiler_config():
    return _config["compiler"]


def get_cache_config():
    return _config["cache"]

def add_or_change_compiler_flags(flags):
    if not isinstance(flags, list) and not isinstance(flags, tuple):
        flags = [flags]

    compiler_config = get_compiler_config()
    cache_config = get_cache_config()
    cache_config["object_cache"] = False  # disable cache

    for flag in flags:
        flag = flag.strip()
        if "=" in flag:
            base = flag.split("=")[0].strip()
        else:
            base = flag
        new_flags = [
            c for c in compiler_config["flags"].split() if not c.startswith(base)
        ]
        new_flags.append(flag)
        compiler_config["flags"] = " ".join(new_flags)
def clear_cache():
    cache_config = get_cache_config()
    if cache_config["object_cache"] is not False:
        shutil.rmtree(cache_config["object_cache"], ignore_errors=True)
        create_folder(cache_config["object_cache"], False)

def load_kernel_from_file(module_name, function_name, path):
    try:
        spec = importlib.util.spec_from_file_location(name=module_name, location=path)
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
    except ImportError:
        warnings.warn(f"Could not load {path}, trying one more time in 5 seconds ...")
        time.sleep(5)
        spec = importlib.util.spec_from_file_location(name=module_name, location=path)
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)

    return getattr(mod, function_name)

def run_compile_step(command):
    compiler_config = get_compiler_config()
    config_env = compiler_config["env"] if "env" in compiler_config else {}
    compile_environment = os.environ.copy()
    compile_environment.update(config_env)
    try:
        shell = True if compiler_config["os"].lower() == "windows" else False
        subprocess.check_output(
            command, env=compile_environment, stderr=subprocess.STDOUT, shell=shell
        )
    except subprocess.CalledProcessError as e:
        print(" ".join(command))
        print(e.output.decode("utf8"))
        raise e

def compile_module(code, code_hash, base_dir, compile_flags=None):
    if compile_flags is None:
        compile_flags = []

    compiler_config = get_compiler_config()
    extra_flags = [
        "-I" + sysconfig.get_paths()["include"],
        "-I" + get_pystencils_include_path(),
    ] + compile_flags

    if compiler_config["os"].lower() == "windows":
        lib_suffix = ".pyd"
        object_suffix = ".obj"
        windows = True
    else:
        lib_suffix = ".so"
        object_suffix = ".o"
        windows = False

    src_file = os.path.join(base_dir, code_hash + ".cpp")
    lib_file = os.path.join(base_dir, code_hash + lib_suffix)
    object_file = os.path.join(base_dir, code_hash + object_suffix)

    if not os.path.exists(object_file):
        try:
            with open(src_file, "x") as f:
                code.write_to_file(f)
        except FileExistsError:
            pass

        if windows:
            compile_cmd = ["cl.exe", "/c", "/EHsc"] + compiler_config["flags"].split()
            compile_cmd += [*extra_flags, src_file, "/Fo" + object_file]
            run_compile_step(compile_cmd)
        else:
            with atomic_file_write(object_file) as file_name:
                compile_cmd = [compiler_config["command"], "-c"] + compiler_config[
                    "flags"
                ].split()
                compile_cmd += [*extra_flags, "-o", file_name, src_file]
                run_compile_step(compile_cmd)

        # Linking
        if windows:
            config_vars = sysconfig.get_config_vars()
            py_lib = os.path.join(
                config_vars["installed_base"],
                "libs",
                f"python{config_vars['py_version_nodot']}.lib",
            )
            run_compile_step(
                ["link.exe", py_lib, "/DLL", "/out:" + lib_file, object_file]
            )
        elif platform.system().lower() == "darwin":
            with atomic_file_write(lib_file) as file_name:
                run_compile_step(
                    [
                        compiler_config["command"],
                        "-shared",
                        object_file,
                        "-o",
                        file_name,
                        "-undefined",
                        "dynamic_lookup",
                    ]
                    + compiler_config["flags"].split()
                )
        else:
            with atomic_file_write(lib_file) as file_name:
                run_compile_step(
                    [
                        compiler_config["command"],
                        "-shared",
                        object_file,
                        "-o",
                        file_name,
                    ]
                    + compiler_config["flags"].split()
                )
    return lib_file

def compile_and_load(kernel: Kernel, custom_backend=None):
    cache_config = get_cache_config()

    compiler_config = get_compiler_config()
    function_prefix = (
        "__declspec(dllexport)" if compiler_config["os"].lower() == "windows" else ""
    )

    code = PsKernelExtensioNModule()

    code.add_function(kernel, kernel.name)

    code.create_code_string(compiler_config["restrict_qualifier"], function_prefix)
    code_hash_str = code.get_hash_of_code()

    compile_flags = []
    # TODO: replace
    # if kernel.instruction_set and "compile_flags" in kernel.instruction_set:
    #     compile_flags = kernel.instruction_set["compile_flags"]

    if cache_config["object_cache"] is False:
        with tempfile.TemporaryDirectory() as base_dir:
            lib_file = compile_module(
                code, code_hash_str, base_dir, compile_flags=compile_flags
            )
            result = load_kernel_from_file(code_hash_str, kernel.name, lib_file)
    else:
        lib_file = compile_module(
            code,
            code_hash_str,
            base_dir=cache_config["object_cache"],
            compile_flags=compile_flags,
        )
        result = load_kernel_from_file(code_hash_str, kernel.name, lib_file)

    return CpuKernelWrapper(kernel, result)
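
A sketch of the CPU path end to end (hedged: assumes `kernel` is a `Kernel` object produced by the pystencils code generator, with two 2D fields named ``f`` and ``g``):

    import numpy as np

    wrapper = compile_and_load(kernel)   # generates C++, compiles, loads the shared library
    f_arr = np.ones((32, 32))
    g_arr = np.zeros_like(f_arr)
    wrapper(f=f_arr, g=g_arr)            # field names become keyword arguments
    print(wrapper.code)                  # inspect the generated C code
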
import os
import subprocess

def get_environment(version_specifier, arch="x64"):
    """Returns an environment dictionary for activating the Visual Studio compiler.

    Args:
        version_specifier: a version number, a year number, 'auto' or 'latest' for automatic detection of the latest
                           installed version, or 'setuptools' for setuptools-based detection
        arch: 'x86' or 'x64'
    """
    if version_specifier == "setuptools":
        return get_environment_from_setup_tools(arch)
    elif "\\" in version_specifier:
        vc_vars_path = find_vc_vars_all_via_filesystem_search(version_specifier)
        return get_environment_from_vc_vars_file(vc_vars_path, arch)
    else:
        try:
            if version_specifier in ("auto", "latest"):
                version_nr = find_latest_msvc_version_using_environment_variables()
            else:
                version_nr = normalize_msvc_version(version_specifier)
            vc_vars_path = get_vc_vars_path_via_environment_variable(version_nr)
        except ValueError:
            vc_vars_path = find_vc_vars_all_via_filesystem_search(
                "C:\\Program Files (x86)\\Microsoft Visual Studio"
            )
            if vc_vars_path is None:
                vc_vars_path = find_vc_vars_all_via_filesystem_search(
                    "C:\\Program Files\\Microsoft Visual Studio"
                )
            if vc_vars_path is None:
                raise ValueError(
                    "Visual Studio not found. Write the path to your VS folder into the pystencils config"
                )
        return get_environment_from_vc_vars_file(vc_vars_path, arch)

def find_latest_msvc_version_using_environment_variables():
    import re

    # noinspection SpellCheckingInspection
    regex = re.compile(r"VS(\d\d)\dCOMNTOOLS")
    versions = []
    for key, value in os.environ.items():
        match = regex.match(key)
        if match:
            versions.append(int(match.group(1)))
    if len(versions) == 0:
        raise ValueError("Visual Studio not found.")
    versions.sort()
    return versions[-1]

def normalize_msvc_version(version):
    """
    Takes version specifiers in the following forms:
        - year: 2012, 2013, 2015, either as int or string
        - version number with or without dot, i.e. 11.0 or 11

    :return: integer version number
    """
    if isinstance(version, str) and "." in version:
        version = version.split(".")[0]

    version = int(version)
    mapping = {2015: 14, 2013: 12, 2012: 11}
    if version in mapping:
        return mapping[version]
    else:
        return version

def get_environment_from_vc_vars_file(vc_vars_file, arch):
    out = subprocess.check_output(
        f'cmd /u /c "{vc_vars_file}" {arch} && set',
        stderr=subprocess.STDOUT,
    ).decode("utf-16le", errors="replace")

    env = {
        key.upper(): value
        for key, _, value in (line.partition("=") for line in out.splitlines())
        if key and value
    }
    return env

def get_vc_vars_path_via_environment_variable(version_nr):
    # noinspection SpellCheckingInspection
    environment_var_name = "VS%d0COMNTOOLS" % (version_nr,)
    vc_path = os.environ[environment_var_name]
    path = os.path.join(vc_path, "..", "..", "VC", "vcvarsall.bat")
    return os.path.abspath(path)


def get_environment_from_setup_tools(arch):
    from setuptools.msvc import msvc14_get_vc_env

    msvc_env = msvc14_get_vc_env(arch)
    return {k.upper(): v for k, v in msvc_env.items()}

def find_vc_vars_all_via_filesystem_search(base_path):
    matches = []
    for root, dir_names, file_names in os.walk(base_path):
        for filename in file_names:
            if filename == "vcvarsall.bat":
                matches.append(os.path.join(root, filename))

    matches.sort(reverse=True)
    if matches:
        return matches[0]
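
A usage sketch (Windows only; output is machine-dependent): resolving a 64-bit MSVC environment and checking its header search path:

    env = get_environment("latest", arch="x64")
    print(env.get("INCLUDE", "<not set>"))
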
@@ -5,9 +5,9 @@ from typing import Callable, Union, List, Dict, Tuple
 import sympy as sp
-from pystencils.assignment import Assignment
-from pystencils.sympyextensions import SymbolCreator
-from pystencils.config import CreateKernelConfig
+from .assignment import Assignment
+from .sympyextensions import SymbolCreator
+from .codegen import CreateKernelConfig
 __all__ = ['kernel', 'kernel_config']
...
import pystencils
from .jit import KernelWrapper as _KernelWrapper


class KernelWrapper:
    """
    Light-weight wrapper around a compiled kernel.

    Can be called while still providing access to underlying AST.
    """

    def __init__(self, kernel, parameters, ast_node: pystencils.astnodes.KernelFunction):
        self.kernel = kernel
        self.parameters = parameters
        self.ast = ast_node
        self.num_regs = None

    def __call__(self, **kwargs):
        return self.kernel(**kwargs)

    @property
    def code(self):
        return pystencils.get_code_str(self.ast)


KernelWrapper = _KernelWrapper
import itertools
import warnings
from typing import Union, List
from .codegen import Target
from .codegen import create_kernel as _create_kernel
import sympy as sp
from pystencils.config import CreateKernelConfig
from warnings import warn
from pystencils.assignment import Assignment, AddAugmentedAssignment
from pystencils.astnodes import Node, Block, Conditional, LoopOverCoordinate, SympyAssignment
from pystencils.cpu.vectorization import vectorize
from pystencils.enums import Target, Backend
from pystencils.field import Field, FieldType
from pystencils.node_collection import NodeCollection
from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.kernel_contrains_check import KernelConstraintsCheck
from pystencils.simplificationfactory import create_simplification_strategy
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)

warn(
    "Importing anything from `pystencils.kernelcreation` is deprecated "
    "and the module will be removed in pystencils 2.1. "
    "Import from `pystencils` instead.",
    FutureWarning,
)

def create_kernel(assignments: Union[Assignment, List[Assignment],
                                     AddAugmentedAssignment, List[AddAugmentedAssignment],
                                     AssignmentCollection, List[Node], NodeCollection],
                  *,
                  config: CreateKernelConfig = None, **kwargs):
    """
    Creates an abstract syntax tree (AST) of the kernel, using a list of update equations.

    This function forms the general API and delegates the kernel creation to others depending on the CreateKernelConfig.

    Args:
        assignments: can be a single assignment, a sequence of assignments or an `AssignmentCollection`
        config: CreateKernelConfig which includes the needed configuration
        kwargs: arguments for updating the config

    Returns:
        abstract syntax tree (AST) object that can either be printed as source code with `show_code` or
        can be compiled through its 'compile()' member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> kernel_ast = ps.create_kernel(assignment, config=ps.CreateKernelConfig(cpu_openmp=True))
        >>> kernel = kernel_ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # ---- Updating configuration from kwargs
    if not config:
        config = CreateKernelConfig(**kwargs)
    else:
        for k, v in kwargs.items():
            if not hasattr(config, k):
                raise KeyError(f'{k} is not a valid kwarg. Please look in CreateKernelConfig for valid settings')
            setattr(config, k, v)
    # ---- Normalizing parameters
    if isinstance(assignments, (Assignment, AddAugmentedAssignment)):
        assignments = [assignments]
    assert assignments, "Assignments must not be empty!"
    if isinstance(assignments, list):
        assignments = NodeCollection(assignments)
    elif isinstance(assignments, AssignmentCollection):
        # TODO Markus: check and document
        # --- applying first default simplifications
        try:
            if config.default_assignment_simplifications:
                simplification = create_simplification_strategy()
                assignments = simplification(assignments)
        except Exception as e:
            warnings.warn(f"It was not possible to apply the default pystencils optimisations to the "
                          f"AssignmentCollection due to the following problem: {e}")
        simplification_hints = assignments.simplification_hints
        assignments = NodeCollection.from_assignment_collection(assignments)
        assignments.simplification_hints = simplification_hints

    if config.index_fields:
        return create_indexed_kernel(assignments, config=config)
    else:
        return create_domain_kernel(assignments, config=config)


# In the 2.0 module, the deprecated implementation above is replaced by the new code generator:
create_kernel = _create_kernel

def create_domain_kernel(assignments: NodeCollection, *, config: CreateKernelConfig):
    """
    Creates an abstract syntax tree (AST) of the kernel, using a NodeCollection.

    Note that `create_domain_kernel` is a lower-level function which should be accessed by not providing `index_fields`
    to create_kernel

    Args:
        assignments: `pystencils.node_collection.NodeCollection` containing all assignments and nodes to be processed
        config: CreateKernelConfig which includes the needed configuration

    Returns:
        abstract syntax tree (AST) object that can either be printed as source code with `show_code` or
        can be compiled through its 'compile()' member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> from pystencils.kernelcreation import create_domain_kernel
        >>> from pystencils.node_collection import NodeCollection
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> kernel_config = ps.CreateKernelConfig(cpu_openmp=True)
        >>> kernel_ast = create_domain_kernel(NodeCollection([assignment]), config=kernel_config)
        >>> kernel = kernel_ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # --- eval
    assignments.evaluate_terms()

    # FUTURE WORK: from here on we shouldn't NEED sympy
    # --- check constraints
    check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
                                   check_double_write_condition=not config.allow_double_writes)
    check.visit(assignments)

    assignments.bound_fields = check.fields_written
    assignments.rhs_fields = check.fields_read

    # ---- Creating ast
    ast = None
    if config.target == Target.CPU:
        if config.backend == Backend.C:
            from pystencils.cpu import add_openmp, create_kernel
            ast = create_kernel(assignments, config=config)
            for optimization in config.cpu_prepend_optimizations:
                optimization(ast)
            omp_collapse = None
            if config.cpu_blocking:
                omp_collapse = loop_blocking(ast, config.cpu_blocking)
            if config.cpu_openmp:
                add_openmp(ast, num_threads=config.cpu_openmp, collapse=omp_collapse,
                           assume_single_outer_loop=config.omp_single_loop)
            if config.cpu_vectorize_info:
                if config.cpu_vectorize_info is True:
                    vectorize(ast)
                elif isinstance(config.cpu_vectorize_info, dict):
                    vectorize(ast, **config.cpu_vectorize_info)

                    if config.cpu_openmp and config.cpu_blocking and 'nontemporal' in config.cpu_vectorize_info and \
                            config.cpu_vectorize_info['nontemporal'] and 'cachelineZero' in ast.instruction_set:
                        # This condition is stricter than it needs to be: if blocks along the fastest axis start on a
                        # cache line boundary, it's okay. But we cannot determine that here.
                        # We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
                        raise ValueError("Blocking cannot be combined with cacheline-zeroing")
                else:
                    raise ValueError("Invalid value for cpu_vectorize_info")
    elif config.target == Target.GPU:
        if config.backend == Backend.CUDA:
            from pystencils.gpu import create_cuda_kernel
            ast = create_cuda_kernel(assignments, config=config)

    if not ast:
        raise NotImplementedError(
            f'{config.target} together with {config.backend} is not supported by `create_domain_kernel`')

    if config.use_auto_for_assignments:
        for a in ast.atoms(SympyAssignment):
            a.use_auto = True

    return ast

def create_indexed_kernel(assignments: NodeCollection, *, config: CreateKernelConfig):
    """
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

    The coordinates are stored in a separate index_field, which is a one-dimensional array with struct data type.
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
    'coordinate_names' parameter. The struct can also have other fields that can be read and written in the kernel,
    for example boundary parameters.

    Note that `create_indexed_kernel` is a lower-level function which should be accessed by providing `index_fields`
    to create_kernel

    Args:
        assignments: `pystencils.node_collection.NodeCollection` containing all assignments and nodes to be processed
        config: CreateKernelConfig which includes the needed configuration

    Returns:
        abstract syntax tree (AST) object that can either be printed as source code with `show_code` or
        can be compiled through its 'compile()' member

    Example:
        >>> import pystencils as ps
        >>> from pystencils.node_collection import NodeCollection
        >>> import numpy as np
        >>> from pystencils.kernelcreation import create_indexed_kernel
        >>>
        >>> # Index field stores the indices of the cell to visit together with optional values
        >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
        >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype)
        >>> idx_field = ps.fields(idx=index_arr)
        >>>
        >>> # Additional values stored in index field can be accessed in the kernel as well
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0, 0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
        >>> kernel_config = ps.CreateKernelConfig(index_fields=[idx_field], coordinate_names=('x', 'y'))
        >>> kernel_ast = create_indexed_kernel(NodeCollection([assignment]), config=kernel_config)
        >>> kernel = kernel_ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
        >>> d_arr
        array([[0. , 0. , 0. , 0. , 0. ],
               [0. , 4.1, 0. , 0. , 0. ],
               [0. , 0. , 4.2, 0. , 0. ],
               [0. , 0. , 0. , 4.3, 0. ],
               [0. , 0. , 0. , 0. , 0. ]])
    """
    # --- eval
    assignments.evaluate_terms()

    # FUTURE WORK: from here on we shouldn't NEED sympy
    # --- check constraints
    check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
                                   check_double_write_condition=not config.allow_double_writes)
    check.visit(assignments)

    assignments.bound_fields = check.fields_written
    assignments.rhs_fields = check.fields_read

    ast = None
    if config.target == Target.CPU and config.backend == Backend.C:
        from pystencils.cpu import add_openmp, create_indexed_kernel
        ast = create_indexed_kernel(assignments, config=config)
        if config.cpu_openmp:
            add_openmp(ast, num_threads=config.cpu_openmp)
    elif config.target == Target.GPU:
        if config.backend == Backend.CUDA:
            from pystencils.gpu import created_indexed_cuda_kernel
            ast = created_indexed_cuda_kernel(assignments, config=config)

    if not ast:
        raise NotImplementedError(f'Indexed kernels are not yet supported for {config.target} with {config.backend}')

    return ast

def create_staggered_kernel(assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered fields are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
        target: 'CPU' or 'GPU'
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
        kwargs: passed directly to create_kernel; the iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
    """
    # TODO: Add documentation like for the other kernels
    if 'ghost_layers' in kwargs:
        assert kwargs['ghost_layers'] is None
        del kwargs['ghost_layers']
    if 'iteration_slice' in kwargs:
        assert kwargs['iteration_slice'] is None
        del kwargs['iteration_slice']
    if 'omp_single_loop' in kwargs:
        assert kwargs['omp_single_loop'] is False
        del kwargs['omp_single_loop']

    if isinstance(assignments, AssignmentCollection):
        subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
                                                       if not hasattr(a, 'lhs')
                                                       or type(a.lhs) is not Field.Access
                                                       or not FieldType.is_staggered(a.lhs.field)]
        assignments = [a for a in assignments.main_assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
                       and FieldType.is_staggered(a.lhs.field)]
    else:
        subexpressions = [a for a in assignments if not hasattr(a, 'lhs')
                          or type(a.lhs) is not Field.Access
                          or not FieldType.is_staggered(a.lhs.field)]
        assignments = [a for a in assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
                       and FieldType.is_staggered(a.lhs.field)]
    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

    staggered_field = assignments[0].lhs.field
    stencil = staggered_field.staggered_stencil
    dim = staggered_field.spatial_dimensions
    shape = staggered_field.shape

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
    for direction in stencil:
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
    ghost_layers = [[0, 0] for d in range(dim)]
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
                ghost_layers[d][1] = 1
            elif s == -1:
                ghost_layers[d][0] = 1

    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))

        conditions = []
        for e in exclusions:
            if e in common_exclusions:
                continue
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
                    conditions.append(counters[i] < shape[i] - 1)
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

    if gpu_exclusive_conditions:
        outer_assignment = None
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in itertools.combinations(conditions.values(), num_conditions):
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs, assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)

        inner_assignment = []
        for assignment in assignments:
            inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
                                       Block(inner_assignment), outer_assignment)
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        config = CreateKernelConfig(target=target, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
        ast = create_kernel(final_assignments, config=config)
        return ast

    for assignment in assignments:
        direction = stencil[assignment.lhs.index[0]]
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                             move_constants_before_loop]
    if 'cpu_prepend_optimizations' in kwargs:
        prepend_optimizations += kwargs['cpu_prepend_optimizations']
        del kwargs['cpu_prepend_optimizations']

    config = CreateKernelConfig(ghost_layers=ghost_layers, target=target, omp_single_loop=False,
                                cpu_prepend_optimizations=prepend_optimizations, **kwargs)
    ast = create_kernel(final_assignments, config=config)
    return ast

def create_staggered_kernel(
    assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs
):
    raise NotImplementedError(
        "Staggered kernels are not yet implemented for pystencils 2.0"
    )
@@ -11,7 +11,7 @@ from sympy.logic.boolalg import BooleanFalse, BooleanTrue
 from sympy.functions.elementary.trigonometric import TrigonometricFunction, InverseTrigonometricFunction
 from sympy.functions.elementary.hyperbolic import HyperbolicFunction
-from pystencils.astnodes import KernelFunction, LoopOverCoordinate, Node
+from pystencils.sympyextensions.astnodes import KernelFunction, LoopOverCoordinate, Node
 from pystencils.cpu.vectorization import vec_all, vec_any, CachelineSize
 from pystencils.typing import (
     PointerType, VectorType, CastFunc, create_type, get_type_of_expression,
...
-from pystencils.astnodes import Node
+from pystencils.sympyextensions.astnodes import Node
 from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c
 from pystencils.enums import Backend
 from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
...
@@ -55,7 +55,7 @@ class DotPrinter(Printer):
 def __shortened(node):
-    from pystencils.astnodes import LoopOverCoordinate, KernelFunction, SympyAssignment, Conditional
+    from pystencils.sympyextensions.astnodes import LoopOverCoordinate, KernelFunction, SympyAssignment, Conditional
     if isinstance(node, LoopOverCoordinate):
         return "Loop over dim %d" % (node.coordinate_to_loop_over,)
     elif isinstance(node, KernelFunction):
...
@@ -9,7 +9,7 @@
 """
 import json
-from pystencils.astnodes import NodeOrExpr
+from pystencils.sympyextensions.astnodes import NodeOrExpr
 from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
 try:
...
@@ -99,7 +99,7 @@ def get_cacheline_size(instruction_set):
         return None
     import pystencils as ps
-    from pystencils.astnodes import SympyAssignment
+    from pystencils.sympyextensions.astnodes import SympyAssignment
    import numpy as np
     from pystencils.cpu.vectorization import CachelineSize
...
@@ -5,8 +5,7 @@ from types import MappingProxyType
 from typing import Union, Tuple, List, Dict, Callable, Any, DefaultDict, Iterable
 from pystencils import Target, Backend, Field
-from pystencils.typing.typed_sympy import BasicType
-from pystencils.typing.utilities import collate_types
+from ..sympyextensions.typed_sympy import BasicType
 import numpy as np
...
-from pystencils.cpu.cpujit import make_python_function
-from pystencils.cpu.kernelcreation import add_openmp, create_indexed_kernel, create_kernel, add_pragmas
+from .cpujit import make_python_function
+from .kernelcreation import add_openmp, create_indexed_kernel, create_kernel, add_pragmas
 __all__ = ['create_kernel', 'create_indexed_kernel', 'add_openmp', 'add_pragmas', 'make_python_function']
...
@@ -61,7 +61,7 @@ import warnings
 import numpy as np
 from pystencils import FieldType
-from pystencils.astnodes import LoopOverCoordinate
+from pystencils.sympyextensions.astnodes import LoopOverCoordinate
 from pystencils.backends.cbackend import generate_c, get_headers, CFunction
 from pystencils.cpu.msvc_detection import get_environment
 from pystencils.include import get_pystencils_include_path
...
@@ -619,7 +619,13 @@ def compile_and_load(ast, custom_backend=None):
     compiler_config = get_compiler_config()
     function_prefix = '__declspec(dllexport)' if compiler_config['os'].lower() == 'windows' else ''
-    code = ExtensionModuleCode(custom_backend=custom_backend)
+    from ..nbackend.ast import PsKernelFunction
+    if isinstance(ast, PsKernelFunction):
+        from ..nbackend.jit.cpu_extension_module import PsKernelExtensioNModule
+        code = PsKernelExtensioNModule()
+    else:
+        code = ExtensionModuleCode(custom_backend=custom_backend)
     code.add_function(ast, ast.function_name)
     code.create_code_string(compiler_config['restrict_qualifier'], function_prefix)
...