Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • anirudh.jonnalagadda/pystencils
  • hyteg/pystencils
  • jbadwaik/pystencils
  • jngrad/pystencils
  • itischler/pystencils
  • ob28imeq/pystencils
  • hoenig/pystencils
  • Bindgen/pystencils
  • hammer/pystencils
  • da15siwa/pystencils
  • holzer/pystencils
  • alexander.reinauer/pystencils
  • ec93ujoh/pystencils
  • Harke/pystencils
  • seitz/pystencils
  • pycodegen/pystencils
16 results
Select Git revision
Show changes
Showing
with 1707 additions and 718 deletions
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -5,10 +5,12 @@ API Reference
:maxdepth: 3
kernel_compile_and_call.rst
enums.rst
simplifications.rst
datahandling.rst
configuration.rst
field.rst
stencil.rst
finite_differences.rst
plot.rst
ast.rst
************
Enumerations
************
.. automodule:: pystencils.enums
:members:
......@@ -8,9 +8,14 @@ Creating kernels
.. autofunction:: pystencils.create_kernel
.. autofunction:: pystencils.create_indexed_kernel
.. autoclass:: pystencils.CreateKernelConfig
:members:
.. autofunction:: pystencils.create_staggered_kernel
.. autofunction:: pystencils.kernelcreation.create_domain_kernel
.. autofunction:: pystencils.kernelcreation.create_indexed_kernel
.. autofunction:: pystencils.kernelcreation.create_staggered_kernel
Code printing
......@@ -22,11 +27,11 @@ Code printing
GPU Indexing
-------------
.. autoclass:: pystencils.gpucuda.AbstractIndexing
.. autoclass:: pystencils.gpu.AbstractIndexing
:members:
.. autoclass:: pystencils.gpucuda.BlockIndexing
.. autoclass:: pystencils.gpu.BlockIndexing
:members:
.. autoclass:: pystencils.gpucuda.LineIndexing
.. autoclass:: pystencils.gpu.LineIndexing
:members:
......@@ -2,7 +2,7 @@
Plotting and Animation
**********************
.. automodule:: pystencils.plot2d
.. automodule:: pystencils.plot
:members:
......@@ -10,13 +10,27 @@ AssignmentCollection
:members:
SimplificationStrategy
======================
.. autoclass:: pystencils.simp.SimplificationStrategy
:members:
Simplifications
===============
.. automodule:: pystencils.simp
:members:
.. automodule:: pystencils.simp.simplifications
:members:
Subexpression insertion
=======================
The subexpression insertions have the goal to insert subexpressions which will not reduce the number of FLOPs.
For example a constant value kept as subexpression will lead to a new variable in the code which will occupy
a register slot. On the other side a single variable could just be inserted in all assignments.
.. automodule:: pystencils.simp.subexpression_insertion
:members:
......
*******
Stencil
*******
.. automodule:: pystencils.stencil
:members:
......@@ -15,6 +15,7 @@ It is a good idea to download them and run them directly to be able to play arou
/notebooks/05_tutorial_phasefield_spinodal_decomposition.ipynb
/notebooks/06_tutorial_phasefield_dentritic_growth.ipynb
/notebooks/demo_assignment_collection.ipynb
/notebooks/demo_plotting_and_animation.ipynb
/notebooks/demo_derivatives.ipynb
/notebooks/demo_benchmark.ipynb
/notebooks/demo_wave_equation.ipynb
import subprocess
def version_number_from_git(tag_prefix='release/', sha_length=10, version_format="{version}.dev{commits}+{sha}"):
def get_released_versions():
tags = sorted(subprocess.getoutput('git tag').split('\n'))
versions = [t[len(tag_prefix):] for t in tags if t.startswith(tag_prefix)]
return versions
def tag_from_version(v):
return tag_prefix + v
def increment_version(v):
parsed_version = [int(i) for i in v.split('.')]
parsed_version[-1] += 1
return '.'.join(str(i) for i in parsed_version)
latest_release = get_released_versions()[-1]
commits_since_tag = subprocess.getoutput('git rev-list {}..HEAD --count'.format(tag_from_version(latest_release)))
sha = subprocess.getoutput('git rev-parse HEAD')[:sha_length]
is_dirty = len(subprocess.getoutput("git status --untracked-files=no -s")) > 0
if int(commits_since_tag) == 0:
version_string = latest_release
else:
next_version = increment_version(latest_release)
version_string = version_format.format(version=next_version, commits=commits_since_tag, sha=sha)
if is_dirty:
version_string += ".dirty"
return version_string
[project]
name = "pystencils"
description = "Speeding up stencil computations on CPUs and GPUs"
dynamic = ["version"]
readme = "README.md"
authors = [
{ name = "Martin Bauer" },
{ name = "Jan Hönig " },
{ name = "Markus Holzer" },
{ name = "Frederik Hennig" },
{ email = "cs10-codegen@fau.de" },
]
license = { file = "COPYING.txt" }
requires-python = ">=3.10"
dependencies = ["sympy>=1.9,<=1.12.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml", "fasteners"]
classifiers = [
"Development Status :: 4 - Beta",
"Framework :: Jupyter",
"Topic :: Software Development :: Code Generators",
"Topic :: Scientific/Engineering :: Physics",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
]
[project.urls]
"Bug Tracker" = "https://i10git.cs.fau.de/pycodegen/pystencils/-/issues"
"Documentation" = "https://pycodegen.pages.i10git.cs.fau.de/pystencils/"
"Source Code" = "https://i10git.cs.fau.de/pycodegen/pystencils"
[project.optional-dependencies]
gpu = ['cupy']
alltrafos = ['islpy', 'py-cpuinfo']
bench_db = ['blitzdb', 'pymongo', 'pandas']
interactive = [
'matplotlib',
'ipy_table',
'imageio',
'jupyter',
'pyevtk',
'rich',
'graphviz',
]
use_cython = [
'Cython'
]
doc = [
'sphinx',
'sphinx_rtd_theme',
'nbsphinx',
'sphinxcontrib-bibtex',
'sphinx_autodoc_typehints',
'pandoc',
]
tests = [
'pytest',
'pytest-cov',
'pytest-html',
'ansi2html',
'pytest-xdist',
'flake8',
'nbformat',
'nbconvert',
'ipython',
'matplotlib',
'py-cpuinfo',
'randomgen>=1.18',
]
[build-system]
requires = [
"setuptools>=61",
"versioneer[toml]>=0.29",
# 'Cython'
]
build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
pystencils = [
"include/*.h",
"boundaries/createindexlistcython.pyx"
]
[tool.setuptools.packages.find]
where = ["src"]
include = ["pystencils", "pystencils.*"]
namespaces = false
[tool.versioneer]
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.
VCS = "git"
style = "pep440"
versionfile_source = "src/pystencils/_version.py"
versionfile_build = "pystencils/_version.py"
tag_prefix = "release/"
parentdir_prefix = "pystencils-"
# noinspection SpellCheckingInspection
def get_vector_instruction_set(data_type='double', instruction_set='avx'):
comparisons = {
'==': '_CMP_EQ_UQ',
'!=': '_CMP_NEQ_UQ',
'>=': '_CMP_GE_OQ',
'<=': '_CMP_LE_OQ',
'<': '_CMP_NGE_UQ',
'>': '_CMP_NLE_UQ',
}
base_names = {
'+': 'add[0, 1]',
'-': 'sub[0, 1]',
'*': 'mul[0, 1]',
'/': 'div[0, 1]',
'&': 'and[0, 1]',
'|': 'or[0, 1]',
'blendv': 'blendv[0, 1, 2]',
'sqrt': 'sqrt[0]',
'makeVec': 'set[]',
'makeZero': 'setzero[]',
'loadU': 'loadu[0]',
'loadA': 'load[0]',
'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]',
'stream': 'stream[0,1]',
}
for comparison_op, constant in comparisons.items():
base_names[comparison_op] = 'cmp[0, 1, %s]' % (constant,)
headers = {
'avx512': ['<immintrin.h>'],
'avx': ['<immintrin.h>'],
'sse': ['<immintrin.h>', '<xmmintrin.h>', '<emmintrin.h>', '<pmmintrin.h>',
'<tmmintrin.h>', '<smmintrin.h>', '<nmmintrin.h>']
}
suffix = {
'double': 'pd',
'float': 'ps',
}
prefix = {
'sse': '_mm',
'avx': '_mm256',
'avx512': '_mm512',
}
width = {
("double", "sse"): 2,
("float", "sse"): 4,
("double", "avx"): 4,
("float", "avx"): 8,
("double", "avx512"): 8,
("float", "avx512"): 16,
}
result = {
'width': width[(data_type, instruction_set)],
}
pre = prefix[instruction_set]
suf = suffix[data_type]
for intrinsic_id, function_shortcut in base_names.items():
function_shortcut = function_shortcut.strip()
name = function_shortcut[:function_shortcut.index('[')]
if intrinsic_id == 'makeVec':
arg_string = "({})".format(",".join(["{0}"] * result['width']))
else:
args = function_shortcut[function_shortcut.index('[') + 1: -1]
arg_string = "("
for arg in args.split(","):
arg = arg.strip()
if not arg:
continue
if arg in ('0', '1', '2', '3', '4', '5'):
arg_string += "{" + arg + "},"
else:
arg_string += arg + ","
arg_string = arg_string[:-1] + ")"
mask_suffix = '_mask' if instruction_set == 'avx512' and intrinsic_id in comparisons.keys() else ''
result[intrinsic_id] = pre + "_" + name + "_" + suf + mask_suffix + arg_string
result['dataTypePrefix'] = {
'double': "_" + pre + 'd',
'float': "_" + pre,
}
result['rsqrt'] = None
bit_width = result['width'] * (64 if data_type == 'double' else 32)
result['double'] = "__m%dd" % (bit_width,)
result['float'] = "__m%d" % (bit_width,)
result['int'] = "__m%di" % (bit_width,)
result['bool'] = "__m%dd" % (bit_width,)
result['headers'] = headers[instruction_set]
result['any'] = "%s_movemask_%s({0}) > 0" % (pre, suf)
result['all'] = "%s_movemask_%s({0}) == 0xF" % (pre, suf)
if instruction_set == 'avx512':
size = 8 if data_type == 'double' else 16
result['&'] = '_kand_mask%d({0}, {1})' % (size,)
result['|'] = '_kor_mask%d({0}, {1})' % (size,)
result['any'] = '!_ktestz_mask%d_u8({0}, {0})' % (size, )
result['all'] = '_kortestc_mask%d_u8({0}, {0})' % (size, )
result['blendv'] = '%s_mask_blend_%s({2}, {0}, {1})' % (pre, suf)
result['rsqrt'] = "_mm512_rsqrt14_%s({0})" % (suf,)
result['bool'] = "__mmask%d" % (size,)
if instruction_set == 'avx' and data_type == 'float':
result['rsqrt'] = "_mm256_rsqrt_ps({0})"
return result
def get_supported_instruction_sets():
"""List of supported instruction sets on current hardware, or None if query failed."""
try:
from cpuinfo import get_cpu_info
except ImportError:
return None
result = []
required_sse_flags = {'sse', 'sse2', 'ssse3', 'sse4_1', 'sse4_2'}
required_avx_flags = {'avx'}
required_avx512_flags = {'avx512f'}
flags = set(get_cpu_info()['flags'])
if flags.issuperset(required_sse_flags):
result.append("sse")
if flags.issuperset(required_avx_flags):
result.append("avx")
if flags.issuperset(required_avx512_flags):
result.append("avx512")
return result