add-dependency-jinja2 to master · pycodegen / pystencils

Some changes are not shown.

For a faster browsing experience, only 52 of 273 files are shown. Download one of the files below to see all changes.

.flake8

+4 −4

Original line number	Diff line number	Diff line
		[flake8]
		max-line-length=120
		exclude=pystencils/jupyter.py,
		pystencils/plot.py
		pystencils/session.py
		ignore = W293 W503 W291
		exclude=src/pystencils/jupyter.py,
		src/pystencils/plot.py
		src/pystencils/session.py
		ignore = W293 W503 W291 C901 E741

.gitattributes

0 → 100644

+1 −0

Original line number	Diff line number	Diff line
		src/pystencils/_version.py export-subst

.gitignore

+20 −3

Original line number	Diff line number	Diff line
		__pycache__
		.ipynb_checkpoints
		.coverage
		.coverage*
		*.pyc
		*.vti
		/build
		/dist
		/*.egg-info
		*.egg-info
		.cache
		_build
		/html_doc
		/.idea
		.vscode
		.cache
		_local_tmp
		RELEASE-VERSION
		test-report
		src/pystencils/boundaries/createindexlistcython.c
		src/pystencils/boundaries/createindexlistcython.*.so
		tests/tmp
		tests/var
		tests/kerncraft_inputs/.2d-5pt.c_kerncraft/
		tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
		report.xml
		coverage_report/


		# macOS
		**/.DS_Store
		*.uuid

.gitlab-ci.yml

+278 −50

Original line number	Diff line number	Diff line
		stages:
		- pretest
		- test
		- nightly
		- docs
		- deploy


		# -------------------------- Templates ------------------------------------------------------------------------------------

		# Base configuration for jobs meant to run at every commit
		.every-commit:
		rules:
		- if: $CI_PIPELINE_SOURCE != "schedule"

		# Configuration for jobs meant to run on each commit to pycodegen/pystencils/master
		.every-commit-master:
		rules:
		- if: '$CI_PIPELINE_SOURCE != "schedule" && $CI_PROJECT_PATH == "pycodegen/pystencils" && $CI_COMMIT_BRANCH == "master"'

		# Base configuration for jobs meant to run at a schedule
		.scheduled:
		rules:
		- if: $CI_PIPELINE_SOURCE == "schedule"

		# -------------------------- Tests ------------------------------------------------------------------------------------

		# Normal test - runs on every commit all but "long run" tests
		tests-and-coverage:
		stage: test
		except:
		variables:
		- $ENABLE_NIGHTLY_BUILDS
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
		stage: pretest
		extends: .every-commit
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
		before_script:
		- pip install -e .
		script:
		- env
		- pip list
		- export NUM_CORES=$(nproc --all)
		- mkdir -p ~/.config/matplotlib
		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
		- mkdir public
		- py.test -v -n $NUM_CORES --cov-report html --cov-report term --cov=. -m "not longrun"
		- pytest -v -n $NUM_CORES --cov-report html --cov-report xml --cov-report term --cov=. -m "not longrun" --html test-report/index.html --junitxml=report.xml
		- python -m coverage xml
		tags:
		- docker
		- cuda
		- cuda11
		- AVX
		coverage: /Total coverage:\s\d+.\d+\%/
		artifacts:
		when: always
		paths:
		- coverage_report
		- test-report
		reports:
		coverage_report:
		coverage_format: cobertura
		path: coverage.xml
		junit: report.xml

		# Nightly test - runs "long run" jobs only
		test-longrun:
		# Normal test with longruns
		tests-and-coverage-with-longrun:
		stage: test
		only:
		variables:
		- $ENABLE_NIGHTLY_BUILDS
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
		when: manual
		allow_failure: true
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
		before_script:
		- pip install sympy --upgrade
		- pip install -e .
		script:
		- env
		- pip list
		- export NUM_CORES=$(nproc --all)
		- mkdir -p ~/.config/matplotlib
		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
		- py.test -v -n $NUM_CORES --cov-report html --cov-report term --cov=.
		- mkdir public
		- py.test -v -n $NUM_CORES
		tags:
		- docker
		- cuda11
		- AVX

		# pipeline with latest python version
		latest-python:
		stage: test
		extends: .every-commit
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/latest_python
		before_script:
		- pip install -e .
		script:
		- env
		- pip list
		- pip install -e .
		- export NUM_CORES=$(nproc --all)
		- mkdir -p ~/.config/matplotlib
		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
		- mkdir public
		- py.test -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
		tags:
		- docker
		- cuda
		- AVX
		artifacts:
		paths:
		- coverage_report
		when: always
		reports:
		junit: report.xml


		# Minimal tests in windows environment
		minimal-windows:
		#minimal-windows:
		# stage: test
		# tags:
		# - win
		# script:
		# - export NUM_CORES=$(nproc --all)
		# - source /cygdrive/c/Users/build/Miniconda3/Scripts/activate
		# - source activate pystencils
		# - pip install joblib
		# - pip list
		# - python -c "import numpy"
		# - py.test -v -m "not (notebook or longrun)"

		ubuntu:
		stage: test
		except:
		variables:
		- $ENABLE_NIGHTLY_BUILDS
		tags:
		- win
		extends: .every-commit
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ubuntu
		before_script:
		- ln -s /usr/include/locale.h /usr/include/xlocale.h
		- pip3 install -e .
		script:
		- source /cygdrive/c/Users/build/Miniconda3/Scripts/activate
		- source activate pystencils_dev
		- export NUM_CORES=$(nproc --all)
		- mkdir -p ~/.config/matplotlib
		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
		- sed -i 's/--doctest-modules //g' pytest.ini
		- env
		- conda env list
		- python -c "import numpy"
		- python setup.py quicktest
		- pip list
		- pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
		tags:
		- docker
		- cuda11
		- AVX
		artifacts:
		when: always
		reports:
		junit: report.xml

		minimal-ubuntu:
		.multiarch_template:
		stage: test
		except:
		variables:
		- $ENABLE_NIGHTLY_BUILDS
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_ubuntu
		extends: .every-commit
		allow_failure: true
		before_script: &multiarch_before_script
		# - pip3 install -v .
		- export PYTHONPATH=src
		- python3 -c "import pystencils as ps; ps.cpu.cpujit.read_config()"
		- sed -i '/^fail_under.*/d' pytest.ini
		script:
		- python3 setup.py quicktest
		- export NUM_CORES=$(nproc --all)
		- mkdir -p ~/.config/matplotlib
		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
		- sed -i 's/--doctest-modules //g' pytest.ini
		- env
		- pip3 list
		- python3 -m pytest -v -n $NUM_CORES --reruns 2 --cov-report html --cov-report xml --cov=. --junitxml=report.xml tests/test_vec.py tests/test_random.py tests/test_half_precision.py
		- python3 -m coverage xml
		tags:
		- docker
		- multiarch
		artifacts:
		when: always
		paths:
		- coverage_report
		reports:
		coverage_report:
		coverage_format: cobertura
		path: coverage.xml
		junit: report.xml

		arm64v8:
		extends: .multiarch_template
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
		variables:
		QEMU_CPU: "cortex-a76"
		before_script:
		- *multiarch_before_script

		ppc64le:
		extends: .multiarch_template
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ppc64le
		before_script:
		- *multiarch_before_script

		arm64v9:
		# SVE support is still unreliable in GCC 13 (incorrect code for fixed-width vectors, internal compiler errors).
		# For half precision Clang is necessary
		extends: .multiarch_template
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
		variables:
		# disable pointer authentication to speed up emulation by 3x
		QEMU_CPU: "max,pauth-impdef=on"
		before_script:
		- *multiarch_before_script
		- sed -i s/g\+\+/clang++/g ~/.config/pystencils/config.json

		riscv64:
		# RISC-V vector extension support is incomplete in GCC 13.
		extends: .multiarch_template
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/riscv64
		variables:
		# explicitly set SIMD as detection requires QEMU >= 8.1
		PYSTENCILS_SIMD: "rvv"
		QEMU_CPU: "rv64,v=true,zicboz=true"
		before_script:
		- *multiarch_before_script
		- sed -i 's/march=native/march=rv64imfdvzicboz/g' ~/.config/pystencils/config.json
		- sed -i s/g\+\+/clang++/g ~/.config/pystencils/config.json

		minimal-conda:
		stage: pretest
		extends: .every-commit
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda
		before_script:
		- pip install -e .
		script:
		- python quicktest.py
		tags:
		- docker
		- cuda


		minimal-sympy-master:
		stage: test
		except:
		variables:
		- $ENABLE_NIGHTLY_BUILDS
		extends: .every-commit
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda
		before_script:
		- pip install -e .
		script:
		- python setup.py quicktest
		- python -m pip install --upgrade git+https://github.com/sympy/sympy.git
		- python quicktest.py
		allow_failure: true
		tags:
		- docker
		- cuda


		pycodegen-integration:
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
		stage: test
		when: manual
		allow_failure: true
		script:
		- git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@i10git.cs.fau.de/pycodegen/pycodegen.git
		- cd pycodegen
		- git submodule sync --recursive
		- git submodule update --init --recursive
		- git submodule foreach git fetch origin # compare the latest master version!
		- git submodule foreach git reset --hard origin/master
		- cd pystencils
		- git remote add test $CI_REPOSITORY_URL
		- git fetch test
		- git reset --hard $CI_COMMIT_SHA
		- cd ..
		- pip install -e pystencils/
		- pip install -e lbmpy/
		- cmake --version
		- ./install_walberla.sh
		- export NUM_CORES=$(nproc --all)
		- mkdir -p ~/.config/matplotlib
		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
		- cd pystencils
		- py.test -v -n $NUM_CORES --junitxml=report.xml .
		- cd ../lbmpy
		- py.test -v -n $NUM_CORES --junitxml=report.xml .
		- cd ../walberla/build/
		- make -j $NUM_CORES CodegenJacobiCPU CodegenJacobiGPU CodegenPoissonCPU CodegenPoissonGPU MicroBenchmarkGpuLbm LbCodeGenerationExample
		- make -j $NUM_CORES multiphaseCPU multiphaseGPU FluctuatingMRT FlowAroundSphereCodeGen FieldLayoutAndVectorizationTest GeneratedOutflowBC
		- cd apps/benchmarks/UniformGridGPU
		- make -j $NUM_CORES
		- cd ../UniformGridCPU
		- make -j $NUM_CORES
		tags:
		- docker
		- cuda11
		- AVX
		artifacts:
		when: always
		reports:
		junit: pycodegen/*/report.xml


		# -------------------- Scheduled Tasks --------------------------------------------------------------------------


		# Nightly test against the latest (pre-release) version of SymPy published on PyPI
		nightly-sympy:
		stage: nightly
		needs: []
		extends: .scheduled
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/latest_python
		before_script:
		- pip install -e .
		- pip install --upgrade --pre sympy
		script:
		- env
		- pip list
		- export NUM_CORES=$(nproc --all)
		- mkdir -p ~/.config/matplotlib
		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
		- mkdir public
		- pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
		tags:
		- docker
		- AVX
		- cuda
		artifacts:
		when: always
		reports:
		junit: report.xml

		# -------------------- Linter & Documentation --------------------------------------------------------------------------


		flake8-lint:
		stage: test
		except:
		variables:
		- $ENABLE_NIGHTLY_BUILDS
		stage: pretest
		extends: .every-commit
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
		script:
		- flake8 pystencils
		- flake8 src/pystencils
		tags:
		- docker
		- cuda


		build-documentation:
		stage: test
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
		stage: docs
		extends: .every-commit
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/documentation
		needs: []
		before_script:
		- pip install -e .
		script:
		- export PYTHONPATH=`pwd`
		- mkdir html_doc
		- sphinx-build -b html doc html_doc
		- sphinx-build -W -b html doc html_doc
		tags:
		- docker
		- cuda
		artifacts:
		paths:
		- html_doc
		@@ -119,7 +347,9 @@ build-documentation:

		pages:
		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
		extends: .every-commit-master
		stage: deploy
		needs: ["tests-and-coverage", "build-documentation"]
		script:
		- ls -l
		- mv coverage_report html_doc
		@@ -129,5 +359,3 @@ pages:
		- public
		tags:
		- docker
		only:
		- master@pycodegen/pystencils

.isort.cfg

+1 −0

Original line number	Diff line number	Diff line
		@@ -2,3 +2,4 @@
		line_length=100
		balanced_wrapping=True
		multi_line_output=4
		known_third_party=sympy

AUTHORS.txt

+6 −5

Original line number	Diff line number	Diff line
		@@ -3,12 +3,13 @@ Contributors:
		-------------

		- Martin Bauer <martin.bauer@fau.de>
		- Markus Holzer <markus.holzer@fau.de>
		- Stephan Seitz <stephan.seitz@fau.de>
		- Michael Kuron <mkuron@icp.uni-stuttgart.de>
		- Jan Hönig <jan.hoenig@fau.de>
		- Nils Kohl <nils.kohl@fau.de>
		- Julian Hammer <julian.hammer@fau.de>
		- Christian Godenschwager <christian.godenschwager@fau.de>
		- Markus Holzer <markus.holzer@fau.de>
		- Michael Kuron <mkuron@icp.uni-stuttgart.de>
		- Nils Kohl <nils.kohl@fau.de>
		- Frederik Hennig <frederik.hennig@fau.de>
		- Dominik Ernst <dominik.ernst@fau.de>
		- João Victor Tozatti Risso <joaovictortr@protonmail.com>
		- Christian Godenschwager <christian.godenschwager@fau.de>
		- Dominik Thoennes <dominik.thoennes@fau.de>

CHANGELOG.md

0 → 100644

+7 −0

Original line number	Diff line number	Diff line
		# Change Log

		## Unreleased

		### Removed
		* LLVM backend because it was not used much and not well integrated into pystencils.
		* OpenCL backend because it was not used much and not well integrated into pystencils.

CONTRIBUTING.md

0 → 100644

+87 −0

Original line number	Diff line number	Diff line
		# Contributing

		Contributions to pystencils are always welcome, and they are greatly appreciated!
		A list of open problems can be found [here](https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).
		Of course, it is also always appreciated if you bring your own ideas and problems to the community!


		Please submit all contributions to the official [GitLab repository](https://i10git.cs.fau.de/pycodegen/pystencils) in the form of a Merge Request. Please do not submit git diffs or files containing the changes.
		There also exists a GitHub repository, which is only a mirror to the GitLab repository. Contributions to the GitHub repository are not considered.

		`pystencils` is an open-source python package under the license of AGPL3. Thus we consider the act of contributing to the code by submitting a Merge Request as the "Sign off" or agreement to the AGPL3 license.

		You can contribute in many different ways:

		## Types of Contributions

		### Report Bugs

		Report bugs at [https://i10git.cs.fau.de/pycodegen/pystencils/-/issues](https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).

		For pystencils, it is often necessary to provide the python and [SymPy](https://www.sympy.org/en/index.html) versions used and hardware information like the
		processor architecture and the compiler version used to compile the generated kernels.

		### Fix Issues

		Look through the GitLab issues. Different tags indicate the status of the issues.
		The "bug" tag indicates problems with pystencils, while the "feature" tag shows ideas that should be added in the future.

		### Write Documentation

		The documentation of pystencils can be found [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils/). Jupyter notebooks are used to provide an
		interactive start to pystencils. It is always appreciated if new documentation notebooks are provided
		since this helps others a lot.

		## Get Started!

		Ready to contribute? Here is how to set up `pystencils` for local development.

		1. Fork the `pystencils` repo on GitLab.
		2. Clone your fork locally:
		```bash
		$ git clone https://i10git.cs.fau.de/your-name/pystencils
		```
		3. Install your local copy into a virtualenv. It is also recommended to use anaconda or miniconda to manage the python environments.
		```bash
		$ mkvirtualenv pystencils
		$ cd pystencils/
		$ pip install -e .
		```
		4. Create a branch for local development:
		```bash
		$ git checkout -b name-of-your-bugfix-or-feature
		```
		Now you can make your changes locally.

		5. When you're done making changes, check that your changes pass flake8 and the
		tests
		```bash
		$ flake8 pystencils
		$ py.test -v -n $NUM_CORES -m "not longrun" .

		```

		To get all packages needed for development, a requirements list can be found [here](https://i10git.cs.fau.de/pycodegen/pycodegen/-/blob/master/conda_environment_dev.yml). This includes flake8 and pytest.

		6. Commit your changes and push your branch to GitLab:
		```bash
		$ git add .
		$ git commit -m "Your detailed description of your changes."
		$ git push origin name-of-your-bugfix-or-feature
		```
		7. Submit a Merge Request on GitLab.

		## Merge Request Guidelines

		Before you submit a Merge Request, check that it meets these guidelines:

		1. All functionality that is implemented through this Merge Request should be covered by unit tests. These are implemented in `pystencil_tests`
		2. If the Merge Request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring.
		3. If you have maintainer status for `pystencils`, you can merge Merge Requests to the master branch. However, every Merge Request needs to be reviewed by another developer. Thus, it is not allowed to merge a Merge Request that you submitted yourself.

		## Tips

		To run a subset of tests:
		```bash
		$ py.test my_test.py
		```
		No newline at end of file

MANIFEST.in

+3 −3

Original line number	Diff line number	Diff line
		include README.md
		include COPYING.txt
		include RELEASE-VERSION
		include AUTHORS.txt
		include CONTRIBUTING.md
		include CHANGELOG.md

README.md

+29 −12

Original line number	Diff line number	Diff line
		@@ -2,15 +2,16 @@ pystencils
		==========

		[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/mabau/pystencils/master?filepath=doc%2Fnotebooks)
		[![Docs](https://img.shields.io/badge/read-the_docs-brightgreen.svg)](http://pycodegen.pages.walberla.net/pystencils)
		[![Docs](https://img.shields.io/badge/read-the_docs-brightgreen.svg)](https://pycodegen.pages.i10git.cs.fau.de/pystencils)
		[![pypi-package](https://badge.fury.io/py/pystencils.svg)](https://badge.fury.io/py/pystencils)
		[![pipeline status](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/pipeline.svg)](https://i10git.cs.fau.de/pycodegen/pystencils/commits/master)
		[![coverage report](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/coverage.svg)](http://pycodegen.pages.walberla.net/pystencils/coverage_report)
		[![coverage report](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/coverage.svg)](http://pycodegen.pages.i10git.cs.fau.de/pystencils/coverage_report)

		Run blazingly fast stencil codes on numpy arrays.

		pystencils uses sympy to define stencil operations that can be executed on numpy arrays.
		Exploiting the stencil structure makes pystencils run faster than normal numpy code and even as fast as Cython and numba,
		[as demonstrated in this notebook](http://pycodegen.pages.walberla.net/pystencils/notebooks/demo_benchmark.html).
		[as demonstrated in this notebook](https://pycodegen.pages.i10git.cs.fau.de/pystencils/notebooks/demo_benchmark.html).


		Here is a code snippet that computes the average of neighboring cells:
		@@ -32,15 +33,15 @@ kernel(f=f_arr, g=g_arr)
		It comes with automatic finite difference discretization for PDEs:

		```python
		import pystencils as ps
		import sympy as sp

		c, v = ps.fields("c, v(2): [2D]")
		adv_diff_pde = ps.fd.transient(c) - ps.fd.diffusion(c, sp.symbols("D")) + ps.fd.advection(c, v)
		discretize = ps.fd.Discretization2ndOrder(dx=1, dt=0.01)
		discretization = discretize(adv_diff_pde)
		```

		Look at the [documentation](http://pycodegen.pages.walberla.net/pystencils) to learn more.


		Installation
		------------

		@@ -51,7 +52,7 @@ pip install pystencils[interactive]
		Without `[interactive]` you get a minimal version with very few dependencies.

		All options:
		- `gpu`: use this if an Nvidia GPU is available and CUDA is installed
		- `gpu`: use this if an NVIDIA or AMD GPU is available and CUDA or ROCm is installed
		- `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl
		- `bench_db`: functionality to store benchmark result in object databases
		- `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
		@@ -62,9 +63,25 @@ Options can be combined e.g.
		pip install pystencils[interactive, gpu, doc]
		```

		pystencils is also fully compatible with Windows machines. If working with Visual Studio and cupy, make sure to run the example files first to ensure that cupy can find the compiler's executable.

		Documentation
		-------------

		Read the docs [here](http://pycodegen.pages.walberla.net/pystencils) and
		check out the Jupyter notebooks in `doc/notebooks`.
		Read the docs [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils) and
		check out the Jupyter notebooks in `doc/notebooks`. The Changelog of pystencils can be found [here](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/CHANGELOG.md).

		Authors
		-------

		Many thanks go to the [contributors](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/AUTHORS.txt) of pystencils.

		### Please cite us

		If you use pystencils in a publication, please cite the following articles:

		Overview:
		- M. Bauer et al, Code Generation for Massively Parallel Phase-Field Simulations. Association for Computing Machinery, 2019. https://doi.org/10.1145/3295500.3356186

		Performance Modelling:
		- D. Ernst et al, Analytical performance estimation during code generation on modern GPUs. Journal of Parallel and Distributed Computing, 2023. https://doi.org/10.1016/j.jpdc.2022.11.003

binder/environment.yml

+3 −3

Original line number	Diff line number	Diff line
		@@ -7,14 +7,14 @@
		# conda env create -f conda_environment_user.yml
		# . activate pystencils
		#
		# If you have CUDA installed and want to use your GPU, uncomment the last line to install pycuda
		# If you have CUDA or ROCm installed and want to use your GPU, uncomment the last line to install cupy
		#
		# ----------------------------------------------------------------------------------------------------------------------

		name: pystencils
		dependencies:
		# Basic dependencies:
		- python >= 3.6
		- python >= 3.8
		- numpy
		- sympy >= 1.1
		- appdirs # to find default cache directory on each platform
		@@ -32,4 +32,4 @@ dependencies:
		- ipy_table # HTML tables for jupyter notebooks
		- pyevtk # VTK output for serial simulations
		- blitzdb # file-based No-SQL database to store simulation results
		#- pycuda # add this if you have CUDA installed
		#- cupy # add this if you have CUDA or ROCm installed

conftest.py

+56 −25

Original line number	Diff line number	Diff line
		import os
		import pytest
		import tempfile
		import runpy
		import sys
		import tempfile
		import warnings

		import nbformat
		import pytest
		from nbconvert import PythonExporter

		from pystencils.boundaries.createindexlist import * # NOQA
		# Trigger config file reading / creation once - to avoid race conditions when multiple instances are creating it
		# at the same time
		from pystencils.cpu import cpujit
		@@ -14,12 +20,17 @@ try:
		pyximport.install(language_level=3)
		except ImportError:
		pass
		from pystencils.boundaries.createindexlistcython import * # NOQA


		SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__))
		sys.path.insert(0, os.path.abspath('pystencils'))

		# the Ubuntu pipeline uses an older version of pytest which uses deprecated functionality.
		# This leads to many warnings in the test and coverage pipeline.
		pytest_numeric_version = [int(x, 10) for x in pytest.__version__.split('.')]
		pytest_numeric_version.reverse()
		pytest_version = sum(x * (100 ** i) for i, x in enumerate(pytest_numeric_version))


		def add_path_to_ignore(path):
		if not os.path.exists(path):
		@@ -28,34 +39,49 @@ def add_path_to_ignore(path):
		collect_ignore += [os.path.join(SCRIPT_FOLDER, path, f) for f in os.listdir(os.path.join(SCRIPT_FOLDER, path))]


		collect_ignore = [os.path.join(SCRIPT_FOLDER, "doc", "conf.py")]
		add_path_to_ignore('pystencils_tests/benchmark')
		collect_ignore = [os.path.join(SCRIPT_FOLDER, "doc", "conf.py"),
		os.path.join(SCRIPT_FOLDER, "src", "pystencils", "opencl", "opencl.autoinit")]
		add_path_to_ignore('tests/benchmark')
		add_path_to_ignore('_local_tmp')


		try:
		import pycuda
		import cupy
		except ImportError:
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/pystencils_tests/test_cudagpu.py")]
		add_path_to_ignore('pystencils/gpucuda')
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_gpu.py")]
		add_path_to_ignore('src/pystencils/gpu')

		try:
		import llvmlite
		import waLBerla
		except ImportError:
		collect_ignore += [os.path.join(SCRIPT_FOLDER, 'pystencils_tests/backends/llvm.py')]
		add_path_to_ignore('pystencils/llvm')
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_aligned_array.py"),
		os.path.join(SCRIPT_FOLDER, "tests/test_datahandling_parallel.py"),
		os.path.join(SCRIPT_FOLDER, "doc/notebooks/03_tutorial_datahandling.ipynb"),
		os.path.join(SCRIPT_FOLDER, "src/pystencils/datahandling/parallel_datahandling.py"),
		os.path.join(SCRIPT_FOLDER, "tests/test_small_block_benchmark.ipynb")]

		try:
		import kerncraft
		import blitzdb
		except ImportError:
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_kerncraft_coupling.py")]
		add_path_to_ignore('pystencils/kerncraft_coupling')
		add_path_to_ignore('src/pystencils/runhelper')
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_parameterstudy.py")]
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_json_serializer.py")]

		try:
		import blitzdb
		import islpy
		except ImportError:
		add_path_to_ignore('pystencils/runhelper')
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/integer_set_analysis.py")]

		try:
		import graphviz
		except ImportError:
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/backends/dot.py")]
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "doc/notebooks/01_tutorial_getting_started.ipynb")]

		try:
		import pyevtk
		except ImportError:
		collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/datahandling/vtk.py")]

		collect_ignore += [os.path.join(SCRIPT_FOLDER, 'setup.py')]

		@@ -65,10 +91,6 @@ for root, sub_dirs, files in os.walk('.'):
		collect_ignore.append(f)


		import nbformat
		from nbconvert import PythonExporter


		class IPythonMockup:
		def run_line_magic(self, args, *kwargs):
		pass
		@@ -115,9 +137,15 @@ class IPyNbFile(pytest.File):
		exporter.exclude_markdown = True
		exporter.exclude_input_prompt = True

		notebook_contents = self.fspath.open()
		notebook_contents = self.fspath.open(encoding='utf-8')

		with warnings.catch_warnings():
		warnings.filterwarnings("ignore", "IPython.core.inputsplitter is deprecated")
		notebook = nbformat.read(notebook_contents, 4)
		code, _ = exporter.from_notebook_node(notebook)
		if pytest_version >= 50403:
		yield IPyNbTest.from_parent(name=self.name, parent=self, code=code)
		else:
		yield IPyNbTest(self.name, self, code)

		def teardown(self):
		@@ -127,4 +155,7 @@ class IPyNbFile(pytest.File):
		def pytest_collect_file(path, parent):
		glob_exprs = ["demo.ipynb", "tutorial.ipynb", "test_*.ipynb"]
		if any(path.fnmatch(g) for g in glob_exprs):
		if pytest_version >= 50403:
		return IPyNbFile.from_parent(fspath=path, parent=parent)
		else:
		return IPyNbFile(path, parent)

doc/conf.py

100644 → 100755

+13 −8

Original line number	Diff line number	Diff line
		@@ -4,10 +4,11 @@
		import datetime
		import sphinx_rtd_theme
		import os
		import re
		import sys

		sys.path.insert(0, os.path.abspath('.'))
		from version_from_git import version_number_from_git
		import pystencils

		extensions = [
		'sphinx.ext.autodoc',
		@@ -25,11 +26,14 @@ templates_path = ['_templates']
		source_suffix = '.rst'
		master_doc = 'index'

		copyright = '{}, Martin Bauer'.format(datetime.datetime.now().year)
		author = 'Martin Bauer'
		version = version_number_from_git()
		release = version_number_from_git()
		language = None
		copyright = f'{datetime.datetime.now().year}, Martin Bauer, Markus Holzer, Frederik Hennig'
		author = 'Martin Bauer, Markus Holzer, Frederik Hennig'
		# The short X.Y version (including .devXXXX, rcX, b1 suffixes if present)
		version = re.sub(r'(\d+\.\d+)\.\d+(.*)', r'\1\2', pystencils.__version__)
		version = re.sub(r'(\.dev\d+).*?$', r'\1', version)
		# The full version, including alpha/beta/rc tags.
		release = pystencils.__version__
		language = 'en'
		exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
		default_role = 'any'
		pygments_style = 'sphinx'
		@@ -47,13 +51,14 @@ nbsphinx_execute = 'never'
		nbsphinx_codecell_lexer = 'python3'

		# Example configuration for intersphinx: refer to the Python standard library.
		intersphinx_mapping = {'python': ('https://docs.python.org/3.6', None),
		intersphinx_mapping = {'python': ('https://docs.python.org/3.8', None),
		'numpy': ('https://docs.scipy.org/doc/numpy/', None),
		'matplotlib': ('https://matplotlib.org/', None),
		'sympy': ('https://docs.sympy.org/latest/', None),
		}

		autodoc_member_order = 'bysource'
		bibtex_bibfiles = ['sphinx/pystencils.bib']

		project = 'pystencils'
		html_logo = "img/logo.png"
		html_logo = 'img/logo.png'

doc/img/github_repo_card.png

0 → 100644

+78.4 KiB

78.41 KiB

doc/img/logo_large.svg

0 → 100644

+444 −0

Original line number	Diff line number	Diff line
		<?xml version="1.0" encoding="UTF-8" standalone="no"?>
		<!-- Created with Inkscape (http://www.inkscape.org/) -->

		<svg
		xmlns:dc="http://purl.org/dc/elements/1.1/"
		xmlns:cc="http://creativecommons.org/ns#"
		xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
		xmlns:svg="http://www.w3.org/2000/svg"
		xmlns="http://www.w3.org/2000/svg"
		xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
		xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
		width="379.82614mm"
		height="189.91307mm"
		viewBox="0 0 1345.8407 672.92033"
		id="svg2"
		version="1.1"
		inkscape:version="0.92.3 (2405546, 2018-03-11)"
		sodipodi:docname="logo_large.svg"
		inkscape:export-filename="/home/martin/code/pycodegen/pystencils/doc/img/github_repo_card.png"
		inkscape:export-xdpi="85.599998"
		inkscape:export-ydpi="85.599998">
		<defs
		id="defs4">
		<inkscape:path-effect
		effect="spiro"
		id="path-effect4188"
		is_visible="true" />
		<inkscape:path-effect
		effect="spiro"
		id="path-effect4188-5"
		is_visible="true" />
		<filter
		y="-0.25"
		height="1.5"
		inkscape:menu-tooltip="Darkens the edge with an inner blur and adds a flexible glow"
		inkscape:menu="Shadows and Glows"
		inkscape:label="Dark And Glow"
		style="color-interpolation-filters:sRGB"
		id="filter4596">
		<feGaussianBlur
		stdDeviation="5"
		result="result6"
		id="feGaussianBlur4598" />
		<feComposite
		result="result8"
		in="SourceGraphic"
		operator="atop"
		in2="result6"
		id="feComposite4600" />
		<feComposite
		result="result9"
		operator="over"
		in2="SourceAlpha"
		in="result8"
		id="feComposite4602" />
		<feColorMatrix
		values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 "
		result="result10"
		id="feColorMatrix4604" />
		<feBlend
		in="result10"
		mode="normal"
		in2="result6"
		id="feBlend4606" />
		</filter>
		<filter
		y="-0.25"
		height="1.5"
		inkscape:menu-tooltip="Darkens the edge with an inner blur and adds a flexible glow"
		inkscape:menu="Shadows and Glows"
		inkscape:label="Dark And Glow"
		style="color-interpolation-filters:sRGB"
		id="filter4608">
		<feGaussianBlur
		stdDeviation="5"
		result="result6"
		id="feGaussianBlur4610" />
		<feComposite
		result="result8"
		in="SourceGraphic"
		operator="atop"
		in2="result6"
		id="feComposite4612" />
		<feComposite
		result="result9"
		operator="over"
		in2="SourceAlpha"
		in="result8"
		id="feComposite4614" />
		<feColorMatrix
		values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 "
		result="result10"
		id="feColorMatrix4616" />
		<feBlend
		in="result10"
		mode="normal"
		in2="result6"
		id="feBlend4618" />
		</filter>
		<filter
		y="-0.25"
		height="1.5"
		inkscape:menu-tooltip="Darkens the edge with an inner blur and adds a flexible glow"
		inkscape:menu="Shadows and Glows"
		inkscape:label="Dark And Glow"
		style="color-interpolation-filters:sRGB"
		id="filter4620">
		<feGaussianBlur
		stdDeviation="5"
		result="result6"
		id="feGaussianBlur4622" />
		<feComposite
		result="result8"
		in="SourceGraphic"
		operator="atop"
		in2="result6"
		id="feComposite4624" />
		<feComposite
		result="result9"
		operator="over"
		in2="SourceAlpha"
		in="result8"
		id="feComposite4626" />
		<feColorMatrix
		values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 "
		result="result10"
		id="feColorMatrix4628" />
		<feBlend
		in="result10"
		mode="normal"
		in2="result6"
		id="feBlend4630" />
		</filter>
		<filter
		y="-0.25"
		height="1.5"
		inkscape:menu-tooltip="Darkens the edge with an inner blur and adds a flexible glow"
		inkscape:menu="Shadows and Glows"
		inkscape:label="Dark And Glow"
		style="color-interpolation-filters:sRGB"
		id="filter4632">
		<feGaussianBlur
		stdDeviation="5"
		result="result6"
		id="feGaussianBlur4634" />
		<feComposite
		result="result8"
		in="SourceGraphic"
		operator="atop"
		in2="result6"
		id="feComposite4636" />
		<feComposite
		result="result9"
		operator="over"
		in2="SourceAlpha"
		in="result8"
		id="feComposite4638" />
		<feColorMatrix
		values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 "
		result="result10"
		id="feColorMatrix4640" />
		<feBlend
		in="result10"
		mode="normal"
		in2="result6"
		id="feBlend4642" />
		</filter>
		<inkscape:path-effect
		effect="spiro"
		id="path-effect4188-7"
		is_visible="true" />
		<inkscape:path-effect
		effect="spiro"
		id="path-effect4188-5-6"
		is_visible="true" />
		<filter
		y="-0.25"
		height="1.5"
		inkscape:menu-tooltip="Darkens the edge with an inner blur and adds a flexible glow"
		inkscape:menu="Shadows and Glows"
		inkscape:label="Dark And Glow"
		style="color-interpolation-filters:sRGB"
		id="filter4596-6">
		<feGaussianBlur
		stdDeviation="5"
		result="result6"
		id="feGaussianBlur4598-6" />
		<feComposite
		result="result8"
		in="SourceGraphic"
		operator="atop"
		in2="result6"
		id="feComposite4600-9" />
		<feComposite
		result="result9"
		operator="over"
		in2="SourceAlpha"
		in="result8"
		id="feComposite4602-1" />
		<feColorMatrix
		values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 "
		result="result10"
		id="feColorMatrix4604-4" />
		<feBlend
		in="result10"
		mode="normal"
		in2="result6"
		id="feBlend4606-3" />
		</filter>
		<filter
		y="-0.25"
		height="1.5"
		inkscape:menu-tooltip="Darkens the edge with an inner blur and adds a flexible glow"
		inkscape:menu="Shadows and Glows"
		inkscape:label="Dark And Glow"
		style="color-interpolation-filters:sRGB"
		id="filter4620-1">
		<feGaussianBlur
		stdDeviation="5"
		result="result6"
		id="feGaussianBlur4622-1" />
		<feComposite
		result="result8"
		in="SourceGraphic"
		operator="atop"
		in2="result6"
		id="feComposite4624-4" />
		<feComposite
		result="result9"
		operator="over"
		in2="SourceAlpha"
		in="result8"
		id="feComposite4626-8" />
		<feColorMatrix
		values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 "
		result="result10"
		id="feColorMatrix4628-5" />
		<feBlend
		in="result10"
		mode="normal"
		in2="result6"
		id="feBlend4630-7" />
		</filter>
		<filter
		y="-0.25"
		height="1.5"
		inkscape:menu-tooltip="Darkens the edge with an inner blur and adds a flexible glow"
		inkscape:menu="Shadows and Glows"
		inkscape:label="Dark And Glow"
		style="color-interpolation-filters:sRGB"
		id="filter4632-1">
		<feGaussianBlur
		stdDeviation="5"
		result="result6"
		id="feGaussianBlur4634-9" />
		<feComposite
		result="result8"
		in="SourceGraphic"
		operator="atop"
		in2="result6"
		id="feComposite4636-8" />
		<feComposite
		result="result9"
		operator="over"
		in2="SourceAlpha"
		in="result8"
		id="feComposite4638-7" />
		<feColorMatrix
		values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 "
		result="result10"
		id="feColorMatrix4640-6" />
		<feBlend
		in="result10"
		mode="normal"
		in2="result6"
		id="feBlend4642-5" />
		</filter>
		<filter
		y="-0.25"
		height="1.5"
		inkscape:menu-tooltip="Darkens the edge with an inner blur and adds a flexible glow"
		inkscape:menu="Shadows and Glows"
		inkscape:label="Dark And Glow"
		style="color-interpolation-filters:sRGB"
		id="filter4608-0">
		<feGaussianBlur
		stdDeviation="5"
		result="result6"
		id="feGaussianBlur4610-2" />
		<feComposite
		result="result8"
		in="SourceGraphic"
		operator="atop"
		in2="result6"
		id="feComposite4612-5" />
		<feComposite
		result="result9"
		operator="over"
		in2="SourceAlpha"
		in="result8"
		id="feComposite4614-7" />
		<feColorMatrix
		values="1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 "
		result="result10"
		id="feColorMatrix4616-6" />
		<feBlend
		in="result10"
		mode="normal"
		in2="result6"
		id="feBlend4618-9" />
		</filter>
		</defs>
		<sodipodi:namedview
		id="base"
		pagecolor="#ffffff"
		bordercolor="#666666"
		borderopacity="1.0"
		inkscape:pageopacity="0.0"
		inkscape:pageshadow="2"
		inkscape:zoom="0.70000001"
		inkscape:cx="545.01294"
		inkscape:cy="35.725386"
		inkscape:document-units="px"
		inkscape:current-layer="layer1"
		showgrid="false"
		inkscape:window-width="3840"
		inkscape:window-height="2061"
		inkscape:window-x="0"
		inkscape:window-y="0"
		inkscape:window-maximized="1"
		fit-margin-top="0"
		fit-margin-left="0"
		fit-margin-right="0"
		fit-margin-bottom="0">
		<inkscape:grid
		type="xygrid"
		id="grid4176"
		originx="267.20477"
		originy="315.17846" />
		</sodipodi:namedview>
		<metadata
		id="metadata7">
		<rdf:RDF>
		<cc:Work
		rdf:about="">
		<dc:format>image/svg+xml</dc:format>
		<dc:type
		rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
		<dc:title></dc:title>
		</cc:Work>
		</rdf:RDF>
		</metadata>
		<g
		inkscape:label="Layer 1"
		inkscape:groupmode="layer"
		id="layer1"
		transform="translate(267.20477,-694.6203)">
		<text
		xml:space="preserve"
		style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:34.78659058px;line-height:125%;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, ';letter-spacing:0px;word-spacing:0px;fill:#252525;fill-opacity:1;stroke:none;stroke-width:4.34832382px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
		x="159.99139"
		y="964.43109"
		id="text1392-1"
		inkscape:export-xdpi="70.669998"
		inkscape:export-ydpi="70.669998"><tspan
		sodipodi:role="line"
		id="tspan1390-1"
		x="159.99139"
		y="964.43109"
		style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:156.53968811px;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';fill:#252525;fill-opacity:1;stroke-width:4.34832382px">pystencils</tspan></text>
		<g
		id="g9986"
		transform="matrix(4.1201463,0,0,4.1201463,-399.75066,866.02979)"
		inkscape:export-xdpi="70.669998"
		inkscape:export-ydpi="70.669998">
		<path
		inkscape:connector-curvature="0"
		inkscape:original-d="M 60.891002,27.333516 H 118.64865"
		inkscape:path-effect="#path-effect4188-7"
		id="path4186-6"
		d="M 60.891002,27.333516 H 118.64865"
		style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.78799796;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.70388345" />
		<path
		sodipodi:nodetypes="cc"
		inkscape:connector-curvature="0"
		inkscape:original-d="M 89.922623,-0.47572315 C 31.237244,132.88729 89.846228,36.88339 89.846228,56.13594"
		inkscape:path-effect="#path-effect4188-5-6"
		id="path4186-3-9"
		d="M 89.922623,-0.47572315 89.846228,56.13594"
		style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:2.78799796;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.70388345" />
		<circle
		transform="matrix(0.21391721,0,0,0.21391721,27.733834,-23.442344)"
		r="34.345188"
		cy="108.02044"
		cx="291.42902"
		id="path4136-76"
		style="opacity:1;fill:#e69f00;fill-opacity:1;stroke:none;stroke-width:3;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;filter:url(#filter4596-6)" />
		<circle
		transform="matrix(0.21391721,0,0,0.21391721,27.733834,-23.442344)"
		r="34.345188"
		cy="365.43817"
		cx="290.41885"
		id="path4136-6-0"
		style="opacity:1;fill:#0072b2;fill-opacity:1;stroke:none;stroke-width:3;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;filter:url(#filter4620-1)" />
		<circle
		transform="matrix(0.21391721,0,0,0.21391721,27.733834,-23.442344)"
		r="34.345188"
		cy="236.72931"
		cx="422.24377"
		id="path4136-3-9"
		style="opacity:1;fill:#999999;fill-opacity:1;stroke:none;stroke-width:3;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;filter:url(#filter4632-1)" />
		<circle
		transform="matrix(0.21391721,0,0,0.21391721,27.733834,-23.442344)"
		r="34.345188"
		cy="236.72931"
		cx="155.56349"
		id="path4136-7-0"
		style="opacity:1;fill:#009e73;fill-opacity:1;stroke:none;stroke-width:3;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;filter:url(#filter4608-0)" />
		</g>
		<text
		xml:space="preserve"
		style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:8.7668047px;line-height:125%;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, ';letter-spacing:0px;word-spacing:0px;fill:#252525;fill-opacity:0.70629368;stroke:none;stroke-width:1.09585059px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
		x="229.96391"
		y="1071.713"
		id="text1392-1-3"
		inkscape:export-xdpi="70.669998"
		inkscape:export-ydpi="70.669998"><tspan
		sodipodi:role="line"
		id="tspan1390-1-6"
		x="229.96391"
		y="1071.713"
		style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:62.0406723px;line-height:105.99999428%;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';fill:#252525;fill-opacity:0.70629368;stroke-width:1.09585059px">speed up stencil </tspan><tspan
		sodipodi:role="line"
		x="229.96391"
		y="1137.4761"
		style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:62.0406723px;line-height:105.99999428%;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';fill:#252525;fill-opacity:0.70629368;stroke-width:1.09585059px"
		id="tspan109">computations on</tspan><tspan
		sodipodi:role="line"
		x="229.96391"
		y="1203.2393"
		style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:62.0406723px;line-height:105.99999428%;font-family:'Latin Modern Mono Light';-inkscape-font-specification:'Latin Modern Mono Light, Bold';fill:#252525;fill-opacity:0.70629368;stroke-width:1.09585059px"
		id="tspan107">numpy arrays</tspan></text>
		</g>
		</svg>

doc/index.rst

+1 −0

Original line number	Diff line number	Diff line
		@@ -14,5 +14,6 @@ pystencils can help you to generate blazingly fast code for image processing, nu

		.. image:: /img/pystencils_arch_block_diagram.svg
		:height: 450px
		:align: center

doc/notebooks/01_tutorial_getting_started.ipynb

+431 −448

File changed.

File size exceeds preview limit.

View original file

View changed file

doc/notebooks/02_tutorial_basic_kernels.ipynb

+257 −99

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/03_tutorial_datahandling.ipynb

+303 −122

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/04_tutorial_advection_diffusion.ipynb

+8 −8

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/05_tutorial_phasefield_spinodal_decomposition.ipynb

+14 −19

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/06_tutorial_phasefield_dentritic_growth.ipynb

+199 −71

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_assignment_collection.ipynb

+47 −39

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_benchmark.ipynb

+2 −6

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_derivatives.ipynb

+50 −57

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_plotting_and_animation.ipynb

+123 −134

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_wave_equation.ipynb

+139 −204

File changed.

Preview size limit exceeded, changes collapsed.

doc/sphinx/api.rst

+1 −0

Original line number	Diff line number	Diff line
		@@ -5,6 +5,7 @@ API Reference
		:maxdepth: 3

		kernel_compile_and_call.rst
		enums.rst
		simplifications.rst
		datahandling.rst
		configuration.rst

doc/sphinx/enums.rst

0 → 100644

+6 −0

Original line number	Diff line number	Diff line
		************
		Enumerations
		************

		.. automodule:: pystencils.enums
		:members:

doc/sphinx/kernel_compile_and_call.rst

+10 −5

Original line number	Diff line number	Diff line
		@@ -8,9 +8,14 @@ Creating kernels

		.. autofunction:: pystencils.create_kernel

		.. autofunction:: pystencils.create_indexed_kernel
		.. autoclass:: pystencils.CreateKernelConfig
		:members:

		.. autofunction:: pystencils.kernelcreation.create_domain_kernel

		.. autofunction:: pystencils.kernelcreation.create_indexed_kernel

		.. autofunction:: pystencils.create_staggered_kernel
		.. autofunction:: pystencils.kernelcreation.create_staggered_kernel


		Code printing
		@@ -22,11 +27,11 @@ Code printing
		GPU Indexing
		-------------

		.. autoclass:: pystencils.gpucuda.AbstractIndexing
		.. autoclass:: pystencils.gpu.AbstractIndexing
		:members:

		.. autoclass:: pystencils.gpucuda.BlockIndexing
		.. autoclass:: pystencils.gpu.BlockIndexing
		:members:

		.. autoclass:: pystencils.gpucuda.LineIndexing
		.. autoclass:: pystencils.gpu.LineIndexing
		:members:

pystencils_tests/init.py→doc/sphinx/pystencils.bib

+0 −0

File moved.

doc/sphinx/simplifications.rst

+16 −2

Original line number	Diff line number	Diff line
		@@ -10,13 +10,27 @@ AssignmentCollection
		:members:


		SimplificationStrategy
		======================

		.. autoclass:: pystencils.simp.SimplificationStrategy
		:members:

		Simplifications
		===============

		.. automodule:: pystencils.simp
		.. automodule:: pystencils.simp.simplifications
		:members:

		Subexpression insertion
		=======================

		The goal of the subexpression insertions is to insert (inline) subexpressions whose elimination does not reduce
		the number of FLOPs. For example, a constant value kept as a subexpression leads to a new variable in the code,
		which occupies a register slot. On the other hand, a single variable can simply be inserted in all assignments.

		.. automodule:: pystencils.simp.subexpression_insertion
		:members:

doc/version_from_git.py

deleted100644 → 0

+0 −31

Original line number	Diff line number	Diff line
		import subprocess

		def version_number_from_git(tag_prefix='release/', sha_length=10, version_format="{version}.dev{commits}+{sha}"):
		    """Derive a version string from the git tags of the repository in the current working directory.

		    If HEAD is exactly the latest release tag, the release version itself is returned. Otherwise the
		    last component of the latest release is incremented and ``version_format`` is filled with the
		    incremented version, the number of commits since the tag and the abbreviated HEAD sha.
		    ``".dirty"`` is appended when tracked files have uncommitted changes.

		    Args:
		        tag_prefix: prefix that marks a git tag as a release tag; the rest of the tag is the version
		        sha_length: number of characters of the HEAD commit hash to keep
		        version_format: format string with the fields ``version``, ``commits`` and ``sha``

		    Returns:
		        the version string

		    Raises:
		        IndexError: if no tag starting with ``tag_prefix`` exists
		        ValueError: if git does not report a commit count, or the latest version is not dot-separated integers
		    """

		    def version_sort_key(version):
		        # Compare numeric components as integers so that e.g. '0.10.0' is newer than '0.9.0'.
		        # The leading 0/1 keeps integers and strings from ever being compared with each other.
		        return [(0, int(part)) if part.isdecimal() else (1, part) for part in version.split('.')]

		    def get_released_versions():
		        tags = subprocess.getoutput('git tag').split('\n')
		        versions = [t[len(tag_prefix):] for t in tags if t.startswith(tag_prefix)]
		        # a plain string sort of the tags would rank '0.10.0' before '0.9.0'
		        return sorted(versions, key=version_sort_key)

		    def tag_from_version(v):
		        return tag_prefix + v

		    def increment_version(v):
		        parsed_version = [int(i) for i in v.split('.')]
		        parsed_version[-1] += 1
		        return '.'.join(str(i) for i in parsed_version)

		    latest_release = get_released_versions()[-1]
		    commits_since_tag = subprocess.getoutput('git rev-list {}..HEAD --count'.format(tag_from_version(latest_release)))
		    sha = subprocess.getoutput('git rev-parse HEAD')[:sha_length]
		    # any output of the short status means tracked files were modified
		    is_dirty = len(subprocess.getoutput("git status --untracked-files=no -s")) > 0

		    if int(commits_since_tag) == 0:
		        version_string = latest_release
		    else:
		        next_version = increment_version(latest_release)
		        version_string = version_format.format(version=next_version, commits=commits_since_tag, sha=sha)

		    if is_dirty:
		        version_string += ".dirty"
		    return version_string

pyproject.toml

0 → 100644

+98 −0

Original line number	Diff line number	Diff line
		[project]
		name = "pystencils"
		description = "Speeding up stencil computations on CPUs and GPUs"
		dynamic = ["version"]
		readme = "README.md"
		authors = [
		{ name = "Martin Bauer" },
		{ name = "Jan Hönig" },
		{ name = "Markus Holzer" },
		{ name = "Frederik Hennig" },
		{ email = "cs10-codegen@fau.de" },
		]
		license = { file = "COPYING.txt" }
		requires-python = ">=3.10"
		dependencies = ["sympy>=1.9,<=1.12.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml", "fasteners"]
		classifiers = [
		"Development Status :: 4 - Beta",
		"Framework :: Jupyter",
		"Topic :: Software Development :: Code Generators",
		"Topic :: Scientific/Engineering :: Physics",
		"Intended Audience :: Developers",
		"Intended Audience :: Science/Research",
		"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
		]

		[project.urls]
		"Bug Tracker" = "https://i10git.cs.fau.de/pycodegen/pystencils/-/issues"
		"Documentation" = "https://pycodegen.pages.i10git.cs.fau.de/pystencils/"
		"Source Code" = "https://i10git.cs.fau.de/pycodegen/pystencils"

		[project.optional-dependencies]
		gpu = ['cupy']
		alltrafos = ['islpy', 'py-cpuinfo']
		bench_db = ['blitzdb', 'pymongo', 'pandas']
		interactive = [
		'matplotlib',
		'ipy_table',
		'imageio',
		'jupyter',
		'pyevtk',
		'rich',
		'graphviz',
		]
		use_cython = [
		'Cython'
		]
		doc = [
		'sphinx',
		'sphinx_rtd_theme',
		'nbsphinx',
		'sphinxcontrib-bibtex',
		'sphinx_autodoc_typehints',
		'pandoc',
		]
		tests = [
		'pytest',
		'pytest-cov',
		'pytest-html',
		'ansi2html',
		'pytest-xdist',
		'flake8',
		'nbformat',
		'nbconvert',
		'ipython',
		'matplotlib',
		'py-cpuinfo',
		'randomgen>=1.18',
		]

		[build-system]
		requires = [
		"setuptools>=61",
		"versioneer[toml]>=0.29",
		# 'Cython'
		]
		build-backend = "setuptools.build_meta"

		[tool.setuptools.package-data]
		pystencils = [
		"include/*.h",
		"boundaries/createindexlistcython.pyx"
		]

		[tool.setuptools.packages.find]
		where = ["src"]
		include = ["pystencils", "pystencils.*"]
		namespaces = false

		[tool.versioneer]
		# See the docstring in versioneer.py for instructions. Note that you must
		# re-run 'versioneer.py setup' after changing this section, and commit the
		# resulting files.
		VCS = "git"
		style = "pep440"
		versionfile_source = "src/pystencils/_version.py"
		versionfile_build = "pystencils/_version.py"
		tag_prefix = "release/"
		parentdir_prefix = "pystencils-"

pystencils/backends/cuda_known_functions.txt

deleted100644 → 0

+0 −293

Original line number	Diff line number	Diff line
		__prof_trigger
		printf

		__syncthreads
		__syncthreads_count
		__syncthreads_and
		__syncthreads_or
		__syncwarp
		__threadfence
		__threadfence_block
		__threadfence_system

		atomicAdd
		atomicSub
		atomicExch
		atomicMin
		atomicMax
		atomicInc
		atomicDec
		atomicAnd
		atomicOr
		atomicXor
		atomicCAS

		__all_sync
		__any_sync
		__ballot_sync
		__active_mask

		__shfl_sync
		__shfl_up_sync
		__shfl_down_sync
		__shfl_xor_sync

		__match_any_sync
		__match_all_sync

		__isGlobal
		__isShared
		__isConstant
		__isLocal

		tex1Dfetch
		tex1D
		tex2D
		tex3D

		rsqrtf
		cbrtf
		rcbrtf
		hypotf
		rhypotf
		norm3df
		rnorm3df
		norm4df
		rnorm4df
		normf
		rnormf
		expf
		exp2f
		exp10f
		expm1f
		logf
		log2f
		log10f
		log1pf
		sinf
		cosf
		tanf
		sincosf
		sinpif
		cospif
		sincospif
		asinf
		acosf
		atanf
		atan2f
		sinhf
		coshf
		tanhf
		asinhf
		acoshf
		atanhf
		powf
		erff
		erfcf
		erfinvf
		erfcinvf
		erfcxf
		normcdff
		normcdfinvf
		lgammaf
		tgammaf
		fmaf
		frexpf
		ldexpf
		scalbnf
		scalblnf
		logbf
		ilogbf
		j0f
		j1f
		jnf
		y0f
		y1f
		ynf
		cyl_bessel_i0f
		cyl_bessel_i1f
		fmodf
		remainderf
		remquof
		modff
		fdimf
		truncf
		roundf
		rintf
		nearbyintf
		ceilf
		floorf
		lrintf
		lroundf
		llrintf
		llroundf

		sqrt
		rsqrt
		cbrt
		rcbrt
		hypot
		rhypot
		norm3d
		rnorm3d
		norm4d
		rnorm4d
		norm
		rnorm
		exp
		exp2
		exp10
		expm1
		log
		log2
		log10
		log1p
		sin
		cos
		tan
		sincos
		sinpi
		cospi
		sincospi
		asin
		acos
		atan
		atan2
		sinh
		cosh
		tanh
		asinh
		acosh
		atanh
		pow
		erf
		erfc
		erfinv
		erfcinv
		erfcx
		normcdf
		normcdfinv
		lgamma
		tgamma
		fma
		frexp
		ldexp
		scalbn
		scalbln
		logb
		ilogb
		j0
		j1
		jn
		y0
		y1
		yn
		cyl_bessel_i0
		cyl_bessel_i1
		fmod
		remainder
		remquo
		modf
		fdim
		trunc
		round
		rint
		nearbyint
		ceil
		floor
		lrint
		lround
		llrint
		llround

		__fdividef
		__sinf
		__cosf
		__tanf
		__sincosf
		__logf
		__log2f
		__log10f
		__expf
		__exp10f
		__powf

		__fadd_rn
		__fsub_rn
		__fmul_rn
		__fmaf_rn
		__frcp_rn
		__fsqrt_rn
		__frsqrt_rn
		__fdiv_rn

		__fadd_rz
		__fsub_rz
		__fmul_rz
		__fmaf_rz
		__frcp_rz
		__fsqrt_rz
		__frsqrt_rz
		__fdiv_rz

		__fadd_ru
		__fsub_ru
		__fmul_ru
		__fmaf_ru
		__frcp_ru
		__fsqrt_ru
		__frsqrt_ru
		__fdiv_ru

		__fadd_rd
		__fsub_rd
		__fmul_rd
		__fmaf_rd
		__frcp_rd
		__fsqrt_rd
		__frsqrt_rd
		__fdiv_rd

		__fdividef
		__expf
		__exp10f
		__logf
		__log2f
		__log10f
		__sinf
		__cosf
		__sincosf
		__tanf
		__powf

		__dadd_rn
		__dsub_rn
		__dmul_rn
		__fma_rn
		__ddiv_rn
		__drcp_rn
		__dsqrt_rn

		__dadd_rz
		__dsub_rz
		__dmul_rz
		__fma_rz
		__ddiv_rz
		__drcp_rz
		__dsqrt_rz

		__dadd_ru
		__dsub_ru
		__dmul_ru
		__fma_ru
		__ddiv_ru
		__drcp_ru
		__dsqrt_ru

		__dadd_rd
		__dsub_rd
		__dmul_rd
		__fma_rd
		__ddiv_rd
		__drcp_rd
		__dsqrt_rd

pystencils/backends/simd_instruction_sets.py

deleted100644 → 0

+0 −138

Original line number	Diff line number	Diff line


		# noinspection SpellCheckingInspection
		def get_vector_instruction_set(data_type='double', instruction_set='avx'):
		    """Build the table of x86 SIMD intrinsic templates for one data type / instruction set combination.

		    Args:
		        data_type: 'double' or 'float'
		        instruction_set: 'sse', 'avx' or 'avx512'

		    Returns:
		        dict that maps operation ids ('+', '|', '==', 'loadU', 'makeVec', ...) to ``str.format`` templates
		        in which ``{0}``, ``{1}``, ... are placeholders for the operands. Further entries: 'width'
		        (number of lanes), 'dataTypePrefix', 'double'/'float'/'int'/'bool' (vector type names),
		        'headers' (list of include files), 'any'/'all' (mask reductions) and 'rsqrt' (template or None).

		    Raises:
		        KeyError: if ``data_type`` or ``instruction_set`` is not one of the supported values
		    """
		    comparisons = {
		        '==': '_CMP_EQ_UQ',
		        '!=': '_CMP_NEQ_UQ',
		        '>=': '_CMP_GE_OQ',
		        '<=': '_CMP_LE_OQ',
		        '<': '_CMP_NGE_UQ',
		        '>': '_CMP_NLE_UQ',
		    }
		    # shortcut syntax: intrinsic base name followed by the argument list in brackets;
		    # digits are operand positions, anything else is passed through literally
		    base_names = {
		        '+': 'add[0, 1]',
		        '-': 'sub[0, 1]',
		        '*': 'mul[0, 1]',
		        '/': 'div[0, 1]',
		        '&': 'and[0, 1]',
		        '|': 'or[0, 1]',
		        'blendv': 'blendv[0, 1, 2]',

		        'sqrt': 'sqrt[0]',

		        'makeVec': 'set[]',
		        'makeZero': 'setzero[]',

		        'loadU': 'loadu[0]',
		        'loadA': 'load[0]',
		        'storeU': 'storeu[0,1]',
		        'storeA': 'store[0,1]',
		        'stream': 'stream[0,1]',
		    }
		    for comparison_op, constant in comparisons.items():
		        base_names[comparison_op] = 'cmp[0, 1, %s]' % (constant,)

		    headers = {
		        'avx512': ['<immintrin.h>'],
		        'avx': ['<immintrin.h>'],
		        'sse': ['<immintrin.h>', '<xmmintrin.h>', '<emmintrin.h>', '<pmmintrin.h>',
		                '<tmmintrin.h>', '<smmintrin.h>', '<nmmintrin.h>']
		    }

		    suffix = {
		        'double': 'pd',
		        'float': 'ps',
		    }
		    prefix = {
		        'sse': '_mm',
		        'avx': '_mm256',
		        'avx512': '_mm512',
		    }

		    width = {
		        ("double", "sse"): 2,
		        ("float", "sse"): 4,
		        ("double", "avx"): 4,
		        ("float", "avx"): 8,
		        ("double", "avx512"): 8,
		        ("float", "avx512"): 16,
		    }

		    result = {
		        'width': width[(data_type, instruction_set)],
		    }
		    pre = prefix[instruction_set]
		    suf = suffix[data_type]
		    for intrinsic_id, function_shortcut in base_names.items():
		        function_shortcut = function_shortcut.strip()
		        name = function_shortcut[:function_shortcut.index('[')]

		        if intrinsic_id == 'makeVec':
		            # broadcast: the single operand is repeated once per lane
		            arg_string = "({})".format(",".join(["{0}"] * result['width']))
		        else:
		            args = function_shortcut[function_shortcut.index('[') + 1: -1]
		            formatted_args = []
		            for arg in args.split(","):
		                arg = arg.strip()
		                if not arg:
		                    continue
		                if arg in ('0', '1', '2', '3', '4', '5'):
		                    formatted_args.append("{" + arg + "}")
		                else:
		                    formatted_args.append(arg)
		            # joining keeps the opening parenthesis for an empty argument list, e.g. 'setzero[]' -> '()'
		            arg_string = "(" + ",".join(formatted_args) + ")"
		        # AVX512 comparisons return a mask register and use the '_mask' variant of the intrinsic
		        mask_suffix = '_mask' if instruction_set == 'avx512' and intrinsic_id in comparisons else ''
		        result[intrinsic_id] = pre + "_" + name + "_" + suf + mask_suffix + arg_string

		    result['dataTypePrefix'] = {
		        'double': "_" + pre + 'd',
		        'float': "_" + pre,
		    }

		    result['rsqrt'] = None
		    bit_width = result['width'] * (64 if data_type == 'double' else 32)
		    result['double'] = "__m%dd" % (bit_width,)
		    result['float'] = "__m%d" % (bit_width,)
		    result['int'] = "__m%di" % (bit_width,)
		    result['bool'] = "__m%dd" % (bit_width,)

		    result['headers'] = headers[instruction_set]
		    result['any'] = "%s_movemask_%s({0}) > 0" % (pre, suf)
		    # movemask yields one bit per lane, so "all lanes set" depends on the lane count (0xF only for 4 lanes)
		    result['all'] = "%s_movemask_%s({0}) == 0x%X" % (pre, suf, (1 << result['width']) - 1)

		    if instruction_set == 'avx512':
		        # AVX512 works on dedicated mask registers instead of full-width vector masks
		        size = 8 if data_type == 'double' else 16
		        result['&'] = '_kand_mask%d({0}, {1})' % (size,)
		        result['|'] = '_kor_mask%d({0}, {1})' % (size,)
		        result['any'] = '!_ktestz_mask%d_u8({0}, {0})' % (size, )
		        result['all'] = '_kortestc_mask%d_u8({0}, {0})' % (size, )
		        result['blendv'] = '%s_mask_blend_%s({2}, {0}, {1})' % (pre, suf)
		        result['rsqrt'] = "_mm512_rsqrt14_%s({0})" % (suf,)
		        result['bool'] = "__mmask%d" % (size,)

		    if instruction_set == 'avx' and data_type == 'float':
		        result['rsqrt'] = "_mm256_rsqrt_ps({0})"

		    return result


		def get_supported_instruction_sets():
		    """List of supported instruction sets on current hardware, or None if query failed.

		    The query fails when the optional ``cpuinfo`` package cannot be imported or when it does not
		    report any CPU flags.

		    Returns:
		        list containing any of 'sse', 'avx', 'avx512' (in this order), or None
		    """
		    try:
		        from cpuinfo import get_cpu_info
		    except ImportError:
		        return None

		    reported_flags = get_cpu_info().get('flags')
		    if reported_flags is None:
		        # no flag information available: report a failed query instead of raising KeyError
		        return None
		    flags = set(reported_flags)

		    # every flag of a group has to be present for the instruction set to count as supported
		    required_flags = (
		        ("sse", {'sse', 'sse2', 'ssse3', 'sse4_1', 'sse4_2'}),
		        ("avx", {'avx'}),
		        ("avx512", {'avx512f'}),
		    )
		    return [name for name, required in required_flags if flags.issuperset(required)]

pystencils/boundaries/createindexlistcython.c

deleted100644 → 0

+0 −41083

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/cache.py

deleted100644 → 0

+0 −27

Original line number	Diff line number	Diff line
		import os

		# `memorycache` is an in-memory LRU cache decorator factory
		try:
		    from functools import lru_cache as memorycache
		except ImportError:
		    from backports.functools_lru_cache import lru_cache as memorycache

		# `disk_cache` persists results on disk via joblib if available; the cache directory can be
		# overridden with the environment variable PYSTENCILS_CACHE_DIR
		try:
		    from joblib import Memory
		    from appdirs import user_cache_dir
		    if 'PYSTENCILS_CACHE_DIR' in os.environ:
		        cache_dir = os.environ['PYSTENCILS_CACHE_DIR']
		    else:
		        cache_dir = user_cache_dir('pystencils')
		    # The directory is passed positionally: newer joblib releases no longer accept the `cachedir`
		    # keyword (it was renamed to `location`), and the resulting TypeError is not caught below.
		    disk_cache = Memory(cache_dir, verbose=False).cache
		    disk_cache_no_fallback = disk_cache
		except ImportError:
		    # fallback to in-memory caching if joblib is not available
		    disk_cache = memorycache(maxsize=64)

		    def disk_cache_no_fallback(o):
		        # without joblib this decorator does not cache at all
		        return o


		# Disable memory cache:
		# disk_cache = lambda o: o
		# disk_cache_no_fallback = lambda o: o

pystencils/data_types.py

deleted100644 → 0

+0 −633

Original line number	Diff line number	Diff line
		import ctypes

		import numpy as np
		import sympy as sp
		from sympy.core.cache import cacheit
		from sympy.logic.boolalg import Boolean

		from pystencils.cache import memorycache
		from pystencils.utils import all_equal

		try:
		import llvmlite.ir as ir
		except ImportError as e:
		ir = None
		_ir_importerror = e


		# noinspection PyPep8Naming
		class address_of(sp.Function):
		    """SymPy function with a single argument; its ``dtype`` is a restrict pointer to the argument's type."""
		    is_Atom = True

		    def __new__(cls, arg):
		        return sp.Function.__new__(cls, arg)

		    @property
		    def canonical(self):
		        """The ``canonical`` attribute of the argument; NotImplementedError if the argument has none."""
		        operand = self.args[0]
		        if not hasattr(operand, 'canonical'):
		            raise NotImplementedError()
		        return operand.canonical

		    @property
		    def is_commutative(self):
		        """Commutativity is taken over from the argument."""
		        operand = self.args[0]
		        return operand.is_commutative

		    @property
		    def dtype(self):
		        """Restrict pointer to the argument's dtype, or to 'void' if the argument has no dtype."""
		        operand = self.args[0]
		        if not hasattr(operand, 'dtype'):
		            return PointerType('void', restrict=True)
		        return PointerType(operand.dtype, restrict=True)


		# noinspection PyPep8Naming
		class cast_func(sp.Function):
		    """SymPy function ``cast_func(expression, dtype)``; the second argument is exposed as ``dtype``."""
		    is_Atom = True

		    def __new__(cls, *args, **kwargs):
		        # to work in conditions of sp.Piecewise cast_func has to be of type Boolean as well
		        # however, a cast_function should only be a boolean if its argument is a boolean, otherwise this leads
		        # to problems when for example comparing cast_func's for equality
		        #
		        # lhs = bitwise_and(a, cast_func(1, 'int'))
		        # rhs = cast_func(0, 'int')
		        # print( sp.Ne(lhs, rhs) ) # would give true if all cast_funcs are booleans
		        # -> thus a separate class boolean_cast_func is introduced
		        if isinstance(args[0], Boolean):
		            cls = boolean_cast_func
		        # all positional and keyword arguments are forwarded unchanged to sp.Function
		        return sp.Function.__new__(cls, *args, **kwargs)

		    @property
		    def canonical(self):
		        """The ``canonical`` attribute of the first argument; NotImplementedError if it has none."""
		        if hasattr(self.args[0], 'canonical'):
		            return self.args[0].canonical
		        else:
		            raise NotImplementedError()

		    @property
		    def is_commutative(self):
		        """Commutativity is taken over from the first argument."""
		        return self.args[0].is_commutative

		    @property
		    def dtype(self):
		        """The second argument, i.e. the type that is cast to."""
		        return self.args[1]


		# noinspection PyPep8Naming
		class boolean_cast_func(cast_func, Boolean):
		    """Variant of cast_func that is also a SymPy Boolean; it adds no behaviour of its own."""


		# noinspection PyPep8Naming
		class vector_memory_access(cast_func):
		    """cast_func subclass that is declared to take exactly four arguments."""
		    # NOTE(review): the meaning of the four arguments is not visible in this module section - confirm at call sites
		    nargs = (4,)


		# noinspection PyPep8Naming
		class reinterpret_cast_func(cast_func):
		    """Separate marker type derived from cast_func; it adds no behaviour of its own."""


		# noinspection PyPep8Naming
		class pointer_arithmetic_func(sp.Function, Boolean):
		    """SymPy function (also a Boolean) that exposes the ``canonical`` attribute of its first argument."""

		    @property
		    def canonical(self):
		        """The ``canonical`` attribute of the first argument; NotImplementedError if it has none."""
		        first = self.args[0]
		        if not hasattr(first, 'canonical'):
		            raise NotImplementedError()
		        return first.canonical


		class TypedSymbol(sp.Symbol):
		    """SymPy symbol that carries a data type (``dtype``) in addition to its name."""

		    def __new__(cls, *args, **kwds):
		        # construction goes through the cached constructor below
		        obj = TypedSymbol.__xnew_cached_(cls, *args, **kwds)
		        return obj

		    def __new_stage2__(cls, name, dtype):
		        obj = super(TypedSymbol, cls).__xnew__(cls, name)
		        try:
		            obj._dtype = create_type(dtype)
		        except (TypeError, ValueError):
		            # on error keep the string
		            obj._dtype = dtype
		        return obj

		    __xnew__ = staticmethod(__new_stage2__)
		    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))

		    @property
		    def dtype(self):
		        """The type created from the ``dtype`` argument, or the raw argument if it could not be converted."""
		        return self._dtype

		    def _hashable_content(self):
		        # the data type is part of the identity: same name with different dtype gives different symbols
		        return super()._hashable_content(), hash(self._dtype)

		    def __getnewargs__(self):
		        return self.name, self.dtype

		    # For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
		    @property
		    def is_integer(self):
		        """True for numpy integer dtypes, otherwise the assumption of the plain symbol."""
		        if hasattr(self.dtype, 'numpy_dtype'):
		            return np.issubdtype(self.dtype.numpy_dtype, np.integer) or super().is_integer
		        else:
		            return super().is_integer

		    @property
		    def is_negative(self):
		        """False for unsigned integer dtypes, otherwise the assumption of the plain symbol."""
		        if hasattr(self.dtype, 'numpy_dtype'):
		            if np.issubdtype(self.dtype.numpy_dtype, np.unsignedinteger):
		                return False

		        # fall back to the matching assumption (is_negative, not is_positive)
		        return super().is_negative

		    @property
		    def is_real(self):
		        """True for numpy integer and floating dtypes, otherwise the assumption of the plain symbol."""
		        if hasattr(self.dtype, 'numpy_dtype'):
		            return np.issubdtype(self.dtype.numpy_dtype, np.integer) or \
		                np.issubdtype(self.dtype.numpy_dtype, np.floating) or \
		                super().is_real
		        else:
		            return super().is_real


		def create_type(specification):
		    """Turns a type specification into a Type object.

		    Args:
		        specification: a Type object, or anything ``np.dtype`` accepts (e.g. a string)

		    Returns:
		        the specification itself if it already is a Type object; otherwise a non-const StructType
		        for numpy dtypes with fields and a non-const BasicType for all other numpy dtypes
		    """
		    if isinstance(specification, Type):
		        return specification

		    numpy_dtype = np.dtype(specification)
		    type_class = BasicType if numpy_dtype.fields is None else StructType
		    return type_class(numpy_dtype, const=False)


		@memorycache(maxsize=64)
		def create_composite_type_from_string(specification):
		    """Parses a C-like type string into a Type object.

		    The string is lower-cased and split at whitespace. The tokens before the first '*' describe the
		    basic type (optionally 'const'); every '*' token starts a pointer level that may carry the
		    qualifiers 'const' and 'restrict'.

		    Args:
		        specification: Specification string

		    Returns:
		        Type object
		    """
		    tokens = specification.lower().split()

		    # group the tokens: a new group starts at every '*'
		    groups = []
		    group = []
		    for token in tokens:
		        if token == '*':
		            groups.append(group)
		            group = [token]
		        else:
		            group.append(token)
		    if group:
		        groups.append(group)

		    # first group: the basic type
		    base_tokens = groups.pop(0)
		    is_const = 'const' in base_tokens
		    if is_const:
		        base_tokens.remove('const')
		    assert len(base_tokens) == 1
		    if base_tokens[0][-1] == "*":
		        # a '*' glued to the type name counts as an additional, trailing pointer level
		        base_tokens[0] = base_tokens[0][:-1]
		        groups.append('*')
		    result_type = BasicType(np.dtype(base_tokens[0]), is_const)

		    # remaining groups: one pointer level each
		    for pointer_tokens in groups:
		        is_restrict = 'restrict' in pointer_tokens
		        if is_restrict:
		            pointer_tokens.remove('restrict')
		        is_const = 'const' in pointer_tokens
		        if is_const:
		            pointer_tokens.remove("const")
		        assert len(pointer_tokens) == 1 and pointer_tokens[0] == '*'
		        result_type = PointerType(result_type, is_const, is_restrict)
		    return result_type


		def get_base_type(data_type):
		    """Follow the ``base_type`` chain and return the innermost type.

		    The innermost type is the first one whose ``base_type`` is None.
		    """
		    innermost = data_type
		    while True:
		        wrapped = innermost.base_type
		        if wrapped is None:
		            return innermost
		        innermost = wrapped


		def to_ctypes(data_type):
		    """
		    Map a Type onto the corresponding ctypes type.

		    Pointers become ``ctypes.POINTER`` of their converted base type, structs are exposed
		    as pointers to ``c_uint8``, everything else is looked up in ``to_ctypes.map`` by its
		    numpy dtype.

		    :param data_type: Subclass of Type
		    :return: ctypes type object
		    """
		    if isinstance(data_type, PointerType):
		        pointee = to_ctypes(data_type.base_type)
		        return ctypes.POINTER(pointee)
		    if isinstance(data_type, StructType):
		        return ctypes.POINTER(ctypes.c_uint8)
		    return to_ctypes.map[data_type.numpy_dtype]


		# Lookup table used by to_ctypes: numpy dtype -> ctypes scalar type
		to_ctypes.map = {
		    np.dtype(numpy_type): c_type
		    for numpy_type, c_type in (
		        (np.int8, ctypes.c_int8),
		        (np.int16, ctypes.c_int16),
		        (np.int32, ctypes.c_int32),
		        (np.int64, ctypes.c_int64),

		        (np.uint8, ctypes.c_uint8),
		        (np.uint16, ctypes.c_uint16),
		        (np.uint32, ctypes.c_uint32),
		        (np.uint64, ctypes.c_uint64),

		        (np.float32, ctypes.c_float),
		        (np.float64, ctypes.c_double),
		    )
		}


		def ctypes_from_llvm(data_type):
		    """Map an llvmlite IR type onto a ctypes type.

		    Returns None for the void type, which ctypes cannot represent; a pointer to void
		    becomes ``ctypes.c_void_p``.

		    Raises:
		        the stored import error if llvmlite's ``ir`` module is unavailable
		        ValueError: for integer widths other than 8, 16, 32 and 64
		        NotImplementedError: for any other IR type
		    """
		    if not ir:
		        raise _ir_importerror

		    if isinstance(data_type, ir.PointerType):
		        pointee = ctypes_from_llvm(data_type.pointee)
		        return ctypes.c_void_p if pointee is None else ctypes.POINTER(pointee)

		    if isinstance(data_type, ir.IntType):
		        int_by_width = {
		            8: ctypes.c_int8,
		            16: ctypes.c_int16,
		            32: ctypes.c_int32,
		            64: ctypes.c_int64,
		        }
		        int_type = int_by_width.get(data_type.width)
		        if int_type is None:
		            raise ValueError("Int width %d is not supported" % data_type.width)
		        return int_type

		    if isinstance(data_type, ir.FloatType):
		        return ctypes.c_float
		    if isinstance(data_type, ir.DoubleType):
		        return ctypes.c_double
		    if isinstance(data_type, ir.VoidType):
		        return None  # Void type is not supported by ctypes

		    raise NotImplementedError('Data type %s of %s is not supported yet' % (type(data_type), data_type))


		def to_llvm_type(data_type):
		    """
		    Map a Type onto the corresponding llvmlite IR type.

		    Pointer types are converted recursively via ``as_pointer()``; all other types are
		    looked up in ``to_llvm_type.map`` by their numpy dtype.

		    :param data_type: Subclass of Type
		    :return: llvmlite type object
		    """
		    if not ir:
		        raise _ir_importerror
		    if not isinstance(data_type, PointerType):
		        return to_llvm_type.map[data_type.numpy_dtype]
		    pointee = to_llvm_type(data_type.base_type)
		    return pointee.as_pointer()


		if ir:
		    # Lookup table used by to_llvm_type: numpy dtype -> llvmlite IR type.
		    # Signed and unsigned integers of equal width share the same IR integer type.
		    to_llvm_type.map = {
		        np.dtype(numpy_type): llvm_type
		        for numpy_type, llvm_type in (
		            (np.int8, ir.IntType(8)),
		            (np.int16, ir.IntType(16)),
		            (np.int32, ir.IntType(32)),
		            (np.int64, ir.IntType(64)),

		            (np.uint8, ir.IntType(8)),
		            (np.uint16, ir.IntType(16)),
		            (np.uint32, ir.IntType(32)),
		            (np.uint64, ir.IntType(64)),

		            (np.float32, ir.FloatType()),
		            (np.float64, ir.DoubleType()),
		        )
		    }


		def peel_off_type(dtype, type_to_peel_off):
		    """Strip all outer layers whose type is exactly ``type_to_peel_off``.

		    Subclasses of ``type_to_peel_off`` are not peeled, because the check uses type
		    identity rather than ``isinstance``.
		    """
		    if type(dtype) is not type_to_peel_off:
		        return dtype
		    return peel_off_type(dtype.base_type, type_to_peel_off)


		def collate_types(types):
		    """
		    Determine the "common type" of a sequence of types, e.g. (float, double, float) -> double.

		    A single pointer combined with integer operands collates to that pointer type.
		    Otherwise vector wrappers are removed, the remaining basic types are combined with
		    numpy's ``result_type`` (considering only the float types if any are present), and
		    the result is wrapped in a vector type again if any input was a vector.

		    Raises:
		        ValueError: for two pointers, a pointer mixed with a non-integer operand, or
		                    vector types of different width
		    """
		    # Pointer arithmetic case i.e. pointer + integer is allowed
		    if any(type(t) is PointerType for t in types):
		        pointer = None
		        for candidate in types:
		            if type(candidate) is PointerType:
		                if pointer is not None:
		                    raise ValueError("Cannot collate the combination of two pointer types")
		                pointer = candidate
		                continue
		            is_integral = type(candidate) is BasicType and (candidate.is_int() or candidate.is_uint())
		            if not is_integral:
		                raise ValueError("Invalid pointer arithmetic")
		        return pointer

		    # Remember the vector types, then continue with the scalar types they wrap
		    vector_types = [t for t in types if type(t) is VectorType]
		    if not all_equal(t.width for t in vector_types):
		        raise ValueError("Collation failed because of vector types with different width")
		    scalar_types = [peel_off_type(t, VectorType) for t in types]

		    # now we should have a list of basic types - struct types are not yet supported
		    assert all(type(t) is BasicType for t in scalar_types)

		    # As soon as one float is involved only the float types take part in the collation
		    float_types = tuple(t for t in scalar_types if t.is_float())
		    if float_types:
		        scalar_types = float_types

		    collated = BasicType(np.result_type(*(t.numpy_dtype for t in scalar_types)))
		    if vector_types:
		        collated = VectorType(collated, vector_types[0].width)
		    return collated


		@memorycache(maxsize=2048)
		def get_type_of_expression(expr):
		"""Determine the type object describing the result of a sympy expression.

		The expression is sympified and then matched against a fixed sequence of cases;
		the first matching case decides the result.

		Args:
		expr: expression (anything accepted by ``sp.sympify``)

		Returns:
		the type of the expression

		Raises:
		ValueError: if the expression contains a plain, untyped ``sp.Symbol``
		NotImplementedError: if none of the cases matches
		"""
		# Imported here rather than at module level (presumably to avoid circular imports -- TODO confirm)
		from pystencils.astnodes import ResolvedFieldAccess
		from pystencils.cpu.vectorization import vec_all, vec_any

		expr = sp.sympify(expr)
		# Integer literals are tested before the Rational/Float case below
		if isinstance(expr, sp.Integer):
		return create_type("int")
		elif isinstance(expr, sp.Rational) or isinstance(expr, sp.Float):
		return create_type("double")
		elif isinstance(expr, ResolvedFieldAccess):
		return expr.field.dtype
		# Typed symbols carry their type; they are tested before the generic sp.Symbol case
		elif isinstance(expr, TypedSymbol):
		return expr.dtype
		elif isinstance(expr, sp.Symbol):
		raise ValueError("All symbols inside this expression have to be typed! ", str(expr))
		# For casts the second argument is returned as the type
		elif isinstance(expr, cast_func):
		return expr.args[1]
		elif isinstance(expr, vec_any) or isinstance(expr, vec_all):
		return create_type("bool")
		elif hasattr(expr, 'func') and expr.func == sp.Piecewise:
		# a[0] is the value and a[1] the condition of each Piecewise entry
		collated_result_type = collate_types(tuple(get_type_of_expression(a[0]) for a in expr.args))
		collated_condition_type = collate_types(tuple(get_type_of_expression(a[1]) for a in expr.args))
		# Vector conditions make the result a vector of the same width
		if type(collated_condition_type) is VectorType and type(collated_result_type) is not VectorType:
		collated_result_type = VectorType(collated_result_type, width=collated_condition_type.width)
		return collated_result_type
		elif isinstance(expr, sp.Indexed):
		# The label of the indexed base is expected to be a typed symbol; its base_type is the element type
		typed_symbol = expr.base.label
		return typed_symbol.dtype.base_type
		elif isinstance(expr, sp.boolalg.Boolean) or isinstance(expr, sp.boolalg.BooleanFunction):
		# if any arg is of vector type return a vector boolean, else return a normal scalar boolean
		result = create_type("bool")
		vec_args = [get_type_of_expression(a) for a in expr.args if isinstance(get_type_of_expression(a), VectorType)]
		if vec_args:
		result = VectorType(result, width=vec_args[0].width)
		return result
		# A power has the type of its base; the exponent is ignored
		elif isinstance(expr, sp.Pow):
		return get_type_of_expression(expr.args[0])
		# Any other expression: collate the types of all its arguments
		elif isinstance(expr, sp.Expr):
		types = tuple(get_type_of_expression(a) for a in expr.args)
		return collate_types(types)

		raise NotImplementedError("Could not determine type for", expr, type(expr))


		class Type(sp.Basic):
		    """Common base class of the type objects; a sympy atom without sympy arguments.

		    Constructor arguments are consumed by the ``__init__`` of the subclasses and are
		    deliberately not forwarded to ``sp.Basic.__new__``.
		    """
		    is_Atom = True

		    def __new__(cls, *args, **kwargs):
		        # Accept (and ignore) arbitrary positional and keyword arguments, so that
		        # subclasses can be constructed with keywords such as ``const=False``.
		        return sp.Basic.__new__(cls)

		    def _sympystr(self, *args, **kwargs):
		        # Printing through sympy gives the same text as str()
		        return str(self)


		class BasicType(Type):
		    """Scalar type backed by a numpy dtype, with an optional ``const`` qualifier."""

		    @staticmethod
		    def numpy_name_to_c(name):
		        """Translate a numpy dtype name into the matching C type name.

		        Args:
		            name: numpy dtype name, e.g. ``'float64'`` or ``'int32'``

		        Returns:
		            C type name, e.g. ``'double'`` or ``'int32_t'``

		        Raises:
		            NotImplementedError: if no C name is known for ``name``
		        """
		        if name == 'float64':
		            return 'double'
		        elif name == 'float32':
		            return 'float'
		        elif name.startswith('int'):
		            width = int(name[len("int"):])
		            return "int%d_t" % (width,)
		        elif name.startswith('uint'):
		            width = int(name[len("uint"):])
		            return "uint%d_t" % (width,)
		        elif name == 'bool':
		            return 'bool'
		        else:
		            raise NotImplementedError("Can't map numpy to C name for %s" % (name,))

		    def __init__(self, dtype, const=False):
		        """
		        Args:
		            dtype: another type object (its ``numpy_dtype`` is taken over) or anything
		                   accepted by ``np.dtype``
		            const: whether the type is const-qualified
		        """
		        self.const = const
		        if isinstance(dtype, Type):
		            self._dtype = dtype.numpy_dtype
		        else:
		            self._dtype = np.dtype(dtype)
		        # Only plain scalars are allowed: no structured, object or sub-array dtypes
		        assert self._dtype.fields is None, "Tried to initialize NativeType with a structured type"
		        assert self._dtype.hasobject is False
		        assert self._dtype.subdtype is None

		    def __getnewargs__(self):
		        return self.numpy_dtype, self.const

		    @property
		    def base_type(self):
		        # A basic type wraps no other type
		        return None

		    @property
		    def numpy_dtype(self):
		        return self._dtype

		    @property
		    def item_size(self):
		        return 1

		    # The category tests below inspect ``dtype.kind`` instead of the ``np.sctypes``
		    # table, which no longer exists in NumPy 2.0.

		    def is_int(self):
		        """True for signed integer dtypes."""
		        return self.numpy_dtype.kind == 'i'

		    def is_float(self):
		        """True for floating-point dtypes."""
		        return self.numpy_dtype.kind == 'f'

		    def is_uint(self):
		        """True for unsigned integer dtypes."""
		        return self.numpy_dtype.kind == 'u'

		    def is_complex(self):
		        """True for complex floating-point dtypes."""
		        return self.numpy_dtype.kind == 'c'

		    def is_other(self):
		        """True for bool, object, bytes, str and void dtypes."""
		        return self.numpy_dtype.kind in ('b', 'O', 'S', 'U', 'V')

		    @property
		    def base_name(self):
		        """C name of the type without qualifiers."""
		        return BasicType.numpy_name_to_c(str(self._dtype))

		    def __str__(self):
		        result = BasicType.numpy_name_to_c(str(self._dtype))
		        if self.const:
		            result += " const"
		        return result

		    def __repr__(self):
		        return str(self)

		    def __eq__(self, other):
		        if not isinstance(other, BasicType):
		            return False
		        else:
		            return (self.numpy_dtype, self.const) == (other.numpy_dtype, other.const)

		    def __hash__(self):
		        # Hash of the C spelling including the const qualifier
		        return hash(str(self))


		class VectorType(Type):
		    """Vector of ``width`` elements of a base type.

		    While the class attribute ``instruction_set`` is None, ``str()`` gives a generic
		    ``base[width]`` spelling; otherwise the spelling is looked up in ``instruction_set``
		    under the keys 'int', 'double', 'float' and 'bool'.
		    """
		    instruction_set = None

		    def __init__(self, base_type, width=4):
		        self._base_type = base_type
		        self.width = width

		    def __getnewargs__(self):
		        return self._base_type, self.width

		    @property
		    def base_type(self):
		        return self._base_type

		    @property
		    def item_size(self):
		        return self.width * self.base_type.item_size

		    def __eq__(self, other):
		        return isinstance(other, VectorType) and \
		            (self.base_type, self.width) == (other.base_type, other.width)

		    def __hash__(self):
		        return hash((self.base_type, self.width))

		    def __str__(self):
		        if self.instruction_set is None:
		            return "%s[%d]" % (self.base_type, self.width)

		        # (type specification, key into instruction_set), tested in this order
		        known_base_types = (
		            ("int64", 'int'),
		            ("float64", 'double'),
		            ("float32", 'float'),
		            ("bool", 'bool'),
		        )
		        for type_spec, key in known_base_types:
		            if self.base_type == create_type(type_spec):
		                return self.instruction_set[key]
		        raise NotImplementedError()


		class PointerType(Type):
		    """Pointer to ``base_type`` with optional ``const`` and ``restrict`` qualifiers."""

		    def __init__(self, base_type, const=False, restrict=True):
		        self._base_type = base_type
		        self.const = const
		        self.restrict = restrict

		    def __getnewargs__(self):
		        return self.base_type, self.const, self.restrict

		    @property
		    def base_type(self):
		        return self._base_type

		    @property
		    def alias(self):
		        # Aliasing is possible exactly when the pointer is not restrict-qualified
		        return not self.restrict

		    @property
		    def item_size(self):
		        return self.base_type.item_size

		    def __eq__(self, other):
		        if isinstance(other, PointerType):
		            mine = (self.base_type, self.const, self.restrict)
		            theirs = (other.base_type, other.const, other.restrict)
		            return mine == theirs
		        return False

		    def __hash__(self):
		        return hash((self._base_type, self.const, self.restrict))

		    def __str__(self):
		        text = "%s *" % (str(self.base_type),)
		        if self.restrict:
		            text += " RESTRICT"
		        if self.const:
		            text += " const"
		        return text

		    def __repr__(self):
		        return str(self)


		class StructType:
		    """Type backed by a structured numpy dtype; printed as raw bytes (``uint8_t``)."""

		    def __init__(self, numpy_type, const=False):
		        self.const = const
		        self._dtype = np.dtype(numpy_type)

		    def __getnewargs__(self):
		        return self.numpy_dtype, self.const

		    @property
		    def numpy_dtype(self):
		        return self._dtype

		    @property
		    def base_type(self):
		        # A struct wraps no other type
		        return None

		    @property
		    def item_size(self):
		        return self.numpy_dtype.itemsize

		    def has_element(self, element_name):
		        return element_name in self.numpy_dtype.fields

		    def get_element_offset(self, element_name):
		        # numpy stores (dtype, offset, ...) per field; the offset is entry 1
		        field_info = self.numpy_dtype.fields[element_name]
		        return field_info[1]

		    def get_element_type(self, element_name):
		        # Entry 0 of the field info is the element dtype; constness is inherited
		        field_info = self.numpy_dtype.fields[element_name]
		        return BasicType(field_info[0], self.const)

		    def __eq__(self, other):
		        return isinstance(other, StructType) and \
		            (self.numpy_dtype, self.const) == (other.numpy_dtype, other.const)

		    def __hash__(self):
		        return hash((self.numpy_dtype, self.const))

		    def __str__(self):
		        # structs are handled byte-wise
		        return "uint8_t const" if self.const else "uint8_t"

		    def __repr__(self):
		        return str(self)

pystencils/include/PyStencilsField.h

deleted100644 → 0

+0 −19

Original line number	Diff line number	Diff line
		#pragma once

		// PyStencilsField bundles a data pointer with per-dimension shape and stride
		// entries. Two layouts are provided, selected by __CUDA_ARCH__.
		extern "C++" {
		#ifdef __CUDA_ARCH__
		// Variant used when __CUDA_ARCH__ is defined: shape and stride are plain C arrays
		template <typename DTYPE_T, std::size_t DIMENSION> struct PyStencilsField {
		DTYPE_T *data;
		DTYPE_T shape[DIMENSION];
		DTYPE_T stride[DIMENSION];
		};
		#else
		#include <array>

		// Variant used otherwise: shape and stride are std::array
		// NOTE(review): shape and stride use DTYPE_T as element type, same as the data -- confirm this is intended
		template <typename DTYPE_T, std::size_t DIMENSION> struct PyStencilsField {
		DTYPE_T *data;
		std::array<DTYPE_T, DIMENSION> shape;
		std::array<DTYPE_T, DIMENSION> stride;
		};
		#endif
		}

pystencils/include/philox_rand.h

deleted100644 → 0

+0 −104

Original line number	Diff line number	Diff line
		#include <cstdint>

		// Function qualifiers: plain inline when __CUDA_ARCH__ is not defined,
		// forced-inline device functions when it is
		#ifndef __CUDA_ARCH__
		#define QUALIFIERS inline
		#else
		#define QUALIFIERS static __forceinline__ __device__
		#endif

		// Increments added to the two key words by _philox4x32bumpkey
		#define PHILOX_W32_0 (0x9E3779B9)
		#define PHILOX_W32_1 (0xBB67AE85)
		// Multipliers used by _philox4x32round
		#define PHILOX_M4x32_0 (0xD2511F53)
		#define PHILOX_M4x32_1 (0xCD9E8D57)
		// Scale factors 2^-53 (double) and 2^-32 (float) that map integers to floating point
		#define TWOPOW53_INV_DOUBLE (1.1102230246251565e-16)
		#define TWOPOW32_INV_FLOAT (2.3283064e-10f)

		// Fixed-width unsigned integer aliases used throughout this header
		typedef std::uint32_t uint32;
		typedef std::uint64_t uint64;


		// Multiplies a and b as 32-bit unsigned values: the upper 32 bits of the 64-bit
		// product are stored to *hip, the lower 32 bits are returned.
		QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip)
		{
		#ifndef __CUDA_ARCH__
		    // host code: form the full 64-bit product and split it
		    const uint64 wide = static_cast<uint64>(a) * static_cast<uint64>(b);
		    *hip = static_cast<uint32>(wide >> 32);
		    return static_cast<uint32>(wide);
		#else
		    // device code: __umulhi yields the upper half, the wrapping product the lower half
		    *hip = __umulhi(a,b);
		    return a*b;
		#endif
		}

		// One round on the four counter words ctr[0..3] using the two key words key[0..1];
		// ctr is updated in place, key is only read.
		QUALIFIERS void _philox4x32round(uint32* ctr, uint32* key)
		{
		    uint32 high0;
		    uint32 high1;
		    // Both products are formed before any counter word is overwritten
		    const uint32 low0 = mulhilo32(PHILOX_M4x32_0, ctr[0], &high0);
		    const uint32 low1 = mulhilo32(PHILOX_M4x32_1, ctr[2], &high1);

		    ctr[0] = high1 ^ ctr[1] ^ key[0];
		    ctr[1] = low1;
		    ctr[2] = high0 ^ ctr[3] ^ key[1];
		    ctr[3] = low0;
		}

		// Advances both key words in place by their fixed increments (unsigned wrap-around).
		QUALIFIERS void _philox4x32bumpkey(uint32* key)
		{
		    key[0] = key[0] + PHILOX_W32_0;
		    key[1] = key[1] + PHILOX_W32_1;
		}

		// Combines two 32-bit words into one integer (y shifted left by 21 bits, XORed with x)
		// and scales it by 2^-53, adding half a step.
		QUALIFIERS double _uniform_double_hq(uint32 x, uint32 y)
		{
		    const unsigned long long shifted_y = static_cast<unsigned long long>(y) << (53 - 32);
		    const unsigned long long z = static_cast<unsigned long long>(x) ^ shifted_y;
		    return z * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0);
		}


		// Runs ten rounds on the given counter/key and writes two doubles to rnd1 and rnd2,
		// each built from a pair of the resulting counter words.
		QUALIFIERS void philox_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
		                               uint32 key0, uint32 key1, double & rnd1, double & rnd2)
		{
		    uint32 key[2] = {key0, key1};
		    uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};

		    // Round 1 uses the key as given; rounds 2..10 each bump the key first
		    _philox4x32round(ctr, key);
		    for (int round = 2; round <= 10; ++round)
		    {
		        _philox4x32bumpkey(key);
		        _philox4x32round(ctr, key);
		    }

		    rnd1 = _uniform_double_hq(ctr[0], ctr[1]);
		    rnd2 = _uniform_double_hq(ctr[2], ctr[3]);
		}



		// Runs ten rounds on the given counter/key and writes four floats to rnd1..rnd4,
		// one per resulting counter word, scaled by 2^-32 plus half a step.
		QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
		                              uint32 key0, uint32 key1,
		                              float & rnd1, float & rnd2, float & rnd3, float & rnd4)
		{
		    uint32 key[2] = {key0, key1};
		    uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};

		    // Round 1 uses the key as given; rounds 2..10 each bump the key first
		    _philox4x32round(ctr, key);
		    for (int round = 2; round <= 10; ++round)
		    {
		        _philox4x32bumpkey(key);
		        _philox4x32round(ctr, key);
		    }

		    rnd1 = ctr[0] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
		    rnd2 = ctr[1] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
		    rnd3 = ctr[2] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
		    rnd4 = ctr[3] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
		}
		No newline at end of file

pystencils/kerncraft_coupling/__init__.py

deleted100644 → 0

+0 −4

Original line number	Diff line number	Diff line
		from .generate_benchmark import generate_benchmark, run_c_benchmark
		from .kerncraft_interface import KerncraftParameters, PyStencilsKerncraftKernel

		__all__ = ['PyStencilsKerncraftKernel', 'KerncraftParameters', 'generate_benchmark', 'run_c_benchmark']

pystencils/kerncraft_coupling/generate_benchmark.py

deleted100644 → 0

+0 −210

Original line number	Diff line number	Diff line
		import os
		import subprocess

		from jinja2 import Template

		from pystencils.astnodes import PragmaBlock
		from pystencils.backends.cbackend import generate_c, get_headers
		from pystencils.cpu.cpujit import get_compiler_config, run_compile_step
		from pystencils.data_types import get_base_type
		from pystencils.include import get_pystencils_include_path
		from pystencils.sympyextensions import prod

		# Jinja2 template of a stand-alone C benchmark program wrapped around one generated kernel.
		# It is rendered by generate_benchmark() with the variables: includes, likwid, openmp,
		# timing, kernel_code, kernelName, fields, constants and call_argument_list.
		# Each field is allocated with sizeof(type) * size bytes (64 byte alignment) and filled
		# with 0.23; the kernel is then called in a short warm-up pass and in a measured pass
		# whose repetition count is taken from argv[1].
		benchmark_template = Template("""
		#include "kerncraft.h"
		#include <stdlib.h>
		#include <stdint.h>
		#include <stdbool.h>
		#include <math.h>
		#include <stdio.h>

		{{ includes }}

		{%- if likwid %}
		#include <likwid.h>
		{%- endif %}

		#define RESTRICT __restrict__
		#define FUNC_PREFIX
		void dummy(void *);
		void timing(double* wcTime, double* cpuTime);
		extern int var_false;


		{{kernel_code}}


		int main(int argc, char **argv)
		{
		{%- if likwid %}
		likwid_markerInit();
		{%- endif %}

		{%- for field_name, dataType, size in fields %}

		// Initialization {{field_name}}
		double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
		for (unsigned long long i = 0; i < {{size}}; ++i)
		{{field_name}}[i] = 0.23;

		if(var_false)
		dummy({{field_name}});

		{%- endfor %}



		{%- for constantName, dataType in constants %}

		// Constant {{constantName}}
		{{dataType}} {{constantName}};
		{{constantName}} = 0.23;
		if(var_false)
		dummy(& {{constantName}});

		{%- endfor %}

		{%- if likwid and openmp %}
		#pragma omp parallel
		{
		likwid_markerRegisterRegion("loop");
		#pragma omp barrier
		{%- elif likwid %}
		likwid_markerRegisterRegion("loop");
		{%- endif %}

		for(int warmup = 1; warmup >= 0; --warmup) {
		int repeat = 2;
		if(warmup == 0) {
		repeat = atoi(argv[1]);
		{%- if likwid %}
		likwid_markerStartRegion("loop");
		{%- endif %}
		}

		{%- if timing %}
		double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
		timing(&wcStartTime, &cpuStartTime);
		{%- endif %}

		for (; repeat > 0; --repeat)
		{
		{{kernelName}}({{call_argument_list}});

		// Dummy calls
		{%- for field_name, dataType, size in fields %}
		if(var_false) dummy((void*){{field_name}});
		{%- endfor %}
		{%- for constantName, dataType in constants %}
		if(var_false) dummy((void*)&{{constantName}});
		{%- endfor %}
		}
		{%- if timing %}
		timing(&wcEndTime, &cpuEndTime);
		if( warmup == 0)
		printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
		{%- endif %}

		}

		{%- if likwid %}
		likwid_markerStopRegion("loop");
		{%- if openmp %}
		}
		{%- endif %}
		{%- endif %}

		{%- if likwid %}
		likwid_markerClose();
		{%- endif %}
		}
		""")


		def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
		    """Render the C source of a benchmark program for a kernel.

		    Note that with ``likwid`` and ``openmp`` both enabled the pragma line of a leading
		    pragma block in ``ast`` is cleared in place, because the generated main function
		    opens the parallel region itself.

		    Args:
		        ast: the pystencils AST object as returned by create_kernel
		        likwid: if True likwid markers are added to the code
		        openmp: relevant only if likwid=True, to generate correct likwid initialization code
		        timing: add timing output to the code, prints time per iteration to stdout

		    Returns:
		        C code as string
		    """
		    fields_by_name = {field.name: field for field in ast.fields_accessed}
		    scalar_params = []
		    field_params = []
		    call_arguments = []
		    for param in ast.get_parameters():
		        if param.is_field_parameter:
		            assert param.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
		            accessed = fields_by_name[param.field_name]
		            element_type = str(get_base_type(param.symbol.dtype))
		            field_params.append((param.field_name, element_type, prod(accessed.shape)))
		            call_arguments.append(param.field_name)
		        else:
		            scalar_params.append((param.symbol.name, str(param.symbol.dtype)))
		            call_arguments.append(param.symbol.name)

		    include_lines = ["#include %s" % (header,) for header in get_headers(ast)]
		    includes = "\n".join(include_lines)

		    # Strip "#pragma omp parallel" from within kernel, because main function takes care of that
		    # when likwid and openmp are enabled
		    if likwid and openmp and len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
		        ast.body.args[0].pragma_line = ''

		    return benchmark_template.render(
		        likwid=likwid,
		        openmp=openmp,
		        kernel_code=generate_c(ast, dialect='c'),
		        kernelName=ast.function_name,
		        fields=field_params,
		        constants=scalar_params,
		        call_argument_list=",".join(call_arguments),
		        includes=includes,
		        timing=timing,
		    )


		def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
		    """Compile the timing benchmark of a kernel and run it, with the outer loop in C.

		    The benchmark source is written to ``bench.c`` and compiled to ``bench`` in the
		    current working directory, together with kerncraft's ``timing.c`` and ``dummy.c``.

		    Args:
		        ast: kernel AST, passed on to generate_benchmark
		        inner_iterations: timings are recorded around this many iterations
		        outer_iterations: number of timings recorded

		    Returns:
		        list of times per iteration, one entry for each outer iteration
		    """
		    import kerncraft

		    source = generate_benchmark(ast, timing=True)
		    with open('bench.c', 'w') as source_file:
		        source_file.write(source)

		    kerncraft_headers = os.path.join(os.path.dirname(kerncraft.__file__), 'headers')
		    include_flags = ['-I' + get_pystencils_include_path(), '-I' + kerncraft_headers]

		    config = get_compiler_config()
		    command = [config['command']]
		    command.extend(config['flags'].split())
		    command.extend(include_flags)
		    command.extend([
		        os.path.join(kerncraft_headers, 'timing.c'),
		        os.path.join(kerncraft_headers, 'dummy.c'),
		        'bench.c',
		        '-o', 'bench',
		    ])
		    run_compile_step(command)

		    # Each run of the binary prints one time per iteration on stdout
		    return [float(subprocess.check_output(['./bench', str(inner_iterations)]))
		            for _ in range(outer_iterations)]

pystencils/kerncraft_coupling/kerncraft_interface.py

deleted100644 → 0

+0 −182

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/kernelcreation.py

deleted100644 → 0

+0 −269

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/llvm/__init__.py

deleted100644 → 0

+0 −4

Original line number	Diff line number	Diff line
		from .kernelcreation import create_kernel
		from .llvmjit import make_python_function

		__all__ = ['create_kernel', 'make_python_function']

pystencils/llvm/control_flow.py

deleted100644 → 0

+0 −52

Original line number	Diff line number	Diff line
		import llvmlite.ir as ir


		class Loop(object):
		"""Context manager that emits a counting loop through an llvmlite IR builder.

		Entering the context creates the loop blocks and returns the phi node holding the
		loop counter; instructions emitted inside the ``with`` body go into the loop block.
		Leaving the context branches to the increment/compare block and positions the
		builder after the loop.
		"""
		def __init__(self, builder, start_val, stop_val, step_val=1, loop_name='loop', phi_name="_phi"):
		# The arguments are only stored here; all IR is emitted in __enter__ / __exit__
		self.builder = builder
		self.start_val = start_val
		self.stop_val = stop_val
		self.step_val = step_val
		self.loop_name = loop_name
		self.phi_name = phi_name

		def __enter__(self):
		self.loop_end, self.after, phi = self._for_loop(self.start_val, self.stop_val, self.step_val, self.loop_name,
		self.phi_name)
		return phi

		def _for_loop(self, start_val, stop_val, step_val, loop_name, phi_name):
		"""Create the loop blocks; returns (increment/compare block, block after the loop, counter phi)."""
		# TODO size of int??? unsigned???
		integer = ir.IntType(64)

		# Loop block
		pre_loop_bb = self.builder.block
		loop_bb = self.builder.append_basic_block(name='loop_' + loop_name)

		# Explicit fall through from the current block to loop_bb
		self.builder.branch(loop_bb)

		self.builder.position_at_start(loop_bb)

		# Add phi
		phi = self.builder.phi(integer, name=phi_name)
		phi.add_incoming(start_val, pre_loop_bb)

		# Increment/compare block: computes the next counter value and tests it against stop_val
		loop_end_bb = self.builder.append_basic_block(name=loop_name + "_end_bb")
		self.builder.position_at_start(loop_end_bb)

		next_var = self.builder.add(phi, step_val, name=loop_name + '_next_it')
		# The comparison is unsigned: continue while next_var < stop_val
		cond = self.builder.icmp_unsigned('<', next_var, stop_val, name=loop_name + "_cond")

		after_bb = self.builder.append_basic_block(name=loop_name + "_after_bb")

		self.builder.cbranch(cond, loop_bb, after_bb)
		phi.add_incoming(next_var, loop_end_bb)

		# Leave the builder at the end of the loop block so the caller can emit the body
		self.builder.position_at_end(loop_bb)

		return loop_end_bb, after_bb, phi

		def __exit__(self, exc_type, exc, exc_tb):
		# Close the body by jumping to the increment/compare block, then continue after the loop
		self.builder.branch(self.loop_end)
		self.builder.position_at_end(self.after)

pystencils/llvm/kernelcreation.py

deleted100644 → 0

+0 −33

Original line number	Diff line number	Diff line
		from pystencils.llvm.llvmjit import make_python_function
		from pystencils.transformations import insert_casts


		def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(),
		                  iteration_slice=None, ghost_layers=None):
		    """
		    Build the kernel AST for the LLVM backend from a list of update rules.

		    The AST is created by the CPU ``create_kernel``; afterwards casts are inserted into
		    its body and it is marked to be compiled with the LLVM ``make_python_function``.

		    Args:
		        assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
		                     Defining the update rules of the kernel
		        function_name: name of the generated function - only important if generated code is written out
		        type_info: a map from symbol name to a C type specifier. If not specified all symbols are assumed to
		                   be of type 'double' except symbols which occur on the left hand side of equations where
		                   the right hand side is a sympy Boolean which are assumed to be 'bool' .
		        split_groups: Specification on how to split up inner loop into multiple loops. For details see
		                      transformation :func:`pystencils.transformation.split_inner_loop`
		        iteration_slice: if not None, iteration is done only over this slice of the field
		        ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
		                      if None, the number of ghost layers is determined automatically and assumed to be
		                      equal for all dimensions

		    :return: :class:`pystencils.ast.KernelFunction` node
		    """
		    from pystencils.cpu import create_kernel as create_cpu_kernel

		    kernel_ast = create_cpu_kernel(assignments, function_name, type_info, split_groups,
		                                   iteration_slice, ghost_layers)
		    kernel_ast.body = insert_casts(kernel_ast.body)
		    kernel_ast._compile_function = make_python_function
		    kernel_ast._backend = 'llvm'
		    return kernel_ast

pystencils/llvm/llvm.py

deleted100644 → 0

+0 −322

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/llvm/llvmjit.py

deleted100644 → 0

+0 −226

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/rng.py

deleted100644 → 0

+0 −128

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/sympy_gmpy_bug_workaround.py

deleted100644 → 0

+0 −18

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/SkylakeSP_Gold-5122_allinclusive.yaml

deleted100644 → 0

+0 −600

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/benchmark.py

deleted100644 → 0

+0 −188

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/generate.py

deleted100644 → 0

+0 −50

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/iacaMarks.h

deleted100644 → 0

+0 −53

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/main.c

deleted100644 → 0

+0 −11

Original line number	Diff line number	Diff line
		#include "iacaMarks.h"

		// Small program containing the IACA_START / IACA_END markers from iacaMarks.h.
		// The loop bound depends on argc; the accumulated sum is returned as exit status.
		int main(int argc, char * argv[]){
		int a = 0;
		for(int i = 0; i < argc+100000; i++){
		// NOTE(review): IACA_START sits inside the loop body while IACA_END follows the loop -- confirm this placement is intended
		IACA_START
		a += i;
		}
		IACA_END
		return a;
		}

pystencils_tests/kerncraft_inputs/2d-5pt.c

deleted100644 → 0

+0 −8

Original line number	Diff line number	Diff line
		// 2D 5-point stencil: b gets the sum of the four direct neighbours in a, scaled by s.
		// The loops cover the interior only (indices 1 .. size-2).
		double a[30][50][3];
		double b[30][50][3];
		double s;

		for(int j=1; j<30-1; ++j)
		for(int i=1; i<50-1; ++i)
		b[j][i] = ( a[j][i-1] + a[j][i+1]
		+ a[j-1][i] + a[j+1][i]) * s;

pystencils_tests/kerncraft_inputs/3d-7pt.c

deleted100644 → 0

+0 −10

Original line number	Diff line number	Diff line
		// 3D 7-point stencil: b gets the sum of the six direct neighbours in a, scaled by s.
		// The loops cover the interior only; M and N are not defined in this file.
		double a[M][N][N];
		double b[M][N][N];
		double s;

		for(int k=1; k<M-1; ++k)
		for(int j=1; j<N-1; ++j)
		for(int i=1; i<N-1; ++i)
		b[k][j][i] = ( a[k][j][i-1] + a[k][j][i+1]
		+ a[k][j-1][i] + a[k][j+1][i]
		+ a[k-1][j][i] + a[k+1][j][i]) * s;

pystencils_tests/kerncraft_inputs/default_machine_file.yaml

deleted100644 → 0

+0 −277

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_address_of.py

deleted100644 → 0

+0 −59

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_assignment_collection.py

deleted100644 → 0

+0 −29

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_basic_usage_llvm.ipynb

deleted100644 → 0

+0 −398

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_conditional_vec.py

deleted100644 → 0

+0 −64

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_cuda_known_functions.py

deleted100644 → 0

+0 −59

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_datahandling_parallel.py

deleted100644 → 0

+0 −66

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_destructuring_field_class.py

deleted100644 → 0

+0 −53

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_fd_derivation.ipynb

deleted100644 → 0

+0 −250

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_field.py

deleted100644 → 0

+0 −119

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_indexed_kernels.py

deleted100644 → 0

+0 −56

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_jacobi_llvm.py

deleted100644 → 0

+0 −54

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_kerncraft_coupling.py

deleted100644 → 0

+0 −130

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_phasefield_dentritic_3D.ipynb

deleted100644 → 0

+0 −357

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_random.py

deleted100644 → 0

+0 −43

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_size_and_layout_checks_llvm.py

deleted100644 → 0

+0 −81

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_sliced_iteration.py

deleted100644 → 0

+0 −55

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_small_block_benchmark.ipynb

deleted100644 → 0

+0 −184

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_stencils.py

deleted100644 → 0

+0 −1

Original line number	Diff line number	Diff line
		import pystencils as ps

pystencils_tests/test_types.py

deleted100644 → 0

+0 −21

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_vectorization.py

deleted100644 → 0

+0 −160

File deleted.

Preview size limit exceeded, changes collapsed.

pytest.ini

+29 −9

File changed.

Preview size limit exceeded, changes collapsed.

quicktest.py

0 → 100644

+22 −0

File added.

Preview size limit exceeded, changes collapsed.

release.sh

+2 −2

Original line number	Diff line number	Diff line
		@@ -8,6 +8,6 @@ read new_version

		git tag -s release/${new_version}
		git push origin master release/${new_version}
		python setup.py sdist bdist_wheel
		rm -rf dist
		python setup.py sdist
		twine upload dist/*

setup.py

+15 −113

File changed.

Preview size limit exceeded, changes collapsed.

pystencils/__init__.py→src/pystencils/__init__.py

+40 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/_version.py

0 → 100644

+683 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/alignedarray.py→src/pystencils/alignedarray.py

+29 −3

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/assignment.py→src/pystencils/assignment.py

+28 −46

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/astnodes.py→src/pystencils/astnodes.py

+246 −99

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/backends/__init__.py → src/pystencils/backends/__init__.py

+0 −6

Original line number	Diff line number	Diff line
		@@ -6,9 +6,3 @@ try:
		__all__.append('print_dot')
		except ImportError:
		pass

		try:
		from .llvm import generate_llvm # NOQA
		__all__.append('generate_llvm')
		except ImportError:
		pass

src/pystencils/backends/arm_instruction_sets.py

0 → 100644

+174 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/backends/cbackend.py→src/pystencils/backends/cbackend.py

+911 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/backends/cuda_backend.py→src/pystencils/backends/cuda_backend.py

+68 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/backends/dot.py→src/pystencils/backends/dot.py

+10 −7

Original line number	Diff line number	Diff line
		import graphviz
		from graphviz import Digraph, lang
		try:
		from graphviz import Digraph
		import graphviz.quoting as quote
		except ImportError:
		from graphviz import Digraph
		import graphviz.lang as quote
		from sympy.printing.printer import Printer


		@@ -12,7 +17,7 @@ class DotPrinter(Printer):
		super(DotPrinter, self).__init__()
		self._node_to_str_function = node_to_str_function
		self.dot = Digraph(**kwargs)
		self.dot.quote_edge = lang.quote
		self.dot.quote_edge = quote.quote

		def _print_KernelFunction(self, func):
		self.dot.node(str(id(func)), style='filled', fillcolor='#a056db', label=self._node_to_str_function(func))
		@@ -50,22 +55,20 @@ class DotPrinter(Printer):


		def __shortened(node):
		from pystencils.astnodes import LoopOverCoordinate, KernelFunction, SympyAssignment, Block, Conditional
		from pystencils.astnodes import LoopOverCoordinate, KernelFunction, SympyAssignment, Conditional
		if isinstance(node, LoopOverCoordinate):
		return "Loop over dim %d" % (node.coordinate_to_loop_over,)
		elif isinstance(node, KernelFunction):
		params = node.get_parameters()
		param_names = [p.field_name for p in params if p.is_field_pointer]
		param_names += [p.symbol.name for p in params if not p.is_field_parameter]
		return "Func: %s (%s)" % (node.function_name, ",".join(param_names))
		return f"Func: {node.function_name} ({','.join(param_names)})"
		elif isinstance(node, SympyAssignment):
		return repr(node.lhs)
		elif isinstance(node, Block):
		return "Block" + str(id(node))
		elif isinstance(node, Conditional):
		return repr(node)
		else:
		raise NotImplementedError("Cannot handle node type %s" % (type(node),))
		raise NotImplementedError(f"Cannot handle node type {type(node)}")


		def print_dot(node, view=False, short=False, **kwargs):

src/pystencils/backends/json.py

0 → 100644

+82 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/ppc_instruction_sets.py

0 → 100644

+106 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/riscv_instruction_sets.py

0 → 100644

+111 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/simd_instruction_sets.py

0 → 100644

+126 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/x86_instruction_sets.py

0 → 100644

+178 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/bit_masks.py

0 → 100644

+53 −0

Original line number	Diff line number	Diff line
		import sympy as sp
		# from pystencils.typing import get_type_of_expression


		# noinspection PyPep8Naming
		class flag_cond(sp.Function):
		    """Evaluates a flag condition on a bit mask, and returns the value of one of two expressions,
		    depending on whether the flag is set.

		    Three argument version:
		    ```
		    flag_cond(flag_bit, mask, expr) = expr if (flag_bit is set in mask) else 0
		    ```

		    Four argument version:
		    ```
		    flag_cond(flag_bit, mask, expr_then, expr_else) = expr_then if (flag_bit is set in mask) else expr_else
		    ```
		    """

		    # Accepted argument counts: (flag_bit, mask, expr) or (flag_bit, mask, expr_then, expr_else).
		    nargs = (3, 4)

		    def __new__(cls, flag_bit, mask_expression, *expressions):
		        """Create the function node; the arguments are forwarded unchanged to the base class."""
		        # TODO Jan reintroduce checking
		        # flag_dtype = get_type_of_expression(flag_bit)
		        # if not flag_dtype.is_int():
		        # raise ValueError('Argument flag_bit must be of integer type.')
		        #
		        # mask_dtype = get_type_of_expression(mask_expression)
		        # if not mask_dtype.is_int():
		        # raise ValueError('Argument mask_expression must be of integer type.')

		        return super().__new__(cls, flag_bit, mask_expression, *expressions)

		    def to_c(self, print_func):
		        """Return a branch-free C expression string implementing the flag condition.

		        Args:
		            print_func: callable applied to each argument; its result is interpolated
		                into the generated code string.

		        Returns:
		            ``bit * (then)`` for the three argument version, and
		            ``bit * (then) + (bit ^ 1) * (else)`` for the four argument version, where
		            ``bit`` is ``((mask) >> (flag_bit) & 1)``.
		        """
		        flag_bit = self.args[0]
		        mask = self.args[1]
		        then_expression = self.args[2]

		        flag_bit_code = print_func(flag_bit)
		        mask_code = print_func(mask)
		        then_code = print_func(then_expression)

		        # In C, `>>` binds tighter than `&`, so this isolates exactly the selected bit (0 or 1).
		        flag_is_set = f"(({mask_code}) >> ({flag_bit_code}) & 1)"
		        code = f"{flag_is_set} * ({then_code})"

		        if len(self.args) > 3:
		            else_expression = self.args[3]
		            else_code = print_func(else_expression)
		            # Invert the *isolated* bit. XOR-ing the unmasked shift result with 1 would leave
		            # any higher mask bits in the factor and scale the else-expression incorrectly.
		            code += f" + ({flag_is_set} ^ 1) * ({else_code})"

		        return code

Source

Target

Files

Some changes are not shown.