compare_fix to master · pycodegen / pystencils

Some changes are not shown.

For a faster browsing experience, only 56 of 288 files are shown. Download one of the files below to see all changes.

.flake8

+4 −4

Original line number	Original line	Diff line number	Diff line
	[flake8]		[flake8]
	max-line-length=120		max-line-length=120
	exclude=pystencils/jupyter.py,		exclude=src/pystencils/jupyter.py,
	pystencils/plot.py		src/pystencils/plot.py
	pystencils/session.py		src/pystencils/session.py
	ignore = W293 W503 W291 C901		ignore = W293 W503 W291 C901 E741

.gitattributes

0 → 100644

+1 −0

Original line number	Original line	Diff line number	Diff line
			src/pystencils/_version.py export-subst

.gitignore

+17 −2

Original line number	Original line	Diff line number	Diff line
	__pycache__		__pycache__
	.ipynb_checkpoints		.ipynb_checkpoints
	.coverage		.coverage*
	*.pyc		*.pyc
	*.vti		*.vti
	/build		/build
	/dist		/dist
	/*.egg-info		*.egg-info
	.cache		.cache
	_build		_build
			/html_doc
	/.idea		/.idea
			.vscode
	.cache		.cache
	_local_tmp		_local_tmp
	RELEASE-VERSION		RELEASE-VERSION
	test-report		test-report
			src/pystencils/boundaries/createindexlistcython.c
			src/pystencils/boundaries/createindexlistcython.*.so
			tests/tmp
			tests/var
			tests/kerncraft_inputs/.2d-5pt.c_kerncraft/
			tests/kerncraft_inputs/.3d-7pt.c_kerncraft/
			report.xml
			coverage_report/


			# macOS
			**/.DS_Store
			*.uuid

.gitlab-ci.yml

+241 −63

Original line number	Original line	Diff line number	Diff line
	stages:		stages:
			- pretest
	- test		- test
			- nightly
			- docs
	- deploy		- deploy


			# -------------------------- Templates ------------------------------------------------------------------------------------

			# Base configuration for jobs meant to run at every commit
			.every-commit:
			rules:
			- if: $CI_PIPELINE_SOURCE != "schedule"

			# Configuration for jobs meant to run on each commit to pycodegen/pystencils/master
			.every-commit-master:
			rules:
			- if: '$CI_PIPELINE_SOURCE != "schedule" && $CI_PROJECT_PATH == "pycodegen/pystencils" && $CI_COMMIT_BRANCH == "master"'

			# Base configuration for jobs meant to run at a schedule
			.scheduled:
			rules:
			- if: $CI_PIPELINE_SOURCE == "schedule"

	# -------------------------- Tests ------------------------------------------------------------------------------------		# -------------------------- Tests ------------------------------------------------------------------------------------

	# Normal test - runs on every commit all but "long run" tests		# Normal test - runs on every commit all but "long run" tests
	tests-and-coverage:		tests-and-coverage:
	stage: test		stage: pretest
	except:		extends: .every-commit
	variables:		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
	- $ENABLE_NIGHTLY_BUILDS		before_script:
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full		- pip install -e .
	script:		script:
			- env
			- pip list
	- export NUM_CORES=$(nproc --all)		- export NUM_CORES=$(nproc --all)
	- mkdir -p ~/.config/matplotlib		- mkdir -p ~/.config/matplotlib
	- echo "backend:template" > ~/.config/matplotlib/matplotlibrc		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
	- mkdir public		- mkdir public
	- py.test -v -n $NUM_CORES --cov-report html --cov-report term --cov=. -m "not longrun" --html test-report/index.html		- pytest -v -n $NUM_CORES --cov-report html --cov-report xml --cov-report term --cov=. -m "not longrun" --html test-report/index.html --junitxml=report.xml
			- python -m coverage xml
	tags:		tags:
	- docker		- docker
	- cuda		- cuda11
	- AVX		- AVX
			coverage: /Total coverage:\s\d+.\d+\%/
	artifacts:		artifacts:
	when: always		when: always
	paths:		paths:
	- coverage_report		- coverage_report
	- test-report		- test-report
			reports:
			coverage_report:
			coverage_format: cobertura
			path: coverage.xml
			junit: report.xml

	# Nightly test - runs "long run" jobs only		# Normal test with longruns
	test-longrun:		tests-and-coverage-with-longrun:
	stage: test		stage: test
	only:		when: manual
	variables:		allow_failure: true
	- $ENABLE_NIGHTLY_BUILDS		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full:cupy12.3
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full		before_script:
			- pip install sympy --upgrade
			- pip install -e .
	script:		script:
			- env
			- pip list
	- export NUM_CORES=$(nproc --all)		- export NUM_CORES=$(nproc --all)
	- mkdir -p ~/.config/matplotlib		- mkdir -p ~/.config/matplotlib
	- echo "backend:template" > ~/.config/matplotlib/matplotlibrc		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
	- py.test -v -n $NUM_CORES --cov-report html --cov-report term --cov=. --html test-report/index.html		- mkdir public
			- py.test -v -n $NUM_CORES
			tags:
			- docker
			- cuda11
			- AVX

			# pipeline with latest python version
			latest-python:
			stage: test
			extends: .every-commit
			image: i10git.cs.fau.de:5005/pycodegen/pycodegen/latest_python
			before_script:
			- pip install -e .
			script:
			- env
			- pip list
			- pip install -e .
			- export NUM_CORES=$(nproc --all)
			- mkdir -p ~/.config/matplotlib
			- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
			- mkdir public
			- py.test -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
	tags:		tags:
	- docker		- docker
	- cuda
	- AVX		- AVX
	artifacts:		artifacts:
	paths:		when: always
	- coverage_report		reports:
	- test-report		junit: report.xml


	# Minimal tests in windows environment		# Minimal tests in windows environment
	minimal-windows:		#minimal-windows:
			# stage: test
			# tags:
			# - win
			# script:
			# - export NUM_CORES=$(nproc --all)
			# - source /cygdrive/c/Users/build/Miniconda3/Scripts/activate
			# - source activate pystencils
			# - pip install joblib
			# - pip list
			# - python -c "import numpy"
			# - py.test -v -m "not (notebook or longrun)"

			ubuntu:
	stage: test		stage: test
	except:		extends: .every-commit
	variables:		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ubuntu
	- $ENABLE_NIGHTLY_BUILDS		before_script:
	tags:		- ln -s /usr/include/locale.h /usr/include/xlocale.h
	- win		- pip3 install -e .
	script:		script:
	- source /cygdrive/c/Users/build/Miniconda3/Scripts/activate		- export NUM_CORES=$(nproc --all)
	- source activate pystencils_dev		- mkdir -p ~/.config/matplotlib
			- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
			- sed -i 's/--doctest-modules //g' pytest.ini
	- env		- env
	- conda env list		- pip list
	- python -c "import numpy"		- pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
	- python setup.py quicktest		tags:
			- docker
			- cuda11
			- AVX
			artifacts:
			when: always
			reports:
			junit: report.xml

	minimal-ubuntu:		.multiarch_template:
	stage: test		stage: test
	except:		extends: .every-commit
	variables:		allow_failure: true
	- $ENABLE_NIGHTLY_BUILDS		before_script: &multiarch_before_script
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_ubuntu		# - pip3 install -v .
			- export PYTHONPATH=src
			- python3 -c "import pystencils as ps; ps.cpu.cpujit.read_config()"
			- sed -i '/^fail_under.*/d' pytest.ini
	script:		script:
	- python3 setup.py quicktest		- export NUM_CORES=$(nproc --all)
			- mkdir -p ~/.config/matplotlib
			- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
			- sed -i 's/--doctest-modules //g' pytest.ini
			- env
			- pip3 list
			- python3 -m pytest -v -n $NUM_CORES --reruns 2 --cov-report html --cov-report xml --cov=. --junitxml=report.xml tests/test_vec.py tests/test_random.py tests/test_half_precision.py
			- python3 -m coverage xml
	tags:		tags:
	- docker		- docker
			- multiarch
			artifacts:
			when: always
			paths:
			- coverage_report
			reports:
			coverage_report:
			coverage_format: cobertura
			path: coverage.xml
			junit: report.xml

	minimal-conda:		arm64v8:
	stage: test		extends: .multiarch_template
	except:		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
			variables:
			QEMU_CPU: "cortex-a76"
			before_script:
			- *multiarch_before_script

			ppc64le:
			extends: .multiarch_template
			image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ppc64le
			before_script:
			- *multiarch_before_script

			arm64v9:
			# SVE support is still unreliable in GCC 13 (incorrect code for fixed-width vectors, internal compiler errors).
			# For half precision Clang is necessary
			extends: .multiarch_template
			image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
	variables:		variables:
	- $ENABLE_NIGHTLY_BUILDS		# disable pointer authentication to speed up emulation by 3x
			QEMU_CPU: "max,pauth-impdef=on"
			before_script:
			- *multiarch_before_script
			- sed -i s/g\+\+/clang++/g ~/.config/pystencils/config.json

			riscv64:
			# RISC-V vector extension support is incomplete in GCC 13.
			extends: .multiarch_template
			image: i10git.cs.fau.de:5005/pycodegen/pycodegen/riscv64
			variables:
			# explicitly set SIMD as detection requires QEMU >= 8.1
			PYSTENCILS_SIMD: "rvv"
			QEMU_CPU: "rv64,v=true,zicboz=true"
			before_script:
			- *multiarch_before_script
			- sed -i 's/march=native/march=rv64imfdvzicboz/g' ~/.config/pystencils/config.json
			- sed -i s/g\+\+/clang++/g ~/.config/pystencils/config.json

			minimal-conda:
			stage: pretest
			extends: .every-commit
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda
			before_script:
			- pip install -e .
	script:		script:
	- python setup.py quicktest		- python quicktest.py
	tags:		tags:
	- docker		- docker
			- cuda


	minimal-sympy-master:		minimal-sympy-master:
	stage: test		stage: test
	except:		extends: .every-commit
	variables:
	- $ENABLE_NIGHTLY_BUILDS
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_conda
			before_script:
			- pip install -e .
	script:		script:
	- python -m pip install --upgrade git+https://github.com/sympy/sympy.git		- python -m pip install --upgrade git+https://github.com/sympy/sympy.git
	- python setup.py quicktest		- python quicktest.py
	allow_failure: true		allow_failure: true
	tags:		tags:
	- docker		- docker
			- cuda


	pycodegen-integration:		pycodegen-integration:
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
	stage: test		stage: test
	when: manual		when: manual
			allow_failure: true
	script:		script:
	- git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@i10git.cs.fau.de/pycodegen/pycodegen.git		- git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@i10git.cs.fau.de/pycodegen/pycodegen.git
	- cd pycodegen		- cd pycodegen
	@@ -118,50 +257,89 @@ pycodegen-integration:
	- git fetch test		- git fetch test
	- git reset --hard $CI_COMMIT_SHA		- git reset --hard $CI_COMMIT_SHA
	- cd ..		- cd ..
	- export PYTHONPATH=`pwd`/pystencils:`pwd`/lbmpy:`pwd`/pygrandchem:`pwd`/pystencils_walberla:`pwd`/lbmpy_walberla		- pip install -e pystencils/
			- pip install -e lbmpy/
			- cmake --version
	- ./install_walberla.sh		- ./install_walberla.sh
	- export NUM_CORES=$(nproc --all)		- export NUM_CORES=$(nproc --all)
	- mkdir -p ~/.config/matplotlib		- mkdir -p ~/.config/matplotlib
	- echo "backend:template" > ~/.config/matplotlib/matplotlibrc		- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
	- cd pystencils		- cd pystencils
	- py.test -v -n $NUM_CORES .		- py.test -v -n $NUM_CORES --junitxml=report.xml .
	- cd ../lbmpy		- cd ../lbmpy
	- py.test -v -n $NUM_CORES .		- py.test -v -n $NUM_CORES --junitxml=report.xml .
	- cd ../pygrandchem
	- py.test -v -n $NUM_CORES .
	- cd ../walberla/build/		- cd ../walberla/build/
	- make CodegenJacobiCPU CodegenJacobiGPU MicroBenchmarkGpuLbm LbCodeGenerationExample		- make -j $NUM_CORES CodegenJacobiCPU CodegenJacobiGPU CodegenPoissonCPU CodegenPoissonGPU MicroBenchmarkGpuLbm LbCodeGenerationExample
			- make -j $NUM_CORES multiphaseCPU multiphaseGPU FluctuatingMRT FlowAroundSphereCodeGen FieldLayoutAndVectorizationTest GeneratedOutflowBC
			- cd apps/benchmarks/UniformGridGPU
			- make -j $NUM_CORES
			- cd ../UniformGridCPU
			- make -j $NUM_CORES
	tags:		tags:
	- docker		- docker
	- cuda		- cuda11
	- AVX		- AVX
			artifacts:
			when: always
			reports:
			junit: pycodegen/*/report.xml


			# -------------------- Scheduled Tasks --------------------------------------------------------------------------


			# Nightly test against the latest (pre-release) version of SymPy published on PyPI
			nightly-sympy:
			stage: nightly
			needs: []
			extends: .scheduled
			image: i10git.cs.fau.de:5005/pycodegen/pycodegen/latest_python
			before_script:
			- pip install -e .
			- pip install --upgrade --pre sympy
			script:
			- env
			- pip list
			- export NUM_CORES=$(nproc --all)
			- mkdir -p ~/.config/matplotlib
			- echo "backend:template" > ~/.config/matplotlib/matplotlibrc
			- mkdir public
			- pytest -v -n $NUM_CORES -m "not longrun" --junitxml=report.xml
			tags:
			- docker
			- AVX
			- cuda
			artifacts:
			when: always
			reports:
			junit: report.xml

	# -------------------- Linter & Documentation --------------------------------------------------------------------------		# -------------------- Linter & Documentation --------------------------------------------------------------------------


	flake8-lint:		flake8-lint:
	stage: test		stage: pretest
	except:		extends: .every-commit
	variables:
	- $ENABLE_NIGHTLY_BUILDS
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
	script:		script:
	- flake8 pystencils		- flake8 src/pystencils
	tags:		tags:
	- docker		- docker
	- cuda


	build-documentation:		build-documentation:
	stage: test		stage: docs
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full		extends: .every-commit
			image: i10git.cs.fau.de:5005/pycodegen/pycodegen/documentation
			needs: []
			before_script:
			- pip install -e .
	script:		script:
	- export PYTHONPATH=`pwd`
	- mkdir html_doc		- mkdir html_doc
			- sphinx-build -b html doc html_doc
	- sphinx-build -W -b html doc html_doc		- sphinx-build -W -b html doc html_doc
	tags:		tags:
	- docker		- docker
	- cuda
	artifacts:		artifacts:
	paths:		paths:
	- html_doc		- html_doc
	@@ -169,7 +347,9 @@ build-documentation:

	pages:		pages:
	image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full		image: i10git.cs.fau.de:5005/pycodegen/pycodegen/full
			extends: .every-commit-master
	stage: deploy		stage: deploy
			needs: ["tests-and-coverage", "build-documentation"]
	script:		script:
	- ls -l		- ls -l
	- mv coverage_report html_doc		- mv coverage_report html_doc
	@@ -179,5 +359,3 @@ pages:
	- public		- public
	tags:		tags:
	- docker		- docker
	only:
	- master@pycodegen/pystencils

AUTHORS.txt

+6 −5

Original line number	Original line	Diff line number	Diff line
	@@ -3,12 +3,13 @@ Contributors:
	-------------		-------------

	- Martin Bauer <martin.bauer@fau.de>		- Martin Bauer <martin.bauer@fau.de>
			- Markus Holzer <markus.holzer@fau.de>
	- Stephan Seitz <stephan.seitz@fau.de>		- Stephan Seitz <stephan.seitz@fau.de>
			- Michael Kuron <mkuron@icp.uni-stuttgart.de>
	- Jan Hönig <jan.hoenig@fau.de>		- Jan Hönig <jan.hoenig@fau.de>
	- Nils Kohl <nils.kohl@fau.de>
	- Julian Hammer <julian.hammer@fau.de>		- Julian Hammer <julian.hammer@fau.de>
	- Christian Godenschwager <christian.godenschwager@fau.de>		- Nils Kohl <nils.kohl@fau.de>
	- Markus Holzer <markus.holzer@fau.de>		- Frederik Hennig <frederik.hennig@fau.de>
	- Michael Kuron <mkuron@icp.uni-stuttgart.de>
	- Dominik Ernst <dominik.ernst@fau.de>		- Dominik Ernst <dominik.ernst@fau.de>
	- João Victor Tozatti Risso <joaovictortr@protonmail.com>		- Christian Godenschwager <christian.godenschwager@fau.de>
			- Dominik Thoennes <dominik.thoennes@fau.de>

CHANGELOG.md

0 → 100644

+7 −0

Original line number	Original line	Diff line number	Diff line
			# Change Log

			## Unreleased

			### Removed
			* LLVM backend because it was not used much and not well integrated in pystencils.
			* OpenCL backend because it was not used much and not well integrated in pystencils.

CONTRIBUTING.md

0 → 100644

+87 −0

Original line number	Original line	Diff line number	Diff line
			# Contributing

			Contributions to pystencils are always welcome, and they are greatly appreciated!
			A list of open problems can be found [here]( https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).
			Of course, it is also always appreciated to bring your own ideas and problems to the community!


			Please submit all contributions to the official [GitLab repository](https://i10git.cs.fau.de/pycodegen/pystencils) in the form of a Merge Request. Please do not submit git diffs or files containing the changes.
			There also exists a GitHub repository, which is only a mirror to the GitLab repository. Contributions to the GitHub repository are not considered.

			`pystencils` is an open-source python package under the license of AGPL3. Thus we consider the act of contributing to the code by submitting a Merge Request as the "Sign off" or agreement to the AGPL3 license.

			You can contribute in many different ways:

			## Types of Contributions

			### Report Bugs

			Report bugs at [https://i10git.cs.fau.de/pycodegen/pystencils/-/issues](https://i10git.cs.fau.de/pycodegen/pystencils/-/issues).

			For pystencils, it is often necessary to provide the python and [SymPy](https://www.sympy.org/en/index.html) versions used and hardware information like the
			processor architecture and the compiler version used to compile the generated kernels.

			### Fix Issues

			Look through the GitLab issues. Different tags indicate the status of the issues.
			The "bug" tag indicates problems with pystencils, while the "feature" tag shows ideas that should be added in the future.

			### Write Documentation

			The documentation of pystencils can be found [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils/). Jupyter notebooks are used to provide an
			interactive start to pystencils. It is always appreciated if new document notebooks are provided
			since this helps others a lot.

			## Get Started!

			Ready to contribute? Here is how to set up `pystencils` for local development.

			1. Fork the `pystencils` repo on GitLab.
			2. Clone your fork locally:
			```bash
			$ git clone https://i10git.cs.fau.de/your-name/pystencils
			```
			3. Install your local copy into a virtualenv. It is also recommended to use anaconda or miniconda to manage the python environments.
			```bash
			$ mkvirtualenv pystencils
			$ cd pystencils/
			$ pip install -e .
			```
			4. Create a branch for local development:
			```bash
			$ git checkout -b name-of-your-bugfix-or-feature
			```
			Now you can make your changes locally.

			5. When you're done making changes, check that your changes pass flake8 and the
			tests
			```bash
			$ flake8 pystencils
			$ py.test -v -n $NUM_CORES -m "not longrun" .

			```

			To get all packages needed for development, a requirements list can be found [here](https://i10git.cs.fau.de/pycodegen/pycodegen/-/blob/master/conda_environment_dev.yml). This includes flake8 and pytest.

			6. Commit your changes and push your branch to GitLab:
			```bash
			$ git add .
			$ git commit -m "Your detailed description of your changes."
			$ git push origin name-of-your-bugfix-or-feature
			```
			7. Submit a Merge Request on GitLab.

			## Merge Request Guidelines

			Before you submit a Merge Request, check that it meets these guidelines:

			1. All functionality that is implemented through this Merge Request should be covered by unit tests. These are implemented in the `tests` directory.
			2. If the Merge Request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring.
			3. If you have maintainer status for `pystencils`, you can merge Merge Requests to the master branch. However, every Merge Request needs to be reviewed by another developer. Thus, you are not allowed to merge a Merge Request that you submitted yourself.

			## Tips

			To run a subset of tests:
			```bash
			$ py.test my_test.py
			```
			No newline at end of file

MANIFEST.in

+3 −3

Original line number	Original line	Diff line number	Diff line
	include README.md		include AUTHORS.txt
	include COPYING.txt		include CONTRIBUTING.md
	include RELEASE-VERSION		include CHANGELOG.md

README.md

+28 −14

Original line number	Original line	Diff line number	Diff line
	@@ -2,16 +2,16 @@ pystencils
	==========		==========

	[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/mabau/pystencils/master?filepath=doc%2Fnotebooks)		[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/mabau/pystencils/master?filepath=doc%2Fnotebooks)
	[![Docs](https://img.shields.io/badge/read-the_docs-brightgreen.svg)](http://pycodegen.pages.walberla.net/pystencils)		[![Docs](https://img.shields.io/badge/read-the_docs-brightgreen.svg)](https://pycodegen.pages.i10git.cs.fau.de/pystencils)
	[![pypi-package](https://badge.fury.io/py/pystencils.svg)](https://badge.fury.io/py/pystencils)		[![pypi-package](https://badge.fury.io/py/pystencils.svg)](https://badge.fury.io/py/pystencils)
	[![pipeline status](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/pipeline.svg)](https://i10git.cs.fau.de/pycodegen/pystencils/commits/master)		[![pipeline status](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/pipeline.svg)](https://i10git.cs.fau.de/pycodegen/pystencils/commits/master)
	[![coverage report](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/coverage.svg)](http://pycodegen.pages.walberla.net/pystencils/coverage_report)		[![coverage report](https://i10git.cs.fau.de/pycodegen/pystencils/badges/master/coverage.svg)](http://pycodegen.pages.i10git.cs.fau.de/pystencils/coverage_report)

	Run blazingly fast stencil codes on numpy arrays.		Run blazingly fast stencil codes on numpy arrays.

	pystencils uses sympy to define stencil operations, that can be executed on numpy arrays.		pystencils uses sympy to define stencil operations, that can be executed on numpy arrays.
	Exploiting the stencil structure makes pystencils run faster than normal numpy code and even as Cython and numba,		Exploiting the stencil structure makes pystencils run faster than normal numpy code and even as Cython and numba,
	[as demonstrated in this notebook](http://pycodegen.pages.walberla.net/pystencils/notebooks/demo_benchmark.html).		[as demonstrated in this notebook](https://pycodegen.pages.i10git.cs.fau.de/pystencils/notebooks/demo_benchmark.html).


	Here is a code snippet that computes the average of neighboring cells:		Here is a code snippet that computes the average of neighboring cells:
	@@ -33,15 +33,15 @@ kernel(f=f_arr, g=g_arr)
	It comes with automatic finite difference discretization for PDEs:		It comes with automatic finite difference discretization for PDEs:

	```python		```python
			import pystencils as ps
			import sympy as sp

	c, v = ps.fields("c, v(2): [2D]")		c, v = ps.fields("c, v(2): [2D]")
	adv_diff_pde = ps.fd.transient(c) - ps.fd.diffusion(c, sp.symbols("D")) + ps.fd.advection(c, v)		adv_diff_pde = ps.fd.transient(c) - ps.fd.diffusion(c, sp.symbols("D")) + ps.fd.advection(c, v)
	discretize = ps.fd.Discretization2ndOrder(dx=1, dt=0.01)		discretize = ps.fd.Discretization2ndOrder(dx=1, dt=0.01)
	discretization = discretize(adv_diff_pde)		discretization = discretize(adv_diff_pde)
	```		```

	Look at the [documentation](http://pycodegen.pages.walberla.net/pystencils) to learn more.


	Installation		Installation
	------------		------------

	@@ -52,12 +52,10 @@ pip install pystencils[interactive]
	Without `[interactive]` you get a minimal version with very little dependencies.		Without `[interactive]` you get a minimal version with very little dependencies.

	All options:		All options:
	- `gpu`: use this if an Nvidia GPU is available and CUDA is installed		- `gpu`: use this if an NVIDIA or AMD GPU is available and CUDA or ROCm is installed
	- `opencl`: basic OpenCL support (experimental)
	- `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl		- `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl
	- `bench_db`: functionality to store benchmark result in object databases		- `bench_db`: functionality to store benchmark result in object databases
	- `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.		- `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
	- `autodiff`: enable derivation of adjoint kernels and generation of Torch/Tensorflow operations
	- `doc`: packages to build documentation		- `doc`: packages to build documentation

	Options can be combined e.g.		Options can be combined e.g.
	@@ -65,9 +63,25 @@ Options can be combined e.g.
	pip install pystencils[interactive, gpu, doc]		pip install pystencils[interactive, gpu, doc]
	```		```

			pystencils is also fully compatible with Windows machines. If working with Visual Studio and cupy, make sure to run the example files first to ensure that cupy can find the compiler's executable.

	Documentation		Documentation
	-------------		-------------

	Read the docs [here](http://pycodegen.pages.walberla.net/pystencils) and		Read the docs [here](https://pycodegen.pages.i10git.cs.fau.de/pystencils) and
	check out the Jupyter notebooks in `doc/notebooks`.		check out the Jupyter notebooks in `doc/notebooks`. The Changelog of pystencils can be found [here](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/CHANGELOG.md).

			Authors
			-------

			Many thanks go to the [contributors](https://i10git.cs.fau.de/pycodegen/pystencils/-/blob/master/AUTHORS.txt) of pystencils.

			### Please cite us

			If you use pystencils in a publication, please cite the following articles:

			Overview:
			- M. Bauer et al, Code Generation for Massively Parallel Phase-Field Simulations. Association for Computing Machinery, 2019. https://doi.org/10.1145/3295500.3356186

			Performance Modelling:
			- D. Ernst et al, Analytical performance estimation during code generation on modern GPUs. Journal of Parallel and Distributed Computing, 2023. https://doi.org/10.1016/j.jpdc.2022.11.003

binder/environment.yml

+3 −3

Original line number	Original line	Diff line number	Diff line
	@@ -7,14 +7,14 @@
	# conda env create -f conda_environment_user.yml		# conda env create -f conda_environment_user.yml
	# . activate pystencils		# . activate pystencils
	#		#
	# If you have CUDA installed and want to use your GPU, uncomment the last line to install pycuda		# If you have CUDA or ROCm installed and want to use your GPU, uncomment the last line to install cupy
	#		#
	# ----------------------------------------------------------------------------------------------------------------------		# ----------------------------------------------------------------------------------------------------------------------

	name: pystencils		name: pystencils
	dependencies:		dependencies:
	# Basic dependencies:		# Basic dependencies:
	- python >= 3.6		- python >= 3.8
	- numpy		- numpy
	- sympy >= 1.1		- sympy >= 1.1
	- appdirs # to find default cache directory on each platform		- appdirs # to find default cache directory on each platform
	@@ -32,4 +32,4 @@ dependencies:
	- ipy_table # HTML tables for jupyter notebooks		- ipy_table # HTML tables for jupyter notebooks
	- pyevtk # VTK output for serial simulations		- pyevtk # VTK output for serial simulations
	- blitzdb # file-based No-SQL database to store simulation results		- blitzdb # file-based No-SQL database to store simulation results
	#- pycuda # add this if you have CUDA installed		#- cupy # add this if you have CUDA or ROCm installed

conftest.py

+47 −32

Original line number	Original line	Diff line number	Diff line
	import os		import os
	import pytest
	import tempfile
	import runpy		import runpy
	import sys		import sys
			import tempfile
	import warnings		import warnings

			import nbformat
			import pytest
			from nbconvert import PythonExporter

			from pystencils.boundaries.createindexlist import * # NOQA
	# Trigger config file reading / creation once - to avoid race conditions when multiple instances are creating it		# Trigger config file reading / creation once - to avoid race conditions when multiple instances are creating it
	# at the same time		# at the same time
	from pystencils.cpu import cpujit		from pystencils.cpu import cpujit
	@@ -15,12 +20,17 @@ try:
	pyximport.install(language_level=3)		pyximport.install(language_level=3)
	except ImportError:		except ImportError:
	pass		pass
	from pystencils.boundaries.createindexlistcython import * # NOQA


	SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__))		SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__))
	sys.path.insert(0, os.path.abspath('pystencils'))		sys.path.insert(0, os.path.abspath('pystencils'))

			# the Ubuntu pipeline uses an older version of pytest which uses deprecated functionality.
			# This leads to many warnings in the test and coverage pipeline.
			pytest_numeric_version = [int(x, 10) for x in pytest.__version__.split('.')]
			pytest_numeric_version.reverse()
			pytest_version = sum(x * (100 ** i) for i, x in enumerate(pytest_numeric_version))


	def add_path_to_ignore(path):		def add_path_to_ignore(path):
	if not os.path.exists(path):		if not os.path.exists(path):
	@@ -29,46 +39,49 @@ def add_path_to_ignore(path):
	collect_ignore += [os.path.join(SCRIPT_FOLDER, path, f) for f in os.listdir(os.path.join(SCRIPT_FOLDER, path))]		collect_ignore += [os.path.join(SCRIPT_FOLDER, path, f) for f in os.listdir(os.path.join(SCRIPT_FOLDER, path))]


	collect_ignore = [os.path.join(SCRIPT_FOLDER, "doc", "conf.py")]		collect_ignore = [os.path.join(SCRIPT_FOLDER, "doc", "conf.py"),
	add_path_to_ignore('pystencils_tests/benchmark')		os.path.join(SCRIPT_FOLDER, "src", "pystencils", "opencl", "opencl.autoinit")]
			add_path_to_ignore('tests/benchmark')
	add_path_to_ignore('_local_tmp')		add_path_to_ignore('_local_tmp')


	collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/autodiff.py")]

	try:		try:
	import pycuda		import cupy
	except ImportError:		except ImportError:
	collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/pystencils_tests/test_cudagpu.py")]		collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_gpu.py")]
	add_path_to_ignore('pystencils/gpucuda')		add_path_to_ignore('src/pystencils/gpu')

	try:		try:
	import llvmlite		import waLBerla
	except ImportError:		except ImportError:
	collect_ignore += [os.path.join(SCRIPT_FOLDER, 'pystencils_tests/backends/llvm.py')]		collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_aligned_array.py"),
	add_path_to_ignore('pystencils/llvm')		os.path.join(SCRIPT_FOLDER, "tests/test_datahandling_parallel.py"),
			os.path.join(SCRIPT_FOLDER, "doc/notebooks/03_tutorial_datahandling.ipynb"),
			os.path.join(SCRIPT_FOLDER, "src/pystencils/datahandling/parallel_datahandling.py"),
			os.path.join(SCRIPT_FOLDER, "tests/test_small_block_benchmark.ipynb")]

	try:		try:
	import kerncraft		import blitzdb
	except ImportError:		except ImportError:
	collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_kerncraft_coupling.py"),		add_path_to_ignore('src/pystencils/runhelper')
	os.path.join(SCRIPT_FOLDER, "pystencils_tests/benchmark/benchmark.py")]		collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_parameterstudy.py")]
	add_path_to_ignore('pystencils/kerncraft_coupling')		collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_json_serializer.py")]

	try:		try:
	import waLBerla		import islpy
	except ImportError:		except ImportError:
	collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_aligned_array.py"),		collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/integer_set_analysis.py")]
	os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_datahandling_parallel.py"),
	os.path.join(SCRIPT_FOLDER, "doc/notebooks/03_tutorial_datahandling.ipynb"),
	os.path.join(SCRIPT_FOLDER, "pystencils/datahandling/parallel_datahandling.py"),
	os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_small_block_benchmark.ipynb")]

	try:		try:
	import blitzdb		import graphviz
	except ImportError:		except ImportError:
	add_path_to_ignore('pystencils/runhelper')		collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/backends/dot.py")]
			collect_ignore += [os.path.join(SCRIPT_FOLDER, "doc/notebooks/01_tutorial_getting_started.ipynb")]

			try:
			import pyevtk
			except ImportError:
			collect_ignore += [os.path.join(SCRIPT_FOLDER, "src/pystencils/datahandling/vtk.py")]

	collect_ignore += [os.path.join(SCRIPT_FOLDER, 'setup.py')]		collect_ignore += [os.path.join(SCRIPT_FOLDER, 'setup.py')]

	@@ -78,10 +91,6 @@ for root, sub_dirs, files in os.walk('.'):
	collect_ignore.append(f)		collect_ignore.append(f)


	import nbformat
	from nbconvert import PythonExporter


	class IPythonMockup:		class IPythonMockup:
	def run_line_magic(self, args, *kwargs):		def run_line_magic(self, args, *kwargs):
	pass		pass
	@@ -128,12 +137,15 @@ class IPyNbFile(pytest.File):
	exporter.exclude_markdown = True		exporter.exclude_markdown = True
	exporter.exclude_input_prompt = True		exporter.exclude_input_prompt = True

	notebook_contents = self.fspath.open()		notebook_contents = self.fspath.open(encoding='utf-8')

	with warnings.catch_warnings():		with warnings.catch_warnings():
	warnings.filterwarnings("ignore", "IPython.core.inputsplitter is deprecated")		warnings.filterwarnings("ignore", "IPython.core.inputsplitter is deprecated")
	notebook = nbformat.read(notebook_contents, 4)		notebook = nbformat.read(notebook_contents, 4)
	code, _ = exporter.from_notebook_node(notebook)		code, _ = exporter.from_notebook_node(notebook)
			if pytest_version >= 50403:
			yield IPyNbTest.from_parent(name=self.name, parent=self, code=code)
			else:
	yield IPyNbTest(self.name, self, code)		yield IPyNbTest(self.name, self, code)

	def teardown(self):		def teardown(self):
	@@ -143,4 +155,7 @@ class IPyNbFile(pytest.File):
	def pytest_collect_file(path, parent):		def pytest_collect_file(path, parent):
	glob_exprs = ["demo.ipynb", "tutorial.ipynb", "test_*.ipynb"]		glob_exprs = ["demo.ipynb", "tutorial.ipynb", "test_*.ipynb"]
	if any(path.fnmatch(g) for g in glob_exprs):		if any(path.fnmatch(g) for g in glob_exprs):
			if pytest_version >= 50403:
			return IPyNbFile.from_parent(fspath=path, parent=parent)
			else:
	return IPyNbFile(path, parent)		return IPyNbFile(path, parent)

doc/conf.py

100644 → 100755

+13 −8

Original line number	Original line	Diff line number	Diff line
	@@ -4,10 +4,11 @@
	import datetime		import datetime
	import sphinx_rtd_theme		import sphinx_rtd_theme
	import os		import os
			import re
	import sys		import sys

	sys.path.insert(0, os.path.abspath('.'))		sys.path.insert(0, os.path.abspath('.'))
	from version_from_git import version_number_from_git		import pystencils

	extensions = [		extensions = [
	'sphinx.ext.autodoc',		'sphinx.ext.autodoc',
	@@ -25,11 +26,14 @@ templates_path = ['_templates']
	source_suffix = '.rst'		source_suffix = '.rst'
	master_doc = 'index'		master_doc = 'index'

	copyright = '{}, Martin Bauer'.format(datetime.datetime.now().year)		copyright = f'{datetime.datetime.now().year}, Martin Bauer, Markus Holzer, Frederik Hennig'
	author = 'Martin Bauer'		author = 'Martin Bauer, Markus Holzer, Frederik Hennig'
	version = version_number_from_git()		# The short X.Y version (including .devXXXX, rcX, b1 suffixes if present)
	release = version_number_from_git()		version = re.sub(r'(\d+\.\d+)\.\d+(.*)', r'\1\2', pystencils.__version__)
	language = None		version = re.sub(r'(\.dev\d+).*?$', r'\1', version)
			# The full version, including alpha/beta/rc tags.
			release = pystencils.__version__
			language = 'en'
	exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']		exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
	default_role = 'any'		default_role = 'any'
	pygments_style = 'sphinx'		pygments_style = 'sphinx'
	@@ -47,13 +51,14 @@ nbsphinx_execute = 'never'
	nbsphinx_codecell_lexer = 'python3'		nbsphinx_codecell_lexer = 'python3'

	# Example configuration for intersphinx: refer to the Python standard library.		# Example configuration for intersphinx: refer to the Python standard library.
	intersphinx_mapping = {'python': ('https://docs.python.org/3.6', None),		intersphinx_mapping = {'python': ('https://docs.python.org/3.8', None),
	'numpy': ('https://docs.scipy.org/doc/numpy/', None),		'numpy': ('https://docs.scipy.org/doc/numpy/', None),
	'matplotlib': ('https://matplotlib.org/', None),		'matplotlib': ('https://matplotlib.org/', None),
	'sympy': ('https://docs.sympy.org/latest/', None),		'sympy': ('https://docs.sympy.org/latest/', None),
	}		}

	autodoc_member_order = 'bysource'		autodoc_member_order = 'bysource'
			bibtex_bibfiles = ['sphinx/pystencils.bib']

	project = 'pystencils'		project = 'pystencils'
	html_logo = "img/logo.png"		html_logo = 'img/logo.png'

doc/index.rst

+1 −0

Original line number	Original line	Diff line number	Diff line
	@@ -14,5 +14,6 @@ pystencils can help you to generate blazingly fast code for image processing, nu

	.. image:: /img/pystencils_arch_block_diagram.svg		.. image:: /img/pystencils_arch_block_diagram.svg
	:height: 450px		:height: 450px
			:align: center

doc/notebooks/01_tutorial_getting_started.ipynb

+431 −448

File changed.

File size exceeds preview limit.

View original file

View changed file

doc/notebooks/02_tutorial_basic_kernels.ipynb

+257 −99

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/03_tutorial_datahandling.ipynb

+303 −122

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/04_tutorial_advection_diffusion.ipynb

+8 −8

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/05_tutorial_phasefield_spinodal_decomposition.ipynb

+14 −19

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/06_tutorial_phasefield_dentritic_growth.ipynb

+199 −71

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_assignment_collection.ipynb

+47 −39

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_benchmark.ipynb

+2 −6

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_derivatives.ipynb

+50 −57

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_plotting_and_animation.ipynb

+110 −137

File changed.

Preview size limit exceeded, changes collapsed.

doc/notebooks/demo_wave_equation.ipynb

+139 −204

File changed.

Preview size limit exceeded, changes collapsed.

doc/sphinx/api.rst

+1 −0

Original line number	Original line	Diff line number	Diff line
	@@ -5,6 +5,7 @@ API Reference
	:maxdepth: 3		:maxdepth: 3

	kernel_compile_and_call.rst		kernel_compile_and_call.rst
			enums.rst
	simplifications.rst		simplifications.rst
	datahandling.rst		datahandling.rst
	configuration.rst		configuration.rst

doc/sphinx/enums.rst

0 → 100644

+6 −0

Original line number	Original line	Diff line number	Diff line
			************
			Enumerations
			************

			.. automodule:: pystencils.enums
			:members:

doc/sphinx/kernel_compile_and_call.rst

+10 −5

Original line number	Original line	Diff line number	Diff line
	@@ -8,9 +8,14 @@ Creating kernels

	.. autofunction:: pystencils.create_kernel		.. autofunction:: pystencils.create_kernel

	.. autofunction:: pystencils.create_indexed_kernel		.. autoclass:: pystencils.CreateKernelConfig
			:members:

			.. autofunction:: pystencils.kernelcreation.create_domain_kernel

			.. autofunction:: pystencils.kernelcreation.create_indexed_kernel

	.. autofunction:: pystencils.create_staggered_kernel		.. autofunction:: pystencils.kernelcreation.create_staggered_kernel


	Code printing		Code printing
	@@ -22,11 +27,11 @@ Code printing
	GPU Indexing		GPU Indexing
	-------------		-------------

	.. autoclass:: pystencils.gpucuda.AbstractIndexing		.. autoclass:: pystencils.gpu.AbstractIndexing
	:members:		:members:

	.. autoclass:: pystencils.gpucuda.BlockIndexing		.. autoclass:: pystencils.gpu.BlockIndexing
	:members:		:members:

	.. autoclass:: pystencils.gpucuda.LineIndexing		.. autoclass:: pystencils.gpu.LineIndexing
	:members:		:members:

pystencils_tests/__init__.py→doc/sphinx/pystencils.bib

+0 −0

File moved.

doc/sphinx/simplifications.rst

+16 −2

Original line number	Original line	Diff line number	Diff line
	@@ -10,13 +10,27 @@ AssignmentCollection
	:members:		:members:


			SimplificationStrategy
			======================

			.. autoclass:: pystencils.simp.SimplificationStrategy
			:members:

	Simplifications		Simplifications
	===============		===============

	.. automodule:: pystencils.simp		.. automodule:: pystencils.simp.simplifications
	:members:		:members:

			Subexpression insertion
			=======================

			Subexpression insertion inlines subexpressions whose elimination does not reduce the number of FLOPs.
			For example, a constant value kept as a subexpression leads to a new variable in the code, which occupies
			a register slot. Likewise, a subexpression that is just a single variable can simply be inserted into all assignments.

			.. automodule:: pystencils.simp.subexpression_insertion
			:members:

doc/version_from_git.py

deleted100644 → 0

+0 −31

Original line number	Original line	Diff line number	Diff line
	import subprocess

	def version_number_from_git(tag_prefix='release/', sha_length=10, version_format="{version}.dev{commits}+{sha}"):
	    """Build a version string from the git tags and HEAD of the current working directory.

	    Args:
	        tag_prefix: only tags starting with this prefix are treated as releases
	        sha_length: number of leading characters of the HEAD commit hash to embed
	        version_format: template used when HEAD is ahead of the latest release tag;
	                        receives the fields ``version``, ``commits`` and ``sha``

	    Returns:
	        The latest released version if HEAD is exactly on its tag, otherwise the formatted
	        development version of the next release. ``.dirty`` is appended when tracked files
	        have uncommitted changes.

	    Raises:
	        IndexError: if no tag starts with ``tag_prefix``
	        ValueError: if the latest version has a non-integer component or git does not
	                    report a commit count
	    """

	    def version_sort_key(v):
	        # Compare components numerically so that '0.10.0' is newer than '0.9.0';
	        # a plain string sort would order them the other way round.
	        # Non-numeric components sort after numeric ones instead of raising.
	        return [(0, int(part), '') if part.isdigit() else (1, 0, part) for part in v.split('.')]

	    def get_released_versions():
	        tags = subprocess.getoutput('git tag').split('\n')
	        versions = [t[len(tag_prefix):] for t in tags if t.startswith(tag_prefix)]
	        return sorted(versions, key=version_sort_key)

	    def tag_from_version(v):
	        return tag_prefix + v

	    def increment_version(v):
	        # Bump only the last component, e.g. '1.2.3' -> '1.2.4'
	        parsed_version = [int(i) for i in v.split('.')]
	        parsed_version[-1] += 1
	        return '.'.join(str(i) for i in parsed_version)

	    latest_release = get_released_versions()[-1]
	    commits_since_tag = subprocess.getoutput('git rev-list {}..HEAD --count'.format(tag_from_version(latest_release)))
	    sha = subprocess.getoutput('git rev-parse HEAD')[:sha_length]
	    # Any output of the short status (ignoring untracked files) means there are local modifications
	    is_dirty = len(subprocess.getoutput("git status --untracked-files=no -s")) > 0

	    if int(commits_since_tag) == 0:
	        version_string = latest_release
	    else:
	        next_version = increment_version(latest_release)
	        version_string = version_format.format(version=next_version, commits=commits_since_tag, sha=sha)

	    if is_dirty:
	        version_string += ".dirty"
	    return version_string

pre-push

deleted100755 → 0

+0 −22

Original line number	Original line	Diff line number	Diff line
	#!/usr/bin/env bash

	# Pre-push hook: lints the package and runs the quick test suite.
	# To activate it, copy this file to .git/hooks

	echo "Running pre-push hook"
	echo "Running flake8 check"

	# Abort the push as soon as flake8 reports a non-zero exit status
	if ! flake8 --append-config=.flake8 pystencils; then
	    echo "flake8 failed"
	    exit 1
	fi

	# Abort the push when the quick tests do not pass
	if ! python3 setup.py quicktest; then
	    echo "quicktest failed"
	    exit 1
	fi

pyproject.toml

0 → 100644

+98 −0

Original line number	Original line	Diff line number	Diff line
			[project]
			name = "pystencils"
			description = "Speeding up stencil computations on CPUs and GPUs"
			dynamic = ["version"]
			readme = "README.md"
			authors = [
			{ name = "Martin Bauer" },
			{ name = "Jan Hönig " },
			{ name = "Markus Holzer" },
			{ name = "Frederik Hennig" },
			{ email = "cs10-codegen@fau.de" },
			]
			license = { file = "COPYING.txt" }
			requires-python = ">=3.10"
			dependencies = ["sympy>=1.9,<=1.12.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml", "fasteners"]
			classifiers = [
			"Development Status :: 4 - Beta",
			"Framework :: Jupyter",
			"Topic :: Software Development :: Code Generators",
			"Topic :: Scientific/Engineering :: Physics",
			"Intended Audience :: Developers",
			"Intended Audience :: Science/Research",
			"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
			]

			[project.urls]
			"Bug Tracker" = "https://i10git.cs.fau.de/pycodegen/pystencils/-/issues"
			"Documentation" = "https://pycodegen.pages.i10git.cs.fau.de/pystencils/"
			"Source Code" = "https://i10git.cs.fau.de/pycodegen/pystencils"

			[project.optional-dependencies]
			gpu = ['cupy']
			alltrafos = ['islpy', 'py-cpuinfo']
			bench_db = ['blitzdb', 'pymongo', 'pandas']
			interactive = [
			'matplotlib',
			'ipy_table',
			'imageio',
			'jupyter',
			'pyevtk',
			'rich',
			'graphviz',
			]
			use_cython = [
			'Cython'
			]
			doc = [
			'sphinx',
			'sphinx_rtd_theme',
			'nbsphinx',
			'sphinxcontrib-bibtex',
			'sphinx_autodoc_typehints',
			'pandoc',
			]
			tests = [
			'pytest',
			'pytest-cov',
			'pytest-html',
			'ansi2html',
			'pytest-xdist',
			'flake8',
			'nbformat',
			'nbconvert',
			'ipython',
			'matplotlib',
			'py-cpuinfo',
			'randomgen>=1.18',
			]

			[build-system]
			requires = [
			"setuptools>=61",
			"versioneer[toml]>=0.29",
			# 'Cython'
			]
			build-backend = "setuptools.build_meta"

			[tool.setuptools.package-data]
			pystencils = [
			"include/*.h",
			"boundaries/createindexlistcython.pyx"
			]

			[tool.setuptools.packages.find]
			where = ["src"]
			include = ["pystencils", "pystencils.*"]
			namespaces = false

			[tool.versioneer]
			# See the docstring in versioneer.py for instructions. Note that you must
			# re-run 'versioneer.py setup' after changing this section, and commit the
			# resulting files.
			VCS = "git"
			style = "pep440"
			versionfile_source = "src/pystencils/_version.py"
			versionfile_build = "pystencils/_version.py"
			tag_prefix = "release/"
			parentdir_prefix = "pystencils-"

pystencils/autodiff.py

deleted100644 → 0

+0 −12

Original line number	Original line	Diff line number	Diff line
	"""
	Provides tools for generation of auto-differentiable operations.

	See https://github.com/theHamsta/pystencils_autodiff

	Installation:

	.. code-block:: bash

	    pip install pystencils-autodiff
	"""

	# Placeholder module: importing it always fails and tells the user which package to install.
	raise NotImplementedError('pystencils-autodiff is not installed. Run `pip install pystencils-autodiff`')

pystencils/backends/cuda_backend.py

deleted100644 → 0

+0 −101

Original line number	Original line	Diff line number	Diff line
	from os.path import dirname, join

	from pystencils.astnodes import Node
	from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c
	from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
	from pystencils.interpolation_astnodes import InterpolationMode

	# Read the list of CUDA built-in function names (one per line) that ships next to this module
	# and map every name to itself.
	with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f:
	    lines = f.readlines()
	    # Test the stripped line: the blank separator lines still contain '\n' and are therefore
	    # truthy, which would otherwise register an empty-string function name.
	    CUDA_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l.strip()}


	def generate_cuda(astnode: Node, signature_only: bool = False) -> str:
	    """Render an abstract syntax tree node as CUDA source code.

	    Delegates to `generate_c` with the ``'cuda'`` dialect selected.

	    Args:
	        astnode: KernelFunction node to generate code for
	        signature_only: if True only the signature is printed

	    Returns:
	        C-like code for the ast node and its descendants
	    """
	    cuda_code = generate_c(astnode, signature_only, dialect='cuda')
	    return cuda_code


	class CudaBackend(CBackend):
	    """C backend variant that prints the CUDA-specific AST nodes."""

	    def __init__(self, sympy_printer=None,
	                 signature_only=False):
	        # Fall back to the CUDA expression printer when the caller does not supply one
	        if not sympy_printer:
	            sympy_printer = CudaSympyPrinter()

	        super().__init__(sympy_printer, signature_only, dialect='cuda')

	    def _print_SharedMemoryAllocation(self, node):
	        """Print a ``__shared__`` array declaration; the size is the product of the shape entries."""
	        code = "__shared__ {dtype} {name}[{num_elements}];"
	        return code.format(dtype=node.symbol.dtype,
	                           name=self.sympy_printer.doprint(node.symbol.name),
	                           num_elements='*'.join([str(s) for s in node.shared_mem.shape]))

	    @staticmethod
	    def _print_ThreadBlockSynchronization(node):
	        """Print a thread-block barrier."""
	        # The CUDA barrier intrinsic is spelled `__syncthreads`
	        code = "__syncthreads();"
	        return code

	    def _print_TextureDeclaration(self, node):
	        """Print the declaration of a texture reference for the field behind ``node.texture``."""
	        field = node.texture.field
	        dtype_name = str(field.dtype)
	        # Element types wider than 4 bytes are declared with the `fp_tex_` prefixed type name
	        if field.dtype.numpy_dtype.itemsize > 4:
	            dtype_name = "fp_tex_" + dtype_name
	        code = "texture<%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % (
	            dtype_name,
	            field.spatial_dimensions,
	            node.texture
	        )
	        return code

	    def _print_SkipIteration(self, _):
	        """Skipping an iteration is printed as leaving the kernel function."""
	        return "return;"


	class CudaSympyPrinter(CustomSympyPrinter):
	    """Expression printer that knows the CUDA built-in functions and texture fetches."""
	    language = "CUDA"

	    def __init__(self):
	        super(CudaSympyPrinter, self).__init__()
	        self.known_functions.update(CUDA_KNOWN_FUNCTIONS)

	    def _print_TextureAccess(self, node):
	        """Print a texture fetch; the fetch function depends on interpolation mode and element size."""
	        texture = node.texture
	        dtype = texture.field.dtype.numpy_dtype

	        if texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
	            template = "cubicTex%iDSimple(%s, %s)"
	        elif dtype.itemsize > 4:
	            # Use PyCuda hack!
	            # https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp
	            template = "fp_tex%iD(%s, %s)"
	        else:
	            template = "tex%iD(%s, %s)"

	        dimensions = texture.field.spatial_dimensions
	        texture_name = str(texture)
	        # + 0.5 comes from Nvidia's staggered indexing
	        coordinates = ', '.join(self._print(o + 0.5) for o in reversed(node.offsets))
	        return template % (dimensions, texture_name, coordinates)

	    def _print_Function(self, expr):
	        """Print the fast approximation functions as CUDA intrinsics, everything else as usual."""
	        fast_intrinsics = (
	            (fast_division, "__fdividef(%s, %s)"),
	            (fast_sqrt, "__fsqrt_rn(%s)"),
	            (fast_inv_sqrt, "__frsqrt_rn(%s)"),
	        )
	        for approximation, template in fast_intrinsics:
	            if isinstance(expr, approximation):
	                return template % tuple(self._print(a) for a in expr.args)
	        return super()._print_Function(expr)

pystencils/backends/cuda_known_functions.txt

deleted100644 → 0

+0 −294

Original line number	Original line	Diff line number	Diff line
	__prof_trigger
	printf

	__syncthreads
	__syncthreads_count
	__syncthreads_and
	__syncthreads_or
	__syncwarp
	__threadfence
	__threadfence_block
	__threadfence_system

	atomicAdd
	atomicSub
	atomicExch
	atomicMin
	atomicMax
	atomicInc
	atomicDec
	atomicAnd
	atomicOr
	atomicXor
	atomicCAS

	__all_sync
	__any_sync
	__ballot_sync
	__active_mask

	__shfl_sync
	__shfl_up_sync
	__shfl_down_sync
	__shfl_xor_sync

	__match_any_sync
	__match_all_sync

	__isGlobal
	__isShared
	__isConstant
	__isLocal

	tex1Dfetch
	tex1D
	tex2D
	tex3D

	sqrtf
	rsqrtf
	cbrtf
	rcbrtf
	hypotf
	rhypotf
	norm3df
	rnorm3df
	norm4df
	rnorm4df
	normf
	rnormf
	expf
	exp2f
	exp10f
	expm1f
	logf
	log2f
	log10f
	log1pf
	sinf
	cosf
	tanf
	sincosf
	sinpif
	cospif
	sincospif
	asinf
	acosf
	atanf
	atan2f
	sinhf
	coshf
	tanhf
	asinhf
	acoshf
	atanhf
	powf
	erff
	erfcf
	erfinvf
	erfcinvf
	erfcxf
	normcdff
	normcdfinvf
	lgammaf
	tgammaf
	fmaf
	frexpf
	ldexpf
	scalbnf
	scalblnf
	logbf
	ilogbf
	j0f
	j1f
	jnf
	y0f
	y1f
	ynf
	cyl_bessel_i0f
	cyl_bessel_i1f
	fmodf
	remainderf
	remquof
	modff
	fdimf
	truncf
	roundf
	rintf
	nearbyintf
	ceilf
	floorf
	lrintf
	lroundf
	llrintf
	llroundf

	sqrt
	rsqrt
	cbrt
	rcbrt
	hypot
	rhypot
	norm3d
	rnorm3d
	norm4d
	rnorm4d
	norm
	rnorm
	exp
	exp2
	exp10
	expm1
	log
	log2
	log10
	log1p
	sin
	cos
	tan
	sincos
	sinpi
	cospi
	sincospi
	asin
	acos
	atan
	atan2
	sinh
	cosh
	tanh
	asinh
	acosh
	atanh
	pow
	erf
	erfc
	erfinv
	erfcinv
	erfcx
	normcdf
	normcdfinv
	lgamma
	tgamma
	fma
	frexp
	ldexp
	scalbn
	scalbln
	logb
	ilogb
	j0
	j1
	jn
	y0
	y1
	yn
	cyl_bessel_i0
	cyl_bessel_i1
	fmod
	remainder
	remquo
	mod
	fdim
	trunc
	round
	rint
	nearbyint
	ceil
	floor
	lrint
	lround
	llrint
	llround

	__fdividef
	__sinf
	__cosf
	__tanf
	__sincosf
	__logf
	__log2f
	__log10f
	__expf
	__exp10f
	__powf

	__fadd_rn
	__fsub_rn
	__fmul_rn
	__fmaf_rn
	__frcp_rn
	__fsqrt_rn
	__frsqrt_rn
	__fdiv_rn

	__fadd_rz
	__fsub_rz
	__fmul_rz
	__fmaf_rz
	__frcp_rz
	__fsqrt_rz
	__frsqrt_rz
	__fdiv_rz

	__fadd_ru
	__fsub_ru
	__fmul_ru
	__fmaf_ru
	__frcp_ru
	__fsqrt_ru
	__frsqrt_ru
	__fdiv_ru

	__fadd_rd
	__fsub_rd
	__fmul_rd
	__fmaf_rd
	__frcp_rd
	__fsqrt_rd
	__frsqrt_rd
	__fdiv_rd

	__fdividef
	__expf
	__exp10f
	__logf
	__log2f
	__log10f
	__sinf
	__cosf
	__sincosf
	__tanf
	__powf

	__dadd_rn
	__dsub_rn
	__dmul_rn
	__fma_rn
	__ddiv_rn
	__drcp_rn
	__dsqrt_rn

	__dadd_rz
	__dsub_rz
	__dmul_rz
	__fma_rz
	__ddiv_rz
	__drcp_rz
	__dsqrt_rz

	__dadd_ru
	__dsub_ru
	__dmul_ru
	__fma_ru
	__ddiv_ru
	__drcp_ru
	__dsqrt_ru

	__dadd_rd
	__dsub_rd
	__dmul_rd
	__fma_rd
	__ddiv_rd
	__drcp_rd
	__dsqrt_rd

pystencils/backends/opencl1.1_known_functions.txt

deleted100644 → 0

+0 −100

Original line number	Original line	Diff line number	Diff line
	acos
	acosh
	acospi
	asin
	asinh
	asinpi
	atan
	atan2
	atanh
	atanpi
	atan2pi
	cbrt
	ceil
	copysign
	cos
	cosh
	cospi
	erfc
	erf
	exp
	exp2
	exp10
	expm1
	fabs
	fdim
	floor
	fma
	fmax
	fmax
	fmin45
	fmin
	fmod
	fract
	frexp
	hypot
	ilogb
	ldexp
	lgamma
	lgamma_r
	log
	log2
	log10
	log1p
	logb
	mad
	maxmag
	minmag
	modf
	nextafter
	pow
	pown
	powr
	remquo
	intn
	remquo
	rint
	rootn
	rootn
	round
	rsqrt
	sin
	sincos
	sinh
	sinpi
	sqrt
	tan
	tanh
	tanpi
	tgamma
	trunc


	half_cos
	half_divide
	half_exp
	half_exp2
	half_exp10
	half_log
	half_log2
	half_log10
	half_powr
	half_recip
	half_rsqrt
	half_sin
	half_sqrt
	half_tan
	native_cos
	native_divide
	native_exp
	native_exp2
	native_exp10
	native_log
	native_log2
	native_log10
	native_powr
	native_recip
	native_rsqrt
	native_sin
	native_sqrt
	native_tan

pystencils/backends/opencl_backend.py

deleted100644 → 0

+0 −97

Original line number	Original line	Diff line number	Diff line
	from os.path import dirname, join

	import pystencils.data_types
	from pystencils.astnodes import Node
	from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
	from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
	from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt

	# Read the list of OpenCL 1.1 built-in function names (one per line) that ships next to this
	# module and map every name to itself.
	with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
	    lines = f.readlines()
	    # Test the stripped line: the blank separator lines still contain '\n' and are therefore
	    # truthy, which would otherwise register an empty-string function name.
	    OPENCL_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l.strip()}


	def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
	    """Render an abstract syntax tree node (made for target 'gpu') as OpenCL source code.

	    Delegates to `generate_c` with the ``'opencl'`` dialect selected.

	    Args:
	        astnode: KernelFunction node to generate code for
	        signature_only: if True only the signature is printed

	    Returns:
	        C-like code for the ast node and its descendants
	    """
	    opencl_code = generate_c(astnode, signature_only, dialect='opencl')
	    return opencl_code


	class OpenClBackend(CudaBackend):
	    """CUDA backend variant that prints OpenCL; block synchronization and textures are unsupported."""

	    def __init__(self,
	                 sympy_printer=None,
	                 signature_only=False):
	        # Use the OpenCL expression printer unless the caller brings its own
	        printer = sympy_printer if sympy_printer else OpenClSympyPrinter()

	        super().__init__(printer, signature_only)
	        # The parent constructor is CUDA-flavoured, so the dialect is overridden afterwards
	        self._dialect = 'opencl'

	    def _print_Type(self, node):
	        """Print a type, qualifying pointer types with ``__global``."""
	        code = super()._print_Type(node)
	        if not isinstance(node, pystencils.data_types.PointerType):
	            return code
	        return "__global " + code

	    def _print_ThreadBlockSynchronization(self, node):
	        raise NotImplementedError()

	    def _print_TextureDeclaration(self, node):
	        raise NotImplementedError()


	class OpenClSympyPrinter(CudaSympyPrinter):
	    """Expression printer for OpenCL: maps CUDA thread indexing symbols to OpenCL work-item functions."""
	    language = "OpenCL"

	    # Component suffix of a CUDA indexing symbol -> OpenCL dimension index
	    DIMENSION_MAPPING = {
	        'x': '0',
	        'y': '1',
	        'z': '2'
	    }
	    # CUDA indexing variable -> OpenCL work-item function.
	    # NOTE(review): 'gridDim' is mapped to 'get_global_size'; confirm this is intended rather
	    # than the number of work-groups.
	    INDEXING_FUNCTION_MAPPING = {
	        'blockIdx': 'get_group_id',
	        'threadIdx': 'get_local_id',
	        'blockDim': 'get_local_size',
	        'gridDim': 'get_global_size'
	    }

	    def __init__(self):
	        # Skip CudaSympyPrinter.__init__ on purpose so that the CUDA function table is not merged in
	        CustomSympyPrinter.__init__(self)
	        # Take a copy: assigning the module-level dict itself would let a change made through one
	        # printer instance leak into the shared table and every other instance.
	        self.known_functions = dict(OPENCL_KNOWN_FUNCTIONS)

	    def _print_ThreadIndexingSymbol(self, node):
	        """Translate a symbol named like ``blockIdx.x`` into the matching OpenCL call cast to int."""
	        symbol_name: str = node.name
	        function_name, dimension = tuple(symbol_name.split("."))
	        dimension = self.DIMENSION_MAPPING[dimension]
	        function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
	        return f"(int) {function_name}({dimension})"

	    def _print_TextureAccess(self, node):
	        raise NotImplementedError()

	    # For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter
	    # since built-in math functions are generic.
	    # In CUDA, you have to differentiate between `sin` and `sinf`
	    try:
	        _print_math_func = CustomSympyPrinter._print_math_func
	    except AttributeError:
	        pass
	    _print_Pow = CustomSympyPrinter._print_Pow

	    def _print_Function(self, expr):
	        """Print the fast approximation functions as OpenCL ``native_*`` calls, everything else as in C."""
	        if isinstance(expr, fast_division):
	            return "native_divide(%s, %s)" % tuple(self._print(a) for a in expr.args)
	        elif isinstance(expr, fast_sqrt):
	            return "native_sqrt(%s)" % tuple(self._print(a) for a in expr.args)
	        elif isinstance(expr, fast_inv_sqrt):
	            return "native_rsqrt(%s)" % tuple(self._print(a) for a in expr.args)
	        return CustomSympyPrinter._print_Function(self, expr)

pystencils/boundaries/createindexlistcython.c

deleted100644 → 0

+0 −41083

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/cache.py

deleted100644 → 0

+0 −45

Original line number	Original line	Diff line number	Diff line
	import os
	from collections.abc import Hashable
	from functools import partial
	from itertools import chain

	# Use the standard-library LRU cache; fall back to the backport package if functools does not provide it.
	try:
	from functools import lru_cache as memorycache
	except ImportError:
	from backports.functools_lru_cache import lru_cache as memorycache


	# Select the caching decorators.
	# With joblib and appdirs available, results are cached on disk in the directory named by the
	# PYSTENCILS_CACHE_DIR environment variable, or in the user cache directory of 'pystencils' otherwise.
	try:
	from joblib import Memory
	from appdirs import user_cache_dir
	if 'PYSTENCILS_CACHE_DIR' in os.environ:
	cache_dir = os.environ['PYSTENCILS_CACHE_DIR']
	else:
	cache_dir = user_cache_dir('pystencils')
	disk_cache = Memory(cache_dir, verbose=False).cache
	disk_cache_no_fallback = disk_cache
	except ImportError:
	# fallback to in-memory caching if joblib is not available
	disk_cache = memorycache(maxsize=64)

	# without the disk backend this decorator returns its argument unchanged, i.e. it does not cache at all
	def disk_cache_no_fallback(o):
	return o


	def _wrapper(wrapped_func, cached_func, args, *kwargs):
	if all(isinstance(a, Hashable) for a in chain(args, kwargs.values())):
	return cached_func(args, *kwargs)
	else:
	return wrapped_func(args, *kwargs)


	def memorycache_if_hashable(maxsize=128, typed=False):
	    """Decorator factory: memoise calls with hashable arguments only, run all other calls uncached.

	    Args:
	        maxsize: forwarded to ``memorycache``
	        typed: forwarded to ``memorycache``
	    """

	    def decorate(func):
	        cached_variant = memorycache(maxsize, typed)(func)
	        return partial(_wrapper, func, cached_variant)

	    return decorate

	# Disable memory cache:
	# disk_cache = lambda o: o
	# disk_cache_no_fallback = lambda o: o

pystencils/data_types.py

deleted100644 → 0

+0 −832

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/gpucuda/texture_utils.py

deleted100644 → 0

+0 −126

Original line number	Original line	Diff line number	Diff line
	# -*- coding: utf-8 -*-
	#
	# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
	#
	# Distributed under terms of the GPLv3 license.

	"""

	"""

	from os.path import dirname, isdir, join

	import numpy as np

	try:
	import pycuda.driver as cuda
	from pycuda import gpuarray
	except Exception:
	pass


	def pow_two_divider(n):
	    """Return the largest power of two that divides ``n``; zero is mapped to zero."""
	    if n == 0:
	        return 0
	    power = 1
	    # Walk up the powers of two until one shares a set bit with n
	    while not n & power:
	        power *= 2
	    return power


	# Binds an array to the texture reference `tex_ref` and configures how the texture is sampled.
	#
	# Args:
	#   tex_ref: texture reference the array is attached to
	#   ndarray: numpy.ndarray or pycuda GPUArray holding the data
	#   address_mode: applied to every dimension the array has (up to three); defaults to cuda.address_mode.BORDER
	#   filter_mode: defaults to cuda.filter_mode.LINEAR
	#   use_normalized_coordinates: if False, the TRSF_NORMALIZED_COORDINATES flag is cleared
	#   read_as_integer: if False, the TRSF_READ_AS_INTEGER flag is cleared
	#
	# Raises:
	#   TypeError: if `ndarray` is neither a numpy array nor a GPUArray
	#
	# Note: `cuda` and `gpuarray` exist only if the pycuda import at the top of the module succeeded.
	def ndarray_to_tex(tex_ref,
	ndarray,
	address_mode=None,
	filter_mode=None,
	use_normalized_coordinates=False,
	read_as_integer=False):

	# the defaults are resolved here instead of in the signature because they need the pycuda module
	if address_mode is None:
	address_mode = cuda.address_mode.BORDER
	if filter_mode is None:
	filter_mode = cuda.filter_mode.LINEAR

	# convert the input into a CUDA array, using 'C' ordering
	if isinstance(ndarray, np.ndarray):
	cu_array = cuda.np_to_array(ndarray, 'C')
	elif isinstance(ndarray, gpuarray.GPUArray):
	cu_array = cuda.gpuarray_to_array(ndarray, 'C')
	else:
	raise TypeError(
	'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray')

	cuda.TextureReference.set_array(tex_ref, cu_array)

	# the address mode is set separately for each dimension present in the array
	tex_ref.set_address_mode(0, address_mode)
	if ndarray.ndim >= 2:
	tex_ref.set_address_mode(1, address_mode)
	if ndarray.ndim >= 3:
	tex_ref.set_address_mode(2, address_mode)
	tex_ref.set_filter_mode(filter_mode)

	# clear the flag bits that were not requested, leaving all other flags untouched
	if not use_normalized_coordinates:
	tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_NORMALIZED_COORDINATES)

	if not read_as_integer:
	tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_READ_AS_INTEGER)


	def prefilter_for_cubic_bspline(gpuarray):
	    """Run the ``SamplesToCoefficients`` kernels of CubicInterpolationCUDA on a 2D or 3D GPU array.

	    The kernels are compiled from the ``CubicInterpolationCUDA`` directory next to this module and
	    launched once per axis (X, Y and, for 3D arrays, Z) on ``gpuarray``.

	    Args:
	        gpuarray: two- or three-dimensional array living on the GPU

	    Raises:
	        AssertionError: if the array is not 2D/3D or the CubicInterpolationCUDA sources are missing
	    """
	    import pycuda.autoinit  # NOQA
	    from pycuda.compiler import SourceModule

	    ndim = gpuarray.ndim
	    assert ndim == 2 or ndim == 3, "Only 2d or 3d supported"
	    assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
	        "Submodule CubicInterpolationCUDA does not exist"
	    nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
	    nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
	    nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]

	    file_name = join(dirname(__file__), "CubicInterpolationCUDA", "code", "cubicPrefilter%iD.cu" % ndim)
	    with open(file_name) as file:
	        code = file.read()

	    mod = SourceModule(code, options=nvcc_options)

	    # Every kernel takes the same arguments: the array, the stride of its second-to-last axis,
	    # and the extents in reversed shape order.
	    pitch = np.uint32(gpuarray.strides[-2])
	    extents = [np.uint32(r) for r in reversed(gpuarray.shape)]

	    if ndim == 2:
	        height, width = gpuarray.shape
	        block = min(pow_two_divider(height), 64)
	        grid = height // block
	        func = mod.get_function('SamplesToCoefficients2DXf')
	        func(gpuarray, pitch, *extents,
	             block=(block, 1, 1),
	             grid=(grid, 1, 1))

	        block = min(pow_two_divider(width), 64)
	        grid = width // block
	        func = mod.get_function('SamplesToCoefficients2DYf')
	        func(gpuarray, pitch, *extents,
	             block=(block, 1, 1),
	             grid=(grid, 1, 1))
	    elif ndim == 3:
	        depth, height, width = gpuarray.shape
	        dimX = min(min(pow_two_divider(width), pow_two_divider(height)), 64)
	        # Integer division: block dimensions have to be ints, and `/` would yield a float here.
	        # dimX is a power of two of at most 64, so the division is exact.
	        dimY = min(min(pow_two_divider(depth), pow_two_divider(height)), 512 // dimX)
	        block = (dimX, dimY, 1)

	        dimGridX = (height // block[0], depth // block[1], 1)
	        dimGridY = (width // block[0], depth // block[1], 1)
	        dimGridZ = (width // block[0], height // block[1], 1)

	        func = mod.get_function("SamplesToCoefficients3DXf")
	        func(gpuarray, pitch, *extents,
	             block=block,
	             grid=dimGridX)
	        func = mod.get_function("SamplesToCoefficients3DYf")
	        func(gpuarray, pitch, *extents,
	             block=block,
	             grid=dimGridY)
	        func = mod.get_function("SamplesToCoefficients3DZf")
	        func(gpuarray, pitch, *extents,
	             block=block,
	             grid=dimGridZ)

pystencils/include/PyStencilsField.h

deleted100644 → 0

+0 −19

Original line number	Original line	Diff line number	Diff line
	#pragma once

	// Plain aggregate describing a field: a data pointer plus per-dimension shape and stride.
	// Two layouts are provided: built-in arrays when compiling device code (__CUDA_ARCH__ defined)
	// and std::array otherwise.
	// NOTE(review): shape and stride use the element type DTYPE_T rather than an index type - confirm intended.
	// NOTE(review): std::size_t is used before this header includes anything - presumably the
	// including translation unit provides it; verify.
	extern "C++" {
	#ifdef __CUDA_ARCH__
	// device-side variant
	template <typename DTYPE_T, std::size_t DIMENSION> struct PyStencilsField {
	DTYPE_T *data;
	DTYPE_T shape[DIMENSION];
	DTYPE_T stride[DIMENSION];
	};
	#else
	#include <array>

	// host-side variant
	template <typename DTYPE_T, std::size_t DIMENSION> struct PyStencilsField {
	DTYPE_T *data;
	std::array<DTYPE_T, DIMENSION> shape;
	std::array<DTYPE_T, DIMENSION> stride;
	};
	#endif
	}

pystencils/include/aesni_rand.h

deleted100644 → 0

+0 −132

Original line number	Original line	Diff line number	Diff line
	#if !defined(__AES__) || !defined(__SSE4_1__)
	#error AES-NI and SSE4.1 need to be enabled
	#endif

	#include <emmintrin.h> // SSE2
	#include <wmmintrin.h> // AES
	#ifdef __AVX512VL__
	#include <immintrin.h> // AVX*
	#else
	#include <smmintrin.h> // SSE4
	#ifdef __FMA__
	#include <immintrin.h> // FMA
	#endif
	#endif
	#include <cstdint>

	#define QUALIFIERS inline
	#define TWOPOW53_INV_DOUBLE (1.1102230246251565e-16)
	#define TWOPOW32_INV_FLOAT (2.3283064e-10f)

	typedef std::uint32_t uint32;
	typedef std::uint64_t uint64;

	// Runs the 128-bit block `in` through ten AES encryption rounds.
	// The same value `k` is used for the initial XOR and as the round key of every round.
	QUALIFIERS __m128i aesni1xm128i(const __m128i & in, const __m128i & k) {
	    __m128i state = _mm_xor_si128(k, in);
	    // rounds 1 to 9: regular AES rounds
	    for (int round = 1; round <= 9; ++round) {
	        state = _mm_aesenc_si128(state, k);
	    }
	    // round 10: final AES round
	    return _mm_aesenclast_si128(state, k);
	}

	// Converts four unsigned 32-bit integers to four floats.
	// With AVX-512VL the native unsigned conversion is used. Otherwise each value is split into
	// v >> 1 and v & 1, both parts are converted with the signed 32-bit conversion, and the
	// result is recombined as 2 * (v >> 1) + (v & 1).
	QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v)
	{
	#ifdef __AVX512VL__
	return _mm_cvtepu32_ps(v);
	#else
	__m128i v2 = _mm_srli_epi32(v, 1); // upper 31 bits, always non-negative as a signed value
	__m128i v1 = _mm_and_si128(v, _mm_set1_epi32(1)); // lowest bit
	__m128 v2f = _mm_cvtepi32_ps(v2);
	__m128 v1f = _mm_cvtepi32_ps(v1);
	return _mm_add_ps(_mm_add_ps(v2f, v2f), v1f);
	#endif
	}

	// Converts two unsigned 64-bit integers to two doubles.
	// With AVX-512VL the native conversion is used. Otherwise the upper and lower 32 bits of each
	// value are embedded into the bit patterns of the doubles 2^84 and 2^52, and the two offsets
	// are removed again by the subtraction and addition below.
	// For GCC >= 5 without AVX-512VL the function is compiled with associative-math disabled -
	// presumably so that the subtraction and addition are not reordered; verify.
	#if !defined(__AVX512VL__) && defined(__GNUC__) && __GNUC__ >= 5
	__attribute__((optimize("no-associative-math")))
	#endif
	QUALIFIERS __m128d _my_cvtepu64_pd(const __m128i x)
	{
	#ifdef __AVX512VL__
	return _mm_cvtepu64_pd(x);
	#else
	__m128i xH = _mm_srli_epi64(x, 32);
	xH = _mm_or_si128(xH, _mm_castpd_si128(_mm_set1_pd(19342813113834066795298816.))); // 2^84
	__m128i xL = _mm_blend_epi16(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)), 0xcc); // 2^52
	__m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(19342813118337666422669312.)); // 2^84 + 2^52
	return _mm_add_pd(f, _mm_castsi128_pd(xL));
	#endif
	}


	// Produce two doubles from one scrambled 128-bit counter block.
	// ctr0..ctr3 and key0..key3 are packed into 128-bit vectors and passed through
	// aesni1xm128i; the four resulting 32-bit words are combined pairwise into two
	// 64-bit integers, converted to double, scaled by TWOPOW53_INV_DOUBLE and
	// offset by half of that constant. Results are written to rnd1 and rnd2.
	QUALIFIERS void aesni_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
	                              uint32 key0, uint32 key1, uint32 key2, uint32 key3,
	                              double & rnd1, double & rnd2)
	{
	    // pack counter and key, then scramble the counter
	    const __m128i counter = _mm_set_epi32(ctr3, ctr2, ctr1, ctr0);
	    const __m128i key = _mm_set_epi32(key3, key2, key1, key0);
	    const __m128i block = aesni1xm128i(counter, key);

	    // widen to 64 bit: words 0 and 2 stay in place, words 1 and 3 are moved down by 4 bytes
	    const __m128i even = _mm_and_si128(block, _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff));
	    const __m128i oddMasked = _mm_and_si128(block, _mm_set_epi32(0xffffffff, 0, 0xffffffff, 0));
	    const __m128i odd = _mm_srli_si128(oddMasked, 4);

	    // bits = even ^ (odd << (53 - 32))
	    const __m128i oddShifted = _mm_sll_epi64(odd, _mm_set1_epi64x(53 - 32));
	    const __m128i bits = _mm_xor_si128(even, oddShifted);

	    // uint64 -> double, then scaled = scaled * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
	    __m128d scaled = _my_cvtepu64_pd(bits);
	#ifdef __FMA__
	    scaled = _mm_fmadd_pd(scaled, _mm_set1_pd(TWOPOW53_INV_DOUBLE), _mm_set1_pd(TWOPOW53_INV_DOUBLE/2.0));
	#else
	    scaled = _mm_mul_pd(scaled, _mm_set1_pd(TWOPOW53_INV_DOUBLE));
	    scaled = _mm_add_pd(scaled, _mm_set1_pd(TWOPOW53_INV_DOUBLE/2.0));
	#endif

	    // copy both lanes out through an aligned buffer
	    alignas(16) double lanes[2];
	    _mm_store_pd(lanes, scaled);
	    rnd1 = lanes[0];
	    rnd2 = lanes[1];
	}


	// Produce four floats from one scrambled 128-bit counter block.
	// ctr0..ctr3 and key0..key3 are packed into 128-bit vectors and passed through
	// aesni1xm128i; each resulting 32-bit word is converted to float, scaled by
	// TWOPOW32_INV_FLOAT and offset by half of that constant. Results are written
	// to rnd1..rnd4.
	QUALIFIERS void aesni_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
	                             uint32 key0, uint32 key1, uint32 key2, uint32 key3,
	                             float & rnd1, float & rnd2, float & rnd3, float & rnd4)
	{
	    // pack counter and key, then scramble the counter
	    const __m128i counter = _mm_set_epi32(ctr3, ctr2, ctr1, ctr0);
	    const __m128i key = _mm_set_epi32(key3, key2, key1, key0);
	    const __m128i block = aesni1xm128i(counter, key);

	    // uint32 -> float, then scaled = scaled * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
	    __m128 scaled = _my_cvtepu32_ps(block);
	#ifdef __FMA__
	    scaled = _mm_fmadd_ps(scaled, _mm_set1_ps(TWOPOW32_INV_FLOAT), _mm_set1_ps(TWOPOW32_INV_FLOAT/2.0f));
	#else
	    scaled = _mm_mul_ps(scaled, _mm_set1_ps(TWOPOW32_INV_FLOAT));
	    scaled = _mm_add_ps(scaled, _mm_set1_ps(TWOPOW32_INV_FLOAT/2.0f));
	#endif

	    // copy all four lanes out through an aligned buffer
	    alignas(16) float lanes[4];
	    _mm_store_ps(lanes, scaled);
	    rnd1 = lanes[0];
	    rnd2 = lanes[1];
	    rnd3 = lanes[2];
	    rnd4 = lanes[3];
	}

pystencils/include/cuda_complex.hpp

deleted100644 → 0

+0 −1228

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/include/opencl_stdint.h

deleted100644 → 0

+0 −15

Original line number	Original line	Diff line number	Diff line
	// Declares the fixed-width integer type names (plus uint_t) as typedefs of
	// built-in integer types, guarded against multiple inclusion.
	// NOTE(review): the mapping assumes char/short/int/long int are 8/16/32/64 bits
	// wide, as in OpenCL C - confirm for the target compiler.
	#ifndef OPENCL_STDINT
	#define OPENCL_STDINT

	typedef unsigned int uint_t;

	// signed types
	typedef signed char int8_t;
	typedef signed short int16_t;
	typedef signed int int32_t;
	typedef signed long int int64_t;
	// unsigned types
	typedef unsigned char uint8_t;
	typedef unsigned short uint16_t;
	typedef unsigned int uint32_t;
	typedef unsigned long int uint64_t;

	#endif

pystencils/include/philox_rand.h

deleted100644 → 0

+0 −103

Original line number	Original line	Diff line number	Diff line
	#include <cstdint>

	#ifndef __CUDA_ARCH__
	#define QUALIFIERS inline
	#else
	#define QUALIFIERS static __forceinline__ __device__
	#endif

	#define PHILOX_W32_0 (0x9E3779B9)
	#define PHILOX_W32_1 (0xBB67AE85)
	#define PHILOX_M4x32_0 (0xD2511F53)
	#define PHILOX_M4x32_1 (0xCD9E8D57)
	#define TWOPOW53_INV_DOUBLE (1.1102230246251565e-16)
	#define TWOPOW32_INV_FLOAT (2.3283064e-10f)

	typedef std::uint32_t uint32;
	typedef std::uint64_t uint64;


	// Multiply two 32-bit values: returns the low 32 bits of the product and
	// stores the high 32 bits in *hip.
	QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip)
	{
	#ifndef __CUDA_ARCH__
	    // host code: form the full 64-bit product and split it
	    const uint64 wide = static_cast<uint64>(a) * static_cast<uint64>(b);
	    *hip = static_cast<uint32>(wide >> 32);
	    return static_cast<uint32>(wide);
	#else
	    // device code: __umulhi yields the high word, the plain multiply the low word
	    *hip = __umulhi(a, b);
	    return a * b;
	#endif
	}

	// Apply one Philox-4x32 round in place to the four counter words in ctr,
	// using the two key words in key.
	QUALIFIERS void _philox4x32round(uint32* ctr, uint32* key)
	{
	    uint32 high0;
	    uint32 high1;
	    const uint32 low0 = mulhilo32(PHILOX_M4x32_0, ctr[0], &high0);
	    const uint32 low1 = mulhilo32(PHILOX_M4x32_1, ctr[2], &high1);

	    // compute the mixed words from the old counter before overwriting anything
	    const uint32 mixed0 = high1 ^ ctr[1] ^ key[0];
	    const uint32 mixed2 = high0 ^ ctr[3] ^ key[1];

	    ctr[0] = mixed0;
	    ctr[1] = low1;
	    ctr[2] = mixed2;
	    ctr[3] = low0;
	}

	// Advance the two key words in place by the constants PHILOX_W32_0 and PHILOX_W32_1.
	QUALIFIERS void _philox4x32bumpkey(uint32* key)
	{
	    key[0] = key[0] + PHILOX_W32_0;
	    key[1] = key[1] + PHILOX_W32_1;
	}

	// Combine two 32-bit words into one double:
	// z = x ^ (y << (53 - 32)), returned as z * TWOPOW53_INV_DOUBLE + TWOPOW53_INV_DOUBLE/2.
	QUALIFIERS double _uniform_double_hq(uint32 x, uint32 y)
	{
	    const uint64 shifted = static_cast<uint64>(y) << (53 - 32);
	    const uint64 z = static_cast<uint64>(x) ^ shifted;
	    return z * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0);
	}


	// Produce two doubles from a 4-word counter and a 2-word key by running ten
	// Philox-4x32 rounds (the key is bumped before every round except the first)
	// and converting the counter words pairwise with _uniform_double_hq.
	QUALIFIERS void philox_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
	                               uint32 key0, uint32 key1, double & rnd1, double & rnd2)
	{
	    uint32 key[2] = {key0, key1};
	    uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};

	    _philox4x32round(ctr, key); // round 1
	    for (int round = 2; round <= 10; ++round) { // rounds 2..10
	        _philox4x32bumpkey(key);
	        _philox4x32round(ctr, key);
	    }

	    rnd1 = _uniform_double_hq(ctr[0], ctr[1]);
	    rnd2 = _uniform_double_hq(ctr[2], ctr[3]);
	}



	// Produce four floats from a 4-word counter and a 2-word key by running ten
	// Philox-4x32 rounds (the key is bumped before every round except the first);
	// each counter word is scaled by TWOPOW32_INV_FLOAT and offset by half of it.
	QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
	                              uint32 key0, uint32 key1,
	                              float & rnd1, float & rnd2, float & rnd3, float & rnd4)
	{
	    uint32 key[2] = {key0, key1};
	    uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};

	    _philox4x32round(ctr, key); // round 1
	    for (int round = 2; round <= 10; ++round) { // rounds 2..10
	        _philox4x32bumpkey(key);
	        _philox4x32round(ctr, key);
	    }

	    rnd1 = ctr[0] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
	    rnd2 = ctr[1] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
	    rnd3 = ctr[2] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
	    rnd4 = ctr[3] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
	}
	No newline at end of file

pystencils/interpolation_astnodes.py

deleted100644 → 0

+0 −458

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/kerncraft_coupling/init.py

deleted100644 → 0

+0 −4

Original line number	Original line	Diff line number	Diff line
	from .generate_benchmark import generate_benchmark, run_c_benchmark
	from .kerncraft_interface import KerncraftParameters, PyStencilsKerncraftKernel

	__all__ = ['PyStencilsKerncraftKernel', 'KerncraftParameters', 'generate_benchmark', 'run_c_benchmark']

pystencils/kerncraft_coupling/generate_benchmark.py

deleted100644 → 0

+0 −210

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/kerncraft_coupling/kerncraft_interface.py

deleted100644 → 0

+0 −182

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/llvm/init.py

deleted100644 → 0

+0 −4

Original line number	Original line	Diff line number	Diff line
	from .kernelcreation import create_kernel
	from .llvmjit import make_python_function

	__all__ = ['create_kernel', 'make_python_function']

pystencils/llvm/control_flow.py

deleted100644 → 0

+0 −52

Original line number	Original line	Diff line number	Diff line
	import llvmlite.ir as ir


	class Loop(object):
	    """Context manager that emits a counted loop through an IR builder.

	    Entering the ``with`` block creates the loop's basic blocks and returns the
	    phi node that holds the loop counter; the builder is left positioned at the
	    end of the loop body block. Leaving the ``with`` block branches to the
	    loop-end block and positions the builder at the end of the block that
	    follows the loop.
	    """

	    def __init__(self, builder, start_val, stop_val, step_val=1, loop_name='loop', phi_name="_phi"):
	        self.builder = builder
	        self.start_val, self.stop_val, self.step_val = start_val, stop_val, step_val
	        self.loop_name, self.phi_name = loop_name, phi_name

	    def __enter__(self):
	        blocks_and_counter = self._for_loop(self.start_val, self.stop_val, self.step_val,
	                                            self.loop_name, self.phi_name)
	        self.loop_end, self.after, counter = blocks_and_counter
	        return counter

	    def _for_loop(self, start_val, stop_val, step_val, loop_name, phi_name):
	        """Emit the loop skeleton and return ``(loop_end_block, after_block, phi)``."""
	        # TODO size of int??? unsigned???
	        counter_type = ir.IntType(64)
	        bld = self.builder

	        # Body block, entered by an explicit branch from the current block
	        entry_block = bld.block
	        body_block = bld.append_basic_block(name='loop_' + loop_name)
	        bld.branch(body_block)
	        bld.position_at_start(body_block)

	        # Counter phi: first incoming value is the start value
	        counter = bld.phi(counter_type, name=phi_name)
	        counter.add_incoming(start_val, entry_block)

	        # Loop-end block: increment the counter and compare against the stop value
	        latch_block = bld.append_basic_block(name=loop_name + "_end_bb")
	        bld.position_at_start(latch_block)
	        incremented = bld.add(counter, step_val, name=loop_name + '_next_it')
	        keep_going = bld.icmp_unsigned('<', incremented, stop_val, name=loop_name + "_cond")

	        # Either jump back into the body or leave the loop
	        exit_block = bld.append_basic_block(name=loop_name + "_after_bb")
	        bld.cbranch(keep_going, body_block, exit_block)
	        counter.add_incoming(incremented, latch_block)

	        # Code emitted inside the ``with`` block goes into the body
	        bld.position_at_end(body_block)

	        return latch_block, exit_block, counter

	    def __exit__(self, exc_type, exc, exc_tb):
	        bld = self.builder
	        bld.branch(self.loop_end)
	        bld.position_at_end(self.after)

pystencils/llvm/kernelcreation.py

deleted100644 → 0

+0 −45

Original line number	Original line	Diff line number	Diff line
	from pystencils.llvm.llvmjit import make_python_function
	from pystencils.transformations import insert_casts


	def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(),
	                  iteration_slice=None, ghost_layers=None, target='cpu'):
	    """
	    Creates an abstract syntax tree for a kernel function, by taking a list of update rules.

	    Loops are created according to the field accesses in the equations.

	    Args:
	        assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
	                     Defining the update rules of the kernel
	        function_name: name of the generated function - only important if generated code is written out
	        type_info: a map from symbol name to a C type specifier. If not specified all symbols are assumed to
	                   be of type 'double' except symbols which occur on the left hand side of equations where the
	                   right hand side is a sympy Boolean which are assumed to be 'bool' .
	        split_groups: Specification on how to split up inner loop into multiple loops. For details see
	                      transformation :func:`pystencils.transformation.split_inner_loop`
	        iteration_slice: if not None, iteration is done only over this slice of the field
	        ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
	                      if None, the number of ghost layers is determined automatically and assumed to be equal for
	                      all dimensions
	        target: 'cpu' or 'gpu'; selects which kernel creation routine builds the AST
	                (note that ``split_groups`` is only forwarded for 'cpu')

	    Raises:
	        NotImplementedError: if ``target`` is neither 'cpu' nor 'gpu'

	    :return: :class:`pystencils.ast.KernelFunction` node
	    """
	    if target == 'cpu':
	        from pystencils.cpu import create_kernel
	        code = create_kernel(assignments, function_name, type_info, split_groups, iteration_slice, ghost_layers)
	        code._backend = 'llvm'
	    elif target == 'gpu':
	        from pystencils.gpucuda.kernelcreation import create_cuda_kernel
	        code = create_cuda_kernel(assignments,
	                                  function_name,
	                                  type_info,
	                                  iteration_slice=iteration_slice,
	                                  ghost_layers=ghost_layers)
	        code._backend = 'llvm_gpu'
	    else:
	        # The exception used to be constructed but never raised, so execution
	        # continued with ``code`` unbound and failed with an unrelated error.
	        raise NotImplementedError("Unsupported target %r: expected 'cpu' or 'gpu'" % (target,))
	    code.body = insert_casts(code.body)
	    code._compile_function = make_python_function

	    return code

pystencils/llvm/llvm.py

deleted100644 → 0

+0 −379

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/llvm/llvmjit.py

deleted100644 → 0

+0 −323

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/math_optimizations.py

deleted100644 → 0

+0 −46

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/opencl/opencljit.py

deleted100644 → 0

+0 −93

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/rng.py

deleted100644 → 0

+0 −115

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils/sympy_gmpy_bug_workaround.py

deleted100644 → 0

+0 −18

Original line number	Original line	Diff line number	Diff line
	# Keep mpmath off the gmpy backend until the sympy/gmpy bug that shows up when
	# joblib serializes is resolved.
	# See https://github.com/sympy/sympy/pull/13530
	import os
	import warnings

	# Request the non-gmpy backend through the environment
	os.environ['MPMATH_NOGMPY'] = '1'
	try:
	    import mpmath.libmp
	    # The backend may already be gmpy if the user imported sympy before pystencils
	    active_backend = mpmath.libmp.BACKEND
	    if active_backend == 'gmpy':
	        gmpy_notice = ("You are using the gmpy backend. You might encounter an error 'argument is not an mpz sympy'. "
	                       "This is due to a known bug in sympy/gmpy library. "
	                       "To prevent this, import pystencils first then sympy or set the environment variable "
	                       "MPMATH_NOGMPY=1")
	        warnings.warn(gmpy_notice)
	except ImportError:
	    # mpmath is not installed, nothing to work around
	    pass

	__all__ = []

pystencils/test_type_interference.py

deleted100644 → 0

+0 −26

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/SkylakeSP_Gold-5122_allinclusive.yaml

deleted100644 → 0

+0 −600

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/benchmark.py

deleted100644 → 0

+0 −188

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/generate.py

deleted100644 → 0

+0 −50

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/iacaMarks.h

deleted100644 → 0

+0 −53

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/benchmark/main.c

deleted100644 → 0

+0 −11

Original line number	Original line	Diff line number	Diff line
	#include "iacaMarks.h"

	// Minimal program that sums loop indices between the IACA_START / IACA_END
	// markers (presumably the analyzer marker macros provided by iacaMarks.h).
	// The loop bound depends on argc and the sum is returned as the exit status
	// (presumably so the compiler cannot remove or constant-fold the loop - confirm).
	int main(int argc, char * argv[]){
	int a = 0;
	for(int i = 0; i < argc+100000; i++){
	// start marker sits at the top of the loop body
	IACA_START
	a += i;
	}
	// end marker sits directly after the loop
	IACA_END
	return a;
	}

pystencils_tests/kerncraft_inputs/2d-5pt.c

deleted100644 → 0

+0 −8

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/kerncraft_inputs/3d-7pt.c

deleted100644 → 0

+0 −10

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/kerncraft_inputs/default_machine_file.yaml

deleted100644 → 0

+0 −277

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_address_of.py

deleted100644 → 0

+0 −47

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_assignment_collection.py

deleted100644 → 0

+0 −29

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_basic_usage_llvm.ipynb

deleted100644 → 0

+0 −398

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_complex_numbers.py

deleted100644 → 0

+0 −142

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_conditional_vec.py

deleted100644 → 0

+0 −64

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_cuda_known_functions.py

deleted100644 → 0

+0 −47

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_datahandling_parallel.py

deleted100644 → 0

+0 −66

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_fd_derivation.ipynb

deleted100644 → 0

+0 −406

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_field.py

deleted100644 → 0

+0 −167

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_field_coordinates.py

deleted100644 → 0

+0 −99

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_indexed_kernels.py

deleted100644 → 0

+0 −56

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_interpolation.py

deleted100644 → 0

+0 −236

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_jacobi_llvm.py

deleted100644 → 0

+0 −93

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_kerncraft_coupling.py

deleted100644 → 0

+0 −130

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_opencl.py

deleted100644 → 0

+0 −235

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_phasefield_dentritic_3D.ipynb

deleted100644 → 0

+0 −367

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_print_infinity.py

deleted100644 → 0

+0 −22

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_print_unsupported_node.py

deleted100644 → 0

+0 −24

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_random.py

deleted100644 → 0

+0 −94

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_size_and_layout_checks_llvm.py

deleted100644 → 0

+0 −81

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_sliced_iteration.py

deleted100644 → 0

+0 −55

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_small_block_benchmark.ipynb

deleted100644 → 0

+0 −184

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_stencils.py

deleted100644 → 0

+0 −1

Original line number	Original line	Diff line number	Diff line
	import pystencils as ps

pystencils_tests/test_sum_prod.py

deleted100644 → 0

+0 −132

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_sympy_optimizations.py

deleted100644 → 0

+0 −60

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_types.py

deleted100644 → 0

+0 −62

File deleted.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_vectorization.py

deleted100644 → 0

+0 −161

File deleted.

Preview size limit exceeded, changes collapsed.

pytest.ini

+28 −11

File changed.

Preview size limit exceeded, changes collapsed.

quicktest.py

0 → 100644

+22 −0

Original line number	Original line	Diff line number	Diff line
			#!/usr/bin/env python3

			from contextlib import redirect_stdout
			import io
			from tests.test_quicktests import (
			test_basic_kernel,
			test_basic_blocking_staggered,
			test_basic_vectorization,
			)

			# Test functions executed, in this order, when the file is run as a script
			quick_tests = [test_basic_kernel, test_basic_blocking_staggered, test_basic_vectorization]

			if __name__ == "__main__":
			    print("Running pystencils quicktests")
			    for quick_test in quick_tests:
			        print(f" -> {quick_test.__name__}")
			        # Whatever the test prints goes into a throwaway buffer
			        discarded_output = io.StringIO()
			        with redirect_stdout(discarded_output):
			            quick_test()

release.sh

+2 −2

File changed.

Preview size limit exceeded, changes collapsed.

setup.py

+15 −128

File changed.

Preview size limit exceeded, changes collapsed.

pystencils/init.py→src/pystencils/init.py

+40 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/_version.py

0 → 100644

+683 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/alignedarray.py→src/pystencils/alignedarray.py

+29 −3

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/assignment.py→src/pystencils/assignment.py

+28 −46

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/astnodes.py→src/pystencils/astnodes.py

+134 −53

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/backends/init.py→src/pystencils/backends/init.py

+0 −6

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/arm_instruction_sets.py

0 → 100644

+174 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/backends/cbackend.py→src/pystencils/backends/cbackend.py

+911 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/cuda_backend.py

0 → 100644

+68 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/backends/dot.py→src/pystencils/backends/dot.py

+10 −7

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/backends/json.py→src/pystencils/backends/json.py

+10 −33

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/ppc_instruction_sets.py

0 → 100644

+106 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/riscv_instruction_sets.py

0 → 100644

+111 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/backends/simd_instruction_sets.py

0 → 100644

+126 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/backends/simd_instruction_sets.py→src/pystencils/backends/x86_instruction_sets.py

+178 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/bit_masks.py

0 → 100644

+53 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/boundaries/init.py→src/pystencils/boundaries/init.py

+0 −0

File moved.

pystencils/boundaries/boundaryconditions.py→src/pystencils/boundaries/boundaryconditions.py

+9 −9

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/boundaries/boundaryhandling.py→src/pystencils/boundaries/boundaryhandling.py

+78 −55

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/boundaries/createindexlist.py→src/pystencils/boundaries/createindexlist.py

+221 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/boundaries/createindexlistcython.pyx→src/pystencils/boundaries/createindexlistcython.pyx

+87 −22

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/boundaries/inkernel.py→src/pystencils/boundaries/inkernel.py

+1 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/cache.py

0 → 100644

+72 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/config.py

0 → 100644

+209 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/cpu/init.py→src/pystencils/cpu/init.py

+2 −2

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/cpu/cpujit.py→src/pystencils/cpu/cpujit.py

+218 −104

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/cpu/kernelcreation.py→src/pystencils/cpu/kernelcreation.py

+88 −65

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/cpu/msvc_detection.py→src/pystencils/cpu/msvc_detection.py

+1 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/cpu/vectorization.py→src/pystencils/cpu/vectorization.py

+425 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/datahandling/init.py→src/pystencils/datahandling/init.py

+22 −5

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/datahandling/blockiteration.py→src/pystencils/datahandling/blockiteration.py

+5 −5

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/datahandling/datahandling_interface.py→src/pystencils/datahandling/datahandling_interface.py

+66 −2

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/datahandling/parallel_datahandling.py→src/pystencils/datahandling/parallel_datahandling.py

+54 −31

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/datahandling/serial_datahandling.py→src/pystencils/datahandling/serial_datahandling.py

+100 −52

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/datahandling/vtk.py→src/pystencils/datahandling/vtk.py

+0 −0

File moved.

pystencils/display_utils.py→src/pystencils/display_utils.py

+48 −6

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/enums.py

0 → 100644

+30 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/fast_approximation.py→src/pystencils/fast_approximation.py

+12 −2

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/fd/init.py→src/pystencils/fd/init.py

+2 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/fd/derivation.py→src/pystencils/fd/derivation.py

+16 −29

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/fd/derivative.py→src/pystencils/fd/derivative.py

+22 −3

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/fd/finitedifferences.py→src/pystencils/fd/finitedifferences.py

+20 −43

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/fd/finitevolumes.py

0 → 100644

+250 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/fd/spatial.py→src/pystencils/fd/spatial.py

+2 −35

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/field.py→src/pystencils/field.py

+179 −180

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/functions.py

0 → 100644

+57 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/gpucuda/init.py→src/pystencils/gpu/init.py

+9 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/gpu/gpu_array_handler.py

0 → 100644

+98 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/gpucuda/cudajit.py→src/pystencils/gpu/gpujit.py

+56 −51

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/gpucuda/indexing.py→src/pystencils/gpu/indexing.py

+431 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/gpucuda/kernelcreation.py→src/pystencils/gpu/kernelcreation.py

+98 −57

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/gpucuda/periodicity.py→src/pystencils/gpu/periodicity.py

+17 −11

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/include/init.py→src/pystencils/include/init.py

+5 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/include/aesni_rand.h

0 → 100644

+1242 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/include/arm_neon_helpers.h

0 → 100644

+126 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/include/gpu_defines.h

0 → 100644

+44 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/include/half_precision.h

0 → 100644

+49 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/include/myintrin.h

0 → 100644

+130 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/include/philox_rand.h

0 → 100644

+1400 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/include/ppc_altivec_helpers.h

0 → 100644

+82 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/include/riscv_v_helpers.h

0 → 100644

+80 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/integer_functions.py→src/pystencils/integer_functions.py

+18 −7

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/integer_set_analysis.py→src/pystencils/integer_set_analysis.py

+8 −7

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/jupyter.py→src/pystencils/jupyter.py

+0 −61

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/kernel_contrains_check.py

0 → 100644

+141 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/kernel_decorator.py→src/pystencils/kernel_decorator.py

+78 −20

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/kernel_wrapper.py→src/pystencils/kernel_wrapper.py

+22 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/kernelcreation.py→src/pystencils/kernelcreation.py

+394 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/node_collection.py

0 → 100644

+84 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/placeholder_function.py→src/pystencils/placeholder_function.py

+1 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/plot.py→src/pystencils/plot.py

+0 −0

File moved.

src/pystencils/rng.py

0 → 100644

+128 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/runhelper/init.py→src/pystencils/runhelper/init.py

+0 −0

File moved.

pystencils/runhelper/db.py→src/pystencils/runhelper/db.py

+63 −4

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/runhelper/parameterstudy.py→src/pystencils/runhelper/parameterstudy.py

+7 −5

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/session.py→src/pystencils/session.py

+8 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/simp/init.py→src/pystencils/simp/init.py

+11 −3

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/simp/assignment_collection.py→src/pystencils/simp/assignment_collection.py

+82 −30

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/simp/simplifications.py→src/pystencils/simp/simplifications.py

+86 −11

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/simp/simplificationstrategy.py→src/pystencils/simp/simplificationstrategy.py

+3 −3

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/simp/subexpression_insertion.py

0 → 100644

+95 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/simplificationfactory.py

0 → 100644

+18 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/slicing.py→src/pystencils/slicing.py

+5 −2

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/spatial_coordinates.py→src/pystencils/spatial_coordinates.py

+0 −0

File moved.

pystencils/stencil.py→src/pystencils/stencil.py

+51 −9

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/sympyextensions.py→src/pystencils/sympyextensions.py

+146 −27

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils/timeloop.py→src/pystencils/timeloop.py

+0 −0

File moved.

pystencils/transformations.py→src/pystencils/transformations.py

+225 −485

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/typing/init.py

0 → 100644

+16 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/typing/cast_functions.py

0 → 100644

+131 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/typing/leaf_typing.py

0 → 100644

+256 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/typing/transformations.py

0 → 100644

+27 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/kernelparameters.py→src/pystencils/typing/typed_sympy.py

+197 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/pystencils/typing/types.py

0 → 100644

+321 −0

File added.

Preview size limit exceeded, changes collapsed.

src/pystencils/typing/utilities.py

0 → 100644

+234 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils/utils.py→src/pystencils/utils.py

+60 −41

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/init.py

0 → 100644

+0 −0

Original line number	Original line	Diff line number	Diff line

tests/test_Min_Max.py

0 → 100644

+96 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_abs.py

0 → 100644

+22 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_address_of.py

0 → 100644

+50 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_aligned_array.py→tests/test_aligned_array.py

+71 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_assignment_collection.py

0 → 100644

+188 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_assignment_collection_dict_conversion.py→tests/test_assignment_collection_dict_conversion.py

+0 −0

File moved.

pystencils_tests/test_assignment_from_stencil.py→tests/test_assignment_from_stencil.py

+0 −0

File moved.

tests/test_astnodes.py

0 → 100644

+88 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_augmented_assignment.py

0 → 100644

+35 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_base_pointer_specification.py

0 → 100644

+54 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_bit_masks.py

0 → 100644

+45 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_blocking.py→tests/test_blocking.py

+20 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_blocking_staggered.py→tests/test_blocking_staggered.py

+3 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_boundary.py→tests/test_boundary.py

+246 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_boundary_indexlist_creation.py

0 → 100644

+121 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_buffer.py→tests/test_buffer.py

+83 −12

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_buffer_gpu.py→tests/test_buffer_gpu.py

+142 −33

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_conditional_field_access.py→tests/test_conditional_field_access.py

+13 −15

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_conditional_vec.py

0 → 100644

+159 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_config.py

0 → 100644

+124 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_create_kernel_config.py

0 → 100644

+46 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_custom_backends.py→tests/test_custom_backends.py

+51 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_data/datahandling_load_test.npz

0 → 100644

+410 B

File added.

Preview size limit exceeded, changes collapsed.

View file

tests/test_data/datahandling_parallel_load_test/dst.dat

0 → 100644

+304 B

File added.

Preview size limit exceeded, changes collapsed.

View file

tests/test_data/datahandling_parallel_load_test/src.dat

0 → 100644

+304 B

File added.

Preview size limit exceeded, changes collapsed.

View file

tests/test_data/datahandling_parallel_save_test/dst.dat

0 → 100644

+304 B

File added.

Preview size limit exceeded, changes collapsed.

View file

tests/test_data/datahandling_parallel_save_test/src.dat

0 → 100644

+304 B

File added.

Preview size limit exceeded, changes collapsed.

View file

tests/test_data/datahandling_save_test.npz

0 → 100644

+410 B

File added.

Preview size limit exceeded, changes collapsed.

View file

pystencils_tests/test_data/lenna.png→tests/test_data/lenna.png

(463 KiB)

File moved.

View file

pystencils_tests/test_data/test_vessel2d_mask.png→tests/test_data/test_vessel2d_mask.png

(7.43 KiB)

File moved.

View file

pystencils_tests/test_datahandling.py→tests/test_datahandling.py

+411 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_datahandling_parallel.py

0 → 100644

+199 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_derivative.py→tests/test_derivative.py

+0 −0

File moved.

tests/test_dot_printer.ipynb

0 → 100644

+216 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_dot_printer.py

0 → 100644

+13 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_dtype_check.py→tests/test_dtype_check.py

+0 −0

File moved.

pystencils_tests/test_fast_approximation.py→tests/test_fast_approximation.py

+12 −6

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_fd_derivation.ipynb

0 → 100644

+353 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_fd_derivation_via_rotation.ipynb→tests/test_fd_derivation_via_rotation.ipynb

+1 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_fd_derivative.py

0 → 100644

+24 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_field.py

0 → 100644

+304 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_field_access_poly.py

0 → 100644

+35 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_field_equality.ipynb→tests/test_field_equality.ipynb

+8 −9

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_finite_differences.py→tests/test_finite_differences.py

+31 −4

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_floor_ceil_int_optimization.py→tests/test_floor_ceil_int_optimization.py

+1 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_fvm.py

0 → 100644

+641 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_global_definitions.py→tests/test_global_definitions.py

+3 −3

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_cudagpu.py→tests/test_gpu.py

+257 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_half_precision.py

0 → 100644

+41 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_helpful_errors.py

0 → 100644

+37 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_indexed_kernels.py

0 → 100644

+79 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_jacobi_cbackend.py→tests/test_jacobi_cbackend.py

+9 −8

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_json_backend.py→tests/test_json_backend.py

+11 −3

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_json_serializer.py

0 → 100644

+28 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_jupyter_extensions.ipynb

0 → 100644

+260 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_logarithm.py

0 → 100644

+26 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_loop_cutting.py→tests/test_loop_cutting.py

+18 −5

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_match_subs_for_assignment_collection.py→tests/test_match_subs_for_assignment_collection.py

+2 −2

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_math_functions.py

0 → 100644

+114 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_modulo.py

0 → 100644

+51 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_move_constant_before_loop.py→tests/test_move_constant_before_loop.py

+35 −2

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_nodecollection.py

0 → 100644

+13 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_parameterstudy.py→tests/test_parameterstudy.py

+0 −0

File moved.

tests/test_phasefield_dentritic_3D.ipynb

0 → 100644

+378 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_pickle_support.py→tests/test_pickle_support.py

+1 −1

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_plot.py→tests/test_plot.py

+4 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_printing.py

0 → 100644

+104 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_quicktests.py

0 → 100644

+74 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_random.py

0 → 100644

+209 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_sharedmethodcache.py

0 → 100644

+85 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_simplification_strategy.py→tests/test_simplification_strategy.py

+88 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_simplifications.py

0 → 100644

+200 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_size_and_layout_checks.py→tests/test_size_and_layout_checks.py

+14 −5

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_sliced_iteration.py

0 → 100644

+106 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_slicing.py

0 → 100644

+96 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_small_block_benchmark.ipynb

0 → 100644

+201 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_source_code_comment.py→tests/test_source_code_comment.py

+5 −2

File changed and moved.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_staggered_kernel.py→tests/test_staggered_kernel.py

+26 −8

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_stencil_plot.ipynb

0 → 100644

+62 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_stencils.py

0 → 100644

+34 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_struct_types.py→tests/test_struct_types.py

+0 −0

File moved.

tests/test_subexpression_insertion.py

0 → 100644

+45 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_sum_prod.py

0 → 100644

+105 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_sympyextensions.py

0 → 100644

+228 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_timeloop.py

0 → 100644

+64 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_transformations.py

0 → 100644

+147 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_type_interference.py

0 → 100644

+33 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_types.py

0 → 100644

+247 −0

File added.

Preview size limit exceeded, changes collapsed.

pystencils_tests/test_fd_derivation.py→tests/test_utils.py

+81 −0

File changed and moved.

Preview size limit exceeded, changes collapsed.

tests/test_vectorization.py

0 → 100644

+386 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_vectorization_specific.py

0 → 100644

+339 −0

File added.

Preview size limit exceeded, changes collapsed.

tests/test_version_string.py

0 → 100644

+11 −0

File added.

Preview size limit exceeded, changes collapsed.

Source

Target

Files

Some changes are not shown.