Skip to content
Snippets Groups Projects
Commit 9577deec authored by Jan Hönig's avatar Jan Hönig
Browse files

Benchmark works again

parent a534e81b
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:b79814e5-9a10-4b20-a886-c16db785bc48 tags: %% Cell type:markdown id:b79814e5-9a10-4b20-a886-c16db785bc48 tags:
# LLVM-VE Vector Intrinsics # LLVM-VE Vector Intrinsics
- [Tutorial](https://sx-aurora-dev.github.io/ve-intrinsics-tutorial/) - [Tutorial](https://sx-aurora-dev.github.io/ve-intrinsics-tutorial/)
- [List of intrinsics](https://sx-aurora-dev.github.io/velintrin.html) - [List of intrinsics](https://sx-aurora-dev.github.io/velintrin.html)
- [Assembly manual](https://www.hpc.nec/documents/sdk/pdfs/VectorEngine-as-manual-v1.3.pdf) - [Assembly manual](https://www.hpc.nec/documents/sdk/pdfs/VectorEngine-as-manual-v1.3.pdf)
## Vector Register ## Vector Register
- `__vr`: 256 x 64 bit values - `__vr`: 256 x 64 bit values
- 64 registers available (**beware of spilling**) - 64 registers available (**beware of spilling**)
## Intrinsic Functions ## Intrinsic Functions
- format: `_vel_<asm>_<suffix>` - format: `_vel_<asm>_<suffix>`
- `<asm>`: instruction mnemonic in the [assembly manual](https://www.hpc.nec/documents/sdk/pdfs/VectorEngine-as-manual-v1.3.pdf) - `<asm>`: instruction mnemonic in the [assembly manual](https://www.hpc.nec/documents/sdk/pdfs/VectorEngine-as-manual-v1.3.pdf)
- has one suffix which handles the type: `d`: double - has one suffix which handles the type: `d`: double
- `<suffix>`: list of return value and arguments - `<suffix>`: list of return value and arguments
- `v`: vector - `v`: vector
- `s`: scalar - `s`: scalar
- `m` and `M`: mask for 256 and 512 elements - `m` and `M`: mask for 256 and 512 elements
- `l`: vector length - `l`: vector length
## Vector Load/Store ## Vector Load/Store
- 64 bit: `_vel_vld_vssl` - 64 bit: `_vel_vld_vssl`
- 32 bit: upper, lower or "packed" - 32 bit: upper, lower or "packed"
- 2 `s` arguments: stride; base address - 2 `s` arguments: stride; base address
## Vector Length and Pass Through Argument ## Vector Length and Pass Through Argument
- `l` defines how many elements are updated - `l` defines how many elements are updated
- all instructions are also available with an additional `v` argument, whose values are passed through to the non-updated elements (pass-through: `pt`) - all instructions are also available with an additional `v` argument, whose values are passed through to the non-updated elements (pass-through: `pt`)
## NT-Stores ## NT-Stores
- `nc` variants of memory access intrinsics (e.g. `_vel_vldnc_vssl`) - `nc` variants of memory access intrinsics (e.g. `_vel_vldnc_vssl`)
## Vector Mask ## Vector Mask
- `__vm256`: 256 bit - `__vm256`: 256 bit
- 8 registers - 8 registers
- 0 bit -> no update - 0 bit -> no update
- instructions with `m` suffix - instructions with `m` suffix
## Packed Instructions ## Packed Instructions
- operations on 512 elements of `fp32` or `int32`. - operations on 512 elements of `fp32` or `int32`.
- `p` prefix (e.g. `_vel_pvfadd_vl`) - `p` prefix (e.g. `_vel_pvfadd_vl`)
- TBD... - TBD...
%% Cell type:code id:5d56a49c-5963-4f86-a8d8-0b753a971e52 tags: %% Cell type:code id:5d56a49c-5963-4f86-a8d8-0b753a971e52 tags:
``` python ``` python
from pystencils.session import * from pystencils.session import *
import pystencils as ps import pystencils as ps
``` ```
%% Cell type:code id:9c204a47-a39b-450b-9026-9bb63cf54e83 tags: %% Cell type:code id:9c204a47-a39b-450b-9026-9bb63cf54e83 tags:
``` python ``` python
config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 've'}) config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 've'})
``` ```
%% Cell type:code id:5bbc4118-e1b7-412b-9dc8-7d7d524786df tags: %% Cell type:code id:5bbc4118-e1b7-412b-9dc8-7d7d524786df tags:
``` python ``` python
a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000)) a, b, c = ps.fields(a=np.ones(4000000), b=np.ones(4000000), c=np.ones(4000000))
alpha = sp.symbols('alpha') alpha = sp.symbols('alpha')
``` ```
%% Cell type:code id:85641913-f00a-4f95-af69-c7646ad5b3d0 tags: %% Cell type:code id:85641913-f00a-4f95-af69-c7646ad5b3d0 tags:
``` python ``` python
@ps.kernel(config) @ps.kernel_config(config)
def vadd(): def vadd():
a[0] @= b[0] + c[0] a[0] @= b[0] + c[0]
``` ```
%% Cell type:code id:33097862-9f60-4624-a1f2-0fdfd82831bc tags: %% Cell type:code id:33097862-9f60-4624-a1f2-0fdfd82831bc tags:
``` python ``` python
kernel_vadd = ps.create_kernel(**vadd) kernel_vadd = ps.create_kernel(**vadd)
ps.show_code(kernel_vadd) ps.show_code(kernel_vadd)
``` ```
%% Output %% Output
%% Cell type:code id:99aee06b-1704-4d06-b261-cc07a6a7f9a3 tags: %% Cell type:code id:99aee06b-1704-4d06-b261-cc07a6a7f9a3 tags:
``` python ``` python
@ps.kernel(config) @ps.kernel_config(config)
def daxpy(): def daxpy():
b[0] @= alpha * a[0] + b[0] b[0] @= alpha * a[0] + b[0]
``` ```
%% Cell type:code id:e79ab7bb-128e-47ee-9d53-8779c4f55d7e tags: %% Cell type:code id:e79ab7bb-128e-47ee-9d53-8779c4f55d7e tags:
``` python ``` python
kernel_daxpy = ps.create_kernel(**daxpy) kernel_daxpy = ps.create_kernel(**daxpy)
ps.show_code(kernel_daxpy) ps.show_code(kernel_daxpy)
``` ```
%% Output %% Output
%% Cell type:code id:4bb14872-11de-42cc-b5fc-4f995c7a6725 tags: %% Cell type:code id:4bb14872-11de-42cc-b5fc-4f995c7a6725 tags:
``` python ``` python
@ps.kernel(config) @ps.kernel_config(config)
def daxpy_one_off(): def daxpy_one_off():
b[0] @= alpha * a[0] + b[0] b[0] @= alpha * a[0] + b[0]
``` ```
%% Cell type:code id:457cc1fe-1aef-44bd-9fd1-f914e019c933 tags: %% Cell type:code id:457cc1fe-1aef-44bd-9fd1-f914e019c933 tags:
``` python ``` python
kernel_daxpy_one_off = ps.create_kernel(**daxpy_one_off) kernel_daxpy_one_off = ps.create_kernel(**daxpy_one_off)
ps.show_code(kernel_daxpy_one_off) ps.show_code(kernel_daxpy_one_off)
``` ```
%% Output %% Output
%% Cell type:code id:fea2f8e4 tags:
``` python
#from pystencils.kerncraft_coupling import generate_benchmark
```
%% Cell type:code id:4fbd5c90 tags:
``` python
#print(generate_benchmark(kernel_vadd))
```
%% Cell type:code id:ae05beb9-745f-4885-ad4f-667aae867f05 tags: %% Cell type:code id:ae05beb9-745f-4885-ad4f-667aae867f05 tags:
``` python ``` python
from pystencils.benchmark import kernel_header, kernel_source, generate_benchmark from pystencils_benchmark import kernel_header, kernel_source, generate_benchmark
from pathlib import Path from pathlib import Path
``` ```
%% Cell type:code id:7dfa86e1-8962-47df-900c-3133883a19c7 tags: %% Cell type:code id:7dfa86e1-8962-47df-900c-3133883a19c7 tags:
``` python ``` python
example_path = Path.cwd() / 'example' example_path = Path.cwd() / 'example'
example_path.mkdir(parents=True, exist_ok=True)
``` ```
%% Cell type:code id:dd39502f-d935-4b88-b8c8-896b1a294bf9 tags: %% Cell type:code id:dd39502f-d935-4b88-b8c8-896b1a294bf9 tags:
``` python ``` python
generate_benchmark(kernel_vadd, example_path) generate_benchmark([kernel_vadd, kernel_daxpy, kernel_daxpy_one_off], example_path)
``` ```
%% Output
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_209040/990452426.py in <module>
----> 1 generate_benchmark(kernel_vadd, example_path)
2
~/git/pystencils/pystencils/benchmark/benchmark.py in generate_benchmark(kernel_ast, path, dialect, kernel_name)
13 kernel_name = kernel_ast.function_name
14
---> 15 header = kernel_header(kernel_ast, dialect)
16 with open(path / f'{kernel_name}.h', 'w+') as f:
17 f.write(header)
~/git/pystencils/pystencils/benchmark/benchmark.py in kernel_header(kernel_ast, dialect)
23
24 def kernel_header(kernel_ast, dialect='c'):
---> 25 function_signature = generate_c(kernel_ast, dialect=dialect, signature_only=True)
26
27 jinja_context = {
~/git/pystencils/pystencils/backends/cbackend.py in generate_c(ast_node, signature_only, dialect, custom_backend, with_globals)
76 printer = OpenClBackend(signature_only=signature_only)
77 else:
---> 78 raise ValueError(f'Unknown {dialect=}')
79 code = printer(ast_node)
80 if not signature_only and isinstance(ast_node, KernelFunction):
ValueError: Unknown dialect='c'
%% Cell type:code id:f084548f-3a8e-4d45-8c7e-5845013b222e tags: %% Cell type:code id:f084548f-3a8e-4d45-8c7e-5845013b222e tags:
``` python ``` python
# Examples: # Examples:
generate_benchmark(kernel_daxpy, example_path) # generate_benchmark(kernel_daxpy, example_path)
generate_benchmark(kernel_vadd, example_path) # generate_benchmark(kernel_vadd, example_path)
generate_benchmark(kernel_daxpy_one_off, example_path) # generate_benchmark(kernel_daxpy_one_off, example_path)
``` ```
%% Cell type:code id:954b5ec7-a88a-4723-bd7f-227a3919b32d tags: %% Cell type:code id:954b5ec7-a88a-4723-bd7f-227a3919b32d tags:
``` python ``` python
``` ```
%% Cell type:code id:f51f2aff-f232-4de9-a0c4-94d3682eff86 tags: %% Cell type:code id:f51f2aff-f232-4de9-a0c4-94d3682eff86 tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment