Skip to content
Snippets Groups Projects
Commit 3a8e49af authored by Rafael Ravedutti
Browse files

Generate OpenMP code (still not working)

parent e7f04d1a
No related branches found
No related tags found
No related merge requests found
......@@ -8,7 +8,7 @@ PYCMD=python3
CC=mpicc
#CC=mpiicpx
#CC=mpiicpc
CFLAGS=-Ofast -march=core-avx2 ${MPI_FLAGS} ${LIKWID_FLAGS}
CFLAGS=-Ofast -march=core-avx2 -fopenmp ${MPI_FLAGS} ${LIKWID_FLAGS}
#CFLAGS=-Ofast -xHost -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS}
#CFLAGS=-Ofast -xCORE-AVX512 -qopt-zmm-usage=high ${MPI_FLAGS} ${LIKWID_FLAGS}
DEBUG_FLAGS=
......
......@@ -25,6 +25,29 @@ __host__ void copy_slice_to_host(const void *d_ptr, void *h_ptr, size_t offset,
__host__ void copy_static_symbol_to_device(void *h_ptr, const void *d_ptr, size_t count);
__host__ void copy_static_symbol_to_host(void *d_ptr, const void *h_ptr, size_t count);
#ifdef PAIRS_TARGET_OPENMP
#include <omp.h>
/**
 * Adds `val` to the integer stored at `addr` and returns the value that was
 * stored there before the addition.
 *
 * The read-modify-write is wrapped in an unnamed OpenMP critical section so
 * that only one thread at a time performs the update.
 *
 * @param addr  Location of the integer to update.
 * @param val   Amount to add to `*addr`.
 * @return      The value of `*addr` prior to the addition.
 */
inline __host__ int host_atomic_add(int *addr, int val) {
    int previous;

    #pragma omp critical
    {
        // Remember the old value first, then publish the sum.
        previous = *addr;
        *addr = previous + val;
    }

    return previous;
}
/**
 * Adds `val` to the floating-point value stored at `addr` and returns the
 * value that was stored there before the addition.
 *
 * The read-modify-write is wrapped in an unnamed OpenMP critical section so
 * that only one thread at a time performs the update.
 *
 * @param addr  Location of the value to update.
 * @param val   Amount to add to `*addr`.
 * @return      The exact value of `*addr` prior to the addition.
 */
inline __host__ real_t host_atomic_add(real_t *addr, real_t val) {
    real_t previous;

    #pragma omp critical
    {
        // Capture the old value before updating. Reconstructing it afterwards
        // as (new - val) is not exact in floating point: rounding in the sum
        // can make (old + val) - val differ from old.
        previous = *addr;
        *addr += val;
    }

    return previous;
}
#else
inline __host__ int host_atomic_add(int *addr, int val) {
*addr += val;
return *addr - val;
......@@ -35,6 +58,7 @@ inline __host__ real_t host_atomic_add(real_t *addr, real_t val) {
*addr += val;
return tmp;
}
#endif
inline __host__ int host_atomic_add_resize_check(int *addr, int val, int *resize, int capacity) {
const int add_res = *addr + val;
......
......@@ -21,7 +21,10 @@ def simulation(
CGen(ref, debug), shapes, dims, timesteps, double_prec, use_contact_history,
particle_capacity, neighbor_capacity)
def target_cpu():
def target_cpu(parallel=False):
    """Create a ``Target`` that uses ``Target.Backend_CPP`` for CPU execution.

    When ``parallel`` is truthy the features are passed as the list
    ``[Target.Feature_CPU, Target.Feature_OpenMP]``; otherwise the single
    value ``Target.Feature_CPU`` is passed.
    """
    if not parallel:
        return Target(Target.Backend_CPP, Target.Feature_CPU)

    openmp_features = [Target.Feature_CPU, Target.Feature_OpenMP]
    return Target(Target.Backend_CPP, openmp_features)
def target_gpu():
......
......@@ -62,6 +62,10 @@ class CGen:
if self.target.is_gpu():
self.print("#define PAIRS_TARGET_CUDA")
if self.target.is_openmp():
self.print("#define PAIRS_TARGET_OPENMP")
self.print("#include <omp.h>")
self.print("#include <limits.h>")
self.print("#include <math.h>")
self.print("#include <stdbool.h>")
......@@ -507,6 +511,10 @@ class CGen:
iterator = self.generate_expression(ast_node.iterator)
lower_range = self.generate_expression(ast_node.min)
upper_range = self.generate_expression(ast_node.max)
if self.target.is_openmp() and ast_node.is_kernel_candidate():
self.print("#pragma omp parallel for")
self.print(f"for(int {iterator} = {lower_range}; {iterator} < {upper_range}; {iterator}++) {{")
self.generate_statement(ast_node.block)
self.print("}")
......
......@@ -14,6 +14,7 @@ class Target:
Feature_AVX2 = 3
Feature_AVX512 = 4
Feature_GPU = 5
Feature_OpenMP = 6
# Operating system
OS_Unknown = 0
......@@ -37,3 +38,6 @@ class Target:
def is_gpu(self):
    """Return the result of ``has_feature`` for ``Target.Feature_GPU``."""
    feature = Target.Feature_GPU
    return self.has_feature(feature)
def is_openmp(self):
    """Return the result of ``has_feature`` for ``Target.Feature_OpenMP``."""
    feature = Target.Feature_OpenMP
    return self.has_feature(feature)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment