From fa0f36f260277856fa4728f623e0d08f23b5cf0e Mon Sep 17 00:00:00 2001 From: Behzad Safaei <iwia103h@a0522.nhr.fau.de> Date: Wed, 23 Oct 2024 14:32:10 +0200 Subject: [PATCH] Debugging segfault (not solved) --- CMakeLists.txt | 6 ++++++ examples/dem_sd.py | 14 +++++++------- runtime/array.hpp | 14 +++++++------- runtime/contact_property.hpp | 16 ++++++++-------- runtime/devices/cuda.cu | 3 +++ runtime/feature_property.hpp | 14 +++++++------- runtime/property.hpp | 18 +++++++++--------- src/pairs/code_gen/cgen.py | 26 +++++++++++++++++++++----- src/pairs/code_gen/printer.py | 3 +++ src/pairs/ir/kernel.py | 2 +- src/pairs/sim/comm.py | 3 ++- 11 files changed, 74 insertions(+), 45 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b82269..e5e64f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,6 +108,12 @@ if(COMPILE_CUDA) set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH}) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -rdc=true") + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -O0 -DDEBUG") + else() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3") + endif() + if(USE_WALBERLA) waLBerla_add_executable( NAME ${TARGET_BIN} diff --git a/examples/dem_sd.py b/examples/dem_sd.py index 3a8bdfa..5bea2fd 100644 --- a/examples/dem_sd.py +++ b/examples/dem_sd.py @@ -78,7 +78,7 @@ frictionCoefficient = 0.5 restitutionCoefficient = 0.1 collisionTime_SI = 5e-4 poissonsRatio = 0.22 -timeSteps = 10000 +timeSteps = 1 visSpacing = 200 denseBottomLayer = False bottomLayerOffsetFactor = 1.0 @@ -101,7 +101,7 @@ psim = pairs.simulation( use_contact_history=False, particle_capacity=1000000, neighbor_capacity=20, - debug=True, generate_whole_program=False) + debug=True, generate_whole_program=True) if target == 'gpu': psim.target(pairs.target_gpu()) @@ -115,10 +115,10 @@ psim.add_property('linear_velocity', pairs.vector()) psim.add_property('angular_velocity', pairs.vector()) psim.add_property('force', pairs.vector(), volatile=True) psim.add_property('torque', pairs.vector(), volatile=True) -psim.add_property('hydrodynamic_force', pairs.vector()) -psim.add_property('hydrodynamic_torque', pairs.vector()) -psim.add_property('old_hydrodynamic_force', pairs.vector()) -psim.add_property('old_hydrodynamic_torque', pairs.vector()) +# psim.add_property('hydrodynamic_force', pairs.vector()) +# psim.add_property('hydrodynamic_torque', pairs.vector()) +# psim.add_property('old_hydrodynamic_force', pairs.vector()) +# psim.add_property('old_hydrodynamic_torque', pairs.vector()) psim.add_property('radius', pairs.real(), 1.0) psim.add_property('normal', pairs.vector()) psim.add_property('inv_inertia', pairs.matrix()) @@ -134,7 +134,7 @@ psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_S psim.set_domain_partitioner(pairs.regular_domain_partitioner_xy()) psim.pbc([False, False, False]) psim.dem_sc_grid( - domainSize_SI[0], domainSize_SI[1], domainSize_SI[2]/2, generationSpacing_SI, + domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI, diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes) #psim.read_particle_data( diff --git a/runtime/array.hpp b/runtime/array.hpp index 03a5385..d422842 100644 --- a/runtime/array.hpp +++ b/runtime/array.hpp @@ -24,14 +24,14 @@ public: PAIRS_ASSERT(size_ > 0); } - array_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } + array_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; } - void setSize(size_t size_) { size = size_; } - size_t getSize() { return size; }; - bool isStatic() { return is_static; } + void setSize(size_t size_) { size = size_;} + size_t getSize() const { return size; } + bool isStatic() const { return is_static; } }; } diff --git a/runtime/contact_property.hpp b/runtime/contact_property.hpp index ff27343..be31b51 100644 --- a/runtime/contact_property.hpp +++ b/runtime/contact_property.hpp @@ -26,16 +26,16 @@ public: PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0 && sy_ > 0); } - property_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } + property_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; } void setSizes(size_t sx_, size_t sy_) { sx = sx_, sy = sy_; } - size_t getTotalSize() { return sx * sy * getPrimitiveTypeSize(); }; - PropertyType getType() { return type; } - layout_t getLayout() { return layout; } - size_t getPrimitiveTypeSize() { + size_t getTotalSize() const { return sx * sy * getPrimitiveTypeSize(); }; + PropertyType getType() const { return type; } + layout_t getLayout() const { return layout; } + size_t getPrimitiveTypeSize() const { return (type == Prop_Integer) ? sizeof(int) : (type == Prop_UInt64) ? sizeof(unsigned long long int) : (type == Prop_Real) ? sizeof(real_t) : diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu index 38848b2..7ee69c1 100644 --- a/runtime/devices/cuda.cu +++ b/runtime/devices/cuda.cu @@ -96,6 +96,9 @@ __device__ int atomic_add(int *addr, int val) { return atomicAdd(addr, val); } __device__ real_t atomic_add(real_t *addr, real_t val) { return atomicAdd_double(addr, val); } __device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) { const int add_res = *addr + val; + + printf("atomic_add_resize_check::: add_res %d --- val %d --- capacity %d --- resize %d\n", add_res, val, capacity, *resize); + if(add_res >= capacity) { *resize = add_res; return *addr; diff --git a/runtime/feature_property.hpp b/runtime/feature_property.hpp index 025a514..e3221b4 100644 --- a/runtime/feature_property.hpp +++ b/runtime/feature_property.hpp @@ -22,13 +22,13 @@ public: nkinds(nkinds_), array_size(array_size_) {} - property_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } - PropertyType getType() { return type; } - size_t getNumberOfKinds() { return nkinds; } - size_t getArraySize() { return array_size; } + property_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } + PropertyType getType() const { return type; } + size_t getNumberOfKinds() const { return nkinds; } + size_t getArraySize() const { return array_size; } }; } diff --git a/runtime/property.hpp b/runtime/property.hpp index fc08280..301a46b 100644 --- a/runtime/property.hpp +++ b/runtime/property.hpp @@ -31,17 +31,17 @@ public: PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0 && sy_ > 0); } - property_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } + property_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; } void setSizes(size_t sx_, size_t sy_) { sx = sx_, sy = sy_; } - size_t getTotalSize() { return sx * sy * getPrimitiveTypeSize(); }; - PropertyType getType() { return type; } - layout_t getLayout() { return layout; } - int isVolatile() { return vol != 0; } - size_t getPrimitiveTypeSize() { + size_t getTotalSize() const { return sx * sy * getPrimitiveTypeSize(); }; + PropertyType getType() const { return type; } + layout_t getLayout() const { return layout; } + int isVolatile() const { return vol != 0; } + size_t getPrimitiveTypeSize() const { return (type == Prop_Integer) ? sizeof(int) : (type == Prop_UInt64) ? sizeof(unsigned long long int) : (type == Prop_Real) ? sizeof(real_t) : diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index bfdd48c..8af9bba 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -254,21 +254,21 @@ class CGen: def generate_pairs_object_structure(self): self.print("") - + externkw = "" if self.sim._generate_whole_program else "extern " if self.target.is_gpu(): for array in self.sim.arrays.statics(): if array.device_flag: t = array.type() tkw = Types.c_keyword(self.sim, t) size = self.generate_expression(ScalarOp.inline(array.alloc_size())) - self.print(f"extern __constant__ {tkw} d_{array.name()}[{size}];") + self.print(f"{externkw}__constant__ {tkw} d_{array.name()}[{size}];") for feature_prop in self.sim.feature_properties: if feature_prop.device_flag: t = feature_prop.type() tkw = Types.c_keyword(self.sim, t) size = feature_prop.array_size() - self.print(f"extern __constant__ {tkw} d_{feature_prop.name()}[{size}];") + self.print(f"{externkw}__constant__ {tkw} d_{feature_prop.name()}[{size}];") self.print("") self.print("struct PairsObjects {") @@ -501,7 +501,7 @@ class CGen: device_cond = module.run_on_device and self.target.is_gpu() if self.debug: - self.print(f"PAIRS_DEBUG(\"{module.name}\\n\");") + self.print(f"PAIRS_DEBUG(\"\\n{module.name}\\n\");") for var in module.read_only_variables(): type_kw = Types.c_keyword(self.sim, var.type()) @@ -519,7 +519,7 @@ class CGen: type_kw = Types.c_keyword(self.sim, array.type()) name = array.name() if not device_cond else f"d_{array.name()}" # self.generate_full_object_names = True - if not array.is_static() or (array.is_static() and not module.run_on_device): + if not array.is_static() or (array.is_static() and not device_cond): self.print(f"{type_kw} *{array.name()} = pobj->{name};") # self.print(f"{type_kw} *{array.name()} = {self.generate_object_reference(array, device=device_cond)};") # self.generate_full_object_names = False @@ -563,6 +563,7 @@ class CGen: def generate_kernel(self, kernel): kernel_params = "int range_start" + has_resizes = False for var in kernel.read_only_variables(): type_kw = Types.c_keyword(self.sim, var.type()) decl = f"{type_kw} {var.name()}" @@ -579,6 +580,8 @@ class CGen: type_kw = Types.c_keyword(self.sim, array.type()) decl = f"{type_kw} *{array.name()}" kernel_params += f", {decl}" + if array.name() == "resizes": + has_resizes = True for prop in kernel.properties(): type_kw = Types.c_keyword(self.sim, prop.type()) @@ -611,7 +614,15 @@ class CGen: self.print(f" const int {kernel.iterator.name()} = blockIdx.x * blockDim.x + threadIdx.x + range_start;") self.print.add_indent(4) self.kernel_context = True + + if has_resizes: + self.print(f"printf(\"{kernel.name} @@@@@@@@ before kernel: resizes[0] = %d\\n\", resizes[0]);") + self.generate_statement(kernel.block) + + if has_resizes: + self.print(f"printf(\"{kernel.name} @@@@@@@@ after kernel: resizes[0] = %d\\n\", resizes[0]);") + self.kernel_context = False self.print.add_indent(-4) self.print("}") @@ -652,7 +663,10 @@ class CGen: if ast_node.check_for_resize(): resize = self.generate_expression(ast_node.resize) capacity = self.generate_expression(ast_node.capacity) + self.print(f"printf (\" %d -- before AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});") self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});") + self.print(f"printf (\" %d -- after AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});") + else: self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});") @@ -852,7 +866,9 @@ class CGen: self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}, {size}); // {array_name}") else: + self.print(f"std::cout<< \"{Printer.line_id} -- before {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;") self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}); // {array_name}") + self.print(f"std::cout<< \"{Printer.line_id} -- after {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;") if isinstance(ast_node, CopyContactProperty): prop_id = ast_node.contact_prop().id() diff --git a/src/pairs/code_gen/printer.py b/src/pairs/code_gen/printer.py index 4d73b76..3eabef6 100644 --- a/src/pairs/code_gen/printer.py +++ b/src/pairs/code_gen/printer.py @@ -1,4 +1,6 @@ class Printer: + + line_id = 0 def __init__(self, output): self.output = output self.stream = None @@ -16,4 +18,5 @@ class Printer: def __call__(self, text): assert self.stream is not None, "Invalid stream!" + Printer.line_id += 1 self.stream.write(self.indent * ' ' + text + '\n') diff --git a/src/pairs/ir/kernel.py b/src/pairs/ir/kernel.py index 04def29..d40be9e 100644 --- a/src/pairs/ir/kernel.py +++ b/src/pairs/ir/kernel.py @@ -190,7 +190,7 @@ class KernelLaunch(ASTNode): self._iterator = iterator self._range_min = range_min self._range_max = range_max - self._threads_per_block = Lit.cvt(sim, 32) + self._threads_per_block = Lit.cvt(sim, 1) self._nelems = (range_max - range_min) self._nblocks = (self._nelems + self._threads_per_block - 1) / self._threads_per_block diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py index dace3c9..988e71d 100644 --- a/src/pairs/sim/comm.py +++ b/src/pairs/sim/comm.py @@ -9,7 +9,7 @@ from pairs.ir.contexts import Contexts from pairs.ir.device import CopyArray from pairs.ir.functions import Call_Void from pairs.ir.loops import For, ParticleFor, While -from pairs.ir.print import Print +from pairs.ir.print import Print, PrintCode from pairs.ir.select import Select from pairs.ir.sizeof import Sizeof from pairs.ir.types import Types @@ -240,6 +240,7 @@ class DetermineGhostParticles(Lowerable): self.sim.check_resize(self.comm.send_capacity, nsend) #self.sim.check_resize(self.comm.send_capacity, nsend_all) + PrintCode(self.sim, f"std::cout << \"resizes[0] {self.sim._module_name} ========== \" << pobj->resizes[0] << std::endl;") if is_exchange: for i in ParticleFor(self.sim): Assign(self.sim, exchg_flag[i], 0) -- GitLab