diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b8226914ebd09be148e22b13ae30acdf4d5ccad..e5e64f58adc9305ca50a519910012e440bf6a805 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,6 +108,12 @@ if(COMPILE_CUDA) set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH}) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -rdc=true") + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -O0 -DDEBUG") + else() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3") + endif() + if(USE_WALBERLA) waLBerla_add_executable( NAME ${TARGET_BIN} diff --git a/examples/dem_sd.py b/examples/dem_sd.py index 3a8bdfa527935e4eff5f74bd6f7c9b44f3903e83..5bea2fd80c594e2da27cf99393804b9d5cd40c86 100644 --- a/examples/dem_sd.py +++ b/examples/dem_sd.py @@ -78,7 +78,7 @@ frictionCoefficient = 0.5 restitutionCoefficient = 0.1 collisionTime_SI = 5e-4 poissonsRatio = 0.22 -timeSteps = 10000 +timeSteps = 1 visSpacing = 200 denseBottomLayer = False bottomLayerOffsetFactor = 1.0 @@ -101,7 +101,7 @@ psim = pairs.simulation( use_contact_history=False, particle_capacity=1000000, neighbor_capacity=20, - debug=True, generate_whole_program=False) + debug=True, generate_whole_program=True) if target == 'gpu': psim.target(pairs.target_gpu()) @@ -115,10 +115,10 @@ psim.add_property('linear_velocity', pairs.vector()) psim.add_property('angular_velocity', pairs.vector()) psim.add_property('force', pairs.vector(), volatile=True) psim.add_property('torque', pairs.vector(), volatile=True) -psim.add_property('hydrodynamic_force', pairs.vector()) -psim.add_property('hydrodynamic_torque', pairs.vector()) -psim.add_property('old_hydrodynamic_force', pairs.vector()) -psim.add_property('old_hydrodynamic_torque', pairs.vector()) +# psim.add_property('hydrodynamic_force', pairs.vector()) +# psim.add_property('hydrodynamic_torque', pairs.vector()) +# psim.add_property('old_hydrodynamic_force', pairs.vector()) +# psim.add_property('old_hydrodynamic_torque', pairs.vector()) psim.add_property('radius', pairs.real(), 1.0) psim.add_property('normal', pairs.vector()) psim.add_property('inv_inertia', pairs.matrix()) @@ -134,7 +134,7 @@ psim.set_domain([0.0, 0.0, 0.0, domainSize_SI[0], domainSize_SI[1], domainSize_S psim.set_domain_partitioner(pairs.regular_domain_partitioner_xy()) psim.pbc([False, False, False]) psim.dem_sc_grid( - domainSize_SI[0], domainSize_SI[1], domainSize_SI[2]/2, generationSpacing_SI, + domainSize_SI[0], domainSize_SI[1], domainSize_SI[2], generationSpacing_SI, diameter_SI, minDiameter_SI, maxDiameter_SI, initialVelocity_SI, densityParticle_SI, ntypes) #psim.read_particle_data( diff --git a/runtime/array.hpp b/runtime/array.hpp index 03a538543ee1f465104b101ea7bb353d527c4f78..d422842f96cbdb6343c11ca370d76a3f94be29c3 100644 --- a/runtime/array.hpp +++ b/runtime/array.hpp @@ -24,14 +24,14 @@ public: PAIRS_ASSERT(size_ > 0); } - array_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } + array_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; } - void setSize(size_t size_) { size = size_; } - size_t getSize() { return size; }; - bool isStatic() { return is_static; } + void setSize(size_t size_) { size = size_;} + size_t getSize() const { return size; } + bool isStatic() const { return is_static; } }; } diff --git a/runtime/contact_property.hpp b/runtime/contact_property.hpp index ff273434223e15d9c8dd17b23600be7dfe351ffa..be31b51a3dd22fdd5381da8f345d8608bd1666e4 100644 --- a/runtime/contact_property.hpp +++ b/runtime/contact_property.hpp @@ -26,16 +26,16 @@ public: PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0 && sy_ > 0); } - property_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } + property_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; } void setSizes(size_t sx_, size_t sy_) { sx = sx_, sy = sy_; } - size_t getTotalSize() { return sx * sy * getPrimitiveTypeSize(); }; - PropertyType getType() { return type; } - layout_t getLayout() { return layout; } - size_t getPrimitiveTypeSize() { + size_t getTotalSize() const { return sx * sy * getPrimitiveTypeSize(); }; + PropertyType getType() const { return type; } + layout_t getLayout() const { return layout; } + size_t getPrimitiveTypeSize() const { return (type == Prop_Integer) ? sizeof(int) : (type == Prop_UInt64) ? sizeof(unsigned long long int) : (type == Prop_Real) ? sizeof(real_t) : diff --git a/runtime/devices/cuda.cu b/runtime/devices/cuda.cu index 38848b216477104a398aa59bc4d3e10509526e66..7ee69c1ad97e64ffac270ee0bce0802bd4187e69 100644 --- a/runtime/devices/cuda.cu +++ b/runtime/devices/cuda.cu @@ -96,6 +96,9 @@ __device__ int atomic_add(int *addr, int val) { return atomicAdd(addr, val); } __device__ real_t atomic_add(real_t *addr, real_t val) { return atomicAdd_double(addr, val); } __device__ int atomic_add_resize_check(int *addr, int val, int *resize, int capacity) { const int add_res = *addr + val; + + printf("atomic_add_resize_check::: add_res %d --- val %d --- capacity %d --- resize %d\n", add_res, val, capacity, *resize); + if(add_res >= capacity) { *resize = add_res; return *addr; diff --git a/runtime/feature_property.hpp b/runtime/feature_property.hpp index 025a514766a01643c21915e362a24481664af5fd..e3221b4cefcfdc20f7d70d612c2d7be3f90c89ae 100644 --- a/runtime/feature_property.hpp +++ b/runtime/feature_property.hpp @@ -22,13 +22,13 @@ public: nkinds(nkinds_), array_size(array_size_) {} - property_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } - PropertyType getType() { return type; } - size_t getNumberOfKinds() { return nkinds; } - size_t getArraySize() { return array_size; } + property_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } + PropertyType getType() const { return type; } + size_t getNumberOfKinds() const { return nkinds; } + size_t getArraySize() const { return array_size; } }; } diff --git a/runtime/property.hpp b/runtime/property.hpp index fc082802a40e3ef6b054e8a0b13c82bebe150d02..301a46b23a8aca0322e420db6b64cd16ee7d39d4 100644 --- a/runtime/property.hpp +++ b/runtime/property.hpp @@ -31,17 +31,17 @@ public: PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0 && sy_ > 0); } - property_t getId() { return id; } - std::string getName() { return name; } - void *getHostPointer() { return h_ptr; } - void *getDevicePointer() { return d_ptr; } + property_t getId() const { return id; } + std::string getName() const { return name; } + void *getHostPointer() const { return h_ptr; } + void *getDevicePointer() const { return d_ptr; } void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; } void setSizes(size_t sx_, size_t sy_) { sx = sx_, sy = sy_; } - size_t getTotalSize() { return sx * sy * getPrimitiveTypeSize(); }; - PropertyType getType() { return type; } - layout_t getLayout() { return layout; } - int isVolatile() { return vol != 0; } - size_t getPrimitiveTypeSize() { + size_t getTotalSize() const { return sx * sy * getPrimitiveTypeSize(); }; + PropertyType getType() const { return type; } + layout_t getLayout() const { return layout; } + int isVolatile() const { return vol != 0; } + size_t getPrimitiveTypeSize() const { return (type == Prop_Integer) ? sizeof(int) : (type == Prop_UInt64) ? sizeof(unsigned long long int) : (type == Prop_Real) ? sizeof(real_t) : diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index bfdd48c9134531c61dd2f32989da4ab4b11b0e80..8af9bbadca2c22537df913a3fcee31c771f7d8ba 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -254,21 +254,21 @@ class CGen: def generate_pairs_object_structure(self): self.print("") - + externkw = "" if self.sim._generate_whole_program else "extern " if self.target.is_gpu(): for array in self.sim.arrays.statics(): if array.device_flag: t = array.type() tkw = Types.c_keyword(self.sim, t) size = self.generate_expression(ScalarOp.inline(array.alloc_size())) - self.print(f"extern __constant__ {tkw} d_{array.name()}[{size}];") + self.print(f"{externkw}__constant__ {tkw} d_{array.name()}[{size}];") for feature_prop in self.sim.feature_properties: if feature_prop.device_flag: t = feature_prop.type() tkw = Types.c_keyword(self.sim, t) size = feature_prop.array_size() - self.print(f"extern __constant__ {tkw} d_{feature_prop.name()}[{size}];") + self.print(f"{externkw}__constant__ {tkw} d_{feature_prop.name()}[{size}];") self.print("") self.print("struct PairsObjects {") @@ -501,7 +501,7 @@ class CGen: device_cond = module.run_on_device and self.target.is_gpu() if self.debug: - self.print(f"PAIRS_DEBUG(\"{module.name}\\n\");") + self.print(f"PAIRS_DEBUG(\"\\n{module.name}\\n\");") for var in module.read_only_variables(): type_kw = Types.c_keyword(self.sim, var.type()) @@ -519,7 +519,7 @@ class CGen: type_kw = Types.c_keyword(self.sim, array.type()) name = array.name() if not device_cond else f"d_{array.name()}" # self.generate_full_object_names = True - if not array.is_static() or (array.is_static() and not module.run_on_device): + if not array.is_static() or (array.is_static() and not device_cond): self.print(f"{type_kw} *{array.name()} = pobj->{name};") # self.print(f"{type_kw} *{array.name()} = {self.generate_object_reference(array, device=device_cond)};") # self.generate_full_object_names = False @@ -563,6 +563,7 @@ class CGen: def generate_kernel(self, kernel): kernel_params = "int range_start" + has_resizes = False for var in kernel.read_only_variables(): type_kw = Types.c_keyword(self.sim, var.type()) decl = f"{type_kw} {var.name()}" @@ -579,6 +580,8 @@ class CGen: type_kw = Types.c_keyword(self.sim, array.type()) decl = f"{type_kw} *{array.name()}" kernel_params += f", {decl}" + if array.name() == "resizes": + has_resizes = True for prop in kernel.properties(): type_kw = Types.c_keyword(self.sim, prop.type()) @@ -611,7 +614,15 @@ class CGen: self.print(f" const int {kernel.iterator.name()} = blockIdx.x * blockDim.x + threadIdx.x + range_start;") self.print.add_indent(4) self.kernel_context = True + + if has_resizes: + self.print(f"printf(\"{kernel.name} @@@@@@@@ before kernel: resizes[0] = %d\\n\", resizes[0]);") + self.generate_statement(kernel.block) + + if has_resizes: + self.print(f"printf(\"{kernel.name} @@@@@@@@ after kernel: resizes[0] = %d\\n\", resizes[0]);") + self.kernel_context = False self.print.add_indent(-4) self.print("}") @@ -652,7 +663,10 @@ class CGen: if ast_node.check_for_resize(): resize = self.generate_expression(ast_node.resize) capacity = self.generate_expression(ast_node.capacity) + self.print(f"printf (\" %d -- before AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});") self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});") + self.print(f"printf (\" %d -- after AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});") + else: self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});") @@ -852,7 +866,9 @@ class CGen: self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}, {size}); // {array_name}") else: + self.print(f"std::cout<< \"{Printer.line_id} -- before {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;") self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}); // {array_name}") + self.print(f"std::cout<< \"{Printer.line_id} -- after {array_name} copyArrayTo{ctx_suffix}({action}) === \" << pobj->{array_name}[0] << \" \" << pobj->{array_name}[1] << \" \" << pobj->{array_name}[2] << std::endl;") if isinstance(ast_node, CopyContactProperty): prop_id = ast_node.contact_prop().id() diff --git a/src/pairs/code_gen/printer.py b/src/pairs/code_gen/printer.py index 4d73b7679f7f8c52ff1878339dbc0479cb9db215..3eabef6f95c22b6d2f16d168a97437cb1fe00d9f 100644 --- a/src/pairs/code_gen/printer.py +++ b/src/pairs/code_gen/printer.py @@ -1,4 +1,6 @@ class Printer: + + line_id = 0 def __init__(self, output): self.output = output self.stream = None @@ -16,4 +18,5 @@ class Printer: def __call__(self, text): assert self.stream is not None, "Invalid stream!" + Printer.line_id += 1 self.stream.write(self.indent * ' ' + text + '\n') diff --git a/src/pairs/ir/kernel.py b/src/pairs/ir/kernel.py index 04def29cf9153a8ec7f5048f79579bd062186dd0..d40be9e355afc5c1b31ea24abf6605825f8695de 100644 --- a/src/pairs/ir/kernel.py +++ b/src/pairs/ir/kernel.py @@ -190,7 +190,7 @@ class KernelLaunch(ASTNode): self._iterator = iterator self._range_min = range_min self._range_max = range_max - self._threads_per_block = Lit.cvt(sim, 32) + self._threads_per_block = Lit.cvt(sim, 1) self._nelems = (range_max - range_min) self._nblocks = (self._nelems + self._threads_per_block - 1) / self._threads_per_block diff --git a/src/pairs/sim/comm.py b/src/pairs/sim/comm.py index dace3c9aeed7be47f1c120dcd05034628c6b1f64..988e71d0bcb2f325b5582c819fd010e221ab50ca 100644 --- a/src/pairs/sim/comm.py +++ b/src/pairs/sim/comm.py @@ -9,7 +9,7 @@ from pairs.ir.contexts import Contexts from pairs.ir.device import CopyArray from pairs.ir.functions import Call_Void from pairs.ir.loops import For, ParticleFor, While -from pairs.ir.print import Print +from pairs.ir.print import Print, PrintCode from pairs.ir.select import Select from pairs.ir.sizeof import Sizeof from pairs.ir.types import Types @@ -240,6 +240,7 @@ class DetermineGhostParticles(Lowerable): self.sim.check_resize(self.comm.send_capacity, nsend) #self.sim.check_resize(self.comm.send_capacity, nsend_all) + PrintCode(self.sim, f"std::cout << \"resizes[0] {self.sim._module_name} ========== \" << pobj->resizes[0] << std::endl;") if is_exchange: for i in ParticleFor(self.sim): Assign(self.sim, exchg_flag[i], 0)