From 7b93c1b414a8ce8bc60d705f5569ceff2f02a9a0 Mon Sep 17 00:00:00 2001 From: Behzad Safaei <iwia103h@alex2.nhr.fau.de> Date: Tue, 11 Mar 2025 18:15:19 +0100 Subject: [PATCH] Fix LIKWID, reorganize timers, add rank-aware logger --- CMakeLists.txt | 6 +- examples/modular/spring_dashpot.py | 2 +- runtime/pairs.cpp | 66 +++++------ runtime/pairs.hpp | 5 + runtime/pairs_common.hpp | 7 +- runtime/timers.hpp | 111 ++++++++++++------ runtime/timing.cpp | 4 + runtime/timing.hpp | 1 + src/pairs/code_gen/cgen.py | 10 +- src/pairs/code_gen/interface.py | 63 ++-------- src/pairs/ir/block.py | 18 ++- src/pairs/ir/module.py | 25 ++-- src/pairs/ir/timers.py | 14 +-- src/pairs/mapping/funcs.py | 16 +-- src/pairs/sim/global_interaction.py | 13 --- src/pairs/sim/instrumentation.py | 17 ++- src/pairs/sim/interaction.py | 9 +- src/pairs/sim/simulation.py | 117 +++---------------- src/pairs/sim/timestep.py | 72 ------------ src/pairs/transformations/__init__.py | 6 +- src/pairs/transformations/devices.py | 13 --- src/pairs/transformations/instrumentation.py | 5 +- 22 files changed, 215 insertions(+), 385 deletions(-) delete mode 100644 src/pairs/sim/timestep.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 5de4c9f..4b968da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -250,10 +250,12 @@ endif() # LIKWID ======================================================================== #================================================================================ if(LIKWID_DIR) + # Example: -DLIKWID_DIR=/apps/likwid/5.4.0 + target_compile_options(${PAIRS_TARGET} PRIVATE -DLIKWID_PERFMON -pthread) - target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.a) - list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.a) + target_link_libraries(${PAIRS_TARGET} PRIVATE ${LIKWID_DIR}/lib/liblikwid.so) + list(APPEND PAIRS_LINK_LIBRARIES ${LIKWID_DIR}/lib/liblikwid.so) include_directories(${LIKWID_DIR}/include) list(APPEND PAIRS_INCLUDE_DIRS "${LIKWID_DIR}/include") diff --git a/examples/modular/spring_dashpot.py b/examples/modular/spring_dashpot.py index d88c8a7..9b68d54 100644 --- a/examples/modular/spring_dashpot.py +++ b/examples/modular/spring_dashpot.py @@ -98,7 +98,7 @@ psim.build_cell_lists() # The order of user-defined functions is not important here since # they are not used by other subroutines and are only callable individually psim.compute(update_mass_and_inertia, symbols={'infinity': math.inf }) -psim.compute(spring_dashpot) +psim.compute(spring_dashpot, profile=True) psim.compute(euler, parameters={'dt': pairs.real()}) gravity_SI = 9.81 diff --git a/runtime/pairs.cpp b/runtime/pairs.cpp index 351aa36..a238282 100644 --- a/runtime/pairs.cpp +++ b/runtime/pairs.cpp @@ -190,9 +190,12 @@ void PairsRuntime::copyArraySliceToDevice( // PAIRS_DEBUG( // "Copying array %s to device (offset=%lu, n=%lu)\n", // array.getName().c_str(), offset, size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_slice_to_device( array.getHostPointer(), array.getDevicePointer(), offset, size); + + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } } @@ -209,6 +212,7 @@ void PairsRuntime::copyArrayToDevice(Array &array, action_t action, size_t size) if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isDeviceFlagSet(array_id)) { + this->getTimers()->start(TimerMarkers::DeviceTransfers); if(array.isStatic()) { // PAIRS_DEBUG( // "Copying static array %s to device (n=%lu)\n", @@ -223,6 +227,7 @@ void PairsRuntime::copyArrayToDevice(Array &array, action_t action, size_t size) pairs::copy_to_device(array.getHostPointer(), array.getDevicePointer(), size); } + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } @@ -242,9 +247,12 @@ void PairsRuntime::copyArraySliceToHost(Array &array, action_t action, size_t of // PAIRS_DEBUG( // "Copying array %s to host (offset=%lu, n=%lu)\n", // array.getName().c_str(), offset, size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_slice_to_host( array.getDevicePointer(), array.getHostPointer(), offset, size); + + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } } @@ -261,6 +269,8 @@ void PairsRuntime::copyArrayToHost(Array &array, action_t action, size_t size) { if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !array_flags->isHostFlagSet(array_id)) { + this->getTimers()->start(TimerMarkers::DeviceTransfers); + if(array.isStatic()) { // PAIRS_DEBUG( // "Copying static array %s to host (n=%lu)\n", array.getName().c_str(), size); @@ -271,6 +281,7 @@ void PairsRuntime::copyArrayToHost(Array &array, action_t action, size_t size) { // PAIRS_DEBUG("Copying array %s to host (n=%lu)\n", array.getName().c_str(), size); pairs::copy_to_host(array.getDevicePointer(), array.getHostPointer(), size); } + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } @@ -287,7 +298,9 @@ void PairsRuntime::copyPropertyToDevice(Property &prop, action_t action, size_t if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !prop_flags->isDeviceFlagSet(prop_id)) { // PAIRS_DEBUG("Copying property %s to device (n=%lu)\n", prop.getName().c_str(), size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_to_device(prop.getHostPointer(), prop.getDevicePointer(), size); + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } @@ -304,7 +317,9 @@ void PairsRuntime::copyPropertyToHost(Property &prop, action_t action, size_t si if(action == Ignore || action == WriteAfterRead || action == ReadOnly) { if(action == Ignore || !prop_flags->isHostFlagSet(prop_id)) { // PAIRS_DEBUG("Copying property %s to host (n=%lu)\n", prop.getName().c_str(), size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_to_host(prop.getDevicePointer(), prop.getHostPointer(), size); + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } } @@ -324,9 +339,12 @@ void PairsRuntime::copyContactPropertyToDevice( if(action == Ignore || !contact_prop_flags->isDeviceFlagSet(prop_id)) { // PAIRS_DEBUG("Copying contact property %s to device (n=%lu)\n", // contact_prop.getName().c_str(), size); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_to_device( contact_prop.getHostPointer(), contact_prop.getDevicePointer(), size); + + this->getTimers()->stop(TimerMarkers::DeviceTransfers); contact_prop_flags->setDeviceFlag(prop_id); } @@ -346,9 +364,10 @@ void PairsRuntime::copyContactPropertyToHost( if(!contact_prop_flags->isHostFlagSet(contact_prop.getId())) { // PAIRS_DEBUG("Copying contact property %s to host (n=%lu)\n", // contact_prop.getName().c_str(), size); - + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_to_host( contact_prop.getDevicePointer(), contact_prop.getHostPointer(), size); + this->getTimers()->stop(TimerMarkers::DeviceTransfers); contact_prop_flags->setHostFlag(prop_id); } @@ -364,24 +383,25 @@ void PairsRuntime::copyFeaturePropertyToDevice(FeatureProperty &feature_prop) { // PAIRS_DEBUG("Copying feature property %s to device (n=%lu)\n", // feature_prop.getName().c_str(), n); + this->getTimers()->start(TimerMarkers::DeviceTransfers); pairs::copy_static_symbol_to_device( feature_prop.getHostPointer(), feature_prop.getDevicePointer(), n); + + this->getTimers()->stop(TimerMarkers::DeviceTransfers); } void PairsRuntime::communicateSizes(int dim, const int *send_sizes, int *recv_sizes) { auto nsend_id = getArrayByHostPointer(send_sizes).getId(); auto nrecv_id = getArrayByHostPointer(recv_sizes).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(nsend_id, ReadOnly); array_flags->setHostFlag(nrecv_id); array_flags->clearDeviceFlag(nrecv_id); - this->getTimers()->stop(DeviceTransfers); - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateSizes(dim, send_sizes, recv_sizes); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); } void PairsRuntime::communicateData( @@ -400,7 +420,6 @@ void PairsRuntime::communicateData( auto nsend_id = getArrayByHostPointer(nsend).getId(); auto nrecv_id = getArrayByHostPointer(nrecv).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(send_offsets_id, ReadOnly); copyArrayToHost(recv_offsets_id, ReadOnly); copyArrayToHost(nsend_id, ReadOnly); @@ -433,17 +452,13 @@ void PairsRuntime::communicateData( array_flags->clearDeviceFlag(recv_buf_id); #endif - this->getTimers()->stop(DeviceTransfers); - - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateData( dim, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); #ifndef ENABLE_CUDA_AWARE_MPI - this->getTimers()->start(DeviceTransfers); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); - this->getTimers()->stop(DeviceTransfers); #endif } @@ -463,7 +478,6 @@ void PairsRuntime::communicateDataReverse( auto nsend_id = getArrayByHostPointer(nsend).getId(); auto nrecv_id = getArrayByHostPointer(nrecv).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(send_offsets_id, ReadOnly); copyArrayToHost(recv_offsets_id, ReadOnly); copyArrayToHost(nsend_id, ReadOnly); @@ -496,17 +510,13 @@ void PairsRuntime::communicateDataReverse( array_flags->clearDeviceFlag(recv_buf_id); #endif - this->getTimers()->stop(DeviceTransfers); - - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateDataReverse( dim, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); #ifndef ENABLE_CUDA_AWARE_MPI - this->getTimers()->start(DeviceTransfers); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); - this->getTimers()->stop(DeviceTransfers); #endif } @@ -526,7 +536,6 @@ void PairsRuntime::communicateAllData( auto nsend_id = getArrayByHostPointer(nsend).getId(); auto nrecv_id = getArrayByHostPointer(nrecv).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(send_offsets_id, ReadOnly); copyArrayToHost(recv_offsets_id, ReadOnly); copyArrayToHost(nsend_id, ReadOnly); @@ -559,17 +568,13 @@ void PairsRuntime::communicateAllData( array_flags->clearDeviceFlag(recv_buf_id); #endif - this->getTimers()->stop(DeviceTransfers); - - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateAllData( ndims, elem_size, send_buf_ptr, send_offsets, nsend, recv_buf_ptr, recv_offsets, nrecv); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); #ifndef ENABLE_CUDA_AWARE_MPI - this->getTimers()->start(DeviceTransfers); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * elem_size * sizeof(real_t)); - this->getTimers()->stop(DeviceTransfers); #endif } @@ -589,7 +594,6 @@ void PairsRuntime::communicateContactHistoryData( auto nsend_contact_id = getArrayByHostPointer(nsend_contact).getId(); auto nrecv_contact_id = getArrayByHostPointer(nrecv_contact).getId(); - this->getTimers()->start(DeviceTransfers); copyArrayToHost(contact_soffsets_id, ReadOnly); copyArrayToHost(nsend_contact_id, ReadOnly); @@ -610,9 +614,7 @@ void PairsRuntime::communicateContactHistoryData( array_flags->clearDeviceFlag(recv_buf_id); #endif - this->getTimers()->stop(DeviceTransfers); - - this->getTimers()->start(Communication); + this->getTimers()->start(TimerMarkers::MPI); this->getDomainPartitioner()->communicateSizes(dim, nsend_contact, nrecv_contact); contact_roffsets[dim * 2 + 0] = 0; @@ -629,13 +631,11 @@ void PairsRuntime::communicateContactHistoryData( send_buf_ptr, contact_soffsets, nsend_contact, recv_buf_ptr, contact_roffsets, nrecv_contact); - this->getTimers()->stop(Communication); + this->getTimers()->stop(TimerMarkers::MPI); #ifndef ENABLE_CUDA_AWARE_MPI - this->getTimers()->start(DeviceTransfers); copyArrayToDevice(recv_buf_id, Ignore, nrecv_all * sizeof(real_t)); copyArrayToDevice(contact_roffsets_id, Ignore); - this->getTimers()->stop(DeviceTransfers); #endif } diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp index d582d27..d77be18 100644 --- a/runtime/pairs.hpp +++ b/runtime/pairs.hpp @@ -362,11 +362,16 @@ public: // Timers Timers<double> *getTimers() { return timers; } + void printTimers() { if(this->getDomainPartitioner()->getRank() == 0) { this->getTimers()->print(); } } + + void logTimers() { + this->getTimers()->writeToFile(this->getDomainPartitioner()->getRank()); + } }; template<typename Domain_T> diff --git a/runtime/pairs_common.hpp b/runtime/pairs_common.hpp index 03766e1..af65189 100644 --- a/runtime/pairs_common.hpp +++ b/runtime/pairs_common.hpp @@ -79,10 +79,9 @@ enum Actions { }; enum TimerMarkers { - All = 0, - Communication = 1, - DeviceTransfers = 2, - Offset = 3 + MPI = 0, + DeviceTransfers = 1, + Offset = 2 }; enum DomainPartitioners { diff --git a/runtime/timers.hpp b/runtime/timers.hpp index c4cdc94..8e399dd 100644 --- a/runtime/timers.hpp +++ b/runtime/timers.hpp @@ -2,6 +2,8 @@ #include <chrono> #include <iostream> #include <unordered_map> +#include <sstream> +#include <iomanip> #pragma once @@ -17,61 +19,98 @@ public: void add(size_t id, std::string name) { counter_names.resize(id + 1); - counters.resize(id + 1); + time_counters.resize(id + 1); + call_counters.resize(id + 1); clocks.resize(id + 1); counter_names[id] = name; } - void start(size_t id) { clocks[id] = std::chrono::high_resolution_clock::now(); } + void start(size_t id) { clocks[id] = std::chrono::high_resolution_clock::now(); ++call_counters[id];} void stop(size_t id) { auto current_clock = std::chrono::high_resolution_clock::now(); - counters[id] += static_cast<TimeType>( + time_counters[id] += static_cast<TimeType>( std::chrono::duration_cast<TimeUnit>(current_clock - clocks[id]).count()) * time_factor; } - void print() { - std::unordered_map<std::string, TimeType> categorySums; - - std::cout << "all: " << counters[0] << std::endl; - for (size_t i = 1; i < counters.size(); ++i) { + void writeToFile(int rank){ + MPI_File file; + MPI_File_open(MPI_COMM_WORLD, "timers.txt", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file); + + std::ostringstream ss; + ss << "Rank: " << rank << "\n"; + ss << std::left << std::setw(80) << "Timer" + << std::left << std::setw(15) << "Total [ms]" + << std::left << std::setw(15) << "Count" << "\n"; + ss << "--------------------------------------------------------------------------------------------------------\n"; + + // Modules + for (size_t i = TimerMarkers::Offset; i < time_counters.size(); ++i) { const std::string& counterName = counter_names[i]; - TimeType counterValue = counters[i]; - - if(counterName.find("pack_") == 0 || - counterName.find("unpack_") == 0 || - counterName.find("determine_") == 0 || - counterName.find("set_communication_") == 0 || - counterName.find("remove_exchanged_particles") == 0 || - counterName.find("change_size_after_exchange") == 0) { - - categorySums["communication"] += counterValue; - - } else if(counterName.find("build_cell_lists") == 0 || - counterName.find("build_cell_lists_stencil") == 0 || - counterName.find("partition_cell_lists") == 0 || - counterName.find("build_neighbor_lists") == 0) { - - categorySums["neighbors"] += counterValue; - - } else { - if(counterName.length() > 0) { - categorySums[counterName] += counterValue; - } else { - categorySums["other"] += counterValue; - } + if(counterName.length() > 0) { + ss << std::left << std::setw(80) << counter_names[i] + << std::left << std::setw(15) << std::fixed << std::setprecision(2) << time_counters[i] + << std::left << std::setw(15) << call_counters[i] + << "\n"; } } - // Print the accumulated sums for each category - for(const auto& category: categorySums) { - std::cout << category.first << ": " << category.second << std::endl; + // Markers + for (size_t i = 0; i < TimerMarkers::Offset; ++i) { + ss << std::left << std::setw(80) << counter_names[i] + << std::left << std::setw(15) << std::fixed << std::setprecision(2) << time_counters[i] + << std::left << std::setw(15) << 1 + << "\n"; + } + + computeCategories(); + + // Categories + for (const auto& cs : categorySums) {; + ss << std::left << std::setw(80) << cs.first + << std::left << std::setw(15) << std::fixed << std::setprecision(2) << cs.second + << std::left << std::setw(15) << 1 + << "\n"; + } + ss << "\n\n"; + + std::string output = ss.str(); + MPI_File_write_ordered(file, output.c_str(), output.size(), MPI_CHAR, MPI_STATUS_IGNORE); + MPI_File_close(&file); + } + + void print(){ + } + + void computeCategories() { + for (size_t i = 0; i < time_counters.size(); ++i) { + const std::string& counterName = counter_names[i]; + TimeType counterValue = time_counters[i]; + + if(counterName.find("INTERNAL_MODULES::pack_") == 0 || + counterName.find("INTERNAL_MODULES::unpack_") == 0 || + counterName.find("INTERNAL_MODULES::determine_") == 0 || + counterName.find("INTERNAL_MODULES::set_communication_") == 0 || + counterName.find("INTERNAL_MODULES::remove_exchanged_particles") == 0 || + counterName.find("INTERNAL_MODULES::change_size_after_exchange") == 0) { + + categorySums["INTERNAL_CATEGORIES::COMMUNICATION"] += counterValue; + + } else if(counterName.find("INTERNAL_MODULES::build_cell_lists") == 0 || + counterName.find("INTERNAL_MODULES::build_cell_lists_stencil") == 0 || + counterName.find("INTERNAL_MODULES::partition_cell_lists") == 0 || + counterName.find("INTERNAL_MODULES::build_neighbor_lists") == 0) { + + categorySums["INTERNAL_CATEGORIES::NEIGHBORS"] += counterValue; + } } } private: std::vector<std::string> counter_names; - std::vector<TimeType> counters; + std::vector<TimeType> time_counters; + std::vector<int> call_counters; + std::unordered_map<std::string, TimeType> categorySums; std::vector<std::chrono::high_resolution_clock::time_point> clocks; TimeType time_factor; }; diff --git a/runtime/timing.cpp b/runtime/timing.cpp index 0068d81..6addb69 100644 --- a/runtime/timing.cpp +++ b/runtime/timing.cpp @@ -20,4 +20,8 @@ void print_timers(PairsRuntime *ps) { ps->printTimers(); } +void log_timers(PairsRuntime *ps) { + ps->logTimers(); +} + } diff --git a/runtime/timing.hpp b/runtime/timing.hpp index f754460..80662e9 100644 --- a/runtime/timing.hpp +++ b/runtime/timing.hpp @@ -10,5 +10,6 @@ void register_timer(PairsRuntime *ps, int id, std::string name); void start_timer(PairsRuntime *ps, int id); void stop_timer(PairsRuntime *ps, int id); void print_timers(PairsRuntime *ps); +void log_timers(PairsRuntime *ps); } diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py index c8078a8..3258452 100644 --- a/src/pairs/code_gen/cgen.py +++ b/src/pairs/code_gen/cgen.py @@ -198,9 +198,8 @@ class CGen: self.print("namespace pairs::internal {") self.print.add_indent(4) - # All modules except the interface ones are declared in the pairs::internal scope - for module in self.sim.modules() + self.sim.udf_modules(): - assert not module.interface + # All internal modules are declared in the pairs::internal scope + for module in self.sim.modules(): self.generate_module_header(module, definition=False) self.print.add_indent(-4) @@ -346,9 +345,8 @@ class CGen: for kernel in self.sim.kernels(): self.generate_kernel(kernel) - # All modules except the interface ones are defined in the pairs::internal scope - for module in self.sim.modules() + self.sim.udf_modules(): - assert not module.interface + # All internal modules are defined in the pairs::internal scope + for module in self.sim.modules(): self.generate_module(module) self.print.add_indent(-4) diff --git a/src/pairs/code_gen/interface.py b/src/pairs/code_gen/interface.py index 78d5a85..228a7ae 100644 --- a/src/pairs/code_gen/interface.py +++ b/src/pairs/code_gen/interface.py @@ -73,6 +73,9 @@ class InterfaceModules: RegisterMarkers(self.sim) ]) + if self.sim._enable_profiler: + PrintCode(self.sim, "LIKWID_MARKER_INIT;") + self.sim.add_statement(inits) @pairs_interface_block @@ -186,63 +189,15 @@ class InterfaceModules: self.sim.module_name('size') Return(self.sim, ScalarOp.inline(self.sim.nlocal + self.sim.nghost)) - @pairs_interface_block - def create_sphere(self): - self.sim.module_name('create_sphere') - x = Parameter(self.sim, 'x', Types.Real) - y = Parameter(self.sim, 'y', Types.Real) - z = Parameter(self.sim, 'z', Types.Real) - vx = Parameter(self.sim, 'vx', Types.Real) - vy = Parameter(self.sim, 'vy', Types.Real) - vz = Parameter(self.sim, 'vz', Types.Real) - density = Parameter(self.sim, 'density', Types.Real) - radius = Parameter(self.sim, 'radius', Types.Real) - ptype = Parameter(self.sim, 'type', Types.Real) - flag = Parameter(self.sim, 'flag', Types.Real) - - Return(self.sim, Call(self.sim, "pairs::create_sphere", - [x, y, z, vx, vy, vz, - density, radius, ptype, flag], Types.UInt64)) - - @pairs_interface_block - def create_halfspace(self): - self.sim.module_name('create_halfspace') - x = Parameter(self.sim, 'x', Types.Real) - y = Parameter(self.sim, 'y', Types.Real) - z = Parameter(self.sim, 'z', Types.Real) - nx = Parameter(self.sim, 'nx', Types.Real) - ny = Parameter(self.sim, 'ny', Types.Real) - nz = Parameter(self.sim, 'nz', Types.Real) - ptype = Parameter(self.sim, 'type', Types.Real) - flag = Parameter(self.sim, 'flag', Types.Real) - - Return(self.sim, Call(self.sim, "pairs::create_halfspace", - [x, y, z, nx, ny, nz, ptype, flag], Types.UInt64)) - - @pairs_interface_block - def dem_sc_grid(self): - self.sim.module_name('dem_sc_grid') - xmax = Parameter(self.sim, 'xmax', Types.Real) - ymax = Parameter(self.sim, 'ymax', Types.Real) - zmax = Parameter(self.sim, 'zmax', Types.Real) - spacing = Parameter(self.sim, 'spacing', Types.Real) - diameter = Parameter(self.sim, 'diameter', Types.Real) - min_diameter = Parameter(self.sim, 'min_diameter', Types.Real) - max_diameter = Parameter(self.sim, 'max_diameter', Types.Real) - initial_velocity = Parameter(self.sim, 'initial_velocity', Types.Real) - particle_density = Parameter(self.sim, 'particle_density', Types.Real) - ntypes = Parameter(self.sim, 'ntypes', Types.Int32) - - Assign(self.sim, self.sim.nlocal, - Call_Int(self.sim, "pairs::dem_sc_grid", - [xmax, ymax, zmax, spacing, diameter, min_diameter, max_diameter, - initial_velocity, particle_density, ntypes])) - Return(self.sim, self.sim.nlocal) - @pairs_interface_block def end(self): self.sim.module_name('end') - # Call_Void(self.sim, "pairs::print_timers", []) + + if self.sim._enable_profiler: + PrintCode(self.sim, "LIKWID_MARKER_CLOSE;") + + Call_Void(self.sim, "pairs::print_timers", []) + Call_Void(self.sim, "pairs::log_timers", []) Call_Void(self.sim, "pairs::print_stats", [self.sim.nlocal, self.sim.nghost]) PrintCode(self.sim, "delete pobj;") PrintCode(self.sim, "delete pairs_runtime;") diff --git a/src/pairs/ir/block.py b/src/pairs/ir/block.py index 2a14ea2..5883d41 100644 --- a/src/pairs/ir/block.py +++ b/src/pairs/ir/block.py @@ -12,9 +12,20 @@ def pairs_inline(func): return inner +def pairs_block(func): + """This decorator needs the owning class of the method being decorated to have a 'run_on_device' member. + If the variable doesn't exist it defaults to False here, hence pairs_host_block gets used.""" + def wrapper(*args, **kwargs): + self = args[0] + decorator = pairs_device_block if getattr(self, "run_on_device", False) else pairs_host_block + return decorator(func)(*args, **kwargs) + return wrapper + + def pairs_host_block(func): def inner(*args, **kwargs): sim = args[0].sim # self.sim + profile = getattr(args[0], "profile", False) sim.init_block() func(*args, **kwargs) return Module(sim, @@ -22,7 +33,8 @@ def pairs_host_block(func): block=Block(sim, sim._block), resizes_to_check=sim._resizes_to_check, check_properties_resize=sim._check_properties_resize, - run_on_device=False) + run_on_device=False, + profile=profile) return inner @@ -30,6 +42,7 @@ def pairs_host_block(func): def pairs_device_block(func): def inner(*args, **kwargs): sim = args[0].sim # self.sim + profile = getattr(args[0], "profile", False) sim.init_block() func(*args, **kwargs) return Module(sim, @@ -37,7 +50,8 @@ def pairs_device_block(func): block=Block(sim, sim._block), resizes_to_check=sim._resizes_to_check, check_properties_resize=sim._check_properties_resize, - run_on_device=True) + run_on_device=True, + profile=profile) return inner diff --git a/src/pairs/ir/module.py b/src/pairs/ir/module.py index 3f2df80..c7937ed 100644 --- a/src/pairs/ir/module.py +++ b/src/pairs/ir/module.py @@ -16,8 +16,8 @@ class Module(ASTNode): block=None, resizes_to_check={}, check_properties_resize=False, - run_on_device=False, - user_defined=False, + run_on_device=False, + profile=False, interface=False): super().__init__(sim) self._id = Module.last_module @@ -34,20 +34,17 @@ class Module(ASTNode): self._resizes_to_check = resizes_to_check self._check_properties_resize = check_properties_resize self._run_on_device = run_on_device - self._user_defined = user_defined self._interface = interface self._return_type = Types.Void - self._profile = False + self._profile = profile + + if profile: + self.sim.enable_profiler() - if user_defined: - assert not interface, ("User-defined modules can't be part of the interface directly." - "Wrap them inside seperate interface modules.") - sim.add_udf_module(self) + if interface: + sim.add_interface_module(self) else: - if interface: - sim.add_interface_module(self) - else: - sim.add_module(self) + sim.add_module(self) Module.last_module += 1 @@ -70,10 +67,6 @@ class Module(ASTNode): def run_on_device(self): return self._run_on_device - @property - def user_defined(self): - return self._user_defined - @property def interface(self): return self._interface diff --git a/src/pairs/ir/timers.py b/src/pairs/ir/timers.py index b1cd21c..928a668 100644 --- a/src/pairs/ir/timers.py +++ b/src/pairs/ir/timers.py @@ -1,12 +1,10 @@ class Timers: Invalid = -1 - All = 0 - Communication = 1 - DeviceTransfers = 2 - Offset = 3 + Communication = 0 + DeviceTransfers = 1 + Offset = 2 def name(timer): - return "all" if timer == Timers.All else \ - "mpi" if timer == Timers.Communication else \ - "transfers" if timer == Timers.DeviceTransfers else \ - "invalid" + return "MARKERS::MPI" if timer == Timers.Communication else \ + "MARKERS::DEVICE_TRANSFERS" if timer == Timers.DeviceTransfers else \ + "INVALID" diff --git a/src/pairs/mapping/funcs.py b/src/pairs/mapping/funcs.py index 2523ba4..c845787 100644 --- a/src/pairs/mapping/funcs.py +++ b/src/pairs/mapping/funcs.py @@ -15,7 +15,7 @@ from pairs.sim.flags import Flags from pairs.sim.interaction import ParticleInteraction from pairs.sim.global_interaction import GlobalLocalInteraction, GlobalGlobalInteraction, GlobalReduction, SortGlobals, PackGlobals, ResetReductionProps, ReduceGlobals, UnpackGlobals from pairs.ir.module import Module -from pairs.ir.block import Block, pairs_device_block +from pairs.ir.block import Block, pairs_block from pairs.sim.lowerable import Lowerable class UndefinedSymbol(): @@ -288,10 +288,12 @@ class BuildParticleIR(ast.NodeVisitor): return op_class(self.sim, operand, None, BuildParticleIR.get_unary_op(node.op)) class OneBodyKernel(Lowerable): - def __init__(self, sim, module_name): + def __init__(self, sim, module_name, run_on_device, profile): super().__init__(sim) self.block = Block(sim, []) self.module_name = module_name + self.run_on_device = run_on_device + self.profile = profile def add_statement(self, stmt): self.block.add_statement(stmt) @@ -303,12 +305,12 @@ class OneBodyKernel(Lowerable): yield i self.sim.leave() - @pairs_device_block + @pairs_block def lower(self): self.sim.module_name(self.module_name) self.sim.add_statement(self.block) -def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False): +def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False, run_on_device=True, profile=False): if sim._generate_whole_program: assert not parameters, "Compute functions can't take custom parameters when generating whole program." @@ -333,14 +335,14 @@ def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, compute_gl sim.module_name(func.__name__) if nparams == 1: - for i in OneBodyKernel(sim, func.__name__): + for i in OneBodyKernel(sim, func.__name__, run_on_device=run_on_device, profile=profile): ir = BuildParticleIR(sim, symbols, parameters) ir.add_symbols({params[0]: i}) ir.visit(tree) else: # Compute local-local and local-global interactions - particle_interaction = ParticleInteraction(sim, func.__name__, nparams, cutoff_radius) + particle_interaction = ParticleInteraction(sim, func.__name__, nparams, cutoff_radius, run_on_device=run_on_device, profile=profile) for interaction_data in particle_interaction: # Start building IR ir = BuildParticleIR(sim, symbols, parameters) @@ -409,5 +411,5 @@ def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, compute_gl # User defined functions are wrapped inside seperate interface modules here. # The udf's have the same name as their interface module but they get implemented in the pairs::internal scope. - sim.build_interface_module() + sim.build_interface_module_with_statements(run_on_device, profile) diff --git a/src/pairs/sim/global_interaction.py b/src/pairs/sim/global_interaction.py index 00f213e..dca2a77 100644 --- a/src/pairs/sim/global_interaction.py +++ b/src/pairs/sim/global_interaction.py @@ -104,19 +104,6 @@ class GlobalReduction: for app in self.particle_interaction.apply_list[ishape*self.sim.max_shapes() + jshape]: self.red_props.add(app.prop()) - # self.sim.add_statement(self) - - # @pairs_inline - # def lower(self): - # SortGlobals(self) - # PackGlobals(self, self.intermediate_buffer) - # ResetReductionProps(self) - # GlobalLocalInteraction(self) - # PackGlobals(self, self.red_buffer) - # ReduceGlobals(self) - # UnpackGlobals(self) - # GlobalGlobalInteraction(self) - def global_particles(self): for p in For(self.sim, 0, self.sim.cell_lists.cell_sizes[0]): i = self.sim.cell_lists.cell_particles[0][p] diff --git a/src/pairs/sim/instrumentation.py b/src/pairs/sim/instrumentation.py index 7281f13..6e90198 100644 --- a/src/pairs/sim/instrumentation.py +++ b/src/pairs/sim/instrumentation.py @@ -1,6 +1,7 @@ from pairs.ir.block import pairs_inline from pairs.ir.functions import Call_Void from pairs.ir.timers import Timers +from pairs.ir.types import Types from pairs.sim.lowerable import FinalLowerable class RegisterTimers(FinalLowerable): @@ -12,9 +13,14 @@ class RegisterTimers(FinalLowerable): for t in range(Timers.Offset): Call_Void(self.sim, "pairs::register_timer", [t, Timers.name(t)]) - for m in self.sim.module_list: - if m.name != 'main' and m.name != 'initialize': - Call_Void(self.sim, "pairs::register_timer", [m.module_id + Timers.Offset, m.name]) + # Interface modules + for m in self.sim.interface_modules(): + if m.name != 'initialize' and m.name != 'end' and m.return_type==Types.Void: + Call_Void(self.sim, "pairs::register_timer", [m.module_id + Timers.Offset, "INTERFACE_MODULES::" + m.name]) + + # Internal modules + for m in self.sim.modules(): + Call_Void(self.sim, "pairs::register_timer", [m.module_id + Timers.Offset, "INTERNAL_MODULES::" + m.name]) class RegisterMarkers(FinalLowerable): @@ -24,6 +30,7 @@ class RegisterMarkers(FinalLowerable): @pairs_inline def lower(self): if self.sim._enable_profiler: - for m in self.sim.module_list: - if m.name != 'main' and m.name != 'initialize' and m.must_profile(): + # Only internal modules are profiled + for m in self.sim.modules(): + if m.must_profile(): Call_Void(self.sim, "LIKWID_MARKER_REGISTER", [m.name]) diff --git a/src/pairs/sim/interaction.py b/src/pairs/sim/interaction.py index f19289e..901c2d4 100644 --- a/src/pairs/sim/interaction.py +++ b/src/pairs/sim/interaction.py @@ -1,7 +1,7 @@ from pairs.ir.assign import Assign from pairs.ir.ast_term import ASTTerm from pairs.ir.scalars import ScalarOp -from pairs.ir.block import Block, pairs_device_block +from pairs.ir.block import Block, pairs_block from pairs.ir.branches import Filter, Branch from pairs.ir.loops import For, ParticleFor from pairs.ir.math import Sqrt, Abs, Min, Sign @@ -307,8 +307,10 @@ class InteractionData: self.contact_normal().assign(contact_normal if s_relative else -contact_normal) class ParticleInteraction(Lowerable): - def __init__(self, sim, module_name, nbody, cutoff_radius=None, use_cell_lists=False): + def __init__(self, sim, module_name, nbody, cutoff_radius=None, use_cell_lists=False, run_on_device=True, profile=False): super().__init__(sim) + self.run_on_device = run_on_device + self.profile = profile self.module_name = module_name self.nbody = nbody self.contact_threshold = 0.0 @@ -316,6 +318,7 @@ class ParticleInteraction(Lowerable): self.maxs = self.sim.max_shapes() self.interactions_data = {} self.cutoff_radius = cutoff_radius + if any(self.sim.get_shape_id(s)==Shapes.PointMass for s in range(self.maxs)): assert cutoff_radius is not None @@ -415,7 +418,7 @@ class ParticleInteraction(Lowerable): self.sim.add_statement(Filter(self.sim, interaction_data.cutoff_condition, self.blocks[ishape*self.maxs + jshape])) self.apply_reductions(i, ishape, jshape) - @pairs_device_block + @pairs_block def lower(self): self.sim.module_name(f"{self.module_name}_local_interactions") if self.nbody == 2: diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py index 9ac9977..76e3cdb 100644 --- a/src/pairs/sim/simulation.py +++ b/src/pairs/sim/simulation.py @@ -1,37 +1,23 @@ from pairs.ir.arrays import Arrays from pairs.ir.block import Block -from pairs.ir.branches import Filter from pairs.ir.features import Features, FeatureProperties from pairs.ir.kernel import Kernel from pairs.ir.layouts import Layouts -from pairs.ir.module import Module, ModuleCall +from pairs.ir.module import Module from pairs.ir.properties import Properties, ContactProperties from pairs.ir.symbols import Symbol from pairs.ir.types import Types from pairs.ir.variables import Variables #from pairs.graph.graphviz import ASTGraph from pairs.mapping.funcs import compute -from pairs.sim.arrays import DeclareArrays -from pairs.sim.cell_lists import CellLists, BuildCellLists, BuildCellListsStencil, PartitionCellLists, BuildCellNeighborLists -from pairs.sim.comm import Comm, Synchronize, Borders, Exchange, ReverseComm -from pairs.sim.contact_history import ContactHistory, BuildContactHistory, ClearUnusedContactHistory, ResetContactHistoryUsageStatus -from pairs.sim.copper_fcc_lattice import CopperFCCLattice -from pairs.sim.dem_sc_grid import DEMSCGrid -from pairs.sim.domain import UpdateDomain +from pairs.sim.cell_lists import CellLists, BuildCellLists, PartitionCellLists, BuildCellNeighborLists +from pairs.sim.comm import Comm +from pairs.sim.contact_history import ContactHistory from pairs.sim.domain_partitioners import DomainPartitioners from pairs.sim.domain_partitioning import BlockForest, DimensionRanges from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms -from pairs.sim.features import AllocateFeatureProperties -from pairs.sim.grid import Grid2D, Grid3D, MutableGrid -from pairs.sim.instrumentation import RegisterMarkers, RegisterTimers -from pairs.sim.lattice import ParticleLattice +from pairs.sim.grid import Grid3D from pairs.sim.neighbor_lists import NeighborLists, BuildNeighborLists -from pairs.sim.properties import AllocateProperties, AllocateContactProperties, ResetVolatileProperties -from pairs.sim.read_from_file import ReadParticleData -from pairs.sim.thermo import ComputeThermo -from pairs.sim.timestep import Timestep -from pairs.sim.variables import DeclareVariables -from pairs.sim.vtk import VTKWrite from pairs.transformations import Transformations from pairs.code_gen.interface import InterfaceModules @@ -176,45 +162,25 @@ class Simulation: def max_shapes(self): return len(self._shapes) - def add_udf_module(self, module): - assert isinstance(module, Module), "add_udf_module(): Given parameter is not of type Module!" - assert module.user_defined and not module.interface - if module.name not in [m.name for m in self.udf_module_list]: - self.udf_module_list.append(module) - def add_interface_module(self, module): assert isinstance(module, Module), "add_interface_module(): Given parameter is not of type Module!" - assert module.interface and not module.user_defined + assert module.interface if module.name not in [m.name for m in self.interface_module_list]: self.interface_module_list.append(module) def add_module(self, module): assert isinstance(module, Module), "add_module(): Given parameter is not of type Module!" - assert not module.interface and not module.user_defined + assert not module.interface if module.name not in [m.name for m in self.module_list]: self.module_list.append(module) def interface_modules(self): + """List interface modules""" return self.interface_module_list - - def udf_modules(self): - return self.udf_module_list - + def modules(self): - """List simulation modules, with main always in the last position""" - - sorted_mods = [] - main_mod = None - for m in self.module_list: - if m.name != 'main': - sorted_mods.append(m) - else: - main_mod = m - - if main_mod is not None: - sorted_mods += [main_mod] - - return sorted_mods + """List internal modules""" + return self.module_list def add_kernel(self, kernel): assert isinstance(kernel, Kernel), "add_kernel(): Given parameter is not of type Kernel!" @@ -330,8 +296,8 @@ class Simulation: self.neighbor_lists = NeighborLists(self, self.cell_lists) return self.neighbor_lists - def compute(self, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False): - return compute(self, func, cutoff_radius, symbols, parameters, compute_globals) + def compute(self, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False, run_on_device=True, profile=False): + return compute(self, func, cutoff_radius, symbols, parameters, compute_globals, run_on_device, profile) def init_block(self): @@ -356,68 +322,13 @@ class Simulation: else: raise Exception("Two sizes assigned to same capacity!") - def build_setup_module_with_statements(self): - """Build a Module in the setup part of the program using the last initialized block""" - - self.setup_functions.append( - Module(self, - name=self._module_name, - block=Block(self, self._block), - resizes_to_check=self._resizes_to_check, - check_properties_resize=self._check_properties_resize, - run_on_device=True)) - - def build_pre_step_module_with_statements(self, run_on_device=True, skip_first=False, profile=False): - """Build a Module in the pre-step part of the program using the last initialized block""" - module = Module(self, name=self._module_name, - block=Block(self, self._block), - resizes_to_check=self._resizes_to_check, - check_properties_resize=self._check_properties_resize, - run_on_device=run_on_device) - - if profile: - module.profile() - - if skip_first: - self.pre_step_functions.append((module, {'skip_first': True})) - - else: - self.pre_step_functions.append(module) - - def build_module_with_statements(self, run_on_device=True, skip_first=False, profile=False): - """Build a Module in the compute part of the program using the last initialized block""" - module = Module(self, name=self._module_name, - block=Block(self, self._block), - resizes_to_check=self._resizes_to_check, - check_properties_resize=self._check_properties_resize, - run_on_device=run_on_device) - if profile: - module.profile() - - if skip_first: - self.functions.append((module, {'skip_first': True})) - - else: - self.functions.append(module) - - def build_user_defined_function(self, run_on_device=True): - """Build a user-defined Module that will be callable seperately as part of the interface""" - Module(self, name=self._module_name, - block=Block(self, self._block), - resizes_to_check=self._resizes_to_check, - check_properties_resize=self._check_properties_resize, - run_on_device=run_on_device, - user_defined=True) - - - def build_interface_module(self, run_on_device=False): + def build_interface_module_with_statements(self, run_on_device=False, profile=False): """Build a user-defined Module that will be callable seperately as part of the interface""" Module(self, name=self._module_name, block=Block(self, self._block), resizes_to_check=self._resizes_to_check, check_properties_resize=self._check_properties_resize, run_on_device=run_on_device, - user_defined=False, interface=True) def capture_statements(self, capture=True): diff --git a/src/pairs/sim/timestep.py b/src/pairs/sim/timestep.py deleted file mode 100644 index abef09a..0000000 --- a/src/pairs/sim/timestep.py +++ /dev/null @@ -1,72 +0,0 @@ -from pairs.ir.scalars import ScalarOp -from pairs.ir.block import Block -from pairs.ir.branches import Branch, Filter -from pairs.ir.functions import Call_Void -from pairs.ir.loops import For -from pairs.ir.timers import Timers - - -class Timestep: - def __init__(self, sim, nsteps, item_list=None): - self.sim = sim - self.block = Block(sim, []) - self.timestep_loop = For(sim, 0, nsteps + 1, self.block) if self.sim._generate_whole_program else None - - if item_list is not None: - for item in item_list: - if isinstance(item, tuple): - stmt_else = None - - if len(item) == 2: - stmt, params = item - - if len(item) == 3: - stmt, stmt_else, params = item - - exec_every = 0 if 'every' not in params else params['every'] - skip_first = False if 'skip_first' not in params else params['skip_first'] - self.add(stmt, exec_every, stmt_else, skip_first) - - else: - self.add(item) - - def timestep(self): - return self.timestep_loop.iter() if self.sim._generate_whole_program else self.sim.sim_timestep - - def add(self, item, exec_every=0, item_else=None, skip_first=False): - assert exec_every >= 0, "exec_every parameter must be higher or equal than zero!" - stmts = item if not isinstance(item, Block) else item.statements() - stmts_else = None - ts = self.timestep() - self.sim.enter(self.block) - - if item_else is not None: - stmts_else = item_else if not isinstance(item_else, Block) else item_else.statements() - - if exec_every > 0: - cond = ScalarOp.or_op(ScalarOp.cmp((ts + 1) % exec_every, 0), ScalarOp.cmp(ts, 0)) - one_way = True if stmts_else is None else False - - self.block.add_statement( - Branch(self.sim, ScalarOp.inline(cond), one_way, - Block(self.sim, stmts), - Block(self.sim, stmts_else) if not one_way else None)) - - elif skip_first: - self.block.add_statement(Filter(self.sim, ScalarOp.inline(ts > 0), Block(self.sim, stmts))) - - else: - self.block.add_statement(stmts) - - self.sim.leave() - - def as_block(self): - _capture = self.sim._capture_statements - self.sim.capture_statements(False) - - block = Block(self.sim, [Call_Void(self.sim, "pairs::start_timer", [Timers.All]), - self.timestep_loop if self.sim._generate_whole_program else self.block, - Call_Void(self.sim, "pairs::stop_timer", [Timers.All])]) - - self.sim.capture_statements(_capture) - return block diff --git a/src/pairs/transformations/__init__.py b/src/pairs/transformations/__init__.py index 8372865..314fd17 100644 --- a/src/pairs/transformations/__init__.py +++ b/src/pairs/transformations/__init__.py @@ -111,9 +111,5 @@ class Transformations: self.add_expression_declarations() # self.add_host_references_to_modules() self.add_device_references_to_modules() - - # TODO: Place stop timers before the function returns - # or simply don't instrument modules that have a non-void return type - # to avoid having to deal with returns within conditional blocks - # self.add_instrumentation() + self.add_instrumentation() diff --git a/src/pairs/transformations/devices.py b/src/pairs/transformations/devices.py index 7a01fee..1eb6f1d 100644 --- a/src/pairs/transformations/devices.py +++ b/src/pairs/transformations/devices.py @@ -36,9 +36,6 @@ class AddDeviceCopies(Mutator): if isinstance(s, ModuleCall): copy_context = Contexts.Device if s.module.run_on_device else Contexts.Host clear_context = Contexts.Host if s.module.run_on_device else Contexts.Device - # new_stmts += [ - # Call_Void(ast_node.sim, "pairs::start_timer", [Timers.DeviceTransfers]) - # ] for array, action in s.module.arrays().items(): # TODO: Add device copies only if they are not mannualy taken care of inside the module @@ -69,27 +66,17 @@ class AddDeviceCopies(Mutator): # if var not in s.module.device_copies() and var in s.module.host_references(): # new_stmts += [CopyVar(s.sim, var, Contexts.Host, action)] - # new_stmts += [ - # Call_Void(ast_node.sim, "pairs::stop_timer", [Timers.DeviceTransfers]) - # ] new_stmts.append(s) if isinstance(s, ModuleCall): if s.module.run_on_device: - # new_stmts += [ - # Call_Void(ast_node.sim, "pairs::start_timer", [Timers.DeviceTransfers]) - # ] - for var, action in s.module.variables().items(): if action != Actions.ReadOnly and var.device_flag: new_stmts += [CopyVar(s.sim, var, Contexts.Host, action)] if self.module_resizes[s.module]: new_stmts += [CopyArray(s.sim, s.sim.resizes, Contexts.Host, Actions.Ignore)] - # new_stmts += [ - # Call_Void(ast_node.sim, "pairs::stop_timer", [Timers.DeviceTransfers]) - # ] ast_node.stmts = new_stmts return ast_node diff --git a/src/pairs/transformations/instrumentation.py b/src/pairs/transformations/instrumentation.py index 88b73c0..5d039b8 100644 --- a/src/pairs/transformations/instrumentation.py +++ b/src/pairs/transformations/instrumentation.py @@ -1,8 +1,8 @@ from pairs.ir.block import Block from pairs.ir.functions import Call_Void -from pairs.ir.module import ModuleCall from pairs.ir.mutator import Mutator from pairs.ir.timers import Timers +from pairs.ir.types import Types class AddModulesInstrumentation(Mutator): @@ -12,7 +12,8 @@ class AddModulesInstrumentation(Mutator): def mutate_ModuleCall(self, ast_node): ast_node._module = self.mutate(ast_node._module) module = ast_node._module - if module.name == 'main' or module.name == 'initialize': + + if module.name == 'initialize' or module.name == 'end' or module.return_type!=Types.Void: return ast_node if module.must_profile(): -- GitLab