From 81c47f4d57106b6162e422f563755cfd85d415d5 Mon Sep 17 00:00:00 2001
From: Behzad Safaei <iwia103h@a0522.nhr.fau.de>
Date: Wed, 12 Mar 2025 01:57:29 +0100
Subject: [PATCH] Cleanup and minor bug fixes

---
 CMakeLists.txt                             |  18 +--
 README.md                                  |  42 ++----
 examples/modular/force_reduction.py        |   3 +-
 examples/modular/sphere_box_global.py      |   3 +-
 examples/modular/spring_dashpot.py         |   3 +-
 runtime/pairs.hpp                          |   3 +-
 runtime/timers.hpp                         |  35 ++++-
 src/pairs/__init__.py                      |  20 +--
 src/pairs/analysis/devices.py              |   1 -
 src/pairs/code_gen/cgen.py                 |  80 +----------
 src/pairs/code_gen/interface.py            |   4 +-
 src/pairs/mapping/funcs.py                 |   5 +-
 src/pairs/sim/cell_lists.py                |   2 -
 src/pairs/sim/domain_partitioning.py       |   3 +-
 src/pairs/sim/load_balancing_algorithms.py |  13 --
 src/pairs/sim/simulation.py                | 147 +--------------------
 16 files changed, 64 insertions(+), 318 deletions(-)
 delete mode 100644 src/pairs/sim/load_balancing_algorithms.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4b968da..64d109d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,13 +10,7 @@ set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG")
 
 option(USE_MPI                      "USE_MPI" ON)
 option(COMPILE_CUDA                 "COMPILE_CUDA" OFF)
-option(GENERATE_WHOLE_PROGRAM       "Generate the whole program (i.e. including the 'main' function). No additional source files are needed." OFF)
-option(BUILD_APP                    "Build a stand-alone app which uses the P4IRS modular interface. Provide your source files with -DUSER_SOURCE_FILES" OFF)
-
-if(GENERATE_WHOLE_PROGRAM AND BUILD_APP)
-    message(FATAL_ERROR "You must choose either GENERATE_WHOLE_PROGRAM or BUILD_APP or neither.\n
-        Choose neither if you only want to use the P4IRS library in your project (in a seperate build system).")
-endif()
+set(USER_SOURCE_FILES "" CACHE STRING "List of source files to compile (semicolon-separated)")
 
 set(INPUT_SCRIPT ${INPUT_SCRIPT} CACHE PATH "The input python script triggering code generation")
 if(NOT EXISTS ${INPUT_SCRIPT})
@@ -71,7 +65,7 @@ set(PAIRS_LINK_DIRS ${CMAKE_CURRENT_BINARY_DIR})
 set(PAIRS_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR})
 
 # The target can either be an executable or a static library
-if(GENERATE_WHOLE_PROGRAM OR BUILD_APP)
+if(USER_SOURCE_FILES)
     add_executable(${PAIRS_TARGET} ${RUNTIME_COMMON_FILES})
 else()
     add_library(${PAIRS_TARGET} STATIC ${RUNTIME_COMMON_FILES})
@@ -90,13 +84,7 @@ set_target_properties(${PAIRS_TARGET} PROPERTIES
 #================================================================================
 # USER_SOURCE_FILES =============================================================
 #================================================================================
-if(BUILD_APP)
-    set(USER_SOURCE_FILES "" CACHE STRING "List of source files to compile (semicolon-separated)")
-    if(NOT USER_SOURCE_FILES)
-        message(FATAL_ERROR "BUILD_APP is ON. You have to specify source files like this:\n
-            -DUSER_SOURCE_FILES=src/main.cpp;src/helper.cpp")
-    endif()
-
+if(USER_SOURCE_FILES)
     foreach(file ${USER_SOURCE_FILES})
         if(NOT EXISTS ${file})
             message(FATAL_ERROR "File '${file}' does not exist!")
diff --git a/README.md b/README.md
index 882644c..92db3b5 100644
--- a/README.md
+++ b/README.md
@@ -100,57 +100,35 @@ else:
 psim.generate()
 ```
 
-## Build instructions
+## Build Instructions
 
-P4IRS can be built in 3 different modes using the CMake build system. Before we demostrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment.
+P4IRS can be built in two different modes using the CMake build system. Before we demonstrate each mode, ensure you have CMake, MPI and CUDA (if targeting GPU execution) available in your environment.
 
 In the following, we assume we have created and navigated to a build directory: `mkdir build; cd build` 
 
-**General CMake flags (applicable to all 3 modes):**  
+**Basic CMake flags:**  
 * Pass your input script to CMake using `-DINPUT_SCRIPT=path/to/script.py`  
 * Enable CUDA with `-DCOMPILE_CUDA=ON`
 * Enable support for BlockForest domain partitioning and dynamic load balancing by providing the path to waLBerla source directory `-DWALBERLA_DIR=path/to/walberla` (TODO: waLBerla as a submodule)
 
 
-### 1. Whole-program generation:
+### 1. Stand-Alone P4IRS Application
 ---------------------
-In this mode, everything including the `main` function is generated by P4IRS.
+To build a C++ application using P4IRS, provide the list of your source files to CMake using the `-DUSER_SOURCE_FILES` flag (semicolon-separated).
 
-1. Set `generate_whole_program=True` in the input script
-2. Set the CMake flag `-DGENERATE_WHOLE_PROGRAM=ON`
-
-Example: Build [md.py](examples/whole-program-generation/md.py)
-```
-cmake -DINPUT_SCRIPT=../examples/whole-program-generation/md.py -DGENERATE_WHOLE_PROGRAM=ON ..
-``` 
-Now call `make` and an **executable** is built.
-
-
-### 2. Modular stand-alone app
----------------------
-You can build a stand-alone C++ app which uses the P4IRS modular interface.
-
-1. Set `generate_whole_program=False` in the input script
-2. Set the CMake flag `-DBUILD_APP=ON`
-3. Provide the list of your source files to CMake (semicolon-seperated):`-DUSER_SOURCE_FILES=path/to/main.cpp;path/to/helper.cpp`
-
-Example: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) with [spring_dashpot.py](examples/modular/spring_dashpot.py)  
+**Example**: Build the application [sd_1.cpp](examples/modular/sd_1.cpp) using [spring_dashpot.py](examples/modular/spring_dashpot.py) as the input script.  
 Note: In this example we assume waLBerla has been already cloned next to the P4IRS directory.
 
 ```
-cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DBUILD_APP=ON -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DWALBERLA_DIR=../../walberla ..
+cmake -DINPUT_SCRIPT=../examples/modular/spring_dashpot.py -DUSER_SOURCE_FILES=../examples/modular/sd_1.cpp -DWALBERLA_DIR=../../walberla ..
 ```
 Now call `make` and an **executable** is built.
 
 
-### 3. P4IRS as a library
+### 2. P4IRS as a Library
 ---------------------
-In this mode, P4IRS is compiled as a library that can be integrated into other projects.
-
-1. Set `generate_whole_program=False` in the input script
-2. Ensure both `BUILD_APP` and `GENERATE_WHOLE_PROGRAM` are `OFF` (they are OFF by default)
-
-Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows:
+P4IRS can also be compiled as a library for integration into larger projects.  
+To compile P4IRS as a library, simply do not pass any `USER_SOURCE_FILES` to CMake. Configure CMake and call `make` as usual, and a **static library** is built. You can then include P4IRS and its dependencies in your build system as follows:
 ```cmake
 find_package(pairs REQUIRED HINTS "path/to/pairs/build" NO_DEFAULT_PATH)
 target_include_directories(my_app PUBLIC ${PAIRS_INCLUDE_DIRS})
diff --git a/examples/modular/force_reduction.py b/examples/modular/force_reduction.py
index af9cea7..8f15dd6 100644
--- a/examples/modular/force_reduction.py
+++ b/examples/modular/force_reduction.py
@@ -58,8 +58,7 @@ psim = pairs.simulation(
     double_prec=True,
     particle_capacity=1000000,
     neighbor_capacity=20,
-    debug=True, 
-    generate_whole_program=False)
+    debug=True)
 
 
 target = sys.argv[1] if len(sys.argv[1]) > 1 else "none"
diff --git a/examples/modular/sphere_box_global.py b/examples/modular/sphere_box_global.py
index 1fbbd6d..53722f0 100644
--- a/examples/modular/sphere_box_global.py
+++ b/examples/modular/sphere_box_global.py
@@ -71,8 +71,7 @@ psim = pairs.simulation(
     double_prec=True,
     particle_capacity=1000000,
     neighbor_capacity=20,
-    debug=True, 
-    generate_whole_program=False)
+    debug=True)
 
 
 target = sys.argv[1] if len(sys.argv[1]) > 1 else "none"
diff --git a/examples/modular/spring_dashpot.py b/examples/modular/spring_dashpot.py
index 9b68d54..94f4bfe 100644
--- a/examples/modular/spring_dashpot.py
+++ b/examples/modular/spring_dashpot.py
@@ -59,8 +59,7 @@ psim = pairs.simulation(
     double_prec=True,
     particle_capacity=1000000,
     neighbor_capacity=20,
-    debug=True, 
-    generate_whole_program=False)
+    debug=True)
 
 
 target = sys.argv[1] if len(sys.argv[1]) > 1 else "none"
diff --git a/runtime/pairs.hpp b/runtime/pairs.hpp
index d77be18..853e8ef 100644
--- a/runtime/pairs.hpp
+++ b/runtime/pairs.hpp
@@ -370,7 +370,8 @@ public:
     }
 
     void logTimers() {
-        this->getTimers()->writeToFile(this->getDomainPartitioner()->getRank());
+        this->getTimers()->writeToFile(this->getDomainPartitioner()->getRank(), 
+                                        this->getDomainPartitioner()->getWorldSize());
     }
 };
 
diff --git a/runtime/timers.hpp b/runtime/timers.hpp
index 8e399dd..e82fd32 100644
--- a/runtime/timers.hpp
+++ b/runtime/timers.hpp
@@ -33,10 +33,13 @@ public:
             std::chrono::duration_cast<TimeUnit>(current_clock - clocks[id]).count()) * time_factor;
     }
 
-    void writeToFile(int rank){
+    void writeToFile(int rank, int world_size){
+        std::string filename = "timers_" + std::to_string(world_size) + ".txt";
+        if (rank==0) std::cout << "Writing timers log to: " << filename << std::endl;
+
         MPI_File file;
-        MPI_File_open(MPI_COMM_WORLD, "timers.txt", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file);
-    
+        MPI_File_open(MPI_COMM_WORLD, filename.c_str(), MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file);
+
         std::ostringstream ss;
         ss << "Rank: " << rank << "\n";
         ss << std::left << std::setw(80) << "Timer"
@@ -80,6 +83,32 @@ public:
     }
 
     void print(){
+        std::cout << "--------------------------------------------------------------------------------------------------------\n";
+        std::cout << std::left << std::setw(80) << "Timer (MPI rank: 0)"
+            << std::left << std::setw(15) << "Total [ms]"
+            << std::left << std::setw(15) << "Count" << "\n";
+        std::cout << "--------------------------------------------------------------------------------------------------------\n";
+        
+        // Modules
+        for (size_t i = TimerMarkers::Offset; i < time_counters.size(); ++i) {
+            const std::string& counterName = counter_names[i];
+            if(counterName.find("INTERFACE_MODULES::") == 0) {
+                std::cout << std::left << std::setw(80) << counter_names[i]
+                        << std::left << std::setw(15) << std::fixed << std::setprecision(2) << time_counters[i]
+                        << std::left << std::setw(15) << call_counters[i]
+                        << "\n";
+            }
+        }
+
+        // Markers
+        for (size_t i = 0; i < TimerMarkers::Offset; ++i) {
+            std::cout << std::left << std::setw(80) << counter_names[i]
+                    << std::left << std::setw(15) << std::fixed << std::setprecision(2) << time_counters[i]
+                    << std::left << std::setw(15) << 1
+                    << "\n";
+        }
+
+        std::cout << "--------------------------------------------------------------------------------------------------------\n";
     }
 
     void computeCategories() {
diff --git a/src/pairs/__init__.py b/src/pairs/__init__.py
index ec005e5..cbde4f8 100644
--- a/src/pairs/__init__.py
+++ b/src/pairs/__init__.py
@@ -2,7 +2,6 @@ from pairs.ir.types import Types
 from pairs.code_gen.cgen import CGen
 from pairs.code_gen.target import Target
 from pairs.sim.domain_partitioners import DomainPartitioners
-from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms
 from pairs.sim.shapes import Shapes
 from pairs.sim.simulation import Simulation
 
@@ -11,17 +10,14 @@ def simulation(
     ref,
     shapes,
     dims=3,
-    timesteps=100,
     double_prec=False,
     use_contact_history=False,
     particle_capacity=800000,
     neighbor_capacity=100,
-    debug=False,
-    generate_whole_program=False):
+    debug=False):
 
     return Simulation(
-        CGen(ref, debug), shapes, dims, timesteps, double_prec, use_contact_history,
-        particle_capacity, neighbor_capacity, generate_whole_program)
+        CGen(ref, debug), shapes, dims, double_prec, use_contact_history, particle_capacity, neighbor_capacity)
 
 def target_cpu(parallel=False):
     if parallel:
@@ -73,15 +69,3 @@ def regular_domain_partitioner_xy():
 
 def block_forest():
     return DomainPartitioners.BlockForest
-
-def morton():
-    return LoadBalancingAlgorithms.Morton
-
-def hilbert():
-    return LoadBalancingAlgorithms.Hilbert
-
-def metis():
-    return LoadBalancingAlgorithms.Metis
-
-def diffusive():
-    return LoadBalancingAlgorithms.Diffusive
\ No newline at end of file
diff --git a/src/pairs/analysis/devices.py b/src/pairs/analysis/devices.py
index c93ac99..d8c6c10 100644
--- a/src/pairs/analysis/devices.py
+++ b/src/pairs/analysis/devices.py
@@ -20,7 +20,6 @@ class FetchDeviceCopies(Visitor):
         self.module_stack.pop()
 
     def visit_CopyArray(self, ast_node):
-        print(self.module_stack[-1].name , " array = ", ast_node.array().name() )
         self.module_stack[-1].add_device_copy(ast_node.array())
 
     def visit_CopyProperty(self, ast_node):
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index 3258452..13f4f8f 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -31,8 +31,6 @@ from pairs.ir.variables import Var, DeclareVariable, Deref
 from pairs.ir.parameters import Parameter
 from pairs.ir.vectors import Vector, VectorAccess, VectorOp, ZeroVector
 from pairs.ir.ret import Return
-from pairs.sim.domain_partitioners import DomainPartitioners
-from pairs.sim.timestep import Timestep
 from pairs.code_gen.printer import Printer
 from pairs.code_gen.accessor import PairsAcessor
 
@@ -59,15 +57,6 @@ class CGen:
     def real_type(self):
         return Types.c_keyword(self.sim, Types.Real)
     
-    # def generate_cmake_config_file(self):
-    #     self.print = Printer("pairs_cmake_params.txt")
-    #     self.print.start()
-    #     self.print(f"PAIRS_TARGET={self.ref}")
-    #     self.print(f"GENERATE_WHOLE_PROGRAM={'ON' if self.sim._generate_whole_program else 'OFF'}")
-    #     self.print(f"USE_WALBERLA={'ON' if self.sim._partitioner == DomainPartitioners.BlockForest else 'OFF'}")
-    #     # self.print(f"COMPILE_CUDA={'ON' if self.target.is_gpu() else 'OFF'}")
-    #     self.print.end()
-
     def generate_object_reference(self, obj, device=False, index=None):
         if device and (not self.target.is_gpu() or not obj.device_flag):
             # Ideally this should never be called
@@ -151,8 +140,6 @@ class CGen:
         self.print("}")
 
     def generate_preamble(self):
-        # self.print(f"#define APPLICATION_REFERENCE \"{self.ref}\"")
-
         if self.target.is_gpu():
             self.print("#include <math_constants.h>")
              
@@ -208,7 +195,7 @@ class CGen:
         
     def generate_pairs_object_structure(self):
         self.print("")
-        externkw = "" if self.sim._generate_whole_program else "extern "
+        externkw = "extern "
         if self.target.is_gpu():
             for array in self.sim.arrays.statics():
                 if array.device_flag:
@@ -284,34 +271,6 @@ class CGen:
         self.print("};")
         self.print("")
 
-    def generate_program(self, ast_node):
-        self.generate_interfaces()
-        ext = ".cu" if self.target.is_gpu() else ".cpp"
-        self.print = Printer(self.ref + ext)
-        self.print.start()
-        self.generate_preamble()
-        self.generate_pairs_object_structure()
-        self.generate_module_decls()
-
-        self.print("namespace pairs::internal {")
-        self.print.add_indent(4)
-
-        for kernel in self.sim.kernels():
-            self.generate_kernel(kernel)
-
-        for module in self.sim.modules():
-            if module.name!='main':
-                self.generate_module(module)
-
-        self.print.add_indent(-4)
-        self.print("}")
-
-        for module in self.sim.modules():
-            if module.name=='main':
-                self.generate_main(module)
-
-        self.print.end()
-
     def generate_library(self):
         self.generate_interfaces()
         # Generate CUDA/CPP file with modules
@@ -445,36 +404,6 @@ class CGen:
             if feature_prop in module.host_references():
                 self.print(f"{type_kw} *{feature_prop.name()}_h = pobj->{feature_prop.name()};")
 
-    def generate_main(self, module):
-        assert module.name=='main'
-
-        ndims = module.sim.ndims()
-        nprops = module.sim.properties.nprops()
-        ncontactprops = module.sim.contact_properties.nprops()
-        narrays = module.sim.arrays.narrays()
-        part = DomainPartitioners.c_keyword(module.sim.partitioner())
-
-        self.generate_full_object_names = True
-        self.print("int main(int argc, char **argv) {")
-        self.print(f"    PairsRuntime *pairs_runtime = new PairsRuntime({nprops}, {ncontactprops}, {narrays}, {part});")
-        self.print(f"    struct PairsObjects *pobj = new PairsObjects();")
-
-        if module.sim._enable_profiler:
-            self.print("    LIKWID_MARKER_INIT;")
-
-        self.generate_statement(module.block)
-
-        if module.sim._enable_profiler:
-            self.print("    LIKWID_MARKER_CLOSE;")
-
-        self.print("    pairs::print_timers(pairs_runtime);")
-        self.print("    pairs::print_stats(pairs_runtime, pobj->nlocal, pobj->nghost);")
-        self.print("    delete pobj;")
-        self.print("    delete pairs_runtime;")
-        self.print("    return 0;")
-        self.print("}")
-        self.generate_full_object_names = False
-
     def generate_module(self, module):
         self.generate_module_header(module, definition=True)
         self.print.add_indent(4)
@@ -596,9 +525,7 @@ class CGen:
             if ast_node.check_for_resize():
                 resize = self.generate_expression(ast_node.resize)
                 capacity = self.generate_expression(ast_node.capacity)
-                # self.print(f"printf (\" %d -- before AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});")
                 self.print(f"pairs::{prefix}atomic_add_resize_check(&({elem}), {value}, &({resize}), {capacity});")
-                # self.print(f"printf (\" %d -- after AtomicInc: nsend = %d -- send_capacity = %d -- resizes[0] = %d\\n\", {Printer.line_id}, {elem}, {capacity}, {resize});")
 
             else:
                 self.print(f"pairs::{prefix}atomic_add(&({elem}), {value});")
@@ -802,9 +729,7 @@ class CGen:
                 self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}, {size}); // {array_name}")
 
             else:
-                # self.print(f"std::cout<< \"{Printer.line_id} -- before {array_name} copyArrayTo{ctx_suffix}({action}) === \" <<  pobj->{array_name}[0]  << \" \" << pobj->{array_name}[1]  << \" \" << pobj->{array_name}[2]  << std::endl;")
                 self.print(f"pairs_runtime->copyArrayTo{ctx_suffix}({array_id}, {action}); // {array_name}")
-                # self.print(f"std::cout<< \"{Printer.line_id} -- after {array_name} copyArrayTo{ctx_suffix}({action}) === \" <<  pobj->{array_name}[0]  << \" \" << pobj->{array_name}[1]  << \" \" << pobj->{array_name}[2]  << std::endl;")
 
         if isinstance(ast_node, CopyContactProperty):
             prop_id = ast_node.contact_prop().id()
@@ -1010,9 +935,6 @@ class CGen:
             if self.target.is_gpu() and fp.device_flag:
                 self.print(f"pairs_runtime->copyFeaturePropertyToDevice({fp.id()}); // {fp.name()}")
 
-        if isinstance(ast_node, Timestep):
-            self.generate_statement(ast_node.block)
-
         if isinstance(ast_node, ReallocProperty):
             p = ast_node.property()
             ptr_addr = self.generate_object_address(p)
diff --git a/src/pairs/code_gen/interface.py b/src/pairs/code_gen/interface.py
index 228a7ae..20a27fd 100644
--- a/src/pairs/code_gen/interface.py
+++ b/src/pairs/code_gen/interface.py
@@ -64,13 +64,13 @@ class InterfaceModules:
             self.sim.grid = MutableGrid(self.sim, self.sim.dims)
 
         inits = Block.from_list(self.sim, [
+            RegisterTimers(self.sim),
+            RegisterMarkers(self.sim),
             DeclareVariables(self.sim),
             DeclareArrays(self.sim),
             AllocateProperties(self.sim),
             AllocateContactProperties(self.sim),
             AllocateFeatureProperties(self.sim),
-            RegisterTimers(self.sim),
-            RegisterMarkers(self.sim)
         ])
 
         if self.sim._enable_profiler:
diff --git a/src/pairs/mapping/funcs.py b/src/pairs/mapping/funcs.py
index c845787..f2b1e18 100644
--- a/src/pairs/mapping/funcs.py
+++ b/src/pairs/mapping/funcs.py
@@ -311,9 +311,6 @@ class OneBodyKernel(Lowerable):
         self.sim.add_statement(self.block)
 
 def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False, run_on_device=True, profile=False):
-    if sim._generate_whole_program:
-        assert not parameters, "Compute functions can't take custom parameters when generating whole program."
-
     src = inspect.getsource(func)
     tree = ast.parse(src, mode='exec')
     #print(ast.dump(ast.parse(src, mode='exec')))
@@ -411,5 +408,5 @@ def compute(sim, func, cutoff_radius=None, symbols={}, parameters={}, compute_gl
             
     # User defined functions are wrapped inside seperate interface modules here.
     # The udf's have the same name as their interface module but they get implemented in the pairs::internal scope.
-    sim.build_interface_module_with_statements(run_on_device, profile)  
+    sim.build_interface_module_with_statements(run_on_device)  
     
diff --git a/src/pairs/sim/cell_lists.py b/src/pairs/sim/cell_lists.py
index 24b7329..8501984 100644
--- a/src/pairs/sim/cell_lists.py
+++ b/src/pairs/sim/cell_lists.py
@@ -27,7 +27,6 @@ class CellLists:
             self.spacing = spacing if isinstance(spacing, list) else [spacing for d in range(sim.ndims())]
             self.runtime_spacing = False
         else:
-            assert self.sim._generate_whole_program==False, "Cell spacing needs to be defined when generating whole program."
             self.spacing = self.sim.add_array('spacing', self.sim.ndims(), Types.Real)
             self.runtime_spacing = True
 
@@ -35,7 +34,6 @@ class CellLists:
             self.cutoff_radius = cutoff_radius
             self.runtime_cutoff_radius = False
         else:
-            assert self.sim._generate_whole_program==False, "cutoff_radius needs to be defined when generating whole program."
             self.cutoff_radius = self.sim.add_var('cutoff_radius', Types.Real)
             self.runtime_cutoff_radius = True
 
diff --git a/src/pairs/sim/domain_partitioning.py b/src/pairs/sim/domain_partitioning.py
index 10fac21..f90f61f 100644
--- a/src/pairs/sim/domain_partitioning.py
+++ b/src/pairs/sim/domain_partitioning.py
@@ -11,8 +11,7 @@ from pairs.sim.grid import MutableGrid
 from pairs.ir.device import CopyArray
 from pairs.ir.contexts import Contexts
 from pairs.ir.actions import Actions
-from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms
-from pairs.ir.print import PrintCode
+
 class DimensionRanges:
     def __init__(self, sim):
         self.sim                = sim
diff --git a/src/pairs/sim/load_balancing_algorithms.py b/src/pairs/sim/load_balancing_algorithms.py
deleted file mode 100644
index 165d151..0000000
--- a/src/pairs/sim/load_balancing_algorithms.py
+++ /dev/null
@@ -1,13 +0,0 @@
-class LoadBalancingAlgorithms:
-    Morton = 0
-    Hilbert = 1
-    Diffusive = 3
-    Metis = 2
-
-    def c_keyword(algorithm):
-        return "Hilbert"        if algorithm == LoadBalancingAlgorithms.Hilbert else \
-               "Morton"         if algorithm == LoadBalancingAlgorithms.Morton else \
-               "Diffusive"      if algorithm == LoadBalancingAlgorithms.Diffusive else \
-               "Metis"          if algorithm == LoadBalancingAlgorithms.Metis else \
-               "Invalid"
-    
\ No newline at end of file
diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py
index 76e3cdb..3f8f084 100644
--- a/src/pairs/sim/simulation.py
+++ b/src/pairs/sim/simulation.py
@@ -15,7 +15,6 @@ from pairs.sim.comm import Comm
 from pairs.sim.contact_history import ContactHistory
 from pairs.sim.domain_partitioners import DomainPartitioners
 from pairs.sim.domain_partitioning import BlockForest, DimensionRanges
-from pairs.sim.load_balancing_algorithms import LoadBalancingAlgorithms
 from pairs.sim.grid import Grid3D
 from pairs.sim.neighbor_lists import NeighborLists, BuildNeighborLists
 from pairs.transformations import Transformations
@@ -30,17 +29,14 @@ class Simulation:
         code_gen,
         shapes,
         dims=3,
-        timesteps=100,
         double_prec=False,
         use_contact_history=False,
         particle_capacity=800000,
-        neighbor_capacity=100,
-        generate_whole_program=False):
+        neighbor_capacity=100):
 
         # Code generator for the simulation
         self.code_gen = code_gen
         self.code_gen.assign_simulation(self)
-        self._generate_whole_program = generate_whole_program
 
         # Data structures to be generated
         self.position_prop = None
@@ -52,7 +48,6 @@ class Simulation:
         self.contact_properties = ContactProperties(self)
 
         # General capacities, sizes and particle properties
-        self.sim_timestep = self.add_var('sim_timestep', Types.Int32, runtime=True)
         self.particle_capacity = \
             self.add_var('particle_capacity', Types.Int32, particle_capacity, runtime=True)
         self.neighbor_capacity = self.add_var('neighbor_capacity', Types.Int32, neighbor_capacity)
@@ -79,18 +74,13 @@ class Simulation:
         self._capture_statements = True
         self._block = Block(self, [])
 
+        # Interface modules
+        self.interface_module_list = []
+        
+        # Internal modules and kernels
         self.module_list = []
         self.kernel_list = []
 
-        # Individual user-defined and interface modules are created only when generate_whole_program is False
-        self.udf_module_list = []
-        self.interface_module_list = []
-
-        # User-defined functions to be called by other subroutines (used only when generate_whole_program is True)
-        self.setup_functions = []
-        self.pre_step_functions = []
-        self.functions = []
-
         # Structures to generated resize code for capacities
         self._check_properties_resize = False
         self._resizes_to_check = {}
@@ -112,7 +102,6 @@ class Simulation:
         self._module_name = None                # Current module name
         self._double_prec = double_prec         # Use double-precision FP arithmetic
         self.dims = dims                        # Number of dimensions
-        self.ntimesteps = timesteps             # Number of time-steps
         self.reneighbor_frequency = 1           # Re-neighbor frequency
         self.rebalance_frequency = 0            # Re-balance frequency for dynamic load balancing
         self._target = None                     # Hardware target info
@@ -136,14 +125,6 @@ class Simulation:
         else:
             raise Exception("Invalid domain partitioner.")
         
-    def set_workload_balancer(self, algorithm=LoadBalancingAlgorithms.Morton, 
-                              regrid_min=100, regrid_max=1000, rebalance_frequency=0):
-        assert self._partitioner == DomainPartitioners.BlockForest, "Load balancing is only supported by BlockForest."
-        self.rebalance_frequency = rebalance_frequency
-        self._dom_part.load_balancer = algorithm
-        self._dom_part.regrid_min = regrid_min
-        self._dom_part.regrid_max = regrid_max
-
     def partitioner(self):
         return self._partitioner
 
@@ -299,7 +280,6 @@ class Simulation:
     def compute(self, func, cutoff_radius=None, symbols={}, parameters={}, compute_globals=False, run_on_device=True, profile=False):
         return compute(self, func, cutoff_radius, symbols, parameters, compute_globals, run_on_device, profile)
 
-
     def init_block(self):
         """Initialize new block in this simulation instance"""
         self._block = Block(self, [])
@@ -322,7 +302,7 @@ class Simulation:
         else:
             raise Exception("Two sizes assigned to same capacity!")
 
-    def build_interface_module_with_statements(self, run_on_device=False, profile=False):
+    def build_interface_module_with_statements(self, run_on_device=False):
         """Build a user-defined Module that will be callable seperately as part of the interface"""
         Module(self, name=self._module_name,
                 block=Block(self, self._block),
@@ -415,12 +395,6 @@ class Simulation:
         self._comm = Comm(self, self._dom_part)
         self.create_update_cells_block()
 
-        if self._generate_whole_program:
-            self.generate_program()
-        else:
-            self.generate_library()
-
-    def generate_library(self):
         InterfaceModules(self).create_all()
         Transformations(self.interface_modules(), self._target).apply_all()
 
@@ -428,111 +402,4 @@ class Simulation:
         self.code_gen.generate_library()
 
         # Generate getters for the runtime functions
-        self.code_gen.generate_interfaces()
-"""
-    def generate_program(self):
-        assert self.grid, "No domain is created. Set domain bounds with 'set_domain'."
-
-        reverse_comm_module = ReverseComm(self._comm, reduce=True)
-
-        # Params that determine when a method must be called only when reneighboring
-        every_reneighbor_params = {'every': self.reneighbor_frequency}
-
-        timestep_procedures = []
-
-        # First steps executed during each time-step in the simulation
-        timestep_procedures += self.pre_step_functions 
-
-        # Rebalancing routines
-        if self.rebalance_frequency:
-            update_domain_procedures = Block.from_list(self, [
-                Exchange(self._comm),
-                UpdateDomain(self),
-                Borders(self._comm),
-                ResetVolatileProperties(self),
-                BuildCellListsStencil(self, self.cell_lists),
-                self.update_cells_procedures
-                ])
-
-            timestep_procedures.append((update_domain_procedures, {'every': self.rebalance_frequency}))
-
-        # Communication routines
-        timestep_procedures += [(Exchange(self._comm), every_reneighbor_params),
-                                (Borders(self._comm), Synchronize(self._comm), every_reneighbor_params)]
-
-        # Update acceleration data structures
-        timestep_procedures += [(self.update_cells_procedures, every_reneighbor_params)]
-
-        # Add routines for contact history management
-        if self._use_contact_history:
-            if self.neighbor_lists is not None:
-                timestep_procedures.append(
-                    (BuildContactHistory(self, self._contact_history, self.cell_lists),
-                    every_reneighbor_params))
-
-            timestep_procedures.append(ResetContactHistoryUsageStatus(self, self._contact_history))
-
-        # Reset volatile properties
-        timestep_procedures += [ResetVolatileProperties(self)]
-
-        # Add computational kernels
-        timestep_procedures += self.functions
-
-        # For whole-program-generation, add reverse_comm wherever needed in the timestep loop (eg: after computational kernels) like this:
-        timestep_procedures += [reverse_comm_module]
-
-        # Clear unused contact history
-        if self._use_contact_history:
-            timestep_procedures.append(ClearUnusedContactHistory(self, self._contact_history))
-
-        # Add routine to calculate thermal data
-        if self._compute_thermo != 0:
-            timestep_procedures.append(
-                (ComputeThermo(self), {'every': self._compute_thermo}))
-
-
-        # Data structures and timer/markers initialization
-        inits = Block.from_list(self, [
-            DeclareVariables(self),
-            DeclareArrays(self),
-            AllocateProperties(self),
-            AllocateContactProperties(self),
-            AllocateFeatureProperties(self),
-            RegisterTimers(self),
-            RegisterMarkers(self)
-        ])
-
-        # Construct the time-step loop
-        timestep = Timestep(self, self.ntimesteps, timestep_procedures)
-        self.enter(timestep.block)
-
-        # Add routine to write VTK data when set
-        if self.vtk_file is not None:
-            timestep.add(VTKWrite(self, self.vtk_file, timestep.timestep(), self.vtk_frequency))
-
-        self.leave()
-
-        # Combine everything into a whole program
-        # Initialization and setup functions, together with time-step loop
-        # UpdateDomain is added after setup_particles because particles must be already present in the simulation
-        body = Block.from_list(self, [
-            self.create_domain,
-            self.setup_particles,
-            UpdateDomain(self),        
-            self.setup_functions,
-            BuildCellListsStencil(self, self.cell_lists),
-            timestep.as_block()
-        ])
-
-        program = Module(self, name='main', block=Block.merge_blocks(inits, body))
-
-        # Apply transformations
-        transformations = Transformations(program, self._target)
-        transformations.apply_all()
-
-        # Generate whole program
-        self.code_gen.generate_program(program)
-
-        # Generate getters for the runtime functions
-        self.code_gen.generate_interfaces()
-"""
\ No newline at end of file
+        self.code_gen.generate_interfaces()
\ No newline at end of file
-- 
GitLab