diff --git a/.gitignore b/.gitignore
index 37f64d450b07bdc6632bb730810109dabaa3e374..33b4e18100ffe4951dee7ac962c111e13c777334 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,10 +10,10 @@ obj
 obj_cpu
 obj_gpu
 todo*
-lj_cpu
-lj_gpu
-lj.cu
-lj.cpp
+md_cpu
+md_gpu
+md.cu
+md.cpp
 dem_cpu
 dem_gpu
 dem.cu
diff --git a/Makefile b/Makefile
index 0e2343478c620beed2cf1924609054e6fc0d85ef..967a1a9f117d174c758b82a6af3c41c296ded89c 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,9 @@ DEBUG_FLAGS=
 
 # CUDA settings
 NVCC=nvcc
-NVCC_FLAGS=-O3 -mavx2 -mfma
+#NVCC_FLAGS=-O3 -mavx2 -mfma  # previous setting: no explicit -arch, no fast-math/unroll host flags
+NVCC_FLAGS=-O3 -arch=sm_80 -mavx2 -mfma -ffast-math -funroll-loops --forward-unknown-to-host-compiler
+#NVCC_FLAGS=-O3 -arch=sm_80 -march=native -ffast-math -funroll-loops --forward-unknown-to-host-compiler  # alternative: same as the active line, but with -march=native in place of -mavx2 -mfma
 NVCC_PATH:="$(shell which ${NVCC})"
 CUDA_FLAGS=-DENABLE_CUDA_AWARE_MPI
 CUDART_FLAGS=-lcudart -L /apps/SPACK/0.19.1/opt/linux-almalinux8-zen/gcc-8.5.0/nvhpc-23.7-bzxcokzjvx4stynglo4u2ffpljajzlam/Linux_x86_64/23.7/cuda/12.2/targets/x86_64-linux/lib
diff --git a/runtime/dem_sc_grid.hpp b/runtime/dem_sc_grid.hpp
index fae3056c12ed5bf8bc4d754246362987b4c441ec..8c85ce6b77d84ddd9e8e52327e3323fefb29378a 100644
--- a/runtime/dem_sc_grid.hpp
+++ b/runtime/dem_sc_grid.hpp
@@ -151,6 +151,23 @@ int dem_sc_grid(PairsSimulation *ps, double xmax, double ymax, double zmax, doub
         last_uid++;
     }
 
+    int global_nparticles = nparticles;
+    if(ps->getDomainPartitioner()->getWorldSize() > 1) {
+        MPI_Allreduce(&nparticles, &global_nparticles, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    }
+
+    if(ps->getDomainPartitioner()->getRank() == 0) {
+        std::cout << "DEM Simple-Cubic Grid" << std::endl;
+        std::cout << "Domain size: <" << xmax << ", " << ymax << ", " << zmax << ">" << std::endl;
+        std::cout << "Spacing: " << spacing << std::endl;
+        std::cout << "Diameter: " << diameter
+                  << " (min = " << min_diameter << ", max = " << max_diameter << ")" << std::endl;
+        std::cout << "Initial velocity: " << initial_velocity << std::endl;
+        std::cout << "Particle density: " << particle_density << std::endl;
+        std::cout << "Number of types: " << ntypes << std::endl;
+        std::cout << "Number of particles: " << global_nparticles << std::endl;
+    }
+
     return nparticles;
 }