diff --git a/runtime/likwid-marker.h b/runtime/likwid-marker.h
new file mode 100644
index 0000000000000000000000000000000000000000..0d36f27d67782c67333216c858ba4412313f02c5
--- /dev/null
+++ b/runtime/likwid-marker.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
+ * All rights reserved. This file is part of MD-Bench.
+ * Use of this source code is governed by a LGPL-3.0
+ * license that can be found in the LICENSE file.
+ */
+#ifndef LIKWID_MARKER_H
+#define LIKWID_MARKER_H
+
+
+/** \addtogroup MarkerAPI Marker API module
+*  @{
+*/
+/*!
+\def LIKWID_MARKER_INIT
+Shortcut for likwid_markerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_THREADINIT
+Shortcut for likwid_markerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_REGISTER(regionTag)
+Shortcut for likwid_markerRegisterRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_START(regionTag)
+Shortcut for likwid_markerStartRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_STOP(regionTag)
+Shortcut for likwid_markerStopRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
+Shortcut for likwid_markerGetRegion() for \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_SWITCH
+Shortcut for likwid_markerNextGroup() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_RESET(regionTag)
+Shortcut for likwid_markerResetRegion() with \a regionTag if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_MARKER_CLOSE
+Shortcut for likwid_markerClose() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
+*/
+/** @}*/
+
+#ifdef LIKWID_PERFMON /* each marker macro forwards to the likwid_marker* function of the same purpose */
+#include <likwid.h>
+#define LIKWID_MARKER_INIT likwid_markerInit()
+#define LIKWID_MARKER_THREADINIT likwid_markerThreadInit()
+#define LIKWID_MARKER_SWITCH likwid_markerNextGroup()
+#define LIKWID_MARKER_REGISTER(regionTag) likwid_markerRegisterRegion(regionTag)
+#define LIKWID_MARKER_START(regionTag) likwid_markerStartRegion(regionTag)
+#define LIKWID_MARKER_STOP(regionTag) likwid_markerStopRegion(regionTag)
+#define LIKWID_MARKER_CLOSE likwid_markerClose()
+#define LIKWID_MARKER_RESET(regionTag) likwid_markerResetRegion(regionTag)
+#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) likwid_markerGetRegion(regionTag, nevents, events, time, count)
+#else  /* LIKWID_PERFMON not defined: every marker macro expands to nothing, so likwid.h is not needed */
+#define LIKWID_MARKER_INIT
+#define LIKWID_MARKER_THREADINIT
+#define LIKWID_MARKER_SWITCH
+#define LIKWID_MARKER_REGISTER(regionTag)
+#define LIKWID_MARKER_START(regionTag)
+#define LIKWID_MARKER_STOP(regionTag)
+#define LIKWID_MARKER_CLOSE
+#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
+#define LIKWID_MARKER_RESET(regionTag)
+#endif /* LIKWID_PERFMON */
+
+
+/** \addtogroup NvMarkerAPI NvMarker API module (MarkerAPI for Nvidia GPUs)
+*  @{
+*/
+/*!
+\def LIKWID_NVMARKER_INIT
+Shortcut for likwid_gpuMarkerInit() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_THREADINIT
+Shortcut for likwid_gpuMarkerThreadInit() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_REGISTER(regionTag)
+Shortcut for likwid_gpuMarkerRegisterRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_START(regionTag)
+Shortcut for likwid_gpuMarkerStartRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_STOP(regionTag)
+Shortcut for likwid_gpuMarkerStopRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count)
+Shortcut for likwid_gpuMarkerGetRegion() for \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_SWITCH
+Shortcut for likwid_gpuMarkerNextGroup() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_RESET(regionTag)
+Shortcut for likwid_gpuMarkerResetRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/*!
+\def LIKWID_NVMARKER_CLOSE
+Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
+*/
+/** @}*/
+
+#ifdef LIKWID_NVMON /* each NvMarker macro forwards to the likwid_gpuMarker* function of the same purpose */
+#ifndef LIKWID_WITH_NVMON /* defined before including likwid.h; presumably enables its GPU declarations - confirm */
+#define LIKWID_WITH_NVMON
+#endif
+#include <likwid.h> /* NOTE(review): may be a no-op if the LIKWID_PERFMON branch already included it - confirm GPU prototypes are still visible */
+#define LIKWID_NVMARKER_INIT likwid_gpuMarkerInit()
+#define LIKWID_NVMARKER_THREADINIT likwid_gpuMarkerThreadInit()
+#define LIKWID_NVMARKER_SWITCH likwid_gpuMarkerNextGroup()
+#define LIKWID_NVMARKER_REGISTER(regionTag) likwid_gpuMarkerRegisterRegion(regionTag)
+#define LIKWID_NVMARKER_START(regionTag) likwid_gpuMarkerStartRegion(regionTag)
+#define LIKWID_NVMARKER_STOP(regionTag) likwid_gpuMarkerStopRegion(regionTag)
+#define LIKWID_NVMARKER_CLOSE likwid_gpuMarkerClose()
+#define LIKWID_NVMARKER_RESET(regionTag) likwid_gpuMarkerResetRegion(regionTag)
+#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) \
+    likwid_gpuMarkerGetRegion(regionTag, ngpus, nevents, events, time, count)
+#else /* LIKWID_NVMON not defined: every NvMarker macro expands to nothing */
+#define LIKWID_NVMARKER_INIT
+#define LIKWID_NVMARKER_THREADINIT
+#define LIKWID_NVMARKER_SWITCH
+#define LIKWID_NVMARKER_REGISTER(regionTag)
+#define LIKWID_NVMARKER_START(regionTag)
+#define LIKWID_NVMARKER_STOP(regionTag)
+#define LIKWID_NVMARKER_CLOSE
+#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) /* same six parameters as the real macro so call sites build either way */
+#define LIKWID_NVMARKER_RESET(regionTag)
+#endif /* LIKWID_NVMON */
+
+
+
+#endif /* LIKWID_MARKER_H */
diff --git a/src/pairs/code_gen/cgen.py b/src/pairs/code_gen/cgen.py
index 26845536857ddf39a8a7906bd3e35bb3569ad40a..5a17c302d6c795eb01ebb15dbd70ea1452a5049d 100644
--- a/src/pairs/code_gen/cgen.py
+++ b/src/pairs/code_gen/cgen.py
@@ -66,6 +66,7 @@ class CGen:
         self.print("#include <stdio.h>")
         self.print("#include <stdlib.h>")
         self.print("//---")
+        self.print("#include \"runtime/likwid-marker.h\"")
         self.print("#include \"runtime/pairs.hpp\"")
         self.print("#include \"runtime/read_from_file.hpp\"")
         self.print("#include \"runtime/timing.hpp\"")
diff --git a/src/pairs/mapping/funcs.py b/src/pairs/mapping/funcs.py
index 925115623c190b2f340c832a065f0b0bd9640bea..63491aad509bce1b4b80ac2bdbd8b872b92df9d8 100644
--- a/src/pairs/mapping/funcs.py
+++ b/src/pairs/mapping/funcs.py
@@ -319,10 +319,10 @@ def compute(sim, func, cutoff_radius=None, symbols={}, pre_step=False, skip_firs
             ir.visit(tree)
 
     if pre_step:
-        sim.build_pre_step_module_with_statements(skip_first=skip_first)
+        sim.build_pre_step_module_with_statements(skip_first=skip_first, profile=True)
 
     else:
-        sim.build_module_with_statements(skip_first=skip_first)
+        sim.build_module_with_statements(skip_first=skip_first, profile=True)
 
 
 def setup(sim, func, symbols={}):
diff --git a/src/pairs/sim/instrumentation.py b/src/pairs/sim/instrumentation.py
index 8bae4241b702f6aa94741df38c3fa3443e00ebbd..a017c047883eb8e20cad1e3af9d1f3655085b65b 100644
--- a/src/pairs/sim/instrumentation.py
+++ b/src/pairs/sim/instrumentation.py
@@ -23,6 +23,6 @@ class RegisterMarkers(FinalLowerable):
     @pairs_inline
     def lower(self):
         if self.sim._enable_profiler:
-            for m in self.module_list:
+            for m in self.sim.module_list:
                 if m.name != 'main' and m.must_profile():
                     Call_Void(self.sim, "LIKWID_MARKER_REGISTER", [m.name])
diff --git a/src/pairs/sim/simulation.py b/src/pairs/sim/simulation.py
index 0e39804e32ecddb74f16c9ea76bb9b85f8b3411e..2ef4feaf65a3b5c1e21aad19ea3f6b9416ea7079 100644
--- a/src/pairs/sim/simulation.py
+++ b/src/pairs/sim/simulation.py
@@ -252,25 +252,30 @@ class Simulation:
                 check_properties_resize=self._check_properties_resize,
                 run_on_device=False))
 
-    def build_pre_step_module_with_statements(self, run_on_device=True, skip_first=False):
+    def build_pre_step_module_with_statements(self, run_on_device=True, skip_first=False, profile=False):
         module = Module(self, name=self._module_name,
                               block=Block(self, self._block),
                               resizes_to_check=self._resizes_to_check,
                               check_properties_resize=self._check_properties_resize,
                               run_on_device=run_on_device)
 
+        if profile:
+            module.profile()
+
         if skip_first:
             self.pre_step_functions.append((module, {'skip_first': True}))
 
         else:
             self.pre_step_functions.append(module)
 
-    def build_module_with_statements(self, run_on_device=True, skip_first=False):
+    def build_module_with_statements(self, run_on_device=True, skip_first=False, profile=False):
         module = Module(self, name=self._module_name,
                               block=Block(self, self._block),
                               resizes_to_check=self._resizes_to_check,
                               check_properties_resize=self._check_properties_resize,
                               run_on_device=run_on_device)
+        if profile:
+            module.profile()
 
         if skip_first:
             self.functions.append((module, {'skip_first': True}))
diff --git a/src/pairs/transformations/instrumentation.py b/src/pairs/transformations/instrumentation.py
index c4a5b68168891c4686c537d5fe87a22921c16734..53d0e52840744ac552158f1e0864da66c561cc0d 100644
--- a/src/pairs/transformations/instrumentation.py
+++ b/src/pairs/transformations/instrumentation.py
@@ -16,11 +16,11 @@ class AddModulesInstrumentation(Mutator):
 
         timer_id = module.module_id + 1
         start_timer = Call_Void(ast_node.sim, "pairs::start_timer", [timer_id])
-        end_timer = Call_Void(ast_node.sim, "pairs::stop_timer", [timer_id])
+        stop_timer = Call_Void(ast_node.sim, "pairs::stop_timer", [timer_id])
 
         if module.must_profile():
             start_marker = Call_Void(ast_node.sim, "LIKWID_MARKER_START", [module.name])
             stop_marker = Call_Void(ast_node.sim, "LIKWID_MARKER_STOP", [module.name])
-            return Block(ast_node.sim, [start_timer, start_marker, ast_node, end_marker, end_timer])
+            return Block(ast_node.sim, [start_timer, start_marker, ast_node, stop_marker, stop_timer])
 
-        return Block(ast_node.sim, [start_timer, ast_node, end_timer])
+        return Block(ast_node.sim, [start_timer, ast_node, stop_timer])