diff --git a/pystencils/backends/x86_instruction_sets.py b/pystencils/backends/x86_instruction_sets.py
index f72b48266195dd1a30149325e5949723a6b9ac7e..db3dc362cf52266d90980b11d2d5c877bb491ba4 100644
--- a/pystencils/backends/x86_instruction_sets.py
+++ b/pystencils/backends/x86_instruction_sets.py
@@ -115,8 +115,8 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'):
         result[intrinsic_id] = pre + "_" + name + "_" + suf + mask_suffix + arg_string
 
     result['dataTypePrefix'] = {
-        'double': "_" + pre + 'd',
-        'float': "_" + pre,
+        'double': "_" + pre[0:2] + pre[3:] + 'd',
+        'float': "_" + pre[0:2] + pre[3:],
     }
 
     bit_width = result['width'] * (64 if data_type == 'double' else 32)
diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index 812a6163465295911f8b252a2a4eab0af7ec2417..47a529a4eaa2f86947f3031b05d22d00e45d5f86 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -7,8 +7,8 @@ from sympy.logic.boolalg import BooleanFunction, BooleanAtom
 
 import pystencils.astnodes as ast
 from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
-from pystencils.typing import (
-    PointerType, TypedSymbol, VectorType, CastFunc, collate_types, get_type_of_expression, VectorMemoryAccess)
+from pystencils.typing import (BasicType, PointerType, TypedSymbol, VectorType, CastFunc, collate_types,
+                               get_type_of_expression, VectorMemoryAccess)
 from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
 from pystencils.functions import DivFunc
 from pystencils.field import Field
@@ -252,7 +252,11 @@ def insert_vector_casts(ast_node, default_float_type='double'):
         if isinstance(expr, VectorMemoryAccess):
             return VectorMemoryAccess(*expr.args[0:4], visit_expr(expr.args[4], default_type), *expr.args[5:])
         elif isinstance(expr, CastFunc):
-            return expr  # TODO here, since CastFunc might not be vector???
+            cast_type = expr.args[1]
+            arg = visit_expr(expr.args[0], default_type)
+            assert cast_type in [BasicType('float32'), BasicType('float64')], \
+                f'Vectorization cannot vectorize type {cast_type}'
+            return expr.func(arg, VectorType(cast_type))
         elif expr.func is sp.Abs and 'abs' not in ast_node.instruction_set:
             new_arg = visit_expr(expr.args[0], default_type)
             base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is VectorMemoryAccess \
@@ -311,13 +315,18 @@ def insert_vector_casts(ast_node, default_float_type='double'):
         elif isinstance(expr, (sp.Number, TypedSymbol, BooleanAtom)):
             return expr
         else:
-            raise NotImplementedError(f'Should I raise or should I return now? {expr}')
+            raise NotImplementedError(
+                f'Vectorization of {type(expr).__name__} expressions is not implemented: {expr}')
 
     def visit_node(node, substitution_dict, default_type='double'):
         substitution_dict = substitution_dict.copy()
         for arg in node.args:
             if isinstance(arg, ast.SympyAssignment):
+                # TODO only if not remainder loop (? if no VectorAccess then remainder loop)
                 assignment = arg
+                # If there is a remainder loop we do not vectorise it, thus lhs will indicate this
+                if isinstance(assignment.lhs, ast.ResolvedFieldAccess):
+                    continue
                 subs_expr = fast_subs(assignment.rhs, substitution_dict,
                                       skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
                 assignment.rhs = visit_expr(subs_expr, default_type)
diff --git a/pystencils/typing/leaf_typing.py b/pystencils/typing/leaf_typing.py
index 20f92eabdf8afe88e039d165539d00335dcd95af..aa23de65d8ba9329cedb36acf047ac81e5f414f1 100644
--- a/pystencils/typing/leaf_typing.py
+++ b/pystencils/typing/leaf_typing.py
@@ -21,6 +21,7 @@ from pystencils.typing.types import BasicType, create_type, PointerType
 from pystencils.typing.utilities import get_type_of_expression, collate_types
 from pystencils.typing.cast_functions import CastFunc, BooleanCastFunc
 from pystencils.typing.typed_sympy import TypedSymbol
+from pystencils.fast_approximation import fast_sqrt, fast_division, fast_inv_sqrt
 from pystencils.utils import ContextVar
 
 
@@ -215,6 +216,12 @@ class TypeAdder:
                 return new_func, collated_type
             else:
                 return CastFunc(new_func, collated_type), collated_type
+        elif isinstance(expr, (fast_sqrt, fast_division, fast_inv_sqrt)):
+            args_types = [self.figure_out_type(arg) for arg in expr.args]
+            collated_type = BasicType('float32')
+            new_args = [a if t.dtype_eq(collated_type) else CastFunc(a, collated_type) for a, t in args_types]
+            new_func = expr.func(*new_args) if new_args else expr
+            return CastFunc(new_func, collated_type), collated_type
         elif isinstance(expr, (sp.Add, sp.Mul, sp.Abs, sp.Min, sp.Max, DivFunc, sp.UnevaluatedExpr)):
             args_types = [self.figure_out_type(arg) for arg in expr.args]
             collated_type = collate_types([t for _, t in args_types])
diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index a7a335c7592f87df4524276dffd18b03c8f0a1c8..9c9a99c3458797255cd3f1b8f41eb9e0e46af4ca 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -4,7 +4,7 @@ import pystencils.config
 import sympy as sp
 
 import pystencils as ps
-from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
+from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
 from pystencils.cpu.vectorization import vectorize
 from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
 from pystencils.enums import Target
@@ -13,10 +13,25 @@ from pystencils.transformations import replace_inner_stride_with_one
 supported_instruction_sets = get_supported_instruction_sets()
 if supported_instruction_sets:
     instruction_set = supported_instruction_sets[-1]
+    instructions = get_vector_instruction_set(instruction_set=instruction_set)
 else:
     instruction_set = None
 
 
+# CI:
+# FAILED pystencils_tests/test_vectorization.py::test_vectorised_pow - NotImple...
+# FAILED pystencils_tests/test_vectorization.py::test_inplace_update - NotImple...
+# FAILED pystencils_tests/test_vectorization.py::test_vectorization_fixed_size
+# FAILED pystencils_tests/test_vectorization.py::test_vectorised_fast_approximations
+# FAILED pystencils_tests/test_vectorization.py::test_vectorization_variable_size
+
+# Jan:
+# test_aligned_and_nt_stores
+# test_aligned_and_nt_stores_openmp
+# test_hardware_query
+# test_vectorised_fast_approximations
+
+# TODO: Skip tests if no instruction set is available
 def test_vector_type_propagation(instruction_set=instruction_set):
     a, b, c, d, e = sp.symbols("a b c d e")
     arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2))
@@ -118,6 +133,10 @@ def test_vectorization_fixed_size(instruction_set=instruction_set):
 
         ast = ps.create_kernel(update_rule)
         vectorize(ast, instruction_set=instruction_set)
+        code = ps.get_code_str(ast)
+        # the generated code must contain the SIMD add intrinsic, i.e. the loop was actually vectorised
+        add_instruction = instructions["+"][:instructions["+"].find("(")]
+        assert add_instruction in code
 
         func = ast.compile()
         dst = np.zeros_like(arr)