diff --git a/src/pystencils/config.py b/src/pystencils/config.py index 5fff3b799c3bcd9f303ac155ca5069308379bdb8..fafc34178f38849c3ef1f71705314fc65e70989a 100644 --- a/src/pystencils/config.py +++ b/src/pystencils/config.py @@ -138,6 +138,12 @@ class CreateKernelConfig: Don't check that loop iterations are independent. This is needed e.g. for periodicity kernel, that access the field outside the iteration bounds. Use with care! """ + check_thread_safety: bool = True + """ + Assignments are considered thread safe if reads and writes only target the same locations. If this is not the case, + multithreaded optimisations will fail. The thread safety check can be deactivated to use e.g. GPUs anyway. + Use with care! + """ class DataTypeFactory: """Because of pickle, we need to have a nested class, instead of a lambda in __post_init__""" diff --git a/src/pystencils/kernel_contrains_check.py b/src/pystencils/kernel_contrains_check.py index f1fa4b8a141400c0880672f4fdbcd356b59d4ccd..b72e084a32df8e7f96859c61cb9c56265dcc90e7 100644 --- a/src/pystencils/kernel_contrains_check.py +++ b/src/pystencils/kernel_contrains_check.py @@ -38,10 +38,12 @@ class KernelConstraintsCheck: def __init__(self, check_independence_condition=True, check_double_write_condition=True): self.scopes = NestedScopes() + self.field_reads = defaultdict(set) self.field_writes = defaultdict(set) self.fields_read = set() self.check_independence_condition = check_independence_condition self.check_double_write_condition = check_double_write_condition + self.thread_safe = True def visit(self, obj): if isinstance(obj, (AssignmentCollection, NodeCollection)): @@ -111,6 +113,11 @@ class KernelConstraintsCheck: if self.check_double_write_condition and len(self.field_writes[fai]) > 1: raise ValueError( f"Field {lhs.field.name} is written at two different locations") + + if fai in self.field_reads: + reads = tuple(self.field_reads[fai]) + if len(reads) > 1 or lhs.offsets != reads[0]: + self.thread_safe = False elif 
isinstance(lhs, sp.Symbol): if self.scopes.is_defined_locally(lhs): raise ValueError(f"Assignments not in SSA form, multiple assignments to {lhs.name}") @@ -120,8 +127,9 @@ class KernelConstraintsCheck: def update_accesses_rhs(self, rhs): if isinstance(rhs, Field.Access) and self.check_independence_condition: - writes = self.field_writes[self.FieldAndIndex( - rhs.field, rhs.index)] + fai = self.FieldAndIndex(rhs.field, rhs.index) + writes = self.field_writes[fai] + self.field_reads[fai].add(rhs.offsets) for write_offset in writes: assert len(writes) == 1 if write_offset != rhs.offsets: diff --git a/src/pystencils/kernelcreation.py b/src/pystencils/kernelcreation.py index 385f42d2f01c26b633fcc25799665cab2072dceb..8dd7b90fc25431ba78c4c698bab7de4dd8b38b85 100644 --- a/src/pystencils/kernelcreation.py +++ b/src/pystencils/kernelcreation.py @@ -128,7 +128,14 @@ def create_domain_kernel(assignments: NodeCollection, *, config: CreateKernelCon # --- check constrains check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check, check_double_write_condition=not config.allow_double_writes) + check.visit(assignments) + if not check.thread_safe and config.check_thread_safety: + base = "Assignments are not thread safe because data is read and written on different locations." + if config.cpu_openmp: + raise ValueError(f"{base} OpenMP optimisation is not permitted in this scenario.") + if config.target == Target.GPU: + raise ValueError(f"{base} GPU target is not permitted in this case, only CPU target with single thread") assignments.bound_fields = check.fields_written assignments.rhs_fields = check.fields_read