author		Rik van Riel <riel@redhat.com>	2013-12-18 20:08:44 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-12-18 22:04:51 -0500
commit		20841405940e7be0617612d521e206e4b6b325db (patch)
tree		ff60aa7674876d90e25db4046d9916f73680682b /include
parent		de466bd628e8d663fdf3f791bc8db318ee85c714 (diff)
mm: fix TLB flush race between migration, and change_protection_range
There are a few subtle races between change_protection_range (used by
mprotect and change_prot_numa) on one side, and NUMA page migration and
compaction on the other side.

The basic race is that there is a time window between when the PTE gets
made non-present (PROT_NONE or NUMA) and when the TLB is flushed. During
that time, a CPU may continue writing to the page. This is fine most of
the time; however, compaction or the NUMA migration code may come in and
migrate the page away. When that happens, the CPU may continue writing,
through the cached translation, to what is no longer the current memory
location of the process.

This only affects x86, which has a somewhat optimistic pte_accessible.
All other architectures appear to be safe, and will either always flush,
or flush whenever there is a valid mapping, even with no permissions
(SPARC).

The basic race looks like this:

CPU A                     CPU B                       CPU C

                                                      load TLB entry
make entry PTE/PMD_NUMA
                          fault on entry
                                                      read/write old page
                          start migrating page
                          change PTE/PMD to new page
                                                      read/write old page [*]
flush TLB
                                                      reload TLB from new entry
                                                      read/write new page
                                                      lose data

[*] the old page may belong to a new user at this point!

The obvious fix is to flush remote TLB entries, by making pte_accessible
aware of the fact that PROT_NONE and PROT_NUMA memory may still be
accessible if there is a TLB flush pending for the mm.

This should fix both NUMA migration and compaction.

[mgorman@suse.de: fix build]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Alex Thorlton <athorlton@sgi.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
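For illustration only: the x86 side of the fix is not part of this
include/-only diff, but with the new signature an architecture's
pte_accessible() can treat a PROT_NONE/NUMA PTE as still accessible while
a flush is pending. A minimal sketch, assuming x86-style PTE flag names
(_PAGE_PRESENT, _PAGE_PROTNONE, _PAGE_NUMA) and pte_flags():

/* Sketch of an x86-style pte_accessible(); not taken from this diff. */
static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
{
	if (pte_flags(a) & _PAGE_PRESENT)
		return true;

	/*
	 * A PROT_NONE/NUMA PTE may still be cached in a remote TLB if the
	 * batched flush for this mm has not completed yet, so report it as
	 * accessible and force callers to flush.
	 */
	if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) &&
			mm_tlb_flush_pending(mm))
		return true;

	return false;
}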
Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/pgtable.h    2
-rw-r--r--  include/linux/mm_types.h        44
2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f330d28e4d0e..b12079afbd5f 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -217,7 +217,7 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #endif
 
 #ifndef pte_accessible
-# define pte_accessible(pte)		((void)(pte),1)
+# define pte_accessible(mm, pte)	((void)(pte), 1)
 #endif
 
 #ifndef flush_tlb_fix_spurious_fault
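With the extra mm argument, callers of pte_accessible() now pass the mm so
the pending-flush state can be consulted. The callers themselves live
outside include/ and are not part of this diff; as a hedged sketch, a
generic ptep_clear_flush()-style helper would use the new form like this:

/* Sketch of a caller after the signature change; assumed, not from this diff. */
pte_t ptep_clear_flush_sketch(struct vm_area_struct *vma, unsigned long address,
			      pte_t *ptep)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t pte;

	pte = ptep_get_and_clear(mm, address, ptep);
	if (pte_accessible(mm, pte))	/* was pte_accessible(pte) */
		flush_tlb_page(vma, address);
	return pte;
}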
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bd299418a934..e5c49c30460f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -443,6 +443,14 @@ struct mm_struct {
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
 #endif
+#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+	/*
+	 * An operation with batched TLB flushing is going on. Anything that
+	 * can move process memory needs to flush the TLB when moving a
+	 * PROT_NONE or PROT_NUMA mapped page.
+	 */
+	bool tlb_flush_pending;
+#endif
 	struct uprobes_state uprobes_state;
 };
 
@@ -459,4 +467,40 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return mm->cpu_vm_mask_var;
 }
 
+#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+/*
+ * Memory barriers to keep this state in sync are graciously provided by
+ * the page table locks, outside of which no page table modifications happen.
+ * The barriers below prevent the compiler from re-ordering the instructions
+ * around the memory barriers that are already present in the code.
+ */
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+	barrier();
+	return mm->tlb_flush_pending;
+}
+static inline void set_tlb_flush_pending(struct mm_struct *mm)
+{
+	mm->tlb_flush_pending = true;
+	barrier();
+}
+/* Clearing is done after a TLB flush, which also provides a barrier. */
+static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+{
+	barrier();
+	mm->tlb_flush_pending = false;
+}
+#else
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+	return false;
+}
+static inline void set_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+#endif
+
 #endif /* _LINUX_MM_TYPES_H */
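The mprotect/change_prot_numa side (in mm/, outside this include/-only
diff) is expected to bracket the PTE rewrite and the TLB flush with these
helpers so that migration and compaction see the pending flush. A minimal
sketch, assuming a change_protection_range()-style caller; the function
name and page-table walk are placeholders, not the actual mm/ change:

/*
 * Sketch only: the pending flag is set before any PTE is made
 * PROT_NONE/NUMA and cleared only after the TLB flush has been issued.
 */
static unsigned long change_protection_range_sketch(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long start = addr;
	unsigned long pages = 0;

	set_tlb_flush_pending(mm);	/* visible before any PTE is changed */

	/* ... walk the page tables and rewrite the PTEs here ... */

	/* Only flush the TLB if any entries were actually modified. */
	if (pages)
		flush_tlb_range(vma, start, end);
	clear_tlb_flush_pending(mm);	/* flush done; migration may proceed */

	return pages;
}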