author	Andy Lutomirski <luto@kernel.org>	2016-01-06 15:21:01 -0500
committer	Ingo Molnar <mingo@kernel.org>	2016-01-11 06:03:15 -0500
commit	71b3c126e61177eb693423f2e18a1914205b165e (patch)
tree	a91955974bf8281db543a6ae1fee03ffdc6a9011 /arch/x86/include
parent	afd2ff9b7e1b367172f18ba7f693dfb62bdcb2dc (diff)
x86/mm: Add barriers and document switch_mm()-vs-flush synchronization
When switch_mm() activates a new PGD, it also sets a bit that tells other CPUs that the PGD is in use so that TLB flush IPIs will be sent. In order for that to work correctly, the bit needs to be visible prior to loading the PGD and therefore starting to fill the local TLB.

Document all the barriers that make this work correctly and add a couple that were missing.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
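For readers who want to see the ordering problem in isolation, here is a minimal userspace analogy, not the kernel code itself, written with C11 atomics. The names pte and cpumask_bit are invented stand-ins for the PTE written by the flusher and for this CPU's bit in mm_cpumask; the seq_cst fences stand in for the full barriers the patch documents (load_cr3() on the switch_mm() side).

/*
 * Illustration only: a userspace analogy of the switch_mm()-vs-flush
 * ordering, using C11 atomics. "pte" and "cpumask_bit" are invented
 * stand-ins, not kernel symbols. Build with: cc -std=c11 -c example.c
 */
#include <stdatomic.h>

static _Atomic int pte;          /* stands in for the PTE for 'next' */
static _Atomic int cpumask_bit;  /* stands in for this CPU's bit in mm_cpumask */

/* CPU 0 side: write the PTE, then decide whether to send a flush IPI. */
int cpu0_flusher(void)
{
	atomic_store_explicit(&pte, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* full barrier */
	/* If this reads 0, no IPI is sent to CPU 1. */
	return atomic_load_explicit(&cpumask_bit, memory_order_relaxed);
}

/* CPU 1 side: advertise the mm, then (implicitly) load from its PTEs. */
int cpu1_switch_mm(void)
{
	atomic_store_explicit(&cpumask_bit, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* load_cr3() plays this role */
	/* A TLB fill after load_cr3() behaves like this load of the PTE. */
	return atomic_load_explicit(&pte, memory_order_relaxed);
}

With both fences present, at least one side must observe the other side's store, so either the IPI gets sent or CPU 1 already sees the new PTE; removing either fence reopens the stale-TLB window described above.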
Diffstat (limited to 'arch/x86/include')
-rw-r--r--	arch/x86/include/asm/mmu_context.h	33
1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 379cd3658799..1edc9cd198b8 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -116,8 +116,34 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 		cpumask_set_cpu(cpu, mm_cpumask(next));
 
-		/* Re-load page tables */
+		/*
+		 * Re-load page tables.
+		 *
+		 * This logic has an ordering constraint:
+		 *
+		 *  CPU 0: Write to a PTE for 'next'
+		 *  CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+		 *  CPU 1: set bit 1 in next's mm_cpumask
+		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+		 *
+		 * We need to prevent an outcome in which CPU 1 observes
+		 * the new PTE value and CPU 0 observes bit 1 clear in
+		 * mm_cpumask. (If that occurs, then the IPI will never
+		 * be sent, and CPU 0's TLB will contain a stale entry.)
+		 *
+		 * The bad outcome can occur if either CPU's load is
+		 * reordered before that CPU's store, so both CPUs must
+		 * execute full barriers to prevent this from happening.
+		 *
+		 * Thus, switch_mm needs a full barrier between the
+		 * store to mm_cpumask and any operation that could load
+		 * from next->pgd. This barrier synchronizes with
+		 * remote TLB flushers. Fortunately, load_cr3 is
+		 * serializing and thus acts as a full barrier.
+		 *
+		 */
 		load_cr3(next->pgd);
+
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 
 		/* Stop flush ipis for the previous mm */
@@ -156,10 +182,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			 * schedule, protecting us from simultaneous changes.
 			 */
 			cpumask_set_cpu(cpu, mm_cpumask(next));
+
 			/*
 			 * We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
+			 *
+			 * As above, this is a barrier that forces
+			 * TLB repopulation to be ordered after the
+			 * store to mm_cpumask.
 			 */
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
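To make the "bad outcome" from the comment above concrete, the following standalone program (again an analogy with invented names, not kernel code) runs the two store-then-load sides without full barriers. On x86, store buffering can let both loads return 0, which corresponds to the flusher skipping the IPI while the switching CPU repopulates its TLB from stale entries; in practice the racy window is narrow, so the count may well stay at zero, but the pattern is the one the added barriers forbid.

/* Standalone demo; build with: cc -std=c11 -pthread sb_demo.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic int pte, cpumask_bit;	/* invented stand-ins, as above */
static _Atomic int r_flusher, r_switcher;

static void *flusher(void *arg)
{
	(void)arg;
	atomic_store_explicit(&pte, 1, memory_order_relaxed);
	/* Deliberately no fence: this is the ordering bug being guarded against. */
	atomic_store_explicit(&r_flusher,
			      atomic_load_explicit(&cpumask_bit, memory_order_relaxed),
			      memory_order_relaxed);
	return NULL;
}

static void *switcher(void *arg)
{
	(void)arg;
	atomic_store_explicit(&cpumask_bit, 1, memory_order_relaxed);
	/* Deliberately no fence: load_cr3() would normally provide it. */
	atomic_store_explicit(&r_switcher,
			      atomic_load_explicit(&pte, memory_order_relaxed),
			      memory_order_relaxed);
	return NULL;
}

int main(void)
{
	int hits = 0;

	for (int i = 0; i < 100000; i++) {
		pthread_t a, b;

		atomic_store(&pte, 0);
		atomic_store(&cpumask_bit, 0);
		pthread_create(&a, NULL, flusher, NULL);
		pthread_create(&b, NULL, switcher, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		/* Both sides missed the other's store: the lost-IPI analogue. */
		if (atomic_load(&r_flusher) == 0 && atomic_load(&r_switcher) == 0)
			hits++;
	}
	printf("forbidden outcome observed %d times\n", hits);
	return 0;
}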