Diffstat (limited to 'arch/x86/mm/tlb.c')
-rw-r--r--	arch/x86/mm/tlb.c	115
1 file changed, 86 insertions(+), 29 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bddd6b3cee1d..03b6b4c2238d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,7 +7,6 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
-#include <linux/ptrace.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -31,6 +30,12 @@
  */
 
 /*
+ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_ibpb.
+ */
+#define LAST_USER_MM_IBPB	0x1UL
+
+/*
  * We get here when we do something requiring a TLB invalidation
  * but could not go invalidate all of the contexts. We do the
  * necessary invalidation by clearing out the 'ctx_id' which
@@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
 	}
 }
 
-static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
+static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+{
+	unsigned long next_tif = task_thread_info(next)->flags;
+	unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+
+	return (unsigned long)next->mm | ibpb;
+}
+
+static void cond_ibpb(struct task_struct *next)
 {
+	if (!next || !next->mm)
+		return;
+
 	/*
-	 * Check if the current (previous) task has access to the memory
-	 * of the @tsk (next) task. If access is denied, make sure to
-	 * issue a IBPB to stop user->user Spectre-v2 attacks.
-	 *
-	 * Note: __ptrace_may_access() returns 0 or -ERRNO.
+	 * Both, the conditional and the always IBPB mode use the mm
+	 * pointer to avoid the IBPB when switching between tasks of the
+	 * same process. Using the mm pointer instead of mm->context.ctx_id
+	 * opens a hypothetical hole vs. mm_struct reuse, which is more or
+	 * less impossible to control by an attacker. Aside of that it
+	 * would only affect the first schedule so the theoretically
+	 * exposed data is not really interesting.
 	 */
-	return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
-		ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
+	if (static_branch_likely(&switch_mm_cond_ibpb)) {
+		unsigned long prev_mm, next_mm;
+
+		/*
+		 * This is a bit more complex than the always mode because
+		 * it has to handle two cases:
+		 *
+		 * 1) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB not set.
+		 *
+		 * 2) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB not set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB set.
+		 *
+		 * This could be done by unconditionally issuing IBPB when
+		 * a task which has TIF_SPEC_IB set is either scheduled in
+		 * or out. Though that results in two flushes when:
+		 *
+		 * - the same user space task is scheduled out and later
+		 *   scheduled in again and only a kernel thread ran in
+		 *   between.
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in after a kernel thread ran in between
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in immediately.
+		 *
+		 * Optimize this with reasonably small overhead for the
+		 * above cases. Mangle the TIF_SPEC_IB bit into the mm
+		 * pointer of the incoming task which is stored in
+		 * cpu_tlbstate.last_user_mm_ibpb for comparison.
+		 */
+		next_mm = mm_mangle_tif_spec_ib(next);
+		prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
+
+		/*
+		 * Issue IBPB only if the mm's are different and one or
+		 * both have the IBPB bit set.
+		 */
+		if (next_mm != prev_mm &&
+		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
+			indirect_branch_prediction_barrier();
+
+		this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
+	}
+
+	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
+		/*
+		 * Only flush when switching to a user space task with a
+		 * different context than the user space task which ran
+		 * last on this CPU.
+		 */
+		if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+			indirect_branch_prediction_barrier();
+			this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
+		}
+	}
 }
 
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
@@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		new_asid = prev_asid;
 		need_flush = true;
 	} else {
-		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
-
 		/*
 		 * Avoid user/user BTB poisoning by flushing the branch
 		 * predictor when switching between processes. This stops
 		 * one process from doing Spectre-v2 attacks on another.
-		 *
-		 * As an optimization, flush indirect branches only when
-		 * switching into a processes that can't be ptrace by the
-		 * current one (as in such case, attacker has much more
-		 * convenient way how to tamper with the next process than
-		 * branch buffer poisoning).
 		 */
-		if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
-		    ibpb_needed(tsk, last_ctx_id))
-			indirect_branch_prediction_barrier();
+		cond_ibpb(tsk);
 
 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 			/*
@@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
 
-	/*
-	 * Record last user mm's context id, so we can avoid
-	 * flushing branch buffer with IBPB if we switch back
-	 * to the same user.
-	 */
-	if (next != &init_mm)
-		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
-
 	/* Make sure we write CR3 before loaded_mm. */
 	barrier();
 
@@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void)
 	write_cr3(build_cr3(mm->pgd, 0));
 
 	/* Reinitialize tlbstate. */
-	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
+	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
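
Note (not part of the commit above): cond_ibpb() avoids redundant barriers by folding the TIF_SPEC_IB state into bit 0 of the otherwise aligned mm pointer and comparing that mangled value against the one cached per CPU. Below is a minimal user-space C sketch of just that comparison; the struct mm stand-in and the mangle()/ibpb_needed() helpers are hypothetical illustrations, not kernel APIs.

/*
 * Illustrative user-space sketch of the pointer-mangling check, under the
 * assumption that mm pointers are at least word-aligned so bit 0 is free.
 */
#include <stdbool.h>
#include <stdio.h>

#define LAST_USER_MM_IBPB	0x1UL	/* bit 0 of the aligned mm pointer */

struct mm { int dummy; };		/* hypothetical stand-in for struct mm_struct */

/* Fold the per-task "issue IBPB" flag into the low bit of the mm pointer. */
static unsigned long mangle(const struct mm *mm, bool spec_ib)
{
	return (unsigned long)mm | (spec_ib ? LAST_USER_MM_IBPB : 0);
}

/* Barrier only if the mm differs and at least one side has the bit set. */
static bool ibpb_needed(unsigned long prev_mm, unsigned long next_mm)
{
	return next_mm != prev_mm && ((next_mm | prev_mm) & LAST_USER_MM_IBPB);
}

int main(void)
{
	struct mm a, b;

	/* Same process rescheduled: no barrier even though the flag is set. */
	printf("%d\n", ibpb_needed(mangle(&a, true), mangle(&a, true)));	/* 0 */
	/* Different process and one side flagged: barrier. */
	printf("%d\n", ibpb_needed(mangle(&a, true), mangle(&b, false)));	/* 1 */
	/* Different process, neither side opted in: no barrier. */
	printf("%d\n", ibpb_needed(mangle(&a, false), mangle(&b, false)));	/* 0 */
	return 0;
}

The flag can ride along in the pointer itself because mm_struct allocations are at least word-aligned, so bit 0 is always zero in a real mm pointer.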