Diffstat (limited to 'arch/x86/mm/tlb.c')
 -rw-r--r--  arch/x86/mm/tlb.c | 115
 1 file changed, 86 insertions, 29 deletions
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bddd6b3cee1d..03b6b4c2238d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,7 +7,6 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
-#include <linux/ptrace.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -31,6 +30,12 @@
  */
 
 /*
+ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_ibpb.
+ */
+#define LAST_USER_MM_IBPB	0x1UL
+
+/*
  * We get here when we do something requiring a TLB invalidation
  * but could not go invalidate all of the contexts. We do the
  * necessary invalidation by clearing out the 'ctx_id' which
@@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
 	}
 }
 
-static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
+static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+{
+	unsigned long next_tif = task_thread_info(next)->flags;
+	unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+
+	return (unsigned long)next->mm | ibpb;
+}
+
+static void cond_ibpb(struct task_struct *next)
 {
+	if (!next || !next->mm)
+		return;
+
 	/*
-	 * Check if the current (previous) task has access to the memory
-	 * of the @tsk (next) task. If access is denied, make sure to
-	 * issue a IBPB to stop user->user Spectre-v2 attacks.
-	 *
-	 * Note: __ptrace_may_access() returns 0 or -ERRNO.
+	 * Both, the conditional and the always IBPB mode use the mm
+	 * pointer to avoid the IBPB when switching between tasks of the
+	 * same process. Using the mm pointer instead of mm->context.ctx_id
+	 * opens a hypothetical hole vs. mm_struct reuse, which is more or
+	 * less impossible to control by an attacker. Aside of that it
+	 * would only affect the first schedule so the theoretically
+	 * exposed data is not really interesting.
 	 */
-	return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
-		ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
+	if (static_branch_likely(&switch_mm_cond_ibpb)) {
+		unsigned long prev_mm, next_mm;
+
+		/*
+		 * This is a bit more complex than the always mode because
+		 * it has to handle two cases:
+		 *
+		 * 1) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB not set.
+		 *
+		 * 2) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB not set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB set.
+		 *
+		 * This could be done by unconditionally issuing IBPB when
+		 * a task which has TIF_SPEC_IB set is either scheduled in
+		 * or out. Though that results in two flushes when:
+		 *
+		 * - the same user space task is scheduled out and later
+		 *   scheduled in again and only a kernel thread ran in
+		 *   between.
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in after a kernel thread ran in between
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in immediately.
+		 *
+		 * Optimize this with reasonably small overhead for the
+		 * above cases. Mangle the TIF_SPEC_IB bit into the mm
+		 * pointer of the incoming task which is stored in
+		 * cpu_tlbstate.last_user_mm_ibpb for comparison.
+		 */
+		next_mm = mm_mangle_tif_spec_ib(next);
+		prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
+
+		/*
+		 * Issue IBPB only if the mm's are different and one or
+		 * both have the IBPB bit set.
+		 */
+		if (next_mm != prev_mm &&
+		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
+			indirect_branch_prediction_barrier();
+
+		this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
+	}
+
+	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
+		/*
+		 * Only flush when switching to a user space task with a
+		 * different context than the user space task which ran
+		 * last on this CPU.
+		 */
+		if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+			indirect_branch_prediction_barrier();
+			this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
+		}
+	}
 }
 
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
@@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		new_asid = prev_asid;
 		need_flush = true;
 	} else {
-		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
-
 		/*
 		 * Avoid user/user BTB poisoning by flushing the branch
 		 * predictor when switching between processes. This stops
 		 * one process from doing Spectre-v2 attacks on another.
-		 *
-		 * As an optimization, flush indirect branches only when
-		 * switching into a processes that can't be ptrace by the
-		 * current one (as in such case, attacker has much more
-		 * convenient way how to tamper with the next process than
-		 * branch buffer poisoning).
 		 */
-		if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
-		    ibpb_needed(tsk, last_ctx_id))
-			indirect_branch_prediction_barrier();
+		cond_ibpb(tsk);
 
 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 			/*
@@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
 
-	/*
-	 * Record last user mm's context id, so we can avoid
-	 * flushing branch buffer with IBPB if we switch back
-	 * to the same user.
-	 */
-	if (next != &init_mm)
-		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
-
 	/* Make sure we write CR3 before loaded_mm. */
 	barrier();
 
@@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void)
 	write_cr3(build_cr3(mm->pgd, 0));
 
 	/* Reinitialize tlbstate. */
-	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
+	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
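
The interesting part of the change above is the decision in cond_ibpb(): the incoming task's TIF_SPEC_IB flag is folded into bit 0 of its mm pointer (bit 0 is always clear in a real, aligned mm_struct pointer), so a single comparison against the per-CPU last_user_mm_ibpb value covers both "same process scheduled again" and "neither task opted in" without extra branches. The sketch below is a hypothetical, stand-alone user-space model of just that mangling and comparison, not kernel code; the fake mm values and the helper names (mangle, ibpb_needed_model) are assumptions for illustration only.

/*
 * Hypothetical stand-alone model (NOT kernel code) of the cond_ibpb()
 * decision: fold a per-task "IB speculation restricted" flag into
 * bit 0 of the mm pointer and compare against the last stored value.
 */
#include <stdbool.h>
#include <stdio.h>

#define LAST_USER_MM_IBPB	0x1UL	/* bit 0 is never set in a real mm pointer */

/* Mirror of mm_mangle_tif_spec_ib(): mm pointer | TIF_SPEC_IB bit. */
static unsigned long mangle(unsigned long mm, bool tif_spec_ib)
{
	return mm | (tif_spec_ib ? LAST_USER_MM_IBPB : 0);
}

/* IBPB is needed only if the mm differs and either side has the bit set. */
static bool ibpb_needed_model(unsigned long prev_mm, unsigned long next_mm)
{
	return next_mm != prev_mm &&
	       ((next_mm | prev_mm) & LAST_USER_MM_IBPB);
}

int main(void)
{
	unsigned long mm_a = 0x1000, mm_b = 0x2000;	/* fake aligned "mm pointers" */

	/* Same process scheduled again, flag set both times: no barrier. */
	printf("%d\n", ibpb_needed_model(mangle(mm_a, true), mangle(mm_a, true)));
	/* Different process and the victim opted in: barrier. */
	printf("%d\n", ibpb_needed_model(mangle(mm_a, false), mangle(mm_b, true)));
	/* Different process, neither opted in: no barrier. */
	printf("%d\n", ibpb_needed_model(mangle(mm_a, false), mangle(mm_b, false)));
	return 0;
}

Running the model prints 0, 1, 0: no barrier when the same mangled value shows up again, a barrier when the mm differs and at least one side carries the IBPB bit, and no barrier when two tasks that never opted in follow each other, which is exactly the set of cases the comment block in cond_ibpb() enumerates.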
