-rw-r--r--	arch/x86/events/core.c	2
-rw-r--r--	arch/x86/include/asm/tlbflush.h	40
-rw-r--r--	arch/x86/lib/usercopy.c	5
-rw-r--r--	arch/x86/mm/tlb.c	7
4 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 
 	perf_callchain_store(entry, regs->ip);
 
-	if (!current->mm)
+	if (!nmi_uaccess_okay())
 		return;
 
 	if (perf_callchain_user32(regs, entry))
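Commentary on the change above (not part of the patch): perf_callchain_user() runs in NMI context, and the old "!current->mm" guard only filters out kernel threads. It does not catch an NMI that lands in the middle of switch_mm_irqs_off(), when CR3 may already point at the next task's page tables while "current" still refers to the previous task. A rough, hypothetical sketch of that interleaving, assuming the patched code:

	/*
	 * Illustration only, not from the patch:
	 *
	 *   CPU doing a task switch                    NMI on the same CPU
	 *   -----------------------                    -------------------
	 *   switch_mm_irqs_off(prev, next, tsk)
	 *     loaded_mm = LOADED_MM_SWITCHING
	 *     write CR3 <- next's page tables
	 *                                              perf NMI fires
	 *                                                current->mm == prev->mm (non-NULL),
	 *                                                so the old "!current->mm" guard
	 *                                                passes even though CR3 now maps
	 *                                                next's user addresses;
	 *                                                nmi_uaccess_okay() instead sees
	 *                                                loaded_mm != current->mm and bails
	 *     loaded_mm = next
	 *   ("current" itself is only switched later, in __switch_to())
	 */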
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 29c9da6c62fc..58ce5288878e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,8 +175,16 @@ struct tlb_state {
 	 * are on. This means that it may not match current->active_mm,
 	 * which will contain the previous user mm when we're in lazy TLB
 	 * mode even if we've already switched back to swapper_pg_dir.
+	 *
+	 * During switch_mm_irqs_off(), loaded_mm will be set to
+	 * LOADED_MM_SWITCHING during the brief interrupts-off window
+	 * when CR3 and loaded_mm would otherwise be inconsistent. This
+	 * is for nmi_uaccess_okay()'s benefit.
 	 */
 	struct mm_struct *loaded_mm;
+
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+
 	u16 loaded_mm_asid;
 	u16 next_asid;
 	/* last user mm's ctx id */
@@ -246,6 +254,38 @@ struct tlb_state {
 };
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
 
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm. It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	struct mm_struct *current_mm = current->mm;
+
+	VM_WARN_ON_ONCE(!loaded_mm);
+
+	/*
+	 * The condition we want to check is
+	 * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though,
+	 * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
+	 * is supposed to be reasonably fast.
+	 *
+	 * Instead, we check the almost equivalent but somewhat conservative
+	 * condition below, and we rely on the fact that switch_mm_irqs_off()
+	 * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
+	 */
+	if (loaded_mm != current_mm)
+		return false;
+
+	VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
+
+	return true;
+}
+
 /* Initialize cr4 shadow for this CPU. */
 static inline void cr4_init_shadow(void)
 {
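Commentary on the helper above (not part of the patch): the comment inside nmi_uaccess_okay() names the check it would ideally perform. Written out, it would look roughly like the sketch below; the function name is made up, and the patch deliberately avoids this form because read_cr3_pa() can be expensive under shadow paging, so the CR3 read is kept only as a VM_WARN_ON_ONCE() debug assertion.

	/* Hypothetical "exact" form of the check, for comparison only. */
	static inline bool nmi_uaccess_okay_exact(void)
	{
		/* Kernel threads have no user mm at all. */
		if (!current->mm)
			return false;

		/* Does CR3 point at current's user page tables right now? */
		return current->mm->pgd == __va(read_cr3_pa());
	}

The conservative loaded_mm comparison can reject some accesses this exact check would allow (e.g. kernel code temporarily running on a borrowed mm), which is the safe direction for an NMI path.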
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
 #include <linux/uaccess.h>
 #include <linux/export.h>
 
+#include <asm/tlbflush.h>
+
 /*
  * We rely on the nested NMI work to allow atomic faults from the NMI path; the
  * nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	if (__range_not_ok(from, n, TASK_SIZE))
 		return n;
 
+	if (!nmi_uaccess_okay())
+		return n;
+
 	/*
 	 * Even though this function is typically called from NMI/IRQ context
 	 * disable pagefaults so that its behaviour is consistent even when
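Usage note (commentary, not from the patch): copy_from_user_nmi() returns the number of bytes it did not copy, so the new early return makes an unsafe CR3 window indistinguishable from a bad range or a fault, which is what existing callers already handle. A hedged sketch of a caller; the function name is invented:

	/* Hypothetical caller: read one word from the user stack in NMI context. */
	static unsigned long peek_user_stack_word(struct pt_regs *regs)
	{
		unsigned long word = 0;

		/* Nonzero return: not copied (bad range, fault, or unsafe CR3 window). */
		if (copy_from_user_nmi(&word, (const void __user *)regs->sp,
				       sizeof(word)) != 0)
			return 0;

		return word;
	}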
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 9517d1b2a281..e96b99eb800c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -305,6 +305,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
 
+		/* Let nmi_uaccess_okay() know that we're changing CR3. */
+		this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+		barrier();
+
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -335,6 +339,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	if (next != &init_mm)
 		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
 
+	/* Make sure we write CR3 before loaded_mm. */
+	barrier();
+
 	this_cpu_write(cpu_tlbstate.loaded_mm, next);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 }
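Commentary on the two barrier() calls (not part of the patch): together with LOADED_MM_SWITCHING they bracket the CR3 write so that an NMI on this CPU never sees loaded_mm claiming a mm that CR3 does not yet (or no longer) reflects. Condensed into one hypothetical helper, with the real code's load_new_mm_cr3() replaced by a plain write_cr3() for brevity:

	static void sketch_cr3_switch_ordering(struct mm_struct *next, unsigned long new_cr3)
	{
		/* From here on, nmi_uaccess_okay() fails closed on this CPU. */
		this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
		barrier();	/* keep that store from sinking below the CR3 write */

		write_cr3(new_cr3);	/* the actual page-table switch */

		barrier();	/* keep the CR3 write ahead of publishing the new mm */
		this_cpu_write(cpu_tlbstate.loaded_mm, next);

		/*
		 * barrier() is only a compiler barrier; that is enough here
		 * because the only observer that matters is an NMI on this
		 * same CPU, which sees its own stores in program order.
		 */
	}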
