author		Linus Torvalds <torvalds@linux-foundation.org>	2018-10-23 13:43:04 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-10-23 13:43:04 -0400
commit		d82924c3b8d0607094b94fab290a33c5ad7d586c
tree		2afbc9066262b5987c8568a09f80db5198025689
parent		d7197a5ad8528642cb70f1d27d4d5c7332a2b395
parent		bb4b3b7762735cdaba5a40fd94c9303d9ffa147a
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti updates from Ingo Molnar:
"The main changes:
- Make the IBPB barrier more strict and add STIBP support (Jiri
Kosina)
- Micro-optimize and clean up the entry code (Andy Lutomirski)
- ... plus misc other fixes"
* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/speculation: Propagate information about RSB filling mitigation to sysfs
x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation
x86/speculation: Apply IBPB more strictly to avoid cross-process data leak
x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant
x86/CPU: Fix unused variable warning when !CONFIG_IA32_EMULATION
x86/pti/64: Remove the SYSCALL64 entry trampoline
x86/entry/64: Use the TSS sp2 slot for SYSCALL/SYSRET scratch space
x86/entry/64: Document idtentry
-rw-r--r--	arch/x86/entry/entry_64.S		| 117
-rw-r--r--	arch/x86/include/asm/cpu_entry_area.h	|   2
-rw-r--r--	arch/x86/include/asm/nospec-branch.h	|  17
-rw-r--r--	arch/x86/include/asm/processor.h	|   6
-rw-r--r--	arch/x86/include/asm/sections.h		|   1
-rw-r--r--	arch/x86/kernel/asm-offsets.c		|   5
-rw-r--r--	arch/x86/kernel/cpu/bugs.c		|  58
-rw-r--r--	arch/x86/kernel/cpu/common.c		|  16
-rw-r--r--	arch/x86/kernel/kprobes/core.c		|  10
-rw-r--r--	arch/x86/kernel/process_64.c		|   2
-rw-r--r--	arch/x86/kernel/traps.c			|   4
-rw-r--r--	arch/x86/kernel/vmlinux.lds.S		|  10
-rw-r--r--	arch/x86/mm/cpu_entry_area.c		|  36
-rw-r--r--	arch/x86/mm/pti.c			|  33
-rw-r--r--	arch/x86/mm/tlb.c			|  31
-rw-r--r--	arch/x86/xen/xen-asm_64.S		|   8
-rw-r--r--	include/linux/ptrace.h			|  21
-rw-r--r--	kernel/cpu.c				|  11
-rw-r--r--	kernel/ptrace.c				|  10
19 files changed, 222 insertions(+), 176 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 7c5ce0a6c4d2..4d7a2d9d44cf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -142,67 +142,6 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
-	.pushsection .entry_trampoline, "ax"
-
-/*
- * The code in here gets remapped into cpu_entry_area's trampoline. This means
- * that the assembler and linker have the wrong idea as to where this code
- * lives (and, in fact, it's mapped more than once, so it's not even at a
- * fixed address). So we can't reference any symbols outside the entry
- * trampoline and expect it to work.
- *
- * Instead, we carefully abuse %rip-relative addressing.
- * _entry_trampoline(%rip) refers to the start of the remapped) entry
- * trampoline. We can thus find cpu_entry_area with this macro:
- */
-
-#define CPU_ENTRY_AREA \
-	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
-
-/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
-#define RSP_SCRATCH	CPU_ENTRY_AREA_entry_stack + \
-			SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
-
-ENTRY(entry_SYSCALL_64_trampoline)
-	UNWIND_HINT_EMPTY
-	swapgs
-
-	/* Stash the user RSP. */
-	movq	%rsp, RSP_SCRATCH
-
-	/* Note: using %rsp as a scratch reg. */
-	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-
-	/* Load the top of the task stack into RSP */
-	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
-
-	/* Start building the simulated IRET frame. */
-	pushq	$__USER_DS			/* pt_regs->ss */
-	pushq	RSP_SCRATCH			/* pt_regs->sp */
-	pushq	%r11				/* pt_regs->flags */
-	pushq	$__USER_CS			/* pt_regs->cs */
-	pushq	%rcx				/* pt_regs->ip */
-
-	/*
-	 * x86 lacks a near absolute jump, and we can't jump to the real
-	 * entry text with a relative jump. We could push the target
-	 * address and then use retq, but this destroys the pipeline on
-	 * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
-	 * spill RDI and restore it in a second-stage trampoline.
-	 */
-	pushq	%rdi
-	movq	$entry_SYSCALL_64_stage2, %rdi
-	JMP_NOSPEC %rdi
-END(entry_SYSCALL_64_trampoline)
-
-	.popsection
-
-ENTRY(entry_SYSCALL_64_stage2)
-	UNWIND_HINT_EMPTY
-	popq	%rdi
-	jmp	entry_SYSCALL_64_after_hwframe
-END(entry_SYSCALL_64_stage2)
-
 ENTRY(entry_SYSCALL_64)
 	UNWIND_HINT_EMPTY
 	/*
@@ -212,21 +151,19 @@ ENTRY(entry_SYSCALL_64)
 	 */
 
 	swapgs
-	/*
-	 * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
-	 * is not required to switch CR3.
-	 */
-	movq	%rsp, PER_CPU_VAR(rsp_scratch)
+	/* tss.sp2 is scratch space. */
+	movq	%rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER_DS				/* pt_regs->ss */
-	pushq	PER_CPU_VAR(rsp_scratch)		/* pt_regs->sp */
+	pushq	PER_CPU_VAR(cpu_tss_rw + TSS_sp2)	/* pt_regs->sp */
 	pushq	%r11					/* pt_regs->flags */
 	pushq	$__USER_CS				/* pt_regs->cs */
 	pushq	%rcx					/* pt_regs->ip */
 GLOBAL(entry_SYSCALL_64_after_hwframe)
 	pushq	%rax					/* pt_regs->orig_ax */
 
 	PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 
@@ -900,6 +837,42 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
 
+/**
+ * idtentry - Generate an IDT entry stub
+ * @sym:		Name of the generated entry point
+ * @do_sym:		C function to be called
+ * @has_error_code:	True if this IDT vector has an error code on the stack
+ * @paranoid:		non-zero means that this vector may be invoked from
+ *			kernel mode with user GSBASE and/or user CR3.
+ *			2 is special -- see below.
+ * @shift_ist:		Set to an IST index if entries from kernel mode should
+ *			decrement the IST stack so that nested entries get a
+ *			fresh stack.  (This is for #DB, which has a nasty habit
+ *			of recursing.)
+ *
+ * idtentry generates an IDT stub that sets up a usable kernel context,
+ * creates struct pt_regs, and calls @do_sym.  The stub has the following
+ * special behaviors:
+ *
+ * On an entry from user mode, the stub switches from the trampoline or
+ * IST stack to the normal thread stack.  On an exit to user mode, the
+ * normal exit-to-usermode path is invoked.
+ *
+ * On an exit to kernel mode, if @paranoid == 0, we check for preemption,
+ * whereas we omit the preemption check if @paranoid != 0.  This is purely
+ * because the implementation is simpler this way.  The kernel only needs
+ * to check for asynchronous kernel preemption when IRQ handlers return.
+ *
+ * If @paranoid == 0, then the stub will handle IRET faults by pretending
+ * that the fault came from user mode.  It will handle gs_change faults by
+ * pretending that the fault happened with kernel GSBASE.  Since this handling
+ * is omitted for @paranoid != 0, the #GP, #SS, and #NP stubs must have
+ * @paranoid == 0.  This special handling will do the wrong thing for
+ * espfix-induced #DF on IRET, so #DF must not use @paranoid == 0.
+ *
+ * @paranoid == 2 is special: the stub will never switch stacks.  This is for
+ * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
+ */
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
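For illustration of the @paranoid conventions documented above, the macro is invoked further down in entry_64.S roughly like this (representative invocations from this kernel generation; the exact list varies by version):

	idtentry divide_error		do_divide_error		has_error_code=0
	idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
	idtentry double_fault		do_double_fault		has_error_code=1	paranoid=2

Note how #DF uses paranoid=2 so the stub never switches stacks, exactly as the new comment prescribes.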
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 4a7884b8dca5..29c706415443 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -30,8 +30,6 @@ struct cpu_entry_area {
 	 */
 	struct tss_struct tss;
 
-	char entry_trampoline[PAGE_SIZE];
-
 #ifdef CONFIG_X86_64
 	/*
 	 * Exception stacks used for IST entries.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index fd2a8c1b88bc..80dc14422495 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -170,11 +170,15 @@
  */
 # define CALL_NOSPEC						\
 	ANNOTATE_NOSPEC_ALTERNATIVE				\
-	ALTERNATIVE(						\
+	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
 	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
-	X86_FEATURE_RETPOLINE)
+	X86_FEATURE_RETPOLINE,					\
+	"lfence;\n"						\
+	ANNOTATE_RETPOLINE_SAFE					\
+	"call *%[thunk_target]\n",				\
+	X86_FEATURE_RETPOLINE_AMD)
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
 #elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
@@ -184,7 +188,8 @@
  * here, anyway.
  */
 # define CALL_NOSPEC						\
-	ALTERNATIVE(						\
+	ANNOTATE_NOSPEC_ALTERNATIVE				\
+	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
 	"	jmp    904f;\n"					\
@@ -199,7 +204,11 @@
 	"	ret;\n"						\
 	"	.align 16\n"					\
 	"904:	call   901b;\n",				\
-	X86_FEATURE_RETPOLINE)
+	X86_FEATURE_RETPOLINE,					\
+	"lfence;\n"						\
+	ANNOTATE_RETPOLINE_SAFE					\
+	"call *%[thunk_target]\n",				\
+	X86_FEATURE_RETPOLINE_AMD)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #else /* No retpoline for C / inline asm */
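For reference, a call site uses CALL_NOSPEC together with the THUNK_TARGET() operand roughly as in this minimal sketch (illustrative only; the callback pointer and function names are hypothetical, and real call sites also pass their argument registers explicitly):

	/* Sketch: retpoline-safe indirect call through CALL_NOSPEC. */
	static void (*callback)(void);		/* hypothetical function pointer */

	static void invoke_callback(void)
	{
		asm volatile(CALL_NOSPEC
			     :				/* no outputs */
			     : THUNK_TARGET(callback)
			     : "memory");
	}

With X86_FEATURE_RETPOLINE_AMD set, the alternatives machinery patches such a site to an lfence followed by the plain indirect call, as the hunk above shows.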
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c7a4e2a174b9..617805981cce 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -316,7 +316,13 @@ struct x86_hw_tss {
 	 */
 	u64			sp1;
 
+	/*
+	 * Since Linux does not use ring 2, the 'sp2' slot is unused by
+	 * hardware.  entry_SYSCALL_64 uses it as scratch space to stash
+	 * the user RSP value.
+	 */
 	u64			sp2;
+
 	u64			reserved2;
 	u64			ist[7];
 	u32			reserved3;
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 4a911a382ade..8ea1cfdbeabc 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -11,7 +11,6 @@ extern char __end_rodata_aligned[];
 
 #if defined(CONFIG_X86_64)
 extern char __end_rodata_hpage_align[];
-extern char __entry_trampoline_start[], __entry_trampoline_end[];
 #endif
 
 #endif	/* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index fc02c3cf238f..72adf6c335dc 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -96,13 +96,12 @@ void common(void) {
 	OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
 
 	/* Layout info for cpu_entry_area */
-	OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
-	OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
 	OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
 	DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
 	DEFINE(MASK_entry_stack, (~(sizeof(struct entry_stack) - 1)));
 
-	/* Offset for sp0 and sp1 into the tss_struct */
+	/* Offset for fields in tss_struct */
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
+	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
 }
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b810cc239375..c37e66e493bf 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -35,12 +35,10 @@ static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
 static void __init l1tf_select_mitigation(void);
 
-/*
- * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
- * writes to SPEC_CTRL contain whatever reserved bits have been set.
- */
-u64 __ro_after_init x86_spec_ctrl_base;
+/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+u64 x86_spec_ctrl_base;
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+static DEFINE_MUTEX(spec_ctrl_mutex);
 
 /*
  * The vendor and possibly platform specific bits which can be modified in
@@ -326,6 +324,46 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	return cmd;
 }
 
+static bool stibp_needed(void)
+{
+	if (spectre_v2_enabled == SPECTRE_V2_NONE)
+		return false;
+
+	if (!boot_cpu_has(X86_FEATURE_STIBP))
+		return false;
+
+	return true;
+}
+
+static void update_stibp_msr(void *info)
+{
+	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+}
+
+void arch_smt_update(void)
+{
+	u64 mask;
+
+	if (!stibp_needed())
+		return;
+
+	mutex_lock(&spec_ctrl_mutex);
+	mask = x86_spec_ctrl_base;
+	if (cpu_smt_control == CPU_SMT_ENABLED)
+		mask |= SPEC_CTRL_STIBP;
+	else
+		mask &= ~SPEC_CTRL_STIBP;
+
+	if (mask != x86_spec_ctrl_base) {
+		pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
+				cpu_smt_control == CPU_SMT_ENABLED ?
+				"Enabling" : "Disabling");
+		x86_spec_ctrl_base = mask;
+		on_each_cpu(update_stibp_msr, NULL, 1);
+	}
+	mutex_unlock(&spec_ctrl_mutex);
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -426,6 +464,9 @@ specv2_set_mode:
 		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
 		pr_info("Enabling Restricted Speculation for firmware calls\n");
 	}
+
+	/* Enable STIBP if appropriate */
+	arch_smt_update();
 }
 
 #undef pr_fmt
@@ -816,6 +857,8 @@ static ssize_t l1tf_show_state(char *buf)
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			       char *buf, unsigned int bug)
 {
+	int ret;
+
 	if (!boot_cpu_has_bug(bug))
 		return sprintf(buf, "Not affected\n");
 
@@ -833,10 +876,13 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 		return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 
 	case X86_BUG_SPECTRE_V2:
-		return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+		ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
 			       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
 			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+			       (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "",
+			       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
 			       spectre_v2_module_string());
+		return ret;
 
 	case X86_BUG_SPEC_STORE_BYPASS:
 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9315a1660668..660d0b22e962 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1534,19 +1534,8 @@ EXPORT_PER_CPU_SYMBOL(__preempt_count);
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-	extern char _entry_trampoline[];
-	extern char entry_SYSCALL_64_trampoline[];
-
-	int cpu = smp_processor_id();
-	unsigned long SYSCALL64_entry_trampoline =
-		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
-		(entry_SYSCALL_64_trampoline - _entry_trampoline);
-
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	if (static_cpu_has(X86_FEATURE_PTI))
-		wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
-	else
-		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
 	wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1557,7 +1546,8 @@ void syscall_init(void)
 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
 	 */
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+		    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f72a47b602e2..c33b06f5faa4 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1028,18 +1028,10 @@ NOKPROBE_SYMBOL(kprobe_fault_handler);
 
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
-	bool is_in_entry_trampoline_section = false;
-
-#ifdef CONFIG_X86_64
-	is_in_entry_trampoline_section =
-		(addr >= (unsigned long)__entry_trampoline_start &&
-		 addr < (unsigned long)__entry_trampoline_end);
-#endif
 	return	(addr >= (unsigned long)__kprobes_text_start &&
 		 addr < (unsigned long)__kprobes_text_end) ||
 		(addr >= (unsigned long)__entry_text_start &&
-		 addr < (unsigned long)__entry_text_end) ||
-		is_in_entry_trampoline_section;
+		 addr < (unsigned long)__entry_text_end);
 }
 
 int __init arch_init_kprobes(void)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index d6674a425714..31b4755369f0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -60,8 +60,6 @@
 #include <asm/unistd_32_ia32.h>
 #endif
 
-__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
-
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 16c95cb90496..5bd0a997d81e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -383,6 +383,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	 * we won't enable interrupts or schedule before we invoke
 	 * general_protection, so nothing will clobber the stack
 	 * frame we just set up.
+	 *
+	 * We will enter general_protection with kernel GSBASE,
+	 * which is what the stub expects, given that the faulting
+	 * RIP will be the IRET instruction.
 	 */
 	regs->ip = (unsigned long)general_protection;
 	regs->sp = (unsigned long)&gpregs->orig_ax;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 5dd3317d761f..0d618ee634ac 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -136,16 +136,6 @@ SECTIONS
 		*(.fixup)
 		*(.gnu.warning)
 
-#ifdef CONFIG_X86_64
-		. = ALIGN(PAGE_SIZE);
-		__entry_trampoline_start = .;
-		_entry_trampoline = .;
-		*(.entry_trampoline)
-		. = ALIGN(PAGE_SIZE);
-		__entry_trampoline_end = .;
-		ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
-#endif
-
 #ifdef CONFIG_RETPOLINE
 		__indirect_thunk_start = .;
 		*(.text.__x86.indirect_thunk)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 076ebdce9bd4..12d7e7fb4efd 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -15,7 +15,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-static DEFINE_PER_CPU(struct kcore_list, kcore_entry_trampoline);
 #endif
 
 struct cpu_entry_area *get_cpu_entry_area(int cpu)
@@ -83,8 +82,6 @@ static void percpu_setup_debug_store(int cpu)
 static void __init setup_cpu_entry_area(int cpu)
 {
 #ifdef CONFIG_X86_64
-	extern char _entry_trampoline[];
-
 	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
 	pgprot_t gdt_prot = PAGE_KERNEL_RO;
 	pgprot_t tss_prot = PAGE_KERNEL_RO;
@@ -146,43 +143,10 @@ static void __init setup_cpu_entry_area(int cpu)
 	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
 			     &per_cpu(exception_stacks, cpu),
 			     sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
-
-	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
-		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
-	/*
-	 * The cpu_entry_area alias addresses are not in the kernel binary
-	 * so they do not show up in /proc/kcore normally.  This adds entries
-	 * for them manually.
-	 */
-	kclist_add_remap(&per_cpu(kcore_entry_trampoline, cpu),
-			 _entry_trampoline,
-			 &get_cpu_entry_area(cpu)->entry_trampoline, PAGE_SIZE);
 #endif
 	percpu_setup_debug_store(cpu);
 }
 
-#ifdef CONFIG_X86_64
-int arch_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
-		     char *name)
-{
-	unsigned int cpu, ncpu = 0;
-
-	if (symnum >= num_possible_cpus())
-		return -EINVAL;
-
-	for_each_possible_cpu(cpu) {
-		if (ncpu++ >= symnum)
-			break;
-	}
-
-	*value = (unsigned long)&get_cpu_entry_area(cpu)->entry_trampoline;
-	*type = 't';
-	strlcpy(name, "__entry_SYSCALL_64_trampoline", KSYM_NAME_LEN);
-
-	return 0;
-}
-#endif
-
 static __init void setup_cpu_entry_area_ptes(void)
 {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index c1fc1ae6b429..4fee5c3003ed 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -434,11 +434,42 @@ static void __init pti_clone_p4d(unsigned long addr)
 }
 
 /*
- * Clone the CPU_ENTRY_AREA into the user space visible page table.
+ * Clone the CPU_ENTRY_AREA and associated data into the user space visible
+ * page table.
  */
 static void __init pti_clone_user_shared(void)
 {
+	unsigned int cpu;
+
 	pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+
+	for_each_possible_cpu(cpu) {
+		/*
+		 * The SYSCALL64 entry code needs to be able to find the
+		 * thread stack and needs one word of scratch space in which
+		 * to spill a register.  All of this lives in the TSS, in
+		 * the sp1 and sp2 slots.
+		 *
+		 * This is done for all possible CPUs during boot to ensure
+		 * that it's propagated to all mms.  If we were to add one of
+		 * these mappings during CPU hotplug, we would need to take
+		 * some measure to make sure that every mm that subsequently
+		 * ran on that CPU would have the relevant PGD entry in its
+		 * pagetables.  The usual vmalloc_fault() mechanism would not
+		 * work for page faults taken in entry_SYSCALL_64 before RSP
+		 * is set up.
+		 */
+
+		unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu);
+		phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
+		pte_t *target_pte;
+
+		target_pte = pti_user_pagetable_walk_pte(va);
+		if (WARN_ON(!target_pte))
+			return;
+
+		*target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL);
+	}
 }
 
 #else /* CONFIG_X86_64 */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 7d68489cfdb1..bddd6b3cee1d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
+#include <linux/ptrace.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -180,6 +181,19 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
 	}
 }
 
+static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
+{
+	/*
+	 * Check if the current (previous) task has access to the memory
+	 * of the @tsk (next) task.  If access is denied, make sure to
+	 * issue an IBPB to stop user->user Spectre-v2 attacks.
+	 *
+	 * Note: __ptrace_may_access() returns 0 or -ERRNO.
+	 */
+	return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
+		ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
+}
+
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
@@ -286,18 +300,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * one process from doing Spectre-v2 attacks on another.
 	 *
 	 * As an optimization, flush indirect branches only when
-	 * switching into processes that disable dumping. This
-	 * protects high value processes like gpg, without having
-	 * too high performance overhead. IBPB is *expensive*!
-	 *
-	 * This will not flush branches when switching into kernel
-	 * threads. It will also not flush if we switch to idle
-	 * thread and back to the same process. It will flush if we
-	 * switch to a different non-dumpable process.
+	 * switching into a process that can't be ptraced by the
+	 * current one (as in such a case, the attacker has a much
+	 * more convenient way to tamper with the next process than
+	 * branch buffer poisoning).
 	 */
-	if (tsk && tsk->mm &&
-	    tsk->mm->context.ctx_id != last_ctx_id &&
-	    get_dumpable(tsk->mm) != SUID_DUMP_USER)
+	if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
+	    ibpb_needed(tsk, last_ctx_id))
 		indirect_branch_prediction_barrier();
 
 	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 417b339e5c8e..bb1c2da0381d 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -91,13 +91,15 @@ ENTRY(xen_iret)
 ENTRY(xen_sysret64)
 	/*
 	 * We're already on the usermode stack at this point, but
-	 * still with the kernel gs, so we can easily switch back
+	 * still with the kernel gs, so we can easily switch back.
+	 *
+	 * tss.sp2 is scratch space.
 	 */
-	movq	%rsp, PER_CPU_VAR(rsp_scratch)
+	movq	%rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	pushq	$__USER_DS
-	pushq	PER_CPU_VAR(rsp_scratch)
+	pushq	PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	pushq	%r11
 	pushq	$__USER_CS
 	pushq	%rcx
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 4f36431c380b..e5e5ef513df3 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -62,14 +62,17 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ	0x01
 #define PTRACE_MODE_ATTACH	0x02
 #define PTRACE_MODE_NOAUDIT	0x04
 #define PTRACE_MODE_FSCREDS	0x08
 #define PTRACE_MODE_REALCREDS	0x10
+#define PTRACE_MODE_SCHED	0x20
+#define PTRACE_MODE_IBPB	0x40
 
 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
 #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
 #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
+#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB)
 
 /**
  * ptrace_may_access - check whether the caller is permitted to access
@@ -87,6 +90,20 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
  */
 extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
 
+/**
+ * ptrace_may_access_sched - check whether the caller is permitted to access
+ * a target task.
+ * @task: target task
+ * @mode: selects type of access and caller credentials
+ *
+ * Returns true on success, false on denial.
+ *
+ * Similar to ptrace_may_access().  Only to be called from context switch
+ * code.  Does not call into audit and the regular LSM hooks due to locking
+ * constraints.
+ */
+extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode);
+
 static inline int ptrace_reparented(struct task_struct *child)
 {
 	return !same_thread_group(child->real_parent, child->parent);
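As a worked example of how these flags compose: PTRACE_MODE_SPEC_IBPB expands to PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS | PTRACE_MODE_IBPB, i.e. 0x02 | 0x10 | 0x40 = 0x52; ptrace_may_access_sched() then ORs in PTRACE_MODE_SCHED (0x20) before calling __ptrace_may_access(), as the kernel/ptrace.c hunk below shows.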
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e82920b8bee1..3c7f3b4c453c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2055,6 +2055,12 @@ static void cpuhp_online_cpu_device(unsigned int cpu)
 	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
 }
 
+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { };
+
 static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 {
 	int cpu, ret = 0;
@@ -2081,8 +2087,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 		 */
 		cpuhp_offline_cpu_device(cpu);
 	}
-	if (!ret)
+	if (!ret) {
 		cpu_smt_control = ctrlval;
+		arch_smt_update();
+	}
 	cpu_maps_update_done();
 	return ret;
 }
@@ -2093,6 +2101,7 @@ static int cpuhp_smt_enable(void)
 
 	cpu_maps_update_begin();
 	cpu_smt_control = CPU_SMT_ENABLED;
+	arch_smt_update();
 	for_each_present_cpu(cpu) {
 		/* Skip online CPUs and CPUs on offline nodes */
 		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
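The x86 override of this weak hook is the arch_smt_update() added to arch/x86/kernel/cpu/bugs.c earlier in this diff, which toggles STIBP whenever these SMT control transitions happen.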
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 21fec73d45d4..99cfddde6a55 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -261,6 +261,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 
 static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 {
+	if (mode & PTRACE_MODE_SCHED)
+		return false;
+
 	if (mode & PTRACE_MODE_NOAUDIT)
 		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
 	else
@@ -328,9 +331,16 @@ ok:
 	     !ptrace_has_cap(mm->user_ns, mode)))
 	    return -EPERM;
 
+	if (mode & PTRACE_MODE_SCHED)
+		return 0;
 	return security_ptrace_access_check(task, mode);
 }
 
+bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode)
+{
+	return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED);
+}
+
 bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	int err;