diff options
author | Ingo Molnar <mingo@kernel.org> | 2017-12-01 04:32:48 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-12-17 06:58:53 -0500 |
commit | 0fd2e9c53d82704a3ba87ea1980ec515188c5316 (patch) | |
tree | a828c396110053feba9e65307a4e802b00966519 | |
parent | 1784f9144b143a1e8b19fe94083b040aa559182b (diff) | |
parent | 1e4c4f610f774df6088d7c065b2dd4d22adba698 (diff) |
Merge commit 'upstream-x86-entry' into WIP.x86/mm
Pull in a minimal set of v4.15 entry code changes, for a base for the MM isolation patches.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
55 files changed, 579 insertions, 361 deletions
diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt index af0c9a4c65a6..cd4b29be29af 100644 --- a/Documentation/x86/orc-unwinder.txt +++ b/Documentation/x86/orc-unwinder.txt | |||
@@ -4,7 +4,7 @@ ORC unwinder | |||
4 | Overview | 4 | Overview |
5 | -------- | 5 | -------- |
6 | 6 | ||
7 | The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is | 7 | The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is |
8 | similar in concept to a DWARF unwinder. The difference is that the | 8 | similar in concept to a DWARF unwinder. The difference is that the |
9 | format of the ORC data is much simpler than DWARF, which in turn allows | 9 | format of the ORC data is much simpler than DWARF, which in turn allows |
10 | the ORC unwinder to be much simpler and faster. | 10 | the ORC unwinder to be much simpler and faster. |
@@ -934,8 +934,8 @@ ifdef CONFIG_STACK_VALIDATION | |||
934 | ifeq ($(has_libelf),1) | 934 | ifeq ($(has_libelf),1) |
935 | objtool_target := tools/objtool FORCE | 935 | objtool_target := tools/objtool FORCE |
936 | else | 936 | else |
937 | ifdef CONFIG_ORC_UNWINDER | 937 | ifdef CONFIG_UNWINDER_ORC |
938 | $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") | 938 | $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") |
939 | else | 939 | else |
940 | $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") | 940 | $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") |
941 | endif | 941 | endif |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fdb23313dd5..926fdfbadcdb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -171,7 +171,7 @@ config X86 | |||
171 | select HAVE_PERF_USER_STACK_DUMP | 171 | select HAVE_PERF_USER_STACK_DUMP |
172 | select HAVE_RCU_TABLE_FREE | 172 | select HAVE_RCU_TABLE_FREE |
173 | select HAVE_REGS_AND_STACK_ACCESS_API | 173 | select HAVE_REGS_AND_STACK_ACCESS_API |
174 | select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION | 174 | select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION |
175 | select HAVE_STACK_VALIDATION if X86_64 | 175 | select HAVE_STACK_VALIDATION if X86_64 |
176 | select HAVE_SYSCALL_TRACEPOINTS | 176 | select HAVE_SYSCALL_TRACEPOINTS |
177 | select HAVE_UNSTABLE_SCHED_CLOCK | 177 | select HAVE_UNSTABLE_SCHED_CLOCK |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 90b123056f4b..6293a8768a91 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG | |||
359 | 359 | ||
360 | choice | 360 | choice |
361 | prompt "Choose kernel unwinder" | 361 | prompt "Choose kernel unwinder" |
362 | default FRAME_POINTER_UNWINDER | 362 | default UNWINDER_ORC if X86_64 |
363 | default UNWINDER_FRAME_POINTER if X86_32 | ||
363 | ---help--- | 364 | ---help--- |
364 | This determines which method will be used for unwinding kernel stack | 365 | This determines which method will be used for unwinding kernel stack |
365 | traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, | 366 | traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, |
366 | livepatch, lockdep, and more. | 367 | livepatch, lockdep, and more. |
367 | 368 | ||
368 | config FRAME_POINTER_UNWINDER | 369 | config UNWINDER_ORC |
369 | bool "Frame pointer unwinder" | ||
370 | select FRAME_POINTER | ||
371 | ---help--- | ||
372 | This option enables the frame pointer unwinder for unwinding kernel | ||
373 | stack traces. | ||
374 | |||
375 | The unwinder itself is fast and it uses less RAM than the ORC | ||
376 | unwinder, but the kernel text size will grow by ~3% and the kernel's | ||
377 | overall performance will degrade by roughly 5-10%. | ||
378 | |||
379 | This option is recommended if you want to use the livepatch | ||
380 | consistency model, as this is currently the only way to get a | ||
381 | reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). | ||
382 | |||
383 | config ORC_UNWINDER | ||
384 | bool "ORC unwinder" | 370 | bool "ORC unwinder" |
385 | depends on X86_64 | 371 | depends on X86_64 |
386 | select STACK_VALIDATION | 372 | select STACK_VALIDATION |
@@ -396,7 +382,22 @@ config ORC_UNWINDER | |||
396 | Enabling this option will increase the kernel's runtime memory usage | 382 | Enabling this option will increase the kernel's runtime memory usage |
397 | by roughly 2-4MB, depending on your kernel config. | 383 | by roughly 2-4MB, depending on your kernel config. |
398 | 384 | ||
399 | config GUESS_UNWINDER | 385 | config UNWINDER_FRAME_POINTER |
386 | bool "Frame pointer unwinder" | ||
387 | select FRAME_POINTER | ||
388 | ---help--- | ||
389 | This option enables the frame pointer unwinder for unwinding kernel | ||
390 | stack traces. | ||
391 | |||
392 | The unwinder itself is fast and it uses less RAM than the ORC | ||
393 | unwinder, but the kernel text size will grow by ~3% and the kernel's | ||
394 | overall performance will degrade by roughly 5-10%. | ||
395 | |||
396 | This option is recommended if you want to use the livepatch | ||
397 | consistency model, as this is currently the only way to get a | ||
398 | reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). | ||
399 | |||
400 | config UNWINDER_GUESS | ||
400 | bool "Guess unwinder" | 401 | bool "Guess unwinder" |
401 | depends on EXPERT | 402 | depends on EXPERT |
402 | ---help--- | 403 | ---help--- |
@@ -411,7 +412,7 @@ config GUESS_UNWINDER | |||
411 | endchoice | 412 | endchoice |
412 | 413 | ||
413 | config FRAME_POINTER | 414 | config FRAME_POINTER |
414 | depends on !ORC_UNWINDER && !GUESS_UNWINDER | 415 | depends on !UNWINDER_ORC && !UNWINDER_GUESS |
415 | bool | 416 | bool |
416 | 417 | ||
417 | endmenu | 418 | endmenu |
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config index 550cd5012b73..66c9e2aab16c 100644 --- a/arch/x86/configs/tiny.config +++ b/arch/x86/configs/tiny.config | |||
@@ -1,5 +1,5 @@ | |||
1 | CONFIG_NOHIGHMEM=y | 1 | CONFIG_NOHIGHMEM=y |
2 | # CONFIG_HIGHMEM4G is not set | 2 | # CONFIG_HIGHMEM4G is not set |
3 | # CONFIG_HIGHMEM64G is not set | 3 | # CONFIG_HIGHMEM64G is not set |
4 | CONFIG_GUESS_UNWINDER=y | 4 | CONFIG_UNWINDER_GUESS=y |
5 | # CONFIG_FRAME_POINTER_UNWINDER is not set | 5 | # CONFIG_UNWINDER_FRAME_POINTER is not set |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 4a4b16e56d35..e32fc1f274d8 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y | |||
299 | # CONFIG_DEBUG_RODATA_TEST is not set | 299 | # CONFIG_DEBUG_RODATA_TEST is not set |
300 | CONFIG_DEBUG_BOOT_PARAMS=y | 300 | CONFIG_DEBUG_BOOT_PARAMS=y |
301 | CONFIG_OPTIMIZE_INLINING=y | 301 | CONFIG_OPTIMIZE_INLINING=y |
302 | CONFIG_UNWINDER_ORC=y | ||
302 | CONFIG_SECURITY=y | 303 | CONFIG_SECURITY=y |
303 | CONFIG_SECURITY_NETWORK=y | 304 | CONFIG_SECURITY_NETWORK=y |
304 | CONFIG_SECURITY_SELINUX=y | 305 | CONFIG_SECURITY_SELINUX=y |
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 6e160031cfea..3fd8bc560fae 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h | |||
@@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with | |||
142 | UNWIND_HINT_REGS offset=\offset | 142 | UNWIND_HINT_REGS offset=\offset |
143 | .endm | 143 | .endm |
144 | 144 | ||
145 | .macro RESTORE_EXTRA_REGS offset=0 | 145 | .macro POP_EXTRA_REGS |
146 | movq 0*8+\offset(%rsp), %r15 | 146 | popq %r15 |
147 | movq 1*8+\offset(%rsp), %r14 | 147 | popq %r14 |
148 | movq 2*8+\offset(%rsp), %r13 | 148 | popq %r13 |
149 | movq 3*8+\offset(%rsp), %r12 | 149 | popq %r12 |
150 | movq 4*8+\offset(%rsp), %rbp | 150 | popq %rbp |
151 | movq 5*8+\offset(%rsp), %rbx | 151 | popq %rbx |
152 | UNWIND_HINT_REGS offset=\offset extra=0 | 152 | .endm |
153 | .endm | 153 | |
154 | 154 | .macro POP_C_REGS | |
155 | .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 | 155 | popq %r11 |
156 | .if \rstor_r11 | 156 | popq %r10 |
157 | movq 6*8(%rsp), %r11 | 157 | popq %r9 |
158 | .endif | 158 | popq %r8 |
159 | .if \rstor_r8910 | 159 | popq %rax |
160 | movq 7*8(%rsp), %r10 | 160 | popq %rcx |
161 | movq 8*8(%rsp), %r9 | 161 | popq %rdx |
162 | movq 9*8(%rsp), %r8 | 162 | popq %rsi |
163 | .endif | 163 | popq %rdi |
164 | .if \rstor_rax | ||
165 | movq 10*8(%rsp), %rax | ||
166 | .endif | ||
167 | .if \rstor_rcx | ||
168 | movq 11*8(%rsp), %rcx | ||
169 | .endif | ||
170 | .if \rstor_rdx | ||
171 | movq 12*8(%rsp), %rdx | ||
172 | .endif | ||
173 | movq 13*8(%rsp), %rsi | ||
174 | movq 14*8(%rsp), %rdi | ||
175 | UNWIND_HINT_IRET_REGS offset=16*8 | ||
176 | .endm | ||
177 | .macro RESTORE_C_REGS | ||
178 | RESTORE_C_REGS_HELPER 1,1,1,1,1 | ||
179 | .endm | ||
180 | .macro RESTORE_C_REGS_EXCEPT_RAX | ||
181 | RESTORE_C_REGS_HELPER 0,1,1,1,1 | ||
182 | .endm | ||
183 | .macro RESTORE_C_REGS_EXCEPT_RCX | ||
184 | RESTORE_C_REGS_HELPER 1,0,1,1,1 | ||
185 | .endm | ||
186 | .macro RESTORE_C_REGS_EXCEPT_R11 | ||
187 | RESTORE_C_REGS_HELPER 1,1,0,1,1 | ||
188 | .endm | ||
189 | .macro RESTORE_C_REGS_EXCEPT_RCX_R11 | ||
190 | RESTORE_C_REGS_HELPER 1,0,0,1,1 | ||
191 | .endm | ||
192 | |||
193 | .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 | ||
194 | subq $-(15*8+\addskip), %rsp | ||
195 | .endm | 164 | .endm |
196 | 165 | ||
197 | .macro icebp | 166 | .macro icebp |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index bcfc5668dcb2..a2b30ec69497 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
@@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath: | |||
221 | TRACE_IRQS_ON /* user mode is traced as IRQs on */ | 221 | TRACE_IRQS_ON /* user mode is traced as IRQs on */ |
222 | movq RIP(%rsp), %rcx | 222 | movq RIP(%rsp), %rcx |
223 | movq EFLAGS(%rsp), %r11 | 223 | movq EFLAGS(%rsp), %r11 |
224 | RESTORE_C_REGS_EXCEPT_RCX_R11 | 224 | addq $6*8, %rsp /* skip extra regs -- they were preserved */ |
225 | movq RSP(%rsp), %rsp | ||
226 | UNWIND_HINT_EMPTY | 225 | UNWIND_HINT_EMPTY |
227 | USERGS_SYSRET64 | 226 | jmp .Lpop_c_regs_except_rcx_r11_and_sysret |
228 | 227 | ||
229 | 1: | 228 | 1: |
230 | /* | 229 | /* |
@@ -246,17 +245,18 @@ entry_SYSCALL64_slow_path: | |||
246 | call do_syscall_64 /* returns with IRQs disabled */ | 245 | call do_syscall_64 /* returns with IRQs disabled */ |
247 | 246 | ||
248 | return_from_SYSCALL_64: | 247 | return_from_SYSCALL_64: |
249 | RESTORE_EXTRA_REGS | ||
250 | TRACE_IRQS_IRETQ /* we're about to change IF */ | 248 | TRACE_IRQS_IRETQ /* we're about to change IF */ |
251 | 249 | ||
252 | /* | 250 | /* |
253 | * Try to use SYSRET instead of IRET if we're returning to | 251 | * Try to use SYSRET instead of IRET if we're returning to |
254 | * a completely clean 64-bit userspace context. | 252 | * a completely clean 64-bit userspace context. If we're not, |
253 | * go to the slow exit path. | ||
255 | */ | 254 | */ |
256 | movq RCX(%rsp), %rcx | 255 | movq RCX(%rsp), %rcx |
257 | movq RIP(%rsp), %r11 | 256 | movq RIP(%rsp), %r11 |
258 | cmpq %rcx, %r11 /* RCX == RIP */ | 257 | |
259 | jne opportunistic_sysret_failed | 258 | cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */ |
259 | jne swapgs_restore_regs_and_return_to_usermode | ||
260 | 260 | ||
261 | /* | 261 | /* |
262 | * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP | 262 | * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP |
@@ -274,14 +274,14 @@ return_from_SYSCALL_64: | |||
274 | 274 | ||
275 | /* If this changed %rcx, it was not canonical */ | 275 | /* If this changed %rcx, it was not canonical */ |
276 | cmpq %rcx, %r11 | 276 | cmpq %rcx, %r11 |
277 | jne opportunistic_sysret_failed | 277 | jne swapgs_restore_regs_and_return_to_usermode |
278 | 278 | ||
279 | cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ | 279 | cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ |
280 | jne opportunistic_sysret_failed | 280 | jne swapgs_restore_regs_and_return_to_usermode |
281 | 281 | ||
282 | movq R11(%rsp), %r11 | 282 | movq R11(%rsp), %r11 |
283 | cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ | 283 | cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ |
284 | jne opportunistic_sysret_failed | 284 | jne swapgs_restore_regs_and_return_to_usermode |
285 | 285 | ||
286 | /* | 286 | /* |
287 | * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot | 287 | * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot |
@@ -302,12 +302,12 @@ return_from_SYSCALL_64: | |||
302 | * would never get past 'stuck_here'. | 302 | * would never get past 'stuck_here'. |
303 | */ | 303 | */ |
304 | testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 | 304 | testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 |
305 | jnz opportunistic_sysret_failed | 305 | jnz swapgs_restore_regs_and_return_to_usermode |
306 | 306 | ||
307 | /* nothing to check for RSP */ | 307 | /* nothing to check for RSP */ |
308 | 308 | ||
309 | cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ | 309 | cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ |
310 | jne opportunistic_sysret_failed | 310 | jne swapgs_restore_regs_and_return_to_usermode |
311 | 311 | ||
312 | /* | 312 | /* |
313 | * We win! This label is here just for ease of understanding | 313 | * We win! This label is here just for ease of understanding |
@@ -315,14 +315,20 @@ return_from_SYSCALL_64: | |||
315 | */ | 315 | */ |
316 | syscall_return_via_sysret: | 316 | syscall_return_via_sysret: |
317 | /* rcx and r11 are already restored (see code above) */ | 317 | /* rcx and r11 are already restored (see code above) */ |
318 | RESTORE_C_REGS_EXCEPT_RCX_R11 | ||
319 | movq RSP(%rsp), %rsp | ||
320 | UNWIND_HINT_EMPTY | 318 | UNWIND_HINT_EMPTY |
319 | POP_EXTRA_REGS | ||
320 | .Lpop_c_regs_except_rcx_r11_and_sysret: | ||
321 | popq %rsi /* skip r11 */ | ||
322 | popq %r10 | ||
323 | popq %r9 | ||
324 | popq %r8 | ||
325 | popq %rax | ||
326 | popq %rsi /* skip rcx */ | ||
327 | popq %rdx | ||
328 | popq %rsi | ||
329 | popq %rdi | ||
330 | movq RSP-ORIG_RAX(%rsp), %rsp | ||
321 | USERGS_SYSRET64 | 331 | USERGS_SYSRET64 |
322 | |||
323 | opportunistic_sysret_failed: | ||
324 | SWAPGS | ||
325 | jmp restore_c_regs_and_iret | ||
326 | END(entry_SYSCALL_64) | 332 | END(entry_SYSCALL_64) |
327 | 333 | ||
328 | ENTRY(stub_ptregs_64) | 334 | ENTRY(stub_ptregs_64) |
@@ -423,8 +429,7 @@ ENTRY(ret_from_fork) | |||
423 | movq %rsp, %rdi | 429 | movq %rsp, %rdi |
424 | call syscall_return_slowpath /* returns with IRQs disabled */ | 430 | call syscall_return_slowpath /* returns with IRQs disabled */ |
425 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ | 431 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ |
426 | SWAPGS | 432 | jmp swapgs_restore_regs_and_return_to_usermode |
427 | jmp restore_regs_and_iret | ||
428 | 433 | ||
429 | 1: | 434 | 1: |
430 | /* kernel thread */ | 435 | /* kernel thread */ |
@@ -612,8 +617,21 @@ GLOBAL(retint_user) | |||
612 | mov %rsp,%rdi | 617 | mov %rsp,%rdi |
613 | call prepare_exit_to_usermode | 618 | call prepare_exit_to_usermode |
614 | TRACE_IRQS_IRETQ | 619 | TRACE_IRQS_IRETQ |
620 | |||
621 | GLOBAL(swapgs_restore_regs_and_return_to_usermode) | ||
622 | #ifdef CONFIG_DEBUG_ENTRY | ||
623 | /* Assert that pt_regs indicates user mode. */ | ||
624 | testb $3, CS(%rsp) | ||
625 | jnz 1f | ||
626 | ud2 | ||
627 | 1: | ||
628 | #endif | ||
615 | SWAPGS | 629 | SWAPGS |
616 | jmp restore_regs_and_iret | 630 | POP_EXTRA_REGS |
631 | POP_C_REGS | ||
632 | addq $8, %rsp /* skip regs->orig_ax */ | ||
633 | INTERRUPT_RETURN | ||
634 | |||
617 | 635 | ||
618 | /* Returning to kernel space */ | 636 | /* Returning to kernel space */ |
619 | retint_kernel: | 637 | retint_kernel: |
@@ -633,15 +651,17 @@ retint_kernel: | |||
633 | */ | 651 | */ |
634 | TRACE_IRQS_IRETQ | 652 | TRACE_IRQS_IRETQ |
635 | 653 | ||
636 | /* | 654 | GLOBAL(restore_regs_and_return_to_kernel) |
637 | * At this label, code paths which return to kernel and to user, | 655 | #ifdef CONFIG_DEBUG_ENTRY |
638 | * which come from interrupts/exception and from syscalls, merge. | 656 | /* Assert that pt_regs indicates kernel mode. */ |
639 | */ | 657 | testb $3, CS(%rsp) |
640 | GLOBAL(restore_regs_and_iret) | 658 | jz 1f |
641 | RESTORE_EXTRA_REGS | 659 | ud2 |
642 | restore_c_regs_and_iret: | 660 | 1: |
643 | RESTORE_C_REGS | 661 | #endif |
644 | REMOVE_PT_GPREGS_FROM_STACK 8 | 662 | POP_EXTRA_REGS |
663 | POP_C_REGS | ||
664 | addq $8, %rsp /* skip regs->orig_ax */ | ||
645 | INTERRUPT_RETURN | 665 | INTERRUPT_RETURN |
646 | 666 | ||
647 | ENTRY(native_iret) | 667 | ENTRY(native_iret) |
@@ -818,7 +838,7 @@ ENTRY(\sym) | |||
818 | 838 | ||
819 | ASM_CLAC | 839 | ASM_CLAC |
820 | 840 | ||
821 | .ifeq \has_error_code | 841 | .if \has_error_code == 0 |
822 | pushq $-1 /* ORIG_RAX: no syscall to restart */ | 842 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
823 | .endif | 843 | .endif |
824 | 844 | ||
@@ -1059,6 +1079,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK | |||
1059 | idtentry stack_segment do_stack_segment has_error_code=1 | 1079 | idtentry stack_segment do_stack_segment has_error_code=1 |
1060 | 1080 | ||
1061 | #ifdef CONFIG_XEN | 1081 | #ifdef CONFIG_XEN |
1082 | idtentry xennmi do_nmi has_error_code=0 | ||
1062 | idtentry xendebug do_debug has_error_code=0 | 1083 | idtentry xendebug do_debug has_error_code=0 |
1063 | idtentry xenint3 do_int3 has_error_code=0 | 1084 | idtentry xenint3 do_int3 has_error_code=0 |
1064 | #endif | 1085 | #endif |
@@ -1112,17 +1133,14 @@ ENTRY(paranoid_exit) | |||
1112 | DISABLE_INTERRUPTS(CLBR_ANY) | 1133 | DISABLE_INTERRUPTS(CLBR_ANY) |
1113 | TRACE_IRQS_OFF_DEBUG | 1134 | TRACE_IRQS_OFF_DEBUG |
1114 | testl %ebx, %ebx /* swapgs needed? */ | 1135 | testl %ebx, %ebx /* swapgs needed? */ |
1115 | jnz paranoid_exit_no_swapgs | 1136 | jnz .Lparanoid_exit_no_swapgs |
1116 | TRACE_IRQS_IRETQ | 1137 | TRACE_IRQS_IRETQ |
1117 | SWAPGS_UNSAFE_STACK | 1138 | SWAPGS_UNSAFE_STACK |
1118 | jmp paranoid_exit_restore | 1139 | jmp .Lparanoid_exit_restore |
1119 | paranoid_exit_no_swapgs: | 1140 | .Lparanoid_exit_no_swapgs: |
1120 | TRACE_IRQS_IRETQ_DEBUG | 1141 | TRACE_IRQS_IRETQ_DEBUG |
1121 | paranoid_exit_restore: | 1142 | .Lparanoid_exit_restore: |
1122 | RESTORE_EXTRA_REGS | 1143 | jmp restore_regs_and_return_to_kernel |
1123 | RESTORE_C_REGS | ||
1124 | REMOVE_PT_GPREGS_FROM_STACK 8 | ||
1125 | INTERRUPT_RETURN | ||
1126 | END(paranoid_exit) | 1144 | END(paranoid_exit) |
1127 | 1145 | ||
1128 | /* | 1146 | /* |
@@ -1223,10 +1241,13 @@ ENTRY(error_exit) | |||
1223 | jmp retint_user | 1241 | jmp retint_user |
1224 | END(error_exit) | 1242 | END(error_exit) |
1225 | 1243 | ||
1226 | /* Runs on exception stack */ | 1244 | /* |
1227 | /* XXX: broken on Xen PV */ | 1245 | * Runs on exception stack. Xen PV does not go through this path at all, |
1246 | * so we can use real assembly here. | ||
1247 | */ | ||
1228 | ENTRY(nmi) | 1248 | ENTRY(nmi) |
1229 | UNWIND_HINT_IRET_REGS | 1249 | UNWIND_HINT_IRET_REGS |
1250 | |||
1230 | /* | 1251 | /* |
1231 | * We allow breakpoints in NMIs. If a breakpoint occurs, then | 1252 | * We allow breakpoints in NMIs. If a breakpoint occurs, then |
1232 | * the iretq it performs will take us out of NMI context. | 1253 | * the iretq it performs will take us out of NMI context. |
@@ -1284,7 +1305,7 @@ ENTRY(nmi) | |||
1284 | * stacks lest we corrupt the "NMI executing" variable. | 1305 | * stacks lest we corrupt the "NMI executing" variable. |
1285 | */ | 1306 | */ |
1286 | 1307 | ||
1287 | SWAPGS_UNSAFE_STACK | 1308 | swapgs |
1288 | cld | 1309 | cld |
1289 | movq %rsp, %rdx | 1310 | movq %rsp, %rdx |
1290 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp | 1311 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
@@ -1328,8 +1349,7 @@ ENTRY(nmi) | |||
1328 | * Return back to user mode. We must *not* do the normal exit | 1349 | * Return back to user mode. We must *not* do the normal exit |
1329 | * work, because we don't want to enable interrupts. | 1350 | * work, because we don't want to enable interrupts. |
1330 | */ | 1351 | */ |
1331 | SWAPGS | 1352 | jmp swapgs_restore_regs_and_return_to_usermode |
1332 | jmp restore_regs_and_iret | ||
1333 | 1353 | ||
1334 | .Lnmi_from_kernel: | 1354 | .Lnmi_from_kernel: |
1335 | /* | 1355 | /* |
@@ -1450,7 +1470,7 @@ nested_nmi_out: | |||
1450 | popq %rdx | 1470 | popq %rdx |
1451 | 1471 | ||
1452 | /* We are returning to kernel mode, so this cannot result in a fault. */ | 1472 | /* We are returning to kernel mode, so this cannot result in a fault. */ |
1453 | INTERRUPT_RETURN | 1473 | iretq |
1454 | 1474 | ||
1455 | first_nmi: | 1475 | first_nmi: |
1456 | /* Restore rdx. */ | 1476 | /* Restore rdx. */ |
@@ -1481,7 +1501,7 @@ first_nmi: | |||
1481 | pushfq /* RFLAGS */ | 1501 | pushfq /* RFLAGS */ |
1482 | pushq $__KERNEL_CS /* CS */ | 1502 | pushq $__KERNEL_CS /* CS */ |
1483 | pushq $1f /* RIP */ | 1503 | pushq $1f /* RIP */ |
1484 | INTERRUPT_RETURN /* continues at repeat_nmi below */ | 1504 | iretq /* continues at repeat_nmi below */ |
1485 | UNWIND_HINT_IRET_REGS | 1505 | UNWIND_HINT_IRET_REGS |
1486 | 1: | 1506 | 1: |
1487 | #endif | 1507 | #endif |
@@ -1544,29 +1564,34 @@ end_repeat_nmi: | |||
1544 | nmi_swapgs: | 1564 | nmi_swapgs: |
1545 | SWAPGS_UNSAFE_STACK | 1565 | SWAPGS_UNSAFE_STACK |
1546 | nmi_restore: | 1566 | nmi_restore: |
1547 | RESTORE_EXTRA_REGS | 1567 | POP_EXTRA_REGS |
1548 | RESTORE_C_REGS | 1568 | POP_C_REGS |
1549 | 1569 | ||
1550 | /* Point RSP at the "iret" frame. */ | 1570 | /* |
1551 | REMOVE_PT_GPREGS_FROM_STACK 6*8 | 1571 | * Skip orig_ax and the "outermost" frame to point RSP at the "iret" |
1572 | * frame. | ||
1573 | */ | ||
1574 | addq $6*8, %rsp | ||
1552 | 1575 | ||
1553 | /* | 1576 | /* |
1554 | * Clear "NMI executing". Set DF first so that we can easily | 1577 | * Clear "NMI executing". Set DF first so that we can easily |
1555 | * distinguish the remaining code between here and IRET from | 1578 | * distinguish the remaining code between here and IRET from |
1556 | * the SYSCALL entry and exit paths. On a native kernel, we | 1579 | * the SYSCALL entry and exit paths. |
1557 | * could just inspect RIP, but, on paravirt kernels, | 1580 | * |
1558 | * INTERRUPT_RETURN can translate into a jump into a | 1581 | * We arguably should just inspect RIP instead, but I (Andy) wrote |
1559 | * hypercall page. | 1582 | * this code when I had the misapprehension that Xen PV supported |
1583 | * NMIs, and Xen PV would break that approach. | ||
1560 | */ | 1584 | */ |
1561 | std | 1585 | std |
1562 | movq $0, 5*8(%rsp) /* clear "NMI executing" */ | 1586 | movq $0, 5*8(%rsp) /* clear "NMI executing" */ |
1563 | 1587 | ||
1564 | /* | 1588 | /* |
1565 | * INTERRUPT_RETURN reads the "iret" frame and exits the NMI | 1589 | * iretq reads the "iret" frame and exits the NMI stack in a |
1566 | * stack in a single instruction. We are returning to kernel | 1590 | * single instruction. We are returning to kernel mode, so this |
1567 | * mode, so this cannot result in a fault. | 1591 | * cannot result in a fault. Similarly, we don't need to worry |
1592 | * about espfix64 on the way back to kernel mode. | ||
1568 | */ | 1593 | */ |
1569 | INTERRUPT_RETURN | 1594 | iretq |
1570 | END(nmi) | 1595 | END(nmi) |
1571 | 1596 | ||
1572 | ENTRY(ignore_sysret) | 1597 | ENTRY(ignore_sysret) |
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index b5c7a56ed256..568e130d932c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S | |||
@@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat) | |||
337 | 337 | ||
338 | /* Go back to user mode. */ | 338 | /* Go back to user mode. */ |
339 | TRACE_IRQS_ON | 339 | TRACE_IRQS_ON |
340 | SWAPGS | 340 | jmp swapgs_restore_regs_and_return_to_usermode |
341 | jmp restore_regs_and_iret | ||
342 | END(entry_INT80_compat) | 341 | END(entry_INT80_compat) |
343 | 342 | ||
344 | ENTRY(stub32_clone) | 343 | ENTRY(stub32_clone) |
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 5b0579abb398..3ac991d81e74 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h | |||
@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v) | |||
45 | bool ok; | 45 | bool ok; |
46 | unsigned int retry = RDRAND_RETRY_LOOPS; | 46 | unsigned int retry = RDRAND_RETRY_LOOPS; |
47 | do { | 47 | do { |
48 | asm volatile(RDRAND_LONG "\n\t" | 48 | asm volatile(RDRAND_LONG |
49 | CC_SET(c) | 49 | CC_SET(c) |
50 | : CC_OUT(c) (ok), "=a" (*v)); | 50 | : CC_OUT(c) (ok), "=a" (*v)); |
51 | if (ok) | 51 | if (ok) |
@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v) | |||
59 | bool ok; | 59 | bool ok; |
60 | unsigned int retry = RDRAND_RETRY_LOOPS; | 60 | unsigned int retry = RDRAND_RETRY_LOOPS; |
61 | do { | 61 | do { |
62 | asm volatile(RDRAND_INT "\n\t" | 62 | asm volatile(RDRAND_INT |
63 | CC_SET(c) | 63 | CC_SET(c) |
64 | : CC_OUT(c) (ok), "=a" (*v)); | 64 | : CC_OUT(c) (ok), "=a" (*v)); |
65 | if (ok) | 65 | if (ok) |
@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v) | |||
71 | static inline bool rdseed_long(unsigned long *v) | 71 | static inline bool rdseed_long(unsigned long *v) |
72 | { | 72 | { |
73 | bool ok; | 73 | bool ok; |
74 | asm volatile(RDSEED_LONG "\n\t" | 74 | asm volatile(RDSEED_LONG |
75 | CC_SET(c) | 75 | CC_SET(c) |
76 | : CC_OUT(c) (ok), "=a" (*v)); | 76 | : CC_OUT(c) (ok), "=a" (*v)); |
77 | return ok; | 77 | return ok; |
@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v) | |||
80 | static inline bool rdseed_int(unsigned int *v) | 80 | static inline bool rdseed_int(unsigned int *v) |
81 | { | 81 | { |
82 | bool ok; | 82 | bool ok; |
83 | asm volatile(RDSEED_INT "\n\t" | 83 | asm volatile(RDSEED_INT |
84 | CC_SET(c) | 84 | CC_SET(c) |
85 | : CC_OUT(c) (ok), "=a" (*v)); | 85 | : CC_OUT(c) (ok), "=a" (*v)); |
86 | return ok; | 86 | return ok; |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 2bcf47314959..3fa039855b8f 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) | |||
143 | static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) | 143 | static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) |
144 | { | 144 | { |
145 | bool negative; | 145 | bool negative; |
146 | asm volatile(LOCK_PREFIX "andb %2,%1\n\t" | 146 | asm volatile(LOCK_PREFIX "andb %2,%1" |
147 | CC_SET(s) | 147 | CC_SET(s) |
148 | : CC_OUT(s) (negative), ADDR | 148 | : CC_OUT(s) (negative), ADDR |
149 | : "ir" ((char) ~(1 << nr)) : "memory"); | 149 | : "ir" ((char) ~(1 << nr)) : "memory"); |
@@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * | |||
246 | { | 246 | { |
247 | bool oldbit; | 247 | bool oldbit; |
248 | 248 | ||
249 | asm("bts %2,%1\n\t" | 249 | asm("bts %2,%1" |
250 | CC_SET(c) | 250 | CC_SET(c) |
251 | : CC_OUT(c) (oldbit), ADDR | 251 | : CC_OUT(c) (oldbit), ADDR |
252 | : "Ir" (nr)); | 252 | : "Ir" (nr)); |
@@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long | |||
286 | { | 286 | { |
287 | bool oldbit; | 287 | bool oldbit; |
288 | 288 | ||
289 | asm volatile("btr %2,%1\n\t" | 289 | asm volatile("btr %2,%1" |
290 | CC_SET(c) | 290 | CC_SET(c) |
291 | : CC_OUT(c) (oldbit), ADDR | 291 | : CC_OUT(c) (oldbit), ADDR |
292 | : "Ir" (nr)); | 292 | : "Ir" (nr)); |
@@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon | |||
298 | { | 298 | { |
299 | bool oldbit; | 299 | bool oldbit; |
300 | 300 | ||
301 | asm volatile("btc %2,%1\n\t" | 301 | asm volatile("btc %2,%1" |
302 | CC_SET(c) | 302 | CC_SET(c) |
303 | : CC_OUT(c) (oldbit), ADDR | 303 | : CC_OUT(c) (oldbit), ADDR |
304 | : "Ir" (nr) : "memory"); | 304 | : "Ir" (nr) : "memory"); |
@@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l | |||
329 | { | 329 | { |
330 | bool oldbit; | 330 | bool oldbit; |
331 | 331 | ||
332 | asm volatile("bt %2,%1\n\t" | 332 | asm volatile("bt %2,%1" |
333 | CC_SET(c) | 333 | CC_SET(c) |
334 | : CC_OUT(c) (oldbit) | 334 | : CC_OUT(c) (oldbit) |
335 | : "m" (*(unsigned long *)addr), "Ir" (nr)); | 335 | : "m" (*(unsigned long *)addr), "Ir" (nr)); |
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 9eef9cc64c68..a600a6cda9ec 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h | |||
@@ -7,6 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | #include <linux/types.h> | 8 | #include <linux/types.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/sched/task_stack.h> | ||
10 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
11 | #include <asm/user32.h> | 12 | #include <asm/user32.h> |
12 | #include <asm/unistd.h> | 13 | #include <asm/unistd.h> |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0dfa68438e80..bf6a76202a77 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -126,11 +126,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; | |||
126 | #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) | 126 | #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) |
127 | 127 | ||
128 | #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) | 128 | #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) |
129 | #define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) | 129 | |
130 | #define setup_clear_cpu_cap(bit) do { \ | 130 | extern void setup_clear_cpu_cap(unsigned int bit); |
131 | clear_cpu_cap(&boot_cpu_data, bit); \ | 131 | extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); |
132 | set_bit(bit, (unsigned long *)cpu_caps_cleared); \ | 132 | |
133 | } while (0) | ||
134 | #define setup_force_cpu_cap(bit) do { \ | 133 | #define setup_force_cpu_cap(bit) do { \ |
135 | set_cpu_cap(&boot_cpu_data, bit); \ | 134 | set_cpu_cap(&boot_cpu_data, bit); \ |
136 | set_bit(bit, (unsigned long *)cpu_caps_set); \ | 135 | set_bit(bit, (unsigned long *)cpu_caps_set); \ |
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 793690fbda36..74370734663c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h | |||
@@ -22,6 +22,11 @@ | |||
22 | * this feature bit is not displayed in /proc/cpuinfo at all. | 22 | * this feature bit is not displayed in /proc/cpuinfo at all. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | /* | ||
26 | * When adding new features here that depend on other features, | ||
27 | * please update the table in kernel/cpu/cpuid-deps.c | ||
28 | */ | ||
29 | |||
25 | /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ | 30 | /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ |
26 | #define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ | 31 | #define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ |
27 | #define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ | 32 | #define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ |
@@ -295,6 +300,12 @@ | |||
295 | #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ | 300 | #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ |
296 | #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ | 301 | #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ |
297 | #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ | 302 | #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ |
303 | #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ | ||
304 | #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ | ||
305 | #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ | ||
306 | #define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */ | ||
307 | #define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */ | ||
308 | #define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */ | ||
298 | #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ | 309 | #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ |
299 | #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ | 310 | #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ |
300 | #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ | 311 | #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ |
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 8546fafa21a9..7948a17febb4 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h | |||
@@ -6,7 +6,7 @@ | |||
6 | #include <asm/orc_types.h> | 6 | #include <asm/orc_types.h> |
7 | 7 | ||
8 | struct mod_arch_specific { | 8 | struct mod_arch_specific { |
9 | #ifdef CONFIG_ORC_UNWINDER | 9 | #ifdef CONFIG_UNWINDER_ORC |
10 | unsigned int num_orcs; | 10 | unsigned int num_orcs; |
11 | int *orc_unwind_ip; | 11 | int *orc_unwind_ip; |
12 | struct orc_entry *orc_unwind; | 12 | struct orc_entry *orc_unwind; |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index fd81228e8037..283efcaac8af 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -16,10 +16,9 @@ | |||
16 | #include <linux/cpumask.h> | 16 | #include <linux/cpumask.h> |
17 | #include <asm/frame.h> | 17 | #include <asm/frame.h> |
18 | 18 | ||
19 | static inline void load_sp0(struct tss_struct *tss, | 19 | static inline void load_sp0(unsigned long sp0) |
20 | struct thread_struct *thread) | ||
21 | { | 20 | { |
22 | PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); | 21 | PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); |
23 | } | 22 | } |
24 | 23 | ||
25 | /* The paravirtualized CPUID instruction. */ | 24 | /* The paravirtualized CPUID instruction. */ |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 10cc3b9709fe..6ec54d01972d 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -134,7 +134,7 @@ struct pv_cpu_ops { | |||
134 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | 134 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); |
135 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | 135 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); |
136 | 136 | ||
137 | void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | 137 | void (*load_sp0)(unsigned long sp0); |
138 | 138 | ||
139 | void (*set_iopl_mask)(unsigned mask); | 139 | void (*set_iopl_mask)(unsigned mask); |
140 | 140 | ||
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 377f1ffd18be..ba3c523aaf16 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, | |||
526 | { | 526 | { |
527 | bool oldbit; | 527 | bool oldbit; |
528 | 528 | ||
529 | asm volatile("bt "__percpu_arg(2)",%1\n\t" | 529 | asm volatile("bt "__percpu_arg(2)",%1" |
530 | CC_SET(c) | 530 | CC_SET(c) |
531 | : CC_OUT(c) (oldbit) | 531 | : CC_OUT(c) (oldbit) |
532 | : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); | 532 | : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bdac19ab2488..2db7cf720b04 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -431,7 +431,9 @@ typedef struct { | |||
431 | struct thread_struct { | 431 | struct thread_struct { |
432 | /* Cached TLS descriptors: */ | 432 | /* Cached TLS descriptors: */ |
433 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; | 433 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; |
434 | #ifdef CONFIG_X86_32 | ||
434 | unsigned long sp0; | 435 | unsigned long sp0; |
436 | #endif | ||
435 | unsigned long sp; | 437 | unsigned long sp; |
436 | #ifdef CONFIG_X86_32 | 438 | #ifdef CONFIG_X86_32 |
437 | unsigned long sysenter_cs; | 439 | unsigned long sysenter_cs; |
@@ -518,16 +520,9 @@ static inline void native_set_iopl_mask(unsigned mask) | |||
518 | } | 520 | } |
519 | 521 | ||
520 | static inline void | 522 | static inline void |
521 | native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) | 523 | native_load_sp0(unsigned long sp0) |
522 | { | 524 | { |
523 | tss->x86_tss.sp0 = thread->sp0; | 525 | this_cpu_write(cpu_tss.x86_tss.sp0, sp0); |
524 | #ifdef CONFIG_X86_32 | ||
525 | /* Only happens when SEP is enabled, no need to test "SEP"arately: */ | ||
526 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | ||
527 | tss->x86_tss.ss1 = thread->sysenter_cs; | ||
528 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
529 | } | ||
530 | #endif | ||
531 | } | 526 | } |
532 | 527 | ||
533 | static inline void native_swapgs(void) | 528 | static inline void native_swapgs(void) |
@@ -547,15 +542,20 @@ static inline unsigned long current_top_of_stack(void) | |||
547 | #endif | 542 | #endif |
548 | } | 543 | } |
549 | 544 | ||
545 | static inline bool on_thread_stack(void) | ||
546 | { | ||
547 | return (unsigned long)(current_top_of_stack() - | ||
548 | current_stack_pointer) < THREAD_SIZE; | ||
549 | } | ||
550 | |||
550 | #ifdef CONFIG_PARAVIRT | 551 | #ifdef CONFIG_PARAVIRT |
551 | #include <asm/paravirt.h> | 552 | #include <asm/paravirt.h> |
552 | #else | 553 | #else |
553 | #define __cpuid native_cpuid | 554 | #define __cpuid native_cpuid |
554 | 555 | ||
555 | static inline void load_sp0(struct tss_struct *tss, | 556 | static inline void load_sp0(unsigned long sp0) |
556 | struct thread_struct *thread) | ||
557 | { | 557 | { |
558 | native_load_sp0(tss, thread); | 558 | native_load_sp0(sp0); |
559 | } | 559 | } |
560 | 560 | ||
561 | #define set_iopl_mask native_set_iopl_mask | 561 | #define set_iopl_mask native_set_iopl_mask |
@@ -804,6 +804,15 @@ static inline void spin_lock_prefetch(const void *x) | |||
804 | #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ | 804 | #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ |
805 | TOP_OF_KERNEL_STACK_PADDING) | 805 | TOP_OF_KERNEL_STACK_PADDING) |
806 | 806 | ||
807 | #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) | ||
808 | |||
809 | #define task_pt_regs(task) \ | ||
810 | ({ \ | ||
811 | unsigned long __ptr = (unsigned long)task_stack_page(task); \ | ||
812 | __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ | ||
813 | ((struct pt_regs *)__ptr) - 1; \ | ||
814 | }) | ||
815 | |||
807 | #ifdef CONFIG_X86_32 | 816 | #ifdef CONFIG_X86_32 |
808 | /* | 817 | /* |
809 | * User space process size: 3GB (default). | 818 | * User space process size: 3GB (default). |
@@ -823,23 +832,6 @@ static inline void spin_lock_prefetch(const void *x) | |||
823 | .addr_limit = KERNEL_DS, \ | 832 | .addr_limit = KERNEL_DS, \ |
824 | } | 833 | } |
825 | 834 | ||
826 | /* | ||
827 | * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. | ||
828 | * This is necessary to guarantee that the entire "struct pt_regs" | ||
829 | * is accessible even if the CPU haven't stored the SS/ESP registers | ||
830 | * on the stack (interrupt gate does not save these registers | ||
831 | * when switching to the same priv ring). | ||
832 | * Therefore beware: accessing the ss/esp fields of the | ||
833 | * "struct pt_regs" is possible, but they may contain the | ||
834 | * completely wrong values. | ||
835 | */ | ||
836 | #define task_pt_regs(task) \ | ||
837 | ({ \ | ||
838 | unsigned long __ptr = (unsigned long)task_stack_page(task); \ | ||
839 | __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ | ||
840 | ((struct pt_regs *)__ptr) - 1; \ | ||
841 | }) | ||
842 | |||
843 | #define KSTK_ESP(task) (task_pt_regs(task)->sp) | 835 | #define KSTK_ESP(task) (task_pt_regs(task)->sp) |
844 | 836 | ||
845 | #else | 837 | #else |
@@ -873,11 +865,9 @@ static inline void spin_lock_prefetch(const void *x) | |||
873 | #define STACK_TOP_MAX TASK_SIZE_MAX | 865 | #define STACK_TOP_MAX TASK_SIZE_MAX |
874 | 866 | ||
875 | #define INIT_THREAD { \ | 867 | #define INIT_THREAD { \ |
876 | .sp0 = TOP_OF_INIT_STACK, \ | ||
877 | .addr_limit = KERNEL_DS, \ | 868 | .addr_limit = KERNEL_DS, \ |
878 | } | 869 | } |
879 | 870 | ||
880 | #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) | ||
881 | extern unsigned long KSTK_ESP(struct task_struct *task); | 871 | extern unsigned long KSTK_ESP(struct task_struct *task); |
882 | 872 | ||
883 | #endif /* CONFIG_X86_64 */ | 873 | #endif /* CONFIG_X86_64 */ |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index c0e3c45cf6ab..14131dd06b29 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs) | |||
136 | #endif | 136 | #endif |
137 | } | 137 | } |
138 | 138 | ||
139 | #ifdef CONFIG_X86_64 | ||
140 | static inline bool user_64bit_mode(struct pt_regs *regs) | 139 | static inline bool user_64bit_mode(struct pt_regs *regs) |
141 | { | 140 | { |
141 | #ifdef CONFIG_X86_64 | ||
142 | #ifndef CONFIG_PARAVIRT | 142 | #ifndef CONFIG_PARAVIRT |
143 | /* | 143 | /* |
144 | * On non-paravirt systems, this is the only long mode CPL 3 | 144 | * On non-paravirt systems, this is the only long mode CPL 3 |
@@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs) | |||
149 | /* Headers are too twisted for this to go in paravirt.h. */ | 149 | /* Headers are too twisted for this to go in paravirt.h. */ |
150 | return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; | 150 | return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; |
151 | #endif | 151 | #endif |
152 | #else /* !CONFIG_X86_64 */ | ||
153 | return false; | ||
154 | #endif | ||
152 | } | 155 | } |
153 | 156 | ||
157 | #ifdef CONFIG_X86_64 | ||
154 | #define current_user_stack_pointer() current_pt_regs()->sp | 158 | #define current_user_stack_pointer() current_pt_regs()->sp |
155 | #define compat_user_stack_pointer() current_pt_regs()->sp | 159 | #define compat_user_stack_pointer() current_pt_regs()->sp |
156 | #endif | 160 | #endif |
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index d8f3a6ae9f6c..f91c365e57c3 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h | |||
@@ -29,7 +29,7 @@ cc_label: \ | |||
29 | #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ | 29 | #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ |
30 | do { \ | 30 | do { \ |
31 | bool c; \ | 31 | bool c; \ |
32 | asm volatile (fullop ";" CC_SET(cc) \ | 32 | asm volatile (fullop CC_SET(cc) \ |
33 | : [counter] "+m" (var), CC_OUT(cc) (c) \ | 33 | : [counter] "+m" (var), CC_OUT(cc) (c) \ |
34 | : __VA_ARGS__ : clobbers); \ | 34 | : __VA_ARGS__ : clobbers); \ |
35 | return c; \ | 35 | return c; \ |
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 899084b70412..8c6bd6863db9 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #ifndef _ASM_X86_SWITCH_TO_H | 2 | #ifndef _ASM_X86_SWITCH_TO_H |
3 | #define _ASM_X86_SWITCH_TO_H | 3 | #define _ASM_X86_SWITCH_TO_H |
4 | 4 | ||
5 | #include <linux/sched/task_stack.h> | ||
6 | |||
5 | struct task_struct; /* one of the stranger aspects of C forward declarations */ | 7 | struct task_struct; /* one of the stranger aspects of C forward declarations */ |
6 | 8 | ||
7 | struct task_struct *__switch_to_asm(struct task_struct *prev, | 9 | struct task_struct *__switch_to_asm(struct task_struct *prev, |
@@ -73,4 +75,26 @@ do { \ | |||
73 | ((last) = __switch_to_asm((prev), (next))); \ | 75 | ((last) = __switch_to_asm((prev), (next))); \ |
74 | } while (0) | 76 | } while (0) |
75 | 77 | ||
78 | #ifdef CONFIG_X86_32 | ||
79 | static inline void refresh_sysenter_cs(struct thread_struct *thread) | ||
80 | { | ||
81 | /* Only happens when SEP is enabled, no need to test "SEP"arately: */ | ||
82 | if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) | ||
83 | return; | ||
84 | |||
85 | this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); | ||
86 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
87 | } | ||
88 | #endif | ||
89 | |||
90 | /* This is used when switching tasks or entering/exiting vm86 mode. */ | ||
91 | static inline void update_sp0(struct task_struct *task) | ||
92 | { | ||
93 | #ifdef CONFIG_X86_32 | ||
94 | load_sp0(task->thread.sp0); | ||
95 | #else | ||
96 | load_sp0(task_top_of_stack(task)); | ||
97 | #endif | ||
98 | } | ||
99 | |||
76 | #endif /* _ASM_X86_SWITCH_TO_H */ | 100 | #endif /* _ASM_X86_SWITCH_TO_H */ |
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 91dfcafe27a6..bad25bb80679 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); | |||
21 | asmlinkage long sys_iopl(unsigned int); | 21 | asmlinkage long sys_iopl(unsigned int); |
22 | 22 | ||
23 | /* kernel/ldt.c */ | 23 | /* kernel/ldt.c */ |
24 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); | 24 | asmlinkage long sys_modify_ldt(int, void __user *, unsigned long); |
25 | 25 | ||
26 | /* kernel/signal.c */ | 26 | /* kernel/signal.c */ |
27 | asmlinkage long sys_rt_sigreturn(void); | 27 | asmlinkage long sys_rt_sigreturn(void); |
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index fa60398bbc3a..069c04be1507 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h | |||
@@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu, | |||
34 | ) | 34 | ) |
35 | ); | 35 | ); |
36 | 36 | ||
37 | DEFINE_EVENT(x86_fpu, x86_fpu_state, | ||
38 | TP_PROTO(struct fpu *fpu), | ||
39 | TP_ARGS(fpu) | ||
40 | ); | ||
41 | |||
42 | DEFINE_EVENT(x86_fpu, x86_fpu_before_save, | 37 | DEFINE_EVENT(x86_fpu, x86_fpu_before_save, |
43 | TP_PROTO(struct fpu *fpu), | 38 | TP_PROTO(struct fpu *fpu), |
44 | TP_ARGS(fpu) | 39 | TP_ARGS(fpu) |
@@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, | |||
74 | TP_ARGS(fpu) | 69 | TP_ARGS(fpu) |
75 | ); | 70 | ); |
76 | 71 | ||
77 | DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state, | ||
78 | TP_PROTO(struct fpu *fpu), | ||
79 | TP_ARGS(fpu) | ||
80 | ); | ||
81 | |||
82 | DEFINE_EVENT(x86_fpu, x86_fpu_init_state, | 72 | DEFINE_EVENT(x86_fpu, x86_fpu_init_state, |
83 | TP_PROTO(struct fpu *fpu), | 73 | TP_PROTO(struct fpu *fpu), |
84 | TP_ARGS(fpu) | 74 | TP_ARGS(fpu) |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index b0cced97a6ce..1fadd310ff68 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void); | |||
38 | 38 | ||
39 | #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) | 39 | #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) |
40 | asmlinkage void xen_divide_error(void); | 40 | asmlinkage void xen_divide_error(void); |
41 | asmlinkage void xen_xennmi(void); | ||
41 | asmlinkage void xen_xendebug(void); | 42 | asmlinkage void xen_xendebug(void); |
42 | asmlinkage void xen_xenint3(void); | 43 | asmlinkage void xen_xenint3(void); |
43 | asmlinkage void xen_nmi(void); | ||
44 | asmlinkage void xen_overflow(void); | 44 | asmlinkage void xen_overflow(void); |
45 | asmlinkage void xen_bounds(void); | 45 | asmlinkage void xen_bounds(void); |
46 | asmlinkage void xen_invalid_op(void); | 46 | asmlinkage void xen_invalid_op(void); |
@@ -145,4 +145,22 @@ enum { | |||
145 | X86_TRAP_IRET = 32, /* 32, IRET Exception */ | 145 | X86_TRAP_IRET = 32, /* 32, IRET Exception */ |
146 | }; | 146 | }; |
147 | 147 | ||
148 | /* | ||
149 | * Page fault error code bits: | ||
150 | * | ||
151 | * bit 0 == 0: no page found 1: protection fault | ||
152 | * bit 1 == 0: read access 1: write access | ||
153 | * bit 2 == 0: kernel-mode access 1: user-mode access | ||
154 | * bit 3 == 1: use of reserved bit detected | ||
155 | * bit 4 == 1: fault was an instruction fetch | ||
156 | * bit 5 == 1: protection keys block access | ||
157 | */ | ||
158 | enum x86_pf_error_code { | ||
159 | X86_PF_PROT = 1 << 0, | ||
160 | X86_PF_WRITE = 1 << 1, | ||
161 | X86_PF_USER = 1 << 2, | ||
162 | X86_PF_RSVD = 1 << 3, | ||
163 | X86_PF_INSTR = 1 << 4, | ||
164 | X86_PF_PK = 1 << 5, | ||
165 | }; | ||
148 | #endif /* _ASM_X86_TRAPS_H */ | 166 | #endif /* _ASM_X86_TRAPS_H */ |
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 87adc0d38c4a..e9cc6fe1fc6f 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h | |||
@@ -13,11 +13,11 @@ struct unwind_state { | |||
13 | struct task_struct *task; | 13 | struct task_struct *task; |
14 | int graph_idx; | 14 | int graph_idx; |
15 | bool error; | 15 | bool error; |
16 | #if defined(CONFIG_ORC_UNWINDER) | 16 | #if defined(CONFIG_UNWINDER_ORC) |
17 | bool signal, full_regs; | 17 | bool signal, full_regs; |
18 | unsigned long sp, bp, ip; | 18 | unsigned long sp, bp, ip; |
19 | struct pt_regs *regs; | 19 | struct pt_regs *regs; |
20 | #elif defined(CONFIG_FRAME_POINTER_UNWINDER) | 20 | #elif defined(CONFIG_UNWINDER_FRAME_POINTER) |
21 | bool got_irq; | 21 | bool got_irq; |
22 | unsigned long *bp, *orig_sp, ip; | 22 | unsigned long *bp, *orig_sp, ip; |
23 | struct pt_regs *regs; | 23 | struct pt_regs *regs; |
@@ -51,7 +51,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, | |||
51 | __unwind_start(state, task, regs, first_frame); | 51 | __unwind_start(state, task, regs, first_frame); |
52 | } | 52 | } |
53 | 53 | ||
54 | #if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER) | 54 | #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) |
55 | static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) | 55 | static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) |
56 | { | 56 | { |
57 | if (unwind_done(state)) | 57 | if (unwind_done(state)) |
@@ -66,7 +66,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) | |||
66 | } | 66 | } |
67 | #endif | 67 | #endif |
68 | 68 | ||
69 | #ifdef CONFIG_ORC_UNWINDER | 69 | #ifdef CONFIG_UNWINDER_ORC |
70 | void unwind_init(void); | 70 | void unwind_init(void); |
71 | void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, | 71 | void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, |
72 | void *orc, size_t orc_size); | 72 | void *orc, size_t orc_size); |
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 6f3355399665..53b4ca55ebb6 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h | |||
@@ -152,5 +152,8 @@ | |||
152 | #define CX86_ARR_BASE 0xc4 | 152 | #define CX86_ARR_BASE 0xc4 |
153 | #define CX86_RCR_BASE 0xdc | 153 | #define CX86_RCR_BASE 0xdc |
154 | 154 | ||
155 | #define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ | ||
156 | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ | ||
157 | X86_CR0_PG) | ||
155 | 158 | ||
156 | #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */ | 159 | #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5f70044340ff..d12da41f72da 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -27,7 +27,6 @@ KASAN_SANITIZE_dumpstack.o := n | |||
27 | KASAN_SANITIZE_dumpstack_$(BITS).o := n | 27 | KASAN_SANITIZE_dumpstack_$(BITS).o := n |
28 | KASAN_SANITIZE_stacktrace.o := n | 28 | KASAN_SANITIZE_stacktrace.o := n |
29 | 29 | ||
30 | OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y | ||
31 | OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y | 30 | OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y |
32 | OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y | 31 | OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y |
33 | OBJECT_FILES_NON_STANDARD_test_nx.o := y | 32 | OBJECT_FILES_NON_STANDARD_test_nx.o := y |
@@ -128,9 +127,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | |||
128 | obj-$(CONFIG_TRACING) += tracepoint.o | 127 | obj-$(CONFIG_TRACING) += tracepoint.o |
129 | obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o | 128 | obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o |
130 | 129 | ||
131 | obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o | 130 | obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o |
132 | obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o | 131 | obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o |
133 | obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o | 132 | obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o |
134 | 133 | ||
135 | ### | 134 | ### |
136 | # 64 bit specific files | 135 | # 64 bit specific files |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c60922a66385..90cb82dbba57 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -23,6 +23,7 @@ obj-y += rdrand.o | |||
23 | obj-y += match.o | 23 | obj-y += match.o |
24 | obj-y += bugs.o | 24 | obj-y += bugs.o |
25 | obj-$(CONFIG_CPU_FREQ) += aperfmperf.o | 25 | obj-$(CONFIG_CPU_FREQ) += aperfmperf.o |
26 | obj-y += cpuid-deps.o | ||
26 | 27 | ||
27 | obj-$(CONFIG_PROC_FS) += proc.o | 28 | obj-$(CONFIG_PROC_FS) += proc.o |
28 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o | 29 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c9176bae7fd8..cdf79ab628c2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1301,18 +1301,16 @@ void print_cpu_info(struct cpuinfo_x86 *c) | |||
1301 | pr_cont(")\n"); | 1301 | pr_cont(")\n"); |
1302 | } | 1302 | } |
1303 | 1303 | ||
1304 | static __init int setup_disablecpuid(char *arg) | 1304 | /* |
1305 | * clearcpuid= was already parsed in fpu__init_parse_early_param. | ||
1306 | * But we need to keep a dummy __setup around otherwise it would | ||
1307 | * show up as an environment variable for init. | ||
1308 | */ | ||
1309 | static __init int setup_clearcpuid(char *arg) | ||
1305 | { | 1310 | { |
1306 | int bit; | ||
1307 | |||
1308 | if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) | ||
1309 | setup_clear_cpu_cap(bit); | ||
1310 | else | ||
1311 | return 0; | ||
1312 | |||
1313 | return 1; | 1311 | return 1; |
1314 | } | 1312 | } |
1315 | __setup("clearcpuid=", setup_disablecpuid); | 1313 | __setup("clearcpuid=", setup_clearcpuid); |
1316 | 1314 | ||
1317 | #ifdef CONFIG_X86_64 | 1315 | #ifdef CONFIG_X86_64 |
1318 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 1316 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
@@ -1572,9 +1570,13 @@ void cpu_init(void) | |||
1572 | initialize_tlbstate_and_flush(); | 1570 | initialize_tlbstate_and_flush(); |
1573 | enter_lazy_tlb(&init_mm, me); | 1571 | enter_lazy_tlb(&init_mm, me); |
1574 | 1572 | ||
1575 | load_sp0(t, ¤t->thread); | 1573 | /* |
1574 | * Initialize the TSS. Don't bother initializing sp0, as the initial | ||
1575 | * task never enters user mode. | ||
1576 | */ | ||
1576 | set_tss_desc(cpu, t); | 1577 | set_tss_desc(cpu, t); |
1577 | load_TR_desc(); | 1578 | load_TR_desc(); |
1579 | |||
1578 | load_mm_ldt(&init_mm); | 1580 | load_mm_ldt(&init_mm); |
1579 | 1581 | ||
1580 | clear_all_debug_regs(); | 1582 | clear_all_debug_regs(); |
@@ -1596,7 +1598,6 @@ void cpu_init(void) | |||
1596 | int cpu = smp_processor_id(); | 1598 | int cpu = smp_processor_id(); |
1597 | struct task_struct *curr = current; | 1599 | struct task_struct *curr = current; |
1598 | struct tss_struct *t = &per_cpu(cpu_tss, cpu); | 1600 | struct tss_struct *t = &per_cpu(cpu_tss, cpu); |
1599 | struct thread_struct *thread = &curr->thread; | ||
1600 | 1601 | ||
1601 | wait_for_master_cpu(cpu); | 1602 | wait_for_master_cpu(cpu); |
1602 | 1603 | ||
@@ -1627,9 +1628,13 @@ void cpu_init(void) | |||
1627 | initialize_tlbstate_and_flush(); | 1628 | initialize_tlbstate_and_flush(); |
1628 | enter_lazy_tlb(&init_mm, curr); | 1629 | enter_lazy_tlb(&init_mm, curr); |
1629 | 1630 | ||
1630 | load_sp0(t, thread); | 1631 | /* |
1632 | * Initialize the TSS. Don't bother initializing sp0, as the initial | ||
1633 | * task never enters user mode. | ||
1634 | */ | ||
1631 | set_tss_desc(cpu, t); | 1635 | set_tss_desc(cpu, t); |
1632 | load_TR_desc(); | 1636 | load_TR_desc(); |
1637 | |||
1633 | load_mm_ldt(&init_mm); | 1638 | load_mm_ldt(&init_mm); |
1634 | 1639 | ||
1635 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | 1640 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); |
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c new file mode 100644 index 000000000000..c21f22d836ad --- /dev/null +++ b/arch/x86/kernel/cpu/cpuid-deps.c | |||
@@ -0,0 +1,125 @@ | |||
1 | /* Declare dependencies between CPUIDs */ | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/module.h> | ||
5 | #include <asm/cpufeature.h> | ||
6 | |||
7 | struct cpuid_dep { | ||
8 | unsigned int feature; | ||
9 | unsigned int depends; | ||
10 | }; | ||
11 | |||
12 | /* | ||
13 | * Table of CPUID features that depend on others. | ||
14 | * | ||
15 | * This only includes dependencies that can be usefully disabled, not | ||
16 | * features part of the base set (like FPU). | ||
17 | * | ||
18 | * Note this all is not __init / __initdata because it can be | ||
19 | * called from cpu hotplug. It shouldn't do anything in this case, | ||
20 | * but it's difficult to tell that to the init reference checker. | ||
21 | */ | ||
22 | const static struct cpuid_dep cpuid_deps[] = { | ||
23 | { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, | ||
24 | { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, | ||
25 | { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, | ||
26 | { X86_FEATURE_AVX, X86_FEATURE_XSAVE }, | ||
27 | { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, | ||
28 | { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, | ||
29 | { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, | ||
30 | { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, | ||
31 | { X86_FEATURE_XMM, X86_FEATURE_FXSR }, | ||
32 | { X86_FEATURE_XMM2, X86_FEATURE_XMM }, | ||
33 | { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, | ||
34 | { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 }, | ||
35 | { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 }, | ||
36 | { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, | ||
37 | { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 }, | ||
38 | { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, }, | ||
39 | { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, | ||
40 | { X86_FEATURE_AES, X86_FEATURE_XMM2 }, | ||
41 | { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, | ||
42 | { X86_FEATURE_FMA, X86_FEATURE_AVX }, | ||
43 | { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, | ||
44 | { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, | ||
45 | { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, | ||
46 | { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F }, | ||
47 | { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F }, | ||
48 | { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F }, | ||
49 | { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F }, | ||
50 | { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, | ||
51 | { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, | ||
52 | { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, | ||
53 | { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, | ||
54 | { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, | ||
55 | { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, | ||
56 | { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, | ||
57 | { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, | ||
58 | { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, | ||
59 | { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, | ||
60 | { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, | ||
61 | { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, | ||
62 | {} | ||
63 | }; | ||
64 | |||
65 | static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit) | ||
66 | { | ||
67 | clear_bit32(bit, c->x86_capability); | ||
68 | } | ||
69 | |||
70 | static inline void __setup_clear_cpu_cap(unsigned int bit) | ||
71 | { | ||
72 | clear_cpu_cap(&boot_cpu_data, bit); | ||
73 | set_bit32(bit, cpu_caps_cleared); | ||
74 | } | ||
75 | |||
76 | static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) | ||
77 | { | ||
78 | if (!c) | ||
79 | __setup_clear_cpu_cap(feature); | ||
80 | else | ||
81 | __clear_cpu_cap(c, feature); | ||
82 | } | ||
83 | |||
84 | /* Take the capabilities and the BUG bits into account */ | ||
85 | #define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8) | ||
86 | |||
87 | static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) | ||
88 | { | ||
89 | DECLARE_BITMAP(disable, MAX_FEATURE_BITS); | ||
90 | const struct cpuid_dep *d; | ||
91 | bool changed; | ||
92 | |||
93 | if (WARN_ON(feature >= MAX_FEATURE_BITS)) | ||
94 | return; | ||
95 | |||
96 | clear_feature(c, feature); | ||
97 | |||
98 | /* Collect all features to disable, handling dependencies */ | ||
99 | memset(disable, 0, sizeof(disable)); | ||
100 | __set_bit(feature, disable); | ||
101 | |||
102 | /* Loop until we get a stable state. */ | ||
103 | do { | ||
104 | changed = false; | ||
105 | for (d = cpuid_deps; d->feature; d++) { | ||
106 | if (!test_bit(d->depends, disable)) | ||
107 | continue; | ||
108 | if (__test_and_set_bit(d->feature, disable)) | ||
109 | continue; | ||
110 | |||
111 | changed = true; | ||
112 | clear_feature(c, d->feature); | ||
113 | } | ||
114 | } while (changed); | ||
115 | } | ||
116 | |||
117 | void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) | ||
118 | { | ||
119 | do_clear_cpu_cap(c, feature); | ||
120 | } | ||
121 | |||
122 | void setup_clear_cpu_cap(unsigned int feature) | ||
123 | { | ||
124 | do_clear_cpu_cap(NULL, feature); | ||
125 | } | ||
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 7affb7e3d9a5..6abd83572b01 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c | |||
@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void) | |||
249 | */ | 249 | */ |
250 | static void __init fpu__init_parse_early_param(void) | 250 | static void __init fpu__init_parse_early_param(void) |
251 | { | 251 | { |
252 | char arg[32]; | ||
253 | char *argptr = arg; | ||
254 | int bit; | ||
255 | |||
252 | if (cmdline_find_option_bool(boot_command_line, "no387")) | 256 | if (cmdline_find_option_bool(boot_command_line, "no387")) |
253 | setup_clear_cpu_cap(X86_FEATURE_FPU); | 257 | setup_clear_cpu_cap(X86_FEATURE_FPU); |
254 | 258 | ||
@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void) | |||
266 | 270 | ||
267 | if (cmdline_find_option_bool(boot_command_line, "noxsaves")) | 271 | if (cmdline_find_option_bool(boot_command_line, "noxsaves")) |
268 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | 272 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); |
273 | |||
274 | if (cmdline_find_option(boot_command_line, "clearcpuid", arg, | ||
275 | sizeof(arg)) && | ||
276 | get_option(&argptr, &bit) && | ||
277 | bit >= 0 && | ||
278 | bit < NCAPINTS * 32) | ||
279 | setup_clear_cpu_cap(bit); | ||
269 | } | 280 | } |
270 | 281 | ||
271 | /* | 282 | /* |
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f1d5476c9022..87a57b7642d3 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/fpu/xstate.h> | 15 | #include <asm/fpu/xstate.h> |
16 | 16 | ||
17 | #include <asm/tlbflush.h> | 17 | #include <asm/tlbflush.h> |
18 | #include <asm/cpufeature.h> | ||
18 | 19 | ||
19 | /* | 20 | /* |
20 | * Although we spell it out in here, the Processor Trace | 21 | * Although we spell it out in here, the Processor Trace |
@@ -36,6 +37,19 @@ static const char *xfeature_names[] = | |||
36 | "unknown xstate feature" , | 37 | "unknown xstate feature" , |
37 | }; | 38 | }; |
38 | 39 | ||
40 | static short xsave_cpuid_features[] __initdata = { | ||
41 | X86_FEATURE_FPU, | ||
42 | X86_FEATURE_XMM, | ||
43 | X86_FEATURE_AVX, | ||
44 | X86_FEATURE_MPX, | ||
45 | X86_FEATURE_MPX, | ||
46 | X86_FEATURE_AVX512F, | ||
47 | X86_FEATURE_AVX512F, | ||
48 | X86_FEATURE_AVX512F, | ||
49 | X86_FEATURE_INTEL_PT, | ||
50 | X86_FEATURE_PKU, | ||
51 | }; | ||
52 | |||
39 | /* | 53 | /* |
40 | * Mask of xstate features supported by the CPU and the kernel: | 54 | * Mask of xstate features supported by the CPU and the kernel: |
41 | */ | 55 | */ |
@@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size; | |||
59 | void fpu__xstate_clear_all_cpu_caps(void) | 73 | void fpu__xstate_clear_all_cpu_caps(void) |
60 | { | 74 | { |
61 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | 75 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
62 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
63 | setup_clear_cpu_cap(X86_FEATURE_XSAVEC); | ||
64 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
65 | setup_clear_cpu_cap(X86_FEATURE_AVX); | ||
66 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | ||
67 | setup_clear_cpu_cap(X86_FEATURE_AVX512F); | ||
68 | setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA); | ||
69 | setup_clear_cpu_cap(X86_FEATURE_AVX512PF); | ||
70 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); | ||
71 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); | ||
72 | setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); | ||
73 | setup_clear_cpu_cap(X86_FEATURE_AVX512BW); | ||
74 | setup_clear_cpu_cap(X86_FEATURE_AVX512VL); | ||
75 | setup_clear_cpu_cap(X86_FEATURE_MPX); | ||
76 | setup_clear_cpu_cap(X86_FEATURE_XGETBV1); | ||
77 | setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI); | ||
78 | setup_clear_cpu_cap(X86_FEATURE_PKU); | ||
79 | setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); | ||
80 | setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); | ||
81 | setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); | ||
82 | } | 76 | } |
83 | 77 | ||
84 | /* | 78 | /* |
@@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void) | |||
726 | unsigned int eax, ebx, ecx, edx; | 720 | unsigned int eax, ebx, ecx, edx; |
727 | static int on_boot_cpu __initdata = 1; | 721 | static int on_boot_cpu __initdata = 1; |
728 | int err; | 722 | int err; |
723 | int i; | ||
729 | 724 | ||
730 | WARN_ON_FPU(!on_boot_cpu); | 725 | WARN_ON_FPU(!on_boot_cpu); |
731 | on_boot_cpu = 0; | 726 | on_boot_cpu = 0; |
@@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void) | |||
759 | goto out_disable; | 754 | goto out_disable; |
760 | } | 755 | } |
761 | 756 | ||
757 | /* | ||
758 | * Clear XSAVE features that are disabled in the normal CPUID. | ||
759 | */ | ||
760 | for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { | ||
761 | if (!boot_cpu_has(xsave_cpuid_features[i])) | ||
762 | xfeatures_mask &= ~BIT(i); | ||
763 | } | ||
764 | |||
762 | xfeatures_mask &= fpu__get_supported_xfeatures_mask(); | 765 | xfeatures_mask &= fpu__get_supported_xfeatures_mask(); |
763 | 766 | ||
764 | /* Enable xstate instructions to be able to continue with initialization: */ | 767 | /* Enable xstate instructions to be able to continue with initialization: */ |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f1d528bb66a6..c29020907886 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -212,9 +212,6 @@ ENTRY(startup_32_smp) | |||
212 | #endif | 212 | #endif |
213 | 213 | ||
214 | .Ldefault_entry: | 214 | .Ldefault_entry: |
215 | #define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ | ||
216 | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ | ||
217 | X86_CR0_PG) | ||
218 | movl $(CR0_STATE & ~X86_CR0_PG),%eax | 215 | movl $(CR0_STATE & ~X86_CR0_PG),%eax |
219 | movl %eax,%cr0 | 216 | movl %eax,%cr0 |
220 | 217 | ||
@@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array) | |||
402 | # 24(%rsp) error code | 399 | # 24(%rsp) error code |
403 | i = 0 | 400 | i = 0 |
404 | .rept NUM_EXCEPTION_VECTORS | 401 | .rept NUM_EXCEPTION_VECTORS |
405 | .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 | 402 | .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 |
406 | pushl $0 # Dummy error code, to make stack frame uniform | 403 | pushl $0 # Dummy error code, to make stack frame uniform |
407 | .endif | 404 | .endif |
408 | pushl $i # 20(%esp) Vector number | 405 | pushl $i # 20(%esp) Vector number |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 6dde3f3fc1f8..fd58835d8f9b 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -50,6 +50,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) | |||
50 | .code64 | 50 | .code64 |
51 | .globl startup_64 | 51 | .globl startup_64 |
52 | startup_64: | 52 | startup_64: |
53 | UNWIND_HINT_EMPTY | ||
53 | /* | 54 | /* |
54 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, | 55 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, |
55 | * and someone has loaded an identity mapped page table | 56 | * and someone has loaded an identity mapped page table |
@@ -89,6 +90,7 @@ startup_64: | |||
89 | addq $(early_top_pgt - __START_KERNEL_map), %rax | 90 | addq $(early_top_pgt - __START_KERNEL_map), %rax |
90 | jmp 1f | 91 | jmp 1f |
91 | ENTRY(secondary_startup_64) | 92 | ENTRY(secondary_startup_64) |
93 | UNWIND_HINT_EMPTY | ||
92 | /* | 94 | /* |
93 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, | 95 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, |
94 | * and someone has loaded a mapped page table. | 96 | * and someone has loaded a mapped page table. |
@@ -133,6 +135,7 @@ ENTRY(secondary_startup_64) | |||
133 | movq $1f, %rax | 135 | movq $1f, %rax |
134 | jmp *%rax | 136 | jmp *%rax |
135 | 1: | 137 | 1: |
138 | UNWIND_HINT_EMPTY | ||
136 | 139 | ||
137 | /* Check if nx is implemented */ | 140 | /* Check if nx is implemented */ |
138 | movl $0x80000001, %eax | 141 | movl $0x80000001, %eax |
@@ -150,9 +153,6 @@ ENTRY(secondary_startup_64) | |||
150 | 1: wrmsr /* Make changes effective */ | 153 | 1: wrmsr /* Make changes effective */ |
151 | 154 | ||
152 | /* Setup cr0 */ | 155 | /* Setup cr0 */ |
153 | #define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ | ||
154 | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ | ||
155 | X86_CR0_PG) | ||
156 | movl $CR0_STATE, %eax | 156 | movl $CR0_STATE, %eax |
157 | /* Make changes effective */ | 157 | /* Make changes effective */ |
158 | movq %rax, %cr0 | 158 | movq %rax, %cr0 |
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64) | |||
235 | pushq %rax # target address in negative space | 235 | pushq %rax # target address in negative space |
236 | lretq | 236 | lretq |
237 | .Lafter_lret: | 237 | .Lafter_lret: |
238 | ENDPROC(secondary_startup_64) | 238 | END(secondary_startup_64) |
239 | 239 | ||
240 | #include "verify_cpu.S" | 240 | #include "verify_cpu.S" |
241 | 241 | ||
@@ -247,6 +247,7 @@ ENDPROC(secondary_startup_64) | |||
247 | */ | 247 | */ |
248 | ENTRY(start_cpu0) | 248 | ENTRY(start_cpu0) |
249 | movq initial_stack(%rip), %rsp | 249 | movq initial_stack(%rip), %rsp |
250 | UNWIND_HINT_EMPTY | ||
250 | jmp .Ljump_to_C_code | 251 | jmp .Ljump_to_C_code |
251 | ENDPROC(start_cpu0) | 252 | ENDPROC(start_cpu0) |
252 | #endif | 253 | #endif |
@@ -266,26 +267,24 @@ ENDPROC(start_cpu0) | |||
266 | .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS | 267 | .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS |
267 | __FINITDATA | 268 | __FINITDATA |
268 | 269 | ||
269 | bad_address: | ||
270 | jmp bad_address | ||
271 | |||
272 | __INIT | 270 | __INIT |
273 | ENTRY(early_idt_handler_array) | 271 | ENTRY(early_idt_handler_array) |
274 | # 104(%rsp) %rflags | ||
275 | # 96(%rsp) %cs | ||
276 | # 88(%rsp) %rip | ||
277 | # 80(%rsp) error code | ||
278 | i = 0 | 272 | i = 0 |
279 | .rept NUM_EXCEPTION_VECTORS | 273 | .rept NUM_EXCEPTION_VECTORS |
280 | .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 | 274 | .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 |
281 | pushq $0 # Dummy error code, to make stack frame uniform | 275 | UNWIND_HINT_IRET_REGS |
276 | pushq $0 # Dummy error code, to make stack frame uniform | ||
277 | .else | ||
278 | UNWIND_HINT_IRET_REGS offset=8 | ||
282 | .endif | 279 | .endif |
283 | pushq $i # 72(%rsp) Vector number | 280 | pushq $i # 72(%rsp) Vector number |
284 | jmp early_idt_handler_common | 281 | jmp early_idt_handler_common |
282 | UNWIND_HINT_IRET_REGS | ||
285 | i = i + 1 | 283 | i = i + 1 |
286 | .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc | 284 | .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc |
287 | .endr | 285 | .endr |
288 | ENDPROC(early_idt_handler_array) | 286 | UNWIND_HINT_IRET_REGS offset=16 |
287 | END(early_idt_handler_array) | ||
289 | 288 | ||
290 | early_idt_handler_common: | 289 | early_idt_handler_common: |
291 | /* | 290 | /* |
@@ -313,6 +312,7 @@ early_idt_handler_common: | |||
313 | pushq %r13 /* pt_regs->r13 */ | 312 | pushq %r13 /* pt_regs->r13 */ |
314 | pushq %r14 /* pt_regs->r14 */ | 313 | pushq %r14 /* pt_regs->r14 */ |
315 | pushq %r15 /* pt_regs->r15 */ | 314 | pushq %r15 /* pt_regs->r15 */ |
315 | UNWIND_HINT_REGS | ||
316 | 316 | ||
317 | cmpq $14,%rsi /* Page fault? */ | 317 | cmpq $14,%rsi /* Page fault? */ |
318 | jnz 10f | 318 | jnz 10f |
@@ -327,8 +327,8 @@ early_idt_handler_common: | |||
327 | 327 | ||
328 | 20: | 328 | 20: |
329 | decl early_recursion_flag(%rip) | 329 | decl early_recursion_flag(%rip) |
330 | jmp restore_regs_and_iret | 330 | jmp restore_regs_and_return_to_kernel |
331 | ENDPROC(early_idt_handler_common) | 331 | END(early_idt_handler_common) |
332 | 332 | ||
333 | __INITDATA | 333 | __INITDATA |
334 | 334 | ||
@@ -435,7 +435,7 @@ ENTRY(phys_base) | |||
435 | EXPORT_SYMBOL(phys_base) | 435 | EXPORT_SYMBOL(phys_base) |
436 | 436 | ||
437 | #include "../../x86/xen/xen-head.S" | 437 | #include "../../x86/xen/xen-head.S" |
438 | 438 | ||
439 | __PAGE_ALIGNED_BSS | 439 | __PAGE_ALIGNED_BSS |
440 | NEXT_PAGE(empty_zero_page) | 440 | NEXT_PAGE(empty_zero_page) |
441 | .skip PAGE_SIZE | 441 | .skip PAGE_SIZE |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 4d17bacf4030..ae5615b03def 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/string.h> | 13 | #include <linux/string.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/smp.h> | 15 | #include <linux/smp.h> |
16 | #include <linux/syscalls.h> | ||
16 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
17 | #include <linux/vmalloc.h> | 18 | #include <linux/vmalloc.h> |
18 | #include <linux/uaccess.h> | 19 | #include <linux/uaccess.h> |
@@ -295,8 +296,8 @@ out: | |||
295 | return error; | 296 | return error; |
296 | } | 297 | } |
297 | 298 | ||
298 | asmlinkage int sys_modify_ldt(int func, void __user *ptr, | 299 | SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , |
299 | unsigned long bytecount) | 300 | unsigned long , bytecount) |
300 | { | 301 | { |
301 | int ret = -ENOSYS; | 302 | int ret = -ENOSYS; |
302 | 303 | ||
@@ -314,5 +315,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr, | |||
314 | ret = write_ldt(ptr, bytecount, 0); | 315 | ret = write_ldt(ptr, bytecount, 0); |
315 | break; | 316 | break; |
316 | } | 317 | } |
317 | return ret; | 318 | /* |
319 | * The SYSCALL_DEFINE() macros give us an 'unsigned long' | ||
320 | * return type, but tht ABI for sys_modify_ldt() expects | ||
321 | * 'int'. This cast gives us an int-sized value in %rax | ||
322 | * for the return code. The 'unsigned' is necessary so | ||
323 | * the compiler does not try to sign-extend the negative | ||
324 | * return codes into the high half of the register when | ||
325 | * taking the value from int->long. | ||
326 | */ | ||
327 | return (unsigned int)ret; | ||
318 | } | 328 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c67685337c5a..97fb3e5737f5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -49,7 +49,13 @@ | |||
49 | */ | 49 | */ |
50 | __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { | 50 | __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { |
51 | .x86_tss = { | 51 | .x86_tss = { |
52 | .sp0 = TOP_OF_INIT_STACK, | 52 | /* |
53 | * .sp0 is only used when entering ring 0 from a lower | ||
54 | * privilege level. Since the init task never runs anything | ||
55 | * but ring 0 code, there is no need for a valid value here. | ||
56 | * Poison it. | ||
57 | */ | ||
58 | .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, | ||
53 | #ifdef CONFIG_X86_32 | 59 | #ifdef CONFIG_X86_32 |
54 | .ss0 = __KERNEL_DS, | 60 | .ss0 = __KERNEL_DS, |
55 | .ss1 = __KERNEL_CS, | 61 | .ss1 = __KERNEL_CS, |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 11966251cd42..45bf0c5f93e1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
284 | 284 | ||
285 | /* | 285 | /* |
286 | * Reload esp0 and cpu_current_top_of_stack. This changes | 286 | * Reload esp0 and cpu_current_top_of_stack. This changes |
287 | * current_thread_info(). | 287 | * current_thread_info(). Refresh the SYSENTER configuration in |
288 | * case prev or next is vm86. | ||
288 | */ | 289 | */ |
289 | load_sp0(tss, next); | 290 | update_sp0(next_p); |
291 | refresh_sysenter_cs(next); | ||
290 | this_cpu_write(cpu_current_top_of_stack, | 292 | this_cpu_write(cpu_current_top_of_stack, |
291 | (unsigned long)task_stack_page(next_p) + | 293 | (unsigned long)task_stack_page(next_p) + |
292 | THREAD_SIZE); | 294 | THREAD_SIZE); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 302e7b2572d1..eeeb34f85c25 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | |||
274 | struct inactive_task_frame *frame; | 274 | struct inactive_task_frame *frame; |
275 | struct task_struct *me = current; | 275 | struct task_struct *me = current; |
276 | 276 | ||
277 | p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; | ||
278 | childregs = task_pt_regs(p); | 277 | childregs = task_pt_regs(p); |
279 | fork_frame = container_of(childregs, struct fork_frame, regs); | 278 | fork_frame = container_of(childregs, struct fork_frame, regs); |
280 | frame = &fork_frame->frame; | 279 | frame = &fork_frame->frame; |
@@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
464 | */ | 463 | */ |
465 | this_cpu_write(current_task, next_p); | 464 | this_cpu_write(current_task, next_p); |
466 | 465 | ||
467 | /* Reload esp0 and ss1. This changes current_thread_info(). */ | 466 | /* Reload sp0. */ |
468 | load_sp0(tss, next); | 467 | update_sp0(next_p); |
469 | 468 | ||
470 | /* | 469 | /* |
471 | * Now maybe reload the debug registers and handle I/O bitmaps | 470 | * Now maybe reload the debug registers and handle I/O bitmaps |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 65a0ccdc3050..d56c1d209283 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
962 | #ifdef CONFIG_X86_32 | 962 | #ifdef CONFIG_X86_32 |
963 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 963 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
964 | irq_ctx_init(cpu); | 964 | irq_ctx_init(cpu); |
965 | per_cpu(cpu_current_top_of_stack, cpu) = | 965 | per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); |
966 | (unsigned long)task_stack_page(idle) + THREAD_SIZE; | ||
967 | #else | 966 | #else |
968 | initial_gs = per_cpu_offset(cpu); | 967 | initial_gs = per_cpu_offset(cpu); |
969 | #endif | 968 | #endif |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5a6b8f809792..d366adfc61da 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) | |||
141 | * will catch asm bugs and any attempt to use ist_preempt_enable | 141 | * will catch asm bugs and any attempt to use ist_preempt_enable |
142 | * from double_fault. | 142 | * from double_fault. |
143 | */ | 143 | */ |
144 | BUG_ON((unsigned long)(current_top_of_stack() - | 144 | BUG_ON(!on_thread_stack()); |
145 | current_stack_pointer) >= THREAD_SIZE); | ||
146 | 145 | ||
147 | preempt_enable_no_resched(); | 146 | preempt_enable_no_resched(); |
148 | } | 147 | } |
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 014ea59aa153..3d3c2f71f617 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S | |||
@@ -33,7 +33,7 @@ | |||
33 | #include <asm/cpufeatures.h> | 33 | #include <asm/cpufeatures.h> |
34 | #include <asm/msr-index.h> | 34 | #include <asm/msr-index.h> |
35 | 35 | ||
36 | verify_cpu: | 36 | ENTRY(verify_cpu) |
37 | pushf # Save caller passed flags | 37 | pushf # Save caller passed flags |
38 | push $0 # Kill any dangerous flags | 38 | push $0 # Kill any dangerous flags |
39 | popf | 39 | popf |
@@ -139,3 +139,4 @@ verify_cpu: | |||
139 | popf # Restore caller passed flags | 139 | popf # Restore caller passed flags |
140 | xorl %eax, %eax | 140 | xorl %eax, %eax |
141 | ret | 141 | ret |
142 | ENDPROC(verify_cpu) | ||
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 68244742ecb0..5edb27f1a2c4 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/irq.h> | 55 | #include <asm/irq.h> |
56 | #include <asm/traps.h> | 56 | #include <asm/traps.h> |
57 | #include <asm/vm86.h> | 57 | #include <asm/vm86.h> |
58 | #include <asm/switch_to.h> | ||
58 | 59 | ||
59 | /* | 60 | /* |
60 | * Known problems: | 61 | * Known problems: |
@@ -94,7 +95,6 @@ | |||
94 | 95 | ||
95 | void save_v86_state(struct kernel_vm86_regs *regs, int retval) | 96 | void save_v86_state(struct kernel_vm86_regs *regs, int retval) |
96 | { | 97 | { |
97 | struct tss_struct *tss; | ||
98 | struct task_struct *tsk = current; | 98 | struct task_struct *tsk = current; |
99 | struct vm86plus_struct __user *user; | 99 | struct vm86plus_struct __user *user; |
100 | struct vm86 *vm86 = current->thread.vm86; | 100 | struct vm86 *vm86 = current->thread.vm86; |
@@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) | |||
146 | do_exit(SIGSEGV); | 146 | do_exit(SIGSEGV); |
147 | } | 147 | } |
148 | 148 | ||
149 | tss = &per_cpu(cpu_tss, get_cpu()); | 149 | preempt_disable(); |
150 | tsk->thread.sp0 = vm86->saved_sp0; | 150 | tsk->thread.sp0 = vm86->saved_sp0; |
151 | tsk->thread.sysenter_cs = __KERNEL_CS; | 151 | tsk->thread.sysenter_cs = __KERNEL_CS; |
152 | load_sp0(tss, &tsk->thread); | 152 | update_sp0(tsk); |
153 | refresh_sysenter_cs(&tsk->thread); | ||
153 | vm86->saved_sp0 = 0; | 154 | vm86->saved_sp0 = 0; |
154 | put_cpu(); | 155 | preempt_enable(); |
155 | 156 | ||
156 | memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); | 157 | memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); |
157 | 158 | ||
@@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) | |||
237 | 238 | ||
238 | static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) | 239 | static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) |
239 | { | 240 | { |
240 | struct tss_struct *tss; | ||
241 | struct task_struct *tsk = current; | 241 | struct task_struct *tsk = current; |
242 | struct vm86 *vm86 = tsk->thread.vm86; | 242 | struct vm86 *vm86 = tsk->thread.vm86; |
243 | struct kernel_vm86_regs vm86regs; | 243 | struct kernel_vm86_regs vm86regs; |
@@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) | |||
365 | vm86->saved_sp0 = tsk->thread.sp0; | 365 | vm86->saved_sp0 = tsk->thread.sp0; |
366 | lazy_save_gs(vm86->regs32.gs); | 366 | lazy_save_gs(vm86->regs32.gs); |
367 | 367 | ||
368 | tss = &per_cpu(cpu_tss, get_cpu()); | ||
369 | /* make room for real-mode segments */ | 368 | /* make room for real-mode segments */ |
369 | preempt_disable(); | ||
370 | tsk->thread.sp0 += 16; | 370 | tsk->thread.sp0 += 16; |
371 | 371 | ||
372 | if (static_cpu_has(X86_FEATURE_SEP)) | 372 | if (static_cpu_has(X86_FEATURE_SEP)) { |
373 | tsk->thread.sysenter_cs = 0; | 373 | tsk->thread.sysenter_cs = 0; |
374 | refresh_sysenter_cs(&tsk->thread); | ||
375 | } | ||
374 | 376 | ||
375 | load_sp0(tss, &tsk->thread); | 377 | update_sp0(tsk); |
376 | put_cpu(); | 378 | preempt_enable(); |
377 | 379 | ||
378 | if (vm86->flags & VM86_SCREEN_BITMAP) | 380 | if (vm86->flags & VM86_SCREEN_BITMAP) |
379 | mark_screen_rdonly(tsk->mm); | 381 | mark_screen_rdonly(tsk->mm); |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b0ff378650a9..3109ba6c6ede 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -30,26 +30,6 @@ | |||
30 | #include <asm/trace/exceptions.h> | 30 | #include <asm/trace/exceptions.h> |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Page fault error code bits: | ||
34 | * | ||
35 | * bit 0 == 0: no page found 1: protection fault | ||
36 | * bit 1 == 0: read access 1: write access | ||
37 | * bit 2 == 0: kernel-mode access 1: user-mode access | ||
38 | * bit 3 == 1: use of reserved bit detected | ||
39 | * bit 4 == 1: fault was an instruction fetch | ||
40 | * bit 5 == 1: protection keys block access | ||
41 | */ | ||
42 | enum x86_pf_error_code { | ||
43 | |||
44 | PF_PROT = 1 << 0, | ||
45 | PF_WRITE = 1 << 1, | ||
46 | PF_USER = 1 << 2, | ||
47 | PF_RSVD = 1 << 3, | ||
48 | PF_INSTR = 1 << 4, | ||
49 | PF_PK = 1 << 5, | ||
50 | }; | ||
51 | |||
52 | /* | ||
53 | * Returns 0 if mmiotrace is disabled, or if the fault is not | 33 | * Returns 0 if mmiotrace is disabled, or if the fault is not |
54 | * handled by mmiotrace: | 34 | * handled by mmiotrace: |
55 | */ | 35 | */ |
@@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) | |||
150 | * If it was a exec (instruction fetch) fault on NX page, then | 130 | * If it was a exec (instruction fetch) fault on NX page, then |
151 | * do not ignore the fault: | 131 | * do not ignore the fault: |
152 | */ | 132 | */ |
153 | if (error_code & PF_INSTR) | 133 | if (error_code & X86_PF_INSTR) |
154 | return 0; | 134 | return 0; |
155 | 135 | ||
156 | instr = (void *)convert_ip_to_linear(current, regs); | 136 | instr = (void *)convert_ip_to_linear(current, regs); |
@@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) | |||
180 | * siginfo so userspace can discover which protection key was set | 160 | * siginfo so userspace can discover which protection key was set |
181 | * on the PTE. | 161 | * on the PTE. |
182 | * | 162 | * |
183 | * If we get here, we know that the hardware signaled a PF_PK | 163 | * If we get here, we know that the hardware signaled a X86_PF_PK |
184 | * fault and that there was a VMA once we got in the fault | 164 | * fault and that there was a VMA once we got in the fault |
185 | * handler. It does *not* guarantee that the VMA we find here | 165 | * handler. It does *not* guarantee that the VMA we find here |
186 | * was the one that we faulted on. | 166 | * was the one that we faulted on. |
@@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) | |||
205 | /* | 185 | /* |
206 | * force_sig_info_fault() is called from a number of | 186 | * force_sig_info_fault() is called from a number of |
207 | * contexts, some of which have a VMA and some of which | 187 | * contexts, some of which have a VMA and some of which |
208 | * do not. The PF_PK handing happens after we have a | 188 | * do not. The X86_PF_PK handing happens after we have a |
209 | * valid VMA, so we should never reach this without a | 189 | * valid VMA, so we should never reach this without a |
210 | * valid VMA. | 190 | * valid VMA. |
211 | */ | 191 | */ |
@@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, | |||
698 | if (!oops_may_print()) | 678 | if (!oops_may_print()) |
699 | return; | 679 | return; |
700 | 680 | ||
701 | if (error_code & PF_INSTR) { | 681 | if (error_code & X86_PF_INSTR) { |
702 | unsigned int level; | 682 | unsigned int level; |
703 | pgd_t *pgd; | 683 | pgd_t *pgd; |
704 | pte_t *pte; | 684 | pte_t *pte; |
@@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
780 | */ | 760 | */ |
781 | if (current->thread.sig_on_uaccess_err && signal) { | 761 | if (current->thread.sig_on_uaccess_err && signal) { |
782 | tsk->thread.trap_nr = X86_TRAP_PF; | 762 | tsk->thread.trap_nr = X86_TRAP_PF; |
783 | tsk->thread.error_code = error_code | PF_USER; | 763 | tsk->thread.error_code = error_code | X86_PF_USER; |
784 | tsk->thread.cr2 = address; | 764 | tsk->thread.cr2 = address; |
785 | 765 | ||
786 | /* XXX: hwpoison faults will set the wrong code. */ | 766 | /* XXX: hwpoison faults will set the wrong code. */ |
@@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
898 | struct task_struct *tsk = current; | 878 | struct task_struct *tsk = current; |
899 | 879 | ||
900 | /* User mode accesses just cause a SIGSEGV */ | 880 | /* User mode accesses just cause a SIGSEGV */ |
901 | if (error_code & PF_USER) { | 881 | if (error_code & X86_PF_USER) { |
902 | /* | 882 | /* |
903 | * It's possible to have interrupts off here: | 883 | * It's possible to have interrupts off here: |
904 | */ | 884 | */ |
@@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
919 | * Instruction fetch faults in the vsyscall page might need | 899 | * Instruction fetch faults in the vsyscall page might need |
920 | * emulation. | 900 | * emulation. |
921 | */ | 901 | */ |
922 | if (unlikely((error_code & PF_INSTR) && | 902 | if (unlikely((error_code & X86_PF_INSTR) && |
923 | ((address & ~0xfff) == VSYSCALL_ADDR))) { | 903 | ((address & ~0xfff) == VSYSCALL_ADDR))) { |
924 | if (emulate_vsyscall(regs, address)) | 904 | if (emulate_vsyscall(regs, address)) |
925 | return; | 905 | return; |
@@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
932 | * are always protection faults. | 912 | * are always protection faults. |
933 | */ | 913 | */ |
934 | if (address >= TASK_SIZE_MAX) | 914 | if (address >= TASK_SIZE_MAX) |
935 | error_code |= PF_PROT; | 915 | error_code |= X86_PF_PROT; |
936 | 916 | ||
937 | if (likely(show_unhandled_signals)) | 917 | if (likely(show_unhandled_signals)) |
938 | show_signal_msg(regs, error_code, address, tsk); | 918 | show_signal_msg(regs, error_code, address, tsk); |
@@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, | |||
993 | 973 | ||
994 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) | 974 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) |
995 | return false; | 975 | return false; |
996 | if (error_code & PF_PK) | 976 | if (error_code & X86_PF_PK) |
997 | return true; | 977 | return true; |
998 | /* this checks permission keys on the VMA: */ | 978 | /* this checks permission keys on the VMA: */ |
999 | if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), | 979 | if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), |
1000 | (error_code & PF_INSTR), foreign)) | 980 | (error_code & X86_PF_INSTR), foreign)) |
1001 | return true; | 981 | return true; |
1002 | return false; | 982 | return false; |
1003 | } | 983 | } |
@@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, | |||
1025 | int code = BUS_ADRERR; | 1005 | int code = BUS_ADRERR; |
1026 | 1006 | ||
1027 | /* Kernel mode? Handle exceptions or die: */ | 1007 | /* Kernel mode? Handle exceptions or die: */ |
1028 | if (!(error_code & PF_USER)) { | 1008 | if (!(error_code & X86_PF_USER)) { |
1029 | no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); | 1009 | no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); |
1030 | return; | 1010 | return; |
1031 | } | 1011 | } |
@@ -1053,14 +1033,14 @@ static noinline void | |||
1053 | mm_fault_error(struct pt_regs *regs, unsigned long error_code, | 1033 | mm_fault_error(struct pt_regs *regs, unsigned long error_code, |
1054 | unsigned long address, u32 *pkey, unsigned int fault) | 1034 | unsigned long address, u32 *pkey, unsigned int fault) |
1055 | { | 1035 | { |
1056 | if (fatal_signal_pending(current) && !(error_code & PF_USER)) { | 1036 | if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { |
1057 | no_context(regs, error_code, address, 0, 0); | 1037 | no_context(regs, error_code, address, 0, 0); |
1058 | return; | 1038 | return; |
1059 | } | 1039 | } |
1060 | 1040 | ||
1061 | if (fault & VM_FAULT_OOM) { | 1041 | if (fault & VM_FAULT_OOM) { |
1062 | /* Kernel mode? Handle exceptions or die: */ | 1042 | /* Kernel mode? Handle exceptions or die: */ |
1063 | if (!(error_code & PF_USER)) { | 1043 | if (!(error_code & X86_PF_USER)) { |
1064 | no_context(regs, error_code, address, | 1044 | no_context(regs, error_code, address, |
1065 | SIGSEGV, SEGV_MAPERR); | 1045 | SIGSEGV, SEGV_MAPERR); |
1066 | return; | 1046 | return; |
@@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
1085 | 1065 | ||
1086 | static int spurious_fault_check(unsigned long error_code, pte_t *pte) | 1066 | static int spurious_fault_check(unsigned long error_code, pte_t *pte) |
1087 | { | 1067 | { |
1088 | if ((error_code & PF_WRITE) && !pte_write(*pte)) | 1068 | if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) |
1089 | return 0; | 1069 | return 0; |
1090 | 1070 | ||
1091 | if ((error_code & PF_INSTR) && !pte_exec(*pte)) | 1071 | if ((error_code & X86_PF_INSTR) && !pte_exec(*pte)) |
1092 | return 0; | 1072 | return 0; |
1093 | /* | 1073 | /* |
1094 | * Note: We do not do lazy flushing on protection key | 1074 | * Note: We do not do lazy flushing on protection key |
1095 | * changes, so no spurious fault will ever set PF_PK. | 1075 | * changes, so no spurious fault will ever set X86_PF_PK. |
1096 | */ | 1076 | */ |
1097 | if ((error_code & PF_PK)) | 1077 | if ((error_code & X86_PF_PK)) |
1098 | return 1; | 1078 | return 1; |
1099 | 1079 | ||
1100 | return 1; | 1080 | return 1; |
@@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address) | |||
1140 | * change, so user accesses are not expected to cause spurious | 1120 | * change, so user accesses are not expected to cause spurious |
1141 | * faults. | 1121 | * faults. |
1142 | */ | 1122 | */ |
1143 | if (error_code != (PF_WRITE | PF_PROT) | 1123 | if (error_code != (X86_PF_WRITE | X86_PF_PROT) && |
1144 | && error_code != (PF_INSTR | PF_PROT)) | 1124 | error_code != (X86_PF_INSTR | X86_PF_PROT)) |
1145 | return 0; | 1125 | return 0; |
1146 | 1126 | ||
1147 | pgd = init_mm.pgd + pgd_index(address); | 1127 | pgd = init_mm.pgd + pgd_index(address); |
@@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) | |||
1201 | * always an unconditional error and can never result in | 1181 | * always an unconditional error and can never result in |
1202 | * a follow-up action to resolve the fault, like a COW. | 1182 | * a follow-up action to resolve the fault, like a COW. |
1203 | */ | 1183 | */ |
1204 | if (error_code & PF_PK) | 1184 | if (error_code & X86_PF_PK) |
1205 | return 1; | 1185 | return 1; |
1206 | 1186 | ||
1207 | /* | 1187 | /* |
1208 | * Make sure to check the VMA so that we do not perform | 1188 | * Make sure to check the VMA so that we do not perform |
1209 | * faults just to hit a PF_PK as soon as we fill in a | 1189 | * faults just to hit a X86_PF_PK as soon as we fill in a |
1210 | * page. | 1190 | * page. |
1211 | */ | 1191 | */ |
1212 | if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), | 1192 | if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), |
1213 | (error_code & PF_INSTR), foreign)) | 1193 | (error_code & X86_PF_INSTR), foreign)) |
1214 | return 1; | 1194 | return 1; |
1215 | 1195 | ||
1216 | if (error_code & PF_WRITE) { | 1196 | if (error_code & X86_PF_WRITE) { |
1217 | /* write, present and write, not present: */ | 1197 | /* write, present and write, not present: */ |
1218 | if (unlikely(!(vma->vm_flags & VM_WRITE))) | 1198 | if (unlikely(!(vma->vm_flags & VM_WRITE))) |
1219 | return 1; | 1199 | return 1; |
@@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) | |||
1221 | } | 1201 | } |
1222 | 1202 | ||
1223 | /* read, present: */ | 1203 | /* read, present: */ |
1224 | if (unlikely(error_code & PF_PROT)) | 1204 | if (unlikely(error_code & X86_PF_PROT)) |
1225 | return 1; | 1205 | return 1; |
1226 | 1206 | ||
1227 | /* read, not present: */ | 1207 | /* read, not present: */ |
@@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) | |||
1244 | if (!static_cpu_has(X86_FEATURE_SMAP)) | 1224 | if (!static_cpu_has(X86_FEATURE_SMAP)) |
1245 | return false; | 1225 | return false; |
1246 | 1226 | ||
1247 | if (error_code & PF_USER) | 1227 | if (error_code & X86_PF_USER) |
1248 | return false; | 1228 | return false; |
1249 | 1229 | ||
1250 | if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) | 1230 | if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) |
@@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1297 | * protection error (error_code & 9) == 0. | 1277 | * protection error (error_code & 9) == 0. |
1298 | */ | 1278 | */ |
1299 | if (unlikely(fault_in_kernel_space(address))) { | 1279 | if (unlikely(fault_in_kernel_space(address))) { |
1300 | if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { | 1280 | if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { |
1301 | if (vmalloc_fault(address) >= 0) | 1281 | if (vmalloc_fault(address) >= 0) |
1302 | return; | 1282 | return; |
1303 | 1283 | ||
@@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1325 | if (unlikely(kprobes_fault(regs))) | 1305 | if (unlikely(kprobes_fault(regs))) |
1326 | return; | 1306 | return; |
1327 | 1307 | ||
1328 | if (unlikely(error_code & PF_RSVD)) | 1308 | if (unlikely(error_code & X86_PF_RSVD)) |
1329 | pgtable_bad(regs, error_code, address); | 1309 | pgtable_bad(regs, error_code, address); |
1330 | 1310 | ||
1331 | if (unlikely(smap_violation(error_code, regs))) { | 1311 | if (unlikely(smap_violation(error_code, regs))) { |
@@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1351 | */ | 1331 | */ |
1352 | if (user_mode(regs)) { | 1332 | if (user_mode(regs)) { |
1353 | local_irq_enable(); | 1333 | local_irq_enable(); |
1354 | error_code |= PF_USER; | 1334 | error_code |= X86_PF_USER; |
1355 | flags |= FAULT_FLAG_USER; | 1335 | flags |= FAULT_FLAG_USER; |
1356 | } else { | 1336 | } else { |
1357 | if (regs->flags & X86_EFLAGS_IF) | 1337 | if (regs->flags & X86_EFLAGS_IF) |
@@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1360 | 1340 | ||
1361 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); | 1341 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
1362 | 1342 | ||
1363 | if (error_code & PF_WRITE) | 1343 | if (error_code & X86_PF_WRITE) |
1364 | flags |= FAULT_FLAG_WRITE; | 1344 | flags |= FAULT_FLAG_WRITE; |
1365 | if (error_code & PF_INSTR) | 1345 | if (error_code & X86_PF_INSTR) |
1366 | flags |= FAULT_FLAG_INSTRUCTION; | 1346 | flags |= FAULT_FLAG_INSTRUCTION; |
1367 | 1347 | ||
1368 | /* | 1348 | /* |
@@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | |||
1382 | * space check, thus avoiding the deadlock: | 1362 | * space check, thus avoiding the deadlock: |
1383 | */ | 1363 | */ |
1384 | if (unlikely(!down_read_trylock(&mm->mmap_sem))) { | 1364 | if (unlikely(!down_read_trylock(&mm->mmap_sem))) { |
1385 | if ((error_code & PF_USER) == 0 && | 1365 | if (!(error_code & X86_PF_USER) && |
1386 | !search_exception_tables(regs->ip)) { | 1366 | !search_exception_tables(regs->ip)) { |
1387 | bad_area_nosemaphore(regs, error_code, address, NULL); | 1367 | bad_area_nosemaphore(regs, error_code, address, NULL); |
1388 | return; | 1368 | return; |
@@ -1409,7 +1389,7 @@ retry: | |||
1409 | bad_area(regs, error_code, address); | 1389 | bad_area(regs, error_code, address); |
1410 | return; | 1390 | return; |
1411 | } | 1391 | } |
1412 | if (error_code & PF_USER) { | 1392 | if (error_code & X86_PF_USER) { |
1413 | /* | 1393 | /* |
1414 | * Accessing the stack below %sp is always a bug. | 1394 | * Accessing the stack below %sp is always a bug. |
1415 | * The large cushion allows instructions like enter | 1395 | * The large cushion allows instructions like enter |
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 836a1eb5df43..3ee234b6234d 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/syscalls.h> | ||
9 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
10 | #include <asm/unistd.h> | 11 | #include <asm/unistd.h> |
11 | #include <os.h> | 12 | #include <os.h> |
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm) | |||
369 | mm->arch.ldt.entry_count = 0; | 370 | mm->arch.ldt.entry_count = 0; |
370 | } | 371 | } |
371 | 372 | ||
372 | int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) | 373 | SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , |
374 | unsigned long , bytecount) | ||
373 | { | 375 | { |
374 | return do_modify_ldt_skas(func, ptr, bytecount); | 376 | /* See non-um modify_ldt() for why we do this cast */ |
377 | return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount); | ||
375 | } | 378 | } |
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index d4396e27b1fb..e55d276afc70 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c | |||
@@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = { | |||
601 | #ifdef CONFIG_X86_MCE | 601 | #ifdef CONFIG_X86_MCE |
602 | { machine_check, xen_machine_check, true }, | 602 | { machine_check, xen_machine_check, true }, |
603 | #endif | 603 | #endif |
604 | { nmi, xen_nmi, true }, | 604 | { nmi, xen_xennmi, true }, |
605 | { overflow, xen_overflow, false }, | 605 | { overflow, xen_overflow, false }, |
606 | #ifdef CONFIG_IA32_EMULATION | 606 | #ifdef CONFIG_IA32_EMULATION |
607 | { entry_INT80_compat, xen_entry_INT80_compat, false }, | 607 | { entry_INT80_compat, xen_entry_INT80_compat, false }, |
@@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | |||
811 | } | 811 | } |
812 | } | 812 | } |
813 | 813 | ||
814 | static void xen_load_sp0(struct tss_struct *tss, | 814 | static void xen_load_sp0(unsigned long sp0) |
815 | struct thread_struct *thread) | ||
816 | { | 815 | { |
817 | struct multicall_space mcs; | 816 | struct multicall_space mcs; |
818 | 817 | ||
819 | mcs = xen_mc_entry(0); | 818 | mcs = xen_mc_entry(0); |
820 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); | 819 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); |
821 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 820 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
822 | tss->x86_tss.sp0 = thread->sp0; | 821 | this_cpu_write(cpu_tss.x86_tss.sp0, sp0); |
823 | } | 822 | } |
824 | 823 | ||
825 | void xen_set_iopl_mask(unsigned mask) | 824 | void xen_set_iopl_mask(unsigned mask) |
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 05f91ce9b55e..c0c756c76afe 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c | |||
@@ -14,6 +14,7 @@ | |||
14 | * single-threaded. | 14 | * single-threaded. |
15 | */ | 15 | */ |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/sched/task_stack.h> | ||
17 | #include <linux/err.h> | 18 | #include <linux/err.h> |
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
19 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
@@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
294 | #endif | 295 | #endif |
295 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); | 296 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); |
296 | 297 | ||
298 | /* | ||
299 | * Bring up the CPU in cpu_bringup_and_idle() with the stack | ||
300 | * pointing just below where pt_regs would be if it were a normal | ||
301 | * kernel entry. | ||
302 | */ | ||
297 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; | 303 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
298 | ctxt->flags = VGCF_IN_KERNEL; | 304 | ctxt->flags = VGCF_IN_KERNEL; |
299 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | 305 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
300 | ctxt->user_regs.ds = __USER_DS; | 306 | ctxt->user_regs.ds = __USER_DS; |
301 | ctxt->user_regs.es = __USER_DS; | 307 | ctxt->user_regs.es = __USER_DS; |
302 | ctxt->user_regs.ss = __KERNEL_DS; | 308 | ctxt->user_regs.ss = __KERNEL_DS; |
309 | ctxt->user_regs.cs = __KERNEL_CS; | ||
310 | ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle); | ||
303 | 311 | ||
304 | xen_copy_trap_info(ctxt->trap_ctxt); | 312 | xen_copy_trap_info(ctxt->trap_ctxt); |
305 | 313 | ||
@@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
314 | ctxt->gdt_frames[0] = gdt_mfn; | 322 | ctxt->gdt_frames[0] = gdt_mfn; |
315 | ctxt->gdt_ents = GDT_ENTRIES; | 323 | ctxt->gdt_ents = GDT_ENTRIES; |
316 | 324 | ||
325 | /* | ||
326 | * Set SS:SP that Xen will use when entering guest kernel mode | ||
327 | * from guest user mode. Subsequent calls to load_sp0() can | ||
328 | * change this value. | ||
329 | */ | ||
317 | ctxt->kernel_ss = __KERNEL_DS; | 330 | ctxt->kernel_ss = __KERNEL_DS; |
318 | ctxt->kernel_sp = idle->thread.sp0; | 331 | ctxt->kernel_sp = task_top_of_stack(idle); |
319 | 332 | ||
320 | #ifdef CONFIG_X86_32 | 333 | #ifdef CONFIG_X86_32 |
321 | ctxt->event_callback_cs = __KERNEL_CS; | 334 | ctxt->event_callback_cs = __KERNEL_CS; |
@@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
327 | (unsigned long)xen_hypervisor_callback; | 340 | (unsigned long)xen_hypervisor_callback; |
328 | ctxt->failsafe_callback_eip = | 341 | ctxt->failsafe_callback_eip = |
329 | (unsigned long)xen_failsafe_callback; | 342 | (unsigned long)xen_failsafe_callback; |
330 | ctxt->user_regs.cs = __KERNEL_CS; | ||
331 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | 343 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); |
332 | 344 | ||
333 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | ||
334 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); | 345 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); |
335 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) | 346 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) |
336 | BUG(); | 347 | BUG(); |
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index c98a48c861fd..8a10c9a9e2b5 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -30,7 +30,7 @@ xen_pv_trap debug | |||
30 | xen_pv_trap xendebug | 30 | xen_pv_trap xendebug |
31 | xen_pv_trap int3 | 31 | xen_pv_trap int3 |
32 | xen_pv_trap xenint3 | 32 | xen_pv_trap xenint3 |
33 | xen_pv_trap nmi | 33 | xen_pv_trap xennmi |
34 | xen_pv_trap overflow | 34 | xen_pv_trap overflow |
35 | xen_pv_trap bounds | 35 | xen_pv_trap bounds |
36 | xen_pv_trap invalid_op | 36 | xen_pv_trap invalid_op |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index b5b8d7f43557..497cc55a0c16 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <asm/boot.h> | 10 | #include <asm/boot.h> |
11 | #include <asm/asm.h> | 11 | #include <asm/asm.h> |
12 | #include <asm/page_types.h> | 12 | #include <asm/page_types.h> |
13 | #include <asm/unwind_hints.h> | ||
13 | 14 | ||
14 | #include <xen/interface/elfnote.h> | 15 | #include <xen/interface/elfnote.h> |
15 | #include <xen/interface/features.h> | 16 | #include <xen/interface/features.h> |
@@ -20,6 +21,7 @@ | |||
20 | #ifdef CONFIG_XEN_PV | 21 | #ifdef CONFIG_XEN_PV |
21 | __INIT | 22 | __INIT |
22 | ENTRY(startup_xen) | 23 | ENTRY(startup_xen) |
24 | UNWIND_HINT_EMPTY | ||
23 | cld | 25 | cld |
24 | 26 | ||
25 | /* Clear .bss */ | 27 | /* Clear .bss */ |
@@ -34,21 +36,24 @@ ENTRY(startup_xen) | |||
34 | mov $init_thread_union+THREAD_SIZE, %_ASM_SP | 36 | mov $init_thread_union+THREAD_SIZE, %_ASM_SP |
35 | 37 | ||
36 | jmp xen_start_kernel | 38 | jmp xen_start_kernel |
37 | 39 | END(startup_xen) | |
38 | __FINIT | 40 | __FINIT |
39 | #endif | 41 | #endif |
40 | 42 | ||
41 | .pushsection .text | 43 | .pushsection .text |
42 | .balign PAGE_SIZE | 44 | .balign PAGE_SIZE |
43 | ENTRY(hypercall_page) | 45 | ENTRY(hypercall_page) |
44 | .skip PAGE_SIZE | 46 | .rept (PAGE_SIZE / 32) |
47 | UNWIND_HINT_EMPTY | ||
48 | .skip 32 | ||
49 | .endr | ||
45 | 50 | ||
46 | #define HYPERCALL(n) \ | 51 | #define HYPERCALL(n) \ |
47 | .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ | 52 | .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ |
48 | .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 | 53 | .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 |
49 | #include <asm/xen-hypercalls.h> | 54 | #include <asm/xen-hypercalls.h> |
50 | #undef HYPERCALL | 55 | #undef HYPERCALL |
51 | 56 | END(hypercall_page) | |
52 | .popsection | 57 | .popsection |
53 | 58 | ||
54 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") | 59 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") |
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8acfc1e099e1..63e56f6c1877 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h | |||
@@ -687,7 +687,7 @@ | |||
687 | #define BUG_TABLE | 687 | #define BUG_TABLE |
688 | #endif | 688 | #endif |
689 | 689 | ||
690 | #ifdef CONFIG_ORC_UNWINDER | 690 | #ifdef CONFIG_UNWINDER_ORC |
691 | #define ORC_UNWIND_TABLE \ | 691 | #define ORC_UNWIND_TABLE \ |
692 | . = ALIGN(4); \ | 692 | . = ALIGN(4); \ |
693 | .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ | 693 | .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ |
diff --git a/include/linux/bitops.h b/include/linux/bitops.h index d03c5dd6185d..8a7e9924df57 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h | |||
@@ -228,6 +228,32 @@ static inline unsigned long __ffs64(u64 word) | |||
228 | return __ffs((unsigned long)word); | 228 | return __ffs((unsigned long)word); |
229 | } | 229 | } |
230 | 230 | ||
231 | /* | ||
232 | * clear_bit32 - Clear a bit in memory for u32 array | ||
233 | * @nr: Bit to clear | ||
234 | * @addr: u32 * address of bitmap | ||
235 | * | ||
236 | * Same as clear_bit, but avoids needing casts for u32 arrays. | ||
237 | */ | ||
238 | |||
239 | static __always_inline void clear_bit32(long nr, volatile u32 *addr) | ||
240 | { | ||
241 | clear_bit(nr, (volatile unsigned long *)addr); | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * set_bit32 - Set a bit in memory for u32 array | ||
246 | * @nr: Bit to clear | ||
247 | * @addr: u32 * address of bitmap | ||
248 | * | ||
249 | * Same as set_bit, but avoids needing casts for u32 arrays. | ||
250 | */ | ||
251 | |||
252 | static __always_inline void set_bit32(long nr, volatile u32 *addr) | ||
253 | { | ||
254 | set_bit(nr, (volatile unsigned long *)addr); | ||
255 | } | ||
256 | |||
231 | #ifdef __KERNEL__ | 257 | #ifdef __KERNEL__ |
232 | 258 | ||
233 | #ifndef set_mask_bits | 259 | #ifndef set_mask_bits |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dfdad67d8f6c..ff21b4dbb392 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -376,7 +376,7 @@ config STACK_VALIDATION | |||
376 | that runtime stack traces are more reliable. | 376 | that runtime stack traces are more reliable. |
377 | 377 | ||
378 | This is also a prerequisite for generation of ORC unwind data, which | 378 | This is also a prerequisite for generation of ORC unwind data, which |
379 | is needed for CONFIG_ORC_UNWINDER. | 379 | is needed for CONFIG_UNWINDER_ORC. |
380 | 380 | ||
381 | For more information, see | 381 | For more information, see |
382 | tools/objtool/Documentation/stack-validation.txt. | 382 | tools/objtool/Documentation/stack-validation.txt. |
diff --git a/scripts/Makefile.build b/scripts/Makefile.build index bb831d49bcfd..e63af4e19382 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build | |||
@@ -259,7 +259,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1) | |||
259 | 259 | ||
260 | __objtool_obj := $(objtree)/tools/objtool/objtool | 260 | __objtool_obj := $(objtree)/tools/objtool/objtool |
261 | 261 | ||
262 | objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check) | 262 | objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check) |
263 | 263 | ||
264 | ifndef CONFIG_FRAME_POINTER | 264 | ifndef CONFIG_FRAME_POINTER |
265 | objtool_args += --no-fp | 265 | objtool_args += --no-fp |
diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c0e26ad1fa7e..9b341584eb1b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c | |||
@@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | |||
1757 | if (insn->dead_end) | 1757 | if (insn->dead_end) |
1758 | return 0; | 1758 | return 0; |
1759 | 1759 | ||
1760 | insn = next_insn; | 1760 | if (!next_insn) { |
1761 | if (!insn) { | 1761 | if (state.cfa.base == CFI_UNDEFINED) |
1762 | return 0; | ||
1762 | WARN("%s: unexpected end of section", sec->name); | 1763 | WARN("%s: unexpected end of section", sec->name); |
1763 | return 1; | 1764 | return 1; |
1764 | } | 1765 | } |
1766 | |||
1767 | insn = next_insn; | ||
1765 | } | 1768 | } |
1766 | 1769 | ||
1767 | return 0; | 1770 | return 0; |
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 31e0f9143840..07f329919828 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c | |||
@@ -70,7 +70,7 @@ static void cmd_usage(void) | |||
70 | 70 | ||
71 | printf("\n"); | 71 | printf("\n"); |
72 | 72 | ||
73 | exit(1); | 73 | exit(129); |
74 | } | 74 | } |
75 | 75 | ||
76 | static void handle_options(int *argc, const char ***argv) | 76 | static void handle_options(int *argc, const char ***argv) |
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv) | |||
86 | break; | 86 | break; |
87 | } else { | 87 | } else { |
88 | fprintf(stderr, "Unknown option: %s\n", cmd); | 88 | fprintf(stderr, "Unknown option: %s\n", cmd); |
89 | fprintf(stderr, "\n Usage: %s\n", | 89 | cmd_usage(); |
90 | objtool_usage_string); | ||
91 | exit(1); | ||
92 | } | 90 | } |
93 | 91 | ||
94 | (*argv)++; | 92 | (*argv)++; |