Diffstat (limited to 'arch/x86/include/asm/processor.h')
-rw-r--r--  arch/x86/include/asm/processor.h | 82
1 file changed, 53 insertions, 29 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e..d3a67fba200a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
 
-extern struct tss_struct doublefault_tss;
-extern __u32 cpu_caps_cleared[NCAPINTS];
-extern __u32 cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss doublefault_tss;
+extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
         write_cr3(__sme_pa(pgdir));
 }
 
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
 #ifdef CONFIG_X86_32
 /* This is the TSS defined by the hardware. */
 struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
 struct x86_hw_tss {
         u32 reserved1;
         u64 sp0;
+
+        /*
+         * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+         * Linux does not use ring 1, so sp1 is not otherwise needed.
+         */
         u64 sp1;
+
         u64 sp2;
         u64 reserved2;
         u64 ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
 #define IO_BITMAP_BITS 65536
 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 #define INVALID_IO_BITMAP_OFFSET 0x8000
 
+struct entry_stack {
+        unsigned long words[64];
+};
+
+struct entry_stack_page {
+        struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
 struct tss_struct {
         /*
-         * The hardware state:
+         * The fixed hardware portion. This must not cross a page boundary
+         * at risk of violating the SDM's advice and potentially triggering
+         * errata.
          */
         struct x86_hw_tss x86_tss;
 
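The reworked IO_BITMAP_OFFSET above is now the distance from the hardware TSS (x86_tss) to the I/O bitmap rather than from the start of struct tss_struct, because the offset the hardware consumes must be relative to whatever base the TSS descriptor points at. A minimal stand-alone sketch of the same offsetof arithmetic, using mock types (mock_hw_tss, mock_tss) rather than the kernel's real layout:

#include <stddef.h>
#include <stdio.h>

/* Mock layout, for illustration only -- not the kernel's real structures. */
struct mock_hw_tss {
        unsigned int reserved1;
        unsigned long sp[4];
};

struct mock_tss {
        struct mock_hw_tss x86_tss;     /* what the TSS descriptor points at */
        unsigned long io_bitmap[8192 / sizeof(long) + 1];
};

int main(void)
{
        /* The hardware wants the offset relative to x86_tss, not to mock_tss. */
        size_t off = offsetof(struct mock_tss, io_bitmap) -
                     offsetof(struct mock_tss, x86_tss);

        printf("io_bitmap offset from TSS base: %zu\n", off);
        return 0;
}

With x86_tss as the first member the numeric result is unchanged; the subtraction only makes the intent explicit and keeps the macro correct if anything is ever placed in front of x86_tss.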
@@ -339,18 +360,9 @@ struct tss_struct {
          * be within the limit.
          */
         unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
 
-#ifdef CONFIG_X86_32
-        /*
-         * Space for the temporary SYSENTER stack.
-         */
-        unsigned long SYSENTER_stack_canary;
-        unsigned long SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 
 /*
  * sizeof(unsigned long) coming from an extra "long" at the end
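The new comment on the hardware portion amounts to a layout constraint: with cpu_tss_rw page-aligned, the fixed x86_hw_tss part has to start and end within the first page so it can never straddle a page boundary. A hedged, self-contained sketch of expressing that constraint as a compile-time check, using mock types and a hypothetical MOCK_PAGE_SIZE rather than the kernel's actual enforcement:

#include <assert.h>
#include <stddef.h>

#define MOCK_PAGE_SIZE 4096UL           /* hypothetical stand-in for PAGE_SIZE */

/* Mock stand-ins, for illustration only. */
struct mock_hw_tss {
        unsigned long regs[13];
};

struct mock_tss {
        struct mock_hw_tss x86_tss;
        unsigned long io_bitmap[1025];
} __attribute__((aligned(MOCK_PAGE_SIZE)));

/*
 * With the containing struct page-aligned, the hardware portion cannot
 * straddle a page boundary as long as it starts and ends within page 0.
 */
static_assert(offsetof(struct mock_tss, x86_tss) + sizeof(struct mock_hw_tss) <= MOCK_PAGE_SIZE,
              "hardware TSS portion would cross a page boundary");

int main(void)
{
        return 0;
}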
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 #endif
 
 /*
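On x86_64 this #define makes cpu_current_top_of_stack a plain textual alias for cpu_tss_rw.x86_tss.sp1, so a later this_cpu_read_stable(cpu_current_top_of_stack) ends up reading the sp1 slot introduced earlier in the patch. A minimal user-space illustration of the aliasing trick, with an ordinary global standing in for the per-CPU variable:

#include <stdio.h>

/* Mock types; the real kernel declares cpu_tss_rw as a per-CPU variable. */
struct mock_hw_tss {
        unsigned long sp0, sp1, sp2;
};

struct mock_tss {
        struct mock_hw_tss x86_tss;
};

static struct mock_tss cpu_tss_rw;      /* ordinary global standing in for the per-CPU copy */

/* The alias: every read of cpu_current_top_of_stack goes through sp1. */
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1

int main(void)
{
        cpu_tss_rw.x86_tss.sp1 = 0xffffc90000004000UL;  /* arbitrary stack-top value */
        printf("top of stack: %#lx\n", cpu_current_top_of_stack);
        return 0;
}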
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 static inline void
 native_load_sp0(unsigned long sp0)
 {
-        this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+        this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
 
 static inline unsigned long current_top_of_stack(void)
 {
-#ifdef CONFIG_X86_64
-        return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
-        /* sp0 on x86_32 is special in and around vm86 mode. */
+        /*
+         * We can't read directly from tss.sp0: sp0 on x86_32 is special in
+         * and around vm86 mode and sp0 on x86_64 is special because of the
+         * entry trampoline.
+         */
         return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
 }
 
 static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
 
 #else
 /*
- * User space process size. 47bits minus one guard page. The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously. We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
 */
 #define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 
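The unchanged TASK_SIZE_MAX definition still encodes the guard-page reasoning in the rewritten comment: the user range stops one page short of the canonical boundary. A quick arithmetic check, assuming 4-level paging (__VIRTUAL_MASK_SHIFT of 47) and a 4 KiB page size:

#include <stdio.h>

int main(void)
{
        /*
         * Assumes 4-level paging, where __VIRTUAL_MASK_SHIFT is 47, and a
         * 4 KiB page size. The user range stops one page short of the
         * canonical boundary, so the highest canonical user page stays
         * unmapped as the guard described in the comment above.
         */
        unsigned long virtual_mask_shift = 47;
        unsigned long page_size = 4096;
        unsigned long task_size_max = (1UL << virtual_mask_shift) - page_size;

        printf("TASK_SIZE_MAX = %#lx\n", task_size_max); /* 0x7ffffffff000 */
        return 0;
}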