diff options
| -rw-r--r-- | arch/x86/include/asm/cpu_entry_area.h | 52 | ||||
| -rw-r--r-- | arch/x86/include/asm/fixmap.h | 41 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/common.c | 94 | ||||
| -rw-r--r-- | arch/x86/kernel/traps.c | 1 | ||||
| -rw-r--r-- | arch/x86/mm/Makefile | 2 | ||||
| -rw-r--r-- | arch/x86/mm/cpu_entry_area.c | 104 |
6 files changed, 159 insertions, 135 deletions
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h new file mode 100644 index 000000000000..5471826803af --- /dev/null +++ b/arch/x86/include/asm/cpu_entry_area.h | |||
| @@ -0,0 +1,52 @@ | |||
// SPDX-License-Identifier: GPL-2.0

#ifndef _ASM_X86_CPU_ENTRY_AREA_H
#define _ASM_X86_CPU_ENTRY_AREA_H

#include <linux/percpu-defs.h>
#include <asm/processor.h>

/*
 * cpu_entry_area is a percpu region that contains things needed by the CPU
 * and early entry/exit code.  Real types aren't used for all fields here
 * to avoid circular header dependencies.
 *
 * Every field is a virtual alias of some other allocated backing store.
 * There is no direct allocation of a struct cpu_entry_area.
 */
struct cpu_entry_area {
	char gdt[PAGE_SIZE];

	/*
	 * The GDT is just below entry_stack and thus serves (on x86_64) as
	 * a read-only guard page.
	 */
	struct entry_stack_page entry_stack_page;

	/*
	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
	 * we need task switches to work, and task switches write to the TSS.
	 */
	struct tss_struct tss;

	char entry_trampoline[PAGE_SIZE];

#ifdef CONFIG_X86_64
	/*
	 * Exception stacks used for IST entries.
	 *
	 * In the future, this should have a separate slot for each stack
	 * with guard pages between them.
	 */
	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
#endif
};

/* Total entry-area size and its extent in pages (struct is page-granular). */
#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
#define CPU_ENTRY_AREA_PAGES	(CPU_ENTRY_AREA_SIZE / PAGE_SIZE)

/* Per-CPU pointer to this CPU's entry area; set by setup_cpu_entry_areas(). */
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);

extern void setup_cpu_entry_areas(void);

#endif
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8153b8d86a3c..fb801662a230 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #else | 25 | #else |
| 26 | #include <uapi/asm/vsyscall.h> | 26 | #include <uapi/asm/vsyscall.h> |
| 27 | #endif | 27 | #endif |
| 28 | #include <asm/cpu_entry_area.h> | ||
| 28 | 29 | ||
| 29 | /* | 30 | /* |
| 30 | * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall | 31 | * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall |
| @@ -45,46 +46,6 @@ extern unsigned long __FIXADDR_TOP; | |||
| 45 | #endif | 46 | #endif |
| 46 | 47 | ||
| 47 | /* | 48 | /* |
| 48 | * cpu_entry_area is a percpu region in the fixmap that contains things | ||
| 49 | * needed by the CPU and early entry/exit code. Real types aren't used | ||
| 50 | * for all fields here to avoid circular header dependencies. | ||
| 51 | * | ||
| 52 | * Every field is a virtual alias of some other allocated backing store. | ||
| 53 | * There is no direct allocation of a struct cpu_entry_area. | ||
| 54 | */ | ||
| 55 | struct cpu_entry_area { | ||
| 56 | char gdt[PAGE_SIZE]; | ||
| 57 | |||
| 58 | /* | ||
| 59 | * The GDT is just below entry_stack and thus serves (on x86_64) as | ||
| 60 | * a a read-only guard page. | ||
| 61 | */ | ||
| 62 | struct entry_stack_page entry_stack_page; | ||
| 63 | |||
| 64 | /* | ||
| 65 | * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because | ||
| 66 | * we need task switches to work, and task switches write to the TSS. | ||
| 67 | */ | ||
| 68 | struct tss_struct tss; | ||
| 69 | |||
| 70 | char entry_trampoline[PAGE_SIZE]; | ||
| 71 | |||
| 72 | #ifdef CONFIG_X86_64 | ||
| 73 | /* | ||
| 74 | * Exception stacks used for IST entries. | ||
| 75 | * | ||
| 76 | * In the future, this should have a separate slot for each stack | ||
| 77 | * with guard pages between them. | ||
| 78 | */ | ||
| 79 | char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; | ||
| 80 | #endif | ||
| 81 | }; | ||
| 82 | |||
| 83 | #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) | ||
| 84 | |||
| 85 | extern void setup_cpu_entry_areas(void); | ||
| 86 | |||
| 87 | /* | ||
| 88 | * Here we define all the compile-time 'special' virtual | 49 | * Here we define all the compile-time 'special' virtual |
| 89 | * addresses. The point is to have a constant address at | 50 | * addresses. The point is to have a constant address at |
| 90 | * compile time, but to set the physical address only | 51 | * compile time, but to set the physical address only |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ed4acbce37a8..8ddcfa4d4165 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | |||
| 482 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, | 482 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, |
| 483 | [DEBUG_STACK - 1] = DEBUG_STKSZ | 483 | [DEBUG_STACK - 1] = DEBUG_STKSZ |
| 484 | }; | 484 | }; |
| 485 | |||
| 486 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | ||
| 487 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); | ||
| 488 | #endif | ||
| 489 | |||
| 490 | static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, | ||
| 491 | entry_stack_storage); | ||
| 492 | |||
| 493 | static void __init | ||
| 494 | set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) | ||
| 495 | { | ||
| 496 | for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) | ||
| 497 | __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); | ||
| 498 | } | ||
| 499 | |||
| 500 | /* Setup the fixmap mappings only once per-processor */ | ||
| 501 | static void __init setup_cpu_entry_area(int cpu) | ||
| 502 | { | ||
| 503 | #ifdef CONFIG_X86_64 | ||
| 504 | extern char _entry_trampoline[]; | ||
| 505 | |||
| 506 | /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ | ||
| 507 | pgprot_t gdt_prot = PAGE_KERNEL_RO; | ||
| 508 | pgprot_t tss_prot = PAGE_KERNEL_RO; | ||
| 509 | #else | ||
| 510 | /* | ||
| 511 | * On native 32-bit systems, the GDT cannot be read-only because | ||
| 512 | * our double fault handler uses a task gate, and entering through | ||
| 513 | * a task gate needs to change an available TSS to busy. If the | ||
| 514 | * GDT is read-only, that will triple fault. The TSS cannot be | ||
| 515 | * read-only because the CPU writes to it on task switches. | ||
| 516 | * | ||
| 517 | * On Xen PV, the GDT must be read-only because the hypervisor | ||
| 518 | * requires it. | ||
| 519 | */ | ||
| 520 | pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? | ||
| 521 | PAGE_KERNEL_RO : PAGE_KERNEL; | ||
| 522 | pgprot_t tss_prot = PAGE_KERNEL; | ||
| 523 | #endif | ||
| 524 | |||
| 525 | __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); | ||
| 526 | set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), | ||
| 527 | per_cpu_ptr(&entry_stack_storage, cpu), 1, | ||
| 528 | PAGE_KERNEL); | ||
| 529 | |||
| 530 | /* | ||
| 531 | * The Intel SDM says (Volume 3, 7.2.1): | ||
| 532 | * | ||
| 533 | * Avoid placing a page boundary in the part of the TSS that the | ||
| 534 | * processor reads during a task switch (the first 104 bytes). The | ||
| 535 | * processor may not correctly perform address translations if a | ||
| 536 | * boundary occurs in this area. During a task switch, the processor | ||
| 537 | * reads and writes into the first 104 bytes of each TSS (using | ||
| 538 | * contiguous physical addresses beginning with the physical address | ||
| 539 | * of the first byte of the TSS). So, after TSS access begins, if | ||
| 540 | * part of the 104 bytes is not physically contiguous, the processor | ||
| 541 | * will access incorrect information without generating a page-fault | ||
| 542 | * exception. | ||
| 543 | * | ||
| 544 | * There are also a lot of errata involving the TSS spanning a page | ||
| 545 | * boundary. Assert that we're not doing that. | ||
| 546 | */ | ||
| 547 | BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ | ||
| 548 | offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); | ||
| 549 | BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); | ||
| 550 | set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), | ||
| 551 | &per_cpu(cpu_tss_rw, cpu), | ||
| 552 | sizeof(struct tss_struct) / PAGE_SIZE, | ||
| 553 | tss_prot); | ||
| 554 | |||
| 555 | #ifdef CONFIG_X86_32 | ||
| 556 | per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); | ||
| 557 | #endif | 485 | #endif |
| 558 | 486 | ||
| 559 | #ifdef CONFIG_X86_64 | ||
| 560 | BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); | ||
| 561 | BUILD_BUG_ON(sizeof(exception_stacks) != | ||
| 562 | sizeof(((struct cpu_entry_area *)0)->exception_stacks)); | ||
| 563 | set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), | ||
| 564 | &per_cpu(exception_stacks, cpu), | ||
| 565 | sizeof(exception_stacks) / PAGE_SIZE, | ||
| 566 | PAGE_KERNEL); | ||
| 567 | |||
| 568 | __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), | ||
| 569 | __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); | ||
| 570 | #endif | ||
| 571 | } | ||
| 572 | |||
| 573 | void __init setup_cpu_entry_areas(void) | ||
| 574 | { | ||
| 575 | unsigned int cpu; | ||
| 576 | |||
| 577 | for_each_possible_cpu(cpu) | ||
| 578 | setup_cpu_entry_area(cpu); | ||
| 579 | } | ||
| 580 | |||
| 581 | /* Load the original GDT from the per-cpu structure */ | 487 | /* Load the original GDT from the per-cpu structure */ |
| 582 | void load_direct_gdt(int cpu) | 488 | void load_direct_gdt(int cpu) |
| 583 | { | 489 | { |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 74136fd16f49..464daed6894f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -52,6 +52,7 @@ | |||
| 52 | #include <asm/traps.h> | 52 | #include <asm/traps.h> |
| 53 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
| 54 | #include <asm/fpu/internal.h> | 54 | #include <asm/fpu/internal.h> |
| 55 | #include <asm/cpu_entry_area.h> | ||
| 55 | #include <asm/mce.h> | 56 | #include <asm/mce.h> |
| 56 | #include <asm/fixmap.h> | 57 | #include <asm/fixmap.h> |
| 57 | #include <asm/mach_traps.h> | 58 | #include <asm/mach_traps.h> |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 7ba7f3d7f477..2e0017af8f9b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
| @@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg | |||
| 10 | endif | 10 | endif |
| 11 | 11 | ||
| 12 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 12 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
| 13 | pat.o pgtable.o physaddr.o setup_nx.o tlb.o | 13 | pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o |
| 14 | 14 | ||
| 15 | # Make sure __phys_addr has no stackprotector | 15 | # Make sure __phys_addr has no stackprotector |
| 16 | nostackp := $(call cc-option, -fno-stack-protector) | 16 | nostackp := $(call cc-option, -fno-stack-protector) |
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c new file mode 100644 index 000000000000..235ff9cfaaf4 --- /dev/null +++ b/arch/x86/mm/cpu_entry_area.c | |||
| @@ -0,0 +1,104 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | |||
| 3 | #include <linux/spinlock.h> | ||
| 4 | #include <linux/percpu.h> | ||
| 5 | |||
| 6 | #include <asm/cpu_entry_area.h> | ||
| 7 | #include <asm/pgtable.h> | ||
| 8 | #include <asm/fixmap.h> | ||
| 9 | #include <asm/desc.h> | ||
| 10 | |||
/* Backing store for the per-CPU entry stack aliased into each entry area. */
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);

#ifdef CONFIG_X86_64
/*
 * Backing store for the IST exception stacks.  The size must match
 * cpu_entry_area::exception_stacks exactly; setup_cpu_entry_area()
 * asserts this with BUILD_BUG_ON().
 */
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
#endif
| 17 | |||
| 18 | static void __init | ||
| 19 | set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) | ||
| 20 | { | ||
| 21 | for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) | ||
| 22 | __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); | ||
| 23 | } | ||
| 24 | |||
/*
 * Set up the fixmap mappings that make up one CPU's cpu_entry_area:
 * the GDT, the entry stack, the TSS, and on 64-bit also the entry
 * trampoline and the IST exception stacks.  Each mapping is a virtual
 * alias of separately allocated backing store.  Called once per
 * possible CPU during boot.
 */
static void __init setup_cpu_entry_area(int cpu)
{
#ifdef CONFIG_X86_64
	extern char _entry_trampoline[];

	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
	pgprot_t gdt_prot = PAGE_KERNEL_RO;
	pgprot_t tss_prot = PAGE_KERNEL_RO;
#else
	/*
	 * On native 32-bit systems, the GDT cannot be read-only because
	 * our double fault handler uses a task gate, and entering through
	 * a task gate needs to change an available TSS to busy.  If the
	 * GDT is read-only, that will triple fault.  The TSS cannot be
	 * read-only because the CPU writes to it on task switches.
	 *
	 * On Xen PV, the GDT must be read-only because the hypervisor
	 * requires it.
	 */
	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
		PAGE_KERNEL_RO : PAGE_KERNEL;
	pgprot_t tss_prot = PAGE_KERNEL;
#endif

	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
				per_cpu_ptr(&entry_stack_storage, cpu), 1,
				PAGE_KERNEL);

	/*
	 * The Intel SDM says (Volume 3, 7.2.1):
	 *
	 *  Avoid placing a page boundary in the part of the TSS that the
	 *  processor reads during a task switch (the first 104 bytes). The
	 *  processor may not correctly perform address translations if a
	 *  boundary occurs in this area. During a task switch, the processor
	 *  reads and writes into the first 104 bytes of each TSS (using
	 *  contiguous physical addresses beginning with the physical address
	 *  of the first byte of the TSS). So, after TSS access begins, if
	 *  part of the 104 bytes is not physically contiguous, the processor
	 *  will access incorrect information without generating a page-fault
	 *  exception.
	 *
	 * There are also a lot of errata involving the TSS spanning a page
	 * boundary.  Assert that we're not doing that.
	 */
	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
				&per_cpu(cpu_tss_rw, cpu),
				sizeof(struct tss_struct) / PAGE_SIZE,
				tss_prot);

#ifdef CONFIG_X86_32
	/* Record this CPU's area in the percpu pointer declared in the header. */
	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
#endif

#ifdef CONFIG_X86_64
	/* The exception-stack backing store must exactly fill its area slot. */
	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
	BUILD_BUG_ON(sizeof(exception_stacks) !=
		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
				&per_cpu(exception_stacks, cpu),
				sizeof(exception_stacks) / PAGE_SIZE,
				PAGE_KERNEL);

	/* Map the entry trampoline text read-only and executable (RX). */
	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
#endif
}
| 97 | |||
| 98 | void __init setup_cpu_entry_areas(void) | ||
| 99 | { | ||
| 100 | unsigned int cpu; | ||
| 101 | |||
| 102 | for_each_possible_cpu(cpu) | ||
| 103 | setup_cpu_entry_area(cpu); | ||
| 104 | } | ||
