diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-20 15:05:30 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-20 15:05:30 -0500 |
| commit | 972d5e7e5b66f5a143026fcdd4b2be2f519c0f12 (patch) | |
| tree | 6c1c5bb79fe163b3b48254605b54532099b74cff | |
| parent | 5d4863e4cc4dc12d1d5e42da3cb5d38c535e4ad6 (diff) | |
| parent | ef0b8b9a521c65201bfca9747ee1bf374296133c (diff) | |
Merge branch 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 EFI changes from Ingo Molnar:
"This consists of two main parts:
- New static EFI runtime services virtual mapping layout which is
groundwork for kexec support on EFI (Borislav Petkov)
- EFI kexec support itself (Dave Young)"
* 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
x86/efi: parse_efi_setup() build fix
x86: ksysfs.c build fix
x86/efi: Delete superfluous global variables
x86: Reserve setup_data ranges late after parsing memmap cmdline
x86: Export x86 boot_params to sysfs
x86: Add xloadflags bit for EFI runtime support on kexec
x86/efi: Pass necessary EFI data for kexec via setup_data
efi: Export EFI runtime memory mapping to sysfs
efi: Export more EFI table variables to sysfs
x86/efi: Cleanup efi_enter_virtual_mode() function
x86/efi: Fix off-by-one bug in EFI Boot Services reservation
x86/efi: Add a wrapper function efi_map_region_fixed()
x86/efi: Remove unused variables in __map_region()
x86/efi: Check krealloc return value
x86/efi: Runtime services virtual mapping
x86/mm/cpa: Map in an arbitrary pgd
x86/mm/pageattr: Add last levels of error path
x86/mm/pageattr: Add a PUD error unwinding path
x86/mm/pageattr: Add a PTE pagetable populating function
x86/mm/pageattr: Add a PMD pagetable populating function
...
23 files changed, 1689 insertions, 116 deletions
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi new file mode 100644 index 000000000000..05874da7ce80 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-efi | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | What: /sys/firmware/efi/fw_vendor | ||
| 2 | Date: December 2013 | ||
| 3 | Contact: Dave Young <dyoung@redhat.com> | ||
| 4 | Description: It shows the physical address of firmware vendor field in the | ||
| 5 | EFI system table. | ||
| 6 | Users: Kexec | ||
| 7 | |||
| 8 | What: /sys/firmware/efi/runtime | ||
| 9 | Date: December 2013 | ||
| 10 | Contact: Dave Young <dyoung@redhat.com> | ||
| 11 | Description: It shows the physical address of runtime service table entry in | ||
| 12 | the EFI system table. | ||
| 13 | Users: Kexec | ||
| 14 | |||
| 15 | What: /sys/firmware/efi/config_table | ||
| 16 | Date: December 2013 | ||
| 17 | Contact: Dave Young <dyoung@redhat.com> | ||
| 18 | Description: It shows the physical address of config table entry in the EFI | ||
| 19 | system table. | ||
| 20 | Users: Kexec | ||
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map new file mode 100644 index 000000000000..c61b9b348e99 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | What: /sys/firmware/efi/runtime-map/ | ||
| 2 | Date: December 2013 | ||
| 3 | Contact: Dave Young <dyoung@redhat.com> | ||
| 4 | Description: Switching efi runtime services to virtual mode requires | ||
| 5 | that all efi memory ranges which have the runtime attribute | ||
| 6 | bit set be mapped to virtual addresses. | ||
| 7 | |||
| 8 | The efi runtime services can only be switched to virtual | ||
| 9 | mode once without rebooting. The kexec kernel must maintain | ||
| 10 | the same physical to virtual address mappings as the first | ||
| 11 | kernel. The mappings are exported to sysfs so userspace tools | ||
| 12 | can reassemble them and pass them into the kexec kernel. | ||
| 13 | |||
| 14 | /sys/firmware/efi/runtime-map/ is the directory the kernel | ||
| 15 | exports that information in. | ||
| 16 | |||
| 17 | subdirectories are named with the number of the memory range: | ||
| 18 | |||
| 19 | /sys/firmware/efi/runtime-map/0 | ||
| 20 | /sys/firmware/efi/runtime-map/1 | ||
| 21 | /sys/firmware/efi/runtime-map/2 | ||
| 22 | /sys/firmware/efi/runtime-map/3 | ||
| 23 | ... | ||
| 24 | |||
| 25 | Each subdirectory contains five files: | ||
| 26 | |||
| 27 | attribute : The attributes of the memory range. | ||
| 28 | num_pages : The size of the memory range in pages. | ||
| 29 | phys_addr : The physical address of the memory range. | ||
| 30 | type : The type of the memory range. | ||
| 31 | virt_addr : The virtual address of the memory range. | ||
| 32 | |||
| 33 | Above values are all hexadecimal numbers with the '0x' prefix. | ||
| 34 | Users: Kexec | ||
diff --git a/Documentation/ABI/testing/sysfs-kernel-boot_params b/Documentation/ABI/testing/sysfs-kernel-boot_params new file mode 100644 index 000000000000..eca38ce2852d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-boot_params | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | What: /sys/kernel/boot_params | ||
| 2 | Date: December 2013 | ||
| 3 | Contact: Dave Young <dyoung@redhat.com> | ||
| 4 | Description: The /sys/kernel/boot_params directory contains two | ||
| 5 | files: "data" and "version" and one subdirectory "setup_data". | ||
| 6 | It is used to export the kernel boot parameters of an x86 | ||
| 7 | platform to userspace for kexec and debugging purposes. | ||
| 8 | |||
| 9 | If there's no setup_data in boot_params the subdirectory will | ||
| 10 | not be created. | ||
| 11 | |||
| 12 | "data" file is the binary representation of struct boot_params. | ||
| 13 | |||
| 14 | "version" file is the string representation of boot | ||
| 15 | protocol version. | ||
| 16 | |||
| 17 | "setup_data" subdirectory contains the setup_data data | ||
| 18 | structure in boot_params. setup_data is maintained in kernel | ||
| 19 | as a linked list. In "setup_data" subdirectory there's one | ||
| 20 | subdirectory for each linked list node named with the number | ||
| 21 | of the list nodes. The list node subdirectory contains two | ||
| 22 | files "type" and "data". "type" file is the string | ||
| 23 | representation of setup_data type. "data" file is the binary | ||
| 24 | representation of setup_data payload. | ||
| 25 | |||
| 26 | The whole boot_params directory structure is like below: | ||
| 27 | /sys/kernel/boot_params | ||
| 28 | |__ data | ||
| 29 | |__ setup_data | ||
| 30 | | |__ 0 | ||
| 31 | | | |__ data | ||
| 32 | | | |__ type | ||
| 33 | | |__ 1 | ||
| 34 | | |__ data | ||
| 35 | | |__ type | ||
| 36 | |__ version | ||
| 37 | |||
| 38 | Users: Kexec | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3b8e262c3657..4eb5fff022b4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -899,6 +899,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
| 899 | edd= [EDD] | 899 | edd= [EDD] |
| 900 | Format: {"off" | "on" | "skip[mbr]"} | 900 | Format: {"off" | "on" | "skip[mbr]"} |
| 901 | 901 | ||
| 902 | efi= [EFI] | ||
| 903 | Format: { "old_map" } | ||
| 904 | old_map [X86-64]: switch to the old ioremap-based EFI | ||
| 905 | runtime services mapping. 32-bit still uses this one by | ||
| 906 | default. | ||
| 907 | |||
| 902 | efi_no_storage_paranoia [EFI; X86] | 908 | efi_no_storage_paranoia [EFI; X86] |
| 903 | Using this parameter you can use more than 50% of | 909 | Using this parameter you can use more than 50% of |
| 904 | your efi variable storage. Use this parameter only if | 910 | your efi variable storage. Use this parameter only if |
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index f4f268c2b826..cb81741d3b0b 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt | |||
| @@ -608,6 +608,9 @@ Protocol: 2.12+ | |||
| 608 | - If 1, the kernel supports the 64-bit EFI handoff entry point | 608 | - If 1, the kernel supports the 64-bit EFI handoff entry point |
| 609 | given at handover_offset + 0x200. | 609 | given at handover_offset + 0x200. |
| 610 | 610 | ||
| 611 | Bit 4 (read): XLF_EFI_KEXEC | ||
| 612 | - If 1, the kernel supports kexec EFI boot with EFI runtime support. | ||
| 613 | |||
| 611 | Field name: cmdline_size | 614 | Field name: cmdline_size |
| 612 | Type: read | 615 | Type: read |
| 613 | Offset/size: 0x238/4 | 616 | Offset/size: 0x238/4 |
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 881582f75c9c..c584a51add15 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt | |||
| @@ -28,4 +28,11 @@ reference. | |||
| 28 | Current X86-64 implementations only support 40 bits of address space, | 28 | Current X86-64 implementations only support 40 bits of address space, |
| 29 | but we support up to 46 bits. This expands into MBZ space in the page tables. | 29 | but we support up to 46 bits. This expands into MBZ space in the page tables. |
| 30 | 30 | ||
| 31 | ->trampoline_pgd: | ||
| 32 | |||
| 33 | We map EFI runtime services in the aforementioned PGD in the virtual | ||
| 34 | range of 64GB (arbitrarily set, can be raised if needed) | ||
| 35 | |||
| 36 | 0xffffffef00000000 - 0xffffffff00000000 | ||
| 37 | |||
| 31 | -Andi Kleen, Jul 2004 | 38 | -Andi Kleen, Jul 2004 |
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 9ec06a1f6d61..ec3b8ba68096 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
| @@ -391,7 +391,14 @@ xloadflags: | |||
| 391 | #else | 391 | #else |
| 392 | # define XLF23 0 | 392 | # define XLF23 0 |
| 393 | #endif | 393 | #endif |
| 394 | .word XLF0 | XLF1 | XLF23 | 394 | |
| 395 | #if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC) | ||
| 396 | # define XLF4 XLF_EFI_KEXEC | ||
| 397 | #else | ||
| 398 | # define XLF4 0 | ||
| 399 | #endif | ||
| 400 | |||
| 401 | .word XLF0 | XLF1 | XLF23 | XLF4 | ||
| 395 | 402 | ||
| 396 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | 403 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, |
| 397 | #added with boot protocol | 404 | #added with boot protocol |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 65c6e6e3a552..3b978c472d08 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
| @@ -1,6 +1,24 @@ | |||
| 1 | #ifndef _ASM_X86_EFI_H | 1 | #ifndef _ASM_X86_EFI_H |
| 2 | #define _ASM_X86_EFI_H | 2 | #define _ASM_X86_EFI_H |
| 3 | 3 | ||
| 4 | /* | ||
| 5 | * We map the EFI regions needed for runtime services non-contiguously, | ||
| 6 | * with preserved alignment on virtual addresses starting from -4G down | ||
| 7 | * for a total max space of 64G. This way, we provide for stable runtime | ||
| 8 | * services addresses across kernels so that a kexec'd kernel can still | ||
| 9 | * use them. | ||
| 10 | * | ||
| 11 | * This is the main reason why we're doing stable VA mappings for RT | ||
| 12 | * services. | ||
| 13 | * | ||
| 14 | * This flag is used in conjunction with a chicken bit called | ||
| 15 | * "efi=old_map" which can be used as a fallback to the old runtime | ||
| 16 | * services mapping method in case there's some b0rkage with a | ||
| 17 | * particular EFI implementation (haha, it is hard to hold up the | ||
| 18 | * sarcasm here...). | ||
| 19 | */ | ||
| 20 | #define EFI_OLD_MEMMAP EFI_ARCH_1 | ||
| 21 | |||
| 4 | #ifdef CONFIG_X86_32 | 22 | #ifdef CONFIG_X86_32 |
| 5 | 23 | ||
| 6 | #define EFI_LOADER_SIGNATURE "EL32" | 24 | #define EFI_LOADER_SIGNATURE "EL32" |
| @@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, | |||
| 69 | efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ | 87 | efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ |
| 70 | (u64)(a4), (u64)(a5), (u64)(a6)) | 88 | (u64)(a4), (u64)(a5), (u64)(a6)) |
| 71 | 89 | ||
| 90 | #define _efi_call_virtX(x, f, ...) \ | ||
| 91 | ({ \ | ||
| 92 | efi_status_t __s; \ | ||
| 93 | \ | ||
| 94 | efi_sync_low_kernel_mappings(); \ | ||
| 95 | preempt_disable(); \ | ||
| 96 | __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \ | ||
| 97 | preempt_enable(); \ | ||
| 98 | __s; \ | ||
| 99 | }) | ||
| 100 | |||
| 72 | #define efi_call_virt0(f) \ | 101 | #define efi_call_virt0(f) \ |
| 73 | efi_call0((efi.systab->runtime->f)) | 102 | _efi_call_virtX(0, f) |
| 74 | #define efi_call_virt1(f, a1) \ | 103 | #define efi_call_virt1(f, a1) \ |
| 75 | efi_call1((efi.systab->runtime->f), (u64)(a1)) | 104 | _efi_call_virtX(1, f, (u64)(a1)) |
| 76 | #define efi_call_virt2(f, a1, a2) \ | 105 | #define efi_call_virt2(f, a1, a2) \ |
| 77 | efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) | 106 | _efi_call_virtX(2, f, (u64)(a1), (u64)(a2)) |
| 78 | #define efi_call_virt3(f, a1, a2, a3) \ | 107 | #define efi_call_virt3(f, a1, a2, a3) \ |
| 79 | efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 108 | _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3)) |
| 80 | (u64)(a3)) | 109 | #define efi_call_virt4(f, a1, a2, a3, a4) \ |
| 81 | #define efi_call_virt4(f, a1, a2, a3, a4) \ | 110 | _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4)) |
| 82 | efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 111 | #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ |
| 83 | (u64)(a3), (u64)(a4)) | 112 | _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5)) |
| 84 | #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ | 113 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ |
| 85 | efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 114 | _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) |
| 86 | (u64)(a3), (u64)(a4), (u64)(a5)) | ||
| 87 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ | ||
| 88 | efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | ||
| 89 | (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) | ||
| 90 | 115 | ||
| 91 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | 116 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, |
| 92 | u32 type, u64 attribute); | 117 | u32 type, u64 attribute); |
| @@ -95,12 +120,28 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | |||
| 95 | 120 | ||
| 96 | extern int add_efi_memmap; | 121 | extern int add_efi_memmap; |
| 97 | extern unsigned long x86_efi_facility; | 122 | extern unsigned long x86_efi_facility; |
| 123 | extern struct efi_scratch efi_scratch; | ||
| 98 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); | 124 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); |
| 99 | extern int efi_memblock_x86_reserve_range(void); | 125 | extern int efi_memblock_x86_reserve_range(void); |
| 100 | extern void efi_call_phys_prelog(void); | 126 | extern void efi_call_phys_prelog(void); |
| 101 | extern void efi_call_phys_epilog(void); | 127 | extern void efi_call_phys_epilog(void); |
| 102 | extern void efi_unmap_memmap(void); | 128 | extern void efi_unmap_memmap(void); |
| 103 | extern void efi_memory_uc(u64 addr, unsigned long size); | 129 | extern void efi_memory_uc(u64 addr, unsigned long size); |
| 130 | extern void __init efi_map_region(efi_memory_desc_t *md); | ||
| 131 | extern void __init efi_map_region_fixed(efi_memory_desc_t *md); | ||
| 132 | extern void efi_sync_low_kernel_mappings(void); | ||
| 133 | extern void efi_setup_page_tables(void); | ||
| 134 | extern void __init old_map_region(efi_memory_desc_t *md); | ||
| 135 | |||
| 136 | struct efi_setup_data { | ||
| 137 | u64 fw_vendor; | ||
| 138 | u64 runtime; | ||
| 139 | u64 tables; | ||
| 140 | u64 smbios; | ||
| 141 | u64 reserved[8]; | ||
| 142 | }; | ||
| 143 | |||
| 144 | extern u64 efi_setup; | ||
| 104 | 145 | ||
| 105 | #ifdef CONFIG_EFI | 146 | #ifdef CONFIG_EFI |
| 106 | 147 | ||
| @@ -110,7 +151,7 @@ static inline bool efi_is_native(void) | |||
| 110 | } | 151 | } |
| 111 | 152 | ||
| 112 | extern struct console early_efi_console; | 153 | extern struct console early_efi_console; |
| 113 | 154 | extern void parse_efi_setup(u64 phys_addr, u32 data_len); | |
| 114 | #else | 155 | #else |
| 115 | /* | 156 | /* |
| 116 | * IF EFI is not configured, have the EFI calls return -ENOSYS. | 157 | * IF EFI is not configured, have the EFI calls return -ENOSYS. |
| @@ -122,6 +163,7 @@ extern struct console early_efi_console; | |||
| 122 | #define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) | 163 | #define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) |
| 123 | #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) | 164 | #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) |
| 124 | #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) | 165 | #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) |
| 166 | static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} | ||
| 125 | #endif /* CONFIG_EFI */ | 167 | #endif /* CONFIG_EFI */ |
| 126 | 168 | ||
| 127 | #endif /* _ASM_X86_EFI_H */ | 169 | #endif /* _ASM_X86_EFI_H */ |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0ecac257fb26..a83aa44bb1fb 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
| @@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { } | |||
| 382 | */ | 382 | */ |
| 383 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); | 383 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); |
| 384 | extern phys_addr_t slow_virt_to_phys(void *__address); | 384 | extern phys_addr_t slow_virt_to_phys(void *__address); |
| 385 | 385 | extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | |
| 386 | unsigned numpages, unsigned long page_flags); | ||
| 386 | #endif /* !__ASSEMBLY__ */ | 387 | #endif /* !__ASSEMBLY__ */ |
| 387 | 388 | ||
| 388 | #endif /* _ASM_X86_PGTABLE_DEFS_H */ | 389 | #endif /* _ASM_X86_PGTABLE_DEFS_H */ |
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 9c3733c5f8f7..225b0988043a 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #define SETUP_E820_EXT 1 | 6 | #define SETUP_E820_EXT 1 |
| 7 | #define SETUP_DTB 2 | 7 | #define SETUP_DTB 2 |
| 8 | #define SETUP_PCI 3 | 8 | #define SETUP_PCI 3 |
| 9 | #define SETUP_EFI 4 | ||
| 9 | 10 | ||
| 10 | /* ram_size flags */ | 11 | /* ram_size flags */ |
| 11 | #define RAMDISK_IMAGE_START_MASK 0x07FF | 12 | #define RAMDISK_IMAGE_START_MASK 0x07FF |
| @@ -23,6 +24,7 @@ | |||
| 23 | #define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) | 24 | #define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) |
| 24 | #define XLF_EFI_HANDOVER_32 (1<<2) | 25 | #define XLF_EFI_HANDOVER_32 (1<<2) |
| 25 | #define XLF_EFI_HANDOVER_64 (1<<3) | 26 | #define XLF_EFI_HANDOVER_64 (1<<3) |
| 27 | #define XLF_EFI_KEXEC (1<<4) | ||
| 26 | 28 | ||
| 27 | #ifndef __ASSEMBLY__ | 29 | #ifndef __ASSEMBLY__ |
| 28 | 30 | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9b0a34e2cd79..510cca5c5390 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | |||
| 29 | obj-y += syscall_$(BITS).o | 29 | obj-y += syscall_$(BITS).o |
| 30 | obj-$(CONFIG_X86_64) += vsyscall_64.o | 30 | obj-$(CONFIG_X86_64) += vsyscall_64.o |
| 31 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | 31 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o |
| 32 | obj-$(CONFIG_SYSFS) += ksysfs.o | ||
| 32 | obj-y += bootflag.o e820.o | 33 | obj-y += bootflag.o e820.o |
| 33 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 34 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
| 34 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 35 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c new file mode 100644 index 000000000000..c2bedaea11f7 --- /dev/null +++ b/arch/x86/kernel/ksysfs.c | |||
| @@ -0,0 +1,340 @@ | |||
| 1 | /* | ||
| 2 | * Architecture specific sysfs attributes in /sys/kernel | ||
| 3 | * | ||
| 4 | * Copyright (C) 2007, Intel Corp. | ||
| 5 | * Huang Ying <ying.huang@intel.com> | ||
| 6 | * Copyright (C) 2013, 2013 Red Hat, Inc. | ||
| 7 | * Dave Young <dyoung@redhat.com> | ||
| 8 | * | ||
| 9 | * This file is released under the GPLv2 | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/kobject.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <linux/sysfs.h> | ||
| 15 | #include <linux/init.h> | ||
| 16 | #include <linux/stat.h> | ||
| 17 | #include <linux/slab.h> | ||
| 18 | #include <linux/mm.h> | ||
| 19 | |||
| 20 | #include <asm/io.h> | ||
| 21 | #include <asm/setup.h> | ||
| 22 | |||
| 23 | static ssize_t version_show(struct kobject *kobj, | ||
| 24 | struct kobj_attribute *attr, char *buf) | ||
| 25 | { | ||
| 26 | return sprintf(buf, "0x%04x\n", boot_params.hdr.version); | ||
| 27 | } | ||
| 28 | |||
| 29 | static struct kobj_attribute boot_params_version_attr = __ATTR_RO(version); | ||
| 30 | |||
| 31 | static ssize_t boot_params_data_read(struct file *fp, struct kobject *kobj, | ||
| 32 | struct bin_attribute *bin_attr, | ||
| 33 | char *buf, loff_t off, size_t count) | ||
| 34 | { | ||
| 35 | memcpy(buf, (void *)&boot_params + off, count); | ||
| 36 | return count; | ||
| 37 | } | ||
| 38 | |||
| 39 | static struct bin_attribute boot_params_data_attr = { | ||
| 40 | .attr = { | ||
| 41 | .name = "data", | ||
| 42 | .mode = S_IRUGO, | ||
| 43 | }, | ||
| 44 | .read = boot_params_data_read, | ||
| 45 | .size = sizeof(boot_params), | ||
| 46 | }; | ||
| 47 | |||
| 48 | static struct attribute *boot_params_version_attrs[] = { | ||
| 49 | &boot_params_version_attr.attr, | ||
| 50 | NULL, | ||
| 51 | }; | ||
| 52 | |||
| 53 | static struct bin_attribute *boot_params_data_attrs[] = { | ||
| 54 | &boot_params_data_attr, | ||
| 55 | NULL, | ||
| 56 | }; | ||
| 57 | |||
| 58 | static struct attribute_group boot_params_attr_group = { | ||
| 59 | .attrs = boot_params_version_attrs, | ||
| 60 | .bin_attrs = boot_params_data_attrs, | ||
| 61 | }; | ||
| 62 | |||
| 63 | static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr) | ||
| 64 | { | ||
| 65 | const char *name; | ||
| 66 | |||
| 67 | name = kobject_name(kobj); | ||
| 68 | return kstrtoint(name, 10, nr); | ||
| 69 | } | ||
| 70 | |||
| 71 | static int get_setup_data_paddr(int nr, u64 *paddr) | ||
| 72 | { | ||
| 73 | int i = 0; | ||
| 74 | struct setup_data *data; | ||
| 75 | u64 pa_data = boot_params.hdr.setup_data; | ||
| 76 | |||
| 77 | while (pa_data) { | ||
| 78 | if (nr == i) { | ||
| 79 | *paddr = pa_data; | ||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
| 83 | if (!data) | ||
| 84 | return -ENOMEM; | ||
| 85 | |||
| 86 | pa_data = data->next; | ||
| 87 | iounmap(data); | ||
| 88 | i++; | ||
| 89 | } | ||
| 90 | return -EINVAL; | ||
| 91 | } | ||
| 92 | |||
| 93 | static int __init get_setup_data_size(int nr, size_t *size) | ||
| 94 | { | ||
| 95 | int i = 0; | ||
| 96 | struct setup_data *data; | ||
| 97 | u64 pa_data = boot_params.hdr.setup_data; | ||
| 98 | |||
| 99 | while (pa_data) { | ||
| 100 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
| 101 | if (!data) | ||
| 102 | return -ENOMEM; | ||
| 103 | if (nr == i) { | ||
| 104 | *size = data->len; | ||
| 105 | iounmap(data); | ||
| 106 | return 0; | ||
| 107 | } | ||
| 108 | |||
| 109 | pa_data = data->next; | ||
| 110 | iounmap(data); | ||
| 111 | i++; | ||
| 112 | } | ||
| 113 | return -EINVAL; | ||
| 114 | } | ||
| 115 | |||
| 116 | static ssize_t type_show(struct kobject *kobj, | ||
| 117 | struct kobj_attribute *attr, char *buf) | ||
| 118 | { | ||
| 119 | int nr, ret; | ||
| 120 | u64 paddr; | ||
| 121 | struct setup_data *data; | ||
| 122 | |||
| 123 | ret = kobj_to_setup_data_nr(kobj, &nr); | ||
| 124 | if (ret) | ||
| 125 | return ret; | ||
| 126 | |||
| 127 | ret = get_setup_data_paddr(nr, &paddr); | ||
| 128 | if (ret) | ||
| 129 | return ret; | ||
| 130 | data = ioremap_cache(paddr, sizeof(*data)); | ||
| 131 | if (!data) | ||
| 132 | return -ENOMEM; | ||
| 133 | |||
| 134 | ret = sprintf(buf, "0x%x\n", data->type); | ||
| 135 | iounmap(data); | ||
| 136 | return ret; | ||
| 137 | } | ||
| 138 | |||
| 139 | static ssize_t setup_data_data_read(struct file *fp, | ||
| 140 | struct kobject *kobj, | ||
| 141 | struct bin_attribute *bin_attr, | ||
| 142 | char *buf, | ||
| 143 | loff_t off, size_t count) | ||
| 144 | { | ||
| 145 | int nr, ret = 0; | ||
| 146 | u64 paddr; | ||
| 147 | struct setup_data *data; | ||
| 148 | void *p; | ||
| 149 | |||
| 150 | ret = kobj_to_setup_data_nr(kobj, &nr); | ||
| 151 | if (ret) | ||
| 152 | return ret; | ||
| 153 | |||
| 154 | ret = get_setup_data_paddr(nr, &paddr); | ||
| 155 | if (ret) | ||
| 156 | return ret; | ||
| 157 | data = ioremap_cache(paddr, sizeof(*data)); | ||
| 158 | if (!data) | ||
| 159 | return -ENOMEM; | ||
| 160 | |||
| 161 | if (off > data->len) { | ||
| 162 | ret = -EINVAL; | ||
| 163 | goto out; | ||
| 164 | } | ||
| 165 | |||
| 166 | if (count > data->len - off) | ||
| 167 | count = data->len - off; | ||
| 168 | |||
| 169 | if (!count) | ||
| 170 | goto out; | ||
| 171 | |||
| 172 | ret = count; | ||
| 173 | p = ioremap_cache(paddr + sizeof(*data), data->len); | ||
| 174 | if (!p) { | ||
| 175 | ret = -ENOMEM; | ||
| 176 | goto out; | ||
| 177 | } | ||
| 178 | memcpy(buf, p + off, count); | ||
| 179 | iounmap(p); | ||
| 180 | out: | ||
| 181 | iounmap(data); | ||
| 182 | return ret; | ||
| 183 | } | ||
| 184 | |||
| 185 | static struct kobj_attribute type_attr = __ATTR_RO(type); | ||
| 186 | |||
| 187 | static struct bin_attribute data_attr = { | ||
| 188 | .attr = { | ||
| 189 | .name = "data", | ||
| 190 | .mode = S_IRUGO, | ||
| 191 | }, | ||
| 192 | .read = setup_data_data_read, | ||
| 193 | }; | ||
| 194 | |||
| 195 | static struct attribute *setup_data_type_attrs[] = { | ||
| 196 | &type_attr.attr, | ||
| 197 | NULL, | ||
| 198 | }; | ||
| 199 | |||
| 200 | static struct bin_attribute *setup_data_data_attrs[] = { | ||
| 201 | &data_attr, | ||
| 202 | NULL, | ||
| 203 | }; | ||
| 204 | |||
| 205 | static struct attribute_group setup_data_attr_group = { | ||
| 206 | .attrs = setup_data_type_attrs, | ||
| 207 | .bin_attrs = setup_data_data_attrs, | ||
| 208 | }; | ||
| 209 | |||
| 210 | static int __init create_setup_data_node(struct kobject *parent, | ||
| 211 | struct kobject **kobjp, int nr) | ||
| 212 | { | ||
| 213 | int ret = 0; | ||
| 214 | size_t size; | ||
| 215 | struct kobject *kobj; | ||
| 216 | char name[16]; /* should be enough for setup_data nodes numbers */ | ||
| 217 | snprintf(name, 16, "%d", nr); | ||
| 218 | |||
| 219 | kobj = kobject_create_and_add(name, parent); | ||
| 220 | if (!kobj) | ||
| 221 | return -ENOMEM; | ||
| 222 | |||
| 223 | ret = get_setup_data_size(nr, &size); | ||
| 224 | if (ret) | ||
| 225 | goto out_kobj; | ||
| 226 | |||
| 227 | data_attr.size = size; | ||
| 228 | ret = sysfs_create_group(kobj, &setup_data_attr_group); | ||
| 229 | if (ret) | ||
| 230 | goto out_kobj; | ||
| 231 | *kobjp = kobj; | ||
| 232 | |||
| 233 | return 0; | ||
| 234 | out_kobj: | ||
| 235 | kobject_put(kobj); | ||
| 236 | return ret; | ||
| 237 | } | ||
| 238 | |||
| 239 | static void __init cleanup_setup_data_node(struct kobject *kobj) | ||
| 240 | { | ||
| 241 | sysfs_remove_group(kobj, &setup_data_attr_group); | ||
| 242 | kobject_put(kobj); | ||
| 243 | } | ||
| 244 | |||
| 245 | static int __init get_setup_data_total_num(u64 pa_data, int *nr) | ||
| 246 | { | ||
| 247 | int ret = 0; | ||
| 248 | struct setup_data *data; | ||
| 249 | |||
| 250 | *nr = 0; | ||
| 251 | while (pa_data) { | ||
| 252 | *nr += 1; | ||
| 253 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
| 254 | if (!data) { | ||
| 255 | ret = -ENOMEM; | ||
| 256 | goto out; | ||
| 257 | } | ||
| 258 | pa_data = data->next; | ||
| 259 | iounmap(data); | ||
| 260 | } | ||
| 261 | |||
| 262 | out: | ||
| 263 | return ret; | ||
| 264 | } | ||
| 265 | |||
| 266 | static int __init create_setup_data_nodes(struct kobject *parent) | ||
| 267 | { | ||
| 268 | struct kobject *setup_data_kobj, **kobjp; | ||
| 269 | u64 pa_data; | ||
| 270 | int i, j, nr, ret = 0; | ||
| 271 | |||
| 272 | pa_data = boot_params.hdr.setup_data; | ||
| 273 | if (!pa_data) | ||
| 274 | return 0; | ||
| 275 | |||
| 276 | setup_data_kobj = kobject_create_and_add("setup_data", parent); | ||
| 277 | if (!setup_data_kobj) { | ||
| 278 | ret = -ENOMEM; | ||
| 279 | goto out; | ||
| 280 | } | ||
| 281 | |||
| 282 | ret = get_setup_data_total_num(pa_data, &nr); | ||
| 283 | if (ret) | ||
| 284 | goto out_setup_data_kobj; | ||
| 285 | |||
| 286 | kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL); | ||
| 287 | if (!kobjp) { | ||
| 288 | ret = -ENOMEM; | ||
| 289 | goto out_setup_data_kobj; | ||
| 290 | } | ||
| 291 | |||
| 292 | for (i = 0; i < nr; i++) { | ||
| 293 | ret = create_setup_data_node(setup_data_kobj, kobjp + i, i); | ||
| 294 | if (ret) | ||
| 295 | goto out_clean_nodes; | ||
| 296 | } | ||
| 297 | |||
| 298 | kfree(kobjp); | ||
| 299 | return 0; | ||
| 300 | |||
| 301 | out_clean_nodes: | ||
| 302 | for (j = i - 1; j > 0; j--) | ||
| 303 | cleanup_setup_data_node(*(kobjp + j)); | ||
| 304 | kfree(kobjp); | ||
| 305 | out_setup_data_kobj: | ||
| 306 | kobject_put(setup_data_kobj); | ||
| 307 | out: | ||
| 308 | return ret; | ||
| 309 | } | ||
| 310 | |||
| 311 | static int __init boot_params_ksysfs_init(void) | ||
| 312 | { | ||
| 313 | int ret; | ||
| 314 | struct kobject *boot_params_kobj; | ||
| 315 | |||
| 316 | boot_params_kobj = kobject_create_and_add("boot_params", | ||
| 317 | kernel_kobj); | ||
| 318 | if (!boot_params_kobj) { | ||
| 319 | ret = -ENOMEM; | ||
| 320 | goto out; | ||
| 321 | } | ||
| 322 | |||
| 323 | ret = sysfs_create_group(boot_params_kobj, &boot_params_attr_group); | ||
| 324 | if (ret) | ||
| 325 | goto out_boot_params_kobj; | ||
| 326 | |||
| 327 | ret = create_setup_data_nodes(boot_params_kobj); | ||
| 328 | if (ret) | ||
| 329 | goto out_create_group; | ||
| 330 | |||
| 331 | return 0; | ||
| 332 | out_create_group: | ||
| 333 | sysfs_remove_group(boot_params_kobj, &boot_params_attr_group); | ||
| 334 | out_boot_params_kobj: | ||
| 335 | kobject_put(boot_params_kobj); | ||
| 336 | out: | ||
| 337 | return ret; | ||
| 338 | } | ||
| 339 | |||
| 340 | arch_initcall(boot_params_ksysfs_init); | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cb233bc9dee3..be4b456e444b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -447,6 +447,9 @@ static void __init parse_setup_data(void) | |||
| 447 | case SETUP_DTB: | 447 | case SETUP_DTB: |
| 448 | add_dtb(pa_data); | 448 | add_dtb(pa_data); |
| 449 | break; | 449 | break; |
| 450 | case SETUP_EFI: | ||
| 451 | parse_efi_setup(pa_data, data_len); | ||
| 452 | break; | ||
| 450 | default: | 453 | default: |
| 451 | break; | 454 | break; |
| 452 | } | 455 | } |
| @@ -924,8 +927,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 924 | iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; | 927 | iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; |
| 925 | setup_memory_map(); | 928 | setup_memory_map(); |
| 926 | parse_setup_data(); | 929 | parse_setup_data(); |
| 927 | /* update the e820_saved too */ | ||
| 928 | e820_reserve_setup_data(); | ||
| 929 | 930 | ||
| 930 | copy_edd(); | 931 | copy_edd(); |
| 931 | 932 | ||
| @@ -987,6 +988,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 987 | early_dump_pci_devices(); | 988 | early_dump_pci_devices(); |
| 988 | #endif | 989 | #endif |
| 989 | 990 | ||
| 991 | /* update the e820_saved too */ | ||
| 992 | e820_reserve_setup_data(); | ||
| 990 | finish_e820_parsing(); | 993 | finish_e820_parsing(); |
| 991 | 994 | ||
| 992 | if (efi_enabled(EFI_BOOT)) | 995 | if (efi_enabled(EFI_BOOT)) |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bb32480c2d71..b3b19f46c016 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | */ | 30 | */ |
| 31 | struct cpa_data { | 31 | struct cpa_data { |
| 32 | unsigned long *vaddr; | 32 | unsigned long *vaddr; |
| 33 | pgd_t *pgd; | ||
| 33 | pgprot_t mask_set; | 34 | pgprot_t mask_set; |
| 34 | pgprot_t mask_clr; | 35 | pgprot_t mask_clr; |
| 35 | int numpages; | 36 | int numpages; |
| @@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
| 322 | return prot; | 323 | return prot; |
| 323 | } | 324 | } |
| 324 | 325 | ||
| 325 | /* | 326 | static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address, |
| 326 | * Lookup the page table entry for a virtual address. Return a pointer | 327 | unsigned int *level) |
| 327 | * to the entry and the level of the mapping. | ||
| 328 | * | ||
| 329 | * Note: We return pud and pmd either when the entry is marked large | ||
| 330 | * or when the present bit is not set. Otherwise we would return a | ||
| 331 | * pointer to a nonexisting mapping. | ||
| 332 | */ | ||
| 333 | pte_t *lookup_address(unsigned long address, unsigned int *level) | ||
| 334 | { | 328 | { |
| 335 | pgd_t *pgd = pgd_offset_k(address); | ||
| 336 | pud_t *pud; | 329 | pud_t *pud; |
| 337 | pmd_t *pmd; | 330 | pmd_t *pmd; |
| 338 | 331 | ||
| @@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) | |||
| 361 | 354 | ||
| 362 | return pte_offset_kernel(pmd, address); | 355 | return pte_offset_kernel(pmd, address); |
| 363 | } | 356 | } |
| 357 | |||
| 358 | /* | ||
| 359 | * Lookup the page table entry for a virtual address. Return a pointer | ||
| 360 | * to the entry and the level of the mapping. | ||
| 361 | * | ||
| 362 | * Note: We return pud and pmd either when the entry is marked large | ||
| 363 | * or when the present bit is not set. Otherwise we would return a | ||
| 364 | * pointer to a nonexisting mapping. | ||
| 365 | */ | ||
| 366 | pte_t *lookup_address(unsigned long address, unsigned int *level) | ||
| 367 | { | ||
| 368 | return __lookup_address_in_pgd(pgd_offset_k(address), address, level); | ||
| 369 | } | ||
| 364 | EXPORT_SYMBOL_GPL(lookup_address); | 370 | EXPORT_SYMBOL_GPL(lookup_address); |
| 365 | 371 | ||
| 372 | static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, | ||
| 373 | unsigned int *level) | ||
| 374 | { | ||
| 375 | if (cpa->pgd) | ||
| 376 | return __lookup_address_in_pgd(cpa->pgd + pgd_index(address), | ||
| 377 | address, level); | ||
| 378 | |||
| 379 | return lookup_address(address, level); | ||
| 380 | } | ||
| 381 | |||
| 366 | /* | 382 | /* |
| 367 | * This is necessary because __pa() does not work on some | 383 | * This is necessary because __pa() does not work on some |
| 368 | * kinds of memory, like vmalloc() or the alloc_remap() | 384 | * kinds of memory, like vmalloc() or the alloc_remap() |
| @@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
| 437 | * Check for races, another CPU might have split this page | 453 | * Check for races, another CPU might have split this page |
| 438 | * up already: | 454 | * up already: |
| 439 | */ | 455 | */ |
| 440 | tmp = lookup_address(address, &level); | 456 | tmp = _lookup_address_cpa(cpa, address, &level); |
| 441 | if (tmp != kpte) | 457 | if (tmp != kpte) |
| 442 | goto out_unlock; | 458 | goto out_unlock; |
| 443 | 459 | ||
| @@ -543,7 +559,8 @@ out_unlock: | |||
| 543 | } | 559 | } |
| 544 | 560 | ||
| 545 | static int | 561 | static int |
| 546 | __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | 562 | __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, |
| 563 | struct page *base) | ||
| 547 | { | 564 | { |
| 548 | pte_t *pbase = (pte_t *)page_address(base); | 565 | pte_t *pbase = (pte_t *)page_address(base); |
| 549 | unsigned long pfn, pfninc = 1; | 566 | unsigned long pfn, pfninc = 1; |
| @@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | |||
| 556 | * Check for races, another CPU might have split this page | 573 | * Check for races, another CPU might have split this page |
| 557 | * up for us already: | 574 | * up for us already: |
| 558 | */ | 575 | */ |
| 559 | tmp = lookup_address(address, &level); | 576 | tmp = _lookup_address_cpa(cpa, address, &level); |
| 560 | if (tmp != kpte) { | 577 | if (tmp != kpte) { |
| 561 | spin_unlock(&pgd_lock); | 578 | spin_unlock(&pgd_lock); |
| 562 | return 1; | 579 | return 1; |
| @@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | |||
| 632 | return 0; | 649 | return 0; |
| 633 | } | 650 | } |
| 634 | 651 | ||
| 635 | static int split_large_page(pte_t *kpte, unsigned long address) | 652 | static int split_large_page(struct cpa_data *cpa, pte_t *kpte, |
| 653 | unsigned long address) | ||
| 636 | { | 654 | { |
| 637 | struct page *base; | 655 | struct page *base; |
| 638 | 656 | ||
| @@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
| 644 | if (!base) | 662 | if (!base) |
| 645 | return -ENOMEM; | 663 | return -ENOMEM; |
| 646 | 664 | ||
| 647 | if (__split_large_page(kpte, address, base)) | 665 | if (__split_large_page(cpa, kpte, address, base)) |
| 648 | __free_page(base); | 666 | __free_page(base); |
| 649 | 667 | ||
| 650 | return 0; | 668 | return 0; |
| 651 | } | 669 | } |
| 652 | 670 | ||
| 671 | static bool try_to_free_pte_page(pte_t *pte) | ||
| 672 | { | ||
| 673 | int i; | ||
| 674 | |||
| 675 | for (i = 0; i < PTRS_PER_PTE; i++) | ||
| 676 | if (!pte_none(pte[i])) | ||
| 677 | return false; | ||
| 678 | |||
| 679 | free_page((unsigned long)pte); | ||
| 680 | return true; | ||
| 681 | } | ||
| 682 | |||
| 683 | static bool try_to_free_pmd_page(pmd_t *pmd) | ||
| 684 | { | ||
| 685 | int i; | ||
| 686 | |||
| 687 | for (i = 0; i < PTRS_PER_PMD; i++) | ||
| 688 | if (!pmd_none(pmd[i])) | ||
| 689 | return false; | ||
| 690 | |||
| 691 | free_page((unsigned long)pmd); | ||
| 692 | return true; | ||
| 693 | } | ||
| 694 | |||
| 695 | static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) | ||
| 696 | { | ||
| 697 | pte_t *pte = pte_offset_kernel(pmd, start); | ||
| 698 | |||
| 699 | while (start < end) { | ||
| 700 | set_pte(pte, __pte(0)); | ||
| 701 | |||
| 702 | start += PAGE_SIZE; | ||
| 703 | pte++; | ||
| 704 | } | ||
| 705 | |||
| 706 | if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { | ||
| 707 | pmd_clear(pmd); | ||
| 708 | return true; | ||
| 709 | } | ||
| 710 | return false; | ||
| 711 | } | ||
| 712 | |||
| 713 | static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, | ||
| 714 | unsigned long start, unsigned long end) | ||
| 715 | { | ||
| 716 | if (unmap_pte_range(pmd, start, end)) | ||
| 717 | if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) | ||
| 718 | pud_clear(pud); | ||
| 719 | } | ||
| 720 | |||
| 721 | static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) | ||
| 722 | { | ||
| 723 | pmd_t *pmd = pmd_offset(pud, start); | ||
| 724 | |||
| 725 | /* | ||
| 726 | * Not on a 2MB page boundary? | ||
| 727 | */ | ||
| 728 | if (start & (PMD_SIZE - 1)) { | ||
| 729 | unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; | ||
| 730 | unsigned long pre_end = min_t(unsigned long, end, next_page); | ||
| 731 | |||
| 732 | __unmap_pmd_range(pud, pmd, start, pre_end); | ||
| 733 | |||
| 734 | start = pre_end; | ||
| 735 | pmd++; | ||
| 736 | } | ||
| 737 | |||
| 738 | /* | ||
| 739 | * Try to unmap in 2M chunks. | ||
| 740 | */ | ||
| 741 | while (end - start >= PMD_SIZE) { | ||
| 742 | if (pmd_large(*pmd)) | ||
| 743 | pmd_clear(pmd); | ||
| 744 | else | ||
| 745 | __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); | ||
| 746 | |||
| 747 | start += PMD_SIZE; | ||
| 748 | pmd++; | ||
| 749 | } | ||
| 750 | |||
| 751 | /* | ||
| 752 | * 4K leftovers? | ||
| 753 | */ | ||
| 754 | if (start < end) | ||
| 755 | return __unmap_pmd_range(pud, pmd, start, end); | ||
| 756 | |||
| 757 | /* | ||
| 758 | * Try again to free the PMD page if haven't succeeded above. | ||
| 759 | */ | ||
| 760 | if (!pud_none(*pud)) | ||
| 761 | if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) | ||
| 762 | pud_clear(pud); | ||
| 763 | } | ||
| 764 | |||
| 765 | static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) | ||
| 766 | { | ||
| 767 | pud_t *pud = pud_offset(pgd, start); | ||
| 768 | |||
| 769 | /* | ||
| 770 | * Not on a GB page boundary? | ||
| 771 | */ | ||
| 772 | if (start & (PUD_SIZE - 1)) { | ||
| 773 | unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; | ||
| 774 | unsigned long pre_end = min_t(unsigned long, end, next_page); | ||
| 775 | |||
| 776 | unmap_pmd_range(pud, start, pre_end); | ||
| 777 | |||
| 778 | start = pre_end; | ||
| 779 | pud++; | ||
| 780 | } | ||
| 781 | |||
| 782 | /* | ||
| 783 | * Try to unmap in 1G chunks? | ||
| 784 | */ | ||
| 785 | while (end - start >= PUD_SIZE) { | ||
| 786 | |||
| 787 | if (pud_large(*pud)) | ||
| 788 | pud_clear(pud); | ||
| 789 | else | ||
| 790 | unmap_pmd_range(pud, start, start + PUD_SIZE); | ||
| 791 | |||
| 792 | start += PUD_SIZE; | ||
| 793 | pud++; | ||
| 794 | } | ||
| 795 | |||
| 796 | /* | ||
| 797 | * 2M leftovers? | ||
| 798 | */ | ||
| 799 | if (start < end) | ||
| 800 | unmap_pmd_range(pud, start, end); | ||
| 801 | |||
| 802 | /* | ||
| 803 | * No need to try to free the PUD page because we'll free it in | ||
| 804 | * populate_pgd's error path | ||
| 805 | */ | ||
| 806 | } | ||
| 807 | |||
| 808 | static int alloc_pte_page(pmd_t *pmd) | ||
| 809 | { | ||
| 810 | pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
| 811 | if (!pte) | ||
| 812 | return -1; | ||
| 813 | |||
| 814 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); | ||
| 815 | return 0; | ||
| 816 | } | ||
| 817 | |||
| 818 | static int alloc_pmd_page(pud_t *pud) | ||
| 819 | { | ||
| 820 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
| 821 | if (!pmd) | ||
| 822 | return -1; | ||
| 823 | |||
| 824 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
| 825 | return 0; | ||
| 826 | } | ||
| 827 | |||
| 828 | static void populate_pte(struct cpa_data *cpa, | ||
| 829 | unsigned long start, unsigned long end, | ||
| 830 | unsigned num_pages, pmd_t *pmd, pgprot_t pgprot) | ||
| 831 | { | ||
| 832 | pte_t *pte; | ||
| 833 | |||
| 834 | pte = pte_offset_kernel(pmd, start); | ||
| 835 | |||
| 836 | while (num_pages-- && start < end) { | ||
| 837 | |||
| 838 | /* deal with the NX bit */ | ||
| 839 | if (!(pgprot_val(pgprot) & _PAGE_NX)) | ||
| 840 | cpa->pfn &= ~_PAGE_NX; | ||
| 841 | |||
| 842 | set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); | ||
| 843 | |||
| 844 | start += PAGE_SIZE; | ||
| 845 | cpa->pfn += PAGE_SIZE; | ||
| 846 | pte++; | ||
| 847 | } | ||
| 848 | } | ||
| 849 | |||
| 850 | static int populate_pmd(struct cpa_data *cpa, | ||
| 851 | unsigned long start, unsigned long end, | ||
| 852 | unsigned num_pages, pud_t *pud, pgprot_t pgprot) | ||
| 853 | { | ||
| 854 | unsigned int cur_pages = 0; | ||
| 855 | pmd_t *pmd; | ||
| 856 | |||
| 857 | /* | ||
| 858 | * Not on a 2M boundary? | ||
| 859 | */ | ||
| 860 | if (start & (PMD_SIZE - 1)) { | ||
| 861 | unsigned long pre_end = start + (num_pages << PAGE_SHIFT); | ||
| 862 | unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; | ||
| 863 | |||
| 864 | pre_end = min_t(unsigned long, pre_end, next_page); | ||
| 865 | cur_pages = (pre_end - start) >> PAGE_SHIFT; | ||
| 866 | cur_pages = min_t(unsigned int, num_pages, cur_pages); | ||
| 867 | |||
| 868 | /* | ||
| 869 | * Need a PTE page? | ||
| 870 | */ | ||
| 871 | pmd = pmd_offset(pud, start); | ||
| 872 | if (pmd_none(*pmd)) | ||
| 873 | if (alloc_pte_page(pmd)) | ||
| 874 | return -1; | ||
| 875 | |||
| 876 | populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot); | ||
| 877 | |||
| 878 | start = pre_end; | ||
| 879 | } | ||
| 880 | |||
| 881 | /* | ||
| 882 | * We mapped them all? | ||
| 883 | */ | ||
| 884 | if (num_pages == cur_pages) | ||
| 885 | return cur_pages; | ||
| 886 | |||
| 887 | while (end - start >= PMD_SIZE) { | ||
| 888 | |||
| 889 | /* | ||
| 890 | * We cannot use a 1G page so allocate a PMD page if needed. | ||
| 891 | */ | ||
| 892 | if (pud_none(*pud)) | ||
| 893 | if (alloc_pmd_page(pud)) | ||
| 894 | return -1; | ||
| 895 | |||
| 896 | pmd = pmd_offset(pud, start); | ||
| 897 | |||
| 898 | set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); | ||
| 899 | |||
| 900 | start += PMD_SIZE; | ||
| 901 | cpa->pfn += PMD_SIZE; | ||
| 902 | cur_pages += PMD_SIZE >> PAGE_SHIFT; | ||
| 903 | } | ||
| 904 | |||
| 905 | /* | ||
| 906 | * Map trailing 4K pages. | ||
| 907 | */ | ||
| 908 | if (start < end) { | ||
| 909 | pmd = pmd_offset(pud, start); | ||
| 910 | if (pmd_none(*pmd)) | ||
| 911 | if (alloc_pte_page(pmd)) | ||
| 912 | return -1; | ||
| 913 | |||
| 914 | populate_pte(cpa, start, end, num_pages - cur_pages, | ||
| 915 | pmd, pgprot); | ||
| 916 | } | ||
| 917 | return num_pages; | ||
| 918 | } | ||
| 919 | |||
| 920 | static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, | ||
| 921 | pgprot_t pgprot) | ||
| 922 | { | ||
| 923 | pud_t *pud; | ||
| 924 | unsigned long end; | ||
| 925 | int cur_pages = 0; | ||
| 926 | |||
| 927 | end = start + (cpa->numpages << PAGE_SHIFT); | ||
| 928 | |||
| 929 | /* | ||
| 930 | * Not on a Gb page boundary? => map everything up to it with | ||
| 931 | * smaller pages. | ||
| 932 | */ | ||
| 933 | if (start & (PUD_SIZE - 1)) { | ||
| 934 | unsigned long pre_end; | ||
| 935 | unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; | ||
| 936 | |||
| 937 | pre_end = min_t(unsigned long, end, next_page); | ||
| 938 | cur_pages = (pre_end - start) >> PAGE_SHIFT; | ||
| 939 | cur_pages = min_t(int, (int)cpa->numpages, cur_pages); | ||
| 940 | |||
| 941 | pud = pud_offset(pgd, start); | ||
| 942 | |||
| 943 | /* | ||
| 944 | * Need a PMD page? | ||
| 945 | */ | ||
| 946 | if (pud_none(*pud)) | ||
| 947 | if (alloc_pmd_page(pud)) | ||
| 948 | return -1; | ||
| 949 | |||
| 950 | cur_pages = populate_pmd(cpa, start, pre_end, cur_pages, | ||
| 951 | pud, pgprot); | ||
| 952 | if (cur_pages < 0) | ||
| 953 | return cur_pages; | ||
| 954 | |||
| 955 | start = pre_end; | ||
| 956 | } | ||
| 957 | |||
| 958 | /* We mapped them all? */ | ||
| 959 | if (cpa->numpages == cur_pages) | ||
| 960 | return cur_pages; | ||
| 961 | |||
| 962 | pud = pud_offset(pgd, start); | ||
| 963 | |||
| 964 | /* | ||
| 965 | * Map everything starting from the Gb boundary, possibly with 1G pages | ||
| 966 | */ | ||
| 967 | while (end - start >= PUD_SIZE) { | ||
| 968 | set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); | ||
| 969 | |||
| 970 | start += PUD_SIZE; | ||
| 971 | cpa->pfn += PUD_SIZE; | ||
| 972 | cur_pages += PUD_SIZE >> PAGE_SHIFT; | ||
| 973 | pud++; | ||
| 974 | } | ||
| 975 | |||
| 976 | /* Map trailing leftover */ | ||
| 977 | if (start < end) { | ||
| 978 | int tmp; | ||
| 979 | |||
| 980 | pud = pud_offset(pgd, start); | ||
| 981 | if (pud_none(*pud)) | ||
| 982 | if (alloc_pmd_page(pud)) | ||
| 983 | return -1; | ||
| 984 | |||
| 985 | tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages, | ||
| 986 | pud, pgprot); | ||
| 987 | if (tmp < 0) | ||
| 988 | return cur_pages; | ||
| 989 | |||
| 990 | cur_pages += tmp; | ||
| 991 | } | ||
| 992 | return cur_pages; | ||
| 993 | } | ||
| 994 | |||
| 995 | /* | ||
| 996 | * Restrictions for kernel page table do not necessarily apply when mapping in | ||
| 997 | * an alternate PGD. | ||
| 998 | */ | ||
| 999 | static int populate_pgd(struct cpa_data *cpa, unsigned long addr) | ||
| 1000 | { | ||
| 1001 | pgprot_t pgprot = __pgprot(_KERNPG_TABLE); | ||
| 1002 | bool allocd_pgd = false; | ||
| 1003 | pgd_t *pgd_entry; | ||
| 1004 | pud_t *pud = NULL; /* shut up gcc */ | ||
| 1005 | int ret; | ||
| 1006 | |||
| 1007 | pgd_entry = cpa->pgd + pgd_index(addr); | ||
| 1008 | |||
| 1009 | /* | ||
| 1010 | * Allocate a PUD page and hand it down for mapping. | ||
| 1011 | */ | ||
| 1012 | if (pgd_none(*pgd_entry)) { | ||
| 1013 | pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
| 1014 | if (!pud) | ||
| 1015 | return -1; | ||
| 1016 | |||
| 1017 | set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
| 1018 | allocd_pgd = true; | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); | ||
| 1022 | pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); | ||
| 1023 | |||
| 1024 | ret = populate_pud(cpa, addr, pgd_entry, pgprot); | ||
| 1025 | if (ret < 0) { | ||
| 1026 | unmap_pud_range(pgd_entry, addr, | ||
| 1027 | addr + (cpa->numpages << PAGE_SHIFT)); | ||
| 1028 | |||
| 1029 | if (allocd_pgd) { | ||
| 1030 | /* | ||
| 1031 | * If I allocated this PUD page, I can just as well | ||
| 1032 | * free it in this error path. | ||
| 1033 | */ | ||
| 1034 | pgd_clear(pgd_entry); | ||
| 1035 | free_page((unsigned long)pud); | ||
| 1036 | } | ||
| 1037 | return ret; | ||
| 1038 | } | ||
| 1039 | cpa->numpages = ret; | ||
| 1040 | return 0; | ||
| 1041 | } | ||
| 1042 | |||
| 653 | static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, | 1043 | static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, |
| 654 | int primary) | 1044 | int primary) |
| 655 | { | 1045 | { |
| 1046 | if (cpa->pgd) | ||
| 1047 | return populate_pgd(cpa, vaddr); | ||
| 1048 | |||
| 656 | /* | 1049 | /* |
| 657 | * Ignore all non primary paths. | 1050 | * Ignore all non primary paths. |
| 658 | */ | 1051 | */ |
| @@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) | |||
| 697 | else | 1090 | else |
| 698 | address = *cpa->vaddr; | 1091 | address = *cpa->vaddr; |
| 699 | repeat: | 1092 | repeat: |
| 700 | kpte = lookup_address(address, &level); | 1093 | kpte = _lookup_address_cpa(cpa, address, &level); |
| 701 | if (!kpte) | 1094 | if (!kpte) |
| 702 | return __cpa_process_fault(cpa, address, primary); | 1095 | return __cpa_process_fault(cpa, address, primary); |
| 703 | 1096 | ||
| @@ -761,7 +1154,7 @@ repeat: | |||
| 761 | /* | 1154 | /* |
| 762 | * We have to split the large page: | 1155 | * We have to split the large page: |
| 763 | */ | 1156 | */ |
| 764 | err = split_large_page(kpte, address); | 1157 | err = split_large_page(cpa, kpte, address); |
| 765 | if (!err) { | 1158 | if (!err) { |
| 766 | /* | 1159 | /* |
| 767 | * Do a global flush tlb after splitting the large page | 1160 | * Do a global flush tlb after splitting the large page |
| @@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
| 910 | int ret, cache, checkalias; | 1303 | int ret, cache, checkalias; |
| 911 | unsigned long baddr = 0; | 1304 | unsigned long baddr = 0; |
| 912 | 1305 | ||
| 1306 | memset(&cpa, 0, sizeof(cpa)); | ||
| 1307 | |||
| 913 | /* | 1308 | /* |
| 914 | * Check, if we are requested to change a not supported | 1309 | * Check, if we are requested to change a not supported |
| 915 | * feature: | 1310 | * feature: |
| @@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages) | |||
| 1356 | { | 1751 | { |
| 1357 | unsigned long tempaddr = (unsigned long) page_address(page); | 1752 | unsigned long tempaddr = (unsigned long) page_address(page); |
| 1358 | struct cpa_data cpa = { .vaddr = &tempaddr, | 1753 | struct cpa_data cpa = { .vaddr = &tempaddr, |
| 1754 | .pgd = NULL, | ||
| 1359 | .numpages = numpages, | 1755 | .numpages = numpages, |
| 1360 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), | 1756 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
| 1361 | .mask_clr = __pgprot(0), | 1757 | .mask_clr = __pgprot(0), |
| @@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages) | |||
| 1374 | { | 1770 | { |
| 1375 | unsigned long tempaddr = (unsigned long) page_address(page); | 1771 | unsigned long tempaddr = (unsigned long) page_address(page); |
| 1376 | struct cpa_data cpa = { .vaddr = &tempaddr, | 1772 | struct cpa_data cpa = { .vaddr = &tempaddr, |
| 1773 | .pgd = NULL, | ||
| 1377 | .numpages = numpages, | 1774 | .numpages = numpages, |
| 1378 | .mask_set = __pgprot(0), | 1775 | .mask_set = __pgprot(0), |
| 1379 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), | 1776 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
| @@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page) | |||
| 1434 | 1831 | ||
| 1435 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | 1832 | #endif /* CONFIG_DEBUG_PAGEALLOC */ |
| 1436 | 1833 | ||
| 1834 | int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | ||
| 1835 | unsigned numpages, unsigned long page_flags) | ||
| 1836 | { | ||
| 1837 | int retval = -EINVAL; | ||
| 1838 | |||
| 1839 | struct cpa_data cpa = { | ||
| 1840 | .vaddr = &address, | ||
| 1841 | .pfn = pfn, | ||
| 1842 | .pgd = pgd, | ||
| 1843 | .numpages = numpages, | ||
| 1844 | .mask_set = __pgprot(0), | ||
| 1845 | .mask_clr = __pgprot(0), | ||
| 1846 | .flags = 0, | ||
| 1847 | }; | ||
| 1848 | |||
| 1849 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
| 1850 | goto out; | ||
| 1851 | |||
| 1852 | if (!(page_flags & _PAGE_NX)) | ||
| 1853 | cpa.mask_clr = __pgprot(_PAGE_NX); | ||
| 1854 | |||
| 1855 | cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); | ||
| 1856 | |||
| 1857 | retval = __change_page_attr_set_clr(&cpa, 0); | ||
| 1858 | __flush_tlb_all(); | ||
| 1859 | |||
| 1860 | out: | ||
| 1861 | return retval; | ||
| 1862 | } | ||
| 1863 | |||
| 1437 | /* | 1864 | /* |
| 1438 | * The testcases use internal knowledge of the implementation that shouldn't | 1865 | * The testcases use internal knowledge of the implementation that shouldn't |
| 1439 | * be exposed to the rest of the kernel. Include these directly here. | 1866 | * be exposed to the rest of the kernel. Include these directly here. |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index cceb813044ef..d62ec87a2b26 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
| @@ -12,6 +12,8 @@ | |||
| 12 | * Bibo Mao <bibo.mao@intel.com> | 12 | * Bibo Mao <bibo.mao@intel.com> |
| 13 | * Chandramouli Narayanan <mouli@linux.intel.com> | 13 | * Chandramouli Narayanan <mouli@linux.intel.com> |
| 14 | * Huang Ying <ying.huang@intel.com> | 14 | * Huang Ying <ying.huang@intel.com> |
| 15 | * Copyright (C) 2013 SuSE Labs | ||
| 16 | * Borislav Petkov <bp@suse.de> - runtime services VA mapping | ||
| 15 | * | 17 | * |
| 16 | * Copied from efi_32.c to eliminate the duplicated code between EFI | 18 | * Copied from efi_32.c to eliminate the duplicated code between EFI |
| 17 | * 32/64 support code. --ying 2007-10-26 | 19 | * 32/64 support code. --ying 2007-10-26 |
| @@ -51,7 +53,7 @@ | |||
| 51 | #include <asm/x86_init.h> | 53 | #include <asm/x86_init.h> |
| 52 | #include <asm/rtc.h> | 54 | #include <asm/rtc.h> |
| 53 | 55 | ||
| 54 | #define EFI_DEBUG 1 | 56 | #define EFI_DEBUG |
| 55 | 57 | ||
| 56 | #define EFI_MIN_RESERVE 5120 | 58 | #define EFI_MIN_RESERVE 5120 |
| 57 | 59 | ||
| @@ -74,6 +76,8 @@ static __initdata efi_config_table_type_t arch_tables[] = { | |||
| 74 | {NULL_GUID, NULL, NULL}, | 76 | {NULL_GUID, NULL, NULL}, |
| 75 | }; | 77 | }; |
| 76 | 78 | ||
| 79 | u64 efi_setup; /* efi setup_data physical address */ | ||
| 80 | |||
| 77 | /* | 81 | /* |
| 78 | * Returns 1 if 'facility' is enabled, 0 otherwise. | 82 | * Returns 1 if 'facility' is enabled, 0 otherwise. |
| 79 | */ | 83 | */ |
| @@ -110,7 +114,6 @@ static int __init setup_storage_paranoia(char *arg) | |||
| 110 | } | 114 | } |
| 111 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); | 115 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); |
| 112 | 116 | ||
| 113 | |||
| 114 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | 117 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) |
| 115 | { | 118 | { |
| 116 | unsigned long flags; | 119 | unsigned long flags; |
| @@ -398,9 +401,9 @@ int __init efi_memblock_x86_reserve_range(void) | |||
| 398 | return 0; | 401 | return 0; |
| 399 | } | 402 | } |
| 400 | 403 | ||
| 401 | #if EFI_DEBUG | ||
| 402 | static void __init print_efi_memmap(void) | 404 | static void __init print_efi_memmap(void) |
| 403 | { | 405 | { |
| 406 | #ifdef EFI_DEBUG | ||
| 404 | efi_memory_desc_t *md; | 407 | efi_memory_desc_t *md; |
| 405 | void *p; | 408 | void *p; |
| 406 | int i; | 409 | int i; |
| @@ -415,8 +418,8 @@ static void __init print_efi_memmap(void) | |||
| 415 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | 418 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), |
| 416 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | 419 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); |
| 417 | } | 420 | } |
| 418 | } | ||
| 419 | #endif /* EFI_DEBUG */ | 421 | #endif /* EFI_DEBUG */ |
| 422 | } | ||
| 420 | 423 | ||
| 421 | void __init efi_reserve_boot_services(void) | 424 | void __init efi_reserve_boot_services(void) |
| 422 | { | 425 | { |
| @@ -436,7 +439,7 @@ void __init efi_reserve_boot_services(void) | |||
| 436 | * - Not within any part of the kernel | 439 | * - Not within any part of the kernel |
| 437 | * - Not the bios reserved area | 440 | * - Not the bios reserved area |
| 438 | */ | 441 | */ |
| 439 | if ((start+size >= __pa_symbol(_text) | 442 | if ((start + size > __pa_symbol(_text) |
| 440 | && start <= __pa_symbol(_end)) || | 443 | && start <= __pa_symbol(_end)) || |
| 441 | !e820_all_mapped(start, start+size, E820_RAM) || | 444 | !e820_all_mapped(start, start+size, E820_RAM) || |
| 442 | memblock_is_region_reserved(start, size)) { | 445 | memblock_is_region_reserved(start, size)) { |
| @@ -489,18 +492,27 @@ static int __init efi_systab_init(void *phys) | |||
| 489 | { | 492 | { |
| 490 | if (efi_enabled(EFI_64BIT)) { | 493 | if (efi_enabled(EFI_64BIT)) { |
| 491 | efi_system_table_64_t *systab64; | 494 | efi_system_table_64_t *systab64; |
| 495 | struct efi_setup_data *data = NULL; | ||
| 492 | u64 tmp = 0; | 496 | u64 tmp = 0; |
| 493 | 497 | ||
| 498 | if (efi_setup) { | ||
| 499 | data = early_memremap(efi_setup, sizeof(*data)); | ||
| 500 | if (!data) | ||
| 501 | return -ENOMEM; | ||
| 502 | } | ||
| 494 | systab64 = early_ioremap((unsigned long)phys, | 503 | systab64 = early_ioremap((unsigned long)phys, |
| 495 | sizeof(*systab64)); | 504 | sizeof(*systab64)); |
| 496 | if (systab64 == NULL) { | 505 | if (systab64 == NULL) { |
| 497 | pr_err("Couldn't map the system table!\n"); | 506 | pr_err("Couldn't map the system table!\n"); |
| 507 | if (data) | ||
| 508 | early_iounmap(data, sizeof(*data)); | ||
| 498 | return -ENOMEM; | 509 | return -ENOMEM; |
| 499 | } | 510 | } |
| 500 | 511 | ||
| 501 | efi_systab.hdr = systab64->hdr; | 512 | efi_systab.hdr = systab64->hdr; |
| 502 | efi_systab.fw_vendor = systab64->fw_vendor; | 513 | efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : |
| 503 | tmp |= systab64->fw_vendor; | 514 | systab64->fw_vendor; |
| 515 | tmp |= data ? data->fw_vendor : systab64->fw_vendor; | ||
| 504 | efi_systab.fw_revision = systab64->fw_revision; | 516 | efi_systab.fw_revision = systab64->fw_revision; |
| 505 | efi_systab.con_in_handle = systab64->con_in_handle; | 517 | efi_systab.con_in_handle = systab64->con_in_handle; |
| 506 | tmp |= systab64->con_in_handle; | 518 | tmp |= systab64->con_in_handle; |
| @@ -514,15 +526,20 @@ static int __init efi_systab_init(void *phys) | |||
| 514 | tmp |= systab64->stderr_handle; | 526 | tmp |= systab64->stderr_handle; |
| 515 | efi_systab.stderr = systab64->stderr; | 527 | efi_systab.stderr = systab64->stderr; |
| 516 | tmp |= systab64->stderr; | 528 | tmp |= systab64->stderr; |
| 517 | efi_systab.runtime = (void *)(unsigned long)systab64->runtime; | 529 | efi_systab.runtime = data ? |
| 518 | tmp |= systab64->runtime; | 530 | (void *)(unsigned long)data->runtime : |
| 531 | (void *)(unsigned long)systab64->runtime; | ||
| 532 | tmp |= data ? data->runtime : systab64->runtime; | ||
| 519 | efi_systab.boottime = (void *)(unsigned long)systab64->boottime; | 533 | efi_systab.boottime = (void *)(unsigned long)systab64->boottime; |
| 520 | tmp |= systab64->boottime; | 534 | tmp |= systab64->boottime; |
| 521 | efi_systab.nr_tables = systab64->nr_tables; | 535 | efi_systab.nr_tables = systab64->nr_tables; |
| 522 | efi_systab.tables = systab64->tables; | 536 | efi_systab.tables = data ? (unsigned long)data->tables : |
| 523 | tmp |= systab64->tables; | 537 | systab64->tables; |
| 538 | tmp |= data ? data->tables : systab64->tables; | ||
| 524 | 539 | ||
| 525 | early_iounmap(systab64, sizeof(*systab64)); | 540 | early_iounmap(systab64, sizeof(*systab64)); |
| 541 | if (data) | ||
| 542 | early_iounmap(data, sizeof(*data)); | ||
| 526 | #ifdef CONFIG_X86_32 | 543 | #ifdef CONFIG_X86_32 |
| 527 | if (tmp >> 32) { | 544 | if (tmp >> 32) { |
| 528 | pr_err("EFI data located above 4GB, disabling EFI.\n"); | 545 | pr_err("EFI data located above 4GB, disabling EFI.\n"); |
| @@ -626,6 +643,62 @@ static int __init efi_memmap_init(void) | |||
| 626 | return 0; | 643 | return 0; |
| 627 | } | 644 | } |
| 628 | 645 | ||
| 646 | /* | ||
| 647 | * A number of config table entries get remapped to virtual addresses | ||
| 648 | * after entering EFI virtual mode. However, the kexec kernel requires | ||
| 649 | * their physical addresses therefore we pass them via setup_data and | ||
| 650 | * correct those entries to their respective physical addresses here. | ||
| 651 | * | ||
| 652 | * Currently only handles smbios which is necessary for some firmware | ||
| 653 | * implementation. | ||
| 654 | */ | ||
| 655 | static int __init efi_reuse_config(u64 tables, int nr_tables) | ||
| 656 | { | ||
| 657 | int i, sz, ret = 0; | ||
| 658 | void *p, *tablep; | ||
| 659 | struct efi_setup_data *data; | ||
| 660 | |||
| 661 | if (!efi_setup) | ||
| 662 | return 0; | ||
| 663 | |||
| 664 | if (!efi_enabled(EFI_64BIT)) | ||
| 665 | return 0; | ||
| 666 | |||
| 667 | data = early_memremap(efi_setup, sizeof(*data)); | ||
| 668 | if (!data) { | ||
| 669 | ret = -ENOMEM; | ||
| 670 | goto out; | ||
| 671 | } | ||
| 672 | |||
| 673 | if (!data->smbios) | ||
| 674 | goto out_memremap; | ||
| 675 | |||
| 676 | sz = sizeof(efi_config_table_64_t); | ||
| 677 | |||
| 678 | p = tablep = early_memremap(tables, nr_tables * sz); | ||
| 679 | if (!p) { | ||
| 680 | pr_err("Could not map Configuration table!\n"); | ||
| 681 | ret = -ENOMEM; | ||
| 682 | goto out_memremap; | ||
| 683 | } | ||
| 684 | |||
| 685 | for (i = 0; i < efi.systab->nr_tables; i++) { | ||
| 686 | efi_guid_t guid; | ||
| 687 | |||
| 688 | guid = ((efi_config_table_64_t *)p)->guid; | ||
| 689 | |||
| 690 | if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) | ||
| 691 | ((efi_config_table_64_t *)p)->table = data->smbios; | ||
| 692 | p += sz; | ||
| 693 | } | ||
| 694 | early_iounmap(tablep, nr_tables * sz); | ||
| 695 | |||
| 696 | out_memremap: | ||
| 697 | early_iounmap(data, sizeof(*data)); | ||
| 698 | out: | ||
| 699 | return ret; | ||
| 700 | } | ||
| 701 | |||
| 629 | void __init efi_init(void) | 702 | void __init efi_init(void) |
| 630 | { | 703 | { |
| 631 | efi_char16_t *c16; | 704 | efi_char16_t *c16; |
| @@ -651,6 +724,10 @@ void __init efi_init(void) | |||
| 651 | 724 | ||
| 652 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); | 725 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); |
| 653 | 726 | ||
| 727 | efi.config_table = (unsigned long)efi.systab->tables; | ||
| 728 | efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; | ||
| 729 | efi.runtime = (unsigned long)efi.systab->runtime; | ||
| 730 | |||
| 654 | /* | 731 | /* |
| 655 | * Show what we know for posterity | 732 | * Show what we know for posterity |
| 656 | */ | 733 | */ |
| @@ -667,6 +744,9 @@ void __init efi_init(void) | |||
| 667 | efi.systab->hdr.revision >> 16, | 744 | efi.systab->hdr.revision >> 16, |
| 668 | efi.systab->hdr.revision & 0xffff, vendor); | 745 | efi.systab->hdr.revision & 0xffff, vendor); |
| 669 | 746 | ||
| 747 | if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables)) | ||
| 748 | return; | ||
| 749 | |||
| 670 | if (efi_config_init(arch_tables)) | 750 | if (efi_config_init(arch_tables)) |
| 671 | return; | 751 | return; |
| 672 | 752 | ||
| @@ -684,15 +764,12 @@ void __init efi_init(void) | |||
| 684 | return; | 764 | return; |
| 685 | set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); | 765 | set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); |
| 686 | } | 766 | } |
| 687 | |||
| 688 | if (efi_memmap_init()) | 767 | if (efi_memmap_init()) |
| 689 | return; | 768 | return; |
| 690 | 769 | ||
| 691 | set_bit(EFI_MEMMAP, &x86_efi_facility); | 770 | set_bit(EFI_MEMMAP, &x86_efi_facility); |
| 692 | 771 | ||
| 693 | #if EFI_DEBUG | ||
| 694 | print_efi_memmap(); | 772 | print_efi_memmap(); |
| 695 | #endif | ||
| 696 | } | 773 | } |
| 697 | 774 | ||
| 698 | void __init efi_late_init(void) | 775 | void __init efi_late_init(void) |
| @@ -741,36 +818,38 @@ void efi_memory_uc(u64 addr, unsigned long size) | |||
| 741 | set_memory_uc(addr, npages); | 818 | set_memory_uc(addr, npages); |
| 742 | } | 819 | } |
| 743 | 820 | ||
| 744 | /* | 821 | void __init old_map_region(efi_memory_desc_t *md) |
| 745 | * This function will switch the EFI runtime services to virtual mode. | ||
| 746 | * Essentially, look through the EFI memmap and map every region that | ||
| 747 | * has the runtime attribute bit set in its memory descriptor and update | ||
| 748 | * that memory descriptor with the virtual address obtained from ioremap(). | ||
| 749 | * This enables the runtime services to be called without having to | ||
| 750 | * thunk back into physical mode for every invocation. | ||
| 751 | */ | ||
| 752 | void __init efi_enter_virtual_mode(void) | ||
| 753 | { | 822 | { |
| 754 | efi_memory_desc_t *md, *prev_md = NULL; | 823 | u64 start_pfn, end_pfn, end; |
| 755 | efi_status_t status; | ||
| 756 | unsigned long size; | 824 | unsigned long size; |
| 757 | u64 end, systab, start_pfn, end_pfn; | 825 | void *va; |
| 758 | void *p, *va, *new_memmap = NULL; | ||
| 759 | int count = 0; | ||
| 760 | 826 | ||
| 761 | efi.systab = NULL; | 827 | start_pfn = PFN_DOWN(md->phys_addr); |
| 828 | size = md->num_pages << PAGE_SHIFT; | ||
| 829 | end = md->phys_addr + size; | ||
| 830 | end_pfn = PFN_UP(end); | ||
| 762 | 831 | ||
| 763 | /* | 832 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { |
| 764 | * We don't do virtual mode, since we don't do runtime services, on | 833 | va = __va(md->phys_addr); |
| 765 | * non-native EFI | ||
| 766 | */ | ||
| 767 | 834 | ||
| 768 | if (!efi_is_native()) { | 835 | if (!(md->attribute & EFI_MEMORY_WB)) |
| 769 | efi_unmap_memmap(); | 836 | efi_memory_uc((u64)(unsigned long)va, size); |
| 770 | return; | 837 | } else |
| 771 | } | 838 | va = efi_ioremap(md->phys_addr, size, |
| 839 | md->type, md->attribute); | ||
| 840 | |||
| 841 | md->virt_addr = (u64) (unsigned long) va; | ||
| 842 | if (!va) | ||
| 843 | pr_err("ioremap of 0x%llX failed!\n", | ||
| 844 | (unsigned long long)md->phys_addr); | ||
| 845 | } | ||
| 846 | |||
| 847 | /* Merge contiguous regions of the same type and attribute */ | ||
| 848 | static void __init efi_merge_regions(void) | ||
| 849 | { | ||
| 850 | void *p; | ||
| 851 | efi_memory_desc_t *md, *prev_md = NULL; | ||
| 772 | 852 | ||
| 773 | /* Merge contiguous regions of the same type and attribute */ | ||
| 774 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 853 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
| 775 | u64 prev_size; | 854 | u64 prev_size; |
| 776 | md = p; | 855 | md = p; |
| @@ -796,6 +875,77 @@ void __init efi_enter_virtual_mode(void) | |||
| 796 | } | 875 | } |
| 797 | prev_md = md; | 876 | prev_md = md; |
| 798 | } | 877 | } |
| 878 | } | ||
| 879 | |||
| 880 | static void __init get_systab_virt_addr(efi_memory_desc_t *md) | ||
| 881 | { | ||
| 882 | unsigned long size; | ||
| 883 | u64 end, systab; | ||
| 884 | |||
| 885 | size = md->num_pages << EFI_PAGE_SHIFT; | ||
| 886 | end = md->phys_addr + size; | ||
| 887 | systab = (u64)(unsigned long)efi_phys.systab; | ||
| 888 | if (md->phys_addr <= systab && systab < end) { | ||
| 889 | systab += md->virt_addr - md->phys_addr; | ||
| 890 | efi.systab = (efi_system_table_t *)(unsigned long)systab; | ||
| 891 | } | ||
| 892 | } | ||
| 893 | |||
| 894 | static int __init save_runtime_map(void) | ||
| 895 | { | ||
| 896 | efi_memory_desc_t *md; | ||
| 897 | void *tmp, *p, *q = NULL; | ||
| 898 | int count = 0; | ||
| 899 | |||
| 900 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
| 901 | md = p; | ||
| 902 | |||
| 903 | if (!(md->attribute & EFI_MEMORY_RUNTIME) || | ||
| 904 | (md->type == EFI_BOOT_SERVICES_CODE) || | ||
| 905 | (md->type == EFI_BOOT_SERVICES_DATA)) | ||
| 906 | continue; | ||
| 907 | tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); | ||
| 908 | if (!tmp) | ||
| 909 | goto out; | ||
| 910 | q = tmp; | ||
| 911 | |||
| 912 | memcpy(q + count * memmap.desc_size, md, memmap.desc_size); | ||
| 913 | count++; | ||
| 914 | } | ||
| 915 | |||
| 916 | efi_runtime_map_setup(q, count, memmap.desc_size); | ||
| 917 | |||
| 918 | return 0; | ||
| 919 | out: | ||
| 920 | kfree(q); | ||
| 921 | return -ENOMEM; | ||
| 922 | } | ||
| 923 | |||
| 924 | /* | ||
| 925 | * Map efi regions which were passed via setup_data. The virt_addr is a fixed | ||
| 926 | * addr which was used in the first kernel of a kexec boot. | ||
| 927 | */ | ||
| 928 | static void __init efi_map_regions_fixed(void) | ||
| 929 | { | ||
| 930 | void *p; | ||
| 931 | efi_memory_desc_t *md; | ||
| 932 | |||
| 933 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
| 934 | md = p; | ||
| 935 | efi_map_region_fixed(md); /* FIXME: add error handling */ | ||
| 936 | get_systab_virt_addr(md); | ||
| 937 | } | ||
| 938 | |||
| 939 | } | ||
| 940 | |||
| 941 | /* | ||
| 942 | * Map efi memory ranges for runtime services and update new_memmap with virtual | ||
| 943 | * addresses. | ||
| 944 | */ | ||
| 945 | static void * __init efi_map_regions(int *count) | ||
| 946 | { | ||
| 947 | efi_memory_desc_t *md; | ||
| 948 | void *p, *tmp, *new_memmap = NULL; | ||
| 799 | 949 | ||
| 800 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 950 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
| 801 | md = p; | 951 | md = p; |
| @@ -807,53 +957,95 @@ void __init efi_enter_virtual_mode(void) | |||
| 807 | continue; | 957 | continue; |
| 808 | } | 958 | } |
| 809 | 959 | ||
| 810 | size = md->num_pages << EFI_PAGE_SHIFT; | 960 | efi_map_region(md); |
| 811 | end = md->phys_addr + size; | 961 | get_systab_virt_addr(md); |
| 812 | 962 | ||
| 813 | start_pfn = PFN_DOWN(md->phys_addr); | 963 | tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size, |
| 814 | end_pfn = PFN_UP(end); | 964 | GFP_KERNEL); |
| 815 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { | 965 | if (!tmp) |
| 816 | va = __va(md->phys_addr); | 966 | goto out; |
| 967 | new_memmap = tmp; | ||
| 968 | memcpy(new_memmap + (*count * memmap.desc_size), md, | ||
| 969 | memmap.desc_size); | ||
| 970 | (*count)++; | ||
| 971 | } | ||
| 817 | 972 | ||
| 818 | if (!(md->attribute & EFI_MEMORY_WB)) | 973 | return new_memmap; |
| 819 | efi_memory_uc((u64)(unsigned long)va, size); | 974 | out: |
| 820 | } else | 975 | kfree(new_memmap); |
| 821 | va = efi_ioremap(md->phys_addr, size, | 976 | return NULL; |
| 822 | md->type, md->attribute); | 977 | } |
| 978 | |||
| 979 | /* | ||
| 980 | * This function will switch the EFI runtime services to virtual mode. | ||
| 981 | * Essentially, we look through the EFI memmap and map every region that | ||
| 982 | * has the runtime attribute bit set in its memory descriptor into the | ||
| 983 | * ->trampoline_pgd page table using a top-down VA allocation scheme. | ||
| 984 | * | ||
| 985 | * The old method which used to update that memory descriptor with the | ||
| 986 | * virtual address obtained from ioremap() is still supported when the | ||
| 987 | * kernel is booted with efi=old_map on its command line. Same old | ||
| 988 | * method enabled the runtime services to be called without having to | ||
| 989 | * thunk back into physical mode for every invocation. | ||
| 990 | * | ||
| 991 | * The new method does a pagetable switch in a preemption-safe manner | ||
| 992 | * so that we're in a different address space when calling a runtime | ||
| 993 | * function. For function arguments passing we do copy the PGDs of the | ||
| 994 | * kernel page table into ->trampoline_pgd prior to each call. | ||
| 995 | * | ||
| 996 | * Specially for kexec boot, efi runtime maps in previous kernel should | ||
| 997 | * be passed in via setup_data. In that case runtime ranges will be mapped | ||
| 998 | * to the same virtual addresses as the first kernel. | ||
| 999 | */ | ||
| 1000 | void __init efi_enter_virtual_mode(void) | ||
| 1001 | { | ||
| 1002 | efi_status_t status; | ||
| 1003 | void *new_memmap = NULL; | ||
| 1004 | int err, count = 0; | ||
| 823 | 1005 | ||
| 824 | md->virt_addr = (u64) (unsigned long) va; | 1006 | efi.systab = NULL; |
| 825 | 1007 | ||
| 826 | if (!va) { | 1008 | /* |
| 827 | pr_err("ioremap of 0x%llX failed!\n", | 1009 | * We don't do virtual mode, since we don't do runtime services, on |
| 828 | (unsigned long long)md->phys_addr); | 1010 | * non-native EFI |
| 829 | continue; | 1011 | */ |
| 830 | } | 1012 | if (!efi_is_native()) { |
| 1013 | efi_unmap_memmap(); | ||
| 1014 | return; | ||
| 1015 | } | ||
| 831 | 1016 | ||
| 832 | systab = (u64) (unsigned long) efi_phys.systab; | 1017 | if (efi_setup) { |
| 833 | if (md->phys_addr <= systab && systab < end) { | 1018 | efi_map_regions_fixed(); |
| 834 | systab += md->virt_addr - md->phys_addr; | 1019 | } else { |
| 835 | efi.systab = (efi_system_table_t *) (unsigned long) systab; | 1020 | efi_merge_regions(); |
| 1021 | new_memmap = efi_map_regions(&count); | ||
| 1022 | if (!new_memmap) { | ||
| 1023 | pr_err("Error reallocating memory, EFI runtime non-functional!\n"); | ||
| 1024 | return; | ||
| 836 | } | 1025 | } |
| 837 | new_memmap = krealloc(new_memmap, | ||
| 838 | (count + 1) * memmap.desc_size, | ||
| 839 | GFP_KERNEL); | ||
| 840 | memcpy(new_memmap + (count * memmap.desc_size), md, | ||
| 841 | memmap.desc_size); | ||
| 842 | count++; | ||
| 843 | } | 1026 | } |
| 844 | 1027 | ||
| 1028 | err = save_runtime_map(); | ||
| 1029 | if (err) | ||
| 1030 | pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); | ||
| 1031 | |||
| 845 | BUG_ON(!efi.systab); | 1032 | BUG_ON(!efi.systab); |
| 846 | 1033 | ||
| 847 | status = phys_efi_set_virtual_address_map( | 1034 | efi_setup_page_tables(); |
| 848 | memmap.desc_size * count, | 1035 | efi_sync_low_kernel_mappings(); |
| 849 | memmap.desc_size, | ||
| 850 | memmap.desc_version, | ||
| 851 | (efi_memory_desc_t *)__pa(new_memmap)); | ||
| 852 | 1036 | ||
| 853 | if (status != EFI_SUCCESS) { | 1037 | if (!efi_setup) { |
| 854 | pr_alert("Unable to switch EFI into virtual mode " | 1038 | status = phys_efi_set_virtual_address_map( |
| 855 | "(status=%lx)!\n", status); | 1039 | memmap.desc_size * count, |
| 856 | panic("EFI call to SetVirtualAddressMap() failed!"); | 1040 | memmap.desc_size, |
| 1041 | memmap.desc_version, | ||
| 1042 | (efi_memory_desc_t *)__pa(new_memmap)); | ||
| 1043 | |||
| 1044 | if (status != EFI_SUCCESS) { | ||
| 1045 | pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", | ||
| 1046 | status); | ||
| 1047 | panic("EFI call to SetVirtualAddressMap() failed!"); | ||
| 1048 | } | ||
| 857 | } | 1049 | } |
| 858 | 1050 | ||
| 859 | /* | 1051 | /* |
| @@ -876,7 +1068,8 @@ void __init efi_enter_virtual_mode(void) | |||
| 876 | efi.query_variable_info = virt_efi_query_variable_info; | 1068 | efi.query_variable_info = virt_efi_query_variable_info; |
| 877 | efi.update_capsule = virt_efi_update_capsule; | 1069 | efi.update_capsule = virt_efi_update_capsule; |
| 878 | efi.query_capsule_caps = virt_efi_query_capsule_caps; | 1070 | efi.query_capsule_caps = virt_efi_query_capsule_caps; |
| 879 | if (__supported_pte_mask & _PAGE_NX) | 1071 | |
| 1072 | if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) | ||
| 880 | runtime_code_page_mkexec(); | 1073 | runtime_code_page_mkexec(); |
| 881 | 1074 | ||
| 882 | kfree(new_memmap); | 1075 | kfree(new_memmap); |
| @@ -1006,3 +1199,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) | |||
| 1006 | return EFI_SUCCESS; | 1199 | return EFI_SUCCESS; |
| 1007 | } | 1200 | } |
| 1008 | EXPORT_SYMBOL_GPL(efi_query_variable_store); | 1201 | EXPORT_SYMBOL_GPL(efi_query_variable_store); |
| 1202 | |||
| 1203 | static int __init parse_efi_cmdline(char *str) | ||
| 1204 | { | ||
| 1205 | if (*str == '=') | ||
| 1206 | str++; | ||
| 1207 | |||
| 1208 | if (!strncmp(str, "old_map", 7)) | ||
| 1209 | set_bit(EFI_OLD_MEMMAP, &x86_efi_facility); | ||
| 1210 | |||
| 1211 | return 0; | ||
| 1212 | } | ||
| 1213 | early_param("efi", parse_efi_cmdline); | ||
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd7..249b183cf417 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c | |||
| @@ -37,9 +37,19 @@ | |||
| 37 | * claim EFI runtime service handler exclusively and to duplicate a memory in | 37 | * claim EFI runtime service handler exclusively and to duplicate a memory in |
| 38 | * low memory space say 0 - 3G. | 38 | * low memory space say 0 - 3G. |
| 39 | */ | 39 | */ |
| 40 | |||
| 41 | static unsigned long efi_rt_eflags; | 40 | static unsigned long efi_rt_eflags; |
| 42 | 41 | ||
| 42 | void efi_sync_low_kernel_mappings(void) {} | ||
| 43 | void efi_setup_page_tables(void) {} | ||
| 44 | |||
| 45 | void __init efi_map_region(efi_memory_desc_t *md) | ||
| 46 | { | ||
| 47 | old_map_region(md); | ||
| 48 | } | ||
| 49 | |||
| 50 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} | ||
| 51 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} | ||
| 52 | |||
| 43 | void efi_call_phys_prelog(void) | 53 | void efi_call_phys_prelog(void) |
| 44 | { | 54 | { |
| 45 | struct desc_ptr gdt_descr; | 55 | struct desc_ptr gdt_descr; |
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a3..6284f158a47d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
| @@ -38,10 +38,28 @@ | |||
| 38 | #include <asm/efi.h> | 38 | #include <asm/efi.h> |
| 39 | #include <asm/cacheflush.h> | 39 | #include <asm/cacheflush.h> |
| 40 | #include <asm/fixmap.h> | 40 | #include <asm/fixmap.h> |
| 41 | #include <asm/realmode.h> | ||
| 41 | 42 | ||
| 42 | static pgd_t *save_pgd __initdata; | 43 | static pgd_t *save_pgd __initdata; |
| 43 | static unsigned long efi_flags __initdata; | 44 | static unsigned long efi_flags __initdata; |
| 44 | 45 | ||
| 46 | /* | ||
| 47 | * We allocate runtime services regions top-down, starting from -4G, i.e. | ||
| 48 | * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. | ||
| 49 | */ | ||
| 50 | static u64 efi_va = -4 * (1UL << 30); | ||
| 51 | #define EFI_VA_END (-68 * (1UL << 30)) | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Scratch space used for switching the pagetable in the EFI stub | ||
| 55 | */ | ||
| 56 | struct efi_scratch { | ||
| 57 | u64 r15; | ||
| 58 | u64 prev_cr3; | ||
| 59 | pgd_t *efi_pgt; | ||
| 60 | bool use_pgd; | ||
| 61 | }; | ||
| 62 | |||
| 45 | static void __init early_code_mapping_set_exec(int executable) | 63 | static void __init early_code_mapping_set_exec(int executable) |
| 46 | { | 64 | { |
| 47 | efi_memory_desc_t *md; | 65 | efi_memory_desc_t *md; |
| @@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void) | |||
| 65 | int pgd; | 83 | int pgd; |
| 66 | int n_pgds; | 84 | int n_pgds; |
| 67 | 85 | ||
| 86 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
| 87 | return; | ||
| 88 | |||
| 68 | early_code_mapping_set_exec(1); | 89 | early_code_mapping_set_exec(1); |
| 69 | local_irq_save(efi_flags); | 90 | local_irq_save(efi_flags); |
| 70 | 91 | ||
| @@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void) | |||
| 86 | */ | 107 | */ |
| 87 | int pgd; | 108 | int pgd; |
| 88 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); | 109 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); |
| 110 | |||
| 111 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
| 112 | return; | ||
| 113 | |||
| 89 | for (pgd = 0; pgd < n_pgds; pgd++) | 114 | for (pgd = 0; pgd < n_pgds; pgd++) |
| 90 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); | 115 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); |
| 91 | kfree(save_pgd); | 116 | kfree(save_pgd); |
| @@ -94,6 +119,96 @@ void __init efi_call_phys_epilog(void) | |||
| 94 | early_code_mapping_set_exec(0); | 119 | early_code_mapping_set_exec(0); |
| 95 | } | 120 | } |
| 96 | 121 | ||
| 122 | /* | ||
| 123 | * Add low kernel mappings for passing arguments to EFI functions. | ||
| 124 | */ | ||
| 125 | void efi_sync_low_kernel_mappings(void) | ||
| 126 | { | ||
| 127 | unsigned num_pgds; | ||
| 128 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
| 129 | |||
| 130 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
| 131 | return; | ||
| 132 | |||
| 133 | num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); | ||
| 134 | |||
| 135 | memcpy(pgd + pgd_index(PAGE_OFFSET), | ||
| 136 | init_mm.pgd + pgd_index(PAGE_OFFSET), | ||
| 137 | sizeof(pgd_t) * num_pgds); | ||
| 138 | } | ||
| 139 | |||
| 140 | void efi_setup_page_tables(void) | ||
| 141 | { | ||
| 142 | efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; | ||
| 143 | |||
| 144 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
| 145 | efi_scratch.use_pgd = true; | ||
| 146 | } | ||
| 147 | |||
| 148 | static void __init __map_region(efi_memory_desc_t *md, u64 va) | ||
| 149 | { | ||
| 150 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
| 151 | unsigned long pf = 0; | ||
| 152 | |||
| 153 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
| 154 | pf |= _PAGE_PCD; | ||
| 155 | |||
| 156 | if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) | ||
| 157 | pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", | ||
| 158 | md->phys_addr, va); | ||
| 159 | } | ||
| 160 | |||
| 161 | void __init efi_map_region(efi_memory_desc_t *md) | ||
| 162 | { | ||
| 163 | unsigned long size = md->num_pages << PAGE_SHIFT; | ||
| 164 | u64 pa = md->phys_addr; | ||
| 165 | |||
| 166 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
| 167 | return old_map_region(md); | ||
| 168 | |||
| 169 | /* | ||
| 170 | * Make sure the 1:1 mappings are present as a catch-all for b0rked | ||
| 171 | * firmware which doesn't update all internal pointers after switching | ||
| 172 | * to virtual mode and would otherwise crap on us. | ||
| 173 | */ | ||
| 174 | __map_region(md, md->phys_addr); | ||
| 175 | |||
| 176 | efi_va -= size; | ||
| 177 | |||
| 178 | /* Is PA 2M-aligned? */ | ||
| 179 | if (!(pa & (PMD_SIZE - 1))) { | ||
| 180 | efi_va &= PMD_MASK; | ||
| 181 | } else { | ||
| 182 | u64 pa_offset = pa & (PMD_SIZE - 1); | ||
| 183 | u64 prev_va = efi_va; | ||
| 184 | |||
| 185 | /* get us the same offset within this 2M page */ | ||
| 186 | efi_va = (efi_va & PMD_MASK) + pa_offset; | ||
| 187 | |||
| 188 | if (efi_va > prev_va) | ||
| 189 | efi_va -= PMD_SIZE; | ||
| 190 | } | ||
| 191 | |||
| 192 | if (efi_va < EFI_VA_END) { | ||
| 193 | pr_warn(FW_WARN "VA address range overflow!\n"); | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | |||
| 197 | /* Do the VA map */ | ||
| 198 | __map_region(md, efi_va); | ||
| 199 | md->virt_addr = efi_va; | ||
| 200 | } | ||
| 201 | |||
| 202 | /* | ||
| 203 | * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges. | ||
| 204 | * md->virt_addr is the original virtual address which had been mapped in kexec | ||
| 205 | * 1st kernel. | ||
| 206 | */ | ||
| 207 | void __init efi_map_region_fixed(efi_memory_desc_t *md) | ||
| 208 | { | ||
| 209 | __map_region(md, md->virt_addr); | ||
| 210 | } | ||
| 211 | |||
| 97 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | 212 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
| 98 | u32 type, u64 attribute) | 213 | u32 type, u64 attribute) |
| 99 | { | 214 | { |
| @@ -113,3 +228,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | |||
| 113 | 228 | ||
| 114 | return (void __iomem *)__va(phys_addr); | 229 | return (void __iomem *)__va(phys_addr); |
| 115 | } | 230 | } |
| 231 | |||
| 232 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) | ||
| 233 | { | ||
| 234 | efi_setup = phys_addr + sizeof(struct setup_data); | ||
| 235 | } | ||
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab8146..88073b140298 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S | |||
| @@ -34,10 +34,47 @@ | |||
| 34 | mov %rsi, %cr0; \ | 34 | mov %rsi, %cr0; \ |
| 35 | mov (%rsp), %rsp | 35 | mov (%rsp), %rsp |
| 36 | 36 | ||
| 37 | /* stolen from gcc */ | ||
| 38 | .macro FLUSH_TLB_ALL | ||
| 39 | movq %r15, efi_scratch(%rip) | ||
| 40 | movq %r14, efi_scratch+8(%rip) | ||
| 41 | movq %cr4, %r15 | ||
| 42 | movq %r15, %r14 | ||
| 43 | andb $0x7f, %r14b | ||
| 44 | movq %r14, %cr4 | ||
| 45 | movq %r15, %cr4 | ||
| 46 | movq efi_scratch+8(%rip), %r14 | ||
| 47 | movq efi_scratch(%rip), %r15 | ||
| 48 | .endm | ||
| 49 | |||
| 50 | .macro SWITCH_PGT | ||
| 51 | cmpb $0, efi_scratch+24(%rip) | ||
| 52 | je 1f | ||
| 53 | movq %r15, efi_scratch(%rip) # r15 | ||
| 54 | # save previous CR3 | ||
| 55 | movq %cr3, %r15 | ||
| 56 | movq %r15, efi_scratch+8(%rip) # prev_cr3 | ||
| 57 | movq efi_scratch+16(%rip), %r15 # EFI pgt | ||
| 58 | movq %r15, %cr3 | ||
| 59 | 1: | ||
| 60 | .endm | ||
| 61 | |||
| 62 | .macro RESTORE_PGT | ||
| 63 | cmpb $0, efi_scratch+24(%rip) | ||
| 64 | je 2f | ||
| 65 | movq efi_scratch+8(%rip), %r15 | ||
| 66 | movq %r15, %cr3 | ||
| 67 | movq efi_scratch(%rip), %r15 | ||
| 68 | FLUSH_TLB_ALL | ||
| 69 | 2: | ||
| 70 | .endm | ||
| 71 | |||
| 37 | ENTRY(efi_call0) | 72 | ENTRY(efi_call0) |
| 38 | SAVE_XMM | 73 | SAVE_XMM |
| 39 | subq $32, %rsp | 74 | subq $32, %rsp |
| 75 | SWITCH_PGT | ||
| 40 | call *%rdi | 76 | call *%rdi |
| 77 | RESTORE_PGT | ||
| 41 | addq $32, %rsp | 78 | addq $32, %rsp |
| 42 | RESTORE_XMM | 79 | RESTORE_XMM |
| 43 | ret | 80 | ret |
| @@ -47,7 +84,9 @@ ENTRY(efi_call1) | |||
| 47 | SAVE_XMM | 84 | SAVE_XMM |
| 48 | subq $32, %rsp | 85 | subq $32, %rsp |
| 49 | mov %rsi, %rcx | 86 | mov %rsi, %rcx |
| 87 | SWITCH_PGT | ||
| 50 | call *%rdi | 88 | call *%rdi |
| 89 | RESTORE_PGT | ||
| 51 | addq $32, %rsp | 90 | addq $32, %rsp |
| 52 | RESTORE_XMM | 91 | RESTORE_XMM |
| 53 | ret | 92 | ret |
| @@ -57,7 +96,9 @@ ENTRY(efi_call2) | |||
| 57 | SAVE_XMM | 96 | SAVE_XMM |
| 58 | subq $32, %rsp | 97 | subq $32, %rsp |
| 59 | mov %rsi, %rcx | 98 | mov %rsi, %rcx |
| 99 | SWITCH_PGT | ||
| 60 | call *%rdi | 100 | call *%rdi |
| 101 | RESTORE_PGT | ||
| 61 | addq $32, %rsp | 102 | addq $32, %rsp |
| 62 | RESTORE_XMM | 103 | RESTORE_XMM |
| 63 | ret | 104 | ret |
| @@ -68,7 +109,9 @@ ENTRY(efi_call3) | |||
| 68 | subq $32, %rsp | 109 | subq $32, %rsp |
| 69 | mov %rcx, %r8 | 110 | mov %rcx, %r8 |
| 70 | mov %rsi, %rcx | 111 | mov %rsi, %rcx |
| 112 | SWITCH_PGT | ||
| 71 | call *%rdi | 113 | call *%rdi |
| 114 | RESTORE_PGT | ||
| 72 | addq $32, %rsp | 115 | addq $32, %rsp |
| 73 | RESTORE_XMM | 116 | RESTORE_XMM |
| 74 | ret | 117 | ret |
| @@ -80,7 +123,9 @@ ENTRY(efi_call4) | |||
| 80 | mov %r8, %r9 | 123 | mov %r8, %r9 |
| 81 | mov %rcx, %r8 | 124 | mov %rcx, %r8 |
| 82 | mov %rsi, %rcx | 125 | mov %rsi, %rcx |
| 126 | SWITCH_PGT | ||
| 83 | call *%rdi | 127 | call *%rdi |
| 128 | RESTORE_PGT | ||
| 84 | addq $32, %rsp | 129 | addq $32, %rsp |
| 85 | RESTORE_XMM | 130 | RESTORE_XMM |
| 86 | ret | 131 | ret |
| @@ -93,7 +138,9 @@ ENTRY(efi_call5) | |||
| 93 | mov %r8, %r9 | 138 | mov %r8, %r9 |
| 94 | mov %rcx, %r8 | 139 | mov %rcx, %r8 |
| 95 | mov %rsi, %rcx | 140 | mov %rsi, %rcx |
| 141 | SWITCH_PGT | ||
| 96 | call *%rdi | 142 | call *%rdi |
| 143 | RESTORE_PGT | ||
| 97 | addq $48, %rsp | 144 | addq $48, %rsp |
| 98 | RESTORE_XMM | 145 | RESTORE_XMM |
| 99 | ret | 146 | ret |
| @@ -109,8 +156,15 @@ ENTRY(efi_call6) | |||
| 109 | mov %r8, %r9 | 156 | mov %r8, %r9 |
| 110 | mov %rcx, %r8 | 157 | mov %rcx, %r8 |
| 111 | mov %rsi, %rcx | 158 | mov %rsi, %rcx |
| 159 | SWITCH_PGT | ||
| 112 | call *%rdi | 160 | call *%rdi |
| 161 | RESTORE_PGT | ||
| 113 | addq $48, %rsp | 162 | addq $48, %rsp |
| 114 | RESTORE_XMM | 163 | RESTORE_XMM |
| 115 | ret | 164 | ret |
| 116 | ENDPROC(efi_call6) | 165 | ENDPROC(efi_call6) |
| 166 | |||
| 167 | .data | ||
| 168 | ENTRY(efi_scratch) | ||
| 169 | .fill 3,8,0 | ||
| 170 | .byte 0 | ||
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 6aecbc86ec94..1e75f48b61f8 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig | |||
| @@ -36,6 +36,17 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE | |||
| 36 | backend for pstore by default. This setting can be overridden | 36 | backend for pstore by default. This setting can be overridden |
| 37 | using the efivars module's pstore_disable parameter. | 37 | using the efivars module's pstore_disable parameter. |
| 38 | 38 | ||
| 39 | config EFI_RUNTIME_MAP | ||
| 40 | bool "Export efi runtime maps to sysfs" | ||
| 41 | depends on X86 && EFI && KEXEC | ||
| 42 | default y | ||
| 43 | help | ||
| 44 | Export efi runtime memory maps to /sys/firmware/efi/runtime-map. | ||
| 45 | That memory map is used for example by kexec to set up efi virtual | ||
| 46 | mapping for the 2nd kernel, but can also be used for debugging purposes. | ||
| 47 | |||
| 48 | See also Documentation/ABI/testing/sysfs-firmware-efi-runtime-map. | ||
| 49 | |||
| 39 | endmenu | 50 | endmenu |
| 40 | 51 | ||
| 41 | config UEFI_CPER | 52 | config UEFI_CPER |
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 6c2a41ec21ba..9553496b0f43 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile | |||
| @@ -5,3 +5,4 @@ obj-$(CONFIG_EFI) += efi.o vars.o | |||
| 5 | obj-$(CONFIG_EFI_VARS) += efivars.o | 5 | obj-$(CONFIG_EFI_VARS) += efivars.o |
| 6 | obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o | 6 | obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o |
| 7 | obj-$(CONFIG_UEFI_CPER) += cper.o | 7 | obj-$(CONFIG_UEFI_CPER) += cper.o |
| 8 | obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o | ||
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 2e2fbdec0845..4753bac65279 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c | |||
| @@ -32,6 +32,9 @@ struct efi __read_mostly efi = { | |||
| 32 | .hcdp = EFI_INVALID_TABLE_ADDR, | 32 | .hcdp = EFI_INVALID_TABLE_ADDR, |
| 33 | .uga = EFI_INVALID_TABLE_ADDR, | 33 | .uga = EFI_INVALID_TABLE_ADDR, |
| 34 | .uv_systab = EFI_INVALID_TABLE_ADDR, | 34 | .uv_systab = EFI_INVALID_TABLE_ADDR, |
| 35 | .fw_vendor = EFI_INVALID_TABLE_ADDR, | ||
| 36 | .runtime = EFI_INVALID_TABLE_ADDR, | ||
| 37 | .config_table = EFI_INVALID_TABLE_ADDR, | ||
| 35 | }; | 38 | }; |
| 36 | EXPORT_SYMBOL(efi); | 39 | EXPORT_SYMBOL(efi); |
| 37 | 40 | ||
| @@ -71,13 +74,49 @@ static ssize_t systab_show(struct kobject *kobj, | |||
| 71 | static struct kobj_attribute efi_attr_systab = | 74 | static struct kobj_attribute efi_attr_systab = |
| 72 | __ATTR(systab, 0400, systab_show, NULL); | 75 | __ATTR(systab, 0400, systab_show, NULL); |
| 73 | 76 | ||
| 77 | #define EFI_FIELD(var) efi.var | ||
| 78 | |||
| 79 | #define EFI_ATTR_SHOW(name) \ | ||
| 80 | static ssize_t name##_show(struct kobject *kobj, \ | ||
| 81 | struct kobj_attribute *attr, char *buf) \ | ||
| 82 | { \ | ||
| 83 | return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \ | ||
| 84 | } | ||
| 85 | |||
| 86 | EFI_ATTR_SHOW(fw_vendor); | ||
| 87 | EFI_ATTR_SHOW(runtime); | ||
| 88 | EFI_ATTR_SHOW(config_table); | ||
| 89 | |||
| 90 | static struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor); | ||
| 91 | static struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime); | ||
| 92 | static struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table); | ||
| 93 | |||
| 74 | static struct attribute *efi_subsys_attrs[] = { | 94 | static struct attribute *efi_subsys_attrs[] = { |
| 75 | &efi_attr_systab.attr, | 95 | &efi_attr_systab.attr, |
| 76 | NULL, /* maybe more in the future? */ | 96 | &efi_attr_fw_vendor.attr, |
| 97 | &efi_attr_runtime.attr, | ||
| 98 | &efi_attr_config_table.attr, | ||
| 99 | NULL, | ||
| 77 | }; | 100 | }; |
| 78 | 101 | ||
| 102 | static umode_t efi_attr_is_visible(struct kobject *kobj, | ||
| 103 | struct attribute *attr, int n) | ||
| 104 | { | ||
| 105 | umode_t mode = attr->mode; | ||
| 106 | |||
| 107 | if (attr == &efi_attr_fw_vendor.attr) | ||
| 108 | return (efi.fw_vendor == EFI_INVALID_TABLE_ADDR) ? 0 : mode; | ||
| 109 | else if (attr == &efi_attr_runtime.attr) | ||
| 110 | return (efi.runtime == EFI_INVALID_TABLE_ADDR) ? 0 : mode; | ||
| 111 | else if (attr == &efi_attr_config_table.attr) | ||
| 112 | return (efi.config_table == EFI_INVALID_TABLE_ADDR) ? 0 : mode; | ||
| 113 | |||
| 114 | return mode; | ||
| 115 | } | ||
| 116 | |||
| 79 | static struct attribute_group efi_subsys_attr_group = { | 117 | static struct attribute_group efi_subsys_attr_group = { |
| 80 | .attrs = efi_subsys_attrs, | 118 | .attrs = efi_subsys_attrs, |
| 119 | .is_visible = efi_attr_is_visible, | ||
| 81 | }; | 120 | }; |
| 82 | 121 | ||
| 83 | static struct efivars generic_efivars; | 122 | static struct efivars generic_efivars; |
| @@ -128,6 +167,10 @@ static int __init efisubsys_init(void) | |||
| 128 | goto err_unregister; | 167 | goto err_unregister; |
| 129 | } | 168 | } |
| 130 | 169 | ||
| 170 | error = efi_runtime_map_init(efi_kobj); | ||
| 171 | if (error) | ||
| 172 | goto err_remove_group; | ||
| 173 | |||
| 131 | /* and the standard mountpoint for efivarfs */ | 174 | /* and the standard mountpoint for efivarfs */ |
| 132 | efivars_kobj = kobject_create_and_add("efivars", efi_kobj); | 175 | efivars_kobj = kobject_create_and_add("efivars", efi_kobj); |
| 133 | if (!efivars_kobj) { | 176 | if (!efivars_kobj) { |
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c new file mode 100644 index 000000000000..97cdd16a2169 --- /dev/null +++ b/drivers/firmware/efi/runtime-map.c | |||
| @@ -0,0 +1,181 @@ | |||
| 1 | /* | ||
| 2 | * linux/drivers/firmware/efi/runtime-map.c | ||
| 3 | * Copyright (C) 2013 Red Hat, Inc., Dave Young <dyoung@redhat.com> | ||
| 4 | * | ||
| 5 | * This file is released under the GPLv2. | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/string.h> | ||
| 9 | #include <linux/kernel.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/types.h> | ||
| 12 | #include <linux/efi.h> | ||
| 13 | #include <linux/slab.h> | ||
| 14 | |||
| 15 | #include <asm/setup.h> | ||
| 16 | |||
| 17 | static void *efi_runtime_map; | ||
| 18 | static int nr_efi_runtime_map; | ||
| 19 | static u32 efi_memdesc_size; | ||
| 20 | |||
| 21 | struct efi_runtime_map_entry { | ||
| 22 | efi_memory_desc_t md; | ||
| 23 | struct kobject kobj; /* kobject for each entry */ | ||
| 24 | }; | ||
| 25 | |||
/* Table of entry pointers; allocated and filled by efi_runtime_map_init(). */
static struct efi_runtime_map_entry **map_entries;
| 27 | |||
| 28 | struct map_attribute { | ||
| 29 | struct attribute attr; | ||
| 30 | ssize_t (*show)(struct efi_runtime_map_entry *entry, char *buf); | ||
| 31 | }; | ||
| 32 | |||
| 33 | static inline struct map_attribute *to_map_attr(struct attribute *attr) | ||
| 34 | { | ||
| 35 | return container_of(attr, struct map_attribute, attr); | ||
| 36 | } | ||
| 37 | |||
| 38 | static ssize_t type_show(struct efi_runtime_map_entry *entry, char *buf) | ||
| 39 | { | ||
| 40 | return snprintf(buf, PAGE_SIZE, "0x%x\n", entry->md.type); | ||
| 41 | } | ||
| 42 | |||
| 43 | #define EFI_RUNTIME_FIELD(var) entry->md.var | ||
| 44 | |||
| 45 | #define EFI_RUNTIME_U64_ATTR_SHOW(name) \ | ||
| 46 | static ssize_t name##_show(struct efi_runtime_map_entry *entry, char *buf) \ | ||
| 47 | { \ | ||
| 48 | return snprintf(buf, PAGE_SIZE, "0x%llx\n", EFI_RUNTIME_FIELD(name)); \ | ||
| 49 | } | ||
| 50 | |||
| 51 | EFI_RUNTIME_U64_ATTR_SHOW(phys_addr); | ||
| 52 | EFI_RUNTIME_U64_ATTR_SHOW(virt_addr); | ||
| 53 | EFI_RUNTIME_U64_ATTR_SHOW(num_pages); | ||
| 54 | EFI_RUNTIME_U64_ATTR_SHOW(attribute); | ||
| 55 | |||
| 56 | static inline struct efi_runtime_map_entry *to_map_entry(struct kobject *kobj) | ||
| 57 | { | ||
| 58 | return container_of(kobj, struct efi_runtime_map_entry, kobj); | ||
| 59 | } | ||
| 60 | |||
| 61 | static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, | ||
| 62 | char *buf) | ||
| 63 | { | ||
| 64 | struct efi_runtime_map_entry *entry = to_map_entry(kobj); | ||
| 65 | struct map_attribute *map_attr = to_map_attr(attr); | ||
| 66 | |||
| 67 | return map_attr->show(entry, buf); | ||
| 68 | } | ||
| 69 | |||
| 70 | static struct map_attribute map_type_attr = __ATTR_RO(type); | ||
| 71 | static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr); | ||
| 72 | static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr); | ||
| 73 | static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages); | ||
| 74 | static struct map_attribute map_attribute_attr = __ATTR_RO(attribute); | ||
| 75 | |||
| 76 | /* | ||
| 77 | * These are default attributes that are added for every memmap entry. | ||
| 78 | */ | ||
| 79 | static struct attribute *def_attrs[] = { | ||
| 80 | &map_type_attr.attr, | ||
| 81 | &map_phys_addr_attr.attr, | ||
| 82 | &map_virt_addr_attr.attr, | ||
| 83 | &map_num_pages_attr.attr, | ||
| 84 | &map_attribute_attr.attr, | ||
| 85 | NULL | ||
| 86 | }; | ||
| 87 | |||
| 88 | static const struct sysfs_ops map_attr_ops = { | ||
| 89 | .show = map_attr_show, | ||
| 90 | }; | ||
| 91 | |||
/* kobject release callback: free the entry that embeds @kobj. */
static void map_release(struct kobject *kobj)
{
	kfree(to_map_entry(kobj));
}
| 99 | |||
| 100 | static struct kobj_type __refdata map_ktype = { | ||
| 101 | .sysfs_ops = &map_attr_ops, | ||
| 102 | .default_attrs = def_attrs, | ||
| 103 | .release = map_release, | ||
| 104 | }; | ||
| 105 | |||
| 106 | static struct kset *map_kset; | ||
| 107 | |||
| 108 | static struct efi_runtime_map_entry * | ||
| 109 | add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) | ||
| 110 | { | ||
| 111 | int ret; | ||
| 112 | struct efi_runtime_map_entry *entry; | ||
| 113 | |||
| 114 | if (!map_kset) { | ||
| 115 | map_kset = kset_create_and_add("runtime-map", NULL, kobj); | ||
| 116 | if (!map_kset) | ||
| 117 | return ERR_PTR(-ENOMEM); | ||
| 118 | } | ||
| 119 | |||
| 120 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); | ||
| 121 | if (!entry) { | ||
| 122 | kset_unregister(map_kset); | ||
| 123 | return entry; | ||
| 124 | } | ||
| 125 | |||
| 126 | memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size, | ||
| 127 | sizeof(efi_memory_desc_t)); | ||
| 128 | |||
| 129 | kobject_init(&entry->kobj, &map_ktype); | ||
| 130 | entry->kobj.kset = map_kset; | ||
| 131 | ret = kobject_add(&entry->kobj, NULL, "%d", nr); | ||
| 132 | if (ret) { | ||
| 133 | kobject_put(&entry->kobj); | ||
| 134 | kset_unregister(map_kset); | ||
| 135 | return ERR_PTR(ret); | ||
| 136 | } | ||
| 137 | |||
| 138 | return entry; | ||
| 139 | } | ||
| 140 | |||
| 141 | void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) | ||
| 142 | { | ||
| 143 | efi_runtime_map = map; | ||
| 144 | nr_efi_runtime_map = nr_entries; | ||
| 145 | efi_memdesc_size = desc_size; | ||
| 146 | } | ||
| 147 | |||
| 148 | int __init efi_runtime_map_init(struct kobject *efi_kobj) | ||
| 149 | { | ||
| 150 | int i, j, ret = 0; | ||
| 151 | struct efi_runtime_map_entry *entry; | ||
| 152 | |||
| 153 | if (!efi_runtime_map) | ||
| 154 | return 0; | ||
| 155 | |||
| 156 | map_entries = kzalloc(nr_efi_runtime_map * sizeof(entry), GFP_KERNEL); | ||
| 157 | if (!map_entries) { | ||
| 158 | ret = -ENOMEM; | ||
| 159 | goto out; | ||
| 160 | } | ||
| 161 | |||
| 162 | for (i = 0; i < nr_efi_runtime_map; i++) { | ||
| 163 | entry = add_sysfs_runtime_map_entry(efi_kobj, i); | ||
| 164 | if (IS_ERR(entry)) { | ||
| 165 | ret = PTR_ERR(entry); | ||
| 166 | goto out_add_entry; | ||
| 167 | } | ||
| 168 | *(map_entries + i) = entry; | ||
| 169 | } | ||
| 170 | |||
| 171 | return 0; | ||
| 172 | out_add_entry: | ||
| 173 | for (j = i - 1; j > 0; j--) { | ||
| 174 | entry = *(map_entries + j); | ||
| 175 | kobject_put(&entry->kobj); | ||
| 176 | } | ||
| 177 | if (map_kset) | ||
| 178 | kset_unregister(map_kset); | ||
| 179 | out: | ||
| 180 | return ret; | ||
| 181 | } | ||
diff --git a/include/linux/efi.h b/include/linux/efi.h index 11ce6784a196..0a819e7a60c9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h | |||
| @@ -556,6 +556,9 @@ extern struct efi { | |||
| 556 | unsigned long hcdp; /* HCDP table */ | 556 | unsigned long hcdp; /* HCDP table */ |
| 557 | unsigned long uga; /* UGA table */ | 557 | unsigned long uga; /* UGA table */ |
| 558 | unsigned long uv_systab; /* UV system table */ | 558 | unsigned long uv_systab; /* UV system table */ |
| 559 | unsigned long fw_vendor; /* fw_vendor */ | ||
| 560 | unsigned long runtime; /* runtime table */ | ||
| 561 | unsigned long config_table; /* config tables */ | ||
| 559 | efi_get_time_t *get_time; | 562 | efi_get_time_t *get_time; |
| 560 | efi_set_time_t *set_time; | 563 | efi_set_time_t *set_time; |
| 561 | efi_get_wakeup_time_t *get_wakeup_time; | 564 | efi_get_wakeup_time_t *get_wakeup_time; |
| @@ -653,6 +656,7 @@ extern int __init efi_setup_pcdp_console(char *); | |||
| 653 | #define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ | 656 | #define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ |
| 654 | #define EFI_MEMMAP 4 /* Can we use EFI memory map? */ | 657 | #define EFI_MEMMAP 4 /* Can we use EFI memory map? */ |
| 655 | #define EFI_64BIT 5 /* Is the firmware 64-bit? */ | 658 | #define EFI_64BIT 5 /* Is the firmware 64-bit? */ |
| 659 | #define EFI_ARCH_1 6 /* First arch-specific bit */ | ||
| 656 | 660 | ||
| 657 | #ifdef CONFIG_EFI | 661 | #ifdef CONFIG_EFI |
| 658 | # ifdef CONFIG_X86 | 662 | # ifdef CONFIG_X86 |
| @@ -872,4 +876,17 @@ int efivars_sysfs_init(void); | |||
| 872 | 876 | ||
| 873 | #endif /* CONFIG_EFI_VARS */ | 877 | #endif /* CONFIG_EFI_VARS */ |
| 874 | 878 | ||
| 879 | #ifdef CONFIG_EFI_RUNTIME_MAP | ||
| 880 | int efi_runtime_map_init(struct kobject *); | ||
| 881 | void efi_runtime_map_setup(void *, int, u32); | ||
| 882 | #else | ||
| 883 | static inline int efi_runtime_map_init(struct kobject *kobj) | ||
| 884 | { | ||
| 885 | return 0; | ||
| 886 | } | ||
| 887 | |||
| 888 | static inline void | ||
| 889 | efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {} | ||
| 890 | #endif | ||
| 891 | |||
| 875 | #endif /* _LINUX_EFI_H */ | 892 | #endif /* _LINUX_EFI_H */ |
