aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-20 15:05:30 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-20 15:05:30 -0500
commit972d5e7e5b66f5a143026fcdd4b2be2f519c0f12 (patch)
tree6c1c5bb79fe163b3b48254605b54532099b74cff
parent5d4863e4cc4dc12d1d5e42da3cb5d38c535e4ad6 (diff)
parentef0b8b9a521c65201bfca9747ee1bf374296133c (diff)
Merge branch 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 EFI changes from Ingo Molnar: "This consists of two main parts: - New static EFI runtime services virtual mapping layout which is groundwork for kexec support on EFI (Borislav Petkov) - EFI kexec support itself (Dave Young)" * 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits) x86/efi: parse_efi_setup() build fix x86: ksysfs.c build fix x86/efi: Delete superfluous global variables x86: Reserve setup_data ranges late after parsing memmap cmdline x86: Export x86 boot_params to sysfs x86: Add xloadflags bit for EFI runtime support on kexec x86/efi: Pass necessary EFI data for kexec via setup_data efi: Export EFI runtime memory mapping to sysfs efi: Export more EFI table variables to sysfs x86/efi: Cleanup efi_enter_virtual_mode() function x86/efi: Fix off-by-one bug in EFI Boot Services reservation x86/efi: Add a wrapper function efi_map_region_fixed() x86/efi: Remove unused variables in __map_region() x86/efi: Check krealloc return value x86/efi: Runtime services virtual mapping x86/mm/cpa: Map in an arbitrary pgd x86/mm/pageattr: Add last levels of error path x86/mm/pageattr: Add a PUD error unwinding path x86/mm/pageattr: Add a PTE pagetable populating function x86/mm/pageattr: Add a PMD pagetable populating function ...
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-efi20
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-efi-runtime-map34
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-boot_params38
-rw-r--r--Documentation/kernel-parameters.txt6
-rw-r--r--Documentation/x86/boot.txt3
-rw-r--r--Documentation/x86/x86_64/mm.txt7
-rw-r--r--arch/x86/boot/header.S9
-rw-r--r--arch/x86/include/asm/efi.h78
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h2
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/ksysfs.c340
-rw-r--r--arch/x86/kernel/setup.c7
-rw-r--r--arch/x86/mm/pageattr.c461
-rw-r--r--arch/x86/platform/efi/efi.c355
-rw-r--r--arch/x86/platform/efi/efi_32.c12
-rw-r--r--arch/x86/platform/efi/efi_64.c120
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S54
-rw-r--r--drivers/firmware/efi/Kconfig11
-rw-r--r--drivers/firmware/efi/Makefile1
-rw-r--r--drivers/firmware/efi/efi.c45
-rw-r--r--drivers/firmware/efi/runtime-map.c181
-rw-r--r--include/linux/efi.h17
23 files changed, 1689 insertions, 116 deletions
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi
new file mode 100644
index 000000000000..05874da7ce80
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-efi
@@ -0,0 +1,20 @@
1What: /sys/firmware/efi/fw_vendor
2Date: December 2013
3Contact: Dave Young <dyoung@redhat.com>
4Description: It shows the physical address of firmware vendor field in the
5 EFI system table.
6Users: Kexec
7
8What: /sys/firmware/efi/runtime
9Date: December 2013
10Contact: Dave Young <dyoung@redhat.com>
11Description: It shows the physical address of runtime service table entry in
12 the EFI system table.
13Users: Kexec
14
15What: /sys/firmware/efi/config_table
16Date: December 2013
17Contact: Dave Young <dyoung@redhat.com>
18Description: It shows the physical address of config table entry in the EFI
19 system table.
20Users: Kexec
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
new file mode 100644
index 000000000000..c61b9b348e99
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
@@ -0,0 +1,34 @@
1What: /sys/firmware/efi/runtime-map/
2Date: December 2013
3Contact: Dave Young <dyoung@redhat.com>
4Description: Switching efi runtime services to virtual mode requires
5 that all efi memory ranges which have the runtime attribute
6 bit set to be mapped to virtual addresses.
7
8 The efi runtime services can only be switched to virtual
9 mode once without rebooting. The kexec kernel must maintain
10 the same physical to virtual address mappings as the first
11 kernel. The mappings are exported to sysfs so userspace tools
12 can reassemble them and pass them into the kexec kernel.
13
14 /sys/firmware/efi/runtime-map/ is the directory the kernel
15 exports that information in.
16
17 subdirectories are named with the number of the memory range:
18
19 /sys/firmware/efi/runtime-map/0
20 /sys/firmware/efi/runtime-map/1
21 /sys/firmware/efi/runtime-map/2
22 /sys/firmware/efi/runtime-map/3
23 ...
24
25 Each subdirectory contains five files:
26
27 attribute : The attributes of the memory range.
28 num_pages : The size of the memory range in pages.
29 phys_addr : The physical address of the memory range.
30 type : The type of the memory range.
31 virt_addr : The virtual address of the memory range.
32
33 Above values are all hexadecimal numbers with the '0x' prefix.
34Users: Kexec
diff --git a/Documentation/ABI/testing/sysfs-kernel-boot_params b/Documentation/ABI/testing/sysfs-kernel-boot_params
new file mode 100644
index 000000000000..eca38ce2852d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-boot_params
@@ -0,0 +1,38 @@
1What: /sys/kernel/boot_params
2Date: December 2013
3Contact: Dave Young <dyoung@redhat.com>
4Description: The /sys/kernel/boot_params directory contains two
5 files: "data" and "version" and one subdirectory "setup_data".
6 It is used to export the kernel boot parameters of an x86
7 platform to userspace for kexec and debugging purpose.
8
9 If there's no setup_data in boot_params the subdirectory will
10 not be created.
11
12 "data" file is the binary representation of struct boot_params.
13
14 "version" file is the string representation of boot
15 protocol version.
16
17 "setup_data" subdirectory contains the setup_data data
18 structure in boot_params. setup_data is maintained in kernel
19 as a link list. In "setup_data" subdirectory there's one
20 subdirectory for each link list node named with the number
21 of the list nodes. The list node subdirectory contains two
22 files "type" and "data". "type" file is the string
23 representation of setup_data type. "data" file is the binary
24 representation of setup_data payload.
25
26 The whole boot_params directory structure is like below:
27 /sys/kernel/boot_params
28 |__ data
29 |__ setup_data
30 | |__ 0
31 | | |__ data
32 | | |__ type
33 | |__ 1
34 | |__ data
35 | |__ type
36 |__ version
37
38Users: Kexec
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 3b8e262c3657..4eb5fff022b4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -899,6 +899,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
899 edd= [EDD] 899 edd= [EDD]
900 Format: {"off" | "on" | "skip[mbr]"} 900 Format: {"off" | "on" | "skip[mbr]"}
901 901
902 efi= [EFI]
903 Format: { "old_map" }
904 old_map [X86-64]: switch to the old ioremap-based EFI
905 runtime services mapping. 32-bit still uses this one by
906 default.
907
902 efi_no_storage_paranoia [EFI; X86] 908 efi_no_storage_paranoia [EFI; X86]
903 Using this parameter you can use more than 50% of 909 Using this parameter you can use more than 50% of
904 your efi variable storage. Use this parameter only if 910 your efi variable storage. Use this parameter only if
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index f4f268c2b826..cb81741d3b0b 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -608,6 +608,9 @@ Protocol: 2.12+
608 - If 1, the kernel supports the 64-bit EFI handoff entry point 608 - If 1, the kernel supports the 64-bit EFI handoff entry point
609 given at handover_offset + 0x200. 609 given at handover_offset + 0x200.
610 610
611 Bit 4 (read): XLF_EFI_KEXEC
612 - If 1, the kernel supports kexec EFI boot with EFI runtime support.
613
611Field name: cmdline_size 614Field name: cmdline_size
612Type: read 615Type: read
613Offset/size: 0x238/4 616Offset/size: 0x238/4
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 881582f75c9c..c584a51add15 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -28,4 +28,11 @@ reference.
28Current X86-64 implementations only support 40 bits of address space, 28Current X86-64 implementations only support 40 bits of address space,
29but we support up to 46 bits. This expands into MBZ space in the page tables. 29but we support up to 46 bits. This expands into MBZ space in the page tables.
30 30
31->trampoline_pgd:
32
33We map EFI runtime services in the aforementioned PGD in the virtual
34range of 64Gb (arbitrarily set, can be raised if needed)
35
360xffffffef00000000 - 0xffffffff00000000
37
31-Andi Kleen, Jul 2004 38-Andi Kleen, Jul 2004
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 9ec06a1f6d61..ec3b8ba68096 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -391,7 +391,14 @@ xloadflags:
391#else 391#else
392# define XLF23 0 392# define XLF23 0
393#endif 393#endif
394 .word XLF0 | XLF1 | XLF23 394
395#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC)
396# define XLF4 XLF_EFI_KEXEC
397#else
398# define XLF4 0
399#endif
400
401 .word XLF0 | XLF1 | XLF23 | XLF4
395 402
396cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, 403cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
397 #added with boot protocol 404 #added with boot protocol
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 65c6e6e3a552..3b978c472d08 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,6 +1,24 @@
1#ifndef _ASM_X86_EFI_H 1#ifndef _ASM_X86_EFI_H
2#define _ASM_X86_EFI_H 2#define _ASM_X86_EFI_H
3 3
4/*
5 * We map the EFI regions needed for runtime services non-contiguously,
6 * with preserved alignment on virtual addresses starting from -4G down
7 * for a total max space of 64G. This way, we provide for stable runtime
8 * services addresses across kernels so that a kexec'd kernel can still
9 * use them.
10 *
11 * This is the main reason why we're doing stable VA mappings for RT
12 * services.
13 *
14 * This flag is used in conjuction with a chicken bit called
15 * "efi=old_map" which can be used as a fallback to the old runtime
16 * services mapping method in case there's some b0rkage with a
17 * particular EFI implementation (haha, it is hard to hold up the
18 * sarcasm here...).
19 */
20#define EFI_OLD_MEMMAP EFI_ARCH_1
21
4#ifdef CONFIG_X86_32 22#ifdef CONFIG_X86_32
5 23
6#define EFI_LOADER_SIGNATURE "EL32" 24#define EFI_LOADER_SIGNATURE "EL32"
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
69 efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ 87 efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
70 (u64)(a4), (u64)(a5), (u64)(a6)) 88 (u64)(a4), (u64)(a5), (u64)(a6))
71 89
90#define _efi_call_virtX(x, f, ...) \
91({ \
92 efi_status_t __s; \
93 \
94 efi_sync_low_kernel_mappings(); \
95 preempt_disable(); \
96 __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \
97 preempt_enable(); \
98 __s; \
99})
100
72#define efi_call_virt0(f) \ 101#define efi_call_virt0(f) \
73 efi_call0((efi.systab->runtime->f)) 102 _efi_call_virtX(0, f)
74#define efi_call_virt1(f, a1) \ 103#define efi_call_virt1(f, a1) \
75 efi_call1((efi.systab->runtime->f), (u64)(a1)) 104 _efi_call_virtX(1, f, (u64)(a1))
76#define efi_call_virt2(f, a1, a2) \ 105#define efi_call_virt2(f, a1, a2) \
77 efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) 106 _efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
78#define efi_call_virt3(f, a1, a2, a3) \ 107#define efi_call_virt3(f, a1, a2, a3) \
79 efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 108 _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
80 (u64)(a3)) 109#define efi_call_virt4(f, a1, a2, a3, a4) \
81#define efi_call_virt4(f, a1, a2, a3, a4) \ 110 _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
82 efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 111#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
83 (u64)(a3), (u64)(a4)) 112 _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
84#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ 113#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
85 efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 114 _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
86 (u64)(a3), (u64)(a4), (u64)(a5))
87#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
88 efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
89 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
90 115
91extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, 116extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
92 u32 type, u64 attribute); 117 u32 type, u64 attribute);
@@ -95,12 +120,28 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
95 120
96extern int add_efi_memmap; 121extern int add_efi_memmap;
97extern unsigned long x86_efi_facility; 122extern unsigned long x86_efi_facility;
123extern struct efi_scratch efi_scratch;
98extern void efi_set_executable(efi_memory_desc_t *md, bool executable); 124extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
99extern int efi_memblock_x86_reserve_range(void); 125extern int efi_memblock_x86_reserve_range(void);
100extern void efi_call_phys_prelog(void); 126extern void efi_call_phys_prelog(void);
101extern void efi_call_phys_epilog(void); 127extern void efi_call_phys_epilog(void);
102extern void efi_unmap_memmap(void); 128extern void efi_unmap_memmap(void);
103extern void efi_memory_uc(u64 addr, unsigned long size); 129extern void efi_memory_uc(u64 addr, unsigned long size);
130extern void __init efi_map_region(efi_memory_desc_t *md);
131extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
132extern void efi_sync_low_kernel_mappings(void);
133extern void efi_setup_page_tables(void);
134extern void __init old_map_region(efi_memory_desc_t *md);
135
136struct efi_setup_data {
137 u64 fw_vendor;
138 u64 runtime;
139 u64 tables;
140 u64 smbios;
141 u64 reserved[8];
142};
143
144extern u64 efi_setup;
104 145
105#ifdef CONFIG_EFI 146#ifdef CONFIG_EFI
106 147
@@ -110,7 +151,7 @@ static inline bool efi_is_native(void)
110} 151}
111 152
112extern struct console early_efi_console; 153extern struct console early_efi_console;
113 154extern void parse_efi_setup(u64 phys_addr, u32 data_len);
114#else 155#else
115/* 156/*
116 * IF EFI is not configured, have the EFI calls return -ENOSYS. 157 * IF EFI is not configured, have the EFI calls return -ENOSYS.
@@ -122,6 +163,7 @@ extern struct console early_efi_console;
122#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) 163#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
123#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) 164#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
124#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) 165#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
166static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
125#endif /* CONFIG_EFI */ 167#endif /* CONFIG_EFI */
126 168
127#endif /* _ASM_X86_EFI_H */ 169#endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0ecac257fb26..a83aa44bb1fb 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
382 */ 382 */
383extern pte_t *lookup_address(unsigned long address, unsigned int *level); 383extern pte_t *lookup_address(unsigned long address, unsigned int *level);
384extern phys_addr_t slow_virt_to_phys(void *__address); 384extern phys_addr_t slow_virt_to_phys(void *__address);
385 385extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
386 unsigned numpages, unsigned long page_flags);
386#endif /* !__ASSEMBLY__ */ 387#endif /* !__ASSEMBLY__ */
387 388
388#endif /* _ASM_X86_PGTABLE_DEFS_H */ 389#endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 9c3733c5f8f7..225b0988043a 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -6,6 +6,7 @@
6#define SETUP_E820_EXT 1 6#define SETUP_E820_EXT 1
7#define SETUP_DTB 2 7#define SETUP_DTB 2
8#define SETUP_PCI 3 8#define SETUP_PCI 3
9#define SETUP_EFI 4
9 10
10/* ram_size flags */ 11/* ram_size flags */
11#define RAMDISK_IMAGE_START_MASK 0x07FF 12#define RAMDISK_IMAGE_START_MASK 0x07FF
@@ -23,6 +24,7 @@
23#define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) 24#define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1)
24#define XLF_EFI_HANDOVER_32 (1<<2) 25#define XLF_EFI_HANDOVER_32 (1<<2)
25#define XLF_EFI_HANDOVER_64 (1<<3) 26#define XLF_EFI_HANDOVER_64 (1<<3)
27#define XLF_EFI_KEXEC (1<<4)
26 28
27#ifndef __ASSEMBLY__ 29#ifndef __ASSEMBLY__
28 30
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9b0a34e2cd79..510cca5c5390 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
29obj-y += syscall_$(BITS).o 29obj-y += syscall_$(BITS).o
30obj-$(CONFIG_X86_64) += vsyscall_64.o 30obj-$(CONFIG_X86_64) += vsyscall_64.o
31obj-$(CONFIG_X86_64) += vsyscall_emu_64.o 31obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
32obj-$(CONFIG_SYSFS) += ksysfs.o
32obj-y += bootflag.o e820.o 33obj-y += bootflag.o e820.o
33obj-y += pci-dma.o quirks.o topology.o kdebugfs.o 34obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
34obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 35obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
new file mode 100644
index 000000000000..c2bedaea11f7
--- /dev/null
+++ b/arch/x86/kernel/ksysfs.c
@@ -0,0 +1,340 @@
1/*
2 * Architecture specific sysfs attributes in /sys/kernel
3 *
4 * Copyright (C) 2007, Intel Corp.
5 * Huang Ying <ying.huang@intel.com>
6 * Copyright (C) 2013, 2013 Red Hat, Inc.
7 * Dave Young <dyoung@redhat.com>
8 *
9 * This file is released under the GPLv2
10 */
11
12#include <linux/kobject.h>
13#include <linux/string.h>
14#include <linux/sysfs.h>
15#include <linux/init.h>
16#include <linux/stat.h>
17#include <linux/slab.h>
18#include <linux/mm.h>
19
20#include <asm/io.h>
21#include <asm/setup.h>
22
23static ssize_t version_show(struct kobject *kobj,
24 struct kobj_attribute *attr, char *buf)
25{
26 return sprintf(buf, "0x%04x\n", boot_params.hdr.version);
27}
28
29static struct kobj_attribute boot_params_version_attr = __ATTR_RO(version);
30
31static ssize_t boot_params_data_read(struct file *fp, struct kobject *kobj,
32 struct bin_attribute *bin_attr,
33 char *buf, loff_t off, size_t count)
34{
35 memcpy(buf, (void *)&boot_params + off, count);
36 return count;
37}
38
39static struct bin_attribute boot_params_data_attr = {
40 .attr = {
41 .name = "data",
42 .mode = S_IRUGO,
43 },
44 .read = boot_params_data_read,
45 .size = sizeof(boot_params),
46};
47
48static struct attribute *boot_params_version_attrs[] = {
49 &boot_params_version_attr.attr,
50 NULL,
51};
52
53static struct bin_attribute *boot_params_data_attrs[] = {
54 &boot_params_data_attr,
55 NULL,
56};
57
58static struct attribute_group boot_params_attr_group = {
59 .attrs = boot_params_version_attrs,
60 .bin_attrs = boot_params_data_attrs,
61};
62
63static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr)
64{
65 const char *name;
66
67 name = kobject_name(kobj);
68 return kstrtoint(name, 10, nr);
69}
70
71static int get_setup_data_paddr(int nr, u64 *paddr)
72{
73 int i = 0;
74 struct setup_data *data;
75 u64 pa_data = boot_params.hdr.setup_data;
76
77 while (pa_data) {
78 if (nr == i) {
79 *paddr = pa_data;
80 return 0;
81 }
82 data = ioremap_cache(pa_data, sizeof(*data));
83 if (!data)
84 return -ENOMEM;
85
86 pa_data = data->next;
87 iounmap(data);
88 i++;
89 }
90 return -EINVAL;
91}
92
93static int __init get_setup_data_size(int nr, size_t *size)
94{
95 int i = 0;
96 struct setup_data *data;
97 u64 pa_data = boot_params.hdr.setup_data;
98
99 while (pa_data) {
100 data = ioremap_cache(pa_data, sizeof(*data));
101 if (!data)
102 return -ENOMEM;
103 if (nr == i) {
104 *size = data->len;
105 iounmap(data);
106 return 0;
107 }
108
109 pa_data = data->next;
110 iounmap(data);
111 i++;
112 }
113 return -EINVAL;
114}
115
116static ssize_t type_show(struct kobject *kobj,
117 struct kobj_attribute *attr, char *buf)
118{
119 int nr, ret;
120 u64 paddr;
121 struct setup_data *data;
122
123 ret = kobj_to_setup_data_nr(kobj, &nr);
124 if (ret)
125 return ret;
126
127 ret = get_setup_data_paddr(nr, &paddr);
128 if (ret)
129 return ret;
130 data = ioremap_cache(paddr, sizeof(*data));
131 if (!data)
132 return -ENOMEM;
133
134 ret = sprintf(buf, "0x%x\n", data->type);
135 iounmap(data);
136 return ret;
137}
138
139static ssize_t setup_data_data_read(struct file *fp,
140 struct kobject *kobj,
141 struct bin_attribute *bin_attr,
142 char *buf,
143 loff_t off, size_t count)
144{
145 int nr, ret = 0;
146 u64 paddr;
147 struct setup_data *data;
148 void *p;
149
150 ret = kobj_to_setup_data_nr(kobj, &nr);
151 if (ret)
152 return ret;
153
154 ret = get_setup_data_paddr(nr, &paddr);
155 if (ret)
156 return ret;
157 data = ioremap_cache(paddr, sizeof(*data));
158 if (!data)
159 return -ENOMEM;
160
161 if (off > data->len) {
162 ret = -EINVAL;
163 goto out;
164 }
165
166 if (count > data->len - off)
167 count = data->len - off;
168
169 if (!count)
170 goto out;
171
172 ret = count;
173 p = ioremap_cache(paddr + sizeof(*data), data->len);
174 if (!p) {
175 ret = -ENOMEM;
176 goto out;
177 }
178 memcpy(buf, p + off, count);
179 iounmap(p);
180out:
181 iounmap(data);
182 return ret;
183}
184
185static struct kobj_attribute type_attr = __ATTR_RO(type);
186
187static struct bin_attribute data_attr = {
188 .attr = {
189 .name = "data",
190 .mode = S_IRUGO,
191 },
192 .read = setup_data_data_read,
193};
194
195static struct attribute *setup_data_type_attrs[] = {
196 &type_attr.attr,
197 NULL,
198};
199
200static struct bin_attribute *setup_data_data_attrs[] = {
201 &data_attr,
202 NULL,
203};
204
205static struct attribute_group setup_data_attr_group = {
206 .attrs = setup_data_type_attrs,
207 .bin_attrs = setup_data_data_attrs,
208};
209
210static int __init create_setup_data_node(struct kobject *parent,
211 struct kobject **kobjp, int nr)
212{
213 int ret = 0;
214 size_t size;
215 struct kobject *kobj;
216 char name[16]; /* should be enough for setup_data nodes numbers */
217 snprintf(name, 16, "%d", nr);
218
219 kobj = kobject_create_and_add(name, parent);
220 if (!kobj)
221 return -ENOMEM;
222
223 ret = get_setup_data_size(nr, &size);
224 if (ret)
225 goto out_kobj;
226
227 data_attr.size = size;
228 ret = sysfs_create_group(kobj, &setup_data_attr_group);
229 if (ret)
230 goto out_kobj;
231 *kobjp = kobj;
232
233 return 0;
234out_kobj:
235 kobject_put(kobj);
236 return ret;
237}
238
239static void __init cleanup_setup_data_node(struct kobject *kobj)
240{
241 sysfs_remove_group(kobj, &setup_data_attr_group);
242 kobject_put(kobj);
243}
244
245static int __init get_setup_data_total_num(u64 pa_data, int *nr)
246{
247 int ret = 0;
248 struct setup_data *data;
249
250 *nr = 0;
251 while (pa_data) {
252 *nr += 1;
253 data = ioremap_cache(pa_data, sizeof(*data));
254 if (!data) {
255 ret = -ENOMEM;
256 goto out;
257 }
258 pa_data = data->next;
259 iounmap(data);
260 }
261
262out:
263 return ret;
264}
265
266static int __init create_setup_data_nodes(struct kobject *parent)
267{
268 struct kobject *setup_data_kobj, **kobjp;
269 u64 pa_data;
270 int i, j, nr, ret = 0;
271
272 pa_data = boot_params.hdr.setup_data;
273 if (!pa_data)
274 return 0;
275
276 setup_data_kobj = kobject_create_and_add("setup_data", parent);
277 if (!setup_data_kobj) {
278 ret = -ENOMEM;
279 goto out;
280 }
281
282 ret = get_setup_data_total_num(pa_data, &nr);
283 if (ret)
284 goto out_setup_data_kobj;
285
286 kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL);
287 if (!kobjp) {
288 ret = -ENOMEM;
289 goto out_setup_data_kobj;
290 }
291
292 for (i = 0; i < nr; i++) {
293 ret = create_setup_data_node(setup_data_kobj, kobjp + i, i);
294 if (ret)
295 goto out_clean_nodes;
296 }
297
298 kfree(kobjp);
299 return 0;
300
301out_clean_nodes:
302 for (j = i - 1; j > 0; j--)
303 cleanup_setup_data_node(*(kobjp + j));
304 kfree(kobjp);
305out_setup_data_kobj:
306 kobject_put(setup_data_kobj);
307out:
308 return ret;
309}
310
311static int __init boot_params_ksysfs_init(void)
312{
313 int ret;
314 struct kobject *boot_params_kobj;
315
316 boot_params_kobj = kobject_create_and_add("boot_params",
317 kernel_kobj);
318 if (!boot_params_kobj) {
319 ret = -ENOMEM;
320 goto out;
321 }
322
323 ret = sysfs_create_group(boot_params_kobj, &boot_params_attr_group);
324 if (ret)
325 goto out_boot_params_kobj;
326
327 ret = create_setup_data_nodes(boot_params_kobj);
328 if (ret)
329 goto out_create_group;
330
331 return 0;
332out_create_group:
333 sysfs_remove_group(boot_params_kobj, &boot_params_attr_group);
334out_boot_params_kobj:
335 kobject_put(boot_params_kobj);
336out:
337 return ret;
338}
339
340arch_initcall(boot_params_ksysfs_init);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cb233bc9dee3..be4b456e444b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -447,6 +447,9 @@ static void __init parse_setup_data(void)
447 case SETUP_DTB: 447 case SETUP_DTB:
448 add_dtb(pa_data); 448 add_dtb(pa_data);
449 break; 449 break;
450 case SETUP_EFI:
451 parse_efi_setup(pa_data, data_len);
452 break;
450 default: 453 default:
451 break; 454 break;
452 } 455 }
@@ -924,8 +927,6 @@ void __init setup_arch(char **cmdline_p)
924 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; 927 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
925 setup_memory_map(); 928 setup_memory_map();
926 parse_setup_data(); 929 parse_setup_data();
927 /* update the e820_saved too */
928 e820_reserve_setup_data();
929 930
930 copy_edd(); 931 copy_edd();
931 932
@@ -987,6 +988,8 @@ void __init setup_arch(char **cmdline_p)
987 early_dump_pci_devices(); 988 early_dump_pci_devices();
988#endif 989#endif
989 990
991 /* update the e820_saved too */
992 e820_reserve_setup_data();
990 finish_e820_parsing(); 993 finish_e820_parsing();
991 994
992 if (efi_enabled(EFI_BOOT)) 995 if (efi_enabled(EFI_BOOT))
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bb32480c2d71..b3b19f46c016 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -30,6 +30,7 @@
30 */ 30 */
31struct cpa_data { 31struct cpa_data {
32 unsigned long *vaddr; 32 unsigned long *vaddr;
33 pgd_t *pgd;
33 pgprot_t mask_set; 34 pgprot_t mask_set;
34 pgprot_t mask_clr; 35 pgprot_t mask_clr;
35 int numpages; 36 int numpages;
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
322 return prot; 323 return prot;
323} 324}
324 325
325/* 326static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
326 * Lookup the page table entry for a virtual address. Return a pointer 327 unsigned int *level)
327 * to the entry and the level of the mapping.
328 *
329 * Note: We return pud and pmd either when the entry is marked large
330 * or when the present bit is not set. Otherwise we would return a
331 * pointer to a nonexisting mapping.
332 */
333pte_t *lookup_address(unsigned long address, unsigned int *level)
334{ 328{
335 pgd_t *pgd = pgd_offset_k(address);
336 pud_t *pud; 329 pud_t *pud;
337 pmd_t *pmd; 330 pmd_t *pmd;
338 331
@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
361 354
362 return pte_offset_kernel(pmd, address); 355 return pte_offset_kernel(pmd, address);
363} 356}
357
358/*
359 * Lookup the page table entry for a virtual address. Return a pointer
360 * to the entry and the level of the mapping.
361 *
362 * Note: We return pud and pmd either when the entry is marked large
363 * or when the present bit is not set. Otherwise we would return a
364 * pointer to a nonexisting mapping.
365 */
366pte_t *lookup_address(unsigned long address, unsigned int *level)
367{
368 return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
369}
364EXPORT_SYMBOL_GPL(lookup_address); 370EXPORT_SYMBOL_GPL(lookup_address);
365 371
372static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
373 unsigned int *level)
374{
375 if (cpa->pgd)
376 return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
377 address, level);
378
379 return lookup_address(address, level);
380}
381
366/* 382/*
367 * This is necessary because __pa() does not work on some 383 * This is necessary because __pa() does not work on some
368 * kinds of memory, like vmalloc() or the alloc_remap() 384 * kinds of memory, like vmalloc() or the alloc_remap()
@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
437 * Check for races, another CPU might have split this page 453 * Check for races, another CPU might have split this page
438 * up already: 454 * up already:
439 */ 455 */
440 tmp = lookup_address(address, &level); 456 tmp = _lookup_address_cpa(cpa, address, &level);
441 if (tmp != kpte) 457 if (tmp != kpte)
442 goto out_unlock; 458 goto out_unlock;
443 459
@@ -543,7 +559,8 @@ out_unlock:
543} 559}
544 560
545static int 561static int
546__split_large_page(pte_t *kpte, unsigned long address, struct page *base) 562__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
563 struct page *base)
547{ 564{
548 pte_t *pbase = (pte_t *)page_address(base); 565 pte_t *pbase = (pte_t *)page_address(base);
549 unsigned long pfn, pfninc = 1; 566 unsigned long pfn, pfninc = 1;
@@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
556 * Check for races, another CPU might have split this page 573 * Check for races, another CPU might have split this page
557 * up for us already: 574 * up for us already:
558 */ 575 */
559 tmp = lookup_address(address, &level); 576 tmp = _lookup_address_cpa(cpa, address, &level);
560 if (tmp != kpte) { 577 if (tmp != kpte) {
561 spin_unlock(&pgd_lock); 578 spin_unlock(&pgd_lock);
562 return 1; 579 return 1;
@@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
632 return 0; 649 return 0;
633} 650}
634 651
635static int split_large_page(pte_t *kpte, unsigned long address) 652static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
653 unsigned long address)
636{ 654{
637 struct page *base; 655 struct page *base;
638 656
@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address)
644 if (!base) 662 if (!base)
645 return -ENOMEM; 663 return -ENOMEM;
646 664
647 if (__split_large_page(kpte, address, base)) 665 if (__split_large_page(cpa, kpte, address, base))
648 __free_page(base); 666 __free_page(base);
649 667
650 return 0; 668 return 0;
651} 669}
652 670
671static bool try_to_free_pte_page(pte_t *pte)
672{
673 int i;
674
675 for (i = 0; i < PTRS_PER_PTE; i++)
676 if (!pte_none(pte[i]))
677 return false;
678
679 free_page((unsigned long)pte);
680 return true;
681}
682
683static bool try_to_free_pmd_page(pmd_t *pmd)
684{
685 int i;
686
687 for (i = 0; i < PTRS_PER_PMD; i++)
688 if (!pmd_none(pmd[i]))
689 return false;
690
691 free_page((unsigned long)pmd);
692 return true;
693}
694
695static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
696{
697 pte_t *pte = pte_offset_kernel(pmd, start);
698
699 while (start < end) {
700 set_pte(pte, __pte(0));
701
702 start += PAGE_SIZE;
703 pte++;
704 }
705
706 if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
707 pmd_clear(pmd);
708 return true;
709 }
710 return false;
711}
712
713static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
714 unsigned long start, unsigned long end)
715{
716 if (unmap_pte_range(pmd, start, end))
717 if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
718 pud_clear(pud);
719}
720
721static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
722{
723 pmd_t *pmd = pmd_offset(pud, start);
724
725 /*
726 * Not on a 2MB page boundary?
727 */
728 if (start & (PMD_SIZE - 1)) {
729 unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
730 unsigned long pre_end = min_t(unsigned long, end, next_page);
731
732 __unmap_pmd_range(pud, pmd, start, pre_end);
733
734 start = pre_end;
735 pmd++;
736 }
737
738 /*
739 * Try to unmap in 2M chunks.
740 */
741 while (end - start >= PMD_SIZE) {
742 if (pmd_large(*pmd))
743 pmd_clear(pmd);
744 else
745 __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
746
747 start += PMD_SIZE;
748 pmd++;
749 }
750
751 /*
752 * 4K leftovers?
753 */
754 if (start < end)
755 return __unmap_pmd_range(pud, pmd, start, end);
756
757 /*
758 * Try again to free the PMD page if haven't succeeded above.
759 */
760 if (!pud_none(*pud))
761 if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
762 pud_clear(pud);
763}
764
765static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
766{
767 pud_t *pud = pud_offset(pgd, start);
768
769 /*
770 * Not on a GB page boundary?
771 */
772 if (start & (PUD_SIZE - 1)) {
773 unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
774 unsigned long pre_end = min_t(unsigned long, end, next_page);
775
776 unmap_pmd_range(pud, start, pre_end);
777
778 start = pre_end;
779 pud++;
780 }
781
782 /*
783 * Try to unmap in 1G chunks?
784 */
785 while (end - start >= PUD_SIZE) {
786
787 if (pud_large(*pud))
788 pud_clear(pud);
789 else
790 unmap_pmd_range(pud, start, start + PUD_SIZE);
791
792 start += PUD_SIZE;
793 pud++;
794 }
795
796 /*
797 * 2M leftovers?
798 */
799 if (start < end)
800 unmap_pmd_range(pud, start, end);
801
802 /*
803 * No need to try to free the PUD page because we'll free it in
804 * populate_pgd's error path
805 */
806}
807
808static int alloc_pte_page(pmd_t *pmd)
809{
810 pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
811 if (!pte)
812 return -1;
813
814 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
815 return 0;
816}
817
818static int alloc_pmd_page(pud_t *pud)
819{
820 pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
821 if (!pmd)
822 return -1;
823
824 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
825 return 0;
826}
827
828static void populate_pte(struct cpa_data *cpa,
829 unsigned long start, unsigned long end,
830 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
831{
832 pte_t *pte;
833
834 pte = pte_offset_kernel(pmd, start);
835
836 while (num_pages-- && start < end) {
837
838 /* deal with the NX bit */
839 if (!(pgprot_val(pgprot) & _PAGE_NX))
840 cpa->pfn &= ~_PAGE_NX;
841
842 set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
843
844 start += PAGE_SIZE;
845 cpa->pfn += PAGE_SIZE;
846 pte++;
847 }
848}
849
850static int populate_pmd(struct cpa_data *cpa,
851 unsigned long start, unsigned long end,
852 unsigned num_pages, pud_t *pud, pgprot_t pgprot)
853{
854 unsigned int cur_pages = 0;
855 pmd_t *pmd;
856
857 /*
858 * Not on a 2M boundary?
859 */
860 if (start & (PMD_SIZE - 1)) {
861 unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
862 unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
863
864 pre_end = min_t(unsigned long, pre_end, next_page);
865 cur_pages = (pre_end - start) >> PAGE_SHIFT;
866 cur_pages = min_t(unsigned int, num_pages, cur_pages);
867
868 /*
869 * Need a PTE page?
870 */
871 pmd = pmd_offset(pud, start);
872 if (pmd_none(*pmd))
873 if (alloc_pte_page(pmd))
874 return -1;
875
876 populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
877
878 start = pre_end;
879 }
880
881 /*
882 * We mapped them all?
883 */
884 if (num_pages == cur_pages)
885 return cur_pages;
886
887 while (end - start >= PMD_SIZE) {
888
889 /*
890 * We cannot use a 1G page so allocate a PMD page if needed.
891 */
892 if (pud_none(*pud))
893 if (alloc_pmd_page(pud))
894 return -1;
895
896 pmd = pmd_offset(pud, start);
897
898 set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
899
900 start += PMD_SIZE;
901 cpa->pfn += PMD_SIZE;
902 cur_pages += PMD_SIZE >> PAGE_SHIFT;
903 }
904
905 /*
906 * Map trailing 4K pages.
907 */
908 if (start < end) {
909 pmd = pmd_offset(pud, start);
910 if (pmd_none(*pmd))
911 if (alloc_pte_page(pmd))
912 return -1;
913
914 populate_pte(cpa, start, end, num_pages - cur_pages,
915 pmd, pgprot);
916 }
917 return num_pages;
918}
919
920static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
921 pgprot_t pgprot)
922{
923 pud_t *pud;
924 unsigned long end;
925 int cur_pages = 0;
926
927 end = start + (cpa->numpages << PAGE_SHIFT);
928
929 /*
930 * Not on a Gb page boundary? => map everything up to it with
931 * smaller pages.
932 */
933 if (start & (PUD_SIZE - 1)) {
934 unsigned long pre_end;
935 unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
936
937 pre_end = min_t(unsigned long, end, next_page);
938 cur_pages = (pre_end - start) >> PAGE_SHIFT;
939 cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
940
941 pud = pud_offset(pgd, start);
942
943 /*
944 * Need a PMD page?
945 */
946 if (pud_none(*pud))
947 if (alloc_pmd_page(pud))
948 return -1;
949
950 cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
951 pud, pgprot);
952 if (cur_pages < 0)
953 return cur_pages;
954
955 start = pre_end;
956 }
957
958 /* We mapped them all? */
959 if (cpa->numpages == cur_pages)
960 return cur_pages;
961
962 pud = pud_offset(pgd, start);
963
964 /*
965 * Map everything starting from the Gb boundary, possibly with 1G pages
966 */
967 while (end - start >= PUD_SIZE) {
968 set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
969
970 start += PUD_SIZE;
971 cpa->pfn += PUD_SIZE;
972 cur_pages += PUD_SIZE >> PAGE_SHIFT;
973 pud++;
974 }
975
976 /* Map trailing leftover */
977 if (start < end) {
978 int tmp;
979
980 pud = pud_offset(pgd, start);
981 if (pud_none(*pud))
982 if (alloc_pmd_page(pud))
983 return -1;
984
985 tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
986 pud, pgprot);
987 if (tmp < 0)
988 return cur_pages;
989
990 cur_pages += tmp;
991 }
992 return cur_pages;
993}
994
995/*
996 * Restrictions for kernel page table do not necessarily apply when mapping in
997 * an alternate PGD.
998 */
999static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
1000{
1001 pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
1002 bool allocd_pgd = false;
1003 pgd_t *pgd_entry;
1004 pud_t *pud = NULL; /* shut up gcc */
1005 int ret;
1006
1007 pgd_entry = cpa->pgd + pgd_index(addr);
1008
1009 /*
1010 * Allocate a PUD page and hand it down for mapping.
1011 */
1012 if (pgd_none(*pgd_entry)) {
1013 pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
1014 if (!pud)
1015 return -1;
1016
1017 set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
1018 allocd_pgd = true;
1019 }
1020
1021 pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
1022 pgprot_val(pgprot) |= pgprot_val(cpa->mask_set);
1023
1024 ret = populate_pud(cpa, addr, pgd_entry, pgprot);
1025 if (ret < 0) {
1026 unmap_pud_range(pgd_entry, addr,
1027 addr + (cpa->numpages << PAGE_SHIFT));
1028
1029 if (allocd_pgd) {
1030 /*
1031 * If I allocated this PUD page, I can just as well
1032 * free it in this error path.
1033 */
1034 pgd_clear(pgd_entry);
1035 free_page((unsigned long)pud);
1036 }
1037 return ret;
1038 }
1039 cpa->numpages = ret;
1040 return 0;
1041}
1042
653static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, 1043static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
654 int primary) 1044 int primary)
655{ 1045{
1046 if (cpa->pgd)
1047 return populate_pgd(cpa, vaddr);
1048
656 /* 1049 /*
657 * Ignore all non primary paths. 1050 * Ignore all non primary paths.
658 */ 1051 */
@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
697 else 1090 else
698 address = *cpa->vaddr; 1091 address = *cpa->vaddr;
699repeat: 1092repeat:
700 kpte = lookup_address(address, &level); 1093 kpte = _lookup_address_cpa(cpa, address, &level);
701 if (!kpte) 1094 if (!kpte)
702 return __cpa_process_fault(cpa, address, primary); 1095 return __cpa_process_fault(cpa, address, primary);
703 1096
@@ -761,7 +1154,7 @@ repeat:
761 /* 1154 /*
762 * We have to split the large page: 1155 * We have to split the large page:
763 */ 1156 */
764 err = split_large_page(kpte, address); 1157 err = split_large_page(cpa, kpte, address);
765 if (!err) { 1158 if (!err) {
766 /* 1159 /*
767 * Do a global flush tlb after splitting the large page 1160 * Do a global flush tlb after splitting the large page
@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
910 int ret, cache, checkalias; 1303 int ret, cache, checkalias;
911 unsigned long baddr = 0; 1304 unsigned long baddr = 0;
912 1305
1306 memset(&cpa, 0, sizeof(cpa));
1307
913 /* 1308 /*
914 * Check, if we are requested to change a not supported 1309 * Check, if we are requested to change a not supported
915 * feature: 1310 * feature:
@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
1356{ 1751{
1357 unsigned long tempaddr = (unsigned long) page_address(page); 1752 unsigned long tempaddr = (unsigned long) page_address(page);
1358 struct cpa_data cpa = { .vaddr = &tempaddr, 1753 struct cpa_data cpa = { .vaddr = &tempaddr,
1754 .pgd = NULL,
1359 .numpages = numpages, 1755 .numpages = numpages,
1360 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), 1756 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
1361 .mask_clr = __pgprot(0), 1757 .mask_clr = __pgprot(0),
@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
1374{ 1770{
1375 unsigned long tempaddr = (unsigned long) page_address(page); 1771 unsigned long tempaddr = (unsigned long) page_address(page);
1376 struct cpa_data cpa = { .vaddr = &tempaddr, 1772 struct cpa_data cpa = { .vaddr = &tempaddr,
1773 .pgd = NULL,
1377 .numpages = numpages, 1774 .numpages = numpages,
1378 .mask_set = __pgprot(0), 1775 .mask_set = __pgprot(0),
1379 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), 1776 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page)
1434 1831
1435#endif /* CONFIG_DEBUG_PAGEALLOC */ 1832#endif /* CONFIG_DEBUG_PAGEALLOC */
1436 1833
1834int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
1835 unsigned numpages, unsigned long page_flags)
1836{
1837 int retval = -EINVAL;
1838
1839 struct cpa_data cpa = {
1840 .vaddr = &address,
1841 .pfn = pfn,
1842 .pgd = pgd,
1843 .numpages = numpages,
1844 .mask_set = __pgprot(0),
1845 .mask_clr = __pgprot(0),
1846 .flags = 0,
1847 };
1848
1849 if (!(__supported_pte_mask & _PAGE_NX))
1850 goto out;
1851
1852 if (!(page_flags & _PAGE_NX))
1853 cpa.mask_clr = __pgprot(_PAGE_NX);
1854
1855 cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
1856
1857 retval = __change_page_attr_set_clr(&cpa, 0);
1858 __flush_tlb_all();
1859
1860out:
1861 return retval;
1862}
1863
1437/* 1864/*
1438 * The testcases use internal knowledge of the implementation that shouldn't 1865 * The testcases use internal knowledge of the implementation that shouldn't
1439 * be exposed to the rest of the kernel. Include these directly here. 1866 * be exposed to the rest of the kernel. Include these directly here.
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index cceb813044ef..d62ec87a2b26 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -12,6 +12,8 @@
12 * Bibo Mao <bibo.mao@intel.com> 12 * Bibo Mao <bibo.mao@intel.com>
13 * Chandramouli Narayanan <mouli@linux.intel.com> 13 * Chandramouli Narayanan <mouli@linux.intel.com>
14 * Huang Ying <ying.huang@intel.com> 14 * Huang Ying <ying.huang@intel.com>
15 * Copyright (C) 2013 SuSE Labs
16 * Borislav Petkov <bp@suse.de> - runtime services VA mapping
15 * 17 *
16 * Copied from efi_32.c to eliminate the duplicated code between EFI 18 * Copied from efi_32.c to eliminate the duplicated code between EFI
17 * 32/64 support code. --ying 2007-10-26 19 * 32/64 support code. --ying 2007-10-26
@@ -51,7 +53,7 @@
51#include <asm/x86_init.h> 53#include <asm/x86_init.h>
52#include <asm/rtc.h> 54#include <asm/rtc.h>
53 55
54#define EFI_DEBUG 1 56#define EFI_DEBUG
55 57
56#define EFI_MIN_RESERVE 5120 58#define EFI_MIN_RESERVE 5120
57 59
@@ -74,6 +76,8 @@ static __initdata efi_config_table_type_t arch_tables[] = {
74 {NULL_GUID, NULL, NULL}, 76 {NULL_GUID, NULL, NULL},
75}; 77};
76 78
79u64 efi_setup; /* efi setup_data physical address */
80
77/* 81/*
78 * Returns 1 if 'facility' is enabled, 0 otherwise. 82 * Returns 1 if 'facility' is enabled, 0 otherwise.
79 */ 83 */
@@ -110,7 +114,6 @@ static int __init setup_storage_paranoia(char *arg)
110} 114}
111early_param("efi_no_storage_paranoia", setup_storage_paranoia); 115early_param("efi_no_storage_paranoia", setup_storage_paranoia);
112 116
113
114static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) 117static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
115{ 118{
116 unsigned long flags; 119 unsigned long flags;
@@ -398,9 +401,9 @@ int __init efi_memblock_x86_reserve_range(void)
398 return 0; 401 return 0;
399} 402}
400 403
401#if EFI_DEBUG
402static void __init print_efi_memmap(void) 404static void __init print_efi_memmap(void)
403{ 405{
406#ifdef EFI_DEBUG
404 efi_memory_desc_t *md; 407 efi_memory_desc_t *md;
405 void *p; 408 void *p;
406 int i; 409 int i;
@@ -415,8 +418,8 @@ static void __init print_efi_memmap(void)
415 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), 418 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
416 (md->num_pages >> (20 - EFI_PAGE_SHIFT))); 419 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
417 } 420 }
418}
419#endif /* EFI_DEBUG */ 421#endif /* EFI_DEBUG */
422}
420 423
421void __init efi_reserve_boot_services(void) 424void __init efi_reserve_boot_services(void)
422{ 425{
@@ -436,7 +439,7 @@ void __init efi_reserve_boot_services(void)
436 * - Not within any part of the kernel 439 * - Not within any part of the kernel
437 * - Not the bios reserved area 440 * - Not the bios reserved area
438 */ 441 */
439 if ((start+size >= __pa_symbol(_text) 442 if ((start + size > __pa_symbol(_text)
440 && start <= __pa_symbol(_end)) || 443 && start <= __pa_symbol(_end)) ||
441 !e820_all_mapped(start, start+size, E820_RAM) || 444 !e820_all_mapped(start, start+size, E820_RAM) ||
442 memblock_is_region_reserved(start, size)) { 445 memblock_is_region_reserved(start, size)) {
@@ -489,18 +492,27 @@ static int __init efi_systab_init(void *phys)
489{ 492{
490 if (efi_enabled(EFI_64BIT)) { 493 if (efi_enabled(EFI_64BIT)) {
491 efi_system_table_64_t *systab64; 494 efi_system_table_64_t *systab64;
495 struct efi_setup_data *data = NULL;
492 u64 tmp = 0; 496 u64 tmp = 0;
493 497
498 if (efi_setup) {
499 data = early_memremap(efi_setup, sizeof(*data));
500 if (!data)
501 return -ENOMEM;
502 }
494 systab64 = early_ioremap((unsigned long)phys, 503 systab64 = early_ioremap((unsigned long)phys,
495 sizeof(*systab64)); 504 sizeof(*systab64));
496 if (systab64 == NULL) { 505 if (systab64 == NULL) {
497 pr_err("Couldn't map the system table!\n"); 506 pr_err("Couldn't map the system table!\n");
507 if (data)
508 early_iounmap(data, sizeof(*data));
498 return -ENOMEM; 509 return -ENOMEM;
499 } 510 }
500 511
501 efi_systab.hdr = systab64->hdr; 512 efi_systab.hdr = systab64->hdr;
502 efi_systab.fw_vendor = systab64->fw_vendor; 513 efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
503 tmp |= systab64->fw_vendor; 514 systab64->fw_vendor;
515 tmp |= data ? data->fw_vendor : systab64->fw_vendor;
504 efi_systab.fw_revision = systab64->fw_revision; 516 efi_systab.fw_revision = systab64->fw_revision;
505 efi_systab.con_in_handle = systab64->con_in_handle; 517 efi_systab.con_in_handle = systab64->con_in_handle;
506 tmp |= systab64->con_in_handle; 518 tmp |= systab64->con_in_handle;
@@ -514,15 +526,20 @@ static int __init efi_systab_init(void *phys)
514 tmp |= systab64->stderr_handle; 526 tmp |= systab64->stderr_handle;
515 efi_systab.stderr = systab64->stderr; 527 efi_systab.stderr = systab64->stderr;
516 tmp |= systab64->stderr; 528 tmp |= systab64->stderr;
517 efi_systab.runtime = (void *)(unsigned long)systab64->runtime; 529 efi_systab.runtime = data ?
518 tmp |= systab64->runtime; 530 (void *)(unsigned long)data->runtime :
531 (void *)(unsigned long)systab64->runtime;
532 tmp |= data ? data->runtime : systab64->runtime;
519 efi_systab.boottime = (void *)(unsigned long)systab64->boottime; 533 efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
520 tmp |= systab64->boottime; 534 tmp |= systab64->boottime;
521 efi_systab.nr_tables = systab64->nr_tables; 535 efi_systab.nr_tables = systab64->nr_tables;
522 efi_systab.tables = systab64->tables; 536 efi_systab.tables = data ? (unsigned long)data->tables :
523 tmp |= systab64->tables; 537 systab64->tables;
538 tmp |= data ? data->tables : systab64->tables;
524 539
525 early_iounmap(systab64, sizeof(*systab64)); 540 early_iounmap(systab64, sizeof(*systab64));
541 if (data)
542 early_iounmap(data, sizeof(*data));
526#ifdef CONFIG_X86_32 543#ifdef CONFIG_X86_32
527 if (tmp >> 32) { 544 if (tmp >> 32) {
528 pr_err("EFI data located above 4GB, disabling EFI.\n"); 545 pr_err("EFI data located above 4GB, disabling EFI.\n");
@@ -626,6 +643,62 @@ static int __init efi_memmap_init(void)
626 return 0; 643 return 0;
627} 644}
628 645
646/*
647 * A number of config table entries get remapped to virtual addresses
648 * after entering EFI virtual mode. However, the kexec kernel requires
649 * their physical addresses therefore we pass them via setup_data and
650 * correct those entries to their respective physical addresses here.
651 *
652 * Currently only handles smbios which is necessary for some firmware
653 * implementation.
654 */
655static int __init efi_reuse_config(u64 tables, int nr_tables)
656{
657 int i, sz, ret = 0;
658 void *p, *tablep;
659 struct efi_setup_data *data;
660
661 if (!efi_setup)
662 return 0;
663
664 if (!efi_enabled(EFI_64BIT))
665 return 0;
666
667 data = early_memremap(efi_setup, sizeof(*data));
668 if (!data) {
669 ret = -ENOMEM;
670 goto out;
671 }
672
673 if (!data->smbios)
674 goto out_memremap;
675
676 sz = sizeof(efi_config_table_64_t);
677
678 p = tablep = early_memremap(tables, nr_tables * sz);
679 if (!p) {
680 pr_err("Could not map Configuration table!\n");
681 ret = -ENOMEM;
682 goto out_memremap;
683 }
684
685 for (i = 0; i < efi.systab->nr_tables; i++) {
686 efi_guid_t guid;
687
688 guid = ((efi_config_table_64_t *)p)->guid;
689
690 if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID))
691 ((efi_config_table_64_t *)p)->table = data->smbios;
692 p += sz;
693 }
694 early_iounmap(tablep, nr_tables * sz);
695
696out_memremap:
697 early_iounmap(data, sizeof(*data));
698out:
699 return ret;
700}
701
629void __init efi_init(void) 702void __init efi_init(void)
630{ 703{
631 efi_char16_t *c16; 704 efi_char16_t *c16;
@@ -651,6 +724,10 @@ void __init efi_init(void)
651 724
652 set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); 725 set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
653 726
727 efi.config_table = (unsigned long)efi.systab->tables;
728 efi.fw_vendor = (unsigned long)efi.systab->fw_vendor;
729 efi.runtime = (unsigned long)efi.systab->runtime;
730
654 /* 731 /*
655 * Show what we know for posterity 732 * Show what we know for posterity
656 */ 733 */
@@ -667,6 +744,9 @@ void __init efi_init(void)
667 efi.systab->hdr.revision >> 16, 744 efi.systab->hdr.revision >> 16,
668 efi.systab->hdr.revision & 0xffff, vendor); 745 efi.systab->hdr.revision & 0xffff, vendor);
669 746
747 if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
748 return;
749
670 if (efi_config_init(arch_tables)) 750 if (efi_config_init(arch_tables))
671 return; 751 return;
672 752
@@ -684,15 +764,12 @@ void __init efi_init(void)
684 return; 764 return;
685 set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); 765 set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
686 } 766 }
687
688 if (efi_memmap_init()) 767 if (efi_memmap_init())
689 return; 768 return;
690 769
691 set_bit(EFI_MEMMAP, &x86_efi_facility); 770 set_bit(EFI_MEMMAP, &x86_efi_facility);
692 771
693#if EFI_DEBUG
694 print_efi_memmap(); 772 print_efi_memmap();
695#endif
696} 773}
697 774
698void __init efi_late_init(void) 775void __init efi_late_init(void)
@@ -741,36 +818,38 @@ void efi_memory_uc(u64 addr, unsigned long size)
741 set_memory_uc(addr, npages); 818 set_memory_uc(addr, npages);
742} 819}
743 820
744/* 821void __init old_map_region(efi_memory_desc_t *md)
745 * This function will switch the EFI runtime services to virtual mode.
746 * Essentially, look through the EFI memmap and map every region that
747 * has the runtime attribute bit set in its memory descriptor and update
748 * that memory descriptor with the virtual address obtained from ioremap().
749 * This enables the runtime services to be called without having to
750 * thunk back into physical mode for every invocation.
751 */
752void __init efi_enter_virtual_mode(void)
753{ 822{
754 efi_memory_desc_t *md, *prev_md = NULL; 823 u64 start_pfn, end_pfn, end;
755 efi_status_t status;
756 unsigned long size; 824 unsigned long size;
757 u64 end, systab, start_pfn, end_pfn; 825 void *va;
758 void *p, *va, *new_memmap = NULL;
759 int count = 0;
760 826
761 efi.systab = NULL; 827 start_pfn = PFN_DOWN(md->phys_addr);
828 size = md->num_pages << PAGE_SHIFT;
829 end = md->phys_addr + size;
830 end_pfn = PFN_UP(end);
762 831
763 /* 832 if (pfn_range_is_mapped(start_pfn, end_pfn)) {
764 * We don't do virtual mode, since we don't do runtime services, on 833 va = __va(md->phys_addr);
765 * non-native EFI
766 */
767 834
768 if (!efi_is_native()) { 835 if (!(md->attribute & EFI_MEMORY_WB))
769 efi_unmap_memmap(); 836 efi_memory_uc((u64)(unsigned long)va, size);
770 return; 837 } else
771 } 838 va = efi_ioremap(md->phys_addr, size,
839 md->type, md->attribute);
840
841 md->virt_addr = (u64) (unsigned long) va;
842 if (!va)
843 pr_err("ioremap of 0x%llX failed!\n",
844 (unsigned long long)md->phys_addr);
845}
846
847/* Merge contiguous regions of the same type and attribute */
848static void __init efi_merge_regions(void)
849{
850 void *p;
851 efi_memory_desc_t *md, *prev_md = NULL;
772 852
773 /* Merge contiguous regions of the same type and attribute */
774 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 853 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
775 u64 prev_size; 854 u64 prev_size;
776 md = p; 855 md = p;
@@ -796,6 +875,77 @@ void __init efi_enter_virtual_mode(void)
796 } 875 }
797 prev_md = md; 876 prev_md = md;
798 } 877 }
878}
879
880static void __init get_systab_virt_addr(efi_memory_desc_t *md)
881{
882 unsigned long size;
883 u64 end, systab;
884
885 size = md->num_pages << EFI_PAGE_SHIFT;
886 end = md->phys_addr + size;
887 systab = (u64)(unsigned long)efi_phys.systab;
888 if (md->phys_addr <= systab && systab < end) {
889 systab += md->virt_addr - md->phys_addr;
890 efi.systab = (efi_system_table_t *)(unsigned long)systab;
891 }
892}
893
894static int __init save_runtime_map(void)
895{
896 efi_memory_desc_t *md;
897 void *tmp, *p, *q = NULL;
898 int count = 0;
899
900 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
901 md = p;
902
903 if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
904 (md->type == EFI_BOOT_SERVICES_CODE) ||
905 (md->type == EFI_BOOT_SERVICES_DATA))
906 continue;
907 tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL);
908 if (!tmp)
909 goto out;
910 q = tmp;
911
912 memcpy(q + count * memmap.desc_size, md, memmap.desc_size);
913 count++;
914 }
915
916 efi_runtime_map_setup(q, count, memmap.desc_size);
917
918 return 0;
919out:
920 kfree(q);
921 return -ENOMEM;
922}
923
924/*
925 * Map efi regions which were passed via setup_data. The virt_addr is a fixed
926 * addr which was used in first kernel of a kexec boot.
927 */
928static void __init efi_map_regions_fixed(void)
929{
930 void *p;
931 efi_memory_desc_t *md;
932
933 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
934 md = p;
935 efi_map_region_fixed(md); /* FIXME: add error handling */
936 get_systab_virt_addr(md);
937 }
938
939}
940
941/*
942 * Map efi memory ranges for runtime serivce and update new_memmap with virtual
943 * addresses.
944 */
945static void * __init efi_map_regions(int *count)
946{
947 efi_memory_desc_t *md;
948 void *p, *tmp, *new_memmap = NULL;
799 949
800 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 950 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
801 md = p; 951 md = p;
@@ -807,53 +957,95 @@ void __init efi_enter_virtual_mode(void)
807 continue; 957 continue;
808 } 958 }
809 959
810 size = md->num_pages << EFI_PAGE_SHIFT; 960 efi_map_region(md);
811 end = md->phys_addr + size; 961 get_systab_virt_addr(md);
812 962
813 start_pfn = PFN_DOWN(md->phys_addr); 963 tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
814 end_pfn = PFN_UP(end); 964 GFP_KERNEL);
815 if (pfn_range_is_mapped(start_pfn, end_pfn)) { 965 if (!tmp)
816 va = __va(md->phys_addr); 966 goto out;
967 new_memmap = tmp;
968 memcpy(new_memmap + (*count * memmap.desc_size), md,
969 memmap.desc_size);
970 (*count)++;
971 }
817 972
818 if (!(md->attribute & EFI_MEMORY_WB)) 973 return new_memmap;
819 efi_memory_uc((u64)(unsigned long)va, size); 974out:
820 } else 975 kfree(new_memmap);
821 va = efi_ioremap(md->phys_addr, size, 976 return NULL;
822 md->type, md->attribute); 977}
978
979/*
980 * This function will switch the EFI runtime services to virtual mode.
981 * Essentially, we look through the EFI memmap and map every region that
982 * has the runtime attribute bit set in its memory descriptor into the
983 * ->trampoline_pgd page table using a top-down VA allocation scheme.
984 *
985 * The old method which used to update that memory descriptor with the
986 * virtual address obtained from ioremap() is still supported when the
987 * kernel is booted with efi=old_map on its command line. Same old
988 * method enabled the runtime services to be called without having to
989 * thunk back into physical mode for every invocation.
990 *
991 * The new method does a pagetable switch in a preemption-safe manner
992 * so that we're in a different address space when calling a runtime
993 * function. For function arguments passing we do copy the PGDs of the
994 * kernel page table into ->trampoline_pgd prior to each call.
995 *
996 * Specially for kexec boot, efi runtime maps in previous kernel should
997 * be passed in via setup_data. In that case runtime ranges will be mapped
998 * to the same virtual addresses as the first kernel.
999 */
1000void __init efi_enter_virtual_mode(void)
1001{
1002 efi_status_t status;
1003 void *new_memmap = NULL;
1004 int err, count = 0;
823 1005
824 md->virt_addr = (u64) (unsigned long) va; 1006 efi.systab = NULL;
825 1007
826 if (!va) { 1008 /*
827 pr_err("ioremap of 0x%llX failed!\n", 1009 * We don't do virtual mode, since we don't do runtime services, on
828 (unsigned long long)md->phys_addr); 1010 * non-native EFI
829 continue; 1011 */
830 } 1012 if (!efi_is_native()) {
1013 efi_unmap_memmap();
1014 return;
1015 }
831 1016
832 systab = (u64) (unsigned long) efi_phys.systab; 1017 if (efi_setup) {
833 if (md->phys_addr <= systab && systab < end) { 1018 efi_map_regions_fixed();
834 systab += md->virt_addr - md->phys_addr; 1019 } else {
835 efi.systab = (efi_system_table_t *) (unsigned long) systab; 1020 efi_merge_regions();
1021 new_memmap = efi_map_regions(&count);
1022 if (!new_memmap) {
1023 pr_err("Error reallocating memory, EFI runtime non-functional!\n");
1024 return;
836 } 1025 }
837 new_memmap = krealloc(new_memmap,
838 (count + 1) * memmap.desc_size,
839 GFP_KERNEL);
840 memcpy(new_memmap + (count * memmap.desc_size), md,
841 memmap.desc_size);
842 count++;
843 } 1026 }
844 1027
1028 err = save_runtime_map();
1029 if (err)
1030 pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
1031
845 BUG_ON(!efi.systab); 1032 BUG_ON(!efi.systab);
846 1033
847 status = phys_efi_set_virtual_address_map( 1034 efi_setup_page_tables();
848 memmap.desc_size * count, 1035 efi_sync_low_kernel_mappings();
849 memmap.desc_size,
850 memmap.desc_version,
851 (efi_memory_desc_t *)__pa(new_memmap));
852 1036
853 if (status != EFI_SUCCESS) { 1037 if (!efi_setup) {
854 pr_alert("Unable to switch EFI into virtual mode " 1038 status = phys_efi_set_virtual_address_map(
855 "(status=%lx)!\n", status); 1039 memmap.desc_size * count,
856 panic("EFI call to SetVirtualAddressMap() failed!"); 1040 memmap.desc_size,
1041 memmap.desc_version,
1042 (efi_memory_desc_t *)__pa(new_memmap));
1043
1044 if (status != EFI_SUCCESS) {
1045 pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
1046 status);
1047 panic("EFI call to SetVirtualAddressMap() failed!");
1048 }
857 } 1049 }
858 1050
859 /* 1051 /*
@@ -876,7 +1068,8 @@ void __init efi_enter_virtual_mode(void)
876 efi.query_variable_info = virt_efi_query_variable_info; 1068 efi.query_variable_info = virt_efi_query_variable_info;
877 efi.update_capsule = virt_efi_update_capsule; 1069 efi.update_capsule = virt_efi_update_capsule;
878 efi.query_capsule_caps = virt_efi_query_capsule_caps; 1070 efi.query_capsule_caps = virt_efi_query_capsule_caps;
879 if (__supported_pte_mask & _PAGE_NX) 1071
1072 if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
880 runtime_code_page_mkexec(); 1073 runtime_code_page_mkexec();
881 1074
882 kfree(new_memmap); 1075 kfree(new_memmap);
@@ -1006,3 +1199,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
1006 return EFI_SUCCESS; 1199 return EFI_SUCCESS;
1007} 1200}
1008EXPORT_SYMBOL_GPL(efi_query_variable_store); 1201EXPORT_SYMBOL_GPL(efi_query_variable_store);
1202
1203static int __init parse_efi_cmdline(char *str)
1204{
1205 if (*str == '=')
1206 str++;
1207
1208 if (!strncmp(str, "old_map", 7))
1209 set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
1210
1211 return 0;
1212}
1213early_param("efi", parse_efi_cmdline);
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 40e446941dd7..249b183cf417 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -37,9 +37,19 @@
37 * claim EFI runtime service handler exclusively and to duplicate a memory in 37 * claim EFI runtime service handler exclusively and to duplicate a memory in
38 * low memory space say 0 - 3G. 38 * low memory space say 0 - 3G.
39 */ 39 */
40
41static unsigned long efi_rt_eflags; 40static unsigned long efi_rt_eflags;
42 41
42void efi_sync_low_kernel_mappings(void) {}
43void efi_setup_page_tables(void) {}
44
45void __init efi_map_region(efi_memory_desc_t *md)
46{
47 old_map_region(md);
48}
49
50void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
51void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
52
43void efi_call_phys_prelog(void) 53void efi_call_phys_prelog(void)
44{ 54{
45 struct desc_ptr gdt_descr; 55 struct desc_ptr gdt_descr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 39a0e7f1f0a3..6284f158a47d 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,10 +38,28 @@
38#include <asm/efi.h> 38#include <asm/efi.h>
39#include <asm/cacheflush.h> 39#include <asm/cacheflush.h>
40#include <asm/fixmap.h> 40#include <asm/fixmap.h>
41#include <asm/realmode.h>
41 42
42static pgd_t *save_pgd __initdata; 43static pgd_t *save_pgd __initdata;
43static unsigned long efi_flags __initdata; 44static unsigned long efi_flags __initdata;
44 45
46/*
47 * We allocate runtime services regions bottom-up, starting from -4G, i.e.
48 * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
49 */
50static u64 efi_va = -4 * (1UL << 30);
51#define EFI_VA_END (-68 * (1UL << 30))
52
53/*
54 * Scratch space used for switching the pagetable in the EFI stub
55 */
56struct efi_scratch {
57 u64 r15;
58 u64 prev_cr3;
59 pgd_t *efi_pgt;
60 bool use_pgd;
61};
62
45static void __init early_code_mapping_set_exec(int executable) 63static void __init early_code_mapping_set_exec(int executable)
46{ 64{
47 efi_memory_desc_t *md; 65 efi_memory_desc_t *md;
@@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void)
65 int pgd; 83 int pgd;
66 int n_pgds; 84 int n_pgds;
67 85
86 if (!efi_enabled(EFI_OLD_MEMMAP))
87 return;
88
68 early_code_mapping_set_exec(1); 89 early_code_mapping_set_exec(1);
69 local_irq_save(efi_flags); 90 local_irq_save(efi_flags);
70 91
@@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void)
86 */ 107 */
87 int pgd; 108 int pgd;
88 int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); 109 int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
110
111 if (!efi_enabled(EFI_OLD_MEMMAP))
112 return;
113
89 for (pgd = 0; pgd < n_pgds; pgd++) 114 for (pgd = 0; pgd < n_pgds; pgd++)
90 set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); 115 set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
91 kfree(save_pgd); 116 kfree(save_pgd);
@@ -94,6 +119,96 @@ void __init efi_call_phys_epilog(void)
94 early_code_mapping_set_exec(0); 119 early_code_mapping_set_exec(0);
95} 120}
96 121
122/*
123 * Add low kernel mappings for passing arguments to EFI functions.
124 */
125void efi_sync_low_kernel_mappings(void)
126{
127 unsigned num_pgds;
128 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
129
130 if (efi_enabled(EFI_OLD_MEMMAP))
131 return;
132
133 num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
134
135 memcpy(pgd + pgd_index(PAGE_OFFSET),
136 init_mm.pgd + pgd_index(PAGE_OFFSET),
137 sizeof(pgd_t) * num_pgds);
138}
139
140void efi_setup_page_tables(void)
141{
142 efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
143
144 if (!efi_enabled(EFI_OLD_MEMMAP))
145 efi_scratch.use_pgd = true;
146}
147
148static void __init __map_region(efi_memory_desc_t *md, u64 va)
149{
150 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
151 unsigned long pf = 0;
152
153 if (!(md->attribute & EFI_MEMORY_WB))
154 pf |= _PAGE_PCD;
155
156 if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
157 pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
158 md->phys_addr, va);
159}
160
161void __init efi_map_region(efi_memory_desc_t *md)
162{
163 unsigned long size = md->num_pages << PAGE_SHIFT;
164 u64 pa = md->phys_addr;
165
166 if (efi_enabled(EFI_OLD_MEMMAP))
167 return old_map_region(md);
168
169 /*
170 * Make sure the 1:1 mappings are present as a catch-all for b0rked
171 * firmware which doesn't update all internal pointers after switching
172 * to virtual mode and would otherwise crap on us.
173 */
174 __map_region(md, md->phys_addr);
175
176 efi_va -= size;
177
178 /* Is PA 2M-aligned? */
179 if (!(pa & (PMD_SIZE - 1))) {
180 efi_va &= PMD_MASK;
181 } else {
182 u64 pa_offset = pa & (PMD_SIZE - 1);
183 u64 prev_va = efi_va;
184
185 /* get us the same offset within this 2M page */
186 efi_va = (efi_va & PMD_MASK) + pa_offset;
187
188 if (efi_va > prev_va)
189 efi_va -= PMD_SIZE;
190 }
191
192 if (efi_va < EFI_VA_END) {
193 pr_warn(FW_WARN "VA address range overflow!\n");
194 return;
195 }
196
197 /* Do the VA map */
198 __map_region(md, efi_va);
199 md->virt_addr = efi_va;
200}
201
202/*
203 * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges.
204 * md->virt_addr is the original virtual address which had been mapped in kexec
205 * 1st kernel.
206 */
207void __init efi_map_region_fixed(efi_memory_desc_t *md)
208{
209 __map_region(md, md->virt_addr);
210}
211
97void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, 212void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
98 u32 type, u64 attribute) 213 u32 type, u64 attribute)
99{ 214{
@@ -113,3 +228,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
113 228
114 return (void __iomem *)__va(phys_addr); 229 return (void __iomem *)__va(phys_addr);
115} 230}
231
232void __init parse_efi_setup(u64 phys_addr, u32 data_len)
233{
234 efi_setup = phys_addr + sizeof(struct setup_data);
235}
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 4c07ccab8146..88073b140298 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -34,10 +34,47 @@
34 mov %rsi, %cr0; \ 34 mov %rsi, %cr0; \
35 mov (%rsp), %rsp 35 mov (%rsp), %rsp
36 36
37 /* stolen from gcc */
38 .macro FLUSH_TLB_ALL
39 movq %r15, efi_scratch(%rip)
40 movq %r14, efi_scratch+8(%rip)
41 movq %cr4, %r15
42 movq %r15, %r14
43 andb $0x7f, %r14b
44 movq %r14, %cr4
45 movq %r15, %cr4
46 movq efi_scratch+8(%rip), %r14
47 movq efi_scratch(%rip), %r15
48 .endm
49
50 .macro SWITCH_PGT
51 cmpb $0, efi_scratch+24(%rip)
52 je 1f
53 movq %r15, efi_scratch(%rip) # r15
54 # save previous CR3
55 movq %cr3, %r15
56 movq %r15, efi_scratch+8(%rip) # prev_cr3
57 movq efi_scratch+16(%rip), %r15 # EFI pgt
58 movq %r15, %cr3
59 1:
60 .endm
61
62 .macro RESTORE_PGT
63 cmpb $0, efi_scratch+24(%rip)
64 je 2f
65 movq efi_scratch+8(%rip), %r15
66 movq %r15, %cr3
67 movq efi_scratch(%rip), %r15
68 FLUSH_TLB_ALL
69 2:
70 .endm
71
37ENTRY(efi_call0) 72ENTRY(efi_call0)
38 SAVE_XMM 73 SAVE_XMM
39 subq $32, %rsp 74 subq $32, %rsp
75 SWITCH_PGT
40 call *%rdi 76 call *%rdi
77 RESTORE_PGT
41 addq $32, %rsp 78 addq $32, %rsp
42 RESTORE_XMM 79 RESTORE_XMM
43 ret 80 ret
@@ -47,7 +84,9 @@ ENTRY(efi_call1)
47 SAVE_XMM 84 SAVE_XMM
48 subq $32, %rsp 85 subq $32, %rsp
49 mov %rsi, %rcx 86 mov %rsi, %rcx
87 SWITCH_PGT
50 call *%rdi 88 call *%rdi
89 RESTORE_PGT
51 addq $32, %rsp 90 addq $32, %rsp
52 RESTORE_XMM 91 RESTORE_XMM
53 ret 92 ret
@@ -57,7 +96,9 @@ ENTRY(efi_call2)
57 SAVE_XMM 96 SAVE_XMM
58 subq $32, %rsp 97 subq $32, %rsp
59 mov %rsi, %rcx 98 mov %rsi, %rcx
99 SWITCH_PGT
60 call *%rdi 100 call *%rdi
101 RESTORE_PGT
61 addq $32, %rsp 102 addq $32, %rsp
62 RESTORE_XMM 103 RESTORE_XMM
63 ret 104 ret
@@ -68,7 +109,9 @@ ENTRY(efi_call3)
68 subq $32, %rsp 109 subq $32, %rsp
69 mov %rcx, %r8 110 mov %rcx, %r8
70 mov %rsi, %rcx 111 mov %rsi, %rcx
112 SWITCH_PGT
71 call *%rdi 113 call *%rdi
114 RESTORE_PGT
72 addq $32, %rsp 115 addq $32, %rsp
73 RESTORE_XMM 116 RESTORE_XMM
74 ret 117 ret
@@ -80,7 +123,9 @@ ENTRY(efi_call4)
80 mov %r8, %r9 123 mov %r8, %r9
81 mov %rcx, %r8 124 mov %rcx, %r8
82 mov %rsi, %rcx 125 mov %rsi, %rcx
126 SWITCH_PGT
83 call *%rdi 127 call *%rdi
128 RESTORE_PGT
84 addq $32, %rsp 129 addq $32, %rsp
85 RESTORE_XMM 130 RESTORE_XMM
86 ret 131 ret
@@ -93,7 +138,9 @@ ENTRY(efi_call5)
93 mov %r8, %r9 138 mov %r8, %r9
94 mov %rcx, %r8 139 mov %rcx, %r8
95 mov %rsi, %rcx 140 mov %rsi, %rcx
141 SWITCH_PGT
96 call *%rdi 142 call *%rdi
143 RESTORE_PGT
97 addq $48, %rsp 144 addq $48, %rsp
98 RESTORE_XMM 145 RESTORE_XMM
99 ret 146 ret
@@ -109,8 +156,15 @@ ENTRY(efi_call6)
109 mov %r8, %r9 156 mov %r8, %r9
110 mov %rcx, %r8 157 mov %rcx, %r8
111 mov %rsi, %rcx 158 mov %rsi, %rcx
159 SWITCH_PGT
112 call *%rdi 160 call *%rdi
161 RESTORE_PGT
113 addq $48, %rsp 162 addq $48, %rsp
114 RESTORE_XMM 163 RESTORE_XMM
115 ret 164 ret
116ENDPROC(efi_call6) 165ENDPROC(efi_call6)
166
167 .data
168ENTRY(efi_scratch)
169 .fill 3,8,0
170 .byte 0
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 6aecbc86ec94..1e75f48b61f8 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -36,6 +36,17 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE
36 backend for pstore by default. This setting can be overridden 36 backend for pstore by default. This setting can be overridden
37 using the efivars module's pstore_disable parameter. 37 using the efivars module's pstore_disable parameter.
38 38
39config EFI_RUNTIME_MAP
40 bool "Export efi runtime maps to sysfs"
41 depends on X86 && EFI && KEXEC
42 default y
43 help
44 Export efi runtime memory maps to /sys/firmware/efi/runtime-map.
45 That memory map is used for example by kexec to set up efi virtual
46 mapping the 2nd kernel, but can also be used for debugging purposes.
47
48 See also Documentation/ABI/testing/sysfs-firmware-efi-runtime-map.
49
39endmenu 50endmenu
40 51
41config UEFI_CPER 52config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 6c2a41ec21ba..9553496b0f43 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_EFI) += efi.o vars.o
5obj-$(CONFIG_EFI_VARS) += efivars.o 5obj-$(CONFIG_EFI_VARS) += efivars.o
6obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o 6obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o
7obj-$(CONFIG_UEFI_CPER) += cper.o 7obj-$(CONFIG_UEFI_CPER) += cper.o
8obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 2e2fbdec0845..4753bac65279 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -32,6 +32,9 @@ struct efi __read_mostly efi = {
32 .hcdp = EFI_INVALID_TABLE_ADDR, 32 .hcdp = EFI_INVALID_TABLE_ADDR,
33 .uga = EFI_INVALID_TABLE_ADDR, 33 .uga = EFI_INVALID_TABLE_ADDR,
34 .uv_systab = EFI_INVALID_TABLE_ADDR, 34 .uv_systab = EFI_INVALID_TABLE_ADDR,
35 .fw_vendor = EFI_INVALID_TABLE_ADDR,
36 .runtime = EFI_INVALID_TABLE_ADDR,
37 .config_table = EFI_INVALID_TABLE_ADDR,
35}; 38};
36EXPORT_SYMBOL(efi); 39EXPORT_SYMBOL(efi);
37 40
@@ -71,13 +74,49 @@ static ssize_t systab_show(struct kobject *kobj,
71static struct kobj_attribute efi_attr_systab = 74static struct kobj_attribute efi_attr_systab =
72 __ATTR(systab, 0400, systab_show, NULL); 75 __ATTR(systab, 0400, systab_show, NULL);
73 76
77#define EFI_FIELD(var) efi.var
78
79#define EFI_ATTR_SHOW(name) \
80static ssize_t name##_show(struct kobject *kobj, \
81 struct kobj_attribute *attr, char *buf) \
82{ \
83 return sprintf(buf, "0x%lx\n", EFI_FIELD(name)); \
84}
85
86EFI_ATTR_SHOW(fw_vendor);
87EFI_ATTR_SHOW(runtime);
88EFI_ATTR_SHOW(config_table);
89
90static struct kobj_attribute efi_attr_fw_vendor = __ATTR_RO(fw_vendor);
91static struct kobj_attribute efi_attr_runtime = __ATTR_RO(runtime);
92static struct kobj_attribute efi_attr_config_table = __ATTR_RO(config_table);
93
74static struct attribute *efi_subsys_attrs[] = { 94static struct attribute *efi_subsys_attrs[] = {
75 &efi_attr_systab.attr, 95 &efi_attr_systab.attr,
76 NULL, /* maybe more in the future? */ 96 &efi_attr_fw_vendor.attr,
97 &efi_attr_runtime.attr,
98 &efi_attr_config_table.attr,
99 NULL,
77}; 100};
78 101
102static umode_t efi_attr_is_visible(struct kobject *kobj,
103 struct attribute *attr, int n)
104{
105 umode_t mode = attr->mode;
106
107 if (attr == &efi_attr_fw_vendor.attr)
108 return (efi.fw_vendor == EFI_INVALID_TABLE_ADDR) ? 0 : mode;
109 else if (attr == &efi_attr_runtime.attr)
110 return (efi.runtime == EFI_INVALID_TABLE_ADDR) ? 0 : mode;
111 else if (attr == &efi_attr_config_table.attr)
112 return (efi.config_table == EFI_INVALID_TABLE_ADDR) ? 0 : mode;
113
114 return mode;
115}
116
79static struct attribute_group efi_subsys_attr_group = { 117static struct attribute_group efi_subsys_attr_group = {
80 .attrs = efi_subsys_attrs, 118 .attrs = efi_subsys_attrs,
119 .is_visible = efi_attr_is_visible,
81}; 120};
82 121
83static struct efivars generic_efivars; 122static struct efivars generic_efivars;
@@ -128,6 +167,10 @@ static int __init efisubsys_init(void)
128 goto err_unregister; 167 goto err_unregister;
129 } 168 }
130 169
170 error = efi_runtime_map_init(efi_kobj);
171 if (error)
172 goto err_remove_group;
173
131 /* and the standard mountpoint for efivarfs */ 174 /* and the standard mountpoint for efivarfs */
132 efivars_kobj = kobject_create_and_add("efivars", efi_kobj); 175 efivars_kobj = kobject_create_and_add("efivars", efi_kobj);
133 if (!efivars_kobj) { 176 if (!efivars_kobj) {
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
new file mode 100644
index 000000000000..97cdd16a2169
--- /dev/null
+++ b/drivers/firmware/efi/runtime-map.c
@@ -0,0 +1,181 @@
1/*
2 * linux/drivers/efi/runtime-map.c
3 * Copyright (C) 2013 Red Hat, Inc., Dave Young <dyoung@redhat.com>
4 *
5 * This file is released under the GPLv2.
6 */
7
8#include <linux/string.h>
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/types.h>
12#include <linux/efi.h>
13#include <linux/slab.h>
14
15#include <asm/setup.h>
16
17static void *efi_runtime_map;
18static int nr_efi_runtime_map;
19static u32 efi_memdesc_size;
20
21struct efi_runtime_map_entry {
22 efi_memory_desc_t md;
23 struct kobject kobj; /* kobject for each entry */
24};
25
26static struct efi_runtime_map_entry **map_entries;
27
28struct map_attribute {
29 struct attribute attr;
30 ssize_t (*show)(struct efi_runtime_map_entry *entry, char *buf);
31};
32
33static inline struct map_attribute *to_map_attr(struct attribute *attr)
34{
35 return container_of(attr, struct map_attribute, attr);
36}
37
38static ssize_t type_show(struct efi_runtime_map_entry *entry, char *buf)
39{
40 return snprintf(buf, PAGE_SIZE, "0x%x\n", entry->md.type);
41}
42
43#define EFI_RUNTIME_FIELD(var) entry->md.var
44
45#define EFI_RUNTIME_U64_ATTR_SHOW(name) \
46static ssize_t name##_show(struct efi_runtime_map_entry *entry, char *buf) \
47{ \
48 return snprintf(buf, PAGE_SIZE, "0x%llx\n", EFI_RUNTIME_FIELD(name)); \
49}
50
51EFI_RUNTIME_U64_ATTR_SHOW(phys_addr);
52EFI_RUNTIME_U64_ATTR_SHOW(virt_addr);
53EFI_RUNTIME_U64_ATTR_SHOW(num_pages);
54EFI_RUNTIME_U64_ATTR_SHOW(attribute);
55
56static inline struct efi_runtime_map_entry *to_map_entry(struct kobject *kobj)
57{
58 return container_of(kobj, struct efi_runtime_map_entry, kobj);
59}
60
61static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr,
62 char *buf)
63{
64 struct efi_runtime_map_entry *entry = to_map_entry(kobj);
65 struct map_attribute *map_attr = to_map_attr(attr);
66
67 return map_attr->show(entry, buf);
68}
69
70static struct map_attribute map_type_attr = __ATTR_RO(type);
71static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr);
72static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr);
73static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages);
74static struct map_attribute map_attribute_attr = __ATTR_RO(attribute);
75
76/*
77 * These are default attributes that are added for every memmap entry.
78 */
79static struct attribute *def_attrs[] = {
80 &map_type_attr.attr,
81 &map_phys_addr_attr.attr,
82 &map_virt_addr_attr.attr,
83 &map_num_pages_attr.attr,
84 &map_attribute_attr.attr,
85 NULL
86};
87
88static const struct sysfs_ops map_attr_ops = {
89 .show = map_attr_show,
90};
91
92static void map_release(struct kobject *kobj)
93{
94 struct efi_runtime_map_entry *entry;
95
96 entry = to_map_entry(kobj);
97 kfree(entry);
98}
99
100static struct kobj_type __refdata map_ktype = {
101 .sysfs_ops = &map_attr_ops,
102 .default_attrs = def_attrs,
103 .release = map_release,
104};
105
106static struct kset *map_kset;
107
108static struct efi_runtime_map_entry *
109add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
110{
111 int ret;
112 struct efi_runtime_map_entry *entry;
113
114 if (!map_kset) {
115 map_kset = kset_create_and_add("runtime-map", NULL, kobj);
116 if (!map_kset)
117 return ERR_PTR(-ENOMEM);
118 }
119
120 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
121 if (!entry) {
122 kset_unregister(map_kset);
123 return entry;
124 }
125
126 memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size,
127 sizeof(efi_memory_desc_t));
128
129 kobject_init(&entry->kobj, &map_ktype);
130 entry->kobj.kset = map_kset;
131 ret = kobject_add(&entry->kobj, NULL, "%d", nr);
132 if (ret) {
133 kobject_put(&entry->kobj);
134 kset_unregister(map_kset);
135 return ERR_PTR(ret);
136 }
137
138 return entry;
139}
140
141void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
142{
143 efi_runtime_map = map;
144 nr_efi_runtime_map = nr_entries;
145 efi_memdesc_size = desc_size;
146}
147
148int __init efi_runtime_map_init(struct kobject *efi_kobj)
149{
150 int i, j, ret = 0;
151 struct efi_runtime_map_entry *entry;
152
153 if (!efi_runtime_map)
154 return 0;
155
156 map_entries = kzalloc(nr_efi_runtime_map * sizeof(entry), GFP_KERNEL);
157 if (!map_entries) {
158 ret = -ENOMEM;
159 goto out;
160 }
161
162 for (i = 0; i < nr_efi_runtime_map; i++) {
163 entry = add_sysfs_runtime_map_entry(efi_kobj, i);
164 if (IS_ERR(entry)) {
165 ret = PTR_ERR(entry);
166 goto out_add_entry;
167 }
168 *(map_entries + i) = entry;
169 }
170
171 return 0;
172out_add_entry:
173 for (j = i - 1; j > 0; j--) {
174 entry = *(map_entries + j);
175 kobject_put(&entry->kobj);
176 }
177 if (map_kset)
178 kset_unregister(map_kset);
179out:
180 return ret;
181}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 11ce6784a196..0a819e7a60c9 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -556,6 +556,9 @@ extern struct efi {
556 unsigned long hcdp; /* HCDP table */ 556 unsigned long hcdp; /* HCDP table */
557 unsigned long uga; /* UGA table */ 557 unsigned long uga; /* UGA table */
558 unsigned long uv_systab; /* UV system table */ 558 unsigned long uv_systab; /* UV system table */
559 unsigned long fw_vendor; /* fw_vendor */
560 unsigned long runtime; /* runtime table */
561 unsigned long config_table; /* config tables */
559 efi_get_time_t *get_time; 562 efi_get_time_t *get_time;
560 efi_set_time_t *set_time; 563 efi_set_time_t *set_time;
561 efi_get_wakeup_time_t *get_wakeup_time; 564 efi_get_wakeup_time_t *get_wakeup_time;
@@ -653,6 +656,7 @@ extern int __init efi_setup_pcdp_console(char *);
653#define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ 656#define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */
654#define EFI_MEMMAP 4 /* Can we use EFI memory map? */ 657#define EFI_MEMMAP 4 /* Can we use EFI memory map? */
655#define EFI_64BIT 5 /* Is the firmware 64-bit? */ 658#define EFI_64BIT 5 /* Is the firmware 64-bit? */
659#define EFI_ARCH_1 6 /* First arch-specific bit */
656 660
657#ifdef CONFIG_EFI 661#ifdef CONFIG_EFI
658# ifdef CONFIG_X86 662# ifdef CONFIG_X86
@@ -872,4 +876,17 @@ int efivars_sysfs_init(void);
872 876
873#endif /* CONFIG_EFI_VARS */ 877#endif /* CONFIG_EFI_VARS */
874 878
879#ifdef CONFIG_EFI_RUNTIME_MAP
880int efi_runtime_map_init(struct kobject *);
881void efi_runtime_map_setup(void *, int, u32);
882#else
883static inline int efi_runtime_map_init(struct kobject *kobj)
884{
885 return 0;
886}
887
888static inline void
889efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
890#endif
891
875#endif /* _LINUX_EFI_H */ 892#endif /* _LINUX_EFI_H */