diff options
author | Matt Fleming <matt.fleming@intel.com> | 2011-11-18 08:09:11 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-12-09 02:32:26 -0500 |
commit | e8c7106280a305e1ff2a3a8a4dfce141469fb039 (patch) | |
tree | 576cc6990eb3903aeb8e333019b6442441d0fdcb /arch | |
parent | 2ded6e6a94c98ea453a156748cb7fabaf39a76b9 (diff) |
x86, efi: Calling __pa() with an ioremap()ed address is invalid
If we encounter an efi_memory_desc_t without EFI_MEMORY_WB set
in ->attribute we currently call set_memory_uc(), which in turn
calls __pa() on a potentially ioremap'd address.
On CONFIG_X86_32 this is invalid, resulting in the following
oops on some machines:
BUG: unable to handle kernel paging request at f7f22280
IP: [<c10257b9>] reserve_ram_pages_type+0x89/0x210
[...]
Call Trace:
[<c104f8ca>] ? page_is_ram+0x1a/0x40
[<c1025aff>] reserve_memtype+0xdf/0x2f0
[<c1024dc9>] set_memory_uc+0x49/0xa0
[<c19334d0>] efi_enter_virtual_mode+0x1c2/0x3aa
[<c19216d4>] start_kernel+0x291/0x2f2
[<c19211c7>] ? loglevel+0x1b/0x1b
[<c19210bf>] i386_start_kernel+0xbf/0xc8
A better approach to this problem is to map the memory region
with the correct attributes from the start, instead of modifying
it after the fact. The uncached case can be handled by
ioremap_nocache() and the cached by ioremap_cache().
Despite first impressions, it's not possible to use
ioremap_cache() to map all cached memory regions on
CONFIG_X86_64 because EFI_RUNTIME_SERVICES_DATA regions really
don't like being mapped into the vmalloc space, as detailed in
the following bug report,
https://bugzilla.redhat.com/show_bug.cgi?id=748516
Therefore, we need to ensure that any EFI_RUNTIME_SERVICES_DATA
regions are covered by the direct kernel mapping table on
CONFIG_X86_64. To accomplish this we now map E820_RESERVED_EFI
regions via the direct kernel mapping with the initial call to
init_memory_mapping() in setup_arch(), whereas previously these
regions wouldn't be mapped if they were after the last E820_RAM
region until efi_ioremap() was called. Doing it this way allows
us to delete efi_ioremap() completely.
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Huang Ying <huang.ying.caritas@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1321621751-3650-1-git-send-email-matt@console-pimps.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/e820.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/efi.h | 5 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 21 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 29 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_64.c | 17 |
6 files changed, 48 insertions, 35 deletions
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 908b96957d88..c9547033e38e 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -53,6 +53,13 @@ | |||
53 | */ | 53 | */ |
54 | #define E820_RESERVED_KERN 128 | 54 | #define E820_RESERVED_KERN 128 |
55 | 55 | ||
56 | /* | ||
57 | * Address ranges that need to be mapped by the kernel direct | ||
58 | * mapping. This is used to make sure regions such as | ||
59 | * EFI_RUNTIME_SERVICES_DATA are directly mapped. See setup_arch(). | ||
60 | */ | ||
61 | #define E820_RESERVED_EFI 129 | ||
62 | |||
56 | #ifndef __ASSEMBLY__ | 63 | #ifndef __ASSEMBLY__ |
57 | #include <linux/types.h> | 64 | #include <linux/types.h> |
58 | struct e820entry { | 65 | struct e820entry { |
@@ -115,6 +122,7 @@ static inline void early_memtest(unsigned long start, unsigned long end) | |||
115 | } | 122 | } |
116 | #endif | 123 | #endif |
117 | 124 | ||
125 | extern unsigned long e820_end_pfn(unsigned long limit_pfn, unsigned type); | ||
118 | extern unsigned long e820_end_of_ram_pfn(void); | 126 | extern unsigned long e820_end_of_ram_pfn(void); |
119 | extern unsigned long e820_end_of_low_ram_pfn(void); | 127 | extern unsigned long e820_end_of_low_ram_pfn(void); |
120 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); | 128 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 7093e4a6a0bc..b8d8bfcd44a9 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -33,8 +33,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); | |||
33 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ | 33 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ |
34 | efi_call_virt(f, a1, a2, a3, a4, a5, a6) | 34 | efi_call_virt(f, a1, a2, a3, a4, a5, a6) |
35 | 35 | ||
36 | #define efi_ioremap(addr, size, type) ioremap_cache(addr, size) | ||
37 | |||
38 | #else /* !CONFIG_X86_32 */ | 36 | #else /* !CONFIG_X86_32 */ |
39 | 37 | ||
40 | extern u64 efi_call0(void *fp); | 38 | extern u64 efi_call0(void *fp); |
@@ -84,9 +82,6 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, | |||
84 | efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 82 | efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ |
85 | (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) | 83 | (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) |
86 | 84 | ||
87 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | ||
88 | u32 type); | ||
89 | |||
90 | #endif /* CONFIG_X86_32 */ | 85 | #endif /* CONFIG_X86_32 */ |
91 | 86 | ||
92 | extern int add_efi_memmap; | 87 | extern int add_efi_memmap; |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 303a0e48f076..65ffd110a81b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -135,6 +135,7 @@ static void __init e820_print_type(u32 type) | |||
135 | printk(KERN_CONT "(usable)"); | 135 | printk(KERN_CONT "(usable)"); |
136 | break; | 136 | break; |
137 | case E820_RESERVED: | 137 | case E820_RESERVED: |
138 | case E820_RESERVED_EFI: | ||
138 | printk(KERN_CONT "(reserved)"); | 139 | printk(KERN_CONT "(reserved)"); |
139 | break; | 140 | break; |
140 | case E820_ACPI: | 141 | case E820_ACPI: |
@@ -783,7 +784,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |||
783 | /* | 784 | /* |
784 | * Find the highest page frame number we have available | 785 | * Find the highest page frame number we have available |
785 | */ | 786 | */ |
786 | static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) | 787 | unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) |
787 | { | 788 | { |
788 | int i; | 789 | int i; |
789 | unsigned long last_pfn = 0; | 790 | unsigned long last_pfn = 0; |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cf0ef986cb6d..9a9e40fb091c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -691,6 +691,8 @@ early_param("reservelow", parse_reservelow); | |||
691 | 691 | ||
692 | void __init setup_arch(char **cmdline_p) | 692 | void __init setup_arch(char **cmdline_p) |
693 | { | 693 | { |
694 | unsigned long end_pfn; | ||
695 | |||
694 | #ifdef CONFIG_X86_32 | 696 | #ifdef CONFIG_X86_32 |
695 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 697 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
696 | visws_early_detect(); | 698 | visws_early_detect(); |
@@ -932,7 +934,24 @@ void __init setup_arch(char **cmdline_p) | |||
932 | init_gbpages(); | 934 | init_gbpages(); |
933 | 935 | ||
934 | /* max_pfn_mapped is updated here */ | 936 | /* max_pfn_mapped is updated here */ |
935 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); | 937 | end_pfn = max_low_pfn; |
938 | |||
939 | #ifdef CONFIG_X86_64 | ||
940 | /* | ||
941 | * There may be regions after the last E820_RAM region that we | ||
942 | * want to include in the kernel direct mapping, such as | ||
943 | * EFI_RUNTIME_SERVICES_DATA. | ||
944 | */ | ||
945 | if (efi_enabled) { | ||
946 | unsigned long efi_end; | ||
947 | |||
948 | efi_end = e820_end_pfn(MAXMEM>>PAGE_SHIFT, E820_RESERVED_EFI); | ||
949 | if (efi_end > max_low_pfn) | ||
950 | end_pfn = efi_end; | ||
951 | } | ||
952 | #endif | ||
953 | |||
954 | max_low_pfn_mapped = init_memory_mapping(0, end_pfn << PAGE_SHIFT); | ||
936 | max_pfn_mapped = max_low_pfn_mapped; | 955 | max_pfn_mapped = max_low_pfn_mapped; |
937 | 956 | ||
938 | #ifdef CONFIG_X86_64 | 957 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 37718f0f053d..c9718a16be15 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -323,10 +323,13 @@ static void __init do_add_efi_memmap(void) | |||
323 | case EFI_UNUSABLE_MEMORY: | 323 | case EFI_UNUSABLE_MEMORY: |
324 | e820_type = E820_UNUSABLE; | 324 | e820_type = E820_UNUSABLE; |
325 | break; | 325 | break; |
326 | case EFI_RUNTIME_SERVICES_DATA: | ||
327 | e820_type = E820_RESERVED_EFI; | ||
328 | break; | ||
326 | default: | 329 | default: |
327 | /* | 330 | /* |
328 | * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE | 331 | * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE |
329 | * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO | 332 | * EFI_MEMORY_MAPPED_IO |
330 | * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE | 333 | * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE |
331 | */ | 334 | */ |
332 | e820_type = E820_RESERVED; | 335 | e820_type = E820_RESERVED; |
@@ -671,10 +674,21 @@ void __init efi_enter_virtual_mode(void) | |||
671 | end_pfn = PFN_UP(end); | 674 | end_pfn = PFN_UP(end); |
672 | if (end_pfn <= max_low_pfn_mapped | 675 | if (end_pfn <= max_low_pfn_mapped |
673 | || (end_pfn > (1UL << (32 - PAGE_SHIFT)) | 676 | || (end_pfn > (1UL << (32 - PAGE_SHIFT)) |
674 | && end_pfn <= max_pfn_mapped)) | 677 | && end_pfn <= max_pfn_mapped)) { |
675 | va = __va(md->phys_addr); | 678 | va = __va(md->phys_addr); |
676 | else | 679 | |
677 | va = efi_ioremap(md->phys_addr, size, md->type); | 680 | if (!(md->attribute & EFI_MEMORY_WB)) { |
681 | addr = (u64) (unsigned long)va; | ||
682 | npages = md->num_pages; | ||
683 | memrange_efi_to_native(&addr, &npages); | ||
684 | set_memory_uc(addr, npages); | ||
685 | } | ||
686 | } else { | ||
687 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
688 | va = ioremap_nocache(md->phys_addr, size); | ||
689 | else | ||
690 | va = ioremap_cache(md->phys_addr, size); | ||
691 | } | ||
678 | 692 | ||
679 | md->virt_addr = (u64) (unsigned long) va; | 693 | md->virt_addr = (u64) (unsigned long) va; |
680 | 694 | ||
@@ -684,13 +698,6 @@ void __init efi_enter_virtual_mode(void) | |||
684 | continue; | 698 | continue; |
685 | } | 699 | } |
686 | 700 | ||
687 | if (!(md->attribute & EFI_MEMORY_WB)) { | ||
688 | addr = md->virt_addr; | ||
689 | npages = md->num_pages; | ||
690 | memrange_efi_to_native(&addr, &npages); | ||
691 | set_memory_uc(addr, npages); | ||
692 | } | ||
693 | |||
694 | systab = (u64) (unsigned long) efi_phys.systab; | 701 | systab = (u64) (unsigned long) efi_phys.systab; |
695 | if (md->phys_addr <= systab && systab < end) { | 702 | if (md->phys_addr <= systab && systab < end) { |
696 | systab += md->virt_addr - md->phys_addr; | 703 | systab += md->virt_addr - md->phys_addr; |
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac3aa54e2654..312250c6b2de 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -80,20 +80,3 @@ void __init efi_call_phys_epilog(void) | |||
80 | local_irq_restore(efi_flags); | 80 | local_irq_restore(efi_flags); |
81 | early_code_mapping_set_exec(0); | 81 | early_code_mapping_set_exec(0); |
82 | } | 82 | } |
83 | |||
84 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | ||
85 | u32 type) | ||
86 | { | ||
87 | unsigned long last_map_pfn; | ||
88 | |||
89 | if (type == EFI_MEMORY_MAPPED_IO) | ||
90 | return ioremap(phys_addr, size); | ||
91 | |||
92 | last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); | ||
93 | if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { | ||
94 | unsigned long top = last_map_pfn << PAGE_SHIFT; | ||
95 | efi_ioremap(top, size - (top - phys_addr), type); | ||
96 | } | ||
97 | |||
98 | return (void __iomem *)__va(phys_addr); | ||
99 | } | ||