author	Matt Fleming <matt.fleming@intel.com>	2011-11-18 08:09:11 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-12-09 02:32:26 -0500
commit	e8c7106280a305e1ff2a3a8a4dfce141469fb039 (patch)
tree	576cc6990eb3903aeb8e333019b6442441d0fdcb
parent	2ded6e6a94c98ea453a156748cb7fabaf39a76b9 (diff)
x86, efi: Calling __pa() with an ioremap()ed address is invalid
If we encounter an efi_memory_desc_t without EFI_MEMORY_WB set in
->attribute we currently call set_memory_uc(), which in turn calls
__pa() on a potentially ioremap'd address.

On CONFIG_X86_32 this is invalid, resulting in the following oops on
some machines:

  BUG: unable to handle kernel paging request at f7f22280
  IP: [<c10257b9>] reserve_ram_pages_type+0x89/0x210

  [...]

  Call Trace:
   [<c104f8ca>] ? page_is_ram+0x1a/0x40
   [<c1025aff>] reserve_memtype+0xdf/0x2f0
   [<c1024dc9>] set_memory_uc+0x49/0xa0
   [<c19334d0>] efi_enter_virtual_mode+0x1c2/0x3aa
   [<c19216d4>] start_kernel+0x291/0x2f2
   [<c19211c7>] ? loglevel+0x1b/0x1b
   [<c19210bf>] i386_start_kernel+0xbf/0xc8

A better approach to this problem is to map the memory region with the
correct attributes from the start, instead of modifying it after the
fact. The uncached case can be handled by ioremap_nocache() and the
cached by ioremap_cache().

Despite first impressions, it's not possible to use ioremap_cache() to
map all cached memory regions on CONFIG_X86_64 because
EFI_RUNTIME_SERVICES_DATA regions really don't like being mapped into
the vmalloc space, as detailed in the following bug report,

	https://bugzilla.redhat.com/show_bug.cgi?id=748516

Therefore, we need to ensure that any EFI_RUNTIME_SERVICES_DATA regions
are covered by the direct kernel mapping table on CONFIG_X86_64. To
accomplish this we now map E820_RESERVED_EFI regions via the direct
kernel mapping with the initial call to init_memory_mapping() in
setup_arch(), whereas previously these regions wouldn't be mapped if
they were after the last E820_RAM region until efi_ioremap() was
called. Doing it this way allows us to delete efi_ioremap() completely.

Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Huang Ying <huang.ying.caritas@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1321621751-3650-1-git-send-email-matt@console-pimps.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
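[Editorial sketch, not part of the patch] The mapping policy this patch introduces can be summarised by the condensed helper below. It is illustrative only: the function name map_efi_region() is invented here, the real logic lives inline in efi_enter_virtual_mode() (see the efi.c hunk further down), and the include list assumes a 3.2-era tree where set_memory_uc() is declared in <asm/cacheflush.h>.

  #include <linux/init.h>	/* __init */
  #include <linux/efi.h>	/* efi_memory_desc_t, EFI_MEMORY_WB, memrange_efi_to_native() */
  #include <linux/io.h>		/* ioremap_cache(), ioremap_nocache() */
  #include <linux/pfn.h>	/* PFN_UP() */
  #include <asm/cacheflush.h>	/* set_memory_uc() (location assumed for this era) */
  #include <asm/page.h>		/* __va(), PAGE_SHIFT */

  /* Hypothetical helper: how one EFI memory descriptor gets mapped
   * after this patch. */
  static void __init map_efi_region(efi_memory_desc_t *md, u64 size, u64 end)
  {
  	unsigned long end_pfn = PFN_UP(end);
  	void __iomem *va;
  	u64 addr, npages;

  	if (end_pfn <= max_low_pfn_mapped
  	    || (end_pfn > (1UL << (32 - PAGE_SHIFT))
  		&& end_pfn <= max_pfn_mapped)) {
  		/* Region is covered by the kernel direct mapping, so __va()
  		 * is valid and so is the __pa() inside set_memory_uc(). */
  		va = (void __iomem *)__va(md->phys_addr);

  		if (!(md->attribute & EFI_MEMORY_WB)) {
  			addr = (u64)(unsigned long)va;
  			npages = md->num_pages;
  			memrange_efi_to_native(&addr, &npages);
  			set_memory_uc(addr, npages);
  		}
  	} else {
  		/* Not direct-mapped: pick the caching attribute up front
  		 * rather than calling set_memory_uc() on an ioremap'd VA,
  		 * whose __pa() round-trip is what oopsed on CONFIG_X86_32. */
  		if (!(md->attribute & EFI_MEMORY_WB))
  			va = ioremap_nocache(md->phys_addr, size);
  		else
  			va = ioremap_cache(md->phys_addr, size);
  	}

  	md->virt_addr = (u64)(unsigned long)va;
  }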
-rw-r--r--	arch/x86/include/asm/e820.h	 8
-rw-r--r--	arch/x86/include/asm/efi.h	 5
-rw-r--r--	arch/x86/kernel/e820.c		 3
-rw-r--r--	arch/x86/kernel/setup.c		21
-rw-r--r--	arch/x86/platform/efi/efi.c	29
-rw-r--r--	arch/x86/platform/efi/efi_64.c	17
6 files changed, 48 insertions(+), 35 deletions(-)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 908b96957d8..c9547033e38 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -53,6 +53,13 @@
  */
 #define E820_RESERVED_KERN	128
 
+/*
+ * Address ranges that need to be mapped by the kernel direct
+ * mapping. This is used to make sure regions such as
+ * EFI_RUNTIME_SERVICES_DATA are directly mapped. See setup_arch().
+ */
+#define E820_RESERVED_EFI	129
+
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 struct e820entry {
@@ -115,6 +122,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
 }
 #endif
 
+extern unsigned long e820_end_pfn(unsigned long limit_pfn, unsigned type);
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 7093e4a6a0b..b8d8bfcd44a 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -33,8 +33,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
 	efi_call_virt(f, a1, a2, a3, a4, a5, a6)
 
-#define efi_ioremap(addr, size, type)		ioremap_cache(addr, size)
-
 #else /* !CONFIG_X86_32 */
 
 extern u64 efi_call0(void *fp);
@@ -84,9 +82,6 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
-				 u32 type);
-
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f07..65ffd110a81 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -135,6 +135,7 @@ static void __init e820_print_type(u32 type)
 		printk(KERN_CONT "(usable)");
 		break;
 	case E820_RESERVED:
+	case E820_RESERVED_EFI:
 		printk(KERN_CONT "(reserved)");
 		break;
 	case E820_ACPI:
@@ -783,7 +784,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
 /*
  * Find the highest page frame number we have available
  */
-static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
+unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
 	int i;
 	unsigned long last_pfn = 0;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cf0ef986cb6..9a9e40fb091 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -691,6 +691,8 @@ early_param("reservelow", parse_reservelow);
 
 void __init setup_arch(char **cmdline_p)
 {
+	unsigned long end_pfn;
+
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	visws_early_detect();
@@ -932,7 +934,24 @@ void __init setup_arch(char **cmdline_p)
 	init_gbpages();
 
 	/* max_pfn_mapped is updated here */
-	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+	end_pfn = max_low_pfn;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * There may be regions after the last E820_RAM region that we
+	 * want to include in the kernel direct mapping, such as
+	 * EFI_RUNTIME_SERVICES_DATA.
+	 */
+	if (efi_enabled) {
+		unsigned long efi_end;
+
+		efi_end = e820_end_pfn(MAXMEM>>PAGE_SHIFT, E820_RESERVED_EFI);
+		if (efi_end > max_low_pfn)
+			end_pfn = efi_end;
+	}
+#endif
+
+	max_low_pfn_mapped = init_memory_mapping(0, end_pfn << PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 37718f0f053..c9718a16be1 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -323,10 +323,13 @@ static void __init do_add_efi_memmap(void)
 		case EFI_UNUSABLE_MEMORY:
 			e820_type = E820_UNUSABLE;
 			break;
+		case EFI_RUNTIME_SERVICES_DATA:
+			e820_type = E820_RESERVED_EFI;
+			break;
 		default:
 			/*
 			 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
-			 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
+			 * EFI_MEMORY_MAPPED_IO
 			 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
 			 */
 			e820_type = E820_RESERVED;
@@ -671,10 +674,21 @@ void __init efi_enter_virtual_mode(void)
 		end_pfn = PFN_UP(end);
 		if (end_pfn <= max_low_pfn_mapped
 		    || (end_pfn > (1UL << (32 - PAGE_SHIFT))
-			&& end_pfn <= max_pfn_mapped))
+			&& end_pfn <= max_pfn_mapped)) {
 			va = __va(md->phys_addr);
-		else
-			va = efi_ioremap(md->phys_addr, size, md->type);
+
+			if (!(md->attribute & EFI_MEMORY_WB)) {
+				addr = (u64) (unsigned long)va;
+				npages = md->num_pages;
+				memrange_efi_to_native(&addr, &npages);
+				set_memory_uc(addr, npages);
+			}
+		} else {
+			if (!(md->attribute & EFI_MEMORY_WB))
+				va = ioremap_nocache(md->phys_addr, size);
+			else
+				va = ioremap_cache(md->phys_addr, size);
+		}
 
 		md->virt_addr = (u64) (unsigned long) va;
 
@@ -684,13 +698,6 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		}
 
-		if (!(md->attribute & EFI_MEMORY_WB)) {
-			addr = md->virt_addr;
-			npages = md->num_pages;
-			memrange_efi_to_native(&addr, &npages);
-			set_memory_uc(addr, npages);
-		}
-
 		systab = (u64) (unsigned long) efi_phys.systab;
 		if (md->phys_addr <= systab && systab < end) {
 			systab += md->virt_addr - md->phys_addr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac3aa54e265..312250c6b2d 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -80,20 +80,3 @@ void __init efi_call_phys_epilog(void)
 	local_irq_restore(efi_flags);
 	early_code_mapping_set_exec(0);
 }
-
-void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
-				 u32 type)
-{
-	unsigned long last_map_pfn;
-
-	if (type == EFI_MEMORY_MAPPED_IO)
-		return ioremap(phys_addr, size);
-
-	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
-	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
-		unsigned long top = last_map_pfn << PAGE_SHIFT;
-		efi_ioremap(top, size - (top - phys_addr), type);
-	}
-
-	return (void __iomem *)__va(phys_addr);
-}