author		Ingo Molnar <mingo@kernel.org>	2013-11-26 06:23:04 -0500
committer	Ingo Molnar <mingo@kernel.org>	2013-11-26 06:23:04 -0500
commit		61d066977583803d333f1e7266b8ba772162dda4 (patch)
tree		087d56e401422f1a8a165a782216aa6d0291a60e
parent		b975dc3689fc6a3718ad288ce080924f9cb7e176 (diff)
parent		ee41143027706d9f342dfe05487a00b20887fde7 (diff)
Merge tag 'efi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi into x86/efi
Pull EFI virtual mapping changes from Matt Fleming:
* New static EFI runtime services virtual mapping layout which is
groundwork for kexec support on EFI. (Borislav Petkov)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat:

 Documentation/kernel-parameters.txt  |   6
 Documentation/x86/x86_64/mm.txt      |   7
 arch/x86/include/asm/efi.h           |  64
 arch/x86/include/asm/pgtable_types.h |   3
 arch/x86/mm/pageattr.c               | 461
 arch/x86/platform/efi/efi.c          | 111
 arch/x86/platform/efi/efi_32.c       |   9
 arch/x86/platform/efi/efi_64.c       | 109
 arch/x86/platform/efi/efi_stub_64.S  |  54
 include/linux/efi.h                  |   1
 10 files changed, 755 insertions(+), 70 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 50680a59a2ff..e06e99303dd3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -890,6 +890,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	edd=		[EDD]
 			Format: {"off" | "on" | "skip[mbr]"}
 
+	efi=		[EFI]
+			Format: { "old_map" }
+			old_map [X86-64]: switch to the old ioremap-based EFI
+			runtime services mapping. 32-bit still uses this one by
+			default.
+
 	efi_no_storage_paranoia [EFI; X86]
 			Using this parameter you can use more than 50% of
 			your efi variable storage. Use this parameter only if
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 881582f75c9c..c584a51add15 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -28,4 +28,11 @@ reference.
 Current X86-64 implementations only support 40 bits of address space,
 but we support up to 46 bits. This expands into MBZ space in the page tables.
 
+->trampoline_pgd:
+
+We map EFI runtime services in the aforementioned PGD in the virtual
+range of 64Gb (arbitrarily set, can be raised if needed)
+
+0xffffffef00000000 - 0xffffffff00000000
+
 -Andi Kleen, Jul 2004
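
A quick way to sanity-check the range documented above: -4G in a 64-bit
address space is 0xffffffff00000000, and backing off 64Gb from there lands
exactly on 0xffffffef00000000. A small standalone sketch (illustrative only,
not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long long top    = 0ULL - (4ULL << 30);  /* -4G  = 0xffffffff00000000 */
	unsigned long long bottom = top - (64ULL << 30);  /* -68G = 0xffffffef00000000 */

	printf("0x%llx - 0x%llx\n", bottom, top);
	return 0;
}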
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 65c6e6e3a552..89a05b0507b9 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,6 +1,24 @@
 #ifndef _ASM_X86_EFI_H
 #define _ASM_X86_EFI_H
 
+/*
+ * We map the EFI regions needed for runtime services non-contiguously,
+ * with preserved alignment on virtual addresses starting from -4G down
+ * for a total max space of 64G. This way, we provide for stable runtime
+ * services addresses across kernels so that a kexec'd kernel can still
+ * use them.
+ *
+ * This is the main reason why we're doing stable VA mappings for RT
+ * services.
+ *
+ * This flag is used in conjunction with a chicken bit called
+ * "efi=old_map" which can be used as a fallback to the old runtime
+ * services mapping method in case there's some b0rkage with a
+ * particular EFI implementation (haha, it is hard to hold up the
+ * sarcasm here...).
+ */
+#define EFI_OLD_MEMMAP		EFI_ARCH_1
+
 #ifdef CONFIG_X86_32
 
 #define EFI_LOADER_SIGNATURE	"EL32"
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4), (u64)(a5), (u64)(a6))
 
+#define _efi_call_virtX(x, f, ...)					\
+({									\
+	efi_status_t __s;						\
+									\
+	efi_sync_low_kernel_mappings();					\
+	preempt_disable();						\
+	__s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__);	\
+	preempt_enable();						\
+	__s;								\
+})
+
 #define efi_call_virt0(f)				\
-	efi_call0((efi.systab->runtime->f))
+	_efi_call_virtX(0, f)
 #define efi_call_virt1(f, a1)					\
-	efi_call1((efi.systab->runtime->f), (u64)(a1))
+	_efi_call_virtX(1, f, (u64)(a1))
 #define efi_call_virt2(f, a1, a2)					\
-	efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+	_efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
 #define efi_call_virt3(f, a1, a2, a3)					\
-	efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2),	\
-		  (u64)(a3))
+	_efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
 #define efi_call_virt4(f, a1, a2, a3, a4)				\
-	efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2),	\
-		  (u64)(a3), (u64)(a4))
+	_efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
 #define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
-	efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2),	\
-		  (u64)(a3), (u64)(a4), (u64)(a5))
+	_efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
-	efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2),	\
-		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
+	_efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 				 u32 type, u64 attribute);
@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 
 extern int add_efi_memmap;
 extern unsigned long x86_efi_facility;
+extern struct efi_scratch efi_scratch;
 extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 extern void efi_unmap_memmap(void);
 extern void efi_memory_uc(u64 addr, unsigned long size);
+extern void __init efi_map_region(efi_memory_desc_t *md);
+extern void efi_sync_low_kernel_mappings(void);
+extern void efi_setup_page_tables(void);
+extern void __init old_map_region(efi_memory_desc_t *md);
 
 #ifdef CONFIG_EFI
 
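
To see what the reworked macros buy at a call site, here is a sketch of a
virtual-mode runtime service stub of the kind efi.c defines (argument types
assumed from the EFI headers; treat it as illustrative, not part of this
patch):

/*
 * Sketch: with the hunk above, each such invocation now syncs the low
 * kernel mappings and performs the firmware call with preemption
 * disabled, via _efi_call_virtX().
 */
static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
	return efi_call_virt2(get_time, tm, tc);
}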
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0ecac257fb26..a83aa44bb1fb 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
  */
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
 extern phys_addr_t slow_virt_to_phys(void *__address);
-
+extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
+				   unsigned numpages, unsigned long page_flags);
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_DEFS_H */
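
A note on the new prototype: despite the parameter's name, the u64 pfn slot
carries a physical address in this series -- populate_pte() in the pageattr.c
hunk below shifts it down by PAGE_SHIFT and advances it by PAGE_SIZE, and
__map_region() in efi_64.c passes md->phys_addr straight in. A hypothetical
call site (names made up, illustrative only):

/* Sketch: map one present, non-executable page at 'va' in an alternate PGD. */
static int example_map_one_page(pgd_t *pgd, u64 phys, unsigned long va)
{
	/* page_flags = _PAGE_NX keeps NX set; 0 would clear it (executable) */
	return kernel_map_pages_in_pgd(pgd, phys, va, 1, _PAGE_NX);
}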
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bb32480c2d71..b3b19f46c016 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -30,6 +30,7 @@
  */
 struct cpa_data {
 	unsigned long	*vaddr;
+	pgd_t		*pgd;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
 	int		numpages;
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 	return prot;
 }
 
-/*
- * Lookup the page table entry for a virtual address. Return a pointer
- * to the entry and the level of the mapping.
- *
- * Note: We return pud and pmd either when the entry is marked large
- * or when the present bit is not set. Otherwise we would return a
- * pointer to a nonexisting mapping.
- */
-pte_t *lookup_address(unsigned long address, unsigned int *level)
+static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+				      unsigned int *level)
 {
-	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
 
 	return pte_offset_kernel(pmd, address);
 }
+
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
+pte_t *lookup_address(unsigned long address, unsigned int *level)
+{
+	return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
+}
 EXPORT_SYMBOL_GPL(lookup_address);
 
+static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
+				  unsigned int *level)
+{
+	if (cpa->pgd)
+		return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
+					       address, level);
+
+	return lookup_address(address, level);
+}
+
 /*
  * This is necessary because __pa() does not work on some
  * kinds of memory, like vmalloc() or the alloc_remap()
@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * Check for races, another CPU might have split this page
 	 * up already:
 	 */
-	tmp = lookup_address(address, &level);
+	tmp = _lookup_address_cpa(cpa, address, &level);
 	if (tmp != kpte)
 		goto out_unlock;
 
@@ -543,7 +559,8 @@ out_unlock:
 }
 
 static int
-__split_large_page(pte_t *kpte, unsigned long address, struct page *base)
+__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
+		   struct page *base)
 {
 	pte_t *pbase = (pte_t *)page_address(base);
 	unsigned long pfn, pfninc = 1;
@@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
 	 */
-	tmp = lookup_address(address, &level);
+	tmp = _lookup_address_cpa(cpa, address, &level);
 	if (tmp != kpte) {
 		spin_unlock(&pgd_lock);
 		return 1;
@@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
 	return 0;
 }
 
-static int split_large_page(pte_t *kpte, unsigned long address)
+static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
+			    unsigned long address)
 {
 	struct page *base;
 
@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (!base)
 		return -ENOMEM;
 
-	if (__split_large_page(kpte, address, base))
+	if (__split_large_page(cpa, kpte, address, base))
 		__free_page(base);
 
 	return 0;
 }
 
+static bool try_to_free_pte_page(pte_t *pte)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		if (!pte_none(pte[i]))
+			return false;
+
+	free_page((unsigned long)pte);
+	return true;
+}
+
+static bool try_to_free_pmd_page(pmd_t *pmd)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		if (!pmd_none(pmd[i]))
+			return false;
+
+	free_page((unsigned long)pmd);
+	return true;
+}
+
+static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
+{
+	pte_t *pte = pte_offset_kernel(pmd, start);
+
+	while (start < end) {
+		set_pte(pte, __pte(0));
+
+		start += PAGE_SIZE;
+		pte++;
+	}
+
+	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
+		pmd_clear(pmd);
+		return true;
+	}
+	return false;
+}
+
+static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
+			      unsigned long start, unsigned long end)
+{
+	if (unmap_pte_range(pmd, start, end))
+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+			pud_clear(pud);
+}
+
+static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pud, start);
+
+	/*
+	 * Not on a 2MB page boundary?
+	 */
+	if (start & (PMD_SIZE - 1)) {
+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+		unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+		__unmap_pmd_range(pud, pmd, start, pre_end);
+
+		start = pre_end;
+		pmd++;
+	}
+
+	/*
+	 * Try to unmap in 2M chunks.
+	 */
+	while (end - start >= PMD_SIZE) {
+		if (pmd_large(*pmd))
+			pmd_clear(pmd);
+		else
+			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
+
+		start += PMD_SIZE;
+		pmd++;
+	}
+
+	/*
+	 * 4K leftovers?
+	 */
+	if (start < end)
+		return __unmap_pmd_range(pud, pmd, start, end);
+
+	/*
+	 * Try again to free the PMD page if haven't succeeded above.
+	 */
+	if (!pud_none(*pud))
+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+			pud_clear(pud);
+}
+
+static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+	pud_t *pud = pud_offset(pgd, start);
+
+	/*
+	 * Not on a GB page boundary?
+	 */
+	if (start & (PUD_SIZE - 1)) {
+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+		unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+		unmap_pmd_range(pud, start, pre_end);
+
+		start = pre_end;
+		pud++;
+	}
+
+	/*
+	 * Try to unmap in 1G chunks?
+	 */
+	while (end - start >= PUD_SIZE) {
+
+		if (pud_large(*pud))
+			pud_clear(pud);
+		else
+			unmap_pmd_range(pud, start, start + PUD_SIZE);
+
+		start += PUD_SIZE;
+		pud++;
+	}
+
+	/*
+	 * 2M leftovers?
+	 */
+	if (start < end)
+		unmap_pmd_range(pud, start, end);
+
+	/*
+	 * No need to try to free the PUD page because we'll free it in
+	 * populate_pgd's error path
+	 */
+}
+
+static int alloc_pte_page(pmd_t *pmd)
+{
+	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+	if (!pte)
+		return -1;
+
+	set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+	return 0;
+}
+
+static int alloc_pmd_page(pud_t *pud)
+{
+	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+	if (!pmd)
+		return -1;
+
+	set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	return 0;
+}
+
+static void populate_pte(struct cpa_data *cpa,
+			 unsigned long start, unsigned long end,
+			 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, start);
+
+	while (num_pages-- && start < end) {
+
+		/* deal with the NX bit */
+		if (!(pgprot_val(pgprot) & _PAGE_NX))
+			cpa->pfn &= ~_PAGE_NX;
+
+		set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
+
+		start	 += PAGE_SIZE;
+		cpa->pfn += PAGE_SIZE;
+		pte++;
+	}
+}
+
+static int populate_pmd(struct cpa_data *cpa,
+			unsigned long start, unsigned long end,
+			unsigned num_pages, pud_t *pud, pgprot_t pgprot)
+{
+	unsigned int cur_pages = 0;
+	pmd_t *pmd;
+
+	/*
+	 * Not on a 2M boundary?
+	 */
+	if (start & (PMD_SIZE - 1)) {
+		unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+
+		pre_end   = min_t(unsigned long, pre_end, next_page);
+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
+		cur_pages = min_t(unsigned int, num_pages, cur_pages);
+
+		/*
+		 * Need a PTE page?
+		 */
+		pmd = pmd_offset(pud, start);
+		if (pmd_none(*pmd))
+			if (alloc_pte_page(pmd))
+				return -1;
+
+		populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
+
+		start = pre_end;
+	}
+
+	/*
+	 * We mapped them all?
+	 */
+	if (num_pages == cur_pages)
+		return cur_pages;
+
+	while (end - start >= PMD_SIZE) {
+
+		/*
+		 * We cannot use a 1G page so allocate a PMD page if needed.
+		 */
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		pmd = pmd_offset(pud, start);
+
+		set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
+
+		start	  += PMD_SIZE;
+		cpa->pfn  += PMD_SIZE;
+		cur_pages += PMD_SIZE >> PAGE_SHIFT;
+	}
+
+	/*
+	 * Map trailing 4K pages.
+	 */
+	if (start < end) {
+		pmd = pmd_offset(pud, start);
+		if (pmd_none(*pmd))
+			if (alloc_pte_page(pmd))
+				return -1;
+
+		populate_pte(cpa, start, end, num_pages - cur_pages,
+			     pmd, pgprot);
+	}
+	return num_pages;
+}
+
+static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+			pgprot_t pgprot)
+{
+	pud_t *pud;
+	unsigned long end;
+	int cur_pages = 0;
+
+	end = start + (cpa->numpages << PAGE_SHIFT);
+
+	/*
+	 * Not on a Gb page boundary? => map everything up to it with
+	 * smaller pages.
+	 */
+	if (start & (PUD_SIZE - 1)) {
+		unsigned long pre_end;
+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+
+		pre_end   = min_t(unsigned long, end, next_page);
+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
+		cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
+
+		pud = pud_offset(pgd, start);
+
+		/*
+		 * Need a PMD page?
+		 */
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
+					 pud, pgprot);
+		if (cur_pages < 0)
+			return cur_pages;
+
+		start = pre_end;
+	}
+
+	/* We mapped them all? */
+	if (cpa->numpages == cur_pages)
+		return cur_pages;
+
+	pud = pud_offset(pgd, start);
+
+	/*
+	 * Map everything starting from the Gb boundary, possibly with 1G pages
+	 */
+	while (end - start >= PUD_SIZE) {
+		set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
+
+		start	  += PUD_SIZE;
+		cpa->pfn  += PUD_SIZE;
+		cur_pages += PUD_SIZE >> PAGE_SHIFT;
+		pud++;
+	}
+
+	/* Map trailing leftover */
+	if (start < end) {
+		int tmp;
+
+		pud = pud_offset(pgd, start);
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
+				   pud, pgprot);
+		if (tmp < 0)
+			return cur_pages;
+
+		cur_pages += tmp;
+	}
+	return cur_pages;
+}
+
+/*
+ * Restrictions for kernel page table do not necessarily apply when mapping in
+ * an alternate PGD.
+ */
+static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
+{
+	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
+	bool allocd_pgd = false;
+	pgd_t *pgd_entry;
+	pud_t *pud = NULL;	/* shut up gcc */
+	int ret;
+
+	pgd_entry = cpa->pgd + pgd_index(addr);
+
+	/*
+	 * Allocate a PUD page and hand it down for mapping.
+	 */
+	if (pgd_none(*pgd_entry)) {
+		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+		if (!pud)
+			return -1;
+
+		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
+		allocd_pgd = true;
+	}
+
+	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
+
+	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
+	if (ret < 0) {
+		unmap_pud_range(pgd_entry, addr,
+				addr + (cpa->numpages << PAGE_SHIFT));
+
+		if (allocd_pgd) {
+			/*
+			 * If I allocated this PUD page, I can just as well
+			 * free it in this error path.
+			 */
+			pgd_clear(pgd_entry);
+			free_page((unsigned long)pud);
+		}
+		return ret;
+	}
+	cpa->numpages = ret;
+	return 0;
+}
+
 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 			       int primary)
 {
+	if (cpa->pgd)
+		return populate_pgd(cpa, vaddr);
+
 	/*
 	 * Ignore all non primary paths.
 	 */
@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 	else
 		address = *cpa->vaddr;
 repeat:
-	kpte = lookup_address(address, &level);
+	kpte = _lookup_address_cpa(cpa, address, &level);
 	if (!kpte)
 		return __cpa_process_fault(cpa, address, primary);
 
@@ -761,7 +1154,7 @@ repeat:
 	/*
 	 * We have to split the large page:
 	 */
-	err = split_large_page(kpte, address);
+	err = split_large_page(cpa, kpte, address);
 	if (!err) {
 		/*
 		 * Do a global flush tlb after splitting the large page
@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	int ret, cache, checkalias;
 	unsigned long baddr = 0;
 
+	memset(&cpa, 0, sizeof(cpa));
+
 	/*
 	 * Check, if we are requested to change a not supported
 	 * feature:
@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
 {
 	unsigned long tempaddr = (unsigned long) page_address(page);
 	struct cpa_data cpa = { .vaddr = &tempaddr,
+				.pgd = NULL,
 				.numpages = numpages,
 				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
 				.mask_clr = __pgprot(0),
@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
 {
 	unsigned long tempaddr = (unsigned long) page_address(page);
 	struct cpa_data cpa = { .vaddr = &tempaddr,
+				.pgd = NULL,
 				.numpages = numpages,
 				.mask_set = __pgprot(0),
 				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page)
 
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
+int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
+			    unsigned numpages, unsigned long page_flags)
+{
+	int retval = -EINVAL;
+
+	struct cpa_data cpa = {
+		.vaddr = &address,
+		.pfn = pfn,
+		.pgd = pgd,
+		.numpages = numpages,
+		.mask_set = __pgprot(0),
+		.mask_clr = __pgprot(0),
+		.flags = 0,
+	};
+
+	if (!(__supported_pte_mask & _PAGE_NX))
+		goto out;
+
+	if (!(page_flags & _PAGE_NX))
+		cpa.mask_clr = __pgprot(_PAGE_NX);
+
+	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
+
+	retval = __change_page_attr_set_clr(&cpa, 0);
+	__flush_tlb_all();
+
+out:
+	return retval;
+}
+
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
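
All the populate_*() and unmap_*() helpers added above share one
range-splitting pattern: peel off an unaligned head up to the next
large-page boundary, walk the aligned middle in large-page steps, then
handle the smaller-granularity tail. A standalone sketch of just that
arithmetic (illustrative; the kernel constants are hard-coded):

#include <stdio.h>

#define PMD_SIZE	(1UL << 21)	/* 2M */
#define PMD_MASK	(~(PMD_SIZE - 1))

static void walk(unsigned long start, unsigned long end)
{
	if (start & (PMD_SIZE - 1)) {		/* unaligned head */
		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
		unsigned long pre_end = end < next_page ? end : next_page;

		printf("4K head: %#lx - %#lx\n", start, pre_end);
		start = pre_end;
	}

	while (end - start >= PMD_SIZE) {	/* 2M body */
		printf("2M page: %#lx - %#lx\n", start, start + PMD_SIZE);
		start += PMD_SIZE;
	}

	if (start < end)			/* 4K tail */
		printf("4K tail: %#lx - %#lx\n", start, end);
}

int main(void)
{
	walk(0x1ff000, 0x601000);	/* spans two full 2M pages */
	return 0;
}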
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 92c02344a060..f8ec4dafc74e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -12,6 +12,8 @@
  *	Bibo Mao <bibo.mao@intel.com>
  *	Chandramouli Narayanan <mouli@linux.intel.com>
  *	Huang Ying <ying.huang@intel.com>
+ * Copyright (C) 2013 SuSE Labs
+ *	Borislav Petkov <bp@suse.de> - runtime services VA mapping
  *
  * Copied from efi_32.c to eliminate the duplicated code between EFI
  * 32/64 support code. --ying 2007-10-26
@@ -51,7 +53,7 @@
 #include <asm/x86_init.h>
 #include <asm/rtc.h>
 
-#define EFI_DEBUG	1
+#define EFI_DEBUG
 
 #define EFI_MIN_RESERVE 5120
 
@@ -398,9 +400,9 @@ int __init efi_memblock_x86_reserve_range(void)
 	return 0;
 }
 
-#if EFI_DEBUG
 static void __init print_efi_memmap(void)
 {
+#ifdef EFI_DEBUG
 	efi_memory_desc_t *md;
 	void *p;
 	int i;
@@ -415,8 +417,8 @@ static void __init print_efi_memmap(void)
 			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
 			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
 	}
-}
 #endif  /*  EFI_DEBUG  */
+}
 
 void __init efi_reserve_boot_services(void)
 {
@@ -696,10 +698,7 @@ void __init efi_init(void)
 		x86_platform.set_wallclock = efi_set_rtc_mmss;
 	}
 #endif
-
-#if EFI_DEBUG
 	print_efi_memmap();
-#endif
 }
 
 void __init efi_late_init(void)
@@ -748,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size)
 	set_memory_uc(addr, npages);
 }
 
+void __init old_map_region(efi_memory_desc_t *md)
+{
+	u64 start_pfn, end_pfn, end;
+	unsigned long size;
+	void *va;
+
+	start_pfn = PFN_DOWN(md->phys_addr);
+	size	  = md->num_pages << PAGE_SHIFT;
+	end	  = md->phys_addr + size;
+	end_pfn   = PFN_UP(end);
+
+	if (pfn_range_is_mapped(start_pfn, end_pfn)) {
+		va = __va(md->phys_addr);
+
+		if (!(md->attribute & EFI_MEMORY_WB))
+			efi_memory_uc((u64)(unsigned long)va, size);
+	} else
+		va = efi_ioremap(md->phys_addr, size,
+				 md->type, md->attribute);
+
+	md->virt_addr = (u64) (unsigned long) va;
+	if (!va)
+		pr_err("ioremap of 0x%llX failed!\n",
+		       (unsigned long long)md->phys_addr);
+}
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
- * Essentially, look through the EFI memmap and map every region that
- * has the runtime attribute bit set in its memory descriptor and update
- * that memory descriptor with the virtual address obtained from ioremap().
- * This enables the runtime services to be called without having to
+ * Essentially, we look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor into the
+ * ->trampoline_pgd page table using a top-down VA allocation scheme.
+ *
+ * The old method which used to update that memory descriptor with the
+ * virtual address obtained from ioremap() is still supported when the
+ * kernel is booted with efi=old_map on its command line. Same old
+ * method enabled the runtime services to be called without having to
  * thunk back into physical mode for every invocation.
+ *
+ * The new method does a pagetable switch in a preemption-safe manner
+ * so that we're in a different address space when calling a runtime
+ * function. For function arguments passing we do copy the PGDs of the
+ * kernel page table into ->trampoline_pgd prior to each call.
  */
 void __init efi_enter_virtual_mode(void)
 {
 	efi_memory_desc_t *md, *prev_md = NULL;
-	efi_status_t status;
+	void *p, *new_memmap = NULL;
 	unsigned long size;
-	u64 end, systab, start_pfn, end_pfn;
-	void *p, *va, *new_memmap = NULL;
+	efi_status_t status;
+	u64 end, systab;
 	int count = 0;
 
 	efi.systab = NULL;
@@ -771,7 +805,6 @@ void __init efi_enter_virtual_mode(void)
 	 * We don't do virtual mode, since we don't do runtime services, on
 	 * non-native EFI
 	 */
-
 	if (!efi_is_native()) {
 		efi_unmap_memmap();
 		return;
@@ -802,6 +835,7 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		}
 		prev_md = md;
+
 	}
 
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -814,36 +848,24 @@ void __init efi_enter_virtual_mode(void)
 			continue;
 		}
 
+		efi_map_region(md);
+
 		size = md->num_pages << EFI_PAGE_SHIFT;
 		end = md->phys_addr + size;
 
-		start_pfn = PFN_DOWN(md->phys_addr);
-		end_pfn = PFN_UP(end);
-		if (pfn_range_is_mapped(start_pfn, end_pfn)) {
-			va = __va(md->phys_addr);
-
-			if (!(md->attribute & EFI_MEMORY_WB))
-				efi_memory_uc((u64)(unsigned long)va, size);
-		} else
-			va = efi_ioremap(md->phys_addr, size,
-					 md->type, md->attribute);
-
-		md->virt_addr = (u64) (unsigned long) va;
-
-		if (!va) {
-			pr_err("ioremap of 0x%llX failed!\n",
-			       (unsigned long long)md->phys_addr);
-			continue;
-		}
-
 		systab = (u64) (unsigned long) efi_phys.systab;
 		if (md->phys_addr <= systab && systab < end) {
 			systab += md->virt_addr - md->phys_addr;
+
 			efi.systab = (efi_system_table_t *) (unsigned long) systab;
 		}
+
 		new_memmap = krealloc(new_memmap,
 				      (count + 1) * memmap.desc_size,
 				      GFP_KERNEL);
+		if (!new_memmap)
+			goto err_out;
+
 		memcpy(new_memmap + (count * memmap.desc_size), md,
 		       memmap.desc_size);
 		count++;
@@ -851,6 +873,9 @@ void __init efi_enter_virtual_mode(void)
 
 	BUG_ON(!efi.systab);
 
+	efi_setup_page_tables();
+	efi_sync_low_kernel_mappings();
+
 	status = phys_efi_set_virtual_address_map(
 		memmap.desc_size * count,
 		memmap.desc_size,
@@ -883,7 +908,8 @@ void __init efi_enter_virtual_mode(void)
 	efi.query_variable_info = virt_efi_query_variable_info;
 	efi.update_capsule = virt_efi_update_capsule;
 	efi.query_capsule_caps = virt_efi_query_capsule_caps;
-	if (__supported_pte_mask & _PAGE_NX)
+
+	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
 		runtime_code_page_mkexec();
 
 	kfree(new_memmap);
@@ -894,6 +920,11 @@ void __init efi_enter_virtual_mode(void)
 			 EFI_VARIABLE_BOOTSERVICE_ACCESS |
 			 EFI_VARIABLE_RUNTIME_ACCESS,
 			 0, NULL);
+
+	return;
+
+err_out:
+	pr_err("Error reallocating memory, EFI runtime non-functional!\n");
 }
 
 /*
@@ -1013,3 +1044,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
 	return EFI_SUCCESS;
 }
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
+
+static int __init parse_efi_cmdline(char *str)
+{
+	if (*str == '=')
+		str++;
+
+	if (!strncmp(str, "old_map", 7))
+		set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
+
+	return 0;
+}
+early_param("efi", parse_efi_cmdline);
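
With the early_param() hook above in place, the chicken bit is flipped from
the boot loader; a made-up example boot entry:

	linux /boot/vmlinuz root=/dev/sda2 ro efi=old_map

This sets EFI_OLD_MEMMAP in x86_efi_facility before efi_enter_virtual_mode()
runs, which routes efi_map_region() back to the ioremap-based
old_map_region() shown earlier in this file.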
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 40e446941dd7..e94557cf5487 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -37,9 +37,16 @@
  * claim EFI runtime service handler exclusively and to duplicate a memory in
  * low memory space say 0 - 3G.
  */
-
 static unsigned long efi_rt_eflags;
 
+void efi_sync_low_kernel_mappings(void) {}
+void efi_setup_page_tables(void) {}
+
+void __init efi_map_region(efi_memory_desc_t *md)
+{
+	old_map_region(md);
+}
+
 void efi_call_phys_prelog(void)
 {
 	struct desc_ptr gdt_descr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 39a0e7f1f0a3..bf286c386d33 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,10 +38,28 @@
 #include <asm/efi.h>
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
+#include <asm/realmode.h>
 
 static pgd_t *save_pgd __initdata;
 static unsigned long efi_flags __initdata;
 
+/*
+ * We allocate runtime services regions bottom-up, starting from -4G, i.e.
+ * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
+ */
+static u64 efi_va	= -4 * (1UL << 30);
+#define EFI_VA_END	(-68 * (1UL << 30))
+
+/*
+ * Scratch space used for switching the pagetable in the EFI stub
+ */
+struct efi_scratch {
+	u64 r15;
+	u64 prev_cr3;
+	pgd_t *efi_pgt;
+	bool use_pgd;
+};
+
 static void __init early_code_mapping_set_exec(int executable)
 {
 	efi_memory_desc_t *md;
@@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void)
 	int pgd;
 	int n_pgds;
 
+	if (!efi_enabled(EFI_OLD_MEMMAP))
+		return;
+
 	early_code_mapping_set_exec(1);
 	local_irq_save(efi_flags);
 
@@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void)
 	 */
 	int pgd;
 	int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+	if (!efi_enabled(EFI_OLD_MEMMAP))
+		return;
+
 	for (pgd = 0; pgd < n_pgds; pgd++)
 		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
 	kfree(save_pgd);
@@ -94,6 +119,90 @@ void __init efi_call_phys_epilog(void)
 	early_code_mapping_set_exec(0);
 }
 
+/*
+ * Add low kernel mappings for passing arguments to EFI functions.
+ */
+void efi_sync_low_kernel_mappings(void)
+{
+	unsigned num_pgds;
+	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		return;
+
+	num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
+
+	memcpy(pgd + pgd_index(PAGE_OFFSET),
+	       init_mm.pgd + pgd_index(PAGE_OFFSET),
+	       sizeof(pgd_t) * num_pgds);
+}
+
+void efi_setup_page_tables(void)
+{
+	efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
+
+	if (!efi_enabled(EFI_OLD_MEMMAP))
+		efi_scratch.use_pgd = true;
+}
+
+static void __init __map_region(efi_memory_desc_t *md, u64 va)
+{
+	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+	unsigned long pf = 0, size;
+	u64 end;
+
+	if (!(md->attribute & EFI_MEMORY_WB))
+		pf |= _PAGE_PCD;
+
+	size = md->num_pages << PAGE_SHIFT;
+	end  = va + size;
+
+	if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
+		pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
+			md->phys_addr, va);
+}
+
+void __init efi_map_region(efi_memory_desc_t *md)
+{
+	unsigned long size = md->num_pages << PAGE_SHIFT;
+	u64 pa = md->phys_addr;
+
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		return old_map_region(md);
+
+	/*
+	 * Make sure the 1:1 mappings are present as a catch-all for b0rked
+	 * firmware which doesn't update all internal pointers after switching
+	 * to virtual mode and would otherwise crap on us.
+	 */
+	__map_region(md, md->phys_addr);
+
+	efi_va -= size;
+
+	/* Is PA 2M-aligned? */
+	if (!(pa & (PMD_SIZE - 1))) {
+		efi_va &= PMD_MASK;
+	} else {
+		u64 pa_offset = pa & (PMD_SIZE - 1);
+		u64 prev_va = efi_va;
+
+		/* get us the same offset within this 2M page */
+		efi_va = (efi_va & PMD_MASK) + pa_offset;
+
+		if (efi_va > prev_va)
+			efi_va -= PMD_SIZE;
+	}
+
+	if (efi_va < EFI_VA_END) {
+		pr_warn(FW_WARN "VA address range overflow!\n");
+		return;
+	}
+
+	/* Do the VA map */
+	__map_region(md, efi_va);
+	md->virt_addr = efi_va;
+}
+
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 				 u32 type, u64 attribute)
 {
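
The offset-preserving step in efi_map_region() is the subtle part: the new VA
must keep the physical address' offset within its 2M page, or large-page
mappings could not be used later. A standalone sketch of that rounding logic
(illustrative; the constants and the example region are made up):

#include <stdio.h>

#define PMD_SIZE (1ULL << 21)
#define PMD_MASK (~(PMD_SIZE - 1))

/* mirrors the efi_va adjustment in efi_map_region() above */
static unsigned long long place(unsigned long long efi_va,
				unsigned long long pa,
				unsigned long long size)
{
	efi_va -= size;

	if (!(pa & (PMD_SIZE - 1))) {
		efi_va &= PMD_MASK;
	} else {
		unsigned long long pa_offset = pa & (PMD_SIZE - 1);
		unsigned long long prev_va = efi_va;

		/* get us the same offset within this 2M page */
		efi_va = (efi_va & PMD_MASK) + pa_offset;

		if (efi_va > prev_va)	/* never allocate upwards */
			efi_va -= PMD_SIZE;
	}
	return efi_va;
}

int main(void)
{
	/* hypothetical region: 3 pages sitting at a 0x3000 offset into a 2M page */
	unsigned long long va = place(0xffffffff00000000ULL, 0x40003000ULL, 0x3000);

	/* both offsets print as 0x3000 */
	printf("VA = %#llx, 2M offset = %#llx\n", va, va & (PMD_SIZE - 1));
	return 0;
}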
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 4c07ccab8146..88073b140298 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -34,10 +34,47 @@
 	mov %rsi, %cr0;			\
 	mov (%rsp), %rsp
 
+/* stolen from gcc */
+.macro FLUSH_TLB_ALL
+	movq %r15, efi_scratch(%rip)
+	movq %r14, efi_scratch+8(%rip)
+	movq %cr4, %r15
+	movq %r15, %r14
+	andb $0x7f, %r14b
+	movq %r14, %cr4
+	movq %r15, %cr4
+	movq efi_scratch+8(%rip), %r14
+	movq efi_scratch(%rip), %r15
+.endm
+
+.macro SWITCH_PGT
+	cmpb $0, efi_scratch+24(%rip)
+	je 1f
+	movq %r15, efi_scratch(%rip)		# r15
+	# save previous CR3
+	movq %cr3, %r15
+	movq %r15, efi_scratch+8(%rip)		# prev_cr3
+	movq efi_scratch+16(%rip), %r15		# EFI pgt
+	movq %r15, %cr3
+1:
+.endm
+
+.macro RESTORE_PGT
+	cmpb $0, efi_scratch+24(%rip)
+	je 2f
+	movq efi_scratch+8(%rip), %r15
+	movq %r15, %cr3
+	movq efi_scratch(%rip), %r15
+	FLUSH_TLB_ALL
+2:
+.endm
+
 ENTRY(efi_call0)
 	SAVE_XMM
 	subq $32, %rsp
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -47,7 +84,9 @@ ENTRY(efi_call1)
 	SAVE_XMM
 	subq $32, %rsp
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -57,7 +96,9 @@ ENTRY(efi_call2)
 	SAVE_XMM
 	subq $32, %rsp
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -68,7 +109,9 @@ ENTRY(efi_call3)
 	subq $32, %rsp
 	mov %rcx, %r8
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -80,7 +123,9 @@ ENTRY(efi_call4)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $32, %rsp
 	RESTORE_XMM
 	ret
@@ -93,7 +138,9 @@ ENTRY(efi_call5)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $48, %rsp
 	RESTORE_XMM
 	ret
@@ -109,8 +156,15 @@ ENTRY(efi_call6)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
+	SWITCH_PGT
 	call *%rdi
+	RESTORE_PGT
 	addq $48, %rsp
 	RESTORE_XMM
 	ret
 ENDPROC(efi_call6)
+
+	.data
+ENTRY(efi_scratch)
+	.fill 3,8,0
+	.byte 0
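
The literal offsets in SWITCH_PGT/RESTORE_PGT (+0, +8, +16, +24) and the
".fill 3,8,0 / .byte 0" reservation have to stay in lockstep with struct
efi_scratch in efi_64.c; nothing in this diff generates them automatically.
A compile-time check of that invariant on x86-64/LP64 (a C11 sketch with
userspace types substituted, illustrative only):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* mirror of struct efi_scratch from efi_64.c (pgd_t * becomes void *) */
struct efi_scratch {
	uint64_t r15;		/* efi_scratch+0:  scratch slot for %r15     */
	uint64_t prev_cr3;	/* efi_scratch+8:  saved %cr3                */
	void *efi_pgt;		/* efi_scratch+16: EFI page table            */
	bool use_pgd;		/* efi_scratch+24: flag tested by SWITCH_PGT */
};

_Static_assert(offsetof(struct efi_scratch, prev_cr3) == 8,  "asm offset drift");
_Static_assert(offsetof(struct efi_scratch, efi_pgt)  == 16, "asm offset drift");
_Static_assert(offsetof(struct efi_scratch, use_pgd)  == 24, "asm offset drift");

int main(void) { return 0; }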
diff --git a/include/linux/efi.h b/include/linux/efi.h
index bc5687d0f315..6c0ca528300c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -653,6 +653,7 @@ extern int __init efi_setup_pcdp_console(char *);
 #define EFI_RUNTIME_SERVICES	3	/* Can we use runtime services? */
 #define EFI_MEMMAP		4	/* Can we use EFI memory map? */
 #define EFI_64BIT		5	/* Is the firmware 64-bit? */
+#define EFI_ARCH_1		6	/* First arch-specific bit */
 
 #ifdef CONFIG_EFI
 # ifdef CONFIG_X86