aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2013-11-26 06:23:04 -0500
committerIngo Molnar <mingo@kernel.org>2013-11-26 06:23:04 -0500
commit61d066977583803d333f1e7266b8ba772162dda4 (patch)
tree087d56e401422f1a8a165a782216aa6d0291a60e
parentb975dc3689fc6a3718ad288ce080924f9cb7e176 (diff)
parentee41143027706d9f342dfe05487a00b20887fde7 (diff)
Merge tag 'efi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi into x86/efi
Pull EFI virtual mapping changes from Matt Fleming: * New static EFI runtime services virtual mapping layout which is groundwork for kexec support on EFI. (Borislav Petkov) Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--Documentation/kernel-parameters.txt6
-rw-r--r--Documentation/x86/x86_64/mm.txt7
-rw-r--r--arch/x86/include/asm/efi.h64
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/mm/pageattr.c461
-rw-r--r--arch/x86/platform/efi/efi.c111
-rw-r--r--arch/x86/platform/efi/efi_32.c9
-rw-r--r--arch/x86/platform/efi/efi_64.c109
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S54
-rw-r--r--include/linux/efi.h1
10 files changed, 755 insertions, 70 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 50680a59a2ff..e06e99303dd3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -890,6 +890,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
890 edd= [EDD] 890 edd= [EDD]
891 Format: {"off" | "on" | "skip[mbr]"} 891 Format: {"off" | "on" | "skip[mbr]"}
892 892
893 efi= [EFI]
894 Format: { "old_map" }
895 old_map [X86-64]: switch to the old ioremap-based EFI
896 runtime services mapping. 32-bit still uses this one by
897 default.
898
893 efi_no_storage_paranoia [EFI; X86] 899 efi_no_storage_paranoia [EFI; X86]
894 Using this parameter you can use more than 50% of 900 Using this parameter you can use more than 50% of
895 your efi variable storage. Use this parameter only if 901 your efi variable storage. Use this parameter only if
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 881582f75c9c..c584a51add15 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -28,4 +28,11 @@ reference.
28Current X86-64 implementations only support 40 bits of address space, 28Current X86-64 implementations only support 40 bits of address space,
29but we support up to 46 bits. This expands into MBZ space in the page tables. 29but we support up to 46 bits. This expands into MBZ space in the page tables.
30 30
31->trampoline_pgd:
32
33We map EFI runtime services in the aforementioned PGD in the virtual
34range of 64Gb (arbitrarily set, can be raised if needed)
35
360xffffffef00000000 - 0xffffffff00000000
37
31-Andi Kleen, Jul 2004 38-Andi Kleen, Jul 2004
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 65c6e6e3a552..89a05b0507b9 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,6 +1,24 @@
1#ifndef _ASM_X86_EFI_H 1#ifndef _ASM_X86_EFI_H
2#define _ASM_X86_EFI_H 2#define _ASM_X86_EFI_H
3 3
4/*
5 * We map the EFI regions needed for runtime services non-contiguously,
6 * with preserved alignment on virtual addresses starting from -4G down
7 * for a total max space of 64G. This way, we provide for stable runtime
8 * services addresses across kernels so that a kexec'd kernel can still
9 * use them.
10 *
11 * This is the main reason why we're doing stable VA mappings for RT
12 * services.
13 *
14 * This flag is used in conjuction with a chicken bit called
15 * "efi=old_map" which can be used as a fallback to the old runtime
16 * services mapping method in case there's some b0rkage with a
17 * particular EFI implementation (haha, it is hard to hold up the
18 * sarcasm here...).
19 */
20#define EFI_OLD_MEMMAP EFI_ARCH_1
21
4#ifdef CONFIG_X86_32 22#ifdef CONFIG_X86_32
5 23
6#define EFI_LOADER_SIGNATURE "EL32" 24#define EFI_LOADER_SIGNATURE "EL32"
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
69 efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ 87 efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
70 (u64)(a4), (u64)(a5), (u64)(a6)) 88 (u64)(a4), (u64)(a5), (u64)(a6))
71 89
90#define _efi_call_virtX(x, f, ...) \
91({ \
92 efi_status_t __s; \
93 \
94 efi_sync_low_kernel_mappings(); \
95 preempt_disable(); \
96 __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \
97 preempt_enable(); \
98 __s; \
99})
100
72#define efi_call_virt0(f) \ 101#define efi_call_virt0(f) \
73 efi_call0((efi.systab->runtime->f)) 102 _efi_call_virtX(0, f)
74#define efi_call_virt1(f, a1) \ 103#define efi_call_virt1(f, a1) \
75 efi_call1((efi.systab->runtime->f), (u64)(a1)) 104 _efi_call_virtX(1, f, (u64)(a1))
76#define efi_call_virt2(f, a1, a2) \ 105#define efi_call_virt2(f, a1, a2) \
77 efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) 106 _efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
78#define efi_call_virt3(f, a1, a2, a3) \ 107#define efi_call_virt3(f, a1, a2, a3) \
79 efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 108 _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
80 (u64)(a3)) 109#define efi_call_virt4(f, a1, a2, a3, a4) \
81#define efi_call_virt4(f, a1, a2, a3, a4) \ 110 _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
82 efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 111#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
83 (u64)(a3), (u64)(a4)) 112 _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
84#define efi_call_virt5(f, a1, a2, a3, a4, a5) \ 113#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
85 efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 114 _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
86 (u64)(a3), (u64)(a4), (u64)(a5))
87#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
88 efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
89 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
90 115
91extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, 116extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
92 u32 type, u64 attribute); 117 u32 type, u64 attribute);
@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
95 120
96extern int add_efi_memmap; 121extern int add_efi_memmap;
97extern unsigned long x86_efi_facility; 122extern unsigned long x86_efi_facility;
123extern struct efi_scratch efi_scratch;
98extern void efi_set_executable(efi_memory_desc_t *md, bool executable); 124extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
99extern int efi_memblock_x86_reserve_range(void); 125extern int efi_memblock_x86_reserve_range(void);
100extern void efi_call_phys_prelog(void); 126extern void efi_call_phys_prelog(void);
101extern void efi_call_phys_epilog(void); 127extern void efi_call_phys_epilog(void);
102extern void efi_unmap_memmap(void); 128extern void efi_unmap_memmap(void);
103extern void efi_memory_uc(u64 addr, unsigned long size); 129extern void efi_memory_uc(u64 addr, unsigned long size);
130extern void __init efi_map_region(efi_memory_desc_t *md);
131extern void efi_sync_low_kernel_mappings(void);
132extern void efi_setup_page_tables(void);
133extern void __init old_map_region(efi_memory_desc_t *md);
104 134
105#ifdef CONFIG_EFI 135#ifdef CONFIG_EFI
106 136
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0ecac257fb26..a83aa44bb1fb 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
382 */ 382 */
383extern pte_t *lookup_address(unsigned long address, unsigned int *level); 383extern pte_t *lookup_address(unsigned long address, unsigned int *level);
384extern phys_addr_t slow_virt_to_phys(void *__address); 384extern phys_addr_t slow_virt_to_phys(void *__address);
385 385extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
386 unsigned numpages, unsigned long page_flags);
386#endif /* !__ASSEMBLY__ */ 387#endif /* !__ASSEMBLY__ */
387 388
388#endif /* _ASM_X86_PGTABLE_DEFS_H */ 389#endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bb32480c2d71..b3b19f46c016 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -30,6 +30,7 @@
30 */ 30 */
31struct cpa_data { 31struct cpa_data {
32 unsigned long *vaddr; 32 unsigned long *vaddr;
33 pgd_t *pgd;
33 pgprot_t mask_set; 34 pgprot_t mask_set;
34 pgprot_t mask_clr; 35 pgprot_t mask_clr;
35 int numpages; 36 int numpages;
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
322 return prot; 323 return prot;
323} 324}
324 325
325/* 326static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
326 * Lookup the page table entry for a virtual address. Return a pointer 327 unsigned int *level)
327 * to the entry and the level of the mapping.
328 *
329 * Note: We return pud and pmd either when the entry is marked large
330 * or when the present bit is not set. Otherwise we would return a
331 * pointer to a nonexisting mapping.
332 */
333pte_t *lookup_address(unsigned long address, unsigned int *level)
334{ 328{
335 pgd_t *pgd = pgd_offset_k(address);
336 pud_t *pud; 329 pud_t *pud;
337 pmd_t *pmd; 330 pmd_t *pmd;
338 331
@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
361 354
362 return pte_offset_kernel(pmd, address); 355 return pte_offset_kernel(pmd, address);
363} 356}
357
358/*
359 * Lookup the page table entry for a virtual address. Return a pointer
360 * to the entry and the level of the mapping.
361 *
362 * Note: We return pud and pmd either when the entry is marked large
363 * or when the present bit is not set. Otherwise we would return a
364 * pointer to a nonexisting mapping.
365 */
366pte_t *lookup_address(unsigned long address, unsigned int *level)
367{
368 return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
369}
364EXPORT_SYMBOL_GPL(lookup_address); 370EXPORT_SYMBOL_GPL(lookup_address);
365 371
372static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
373 unsigned int *level)
374{
375 if (cpa->pgd)
376 return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
377 address, level);
378
379 return lookup_address(address, level);
380}
381
366/* 382/*
367 * This is necessary because __pa() does not work on some 383 * This is necessary because __pa() does not work on some
368 * kinds of memory, like vmalloc() or the alloc_remap() 384 * kinds of memory, like vmalloc() or the alloc_remap()
@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
437 * Check for races, another CPU might have split this page 453 * Check for races, another CPU might have split this page
438 * up already: 454 * up already:
439 */ 455 */
440 tmp = lookup_address(address, &level); 456 tmp = _lookup_address_cpa(cpa, address, &level);
441 if (tmp != kpte) 457 if (tmp != kpte)
442 goto out_unlock; 458 goto out_unlock;
443 459
@@ -543,7 +559,8 @@ out_unlock:
543} 559}
544 560
545static int 561static int
546__split_large_page(pte_t *kpte, unsigned long address, struct page *base) 562__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
563 struct page *base)
547{ 564{
548 pte_t *pbase = (pte_t *)page_address(base); 565 pte_t *pbase = (pte_t *)page_address(base);
549 unsigned long pfn, pfninc = 1; 566 unsigned long pfn, pfninc = 1;
@@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
556 * Check for races, another CPU might have split this page 573 * Check for races, another CPU might have split this page
557 * up for us already: 574 * up for us already:
558 */ 575 */
559 tmp = lookup_address(address, &level); 576 tmp = _lookup_address_cpa(cpa, address, &level);
560 if (tmp != kpte) { 577 if (tmp != kpte) {
561 spin_unlock(&pgd_lock); 578 spin_unlock(&pgd_lock);
562 return 1; 579 return 1;
@@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
632 return 0; 649 return 0;
633} 650}
634 651
635static int split_large_page(pte_t *kpte, unsigned long address) 652static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
653 unsigned long address)
636{ 654{
637 struct page *base; 655 struct page *base;
638 656
@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address)
644 if (!base) 662 if (!base)
645 return -ENOMEM; 663 return -ENOMEM;
646 664
647 if (__split_large_page(kpte, address, base)) 665 if (__split_large_page(cpa, kpte, address, base))
648 __free_page(base); 666 __free_page(base);
649 667
650 return 0; 668 return 0;
651} 669}
652 670
671static bool try_to_free_pte_page(pte_t *pte)
672{
673 int i;
674
675 for (i = 0; i < PTRS_PER_PTE; i++)
676 if (!pte_none(pte[i]))
677 return false;
678
679 free_page((unsigned long)pte);
680 return true;
681}
682
683static bool try_to_free_pmd_page(pmd_t *pmd)
684{
685 int i;
686
687 for (i = 0; i < PTRS_PER_PMD; i++)
688 if (!pmd_none(pmd[i]))
689 return false;
690
691 free_page((unsigned long)pmd);
692 return true;
693}
694
695static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
696{
697 pte_t *pte = pte_offset_kernel(pmd, start);
698
699 while (start < end) {
700 set_pte(pte, __pte(0));
701
702 start += PAGE_SIZE;
703 pte++;
704 }
705
706 if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
707 pmd_clear(pmd);
708 return true;
709 }
710 return false;
711}
712
713static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
714 unsigned long start, unsigned long end)
715{
716 if (unmap_pte_range(pmd, start, end))
717 if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
718 pud_clear(pud);
719}
720
721static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
722{
723 pmd_t *pmd = pmd_offset(pud, start);
724
725 /*
726 * Not on a 2MB page boundary?
727 */
728 if (start & (PMD_SIZE - 1)) {
729 unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
730 unsigned long pre_end = min_t(unsigned long, end, next_page);
731
732 __unmap_pmd_range(pud, pmd, start, pre_end);
733
734 start = pre_end;
735 pmd++;
736 }
737
738 /*
739 * Try to unmap in 2M chunks.
740 */
741 while (end - start >= PMD_SIZE) {
742 if (pmd_large(*pmd))
743 pmd_clear(pmd);
744 else
745 __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
746
747 start += PMD_SIZE;
748 pmd++;
749 }
750
751 /*
752 * 4K leftovers?
753 */
754 if (start < end)
755 return __unmap_pmd_range(pud, pmd, start, end);
756
757 /*
758 * Try again to free the PMD page if haven't succeeded above.
759 */
760 if (!pud_none(*pud))
761 if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
762 pud_clear(pud);
763}
764
765static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
766{
767 pud_t *pud = pud_offset(pgd, start);
768
769 /*
770 * Not on a GB page boundary?
771 */
772 if (start & (PUD_SIZE - 1)) {
773 unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
774 unsigned long pre_end = min_t(unsigned long, end, next_page);
775
776 unmap_pmd_range(pud, start, pre_end);
777
778 start = pre_end;
779 pud++;
780 }
781
782 /*
783 * Try to unmap in 1G chunks?
784 */
785 while (end - start >= PUD_SIZE) {
786
787 if (pud_large(*pud))
788 pud_clear(pud);
789 else
790 unmap_pmd_range(pud, start, start + PUD_SIZE);
791
792 start += PUD_SIZE;
793 pud++;
794 }
795
796 /*
797 * 2M leftovers?
798 */
799 if (start < end)
800 unmap_pmd_range(pud, start, end);
801
802 /*
803 * No need to try to free the PUD page because we'll free it in
804 * populate_pgd's error path
805 */
806}
807
808static int alloc_pte_page(pmd_t *pmd)
809{
810 pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
811 if (!pte)
812 return -1;
813
814 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
815 return 0;
816}
817
818static int alloc_pmd_page(pud_t *pud)
819{
820 pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
821 if (!pmd)
822 return -1;
823
824 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
825 return 0;
826}
827
828static void populate_pte(struct cpa_data *cpa,
829 unsigned long start, unsigned long end,
830 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
831{
832 pte_t *pte;
833
834 pte = pte_offset_kernel(pmd, start);
835
836 while (num_pages-- && start < end) {
837
838 /* deal with the NX bit */
839 if (!(pgprot_val(pgprot) & _PAGE_NX))
840 cpa->pfn &= ~_PAGE_NX;
841
842 set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
843
844 start += PAGE_SIZE;
845 cpa->pfn += PAGE_SIZE;
846 pte++;
847 }
848}
849
850static int populate_pmd(struct cpa_data *cpa,
851 unsigned long start, unsigned long end,
852 unsigned num_pages, pud_t *pud, pgprot_t pgprot)
853{
854 unsigned int cur_pages = 0;
855 pmd_t *pmd;
856
857 /*
858 * Not on a 2M boundary?
859 */
860 if (start & (PMD_SIZE - 1)) {
861 unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
862 unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
863
864 pre_end = min_t(unsigned long, pre_end, next_page);
865 cur_pages = (pre_end - start) >> PAGE_SHIFT;
866 cur_pages = min_t(unsigned int, num_pages, cur_pages);
867
868 /*
869 * Need a PTE page?
870 */
871 pmd = pmd_offset(pud, start);
872 if (pmd_none(*pmd))
873 if (alloc_pte_page(pmd))
874 return -1;
875
876 populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
877
878 start = pre_end;
879 }
880
881 /*
882 * We mapped them all?
883 */
884 if (num_pages == cur_pages)
885 return cur_pages;
886
887 while (end - start >= PMD_SIZE) {
888
889 /*
890 * We cannot use a 1G page so allocate a PMD page if needed.
891 */
892 if (pud_none(*pud))
893 if (alloc_pmd_page(pud))
894 return -1;
895
896 pmd = pmd_offset(pud, start);
897
898 set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
899
900 start += PMD_SIZE;
901 cpa->pfn += PMD_SIZE;
902 cur_pages += PMD_SIZE >> PAGE_SHIFT;
903 }
904
905 /*
906 * Map trailing 4K pages.
907 */
908 if (start < end) {
909 pmd = pmd_offset(pud, start);
910 if (pmd_none(*pmd))
911 if (alloc_pte_page(pmd))
912 return -1;
913
914 populate_pte(cpa, start, end, num_pages - cur_pages,
915 pmd, pgprot);
916 }
917 return num_pages;
918}
919
920static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
921 pgprot_t pgprot)
922{
923 pud_t *pud;
924 unsigned long end;
925 int cur_pages = 0;
926
927 end = start + (cpa->numpages << PAGE_SHIFT);
928
929 /*
930 * Not on a Gb page boundary? => map everything up to it with
931 * smaller pages.
932 */
933 if (start & (PUD_SIZE - 1)) {
934 unsigned long pre_end;
935 unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
936
937 pre_end = min_t(unsigned long, end, next_page);
938 cur_pages = (pre_end - start) >> PAGE_SHIFT;
939 cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
940
941 pud = pud_offset(pgd, start);
942
943 /*
944 * Need a PMD page?
945 */
946 if (pud_none(*pud))
947 if (alloc_pmd_page(pud))
948 return -1;
949
950 cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
951 pud, pgprot);
952 if (cur_pages < 0)
953 return cur_pages;
954
955 start = pre_end;
956 }
957
958 /* We mapped them all? */
959 if (cpa->numpages == cur_pages)
960 return cur_pages;
961
962 pud = pud_offset(pgd, start);
963
964 /*
965 * Map everything starting from the Gb boundary, possibly with 1G pages
966 */
967 while (end - start >= PUD_SIZE) {
968 set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
969
970 start += PUD_SIZE;
971 cpa->pfn += PUD_SIZE;
972 cur_pages += PUD_SIZE >> PAGE_SHIFT;
973 pud++;
974 }
975
976 /* Map trailing leftover */
977 if (start < end) {
978 int tmp;
979
980 pud = pud_offset(pgd, start);
981 if (pud_none(*pud))
982 if (alloc_pmd_page(pud))
983 return -1;
984
985 tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
986 pud, pgprot);
987 if (tmp < 0)
988 return cur_pages;
989
990 cur_pages += tmp;
991 }
992 return cur_pages;
993}
994
995/*
996 * Restrictions for kernel page table do not necessarily apply when mapping in
997 * an alternate PGD.
998 */
999static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
1000{
1001 pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
1002 bool allocd_pgd = false;
1003 pgd_t *pgd_entry;
1004 pud_t *pud = NULL; /* shut up gcc */
1005 int ret;
1006
1007 pgd_entry = cpa->pgd + pgd_index(addr);
1008
1009 /*
1010 * Allocate a PUD page and hand it down for mapping.
1011 */
1012 if (pgd_none(*pgd_entry)) {
1013 pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
1014 if (!pud)
1015 return -1;
1016
1017 set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
1018 allocd_pgd = true;
1019 }
1020
1021 pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
1022 pgprot_val(pgprot) |= pgprot_val(cpa->mask_set);
1023
1024 ret = populate_pud(cpa, addr, pgd_entry, pgprot);
1025 if (ret < 0) {
1026 unmap_pud_range(pgd_entry, addr,
1027 addr + (cpa->numpages << PAGE_SHIFT));
1028
1029 if (allocd_pgd) {
1030 /*
1031 * If I allocated this PUD page, I can just as well
1032 * free it in this error path.
1033 */
1034 pgd_clear(pgd_entry);
1035 free_page((unsigned long)pud);
1036 }
1037 return ret;
1038 }
1039 cpa->numpages = ret;
1040 return 0;
1041}
1042
653static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, 1043static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
654 int primary) 1044 int primary)
655{ 1045{
1046 if (cpa->pgd)
1047 return populate_pgd(cpa, vaddr);
1048
656 /* 1049 /*
657 * Ignore all non primary paths. 1050 * Ignore all non primary paths.
658 */ 1051 */
@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
697 else 1090 else
698 address = *cpa->vaddr; 1091 address = *cpa->vaddr;
699repeat: 1092repeat:
700 kpte = lookup_address(address, &level); 1093 kpte = _lookup_address_cpa(cpa, address, &level);
701 if (!kpte) 1094 if (!kpte)
702 return __cpa_process_fault(cpa, address, primary); 1095 return __cpa_process_fault(cpa, address, primary);
703 1096
@@ -761,7 +1154,7 @@ repeat:
761 /* 1154 /*
762 * We have to split the large page: 1155 * We have to split the large page:
763 */ 1156 */
764 err = split_large_page(kpte, address); 1157 err = split_large_page(cpa, kpte, address);
765 if (!err) { 1158 if (!err) {
766 /* 1159 /*
767 * Do a global flush tlb after splitting the large page 1160 * Do a global flush tlb after splitting the large page
@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
910 int ret, cache, checkalias; 1303 int ret, cache, checkalias;
911 unsigned long baddr = 0; 1304 unsigned long baddr = 0;
912 1305
1306 memset(&cpa, 0, sizeof(cpa));
1307
913 /* 1308 /*
914 * Check, if we are requested to change a not supported 1309 * Check, if we are requested to change a not supported
915 * feature: 1310 * feature:
@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
1356{ 1751{
1357 unsigned long tempaddr = (unsigned long) page_address(page); 1752 unsigned long tempaddr = (unsigned long) page_address(page);
1358 struct cpa_data cpa = { .vaddr = &tempaddr, 1753 struct cpa_data cpa = { .vaddr = &tempaddr,
1754 .pgd = NULL,
1359 .numpages = numpages, 1755 .numpages = numpages,
1360 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), 1756 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
1361 .mask_clr = __pgprot(0), 1757 .mask_clr = __pgprot(0),
@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
1374{ 1770{
1375 unsigned long tempaddr = (unsigned long) page_address(page); 1771 unsigned long tempaddr = (unsigned long) page_address(page);
1376 struct cpa_data cpa = { .vaddr = &tempaddr, 1772 struct cpa_data cpa = { .vaddr = &tempaddr,
1773 .pgd = NULL,
1377 .numpages = numpages, 1774 .numpages = numpages,
1378 .mask_set = __pgprot(0), 1775 .mask_set = __pgprot(0),
1379 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), 1776 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page)
1434 1831
1435#endif /* CONFIG_DEBUG_PAGEALLOC */ 1832#endif /* CONFIG_DEBUG_PAGEALLOC */
1436 1833
1834int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
1835 unsigned numpages, unsigned long page_flags)
1836{
1837 int retval = -EINVAL;
1838
1839 struct cpa_data cpa = {
1840 .vaddr = &address,
1841 .pfn = pfn,
1842 .pgd = pgd,
1843 .numpages = numpages,
1844 .mask_set = __pgprot(0),
1845 .mask_clr = __pgprot(0),
1846 .flags = 0,
1847 };
1848
1849 if (!(__supported_pte_mask & _PAGE_NX))
1850 goto out;
1851
1852 if (!(page_flags & _PAGE_NX))
1853 cpa.mask_clr = __pgprot(_PAGE_NX);
1854
1855 cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
1856
1857 retval = __change_page_attr_set_clr(&cpa, 0);
1858 __flush_tlb_all();
1859
1860out:
1861 return retval;
1862}
1863
1437/* 1864/*
1438 * The testcases use internal knowledge of the implementation that shouldn't 1865 * The testcases use internal knowledge of the implementation that shouldn't
1439 * be exposed to the rest of the kernel. Include these directly here. 1866 * be exposed to the rest of the kernel. Include these directly here.
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 92c02344a060..f8ec4dafc74e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -12,6 +12,8 @@
12 * Bibo Mao <bibo.mao@intel.com> 12 * Bibo Mao <bibo.mao@intel.com>
13 * Chandramouli Narayanan <mouli@linux.intel.com> 13 * Chandramouli Narayanan <mouli@linux.intel.com>
14 * Huang Ying <ying.huang@intel.com> 14 * Huang Ying <ying.huang@intel.com>
15 * Copyright (C) 2013 SuSE Labs
16 * Borislav Petkov <bp@suse.de> - runtime services VA mapping
15 * 17 *
16 * Copied from efi_32.c to eliminate the duplicated code between EFI 18 * Copied from efi_32.c to eliminate the duplicated code between EFI
17 * 32/64 support code. --ying 2007-10-26 19 * 32/64 support code. --ying 2007-10-26
@@ -51,7 +53,7 @@
51#include <asm/x86_init.h> 53#include <asm/x86_init.h>
52#include <asm/rtc.h> 54#include <asm/rtc.h>
53 55
54#define EFI_DEBUG 1 56#define EFI_DEBUG
55 57
56#define EFI_MIN_RESERVE 5120 58#define EFI_MIN_RESERVE 5120
57 59
@@ -398,9 +400,9 @@ int __init efi_memblock_x86_reserve_range(void)
398 return 0; 400 return 0;
399} 401}
400 402
401#if EFI_DEBUG
402static void __init print_efi_memmap(void) 403static void __init print_efi_memmap(void)
403{ 404{
405#ifdef EFI_DEBUG
404 efi_memory_desc_t *md; 406 efi_memory_desc_t *md;
405 void *p; 407 void *p;
406 int i; 408 int i;
@@ -415,8 +417,8 @@ static void __init print_efi_memmap(void)
415 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), 417 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
416 (md->num_pages >> (20 - EFI_PAGE_SHIFT))); 418 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
417 } 419 }
418}
419#endif /* EFI_DEBUG */ 420#endif /* EFI_DEBUG */
421}
420 422
421void __init efi_reserve_boot_services(void) 423void __init efi_reserve_boot_services(void)
422{ 424{
@@ -696,10 +698,7 @@ void __init efi_init(void)
696 x86_platform.set_wallclock = efi_set_rtc_mmss; 698 x86_platform.set_wallclock = efi_set_rtc_mmss;
697 } 699 }
698#endif 700#endif
699
700#if EFI_DEBUG
701 print_efi_memmap(); 701 print_efi_memmap();
702#endif
703} 702}
704 703
705void __init efi_late_init(void) 704void __init efi_late_init(void)
@@ -748,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size)
748 set_memory_uc(addr, npages); 747 set_memory_uc(addr, npages);
749} 748}
750 749
750void __init old_map_region(efi_memory_desc_t *md)
751{
752 u64 start_pfn, end_pfn, end;
753 unsigned long size;
754 void *va;
755
756 start_pfn = PFN_DOWN(md->phys_addr);
757 size = md->num_pages << PAGE_SHIFT;
758 end = md->phys_addr + size;
759 end_pfn = PFN_UP(end);
760
761 if (pfn_range_is_mapped(start_pfn, end_pfn)) {
762 va = __va(md->phys_addr);
763
764 if (!(md->attribute & EFI_MEMORY_WB))
765 efi_memory_uc((u64)(unsigned long)va, size);
766 } else
767 va = efi_ioremap(md->phys_addr, size,
768 md->type, md->attribute);
769
770 md->virt_addr = (u64) (unsigned long) va;
771 if (!va)
772 pr_err("ioremap of 0x%llX failed!\n",
773 (unsigned long long)md->phys_addr);
774}
775
751/* 776/*
752 * This function will switch the EFI runtime services to virtual mode. 777 * This function will switch the EFI runtime services to virtual mode.
753 * Essentially, look through the EFI memmap and map every region that 778 * Essentially, we look through the EFI memmap and map every region that
754 * has the runtime attribute bit set in its memory descriptor and update 779 * has the runtime attribute bit set in its memory descriptor into the
755 * that memory descriptor with the virtual address obtained from ioremap(). 780 * ->trampoline_pgd page table using a top-down VA allocation scheme.
756 * This enables the runtime services to be called without having to 781 *
782 * The old method which used to update that memory descriptor with the
783 * virtual address obtained from ioremap() is still supported when the
784 * kernel is booted with efi=old_map on its command line. Same old
785 * method enabled the runtime services to be called without having to
757 * thunk back into physical mode for every invocation. 786 * thunk back into physical mode for every invocation.
787 *
788 * The new method does a pagetable switch in a preemption-safe manner
789 * so that we're in a different address space when calling a runtime
790 * function. For function arguments passing we do copy the PGDs of the
791 * kernel page table into ->trampoline_pgd prior to each call.
758 */ 792 */
759void __init efi_enter_virtual_mode(void) 793void __init efi_enter_virtual_mode(void)
760{ 794{
761 efi_memory_desc_t *md, *prev_md = NULL; 795 efi_memory_desc_t *md, *prev_md = NULL;
762 efi_status_t status; 796 void *p, *new_memmap = NULL;
763 unsigned long size; 797 unsigned long size;
764 u64 end, systab, start_pfn, end_pfn; 798 efi_status_t status;
765 void *p, *va, *new_memmap = NULL; 799 u64 end, systab;
766 int count = 0; 800 int count = 0;
767 801
768 efi.systab = NULL; 802 efi.systab = NULL;
@@ -771,7 +805,6 @@ void __init efi_enter_virtual_mode(void)
771 * We don't do virtual mode, since we don't do runtime services, on 805 * We don't do virtual mode, since we don't do runtime services, on
772 * non-native EFI 806 * non-native EFI
773 */ 807 */
774
775 if (!efi_is_native()) { 808 if (!efi_is_native()) {
776 efi_unmap_memmap(); 809 efi_unmap_memmap();
777 return; 810 return;
@@ -802,6 +835,7 @@ void __init efi_enter_virtual_mode(void)
802 continue; 835 continue;
803 } 836 }
804 prev_md = md; 837 prev_md = md;
838
805 } 839 }
806 840
807 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 841 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -814,36 +848,24 @@ void __init efi_enter_virtual_mode(void)
814 continue; 848 continue;
815 } 849 }
816 850
851 efi_map_region(md);
852
817 size = md->num_pages << EFI_PAGE_SHIFT; 853 size = md->num_pages << EFI_PAGE_SHIFT;
818 end = md->phys_addr + size; 854 end = md->phys_addr + size;
819 855
820 start_pfn = PFN_DOWN(md->phys_addr);
821 end_pfn = PFN_UP(end);
822 if (pfn_range_is_mapped(start_pfn, end_pfn)) {
823 va = __va(md->phys_addr);
824
825 if (!(md->attribute & EFI_MEMORY_WB))
826 efi_memory_uc((u64)(unsigned long)va, size);
827 } else
828 va = efi_ioremap(md->phys_addr, size,
829 md->type, md->attribute);
830
831 md->virt_addr = (u64) (unsigned long) va;
832
833 if (!va) {
834 pr_err("ioremap of 0x%llX failed!\n",
835 (unsigned long long)md->phys_addr);
836 continue;
837 }
838
839 systab = (u64) (unsigned long) efi_phys.systab; 856 systab = (u64) (unsigned long) efi_phys.systab;
840 if (md->phys_addr <= systab && systab < end) { 857 if (md->phys_addr <= systab && systab < end) {
841 systab += md->virt_addr - md->phys_addr; 858 systab += md->virt_addr - md->phys_addr;
859
842 efi.systab = (efi_system_table_t *) (unsigned long) systab; 860 efi.systab = (efi_system_table_t *) (unsigned long) systab;
843 } 861 }
862
844 new_memmap = krealloc(new_memmap, 863 new_memmap = krealloc(new_memmap,
845 (count + 1) * memmap.desc_size, 864 (count + 1) * memmap.desc_size,
846 GFP_KERNEL); 865 GFP_KERNEL);
866 if (!new_memmap)
867 goto err_out;
868
847 memcpy(new_memmap + (count * memmap.desc_size), md, 869 memcpy(new_memmap + (count * memmap.desc_size), md,
848 memmap.desc_size); 870 memmap.desc_size);
849 count++; 871 count++;
@@ -851,6 +873,9 @@ void __init efi_enter_virtual_mode(void)
851 873
852 BUG_ON(!efi.systab); 874 BUG_ON(!efi.systab);
853 875
876 efi_setup_page_tables();
877 efi_sync_low_kernel_mappings();
878
854 status = phys_efi_set_virtual_address_map( 879 status = phys_efi_set_virtual_address_map(
855 memmap.desc_size * count, 880 memmap.desc_size * count,
856 memmap.desc_size, 881 memmap.desc_size,
@@ -883,7 +908,8 @@ void __init efi_enter_virtual_mode(void)
883 efi.query_variable_info = virt_efi_query_variable_info; 908 efi.query_variable_info = virt_efi_query_variable_info;
884 efi.update_capsule = virt_efi_update_capsule; 909 efi.update_capsule = virt_efi_update_capsule;
885 efi.query_capsule_caps = virt_efi_query_capsule_caps; 910 efi.query_capsule_caps = virt_efi_query_capsule_caps;
886 if (__supported_pte_mask & _PAGE_NX) 911
912 if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
887 runtime_code_page_mkexec(); 913 runtime_code_page_mkexec();
888 914
889 kfree(new_memmap); 915 kfree(new_memmap);
@@ -894,6 +920,11 @@ void __init efi_enter_virtual_mode(void)
894 EFI_VARIABLE_BOOTSERVICE_ACCESS | 920 EFI_VARIABLE_BOOTSERVICE_ACCESS |
895 EFI_VARIABLE_RUNTIME_ACCESS, 921 EFI_VARIABLE_RUNTIME_ACCESS,
896 0, NULL); 922 0, NULL);
923
924 return;
925
926 err_out:
927 pr_err("Error reallocating memory, EFI runtime non-functional!\n");
897} 928}
898 929
899/* 930/*
@@ -1013,3 +1044,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
1013 return EFI_SUCCESS; 1044 return EFI_SUCCESS;
1014} 1045}
1015EXPORT_SYMBOL_GPL(efi_query_variable_store); 1046EXPORT_SYMBOL_GPL(efi_query_variable_store);
1047
1048static int __init parse_efi_cmdline(char *str)
1049{
1050 if (*str == '=')
1051 str++;
1052
1053 if (!strncmp(str, "old_map", 7))
1054 set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
1055
1056 return 0;
1057}
1058early_param("efi", parse_efi_cmdline);
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 40e446941dd7..e94557cf5487 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -37,9 +37,16 @@
37 * claim EFI runtime service handler exclusively and to duplicate a memory in 37 * claim EFI runtime service handler exclusively and to duplicate a memory in
38 * low memory space say 0 - 3G. 38 * low memory space say 0 - 3G.
39 */ 39 */
40
41static unsigned long efi_rt_eflags; 40static unsigned long efi_rt_eflags;
42 41
42void efi_sync_low_kernel_mappings(void) {}
43void efi_setup_page_tables(void) {}
44
45void __init efi_map_region(efi_memory_desc_t *md)
46{
47 old_map_region(md);
48}
49
43void efi_call_phys_prelog(void) 50void efi_call_phys_prelog(void)
44{ 51{
45 struct desc_ptr gdt_descr; 52 struct desc_ptr gdt_descr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 39a0e7f1f0a3..bf286c386d33 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,10 +38,28 @@
38#include <asm/efi.h> 38#include <asm/efi.h>
39#include <asm/cacheflush.h> 39#include <asm/cacheflush.h>
40#include <asm/fixmap.h> 40#include <asm/fixmap.h>
41#include <asm/realmode.h>
41 42
42static pgd_t *save_pgd __initdata; 43static pgd_t *save_pgd __initdata;
43static unsigned long efi_flags __initdata; 44static unsigned long efi_flags __initdata;
44 45
46/*
47 * We allocate runtime services regions bottom-up, starting from -4G, i.e.
48 * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
49 */
50static u64 efi_va = -4 * (1UL << 30);
51#define EFI_VA_END (-68 * (1UL << 30))
52
53/*
54 * Scratch space used for switching the pagetable in the EFI stub
55 */
56struct efi_scratch {
57 u64 r15;
58 u64 prev_cr3;
59 pgd_t *efi_pgt;
60 bool use_pgd;
61};
62
45static void __init early_code_mapping_set_exec(int executable) 63static void __init early_code_mapping_set_exec(int executable)
46{ 64{
47 efi_memory_desc_t *md; 65 efi_memory_desc_t *md;
@@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void)
65 int pgd; 83 int pgd;
66 int n_pgds; 84 int n_pgds;
67 85
86 if (!efi_enabled(EFI_OLD_MEMMAP))
87 return;
88
68 early_code_mapping_set_exec(1); 89 early_code_mapping_set_exec(1);
69 local_irq_save(efi_flags); 90 local_irq_save(efi_flags);
70 91
@@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void)
86 */ 107 */
87 int pgd; 108 int pgd;
88 int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); 109 int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
110
111 if (!efi_enabled(EFI_OLD_MEMMAP))
112 return;
113
89 for (pgd = 0; pgd < n_pgds; pgd++) 114 for (pgd = 0; pgd < n_pgds; pgd++)
90 set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); 115 set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
91 kfree(save_pgd); 116 kfree(save_pgd);
@@ -94,6 +119,90 @@ void __init efi_call_phys_epilog(void)
94 early_code_mapping_set_exec(0); 119 early_code_mapping_set_exec(0);
95} 120}
96 121
122/*
123 * Add low kernel mappings for passing arguments to EFI functions.
124 */
125void efi_sync_low_kernel_mappings(void)
126{
127 unsigned num_pgds;
128 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
129
130 if (efi_enabled(EFI_OLD_MEMMAP))
131 return;
132
133 num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
134
135 memcpy(pgd + pgd_index(PAGE_OFFSET),
136 init_mm.pgd + pgd_index(PAGE_OFFSET),
137 sizeof(pgd_t) * num_pgds);
138}
139
140void efi_setup_page_tables(void)
141{
142 efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
143
144 if (!efi_enabled(EFI_OLD_MEMMAP))
145 efi_scratch.use_pgd = true;
146}
147
148static void __init __map_region(efi_memory_desc_t *md, u64 va)
149{
150 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
151 unsigned long pf = 0, size;
152 u64 end;
153
154 if (!(md->attribute & EFI_MEMORY_WB))
155 pf |= _PAGE_PCD;
156
157 size = md->num_pages << PAGE_SHIFT;
158 end = va + size;
159
160 if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
161 pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
162 md->phys_addr, va);
163}
164
165void __init efi_map_region(efi_memory_desc_t *md)
166{
167 unsigned long size = md->num_pages << PAGE_SHIFT;
168 u64 pa = md->phys_addr;
169
170 if (efi_enabled(EFI_OLD_MEMMAP))
171 return old_map_region(md);
172
173 /*
174 * Make sure the 1:1 mappings are present as a catch-all for b0rked
175 * firmware which doesn't update all internal pointers after switching
176 * to virtual mode and would otherwise crap on us.
177 */
178 __map_region(md, md->phys_addr);
179
180 efi_va -= size;
181
182 /* Is PA 2M-aligned? */
183 if (!(pa & (PMD_SIZE - 1))) {
184 efi_va &= PMD_MASK;
185 } else {
186 u64 pa_offset = pa & (PMD_SIZE - 1);
187 u64 prev_va = efi_va;
188
189 /* get us the same offset within this 2M page */
190 efi_va = (efi_va & PMD_MASK) + pa_offset;
191
192 if (efi_va > prev_va)
193 efi_va -= PMD_SIZE;
194 }
195
196 if (efi_va < EFI_VA_END) {
197 pr_warn(FW_WARN "VA address range overflow!\n");
198 return;
199 }
200
201 /* Do the VA map */
202 __map_region(md, efi_va);
203 md->virt_addr = efi_va;
204}
205
97void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, 206void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
98 u32 type, u64 attribute) 207 u32 type, u64 attribute)
99{ 208{
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 4c07ccab8146..88073b140298 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -34,10 +34,47 @@
34 mov %rsi, %cr0; \ 34 mov %rsi, %cr0; \
35 mov (%rsp), %rsp 35 mov (%rsp), %rsp
36 36
37 /* stolen from gcc */
38 .macro FLUSH_TLB_ALL
39 movq %r15, efi_scratch(%rip)
40 movq %r14, efi_scratch+8(%rip)
41 movq %cr4, %r15
42 movq %r15, %r14
43 andb $0x7f, %r14b
44 movq %r14, %cr4
45 movq %r15, %cr4
46 movq efi_scratch+8(%rip), %r14
47 movq efi_scratch(%rip), %r15
48 .endm
49
50 .macro SWITCH_PGT
51 cmpb $0, efi_scratch+24(%rip)
52 je 1f
53 movq %r15, efi_scratch(%rip) # r15
54 # save previous CR3
55 movq %cr3, %r15
56 movq %r15, efi_scratch+8(%rip) # prev_cr3
57 movq efi_scratch+16(%rip), %r15 # EFI pgt
58 movq %r15, %cr3
59 1:
60 .endm
61
62 .macro RESTORE_PGT
63 cmpb $0, efi_scratch+24(%rip)
64 je 2f
65 movq efi_scratch+8(%rip), %r15
66 movq %r15, %cr3
67 movq efi_scratch(%rip), %r15
68 FLUSH_TLB_ALL
69 2:
70 .endm
71
37ENTRY(efi_call0) 72ENTRY(efi_call0)
38 SAVE_XMM 73 SAVE_XMM
39 subq $32, %rsp 74 subq $32, %rsp
75 SWITCH_PGT
40 call *%rdi 76 call *%rdi
77 RESTORE_PGT
41 addq $32, %rsp 78 addq $32, %rsp
42 RESTORE_XMM 79 RESTORE_XMM
43 ret 80 ret
@@ -47,7 +84,9 @@ ENTRY(efi_call1)
47 SAVE_XMM 84 SAVE_XMM
48 subq $32, %rsp 85 subq $32, %rsp
49 mov %rsi, %rcx 86 mov %rsi, %rcx
87 SWITCH_PGT
50 call *%rdi 88 call *%rdi
89 RESTORE_PGT
51 addq $32, %rsp 90 addq $32, %rsp
52 RESTORE_XMM 91 RESTORE_XMM
53 ret 92 ret
@@ -57,7 +96,9 @@ ENTRY(efi_call2)
57 SAVE_XMM 96 SAVE_XMM
58 subq $32, %rsp 97 subq $32, %rsp
59 mov %rsi, %rcx 98 mov %rsi, %rcx
99 SWITCH_PGT
60 call *%rdi 100 call *%rdi
101 RESTORE_PGT
61 addq $32, %rsp 102 addq $32, %rsp
62 RESTORE_XMM 103 RESTORE_XMM
63 ret 104 ret
@@ -68,7 +109,9 @@ ENTRY(efi_call3)
68 subq $32, %rsp 109 subq $32, %rsp
69 mov %rcx, %r8 110 mov %rcx, %r8
70 mov %rsi, %rcx 111 mov %rsi, %rcx
112 SWITCH_PGT
71 call *%rdi 113 call *%rdi
114 RESTORE_PGT
72 addq $32, %rsp 115 addq $32, %rsp
73 RESTORE_XMM 116 RESTORE_XMM
74 ret 117 ret
@@ -80,7 +123,9 @@ ENTRY(efi_call4)
80 mov %r8, %r9 123 mov %r8, %r9
81 mov %rcx, %r8 124 mov %rcx, %r8
82 mov %rsi, %rcx 125 mov %rsi, %rcx
126 SWITCH_PGT
83 call *%rdi 127 call *%rdi
128 RESTORE_PGT
84 addq $32, %rsp 129 addq $32, %rsp
85 RESTORE_XMM 130 RESTORE_XMM
86 ret 131 ret
@@ -93,7 +138,9 @@ ENTRY(efi_call5)
93 mov %r8, %r9 138 mov %r8, %r9
94 mov %rcx, %r8 139 mov %rcx, %r8
95 mov %rsi, %rcx 140 mov %rsi, %rcx
141 SWITCH_PGT
96 call *%rdi 142 call *%rdi
143 RESTORE_PGT
97 addq $48, %rsp 144 addq $48, %rsp
98 RESTORE_XMM 145 RESTORE_XMM
99 ret 146 ret
@@ -109,8 +156,15 @@ ENTRY(efi_call6)
109 mov %r8, %r9 156 mov %r8, %r9
110 mov %rcx, %r8 157 mov %rcx, %r8
111 mov %rsi, %rcx 158 mov %rsi, %rcx
159 SWITCH_PGT
112 call *%rdi 160 call *%rdi
161 RESTORE_PGT
113 addq $48, %rsp 162 addq $48, %rsp
114 RESTORE_XMM 163 RESTORE_XMM
115 ret 164 ret
116ENDPROC(efi_call6) 165ENDPROC(efi_call6)
166
167 .data
168ENTRY(efi_scratch)
169 .fill 3,8,0
170 .byte 0
diff --git a/include/linux/efi.h b/include/linux/efi.h
index bc5687d0f315..6c0ca528300c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -653,6 +653,7 @@ extern int __init efi_setup_pcdp_console(char *);
653#define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ 653#define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */
654#define EFI_MEMMAP 4 /* Can we use EFI memory map? */ 654#define EFI_MEMMAP 4 /* Can we use EFI memory map? */
655#define EFI_64BIT 5 /* Is the firmware 64-bit? */ 655#define EFI_64BIT 5 /* Is the firmware 64-bit? */
656#define EFI_ARCH_1 6 /* First arch-specific bit */
656 657
657#ifdef CONFIG_EFI 658#ifdef CONFIG_EFI
658# ifdef CONFIG_X86 659# ifdef CONFIG_X86