diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-20 15:05:30 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-20 15:05:30 -0500 |
commit | 972d5e7e5b66f5a143026fcdd4b2be2f519c0f12 (patch) | |
tree | 6c1c5bb79fe163b3b48254605b54532099b74cff /arch/x86/platform | |
parent | 5d4863e4cc4dc12d1d5e42da3cb5d38c535e4ad6 (diff) | |
parent | ef0b8b9a521c65201bfca9747ee1bf374296133c (diff) |
Merge branch 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 EFI changes from Ingo Molnar:
"This consists of two main parts:
- New static EFI runtime services virtual mapping layout which is
groundwork for kexec support on EFI (Borislav Petkov)
- EFI kexec support itself (Dave Young)"
* 'x86-efi-kexec-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
x86/efi: parse_efi_setup() build fix
x86: ksysfs.c build fix
x86/efi: Delete superfluous global variables
x86: Reserve setup_data ranges late after parsing memmap cmdline
x86: Export x86 boot_params to sysfs
x86: Add xloadflags bit for EFI runtime support on kexec
x86/efi: Pass necessary EFI data for kexec via setup_data
efi: Export EFI runtime memory mapping to sysfs
efi: Export more EFI table variables to sysfs
x86/efi: Cleanup efi_enter_virtual_mode() function
x86/efi: Fix off-by-one bug in EFI Boot Services reservation
x86/efi: Add a wrapper function efi_map_region_fixed()
x86/efi: Remove unused variables in __map_region()
x86/efi: Check krealloc return value
x86/efi: Runtime services virtual mapping
x86/mm/cpa: Map in an arbitrary pgd
x86/mm/pageattr: Add last levels of error path
x86/mm/pageattr: Add a PUD error unwinding path
x86/mm/pageattr: Add a PTE pagetable populating function
x86/mm/pageattr: Add a PMD pagetable populating function
...
Diffstat (limited to 'arch/x86/platform')
-rw-r--r-- | arch/x86/platform/efi/efi.c | 355 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_32.c | 12 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_64.c | 120 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_64.S | 54 |
4 files changed, 465 insertions, 76 deletions
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index cceb813044ef..d62ec87a2b26 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -12,6 +12,8 @@ | |||
12 | * Bibo Mao <bibo.mao@intel.com> | 12 | * Bibo Mao <bibo.mao@intel.com> |
13 | * Chandramouli Narayanan <mouli@linux.intel.com> | 13 | * Chandramouli Narayanan <mouli@linux.intel.com> |
14 | * Huang Ying <ying.huang@intel.com> | 14 | * Huang Ying <ying.huang@intel.com> |
15 | * Copyright (C) 2013 SuSE Labs | ||
16 | * Borislav Petkov <bp@suse.de> - runtime services VA mapping | ||
15 | * | 17 | * |
16 | * Copied from efi_32.c to eliminate the duplicated code between EFI | 18 | * Copied from efi_32.c to eliminate the duplicated code between EFI |
17 | * 32/64 support code. --ying 2007-10-26 | 19 | * 32/64 support code. --ying 2007-10-26 |
@@ -51,7 +53,7 @@ | |||
51 | #include <asm/x86_init.h> | 53 | #include <asm/x86_init.h> |
52 | #include <asm/rtc.h> | 54 | #include <asm/rtc.h> |
53 | 55 | ||
54 | #define EFI_DEBUG 1 | 56 | #define EFI_DEBUG |
55 | 57 | ||
56 | #define EFI_MIN_RESERVE 5120 | 58 | #define EFI_MIN_RESERVE 5120 |
57 | 59 | ||
@@ -74,6 +76,8 @@ static __initdata efi_config_table_type_t arch_tables[] = { | |||
74 | {NULL_GUID, NULL, NULL}, | 76 | {NULL_GUID, NULL, NULL}, |
75 | }; | 77 | }; |
76 | 78 | ||
79 | u64 efi_setup; /* efi setup_data physical address */ | ||
80 | |||
77 | /* | 81 | /* |
78 | * Returns 1 if 'facility' is enabled, 0 otherwise. | 82 | * Returns 1 if 'facility' is enabled, 0 otherwise. |
79 | */ | 83 | */ |
@@ -110,7 +114,6 @@ static int __init setup_storage_paranoia(char *arg) | |||
110 | } | 114 | } |
111 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); | 115 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); |
112 | 116 | ||
113 | |||
114 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | 117 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) |
115 | { | 118 | { |
116 | unsigned long flags; | 119 | unsigned long flags; |
@@ -398,9 +401,9 @@ int __init efi_memblock_x86_reserve_range(void) | |||
398 | return 0; | 401 | return 0; |
399 | } | 402 | } |
400 | 403 | ||
401 | #if EFI_DEBUG | ||
402 | static void __init print_efi_memmap(void) | 404 | static void __init print_efi_memmap(void) |
403 | { | 405 | { |
406 | #ifdef EFI_DEBUG | ||
404 | efi_memory_desc_t *md; | 407 | efi_memory_desc_t *md; |
405 | void *p; | 408 | void *p; |
406 | int i; | 409 | int i; |
@@ -415,8 +418,8 @@ static void __init print_efi_memmap(void) | |||
415 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | 418 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), |
416 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | 419 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); |
417 | } | 420 | } |
418 | } | ||
419 | #endif /* EFI_DEBUG */ | 421 | #endif /* EFI_DEBUG */ |
422 | } | ||
420 | 423 | ||
421 | void __init efi_reserve_boot_services(void) | 424 | void __init efi_reserve_boot_services(void) |
422 | { | 425 | { |
@@ -436,7 +439,7 @@ void __init efi_reserve_boot_services(void) | |||
436 | * - Not within any part of the kernel | 439 | * - Not within any part of the kernel |
437 | * - Not the bios reserved area | 440 | * - Not the bios reserved area |
438 | */ | 441 | */ |
439 | if ((start+size >= __pa_symbol(_text) | 442 | if ((start + size > __pa_symbol(_text) |
440 | && start <= __pa_symbol(_end)) || | 443 | && start <= __pa_symbol(_end)) || |
441 | !e820_all_mapped(start, start+size, E820_RAM) || | 444 | !e820_all_mapped(start, start+size, E820_RAM) || |
442 | memblock_is_region_reserved(start, size)) { | 445 | memblock_is_region_reserved(start, size)) { |
@@ -489,18 +492,27 @@ static int __init efi_systab_init(void *phys) | |||
489 | { | 492 | { |
490 | if (efi_enabled(EFI_64BIT)) { | 493 | if (efi_enabled(EFI_64BIT)) { |
491 | efi_system_table_64_t *systab64; | 494 | efi_system_table_64_t *systab64; |
495 | struct efi_setup_data *data = NULL; | ||
492 | u64 tmp = 0; | 496 | u64 tmp = 0; |
493 | 497 | ||
498 | if (efi_setup) { | ||
499 | data = early_memremap(efi_setup, sizeof(*data)); | ||
500 | if (!data) | ||
501 | return -ENOMEM; | ||
502 | } | ||
494 | systab64 = early_ioremap((unsigned long)phys, | 503 | systab64 = early_ioremap((unsigned long)phys, |
495 | sizeof(*systab64)); | 504 | sizeof(*systab64)); |
496 | if (systab64 == NULL) { | 505 | if (systab64 == NULL) { |
497 | pr_err("Couldn't map the system table!\n"); | 506 | pr_err("Couldn't map the system table!\n"); |
507 | if (data) | ||
508 | early_iounmap(data, sizeof(*data)); | ||
498 | return -ENOMEM; | 509 | return -ENOMEM; |
499 | } | 510 | } |
500 | 511 | ||
501 | efi_systab.hdr = systab64->hdr; | 512 | efi_systab.hdr = systab64->hdr; |
502 | efi_systab.fw_vendor = systab64->fw_vendor; | 513 | efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : |
503 | tmp |= systab64->fw_vendor; | 514 | systab64->fw_vendor; |
515 | tmp |= data ? data->fw_vendor : systab64->fw_vendor; | ||
504 | efi_systab.fw_revision = systab64->fw_revision; | 516 | efi_systab.fw_revision = systab64->fw_revision; |
505 | efi_systab.con_in_handle = systab64->con_in_handle; | 517 | efi_systab.con_in_handle = systab64->con_in_handle; |
506 | tmp |= systab64->con_in_handle; | 518 | tmp |= systab64->con_in_handle; |
@@ -514,15 +526,20 @@ static int __init efi_systab_init(void *phys) | |||
514 | tmp |= systab64->stderr_handle; | 526 | tmp |= systab64->stderr_handle; |
515 | efi_systab.stderr = systab64->stderr; | 527 | efi_systab.stderr = systab64->stderr; |
516 | tmp |= systab64->stderr; | 528 | tmp |= systab64->stderr; |
517 | efi_systab.runtime = (void *)(unsigned long)systab64->runtime; | 529 | efi_systab.runtime = data ? |
518 | tmp |= systab64->runtime; | 530 | (void *)(unsigned long)data->runtime : |
531 | (void *)(unsigned long)systab64->runtime; | ||
532 | tmp |= data ? data->runtime : systab64->runtime; | ||
519 | efi_systab.boottime = (void *)(unsigned long)systab64->boottime; | 533 | efi_systab.boottime = (void *)(unsigned long)systab64->boottime; |
520 | tmp |= systab64->boottime; | 534 | tmp |= systab64->boottime; |
521 | efi_systab.nr_tables = systab64->nr_tables; | 535 | efi_systab.nr_tables = systab64->nr_tables; |
522 | efi_systab.tables = systab64->tables; | 536 | efi_systab.tables = data ? (unsigned long)data->tables : |
523 | tmp |= systab64->tables; | 537 | systab64->tables; |
538 | tmp |= data ? data->tables : systab64->tables; | ||
524 | 539 | ||
525 | early_iounmap(systab64, sizeof(*systab64)); | 540 | early_iounmap(systab64, sizeof(*systab64)); |
541 | if (data) | ||
542 | early_iounmap(data, sizeof(*data)); | ||
526 | #ifdef CONFIG_X86_32 | 543 | #ifdef CONFIG_X86_32 |
527 | if (tmp >> 32) { | 544 | if (tmp >> 32) { |
528 | pr_err("EFI data located above 4GB, disabling EFI.\n"); | 545 | pr_err("EFI data located above 4GB, disabling EFI.\n"); |
@@ -626,6 +643,62 @@ static int __init efi_memmap_init(void) | |||
626 | return 0; | 643 | return 0; |
627 | } | 644 | } |
628 | 645 | ||
646 | /* | ||
647 | * A number of config table entries get remapped to virtual addresses | ||
648 | * after entering EFI virtual mode. However, the kexec kernel requires | ||
649 | * their physical addresses therefore we pass them via setup_data and | ||
650 | * correct those entries to their respective physical addresses here. | ||
651 | * | ||
652 | * Currently only handles smbios which is necessary for some firmware | ||
653 | * implementation. | ||
654 | */ | ||
655 | static int __init efi_reuse_config(u64 tables, int nr_tables) | ||
656 | { | ||
657 | int i, sz, ret = 0; | ||
658 | void *p, *tablep; | ||
659 | struct efi_setup_data *data; | ||
660 | |||
661 | if (!efi_setup) | ||
662 | return 0; | ||
663 | |||
664 | if (!efi_enabled(EFI_64BIT)) | ||
665 | return 0; | ||
666 | |||
667 | data = early_memremap(efi_setup, sizeof(*data)); | ||
668 | if (!data) { | ||
669 | ret = -ENOMEM; | ||
670 | goto out; | ||
671 | } | ||
672 | |||
673 | if (!data->smbios) | ||
674 | goto out_memremap; | ||
675 | |||
676 | sz = sizeof(efi_config_table_64_t); | ||
677 | |||
678 | p = tablep = early_memremap(tables, nr_tables * sz); | ||
679 | if (!p) { | ||
680 | pr_err("Could not map Configuration table!\n"); | ||
681 | ret = -ENOMEM; | ||
682 | goto out_memremap; | ||
683 | } | ||
684 | |||
685 | for (i = 0; i < efi.systab->nr_tables; i++) { | ||
686 | efi_guid_t guid; | ||
687 | |||
688 | guid = ((efi_config_table_64_t *)p)->guid; | ||
689 | |||
690 | if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) | ||
691 | ((efi_config_table_64_t *)p)->table = data->smbios; | ||
692 | p += sz; | ||
693 | } | ||
694 | early_iounmap(tablep, nr_tables * sz); | ||
695 | |||
696 | out_memremap: | ||
697 | early_iounmap(data, sizeof(*data)); | ||
698 | out: | ||
699 | return ret; | ||
700 | } | ||
701 | |||
629 | void __init efi_init(void) | 702 | void __init efi_init(void) |
630 | { | 703 | { |
631 | efi_char16_t *c16; | 704 | efi_char16_t *c16; |
@@ -651,6 +724,10 @@ void __init efi_init(void) | |||
651 | 724 | ||
652 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); | 725 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); |
653 | 726 | ||
727 | efi.config_table = (unsigned long)efi.systab->tables; | ||
728 | efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; | ||
729 | efi.runtime = (unsigned long)efi.systab->runtime; | ||
730 | |||
654 | /* | 731 | /* |
655 | * Show what we know for posterity | 732 | * Show what we know for posterity |
656 | */ | 733 | */ |
@@ -667,6 +744,9 @@ void __init efi_init(void) | |||
667 | efi.systab->hdr.revision >> 16, | 744 | efi.systab->hdr.revision >> 16, |
668 | efi.systab->hdr.revision & 0xffff, vendor); | 745 | efi.systab->hdr.revision & 0xffff, vendor); |
669 | 746 | ||
747 | if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables)) | ||
748 | return; | ||
749 | |||
670 | if (efi_config_init(arch_tables)) | 750 | if (efi_config_init(arch_tables)) |
671 | return; | 751 | return; |
672 | 752 | ||
@@ -684,15 +764,12 @@ void __init efi_init(void) | |||
684 | return; | 764 | return; |
685 | set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); | 765 | set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); |
686 | } | 766 | } |
687 | |||
688 | if (efi_memmap_init()) | 767 | if (efi_memmap_init()) |
689 | return; | 768 | return; |
690 | 769 | ||
691 | set_bit(EFI_MEMMAP, &x86_efi_facility); | 770 | set_bit(EFI_MEMMAP, &x86_efi_facility); |
692 | 771 | ||
693 | #if EFI_DEBUG | ||
694 | print_efi_memmap(); | 772 | print_efi_memmap(); |
695 | #endif | ||
696 | } | 773 | } |
697 | 774 | ||
698 | void __init efi_late_init(void) | 775 | void __init efi_late_init(void) |
@@ -741,36 +818,38 @@ void efi_memory_uc(u64 addr, unsigned long size) | |||
741 | set_memory_uc(addr, npages); | 818 | set_memory_uc(addr, npages); |
742 | } | 819 | } |
743 | 820 | ||
744 | /* | 821 | void __init old_map_region(efi_memory_desc_t *md) |
745 | * This function will switch the EFI runtime services to virtual mode. | ||
746 | * Essentially, look through the EFI memmap and map every region that | ||
747 | * has the runtime attribute bit set in its memory descriptor and update | ||
748 | * that memory descriptor with the virtual address obtained from ioremap(). | ||
749 | * This enables the runtime services to be called without having to | ||
750 | * thunk back into physical mode for every invocation. | ||
751 | */ | ||
752 | void __init efi_enter_virtual_mode(void) | ||
753 | { | 822 | { |
754 | efi_memory_desc_t *md, *prev_md = NULL; | 823 | u64 start_pfn, end_pfn, end; |
755 | efi_status_t status; | ||
756 | unsigned long size; | 824 | unsigned long size; |
757 | u64 end, systab, start_pfn, end_pfn; | 825 | void *va; |
758 | void *p, *va, *new_memmap = NULL; | ||
759 | int count = 0; | ||
760 | 826 | ||
761 | efi.systab = NULL; | 827 | start_pfn = PFN_DOWN(md->phys_addr); |
828 | size = md->num_pages << PAGE_SHIFT; | ||
829 | end = md->phys_addr + size; | ||
830 | end_pfn = PFN_UP(end); | ||
762 | 831 | ||
763 | /* | 832 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { |
764 | * We don't do virtual mode, since we don't do runtime services, on | 833 | va = __va(md->phys_addr); |
765 | * non-native EFI | ||
766 | */ | ||
767 | 834 | ||
768 | if (!efi_is_native()) { | 835 | if (!(md->attribute & EFI_MEMORY_WB)) |
769 | efi_unmap_memmap(); | 836 | efi_memory_uc((u64)(unsigned long)va, size); |
770 | return; | 837 | } else |
771 | } | 838 | va = efi_ioremap(md->phys_addr, size, |
839 | md->type, md->attribute); | ||
840 | |||
841 | md->virt_addr = (u64) (unsigned long) va; | ||
842 | if (!va) | ||
843 | pr_err("ioremap of 0x%llX failed!\n", | ||
844 | (unsigned long long)md->phys_addr); | ||
845 | } | ||
846 | |||
847 | /* Merge contiguous regions of the same type and attribute */ | ||
848 | static void __init efi_merge_regions(void) | ||
849 | { | ||
850 | void *p; | ||
851 | efi_memory_desc_t *md, *prev_md = NULL; | ||
772 | 852 | ||
773 | /* Merge contiguous regions of the same type and attribute */ | ||
774 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 853 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
775 | u64 prev_size; | 854 | u64 prev_size; |
776 | md = p; | 855 | md = p; |
@@ -796,6 +875,77 @@ void __init efi_enter_virtual_mode(void) | |||
796 | } | 875 | } |
797 | prev_md = md; | 876 | prev_md = md; |
798 | } | 877 | } |
878 | } | ||
879 | |||
880 | static void __init get_systab_virt_addr(efi_memory_desc_t *md) | ||
881 | { | ||
882 | unsigned long size; | ||
883 | u64 end, systab; | ||
884 | |||
885 | size = md->num_pages << EFI_PAGE_SHIFT; | ||
886 | end = md->phys_addr + size; | ||
887 | systab = (u64)(unsigned long)efi_phys.systab; | ||
888 | if (md->phys_addr <= systab && systab < end) { | ||
889 | systab += md->virt_addr - md->phys_addr; | ||
890 | efi.systab = (efi_system_table_t *)(unsigned long)systab; | ||
891 | } | ||
892 | } | ||
893 | |||
894 | static int __init save_runtime_map(void) | ||
895 | { | ||
896 | efi_memory_desc_t *md; | ||
897 | void *tmp, *p, *q = NULL; | ||
898 | int count = 0; | ||
899 | |||
900 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
901 | md = p; | ||
902 | |||
903 | if (!(md->attribute & EFI_MEMORY_RUNTIME) || | ||
904 | (md->type == EFI_BOOT_SERVICES_CODE) || | ||
905 | (md->type == EFI_BOOT_SERVICES_DATA)) | ||
906 | continue; | ||
907 | tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); | ||
908 | if (!tmp) | ||
909 | goto out; | ||
910 | q = tmp; | ||
911 | |||
912 | memcpy(q + count * memmap.desc_size, md, memmap.desc_size); | ||
913 | count++; | ||
914 | } | ||
915 | |||
916 | efi_runtime_map_setup(q, count, memmap.desc_size); | ||
917 | |||
918 | return 0; | ||
919 | out: | ||
920 | kfree(q); | ||
921 | return -ENOMEM; | ||
922 | } | ||
923 | |||
924 | /* | ||
925 | * Map efi regions which were passed via setup_data. The virt_addr is a fixed | ||
926 | * address which was used in the first kernel of a kexec boot. | ||
927 | */ | ||
928 | static void __init efi_map_regions_fixed(void) | ||
929 | { | ||
930 | void *p; | ||
931 | efi_memory_desc_t *md; | ||
932 | |||
933 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
934 | md = p; | ||
935 | efi_map_region_fixed(md); /* FIXME: add error handling */ | ||
936 | get_systab_virt_addr(md); | ||
937 | } | ||
938 | |||
939 | } | ||
940 | |||
941 | /* | ||
942 | * Map efi memory ranges for runtime services and update new_memmap with virtual | ||
943 | * addresses. | ||
944 | */ | ||
945 | static void * __init efi_map_regions(int *count) | ||
946 | { | ||
947 | efi_memory_desc_t *md; | ||
948 | void *p, *tmp, *new_memmap = NULL; | ||
799 | 949 | ||
800 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 950 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
801 | md = p; | 951 | md = p; |
@@ -807,53 +957,95 @@ void __init efi_enter_virtual_mode(void) | |||
807 | continue; | 957 | continue; |
808 | } | 958 | } |
809 | 959 | ||
810 | size = md->num_pages << EFI_PAGE_SHIFT; | 960 | efi_map_region(md); |
811 | end = md->phys_addr + size; | 961 | get_systab_virt_addr(md); |
812 | 962 | ||
813 | start_pfn = PFN_DOWN(md->phys_addr); | 963 | tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size, |
814 | end_pfn = PFN_UP(end); | 964 | GFP_KERNEL); |
815 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { | 965 | if (!tmp) |
816 | va = __va(md->phys_addr); | 966 | goto out; |
967 | new_memmap = tmp; | ||
968 | memcpy(new_memmap + (*count * memmap.desc_size), md, | ||
969 | memmap.desc_size); | ||
970 | (*count)++; | ||
971 | } | ||
817 | 972 | ||
818 | if (!(md->attribute & EFI_MEMORY_WB)) | 973 | return new_memmap; |
819 | efi_memory_uc((u64)(unsigned long)va, size); | 974 | out: |
820 | } else | 975 | kfree(new_memmap); |
821 | va = efi_ioremap(md->phys_addr, size, | 976 | return NULL; |
822 | md->type, md->attribute); | 977 | } |
978 | |||
979 | /* | ||
980 | * This function will switch the EFI runtime services to virtual mode. | ||
981 | * Essentially, we look through the EFI memmap and map every region that | ||
982 | * has the runtime attribute bit set in its memory descriptor into the | ||
983 | * ->trampoline_pgd page table using a top-down VA allocation scheme. | ||
984 | * | ||
985 | * The old method which used to update that memory descriptor with the | ||
986 | * virtual address obtained from ioremap() is still supported when the | ||
987 | * kernel is booted with efi=old_map on its command line. Same old | ||
988 | * method enabled the runtime services to be called without having to | ||
989 | * thunk back into physical mode for every invocation. | ||
990 | * | ||
991 | * The new method does a pagetable switch in a preemption-safe manner | ||
992 | * so that we're in a different address space when calling a runtime | ||
993 | * function. For function arguments passing we do copy the PGDs of the | ||
994 | * kernel page table into ->trampoline_pgd prior to each call. | ||
995 | * | ||
996 | * Specifically for kexec boot, the efi runtime maps of the previous kernel should | ||
997 | * be passed in via setup_data. In that case the runtime ranges will be mapped | ||
998 | * to the same virtual addresses as in the first kernel. | ||
999 | */ | ||
1000 | void __init efi_enter_virtual_mode(void) | ||
1001 | { | ||
1002 | efi_status_t status; | ||
1003 | void *new_memmap = NULL; | ||
1004 | int err, count = 0; | ||
823 | 1005 | ||
824 | md->virt_addr = (u64) (unsigned long) va; | 1006 | efi.systab = NULL; |
825 | 1007 | ||
826 | if (!va) { | 1008 | /* |
827 | pr_err("ioremap of 0x%llX failed!\n", | 1009 | * We don't do virtual mode, since we don't do runtime services, on |
828 | (unsigned long long)md->phys_addr); | 1010 | * non-native EFI |
829 | continue; | 1011 | */ |
830 | } | 1012 | if (!efi_is_native()) { |
1013 | efi_unmap_memmap(); | ||
1014 | return; | ||
1015 | } | ||
831 | 1016 | ||
832 | systab = (u64) (unsigned long) efi_phys.systab; | 1017 | if (efi_setup) { |
833 | if (md->phys_addr <= systab && systab < end) { | 1018 | efi_map_regions_fixed(); |
834 | systab += md->virt_addr - md->phys_addr; | 1019 | } else { |
835 | efi.systab = (efi_system_table_t *) (unsigned long) systab; | 1020 | efi_merge_regions(); |
1021 | new_memmap = efi_map_regions(&count); | ||
1022 | if (!new_memmap) { | ||
1023 | pr_err("Error reallocating memory, EFI runtime non-functional!\n"); | ||
1024 | return; | ||
836 | } | 1025 | } |
837 | new_memmap = krealloc(new_memmap, | ||
838 | (count + 1) * memmap.desc_size, | ||
839 | GFP_KERNEL); | ||
840 | memcpy(new_memmap + (count * memmap.desc_size), md, | ||
841 | memmap.desc_size); | ||
842 | count++; | ||
843 | } | 1026 | } |
844 | 1027 | ||
1028 | err = save_runtime_map(); | ||
1029 | if (err) | ||
1030 | pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); | ||
1031 | |||
845 | BUG_ON(!efi.systab); | 1032 | BUG_ON(!efi.systab); |
846 | 1033 | ||
847 | status = phys_efi_set_virtual_address_map( | 1034 | efi_setup_page_tables(); |
848 | memmap.desc_size * count, | 1035 | efi_sync_low_kernel_mappings(); |
849 | memmap.desc_size, | ||
850 | memmap.desc_version, | ||
851 | (efi_memory_desc_t *)__pa(new_memmap)); | ||
852 | 1036 | ||
853 | if (status != EFI_SUCCESS) { | 1037 | if (!efi_setup) { |
854 | pr_alert("Unable to switch EFI into virtual mode " | 1038 | status = phys_efi_set_virtual_address_map( |
855 | "(status=%lx)!\n", status); | 1039 | memmap.desc_size * count, |
856 | panic("EFI call to SetVirtualAddressMap() failed!"); | 1040 | memmap.desc_size, |
1041 | memmap.desc_version, | ||
1042 | (efi_memory_desc_t *)__pa(new_memmap)); | ||
1043 | |||
1044 | if (status != EFI_SUCCESS) { | ||
1045 | pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", | ||
1046 | status); | ||
1047 | panic("EFI call to SetVirtualAddressMap() failed!"); | ||
1048 | } | ||
857 | } | 1049 | } |
858 | 1050 | ||
859 | /* | 1051 | /* |
@@ -876,7 +1068,8 @@ void __init efi_enter_virtual_mode(void) | |||
876 | efi.query_variable_info = virt_efi_query_variable_info; | 1068 | efi.query_variable_info = virt_efi_query_variable_info; |
877 | efi.update_capsule = virt_efi_update_capsule; | 1069 | efi.update_capsule = virt_efi_update_capsule; |
878 | efi.query_capsule_caps = virt_efi_query_capsule_caps; | 1070 | efi.query_capsule_caps = virt_efi_query_capsule_caps; |
879 | if (__supported_pte_mask & _PAGE_NX) | 1071 | |
1072 | if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) | ||
880 | runtime_code_page_mkexec(); | 1073 | runtime_code_page_mkexec(); |
881 | 1074 | ||
882 | kfree(new_memmap); | 1075 | kfree(new_memmap); |
@@ -1006,3 +1199,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) | |||
1006 | return EFI_SUCCESS; | 1199 | return EFI_SUCCESS; |
1007 | } | 1200 | } |
1008 | EXPORT_SYMBOL_GPL(efi_query_variable_store); | 1201 | EXPORT_SYMBOL_GPL(efi_query_variable_store); |
1202 | |||
1203 | static int __init parse_efi_cmdline(char *str) | ||
1204 | { | ||
1205 | if (*str == '=') | ||
1206 | str++; | ||
1207 | |||
1208 | if (!strncmp(str, "old_map", 7)) | ||
1209 | set_bit(EFI_OLD_MEMMAP, &x86_efi_facility); | ||
1210 | |||
1211 | return 0; | ||
1212 | } | ||
1213 | early_param("efi", parse_efi_cmdline); | ||
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd7..249b183cf417 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c | |||
@@ -37,9 +37,19 @@ | |||
37 | * claim EFI runtime service handler exclusively and to duplicate a memory in | 37 | * claim EFI runtime service handler exclusively and to duplicate a memory in |
38 | * low memory space say 0 - 3G. | 38 | * low memory space say 0 - 3G. |
39 | */ | 39 | */ |
40 | |||
41 | static unsigned long efi_rt_eflags; | 40 | static unsigned long efi_rt_eflags; |
42 | 41 | ||
42 | void efi_sync_low_kernel_mappings(void) {} | ||
43 | void efi_setup_page_tables(void) {} | ||
44 | |||
45 | void __init efi_map_region(efi_memory_desc_t *md) | ||
46 | { | ||
47 | old_map_region(md); | ||
48 | } | ||
49 | |||
50 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} | ||
51 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} | ||
52 | |||
43 | void efi_call_phys_prelog(void) | 53 | void efi_call_phys_prelog(void) |
44 | { | 54 | { |
45 | struct desc_ptr gdt_descr; | 55 | struct desc_ptr gdt_descr; |
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a3..6284f158a47d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -38,10 +38,28 @@ | |||
38 | #include <asm/efi.h> | 38 | #include <asm/efi.h> |
39 | #include <asm/cacheflush.h> | 39 | #include <asm/cacheflush.h> |
40 | #include <asm/fixmap.h> | 40 | #include <asm/fixmap.h> |
41 | #include <asm/realmode.h> | ||
41 | 42 | ||
42 | static pgd_t *save_pgd __initdata; | 43 | static pgd_t *save_pgd __initdata; |
43 | static unsigned long efi_flags __initdata; | 44 | static unsigned long efi_flags __initdata; |
44 | 45 | ||
46 | /* | ||
47 | * We allocate runtime services regions top-down, starting from -4G, i.e. | ||
48 | * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. | ||
49 | */ | ||
50 | static u64 efi_va = -4 * (1UL << 30); | ||
51 | #define EFI_VA_END (-68 * (1UL << 30)) | ||
52 | |||
53 | /* | ||
54 | * Scratch space used for switching the pagetable in the EFI stub | ||
55 | */ | ||
56 | struct efi_scratch { | ||
57 | u64 r15; | ||
58 | u64 prev_cr3; | ||
59 | pgd_t *efi_pgt; | ||
60 | bool use_pgd; | ||
61 | }; | ||
62 | |||
45 | static void __init early_code_mapping_set_exec(int executable) | 63 | static void __init early_code_mapping_set_exec(int executable) |
46 | { | 64 | { |
47 | efi_memory_desc_t *md; | 65 | efi_memory_desc_t *md; |
@@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void) | |||
65 | int pgd; | 83 | int pgd; |
66 | int n_pgds; | 84 | int n_pgds; |
67 | 85 | ||
86 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
87 | return; | ||
88 | |||
68 | early_code_mapping_set_exec(1); | 89 | early_code_mapping_set_exec(1); |
69 | local_irq_save(efi_flags); | 90 | local_irq_save(efi_flags); |
70 | 91 | ||
@@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void) | |||
86 | */ | 107 | */ |
87 | int pgd; | 108 | int pgd; |
88 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); | 109 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); |
110 | |||
111 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
112 | return; | ||
113 | |||
89 | for (pgd = 0; pgd < n_pgds; pgd++) | 114 | for (pgd = 0; pgd < n_pgds; pgd++) |
90 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); | 115 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); |
91 | kfree(save_pgd); | 116 | kfree(save_pgd); |
@@ -94,6 +119,96 @@ void __init efi_call_phys_epilog(void) | |||
94 | early_code_mapping_set_exec(0); | 119 | early_code_mapping_set_exec(0); |
95 | } | 120 | } |
96 | 121 | ||
122 | /* | ||
123 | * Add low kernel mappings for passing arguments to EFI functions. | ||
124 | */ | ||
125 | void efi_sync_low_kernel_mappings(void) | ||
126 | { | ||
127 | unsigned num_pgds; | ||
128 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
129 | |||
130 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
131 | return; | ||
132 | |||
133 | num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); | ||
134 | |||
135 | memcpy(pgd + pgd_index(PAGE_OFFSET), | ||
136 | init_mm.pgd + pgd_index(PAGE_OFFSET), | ||
137 | sizeof(pgd_t) * num_pgds); | ||
138 | } | ||
139 | |||
140 | void efi_setup_page_tables(void) | ||
141 | { | ||
142 | efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; | ||
143 | |||
144 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
145 | efi_scratch.use_pgd = true; | ||
146 | } | ||
147 | |||
148 | static void __init __map_region(efi_memory_desc_t *md, u64 va) | ||
149 | { | ||
150 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
151 | unsigned long pf = 0; | ||
152 | |||
153 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
154 | pf |= _PAGE_PCD; | ||
155 | |||
156 | if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) | ||
157 | pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", | ||
158 | md->phys_addr, va); | ||
159 | } | ||
160 | |||
161 | void __init efi_map_region(efi_memory_desc_t *md) | ||
162 | { | ||
163 | unsigned long size = md->num_pages << PAGE_SHIFT; | ||
164 | u64 pa = md->phys_addr; | ||
165 | |||
166 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
167 | return old_map_region(md); | ||
168 | |||
169 | /* | ||
170 | * Make sure the 1:1 mappings are present as a catch-all for b0rked | ||
171 | * firmware which doesn't update all internal pointers after switching | ||
172 | * to virtual mode and would otherwise crap on us. | ||
173 | */ | ||
174 | __map_region(md, md->phys_addr); | ||
175 | |||
176 | efi_va -= size; | ||
177 | |||
178 | /* Is PA 2M-aligned? */ | ||
179 | if (!(pa & (PMD_SIZE - 1))) { | ||
180 | efi_va &= PMD_MASK; | ||
181 | } else { | ||
182 | u64 pa_offset = pa & (PMD_SIZE - 1); | ||
183 | u64 prev_va = efi_va; | ||
184 | |||
185 | /* get us the same offset within this 2M page */ | ||
186 | efi_va = (efi_va & PMD_MASK) + pa_offset; | ||
187 | |||
188 | if (efi_va > prev_va) | ||
189 | efi_va -= PMD_SIZE; | ||
190 | } | ||
191 | |||
192 | if (efi_va < EFI_VA_END) { | ||
193 | pr_warn(FW_WARN "VA address range overflow!\n"); | ||
194 | return; | ||
195 | } | ||
196 | |||
197 | /* Do the VA map */ | ||
198 | __map_region(md, efi_va); | ||
199 | md->virt_addr = efi_va; | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * kexec kernel will use efi_map_region_fixed to map efi runtime memory ranges. | ||
204 | * md->virt_addr is the original virtual address which was mapped by the first | ||
205 | * kernel of the kexec boot. | ||
206 | */ | ||
207 | void __init efi_map_region_fixed(efi_memory_desc_t *md) | ||
208 | { | ||
209 | __map_region(md, md->virt_addr); | ||
210 | } | ||
211 | |||
97 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | 212 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
98 | u32 type, u64 attribute) | 213 | u32 type, u64 attribute) |
99 | { | 214 | { |
@@ -113,3 +228,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | |||
113 | 228 | ||
114 | return (void __iomem *)__va(phys_addr); | 229 | return (void __iomem *)__va(phys_addr); |
115 | } | 230 | } |
231 | |||
232 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) | ||
233 | { | ||
234 | efi_setup = phys_addr + sizeof(struct setup_data); | ||
235 | } | ||
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab8146..88073b140298 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S | |||
@@ -34,10 +34,47 @@ | |||
34 | mov %rsi, %cr0; \ | 34 | mov %rsi, %cr0; \ |
35 | mov (%rsp), %rsp | 35 | mov (%rsp), %rsp |
36 | 36 | ||
37 | /* stolen from gcc */ | ||
38 | .macro FLUSH_TLB_ALL | ||
39 | movq %r15, efi_scratch(%rip) | ||
40 | movq %r14, efi_scratch+8(%rip) | ||
41 | movq %cr4, %r15 | ||
42 | movq %r15, %r14 | ||
43 | andb $0x7f, %r14b | ||
44 | movq %r14, %cr4 | ||
45 | movq %r15, %cr4 | ||
46 | movq efi_scratch+8(%rip), %r14 | ||
47 | movq efi_scratch(%rip), %r15 | ||
48 | .endm | ||
49 | |||
50 | .macro SWITCH_PGT | ||
51 | cmpb $0, efi_scratch+24(%rip) | ||
52 | je 1f | ||
53 | movq %r15, efi_scratch(%rip) # r15 | ||
54 | # save previous CR3 | ||
55 | movq %cr3, %r15 | ||
56 | movq %r15, efi_scratch+8(%rip) # prev_cr3 | ||
57 | movq efi_scratch+16(%rip), %r15 # EFI pgt | ||
58 | movq %r15, %cr3 | ||
59 | 1: | ||
60 | .endm | ||
61 | |||
62 | .macro RESTORE_PGT | ||
63 | cmpb $0, efi_scratch+24(%rip) | ||
64 | je 2f | ||
65 | movq efi_scratch+8(%rip), %r15 | ||
66 | movq %r15, %cr3 | ||
67 | movq efi_scratch(%rip), %r15 | ||
68 | FLUSH_TLB_ALL | ||
69 | 2: | ||
70 | .endm | ||
71 | |||
37 | ENTRY(efi_call0) | 72 | ENTRY(efi_call0) |
38 | SAVE_XMM | 73 | SAVE_XMM |
39 | subq $32, %rsp | 74 | subq $32, %rsp |
75 | SWITCH_PGT | ||
40 | call *%rdi | 76 | call *%rdi |
77 | RESTORE_PGT | ||
41 | addq $32, %rsp | 78 | addq $32, %rsp |
42 | RESTORE_XMM | 79 | RESTORE_XMM |
43 | ret | 80 | ret |
@@ -47,7 +84,9 @@ ENTRY(efi_call1) | |||
47 | SAVE_XMM | 84 | SAVE_XMM |
48 | subq $32, %rsp | 85 | subq $32, %rsp |
49 | mov %rsi, %rcx | 86 | mov %rsi, %rcx |
87 | SWITCH_PGT | ||
50 | call *%rdi | 88 | call *%rdi |
89 | RESTORE_PGT | ||
51 | addq $32, %rsp | 90 | addq $32, %rsp |
52 | RESTORE_XMM | 91 | RESTORE_XMM |
53 | ret | 92 | ret |
@@ -57,7 +96,9 @@ ENTRY(efi_call2) | |||
57 | SAVE_XMM | 96 | SAVE_XMM |
58 | subq $32, %rsp | 97 | subq $32, %rsp |
59 | mov %rsi, %rcx | 98 | mov %rsi, %rcx |
99 | SWITCH_PGT | ||
60 | call *%rdi | 100 | call *%rdi |
101 | RESTORE_PGT | ||
61 | addq $32, %rsp | 102 | addq $32, %rsp |
62 | RESTORE_XMM | 103 | RESTORE_XMM |
63 | ret | 104 | ret |
@@ -68,7 +109,9 @@ ENTRY(efi_call3) | |||
68 | subq $32, %rsp | 109 | subq $32, %rsp |
69 | mov %rcx, %r8 | 110 | mov %rcx, %r8 |
70 | mov %rsi, %rcx | 111 | mov %rsi, %rcx |
112 | SWITCH_PGT | ||
71 | call *%rdi | 113 | call *%rdi |
114 | RESTORE_PGT | ||
72 | addq $32, %rsp | 115 | addq $32, %rsp |
73 | RESTORE_XMM | 116 | RESTORE_XMM |
74 | ret | 117 | ret |
@@ -80,7 +123,9 @@ ENTRY(efi_call4) | |||
80 | mov %r8, %r9 | 123 | mov %r8, %r9 |
81 | mov %rcx, %r8 | 124 | mov %rcx, %r8 |
82 | mov %rsi, %rcx | 125 | mov %rsi, %rcx |
126 | SWITCH_PGT | ||
83 | call *%rdi | 127 | call *%rdi |
128 | RESTORE_PGT | ||
84 | addq $32, %rsp | 129 | addq $32, %rsp |
85 | RESTORE_XMM | 130 | RESTORE_XMM |
86 | ret | 131 | ret |
@@ -93,7 +138,9 @@ ENTRY(efi_call5) | |||
93 | mov %r8, %r9 | 138 | mov %r8, %r9 |
94 | mov %rcx, %r8 | 139 | mov %rcx, %r8 |
95 | mov %rsi, %rcx | 140 | mov %rsi, %rcx |
141 | SWITCH_PGT | ||
96 | call *%rdi | 142 | call *%rdi |
143 | RESTORE_PGT | ||
97 | addq $48, %rsp | 144 | addq $48, %rsp |
98 | RESTORE_XMM | 145 | RESTORE_XMM |
99 | ret | 146 | ret |
@@ -109,8 +156,15 @@ ENTRY(efi_call6) | |||
109 | mov %r8, %r9 | 156 | mov %r8, %r9 |
110 | mov %rcx, %r8 | 157 | mov %rcx, %r8 |
111 | mov %rsi, %rcx | 158 | mov %rsi, %rcx |
159 | SWITCH_PGT | ||
112 | call *%rdi | 160 | call *%rdi |
161 | RESTORE_PGT | ||
113 | addq $48, %rsp | 162 | addq $48, %rsp |
114 | RESTORE_XMM | 163 | RESTORE_XMM |
115 | ret | 164 | ret |
116 | ENDPROC(efi_call6) | 165 | ENDPROC(efi_call6) |
166 | |||
167 | .data | ||
168 | ENTRY(efi_scratch) | ||
169 | .fill 3,8,0 | ||
170 | .byte 0 | ||