author    Ingo Molnar <mingo@kernel.org>    2013-11-26 06:23:04 -0500
committer Ingo Molnar <mingo@kernel.org>    2013-11-26 06:23:04 -0500
commit    61d066977583803d333f1e7266b8ba772162dda4 (patch)
tree      087d56e401422f1a8a165a782216aa6d0291a60e /arch/x86/mm
parent    b975dc3689fc6a3718ad288ce080924f9cb7e176 (diff)
parent    ee41143027706d9f342dfe05487a00b20887fde7 (diff)
Merge tag 'efi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi into x86/efi
Pull EFI virtual mapping changes from Matt Fleming:

 * New static EFI runtime services virtual mapping layout which is
   groundwork for kexec support on EFI. (Borislav Petkov)

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--	arch/x86/mm/pageattr.c	461
1 file changed, 444 insertions(+), 17 deletions(-)
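
The headline addition below is kernel_map_pages_in_pgd(), which lets a caller install mappings into a private top-level page table (per the commit message, the EFI runtime page table) instead of init_mm. A minimal, hypothetical caller might look like the following sketch; the efi_pgd variable, the function name and the region parameters are illustrative assumptions, not code from this merge. Note that despite its name, the pfn argument is consumed as a physical address: the populate_*() helpers below advance cpa->pfn by PAGE_SIZE/PMD_SIZE/PUD_SIZE (bytes) and shift it down by PAGE_SHIFT when forming PTEs.

/*
 * Illustrative sketch only; not part of this merge. Assumes a
 * zeroed PGD page 'efi_pgd' allocated by the EFI setup code and a
 * physical region [phys, phys + num_pages * PAGE_SIZE) that should
 * become executable at 'virt' in that private page table.
 */
static pgd_t *efi_pgd;	/* assumption: allocated elsewhere */

static int example_map_efi_region(u64 phys, unsigned long virt,
				  unsigned num_pages)
{
	/*
	 * _PAGE_RW maps the region present + writable; because
	 * _PAGE_NX is absent from page_flags, kernel_map_pages_in_pgd()
	 * puts _PAGE_NX into mask_clr, so the mapping stays executable.
	 */
	return kernel_map_pages_in_pgd(efi_pgd, phys, virt,
				       num_pages, _PAGE_RW);
}
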
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bb32480c2d71..b3b19f46c016 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -30,6 +30,7 @@
  */
 struct cpa_data {
 	unsigned long	*vaddr;
+	pgd_t		*pgd;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
 	int		numpages;
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 	return prot;
 }
 
-/*
- * Lookup the page table entry for a virtual address. Return a pointer
- * to the entry and the level of the mapping.
- *
- * Note: We return pud and pmd either when the entry is marked large
- * or when the present bit is not set. Otherwise we would return a
- * pointer to a nonexisting mapping.
- */
-pte_t *lookup_address(unsigned long address, unsigned int *level)
+static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+				      unsigned int *level)
 {
-	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
 
 	return pte_offset_kernel(pmd, address);
 }
+
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
+pte_t *lookup_address(unsigned long address, unsigned int *level)
+{
+	return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
+}
 EXPORT_SYMBOL_GPL(lookup_address);
 
+static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
+				  unsigned int *level)
+{
+	if (cpa->pgd)
+		return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
+					       address, level);
+
+	return lookup_address(address, level);
+}
+
 /*
  * This is necessary because __pa() does not work on some
  * kinds of memory, like vmalloc() or the alloc_remap()
@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * Check for races, another CPU might have split this page
 	 * up already:
 	 */
-	tmp = lookup_address(address, &level);
+	tmp = _lookup_address_cpa(cpa, address, &level);
 	if (tmp != kpte)
 		goto out_unlock;
 
@@ -543,7 +559,8 @@ out_unlock:
 }
 
 static int
-__split_large_page(pte_t *kpte, unsigned long address, struct page *base)
+__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
+		   struct page *base)
 {
 	pte_t *pbase = (pte_t *)page_address(base);
 	unsigned long pfn, pfninc = 1;
@@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
 	 */
-	tmp = lookup_address(address, &level);
+	tmp = _lookup_address_cpa(cpa, address, &level);
 	if (tmp != kpte) {
 		spin_unlock(&pgd_lock);
 		return 1;
@@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
 	return 0;
 }
 
-static int split_large_page(pte_t *kpte, unsigned long address)
+static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
+			    unsigned long address)
 {
 	struct page *base;
 
@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (!base)
 		return -ENOMEM;
 
-	if (__split_large_page(kpte, address, base))
+	if (__split_large_page(cpa, kpte, address, base))
 		__free_page(base);
 
 	return 0;
 }
 
+static bool try_to_free_pte_page(pte_t *pte)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		if (!pte_none(pte[i]))
+			return false;
+
+	free_page((unsigned long)pte);
+	return true;
+}
+
+static bool try_to_free_pmd_page(pmd_t *pmd)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		if (!pmd_none(pmd[i]))
+			return false;
+
+	free_page((unsigned long)pmd);
+	return true;
+}
+
+static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
+{
+	pte_t *pte = pte_offset_kernel(pmd, start);
+
+	while (start < end) {
+		set_pte(pte, __pte(0));
+
+		start += PAGE_SIZE;
+		pte++;
+	}
+
+	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
+		pmd_clear(pmd);
+		return true;
+	}
+	return false;
+}
+
+static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
+			      unsigned long start, unsigned long end)
+{
+	if (unmap_pte_range(pmd, start, end))
+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+			pud_clear(pud);
+}
+
+static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pud, start);
+
+	/*
+	 * Not on a 2MB page boundary?
+	 */
+	if (start & (PMD_SIZE - 1)) {
+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+		unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+		__unmap_pmd_range(pud, pmd, start, pre_end);
+
+		start = pre_end;
+		pmd++;
+	}
+
+	/*
+	 * Try to unmap in 2M chunks.
+	 */
+	while (end - start >= PMD_SIZE) {
+		if (pmd_large(*pmd))
+			pmd_clear(pmd);
+		else
+			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
+
+		start += PMD_SIZE;
+		pmd++;
+	}
+
+	/*
+	 * 4K leftovers?
+	 */
+	if (start < end)
+		return __unmap_pmd_range(pud, pmd, start, end);
+
+	/*
+	 * Try again to free the PMD page if we haven't succeeded above.
+	 */
+	if (!pud_none(*pud))
+		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+			pud_clear(pud);
+}
+
+static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+	pud_t *pud = pud_offset(pgd, start);
+
+	/*
+	 * Not on a GB page boundary?
+	 */
+	if (start & (PUD_SIZE - 1)) {
+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+		unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+		unmap_pmd_range(pud, start, pre_end);
+
+		start = pre_end;
+		pud++;
+	}
+
+	/*
+	 * Try to unmap in 1G chunks?
+	 */
+	while (end - start >= PUD_SIZE) {
+
+		if (pud_large(*pud))
+			pud_clear(pud);
+		else
+			unmap_pmd_range(pud, start, start + PUD_SIZE);
+
+		start += PUD_SIZE;
+		pud++;
+	}
+
+	/*
+	 * 2M leftovers?
+	 */
+	if (start < end)
+		unmap_pmd_range(pud, start, end);
+
+	/*
+	 * No need to try to free the PUD page because we'll free it in
+	 * populate_pgd's error path
+	 */
+}
+
+static int alloc_pte_page(pmd_t *pmd)
+{
+	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+	if (!pte)
+		return -1;
+
+	set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+	return 0;
+}
+
+static int alloc_pmd_page(pud_t *pud)
+{
+	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+	if (!pmd)
+		return -1;
+
+	set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	return 0;
+}
+
+static void populate_pte(struct cpa_data *cpa,
+			 unsigned long start, unsigned long end,
+			 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, start);
+
+	while (num_pages-- && start < end) {
+
+		/* deal with the NX bit */
+		if (!(pgprot_val(pgprot) & _PAGE_NX))
+			cpa->pfn &= ~_PAGE_NX;
+
+		set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
+
+		start	 += PAGE_SIZE;
+		cpa->pfn += PAGE_SIZE;
+		pte++;
+	}
+}
+
+static int populate_pmd(struct cpa_data *cpa,
+			unsigned long start, unsigned long end,
+			unsigned num_pages, pud_t *pud, pgprot_t pgprot)
+{
+	unsigned int cur_pages = 0;
+	pmd_t *pmd;
+
+	/*
+	 * Not on a 2M boundary?
+	 */
+	if (start & (PMD_SIZE - 1)) {
+		unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
+		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+
+		pre_end   = min_t(unsigned long, pre_end, next_page);
+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
+		cur_pages = min_t(unsigned int, num_pages, cur_pages);
+
+		/*
+		 * Need a PTE page?
+		 */
+		pmd = pmd_offset(pud, start);
+		if (pmd_none(*pmd))
+			if (alloc_pte_page(pmd))
+				return -1;
+
+		populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
+
+		start = pre_end;
+	}
+
+	/*
+	 * We mapped them all?
+	 */
+	if (num_pages == cur_pages)
+		return cur_pages;
+
+	while (end - start >= PMD_SIZE) {
+
+		/*
+		 * We cannot use a 1G page so allocate a PMD page if needed.
+		 */
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		pmd = pmd_offset(pud, start);
+
+		set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
+
+		start	  += PMD_SIZE;
+		cpa->pfn  += PMD_SIZE;
+		cur_pages += PMD_SIZE >> PAGE_SHIFT;
+	}
+
+	/*
+	 * Map trailing 4K pages.
+	 */
+	if (start < end) {
+		pmd = pmd_offset(pud, start);
+		if (pmd_none(*pmd))
+			if (alloc_pte_page(pmd))
+				return -1;
+
+		populate_pte(cpa, start, end, num_pages - cur_pages,
+			     pmd, pgprot);
+	}
+	return num_pages;
+}
+
+static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+			pgprot_t pgprot)
+{
+	pud_t *pud;
+	unsigned long end;
+	int cur_pages = 0;
+
+	end = start + (cpa->numpages << PAGE_SHIFT);
+
+	/*
+	 * Not on a Gb page boundary? => map everything up to it with
+	 * smaller pages.
+	 */
+	if (start & (PUD_SIZE - 1)) {
+		unsigned long pre_end;
+		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+
+		pre_end   = min_t(unsigned long, end, next_page);
+		cur_pages = (pre_end - start) >> PAGE_SHIFT;
+		cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
+
+		pud = pud_offset(pgd, start);
+
+		/*
+		 * Need a PMD page?
+		 */
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
+					 pud, pgprot);
+		if (cur_pages < 0)
+			return cur_pages;
+
+		start = pre_end;
+	}
+
+	/* We mapped them all? */
+	if (cpa->numpages == cur_pages)
+		return cur_pages;
+
+	pud = pud_offset(pgd, start);
+
+	/*
+	 * Map everything starting from the Gb boundary, possibly with 1G pages
+	 */
+	while (end - start >= PUD_SIZE) {
+		set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
+
+		start	  += PUD_SIZE;
+		cpa->pfn  += PUD_SIZE;
+		cur_pages += PUD_SIZE >> PAGE_SHIFT;
+		pud++;
+	}
+
+	/* Map trailing leftover */
+	if (start < end) {
+		int tmp;
+
+		pud = pud_offset(pgd, start);
+		if (pud_none(*pud))
+			if (alloc_pmd_page(pud))
+				return -1;
+
+		tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
+				   pud, pgprot);
+		if (tmp < 0)
+			return cur_pages;
+
+		cur_pages += tmp;
+	}
+	return cur_pages;
+}
+
+/*
+ * Restrictions for kernel page table do not necessarily apply when mapping in
+ * an alternate PGD.
+ */
+static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
+{
+	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
+	bool allocd_pgd = false;
+	pgd_t *pgd_entry;
+	pud_t *pud = NULL;	/* shut up gcc */
+	int ret;
+
+	pgd_entry = cpa->pgd + pgd_index(addr);
+
+	/*
+	 * Allocate a PUD page and hand it down for mapping.
+	 */
+	if (pgd_none(*pgd_entry)) {
+		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+		if (!pud)
+			return -1;
+
+		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
+		allocd_pgd = true;
+	}
+
+	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
+
+	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
+	if (ret < 0) {
+		unmap_pud_range(pgd_entry, addr,
+				addr + (cpa->numpages << PAGE_SHIFT));
+
+		if (allocd_pgd) {
+			/*
+			 * If I allocated this PUD page, I can just as well
+			 * free it in this error path.
+			 */
+			pgd_clear(pgd_entry);
+			free_page((unsigned long)pud);
+		}
+		return ret;
+	}
+	cpa->numpages = ret;
+	return 0;
+}
+
 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 			       int primary)
 {
+	if (cpa->pgd)
+		return populate_pgd(cpa, vaddr);
+
 	/*
 	 * Ignore all non primary paths.
 	 */
@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 	else
 		address = *cpa->vaddr;
 repeat:
-	kpte = lookup_address(address, &level);
+	kpte = _lookup_address_cpa(cpa, address, &level);
 	if (!kpte)
 		return __cpa_process_fault(cpa, address, primary);
 
@@ -761,7 +1154,7 @@ repeat:
 	/*
 	 * We have to split the large page:
 	 */
-	err = split_large_page(kpte, address);
+	err = split_large_page(cpa, kpte, address);
 	if (!err) {
 		/*
 		 * Do a global flush tlb after splitting the large page
@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	int ret, cache, checkalias;
 	unsigned long baddr = 0;
 
+	memset(&cpa, 0, sizeof(cpa));
+
 	/*
 	 * Check, if we are requested to change a not supported
 	 * feature:
@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
 {
 	unsigned long tempaddr = (unsigned long) page_address(page);
 	struct cpa_data cpa = { .vaddr = &tempaddr,
+				.pgd = NULL,
 				.numpages = numpages,
 				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
 				.mask_clr = __pgprot(0),
@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
 {
 	unsigned long tempaddr = (unsigned long) page_address(page);
 	struct cpa_data cpa = { .vaddr = &tempaddr,
+				.pgd = NULL,
 				.numpages = numpages,
 				.mask_set = __pgprot(0),
 				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page)
 
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
+int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
+			    unsigned numpages, unsigned long page_flags)
+{
+	int retval = -EINVAL;
+
+	struct cpa_data cpa = {
+		.vaddr = &address,
+		.pfn = pfn,
+		.pgd = pgd,
+		.numpages = numpages,
+		.mask_set = __pgprot(0),
+		.mask_clr = __pgprot(0),
+		.flags = 0,
+	};
+
+	if (!(__supported_pte_mask & _PAGE_NX))
+		goto out;
+
+	if (!(page_flags & _PAGE_NX))
+		cpa.mask_clr = __pgprot(_PAGE_NX);
+
+	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
+
+	retval = __change_page_attr_set_clr(&cpa, 0);
+	__flush_tlb_all();
+
+out:
+	return retval;
+}
+
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
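
Taken together, the additions give the CPA machinery two modes: with cpa->pgd == NULL (hence the new memset() in change_page_attr_set_clr() and the explicit .pgd = NULL initializers) everything behaves as before against the kernel page tables, while a non-NULL cpa->pgd routes lookups through _lookup_address_cpa() and not-yet-mapped addresses through populate_pgd(). The comment block below is an editorial summary of that flow, not code from the merge:

/*
 * Alternate-PGD call chain (editorial summary; assumes cpa->pgd is set):
 *
 *   kernel_map_pages_in_pgd(pgd, pfn, address, numpages, page_flags)
 *     -> __change_page_attr_set_clr()
 *          -> __change_page_attr()
 *               -> _lookup_address_cpa()          walks cpa->pgd
 *               -> __cpa_process_fault()          no PTE there yet
 *                    -> populate_pgd()
 *                         -> populate_pud()       1G pages where possible
 *                              -> populate_pmd()  2M pages where possible
 *                                   -> populate_pte()  4K leftovers
 *
 * On failure, populate_pgd() unwinds via unmap_pud_range() and frees
 * the PUD page it allocated itself, which is why unmap_pud_range()
 * deliberately leaves the PUD page alone.
 */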