Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--   arch/x86/xen/mmu.c | 186
1 file changed, 105 insertions(+), 81 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ce563be09cc1..256282e7888b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -365,7 +365,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 /* Assume pteval_t is equivalent to all the other *val_t types. */
 static pteval_t pte_mfn_to_pfn(pteval_t val)
 {
-	if (val & _PAGE_PRESENT) {
+	if (pteval_present(val)) {
 		unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
 		unsigned long pfn = mfn_to_pfn(mfn);

@@ -381,7 +381,7 @@ static pteval_t pte_mfn_to_pfn(pteval_t val)

 static pteval_t pte_pfn_to_mfn(pteval_t val)
 {
-	if (val & _PAGE_PRESENT) {
+	if (pteval_present(val)) {
 		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
 		pteval_t flags = val & PTE_FLAGS_MASK;
 		unsigned long mfn;
@@ -431,7 +431,7 @@ static pteval_t iomap_pte(pteval_t val)
 	return val;
 }

-static pteval_t xen_pte_val(pte_t pte)
+__visible pteval_t xen_pte_val(pte_t pte)
 {
 	pteval_t pteval = pte.pte;
 #if 0
@@ -448,7 +448,7 @@ static pteval_t xen_pte_val(pte_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);

-static pgdval_t xen_pgd_val(pgd_t pgd)
+__visible pgdval_t xen_pgd_val(pgd_t pgd)
 {
 	return pte_mfn_to_pfn(pgd.pgd);
 }
@@ -479,7 +479,7 @@ void xen_set_pat(u64 pat)
 	WARN_ON(pat != 0x0007010600070106ull);
 }

-static pte_t xen_make_pte(pteval_t pte)
+__visible pte_t xen_make_pte(pteval_t pte)
 {
 	phys_addr_t addr = (pte & PTE_PFN_MASK);
 #if 0
@@ -514,14 +514,14 @@ static pte_t xen_make_pte(pteval_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);

-static pgd_t xen_make_pgd(pgdval_t pgd)
+__visible pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	pgd = pte_pfn_to_mfn(pgd);
 	return native_make_pgd(pgd);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);

-static pmdval_t xen_pmd_val(pmd_t pmd)
+__visible pmdval_t xen_pmd_val(pmd_t pmd)
 {
 	return pte_mfn_to_pfn(pmd.pmd);
 }
@@ -580,7 +580,7 @@ static void xen_pmd_clear(pmd_t *pmdp)
 }
 #endif /* CONFIG_X86_PAE */

-static pmd_t xen_make_pmd(pmdval_t pmd)
+__visible pmd_t xen_make_pmd(pmdval_t pmd)
 {
 	pmd = pte_pfn_to_mfn(pmd);
 	return native_make_pmd(pmd);
@@ -588,13 +588,13 @@ static pmd_t xen_make_pmd(pmdval_t pmd)
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);

 #if PAGETABLE_LEVELS == 4
-static pudval_t xen_pud_val(pud_t pud)
+__visible pudval_t xen_pud_val(pud_t pud)
 {
 	return pte_mfn_to_pfn(pud.pud);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);

-static pud_t xen_make_pud(pudval_t pud)
+__visible pud_t xen_make_pud(pudval_t pud)
 {
 	pud = pte_pfn_to_mfn(pud);

@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
 	 * instead of somewhere later and be confusing. */
 	xen_mc_flush();
 }
-#endif
-static void __init xen_pagetable_init(void)
+static void __init xen_pagetable_p2m_copy(void)
 {
-#ifdef CONFIG_X86_64
 	unsigned long size;
 	unsigned long addr;
-#endif
-	paging_init();
-	xen_setup_shared_info();
-#ifdef CONFIG_X86_64
-	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		unsigned long new_mfn_list;
-
-		size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
-
-		/* On 32-bit, we get zero so this never gets executed. */
-		new_mfn_list = xen_revector_p2m_tree();
-		if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
-			/* using __ka address and sticking INVALID_P2M_ENTRY! */
-			memset((void *)xen_start_info->mfn_list, 0xff, size);
-
-			/* We should be in __ka space. */
-			BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
-			addr = xen_start_info->mfn_list;
-			/* We roundup to the PMD, which means that if anybody at this stage is
-			 * using the __ka address of xen_start_info or xen_start_info->shared_info
-			 * they are in going to crash. Fortunatly we have already revectored
-			 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
-			size = roundup(size, PMD_SIZE);
-			xen_cleanhighmap(addr, addr + size);
-
-			size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
-			memblock_free(__pa(xen_start_info->mfn_list), size);
-			/* And revector! Bye bye old array */
-			xen_start_info->mfn_list = new_mfn_list;
-		} else
-			goto skip;
-	}
+	unsigned long new_mfn_list;
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+
+	new_mfn_list = xen_revector_p2m_tree();
+	/* No memory or already called. */
+	if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
+		return;
+
+	/* using __ka address and sticking INVALID_P2M_ENTRY! */
+	memset((void *)xen_start_info->mfn_list, 0xff, size);
+
+	/* We should be in __ka space. */
+	BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
+	addr = xen_start_info->mfn_list;
+	/* We roundup to the PMD, which means that if anybody at this stage is
+	 * using the __ka address of xen_start_info or xen_start_info->shared_info
+	 * they are in going to crash. Fortunatly we have already revectored
+	 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
+	size = roundup(size, PMD_SIZE);
+	xen_cleanhighmap(addr, addr + size);
+
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+	memblock_free(__pa(xen_start_info->mfn_list), size);
+	/* And revector! Bye bye old array */
+	xen_start_info->mfn_list = new_mfn_list;
+
 	/* At this stage, cleanup_highmap has already cleaned __ka space
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
 	 * the ramdisk). We continue on, erasing PMD entries that point to page
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void)
 	 * anything at this stage. */
 	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
 #endif
-skip:
+}
+#endif
+
+static void __init xen_pagetable_init(void)
+{
+	paging_init();
+	xen_setup_shared_info();
+#ifdef CONFIG_X86_64
+	xen_pagetable_p2m_copy();
 #endif
 	xen_post_allocator_init();
 }
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
 	pte_t pte = pfn_pte(pfn, prot);

+	/* For PVH no need to set R/O or R/W to pin them or unpin them. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
 		BUG();
 }
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
  * but that's enough to get __va working. We need to fill in the rest
  * of the physical mapping once some sort of allocator has been set
  * up.
+ * NOTE: for PVH, the page tables are native.
  */
 void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Zap identity mapping */
 	init_level4_pgt[0] = __pgd(0);

-	/* Pre-constructed entries are in pfn, so convert to mfn */
-	/* L4[272] -> level3_ident_pgt
-	 * L4[511] -> level3_kernel_pgt */
-	convert_pfn_mfn(init_level4_pgt);
-
-	/* L3_i[0] -> level2_ident_pgt */
-	convert_pfn_mfn(level3_ident_pgt);
-	/* L3_k[510] -> level2_kernel_pgt
-	 * L3_i[511] -> level2_fixmap_pgt */
-	convert_pfn_mfn(level3_kernel_pgt);
-
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		/* Pre-constructed entries are in pfn, so convert to mfn */
+		/* L4[272] -> level3_ident_pgt
+		 * L4[511] -> level3_kernel_pgt */
+		convert_pfn_mfn(init_level4_pgt);
+
+		/* L3_i[0] -> level2_ident_pgt */
+		convert_pfn_mfn(level3_ident_pgt);
+		/* L3_k[510] -> level2_kernel_pgt
+		 * L3_i[511] -> level2_fixmap_pgt */
+		convert_pfn_mfn(level3_kernel_pgt);
+	}
 	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	copy_page(level2_fixmap_pgt, l2);
 	/* Note that we don't do anything with level1_fixmap_pgt which
 	 * we don't need. */
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		/* Make pagetable pieces RO */
+		set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+		set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+
+		/* Pin down new L4 */
+		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+				  PFN_DOWN(__pa_symbol(init_level4_pgt)));
+
+		/* Unpin Xen-provided one */
+		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));

-	/* Make pagetable pieces RO */
-	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
-	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
-	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-
-	/* Pin down new L4 */
-	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
-			  PFN_DOWN(__pa_symbol(init_level4_pgt)));
-
-	/* Unpin Xen-provided one */
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
-	/*
-	 * At this stage there can be no user pgd, and no page
-	 * structure to attach it to, so make sure we just set kernel
-	 * pgd.
-	 */
-	xen_mc_batch();
-	__xen_write_cr3(true, __pa(init_level4_pgt));
-	xen_mc_issue(PARAVIRT_LAZY_CPU);
+		/*
+		 * At this stage there can be no user pgd, and no page
+		 * structure to attach it to, so make sure we just set kernel
+		 * pgd.
+		 */
+		xen_mc_batch();
+		__xen_write_cr3(true, __pa(init_level4_pgt));
+		xen_mc_issue(PARAVIRT_LAZY_CPU);
+	} else
+		native_write_cr3(__pa(init_level4_pgt));

 	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
 	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
@@ -2103,6 +2115,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)

 static void __init xen_post_allocator_init(void)
 {
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
@@ -2207,6 +2222,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
 	x86_init.paging.pagetable_init = xen_pagetable_init;
+
+	/* Optimization - we can use the HVM one but it has no idea which
+	 * VCPUs are descheduled - which means that it will needlessly IPI
+	 * them. Xen knows so let it do the job.
+	 */
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+		return;
+	}
 	pv_mmu_ops = xen_mmu_ops;

 	memset(dummy_mapping, 0xff, PAGE_SIZE);
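Note on pteval_present(): the converted pte_mfn_to_pfn()/pte_pfn_to_mfn() checks in the first two hunks rely on a pteval_present() helper that is not part of this file's diff (it lives in arch/x86/include/asm/pgtable.h). As a rough sketch of what such a helper checks -- the exact flag set below is an assumption, not taken from this diff -- it treats protnone/NUMA-hinting entries as present so the pfn<->mfn conversion still applies to them:

    /* Sketch only; see arch/x86/include/asm/pgtable.h for the real definition. */
    static inline int pteval_present(pteval_t pteval)
    {
            /* Protnone and NUMA-hinting ptes are considered present for
             * pagetable operations such as zapping, protection changes
             * and get_user_pages. */
            return pteval & (_PAGE_NUMA | _PAGE_PROTNONE | _PAGE_PRESENT);
    }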