author     Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2012-09-12 11:16:27 -0400
committer  Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2012-09-12 11:18:57 -0400
commit     98104c3480e568d9c145adbc7dc56c9d4d170e30 (patch)
tree       95fd6366645fe16b496032e23d57d70411998944 /arch/x86/xen
parent     25a765b7f05cb8460fa01b54568894b20e184862 (diff)
parent     328731876451a837f56e66ffa11de053ed5daf73 (diff)
Merge branch 'stable/128gb.v5.1' into stable/for-linus-3.7
* stable/128gb.v5.1:
xen/mmu: If the revector fails, don't attempt to revector anything else.
xen/p2m: When revectoring deal with holes in the P2M array.
xen/mmu: Release just the MFN list, not MFN list and part of pagetables.
xen/mmu: Remove from __ka space PMD entries for pagetables.
xen/mmu: Copy and revector the P2M tree.
xen/p2m: Add logic to revector a P2M tree to use __va leafs.
xen/mmu: Recycle the Xen provided L4, L3, and L2 pages
xen/mmu: For 64-bit do not call xen_map_identity_early
xen/mmu: use copy_page instead of memcpy.
xen/mmu: Provide comments describing the _ka and _va aliasing issue
xen/mmu: The xen_setup_kernel_pagetable doesn't need to return anything.
Revert "xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain." and "xen/x86: Use memblock_reserve for sensitive areas."
xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain.
xen/x86: Use memblock_reserve for sensitive areas.
xen/p2m: Fix the comment describing the P2M tree.
Conflicts:
arch/x86/xen/mmu.c
The new pagetable_init is the old xen_pagetable_setup_done and
xen_pagetable_setup_start rolled into one.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
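The series repeatedly refers to the __ka (kernel text mapping above __START_KERNEL_map) and __va (linear mapping) aliases of the same physical pages. As a rough sketch of what "revectoring to __va leafs" means — illustrative only, the helper name below is made up and not part of this merge:

	/* Both aliases map the same physical page, so a round trip through
	 * the physical address yields the __va alias; the stale __ka PMD
	 * entries can then be zapped (see xen_cleanhighmap in the mmu.c diff). */
	static void *revector_to_va(void *ka_ptr)
	{
		return __va(__pa(ka_ptr));
	}

This is exactly the pattern the merge applies to xen_start_info, pt_base and the P2M leaf pages.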
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--	arch/x86/xen/enlighten.c	  5
-rw-r--r--	arch/x86/xen/mmu.c		183
-rw-r--r--	arch/x86/xen/p2m.c		 92
-rw-r--r--	arch/x86/xen/setup.c		 18
-rw-r--r--	arch/x86/xen/xen-ops.h		  3
5 files changed, 254 insertions, 47 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2766746de274..47b3acdc2ac5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1290,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
 	int rc;
-	pgd_t *pgd;
 
 	if (!xen_start_info)
 		return;
@@ -1382,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void)
 	acpi_numa = -1;
 #endif
 
-	pgd = (pgd_t *)xen_start_info->pt_base;
-
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1392,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void)
 	early_boot_irqs_disabled = true;
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
-	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
 
 	/* Allocate and initialize top and mid mfn levels for p2m structure */
 	xen_build_mfn_list_list();
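Worth noting at this call site: xen_setup_kernel_pagetable() now performs the cr3 switch to init_level4_pgt itself and returns void, and (on 64-bit) revectors xen_start_info to its __va alias before returning, so the caller reduces to a single statement:

	/* No return value to capture any more; Xen's pt_base is passed straight in. */
	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
				   xen_start_info->nr_pages);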
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dfc900471aef..5a16824cc2b3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -84,6 +84,7 @@
  */
 DEFINE_SPINLOCK(xen_reservation_lock);
 
+#ifdef CONFIG_X86_32
 /*
  * Identity map, in addition to plain kernel map.  This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
  */
 #define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
 static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-
+#endif
 #ifdef CONFIG_X86_64
 /* l3 pud for userspace vsyscall mapping */
 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
@@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
 
 static void xen_post_allocator_init(void);
 
-static void __init xen_pagetable_init(void)
-{
-	paging_init();
-	xen_setup_shared_info();
-	xen_post_allocator_init();
-}
-
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 {
 	/* reserve the range used */
@@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 	}
 }
 
+#ifdef CONFIG_X86_64
+static void __init xen_cleanhighmap(unsigned long vaddr,
+				    unsigned long vaddr_end)
+{
+	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
+	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
+
+	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
+	 * We include the PMD passed in on _both_ boundaries. */
+	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
+			pmd++, vaddr += PMD_SIZE) {
+		if (pmd_none(*pmd))
+			continue;
+		if (vaddr < (unsigned long) _text || vaddr > kernel_end)
+			set_pmd(pmd, __pmd(0));
+	}
+	/* In case we did something silly, we should crash in this function
+	 * instead of somewhere later and be confusing. */
+	xen_mc_flush();
+}
+#endif
+static void __init xen_pagetable_init(void)
+{
+#ifdef CONFIG_X86_64
+	unsigned long size;
+	unsigned long addr;
+#endif
+	paging_init();
+	xen_setup_shared_info();
+#ifdef CONFIG_X86_64
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		unsigned long new_mfn_list;
+
+		size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+
+		/* On 32-bit, we get zero so this never gets executed. */
+		new_mfn_list = xen_revector_p2m_tree();
+		if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
+			/* using __ka address and sticking INVALID_P2M_ENTRY! */
+			memset((void *)xen_start_info->mfn_list, 0xff, size);
+
+			/* We should be in __ka space. */
+			BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
+			addr = xen_start_info->mfn_list;
+			/* We roundup to the PMD, which means that if anybody at this stage is
+			 * using the __ka address of xen_start_info or xen_start_info->shared_info
+			 * they are in going to crash. Fortunatly we have already revectored
+			 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
+			size = roundup(size, PMD_SIZE);
+			xen_cleanhighmap(addr, addr + size);
+
+			size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+			memblock_free(__pa(xen_start_info->mfn_list), size);
+			/* And revector! Bye bye old array */
+			xen_start_info->mfn_list = new_mfn_list;
+		} else
+			goto skip;
+	}
+	/* At this stage, cleanup_highmap has already cleaned __ka space
+	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
+	 * the ramdisk). We continue on, erasing PMD entries that point to page
+	 * tables - do note that they are accessible at this stage via __va.
+	 * For good measure we also round up to the PMD - which means that if
+	 * anybody is using __ka address to the initial boot-stack - and try
+	 * to use it - they are going to crash. The xen_start_info has been
+	 * taken care of already in xen_setup_kernel_pagetable. */
+	addr = xen_start_info->pt_base;
+	size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+
+	xen_cleanhighmap(addr, addr + size);
+	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
+#ifdef DEBUG
+	/* This is superflous and is not neccessary, but you know what
+	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
+	 * anything at this stage. */
+	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
+#endif
+skip:
+#endif
+	xen_post_allocator_init();
+}
 static void xen_write_cr2(unsigned long cr2)
 {
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
 }
-
+#ifdef CONFIG_X86_32
 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
@@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 
 	set_page_prot(pmd, PAGE_KERNEL_RO);
 }
-
+#endif
 void __init xen_setup_machphys_mapping(void)
 {
 	struct xen_machphys_mapping mapping;
@@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v)
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		pte[i] = xen_make_pte(pte[i].pte);
 }
-
+static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
+				 unsigned long addr)
+{
+	if (*pt_base == PFN_DOWN(__pa(addr))) {
+		set_page_prot((void *)addr, PAGE_KERNEL);
+		clear_page((void *)addr);
+		(*pt_base)++;
+	}
+	if (*pt_end == PFN_DOWN(__pa(addr))) {
+		set_page_prot((void *)addr, PAGE_KERNEL);
+		clear_page((void *)addr);
+		(*pt_end)--;
+	}
+}
 /*
  * Set up the initial kernel pagetable.
  *
@@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
-					 unsigned long max_pfn)
+void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pud_t *l3;
 	pmd_t *l2;
+	unsigned long addr[3];
+	unsigned long pt_base, pt_end;
+	unsigned i;
 
 	/* max_pfn_mapped is the last pfn mapped in the initial memory
 	 * mappings. Considering that on Xen after the kernel mappings we
@@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	 * set max_pfn_mapped to the last real pfn mapped. */
 	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
 
+	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
+	pt_end = pt_base + xen_start_info->nr_pt_frames;
+
 	/* Zap identity mapping */
 	init_level4_pgt[0] = __pgd(0);
 
 	/* Pre-constructed entries are in pfn, so convert to mfn */
+	/* L4[272] -> level3_ident_pgt
+	 * L4[511] -> level3_kernel_pgt */
 	convert_pfn_mfn(init_level4_pgt);
+
+	/* L3_i[0] -> level2_ident_pgt */
 	convert_pfn_mfn(level3_ident_pgt);
+	/* L3_k[510] -> level2_kernel_pgt
+	 * L3_i[511] -> level2_fixmap_pgt */
 	convert_pfn_mfn(level3_kernel_pgt);
 
+	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
 
-	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
+	addr[0] = (unsigned long)pgd;
+	addr[1] = (unsigned long)l3;
+	addr[2] = (unsigned long)l2;
+	/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
+	 * Both L4[272][0] and L4[511][511] have entries that point to the same
+	 * L2 (PMD) tables. Meaning that if you modify it in __va space
+	 * it will be also modified in the __ka space! (But if you just
+	 * modify the PMD table to point to other PTE's or none, then you
+	 * are OK - which is what cleanup_highmap does) */
+	copy_page(level2_ident_pgt, l2);
+	/* Graft it onto L4[511][511] */
+	copy_page(level2_kernel_pgt, l2);
+
+	/* Get [511][510] and graft that in level2_fixmap_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
-	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	/* Set up identity map */
-	xen_map_identity_early(level2_ident_pgt, max_pfn);
+	copy_page(level2_fixmap_pgt, l2);
+	/* Note that we don't do anything with level1_fixmap_pgt which
+	 * we don't need. */
 
 	/* Make pagetable pieces RO */
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
@@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-	/* Switch over */
-	pgd = init_level4_pgt;
-
 	/*
 	 * At this stage there can be no user pgd, and no page
 	 * structure to attach it to, so make sure we just set kernel
 	 * pgd.
 	 */
 	xen_mc_batch();
-	__xen_write_cr3(true, __pa(pgd));
+	__xen_write_cr3(true, __pa(init_level4_pgt));
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
-	memblock_reserve(__pa(xen_start_info->pt_base),
-			 xen_start_info->nr_pt_frames * PAGE_SIZE);
+	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
+	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ...  for
+	 * the initial domain. For guests using the toolstack, they are in:
+	 * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
+	 * rip out the [L4] (pgd), but for guests we shave off three pages.
+	 */
+	for (i = 0; i < ARRAY_SIZE(addr); i++)
+		check_pt_base(&pt_base, &pt_end, addr[i]);
 
-	return pgd;
+	/* Our (by three pages) smaller Xen pagetable that we are using */
+	memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
+	/* Revector the xen_start_info */
+	xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
 }
 #else	/* !CONFIG_X86_64 */
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
@@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	 */
 	swapper_kernel_pmd =
 		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-	memcpy(swapper_kernel_pmd, initial_kernel_pmd,
-	       sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(swapper_kernel_pmd, initial_kernel_pmd);
 	swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
 		__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
 	set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
@@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
-					 unsigned long max_pfn)
+void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
 
@@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 				  512*1024);
 
 	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-	memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(initial_kernel_pmd, kernel_pmd);
 
 	xen_map_identity_early(initial_kernel_pmd, max_pfn);
 
-	memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	copy_page(initial_page_table, pgd);
 	initial_page_table[KERNEL_PGD_BOUNDARY] =
 		__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
 
@@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 
 	memblock_reserve(__pa(xen_start_info->pt_base),
 			 xen_start_info->nr_pt_frames * PAGE_SIZE);
-
-	return initial_page_table;
 }
 #endif	/* CONFIG_X86_64 */
 
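The check_pt_base() loop above is what lets guests hand back the top of Xen's bootstrap pagetables. A rough worked example, assuming a toolstack-built guest whose pagetable frames start at pfn 0x1000 in [L4][L3][L2][L1]... order (the pfn values are made up for illustration):

	unsigned long pt_base = 0x1000;		/* PFN_DOWN(__pa(pt_base)) */
	unsigned long pt_end  = pt_base + 6;	/* nr_pt_frames == 6 */

	/* addr[] holds the recycled pgd, L3 and L2 pages, i.e. frames
	 * 0x1000, 0x1001 and 0x1002; the three check_pt_base() calls
	 * re-map them read-write, clear them and advance pt_base to
	 * 0x1003, so the final reservation covers only the remaining
	 * L1 frames:
	 *
	 *	memblock_reserve(PFN_PHYS(0x1003), 3 * PAGE_SIZE);
	 *
	 * For dom0, whose frames arrive as [L4][L1][L2][L3]..., only the
	 * [L4] frame matches and just one page is shaved off. */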
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 76ba0e97e530..b5e4d302a067 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -22,7 +22,7 @@
  *
  * P2M_PER_PAGE depends on the architecture, as a mfn is always
  * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
- * 512 and 1024 entries respectively. 
+ * 512 and 1024 entries respectively.
  *
  * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
  *
@@ -139,11 +139,11 @@
  *  / | ~0, ~0, ....  |
  *  | \---------------/
  *  |
- *    p2m_missing          p2m_missing
- * /------------------\    /------------\
- * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
- * | [p2m_mid_missing]+---->| ..., ~0    |
- * \------------------/    \------------/
+ *    p2m_mid_missing      p2m_missing
+ * /-----------------\     /------------\
+ * | [p2m_missing]   +---->| ~0, ~0, ~0 |
+ * | [p2m_missing]   +---->| ..., ~0    |
+ * \-----------------/     \------------/
  *
  * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
  */
@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void)
 
 	m2p_override_init();
 }
+#ifdef CONFIG_X86_64
+#include <linux/bootmem.h>
+unsigned long __init xen_revector_p2m_tree(void)
+{
+	unsigned long va_start;
+	unsigned long va_end;
+	unsigned long pfn;
+	unsigned long pfn_free = 0;
+	unsigned long *mfn_list = NULL;
+	unsigned long size;
+
+	va_start = xen_start_info->mfn_list;
+	/*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
+	 * so make sure it is rounded up to that */
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+	va_end = va_start + size;
+
+	/* If we were revectored already, don't do it again. */
+	if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
+		return 0;
+
+	mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
+	if (!mfn_list) {
+		pr_warn("Could not allocate space for a new P2M tree!\n");
+		return xen_start_info->mfn_list;
+	}
+	/* Fill it out with INVALID_P2M_ENTRY value */
+	memset(mfn_list, 0xFF, size);
+
+	for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx;
+		unsigned long *mid_p;
+
+		if (!p2m_top[topidx])
+			continue;
+
+		if (p2m_top[topidx] == p2m_mid_missing)
+			continue;
+
+		mididx = p2m_mid_index(pfn);
+		mid_p = p2m_top[topidx][mididx];
+		if (!mid_p)
+			continue;
+		if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
+			continue;
+
+		if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
+			continue;
+
+		/* The old va. Rebase it on mfn_list */
+		if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
+			unsigned long *new;
+
+			if (pfn_free > (size / sizeof(unsigned long))) {
+				WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
+				     size / sizeof(unsigned long), pfn_free);
+				return 0;
+			}
+			new = &mfn_list[pfn_free];
+
+			copy_page(new, mid_p);
+			p2m_top[topidx][mididx] = &mfn_list[pfn_free];
+			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
+
+			pfn_free += P2M_PER_PAGE;
 
+		}
+		/* This should be the leafs allocated for identity from _brk. */
+	}
+	return (unsigned long)mfn_list;
+
+}
+#else
+unsigned long __init xen_revector_p2m_tree(void)
+{
+	return 0;
+}
+#endif
 unsigned long get_phys_to_machine(unsigned long pfn)
 {
 	unsigned topidx, mididx, idx;
@@ -430,7 +508,7 @@ static void free_p2m_page(void *p)
 	free_page((unsigned long)p);
 }
 
-/* 
+/*
  * Fully allocate the p2m structure for a given pfn.  We need to check
  * that both the top and mid levels are allocated, and make sure the
  * parallel mfn tree is kept in sync.  We may race with other cpus, so
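For readers following the p2m_top/p2m_mid/leaf walk that xen_revector_p2m_tree() performs, the lookup it mirrors boils down to the following simplified sketch (identity/missing special-casing stripped out; on 64-bit each level holds 512 entries, i.e. PAGE_SIZE / sizeof(unsigned long), per the file comment above):

	/* Sketch only - relies on the file's own p2m_top array and
	 * P2M_PER_PAGE/P2M_MID_PER_PAGE constants. */
	static unsigned long p2m_lookup(unsigned long pfn)
	{
		unsigned topidx = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
		unsigned mididx = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
		unsigned idx    = pfn % P2M_PER_PAGE;

		return p2m_top[topidx][mididx][idx];	/* MFN, or ~0UL if missing */
	}

Only the leaf pages whose pointers fall inside the Xen-provided mfn_list are moved; the top and mid arrays (and the identity leaves) come from the kernel's own .brk area, which xen_cleanhighmap() deliberately leaves mapped since it only zaps PMDs outside [_text, _brk_end].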
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d11ca11d14fc..3edb320d508f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -431,6 +431,24 @@ char * __init xen_memory_setup(void)
 	 *  - mfn_list
 	 *  - xen_start_info
 	 * See comment above "struct start_info" in <xen/interface/xen.h>
+	 * We tried to make the the memblock_reserve more selective so
+	 * that it would be clear what region is reserved. Sadly we ran
+	 * in the problem wherein on a 64-bit hypervisor with a 32-bit
+	 * initial domain, the pt_base has the cr3 value which is not
+	 * neccessarily where the pagetable starts! As Jan put it: "
+	 * Actually, the adjustment turns out to be correct: The page
+	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
+	 * "first L2", "first L3", so the offset to the page table base is
+	 * indeed 2. When reading xen/include/public/xen.h's comment
+	 * very strictly, this is not a violation (since there nothing is said
+	 * that the first thing in the page table space is pointed to by
+	 * pt_base; I admit that this seems to be implied though, namely
+	 * do I think that it is implied that the page table space is the
+	 * range [pt_base, pt_base + nt_pt_frames), whereas that
+	 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
+	 * which - without a priori knowledge - the kernel would have
+	 * difficulty to figure out)." - so lets just fall back to the
+	 * easy way and reserve the whole region.
 	 */
 	memblock_reserve(__pa(xen_start_info->mfn_list),
 			 xen_start_info->pt_base - xen_start_info->mfn_list);
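The comment above boils down to this: pt_base cannot be trusted to mark the exact start of the pagetable frames on a 32-bit dom0 under a 64-bit hypervisor, so the reservation simply spans everything between the two pointers Xen hands over. Restated as a sketch (the same call as in the hunk, split up for readability):

	/* Per the start-of-day layout described above "struct start_info" in
	 * xen/interface/xen.h, [mfn_list, pt_base) covers at least the P2M
	 * frame list and start_info (and, in the 32-on-64 case quoted above,
	 * the first couple of pagetable frames as well). */
	phys_addr_t start = __pa(xen_start_info->mfn_list);
	phys_addr_t size  = xen_start_info->pt_base - xen_start_info->mfn_list;

	memblock_reserve(start, size);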
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 202d4c150154..bb5a8105ea86 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
 void xen_setup_shared_info(void);
 void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
-pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
+void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_reserve_top(void);
 extern unsigned long xen_max_p2m_pfn;
 
@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
+unsigned long __init xen_revector_p2m_tree(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);