aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
authorKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-09-12 11:16:27 -0400
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-09-12 11:18:57 -0400
commit98104c3480e568d9c145adbc7dc56c9d4d170e30 (patch)
tree95fd6366645fe16b496032e23d57d70411998944 /arch/x86/xen
parent25a765b7f05cb8460fa01b54568894b20e184862 (diff)
parent328731876451a837f56e66ffa11de053ed5daf73 (diff)
Merge branch 'stable/128gb.v5.1' into stable/for-linus-3.7
* stable/128gb.v5.1: xen/mmu: If the revector fails, don't attempt to revector anything else. xen/p2m: When revectoring deal with holes in the P2M array. xen/mmu: Release just the MFN list, not MFN list and part of pagetables. xen/mmu: Remove from __ka space PMD entries for pagetables. xen/mmu: Copy and revector the P2M tree. xen/p2m: Add logic to revector a P2M tree to use __va leafs. xen/mmu: Recycle the Xen provided L4, L3, and L2 pages xen/mmu: For 64-bit do not call xen_map_identity_early xen/mmu: use copy_page instead of memcpy. xen/mmu: Provide comments describing the _ka and _va aliasing issue xen/mmu: The xen_setup_kernel_pagetable doesn't need to return anything. Revert "xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain." and "xen/x86: Use memblock_reserve for sensitive areas." xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain. xen/x86: Use memblock_reserve for sensitive areas. xen/p2m: Fix the comment describing the P2M tree. Conflicts: arch/x86/xen/mmu.c The pagetable_init is the old xen_pagetable_setup_done and xen_pagetable_setup_start rolled in one. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/enlighten.c5
-rw-r--r--arch/x86/xen/mmu.c183
-rw-r--r--arch/x86/xen/p2m.c92
-rw-r--r--arch/x86/xen/setup.c18
-rw-r--r--arch/x86/xen/xen-ops.h3
5 files changed, 254 insertions, 47 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2766746de274..47b3acdc2ac5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1290,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void)
1290{ 1290{
1291 struct physdev_set_iopl set_iopl; 1291 struct physdev_set_iopl set_iopl;
1292 int rc; 1292 int rc;
1293 pgd_t *pgd;
1294 1293
1295 if (!xen_start_info) 1294 if (!xen_start_info)
1296 return; 1295 return;
@@ -1382,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void)
1382 acpi_numa = -1; 1381 acpi_numa = -1;
1383#endif 1382#endif
1384 1383
1385 pgd = (pgd_t *)xen_start_info->pt_base;
1386
1387 /* Don't do the full vcpu_info placement stuff until we have a 1384 /* Don't do the full vcpu_info placement stuff until we have a
1388 possible map and a non-dummy shared_info. */ 1385 possible map and a non-dummy shared_info. */
1389 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 1386 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1392,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void)
1392 early_boot_irqs_disabled = true; 1389 early_boot_irqs_disabled = true;
1393 1390
1394 xen_raw_console_write("mapping kernel into physical memory\n"); 1391 xen_raw_console_write("mapping kernel into physical memory\n");
1395 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1392 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
1396 1393
1397 /* Allocate and initialize top and mid mfn levels for p2m structure */ 1394 /* Allocate and initialize top and mid mfn levels for p2m structure */
1398 xen_build_mfn_list_list(); 1395 xen_build_mfn_list_list();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dfc900471aef..5a16824cc2b3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -84,6 +84,7 @@
84 */ 84 */
85DEFINE_SPINLOCK(xen_reservation_lock); 85DEFINE_SPINLOCK(xen_reservation_lock);
86 86
87#ifdef CONFIG_X86_32
87/* 88/*
88 * Identity map, in addition to plain kernel map. This needs to be 89 * Identity map, in addition to plain kernel map. This needs to be
89 * large enough to allocate page table pages to allocate the rest. 90 * large enough to allocate page table pages to allocate the rest.
@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
91 */ 92 */
92#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) 93#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
93static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); 94static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
94 95#endif
95#ifdef CONFIG_X86_64 96#ifdef CONFIG_X86_64
96/* l3 pud for userspace vsyscall mapping */ 97/* l3 pud for userspace vsyscall mapping */
97static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; 98static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
@@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
1176 1177
1177static void xen_post_allocator_init(void); 1178static void xen_post_allocator_init(void);
1178 1179
1179static void __init xen_pagetable_init(void)
1180{
1181 paging_init();
1182 xen_setup_shared_info();
1183 xen_post_allocator_init();
1184}
1185
1186static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) 1180static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1187{ 1181{
1188 /* reserve the range used */ 1182 /* reserve the range used */
@@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1197 } 1191 }
1198} 1192}
1199 1193
1194#ifdef CONFIG_X86_64
1195static void __init xen_cleanhighmap(unsigned long vaddr,
1196 unsigned long vaddr_end)
1197{
1198 unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
1199 pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
1200
1201 /* NOTE: The loop is more greedy than the cleanup_highmap variant.
1202 * We include the PMD passed in on _both_ boundaries. */
1203 for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
1204 pmd++, vaddr += PMD_SIZE) {
1205 if (pmd_none(*pmd))
1206 continue;
1207 if (vaddr < (unsigned long) _text || vaddr > kernel_end)
1208 set_pmd(pmd, __pmd(0));
1209 }
1210 /* In case we did something silly, we should crash in this function
1211 * instead of somewhere later and be confusing. */
1212 xen_mc_flush();
1213}
1214#endif
1215static void __init xen_pagetable_init(void)
1216{
1217#ifdef CONFIG_X86_64
1218 unsigned long size;
1219 unsigned long addr;
1220#endif
1221 paging_init();
1222 xen_setup_shared_info();
1223#ifdef CONFIG_X86_64
1224 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1225 unsigned long new_mfn_list;
1226
1227 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1228
1229 /* On 32-bit, we get zero so this never gets executed. */
1230 new_mfn_list = xen_revector_p2m_tree();
1231 if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
1232 /* using __ka address and sticking INVALID_P2M_ENTRY! */
1233 memset((void *)xen_start_info->mfn_list, 0xff, size);
1234
1235 /* We should be in __ka space. */
1236 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
1237 addr = xen_start_info->mfn_list;
1238 /* We roundup to the PMD, which means that if anybody at this stage is
1239 * using the __ka address of xen_start_info or xen_start_info->shared_info
1240 * they are in going to crash. Fortunatly we have already revectored
1241 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
1242 size = roundup(size, PMD_SIZE);
1243 xen_cleanhighmap(addr, addr + size);
1244
1245 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1246 memblock_free(__pa(xen_start_info->mfn_list), size);
1247 /* And revector! Bye bye old array */
1248 xen_start_info->mfn_list = new_mfn_list;
1249 } else
1250 goto skip;
1251 }
1252 /* At this stage, cleanup_highmap has already cleaned __ka space
1253 * from _brk_limit way up to the max_pfn_mapped (which is the end of
1254 * the ramdisk). We continue on, erasing PMD entries that point to page
1255 * tables - do note that they are accessible at this stage via __va.
1256 * For good measure we also round up to the PMD - which means that if
1257 * anybody is using __ka address to the initial boot-stack - and try
1258 * to use it - they are going to crash. The xen_start_info has been
1259 * taken care of already in xen_setup_kernel_pagetable. */
1260 addr = xen_start_info->pt_base;
1261 size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
1262
1263 xen_cleanhighmap(addr, addr + size);
1264 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
1265#ifdef DEBUG
1266 /* This is superflous and is not neccessary, but you know what
1267 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
1268 * anything at this stage. */
1269 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1270#endif
1271skip:
1272#endif
1273 xen_post_allocator_init();
1274}
1200static void xen_write_cr2(unsigned long cr2) 1275static void xen_write_cr2(unsigned long cr2)
1201{ 1276{
1202 this_cpu_read(xen_vcpu)->arch.cr2 = cr2; 1277 this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
1652 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) 1727 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
1653 BUG(); 1728 BUG();
1654} 1729}
1655 1730#ifdef CONFIG_X86_32
1656static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) 1731static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1657{ 1732{
1658 unsigned pmdidx, pteidx; 1733 unsigned pmdidx, pteidx;
@@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1703 1778
1704 set_page_prot(pmd, PAGE_KERNEL_RO); 1779 set_page_prot(pmd, PAGE_KERNEL_RO);
1705} 1780}
1706 1781#endif
1707void __init xen_setup_machphys_mapping(void) 1782void __init xen_setup_machphys_mapping(void)
1708{ 1783{
1709 struct xen_machphys_mapping mapping; 1784 struct xen_machphys_mapping mapping;
@@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v)
1731 for (i = 0; i < PTRS_PER_PTE; i++) 1806 for (i = 0; i < PTRS_PER_PTE; i++)
1732 pte[i] = xen_make_pte(pte[i].pte); 1807 pte[i] = xen_make_pte(pte[i].pte);
1733} 1808}
1734 1809static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1810 unsigned long addr)
1811{
1812 if (*pt_base == PFN_DOWN(__pa(addr))) {
1813 set_page_prot((void *)addr, PAGE_KERNEL);
1814 clear_page((void *)addr);
1815 (*pt_base)++;
1816 }
1817 if (*pt_end == PFN_DOWN(__pa(addr))) {
1818 set_page_prot((void *)addr, PAGE_KERNEL);
1819 clear_page((void *)addr);
1820 (*pt_end)--;
1821 }
1822}
1735/* 1823/*
1736 * Set up the initial kernel pagetable. 1824 * Set up the initial kernel pagetable.
1737 * 1825 *
@@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v)
1743 * of the physical mapping once some sort of allocator has been set 1831 * of the physical mapping once some sort of allocator has been set
1744 * up. 1832 * up.
1745 */ 1833 */
1746pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 1834void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1747 unsigned long max_pfn)
1748{ 1835{
1749 pud_t *l3; 1836 pud_t *l3;
1750 pmd_t *l2; 1837 pmd_t *l2;
1838 unsigned long addr[3];
1839 unsigned long pt_base, pt_end;
1840 unsigned i;
1751 1841
1752 /* max_pfn_mapped is the last pfn mapped in the initial memory 1842 /* max_pfn_mapped is the last pfn mapped in the initial memory
1753 * mappings. Considering that on Xen after the kernel mappings we 1843 * mappings. Considering that on Xen after the kernel mappings we
@@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1755 * set max_pfn_mapped to the last real pfn mapped. */ 1845 * set max_pfn_mapped to the last real pfn mapped. */
1756 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); 1846 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
1757 1847
1848 pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
1849 pt_end = pt_base + xen_start_info->nr_pt_frames;
1850
1758 /* Zap identity mapping */ 1851 /* Zap identity mapping */
1759 init_level4_pgt[0] = __pgd(0); 1852 init_level4_pgt[0] = __pgd(0);
1760 1853
1761 /* Pre-constructed entries are in pfn, so convert to mfn */ 1854 /* Pre-constructed entries are in pfn, so convert to mfn */
1855 /* L4[272] -> level3_ident_pgt
1856 * L4[511] -> level3_kernel_pgt */
1762 convert_pfn_mfn(init_level4_pgt); 1857 convert_pfn_mfn(init_level4_pgt);
1858
1859 /* L3_i[0] -> level2_ident_pgt */
1763 convert_pfn_mfn(level3_ident_pgt); 1860 convert_pfn_mfn(level3_ident_pgt);
1861 /* L3_k[510] -> level2_kernel_pgt
1862 * L3_i[511] -> level2_fixmap_pgt */
1764 convert_pfn_mfn(level3_kernel_pgt); 1863 convert_pfn_mfn(level3_kernel_pgt);
1765 1864
1865 /* We get [511][511] and have Xen's version of level2_kernel_pgt */
1766 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); 1866 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1767 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); 1867 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1768 1868
1769 memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1869 addr[0] = (unsigned long)pgd;
1770 memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1870 addr[1] = (unsigned long)l3;
1771 1871 addr[2] = (unsigned long)l2;
1872 /* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
1873 * Both L4[272][0] and L4[511][511] have entries that point to the same
1874 * L2 (PMD) tables. Meaning that if you modify it in __va space
1875 * it will be also modified in the __ka space! (But if you just
1876 * modify the PMD table to point to other PTE's or none, then you
1877 * are OK - which is what cleanup_highmap does) */
1878 copy_page(level2_ident_pgt, l2);
1879 /* Graft it onto L4[511][511] */
1880 copy_page(level2_kernel_pgt, l2);
1881
1882 /* Get [511][510] and graft that in level2_fixmap_pgt */
1772 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); 1883 l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
1773 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); 1884 l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
1774 memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); 1885 copy_page(level2_fixmap_pgt, l2);
1775 1886 /* Note that we don't do anything with level1_fixmap_pgt which
1776 /* Set up identity map */ 1887 * we don't need. */
1777 xen_map_identity_early(level2_ident_pgt, max_pfn);
1778 1888
1779 /* Make pagetable pieces RO */ 1889 /* Make pagetable pieces RO */
1780 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); 1890 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1781 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); 1891 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1782 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); 1892 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1783 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); 1893 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1894 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1784 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1895 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1785 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); 1896 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1786 1897
@@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1791 /* Unpin Xen-provided one */ 1902 /* Unpin Xen-provided one */
1792 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 1903 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1793 1904
1794 /* Switch over */
1795 pgd = init_level4_pgt;
1796
1797 /* 1905 /*
1798 * At this stage there can be no user pgd, and no page 1906 * At this stage there can be no user pgd, and no page
1799 * structure to attach it to, so make sure we just set kernel 1907 * structure to attach it to, so make sure we just set kernel
1800 * pgd. 1908 * pgd.
1801 */ 1909 */
1802 xen_mc_batch(); 1910 xen_mc_batch();
1803 __xen_write_cr3(true, __pa(pgd)); 1911 __xen_write_cr3(true, __pa(init_level4_pgt));
1804 xen_mc_issue(PARAVIRT_LAZY_CPU); 1912 xen_mc_issue(PARAVIRT_LAZY_CPU);
1805 1913
1806 memblock_reserve(__pa(xen_start_info->pt_base), 1914 /* We can't that easily rip out L3 and L2, as the Xen pagetables are
1807 xen_start_info->nr_pt_frames * PAGE_SIZE); 1915 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
1916 * the initial domain. For guests using the toolstack, they are in:
1917 * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
1918 * rip out the [L4] (pgd), but for guests we shave off three pages.
1919 */
1920 for (i = 0; i < ARRAY_SIZE(addr); i++)
1921 check_pt_base(&pt_base, &pt_end, addr[i]);
1808 1922
1809 return pgd; 1923 /* Our (by three pages) smaller Xen pagetable that we are using */
1924 memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
1925 /* Revector the xen_start_info */
1926 xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
1810} 1927}
1811#else /* !CONFIG_X86_64 */ 1928#else /* !CONFIG_X86_64 */
1812static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); 1929static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
@@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
1831 */ 1948 */
1832 swapper_kernel_pmd = 1949 swapper_kernel_pmd =
1833 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); 1950 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
1834 memcpy(swapper_kernel_pmd, initial_kernel_pmd, 1951 copy_page(swapper_kernel_pmd, initial_kernel_pmd);
1835 sizeof(pmd_t) * PTRS_PER_PMD);
1836 swapper_pg_dir[KERNEL_PGD_BOUNDARY] = 1952 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
1837 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); 1953 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
1838 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); 1954 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
@@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
1849 pv_mmu_ops.write_cr3 = &xen_write_cr3; 1965 pv_mmu_ops.write_cr3 = &xen_write_cr3;
1850} 1966}
1851 1967
1852pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 1968void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1853 unsigned long max_pfn)
1854{ 1969{
1855 pmd_t *kernel_pmd; 1970 pmd_t *kernel_pmd;
1856 1971
@@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1862 512*1024); 1977 512*1024);
1863 1978
1864 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 1979 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1865 memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 1980 copy_page(initial_kernel_pmd, kernel_pmd);
1866 1981
1867 xen_map_identity_early(initial_kernel_pmd, max_pfn); 1982 xen_map_identity_early(initial_kernel_pmd, max_pfn);
1868 1983
1869 memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); 1984 copy_page(initial_page_table, pgd);
1870 initial_page_table[KERNEL_PGD_BOUNDARY] = 1985 initial_page_table[KERNEL_PGD_BOUNDARY] =
1871 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); 1986 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
1872 1987
@@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1882 1997
1883 memblock_reserve(__pa(xen_start_info->pt_base), 1998 memblock_reserve(__pa(xen_start_info->pt_base),
1884 xen_start_info->nr_pt_frames * PAGE_SIZE); 1999 xen_start_info->nr_pt_frames * PAGE_SIZE);
1885
1886 return initial_page_table;
1887} 2000}
1888#endif /* CONFIG_X86_64 */ 2001#endif /* CONFIG_X86_64 */
1889 2002
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 76ba0e97e530..b5e4d302a067 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -22,7 +22,7 @@
22 * 22 *
23 * P2M_PER_PAGE depends on the architecture, as a mfn is always 23 * P2M_PER_PAGE depends on the architecture, as a mfn is always
24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to 24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
25 * 512 and 1024 entries respectively. 25 * 512 and 1024 entries respectively.
26 * 26 *
27 * In short, these structures contain the Machine Frame Number (MFN) of the PFN. 27 * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
28 * 28 *
@@ -139,11 +139,11 @@
139 * / | ~0, ~0, .... | 139 * / | ~0, ~0, .... |
140 * | \---------------/ 140 * | \---------------/
141 * | 141 * |
142 * p2m_missing p2m_missing 142 * p2m_mid_missing p2m_missing
143 * /------------------\ /------------\ 143 * /-----------------\ /------------\
144 * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | 144 * | [p2m_missing] +---->| ~0, ~0, ~0 |
145 * | [p2m_mid_missing]+---->| ..., ~0 | 145 * | [p2m_missing] +---->| ..., ~0 |
146 * \------------------/ \------------/ 146 * \-----------------/ \------------/
147 * 147 *
148 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) 148 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
149 */ 149 */
@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void)
396 396
397 m2p_override_init(); 397 m2p_override_init();
398} 398}
399#ifdef CONFIG_X86_64
400#include <linux/bootmem.h>
401unsigned long __init xen_revector_p2m_tree(void)
402{
403 unsigned long va_start;
404 unsigned long va_end;
405 unsigned long pfn;
406 unsigned long pfn_free = 0;
407 unsigned long *mfn_list = NULL;
408 unsigned long size;
409
410 va_start = xen_start_info->mfn_list;
411 /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
412 * so make sure it is rounded up to that */
413 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
414 va_end = va_start + size;
415
416 /* If we were revectored already, don't do it again. */
417 if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
418 return 0;
419
420 mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
421 if (!mfn_list) {
422 pr_warn("Could not allocate space for a new P2M tree!\n");
423 return xen_start_info->mfn_list;
424 }
425 /* Fill it out with INVALID_P2M_ENTRY value */
426 memset(mfn_list, 0xFF, size);
427
428 for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
429 unsigned topidx = p2m_top_index(pfn);
430 unsigned mididx;
431 unsigned long *mid_p;
432
433 if (!p2m_top[topidx])
434 continue;
435
436 if (p2m_top[topidx] == p2m_mid_missing)
437 continue;
438
439 mididx = p2m_mid_index(pfn);
440 mid_p = p2m_top[topidx][mididx];
441 if (!mid_p)
442 continue;
443 if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
444 continue;
445
446 if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
447 continue;
448
449 /* The old va. Rebase it on mfn_list */
450 if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
451 unsigned long *new;
452
453 if (pfn_free > (size / sizeof(unsigned long))) {
454 WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
455 size / sizeof(unsigned long), pfn_free);
456 return 0;
457 }
458 new = &mfn_list[pfn_free];
459
460 copy_page(new, mid_p);
461 p2m_top[topidx][mididx] = &mfn_list[pfn_free];
462 p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
463
464 pfn_free += P2M_PER_PAGE;
399 465
466 }
467 /* This should be the leafs allocated for identity from _brk. */
468 }
469 return (unsigned long)mfn_list;
470
471}
472#else
473unsigned long __init xen_revector_p2m_tree(void)
474{
475 return 0;
476}
477#endif
400unsigned long get_phys_to_machine(unsigned long pfn) 478unsigned long get_phys_to_machine(unsigned long pfn)
401{ 479{
402 unsigned topidx, mididx, idx; 480 unsigned topidx, mididx, idx;
@@ -430,7 +508,7 @@ static void free_p2m_page(void *p)
430 free_page((unsigned long)p); 508 free_page((unsigned long)p);
431} 509}
432 510
433/* 511/*
434 * Fully allocate the p2m structure for a given pfn. We need to check 512 * Fully allocate the p2m structure for a given pfn. We need to check
435 * that both the top and mid levels are allocated, and make sure the 513 * that both the top and mid levels are allocated, and make sure the
436 * parallel mfn tree is kept in sync. We may race with other cpus, so 514 * parallel mfn tree is kept in sync. We may race with other cpus, so
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d11ca11d14fc..3edb320d508f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -431,6 +431,24 @@ char * __init xen_memory_setup(void)
431 * - mfn_list 431 * - mfn_list
432 * - xen_start_info 432 * - xen_start_info
433 * See comment above "struct start_info" in <xen/interface/xen.h> 433 * See comment above "struct start_info" in <xen/interface/xen.h>
434 * We tried to make the the memblock_reserve more selective so
435 * that it would be clear what region is reserved. Sadly we ran
436 * in the problem wherein on a 64-bit hypervisor with a 32-bit
437 * initial domain, the pt_base has the cr3 value which is not
438 * neccessarily where the pagetable starts! As Jan put it: "
439 * Actually, the adjustment turns out to be correct: The page
440 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
441 * "first L2", "first L3", so the offset to the page table base is
442 * indeed 2. When reading xen/include/public/xen.h's comment
443 * very strictly, this is not a violation (since there nothing is said
444 * that the first thing in the page table space is pointed to by
445 * pt_base; I admit that this seems to be implied though, namely
446 * do I think that it is implied that the page table space is the
447 * range [pt_base, pt_base + nt_pt_frames), whereas that
448 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
449 * which - without a priori knowledge - the kernel would have
450 * difficulty to figure out)." - so lets just fall back to the
451 * easy way and reserve the whole region.
434 */ 452 */
435 memblock_reserve(__pa(xen_start_info->mfn_list), 453 memblock_reserve(__pa(xen_start_info->mfn_list),
436 xen_start_info->pt_base - xen_start_info->mfn_list); 454 xen_start_info->pt_base - xen_start_info->mfn_list);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 202d4c150154..bb5a8105ea86 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
27void xen_setup_shared_info(void); 27void xen_setup_shared_info(void);
28void xen_build_mfn_list_list(void); 28void xen_build_mfn_list_list(void);
29void xen_setup_machphys_mapping(void); 29void xen_setup_machphys_mapping(void);
30pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); 30void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
31void xen_reserve_top(void); 31void xen_reserve_top(void);
32extern unsigned long xen_max_p2m_pfn; 32extern unsigned long xen_max_p2m_pfn;
33 33
@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
45void xen_unplug_emulated_devices(void); 45void xen_unplug_emulated_devices(void);
46 46
47void __init xen_build_dynamic_phys_to_machine(void); 47void __init xen_build_dynamic_phys_to_machine(void);
48unsigned long __init xen_revector_p2m_tree(void);
48 49
49void xen_init_irq_ops(void); 50void xen_init_irq_ops(void);
50void xen_setup_timer(int cpu); 51void xen_setup_timer(int cpu);