 arch/x86/xen/enlighten.c |   5
 arch/x86/xen/mmu.c       | 183
 arch/x86/xen/p2m.c       |  92
 arch/x86/xen/setup.c     |  18
 arch/x86/xen/xen-ops.h   |   3
 5 files changed, 254 insertions(+), 47 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2766746de274..47b3acdc2ac5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1290,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
 	int rc;
-	pgd_t *pgd;
 
 	if (!xen_start_info)
 		return;
@@ -1382,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void)
 	acpi_numa = -1;
 #endif
 
-	pgd = (pgd_t *)xen_start_info->pt_base;
-
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1392,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void)
 	early_boot_irqs_disabled = true;
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
-	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
 
 	/* Allocate and initialize top and mid mfn levels for p2m structure */
 	xen_build_mfn_list_list();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dfc900471aef..5a16824cc2b3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -84,6 +84,7 @@
  */
 DEFINE_SPINLOCK(xen_reservation_lock);
 
+#ifdef CONFIG_X86_32
 /*
  * Identity map, in addition to plain kernel map. This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
  */
 #define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
 static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-
+#endif
 #ifdef CONFIG_X86_64
 /* l3 pud for userspace vsyscall mapping */
 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
@@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
 
 static void xen_post_allocator_init(void);
 
-static void __init xen_pagetable_init(void)
-{
-	paging_init();
-	xen_setup_shared_info();
-	xen_post_allocator_init();
-}
-
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 {
 	/* reserve the range used */
@@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 	}
 }
 
+#ifdef CONFIG_X86_64
+static void __init xen_cleanhighmap(unsigned long vaddr,
+				    unsigned long vaddr_end)
+{
+	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
+	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
+
+	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
+	 * We include the PMD passed in on _both_ boundaries. */
+	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
+			pmd++, vaddr += PMD_SIZE) {
+		if (pmd_none(*pmd))
+			continue;
+		if (vaddr < (unsigned long) _text || vaddr > kernel_end)
+			set_pmd(pmd, __pmd(0));
+	}
+	/* In case we did something silly, we should crash in this function
+	 * instead of somewhere later and be confusing. */
+	xen_mc_flush();
+}
+#endif
+static void __init xen_pagetable_init(void)
+{
+#ifdef CONFIG_X86_64
+	unsigned long size;
+	unsigned long addr;
+#endif
+	paging_init();
+	xen_setup_shared_info();
+#ifdef CONFIG_X86_64
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		unsigned long new_mfn_list;
+
+		size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+
+		/* On 32-bit, we get zero so this never gets executed. */
+		new_mfn_list = xen_revector_p2m_tree();
+		if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
+			/* using __ka address and sticking INVALID_P2M_ENTRY! */
+			memset((void *)xen_start_info->mfn_list, 0xff, size);
+
+			/* We should be in __ka space. */
+			BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
+			addr = xen_start_info->mfn_list;
+			/* We roundup to the PMD, which means that if anybody at this stage is
+			 * using the __ka address of xen_start_info or xen_start_info->shared_info
+			 * they are in going to crash. Fortunatly we have already revectored
+			 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
+			size = roundup(size, PMD_SIZE);
+			xen_cleanhighmap(addr, addr + size);
+
+			size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+			memblock_free(__pa(xen_start_info->mfn_list), size);
+			/* And revector! Bye bye old array */
+			xen_start_info->mfn_list = new_mfn_list;
+		} else
+			goto skip;
+	}
+	/* At this stage, cleanup_highmap has already cleaned __ka space
+	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
+	 * the ramdisk). We continue on, erasing PMD entries that point to page
+	 * tables - do note that they are accessible at this stage via __va.
+	 * For good measure we also round up to the PMD - which means that if
+	 * anybody is using __ka address to the initial boot-stack - and try
+	 * to use it - they are going to crash. The xen_start_info has been
+	 * taken care of already in xen_setup_kernel_pagetable. */
+	addr = xen_start_info->pt_base;
+	size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+
+	xen_cleanhighmap(addr, addr + size);
+	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
+#ifdef DEBUG
+	/* This is superflous and is not neccessary, but you know what
+	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
+	 * anything at this stage. */
+	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
+#endif
+skip:
+#endif
+	xen_post_allocator_init();
+}
 static void xen_write_cr2(unsigned long cr2)
 {
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
 }
-
+#ifdef CONFIG_X86_32
 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
@@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 
 	set_page_prot(pmd, PAGE_KERNEL_RO);
 }
-
+#endif
 void __init xen_setup_machphys_mapping(void)
 {
 	struct xen_machphys_mapping mapping;
@@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v)
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		pte[i] = xen_make_pte(pte[i].pte);
 }
-
+static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
+				 unsigned long addr)
+{
+	if (*pt_base == PFN_DOWN(__pa(addr))) {
+		set_page_prot((void *)addr, PAGE_KERNEL);
+		clear_page((void *)addr);
+		(*pt_base)++;
+	}
+	if (*pt_end == PFN_DOWN(__pa(addr))) {
+		set_page_prot((void *)addr, PAGE_KERNEL);
+		clear_page((void *)addr);
+		(*pt_end)--;
+	}
+}
 /*
  * Set up the initial kernel pagetable.
  *
@@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
-					  unsigned long max_pfn)
+void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pud_t *l3;
 	pmd_t *l2;
+	unsigned long addr[3];
+	unsigned long pt_base, pt_end;
+	unsigned i;
 
 	/* max_pfn_mapped is the last pfn mapped in the initial memory
 	 * mappings. Considering that on Xen after the kernel mappings we
@@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	 * set max_pfn_mapped to the last real pfn mapped. */
 	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
 
+	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
+	pt_end = pt_base + xen_start_info->nr_pt_frames;
+
 	/* Zap identity mapping */
 	init_level4_pgt[0] = __pgd(0);
 
 	/* Pre-constructed entries are in pfn, so convert to mfn */
+	/* L4[272] -> level3_ident_pgt
+	 * L4[511] -> level3_kernel_pgt */
 	convert_pfn_mfn(init_level4_pgt);
+
+	/* L3_i[0] -> level2_ident_pgt */
 	convert_pfn_mfn(level3_ident_pgt);
+	/* L3_k[510] -> level2_kernel_pgt
+	 * L3_i[511] -> level2_fixmap_pgt */
 	convert_pfn_mfn(level3_kernel_pgt);
 
+	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
 
-	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
+	addr[0] = (unsigned long)pgd;
+	addr[1] = (unsigned long)l3;
+	addr[2] = (unsigned long)l2;
+	/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
+	 * Both L4[272][0] and L4[511][511] have entries that point to the same
+	 * L2 (PMD) tables. Meaning that if you modify it in __va space
+	 * it will be also modified in the __ka space! (But if you just
+	 * modify the PMD table to point to other PTE's or none, then you
+	 * are OK - which is what cleanup_highmap does) */
+	copy_page(level2_ident_pgt, l2);
+	/* Graft it onto L4[511][511] */
+	copy_page(level2_kernel_pgt, l2);
+
+	/* Get [511][510] and graft that in level2_fixmap_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
-	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	/* Set up identity map */
-	xen_map_identity_early(level2_ident_pgt, max_pfn);
+	copy_page(level2_fixmap_pgt, l2);
+	/* Note that we don't do anything with level1_fixmap_pgt which
+	 * we don't need. */
 
 	/* Make pagetable pieces RO */
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
@@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-	/* Switch over */
-	pgd = init_level4_pgt;
-
 	/*
 	 * At this stage there can be no user pgd, and no page
 	 * structure to attach it to, so make sure we just set kernel
 	 * pgd.
 	 */
 	xen_mc_batch();
-	__xen_write_cr3(true, __pa(pgd));
+	__xen_write_cr3(true, __pa(init_level4_pgt));
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
-	memblock_reserve(__pa(xen_start_info->pt_base),
-			 xen_start_info->nr_pt_frames * PAGE_SIZE);
+	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
+	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
+	 * the initial domain. For guests using the toolstack, they are in:
+	 * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
+	 * rip out the [L4] (pgd), but for guests we shave off three pages.
+	 */
+	for (i = 0; i < ARRAY_SIZE(addr); i++)
+		check_pt_base(&pt_base, &pt_end, addr[i]);
 
-	return pgd;
+	/* Our (by three pages) smaller Xen pagetable that we are using */
+	memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
+	/* Revector the xen_start_info */
+	xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
 }
 #else	/* !CONFIG_X86_64 */
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
@@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	 */
 	swapper_kernel_pmd =
 		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-	memcpy(swapper_kernel_pmd, initial_kernel_pmd,
-	       sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(swapper_kernel_pmd, initial_kernel_pmd);
 	swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
 		__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
 	set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
@@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
-					  unsigned long max_pfn)
+void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
 
@@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 		  512*1024);
 
 	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-	memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(initial_kernel_pmd, kernel_pmd);
 
 	xen_map_identity_early(initial_kernel_pmd, max_pfn);
 
-	memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	copy_page(initial_page_table, pgd);
 	initial_page_table[KERNEL_PGD_BOUNDARY] =
 		__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
 
@@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 
 	memblock_reserve(__pa(xen_start_info->pt_base),
 			 xen_start_info->nr_pt_frames * PAGE_SIZE);
-
-	return initial_page_table;
 }
 #endif	/* CONFIG_X86_64 */
 
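The check_pt_base() trimming above is easiest to see with concrete numbers. Here is a minimal standalone sketch of that logic - not kernel code, and the frame numbers are invented purely for illustration:

    /* Standalone sketch of the check_pt_base() trimming introduced above.
     * Not kernel code: frame numbers are made up for illustration. */
    #include <stdio.h>

    static void check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
                              unsigned long frame)
    {
        if (*pt_base == frame)
            (*pt_base)++;   /* re-used frame at the front: trim the start */
        if (*pt_end == frame)
            (*pt_end)--;    /* re-used frame at the back: trim the end */
    }

    int main(void)
    {
        /* domU layout: [L4], [L3], [L2], [L1], ... so pgd/l3/l2 are the
         * first three frames and all three get shaved off the front. */
        unsigned long pt_base = 100, pt_end = 100 + 6;  /* 6 pt frames */
        unsigned long addr[3] = { 100 /* pgd */, 101 /* l3 */, 102 /* l2 */ };
        int i;

        for (i = 0; i < 3; i++)
            check_pt_base(&pt_base, &pt_end, addr[i]);

        /* Prints "reserve frames [103, 106)": three pages saved, as the
         * comment in the hunk says. For dom0's [L4], [L1], [L2], [L3], ...
         * ordering only the pgd frame matches, so just one page is trimmed. */
        printf("reserve frames [%lu, %lu)\n", pt_base, pt_end);
        return 0;
    }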
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 76ba0e97e530..b5e4d302a067 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -22,7 +22,7 @@
  *
  * P2M_PER_PAGE depends on the architecture, as a mfn is always
  * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
- * 512 and 1024 entries respectively. 
+ * 512 and 1024 entries respectively.
  *
  * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
  *
@@ -139,11 +139,11 @@
  *      /    | ~0, ~0, ....  |
  *     |     \---------------/
  *     |
- *     p2m_missing             p2m_missing
- * /------------------\     /------------\
- * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
- * | [p2m_mid_missing]+---->| ..., ~0    |
- * \------------------/     \------------/
+ *   p2m_mid_missing           p2m_missing
+ * /-----------------\     /------------\
+ * | [p2m_missing]   +---->| ~0, ~0, ~0 |
+ * | [p2m_missing]   +---->| ..., ~0    |
+ * \-----------------/     \------------/
  *
  * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
  */
@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void)
 
 	m2p_override_init();
 }
+#ifdef CONFIG_X86_64
+#include <linux/bootmem.h>
+unsigned long __init xen_revector_p2m_tree(void)
+{
+	unsigned long va_start;
+	unsigned long va_end;
+	unsigned long pfn;
+	unsigned long pfn_free = 0;
+	unsigned long *mfn_list = NULL;
+	unsigned long size;
+
+	va_start = xen_start_info->mfn_list;
+	/*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
+	 * so make sure it is rounded up to that */
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+	va_end = va_start + size;
+
+	/* If we were revectored already, don't do it again. */
+	if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
+		return 0;
+
+	mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
+	if (!mfn_list) {
+		pr_warn("Could not allocate space for a new P2M tree!\n");
+		return xen_start_info->mfn_list;
+	}
+	/* Fill it out with INVALID_P2M_ENTRY value */
+	memset(mfn_list, 0xFF, size);
+
+	for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx;
+		unsigned long *mid_p;
+
+		if (!p2m_top[topidx])
+			continue;
+
+		if (p2m_top[topidx] == p2m_mid_missing)
+			continue;
+
+		mididx = p2m_mid_index(pfn);
+		mid_p = p2m_top[topidx][mididx];
+		if (!mid_p)
+			continue;
+		if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
+			continue;
+
+		if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
+			continue;
+
+		/* The old va. Rebase it on mfn_list */
+		if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
+			unsigned long *new;
+
+			if (pfn_free > (size / sizeof(unsigned long))) {
+				WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
+				     size / sizeof(unsigned long), pfn_free);
+				return 0;
+			}
+			new = &mfn_list[pfn_free];
+
+			copy_page(new, mid_p);
+			p2m_top[topidx][mididx] = &mfn_list[pfn_free];
+			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
+
+			pfn_free += P2M_PER_PAGE;
 
+		}
+		/* This should be the leafs allocated for identity from _brk. */
+	}
+	return (unsigned long)mfn_list;
+
+}
+#else
+unsigned long __init xen_revector_p2m_tree(void)
+{
+	return 0;
+}
+#endif
 unsigned long get_phys_to_machine(unsigned long pfn)
 {
 	unsigned topidx, mididx, idx;
@@ -430,7 +508,7 @@ static void free_p2m_page(void *p)
 	free_page((unsigned long)p);
 }
 
-/* 
+/*
  * Fully allocate the p2m structure for a given pfn.  We need to check
  * that both the top and mid levels are allocated, and make sure the
  * parallel mfn tree is kept in sync.  We may race with other cpus, so
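Both the mmu.c and p2m.c hunks lean on the same "revector" idiom, __va(__pa(ptr)): take a pointer that currently lives in the kernel-text (__ka) mapping, which xen_cleanhighmap() is about to zap, and rebase it onto the linear (__va) alias of the same physical page. A toy model of that round-trip, using simplified stand-in offsets rather than the real x86-64 layout:

    /* Toy model of the __va(__pa(x)) revectoring used above. The offsets
     * are simplified stand-ins for the real x86-64 values. */
    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_OFFSET       0xffff880000000000ULL  /* linear (__va) map  */
    #define START_KERNEL_MAP  0xffffffff80000000ULL  /* kernel-text (__ka) */

    /* In this toy model, __ka addresses map to phys by subtracting the
     * text base, and __va builds the linear-map alias of that phys addr. */
    static uint64_t toy_pa(uint64_t vaddr)
    {
        return vaddr >= START_KERNEL_MAP ? vaddr - START_KERNEL_MAP
                                         : vaddr - PAGE_OFFSET;
    }

    static uint64_t toy_va(uint64_t paddr)
    {
        return paddr + PAGE_OFFSET;
    }

    int main(void)
    {
        uint64_t ka_ptr = START_KERNEL_MAP + 0x2000; /* e.g. xen_start_info */
        uint64_t va_ptr = toy_va(toy_pa(ka_ptr));

        /* Same physical page, different virtual alias: the __va pointer
         * survives xen_cleanhighmap() zapping the __ka PMD entries. */
        printf("__ka %#llx -> __va %#llx (phys %#llx)\n",
               (unsigned long long)ka_ptr,
               (unsigned long long)va_ptr,
               (unsigned long long)toy_pa(ka_ptr));
        return 0;
    }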
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d11ca11d14fc..3edb320d508f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -431,6 +431,24 @@ char * __init xen_memory_setup(void)
 	 *  - mfn_list
 	 *  - xen_start_info
 	 * See comment above "struct start_info" in <xen/interface/xen.h>
+	 * We tried to make the the memblock_reserve more selective so
+	 * that it would be clear what region is reserved. Sadly we ran
+	 * in the problem wherein on a 64-bit hypervisor with a 32-bit
+	 * initial domain, the pt_base has the cr3 value which is not
+	 * neccessarily where the pagetable starts! As Jan put it: "
+	 * Actually, the adjustment turns out to be correct: The page
+	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
+	 * "first L2", "first L3", so the offset to the page table base is
+	 * indeed 2. When reading xen/include/public/xen.h's comment
+	 * very strictly, this is not a violation (since there nothing is said
+	 * that the first thing in the page table space is pointed to by
+	 * pt_base; I admit that this seems to be implied though, namely
+	 * do I think that it is implied that the page table space is the
+	 * range [pt_base, pt_base + nt_pt_frames), whereas that
+	 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
+	 * which - without a priori knowledge - the kernel would have
+	 * difficulty to figure out)." - so lets just fall back to the
+	 * easy way and reserve the whole region.
 	 */
 	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);
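The arithmetic in Jan's quote, restated as a tiny sketch with hypothetical frame numbers (the values below are invented for illustration):

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical frame numbers, only to restate Jan's arithmetic. */
        unsigned long pt_base      = 1000; /* start_info value: the cr3 frame */
        unsigned long nr_pt_frames = 8;

        /* 32-on-64 dom0 page tables are allocated "first L1", "first L2",
         * "first L3", so the L3 that cr3 (pt_base) points at sits two
         * frames into the page table space. */
        unsigned long real_start = pt_base - 2;
        unsigned long real_end   = real_start + nr_pt_frames;

        /* Naive range:  [1000, 1008)
         * Actual range: [ 998, 1006) - not knowable a priori, hence the
         * coarse whole-region reserve in the hunk above. */
        printf("naive  [%lu, %lu)\n", pt_base, pt_base + nr_pt_frames);
        printf("actual [%lu, %lu)\n", real_start, real_end);
        return 0;
    }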
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 202d4c150154..bb5a8105ea86 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
 void xen_setup_shared_info(void);
 void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
-pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
+void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_reserve_top(void);
 extern unsigned long xen_max_p2m_pfn;
 
@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
+unsigned long __init xen_revector_p2m_tree(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
