aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2013-02-22 20:35:13 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-22 20:41:22 -0500
commit0cc9129d75ef8993702d97ab0e49542c15ac6ab9 (patch)
tree77dc998613c73e412a58a38a1fec5ead402be5d6
parentac630dd98a47b60b27d716758d5f4276cb974662 (diff)
x86-64, xen, mmu: Provide an early version of write_cr3.
With commit 8170e6bed465 ("x86, 64bit: Use a #PF handler to materialize early mappings on demand") we started hitting an early bootup crash where the Xen hypervisor would inform us that: (XEN) d7:v0: unhandled page fault (ec=0000) (XEN) Pagetable walk from ffffea000005b2d0: (XEN) L4[0x1d4] = 0000000000000000 ffffffffffffffff (XEN) domain_crash_sync called from entry.S (XEN) Domain 7 (vcpu#0) crashed on cpu#3: (XEN) ----[ Xen-4.2.0 x86_64 debug=n Not tainted ]---- .. that Xen was unable to context switch back to dom0. Looking at the calling stack we find: [<ffffffff8103feba>] xen_get_user_pgd+0x5a <-- [<ffffffff8103feba>] xen_get_user_pgd+0x5a [<ffffffff81042d27>] xen_write_cr3+0x77 [<ffffffff81ad2d21>] init_mem_mapping+0x1f9 [<ffffffff81ac293f>] setup_arch+0x742 [<ffffffff81666d71>] printk+0x48 We are trying to figure out whether we need to up-date the user PGD as well. Please keep in mind that under 64-bit PV guests we have a limited amount of rings: 0 for the Hypervisor, and 1 for both the Linux kernel and user-space. As such the Linux pvops'fied version of write_cr3 checks if it has to update the user-space cr3 as well. That clearly is not needed during early bootup. The recent changes (see above git commit) streamline the x86 page table allocation to be much simpler (And also incidentally the #PF handler ends up in spirit being similar to how the Xen toolstack sets up the initial page-tables). The fix is to have an early-bootup version of cr3 that just loads the kernel %cr3. The later version - which also handles user-page modifications will be used after the initial page tables have been setup. [ hpa: removed a redundant #ifdef and made the new function __init. Also note that x86-32 already has such an early xen_write_cr3. ] Tested-by: "H. Peter Anvin" <hpa@zytor.com> Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Link: http://lkml.kernel.org/r/1361579812-23709-1-git-send-email-konrad.wilk@oracle.com Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86/xen/mmu.c44
1 files changed, 39 insertions, 5 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f5e86eee4e0e..e8e34938c57d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1408,7 +1408,6 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
1408 xen_mc_callback(set_current_cr3, (void *)cr3); 1408 xen_mc_callback(set_current_cr3, (void *)cr3);
1409 } 1409 }
1410} 1410}
1411
1412static void xen_write_cr3(unsigned long cr3) 1411static void xen_write_cr3(unsigned long cr3)
1413{ 1412{
1414 BUG_ON(preemptible()); 1413 BUG_ON(preemptible());
@@ -1434,6 +1433,45 @@ static void xen_write_cr3(unsigned long cr3)
1434 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ 1433 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1435} 1434}
1436 1435
1436#ifdef CONFIG_X86_64
1437/*
1438 * At the start of the day - when Xen launches a guest, it has already
1439 * built pagetables for the guest. We diligently look over them
1440 * in xen_setup_kernel_pagetable and graft as appropiate them in the
1441 * init_level4_pgt and its friends. Then when we are happy we load
1442 * the new init_level4_pgt - and continue on.
1443 *
1444 * The generic code starts (start_kernel) and 'init_mem_mapping' sets
1445 * up the rest of the pagetables. When it has completed it loads the cr3.
1446 * N.B. that baremetal would start at 'start_kernel' (and the early
1447 * #PF handler would create bootstrap pagetables) - so we are running
1448 * with the same assumptions as what to do when write_cr3 is executed
1449 * at this point.
1450 *
1451 * Since there are no user-page tables at all, we have two variants
1452 * of xen_write_cr3 - the early bootup (this one), and the late one
1453 * (xen_write_cr3). The reason we have to do that is that in 64-bit
1454 * the Linux kernel and user-space are both in ring 3 while the
1455 * hypervisor is in ring 0.
1456 */
1457static void __init xen_write_cr3_init(unsigned long cr3)
1458{
1459 BUG_ON(preemptible());
1460
1461 xen_mc_batch(); /* disables interrupts */
1462
1463 /* Update while interrupts are disabled, so its atomic with
1464 respect to ipis */
1465 this_cpu_write(xen_cr3, cr3);
1466
1467 __xen_write_cr3(true, cr3);
1468
1469 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
1470
1471 pv_mmu_ops.write_cr3 = &xen_write_cr3;
1472}
1473#endif
1474
1437static int xen_pgd_alloc(struct mm_struct *mm) 1475static int xen_pgd_alloc(struct mm_struct *mm)
1438{ 1476{
1439 pgd_t *pgd = mm->pgd; 1477 pgd_t *pgd = mm->pgd;
@@ -2102,11 +2140,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2102 .write_cr2 = xen_write_cr2, 2140 .write_cr2 = xen_write_cr2,
2103 2141
2104 .read_cr3 = xen_read_cr3, 2142 .read_cr3 = xen_read_cr3,
2105#ifdef CONFIG_X86_32
2106 .write_cr3 = xen_write_cr3_init, 2143 .write_cr3 = xen_write_cr3_init,
2107#else
2108 .write_cr3 = xen_write_cr3,
2109#endif
2110 2144
2111 .flush_tlb_user = xen_flush_tlb, 2145 .flush_tlb_user = xen_flush_tlb,
2112 .flush_tlb_kernel = xen_flush_tlb, 2146 .flush_tlb_kernel = xen_flush_tlb,