diff options
author | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2012-08-16 11:31:27 -0400 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2012-08-16 13:05:25 -0400 |
commit | ca08649eb5dd30f11a5a8fe8659b48899b7ea6a1 (patch) | |
tree | b1ab10ebec723a22034e1ae8dc2c975b293774ab /arch/x86/xen/enlighten.c | |
parent | 5bc6f9888db5739abfa0cae279b4b442e4db8049 (diff) |
Revert "xen PVonHVM: move shared_info to MMIO before kexec"
This reverts commit 00e37bdb0113a98408de42db85be002f21dbffd3.
During shutdown of PVHVM guests with more than 2 VCPUs on certain
machines we can hit the race where the replaced shared_info is not
replaced fast enough and the PV time clock retries reading the same
area over and over without any success and is stuck in an
infinite loop.
Acked-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r-- | arch/x86/xen/enlighten.c | 118 |
1 files changed, 11 insertions, 107 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a6f8acbdfc9a..f1814fc2cb77 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
34 | #include <linux/syscore_ops.h> | ||
35 | 34 | ||
36 | #include <xen/xen.h> | 35 | #include <xen/xen.h> |
37 | #include <xen/interface/xen.h> | 36 | #include <xen/interface/xen.h> |
@@ -1472,130 +1471,38 @@ asmlinkage void __init xen_start_kernel(void) | |||
1472 | #endif | 1471 | #endif |
1473 | } | 1472 | } |
1474 | 1473 | ||
1475 | #ifdef CONFIG_XEN_PVHVM | 1474 | void __ref xen_hvm_init_shared_info(void) |
1476 | /* | ||
1477 | * The pfn containing the shared_info is located somewhere in RAM. This | ||
1478 | * will cause trouble if the current kernel is doing a kexec boot into a | ||
1479 | * new kernel. The new kernel (and its startup code) can not know where | ||
1480 | * the pfn is, so it can not reserve the page. The hypervisor will | ||
1481 | * continue to update the pfn, and as a result memory corruption occurs | ||
1482 | * in the new kernel. | ||
1483 | * | ||
1484 | * One way to work around this issue is to allocate a page in the | ||
1485 | * xen-platform pci device's BAR memory range. But pci init is done very | ||
1486 | * late and the shared_info page is already in use very early to read | ||
1487 | * the pvclock. So moving the pfn from RAM to MMIO is racy because some | ||
1488 | * code paths on other vcpus could access the pfn during the small | ||
1489 | * window when the old pfn is moved to the new pfn. There is even a | ||
1490 | * small window where the old pfn is not backed by a mfn, and during that | ||
1491 | * time all reads return -1. | ||
1492 | * | ||
1493 | * Because it is not known upfront where the MMIO region is located it | ||
1494 | * can not be used right from the start in xen_hvm_init_shared_info. | ||
1495 | * | ||
1496 | * To minimise trouble the move of the pfn is done shortly before kexec. | ||
1497 | * This does not eliminate the race because all vcpus are still online | ||
1498 | * when the syscore_ops will be called. But hopefully there is no work | ||
1499 | * pending at this point in time. Also the syscore_op is run last which | ||
1500 | * reduces the risk further. | ||
1501 | */ | ||
1502 | |||
1503 | static struct shared_info *xen_hvm_shared_info; | ||
1504 | |||
1505 | static void xen_hvm_connect_shared_info(unsigned long pfn) | ||
1506 | { | 1475 | { |
1476 | int cpu; | ||
1507 | struct xen_add_to_physmap xatp; | 1477 | struct xen_add_to_physmap xatp; |
1478 | static struct shared_info *shared_info_page = 0; | ||
1508 | 1479 | ||
1480 | if (!shared_info_page) | ||
1481 | shared_info_page = (struct shared_info *) | ||
1482 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1509 | xatp.domid = DOMID_SELF; | 1483 | xatp.domid = DOMID_SELF; |
1510 | xatp.idx = 0; | 1484 | xatp.idx = 0; |
1511 | xatp.space = XENMAPSPACE_shared_info; | 1485 | xatp.space = XENMAPSPACE_shared_info; |
1512 | xatp.gpfn = pfn; | 1486 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; |
1513 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | 1487 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
1514 | BUG(); | 1488 | BUG(); |
1515 | 1489 | ||
1516 | } | 1490 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; |
1517 | static void xen_hvm_set_shared_info(struct shared_info *sip) | ||
1518 | { | ||
1519 | int cpu; | ||
1520 | |||
1521 | HYPERVISOR_shared_info = sip; | ||
1522 | 1491 | ||
1523 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | 1492 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
1524 | * page, we use it in the event channel upcall and in some pvclock | 1493 | * page, we use it in the event channel upcall and in some pvclock |
1525 | * related functions. We don't need the vcpu_info placement | 1494 | * related functions. We don't need the vcpu_info placement |
1526 | * optimizations because we don't use any pv_mmu or pv_irq op on | 1495 | * optimizations because we don't use any pv_mmu or pv_irq op on |
1527 | * HVM. | 1496 | * HVM. |
1528 | * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is | 1497 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is |
1529 | * online but xen_hvm_set_shared_info is run at resume time too and | 1498 | * online but xen_hvm_init_shared_info is run at resume time too and |
1530 | * in that case multiple vcpus might be online. */ | 1499 | * in that case multiple vcpus might be online. */ |
1531 | for_each_online_cpu(cpu) { | 1500 | for_each_online_cpu(cpu) { |
1532 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1501 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
1533 | } | 1502 | } |
1534 | } | 1503 | } |
1535 | 1504 | ||
1536 | /* Reconnect the shared_info pfn to a mfn */ | 1505 | #ifdef CONFIG_XEN_PVHVM |
1537 | void xen_hvm_resume_shared_info(void) | ||
1538 | { | ||
1539 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1540 | } | ||
1541 | |||
1542 | #ifdef CONFIG_KEXEC | ||
1543 | static struct shared_info *xen_hvm_shared_info_kexec; | ||
1544 | static unsigned long xen_hvm_shared_info_pfn_kexec; | ||
1545 | |||
1546 | /* Remember a pfn in MMIO space for kexec reboot */ | ||
1547 | void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) | ||
1548 | { | ||
1549 | xen_hvm_shared_info_kexec = sip; | ||
1550 | xen_hvm_shared_info_pfn_kexec = pfn; | ||
1551 | } | ||
1552 | |||
1553 | static void xen_hvm_syscore_shutdown(void) | ||
1554 | { | ||
1555 | struct xen_memory_reservation reservation = { | ||
1556 | .domid = DOMID_SELF, | ||
1557 | .nr_extents = 1, | ||
1558 | }; | ||
1559 | unsigned long prev_pfn; | ||
1560 | int rc; | ||
1561 | |||
1562 | if (!xen_hvm_shared_info_kexec) | ||
1563 | return; | ||
1564 | |||
1565 | prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; | ||
1566 | set_xen_guest_handle(reservation.extent_start, &prev_pfn); | ||
1567 | |||
1568 | /* Move pfn to MMIO, disconnects previous pfn from mfn */ | ||
1569 | xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); | ||
1570 | |||
1571 | /* Update pointers, following hypercall is also a memory barrier */ | ||
1572 | xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); | ||
1573 | |||
1574 | /* Allocate new mfn for previous pfn */ | ||
1575 | do { | ||
1576 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | ||
1577 | if (rc == 0) | ||
1578 | msleep(123); | ||
1579 | } while (rc == 0); | ||
1580 | |||
1581 | /* Make sure the previous pfn is really connected to a (new) mfn */ | ||
1582 | BUG_ON(rc != 1); | ||
1583 | } | ||
1584 | |||
1585 | static struct syscore_ops xen_hvm_syscore_ops = { | ||
1586 | .shutdown = xen_hvm_syscore_shutdown, | ||
1587 | }; | ||
1588 | #endif | ||
1589 | |||
1590 | /* Use a pfn in RAM, may move to MMIO before kexec. */ | ||
1591 | static void __init xen_hvm_init_shared_info(void) | ||
1592 | { | ||
1593 | /* Remember pointer for resume */ | ||
1594 | xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1595 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1596 | xen_hvm_set_shared_info(xen_hvm_shared_info); | ||
1597 | } | ||
1598 | |||
1599 | static void __init init_hvm_pv_info(void) | 1506 | static void __init init_hvm_pv_info(void) |
1600 | { | 1507 | { |
1601 | int major, minor; | 1508 | int major, minor; |
@@ -1646,9 +1553,6 @@ static void __init xen_hvm_guest_init(void) | |||
1646 | init_hvm_pv_info(); | 1553 | init_hvm_pv_info(); |
1647 | 1554 | ||
1648 | xen_hvm_init_shared_info(); | 1555 | xen_hvm_init_shared_info(); |
1649 | #ifdef CONFIG_KEXEC | ||
1650 | register_syscore_ops(&xen_hvm_syscore_ops); | ||
1651 | #endif | ||
1652 | 1556 | ||
1653 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1557 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1654 | xen_have_vector_callback = 1; | 1558 | xen_have_vector_callback = 1; |