diff options
| -rw-r--r-- | arch/x86/xen/enlighten.c | 118 | ||||
| -rw-r--r-- | arch/x86/xen/suspend.c | 2 | ||||
| -rw-r--r-- | arch/x86/xen/xen-ops.h | 2 | ||||
| -rw-r--r-- | drivers/xen/platform-pci.c | 15 | ||||
| -rw-r--r-- | include/xen/events.h | 2 |
5 files changed, 126 insertions, 13 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f1814fc2cb77..a6f8acbdfc9a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
| 32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
| 33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
| 34 | #include <linux/syscore_ops.h> | ||
| 34 | 35 | ||
| 35 | #include <xen/xen.h> | 36 | #include <xen/xen.h> |
| 36 | #include <xen/interface/xen.h> | 37 | #include <xen/interface/xen.h> |
| @@ -1471,38 +1472,130 @@ asmlinkage void __init xen_start_kernel(void) | |||
| 1471 | #endif | 1472 | #endif |
| 1472 | } | 1473 | } |
| 1473 | 1474 | ||
| 1474 | void __ref xen_hvm_init_shared_info(void) | 1475 | #ifdef CONFIG_XEN_PVHVM |
| 1476 | /* | ||
| 1477 | * The pfn containing the shared_info is located somewhere in RAM. This | ||
| 1478 | * will cause trouble if the current kernel is doing a kexec boot into a | ||
| 1479 | * new kernel. The new kernel (and its startup code) can not know where | ||
| 1480 | * the pfn is, so it can not reserve the page. The hypervisor will | ||
| 1481 | * continue to update the pfn, and as a result memory corruption occurs | ||
| 1482 | * in the new kernel. | ||
| 1483 | * | ||
| 1484 | * One way to work around this issue is to allocate a page in the | ||
| 1485 | * xen-platform pci device's BAR memory range. But pci init is done very | ||
| 1486 | * late and the shared_info page is already in use very early to read | ||
| 1487 | * the pvclock. So moving the pfn from RAM to MMIO is racy because some | ||
| 1488 | * code paths on other vcpus could access the pfn during the small | ||
| 1489 | * window when the old pfn is moved to the new pfn. There is even a | ||
| 1490 | * small window where the old pfn is not backed by a mfn, and during that | ||
| 1491 | * time all reads return -1. | ||
| 1492 | * | ||
| 1493 | * Because it is not known upfront where the MMIO region is located it | ||
| 1494 | * can not be used right from the start in xen_hvm_init_shared_info. | ||
| 1495 | * | ||
| 1496 | * To minimise trouble the move of the pfn is done shortly before kexec. | ||
| 1497 | * This does not eliminate the race because all vcpus are still online | ||
| 1498 | * when the syscore_ops will be called. But hopefully there is no work | ||
| 1499 | * pending at this point in time. Also the syscore_op is run last which | ||
| 1500 | * reduces the risk further. | ||
| 1501 | */ | ||
| 1502 | |||
| 1503 | static struct shared_info *xen_hvm_shared_info; | ||
| 1504 | |||
| 1505 | static void xen_hvm_connect_shared_info(unsigned long pfn) | ||
| 1475 | { | 1506 | { |
| 1476 | int cpu; | ||
| 1477 | struct xen_add_to_physmap xatp; | 1507 | struct xen_add_to_physmap xatp; |
| 1478 | static struct shared_info *shared_info_page = 0; | ||
| 1479 | 1508 | ||
| 1480 | if (!shared_info_page) | ||
| 1481 | shared_info_page = (struct shared_info *) | ||
| 1482 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
| 1483 | xatp.domid = DOMID_SELF; | 1509 | xatp.domid = DOMID_SELF; |
| 1484 | xatp.idx = 0; | 1510 | xatp.idx = 0; |
| 1485 | xatp.space = XENMAPSPACE_shared_info; | 1511 | xatp.space = XENMAPSPACE_shared_info; |
| 1486 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; | 1512 | xatp.gpfn = pfn; |
| 1487 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | 1513 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
| 1488 | BUG(); | 1514 | BUG(); |
| 1489 | 1515 | ||
| 1490 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; | 1516 | } |
| 1517 | static void xen_hvm_set_shared_info(struct shared_info *sip) | ||
| 1518 | { | ||
| 1519 | int cpu; | ||
| 1520 | |||
| 1521 | HYPERVISOR_shared_info = sip; | ||
| 1491 | 1522 | ||
| 1492 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | 1523 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
| 1493 | * page, we use it in the event channel upcall and in some pvclock | 1524 | * page, we use it in the event channel upcall and in some pvclock |
| 1494 | * related functions. We don't need the vcpu_info placement | 1525 | * related functions. We don't need the vcpu_info placement |
| 1495 | * optimizations because we don't use any pv_mmu or pv_irq op on | 1526 | * optimizations because we don't use any pv_mmu or pv_irq op on |
| 1496 | * HVM. | 1527 | * HVM. |
| 1497 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is | 1528 | * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is |
| 1498 | * online but xen_hvm_init_shared_info is run at resume time too and | 1529 | * online but xen_hvm_set_shared_info is run at resume time too and |
| 1499 | * in that case multiple vcpus might be online. */ | 1530 | * in that case multiple vcpus might be online. */ |
| 1500 | for_each_online_cpu(cpu) { | 1531 | for_each_online_cpu(cpu) { |
| 1501 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1532 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
| 1502 | } | 1533 | } |
| 1503 | } | 1534 | } |
| 1504 | 1535 | ||
| 1505 | #ifdef CONFIG_XEN_PVHVM | 1536 | /* Reconnect the shared_info pfn to a mfn */ |
| 1537 | void xen_hvm_resume_shared_info(void) | ||
| 1538 | { | ||
| 1539 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
| 1540 | } | ||
| 1541 | |||
| 1542 | #ifdef CONFIG_KEXEC | ||
| 1543 | static struct shared_info *xen_hvm_shared_info_kexec; | ||
| 1544 | static unsigned long xen_hvm_shared_info_pfn_kexec; | ||
| 1545 | |||
| 1546 | /* Remember a pfn in MMIO space for kexec reboot */ | ||
| 1547 | void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) | ||
| 1548 | { | ||
| 1549 | xen_hvm_shared_info_kexec = sip; | ||
| 1550 | xen_hvm_shared_info_pfn_kexec = pfn; | ||
| 1551 | } | ||
| 1552 | |||
| 1553 | static void xen_hvm_syscore_shutdown(void) | ||
| 1554 | { | ||
| 1555 | struct xen_memory_reservation reservation = { | ||
| 1556 | .domid = DOMID_SELF, | ||
| 1557 | .nr_extents = 1, | ||
| 1558 | }; | ||
| 1559 | unsigned long prev_pfn; | ||
| 1560 | int rc; | ||
| 1561 | |||
| 1562 | if (!xen_hvm_shared_info_kexec) | ||
| 1563 | return; | ||
| 1564 | |||
| 1565 | prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; | ||
| 1566 | set_xen_guest_handle(reservation.extent_start, &prev_pfn); | ||
| 1567 | |||
| 1568 | /* Move pfn to MMIO, disconnects previous pfn from mfn */ | ||
| 1569 | xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); | ||
| 1570 | |||
| 1571 | /* Update pointers, following hypercall is also a memory barrier */ | ||
| 1572 | xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); | ||
| 1573 | |||
| 1574 | /* Allocate new mfn for previous pfn */ | ||
| 1575 | do { | ||
| 1576 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | ||
| 1577 | if (rc == 0) | ||
| 1578 | msleep(123); | ||
| 1579 | } while (rc == 0); | ||
| 1580 | |||
| 1581 | /* Make sure the previous pfn is really connected to a (new) mfn */ | ||
| 1582 | BUG_ON(rc != 1); | ||
| 1583 | } | ||
| 1584 | |||
| 1585 | static struct syscore_ops xen_hvm_syscore_ops = { | ||
| 1586 | .shutdown = xen_hvm_syscore_shutdown, | ||
| 1587 | }; | ||
| 1588 | #endif | ||
| 1589 | |||
| 1590 | /* Use a pfn in RAM, may move to MMIO before kexec. */ | ||
| 1591 | static void __init xen_hvm_init_shared_info(void) | ||
| 1592 | { | ||
| 1593 | /* Remember pointer for resume */ | ||
| 1594 | xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
| 1595 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
| 1596 | xen_hvm_set_shared_info(xen_hvm_shared_info); | ||
| 1597 | } | ||
| 1598 | |||
| 1506 | static void __init init_hvm_pv_info(void) | 1599 | static void __init init_hvm_pv_info(void) |
| 1507 | { | 1600 | { |
| 1508 | int major, minor; | 1601 | int major, minor; |
| @@ -1553,6 +1646,9 @@ static void __init xen_hvm_guest_init(void) | |||
| 1553 | init_hvm_pv_info(); | 1646 | init_hvm_pv_info(); |
| 1554 | 1647 | ||
| 1555 | xen_hvm_init_shared_info(); | 1648 | xen_hvm_init_shared_info(); |
| 1649 | #ifdef CONFIG_KEXEC | ||
| 1650 | register_syscore_ops(&xen_hvm_syscore_ops); | ||
| 1651 | #endif | ||
| 1556 | 1652 | ||
| 1557 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1653 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
| 1558 | xen_have_vector_callback = 1; | 1654 | xen_have_vector_callback = 1; |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 45329c8c226e..ae8a00c39de4 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
| @@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) | |||
| 30 | { | 30 | { |
| 31 | #ifdef CONFIG_XEN_PVHVM | 31 | #ifdef CONFIG_XEN_PVHVM |
| 32 | int cpu; | 32 | int cpu; |
| 33 | xen_hvm_init_shared_info(); | 33 | xen_hvm_resume_shared_info(); |
| 34 | xen_callback_vector(); | 34 | xen_callback_vector(); |
| 35 | xen_unplug_emulated_devices(); | 35 | xen_unplug_emulated_devices(); |
| 36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | 36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c150154..1e4329e04e0f 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
| @@ -41,7 +41,7 @@ void xen_enable_syscall(void); | |||
| 41 | void xen_vcpu_restore(void); | 41 | void xen_vcpu_restore(void); |
| 42 | 42 | ||
| 43 | void xen_callback_vector(void); | 43 | void xen_callback_vector(void); |
| 44 | void xen_hvm_init_shared_info(void); | 44 | void xen_hvm_resume_shared_info(void); |
| 45 | void xen_unplug_emulated_devices(void); | 45 | void xen_unplug_emulated_devices(void); |
| 46 | 46 | ||
| 47 | void __init xen_build_dynamic_phys_to_machine(void); | 47 | void __init xen_build_dynamic_phys_to_machine(void); |
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 97ca359ae2bd..d4c50d63acbc 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c | |||
| @@ -101,6 +101,19 @@ static int platform_pci_resume(struct pci_dev *pdev) | |||
| 101 | return 0; | 101 | return 0; |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | static void __devinit prepare_shared_info(void) | ||
| 105 | { | ||
| 106 | #ifdef CONFIG_KEXEC | ||
| 107 | unsigned long addr; | ||
| 108 | struct shared_info *hvm_shared_info; | ||
| 109 | |||
| 110 | addr = alloc_xen_mmio(PAGE_SIZE); | ||
| 111 | hvm_shared_info = ioremap(addr, PAGE_SIZE); | ||
| 112 | memset(hvm_shared_info, 0, PAGE_SIZE); | ||
| 113 | xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT); | ||
| 114 | #endif | ||
| 115 | } | ||
| 116 | |||
| 104 | static int __devinit platform_pci_init(struct pci_dev *pdev, | 117 | static int __devinit platform_pci_init(struct pci_dev *pdev, |
| 105 | const struct pci_device_id *ent) | 118 | const struct pci_device_id *ent) |
| 106 | { | 119 | { |
| @@ -138,6 +151,8 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, | |||
| 138 | platform_mmio = mmio_addr; | 151 | platform_mmio = mmio_addr; |
| 139 | platform_mmiolen = mmio_len; | 152 | platform_mmiolen = mmio_len; |
| 140 | 153 | ||
| 154 | prepare_shared_info(); | ||
| 155 | |||
| 141 | if (!xen_have_vector_callback) { | 156 | if (!xen_have_vector_callback) { |
| 142 | ret = xen_allocate_irq(pdev); | 157 | ret = xen_allocate_irq(pdev); |
| 143 | if (ret) { | 158 | if (ret) { |
diff --git a/include/xen/events.h b/include/xen/events.h index 04399b28e821..9c641deb65d2 100644 --- a/include/xen/events.h +++ b/include/xen/events.h | |||
| @@ -58,6 +58,8 @@ void notify_remote_via_irq(int irq); | |||
| 58 | 58 | ||
| 59 | void xen_irq_resume(void); | 59 | void xen_irq_resume(void); |
| 60 | 60 | ||
| 61 | void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn); | ||
| 62 | |||
| 61 | /* Clear an irq's pending state, in preparation for polling on it */ | 63 | /* Clear an irq's pending state, in preparation for polling on it */ |
| 62 | void xen_clear_irq_pending(int irq); | 64 | void xen_clear_irq_pending(int irq); |
| 63 | void xen_set_irq_pending(int irq); | 65 | void xen_set_irq_pending(int irq); |
