aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/xen/enlighten.c118
-rw-r--r--arch/x86/xen/suspend.c2
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--drivers/xen/platform-pci.c15
-rw-r--r--include/xen/events.h2
5 files changed, 126 insertions, 13 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f1814fc2cb7..a6f8acbdfc9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,6 +31,7 @@
31#include <linux/pci.h> 31#include <linux/pci.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/memblock.h> 33#include <linux/memblock.h>
34#include <linux/syscore_ops.h>
34 35
35#include <xen/xen.h> 36#include <xen/xen.h>
36#include <xen/interface/xen.h> 37#include <xen/interface/xen.h>
@@ -1471,38 +1472,130 @@ asmlinkage void __init xen_start_kernel(void)
1471#endif 1472#endif
1472} 1473}
1473 1474
1474void __ref xen_hvm_init_shared_info(void) 1475#ifdef CONFIG_XEN_PVHVM
1476/*
1477 * The pfn containing the shared_info is located somewhere in RAM. This
1478 * will cause trouble if the current kernel is doing a kexec boot into a
1479 * new kernel. The new kernel (and its startup code) can not know where
1480 * the pfn is, so it can not reserve the page. The hypervisor will
1481 * continue to update the pfn, and as a result memory corruption occurs
1482 * in the new kernel.
1483 *
1484 * One way to work around this issue is to allocate a page in the
1485 * xen-platform pci device's BAR memory range. But pci init is done very
1486 * late and the shared_info page is already in use very early to read
1487 * the pvclock. So moving the pfn from RAM to MMIO is racy because some
1488 * code paths on other vcpus could access the pfn during the small
1489 * window when the old pfn is moved to the new pfn. There is even a
1490 * small window where the old pfn is not backed by a mfn, and during that
1491 * time all reads return -1.
1492 *
1493 * Because it is not known upfront where the MMIO region is located it
1494 * can not be used right from the start in xen_hvm_init_shared_info.
1495 *
1496 * To minimise trouble the move of the pfn is done shortly before kexec.
1497 * This does not eliminate the race because all vcpus are still online
1498 * when the syscore_ops will be called. But hopefully there is no work
1499 * pending at this point in time. Also the syscore_op is run last which
1500 * reduces the risk further.
1501 */
1502
1503static struct shared_info *xen_hvm_shared_info;
1504
1505static void xen_hvm_connect_shared_info(unsigned long pfn)
1475{ 1506{
1476 int cpu;
1477 struct xen_add_to_physmap xatp; 1507 struct xen_add_to_physmap xatp;
1478 static struct shared_info *shared_info_page = 0;
1479 1508
1480 if (!shared_info_page)
1481 shared_info_page = (struct shared_info *)
1482 extend_brk(PAGE_SIZE, PAGE_SIZE);
1483 xatp.domid = DOMID_SELF; 1509 xatp.domid = DOMID_SELF;
1484 xatp.idx = 0; 1510 xatp.idx = 0;
1485 xatp.space = XENMAPSPACE_shared_info; 1511 xatp.space = XENMAPSPACE_shared_info;
1486 xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; 1512 xatp.gpfn = pfn;
1487 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 1513 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
1488 BUG(); 1514 BUG();
1489 1515
1490 HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; 1516}
1517static void xen_hvm_set_shared_info(struct shared_info *sip)
1518{
1519 int cpu;
1520
1521 HYPERVISOR_shared_info = sip;
1491 1522
1492 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info 1523 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
1493 * page, we use it in the event channel upcall and in some pvclock 1524 * page, we use it in the event channel upcall and in some pvclock
1494 * related functions. We don't need the vcpu_info placement 1525 * related functions. We don't need the vcpu_info placement
1495 * optimizations because we don't use any pv_mmu or pv_irq op on 1526 * optimizations because we don't use any pv_mmu or pv_irq op on
1496 * HVM. 1527 * HVM.
1497 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is 1528 * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is
1498 * online but xen_hvm_init_shared_info is run at resume time too and 1529 * online but xen_hvm_set_shared_info is run at resume time too and
1499 * in that case multiple vcpus might be online. */ 1530 * in that case multiple vcpus might be online. */
1500 for_each_online_cpu(cpu) { 1531 for_each_online_cpu(cpu) {
1501 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1532 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1502 } 1533 }
1503} 1534}
1504 1535
1505#ifdef CONFIG_XEN_PVHVM 1536/* Reconnect the shared_info pfn to a mfn */
1537void xen_hvm_resume_shared_info(void)
1538{
1539 xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
1540}
1541
1542#ifdef CONFIG_KEXEC
1543static struct shared_info *xen_hvm_shared_info_kexec;
1544static unsigned long xen_hvm_shared_info_pfn_kexec;
1545
1546/* Remember a pfn in MMIO space for kexec reboot */
1547void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn)
1548{
1549 xen_hvm_shared_info_kexec = sip;
1550 xen_hvm_shared_info_pfn_kexec = pfn;
1551}
1552
1553static void xen_hvm_syscore_shutdown(void)
1554{
1555 struct xen_memory_reservation reservation = {
1556 .domid = DOMID_SELF,
1557 .nr_extents = 1,
1558 };
1559 unsigned long prev_pfn;
1560 int rc;
1561
1562 if (!xen_hvm_shared_info_kexec)
1563 return;
1564
1565 prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT;
1566 set_xen_guest_handle(reservation.extent_start, &prev_pfn);
1567
1568 /* Move pfn to MMIO, disconnects previous pfn from mfn */
1569 xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec);
1570
1571 /* Update pointers, following hypercall is also a memory barrier */
1572 xen_hvm_set_shared_info(xen_hvm_shared_info_kexec);
1573
1574 /* Allocate new mfn for previous pfn */
1575 do {
1576 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
1577 if (rc == 0)
1578 msleep(123);
1579 } while (rc == 0);
1580
1581 /* Make sure the previous pfn is really connected to a (new) mfn */
1582 BUG_ON(rc != 1);
1583}
1584
1585static struct syscore_ops xen_hvm_syscore_ops = {
1586 .shutdown = xen_hvm_syscore_shutdown,
1587};
1588#endif
1589
1590/* Use a pfn in RAM, may move to MMIO before kexec. */
1591static void __init xen_hvm_init_shared_info(void)
1592{
1593 /* Remember pointer for resume */
1594 xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
1595 xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
1596 xen_hvm_set_shared_info(xen_hvm_shared_info);
1597}
1598
1506static void __init init_hvm_pv_info(void) 1599static void __init init_hvm_pv_info(void)
1507{ 1600{
1508 int major, minor; 1601 int major, minor;
@@ -1553,6 +1646,9 @@ static void __init xen_hvm_guest_init(void)
1553 init_hvm_pv_info(); 1646 init_hvm_pv_info();
1554 1647
1555 xen_hvm_init_shared_info(); 1648 xen_hvm_init_shared_info();
1649#ifdef CONFIG_KEXEC
1650 register_syscore_ops(&xen_hvm_syscore_ops);
1651#endif
1556 1652
1557 if (xen_feature(XENFEAT_hvm_callback_vector)) 1653 if (xen_feature(XENFEAT_hvm_callback_vector))
1558 xen_have_vector_callback = 1; 1654 xen_have_vector_callback = 1;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 45329c8c226..ae8a00c39de 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM 31#ifdef CONFIG_XEN_PVHVM
32 int cpu; 32 int cpu;
33 xen_hvm_init_shared_info(); 33 xen_hvm_resume_shared_info();
34 xen_callback_vector(); 34 xen_callback_vector();
35 xen_unplug_emulated_devices(); 35 xen_unplug_emulated_devices();
36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) { 36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 202d4c15015..1e4329e04e0 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -41,7 +41,7 @@ void xen_enable_syscall(void);
41void xen_vcpu_restore(void); 41void xen_vcpu_restore(void);
42 42
43void xen_callback_vector(void); 43void xen_callback_vector(void);
44void xen_hvm_init_shared_info(void); 44void xen_hvm_resume_shared_info(void);
45void xen_unplug_emulated_devices(void); 45void xen_unplug_emulated_devices(void);
46 46
47void __init xen_build_dynamic_phys_to_machine(void); 47void __init xen_build_dynamic_phys_to_machine(void);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 97ca359ae2b..d4c50d63acb 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -101,6 +101,19 @@ static int platform_pci_resume(struct pci_dev *pdev)
101 return 0; 101 return 0;
102} 102}
103 103
104static void __devinit prepare_shared_info(void)
105{
106#ifdef CONFIG_KEXEC
107 unsigned long addr;
108 struct shared_info *hvm_shared_info;
109
110 addr = alloc_xen_mmio(PAGE_SIZE);
111 hvm_shared_info = ioremap(addr, PAGE_SIZE);
112 memset(hvm_shared_info, 0, PAGE_SIZE);
113 xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT);
114#endif
115}
116
104static int __devinit platform_pci_init(struct pci_dev *pdev, 117static int __devinit platform_pci_init(struct pci_dev *pdev,
105 const struct pci_device_id *ent) 118 const struct pci_device_id *ent)
106{ 119{
@@ -138,6 +151,8 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
138 platform_mmio = mmio_addr; 151 platform_mmio = mmio_addr;
139 platform_mmiolen = mmio_len; 152 platform_mmiolen = mmio_len;
140 153
154 prepare_shared_info();
155
141 if (!xen_have_vector_callback) { 156 if (!xen_have_vector_callback) {
142 ret = xen_allocate_irq(pdev); 157 ret = xen_allocate_irq(pdev);
143 if (ret) { 158 if (ret) {
diff --git a/include/xen/events.h b/include/xen/events.h
index 04399b28e82..9c641deb65d 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -58,6 +58,8 @@ void notify_remote_via_irq(int irq);
58 58
59void xen_irq_resume(void); 59void xen_irq_resume(void);
60 60
61void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn);
62
61/* Clear an irq's pending state, in preparation for polling on it */ 63/* Clear an irq's pending state, in preparation for polling on it */
62void xen_clear_irq_pending(int irq); 64void xen_clear_irq_pending(int irq);
63void xen_set_irq_pending(int irq); 65void xen_set_irq_pending(int irq);