diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/xen/hypercall.h | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 22 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 224 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 39 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 23 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 2 |
8 files changed, 247 insertions, 77 deletions
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 5728852fb90f..59c226d120cd 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <xen/interface/sched.h> | 48 | #include <xen/interface/sched.h> |
49 | #include <xen/interface/physdev.h> | 49 | #include <xen/interface/physdev.h> |
50 | #include <xen/interface/platform.h> | 50 | #include <xen/interface/platform.h> |
51 | #include <xen/interface/xen-mca.h> | ||
51 | 52 | ||
52 | /* | 53 | /* |
53 | * The hypercall asms have to meet several constraints: | 54 | * The hypercall asms have to meet several constraints: |
@@ -302,6 +303,13 @@ HYPERVISOR_set_timer_op(u64 timeout) | |||
302 | } | 303 | } |
303 | 304 | ||
304 | static inline int | 305 | static inline int |
306 | HYPERVISOR_mca(struct xen_mc *mc_op) | ||
307 | { | ||
308 | mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; | ||
309 | return _hypercall1(int, mca, mc_op); | ||
310 | } | ||
311 | |||
312 | static inline int | ||
305 | HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) | 313 | HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) |
306 | { | 314 | { |
307 | platform_op->interface_version = XENPF_INTERFACE_VERSION; | 315 | platform_op->interface_version = XENPF_INTERFACE_VERSION; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9473e8772fd1..5e095f873e3e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -60,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); | |||
60 | 60 | ||
61 | int mce_disabled __read_mostly; | 61 | int mce_disabled __read_mostly; |
62 | 62 | ||
63 | #define MISC_MCELOG_MINOR 227 | ||
64 | |||
65 | #define SPINUNIT 100 /* 100ns */ | 63 | #define SPINUNIT 100 /* 100ns */ |
66 | 64 | ||
67 | atomic_t mce_entry; | 65 | atomic_t mce_entry; |
@@ -2346,7 +2344,7 @@ static __init int mcheck_init_device(void) | |||
2346 | 2344 | ||
2347 | return err; | 2345 | return err; |
2348 | } | 2346 | } |
2349 | device_initcall(mcheck_init_device); | 2347 | device_initcall_sync(mcheck_init_device); |
2350 | 2348 | ||
2351 | /* | 2349 | /* |
2352 | * Old style boot options parsing. Only for compatibility. | 2350 | * Old style boot options parsing. Only for compatibility. |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 671b95a2ffb5..c4e916d77378 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -759,4 +759,24 @@ static __init int threshold_init_device(void) | |||
759 | 759 | ||
760 | return 0; | 760 | return 0; |
761 | } | 761 | } |
762 | device_initcall(threshold_init_device); | 762 | /* |
763 | * there are 3 funcs which need to be _initcalled in a logic sequence: | ||
764 | * 1. xen_late_init_mcelog | ||
765 | * 2. mcheck_init_device | ||
766 | * 3. threshold_init_device | ||
767 | * | ||
768 | * xen_late_init_mcelog must register xen_mce_chrdev_device before | ||
769 | * native mce_chrdev_device registration if running under xen platform; | ||
770 | * | ||
771 | * mcheck_init_device should be inited before threshold_init_device to | ||
772 | * initialize mce_device, otherwise a NULL ptr dereference will cause panic. | ||
773 | * | ||
774 | * so we use following _initcalls | ||
775 | * 1. device_initcall(xen_late_init_mcelog); | ||
776 | * 2. device_initcall_sync(mcheck_init_device); | ||
777 | * 3. late_initcall(threshold_init_device); | ||
778 | * | ||
779 | * when running under xen, the initcall order is 1,2,3; | ||
780 | * on baremetal, we skip 1 and we do only 2 and 3. | ||
781 | */ | ||
782 | late_initcall(threshold_init_device); | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ed7d54985d0c..bf4bda6d3e9a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
34 | #include <linux/syscore_ops.h> | ||
34 | 35 | ||
35 | #include <xen/xen.h> | 36 | #include <xen/xen.h> |
36 | #include <xen/interface/xen.h> | 37 | #include <xen/interface/xen.h> |
@@ -38,6 +39,7 @@ | |||
38 | #include <xen/interface/physdev.h> | 39 | #include <xen/interface/physdev.h> |
39 | #include <xen/interface/vcpu.h> | 40 | #include <xen/interface/vcpu.h> |
40 | #include <xen/interface/memory.h> | 41 | #include <xen/interface/memory.h> |
42 | #include <xen/interface/xen-mca.h> | ||
41 | #include <xen/features.h> | 43 | #include <xen/features.h> |
42 | #include <xen/page.h> | 44 | #include <xen/page.h> |
43 | #include <xen/hvm.h> | 45 | #include <xen/hvm.h> |
@@ -107,7 +109,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); | |||
107 | * Point at some empty memory to start with. We map the real shared_info | 109 | * Point at some empty memory to start with. We map the real shared_info |
108 | * page as soon as fixmap is up and running. | 110 | * page as soon as fixmap is up and running. |
109 | */ | 111 | */ |
110 | struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | 112 | struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; |
111 | 113 | ||
112 | /* | 114 | /* |
113 | * Flag to determine whether vcpu info placement is available on all | 115 | * Flag to determine whether vcpu info placement is available on all |
@@ -124,6 +126,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | |||
124 | */ | 126 | */ |
125 | static int have_vcpu_info_placement = 1; | 127 | static int have_vcpu_info_placement = 1; |
126 | 128 | ||
129 | struct tls_descs { | ||
130 | struct desc_struct desc[3]; | ||
131 | }; | ||
132 | |||
133 | /* | ||
134 | * Updating the 3 TLS descriptors in the GDT on every task switch is | ||
135 | * surprisingly expensive so we avoid updating them if they haven't | ||
136 | * changed. Since Xen writes different descriptors than the one | ||
137 | * passed in the update_descriptor hypercall we keep shadow copies to | ||
138 | * compare against. | ||
139 | */ | ||
140 | static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); | ||
141 | |||
127 | static void clamp_max_cpus(void) | 142 | static void clamp_max_cpus(void) |
128 | { | 143 | { |
129 | #ifdef CONFIG_SMP | 144 | #ifdef CONFIG_SMP |
@@ -341,9 +356,7 @@ static void __init xen_init_cpuid_mask(void) | |||
341 | unsigned int xsave_mask; | 356 | unsigned int xsave_mask; |
342 | 357 | ||
343 | cpuid_leaf1_edx_mask = | 358 | cpuid_leaf1_edx_mask = |
344 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ | 359 | ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ |
345 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | ||
346 | (1 << X86_FEATURE_MTRR) | /* disable MTRR */ | ||
347 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | 360 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ |
348 | 361 | ||
349 | if (!xen_initial_domain()) | 362 | if (!xen_initial_domain()) |
@@ -540,12 +553,28 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) | |||
540 | BUG(); | 553 | BUG(); |
541 | } | 554 | } |
542 | 555 | ||
556 | static inline bool desc_equal(const struct desc_struct *d1, | ||
557 | const struct desc_struct *d2) | ||
558 | { | ||
559 | return d1->a == d2->a && d1->b == d2->b; | ||
560 | } | ||
561 | |||
543 | static void load_TLS_descriptor(struct thread_struct *t, | 562 | static void load_TLS_descriptor(struct thread_struct *t, |
544 | unsigned int cpu, unsigned int i) | 563 | unsigned int cpu, unsigned int i) |
545 | { | 564 | { |
546 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | 565 | struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i]; |
547 | xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | 566 | struct desc_struct *gdt; |
548 | struct multicall_space mc = __xen_mc_entry(0); | 567 | xmaddr_t maddr; |
568 | struct multicall_space mc; | ||
569 | |||
570 | if (desc_equal(shadow, &t->tls_array[i])) | ||
571 | return; | ||
572 | |||
573 | *shadow = t->tls_array[i]; | ||
574 | |||
575 | gdt = get_cpu_gdt_table(cpu); | ||
576 | maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | ||
577 | mc = __xen_mc_entry(0); | ||
549 | 578 | ||
550 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); | 579 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); |
551 | } | 580 | } |
@@ -627,8 +656,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
627 | /* | 656 | /* |
628 | * Look for known traps using IST, and substitute them | 657 | * Look for known traps using IST, and substitute them |
629 | * appropriately. The debugger ones are the only ones we care | 658 | * appropriately. The debugger ones are the only ones we care |
630 | * about. Xen will handle faults like double_fault and | 659 | * about. Xen will handle faults like double_fault, |
631 | * machine_check, so we should never see them. Warn if | 660 | * so we should never see them. Warn if |
632 | * there's an unexpected IST-using fault handler. | 661 | * there's an unexpected IST-using fault handler. |
633 | */ | 662 | */ |
634 | if (addr == (unsigned long)debug) | 663 | if (addr == (unsigned long)debug) |
@@ -643,7 +672,11 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
643 | return 0; | 672 | return 0; |
644 | #ifdef CONFIG_X86_MCE | 673 | #ifdef CONFIG_X86_MCE |
645 | } else if (addr == (unsigned long)machine_check) { | 674 | } else if (addr == (unsigned long)machine_check) { |
646 | return 0; | 675 | /* |
676 | * when the xen hypervisor injects a vMCE into the guest, | ||
677 | * use native mce handler to handle it | ||
678 | */ | ||
679 | ; | ||
647 | #endif | 680 | #endif |
648 | } else { | 681 | } else { |
649 | /* Some other trap using IST? */ | 682 | /* Some other trap using IST? */ |
@@ -1437,64 +1470,155 @@ asmlinkage void __init xen_start_kernel(void) | |||
1437 | #endif | 1470 | #endif |
1438 | } | 1471 | } |
1439 | 1472 | ||
1440 | static int init_hvm_pv_info(int *major, int *minor) | 1473 | #ifdef CONFIG_XEN_PVHVM |
1441 | { | 1474 | /* |
1442 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | 1475 | * The pfn containing the shared_info is located somewhere in RAM. This |
1443 | u64 pfn; | 1476 | * will cause trouble if the current kernel is doing a kexec boot into a |
1444 | 1477 | * new kernel. The new kernel (and its startup code) can not know where | |
1445 | base = xen_cpuid_base(); | 1478 | * the pfn is, so it can not reserve the page. The hypervisor will |
1446 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | 1479 | * continue to update the pfn, and as a result memory corruption occurs |
1447 | 1480 | * in the new kernel. | |
1448 | *major = eax >> 16; | 1481 | * |
1449 | *minor = eax & 0xffff; | 1482 | * One way to work around this issue is to allocate a page in the |
1450 | printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); | 1483 | * xen-platform pci device's BAR memory range. But pci init is done very |
1451 | 1484 | * late and the shared_info page is already in use very early to read | |
1452 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | 1485 | * the pvclock. So moving the pfn from RAM to MMIO is racy because some |
1453 | 1486 | * code paths on other vcpus could access the pfn during the small | |
1454 | pfn = __pa(hypercall_page); | 1487 | * window when the old pfn is moved to the new pfn. There is even a |
1455 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | 1488 | * small window where the old pfn is not backed by a mfn, and during that |
1456 | 1489 | * time all reads return -1. | |
1457 | xen_setup_features(); | 1490 | * |
1458 | 1491 | * Because it is not known upfront where the MMIO region is located it | |
1459 | pv_info.name = "Xen HVM"; | 1492 | * can not be used right from the start in xen_hvm_init_shared_info. |
1460 | 1493 | * | |
1461 | xen_domain_type = XEN_HVM_DOMAIN; | 1494 | * To minimise trouble the move of the pfn is done shortly before kexec. |
1495 | * This does not eliminate the race because all vcpus are still online | ||
1496 | * when the syscore_ops will be called. But hopefully there is no work | ||
1497 | * pending at this point in time. Also the syscore_op is run last which | ||
1498 | * reduces the risk further. | ||
1499 | */ | ||
1462 | 1500 | ||
1463 | return 0; | 1501 | static struct shared_info *xen_hvm_shared_info; |
1464 | } | ||
1465 | 1502 | ||
1466 | void __ref xen_hvm_init_shared_info(void) | 1503 | static void xen_hvm_connect_shared_info(unsigned long pfn) |
1467 | { | 1504 | { |
1468 | int cpu; | ||
1469 | struct xen_add_to_physmap xatp; | 1505 | struct xen_add_to_physmap xatp; |
1470 | static struct shared_info *shared_info_page = 0; | ||
1471 | 1506 | ||
1472 | if (!shared_info_page) | ||
1473 | shared_info_page = (struct shared_info *) | ||
1474 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1475 | xatp.domid = DOMID_SELF; | 1507 | xatp.domid = DOMID_SELF; |
1476 | xatp.idx = 0; | 1508 | xatp.idx = 0; |
1477 | xatp.space = XENMAPSPACE_shared_info; | 1509 | xatp.space = XENMAPSPACE_shared_info; |
1478 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; | 1510 | xatp.gpfn = pfn; |
1479 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | 1511 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
1480 | BUG(); | 1512 | BUG(); |
1481 | 1513 | ||
1482 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; | 1514 | } |
1515 | static void xen_hvm_set_shared_info(struct shared_info *sip) | ||
1516 | { | ||
1517 | int cpu; | ||
1518 | |||
1519 | HYPERVISOR_shared_info = sip; | ||
1483 | 1520 | ||
1484 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | 1521 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
1485 | * page, we use it in the event channel upcall and in some pvclock | 1522 | * page, we use it in the event channel upcall and in some pvclock |
1486 | * related functions. We don't need the vcpu_info placement | 1523 | * related functions. We don't need the vcpu_info placement |
1487 | * optimizations because we don't use any pv_mmu or pv_irq op on | 1524 | * optimizations because we don't use any pv_mmu or pv_irq op on |
1488 | * HVM. | 1525 | * HVM. |
1489 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is | 1526 | * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is |
1490 | * online but xen_hvm_init_shared_info is run at resume time too and | 1527 | * online but xen_hvm_set_shared_info is run at resume time too and |
1491 | * in that case multiple vcpus might be online. */ | 1528 | * in that case multiple vcpus might be online. */ |
1492 | for_each_online_cpu(cpu) { | 1529 | for_each_online_cpu(cpu) { |
1493 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1530 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
1494 | } | 1531 | } |
1495 | } | 1532 | } |
1496 | 1533 | ||
1497 | #ifdef CONFIG_XEN_PVHVM | 1534 | /* Reconnect the shared_info pfn to a mfn */ |
1535 | void xen_hvm_resume_shared_info(void) | ||
1536 | { | ||
1537 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1538 | } | ||
1539 | |||
1540 | #ifdef CONFIG_KEXEC | ||
1541 | static struct shared_info *xen_hvm_shared_info_kexec; | ||
1542 | static unsigned long xen_hvm_shared_info_pfn_kexec; | ||
1543 | |||
1544 | /* Remember a pfn in MMIO space for kexec reboot */ | ||
1545 | void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) | ||
1546 | { | ||
1547 | xen_hvm_shared_info_kexec = sip; | ||
1548 | xen_hvm_shared_info_pfn_kexec = pfn; | ||
1549 | } | ||
1550 | |||
1551 | static void xen_hvm_syscore_shutdown(void) | ||
1552 | { | ||
1553 | struct xen_memory_reservation reservation = { | ||
1554 | .domid = DOMID_SELF, | ||
1555 | .nr_extents = 1, | ||
1556 | }; | ||
1557 | unsigned long prev_pfn; | ||
1558 | int rc; | ||
1559 | |||
1560 | if (!xen_hvm_shared_info_kexec) | ||
1561 | return; | ||
1562 | |||
1563 | prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; | ||
1564 | set_xen_guest_handle(reservation.extent_start, &prev_pfn); | ||
1565 | |||
1566 | /* Move pfn to MMIO, disconnects previous pfn from mfn */ | ||
1567 | xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); | ||
1568 | |||
1569 | /* Update pointers, following hypercall is also a memory barrier */ | ||
1570 | xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); | ||
1571 | |||
1572 | /* Allocate new mfn for previous pfn */ | ||
1573 | do { | ||
1574 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | ||
1575 | if (rc == 0) | ||
1576 | msleep(123); | ||
1577 | } while (rc == 0); | ||
1578 | |||
1579 | /* Make sure the previous pfn is really connected to a (new) mfn */ | ||
1580 | BUG_ON(rc != 1); | ||
1581 | } | ||
1582 | |||
1583 | static struct syscore_ops xen_hvm_syscore_ops = { | ||
1584 | .shutdown = xen_hvm_syscore_shutdown, | ||
1585 | }; | ||
1586 | #endif | ||
1587 | |||
1588 | /* Use a pfn in RAM, may move to MMIO before kexec. */ | ||
1589 | static void __init xen_hvm_init_shared_info(void) | ||
1590 | { | ||
1591 | /* Remember pointer for resume */ | ||
1592 | xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1593 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1594 | xen_hvm_set_shared_info(xen_hvm_shared_info); | ||
1595 | } | ||
1596 | |||
1597 | static void __init init_hvm_pv_info(void) | ||
1598 | { | ||
1599 | int major, minor; | ||
1600 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | ||
1601 | u64 pfn; | ||
1602 | |||
1603 | base = xen_cpuid_base(); | ||
1604 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | ||
1605 | |||
1606 | major = eax >> 16; | ||
1607 | minor = eax & 0xffff; | ||
1608 | printk(KERN_INFO "Xen version %d.%d.\n", major, minor); | ||
1609 | |||
1610 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | ||
1611 | |||
1612 | pfn = __pa(hypercall_page); | ||
1613 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | ||
1614 | |||
1615 | xen_setup_features(); | ||
1616 | |||
1617 | pv_info.name = "Xen HVM"; | ||
1618 | |||
1619 | xen_domain_type = XEN_HVM_DOMAIN; | ||
1620 | } | ||
1621 | |||
1498 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | 1622 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, |
1499 | unsigned long action, void *hcpu) | 1623 | unsigned long action, void *hcpu) |
1500 | { | 1624 | { |
@@ -1517,14 +1641,12 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { | |||
1517 | 1641 | ||
1518 | static void __init xen_hvm_guest_init(void) | 1642 | static void __init xen_hvm_guest_init(void) |
1519 | { | 1643 | { |
1520 | int r; | 1644 | init_hvm_pv_info(); |
1521 | int major, minor; | ||
1522 | |||
1523 | r = init_hvm_pv_info(&major, &minor); | ||
1524 | if (r < 0) | ||
1525 | return; | ||
1526 | 1645 | ||
1527 | xen_hvm_init_shared_info(); | 1646 | xen_hvm_init_shared_info(); |
1647 | #ifdef CONFIG_KEXEC | ||
1648 | register_syscore_ops(&xen_hvm_syscore_ops); | ||
1649 | #endif | ||
1528 | 1650 | ||
1529 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1651 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1530 | xen_have_vector_callback = 1; | 1652 | xen_have_vector_callback = 1; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3a73785631ce..27336dfcda8e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -308,8 +308,20 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) | |||
308 | 308 | ||
309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) | 309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) |
310 | { | 310 | { |
311 | if (!xen_batched_set_pte(ptep, pteval)) | 311 | if (!xen_batched_set_pte(ptep, pteval)) { |
312 | native_set_pte(ptep, pteval); | 312 | /* |
313 | * Could call native_set_pte() here and trap and | ||
314 | * emulate the PTE write but with 32-bit guests this | ||
315 | * needs two traps (one for each of the two 32-bit | ||
316 | * words in the PTE) so do one hypercall directly | ||
317 | * instead. | ||
318 | */ | ||
319 | struct mmu_update u; | ||
320 | |||
321 | u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; | ||
322 | u.val = pte_val_ma(pteval); | ||
323 | HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF); | ||
324 | } | ||
313 | } | 325 | } |
314 | 326 | ||
315 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | 327 | static void xen_set_pte(pte_t *ptep, pte_t pteval) |
@@ -1416,13 +1428,28 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1416 | } | 1428 | } |
1417 | #endif /* CONFIG_X86_64 */ | 1429 | #endif /* CONFIG_X86_64 */ |
1418 | 1430 | ||
1419 | /* Init-time set_pte while constructing initial pagetables, which | 1431 | /* |
1420 | doesn't allow RO pagetable pages to be remapped RW */ | 1432 | * Init-time set_pte while constructing initial pagetables, which |
1433 | * doesn't allow RO page table pages to be remapped RW. | ||
1434 | * | ||
1435 | * If there is no MFN for this PFN then this page is initially | ||
1436 | * ballooned out so clear the PTE (as in decrease_reservation() in | ||
1437 | * drivers/xen/balloon.c). | ||
1438 | * | ||
1439 | * Many of these PTE updates are done on unpinned and writable pages | ||
1440 | * and doing a hypercall for these is unnecessary and expensive. At | ||
1441 | * this point it is not possible to tell if a page is pinned or not, | ||
1442 | * so always write the PTE directly and rely on Xen trapping and | ||
1443 | * emulating any updates as necessary. | ||
1444 | */ | ||
1421 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) | 1445 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1422 | { | 1446 | { |
1423 | pte = mask_rw_pte(ptep, pte); | 1447 | if (pte_mfn(pte) != INVALID_P2M_ENTRY) |
1448 | pte = mask_rw_pte(ptep, pte); | ||
1449 | else | ||
1450 | pte = __pte_ma(0); | ||
1424 | 1451 | ||
1425 | xen_set_pte(ptep, pte); | 1452 | native_set_pte(ptep, pte); |
1426 | } | 1453 | } |
1427 | 1454 | ||
1428 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1455 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a4790bf22c59..ead85576d54a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -157,25 +157,24 @@ static unsigned long __init xen_populate_chunk( | |||
157 | unsigned long dest_pfn; | 157 | unsigned long dest_pfn; |
158 | 158 | ||
159 | for (i = 0, entry = list; i < map_size; i++, entry++) { | 159 | for (i = 0, entry = list; i < map_size; i++, entry++) { |
160 | unsigned long credits = credits_left; | ||
161 | unsigned long s_pfn; | 160 | unsigned long s_pfn; |
162 | unsigned long e_pfn; | 161 | unsigned long e_pfn; |
163 | unsigned long pfns; | 162 | unsigned long pfns; |
164 | long capacity; | 163 | long capacity; |
165 | 164 | ||
166 | if (credits <= 0) | 165 | if (credits_left <= 0) |
167 | break; | 166 | break; |
168 | 167 | ||
169 | if (entry->type != E820_RAM) | 168 | if (entry->type != E820_RAM) |
170 | continue; | 169 | continue; |
171 | 170 | ||
172 | e_pfn = PFN_UP(entry->addr + entry->size); | 171 | e_pfn = PFN_DOWN(entry->addr + entry->size); |
173 | 172 | ||
174 | /* We only care about E820 after the xen_start_info->nr_pages */ | 173 | /* We only care about E820 after the xen_start_info->nr_pages */ |
175 | if (e_pfn <= max_pfn) | 174 | if (e_pfn <= max_pfn) |
176 | continue; | 175 | continue; |
177 | 176 | ||
178 | s_pfn = PFN_DOWN(entry->addr); | 177 | s_pfn = PFN_UP(entry->addr); |
179 | /* If the E820 falls within the nr_pages, we want to start | 178 | /* If the E820 falls within the nr_pages, we want to start |
180 | * at the nr_pages PFN. | 179 | * at the nr_pages PFN. |
181 | * If that would mean going past the E820 entry, skip it | 180 | * If that would mean going past the E820 entry, skip it |
@@ -184,23 +183,19 @@ static unsigned long __init xen_populate_chunk( | |||
184 | capacity = e_pfn - max_pfn; | 183 | capacity = e_pfn - max_pfn; |
185 | dest_pfn = max_pfn; | 184 | dest_pfn = max_pfn; |
186 | } else { | 185 | } else { |
187 | /* last_pfn MUST be within E820_RAM regions */ | ||
188 | if (*last_pfn && e_pfn >= *last_pfn) | ||
189 | s_pfn = *last_pfn; | ||
190 | capacity = e_pfn - s_pfn; | 186 | capacity = e_pfn - s_pfn; |
191 | dest_pfn = s_pfn; | 187 | dest_pfn = s_pfn; |
192 | } | 188 | } |
193 | /* If we had filled this E820_RAM entry, go to the next one. */ | ||
194 | if (capacity <= 0) | ||
195 | continue; | ||
196 | 189 | ||
197 | if (credits > capacity) | 190 | if (credits_left < capacity) |
198 | credits = capacity; | 191 | capacity = credits_left; |
199 | 192 | ||
200 | pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false); | 193 | pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false); |
201 | done += pfns; | 194 | done += pfns; |
202 | credits_left -= pfns; | ||
203 | *last_pfn = (dest_pfn + pfns); | 195 | *last_pfn = (dest_pfn + pfns); |
196 | if (pfns < capacity) | ||
197 | break; | ||
198 | credits_left -= pfns; | ||
204 | } | 199 | } |
205 | return done; | 200 | return done; |
206 | } | 201 | } |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 45329c8c226e..ae8a00c39de4 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) | |||
30 | { | 30 | { |
31 | #ifdef CONFIG_XEN_PVHVM | 31 | #ifdef CONFIG_XEN_PVHVM |
32 | int cpu; | 32 | int cpu; |
33 | xen_hvm_init_shared_info(); | 33 | xen_hvm_resume_shared_info(); |
34 | xen_callback_vector(); | 34 | xen_callback_vector(); |
35 | xen_unplug_emulated_devices(); | 35 | xen_unplug_emulated_devices(); |
36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | 36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c150154..1e4329e04e0f 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -41,7 +41,7 @@ void xen_enable_syscall(void); | |||
41 | void xen_vcpu_restore(void); | 41 | void xen_vcpu_restore(void); |
42 | 42 | ||
43 | void xen_callback_vector(void); | 43 | void xen_callback_vector(void); |
44 | void xen_hvm_init_shared_info(void); | 44 | void xen_hvm_resume_shared_info(void); |
45 | void xen_unplug_emulated_devices(void); | 45 | void xen_unplug_emulated_devices(void); |
46 | 46 | ||
47 | void __init xen_build_dynamic_phys_to_machine(void); | 47 | void __init xen_build_dynamic_phys_to_machine(void); |