diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/spinlock.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/irq.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/microcode_amd.c | 7 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 30 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 13 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 5 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 118 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/p2m.c | 95 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 9 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 2 |
13 files changed, 155 insertions, 135 deletions
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b315a33867f2..33692eaabab5 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -12,8 +12,7 @@ | |||
12 | * Simple spin lock operations. There are two variants, one clears IRQ's | 12 | * Simple spin lock operations. There are two variants, one clears IRQ's |
13 | * on the local processor, one does not. | 13 | * on the local processor, one does not. |
14 | * | 14 | * |
15 | * These are fair FIFO ticket locks, which are currently limited to 256 | 15 | * These are fair FIFO ticket locks, which support up to 2^16 CPUs. |
16 | * CPUs. | ||
17 | * | 16 | * |
18 | * (the type definitions are in asm/spinlock_types.h) | 17 | * (the type definitions are in asm/spinlock_types.h) |
19 | */ | 18 | */ |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index afb7ff79a29f..ced4534baed5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -165,7 +165,7 @@ static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = | |||
165 | #endif | 165 | #endif |
166 | 166 | ||
167 | #ifdef P6_NOP1 | 167 | #ifdef P6_NOP1 |
168 | static const unsigned char __initconst_or_module p6nops[] = | 168 | static const unsigned char p6nops[] = |
169 | { | 169 | { |
170 | P6_NOP1, | 170 | P6_NOP1, |
171 | P6_NOP2, | 171 | P6_NOP2, |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7ad683d78645..d44f7829968e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -270,7 +270,7 @@ void fixup_irqs(void) | |||
270 | 270 | ||
271 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | 271 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { |
272 | break_affinity = 1; | 272 | break_affinity = 1; |
273 | affinity = cpu_all_mask; | 273 | affinity = cpu_online_mask; |
274 | } | 274 | } |
275 | 275 | ||
276 | chip = irq_data_get_irq_chip(data); | 276 | chip = irq_data_get_irq_chip(data); |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 8a2ce8fd41c0..82746f942cd8 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -143,11 +143,12 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr, | |||
143 | unsigned int *current_size) | 143 | unsigned int *current_size) |
144 | { | 144 | { |
145 | struct microcode_header_amd *mc_hdr; | 145 | struct microcode_header_amd *mc_hdr; |
146 | unsigned int actual_size; | 146 | unsigned int actual_size, patch_size; |
147 | u16 equiv_cpu_id; | 147 | u16 equiv_cpu_id; |
148 | 148 | ||
149 | /* size of the current patch we're staring at */ | 149 | /* size of the current patch we're staring at */ |
150 | *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE; | 150 | patch_size = *(u32 *)(ucode_ptr + 4); |
151 | *current_size = patch_size + SECTION_HDR_SIZE; | ||
151 | 152 | ||
152 | equiv_cpu_id = find_equiv_id(); | 153 | equiv_cpu_id = find_equiv_id(); |
153 | if (!equiv_cpu_id) | 154 | if (!equiv_cpu_id) |
@@ -174,7 +175,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr, | |||
174 | /* | 175 | /* |
175 | * now that the header looks sane, verify its size | 176 | * now that the header looks sane, verify its size |
176 | */ | 177 | */ |
177 | actual_size = verify_ucode_size(cpu, *current_size, leftover_size); | 178 | actual_size = verify_ucode_size(cpu, patch_size, leftover_size); |
178 | if (!actual_size) | 179 | if (!actual_size) |
179 | return 0; | 180 | return 0; |
180 | 181 | ||
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 97d9a9914ba8..a3b57a27be88 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -475,13 +475,26 @@ register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg) | |||
475 | return address_mask(ctxt, reg); | 475 | return address_mask(ctxt, reg); |
476 | } | 476 | } |
477 | 477 | ||
478 | static void masked_increment(ulong *reg, ulong mask, int inc) | ||
479 | { | ||
480 | assign_masked(reg, *reg + inc, mask); | ||
481 | } | ||
482 | |||
478 | static inline void | 483 | static inline void |
479 | register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc) | 484 | register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc) |
480 | { | 485 | { |
486 | ulong mask; | ||
487 | |||
481 | if (ctxt->ad_bytes == sizeof(unsigned long)) | 488 | if (ctxt->ad_bytes == sizeof(unsigned long)) |
482 | *reg += inc; | 489 | mask = ~0UL; |
483 | else | 490 | else |
484 | *reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt)); | 491 | mask = ad_mask(ctxt); |
492 | masked_increment(reg, mask, inc); | ||
493 | } | ||
494 | |||
495 | static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) | ||
496 | { | ||
497 | masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc); | ||
485 | } | 498 | } |
486 | 499 | ||
487 | static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) | 500 | static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) |
@@ -1522,8 +1535,8 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) | |||
1522 | { | 1535 | { |
1523 | struct segmented_address addr; | 1536 | struct segmented_address addr; |
1524 | 1537 | ||
1525 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes); | 1538 | rsp_increment(ctxt, -bytes); |
1526 | addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); | 1539 | addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); |
1527 | addr.seg = VCPU_SREG_SS; | 1540 | addr.seg = VCPU_SREG_SS; |
1528 | 1541 | ||
1529 | return segmented_write(ctxt, addr, data, bytes); | 1542 | return segmented_write(ctxt, addr, data, bytes); |
@@ -1542,13 +1555,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1542 | int rc; | 1555 | int rc; |
1543 | struct segmented_address addr; | 1556 | struct segmented_address addr; |
1544 | 1557 | ||
1545 | addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); | 1558 | addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); |
1546 | addr.seg = VCPU_SREG_SS; | 1559 | addr.seg = VCPU_SREG_SS; |
1547 | rc = segmented_read(ctxt, addr, dest, len); | 1560 | rc = segmented_read(ctxt, addr, dest, len); |
1548 | if (rc != X86EMUL_CONTINUE) | 1561 | if (rc != X86EMUL_CONTINUE) |
1549 | return rc; | 1562 | return rc; |
1550 | 1563 | ||
1551 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len); | 1564 | rsp_increment(ctxt, len); |
1552 | return rc; | 1565 | return rc; |
1553 | } | 1566 | } |
1554 | 1567 | ||
@@ -1688,8 +1701,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) | |||
1688 | 1701 | ||
1689 | while (reg >= VCPU_REGS_RAX) { | 1702 | while (reg >= VCPU_REGS_RAX) { |
1690 | if (reg == VCPU_REGS_RSP) { | 1703 | if (reg == VCPU_REGS_RSP) { |
1691 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], | 1704 | rsp_increment(ctxt, ctxt->op_bytes); |
1692 | ctxt->op_bytes); | ||
1693 | --reg; | 1705 | --reg; |
1694 | } | 1706 | } |
1695 | 1707 | ||
@@ -2825,7 +2837,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | |||
2825 | rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); | 2837 | rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); |
2826 | if (rc != X86EMUL_CONTINUE) | 2838 | if (rc != X86EMUL_CONTINUE) |
2827 | return rc; | 2839 | return rc; |
2828 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val); | 2840 | rsp_increment(ctxt, ctxt->src.val); |
2829 | return X86EMUL_CONTINUE; | 2841 | return X86EMUL_CONTINUE; |
2830 | } | 2842 | } |
2831 | 2843 | ||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01ca00423938..7fbd0d273ea8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -4113,16 +4113,21 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
4113 | LIST_HEAD(invalid_list); | 4113 | LIST_HEAD(invalid_list); |
4114 | 4114 | ||
4115 | /* | 4115 | /* |
4116 | * Never scan more than sc->nr_to_scan VM instances. | ||
4117 | * Will not hit this condition practically since we do not try | ||
4118 | * to shrink more than one VM and it is very unlikely to see | ||
4119 | * !n_used_mmu_pages so many times. | ||
4120 | */ | ||
4121 | if (!nr_to_scan--) | ||
4122 | break; | ||
4123 | /* | ||
4116 | * n_used_mmu_pages is accessed without holding kvm->mmu_lock | 4124 | * n_used_mmu_pages is accessed without holding kvm->mmu_lock |
4117 | * here. We may skip a VM instance errorneosly, but we do not | 4125 | * here. We may skip a VM instance errorneosly, but we do not |
4118 | * want to shrink a VM that only started to populate its MMU | 4126 | * want to shrink a VM that only started to populate its MMU |
4119 | * anyway. | 4127 | * anyway. |
4120 | */ | 4128 | */ |
4121 | if (kvm->arch.n_used_mmu_pages > 0) { | 4129 | if (!kvm->arch.n_used_mmu_pages) |
4122 | if (!nr_to_scan--) | ||
4123 | break; | ||
4124 | continue; | 4130 | continue; |
4125 | } | ||
4126 | 4131 | ||
4127 | idx = srcu_read_lock(&kvm->srcu); | 4132 | idx = srcu_read_lock(&kvm->srcu); |
4128 | spin_lock(&kvm->mmu_lock); | 4133 | spin_lock(&kvm->mmu_lock); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 42bce48f6928..148ed666e311 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -806,7 +806,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); | |||
806 | * kvm-specific. Those are put in the beginning of the list. | 806 | * kvm-specific. Those are put in the beginning of the list. |
807 | */ | 807 | */ |
808 | 808 | ||
809 | #define KVM_SAVE_MSRS_BEGIN 9 | 809 | #define KVM_SAVE_MSRS_BEGIN 10 |
810 | static u32 msrs_to_save[] = { | 810 | static u32 msrs_to_save[] = { |
811 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 811 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
812 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 812 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
@@ -2000,6 +2000,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2000 | case MSR_KVM_STEAL_TIME: | 2000 | case MSR_KVM_STEAL_TIME: |
2001 | data = vcpu->arch.st.msr_val; | 2001 | data = vcpu->arch.st.msr_val; |
2002 | break; | 2002 | break; |
2003 | case MSR_KVM_PV_EOI_EN: | ||
2004 | data = vcpu->arch.pv_eoi.msr_val; | ||
2005 | break; | ||
2003 | case MSR_IA32_P5_MC_ADDR: | 2006 | case MSR_IA32_P5_MC_ADDR: |
2004 | case MSR_IA32_P5_MC_TYPE: | 2007 | case MSR_IA32_P5_MC_TYPE: |
2005 | case MSR_IA32_MCG_CAP: | 2008 | case MSR_IA32_MCG_CAP: |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bf4bda6d3e9a..9642d4a38602 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
34 | #include <linux/syscore_ops.h> | ||
35 | 34 | ||
36 | #include <xen/xen.h> | 35 | #include <xen/xen.h> |
37 | #include <xen/interface/xen.h> | 36 | #include <xen/interface/xen.h> |
@@ -1470,130 +1469,38 @@ asmlinkage void __init xen_start_kernel(void) | |||
1470 | #endif | 1469 | #endif |
1471 | } | 1470 | } |
1472 | 1471 | ||
1473 | #ifdef CONFIG_XEN_PVHVM | 1472 | void __ref xen_hvm_init_shared_info(void) |
1474 | /* | ||
1475 | * The pfn containing the shared_info is located somewhere in RAM. This | ||
1476 | * will cause trouble if the current kernel is doing a kexec boot into a | ||
1477 | * new kernel. The new kernel (and its startup code) can not know where | ||
1478 | * the pfn is, so it can not reserve the page. The hypervisor will | ||
1479 | * continue to update the pfn, and as a result memory corruption occours | ||
1480 | * in the new kernel. | ||
1481 | * | ||
1482 | * One way to work around this issue is to allocate a page in the | ||
1483 | * xen-platform pci device's BAR memory range. But pci init is done very | ||
1484 | * late and the shared_info page is already in use very early to read | ||
1485 | * the pvclock. So moving the pfn from RAM to MMIO is racy because some | ||
1486 | * code paths on other vcpus could access the pfn during the small | ||
1487 | * window when the old pfn is moved to the new pfn. There is even a | ||
1488 | * small window were the old pfn is not backed by a mfn, and during that | ||
1489 | * time all reads return -1. | ||
1490 | * | ||
1491 | * Because it is not known upfront where the MMIO region is located it | ||
1492 | * can not be used right from the start in xen_hvm_init_shared_info. | ||
1493 | * | ||
1494 | * To minimise trouble the move of the pfn is done shortly before kexec. | ||
1495 | * This does not eliminate the race because all vcpus are still online | ||
1496 | * when the syscore_ops will be called. But hopefully there is no work | ||
1497 | * pending at this point in time. Also the syscore_op is run last which | ||
1498 | * reduces the risk further. | ||
1499 | */ | ||
1500 | |||
1501 | static struct shared_info *xen_hvm_shared_info; | ||
1502 | |||
1503 | static void xen_hvm_connect_shared_info(unsigned long pfn) | ||
1504 | { | 1473 | { |
1474 | int cpu; | ||
1505 | struct xen_add_to_physmap xatp; | 1475 | struct xen_add_to_physmap xatp; |
1476 | static struct shared_info *shared_info_page = 0; | ||
1506 | 1477 | ||
1478 | if (!shared_info_page) | ||
1479 | shared_info_page = (struct shared_info *) | ||
1480 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1507 | xatp.domid = DOMID_SELF; | 1481 | xatp.domid = DOMID_SELF; |
1508 | xatp.idx = 0; | 1482 | xatp.idx = 0; |
1509 | xatp.space = XENMAPSPACE_shared_info; | 1483 | xatp.space = XENMAPSPACE_shared_info; |
1510 | xatp.gpfn = pfn; | 1484 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; |
1511 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | 1485 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
1512 | BUG(); | 1486 | BUG(); |
1513 | 1487 | ||
1514 | } | 1488 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; |
1515 | static void xen_hvm_set_shared_info(struct shared_info *sip) | ||
1516 | { | ||
1517 | int cpu; | ||
1518 | |||
1519 | HYPERVISOR_shared_info = sip; | ||
1520 | 1489 | ||
1521 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | 1490 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
1522 | * page, we use it in the event channel upcall and in some pvclock | 1491 | * page, we use it in the event channel upcall and in some pvclock |
1523 | * related functions. We don't need the vcpu_info placement | 1492 | * related functions. We don't need the vcpu_info placement |
1524 | * optimizations because we don't use any pv_mmu or pv_irq op on | 1493 | * optimizations because we don't use any pv_mmu or pv_irq op on |
1525 | * HVM. | 1494 | * HVM. |
1526 | * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is | 1495 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is |
1527 | * online but xen_hvm_set_shared_info is run at resume time too and | 1496 | * online but xen_hvm_init_shared_info is run at resume time too and |
1528 | * in that case multiple vcpus might be online. */ | 1497 | * in that case multiple vcpus might be online. */ |
1529 | for_each_online_cpu(cpu) { | 1498 | for_each_online_cpu(cpu) { |
1530 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1499 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
1531 | } | 1500 | } |
1532 | } | 1501 | } |
1533 | 1502 | ||
1534 | /* Reconnect the shared_info pfn to a mfn */ | 1503 | #ifdef CONFIG_XEN_PVHVM |
1535 | void xen_hvm_resume_shared_info(void) | ||
1536 | { | ||
1537 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1538 | } | ||
1539 | |||
1540 | #ifdef CONFIG_KEXEC | ||
1541 | static struct shared_info *xen_hvm_shared_info_kexec; | ||
1542 | static unsigned long xen_hvm_shared_info_pfn_kexec; | ||
1543 | |||
1544 | /* Remember a pfn in MMIO space for kexec reboot */ | ||
1545 | void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) | ||
1546 | { | ||
1547 | xen_hvm_shared_info_kexec = sip; | ||
1548 | xen_hvm_shared_info_pfn_kexec = pfn; | ||
1549 | } | ||
1550 | |||
1551 | static void xen_hvm_syscore_shutdown(void) | ||
1552 | { | ||
1553 | struct xen_memory_reservation reservation = { | ||
1554 | .domid = DOMID_SELF, | ||
1555 | .nr_extents = 1, | ||
1556 | }; | ||
1557 | unsigned long prev_pfn; | ||
1558 | int rc; | ||
1559 | |||
1560 | if (!xen_hvm_shared_info_kexec) | ||
1561 | return; | ||
1562 | |||
1563 | prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; | ||
1564 | set_xen_guest_handle(reservation.extent_start, &prev_pfn); | ||
1565 | |||
1566 | /* Move pfn to MMIO, disconnects previous pfn from mfn */ | ||
1567 | xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); | ||
1568 | |||
1569 | /* Update pointers, following hypercall is also a memory barrier */ | ||
1570 | xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); | ||
1571 | |||
1572 | /* Allocate new mfn for previous pfn */ | ||
1573 | do { | ||
1574 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | ||
1575 | if (rc == 0) | ||
1576 | msleep(123); | ||
1577 | } while (rc == 0); | ||
1578 | |||
1579 | /* Make sure the previous pfn is really connected to a (new) mfn */ | ||
1580 | BUG_ON(rc != 1); | ||
1581 | } | ||
1582 | |||
1583 | static struct syscore_ops xen_hvm_syscore_ops = { | ||
1584 | .shutdown = xen_hvm_syscore_shutdown, | ||
1585 | }; | ||
1586 | #endif | ||
1587 | |||
1588 | /* Use a pfn in RAM, may move to MMIO before kexec. */ | ||
1589 | static void __init xen_hvm_init_shared_info(void) | ||
1590 | { | ||
1591 | /* Remember pointer for resume */ | ||
1592 | xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1593 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1594 | xen_hvm_set_shared_info(xen_hvm_shared_info); | ||
1595 | } | ||
1596 | |||
1597 | static void __init init_hvm_pv_info(void) | 1504 | static void __init init_hvm_pv_info(void) |
1598 | { | 1505 | { |
1599 | int major, minor; | 1506 | int major, minor; |
@@ -1644,9 +1551,6 @@ static void __init xen_hvm_guest_init(void) | |||
1644 | init_hvm_pv_info(); | 1551 | init_hvm_pv_info(); |
1645 | 1552 | ||
1646 | xen_hvm_init_shared_info(); | 1553 | xen_hvm_init_shared_info(); |
1647 | #ifdef CONFIG_KEXEC | ||
1648 | register_syscore_ops(&xen_hvm_syscore_ops); | ||
1649 | #endif | ||
1650 | 1554 | ||
1651 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1555 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1652 | xen_have_vector_callback = 1; | 1556 | xen_have_vector_callback = 1; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b65a76133f4f..5141d808e751 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1283,7 +1283,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1283 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); | 1283 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); |
1284 | 1284 | ||
1285 | args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; | 1285 | args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; |
1286 | if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { | 1286 | if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { |
1287 | args->op.cmd = MMUEXT_INVLPG_MULTI; | 1287 | args->op.cmd = MMUEXT_INVLPG_MULTI; |
1288 | args->op.arg1.linear_addr = start; | 1288 | args->op.arg1.linear_addr = start; |
1289 | } | 1289 | } |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b2e91d40a4cb..76ba0e97e530 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -196,9 +196,11 @@ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); | |||
196 | 196 | ||
197 | /* When we populate back during bootup, the amount of pages can vary. The | 197 | /* When we populate back during bootup, the amount of pages can vary. The |
198 | * max we have is seen is 395979, but that does not mean it can't be more. | 198 | * max we have is seen is 395979, but that does not mean it can't be more. |
199 | * But some machines can have 3GB I/O holes even. So lets reserve enough | 199 | * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle |
200 | * for 4GB of I/O and E820 holes. */ | 200 | * it can re-use Xen provided mfn_list array, so we only need to allocate at |
201 | RESERVE_BRK(p2m_populated, PMD_SIZE * 4); | 201 | * most three P2M top nodes. */ |
202 | RESERVE_BRK(p2m_populated, PAGE_SIZE * 3); | ||
203 | |||
202 | static inline unsigned p2m_top_index(unsigned long pfn) | 204 | static inline unsigned p2m_top_index(unsigned long pfn) |
203 | { | 205 | { |
204 | BUG_ON(pfn >= MAX_P2M_PFN); | 206 | BUG_ON(pfn >= MAX_P2M_PFN); |
@@ -575,12 +577,99 @@ static bool __init early_alloc_p2m(unsigned long pfn) | |||
575 | } | 577 | } |
576 | return true; | 578 | return true; |
577 | } | 579 | } |
580 | |||
581 | /* | ||
582 | * Skim over the P2M tree looking at pages that are either filled with | ||
583 | * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and | ||
584 | * replace the P2M leaf with a p2m_missing or p2m_identity. | ||
585 | * Stick the old page in the new P2M tree location. | ||
586 | */ | ||
587 | bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) | ||
588 | { | ||
589 | unsigned topidx; | ||
590 | unsigned mididx; | ||
591 | unsigned ident_pfns; | ||
592 | unsigned inv_pfns; | ||
593 | unsigned long *p2m; | ||
594 | unsigned long *mid_mfn_p; | ||
595 | unsigned idx; | ||
596 | unsigned long pfn; | ||
597 | |||
598 | /* We only look when this entails a P2M middle layer */ | ||
599 | if (p2m_index(set_pfn)) | ||
600 | return false; | ||
601 | |||
602 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { | ||
603 | topidx = p2m_top_index(pfn); | ||
604 | |||
605 | if (!p2m_top[topidx]) | ||
606 | continue; | ||
607 | |||
608 | if (p2m_top[topidx] == p2m_mid_missing) | ||
609 | continue; | ||
610 | |||
611 | mididx = p2m_mid_index(pfn); | ||
612 | p2m = p2m_top[topidx][mididx]; | ||
613 | if (!p2m) | ||
614 | continue; | ||
615 | |||
616 | if ((p2m == p2m_missing) || (p2m == p2m_identity)) | ||
617 | continue; | ||
618 | |||
619 | if ((unsigned long)p2m == INVALID_P2M_ENTRY) | ||
620 | continue; | ||
621 | |||
622 | ident_pfns = 0; | ||
623 | inv_pfns = 0; | ||
624 | for (idx = 0; idx < P2M_PER_PAGE; idx++) { | ||
625 | /* IDENTITY_PFNs are 1:1 */ | ||
626 | if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) | ||
627 | ident_pfns++; | ||
628 | else if (p2m[idx] == INVALID_P2M_ENTRY) | ||
629 | inv_pfns++; | ||
630 | else | ||
631 | break; | ||
632 | } | ||
633 | if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) | ||
634 | goto found; | ||
635 | } | ||
636 | return false; | ||
637 | found: | ||
638 | /* Found one, replace old with p2m_identity or p2m_missing */ | ||
639 | p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); | ||
640 | /* And the other for save/restore.. */ | ||
641 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
642 | /* NOTE: Even if it is a p2m_identity it should still be point to | ||
643 | * a page filled with INVALID_P2M_ENTRY entries. */ | ||
644 | mid_mfn_p[mididx] = virt_to_mfn(p2m_missing); | ||
645 | |||
646 | /* Reset where we want to stick the old page in. */ | ||
647 | topidx = p2m_top_index(set_pfn); | ||
648 | mididx = p2m_mid_index(set_pfn); | ||
649 | |||
650 | /* This shouldn't happen */ | ||
651 | if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) | ||
652 | early_alloc_p2m(set_pfn); | ||
653 | |||
654 | if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) | ||
655 | return false; | ||
656 | |||
657 | p2m_init(p2m); | ||
658 | p2m_top[topidx][mididx] = p2m; | ||
659 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
660 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
661 | |||
662 | return true; | ||
663 | } | ||
578 | bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 664 | bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
579 | { | 665 | { |
580 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | 666 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
581 | if (!early_alloc_p2m(pfn)) | 667 | if (!early_alloc_p2m(pfn)) |
582 | return false; | 668 | return false; |
583 | 669 | ||
670 | if (early_can_reuse_p2m_middle(pfn, mfn)) | ||
671 | return __set_phys_to_machine(pfn, mfn); | ||
672 | |||
584 | if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) | 673 | if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) |
585 | return false; | 674 | return false; |
586 | 675 | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ead85576d54a..d11ca11d14fc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -78,9 +78,16 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
78 | memblock_reserve(start, size); | 78 | memblock_reserve(start, size); |
79 | 79 | ||
80 | xen_max_p2m_pfn = PFN_DOWN(start + size); | 80 | xen_max_p2m_pfn = PFN_DOWN(start + size); |
81 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | ||
82 | unsigned long mfn = pfn_to_mfn(pfn); | ||
83 | |||
84 | if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) | ||
85 | continue; | ||
86 | WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", | ||
87 | pfn, mfn); | ||
81 | 88 | ||
82 | for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++) | ||
83 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 89 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
90 | } | ||
84 | } | 91 | } |
85 | 92 | ||
86 | static unsigned long __init xen_do_chunk(unsigned long start, | 93 | static unsigned long __init xen_do_chunk(unsigned long start, |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index ae8a00c39de4..45329c8c226e 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) | |||
30 | { | 30 | { |
31 | #ifdef CONFIG_XEN_PVHVM | 31 | #ifdef CONFIG_XEN_PVHVM |
32 | int cpu; | 32 | int cpu; |
33 | xen_hvm_resume_shared_info(); | 33 | xen_hvm_init_shared_info(); |
34 | xen_callback_vector(); | 34 | xen_callback_vector(); |
35 | xen_unplug_emulated_devices(); | 35 | xen_unplug_emulated_devices(); |
36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | 36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 1e4329e04e0f..202d4c150154 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -41,7 +41,7 @@ void xen_enable_syscall(void); | |||
41 | void xen_vcpu_restore(void); | 41 | void xen_vcpu_restore(void); |
42 | 42 | ||
43 | void xen_callback_vector(void); | 43 | void xen_callback_vector(void); |
44 | void xen_hvm_resume_shared_info(void); | 44 | void xen_hvm_init_shared_info(void); |
45 | void xen_unplug_emulated_devices(void); | 45 | void xen_unplug_emulated_devices(void); |
46 | 46 | ||
47 | void __init xen_build_dynamic_phys_to_machine(void); | 47 | void __init xen_build_dynamic_phys_to_machine(void); |