73 files changed, 2413 insertions, 1403 deletions
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 83afe65d4966..22ff659bc0fb 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -43,6 +43,10 @@ KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
43 | KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by | 43 | KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by |
44 | || || writing to msr 0x4b564d02 | 44 | || || writing to msr 0x4b564d02 |
45 | ------------------------------------------------------------------------------ | 45 | ------------------------------------------------------------------------------ |
46 | KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit | ||
47 | || || before enabling paravirtualized | ||
48 | || || spinlock support. | ||
49 | ------------------------------------------------------------------------------ | ||
46 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side | 50 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side |
47 | || || per-cpu warps are expected in | 51 | || || per-cpu warps are expected in |
48 | || || kvmclock. | 52 | || || kvmclock. |
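
As a rough illustration of how a guest could consume the new bit (this sketch is not part of the patch set): the feature is advertised in KVM's CPUID feature leaf 0x40000001, with bit 7 matching the table row above. A real guest would first verify the KVM signature in leaf 0x40000000 and would normally go through its kvm_para helpers rather than raw CPUID; the helper names below are made up for the example.

#include <stdint.h>

#define KVM_CPUID_FEATURES	0x40000001u	/* KVM feature leaf */
#define KVM_FEATURE_PV_UNHALT	7		/* bit number from the table above */

/* Hypothetical helper: read EAX of the KVM feature leaf. */
static inline uint32_t kvm_cpuid_features_eax(void)
{
	uint32_t eax, ebx, ecx, edx;

	__asm__ volatile("cpuid"
			 : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
			 : "a"(KVM_CPUID_FEATURES));
	return eax;			/* KVM reports its feature bits in EAX */
}

/* Hypothetical helper: true if the host advertises PV unhalt support. */
static inline int pv_unhalt_available(void)
{
	return (kvm_cpuid_features_eax() >> KVM_FEATURE_PV_UNHALT) & 1;
}
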
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index ea113b5d87a4..022198e389d7 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -64,3 +64,17 @@ Purpose: To enable communication between the hypervisor and guest there is a
64 | shared page that contains parts of supervisor visible register state. | 64 | shared page that contains parts of supervisor visible register state. |
65 | The guest can map this shared page to access its supervisor register through | 65 | The guest can map this shared page to access its supervisor register through |
66 | memory using this hypercall. | 66 | memory using this hypercall. |
67 | |||
68 | 5. KVM_HC_KICK_CPU | ||
69 | ------------------------ | ||
70 | Architecture: x86 | ||
71 | Status: active | ||
72 | Purpose: Hypercall used to wake up a vcpu from HLT state | ||
73 | Usage example: A vcpu of a paravirtualized guest that is busy-waiting in guest | ||
74 | kernel mode for an event to occur (e.g. a spinlock to become available) can | ||
75 | execute the HLT instruction once it has busy-waited for more than a threshold | ||
76 | time-interval. Execution of the HLT instruction causes the hypervisor to put | ||
77 | the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the | ||
78 | same guest can wake up the sleeping vcpu by issuing the KVM_HC_KICK_CPU hypercall, | ||
79 | specifying the APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) | ||
80 | is reserved in the hypercall for future use. | ||
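
A minimal guest-side sketch of issuing the hypercall (again, not taken from this series) is shown below; it mirrors how a pv-spinlock slow path would kick a halted vcpu. It assumes the existing kvm_para_has_feature() and kvm_hypercall2() helpers from <linux/kvm_para.h>/<asm/kvm_para.h>, plus the KVM_FEATURE_PV_UNHALT and KVM_HC_KICK_CPU defines introduced by this series and its guest-side counterpart; kick_halted_vcpu() itself is a made-up name.

#include <linux/kvm_para.h>	/* kvm_para_has_feature(), kvm_hypercall2() */

/* Hypothetical helper: kick the halted vcpu whose APIC ID is @apicid. */
static void kick_halted_vcpu(int apicid)
{
	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
		return;		/* host has no PV unhalt support; keep spinning */

	/* a0 is reserved for future use, a1 carries the target APIC ID */
	kvm_hypercall2(KVM_HC_KICK_CPU, 0, apicid);
}
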
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 62e968cac9dc..1f36b823905f 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -104,6 +104,7 @@ CONFIG_IP_SCTP=y
104 | CONFIG_VLAN_8021Q=y | 104 | CONFIG_VLAN_8021Q=y |
105 | CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" | 105 | CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" |
106 | CONFIG_CMA=y | 106 | CONFIG_CMA=y |
107 | CONFIG_DMA_CMA=y | ||
107 | CONFIG_MTD=y | 108 | CONFIG_MTD=y |
108 | CONFIG_MTD_CMDLINE_PARTS=y | 109 | CONFIG_MTD_CMDLINE_PARTS=y |
109 | CONFIG_MTD_BLOCK=y | 110 | CONFIG_MTD_BLOCK=y |
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 5339e6a4d639..5465f564fdf3 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -78,6 +78,7 @@ CONFIG_MAC80211_RC_PID=y
78 | CONFIG_MAC80211_RC_DEFAULT_PID=y | 78 | CONFIG_MAC80211_RC_DEFAULT_PID=y |
79 | CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" | 79 | CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" |
80 | CONFIG_CMA=y | 80 | CONFIG_CMA=y |
81 | CONFIG_DMA_CMA=y | ||
81 | CONFIG_CONNECTOR=y | 82 | CONFIG_CONNECTOR=y |
82 | CONFIG_DEVTMPFS=y | 83 | CONFIG_DEVTMPFS=y |
83 | CONFIG_DEVTMPFS_MOUNT=y | 84 | CONFIG_DEVTMPFS_MOUNT=y |
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 1effb43dab80..92d0a149aeb5 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -79,6 +79,7 @@ CONFIG_DEVTMPFS=y
79 | CONFIG_DEVTMPFS_MOUNT=y | 79 | CONFIG_DEVTMPFS_MOUNT=y |
80 | # CONFIG_FIRMWARE_IN_KERNEL is not set | 80 | # CONFIG_FIRMWARE_IN_KERNEL is not set |
81 | CONFIG_CMA=y | 81 | CONFIG_CMA=y |
82 | CONFIG_DMA_CMA=y | ||
82 | CONFIG_MTD=y | 83 | CONFIG_MTD=y |
83 | CONFIG_MTD_M25P80=y | 84 | CONFIG_MTD_M25P80=y |
84 | CONFIG_PROC_DEVICETREE=y | 85 | CONFIG_PROC_DEVICETREE=y |
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
index 3ed37b4d93da..e072bb2ba1b1 100644
--- a/arch/arm/include/asm/dma-contiguous.h
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -2,7 +2,7 @@
2 | #define ASMARM_DMA_CONTIGUOUS_H | 2 | #define ASMARM_DMA_CONTIGUOUS_H |
3 | 3 | ||
4 | #ifdef __KERNEL__ | 4 | #ifdef __KERNEL__ |
5 | #ifdef CONFIG_CMA | 5 | #ifdef CONFIG_DMA_CMA |
6 | 6 | ||
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <asm-generic/dma-contiguous.h> | 8 | #include <asm-generic/dma-contiguous.h> |
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 472ac7091003..9b28c41f4ba9 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -64,7 +64,7 @@ void kvm_clear_hyp_idmap(void);
64 | 64 | ||
65 | static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) | 65 | static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) |
66 | { | 66 | { |
67 | pte_val(*pte) = new_pte; | 67 | *pte = new_pte; |
68 | /* | 68 | /* |
69 | * flush_pmd_entry just takes a void pointer and cleans the necessary | 69 | * flush_pmd_entry just takes a void pointer and cleans the necessary |
70 | * cache entries, so we can reuse the function for ptes. | 70 | * cache entries, so we can reuse the function for ptes. |
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 741f66a2edbd..9c697db2787e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -219,6 +219,10 @@ long kvm_arch_dev_ioctl(struct file *filp,
219 | return -EINVAL; | 219 | return -EINVAL; |
220 | } | 220 | } |
221 | 221 | ||
222 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
223 | { | ||
224 | } | ||
225 | |||
222 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 226 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
223 | struct kvm_memory_slot *memslot, | 227 | struct kvm_memory_slot *memslot, |
224 | struct kvm_userspace_memory_region *mem, | 228 | struct kvm_userspace_memory_region *mem, |
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 16cd4ba5d7fd..85dd84b10687 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -492,10 +492,10 @@ __kvm_hyp_code_end:
492 | .section ".rodata" | 492 | .section ".rodata" |
493 | 493 | ||
494 | und_die_str: | 494 | und_die_str: |
495 | .ascii "unexpected undefined exception in Hyp mode at: %#08x" | 495 | .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" |
496 | pabt_die_str: | 496 | pabt_die_str: |
497 | .ascii "unexpected prefetch abort in Hyp mode at: %#08x" | 497 | .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n" |
498 | dabt_die_str: | 498 | dabt_die_str: |
499 | .ascii "unexpected data abort in Hyp mode at: %#08x" | 499 | .ascii "unexpected data abort in Hyp mode at: %#08x\n" |
500 | svc_die_str: | 500 | svc_die_str: |
501 | .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x" | 501 | .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" |
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b7840e7aa452..71e08baee209 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -40,7 +40,7 @@ static struct kvm_regs a15_regs_reset = {
40 | }; | 40 | }; |
41 | 41 | ||
42 | static const struct kvm_irq_level a15_vtimer_irq = { | 42 | static const struct kvm_irq_level a15_vtimer_irq = { |
43 | .irq = 27, | 43 | { .irq = 27 }, |
44 | .level = 1, | 44 | .level = 1, |
45 | }; | 45 | }; |
46 | 46 | ||
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index a8e73ed5ad5b..b1d640f78623 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault,
59 | __entry->ipa = ipa; | 59 | __entry->ipa = ipa; |
60 | ), | 60 | ), |
61 | 61 | ||
62 | TP_printk("guest fault at PC %#08lx (hxfar %#08lx, " | 62 | TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", |
63 | "ipa %#16llx, hsr %#08lx", | 63 | __entry->ipa, __entry->hsr, |
64 | __entry->vcpu_pc, __entry->hxfar, | 64 | __entry->hxfar, __entry->vcpu_pc) |
65 | __entry->ipa, __entry->hsr) | ||
66 | ); | 65 | ); |
67 | 66 | ||
68 | TRACE_EVENT(kvm_irq_line, | 67 | TRACE_EVENT(kvm_irq_line, |
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7f9b1798c6cf..dbddc07a3bbd 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -358,7 +358,7 @@ static int __init atomic_pool_init(void)
358 | if (!pages) | 358 | if (!pages) |
359 | goto no_pages; | 359 | goto no_pages; |
360 | 360 | ||
361 | if (IS_ENABLED(CONFIG_CMA)) | 361 | if (IS_ENABLED(CONFIG_DMA_CMA)) |
362 | ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, | 362 | ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, |
363 | atomic_pool_init); | 363 | atomic_pool_init); |
364 | else | 364 | else |
@@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
670 | addr = __alloc_simple_buffer(dev, size, gfp, &page); | 670 | addr = __alloc_simple_buffer(dev, size, gfp, &page); |
671 | else if (!(gfp & __GFP_WAIT)) | 671 | else if (!(gfp & __GFP_WAIT)) |
672 | addr = __alloc_from_pool(size, &page); | 672 | addr = __alloc_from_pool(size, &page); |
673 | else if (!IS_ENABLED(CONFIG_CMA)) | 673 | else if (!IS_ENABLED(CONFIG_DMA_CMA)) |
674 | addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); | 674 | addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); |
675 | else | 675 | else |
676 | addr = __alloc_from_contiguous(dev, size, prot, &page, caller); | 676 | addr = __alloc_from_contiguous(dev, size, prot, &page, caller); |
@@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
759 | __dma_free_buffer(page, size); | 759 | __dma_free_buffer(page, size); |
760 | } else if (__free_from_pool(cpu_addr, size)) { | 760 | } else if (__free_from_pool(cpu_addr, size)) { |
761 | return; | 761 | return; |
762 | } else if (!IS_ENABLED(CONFIG_CMA)) { | 762 | } else if (!IS_ENABLED(CONFIG_DMA_CMA)) { |
763 | __dma_free_remap(cpu_addr, size); | 763 | __dma_free_remap(cpu_addr, size); |
764 | __dma_free_buffer(page, size); | 764 | __dma_free_buffer(page, size); |
765 | } else { | 765 | } else { |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5b2dc0d10c8f..bdfd8789b376 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1560,6 +1560,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
1560 | return 0; | 1560 | return 0; |
1561 | } | 1561 | } |
1562 | 1562 | ||
1563 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
1564 | { | ||
1565 | } | ||
1566 | |||
1563 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1567 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
1564 | struct kvm_memory_slot *memslot, | 1568 | struct kvm_memory_slot *memslot, |
1565 | struct kvm_userspace_memory_region *mem, | 1569 | struct kvm_userspace_memory_region *mem, |
diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S
index dca2aa665993..bbace092ad0a 100644
--- a/arch/mips/kvm/kvm_locore.S
+++ b/arch/mips/kvm/kvm_locore.S
@@ -1,13 +1,13 @@
1 | /* | 1 | /* |
2 | * This file is subject to the terms and conditions of the GNU General Public | 2 | * This file is subject to the terms and conditions of the GNU General Public |
3 | * License. See the file "COPYING" in the main directory of this archive | 3 | * License. See the file "COPYING" in the main directory of this archive |
4 | * for more details. | 4 | * for more details. |
5 | * | 5 | * |
6 | * Main entry point for the guest, exception handling. | 6 | * Main entry point for the guest, exception handling. |
7 | * | 7 | * |
8 | * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. | 8 | * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. |
9 | * Authors: Sanjay Lal <sanjayl@kymasys.com> | 9 | * Authors: Sanjay Lal <sanjayl@kymasys.com> |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <asm/asm.h> | 12 | #include <asm/asm.h> |
13 | #include <asm/asmmacro.h> | 13 | #include <asm/asmmacro.h> |
@@ -55,195 +55,193 @@
55 | * a0: run | 55 | * a0: run |
56 | * a1: vcpu | 56 | * a1: vcpu |
57 | */ | 57 | */ |
58 | .set noreorder | ||
59 | .set noat | ||
58 | 60 | ||
59 | FEXPORT(__kvm_mips_vcpu_run) | 61 | FEXPORT(__kvm_mips_vcpu_run) |
60 | .set push | 62 | /* k0/k1 not being used in host kernel context */ |
61 | .set noreorder | 63 | INT_ADDIU k1, sp, -PT_SIZE |
62 | .set noat | 64 | LONG_S $0, PT_R0(k1) |
63 | 65 | LONG_S $1, PT_R1(k1) | |
64 | /* k0/k1 not being used in host kernel context */ | 66 | LONG_S $2, PT_R2(k1) |
65 | addiu k1,sp, -PT_SIZE | 67 | LONG_S $3, PT_R3(k1) |
66 | LONG_S $0, PT_R0(k1) | 68 | |
67 | LONG_S $1, PT_R1(k1) | 69 | LONG_S $4, PT_R4(k1) |
68 | LONG_S $2, PT_R2(k1) | 70 | LONG_S $5, PT_R5(k1) |
69 | LONG_S $3, PT_R3(k1) | 71 | LONG_S $6, PT_R6(k1) |
70 | 72 | LONG_S $7, PT_R7(k1) | |
71 | LONG_S $4, PT_R4(k1) | 73 | |
72 | LONG_S $5, PT_R5(k1) | 74 | LONG_S $8, PT_R8(k1) |
73 | LONG_S $6, PT_R6(k1) | 75 | LONG_S $9, PT_R9(k1) |
74 | LONG_S $7, PT_R7(k1) | 76 | LONG_S $10, PT_R10(k1) |
75 | 77 | LONG_S $11, PT_R11(k1) | |
76 | LONG_S $8, PT_R8(k1) | 78 | LONG_S $12, PT_R12(k1) |
77 | LONG_S $9, PT_R9(k1) | 79 | LONG_S $13, PT_R13(k1) |
78 | LONG_S $10, PT_R10(k1) | 80 | LONG_S $14, PT_R14(k1) |
79 | LONG_S $11, PT_R11(k1) | 81 | LONG_S $15, PT_R15(k1) |
80 | LONG_S $12, PT_R12(k1) | 82 | LONG_S $16, PT_R16(k1) |
81 | LONG_S $13, PT_R13(k1) | 83 | LONG_S $17, PT_R17(k1) |
82 | LONG_S $14, PT_R14(k1) | 84 | |
83 | LONG_S $15, PT_R15(k1) | 85 | LONG_S $18, PT_R18(k1) |
84 | LONG_S $16, PT_R16(k1) | 86 | LONG_S $19, PT_R19(k1) |
85 | LONG_S $17, PT_R17(k1) | 87 | LONG_S $20, PT_R20(k1) |
86 | 88 | LONG_S $21, PT_R21(k1) | |
87 | LONG_S $18, PT_R18(k1) | 89 | LONG_S $22, PT_R22(k1) |
88 | LONG_S $19, PT_R19(k1) | 90 | LONG_S $23, PT_R23(k1) |
89 | LONG_S $20, PT_R20(k1) | 91 | LONG_S $24, PT_R24(k1) |
90 | LONG_S $21, PT_R21(k1) | 92 | LONG_S $25, PT_R25(k1) |
91 | LONG_S $22, PT_R22(k1) | ||
92 | LONG_S $23, PT_R23(k1) | ||
93 | LONG_S $24, PT_R24(k1) | ||
94 | LONG_S $25, PT_R25(k1) | ||
95 | 93 | ||
96 | /* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */ | 94 | /* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */ |
97 | 95 | ||
98 | LONG_S $28, PT_R28(k1) | 96 | LONG_S $28, PT_R28(k1) |
99 | LONG_S $29, PT_R29(k1) | 97 | LONG_S $29, PT_R29(k1) |
100 | LONG_S $30, PT_R30(k1) | 98 | LONG_S $30, PT_R30(k1) |
101 | LONG_S $31, PT_R31(k1) | 99 | LONG_S $31, PT_R31(k1) |
102 | 100 | ||
103 | /* Save hi/lo */ | 101 | /* Save hi/lo */ |
104 | mflo v0 | 102 | mflo v0 |
105 | LONG_S v0, PT_LO(k1) | 103 | LONG_S v0, PT_LO(k1) |
106 | mfhi v1 | 104 | mfhi v1 |
107 | LONG_S v1, PT_HI(k1) | 105 | LONG_S v1, PT_HI(k1) |
108 | 106 | ||
109 | /* Save host status */ | 107 | /* Save host status */ |
110 | mfc0 v0, CP0_STATUS | 108 | mfc0 v0, CP0_STATUS |
111 | LONG_S v0, PT_STATUS(k1) | 109 | LONG_S v0, PT_STATUS(k1) |
112 | 110 | ||
113 | /* Save host ASID, shove it into the BVADDR location */ | 111 | /* Save host ASID, shove it into the BVADDR location */ |
114 | mfc0 v1,CP0_ENTRYHI | 112 | mfc0 v1, CP0_ENTRYHI |
115 | andi v1, 0xff | 113 | andi v1, 0xff |
116 | LONG_S v1, PT_HOST_ASID(k1) | 114 | LONG_S v1, PT_HOST_ASID(k1) |
117 | 115 | ||
118 | /* Save DDATA_LO, will be used to store pointer to vcpu */ | 116 | /* Save DDATA_LO, will be used to store pointer to vcpu */ |
119 | mfc0 v1, CP0_DDATA_LO | 117 | mfc0 v1, CP0_DDATA_LO |
120 | LONG_S v1, PT_HOST_USERLOCAL(k1) | 118 | LONG_S v1, PT_HOST_USERLOCAL(k1) |
121 | 119 | ||
122 | /* DDATA_LO has pointer to vcpu */ | 120 | /* DDATA_LO has pointer to vcpu */ |
123 | mtc0 a1,CP0_DDATA_LO | 121 | mtc0 a1, CP0_DDATA_LO |
124 | 122 | ||
125 | /* Offset into vcpu->arch */ | 123 | /* Offset into vcpu->arch */ |
126 | addiu k1, a1, VCPU_HOST_ARCH | 124 | INT_ADDIU k1, a1, VCPU_HOST_ARCH |
127 | 125 | ||
128 | /* Save the host stack to VCPU, used for exception processing when we exit from the Guest */ | 126 | /* |
129 | LONG_S sp, VCPU_HOST_STACK(k1) | 127 | * Save the host stack to VCPU, used for exception processing |
128 | * when we exit from the Guest | ||
129 | */ | ||
130 | LONG_S sp, VCPU_HOST_STACK(k1) | ||
130 | 131 | ||
131 | /* Save the kernel gp as well */ | 132 | /* Save the kernel gp as well */ |
132 | LONG_S gp, VCPU_HOST_GP(k1) | 133 | LONG_S gp, VCPU_HOST_GP(k1) |
133 | 134 | ||
134 | /* Setup status register for running the guest in UM, interrupts are disabled */ | 135 | /* Setup status register for running the guest in UM, interrupts are disabled */ |
135 | li k0,(ST0_EXL | KSU_USER| ST0_BEV) | 136 | li k0, (ST0_EXL | KSU_USER | ST0_BEV) |
136 | mtc0 k0,CP0_STATUS | 137 | mtc0 k0, CP0_STATUS |
137 | ehb | 138 | ehb |
138 | 139 | ||
139 | /* load up the new EBASE */ | 140 | /* load up the new EBASE */ |
140 | LONG_L k0, VCPU_GUEST_EBASE(k1) | 141 | LONG_L k0, VCPU_GUEST_EBASE(k1) |
141 | mtc0 k0,CP0_EBASE | 142 | mtc0 k0, CP0_EBASE |
142 | 143 | ||
143 | /* Now that the new EBASE has been loaded, unset BEV, set interrupt mask as it was | 144 | /* |
144 | * but make sure that timer interrupts are enabled | 145 | * Now that the new EBASE has been loaded, unset BEV, set |
145 | */ | 146 | * interrupt mask as it was but make sure that timer interrupts |
146 | li k0,(ST0_EXL | KSU_USER | ST0_IE) | 147 | * are enabled |
147 | andi v0, v0, ST0_IM | 148 | */ |
148 | or k0, k0, v0 | 149 | li k0, (ST0_EXL | KSU_USER | ST0_IE) |
149 | mtc0 k0,CP0_STATUS | 150 | andi v0, v0, ST0_IM |
150 | ehb | 151 | or k0, k0, v0 |
152 | mtc0 k0, CP0_STATUS | ||
153 | ehb | ||
151 | 154 | ||
152 | 155 | ||
153 | /* Set Guest EPC */ | 156 | /* Set Guest EPC */ |
154 | LONG_L t0, VCPU_PC(k1) | 157 | LONG_L t0, VCPU_PC(k1) |
155 | mtc0 t0, CP0_EPC | 158 | mtc0 t0, CP0_EPC |
156 | 159 | ||
157 | FEXPORT(__kvm_mips_load_asid) | 160 | FEXPORT(__kvm_mips_load_asid) |
158 | /* Set the ASID for the Guest Kernel */ | 161 | /* Set the ASID for the Guest Kernel */ |
159 | sll t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ | 162 | INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ |
160 | /* addresses shift to 0x80000000 */ | 163 | /* addresses shift to 0x80000000 */ |
161 | bltz t0, 1f /* If kernel */ | 164 | bltz t0, 1f /* If kernel */ |
162 | addiu t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ | 165 | INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ |
163 | addiu t1, k1, VCPU_GUEST_USER_ASID /* else user */ | 166 | INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ |
164 | 1: | 167 | 1: |
165 | /* t1: contains the base of the ASID array, need to get the cpu id */ | 168 | /* t1: contains the base of the ASID array, need to get the cpu id */ |
166 | LONG_L t2, TI_CPU($28) /* smp_processor_id */ | 169 | LONG_L t2, TI_CPU($28) /* smp_processor_id */ |
167 | sll t2, t2, 2 /* x4 */ | 170 | INT_SLL t2, t2, 2 /* x4 */ |
168 | addu t3, t1, t2 | 171 | REG_ADDU t3, t1, t2 |
169 | LONG_L k0, (t3) | 172 | LONG_L k0, (t3) |
170 | andi k0, k0, 0xff | 173 | andi k0, k0, 0xff |
171 | mtc0 k0,CP0_ENTRYHI | 174 | mtc0 k0, CP0_ENTRYHI |
172 | ehb | 175 | ehb |
173 | 176 | ||
174 | /* Disable RDHWR access */ | 177 | /* Disable RDHWR access */ |
175 | mtc0 zero, CP0_HWRENA | 178 | mtc0 zero, CP0_HWRENA |
176 | 179 | ||
177 | /* Now load up the Guest Context from VCPU */ | 180 | /* Now load up the Guest Context from VCPU */ |
178 | LONG_L $1, VCPU_R1(k1) | 181 | LONG_L $1, VCPU_R1(k1) |
179 | LONG_L $2, VCPU_R2(k1) | 182 | LONG_L $2, VCPU_R2(k1) |
180 | LONG_L $3, VCPU_R3(k1) | 183 | LONG_L $3, VCPU_R3(k1) |
181 | 184 | ||
182 | LONG_L $4, VCPU_R4(k1) | 185 | LONG_L $4, VCPU_R4(k1) |
183 | LONG_L $5, VCPU_R5(k1) | 186 | LONG_L $5, VCPU_R5(k1) |
184 | LONG_L $6, VCPU_R6(k1) | 187 | LONG_L $6, VCPU_R6(k1) |
185 | LONG_L $7, VCPU_R7(k1) | 188 | LONG_L $7, VCPU_R7(k1) |
186 | 189 | ||
187 | LONG_L $8, VCPU_R8(k1) | 190 | LONG_L $8, VCPU_R8(k1) |
188 | LONG_L $9, VCPU_R9(k1) | 191 | LONG_L $9, VCPU_R9(k1) |
189 | LONG_L $10, VCPU_R10(k1) | 192 | LONG_L $10, VCPU_R10(k1) |
190 | LONG_L $11, VCPU_R11(k1) | 193 | LONG_L $11, VCPU_R11(k1) |
191 | LONG_L $12, VCPU_R12(k1) | 194 | LONG_L $12, VCPU_R12(k1) |
192 | LONG_L $13, VCPU_R13(k1) | 195 | LONG_L $13, VCPU_R13(k1) |
193 | LONG_L $14, VCPU_R14(k1) | 196 | LONG_L $14, VCPU_R14(k1) |
194 | LONG_L $15, VCPU_R15(k1) | 197 | LONG_L $15, VCPU_R15(k1) |
195 | LONG_L $16, VCPU_R16(k1) | 198 | LONG_L $16, VCPU_R16(k1) |
196 | LONG_L $17, VCPU_R17(k1) | 199 | LONG_L $17, VCPU_R17(k1) |
197 | LONG_L $18, VCPU_R18(k1) | 200 | LONG_L $18, VCPU_R18(k1) |
198 | LONG_L $19, VCPU_R19(k1) | 201 | LONG_L $19, VCPU_R19(k1) |
199 | LONG_L $20, VCPU_R20(k1) | 202 | LONG_L $20, VCPU_R20(k1) |
200 | LONG_L $21, VCPU_R21(k1) | 203 | LONG_L $21, VCPU_R21(k1) |
201 | LONG_L $22, VCPU_R22(k1) | 204 | LONG_L $22, VCPU_R22(k1) |
202 | LONG_L $23, VCPU_R23(k1) | 205 | LONG_L $23, VCPU_R23(k1) |
203 | LONG_L $24, VCPU_R24(k1) | 206 | LONG_L $24, VCPU_R24(k1) |
204 | LONG_L $25, VCPU_R25(k1) | 207 | LONG_L $25, VCPU_R25(k1) |
205 | 208 | ||
206 | /* k0/k1 loaded up later */ | 209 | /* k0/k1 loaded up later */ |
207 | 210 | ||
208 | LONG_L $28, VCPU_R28(k1) | 211 | LONG_L $28, VCPU_R28(k1) |
209 | LONG_L $29, VCPU_R29(k1) | 212 | LONG_L $29, VCPU_R29(k1) |
210 | LONG_L $30, VCPU_R30(k1) | 213 | LONG_L $30, VCPU_R30(k1) |
211 | LONG_L $31, VCPU_R31(k1) | 214 | LONG_L $31, VCPU_R31(k1) |
212 | 215 | ||
213 | /* Restore hi/lo */ | 216 | /* Restore hi/lo */ |
214 | LONG_L k0, VCPU_LO(k1) | 217 | LONG_L k0, VCPU_LO(k1) |
215 | mtlo k0 | 218 | mtlo k0 |
216 | 219 | ||
217 | LONG_L k0, VCPU_HI(k1) | 220 | LONG_L k0, VCPU_HI(k1) |
218 | mthi k0 | 221 | mthi k0 |
219 | 222 | ||
220 | FEXPORT(__kvm_mips_load_k0k1) | 223 | FEXPORT(__kvm_mips_load_k0k1) |
221 | /* Restore the guest's k0/k1 registers */ | 224 | /* Restore the guest's k0/k1 registers */ |
222 | LONG_L k0, VCPU_R26(k1) | 225 | LONG_L k0, VCPU_R26(k1) |
223 | LONG_L k1, VCPU_R27(k1) | 226 | LONG_L k1, VCPU_R27(k1) |
224 | 227 | ||
225 | /* Jump to guest */ | 228 | /* Jump to guest */ |
226 | eret | 229 | eret |
227 | .set pop | ||
228 | 230 | ||
229 | VECTOR(MIPSX(exception), unknown) | 231 | VECTOR(MIPSX(exception), unknown) |
230 | /* | 232 | /* |
231 | * Find out what mode we came from and jump to the proper handler. | 233 | * Find out what mode we came from and jump to the proper handler. |
232 | */ | 234 | */ |
233 | .set push | 235 | mtc0 k0, CP0_ERROREPC #01: Save guest k0 |
234 | .set noat | 236 | ehb #02: |
235 | .set noreorder | 237 | |
236 | mtc0 k0, CP0_ERROREPC #01: Save guest k0 | 238 | mfc0 k0, CP0_EBASE #02: Get EBASE |
237 | ehb #02: | 239 | INT_SRL k0, k0, 10 #03: Get rid of CPUNum |
238 | 240 | INT_SLL k0, k0, 10 #04 | |
239 | mfc0 k0, CP0_EBASE #02: Get EBASE | 241 | LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 |
240 | srl k0, k0, 10 #03: Get rid of CPUNum | 242 | INT_ADDIU k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 |
241 | sll k0, k0, 10 #04 | 243 | j k0 #07: jump to the function |
242 | LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 | 244 | nop #08: branch delay slot |
243 | addiu k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 | ||
244 | j k0 #07: jump to the function | ||
245 | nop #08: branch delay slot | ||
246 | .set push | ||
247 | VECTOR_END(MIPSX(exceptionEnd)) | 245 | VECTOR_END(MIPSX(exceptionEnd)) |
248 | .end MIPSX(exception) | 246 | .end MIPSX(exception) |
249 | 247 | ||
@@ -253,329 +251,327 @@ VECTOR_END(MIPSX(exceptionEnd))
253 | * | 251 | * |
254 | */ | 252 | */ |
255 | NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) | 253 | NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) |
256 | .set push | 254 | /* Get the VCPU pointer from DDTATA_LO */ |
257 | .set noat | 255 | mfc0 k1, CP0_DDATA_LO |
258 | .set noreorder | 256 | INT_ADDIU k1, k1, VCPU_HOST_ARCH |
259 | 257 | ||
260 | /* Get the VCPU pointer from DDTATA_LO */ | 258 | /* Start saving Guest context to VCPU */ |
261 | mfc0 k1, CP0_DDATA_LO | 259 | LONG_S $0, VCPU_R0(k1) |
262 | addiu k1, k1, VCPU_HOST_ARCH | 260 | LONG_S $1, VCPU_R1(k1) |
263 | 261 | LONG_S $2, VCPU_R2(k1) | |
264 | /* Start saving Guest context to VCPU */ | 262 | LONG_S $3, VCPU_R3(k1) |
265 | LONG_S $0, VCPU_R0(k1) | 263 | LONG_S $4, VCPU_R4(k1) |
266 | LONG_S $1, VCPU_R1(k1) | 264 | LONG_S $5, VCPU_R5(k1) |
267 | LONG_S $2, VCPU_R2(k1) | 265 | LONG_S $6, VCPU_R6(k1) |
268 | LONG_S $3, VCPU_R3(k1) | 266 | LONG_S $7, VCPU_R7(k1) |
269 | LONG_S $4, VCPU_R4(k1) | 267 | LONG_S $8, VCPU_R8(k1) |
270 | LONG_S $5, VCPU_R5(k1) | 268 | LONG_S $9, VCPU_R9(k1) |
271 | LONG_S $6, VCPU_R6(k1) | 269 | LONG_S $10, VCPU_R10(k1) |
272 | LONG_S $7, VCPU_R7(k1) | 270 | LONG_S $11, VCPU_R11(k1) |
273 | LONG_S $8, VCPU_R8(k1) | 271 | LONG_S $12, VCPU_R12(k1) |
274 | LONG_S $9, VCPU_R9(k1) | 272 | LONG_S $13, VCPU_R13(k1) |
275 | LONG_S $10, VCPU_R10(k1) | 273 | LONG_S $14, VCPU_R14(k1) |
276 | LONG_S $11, VCPU_R11(k1) | 274 | LONG_S $15, VCPU_R15(k1) |
277 | LONG_S $12, VCPU_R12(k1) | 275 | LONG_S $16, VCPU_R16(k1) |
278 | LONG_S $13, VCPU_R13(k1) | 276 | LONG_S $17, VCPU_R17(k1) |
279 | LONG_S $14, VCPU_R14(k1) | 277 | LONG_S $18, VCPU_R18(k1) |
280 | LONG_S $15, VCPU_R15(k1) | 278 | LONG_S $19, VCPU_R19(k1) |
281 | LONG_S $16, VCPU_R16(k1) | 279 | LONG_S $20, VCPU_R20(k1) |
282 | LONG_S $17,VCPU_R17(k1) | 280 | LONG_S $21, VCPU_R21(k1) |
283 | LONG_S $18, VCPU_R18(k1) | 281 | LONG_S $22, VCPU_R22(k1) |
284 | LONG_S $19, VCPU_R19(k1) | 282 | LONG_S $23, VCPU_R23(k1) |
285 | LONG_S $20, VCPU_R20(k1) | 283 | LONG_S $24, VCPU_R24(k1) |
286 | LONG_S $21, VCPU_R21(k1) | 284 | LONG_S $25, VCPU_R25(k1) |
287 | LONG_S $22, VCPU_R22(k1) | 285 | |
288 | LONG_S $23, VCPU_R23(k1) | 286 | /* Guest k0/k1 saved later */ |
289 | LONG_S $24, VCPU_R24(k1) | 287 | |
290 | LONG_S $25, VCPU_R25(k1) | 288 | LONG_S $28, VCPU_R28(k1) |
291 | 289 | LONG_S $29, VCPU_R29(k1) | |
292 | /* Guest k0/k1 saved later */ | 290 | LONG_S $30, VCPU_R30(k1) |
293 | 291 | LONG_S $31, VCPU_R31(k1) | |
294 | LONG_S $28, VCPU_R28(k1) | 292 | |
295 | LONG_S $29, VCPU_R29(k1) | 293 | /* We need to save hi/lo and restore them on |
296 | LONG_S $30, VCPU_R30(k1) | 294 | * the way out |
297 | LONG_S $31, VCPU_R31(k1) | 295 | */ |
298 | 296 | mfhi t0 | |
299 | /* We need to save hi/lo and restore them on | 297 | LONG_S t0, VCPU_HI(k1) |
300 | * the way out | 298 | |
301 | */ | 299 | mflo t0 |
302 | mfhi t0 | 300 | LONG_S t0, VCPU_LO(k1) |
303 | LONG_S t0, VCPU_HI(k1) | 301 | |
304 | 302 | /* Finally save guest k0/k1 to VCPU */ | |
305 | mflo t0 | 303 | mfc0 t0, CP0_ERROREPC |
306 | LONG_S t0, VCPU_LO(k1) | 304 | LONG_S t0, VCPU_R26(k1) |
307 | 305 | ||
308 | /* Finally save guest k0/k1 to VCPU */ | 306 | /* Get GUEST k1 and save it in VCPU */ |
309 | mfc0 t0, CP0_ERROREPC | 307 | PTR_LI t1, ~0x2ff |
310 | LONG_S t0, VCPU_R26(k1) | 308 | mfc0 t0, CP0_EBASE |
311 | 309 | and t0, t0, t1 | |
312 | /* Get GUEST k1 and save it in VCPU */ | 310 | LONG_L t0, 0x3000(t0) |
313 | la t1, ~0x2ff | 311 | LONG_S t0, VCPU_R27(k1) |
314 | mfc0 t0, CP0_EBASE | 312 | |
315 | and t0, t0, t1 | 313 | /* Now that context has been saved, we can use other registers */ |
316 | LONG_L t0, 0x3000(t0) | 314 | |
317 | LONG_S t0, VCPU_R27(k1) | 315 | /* Restore vcpu */ |
318 | 316 | mfc0 a1, CP0_DDATA_LO | |
319 | /* Now that context has been saved, we can use other registers */ | 317 | move s1, a1 |
320 | 318 | ||
321 | /* Restore vcpu */ | 319 | /* Restore run (vcpu->run) */ |
322 | mfc0 a1, CP0_DDATA_LO | 320 | LONG_L a0, VCPU_RUN(a1) |
323 | move s1, a1 | 321 | /* Save pointer to run in s0, will be saved by the compiler */ |
324 | 322 | move s0, a0 | |
325 | /* Restore run (vcpu->run) */ | 323 | |
326 | LONG_L a0, VCPU_RUN(a1) | 324 | /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to |
327 | /* Save pointer to run in s0, will be saved by the compiler */ | 325 | * process the exception */ |
328 | move s0, a0 | 326 | mfc0 k0,CP0_EPC |
329 | 327 | LONG_S k0, VCPU_PC(k1) | |
330 | 328 | ||
331 | /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to process the exception */ | 329 | mfc0 k0, CP0_BADVADDR |
332 | mfc0 k0,CP0_EPC | 330 | LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) |
333 | LONG_S k0, VCPU_PC(k1) | 331 | |
334 | 332 | mfc0 k0, CP0_CAUSE | |
335 | mfc0 k0, CP0_BADVADDR | 333 | LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) |
336 | LONG_S k0, VCPU_HOST_CP0_BADVADDR(k1) | 334 | |
337 | 335 | mfc0 k0, CP0_ENTRYHI | |
338 | mfc0 k0, CP0_CAUSE | 336 | LONG_S k0, VCPU_HOST_ENTRYHI(k1) |
339 | LONG_S k0, VCPU_HOST_CP0_CAUSE(k1) | 337 | |
340 | 338 | /* Now restore the host state just enough to run the handlers */ | |
341 | mfc0 k0, CP0_ENTRYHI | 339 | |
342 | LONG_S k0, VCPU_HOST_ENTRYHI(k1) | 340 | /* Swtich EBASE to the one used by Linux */ |
343 | 341 | /* load up the host EBASE */ | |
344 | /* Now restore the host state just enough to run the handlers */ | 342 | mfc0 v0, CP0_STATUS |
345 | 343 | ||
346 | /* Swtich EBASE to the one used by Linux */ | 344 | .set at |
347 | /* load up the host EBASE */ | 345 | or k0, v0, ST0_BEV |
348 | mfc0 v0, CP0_STATUS | 346 | .set noat |
349 | 347 | ||
350 | .set at | 348 | mtc0 k0, CP0_STATUS |
351 | or k0, v0, ST0_BEV | 349 | ehb |
352 | .set noat | 350 | |
353 | 351 | LONG_L k0, VCPU_HOST_EBASE(k1) | |
354 | mtc0 k0, CP0_STATUS | 352 | mtc0 k0,CP0_EBASE |
355 | ehb | 353 | |
356 | |||
357 | LONG_L k0, VCPU_HOST_EBASE(k1) | ||
358 | mtc0 k0,CP0_EBASE | ||
359 | |||
360 | |||
361 | /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ | ||
362 | .set at | ||
363 | and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) | ||
364 | or v0, v0, ST0_CU0 | ||
365 | .set noat | ||
366 | mtc0 v0, CP0_STATUS | ||
367 | ehb | ||
368 | |||
369 | /* Load up host GP */ | ||
370 | LONG_L gp, VCPU_HOST_GP(k1) | ||
371 | |||
372 | /* Need a stack before we can jump to "C" */ | ||
373 | LONG_L sp, VCPU_HOST_STACK(k1) | ||
374 | |||
375 | /* Saved host state */ | ||
376 | addiu sp,sp, -PT_SIZE | ||
377 | 354 | ||
378 | /* XXXKYMA do we need to load the host ASID, maybe not because the | 355 | /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ |
379 | * kernel entries are marked GLOBAL, need to verify | 356 | .set at |
380 | */ | 357 | and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) |
358 | or v0, v0, ST0_CU0 | ||
359 | .set noat | ||
360 | mtc0 v0, CP0_STATUS | ||
361 | ehb | ||
362 | |||
363 | /* Load up host GP */ | ||
364 | LONG_L gp, VCPU_HOST_GP(k1) | ||
365 | |||
366 | /* Need a stack before we can jump to "C" */ | ||
367 | LONG_L sp, VCPU_HOST_STACK(k1) | ||
368 | |||
369 | /* Saved host state */ | ||
370 | INT_ADDIU sp, sp, -PT_SIZE | ||
381 | 371 | ||
382 | /* Restore host DDATA_LO */ | 372 | /* XXXKYMA do we need to load the host ASID, maybe not because the |
383 | LONG_L k0, PT_HOST_USERLOCAL(sp) | 373 | * kernel entries are marked GLOBAL, need to verify |
384 | mtc0 k0, CP0_DDATA_LO | 374 | */ |
385 | 375 | ||
386 | /* Restore RDHWR access */ | 376 | /* Restore host DDATA_LO */ |
387 | la k0, 0x2000000F | 377 | LONG_L k0, PT_HOST_USERLOCAL(sp) |
388 | mtc0 k0, CP0_HWRENA | 378 | mtc0 k0, CP0_DDATA_LO |
389 | 379 | ||
390 | /* Jump to handler */ | 380 | /* Restore RDHWR access */ |
381 | PTR_LI k0, 0x2000000F | ||
382 | mtc0 k0, CP0_HWRENA | ||
383 | |||
384 | /* Jump to handler */ | ||
391 | FEXPORT(__kvm_mips_jump_to_handler) | 385 | FEXPORT(__kvm_mips_jump_to_handler) |
392 | /* XXXKYMA: not sure if this is safe, how large is the stack?? */ | 386 | /* XXXKYMA: not sure if this is safe, how large is the stack?? |
393 | /* Now jump to the kvm_mips_handle_exit() to see if we can deal with this in the kernel */ | 387 | * Now jump to the kvm_mips_handle_exit() to see if we can deal |
394 | la t9,kvm_mips_handle_exit | 388 | * with this in the kernel */ |
395 | jalr.hb t9 | 389 | PTR_LA t9, kvm_mips_handle_exit |
396 | addiu sp,sp, -CALLFRAME_SIZ /* BD Slot */ | 390 | jalr.hb t9 |
397 | 391 | INT_ADDIU sp, sp, -CALLFRAME_SIZ /* BD Slot */ | |
398 | /* Return from handler Make sure interrupts are disabled */ | 392 | |
399 | di | 393 | /* Return from handler Make sure interrupts are disabled */ |
400 | ehb | 394 | di |
401 | 395 | ehb | |
402 | /* XXXKYMA: k0/k1 could have been blown away if we processed an exception | 396 | |
403 | * while we were handling the exception from the guest, reload k1 | 397 | /* XXXKYMA: k0/k1 could have been blown away if we processed |
404 | */ | 398 | * an exception while we were handling the exception from the |
405 | move k1, s1 | 399 | * guest, reload k1 |
406 | addiu k1, k1, VCPU_HOST_ARCH | 400 | */ |
407 | 401 | ||
408 | /* Check return value, should tell us if we are returning to the host (handle I/O etc) | 402 | move k1, s1 |
409 | * or resuming the guest | 403 | INT_ADDIU k1, k1, VCPU_HOST_ARCH |
410 | */ | 404 | |
411 | andi t0, v0, RESUME_HOST | 405 | /* Check return value, should tell us if we are returning to the |
412 | bnez t0, __kvm_mips_return_to_host | 406 | * host (handle I/O etc)or resuming the guest |
413 | nop | 407 | */ |
408 | andi t0, v0, RESUME_HOST | ||
409 | bnez t0, __kvm_mips_return_to_host | ||
410 | nop | ||
414 | 411 | ||
415 | __kvm_mips_return_to_guest: | 412 | __kvm_mips_return_to_guest: |
416 | /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ | 413 | /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */ |
417 | mtc0 s1, CP0_DDATA_LO | 414 | mtc0 s1, CP0_DDATA_LO |
418 | |||
419 | /* Load up the Guest EBASE to minimize the window where BEV is set */ | ||
420 | LONG_L t0, VCPU_GUEST_EBASE(k1) | ||
421 | |||
422 | /* Switch EBASE back to the one used by KVM */ | ||
423 | mfc0 v1, CP0_STATUS | ||
424 | .set at | ||
425 | or k0, v1, ST0_BEV | ||
426 | .set noat | ||
427 | mtc0 k0, CP0_STATUS | ||
428 | ehb | ||
429 | mtc0 t0,CP0_EBASE | ||
430 | |||
431 | /* Setup status register for running guest in UM */ | ||
432 | .set at | ||
433 | or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) | ||
434 | and v1, v1, ~ST0_CU0 | ||
435 | .set noat | ||
436 | mtc0 v1, CP0_STATUS | ||
437 | ehb | ||
438 | 415 | ||
416 | /* Load up the Guest EBASE to minimize the window where BEV is set */ | ||
417 | LONG_L t0, VCPU_GUEST_EBASE(k1) | ||
418 | |||
419 | /* Switch EBASE back to the one used by KVM */ | ||
420 | mfc0 v1, CP0_STATUS | ||
421 | .set at | ||
422 | or k0, v1, ST0_BEV | ||
423 | .set noat | ||
424 | mtc0 k0, CP0_STATUS | ||
425 | ehb | ||
426 | mtc0 t0, CP0_EBASE | ||
427 | |||
428 | /* Setup status register for running guest in UM */ | ||
429 | .set at | ||
430 | or v1, v1, (ST0_EXL | KSU_USER | ST0_IE) | ||
431 | and v1, v1, ~ST0_CU0 | ||
432 | .set noat | ||
433 | mtc0 v1, CP0_STATUS | ||
434 | ehb | ||
439 | 435 | ||
440 | /* Set Guest EPC */ | 436 | /* Set Guest EPC */ |
441 | LONG_L t0, VCPU_PC(k1) | 437 | LONG_L t0, VCPU_PC(k1) |
442 | mtc0 t0, CP0_EPC | 438 | mtc0 t0, CP0_EPC |
443 | 439 | ||
444 | /* Set the ASID for the Guest Kernel */ | 440 | /* Set the ASID for the Guest Kernel */ |
445 | sll t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ | 441 | INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ |
446 | /* addresses shift to 0x80000000 */ | 442 | /* addresses shift to 0x80000000 */ |
447 | bltz t0, 1f /* If kernel */ | 443 | bltz t0, 1f /* If kernel */ |
448 | addiu t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ | 444 | INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ |
449 | addiu t1, k1, VCPU_GUEST_USER_ASID /* else user */ | 445 | INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ |
450 | 1: | 446 | 1: |
451 | /* t1: contains the base of the ASID array, need to get the cpu id */ | 447 | /* t1: contains the base of the ASID array, need to get the cpu id */ |
452 | LONG_L t2, TI_CPU($28) /* smp_processor_id */ | 448 | LONG_L t2, TI_CPU($28) /* smp_processor_id */ |
453 | sll t2, t2, 2 /* x4 */ | 449 | INT_SLL t2, t2, 2 /* x4 */ |
454 | addu t3, t1, t2 | 450 | REG_ADDU t3, t1, t2 |
455 | LONG_L k0, (t3) | 451 | LONG_L k0, (t3) |
456 | andi k0, k0, 0xff | 452 | andi k0, k0, 0xff |
457 | mtc0 k0,CP0_ENTRYHI | 453 | mtc0 k0,CP0_ENTRYHI |
458 | ehb | 454 | ehb |
459 | 455 | ||
460 | /* Disable RDHWR access */ | 456 | /* Disable RDHWR access */ |
461 | mtc0 zero, CP0_HWRENA | 457 | mtc0 zero, CP0_HWRENA |
462 | 458 | ||
463 | /* load the guest context from VCPU and return */ | 459 | /* load the guest context from VCPU and return */ |
464 | LONG_L $0, VCPU_R0(k1) | 460 | LONG_L $0, VCPU_R0(k1) |
465 | LONG_L $1, VCPU_R1(k1) | 461 | LONG_L $1, VCPU_R1(k1) |
466 | LONG_L $2, VCPU_R2(k1) | 462 | LONG_L $2, VCPU_R2(k1) |
467 | LONG_L $3, VCPU_R3(k1) | 463 | LONG_L $3, VCPU_R3(k1) |
468 | LONG_L $4, VCPU_R4(k1) | 464 | LONG_L $4, VCPU_R4(k1) |
469 | LONG_L $5, VCPU_R5(k1) | 465 | LONG_L $5, VCPU_R5(k1) |
470 | LONG_L $6, VCPU_R6(k1) | 466 | LONG_L $6, VCPU_R6(k1) |
471 | LONG_L $7, VCPU_R7(k1) | 467 | LONG_L $7, VCPU_R7(k1) |
472 | LONG_L $8, VCPU_R8(k1) | 468 | LONG_L $8, VCPU_R8(k1) |
473 | LONG_L $9, VCPU_R9(k1) | 469 | LONG_L $9, VCPU_R9(k1) |
474 | LONG_L $10, VCPU_R10(k1) | 470 | LONG_L $10, VCPU_R10(k1) |
475 | LONG_L $11, VCPU_R11(k1) | 471 | LONG_L $11, VCPU_R11(k1) |
476 | LONG_L $12, VCPU_R12(k1) | 472 | LONG_L $12, VCPU_R12(k1) |
477 | LONG_L $13, VCPU_R13(k1) | 473 | LONG_L $13, VCPU_R13(k1) |
478 | LONG_L $14, VCPU_R14(k1) | 474 | LONG_L $14, VCPU_R14(k1) |
479 | LONG_L $15, VCPU_R15(k1) | 475 | LONG_L $15, VCPU_R15(k1) |
480 | LONG_L $16, VCPU_R16(k1) | 476 | LONG_L $16, VCPU_R16(k1) |
481 | LONG_L $17, VCPU_R17(k1) | 477 | LONG_L $17, VCPU_R17(k1) |
482 | LONG_L $18, VCPU_R18(k1) | 478 | LONG_L $18, VCPU_R18(k1) |
483 | LONG_L $19, VCPU_R19(k1) | 479 | LONG_L $19, VCPU_R19(k1) |
484 | LONG_L $20, VCPU_R20(k1) | 480 | LONG_L $20, VCPU_R20(k1) |
485 | LONG_L $21, VCPU_R21(k1) | 481 | LONG_L $21, VCPU_R21(k1) |
486 | LONG_L $22, VCPU_R22(k1) | 482 | LONG_L $22, VCPU_R22(k1) |
487 | LONG_L $23, VCPU_R23(k1) | 483 | LONG_L $23, VCPU_R23(k1) |
488 | LONG_L $24, VCPU_R24(k1) | 484 | LONG_L $24, VCPU_R24(k1) |
489 | LONG_L $25, VCPU_R25(k1) | 485 | LONG_L $25, VCPU_R25(k1) |
490 | 486 | ||
491 | /* $/k1 loaded later */ | 487 | /* $/k1 loaded later */ |
492 | LONG_L $28, VCPU_R28(k1) | 488 | LONG_L $28, VCPU_R28(k1) |
493 | LONG_L $29, VCPU_R29(k1) | 489 | LONG_L $29, VCPU_R29(k1) |
494 | LONG_L $30, VCPU_R30(k1) | 490 | LONG_L $30, VCPU_R30(k1) |
495 | LONG_L $31, VCPU_R31(k1) | 491 | LONG_L $31, VCPU_R31(k1) |
496 | 492 | ||
497 | FEXPORT(__kvm_mips_skip_guest_restore) | 493 | FEXPORT(__kvm_mips_skip_guest_restore) |
498 | LONG_L k0, VCPU_HI(k1) | 494 | LONG_L k0, VCPU_HI(k1) |
499 | mthi k0 | 495 | mthi k0 |
500 | 496 | ||
501 | LONG_L k0, VCPU_LO(k1) | 497 | LONG_L k0, VCPU_LO(k1) |
502 | mtlo k0 | 498 | mtlo k0 |
503 | 499 | ||
504 | LONG_L k0, VCPU_R26(k1) | 500 | LONG_L k0, VCPU_R26(k1) |
505 | LONG_L k1, VCPU_R27(k1) | 501 | LONG_L k1, VCPU_R27(k1) |
506 | 502 | ||
507 | eret | 503 | eret |
508 | 504 | ||
509 | __kvm_mips_return_to_host: | 505 | __kvm_mips_return_to_host: |
510 | /* EBASE is already pointing to Linux */ | 506 | /* EBASE is already pointing to Linux */ |
511 | LONG_L k1, VCPU_HOST_STACK(k1) | 507 | LONG_L k1, VCPU_HOST_STACK(k1) |
512 | addiu k1,k1, -PT_SIZE | 508 | INT_ADDIU k1,k1, -PT_SIZE |
513 | 509 | ||
514 | /* Restore host DDATA_LO */ | 510 | /* Restore host DDATA_LO */ |
515 | LONG_L k0, PT_HOST_USERLOCAL(k1) | 511 | LONG_L k0, PT_HOST_USERLOCAL(k1) |
516 | mtc0 k0, CP0_DDATA_LO | 512 | mtc0 k0, CP0_DDATA_LO |
517 | 513 | ||
518 | /* Restore host ASID */ | 514 | /* Restore host ASID */ |
519 | LONG_L k0, PT_HOST_ASID(sp) | 515 | LONG_L k0, PT_HOST_ASID(sp) |
520 | andi k0, 0xff | 516 | andi k0, 0xff |
521 | mtc0 k0,CP0_ENTRYHI | 517 | mtc0 k0,CP0_ENTRYHI |
522 | ehb | 518 | ehb |
523 | 519 | ||
524 | /* Load context saved on the host stack */ | 520 | /* Load context saved on the host stack */ |
525 | LONG_L $0, PT_R0(k1) | 521 | LONG_L $0, PT_R0(k1) |
526 | LONG_L $1, PT_R1(k1) | 522 | LONG_L $1, PT_R1(k1) |
527 | 523 | ||
528 | /* r2/v0 is the return code, shift it down by 2 (arithmetic) to recover the err code */ | 524 | /* r2/v0 is the return code, shift it down by 2 (arithmetic) |
529 | sra k0, v0, 2 | 525 | * to recover the err code */ |
530 | move $2, k0 | 526 | INT_SRA k0, v0, 2 |
531 | 527 | move $2, k0 | |
532 | LONG_L $3, PT_R3(k1) | 528 | |
533 | LONG_L $4, PT_R4(k1) | 529 | LONG_L $3, PT_R3(k1) |
534 | LONG_L $5, PT_R5(k1) | 530 | LONG_L $4, PT_R4(k1) |
535 | LONG_L $6, PT_R6(k1) | 531 | LONG_L $5, PT_R5(k1) |
536 | LONG_L $7, PT_R7(k1) | 532 | LONG_L $6, PT_R6(k1) |
537 | LONG_L $8, PT_R8(k1) | 533 | LONG_L $7, PT_R7(k1) |
538 | LONG_L $9, PT_R9(k1) | 534 | LONG_L $8, PT_R8(k1) |
539 | LONG_L $10, PT_R10(k1) | 535 | LONG_L $9, PT_R9(k1) |
540 | LONG_L $11, PT_R11(k1) | 536 | LONG_L $10, PT_R10(k1) |
541 | LONG_L $12, PT_R12(k1) | 537 | LONG_L $11, PT_R11(k1) |
542 | LONG_L $13, PT_R13(k1) | 538 | LONG_L $12, PT_R12(k1) |
543 | LONG_L $14, PT_R14(k1) | 539 | LONG_L $13, PT_R13(k1) |
544 | LONG_L $15, PT_R15(k1) | 540 | LONG_L $14, PT_R14(k1) |
545 | LONG_L $16, PT_R16(k1) | 541 | LONG_L $15, PT_R15(k1) |
546 | LONG_L $17, PT_R17(k1) | 542 | LONG_L $16, PT_R16(k1) |
547 | LONG_L $18, PT_R18(k1) | 543 | LONG_L $17, PT_R17(k1) |
548 | LONG_L $19, PT_R19(k1) | 544 | LONG_L $18, PT_R18(k1) |
549 | LONG_L $20, PT_R20(k1) | 545 | LONG_L $19, PT_R19(k1) |
550 | LONG_L $21, PT_R21(k1) | 546 | LONG_L $20, PT_R20(k1) |
551 | LONG_L $22, PT_R22(k1) | 547 | LONG_L $21, PT_R21(k1) |
552 | LONG_L $23, PT_R23(k1) | 548 | LONG_L $22, PT_R22(k1) |
553 | LONG_L $24, PT_R24(k1) | 549 | LONG_L $23, PT_R23(k1) |
554 | LONG_L $25, PT_R25(k1) | 550 | LONG_L $24, PT_R24(k1) |
555 | 551 | LONG_L $25, PT_R25(k1) | |
556 | /* Host k0/k1 were not saved */ | 552 | |
557 | 553 | /* Host k0/k1 were not saved */ | |
558 | LONG_L $28, PT_R28(k1) | 554 | |
559 | LONG_L $29, PT_R29(k1) | 555 | LONG_L $28, PT_R28(k1) |
560 | LONG_L $30, PT_R30(k1) | 556 | LONG_L $29, PT_R29(k1) |
561 | 557 | LONG_L $30, PT_R30(k1) | |
562 | LONG_L k0, PT_HI(k1) | 558 | |
563 | mthi k0 | 559 | LONG_L k0, PT_HI(k1) |
564 | 560 | mthi k0 | |
565 | LONG_L k0, PT_LO(k1) | 561 | |
566 | mtlo k0 | 562 | LONG_L k0, PT_LO(k1) |
567 | 563 | mtlo k0 | |
568 | /* Restore RDHWR access */ | 564 | |
569 | la k0, 0x2000000F | 565 | /* Restore RDHWR access */ |
570 | mtc0 k0, CP0_HWRENA | 566 | PTR_LI k0, 0x2000000F |
571 | 567 | mtc0 k0, CP0_HWRENA | |
572 | 568 | ||
573 | /* Restore RA, which is the address we will return to */ | 569 | |
574 | LONG_L ra, PT_R31(k1) | 570 | /* Restore RA, which is the address we will return to */ |
575 | j ra | 571 | LONG_L ra, PT_R31(k1) |
576 | nop | 572 | j ra |
577 | 573 | nop | |
578 | .set pop | 574 | |
579 | VECTOR_END(MIPSX(GuestExceptionEnd)) | 575 | VECTOR_END(MIPSX(GuestExceptionEnd)) |
580 | .end MIPSX(GuestException) | 576 | .end MIPSX(GuestException) |
581 | 577 | ||
@@ -627,24 +623,23 @@ MIPSX(exceptions):
627 | 623 | ||
628 | #define HW_SYNCI_Step $1 | 624 | #define HW_SYNCI_Step $1 |
629 | LEAF(MIPSX(SyncICache)) | 625 | LEAF(MIPSX(SyncICache)) |
630 | .set push | 626 | .set push |
631 | .set mips32r2 | 627 | .set mips32r2 |
632 | beq a1, zero, 20f | 628 | beq a1, zero, 20f |
633 | nop | 629 | nop |
634 | addu a1, a0, a1 | 630 | REG_ADDU a1, a0, a1 |
635 | rdhwr v0, HW_SYNCI_Step | 631 | rdhwr v0, HW_SYNCI_Step |
636 | beq v0, zero, 20f | 632 | beq v0, zero, 20f |
637 | nop | 633 | nop |
638 | |||
639 | 10: | 634 | 10: |
640 | synci 0(a0) | 635 | synci 0(a0) |
641 | addu a0, a0, v0 | 636 | REG_ADDU a0, a0, v0 |
642 | sltu v1, a0, a1 | 637 | sltu v1, a0, a1 |
643 | bne v1, zero, 10b | 638 | bne v1, zero, 10b |
644 | nop | 639 | nop |
645 | sync | 640 | sync |
646 | 20: | 641 | 20: |
647 | jr.hb ra | 642 | jr.hb ra |
648 | nop | 643 | nop |
649 | .set pop | 644 | .set pop |
650 | END(MIPSX(SyncICache)) | 645 | END(MIPSX(SyncICache)) |
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index dd203e59e6fd..a7b044536de4 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -208,6 +208,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
208 | return 0; | 208 | return 0; |
209 | } | 209 | } |
210 | 210 | ||
211 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
212 | { | ||
213 | } | ||
214 | |||
211 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 215 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
212 | struct kvm_memory_slot *memslot, | 216 | struct kvm_memory_slot *memslot, |
213 | struct kvm_userspace_memory_region *mem, | 217 | struct kvm_userspace_memory_region *mem, |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 08891d07aeb6..fa19e2f1a874 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -334,6 +334,27 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
334 | return r; | 334 | return r; |
335 | } | 335 | } |
336 | 336 | ||
337 | /* | ||
338 | * Like kvmppc_get_last_inst(), but for fetching a sc instruction. | ||
339 | * Because the sc instruction sets SRR0 to point to the following | ||
340 | * instruction, we have to fetch from pc - 4. | ||
341 | */ | ||
342 | static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) | ||
343 | { | ||
344 | ulong pc = kvmppc_get_pc(vcpu) - 4; | ||
345 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
346 | u32 r; | ||
347 | |||
348 | /* Load the instruction manually if it failed to do so in the | ||
349 | * exit path */ | ||
350 | if (svcpu->last_inst == KVM_INST_FETCH_FAILED) | ||
351 | kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); | ||
352 | |||
353 | r = svcpu->last_inst; | ||
354 | svcpu_put(svcpu); | ||
355 | return r; | ||
356 | } | ||
357 | |||
337 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) | 358 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) |
338 | { | 359 | { |
339 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | 360 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
@@ -446,6 +467,23 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
446 | return vcpu->arch.last_inst; | 467 | return vcpu->arch.last_inst; |
447 | } | 468 | } |
448 | 469 | ||
470 | /* | ||
471 | * Like kvmppc_get_last_inst(), but for fetching a sc instruction. | ||
472 | * Because the sc instruction sets SRR0 to point to the following | ||
473 | * instruction, we have to fetch from pc - 4. | ||
474 | */ | ||
475 | static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) | ||
476 | { | ||
477 | ulong pc = kvmppc_get_pc(vcpu) - 4; | ||
478 | |||
479 | /* Load the instruction manually if it failed to do so in the | ||
480 | * exit path */ | ||
481 | if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) | ||
482 | kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); | ||
483 | |||
484 | return vcpu->arch.last_inst; | ||
485 | } | ||
486 | |||
449 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) | 487 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) |
450 | { | 488 | { |
451 | return vcpu->arch.fault_dar; | 489 | return vcpu->arch.fault_dar; |
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index a1ecb14e4442..86d638a3b359 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
37 | 37 | ||
38 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 38 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
39 | #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ | 39 | #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ |
40 | extern int kvm_hpt_order; /* order of preallocated HPTs */ | 40 | extern unsigned long kvm_rma_pages; |
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ | 43 | #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ |
@@ -100,7 +100,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
100 | /* (masks depend on page size) */ | 100 | /* (masks depend on page size) */ |
101 | rb |= 0x1000; /* page encoding in LP field */ | 101 | rb |= 0x1000; /* page encoding in LP field */ |
102 | rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ | 102 | rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */ |
103 | rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */ | 103 | rb |= ((va_low << 4) & 0xf0); /* AVAL field (P7 doesn't seem to care) */ |
104 | } | 104 | } |
105 | } else { | 105 | } else { |
106 | /* 4kB page */ | 106 | /* 4kB page */ |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index af326cde7cb6..33283532e9d8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
183 | struct page *pages[0]; | 183 | struct page *pages[0]; |
184 | }; | 184 | }; |
185 | 185 | ||
186 | struct kvmppc_linear_info { | 186 | struct kvm_rma_info { |
187 | void *base_virt; | 187 | atomic_t use_count; |
188 | unsigned long base_pfn; | 188 | unsigned long base_pfn; |
189 | unsigned long npages; | ||
190 | struct list_head list; | ||
191 | atomic_t use_count; | ||
192 | int type; | ||
193 | }; | 189 | }; |
194 | 190 | ||
195 | /* XICS components, defined in book3s_xics.c */ | 191 | /* XICS components, defined in book3s_xics.c */ |
@@ -246,7 +242,7 @@ struct kvm_arch {
246 | int tlbie_lock; | 242 | int tlbie_lock; |
247 | unsigned long lpcr; | 243 | unsigned long lpcr; |
248 | unsigned long rmor; | 244 | unsigned long rmor; |
249 | struct kvmppc_linear_info *rma; | 245 | struct kvm_rma_info *rma; |
250 | unsigned long vrma_slb_v; | 246 | unsigned long vrma_slb_v; |
251 | int rma_setup_done; | 247 | int rma_setup_done; |
252 | int using_mmu_notifiers; | 248 | int using_mmu_notifiers; |
@@ -259,7 +255,7 @@ struct kvm_arch {
259 | spinlock_t slot_phys_lock; | 255 | spinlock_t slot_phys_lock; |
260 | cpumask_t need_tlb_flush; | 256 | cpumask_t need_tlb_flush; |
261 | struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; | 257 | struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; |
262 | struct kvmppc_linear_info *hpt_li; | 258 | int hpt_cma_alloc; |
263 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | 259 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ |
264 | #ifdef CONFIG_PPC_BOOK3S_64 | 260 | #ifdef CONFIG_PPC_BOOK3S_64 |
265 | struct list_head spapr_tce_tables; | 261 | struct list_head spapr_tce_tables; |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe03d77..b15554a26c20 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -137,10 +137,10 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
137 | unsigned long ioba, unsigned long tce); | 137 | unsigned long ioba, unsigned long tce); |
138 | extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, | 138 | extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, |
139 | struct kvm_allocate_rma *rma); | 139 | struct kvm_allocate_rma *rma); |
140 | extern struct kvmppc_linear_info *kvm_alloc_rma(void); | 140 | extern struct kvm_rma_info *kvm_alloc_rma(void); |
141 | extern void kvm_release_rma(struct kvmppc_linear_info *ri); | 141 | extern void kvm_release_rma(struct kvm_rma_info *ri); |
142 | extern struct kvmppc_linear_info *kvm_alloc_hpt(void); | 142 | extern struct page *kvm_alloc_hpt(unsigned long nr_pages); |
143 | extern void kvm_release_hpt(struct kvmppc_linear_info *li); | 143 | extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); |
144 | extern int kvmppc_core_init_vm(struct kvm *kvm); | 144 | extern int kvmppc_core_init_vm(struct kvm *kvm); |
145 | extern void kvmppc_core_destroy_vm(struct kvm *kvm); | 145 | extern void kvmppc_core_destroy_vm(struct kvm *kvm); |
146 | extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, | 146 | extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, |
@@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
261 | struct openpic; | 261 | struct openpic; |
262 | 262 | ||
263 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 263 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
264 | extern void kvm_cma_reserve(void) __init; | ||
264 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) | 265 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) |
265 | { | 266 | { |
266 | paca[cpu].kvm_hstate.xics_phys = addr; | 267 | paca[cpu].kvm_hstate.xics_phys = addr; |
@@ -281,13 +282,12 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
281 | } | 282 | } |
282 | 283 | ||
283 | extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); | 284 | extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); |
284 | extern void kvm_linear_init(void); | ||
285 | 285 | ||
286 | #else | 286 | #else |
287 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) | 287 | static inline void __init kvm_cma_reserve(void) |
288 | {} | 288 | {} |
289 | 289 | ||
290 | static inline void kvm_linear_init(void) | 290 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) |
291 | {} | 291 | {} |
292 | 292 | ||
293 | static inline u32 kvmppc_get_xics_latch(void) | 293 | static inline u32 kvmppc_get_xics_latch(void) |
@@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
394 | } | 394 | } |
395 | } | 395 | } |
396 | 396 | ||
397 | /* Please call after prepare_to_enter. This function puts the lazy ee state | 397 | /* |
398 | back to normal mode, without actually enabling interrupts. */ | 398 | * Please call after prepare_to_enter. This function puts the lazy ee and irq |
399 | static inline void kvmppc_lazy_ee_enable(void) | 399 | * disabled tracking state back to normal mode, without actually enabling |
400 | * interrupts. | ||
401 | */ | ||
402 | static inline void kvmppc_fix_ee_before_entry(void) | ||
400 | { | 403 | { |
404 | trace_hardirqs_on(); | ||
405 | |||
401 | #ifdef CONFIG_PPC64 | 406 | #ifdef CONFIG_PPC64 |
402 | /* Only need to enable IRQs by hard enabling them after this */ | 407 | /* Only need to enable IRQs by hard enabling them after this */ |
403 | local_paca->irq_happened = 0; | 408 | local_paca->irq_happened = 0; |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8207459efe56..d8958be5f31a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -454,6 +454,7 @@ int main(void)
454 | DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); | 454 | DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); |
455 | DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); | 455 | DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); |
456 | #endif | 456 | #endif |
457 | DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3)); | ||
457 | DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); | 458 | DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); |
458 | DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); | 459 | DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); |
459 | DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); | 460 | DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); |
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 389fb8077cc9..fe6a58c9f0b7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -229,6 +229,8 @@ void __init early_setup(unsigned long dt_ptr)
229 | /* Initialize the hash table or TLB handling */ | 229 | /* Initialize the hash table or TLB handling */ |
230 | early_init_mmu(); | 230 | early_init_mmu(); |
231 | 231 | ||
232 | kvm_cma_reserve(); | ||
233 | |||
232 | /* | 234 | /* |
233 | * Reserve any gigantic pages requested on the command line. | 235 | * Reserve any gigantic pages requested on the command line. |
234 | * memblock needs to have been initialized by the time this is | 236 | * memblock needs to have been initialized by the time this is |
@@ -609,8 +611,6 @@ void __init setup_arch(char **cmdline_p) | |||
609 | /* Initialize the MMU context management stuff */ | 611 | /* Initialize the MMU context management stuff */ |
610 | mmu_context_init(); | 612 | mmu_context_init(); |
611 | 613 | ||
612 | kvm_linear_init(); | ||
613 | |||
614 | /* Interrupt code needs to be 64K-aligned */ | 614 | /* Interrupt code needs to be 64K-aligned */ |
615 | if ((unsigned long)_stext & 0xffff) | 615 | if ((unsigned long)_stext & 0xffff) |
616 | panic("Kernelbase not 64K-aligned (0x%lx)!\n", | 616 | panic("Kernelbase not 64K-aligned (0x%lx)!\n", |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index eb643f862579..ffaef2cb101a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig | |||
@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV | |||
72 | bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" | 72 | bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" |
73 | depends on KVM_BOOK3S_64 | 73 | depends on KVM_BOOK3S_64 |
74 | select MMU_NOTIFIER | 74 | select MMU_NOTIFIER |
75 | select CMA | ||
75 | ---help--- | 76 | ---help--- |
76 | Support running unmodified book3s_64 guest kernels in | 77 | Support running unmodified book3s_64 guest kernels in |
77 | virtual machines on POWER7 and PPC970 processors that have | 78 | virtual machines on POWER7 and PPC970 processors that have |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 008cd856c5b5..6646c952c5e3 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
@@ -81,6 +81,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | |||
81 | book3s_64_vio_hv.o \ | 81 | book3s_64_vio_hv.o \ |
82 | book3s_hv_ras.o \ | 82 | book3s_hv_ras.o \ |
83 | book3s_hv_builtin.o \ | 83 | book3s_hv_builtin.o \ |
84 | book3s_hv_cma.o \ | ||
84 | $(kvm-book3s_64-builtin-xics-objs-y) | 85 | $(kvm-book3s_64-builtin-xics-objs-y) |
85 | 86 | ||
86 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ | 87 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ |
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 739bfbadb85e..7e345e00661a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
@@ -182,10 +182,13 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
182 | hva_t ptegp; | 182 | hva_t ptegp; |
183 | u64 pteg[16]; | 183 | u64 pteg[16]; |
184 | u64 avpn = 0; | 184 | u64 avpn = 0; |
185 | u64 v, r; | ||
186 | u64 v_val, v_mask; | ||
187 | u64 eaddr_mask; | ||
185 | int i; | 188 | int i; |
186 | u8 key = 0; | 189 | u8 pp, key = 0; |
187 | bool found = false; | 190 | bool found = false; |
188 | int second = 0; | 191 | bool second = false; |
189 | ulong mp_ea = vcpu->arch.magic_page_ea; | 192 | ulong mp_ea = vcpu->arch.magic_page_ea; |
190 | 193 | ||
191 | /* Magic page override */ | 194 | /* Magic page override */ |
@@ -208,8 +211,16 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
208 | goto no_seg_found; | 211 | goto no_seg_found; |
209 | 212 | ||
210 | avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); | 213 | avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr); |
214 | v_val = avpn & HPTE_V_AVPN; | ||
215 | |||
211 | if (slbe->tb) | 216 | if (slbe->tb) |
212 | avpn |= SLB_VSID_B_1T; | 217 | v_val |= SLB_VSID_B_1T; |
218 | if (slbe->large) | ||
219 | v_val |= HPTE_V_LARGE; | ||
220 | v_val |= HPTE_V_VALID; | ||
221 | |||
222 | v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | | ||
223 | HPTE_V_SECONDARY; | ||
213 | 224 | ||
214 | do_second: | 225 | do_second: |
215 | ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); | 226 | ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); |
@@ -227,91 +238,74 @@ do_second: | |||
227 | key = 4; | 238 | key = 4; |
228 | 239 | ||
229 | for (i=0; i<16; i+=2) { | 240 | for (i=0; i<16; i+=2) { |
230 | u64 v = pteg[i]; | 241 | /* Check all relevant fields of 1st dword */ |
231 | u64 r = pteg[i+1]; | 242 | if ((pteg[i] & v_mask) == v_val) { |
232 | |||
233 | /* Valid check */ | ||
234 | if (!(v & HPTE_V_VALID)) | ||
235 | continue; | ||
236 | /* Hash check */ | ||
237 | if ((v & HPTE_V_SECONDARY) != second) | ||
238 | continue; | ||
239 | |||
240 | /* AVPN compare */ | ||
241 | if (HPTE_V_COMPARE(avpn, v)) { | ||
242 | u8 pp = (r & HPTE_R_PP) | key; | ||
243 | int eaddr_mask = 0xFFF; | ||
244 | |||
245 | gpte->eaddr = eaddr; | ||
246 | gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, | ||
247 | eaddr, | ||
248 | data); | ||
249 | if (slbe->large) | ||
250 | eaddr_mask = 0xFFFFFF; | ||
251 | gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask); | ||
252 | gpte->may_execute = ((r & HPTE_R_N) ? false : true); | ||
253 | gpte->may_read = false; | ||
254 | gpte->may_write = false; | ||
255 | |||
256 | switch (pp) { | ||
257 | case 0: | ||
258 | case 1: | ||
259 | case 2: | ||
260 | case 6: | ||
261 | gpte->may_write = true; | ||
262 | /* fall through */ | ||
263 | case 3: | ||
264 | case 5: | ||
265 | case 7: | ||
266 | gpte->may_read = true; | ||
267 | break; | ||
268 | } | ||
269 | |||
270 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " | ||
271 | "-> 0x%lx\n", | ||
272 | eaddr, avpn, gpte->vpage, gpte->raddr); | ||
273 | found = true; | 243 | found = true; |
274 | break; | 244 | break; |
275 | } | 245 | } |
276 | } | 246 | } |
277 | 247 | ||
278 | /* Update PTE R and C bits, so the guest's swapper knows we used the | 248 | if (!found) { |
279 | * page */ | 249 | if (second) |
280 | if (found) { | 250 | goto no_page_found; |
281 | u32 oldr = pteg[i+1]; | 251 | v_val |= HPTE_V_SECONDARY; |
252 | second = true; | ||
253 | goto do_second; | ||
254 | } | ||
282 | 255 | ||
283 | if (gpte->may_read) { | 256 | v = pteg[i]; |
284 | /* Set the accessed flag */ | 257 | r = pteg[i+1]; |
285 | pteg[i+1] |= HPTE_R_R; | 258 | pp = (r & HPTE_R_PP) | key; |
286 | } | 259 | eaddr_mask = 0xFFF; |
287 | if (gpte->may_write) { | 260 | |
288 | /* Set the dirty flag */ | 261 | gpte->eaddr = eaddr; |
289 | pteg[i+1] |= HPTE_R_C; | 262 | gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); |
290 | } else { | 263 | if (slbe->large) |
291 | dprintk("KVM: Mapping read-only page!\n"); | 264 | eaddr_mask = 0xFFFFFF; |
292 | } | 265 | gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); |
266 | gpte->may_execute = ((r & HPTE_R_N) ? false : true); | ||
267 | gpte->may_read = false; | ||
268 | gpte->may_write = false; | ||
269 | |||
270 | switch (pp) { | ||
271 | case 0: | ||
272 | case 1: | ||
273 | case 2: | ||
274 | case 6: | ||
275 | gpte->may_write = true; | ||
276 | /* fall through */ | ||
277 | case 3: | ||
278 | case 5: | ||
279 | case 7: | ||
280 | gpte->may_read = true; | ||
281 | break; | ||
282 | } | ||
293 | 283 | ||
294 | /* Write back into the PTEG */ | 284 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " |
295 | if (pteg[i+1] != oldr) | 285 | "-> 0x%lx\n", |
296 | copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); | 286 | eaddr, avpn, gpte->vpage, gpte->raddr); |
297 | 287 | ||
298 | if (!gpte->may_read) | 288 | /* Update PTE R and C bits, so the guest's swapper knows we used the |
299 | return -EPERM; | 289 | * page */ |
300 | return 0; | 290 | if (gpte->may_read) { |
301 | } else { | 291 | /* Set the accessed flag */ |
302 | dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx " | 292 | r |= HPTE_R_R; |
303 | "ptegp=0x%lx)\n", | 293 | } |
304 | eaddr, to_book3s(vcpu)->sdr1, ptegp); | 294 | if (data && gpte->may_write) { |
305 | for (i = 0; i < 16; i += 2) | 295 | /* Set the dirty flag -- XXX even if not writing */ |
306 | dprintk(" %02d: 0x%llx - 0x%llx (0x%llx)\n", | 296 | r |= HPTE_R_C; |
307 | i, pteg[i], pteg[i+1], avpn); | 297 | } |
308 | 298 | ||
309 | if (!second) { | 299 | /* Write back into the PTEG */ |
310 | second = HPTE_V_SECONDARY; | 300 | if (pteg[i+1] != r) { |
311 | goto do_second; | 301 | pteg[i+1] = r; |
312 | } | 302 | copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); |
313 | } | 303 | } |
314 | 304 | ||
305 | if (!gpte->may_read) | ||
306 | return -EPERM; | ||
307 | return 0; | ||
308 | |||
315 | no_page_found: | 309 | no_page_found: |
316 | return -ENOENT; | 310 | return -ENOENT; |
317 | 311 | ||
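The rewritten translation loop builds the expected first doubleword (v_val) and a mask of the fields that matter (v_mask) once per lookup, so every PTEG entry is matched with a single compare instead of separate valid, secondary-hash and AVPN checks; access rights are then decoded from the PP bits combined with the key, exactly as before. A freestanding sketch of that permission decode is shown below; the HPTE_R_* constants and the gpte struct are simplified stand-ins, not the kernel definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HPTE_R_PP  0x3ULL   /* low two PP bits (simplified stand-in) */
#define HPTE_R_N   0x4ULL   /* no-execute bit (simplified stand-in) */

struct gpte { bool may_read, may_write, may_execute; };

/*
 * Decode access rights from the PP bits of the second doubleword plus the
 * key value (0 or 4), mirroring the switch in kvmppc_mmu_book3s_64_xlate().
 */
static void decode_pp(uint64_t r, uint8_t key, struct gpte *g)
{
        uint8_t pp = (r & HPTE_R_PP) | key;

        g->may_execute = !(r & HPTE_R_N);
        g->may_read = false;
        g->may_write = false;

        switch (pp) {
        case 0: case 1: case 2: case 6:
                g->may_write = true;
                /* fall through */
        case 3: case 5: case 7:
                g->may_read = true;
                break;
        }
}

int main(void)
{
        struct gpte g;

        decode_pp(0x2, 4, &g);   /* pp bits 2, key set -> pp|key == 6 */
        printf("r=%d w=%d x=%d\n", g.may_read, g.may_write, g.may_execute);
        return 0;
}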
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 710d31317d81..043eec8461e7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -37,6 +37,8 @@ | |||
37 | #include <asm/ppc-opcode.h> | 37 | #include <asm/ppc-opcode.h> |
38 | #include <asm/cputable.h> | 38 | #include <asm/cputable.h> |
39 | 39 | ||
40 | #include "book3s_hv_cma.h" | ||
41 | |||
40 | /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ | 42 | /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ |
41 | #define MAX_LPID_970 63 | 43 | #define MAX_LPID_970 63 |
42 | 44 | ||
@@ -52,8 +54,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) | |||
52 | { | 54 | { |
53 | unsigned long hpt; | 55 | unsigned long hpt; |
54 | struct revmap_entry *rev; | 56 | struct revmap_entry *rev; |
55 | struct kvmppc_linear_info *li; | 57 | struct page *page = NULL; |
56 | long order = kvm_hpt_order; | 58 | long order = KVM_DEFAULT_HPT_ORDER; |
57 | 59 | ||
58 | if (htab_orderp) { | 60 | if (htab_orderp) { |
59 | order = *htab_orderp; | 61 | order = *htab_orderp; |
@@ -61,26 +63,23 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) | |||
61 | order = PPC_MIN_HPT_ORDER; | 63 | order = PPC_MIN_HPT_ORDER; |
62 | } | 64 | } |
63 | 65 | ||
66 | kvm->arch.hpt_cma_alloc = 0; | ||
64 | /* | 67 | /* |
65 | * If the user wants a different size from default, | ||
66 | * try first to allocate it from the kernel page allocator. | 68 | * try first to allocate it from the kernel page allocator. |
69 | * We keep the CMA reserved for failed allocation. | ||
67 | */ | 70 | */ |
68 | hpt = 0; | 71 | hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | |
69 | if (order != kvm_hpt_order) { | 72 | __GFP_NOWARN, order - PAGE_SHIFT); |
70 | hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| | ||
71 | __GFP_NOWARN, order - PAGE_SHIFT); | ||
72 | if (!hpt) | ||
73 | --order; | ||
74 | } | ||
75 | 73 | ||
76 | /* Next try to allocate from the preallocated pool */ | 74 | /* Next try to allocate from the preallocated pool */ |
77 | if (!hpt) { | 75 | if (!hpt) { |
78 | li = kvm_alloc_hpt(); | 76 | VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); |
79 | if (li) { | 77 | page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); |
80 | hpt = (ulong)li->base_virt; | 78 | if (page) { |
81 | kvm->arch.hpt_li = li; | 79 | hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); |
82 | order = kvm_hpt_order; | 80 | kvm->arch.hpt_cma_alloc = 1; |
83 | } | 81 | } else |
82 | --order; | ||
84 | } | 83 | } |
85 | 84 | ||
86 | /* Lastly try successively smaller sizes from the page allocator */ | 85 | /* Lastly try successively smaller sizes from the page allocator */ |
@@ -118,8 +117,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) | |||
118 | return 0; | 117 | return 0; |
119 | 118 | ||
120 | out_freehpt: | 119 | out_freehpt: |
121 | if (kvm->arch.hpt_li) | 120 | if (kvm->arch.hpt_cma_alloc) |
122 | kvm_release_hpt(kvm->arch.hpt_li); | 121 | kvm_release_hpt(page, 1 << (order - PAGE_SHIFT)); |
123 | else | 122 | else |
124 | free_pages(hpt, order - PAGE_SHIFT); | 123 | free_pages(hpt, order - PAGE_SHIFT); |
125 | return -ENOMEM; | 124 | return -ENOMEM; |
@@ -165,8 +164,9 @@ void kvmppc_free_hpt(struct kvm *kvm) | |||
165 | { | 164 | { |
166 | kvmppc_free_lpid(kvm->arch.lpid); | 165 | kvmppc_free_lpid(kvm->arch.lpid); |
167 | vfree(kvm->arch.revmap); | 166 | vfree(kvm->arch.revmap); |
168 | if (kvm->arch.hpt_li) | 167 | if (kvm->arch.hpt_cma_alloc) |
169 | kvm_release_hpt(kvm->arch.hpt_li); | 168 | kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), |
169 | 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); | ||
170 | else | 170 | else |
171 | free_pages(kvm->arch.hpt_virt, | 171 | free_pages(kvm->arch.hpt_virt, |
172 | kvm->arch.hpt_order - PAGE_SHIFT); | 172 | kvm->arch.hpt_order - PAGE_SHIFT); |
@@ -1579,7 +1579,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) | |||
1579 | ctx->first_pass = 1; | 1579 | ctx->first_pass = 1; |
1580 | 1580 | ||
1581 | rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; | 1581 | rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; |
1582 | ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); | 1582 | ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); |
1583 | if (ret < 0) { | 1583 | if (ret < 0) { |
1584 | kvm_put_kvm(kvm); | 1584 | kvm_put_kvm(kvm); |
1585 | return ret; | 1585 | return ret; |
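With the CMA-backed pool, kvmppc_alloc_hpt() now tries the regular page allocator first, falls back to the reserved CMA area, and only then retries with smaller orders, recording hpt_cma_alloc so the release paths above can pick kvm_release_hpt() or free_pages() accordingly. The following is a rough userspace model of that fallback policy; buddy_alloc(), cma_alloc() and MIN_ORDER are invented stubs for illustration, not kernel interfaces.

#include <stdbool.h>
#include <stdio.h>

#define MIN_ORDER 18   /* assumed smallest acceptable HPT order */

/* Invented stub allocators: return true on success. */
static bool buddy_alloc(int order) { return order <= 20; }
static bool cma_alloc(int order)   { return order <= 24; }

/* Buddy first, then CMA at the requested order, then shrink the order. */
static int alloc_hpt(int order, bool *from_cma)
{
        *from_cma = false;
        if (buddy_alloc(order))
                return order;
        if (cma_alloc(order)) {
                *from_cma = true;
                return order;
        }
        while (--order >= MIN_ORDER)
                if (buddy_alloc(order))
                        return order;
        return -1;
}

int main(void)
{
        bool cma;
        int got = alloc_hpt(22, &cma);   /* ask for a 4MB (order 22) HPT */

        printf("order=%d from_cma=%d\n", got, cma);
        return 0;
}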
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index b2d3f3b2de72..54cf9bc94dad 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
@@ -136,7 +136,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
136 | mutex_unlock(&kvm->lock); | 136 | mutex_unlock(&kvm->lock); |
137 | 137 | ||
138 | return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, | 138 | return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, |
139 | stt, O_RDWR); | 139 | stt, O_RDWR | O_CLOEXEC); |
140 | 140 | ||
141 | fail: | 141 | fail: |
142 | if (stt) { | 142 | if (stt) { |
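Several of the anon-inode descriptors in this series (kvm-htab above, kvm-spapr-tce here, kvm-rma later) now pass O_CLOEXEC so they are not silently inherited across exec(). The effect of the flag is generic and can be seen with an ordinary file descriptor; the snippet below is a plain userspace illustration, not KVM code.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Without O_CLOEXEC this descriptor would survive an execve(). */
        int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        int flags = fcntl(fd, F_GETFD);
        printf("FD_CLOEXEC set: %s\n", (flags & FD_CLOEXEC) ? "yes" : "no");
        close(fd);
        return 0;
}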
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 1f6344c4408d..360ce68c9809 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
@@ -458,6 +458,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) | |||
458 | case SPRN_PMC4_GEKKO: | 458 | case SPRN_PMC4_GEKKO: |
459 | case SPRN_WPAR_GEKKO: | 459 | case SPRN_WPAR_GEKKO: |
460 | case SPRN_MSSSR0: | 460 | case SPRN_MSSSR0: |
461 | case SPRN_DABR: | ||
461 | break; | 462 | break; |
462 | unprivileged: | 463 | unprivileged: |
463 | default: | 464 | default: |
@@ -555,6 +556,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
555 | case SPRN_PMC4_GEKKO: | 556 | case SPRN_PMC4_GEKKO: |
556 | case SPRN_WPAR_GEKKO: | 557 | case SPRN_WPAR_GEKKO: |
557 | case SPRN_MSSSR0: | 558 | case SPRN_MSSSR0: |
559 | case SPRN_DABR: | ||
558 | *spr_val = 0; | 560 | *spr_val = 0; |
559 | break; | 561 | break; |
560 | default: | 562 | default: |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7629cd3eb91a..b0ee3bc9ca76 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -680,13 +680,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
680 | } | 680 | } |
681 | 681 | ||
682 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 682 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
683 | struct kvm_sregs *sregs) | 683 | struct kvm_sregs *sregs) |
684 | { | 684 | { |
685 | int i; | 685 | int i; |
686 | 686 | ||
687 | sregs->pvr = vcpu->arch.pvr; | ||
688 | |||
689 | memset(sregs, 0, sizeof(struct kvm_sregs)); | 687 | memset(sregs, 0, sizeof(struct kvm_sregs)); |
688 | sregs->pvr = vcpu->arch.pvr; | ||
690 | for (i = 0; i < vcpu->arch.slb_max; i++) { | 689 | for (i = 0; i < vcpu->arch.slb_max; i++) { |
691 | sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; | 690 | sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; |
692 | sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; | 691 | sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; |
@@ -696,7 +695,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
696 | } | 695 | } |
697 | 696 | ||
698 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 697 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
699 | struct kvm_sregs *sregs) | 698 | struct kvm_sregs *sregs) |
700 | { | 699 | { |
701 | int i, j; | 700 | int i, j; |
702 | 701 | ||
@@ -1511,10 +1510,10 @@ static inline int lpcr_rmls(unsigned long rma_size) | |||
1511 | 1510 | ||
1512 | static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1511 | static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1513 | { | 1512 | { |
1514 | struct kvmppc_linear_info *ri = vma->vm_file->private_data; | ||
1515 | struct page *page; | 1513 | struct page *page; |
1514 | struct kvm_rma_info *ri = vma->vm_file->private_data; | ||
1516 | 1515 | ||
1517 | if (vmf->pgoff >= ri->npages) | 1516 | if (vmf->pgoff >= kvm_rma_pages) |
1518 | return VM_FAULT_SIGBUS; | 1517 | return VM_FAULT_SIGBUS; |
1519 | 1518 | ||
1520 | page = pfn_to_page(ri->base_pfn + vmf->pgoff); | 1519 | page = pfn_to_page(ri->base_pfn + vmf->pgoff); |
@@ -1536,7 +1535,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) | |||
1536 | 1535 | ||
1537 | static int kvm_rma_release(struct inode *inode, struct file *filp) | 1536 | static int kvm_rma_release(struct inode *inode, struct file *filp) |
1538 | { | 1537 | { |
1539 | struct kvmppc_linear_info *ri = filp->private_data; | 1538 | struct kvm_rma_info *ri = filp->private_data; |
1540 | 1539 | ||
1541 | kvm_release_rma(ri); | 1540 | kvm_release_rma(ri); |
1542 | return 0; | 1541 | return 0; |
@@ -1549,18 +1548,27 @@ static const struct file_operations kvm_rma_fops = { | |||
1549 | 1548 | ||
1550 | long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) | 1549 | long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) |
1551 | { | 1550 | { |
1552 | struct kvmppc_linear_info *ri; | ||
1553 | long fd; | 1551 | long fd; |
1552 | struct kvm_rma_info *ri; | ||
1553 | /* | ||
1554 | * Only do this on PPC970 in HV mode | ||
1555 | */ | ||
1556 | if (!cpu_has_feature(CPU_FTR_HVMODE) || | ||
1557 | !cpu_has_feature(CPU_FTR_ARCH_201)) | ||
1558 | return -EINVAL; | ||
1559 | |||
1560 | if (!kvm_rma_pages) | ||
1561 | return -EINVAL; | ||
1554 | 1562 | ||
1555 | ri = kvm_alloc_rma(); | 1563 | ri = kvm_alloc_rma(); |
1556 | if (!ri) | 1564 | if (!ri) |
1557 | return -ENOMEM; | 1565 | return -ENOMEM; |
1558 | 1566 | ||
1559 | fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR); | 1567 | fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC); |
1560 | if (fd < 0) | 1568 | if (fd < 0) |
1561 | kvm_release_rma(ri); | 1569 | kvm_release_rma(ri); |
1562 | 1570 | ||
1563 | ret->rma_size = ri->npages << PAGE_SHIFT; | 1571 | ret->rma_size = kvm_rma_pages << PAGE_SHIFT; |
1564 | return fd; | 1572 | return fd; |
1565 | } | 1573 | } |
1566 | 1574 | ||
@@ -1725,7 +1733,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) | |||
1725 | { | 1733 | { |
1726 | int err = 0; | 1734 | int err = 0; |
1727 | struct kvm *kvm = vcpu->kvm; | 1735 | struct kvm *kvm = vcpu->kvm; |
1728 | struct kvmppc_linear_info *ri = NULL; | 1736 | struct kvm_rma_info *ri = NULL; |
1729 | unsigned long hva; | 1737 | unsigned long hva; |
1730 | struct kvm_memory_slot *memslot; | 1738 | struct kvm_memory_slot *memslot; |
1731 | struct vm_area_struct *vma; | 1739 | struct vm_area_struct *vma; |
@@ -1803,7 +1811,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) | |||
1803 | 1811 | ||
1804 | } else { | 1812 | } else { |
1805 | /* Set up to use an RMO region */ | 1813 | /* Set up to use an RMO region */ |
1806 | rma_size = ri->npages; | 1814 | rma_size = kvm_rma_pages; |
1807 | if (rma_size > memslot->npages) | 1815 | if (rma_size > memslot->npages) |
1808 | rma_size = memslot->npages; | 1816 | rma_size = memslot->npages; |
1809 | rma_size <<= PAGE_SHIFT; | 1817 | rma_size <<= PAGE_SHIFT; |
@@ -1831,14 +1839,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) | |||
1831 | /* POWER7 */ | 1839 | /* POWER7 */ |
1832 | lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); | 1840 | lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); |
1833 | lpcr |= rmls << LPCR_RMLS_SH; | 1841 | lpcr |= rmls << LPCR_RMLS_SH; |
1834 | kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; | 1842 | kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; |
1835 | } | 1843 | } |
1836 | kvm->arch.lpcr = lpcr; | 1844 | kvm->arch.lpcr = lpcr; |
1837 | pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", | 1845 | pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", |
1838 | ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); | 1846 | ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); |
1839 | 1847 | ||
1840 | /* Initialize phys addrs of pages in RMO */ | 1848 | /* Initialize phys addrs of pages in RMO */ |
1841 | npages = ri->npages; | 1849 | npages = kvm_rma_pages; |
1842 | porder = __ilog2(npages); | 1850 | porder = __ilog2(npages); |
1843 | physp = memslot->arch.slot_phys; | 1851 | physp = memslot->arch.slot_phys; |
1844 | if (physp) { | 1852 | if (physp) { |
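Two separate fixes sit in the book3s_hv.c hunks: kvm_arch_vcpu_ioctl_get_sregs() used to store the PVR and then wipe it with the following memset(), so the assignment now comes after the clear, and the RMA paths switch from kvmppc_linear_info to the CMA-backed kvm_rma_info with the size taken from the global kvm_rma_pages. The memset-ordering bug is the classic pattern below; the struct and values are only for illustration.

#include <stdio.h>
#include <string.h>

struct sregs { unsigned int pvr; unsigned int slb[8]; };

int main(void)
{
        struct sregs s;

        /* Buggy order: the memset() erases the value just written. */
        s.pvr = 0x4b0201;
        memset(&s, 0, sizeof(s));
        printf("buggy: pvr=0x%x\n", s.pvr);   /* prints 0x0 */

        /* Fixed order: clear first, then fill in the fields. */
        memset(&s, 0, sizeof(s));
        s.pvr = 0x4b0201;
        printf("fixed: pvr=0x%x\n", s.pvr);   /* prints 0x4b0201 */
        return 0;
}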
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index ec0a9e5de100..8cd0daebb82d 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -13,33 +13,34 @@ | |||
13 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/memblock.h> | ||
17 | #include <linux/sizes.h> | ||
16 | 18 | ||
17 | #include <asm/cputable.h> | 19 | #include <asm/cputable.h> |
18 | #include <asm/kvm_ppc.h> | 20 | #include <asm/kvm_ppc.h> |
19 | #include <asm/kvm_book3s.h> | 21 | #include <asm/kvm_book3s.h> |
20 | 22 | ||
21 | #define KVM_LINEAR_RMA 0 | 23 | #include "book3s_hv_cma.h" |
22 | #define KVM_LINEAR_HPT 1 | 24 | /* |
23 | 25 | * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) | |
24 | static void __init kvm_linear_init_one(ulong size, int count, int type); | 26 | * should be power of 2. |
25 | static struct kvmppc_linear_info *kvm_alloc_linear(int type); | 27 | */ |
26 | static void kvm_release_linear(struct kvmppc_linear_info *ri); | 28 | #define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */ |
27 | 29 | /* | |
28 | int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER; | 30 | * By default we reserve 5% of memory for hash pagetable allocation. |
29 | EXPORT_SYMBOL_GPL(kvm_hpt_order); | 31 | */ |
30 | 32 | static unsigned long kvm_cma_resv_ratio = 5; | |
31 | /*************** RMA *************/ | ||
32 | |||
33 | /* | 33 | /* |
34 | * This maintains a list of RMAs (real mode areas) for KVM guests to use. | 34 | * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. |
35 | * Each RMA has to be physically contiguous and of a size that the | 35 | * Each RMA has to be physically contiguous and of a size that the |
36 | * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, | 36 | * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, |
37 | * and other larger sizes. Since we are unlikely to be able to allocate that | 37 | * and other larger sizes. Since we are unlikely to be able to allocate that |
38 | * much physically contiguous memory after the system is up and running, | 38 | * much physically contiguous memory after the system is up and running, |
39 | * we preallocate a set of RMAs in early boot for KVM to use. | 39 | * we preallocate a set of RMAs in early boot using CMA. |
40 | * The RMA size should be a power of 2. | ||
40 | */ | 41 | */ |
41 | static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ | 42 | unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ |
42 | static unsigned long kvm_rma_count; | 43 | EXPORT_SYMBOL_GPL(kvm_rma_pages); |
43 | 44 | ||
44 | /* Work out RMLS (real mode limit selector) field value for a given RMA size. | 45 | /* Work out RMLS (real mode limit selector) field value for a given RMA size. |
45 | Assumes POWER7 or PPC970. */ | 46 | Assumes POWER7 or PPC970. */ |
@@ -69,165 +70,114 @@ static inline int lpcr_rmls(unsigned long rma_size) | |||
69 | 70 | ||
70 | static int __init early_parse_rma_size(char *p) | 71 | static int __init early_parse_rma_size(char *p) |
71 | { | 72 | { |
72 | if (!p) | 73 | unsigned long kvm_rma_size; |
73 | return 1; | ||
74 | 74 | ||
75 | pr_debug("%s(%s)\n", __func__, p); | ||
76 | if (!p) | ||
77 | return -EINVAL; | ||
75 | kvm_rma_size = memparse(p, &p); | 78 | kvm_rma_size = memparse(p, &p); |
76 | 79 | /* | |
80 | * Check that the requested size is one supported in hardware | ||
81 | */ | ||
82 | if (lpcr_rmls(kvm_rma_size) < 0) { | ||
83 | pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); | ||
84 | return -EINVAL; | ||
85 | } | ||
86 | kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT; | ||
77 | return 0; | 87 | return 0; |
78 | } | 88 | } |
79 | early_param("kvm_rma_size", early_parse_rma_size); | 89 | early_param("kvm_rma_size", early_parse_rma_size); |
80 | 90 | ||
81 | static int __init early_parse_rma_count(char *p) | 91 | struct kvm_rma_info *kvm_alloc_rma() |
82 | { | 92 | { |
83 | if (!p) | 93 | struct page *page; |
84 | return 1; | 94 | struct kvm_rma_info *ri; |
85 | 95 | ||
86 | kvm_rma_count = simple_strtoul(p, NULL, 0); | 96 | ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); |
87 | 97 | if (!ri) | |
88 | return 0; | 98 | return NULL; |
89 | } | 99 | page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); |
90 | early_param("kvm_rma_count", early_parse_rma_count); | 100 | if (!page) |
91 | 101 | goto err_out; | |
92 | struct kvmppc_linear_info *kvm_alloc_rma(void) | 102 | atomic_set(&ri->use_count, 1); |
93 | { | 103 | ri->base_pfn = page_to_pfn(page); |
94 | return kvm_alloc_linear(KVM_LINEAR_RMA); | 104 | return ri; |
105 | err_out: | ||
106 | kfree(ri); | ||
107 | return NULL; | ||
95 | } | 108 | } |
96 | EXPORT_SYMBOL_GPL(kvm_alloc_rma); | 109 | EXPORT_SYMBOL_GPL(kvm_alloc_rma); |
97 | 110 | ||
98 | void kvm_release_rma(struct kvmppc_linear_info *ri) | 111 | void kvm_release_rma(struct kvm_rma_info *ri) |
99 | { | 112 | { |
100 | kvm_release_linear(ri); | 113 | if (atomic_dec_and_test(&ri->use_count)) { |
114 | kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); | ||
115 | kfree(ri); | ||
116 | } | ||
101 | } | 117 | } |
102 | EXPORT_SYMBOL_GPL(kvm_release_rma); | 118 | EXPORT_SYMBOL_GPL(kvm_release_rma); |
103 | 119 | ||
104 | /*************** HPT *************/ | 120 | static int __init early_parse_kvm_cma_resv(char *p) |
105 | |||
106 | /* | ||
107 | * This maintains a list of big linear HPT tables that contain the GVA->HPA | ||
108 | * memory mappings. If we don't reserve those early on, we might not be able | ||
109 | * to get a big (usually 16MB) linear memory region from the kernel anymore. | ||
110 | */ | ||
111 | |||
112 | static unsigned long kvm_hpt_count; | ||
113 | |||
114 | static int __init early_parse_hpt_count(char *p) | ||
115 | { | 121 | { |
122 | pr_debug("%s(%s)\n", __func__, p); | ||
116 | if (!p) | 123 | if (!p) |
117 | return 1; | 124 | return -EINVAL; |
118 | 125 | return kstrtoul(p, 0, &kvm_cma_resv_ratio); | |
119 | kvm_hpt_count = simple_strtoul(p, NULL, 0); | ||
120 | |||
121 | return 0; | ||
122 | } | 126 | } |
123 | early_param("kvm_hpt_count", early_parse_hpt_count); | 127 | early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); |
124 | 128 | ||
125 | struct kvmppc_linear_info *kvm_alloc_hpt(void) | 129 | struct page *kvm_alloc_hpt(unsigned long nr_pages) |
126 | { | 130 | { |
127 | return kvm_alloc_linear(KVM_LINEAR_HPT); | 131 | unsigned long align_pages = HPT_ALIGN_PAGES; |
132 | |||
133 | /* Old CPUs require HPT aligned on a multiple of its size */ | ||
134 | if (!cpu_has_feature(CPU_FTR_ARCH_206)) | ||
135 | align_pages = nr_pages; | ||
136 | return kvm_alloc_cma(nr_pages, align_pages); | ||
128 | } | 137 | } |
129 | EXPORT_SYMBOL_GPL(kvm_alloc_hpt); | 138 | EXPORT_SYMBOL_GPL(kvm_alloc_hpt); |
130 | 139 | ||
131 | void kvm_release_hpt(struct kvmppc_linear_info *li) | 140 | void kvm_release_hpt(struct page *page, unsigned long nr_pages) |
132 | { | 141 | { |
133 | kvm_release_linear(li); | 142 | kvm_release_cma(page, nr_pages); |
134 | } | 143 | } |
135 | EXPORT_SYMBOL_GPL(kvm_release_hpt); | 144 | EXPORT_SYMBOL_GPL(kvm_release_hpt); |
136 | 145 | ||
137 | /*************** generic *************/ | 146 | /** |
138 | 147 | * kvm_cma_reserve() - reserve area for kvm hash pagetable | |
139 | static LIST_HEAD(free_linears); | 148 | * |
140 | static DEFINE_SPINLOCK(linear_lock); | 149 | * This function reserves memory from early allocator. It should be |
141 | 150 | * called by arch specific code once the early allocator (memblock or bootmem) | |
142 | static void __init kvm_linear_init_one(ulong size, int count, int type) | 151 | * has been activated and all other subsystems have already allocated/reserved |
143 | { | 152 | * memory. |
144 | unsigned long i; | ||
145 | unsigned long j, npages; | ||
146 | void *linear; | ||
147 | struct page *pg; | ||
148 | const char *typestr; | ||
149 | struct kvmppc_linear_info *linear_info; | ||
150 | |||
151 | if (!count) | ||
152 | return; | ||
153 | |||
154 | typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; | ||
155 | |||
156 | npages = size >> PAGE_SHIFT; | ||
157 | linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); | ||
158 | for (i = 0; i < count; ++i) { | ||
159 | linear = alloc_bootmem_align(size, size); | ||
160 | pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, | ||
161 | size >> 20); | ||
162 | linear_info[i].base_virt = linear; | ||
163 | linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; | ||
164 | linear_info[i].npages = npages; | ||
165 | linear_info[i].type = type; | ||
166 | list_add_tail(&linear_info[i].list, &free_linears); | ||
167 | atomic_set(&linear_info[i].use_count, 0); | ||
168 | |||
169 | pg = pfn_to_page(linear_info[i].base_pfn); | ||
170 | for (j = 0; j < npages; ++j) { | ||
171 | atomic_inc(&pg->_count); | ||
172 | ++pg; | ||
173 | } | ||
174 | } | ||
175 | } | ||
176 | |||
177 | static struct kvmppc_linear_info *kvm_alloc_linear(int type) | ||
178 | { | ||
179 | struct kvmppc_linear_info *ri, *ret; | ||
180 | |||
181 | ret = NULL; | ||
182 | spin_lock(&linear_lock); | ||
183 | list_for_each_entry(ri, &free_linears, list) { | ||
184 | if (ri->type != type) | ||
185 | continue; | ||
186 | |||
187 | list_del(&ri->list); | ||
188 | atomic_inc(&ri->use_count); | ||
189 | memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); | ||
190 | ret = ri; | ||
191 | break; | ||
192 | } | ||
193 | spin_unlock(&linear_lock); | ||
194 | return ret; | ||
195 | } | ||
196 | |||
197 | static void kvm_release_linear(struct kvmppc_linear_info *ri) | ||
198 | { | ||
199 | if (atomic_dec_and_test(&ri->use_count)) { | ||
200 | spin_lock(&linear_lock); | ||
201 | list_add_tail(&ri->list, &free_linears); | ||
202 | spin_unlock(&linear_lock); | ||
203 | |||
204 | } | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * Called at boot time while the bootmem allocator is active, | ||
209 | * to allocate contiguous physical memory for the hash page | ||
210 | * tables for guests. | ||
211 | */ | 153 | */ |
212 | void __init kvm_linear_init(void) | 154 | void __init kvm_cma_reserve(void) |
213 | { | 155 | { |
214 | /* HPT */ | 156 | unsigned long align_size; |
215 | kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT); | 157 | struct memblock_region *reg; |
216 | 158 | phys_addr_t selected_size = 0; | |
217 | /* RMA */ | 159 | /* |
218 | /* Only do this on PPC970 in HV mode */ | 160 | * We cannot use memblock_phys_mem_size() here, because |
219 | if (!cpu_has_feature(CPU_FTR_HVMODE) || | 161 | * memblock_analyze() has not been called yet. |
220 | !cpu_has_feature(CPU_FTR_ARCH_201)) | 162 | */ |
221 | return; | 163 | for_each_memblock(memory, reg) |
222 | 164 | selected_size += memblock_region_memory_end_pfn(reg) - | |
223 | if (!kvm_rma_size || !kvm_rma_count) | 165 | memblock_region_memory_base_pfn(reg); |
224 | return; | 166 | |
225 | 167 | selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; | |
226 | /* Check that the requested size is one supported in hardware */ | 168 | if (selected_size) { |
227 | if (lpcr_rmls(kvm_rma_size) < 0) { | 169 | pr_debug("%s: reserving %ld MiB for global area\n", __func__, |
228 | pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); | 170 | (unsigned long)selected_size / SZ_1M); |
229 | return; | 171 | /* |
172 | * Old CPUs require HPT aligned on a multiple of its size. So for them | ||
173 | * make the alignment as max size we could request. | ||
174 | */ | ||
175 | if (!cpu_has_feature(CPU_FTR_ARCH_206)) | ||
176 | align_size = __rounddown_pow_of_two(selected_size); | ||
177 | else | ||
178 | align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; | ||
179 | |||
180 | align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); | ||
181 | kvm_cma_declare_contiguous(selected_size, align_size); | ||
230 | } | 182 | } |
231 | |||
232 | kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA); | ||
233 | } | 183 | } |
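kvm_cma_reserve() sums the memblock regions by hand because memblock_phys_mem_size() is not usable this early, takes kvm_cma_resv_ratio percent of the total (5% by default, overridable with kvm_cma_resv_ratio= on the command line), and chooses an alignment that is either the HPT alignment or, on pre-ARCH_206 CPUs, the reservation rounded down to a power of two, but never smaller than one RMA. The arithmetic alone, with made-up input values, looks roughly like this:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT       16                  /* assuming 64K pages */
#define HPT_ALIGN_BYTES  (256 * 1024)        /* 256K HPT alignment */
#define RMA_BYTES        (128ULL << 20)      /* default 128MB RMA */

static uint64_t rounddown_pow_of_two(uint64_t x)
{
        uint64_t r = 1;

        while (r * 2 <= x)
                r *= 2;
        return r;
}

int main(void)
{
        uint64_t total_pages = (8ULL << 30) >> PAGE_SHIFT;   /* pretend 8GB RAM */
        unsigned long resv_ratio = 5;                        /* percent */
        int cpu_has_arch_206 = 0;                            /* old-CPU case */

        uint64_t selected = (total_pages * resv_ratio / 100) << PAGE_SHIFT;
        uint64_t align = cpu_has_arch_206 ? HPT_ALIGN_BYTES
                                          : rounddown_pow_of_two(selected);
        if (align < RMA_BYTES)
                align = RMA_BYTES;

        printf("reserve %llu MiB aligned to %llu MiB\n",
               (unsigned long long)(selected >> 20),
               (unsigned long long)(align >> 20));
        return 0;
}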
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c new file mode 100644 index 000000000000..d9d3d8553d51 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_cma.c | |||
@@ -0,0 +1,240 @@ | |||
1 | /* | ||
2 | * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA | ||
3 | * for DMA mapping framework | ||
4 | * | ||
5 | * Copyright IBM Corporation, 2013 | ||
6 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as | ||
10 | * published by the Free Software Foundation; either version 2 of the | ||
11 | * License or (at your option) any later version of the license. | ||
12 | * | ||
13 | */ | ||
14 | #define pr_fmt(fmt) "kvm_cma: " fmt | ||
15 | |||
16 | #ifdef CONFIG_CMA_DEBUG | ||
17 | #ifndef DEBUG | ||
18 | # define DEBUG | ||
19 | #endif | ||
20 | #endif | ||
21 | |||
22 | #include <linux/memblock.h> | ||
23 | #include <linux/mutex.h> | ||
24 | #include <linux/sizes.h> | ||
25 | #include <linux/slab.h> | ||
26 | |||
27 | #include "book3s_hv_cma.h" | ||
28 | |||
29 | struct kvm_cma { | ||
30 | unsigned long base_pfn; | ||
31 | unsigned long count; | ||
32 | unsigned long *bitmap; | ||
33 | }; | ||
34 | |||
35 | static DEFINE_MUTEX(kvm_cma_mutex); | ||
36 | static struct kvm_cma kvm_cma_area; | ||
37 | |||
38 | /** | ||
39 | * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling | ||
40 | * for kvm hash pagetable | ||
41 | * @size: Size of the reserved memory. | ||
42 | * @alignment: Alignment for the contiguous memory area | ||
43 | * | ||
44 | * This function reserves memory for the kvm CMA area. It should be | ||
45 | * called by arch code while the early allocator (memblock or bootmem) | ||
46 | * is still active. | ||
47 | */ | ||
48 | long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment) | ||
49 | { | ||
50 | long base_pfn; | ||
51 | phys_addr_t addr; | ||
52 | struct kvm_cma *cma = &kvm_cma_area; | ||
53 | |||
54 | pr_debug("%s(size %lx)\n", __func__, (unsigned long)size); | ||
55 | |||
56 | if (!size) | ||
57 | return -EINVAL; | ||
58 | /* | ||
59 | * Sanitise input arguments. | ||
60 | * We should be pageblock aligned for CMA. | ||
61 | */ | ||
62 | alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order)); | ||
63 | size = ALIGN(size, alignment); | ||
64 | /* | ||
65 | * Reserve memory | ||
66 | * Use __memblock_alloc_base() since | ||
67 | * memblock_alloc_base() panic()s. | ||
68 | */ | ||
69 | addr = __memblock_alloc_base(size, alignment, 0); | ||
70 | if (!addr) { | ||
71 | base_pfn = -ENOMEM; | ||
72 | goto err; | ||
73 | } else | ||
74 | base_pfn = PFN_DOWN(addr); | ||
75 | |||
76 | /* | ||
77 | * Each reserved area must be initialised later, when more kernel | ||
78 | * subsystems (like slab allocator) are available. | ||
79 | */ | ||
80 | cma->base_pfn = base_pfn; | ||
81 | cma->count = size >> PAGE_SHIFT; | ||
82 | pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M); | ||
83 | return 0; | ||
84 | err: | ||
85 | pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); | ||
86 | return base_pfn; | ||
87 | } | ||
88 | |||
89 | /** | ||
90 | * kvm_alloc_cma() - allocate pages from contiguous area | ||
91 | * @nr_pages: Requested number of pages. | ||
92 | * @align_pages: Requested alignment in number of pages | ||
93 | * | ||
94 | * This function allocates memory buffer for hash pagetable. | ||
95 | */ | ||
96 | struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages) | ||
97 | { | ||
98 | int ret; | ||
99 | struct page *page = NULL; | ||
100 | struct kvm_cma *cma = &kvm_cma_area; | ||
101 | unsigned long chunk_count, nr_chunk; | ||
102 | unsigned long mask, pfn, pageno, start = 0; | ||
103 | |||
104 | |||
105 | if (!cma || !cma->count) | ||
106 | return NULL; | ||
107 | |||
108 | pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__, | ||
109 | (void *)cma, nr_pages, align_pages); | ||
110 | |||
111 | if (!nr_pages) | ||
112 | return NULL; | ||
113 | /* | ||
114 | * align mask with chunk size. The bit tracks pages in chunk size | ||
115 | */ | ||
116 | VM_BUG_ON(!is_power_of_2(align_pages)); | ||
117 | mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1; | ||
118 | BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER); | ||
119 | |||
120 | chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | ||
121 | nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | ||
122 | |||
123 | mutex_lock(&kvm_cma_mutex); | ||
124 | for (;;) { | ||
125 | pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count, | ||
126 | start, nr_chunk, mask); | ||
127 | if (pageno >= chunk_count) | ||
128 | break; | ||
129 | |||
130 | pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)); | ||
131 | ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA); | ||
132 | if (ret == 0) { | ||
133 | bitmap_set(cma->bitmap, pageno, nr_chunk); | ||
134 | page = pfn_to_page(pfn); | ||
135 | memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT); | ||
136 | break; | ||
137 | } else if (ret != -EBUSY) { | ||
138 | break; | ||
139 | } | ||
140 | pr_debug("%s(): memory range at %p is busy, retrying\n", | ||
141 | __func__, pfn_to_page(pfn)); | ||
142 | /* try again with a bit different memory target */ | ||
143 | start = pageno + mask + 1; | ||
144 | } | ||
145 | mutex_unlock(&kvm_cma_mutex); | ||
146 | pr_debug("%s(): returned %p\n", __func__, page); | ||
147 | return page; | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * kvm_release_cma() - release allocated pages for hash pagetable | ||
152 | * @pages: Allocated pages. | ||
153 | * @nr_pages: Number of allocated pages. | ||
154 | * | ||
155 | * This function releases memory allocated by kvm_alloc_cma(). | ||
156 | * It returns false when the provided pages do not belong to the contiguous area and | ||
157 | * true otherwise. | ||
158 | */ | ||
159 | bool kvm_release_cma(struct page *pages, unsigned long nr_pages) | ||
160 | { | ||
161 | unsigned long pfn; | ||
162 | unsigned long nr_chunk; | ||
163 | struct kvm_cma *cma = &kvm_cma_area; | ||
164 | |||
165 | if (!cma || !pages) | ||
166 | return false; | ||
167 | |||
168 | pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages); | ||
169 | |||
170 | pfn = page_to_pfn(pages); | ||
171 | |||
172 | if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) | ||
173 | return false; | ||
174 | |||
175 | VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count); | ||
176 | nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | ||
177 | |||
178 | mutex_lock(&kvm_cma_mutex); | ||
179 | bitmap_clear(cma->bitmap, | ||
180 | (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT), | ||
181 | nr_chunk); | ||
182 | free_contig_range(pfn, nr_pages); | ||
183 | mutex_unlock(&kvm_cma_mutex); | ||
184 | |||
185 | return true; | ||
186 | } | ||
187 | |||
188 | static int __init kvm_cma_activate_area(unsigned long base_pfn, | ||
189 | unsigned long count) | ||
190 | { | ||
191 | unsigned long pfn = base_pfn; | ||
192 | unsigned i = count >> pageblock_order; | ||
193 | struct zone *zone; | ||
194 | |||
195 | WARN_ON_ONCE(!pfn_valid(pfn)); | ||
196 | zone = page_zone(pfn_to_page(pfn)); | ||
197 | do { | ||
198 | unsigned j; | ||
199 | base_pfn = pfn; | ||
200 | for (j = pageblock_nr_pages; j; --j, pfn++) { | ||
201 | WARN_ON_ONCE(!pfn_valid(pfn)); | ||
202 | /* | ||
203 | * alloc_contig_range requires the pfn range | ||
204 | * specified to be in the same zone. Make this | ||
205 | * simple by forcing the entire CMA resv range | ||
206 | * to be in the same zone. | ||
207 | */ | ||
208 | if (page_zone(pfn_to_page(pfn)) != zone) | ||
209 | return -EINVAL; | ||
210 | } | ||
211 | init_cma_reserved_pageblock(pfn_to_page(base_pfn)); | ||
212 | } while (--i); | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static int __init kvm_cma_init_reserved_areas(void) | ||
217 | { | ||
218 | int bitmap_size, ret; | ||
219 | unsigned long chunk_count; | ||
220 | struct kvm_cma *cma = &kvm_cma_area; | ||
221 | |||
222 | pr_debug("%s()\n", __func__); | ||
223 | if (!cma->count) | ||
224 | return 0; | ||
225 | chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); | ||
226 | bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long); | ||
227 | cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
228 | if (!cma->bitmap) | ||
229 | return -ENOMEM; | ||
230 | |||
231 | ret = kvm_cma_activate_area(cma->base_pfn, cma->count); | ||
232 | if (ret) | ||
233 | goto error; | ||
234 | return 0; | ||
235 | |||
236 | error: | ||
237 | kfree(cma->bitmap); | ||
238 | return ret; | ||
239 | } | ||
240 | core_initcall(kvm_cma_init_reserved_areas); | ||
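This private copy of CMA tracks the reserved range in 256K chunks (KVM_CMA_CHUNK_ORDER) rather than individual pages, which keeps the bitmap small: page counts and alignments are shifted down by (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT) before searching for a free run. A toy userspace version of that chunk-granular search, with a byte-per-chunk map standing in for bitmap_find_next_zero_area() and alloc_contig_range(), is sketched below.

#include <stdio.h>
#include <string.h>

#define NCHUNKS 32   /* toy area: 32 chunks of 256K each */

static unsigned char used[NCHUNKS];   /* one byte per chunk, 0 = free */

/*
 * Find nr_chunk free chunks starting at a multiple of 'align' chunks;
 * return the first chunk index, or -1 if nothing fits.
 */
static int alloc_chunks(int nr_chunk, int align)
{
        for (int start = 0; start + nr_chunk <= NCHUNKS; start += align) {
                int i;

                for (i = 0; i < nr_chunk && !used[start + i]; i++)
                        ;
                if (i == nr_chunk) {
                        memset(&used[start], 1, nr_chunk);
                        return start;
                }
        }
        return -1;
}

static void release_chunks(int start, int nr_chunk)
{
        memset(&used[start], 0, nr_chunk);
}

int main(void)
{
        int a = alloc_chunks(4, 4);   /* e.g. a 1MB HPT, 1MB aligned */
        int b = alloc_chunks(8, 8);   /* e.g. a 2MB allocation */

        printf("a=%d b=%d\n", a, b);  /* a=0 b=8 */
        release_chunks(a, 4);
        release_chunks(b, 8);
        return 0;
}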
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h new file mode 100644 index 000000000000..655144f75fa5 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_cma.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA | ||
3 | * for DMA mapping framework | ||
4 | * | ||
5 | * Copyright IBM Corporation, 2013 | ||
6 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License as | ||
10 | * published by the Free Software Foundation; either version 2 of the | ||
11 | * License or (at your option) any later version of the license. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef __POWERPC_KVM_CMA_ALLOC_H__ | ||
16 | #define __POWERPC_KVM_CMA_ALLOC_H__ | ||
17 | /* | ||
18 | * Both RMA and Hash page allocation will be multiple of 256K. | ||
19 | */ | ||
20 | #define KVM_CMA_CHUNK_ORDER 18 | ||
21 | |||
22 | extern struct page *kvm_alloc_cma(unsigned long nr_pages, | ||
23 | unsigned long align_pages); | ||
24 | extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages); | ||
25 | extern long kvm_cma_declare_contiguous(phys_addr_t size, | ||
26 | phys_addr_t alignment) __init; | ||
27 | #endif | ||
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fc25689a9f35..45e30d6e462b 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -383,6 +383,80 @@ static inline int try_lock_tlbie(unsigned int *lock) | |||
383 | return old == 0; | 383 | return old == 0; |
384 | } | 384 | } |
385 | 385 | ||
386 | /* | ||
387 | * tlbie/tlbiel is a bit different on the PPC970 compared to later | ||
388 | * processors such as POWER7; the large page bit is in the instruction | ||
389 | * not RB, and the top 16 bits and the bottom 12 bits of the VA | ||
390 | * in RB must be 0. | ||
391 | */ | ||
392 | static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, | ||
393 | long npages, int global, bool need_sync) | ||
394 | { | ||
395 | long i; | ||
396 | |||
397 | if (global) { | ||
398 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
399 | cpu_relax(); | ||
400 | if (need_sync) | ||
401 | asm volatile("ptesync" : : : "memory"); | ||
402 | for (i = 0; i < npages; ++i) { | ||
403 | unsigned long rb = rbvalues[i]; | ||
404 | |||
405 | if (rb & 1) /* large page */ | ||
406 | asm volatile("tlbie %0,1" : : | ||
407 | "r" (rb & 0x0000fffffffff000ul)); | ||
408 | else | ||
409 | asm volatile("tlbie %0,0" : : | ||
410 | "r" (rb & 0x0000fffffffff000ul)); | ||
411 | } | ||
412 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | ||
413 | kvm->arch.tlbie_lock = 0; | ||
414 | } else { | ||
415 | if (need_sync) | ||
416 | asm volatile("ptesync" : : : "memory"); | ||
417 | for (i = 0; i < npages; ++i) { | ||
418 | unsigned long rb = rbvalues[i]; | ||
419 | |||
420 | if (rb & 1) /* large page */ | ||
421 | asm volatile("tlbiel %0,1" : : | ||
422 | "r" (rb & 0x0000fffffffff000ul)); | ||
423 | else | ||
424 | asm volatile("tlbiel %0,0" : : | ||
425 | "r" (rb & 0x0000fffffffff000ul)); | ||
426 | } | ||
427 | asm volatile("ptesync" : : : "memory"); | ||
428 | } | ||
429 | } | ||
430 | |||
431 | static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, | ||
432 | long npages, int global, bool need_sync) | ||
433 | { | ||
434 | long i; | ||
435 | |||
436 | if (cpu_has_feature(CPU_FTR_ARCH_201)) { | ||
437 | /* PPC970 tlbie instruction is a bit different */ | ||
438 | do_tlbies_970(kvm, rbvalues, npages, global, need_sync); | ||
439 | return; | ||
440 | } | ||
441 | if (global) { | ||
442 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
443 | cpu_relax(); | ||
444 | if (need_sync) | ||
445 | asm volatile("ptesync" : : : "memory"); | ||
446 | for (i = 0; i < npages; ++i) | ||
447 | asm volatile(PPC_TLBIE(%1,%0) : : | ||
448 | "r" (rbvalues[i]), "r" (kvm->arch.lpid)); | ||
449 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | ||
450 | kvm->arch.tlbie_lock = 0; | ||
451 | } else { | ||
452 | if (need_sync) | ||
453 | asm volatile("ptesync" : : : "memory"); | ||
454 | for (i = 0; i < npages; ++i) | ||
455 | asm volatile("tlbiel %0" : : "r" (rbvalues[i])); | ||
456 | asm volatile("ptesync" : : : "memory"); | ||
457 | } | ||
458 | } | ||
459 | |||
386 | long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, | 460 | long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, |
387 | unsigned long pte_index, unsigned long avpn, | 461 | unsigned long pte_index, unsigned long avpn, |
388 | unsigned long *hpret) | 462 | unsigned long *hpret) |
@@ -408,19 +482,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, | |||
408 | if (v & HPTE_V_VALID) { | 482 | if (v & HPTE_V_VALID) { |
409 | hpte[0] &= ~HPTE_V_VALID; | 483 | hpte[0] &= ~HPTE_V_VALID; |
410 | rb = compute_tlbie_rb(v, hpte[1], pte_index); | 484 | rb = compute_tlbie_rb(v, hpte[1], pte_index); |
411 | if (global_invalidates(kvm, flags)) { | 485 | do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); |
412 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
413 | cpu_relax(); | ||
414 | asm volatile("ptesync" : : : "memory"); | ||
415 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
416 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
417 | asm volatile("ptesync" : : : "memory"); | ||
418 | kvm->arch.tlbie_lock = 0; | ||
419 | } else { | ||
420 | asm volatile("ptesync" : : : "memory"); | ||
421 | asm volatile("tlbiel %0" : : "r" (rb)); | ||
422 | asm volatile("ptesync" : : : "memory"); | ||
423 | } | ||
424 | /* Read PTE low word after tlbie to get final R/C values */ | 486 | /* Read PTE low word after tlbie to get final R/C values */ |
425 | remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); | 487 | remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); |
426 | } | 488 | } |
@@ -448,12 +510,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
448 | unsigned long *hp, *hptes[4], tlbrb[4]; | 510 | unsigned long *hp, *hptes[4], tlbrb[4]; |
449 | long int i, j, k, n, found, indexes[4]; | 511 | long int i, j, k, n, found, indexes[4]; |
450 | unsigned long flags, req, pte_index, rcbits; | 512 | unsigned long flags, req, pte_index, rcbits; |
451 | long int local = 0; | 513 | int global; |
452 | long int ret = H_SUCCESS; | 514 | long int ret = H_SUCCESS; |
453 | struct revmap_entry *rev, *revs[4]; | 515 | struct revmap_entry *rev, *revs[4]; |
454 | 516 | ||
455 | if (atomic_read(&kvm->online_vcpus) == 1) | 517 | global = global_invalidates(kvm, 0); |
456 | local = 1; | ||
457 | for (i = 0; i < 4 && ret == H_SUCCESS; ) { | 518 | for (i = 0; i < 4 && ret == H_SUCCESS; ) { |
458 | n = 0; | 519 | n = 0; |
459 | for (; i < 4; ++i) { | 520 | for (; i < 4; ++i) { |
@@ -529,22 +590,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
529 | break; | 590 | break; |
530 | 591 | ||
531 | /* Now that we've collected a batch, do the tlbies */ | 592 | /* Now that we've collected a batch, do the tlbies */ |
532 | if (!local) { | 593 | do_tlbies(kvm, tlbrb, n, global, true); |
533 | while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
534 | cpu_relax(); | ||
535 | asm volatile("ptesync" : : : "memory"); | ||
536 | for (k = 0; k < n; ++k) | ||
537 | asm volatile(PPC_TLBIE(%1,%0) : : | ||
538 | "r" (tlbrb[k]), | ||
539 | "r" (kvm->arch.lpid)); | ||
540 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | ||
541 | kvm->arch.tlbie_lock = 0; | ||
542 | } else { | ||
543 | asm volatile("ptesync" : : : "memory"); | ||
544 | for (k = 0; k < n; ++k) | ||
545 | asm volatile("tlbiel %0" : : "r" (tlbrb[k])); | ||
546 | asm volatile("ptesync" : : : "memory"); | ||
547 | } | ||
548 | 594 | ||
549 | /* Read PTE low words after tlbie to get final R/C values */ | 595 | /* Read PTE low words after tlbie to get final R/C values */ |
550 | for (k = 0; k < n; ++k) { | 596 | for (k = 0; k < n; ++k) { |
@@ -603,19 +649,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, | |||
603 | if (v & HPTE_V_VALID) { | 649 | if (v & HPTE_V_VALID) { |
604 | rb = compute_tlbie_rb(v, r, pte_index); | 650 | rb = compute_tlbie_rb(v, r, pte_index); |
605 | hpte[0] = v & ~HPTE_V_VALID; | 651 | hpte[0] = v & ~HPTE_V_VALID; |
606 | if (global_invalidates(kvm, flags)) { | 652 | do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); |
607 | while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
608 | cpu_relax(); | ||
609 | asm volatile("ptesync" : : : "memory"); | ||
610 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
611 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
612 | asm volatile("ptesync" : : : "memory"); | ||
613 | kvm->arch.tlbie_lock = 0; | ||
614 | } else { | ||
615 | asm volatile("ptesync" : : : "memory"); | ||
616 | asm volatile("tlbiel %0" : : "r" (rb)); | ||
617 | asm volatile("ptesync" : : : "memory"); | ||
618 | } | ||
619 | /* | 653 | /* |
620 | * If the host has this page as readonly but the guest | 654 | * If the host has this page as readonly but the guest |
621 | * wants to make it read/write, reduce the permissions. | 655 | * wants to make it read/write, reduce the permissions. |
@@ -686,13 +720,7 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, | |||
686 | 720 | ||
687 | hptep[0] &= ~HPTE_V_VALID; | 721 | hptep[0] &= ~HPTE_V_VALID; |
688 | rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); | 722 | rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); |
689 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | 723 | do_tlbies(kvm, &rb, 1, 1, true); |
690 | cpu_relax(); | ||
691 | asm volatile("ptesync" : : : "memory"); | ||
692 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
693 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
694 | asm volatile("ptesync" : : : "memory"); | ||
695 | kvm->arch.tlbie_lock = 0; | ||
696 | } | 724 | } |
697 | EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); | 725 | EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); |
698 | 726 | ||
@@ -706,12 +734,7 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, | |||
706 | rbyte = (hptep[1] & ~HPTE_R_R) >> 8; | 734 | rbyte = (hptep[1] & ~HPTE_R_R) >> 8; |
707 | /* modify only the second-last byte, which contains the ref bit */ | 735 | /* modify only the second-last byte, which contains the ref bit */ |
708 | *((char *)hptep + 14) = rbyte; | 736 | *((char *)hptep + 14) = rbyte; |
709 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | 737 | do_tlbies(kvm, &rb, 1, 1, false); |
710 | cpu_relax(); | ||
711 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
712 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
713 | asm volatile("ptesync" : : : "memory"); | ||
714 | kvm->arch.tlbie_lock = 0; | ||
715 | } | 738 | } |
716 | EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); | 739 | EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); |
717 | 740 | ||
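Folding the four open-coded invalidation sequences into do_tlbies() keeps the locking and ordering rules in one place: a global invalidation takes kvm->arch.tlbie_lock and finishes with eieio; tlbsync; ptesync, a local one just brackets tlbiel with ptesync, and PPC970 gets its own variant because its tlbie encodes the large-page bit in the instruction rather than in RB. With the assembly replaced by printing stubs, the control flow reduces to roughly the following; this is purely an illustration of the structure, not something that touches real TLB state.

#include <stdbool.h>
#include <stdio.h>

/* Printing placeholders for what are really tlbie/tlbiel/sync instructions. */
static void ptesync(void)               { puts("ptesync"); }
static void tlbie(unsigned long rb)     { printf("tlbie  %#lx\n", rb); }
static void tlbiel(unsigned long rb)    { printf("tlbiel %#lx\n", rb); }
static void eieio_tlbsync_ptesync(void) { puts("eieio; tlbsync; ptesync"); }

static int tlbie_lock;   /* stands in for kvm->arch.tlbie_lock */

static void do_tlbies(unsigned long *rb, long n, bool global, bool need_sync)
{
        long i;

        if (global) {
                while (__sync_lock_test_and_set(&tlbie_lock, 1))
                        ;   /* spin, like try_lock_tlbie() */
                if (need_sync)
                        ptesync();
                for (i = 0; i < n; i++)
                        tlbie(rb[i]);
                eieio_tlbsync_ptesync();
                __sync_lock_release(&tlbie_lock);
        } else {
                if (need_sync)
                        ptesync();
                for (i = 0; i < n; i++)
                        tlbiel(rb[i]);
                ptesync();
        }
}

int main(void)
{
        unsigned long rb[2] = { 0x1000, 0x2000 };

        do_tlbies(rb, 2, true, true);    /* global flush of two entries */
        do_tlbies(rb, 1, false, true);   /* local flush of one entry */
        return 0;
}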
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b02f91e4c70d..60dce5bfab3f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -1381,7 +1381,7 @@ hcall_try_real_mode: | |||
1381 | cmpldi r3,hcall_real_table_end - hcall_real_table | 1381 | cmpldi r3,hcall_real_table_end - hcall_real_table |
1382 | bge guest_exit_cont | 1382 | bge guest_exit_cont |
1383 | LOAD_REG_ADDR(r4, hcall_real_table) | 1383 | LOAD_REG_ADDR(r4, hcall_real_table) |
1384 | lwzx r3,r3,r4 | 1384 | lwax r3,r3,r4 |
1385 | cmpwi r3,0 | 1385 | cmpwi r3,0 |
1386 | beq guest_exit_cont | 1386 | beq guest_exit_cont |
1387 | add r3,r3,r4 | 1387 | add r3,r3,r4 |
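The lwzx-to-lwax change matters because hcall_real_table holds 32-bit offsets relative to the start of the table, and a handler placed before the table has a negative offset: lwzx zero-extends the load, turning that negative offset into a huge positive value before the add, while lwax sign-extends it and keeps the arithmetic correct. The same distinction in C, with a made-up table address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int32_t offset = -0x400;   /* handler located before the table */

        uint64_t zero_extended = (uint64_t)(uint32_t)offset;  /* lwzx-like */
        int64_t  sign_extended = (int64_t)offset;             /* lwax-like */

        uint64_t table = 0xc000000000800000ULL;               /* made-up base */

        printf("lwzx target: %#llx\n",   /* wrong: lands far beyond the table */
               (unsigned long long)(table + zero_extended));
        printf("lwax target: %#llx\n",   /* right: 0x400 before the table */
               (unsigned long long)(table + (uint64_t)sign_extended));
        return 0;
}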
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 48cbbf862958..17cfae5497a3 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S | |||
@@ -92,6 +92,11 @@ kvm_start_lightweight: | |||
92 | PPC_LL r3, VCPU_HFLAGS(r4) | 92 | PPC_LL r3, VCPU_HFLAGS(r4) |
93 | rldicl r3, r3, 0, 63 /* r3 &= 1 */ | 93 | rldicl r3, r3, 0, 63 /* r3 &= 1 */ |
94 | stb r3, HSTATE_RESTORE_HID5(r13) | 94 | stb r3, HSTATE_RESTORE_HID5(r13) |
95 | |||
96 | /* Load up guest SPRG3 value, since it's user readable */ | ||
97 | ld r3, VCPU_SHARED(r4) | ||
98 | ld r3, VCPU_SHARED_SPRG3(r3) | ||
99 | mtspr SPRN_SPRG3, r3 | ||
95 | #endif /* CONFIG_PPC_BOOK3S_64 */ | 100 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
96 | 101 | ||
97 | PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ | 102 | PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ |
@@ -123,6 +128,15 @@ kvmppc_handler_highmem: | |||
123 | /* R7 = vcpu */ | 128 | /* R7 = vcpu */ |
124 | PPC_LL r7, GPR4(r1) | 129 | PPC_LL r7, GPR4(r1) |
125 | 130 | ||
131 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
132 | /* | ||
133 | * Reload kernel SPRG3 value. | ||
134 | * No need to save guest value as usermode can't modify SPRG3. | ||
135 | */ | ||
136 | ld r3, PACA_SPRG3(r13) | ||
137 | mtspr SPRN_SPRG3, r3 | ||
138 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
139 | |||
126 | PPC_STL r14, VCPU_GPR(R14)(r7) | 140 | PPC_STL r14, VCPU_GPR(R14)(r7) |
127 | PPC_STL r15, VCPU_GPR(R15)(r7) | 141 | PPC_STL r15, VCPU_GPR(R15)(r7) |
128 | PPC_STL r16, VCPU_GPR(R16)(r7) | 142 | PPC_STL r16, VCPU_GPR(R16)(r7) |
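Guest SPRG3 is loaded on the way into the guest because user mode can read that register (the ppc64 vDSO getcpu path relies on it), and the host value is restored from PACA_SPRG3 on the way out; the guest value never needs saving since problem-state code cannot write SPRG3. The save/restore is deliberately asymmetric, roughly as in this illustrative C rendition of the assembly above:

#include <stdio.h>

static unsigned long sprg3;   /* stands in for the hardware SPR */

static void enter_guest(unsigned long guest_sprg3)
{
        /* Load the guest value; nothing to save, since guest user space
         * can only read SPRG3, never modify it. */
        sprg3 = guest_sprg3;
}

static void exit_guest(unsigned long host_sprg3_from_paca)
{
        /* The host value comes back from the per-cpu area (PACA_SPRG3). */
        sprg3 = host_sprg3_from_paca;
}

int main(void)
{
        unsigned long host_value = 0x11;   /* made-up host SPRG3 contents */

        enter_guest(0x22);
        printf("in guest:  sprg3=%#lx\n", sprg3);
        exit_guest(host_value);
        printf("back host: sprg3=%#lx\n", sprg3);
        return 0;
}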
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c6e13d9a9e15..27db1e665959 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
@@ -468,7 +468,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) | |||
468 | * both the traditional FP registers and the added VSX | 468 | * both the traditional FP registers and the added VSX |
469 | * registers into thread.fpr[]. | 469 | * registers into thread.fpr[]. |
470 | */ | 470 | */ |
471 | giveup_fpu(current); | 471 | if (current->thread.regs->msr & MSR_FP) |
472 | giveup_fpu(current); | ||
472 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) | 473 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) |
473 | vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; | 474 | vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; |
474 | 475 | ||
@@ -483,7 +484,8 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) | |||
483 | 484 | ||
484 | #ifdef CONFIG_ALTIVEC | 485 | #ifdef CONFIG_ALTIVEC |
485 | if (msr & MSR_VEC) { | 486 | if (msr & MSR_VEC) { |
486 | giveup_altivec(current); | 487 | if (current->thread.regs->msr & MSR_VEC) |
488 | giveup_altivec(current); | ||
487 | memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); | 489 | memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); |
488 | vcpu->arch.vscr = t->vscr; | 490 | vcpu->arch.vscr = t->vscr; |
489 | } | 491 | } |
@@ -575,8 +577,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, | |||
575 | printk(KERN_INFO "Loading up ext 0x%lx\n", msr); | 577 | printk(KERN_INFO "Loading up ext 0x%lx\n", msr); |
576 | #endif | 578 | #endif |
577 | 579 | ||
578 | current->thread.regs->msr |= msr; | ||
579 | |||
580 | if (msr & MSR_FP) { | 580 | if (msr & MSR_FP) { |
581 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) | 581 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) |
582 | thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; | 582 | thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; |
@@ -598,12 +598,32 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, | |||
598 | #endif | 598 | #endif |
599 | } | 599 | } |
600 | 600 | ||
601 | current->thread.regs->msr |= msr; | ||
601 | vcpu->arch.guest_owned_ext |= msr; | 602 | vcpu->arch.guest_owned_ext |= msr; |
602 | kvmppc_recalc_shadow_msr(vcpu); | 603 | kvmppc_recalc_shadow_msr(vcpu); |
603 | 604 | ||
604 | return RESUME_GUEST; | 605 | return RESUME_GUEST; |
605 | } | 606 | } |
606 | 607 | ||
608 | /* | ||
609 | * Kernel code using FP or VMX could have flushed guest state to | ||
610 | * the thread_struct; if so, get it back now. | ||
611 | */ | ||
612 | static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) | ||
613 | { | ||
614 | unsigned long lost_ext; | ||
615 | |||
616 | lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr; | ||
617 | if (!lost_ext) | ||
618 | return; | ||
619 | |||
620 | if (lost_ext & MSR_FP) | ||
621 | kvmppc_load_up_fpu(); | ||
622 | if (lost_ext & MSR_VEC) | ||
623 | kvmppc_load_up_altivec(); | ||
624 | current->thread.regs->msr |= lost_ext; | ||
625 | } | ||
626 | |||
607 | int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | 627 | int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, |
608 | unsigned int exit_nr) | 628 | unsigned int exit_nr) |
609 | { | 629 | { |
@@ -772,7 +792,7 @@ program_interrupt: | |||
772 | } | 792 | } |
773 | case BOOK3S_INTERRUPT_SYSCALL: | 793 | case BOOK3S_INTERRUPT_SYSCALL: |
774 | if (vcpu->arch.papr_enabled && | 794 | if (vcpu->arch.papr_enabled && |
775 | (kvmppc_get_last_inst(vcpu) == 0x44000022) && | 795 | (kvmppc_get_last_sc(vcpu) == 0x44000022) && |
776 | !(vcpu->arch.shared->msr & MSR_PR)) { | 796 | !(vcpu->arch.shared->msr & MSR_PR)) { |
777 | /* SC 1 papr hypercalls */ | 797 | /* SC 1 papr hypercalls */ |
778 | ulong cmd = kvmppc_get_gpr(vcpu, 3); | 798 | ulong cmd = kvmppc_get_gpr(vcpu, 3); |
@@ -890,8 +910,9 @@ program_interrupt: | |||
890 | local_irq_enable(); | 910 | local_irq_enable(); |
891 | r = s; | 911 | r = s; |
892 | } else { | 912 | } else { |
893 | kvmppc_lazy_ee_enable(); | 913 | kvmppc_fix_ee_before_entry(); |
894 | } | 914 | } |
915 | kvmppc_handle_lost_ext(vcpu); | ||
895 | } | 916 | } |
896 | 917 | ||
897 | trace_kvm_book3s_reenter(r, vcpu); | 918 | trace_kvm_book3s_reenter(r, vcpu); |
@@ -1162,7 +1183,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1162 | if (vcpu->arch.shared->msr & MSR_FP) | 1183 | if (vcpu->arch.shared->msr & MSR_FP) |
1163 | kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); | 1184 | kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); |
1164 | 1185 | ||
1165 | kvmppc_lazy_ee_enable(); | 1186 | kvmppc_fix_ee_before_entry(); |
1166 | 1187 | ||
1167 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); | 1188 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); |
1168 | 1189 | ||
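
kvmppc_handle_lost_ext() above reconciles vcpu->arch.guest_owned_ext with the thread's live MSR after host kernel code may have used FP or VMX behind the guest's back. A self-contained sketch of that bookkeeping, with stand-in types and illustrative bit values rather than the real MSR layout:

#include <stdio.h>

#define MSR_FP  (1u << 13)   /* illustrative bit positions */
#define MSR_VEC (1u << 25)

struct vcpu_model {
	unsigned int guest_owned_ext;    /* facilities the guest logically owns */
};

struct thread_model {
	unsigned int msr;                /* facilities currently live on the CPU */
};

/* If the host used FP/VMX since guest entry, the live bits were cleared;
 * reload whatever the guest still logically owns. */
static void handle_lost_ext(struct vcpu_model *vcpu, struct thread_model *t)
{
	unsigned int lost = vcpu->guest_owned_ext & ~t->msr;

	if (!lost)
		return;
	if (lost & MSR_FP)
		printf("reload FP state for the guest\n");
	if (lost & MSR_VEC)
		printf("reload Altivec state for the guest\n");
	t->msr |= lost;
}

int main(void)
{
	struct vcpu_model vcpu = { .guest_owned_ext = MSR_FP | MSR_VEC };
	struct thread_model t = { .msr = MSR_FP };   /* host code dropped VEC */

	handle_lost_ext(&vcpu, &t);
	return 0;
}
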
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 94c1dd46b83d..a3a5cb8ee7ea 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <asm/hvcall.h> | 19 | #include <asm/hvcall.h> |
20 | #include <asm/xics.h> | 20 | #include <asm/xics.h> |
21 | #include <asm/debug.h> | 21 | #include <asm/debug.h> |
22 | #include <asm/time.h> | ||
22 | 23 | ||
23 | #include <linux/debugfs.h> | 24 | #include <linux/debugfs.h> |
24 | #include <linux/seq_file.h> | 25 | #include <linux/seq_file.h> |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index dcc94f016007..17722d82f1d1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -674,8 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
674 | goto out; | 674 | goto out; |
675 | } | 675 | } |
676 | 676 | ||
677 | kvm_guest_enter(); | ||
678 | |||
679 | #ifdef CONFIG_PPC_FPU | 677 | #ifdef CONFIG_PPC_FPU |
680 | /* Save userspace FPU state in stack */ | 678 | /* Save userspace FPU state in stack */ |
681 | enable_kernel_fp(); | 679 | enable_kernel_fp(); |
@@ -698,7 +696,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
698 | kvmppc_load_guest_fp(vcpu); | 696 | kvmppc_load_guest_fp(vcpu); |
699 | #endif | 697 | #endif |
700 | 698 | ||
701 | kvmppc_lazy_ee_enable(); | 699 | kvmppc_fix_ee_before_entry(); |
702 | 700 | ||
703 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); | 701 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); |
704 | 702 | ||
@@ -1168,7 +1166,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
1168 | local_irq_enable(); | 1166 | local_irq_enable(); |
1169 | r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); | 1167 | r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); |
1170 | } else { | 1168 | } else { |
1171 | kvmppc_lazy_ee_enable(); | 1169 | kvmppc_fix_ee_before_entry(); |
1172 | } | 1170 | } |
1173 | } | 1171 | } |
1174 | 1172 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6316ee336e88..f55e14cd1762 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) | |||
117 | kvm_guest_exit(); | 117 | kvm_guest_exit(); |
118 | continue; | 118 | continue; |
119 | } | 119 | } |
120 | |||
121 | trace_hardirqs_on(); | ||
122 | #endif | 120 | #endif |
123 | 121 | ||
124 | kvm_guest_enter(); | 122 | kvm_guest_enter(); |
@@ -420,6 +418,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
420 | return kvmppc_core_create_memslot(slot, npages); | 418 | return kvmppc_core_create_memslot(slot, npages); |
421 | } | 419 | } |
422 | 420 | ||
421 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
422 | { | ||
423 | } | ||
424 | |||
423 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 425 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
424 | struct kvm_memory_slot *memslot, | 426 | struct kvm_memory_slot *memslot, |
425 | struct kvm_userspace_memory_region *mem, | 427 | struct kvm_userspace_memory_region *mem, |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3238d4004e84..e87ecaa2c569 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -274,6 +274,14 @@ struct kvm_arch{ | |||
274 | int css_support; | 274 | int css_support; |
275 | }; | 275 | }; |
276 | 276 | ||
277 | #define KVM_HVA_ERR_BAD (-1UL) | ||
278 | #define KVM_HVA_ERR_RO_BAD (-2UL) | ||
279 | |||
280 | static inline bool kvm_is_error_hva(unsigned long addr) | ||
281 | { | ||
282 | return IS_ERR_VALUE(addr); | ||
283 | } | ||
284 | |||
277 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); | 285 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); |
278 | extern char sie_exit; | 286 | extern char sie_exit; |
279 | #endif | 287 | #endif |
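
The s390 hunk above defines KVM_HVA_ERR_BAD and KVM_HVA_ERR_RO_BAD as the topmost addresses and lets kvm_is_error_hva() reuse the IS_ERR_VALUE() test. A standalone sketch of how such sentinel addresses are told apart from ordinary ones; the 4095-byte error window mirrors the usual IS_ERR_VALUE convention and is stated here as an assumption:

#include <stdio.h>

#define HVA_ERR_BAD     (-1UL)
#define HVA_ERR_RO_BAD  (-2UL)
#define MAX_ERRNO       4095

static int hva_is_error(unsigned long addr)
{
	/* anything in the last page of the address space counts as an error */
	return addr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	printf("%d %d %d\n",
	       hva_is_error(HVA_ERR_BAD),
	       hva_is_error(HVA_ERR_RO_BAD),
	       hva_is_error(0x7f0000000000UL));
	return 0;
}
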
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 6340178748bf..ff132ac64ddd 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h | |||
@@ -12,8 +12,6 @@ typedef struct { | |||
12 | unsigned long asce_bits; | 12 | unsigned long asce_bits; |
13 | unsigned long asce_limit; | 13 | unsigned long asce_limit; |
14 | unsigned long vdso_base; | 14 | unsigned long vdso_base; |
15 | /* Cloned contexts will be created with extended page tables. */ | ||
16 | unsigned int alloc_pgste:1; | ||
17 | /* The mmu context has extended page tables. */ | 15 | /* The mmu context has extended page tables. */ |
18 | unsigned int has_pgste:1; | 16 | unsigned int has_pgste:1; |
19 | } mm_context_t; | 17 | } mm_context_t; |
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 7b7fce4e8469..9f973d8de90e 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h | |||
@@ -21,24 +21,7 @@ static inline int init_new_context(struct task_struct *tsk, | |||
21 | #ifdef CONFIG_64BIT | 21 | #ifdef CONFIG_64BIT |
22 | mm->context.asce_bits |= _ASCE_TYPE_REGION3; | 22 | mm->context.asce_bits |= _ASCE_TYPE_REGION3; |
23 | #endif | 23 | #endif |
24 | if (current->mm && current->mm->context.alloc_pgste) { | 24 | mm->context.has_pgste = 0; |
25 | /* | ||
26 | * alloc_pgste indicates, that any NEW context will be created | ||
27 | * with extended page tables. The old context is unchanged. The | ||
28 | * page table allocation and the page table operations will | ||
29 | * look at has_pgste to distinguish normal and extended page | ||
30 | * tables. The only way to create extended page tables is to | ||
31 | * set alloc_pgste and then create a new context (e.g. dup_mm). | ||
32 | * The page table allocation is called after init_new_context | ||
33 | * and if has_pgste is set, it will create extended page | ||
34 | * tables. | ||
35 | */ | ||
36 | mm->context.has_pgste = 1; | ||
37 | mm->context.alloc_pgste = 1; | ||
38 | } else { | ||
39 | mm->context.has_pgste = 0; | ||
40 | mm->context.alloc_pgste = 0; | ||
41 | } | ||
42 | mm->context.asce_limit = STACK_TOP_MAX; | 25 | mm->context.asce_limit = STACK_TOP_MAX; |
43 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); | 26 | crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); |
44 | return 0; | 27 | return 0; |
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9f215b40109e..9b60a36c348d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -1442,6 +1442,17 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) | |||
1442 | } | 1442 | } |
1443 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ | 1443 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ |
1444 | 1444 | ||
1445 | static inline void pmdp_flush_lazy(struct mm_struct *mm, | ||
1446 | unsigned long address, pmd_t *pmdp) | ||
1447 | { | ||
1448 | int active = (mm == current->active_mm) ? 1 : 0; | ||
1449 | |||
1450 | if ((atomic_read(&mm->context.attach_count) & 0xffff) > active) | ||
1451 | __pmd_idte(address, pmdp); | ||
1452 | else | ||
1453 | mm->context.flush_mm = 1; | ||
1454 | } | ||
1455 | |||
1445 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 1456 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
1446 | 1457 | ||
1447 | #define __HAVE_ARCH_PGTABLE_DEPOSIT | 1458 | #define __HAVE_ARCH_PGTABLE_DEPOSIT |
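
pmdp_flush_lazy() above flushes the pmd entry immediately only when the attach count says another context may be using the mm, and otherwise just records a pending flush. A simplified standalone model of that decision (the fields are stand-ins, not the kernel's):

#include <stdio.h>

struct mm_model {
	int attach_count;   /* contexts currently attached to this mm */
	int is_active;      /* 1 if it is the caller's active mm */
	int flush_mm;       /* deferred-flush marker */
};

static void flush_lazy(struct mm_model *mm)
{
	if (mm->attach_count > mm->is_active)
		printf("flush the entry right away\n");
	else
		mm->flush_mm = 1;   /* nobody else attached: defer the flush */
}

int main(void)
{
	struct mm_model only_here = { .attach_count = 1, .is_active = 1 };
	struct mm_model shared    = { .attach_count = 2, .is_active = 1 };

	flush_lazy(&only_here);    /* defers */
	flush_lazy(&shared);       /* flushes now */
	printf("deferred: %d %d\n", only_here.flush_mm, shared.flush_mm);
	return 0;
}
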
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b0e6435b2f02..0eb37505cab1 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h | |||
@@ -43,6 +43,7 @@ extern void execve_tail(void); | |||
43 | #ifndef CONFIG_64BIT | 43 | #ifndef CONFIG_64BIT |
44 | 44 | ||
45 | #define TASK_SIZE (1UL << 31) | 45 | #define TASK_SIZE (1UL << 31) |
46 | #define TASK_MAX_SIZE (1UL << 31) | ||
46 | #define TASK_UNMAPPED_BASE (1UL << 30) | 47 | #define TASK_UNMAPPED_BASE (1UL << 30) |
47 | 48 | ||
48 | #else /* CONFIG_64BIT */ | 49 | #else /* CONFIG_64BIT */ |
@@ -51,6 +52,7 @@ extern void execve_tail(void); | |||
51 | #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ | 52 | #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ |
52 | (1UL << 30) : (1UL << 41)) | 53 | (1UL << 30) : (1UL << 41)) |
53 | #define TASK_SIZE TASK_SIZE_OF(current) | 54 | #define TASK_SIZE TASK_SIZE_OF(current) |
55 | #define TASK_MAX_SIZE (1UL << 53) | ||
54 | 56 | ||
55 | #endif /* CONFIG_64BIT */ | 57 | #endif /* CONFIG_64BIT */ |
56 | 58 | ||
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 3074475c8ae0..3a74d8af0d69 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c | |||
@@ -119,12 +119,21 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) | |||
119 | * The layout is as follows: | 119 | * The layout is as follows: |
120 | * - gpr 2 contains the subchannel id (passed as addr) | 120 | * - gpr 2 contains the subchannel id (passed as addr) |
121 | * - gpr 3 contains the virtqueue index (passed as datamatch) | 121 | * - gpr 3 contains the virtqueue index (passed as datamatch) |
122 | * - gpr 4 contains the index on the bus (optionally) | ||
122 | */ | 123 | */ |
123 | ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, | 124 | ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, |
124 | vcpu->run->s.regs.gprs[2], | 125 | vcpu->run->s.regs.gprs[2], |
125 | 8, &vcpu->run->s.regs.gprs[3]); | 126 | 8, &vcpu->run->s.regs.gprs[3], |
127 | vcpu->run->s.regs.gprs[4]); | ||
126 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 128 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
127 | /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ | 129 | |
130 | /* | ||
131 | * Return cookie in gpr 2, but don't overwrite the register if the | ||
132 | * diagnose will be handled by userspace. | ||
133 | */ | ||
134 | if (ret != -EOPNOTSUPP) | ||
135 | vcpu->run->s.regs.gprs[2] = ret; | ||
136 | /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */ | ||
128 | return ret < 0 ? ret : 0; | 137 | return ret < 0 ? ret : 0; |
129 | } | 138 | } |
130 | 139 | ||
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 34c1c9a90be2..776dafe918db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/pgtable.h> | 28 | #include <asm/pgtable.h> |
29 | #include <asm/nmi.h> | 29 | #include <asm/nmi.h> |
30 | #include <asm/switch_to.h> | 30 | #include <asm/switch_to.h> |
31 | #include <asm/facility.h> | ||
31 | #include <asm/sclp.h> | 32 | #include <asm/sclp.h> |
32 | #include "kvm-s390.h" | 33 | #include "kvm-s390.h" |
33 | #include "gaccess.h" | 34 | #include "gaccess.h" |
@@ -84,9 +85,15 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
84 | { NULL } | 85 | { NULL } |
85 | }; | 86 | }; |
86 | 87 | ||
87 | static unsigned long long *facilities; | 88 | unsigned long *vfacilities; |
88 | static struct gmap_notifier gmap_notifier; | 89 | static struct gmap_notifier gmap_notifier; |
89 | 90 | ||
91 | /* test availability of vfacility */ | ||
92 | static inline int test_vfacility(unsigned long nr) | ||
93 | { | ||
94 | return __test_facility(nr, (void *) vfacilities); | ||
95 | } | ||
96 | |||
90 | /* Section: not file related */ | 97 | /* Section: not file related */ |
91 | int kvm_arch_hardware_enable(void *garbage) | 98 | int kvm_arch_hardware_enable(void *garbage) |
92 | { | 99 | { |
@@ -387,7 +394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
387 | vcpu->arch.sie_block->ecb = 6; | 394 | vcpu->arch.sie_block->ecb = 6; |
388 | vcpu->arch.sie_block->ecb2 = 8; | 395 | vcpu->arch.sie_block->ecb2 = 8; |
389 | vcpu->arch.sie_block->eca = 0xC1002001U; | 396 | vcpu->arch.sie_block->eca = 0xC1002001U; |
390 | vcpu->arch.sie_block->fac = (int) (long) facilities; | 397 | vcpu->arch.sie_block->fac = (int) (long) vfacilities; |
391 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 398 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
392 | tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, | 399 | tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, |
393 | (unsigned long) vcpu); | 400 | (unsigned long) vcpu); |
@@ -1063,6 +1070,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
1063 | return 0; | 1070 | return 0; |
1064 | } | 1071 | } |
1065 | 1072 | ||
1073 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
1074 | { | ||
1075 | } | ||
1076 | |||
1066 | /* Section: memory related */ | 1077 | /* Section: memory related */ |
1067 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1078 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
1068 | struct kvm_memory_slot *memslot, | 1079 | struct kvm_memory_slot *memslot, |
@@ -1129,20 +1140,20 @@ static int __init kvm_s390_init(void) | |||
1129 | * to hold the maximum amount of facilities. On the other hand, we | 1140 | * to hold the maximum amount of facilities. On the other hand, we |
1130 | * only set facilities that are known to work in KVM. | 1141 | * only set facilities that are known to work in KVM. |
1131 | */ | 1142 | */ |
1132 | facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); | 1143 | vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); |
1133 | if (!facilities) { | 1144 | if (!vfacilities) { |
1134 | kvm_exit(); | 1145 | kvm_exit(); |
1135 | return -ENOMEM; | 1146 | return -ENOMEM; |
1136 | } | 1147 | } |
1137 | memcpy(facilities, S390_lowcore.stfle_fac_list, 16); | 1148 | memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); |
1138 | facilities[0] &= 0xff82fff3f47c0000ULL; | 1149 | vfacilities[0] &= 0xff82fff3f47c0000UL; |
1139 | facilities[1] &= 0x001c000000000000ULL; | 1150 | vfacilities[1] &= 0x001c000000000000UL; |
1140 | return 0; | 1151 | return 0; |
1141 | } | 1152 | } |
1142 | 1153 | ||
1143 | static void __exit kvm_s390_exit(void) | 1154 | static void __exit kvm_s390_exit(void) |
1144 | { | 1155 | { |
1145 | free_page((unsigned long) facilities); | 1156 | free_page((unsigned long) vfacilities); |
1146 | kvm_exit(); | 1157 | kvm_exit(); |
1147 | } | 1158 | } |
1148 | 1159 | ||
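
test_vfacility() above delegates to __test_facility() on the 16-byte vfacilities list that kvm_s390_init() copies and masks from the lowcore STFLE data. A standalone model of the bit numbering involved, assuming the usual STFLE convention that facility bits count from the most-significant bit of the first byte:

#include <stdio.h>
#include <string.h>

static int test_facility_bit(const unsigned char *list, unsigned int nr)
{
	return (list[nr / 8] >> (7 - (nr % 8))) & 1;
}

int main(void)
{
	unsigned char fac[16];

	memset(fac, 0, sizeof(fac));
	fac[0] = 0x80;                         /* facility 0 installed */
	printf("%d %d\n", test_facility_bit(fac, 0), test_facility_bit(fac, 1));
	return 0;
}
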
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 028ca9fd2158..dc99f1ca4267 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -24,6 +24,9 @@ | |||
24 | 24 | ||
25 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); | 25 | typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); |
26 | 26 | ||
27 | /* declare vfacilities extern */ | ||
28 | extern unsigned long *vfacilities; | ||
29 | |||
27 | /* negativ values are error codes, positive values for internal conditions */ | 30 | /* negativ values are error codes, positive values for internal conditions */ |
28 | #define SIE_INTERCEPT_RERUNVCPU (1<<0) | 31 | #define SIE_INTERCEPT_RERUNVCPU (1<<0) |
29 | #define SIE_INTERCEPT_UCONTROL (1<<1) | 32 | #define SIE_INTERCEPT_UCONTROL (1<<1) |
@@ -112,6 +115,13 @@ static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) | |||
112 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; | 115 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; |
113 | } | 116 | } |
114 | 117 | ||
118 | /* Set the condition code in the guest program status word */ | ||
119 | static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) | ||
120 | { | ||
121 | vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44); | ||
122 | vcpu->arch.sie_block->gpsw.mask |= cc << 44; | ||
123 | } | ||
124 | |||
115 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); | 125 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); |
116 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); | 126 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); |
117 | void kvm_s390_tasklet(unsigned long parm); | 127 | void kvm_s390_tasklet(unsigned long parm); |
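
kvm_s390_set_psw_cc() above centralizes the two-bit condition-code update at shift 44 of the guest PSW mask, replacing the open-coded mask-and-or sequences that the priv.c hunks below drop. A self-contained sketch of the same bit manipulation:

#include <stdio.h>

static void set_psw_cc(unsigned long *psw_mask, unsigned long cc)
{
	*psw_mask &= ~(3UL << 44);       /* clear the old condition code */
	*psw_mask |= (cc & 3UL) << 44;   /* install the new one (0..3) */
}

int main(void)
{
	unsigned long mask = 0;

	set_psw_cc(&mask, 3);
	printf("%#lx\n", mask);          /* 0x300000000000 */
	return 0;
}
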
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 4cdc54e63ebc..59200ee275e5 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -164,8 +164,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) | |||
164 | kfree(inti); | 164 | kfree(inti); |
165 | no_interrupt: | 165 | no_interrupt: |
166 | /* Set condition code and we're done. */ | 166 | /* Set condition code and we're done. */ |
167 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 167 | kvm_s390_set_psw_cc(vcpu, cc); |
168 | vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; | ||
169 | return 0; | 168 | return 0; |
170 | } | 169 | } |
171 | 170 | ||
@@ -220,15 +219,13 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) | |||
220 | * Set condition code 3 to stop the guest from issueing channel | 219 | * Set condition code 3 to stop the guest from issueing channel |
221 | * I/O instructions. | 220 | * I/O instructions. |
222 | */ | 221 | */ |
223 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 222 | kvm_s390_set_psw_cc(vcpu, 3); |
224 | vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; | ||
225 | return 0; | 223 | return 0; |
226 | } | 224 | } |
227 | } | 225 | } |
228 | 226 | ||
229 | static int handle_stfl(struct kvm_vcpu *vcpu) | 227 | static int handle_stfl(struct kvm_vcpu *vcpu) |
230 | { | 228 | { |
231 | unsigned int facility_list; | ||
232 | int rc; | 229 | int rc; |
233 | 230 | ||
234 | vcpu->stat.instruction_stfl++; | 231 | vcpu->stat.instruction_stfl++; |
@@ -236,15 +233,13 @@ static int handle_stfl(struct kvm_vcpu *vcpu) | |||
236 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 233 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) |
237 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 234 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
238 | 235 | ||
239 | /* only pass the facility bits, which we can handle */ | ||
240 | facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3; | ||
241 | |||
242 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), | 236 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), |
243 | &facility_list, sizeof(facility_list)); | 237 | vfacilities, 4); |
244 | if (rc) | 238 | if (rc) |
245 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 239 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
246 | VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); | 240 | VCPU_EVENT(vcpu, 5, "store facility list value %x", |
247 | trace_kvm_s390_handle_stfl(vcpu, facility_list); | 241 | *(unsigned int *) vfacilities); |
242 | trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); | ||
248 | return 0; | 243 | return 0; |
249 | } | 244 | } |
250 | 245 | ||
@@ -387,7 +382,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
387 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 382 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
388 | 383 | ||
389 | if (fc > 3) { | 384 | if (fc > 3) { |
390 | vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ | 385 | kvm_s390_set_psw_cc(vcpu, 3); |
391 | return 0; | 386 | return 0; |
392 | } | 387 | } |
393 | 388 | ||
@@ -397,7 +392,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
397 | 392 | ||
398 | if (fc == 0) { | 393 | if (fc == 0) { |
399 | vcpu->run->s.regs.gprs[0] = 3 << 28; | 394 | vcpu->run->s.regs.gprs[0] = 3 << 28; |
400 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ | 395 | kvm_s390_set_psw_cc(vcpu, 0); |
401 | return 0; | 396 | return 0; |
402 | } | 397 | } |
403 | 398 | ||
@@ -431,12 +426,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
431 | } | 426 | } |
432 | trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); | 427 | trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); |
433 | free_page(mem); | 428 | free_page(mem); |
434 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 429 | kvm_s390_set_psw_cc(vcpu, 0); |
435 | vcpu->run->s.regs.gprs[0] = 0; | 430 | vcpu->run->s.regs.gprs[0] = 0; |
436 | return 0; | 431 | return 0; |
437 | out_no_data: | 432 | out_no_data: |
438 | /* condition code 3 */ | 433 | kvm_s390_set_psw_cc(vcpu, 3); |
439 | vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; | ||
440 | out_exception: | 434 | out_exception: |
441 | free_page(mem); | 435 | free_page(mem); |
442 | return rc; | 436 | return rc; |
@@ -494,12 +488,12 @@ static int handle_epsw(struct kvm_vcpu *vcpu) | |||
494 | kvm_s390_get_regs_rre(vcpu, ®1, ®2); | 488 | kvm_s390_get_regs_rre(vcpu, ®1, ®2); |
495 | 489 | ||
496 | /* This basically extracts the mask half of the psw. */ | 490 | /* This basically extracts the mask half of the psw. */ |
497 | vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; | 491 | vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL; |
498 | vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; | 492 | vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; |
499 | if (reg2) { | 493 | if (reg2) { |
500 | vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; | 494 | vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL; |
501 | vcpu->run->s.regs.gprs[reg2] |= | 495 | vcpu->run->s.regs.gprs[reg2] |= |
502 | vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; | 496 | vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL; |
503 | } | 497 | } |
504 | return 0; | 498 | return 0; |
505 | } | 499 | } |
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 6d16132d0850..bf7c0dc64a76 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, | |||
335 | 335 | ||
336 | if ((from | to | len) & (PMD_SIZE - 1)) | 336 | if ((from | to | len) & (PMD_SIZE - 1)) |
337 | return -EINVAL; | 337 | return -EINVAL; |
338 | if (len == 0 || from + len > PGDIR_SIZE || | 338 | if (len == 0 || from + len > TASK_MAX_SIZE || |
339 | from + len < from || to + len < to) | 339 | from + len < from || to + len < to) |
340 | return -EINVAL; | 340 | return -EINVAL; |
341 | 341 | ||
@@ -732,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) | |||
732 | spin_unlock(&gmap_notifier_lock); | 732 | spin_unlock(&gmap_notifier_lock); |
733 | } | 733 | } |
734 | 734 | ||
735 | static inline int page_table_with_pgste(struct page *page) | ||
736 | { | ||
737 | return atomic_read(&page->_mapcount) == 0; | ||
738 | } | ||
739 | |||
735 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | 740 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, |
736 | unsigned long vmaddr) | 741 | unsigned long vmaddr) |
737 | { | 742 | { |
@@ -751,7 +756,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | |||
751 | mp->vmaddr = vmaddr & PMD_MASK; | 756 | mp->vmaddr = vmaddr & PMD_MASK; |
752 | INIT_LIST_HEAD(&mp->mapper); | 757 | INIT_LIST_HEAD(&mp->mapper); |
753 | page->index = (unsigned long) mp; | 758 | page->index = (unsigned long) mp; |
754 | atomic_set(&page->_mapcount, 3); | 759 | atomic_set(&page->_mapcount, 0); |
755 | table = (unsigned long *) page_to_phys(page); | 760 | table = (unsigned long *) page_to_phys(page); |
756 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); | 761 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); |
757 | clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, | 762 | clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, |
@@ -818,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key); | |||
818 | 823 | ||
819 | #else /* CONFIG_PGSTE */ | 824 | #else /* CONFIG_PGSTE */ |
820 | 825 | ||
826 | static inline int page_table_with_pgste(struct page *page) | ||
827 | { | ||
828 | return 0; | ||
829 | } | ||
830 | |||
821 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | 831 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, |
822 | unsigned long vmaddr) | 832 | unsigned long vmaddr) |
823 | { | 833 | { |
@@ -894,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
894 | struct page *page; | 904 | struct page *page; |
895 | unsigned int bit, mask; | 905 | unsigned int bit, mask; |
896 | 906 | ||
897 | if (mm_has_pgste(mm)) { | 907 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
908 | if (page_table_with_pgste(page)) { | ||
898 | gmap_disconnect_pgtable(mm, table); | 909 | gmap_disconnect_pgtable(mm, table); |
899 | return page_table_free_pgste(table); | 910 | return page_table_free_pgste(table); |
900 | } | 911 | } |
901 | /* Free 1K/2K page table fragment of a 4K page */ | 912 | /* Free 1K/2K page table fragment of a 4K page */ |
902 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
903 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); | 913 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); |
904 | spin_lock_bh(&mm->context.list_lock); | 914 | spin_lock_bh(&mm->context.list_lock); |
905 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) | 915 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) |
@@ -937,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) | |||
937 | unsigned int bit, mask; | 947 | unsigned int bit, mask; |
938 | 948 | ||
939 | mm = tlb->mm; | 949 | mm = tlb->mm; |
940 | if (mm_has_pgste(mm)) { | 950 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
951 | if (page_table_with_pgste(page)) { | ||
941 | gmap_disconnect_pgtable(mm, table); | 952 | gmap_disconnect_pgtable(mm, table); |
942 | table = (unsigned long *) (__pa(table) | FRAG_MASK); | 953 | table = (unsigned long *) (__pa(table) | FRAG_MASK); |
943 | tlb_remove_table(tlb, table); | 954 | tlb_remove_table(tlb, table); |
944 | return; | 955 | return; |
945 | } | 956 | } |
946 | bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); | 957 | bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); |
947 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
948 | spin_lock_bh(&mm->context.list_lock); | 958 | spin_lock_bh(&mm->context.list_lock); |
949 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) | 959 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) |
950 | list_del(&page->lru); | 960 | list_del(&page->lru); |
@@ -1030,36 +1040,120 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) | |||
1030 | } | 1040 | } |
1031 | 1041 | ||
1032 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 1042 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
1033 | void thp_split_vma(struct vm_area_struct *vma) | 1043 | static inline void thp_split_vma(struct vm_area_struct *vma) |
1034 | { | 1044 | { |
1035 | unsigned long addr; | 1045 | unsigned long addr; |
1036 | struct page *page; | ||
1037 | 1046 | ||
1038 | for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { | 1047 | for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) |
1039 | page = follow_page(vma, addr, FOLL_SPLIT); | 1048 | follow_page(vma, addr, FOLL_SPLIT); |
1040 | } | ||
1041 | } | 1049 | } |
1042 | 1050 | ||
1043 | void thp_split_mm(struct mm_struct *mm) | 1051 | static inline void thp_split_mm(struct mm_struct *mm) |
1044 | { | 1052 | { |
1045 | struct vm_area_struct *vma = mm->mmap; | 1053 | struct vm_area_struct *vma; |
1046 | 1054 | ||
1047 | while (vma != NULL) { | 1055 | for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { |
1048 | thp_split_vma(vma); | 1056 | thp_split_vma(vma); |
1049 | vma->vm_flags &= ~VM_HUGEPAGE; | 1057 | vma->vm_flags &= ~VM_HUGEPAGE; |
1050 | vma->vm_flags |= VM_NOHUGEPAGE; | 1058 | vma->vm_flags |= VM_NOHUGEPAGE; |
1051 | vma = vma->vm_next; | ||
1052 | } | 1059 | } |
1060 | mm->def_flags |= VM_NOHUGEPAGE; | ||
1061 | } | ||
1062 | #else | ||
1063 | static inline void thp_split_mm(struct mm_struct *mm) | ||
1064 | { | ||
1053 | } | 1065 | } |
1054 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 1066 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
1055 | 1067 | ||
1068 | static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, | ||
1069 | struct mm_struct *mm, pud_t *pud, | ||
1070 | unsigned long addr, unsigned long end) | ||
1071 | { | ||
1072 | unsigned long next, *table, *new; | ||
1073 | struct page *page; | ||
1074 | pmd_t *pmd; | ||
1075 | |||
1076 | pmd = pmd_offset(pud, addr); | ||
1077 | do { | ||
1078 | next = pmd_addr_end(addr, end); | ||
1079 | again: | ||
1080 | if (pmd_none_or_clear_bad(pmd)) | ||
1081 | continue; | ||
1082 | table = (unsigned long *) pmd_deref(*pmd); | ||
1083 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
1084 | if (page_table_with_pgste(page)) | ||
1085 | continue; | ||
1086 | /* Allocate new page table with pgstes */ | ||
1087 | new = page_table_alloc_pgste(mm, addr); | ||
1088 | if (!new) { | ||
1089 | mm->context.has_pgste = 0; | ||
1090 | continue; | ||
1091 | } | ||
1092 | spin_lock(&mm->page_table_lock); | ||
1093 | if (likely((unsigned long *) pmd_deref(*pmd) == table)) { | ||
1094 | /* Nuke pmd entry pointing to the "short" page table */ | ||
1095 | pmdp_flush_lazy(mm, addr, pmd); | ||
1096 | pmd_clear(pmd); | ||
1097 | /* Copy ptes from old table to new table */ | ||
1098 | memcpy(new, table, PAGE_SIZE/2); | ||
1099 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); | ||
1100 | /* Establish new table */ | ||
1101 | pmd_populate(mm, pmd, (pte_t *) new); | ||
1102 | /* Free old table with rcu, there might be a walker! */ | ||
1103 | page_table_free_rcu(tlb, table); | ||
1104 | new = NULL; | ||
1105 | } | ||
1106 | spin_unlock(&mm->page_table_lock); | ||
1107 | if (new) { | ||
1108 | page_table_free_pgste(new); | ||
1109 | goto again; | ||
1110 | } | ||
1111 | } while (pmd++, addr = next, addr != end); | ||
1112 | |||
1113 | return addr; | ||
1114 | } | ||
1115 | |||
1116 | static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, | ||
1117 | struct mm_struct *mm, pgd_t *pgd, | ||
1118 | unsigned long addr, unsigned long end) | ||
1119 | { | ||
1120 | unsigned long next; | ||
1121 | pud_t *pud; | ||
1122 | |||
1123 | pud = pud_offset(pgd, addr); | ||
1124 | do { | ||
1125 | next = pud_addr_end(addr, end); | ||
1126 | if (pud_none_or_clear_bad(pud)) | ||
1127 | continue; | ||
1128 | next = page_table_realloc_pmd(tlb, mm, pud, addr, next); | ||
1129 | } while (pud++, addr = next, addr != end); | ||
1130 | |||
1131 | return addr; | ||
1132 | } | ||
1133 | |||
1134 | static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, | ||
1135 | unsigned long addr, unsigned long end) | ||
1136 | { | ||
1137 | unsigned long next; | ||
1138 | pgd_t *pgd; | ||
1139 | |||
1140 | pgd = pgd_offset(mm, addr); | ||
1141 | do { | ||
1142 | next = pgd_addr_end(addr, end); | ||
1143 | if (pgd_none_or_clear_bad(pgd)) | ||
1144 | continue; | ||
1145 | next = page_table_realloc_pud(tlb, mm, pgd, addr, next); | ||
1146 | } while (pgd++, addr = next, addr != end); | ||
1147 | } | ||
1148 | |||
1056 | /* | 1149 | /* |
1057 | * switch on pgstes for its userspace process (for kvm) | 1150 | * switch on pgstes for its userspace process (for kvm) |
1058 | */ | 1151 | */ |
1059 | int s390_enable_sie(void) | 1152 | int s390_enable_sie(void) |
1060 | { | 1153 | { |
1061 | struct task_struct *tsk = current; | 1154 | struct task_struct *tsk = current; |
1062 | struct mm_struct *mm, *old_mm; | 1155 | struct mm_struct *mm = tsk->mm; |
1156 | struct mmu_gather tlb; | ||
1063 | 1157 | ||
1064 | /* Do we have switched amode? If no, we cannot do sie */ | 1158 | /* Do we have switched amode? If no, we cannot do sie */ |
1065 | if (s390_user_mode == HOME_SPACE_MODE) | 1159 | if (s390_user_mode == HOME_SPACE_MODE) |
@@ -1069,57 +1163,16 @@ int s390_enable_sie(void) | |||
1069 | if (mm_has_pgste(tsk->mm)) | 1163 | if (mm_has_pgste(tsk->mm)) |
1070 | return 0; | 1164 | return 0; |
1071 | 1165 | ||
1072 | /* lets check if we are allowed to replace the mm */ | 1166 | down_write(&mm->mmap_sem); |
1073 | task_lock(tsk); | ||
1074 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | ||
1075 | #ifdef CONFIG_AIO | ||
1076 | !hlist_empty(&tsk->mm->ioctx_list) || | ||
1077 | #endif | ||
1078 | tsk->mm != tsk->active_mm) { | ||
1079 | task_unlock(tsk); | ||
1080 | return -EINVAL; | ||
1081 | } | ||
1082 | task_unlock(tsk); | ||
1083 | |||
1084 | /* we copy the mm and let dup_mm create the page tables with_pgstes */ | ||
1085 | tsk->mm->context.alloc_pgste = 1; | ||
1086 | /* make sure that both mms have a correct rss state */ | ||
1087 | sync_mm_rss(tsk->mm); | ||
1088 | mm = dup_mm(tsk); | ||
1089 | tsk->mm->context.alloc_pgste = 0; | ||
1090 | if (!mm) | ||
1091 | return -ENOMEM; | ||
1092 | |||
1093 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
1094 | /* split thp mappings and disable thp for future mappings */ | 1167 | /* split thp mappings and disable thp for future mappings */ |
1095 | thp_split_mm(mm); | 1168 | thp_split_mm(mm); |
1096 | mm->def_flags |= VM_NOHUGEPAGE; | 1169 | /* Reallocate the page tables with pgstes */ |
1097 | #endif | 1170 | mm->context.has_pgste = 1; |
1098 | 1171 | tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); | |
1099 | /* Now lets check again if something happened */ | 1172 | page_table_realloc(&tlb, mm, 0, TASK_SIZE); |
1100 | task_lock(tsk); | 1173 | tlb_finish_mmu(&tlb, 0, TASK_SIZE); |
1101 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | 1174 | up_write(&mm->mmap_sem); |
1102 | #ifdef CONFIG_AIO | 1175 | return mm->context.has_pgste ? 0 : -ENOMEM; |
1103 | !hlist_empty(&tsk->mm->ioctx_list) || | ||
1104 | #endif | ||
1105 | tsk->mm != tsk->active_mm) { | ||
1106 | mmput(mm); | ||
1107 | task_unlock(tsk); | ||
1108 | return -EINVAL; | ||
1109 | } | ||
1110 | |||
1111 | /* ok, we are alone. No ptrace, no threads, etc. */ | ||
1112 | old_mm = tsk->mm; | ||
1113 | tsk->mm = tsk->active_mm = mm; | ||
1114 | preempt_disable(); | ||
1115 | update_mm(mm, tsk); | ||
1116 | atomic_inc(&mm->context.attach_count); | ||
1117 | atomic_dec(&old_mm->context.attach_count); | ||
1118 | cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); | ||
1119 | preempt_enable(); | ||
1120 | task_unlock(tsk); | ||
1121 | mmput(old_mm); | ||
1122 | return 0; | ||
1123 | } | 1176 | } |
1124 | EXPORT_SYMBOL_GPL(s390_enable_sie); | 1177 | EXPORT_SYMBOL_GPL(s390_enable_sie); |
1125 | 1178 | ||
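
page_table_realloc_pmd() above allocates the pgste-capable table without the page_table_lock held, re-checks under the lock that the pmd still points at the table it intends to replace, retries if it lost the race, and frees the old table through RCU. A standalone sketch of that allocate-unlocked/recheck-locked pattern using plain pthreads and malloc in place of the kernel's locking and RCU:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TABLE_SIZE 4096

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Replace the table at *slot with a bigger, freshly allocated one. */
static int upgrade_table(void **slot)
{
	for (;;) {
		void *old = *slot;                    /* snapshot outside the lock */
		void *new = calloc(1, TABLE_SIZE);    /* allocate outside the lock */

		if (!new)
			return -1;

		pthread_mutex_lock(&table_lock);
		if (*slot == old) {                   /* still the table we saw? */
			memcpy(new, old, TABLE_SIZE / 2); /* keep the existing half */
			*slot = new;                      /* publish the new table */
			pthread_mutex_unlock(&table_lock);
			free(old);    /* the kernel version defers this via RCU */
			return 0;
		}
		pthread_mutex_unlock(&table_lock);
		free(new);                            /* lost the race, try again */
	}
}

int main(void)
{
	void *table = calloc(1, TABLE_SIZE / 2);

	printf("upgrade: %d\n", upgrade_table(&table));
	free(table);
	return 0;
}
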
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f87f7fcefa0a..c76ff74a98f2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -286,6 +286,7 @@ struct kvm_mmu { | |||
286 | u64 *pae_root; | 286 | u64 *pae_root; |
287 | u64 *lm_root; | 287 | u64 *lm_root; |
288 | u64 rsvd_bits_mask[2][4]; | 288 | u64 rsvd_bits_mask[2][4]; |
289 | u64 bad_mt_xwr; | ||
289 | 290 | ||
290 | /* | 291 | /* |
291 | * Bitmap: bit set = last pte in walk | 292 | * Bitmap: bit set = last pte in walk |
@@ -323,6 +324,7 @@ struct kvm_pmu { | |||
323 | u64 global_ovf_ctrl; | 324 | u64 global_ovf_ctrl; |
324 | u64 counter_bitmask[2]; | 325 | u64 counter_bitmask[2]; |
325 | u64 global_ctrl_mask; | 326 | u64 global_ctrl_mask; |
327 | u64 reserved_bits; | ||
326 | u8 version; | 328 | u8 version; |
327 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; | 329 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; |
328 | struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; | 330 | struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; |
@@ -511,6 +513,14 @@ struct kvm_vcpu_arch { | |||
511 | * instruction. | 513 | * instruction. |
512 | */ | 514 | */ |
513 | bool write_fault_to_shadow_pgtable; | 515 | bool write_fault_to_shadow_pgtable; |
516 | |||
517 | /* set at EPT violation at this point */ | ||
518 | unsigned long exit_qualification; | ||
519 | |||
520 | /* pv related host specific info */ | ||
521 | struct { | ||
522 | bool pv_unhalted; | ||
523 | } pv; | ||
514 | }; | 524 | }; |
515 | 525 | ||
516 | struct kvm_lpage_info { | 526 | struct kvm_lpage_info { |
@@ -802,8 +812,8 @@ extern u32 kvm_min_guest_tsc_khz; | |||
802 | extern u32 kvm_max_guest_tsc_khz; | 812 | extern u32 kvm_max_guest_tsc_khz; |
803 | 813 | ||
804 | enum emulation_result { | 814 | enum emulation_result { |
805 | EMULATE_DONE, /* no further processing */ | 815 | EMULATE_DONE, /* no further processing */ |
806 | EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ | 816 | EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */ |
807 | EMULATE_FAIL, /* can't emulate this instruction */ | 817 | EMULATE_FAIL, /* can't emulate this instruction */ |
808 | }; | 818 | }; |
809 | 819 | ||
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 109a9dd5d454..be8269b00e2a 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
@@ -93,7 +93,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, | |||
93 | 93 | ||
94 | struct pvclock_vsyscall_time_info { | 94 | struct pvclock_vsyscall_time_info { |
95 | struct pvclock_vcpu_time_info pvti; | 95 | struct pvclock_vcpu_time_info pvti; |
96 | u32 migrate_count; | ||
97 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 96 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
98 | 97 | ||
99 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) | 98 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f3e01a2cbaa1..966502d4682e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -387,6 +387,7 @@ enum vmcs_field { | |||
387 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 | 387 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 |
388 | #define VMX_EPT_EXTENT_CONTEXT 1 | 388 | #define VMX_EPT_EXTENT_CONTEXT 1 |
389 | #define VMX_EPT_EXTENT_GLOBAL 2 | 389 | #define VMX_EPT_EXTENT_GLOBAL 2 |
390 | #define VMX_EPT_EXTENT_SHIFT 24 | ||
390 | 391 | ||
391 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) | 392 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) |
392 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) | 393 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) |
@@ -394,6 +395,7 @@ enum vmcs_field { | |||
394 | #define VMX_EPTP_WB_BIT (1ull << 14) | 395 | #define VMX_EPTP_WB_BIT (1ull << 14) |
395 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 396 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
396 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) | 397 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) |
398 | #define VMX_EPT_INVEPT_BIT (1ull << 20) | ||
397 | #define VMX_EPT_AD_BIT (1ull << 21) | 399 | #define VMX_EPT_AD_BIT (1ull << 21) |
398 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 400 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
399 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 401 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
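
The vmx.h hunk above adds VMX_EPT_INVEPT_BIT and an extent shift alongside the existing extent bits. A small sketch of testing such capability bits in a raw 64-bit capability word; the value below is invented for illustration, not read from the IA32_VMX_EPT_VPID_CAP MSR:

#include <stdio.h>

#define VMX_EPT_INVEPT_BIT         (1ull << 20)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT  (1ull << 26)

int main(void)
{
	unsigned long long ept_vpid_cap = 0x06114141ull;   /* illustrative only */

	if (ept_vpid_cap & VMX_EPT_INVEPT_BIT)
		printf("INVEPT supported (extents: %s%s)\n",
		       (ept_vpid_cap & VMX_EPT_EXTENT_CONTEXT_BIT) ? "context " : "",
		       (ept_vpid_cap & VMX_EPT_EXTENT_GLOBAL_BIT) ? "global" : "");
	return 0;
}
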
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d651082c7cf7..0e79420376eb 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -65,6 +65,7 @@ | |||
65 | #define EXIT_REASON_EOI_INDUCED 45 | 65 | #define EXIT_REASON_EOI_INDUCED 45 |
66 | #define EXIT_REASON_EPT_VIOLATION 48 | 66 | #define EXIT_REASON_EPT_VIOLATION 48 |
67 | #define EXIT_REASON_EPT_MISCONFIG 49 | 67 | #define EXIT_REASON_EPT_MISCONFIG 49 |
68 | #define EXIT_REASON_INVEPT 50 | ||
68 | #define EXIT_REASON_PREEMPTION_TIMER 52 | 69 | #define EXIT_REASON_PREEMPTION_TIMER 52 |
69 | #define EXIT_REASON_WBINVD 54 | 70 | #define EXIT_REASON_WBINVD 54 |
70 | #define EXIT_REASON_XSETBV 55 | 71 | #define EXIT_REASON_XSETBV 55 |
@@ -106,12 +107,13 @@ | |||
106 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ | 107 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ |
107 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ | 108 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ |
108 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ | 109 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ |
110 | { EXIT_REASON_INVEPT, "INVEPT" }, \ | ||
111 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ | ||
109 | { EXIT_REASON_WBINVD, "WBINVD" }, \ | 112 | { EXIT_REASON_WBINVD, "WBINVD" }, \ |
110 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ | 113 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ |
111 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | 114 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ |
112 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | 115 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ |
113 | { EXIT_REASON_INVD, "INVD" }, \ | 116 | { EXIT_REASON_INVD, "INVD" }, \ |
114 | { EXIT_REASON_INVPCID, "INVPCID" }, \ | 117 | { EXIT_REASON_INVPCID, "INVPCID" } |
115 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } | ||
116 | 118 | ||
117 | #endif /* _UAPIVMX_H */ | 119 | #endif /* _UAPIVMX_H */ |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2cb9470ea85b..a16bae3f83b3 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -128,46 +128,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, | |||
128 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | 128 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); |
129 | } | 129 | } |
130 | 130 | ||
131 | static struct pvclock_vsyscall_time_info *pvclock_vdso_info; | ||
132 | |||
133 | static struct pvclock_vsyscall_time_info * | ||
134 | pvclock_get_vsyscall_user_time_info(int cpu) | ||
135 | { | ||
136 | if (!pvclock_vdso_info) { | ||
137 | BUG(); | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | return &pvclock_vdso_info[cpu]; | ||
142 | } | ||
143 | |||
144 | struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) | ||
145 | { | ||
146 | return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; | ||
147 | } | ||
148 | |||
149 | #ifdef CONFIG_X86_64 | 131 | #ifdef CONFIG_X86_64 |
150 | static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, | ||
151 | void *v) | ||
152 | { | ||
153 | struct task_migration_notifier *mn = v; | ||
154 | struct pvclock_vsyscall_time_info *pvti; | ||
155 | |||
156 | pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); | ||
157 | |||
158 | /* this is NULL when pvclock vsyscall is not initialized */ | ||
159 | if (unlikely(pvti == NULL)) | ||
160 | return NOTIFY_DONE; | ||
161 | |||
162 | pvti->migrate_count++; | ||
163 | |||
164 | return NOTIFY_DONE; | ||
165 | } | ||
166 | |||
167 | static struct notifier_block pvclock_migrate = { | ||
168 | .notifier_call = pvclock_task_migrate, | ||
169 | }; | ||
170 | |||
171 | /* | 132 | /* |
172 | * Initialize the generic pvclock vsyscall state. This will allocate | 133 | * Initialize the generic pvclock vsyscall state. This will allocate |
173 | * a/some page(s) for the per-vcpu pvclock information, set up a | 134 | * a/some page(s) for the per-vcpu pvclock information, set up a |
@@ -181,17 +142,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, | |||
181 | 142 | ||
182 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); | 143 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); |
183 | 144 | ||
184 | pvclock_vdso_info = i; | ||
185 | |||
186 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { | 145 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { |
187 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, | 146 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, |
188 | __pa(i) + (idx*PAGE_SIZE), | 147 | __pa(i) + (idx*PAGE_SIZE), |
189 | PAGE_KERNEL_VVAR); | 148 | PAGE_KERNEL_VVAR); |
190 | } | 149 | } |
191 | 150 | ||
192 | |||
193 | register_task_migration_notifier(&pvclock_migrate); | ||
194 | |||
195 | return 0; | 151 | return 0; |
196 | } | 152 | } |
197 | #endif | 153 | #endif |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cbf..b110fe6c03d4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -413,7 +413,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
413 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 413 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
414 | (1 << KVM_FEATURE_ASYNC_PF) | | 414 | (1 << KVM_FEATURE_ASYNC_PF) | |
415 | (1 << KVM_FEATURE_PV_EOI) | | 415 | (1 << KVM_FEATURE_PV_EOI) | |
416 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 416 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | |
417 | (1 << KVM_FEATURE_PV_UNHALT); | ||
417 | 418 | ||
418 | if (sched_info_on()) | 419 | if (sched_info_on()) |
419 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | 420 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afc11245827c..5439117d5c4c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -79,16 +79,6 @@ static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) | |||
79 | *((u32 *) (apic->regs + reg_off)) = val; | 79 | *((u32 *) (apic->regs + reg_off)) = val; |
80 | } | 80 | } |
81 | 81 | ||
82 | static inline int apic_test_and_set_vector(int vec, void *bitmap) | ||
83 | { | ||
84 | return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
85 | } | ||
86 | |||
87 | static inline int apic_test_and_clear_vector(int vec, void *bitmap) | ||
88 | { | ||
89 | return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
90 | } | ||
91 | |||
92 | static inline int apic_test_vector(int vec, void *bitmap) | 82 | static inline int apic_test_vector(int vec, void *bitmap) |
93 | { | 83 | { |
94 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 84 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
@@ -331,10 +321,10 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | |||
331 | } | 321 | } |
332 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | 322 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); |
333 | 323 | ||
334 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | 324 | static inline void apic_set_irr(int vec, struct kvm_lapic *apic) |
335 | { | 325 | { |
336 | apic->irr_pending = true; | 326 | apic->irr_pending = true; |
337 | return apic_test_and_set_vector(vec, apic->regs + APIC_IRR); | 327 | apic_set_vector(vec, apic->regs + APIC_IRR); |
338 | } | 328 | } |
339 | 329 | ||
340 | static inline int apic_search_irr(struct kvm_lapic *apic) | 330 | static inline int apic_search_irr(struct kvm_lapic *apic) |
@@ -681,32 +671,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
681 | if (unlikely(!apic_enabled(apic))) | 671 | if (unlikely(!apic_enabled(apic))) |
682 | break; | 672 | break; |
683 | 673 | ||
674 | result = 1; | ||
675 | |||
684 | if (dest_map) | 676 | if (dest_map) |
685 | __set_bit(vcpu->vcpu_id, dest_map); | 677 | __set_bit(vcpu->vcpu_id, dest_map); |
686 | 678 | ||
687 | if (kvm_x86_ops->deliver_posted_interrupt) { | 679 | if (kvm_x86_ops->deliver_posted_interrupt) |
688 | result = 1; | ||
689 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); | 680 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); |
690 | } else { | 681 | else { |
691 | result = !apic_test_and_set_irr(vector, apic); | 682 | apic_set_irr(vector, apic); |
692 | |||
693 | if (!result) { | ||
694 | if (trig_mode) | ||
695 | apic_debug("level trig mode repeatedly " | ||
696 | "for vector %d", vector); | ||
697 | goto out; | ||
698 | } | ||
699 | 683 | ||
700 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 684 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
701 | kvm_vcpu_kick(vcpu); | 685 | kvm_vcpu_kick(vcpu); |
702 | } | 686 | } |
703 | out: | ||
704 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | 687 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, |
705 | trig_mode, vector, !result); | 688 | trig_mode, vector, false); |
706 | break; | 689 | break; |
707 | 690 | ||
708 | case APIC_DM_REMRD: | 691 | case APIC_DM_REMRD: |
709 | apic_debug("Ignoring delivery mode 3\n"); | 692 | result = 1; |
693 | vcpu->arch.pv.pv_unhalted = 1; | ||
694 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
695 | kvm_vcpu_kick(vcpu); | ||
710 | break; | 696 | break; |
711 | 697 | ||
712 | case APIC_DM_SMI: | 698 | case APIC_DM_SMI: |
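
In the lapic.c hunk above, fixed-mode delivery now unconditionally sets the IRR bit via apic_set_irr() and kicks the vcpu, and APIC_DM_REMRD is repurposed to set pv.pv_unhalted before the kick. A standalone sketch of where a vector lands in the IRR, assuming the usual 8 x 32-bit register layout spaced 0x10 apart that the VEC_POS/REG_POS macros in this file encode:

#include <stdio.h>

static void irr_position(unsigned int vec, unsigned int *reg_off,
			 unsigned int *bit)
{
	*reg_off = (vec >> 5) << 4;   /* which IRR register, as a byte offset */
	*bit = vec & 31;              /* which bit inside that register */
}

int main(void)
{
	unsigned int off, bit;

	irr_position(0xec, &off, &bit);
	printf("vector 0xec -> IRR register +%#x, bit %u\n", off, bit);
	return 0;
}
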
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9e9285ae9b94..6e2d2c8f230b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -132,8 +132,8 @@ module_param(dbg, bool, 0644); | |||
132 | (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ | 132 | (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ |
133 | * PT32_LEVEL_BITS))) - 1)) | 133 | * PT32_LEVEL_BITS))) - 1)) |
134 | 134 | ||
135 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | 135 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ |
136 | | PT64_NX_MASK) | 136 | | shadow_x_mask | shadow_nx_mask) |
137 | 137 | ||
138 | #define ACC_EXEC_MASK 1 | 138 | #define ACC_EXEC_MASK 1 |
139 | #define ACC_WRITE_MASK PT_WRITABLE_MASK | 139 | #define ACC_WRITE_MASK PT_WRITABLE_MASK |
@@ -331,11 +331,6 @@ static int is_large_pte(u64 pte) | |||
331 | return pte & PT_PAGE_SIZE_MASK; | 331 | return pte & PT_PAGE_SIZE_MASK; |
332 | } | 332 | } |
333 | 333 | ||
334 | static int is_dirty_gpte(unsigned long pte) | ||
335 | { | ||
336 | return pte & PT_DIRTY_MASK; | ||
337 | } | ||
338 | |||
339 | static int is_rmap_spte(u64 pte) | 334 | static int is_rmap_spte(u64 pte) |
340 | { | 335 | { |
341 | return is_shadow_present_pte(pte); | 336 | return is_shadow_present_pte(pte); |
@@ -2052,12 +2047,18 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | |||
2052 | return __shadow_walk_next(iterator, *iterator->sptep); | 2047 | return __shadow_walk_next(iterator, *iterator->sptep); |
2053 | } | 2048 | } |
2054 | 2049 | ||
2055 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | 2050 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) |
2056 | { | 2051 | { |
2057 | u64 spte; | 2052 | u64 spte; |
2058 | 2053 | ||
2054 | BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK || | ||
2055 | VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); | ||
2056 | |||
2059 | spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | | 2057 | spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | |
2060 | shadow_user_mask | shadow_x_mask | shadow_accessed_mask; | 2058 | shadow_user_mask | shadow_x_mask; |
2059 | |||
2060 | if (accessed) | ||
2061 | spte |= shadow_accessed_mask; | ||
2061 | 2062 | ||
2062 | mmu_spte_set(sptep, spte); | 2063 | mmu_spte_set(sptep, spte); |
2063 | } | 2064 | } |
@@ -2574,14 +2575,6 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
2574 | mmu_free_roots(vcpu); | 2575 | mmu_free_roots(vcpu); |
2575 | } | 2576 | } |
2576 | 2577 | ||
2577 | static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) | ||
2578 | { | ||
2579 | int bit7; | ||
2580 | |||
2581 | bit7 = (gpte >> 7) & 1; | ||
2582 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; | ||
2583 | } | ||
2584 | |||
2585 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | 2578 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, |
2586 | bool no_dirty_log) | 2579 | bool no_dirty_log) |
2587 | { | 2580 | { |
@@ -2594,26 +2587,6 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2594 | return gfn_to_pfn_memslot_atomic(slot, gfn); | 2587 | return gfn_to_pfn_memslot_atomic(slot, gfn); |
2595 | } | 2588 | } |
2596 | 2589 | ||
2597 | static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu, | ||
2598 | struct kvm_mmu_page *sp, u64 *spte, | ||
2599 | u64 gpte) | ||
2600 | { | ||
2601 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | ||
2602 | goto no_present; | ||
2603 | |||
2604 | if (!is_present_gpte(gpte)) | ||
2605 | goto no_present; | ||
2606 | |||
2607 | if (!(gpte & PT_ACCESSED_MASK)) | ||
2608 | goto no_present; | ||
2609 | |||
2610 | return false; | ||
2611 | |||
2612 | no_present: | ||
2613 | drop_spte(vcpu->kvm, spte); | ||
2614 | return true; | ||
2615 | } | ||
2616 | |||
2617 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | 2590 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, |
2618 | struct kvm_mmu_page *sp, | 2591 | struct kvm_mmu_page *sp, |
2619 | u64 *start, u64 *end) | 2592 | u64 *start, u64 *end) |
@@ -2710,7 +2683,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2710 | iterator.level - 1, | 2683 | iterator.level - 1, |
2711 | 1, ACC_ALL, iterator.sptep); | 2684 | 1, ACC_ALL, iterator.sptep); |
2712 | 2685 | ||
2713 | link_shadow_page(iterator.sptep, sp); | 2686 | link_shadow_page(iterator.sptep, sp, true); |
2714 | } | 2687 | } |
2715 | } | 2688 | } |
2716 | return emulate; | 2689 | return emulate; |
@@ -2808,7 +2781,7 @@ exit: | |||
2808 | return ret; | 2781 | return ret; |
2809 | } | 2782 | } |
2810 | 2783 | ||
2811 | static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) | 2784 | static bool page_fault_can_be_fast(u32 error_code) |
2812 | { | 2785 | { |
2813 | /* | 2786 | /* |
2814 | * Do not fix the mmio spte with invalid generation number which | 2787 | * Do not fix the mmio spte with invalid generation number which |
@@ -2861,7 +2834,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
2861 | bool ret = false; | 2834 | bool ret = false; |
2862 | u64 spte = 0ull; | 2835 | u64 spte = 0ull; |
2863 | 2836 | ||
2864 | if (!page_fault_can_be_fast(vcpu, error_code)) | 2837 | if (!page_fault_can_be_fast(error_code)) |
2865 | return false; | 2838 | return false; |
2866 | 2839 | ||
2867 | walk_shadow_page_lockless_begin(vcpu); | 2840 | walk_shadow_page_lockless_begin(vcpu); |
@@ -3209,6 +3182,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
3209 | mmu_sync_roots(vcpu); | 3182 | mmu_sync_roots(vcpu); |
3210 | spin_unlock(&vcpu->kvm->mmu_lock); | 3183 | spin_unlock(&vcpu->kvm->mmu_lock); |
3211 | } | 3184 | } |
3185 | EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); | ||
3212 | 3186 | ||
3213 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, | 3187 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
3214 | u32 access, struct x86_exception *exception) | 3188 | u32 access, struct x86_exception *exception) |
@@ -3478,6 +3452,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | |||
3478 | ++vcpu->stat.tlb_flush; | 3452 | ++vcpu->stat.tlb_flush; |
3479 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 3453 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
3480 | } | 3454 | } |
3455 | EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); | ||
3481 | 3456 | ||
3482 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 3457 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
3483 | { | 3458 | { |
@@ -3501,18 +3476,6 @@ static void paging_free(struct kvm_vcpu *vcpu) | |||
3501 | nonpaging_free(vcpu); | 3476 | nonpaging_free(vcpu); |
3502 | } | 3477 | } |
3503 | 3478 | ||
3504 | static inline void protect_clean_gpte(unsigned *access, unsigned gpte) | ||
3505 | { | ||
3506 | unsigned mask; | ||
3507 | |||
3508 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | ||
3509 | |||
3510 | mask = (unsigned)~ACC_WRITE_MASK; | ||
3511 | /* Allow write access to dirty gptes */ | ||
3512 | mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; | ||
3513 | *access &= mask; | ||
3514 | } | ||
3515 | |||
3516 | static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | 3479 | static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, |
3517 | unsigned access, int *nr_present) | 3480 | unsigned access, int *nr_present) |
3518 | { | 3481 | { |
@@ -3530,16 +3493,6 @@ static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | |||
3530 | return false; | 3493 | return false; |
3531 | } | 3494 | } |
3532 | 3495 | ||
3533 | static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) | ||
3534 | { | ||
3535 | unsigned access; | ||
3536 | |||
3537 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | ||
3538 | access &= ~(gpte >> PT64_NX_SHIFT); | ||
3539 | |||
3540 | return access; | ||
3541 | } | ||
3542 | |||
3543 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) | 3496 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) |
3544 | { | 3497 | { |
3545 | unsigned index; | 3498 | unsigned index; |
@@ -3549,6 +3502,11 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp | |||
3549 | return mmu->last_pte_bitmap & (1 << index); | 3502 | return mmu->last_pte_bitmap & (1 << index); |
3550 | } | 3503 | } |
3551 | 3504 | ||
3505 | #define PTTYPE_EPT 18 /* arbitrary */ | ||
3506 | #define PTTYPE PTTYPE_EPT | ||
3507 | #include "paging_tmpl.h" | ||
3508 | #undef PTTYPE | ||
3509 | |||
3552 | #define PTTYPE 64 | 3510 | #define PTTYPE 64 |
3553 | #include "paging_tmpl.h" | 3511 | #include "paging_tmpl.h" |
3554 | #undef PTTYPE | 3512 | #undef PTTYPE |
@@ -3563,6 +3521,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3563 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 3521 | int maxphyaddr = cpuid_maxphyaddr(vcpu); |
3564 | u64 exb_bit_rsvd = 0; | 3522 | u64 exb_bit_rsvd = 0; |
3565 | 3523 | ||
3524 | context->bad_mt_xwr = 0; | ||
3525 | |||
3566 | if (!context->nx) | 3526 | if (!context->nx) |
3567 | exb_bit_rsvd = rsvd_bits(63, 63); | 3527 | exb_bit_rsvd = rsvd_bits(63, 63); |
3568 | switch (context->root_level) { | 3528 | switch (context->root_level) { |
@@ -3618,7 +3578,40 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3618 | } | 3578 | } |
3619 | } | 3579 | } |
3620 | 3580 | ||
3621 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) | 3581 | static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, |
3582 | struct kvm_mmu *context, bool execonly) | ||
3583 | { | ||
3584 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
3585 | int pte; | ||
3586 | |||
3587 | context->rsvd_bits_mask[0][3] = | ||
3588 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | ||
3589 | context->rsvd_bits_mask[0][2] = | ||
3590 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); | ||
3591 | context->rsvd_bits_mask[0][1] = | ||
3592 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); | ||
3593 | context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); | ||
3594 | |||
3595 | /* large page */ | ||
3596 | context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; | ||
3597 | context->rsvd_bits_mask[1][2] = | ||
3598 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); | ||
3599 | context->rsvd_bits_mask[1][1] = | ||
3600 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); | ||
3601 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
3602 | |||
3603 | for (pte = 0; pte < 64; pte++) { | ||
3604 | int rwx_bits = pte & 7; | ||
3605 | int mt = pte >> 3; | ||
3606 | if (mt == 0x2 || mt == 0x3 || mt == 0x7 || | ||
3607 | rwx_bits == 0x2 || rwx_bits == 0x6 || | ||
3608 | (rwx_bits == 0x4 && !execonly)) | ||
3609 | context->bad_mt_xwr |= (1ull << pte); | ||
3610 | } | ||
3611 | } | ||
3612 | |||
3613 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, | ||
3614 | struct kvm_mmu *mmu, bool ept) | ||
3622 | { | 3615 | { |
3623 | unsigned bit, byte, pfec; | 3616 | unsigned bit, byte, pfec; |
3624 | u8 map; | 3617 | u8 map; |
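
The reset_rsvds_bits_mask_ept() hunk above precomputes a single 64-bit map, bad_mt_xwr, indexed by the low six bits of an EPT PTE (X/W/R in bits 2:0, memory type in bits 5:3), so the walker can spot a misconfigured entry with one AND. A standalone userspace sketch of the same encoding; the flagged memory types and permission combinations mirror the loop above, everything else is illustrative.

/* Rebuild the bad_mt_xwr map and test a PTE against it.
 * EPT PTE bits: R = bit 0, W = bit 1, X = bit 2, memory type = bits 5:3. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t build_bad_mt_xwr(bool execonly)
{
        uint64_t map = 0;
        int pte;

        for (pte = 0; pte < 64; pte++) {
                int rwx = pte & 7;      /* permission combination */
                int mt  = pte >> 3;     /* EPT memory type */

                if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||  /* reserved MTs */
                    rwx == 0x2 || rwx == 0x6 ||             /* W or WX without R */
                    (rwx == 0x4 && !execonly))              /* X-only needs the cap */
                        map |= 1ull << pte;
        }
        return map;
}

static bool ept_pte_misconfigured(uint64_t gpte, uint64_t bad_mt_xwr)
{
        return (bad_mt_xwr >> (gpte & 0x3f)) & 1;
}

int main(void)
{
        uint64_t map = build_bad_mt_xwr(false);

        /* write-only (W set, R clear) is always a misconfiguration */
        printf("W-only : %d\n", ept_pte_misconfigured(0x2, map));
        /* read+write with write-back memory type (6) is legal */
        printf("RW, WB : %d\n", ept_pte_misconfigured((6 << 3) | 0x3, map));
        return 0;
}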
@@ -3636,12 +3629,16 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu | |||
3636 | w = bit & ACC_WRITE_MASK; | 3629 | w = bit & ACC_WRITE_MASK; |
3637 | u = bit & ACC_USER_MASK; | 3630 | u = bit & ACC_USER_MASK; |
3638 | 3631 | ||
3639 | /* Not really needed: !nx will cause pte.nx to fault */ | 3632 | if (!ept) { |
3640 | x |= !mmu->nx; | 3633 | /* Not really needed: !nx will cause pte.nx to fault */ |
3641 | /* Allow supervisor writes if !cr0.wp */ | 3634 | x |= !mmu->nx; |
3642 | w |= !is_write_protection(vcpu) && !uf; | 3635 | /* Allow supervisor writes if !cr0.wp */ |
3643 | /* Disallow supervisor fetches of user code if cr4.smep */ | 3636 | w |= !is_write_protection(vcpu) && !uf; |
3644 | x &= !(smep && u && !uf); | 3637 | /* Disallow supervisor fetches of user code if cr4.smep */ |
3638 | x &= !(smep && u && !uf); | ||
3639 | } else | ||
3640 | /* Not really needed: no U/S accesses on ept */ | ||
3641 | u = 1; | ||
3645 | 3642 | ||
3646 | fault = (ff && !x) || (uf && !u) || (wf && !w); | 3643 | fault = (ff && !x) || (uf && !u) || (wf && !w); |
3647 | map |= fault << bit; | 3644 | map |= fault << bit; |
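
update_permission_bitmask() now takes an ept flag: the non-EPT branch keeps the CR0.WP, SMEP and NX fixups, while EPT entries treat every access as a user access because EPT has no user/supervisor distinction. A hedged sketch of how such a per-error-code fault map can be built and queried; the PFEC and ACC_* constants below are illustrative placeholders, not the KVM definitions.

/* One byte of fault bits per error-code pattern, one bit per possible
 * R/W/X | U/S combination of the pte (illustrative encoding). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ACC_EXEC   1
#define ACC_WRITE  2
#define ACC_USER   4

#define PFEC_WRITE 1    /* write fault */
#define PFEC_USER  2    /* user-mode fault */
#define PFEC_FETCH 4    /* instruction fetch fault */

static uint8_t build_map(unsigned pfec, bool ept)
{
        uint8_t map = 0;
        unsigned bit;

        for (bit = 0; bit < 8; bit++) {
                bool wf = pfec & PFEC_WRITE, uf = pfec & PFEC_USER,
                     ff = pfec & PFEC_FETCH;
                bool x = bit & ACC_EXEC, w = bit & ACC_WRITE, u = bit & ACC_USER;

                if (ept)
                        u = true;       /* no U/S distinction on EPT */

                map |= ((ff && !x) || (uf && !u) || (wf && !w)) << bit;
        }
        return map;
}

int main(void)
{
        uint8_t m = build_map(PFEC_WRITE, true);
        unsigned pte_access = ACC_EXEC; /* execute-only EPT mapping */

        /* a write fault against an execute-only mapping must fault */
        printf("faults: %d\n", (m >> pte_access) & 1);
        return 0;
}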
@@ -3676,7 +3673,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3676 | context->root_level = level; | 3673 | context->root_level = level; |
3677 | 3674 | ||
3678 | reset_rsvds_bits_mask(vcpu, context); | 3675 | reset_rsvds_bits_mask(vcpu, context); |
3679 | update_permission_bitmask(vcpu, context); | 3676 | update_permission_bitmask(vcpu, context, false); |
3680 | update_last_pte_bitmap(vcpu, context); | 3677 | update_last_pte_bitmap(vcpu, context); |
3681 | 3678 | ||
3682 | ASSERT(is_pae(vcpu)); | 3679 | ASSERT(is_pae(vcpu)); |
@@ -3706,7 +3703,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3706 | context->root_level = PT32_ROOT_LEVEL; | 3703 | context->root_level = PT32_ROOT_LEVEL; |
3707 | 3704 | ||
3708 | reset_rsvds_bits_mask(vcpu, context); | 3705 | reset_rsvds_bits_mask(vcpu, context); |
3709 | update_permission_bitmask(vcpu, context); | 3706 | update_permission_bitmask(vcpu, context, false); |
3710 | update_last_pte_bitmap(vcpu, context); | 3707 | update_last_pte_bitmap(vcpu, context); |
3711 | 3708 | ||
3712 | context->new_cr3 = paging_new_cr3; | 3709 | context->new_cr3 = paging_new_cr3; |
@@ -3768,7 +3765,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3768 | context->gva_to_gpa = paging32_gva_to_gpa; | 3765 | context->gva_to_gpa = paging32_gva_to_gpa; |
3769 | } | 3766 | } |
3770 | 3767 | ||
3771 | update_permission_bitmask(vcpu, context); | 3768 | update_permission_bitmask(vcpu, context, false); |
3772 | update_last_pte_bitmap(vcpu, context); | 3769 | update_last_pte_bitmap(vcpu, context); |
3773 | 3770 | ||
3774 | return 0; | 3771 | return 0; |
@@ -3800,6 +3797,33 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
3800 | } | 3797 | } |
3801 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); | 3798 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); |
3802 | 3799 | ||
3800 | int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, | ||
3801 | bool execonly) | ||
3802 | { | ||
3803 | ASSERT(vcpu); | ||
3804 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | ||
3805 | |||
3806 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | ||
3807 | |||
3808 | context->nx = true; | ||
3809 | context->new_cr3 = paging_new_cr3; | ||
3810 | context->page_fault = ept_page_fault; | ||
3811 | context->gva_to_gpa = ept_gva_to_gpa; | ||
3812 | context->sync_page = ept_sync_page; | ||
3813 | context->invlpg = ept_invlpg; | ||
3814 | context->update_pte = ept_update_pte; | ||
3815 | context->free = paging_free; | ||
3816 | context->root_level = context->shadow_root_level; | ||
3817 | context->root_hpa = INVALID_PAGE; | ||
3818 | context->direct_map = false; | ||
3819 | |||
3820 | update_permission_bitmask(vcpu, context, true); | ||
3821 | reset_rsvds_bits_mask_ept(vcpu, context, execonly); | ||
3822 | |||
3823 | return 0; | ||
3824 | } | ||
3825 | EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); | ||
3826 | |||
3803 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | 3827 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) |
3804 | { | 3828 | { |
3805 | int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); | 3829 | int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); |
@@ -3847,7 +3871,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3847 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; | 3871 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; |
3848 | } | 3872 | } |
3849 | 3873 | ||
3850 | update_permission_bitmask(vcpu, g_context); | 3874 | update_permission_bitmask(vcpu, g_context, false); |
3851 | update_last_pte_bitmap(vcpu, g_context); | 3875 | update_last_pte_bitmap(vcpu, g_context); |
3852 | 3876 | ||
3853 | return 0; | 3877 | return 0; |
@@ -3923,8 +3947,8 @@ static bool need_remote_flush(u64 old, u64 new) | |||
3923 | return true; | 3947 | return true; |
3924 | if ((old ^ new) & PT64_BASE_ADDR_MASK) | 3948 | if ((old ^ new) & PT64_BASE_ADDR_MASK) |
3925 | return true; | 3949 | return true; |
3926 | old ^= PT64_NX_MASK; | 3950 | old ^= shadow_nx_mask; |
3927 | new ^= PT64_NX_MASK; | 3951 | new ^= shadow_nx_mask; |
3928 | return (old & ~new & PT64_PERM_MASK) != 0; | 3952 | return (old & ~new & PT64_PERM_MASK) != 0; |
3929 | } | 3953 | } |
3930 | 3954 | ||
@@ -4182,7 +4206,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | |||
4182 | switch (er) { | 4206 | switch (er) { |
4183 | case EMULATE_DONE: | 4207 | case EMULATE_DONE: |
4184 | return 1; | 4208 | return 1; |
4185 | case EMULATE_DO_MMIO: | 4209 | case EMULATE_USER_EXIT: |
4186 | ++vcpu->stat.mmio_exits; | 4210 | ++vcpu->stat.mmio_exits; |
4187 | /* fall through */ | 4211 | /* fall through */ |
4188 | case EMULATE_FAIL: | 4212 | case EMULATE_FAIL: |
@@ -4390,11 +4414,8 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) | |||
4390 | /* | 4414 | /* |
4391 | * The very rare case: if the generation-number is round, | 4415 | * The very rare case: if the generation-number is round, |
4392 | * zap all shadow pages. | 4416 | * zap all shadow pages. |
4393 | * | ||
4394 | * The max value is MMIO_MAX_GEN - 1 since it is not called | ||
4395 | * when mark memslot invalid. | ||
4396 | */ | 4417 | */ |
4397 | if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) { | 4418 | if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { |
4398 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); | 4419 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); |
4399 | kvm_mmu_invalidate_zap_all_pages(kvm); | 4420 | kvm_mmu_invalidate_zap_all_pages(kvm); |
4400 | } | 4421 | } |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 5b59c573aba7..77e044a0f5f7 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -71,6 +71,8 @@ enum { | |||
71 | 71 | ||
72 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); | 72 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); |
73 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 73 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
74 | int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, | ||
75 | bool execonly); | ||
74 | 76 | ||
75 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) | 77 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) |
76 | { | 78 | { |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7769699d48a8..043330159179 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -23,6 +23,13 @@ | |||
23 | * so the code in this file is compiled twice, once per pte size. | 23 | * so the code in this file is compiled twice, once per pte size. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | /* | ||
27 | * This is used to catch uses of the PT_GUEST_(DIRTY|ACCESS)_SHIFT macros | ||
28 | * that are not optimized away for the EPT paging type, which has no A/D bits. | ||
29 | */ | ||
30 | extern u64 __pure __using_nonexistent_pte_bit(void) | ||
31 | __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT"); | ||
32 | |||
26 | #if PTTYPE == 64 | 33 | #if PTTYPE == 64 |
27 | #define pt_element_t u64 | 34 | #define pt_element_t u64 |
28 | #define guest_walker guest_walker64 | 35 | #define guest_walker guest_walker64 |
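
The __compiletime_error() declaration above is a build-time trap: the function is never defined, so any PT_GUEST_(DIRTY|ACCESSED)_SHIFT use in the EPT instantiation that the optimizer cannot prove dead fails the build rather than miscomputing at run time. A small sketch of the same gcc trick under assumed names (not the kernel macros); it relies on dead-code elimination, so compile with optimization (e.g. gcc -O2), as the kernel always does.

/* Never-defined function with gcc's error attribute: if a call survives
 * dead-code elimination, the build fails with the given message. */
#include <stdio.h>

extern int using_nonexistent_pte_bit(void)
        __attribute__((error("wrong use of DIRTY/ACCESSED shift")));

#define GUEST_DIRTY_MASK  0     /* EPT flavor: no A/D bits */
#define GUEST_DIRTY_SHIFT (using_nonexistent_pte_bit())

static unsigned dirty_of(unsigned pte)
{
        /* guarded like the FNAME() helpers: when the mask is zero the
         * shift below is unreachable and the call is optimized away */
        if (!GUEST_DIRTY_MASK)
                return 0;
        return (pte >> GUEST_DIRTY_SHIFT) & 1;
}

int main(void)
{
        printf("%u\n", dirty_of(0x60));
        return 0;
}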
@@ -32,6 +39,10 @@ | |||
32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | 39 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) |
33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 40 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 41 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
42 | #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK | ||
43 | #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK | ||
44 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT | ||
45 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT | ||
35 | #ifdef CONFIG_X86_64 | 46 | #ifdef CONFIG_X86_64 |
36 | #define PT_MAX_FULL_LEVELS 4 | 47 | #define PT_MAX_FULL_LEVELS 4 |
37 | #define CMPXCHG cmpxchg | 48 | #define CMPXCHG cmpxchg |
@@ -49,7 +60,26 @@ | |||
49 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 60 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
50 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 61 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
51 | #define PT_MAX_FULL_LEVELS 2 | 62 | #define PT_MAX_FULL_LEVELS 2 |
63 | #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK | ||
64 | #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK | ||
65 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT | ||
66 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT | ||
52 | #define CMPXCHG cmpxchg | 67 | #define CMPXCHG cmpxchg |
68 | #elif PTTYPE == PTTYPE_EPT | ||
69 | #define pt_element_t u64 | ||
70 | #define guest_walker guest_walkerEPT | ||
71 | #define FNAME(name) ept_##name | ||
72 | #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK | ||
73 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) | ||
74 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | ||
75 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||
76 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | ||
77 | #define PT_GUEST_ACCESSED_MASK 0 | ||
78 | #define PT_GUEST_DIRTY_MASK 0 | ||
79 | #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit() | ||
80 | #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit() | ||
81 | #define CMPXCHG cmpxchg64 | ||
82 | #define PT_MAX_FULL_LEVELS 4 | ||
53 | #else | 83 | #else |
54 | #error Invalid PTTYPE value | 84 | #error Invalid PTTYPE value |
55 | #endif | 85 | #endif |
@@ -80,6 +110,40 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) | |||
80 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; | 110 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; |
81 | } | 111 | } |
82 | 112 | ||
113 | static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) | ||
114 | { | ||
115 | unsigned mask; | ||
116 | |||
117 | /* the dirty bit is not supported, so there is no need to track it */ | ||
118 | if (!PT_GUEST_DIRTY_MASK) | ||
119 | return; | ||
120 | |||
121 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | ||
122 | |||
123 | mask = (unsigned)~ACC_WRITE_MASK; | ||
124 | /* Allow write access to dirty gptes */ | ||
125 | mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & | ||
126 | PT_WRITABLE_MASK; | ||
127 | *access &= mask; | ||
128 | } | ||
129 | |||
130 | static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) | ||
131 | { | ||
132 | int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f; | ||
133 | |||
134 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) | | ||
135 | ((mmu->bad_mt_xwr & (1ull << low6)) != 0); | ||
136 | } | ||
137 | |||
138 | static inline int FNAME(is_present_gpte)(unsigned long pte) | ||
139 | { | ||
140 | #if PTTYPE != PTTYPE_EPT | ||
141 | return is_present_gpte(pte); | ||
142 | #else | ||
143 | return pte & 7; | ||
144 | #endif | ||
145 | } | ||
146 | |||
83 | static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 147 | static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
84 | pt_element_t __user *ptep_user, unsigned index, | 148 | pt_element_t __user *ptep_user, unsigned index, |
85 | pt_element_t orig_pte, pt_element_t new_pte) | 149 | pt_element_t orig_pte, pt_element_t new_pte) |
@@ -103,6 +167,42 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
103 | return (ret != orig_pte); | 167 | return (ret != orig_pte); |
104 | } | 168 | } |
105 | 169 | ||
170 | static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, | ||
171 | struct kvm_mmu_page *sp, u64 *spte, | ||
172 | u64 gpte) | ||
173 | { | ||
174 | if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | ||
175 | goto no_present; | ||
176 | |||
177 | if (!FNAME(is_present_gpte)(gpte)) | ||
178 | goto no_present; | ||
179 | |||
180 | /* if the accessed bit is not supported, prefetch non-accessed gptes */ | ||
181 | if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK)) | ||
182 | goto no_present; | ||
183 | |||
184 | return false; | ||
185 | |||
186 | no_present: | ||
187 | drop_spte(vcpu->kvm, spte); | ||
188 | return true; | ||
189 | } | ||
190 | |||
191 | static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) | ||
192 | { | ||
193 | unsigned access; | ||
194 | #if PTTYPE == PTTYPE_EPT | ||
195 | access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | | ||
196 | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | | ||
197 | ACC_USER_MASK; | ||
198 | #else | ||
199 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | ||
200 | access &= ~(gpte >> PT64_NX_SHIFT); | ||
201 | #endif | ||
202 | |||
203 | return access; | ||
204 | } | ||
205 | |||
106 | static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | 206 | static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, |
107 | struct kvm_mmu *mmu, | 207 | struct kvm_mmu *mmu, |
108 | struct guest_walker *walker, | 208 | struct guest_walker *walker, |
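
For the EPT instantiation, "present" simply means "any of the R/W/X bits set" (pte & 7), and FNAME(gpte_access) reads the EPT permission bits directly instead of the IA-32 W/U/NX layout. A standalone sketch of the EPT flavor; the EPT bit positions are architectural, the ACC_* values are illustrative.

/* EPT-flavor present/access helpers in standalone form.
 * EPT PTE: bit 0 = read, bit 1 = write, bit 2 = execute. */
#include <stdint.h>
#include <stdio.h>

#define EPT_READ        (1ull << 0)
#define EPT_WRITE       (1ull << 1)
#define EPT_EXEC        (1ull << 2)

#define ACC_EXEC_MASK   1
#define ACC_WRITE_MASK  2
#define ACC_USER_MASK   4

static int ept_present(uint64_t gpte)
{
        return (gpte & 7) != 0;         /* any of R/W/X makes it "present" */
}

static unsigned ept_access(uint64_t gpte)
{
        return ((gpte & EPT_WRITE) ? ACC_WRITE_MASK : 0) |
               ((gpte & EPT_EXEC)  ? ACC_EXEC_MASK  : 0) |
               ACC_USER_MASK;           /* EPT has no user/supervisor bit */
}

int main(void)
{
        uint64_t gpte = EPT_READ | EPT_EXEC;    /* read+execute mapping */

        printf("present: %d access: %u\n", ept_present(gpte), ept_access(gpte));
        return 0;
}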
@@ -114,18 +214,23 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | |||
114 | gfn_t table_gfn; | 214 | gfn_t table_gfn; |
115 | int ret; | 215 | int ret; |
116 | 216 | ||
217 | /* dirty/accessed bits are not supported, so no need to update them */ | ||
218 | if (!PT_GUEST_DIRTY_MASK) | ||
219 | return 0; | ||
220 | |||
117 | for (level = walker->max_level; level >= walker->level; --level) { | 221 | for (level = walker->max_level; level >= walker->level; --level) { |
118 | pte = orig_pte = walker->ptes[level - 1]; | 222 | pte = orig_pte = walker->ptes[level - 1]; |
119 | table_gfn = walker->table_gfn[level - 1]; | 223 | table_gfn = walker->table_gfn[level - 1]; |
120 | ptep_user = walker->ptep_user[level - 1]; | 224 | ptep_user = walker->ptep_user[level - 1]; |
121 | index = offset_in_page(ptep_user) / sizeof(pt_element_t); | 225 | index = offset_in_page(ptep_user) / sizeof(pt_element_t); |
122 | if (!(pte & PT_ACCESSED_MASK)) { | 226 | if (!(pte & PT_GUEST_ACCESSED_MASK)) { |
123 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); | 227 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); |
124 | pte |= PT_ACCESSED_MASK; | 228 | pte |= PT_GUEST_ACCESSED_MASK; |
125 | } | 229 | } |
126 | if (level == walker->level && write_fault && !is_dirty_gpte(pte)) { | 230 | if (level == walker->level && write_fault && |
231 | !(pte & PT_GUEST_DIRTY_MASK)) { | ||
127 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 232 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
128 | pte |= PT_DIRTY_MASK; | 233 | pte |= PT_GUEST_DIRTY_MASK; |
129 | } | 234 | } |
130 | if (pte == orig_pte) | 235 | if (pte == orig_pte) |
131 | continue; | 236 | continue; |
@@ -170,7 +275,7 @@ retry_walk: | |||
170 | if (walker->level == PT32E_ROOT_LEVEL) { | 275 | if (walker->level == PT32E_ROOT_LEVEL) { |
171 | pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); | 276 | pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); |
172 | trace_kvm_mmu_paging_element(pte, walker->level); | 277 | trace_kvm_mmu_paging_element(pte, walker->level); |
173 | if (!is_present_gpte(pte)) | 278 | if (!FNAME(is_present_gpte)(pte)) |
174 | goto error; | 279 | goto error; |
175 | --walker->level; | 280 | --walker->level; |
176 | } | 281 | } |
@@ -179,7 +284,7 @@ retry_walk: | |||
179 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 284 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || |
180 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); | 285 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); |
181 | 286 | ||
182 | accessed_dirty = PT_ACCESSED_MASK; | 287 | accessed_dirty = PT_GUEST_ACCESSED_MASK; |
183 | pt_access = pte_access = ACC_ALL; | 288 | pt_access = pte_access = ACC_ALL; |
184 | ++walker->level; | 289 | ++walker->level; |
185 | 290 | ||
@@ -215,17 +320,17 @@ retry_walk: | |||
215 | 320 | ||
216 | trace_kvm_mmu_paging_element(pte, walker->level); | 321 | trace_kvm_mmu_paging_element(pte, walker->level); |
217 | 322 | ||
218 | if (unlikely(!is_present_gpte(pte))) | 323 | if (unlikely(!FNAME(is_present_gpte)(pte))) |
219 | goto error; | 324 | goto error; |
220 | 325 | ||
221 | if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, | 326 | if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, |
222 | walker->level))) { | 327 | walker->level))) { |
223 | errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; | 328 | errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; |
224 | goto error; | 329 | goto error; |
225 | } | 330 | } |
226 | 331 | ||
227 | accessed_dirty &= pte; | 332 | accessed_dirty &= pte; |
228 | pte_access = pt_access & gpte_access(vcpu, pte); | 333 | pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); |
229 | 334 | ||
230 | walker->ptes[walker->level - 1] = pte; | 335 | walker->ptes[walker->level - 1] = pte; |
231 | } while (!is_last_gpte(mmu, walker->level, pte)); | 336 | } while (!is_last_gpte(mmu, walker->level, pte)); |
@@ -248,13 +353,15 @@ retry_walk: | |||
248 | walker->gfn = real_gpa >> PAGE_SHIFT; | 353 | walker->gfn = real_gpa >> PAGE_SHIFT; |
249 | 354 | ||
250 | if (!write_fault) | 355 | if (!write_fault) |
251 | protect_clean_gpte(&pte_access, pte); | 356 | FNAME(protect_clean_gpte)(&pte_access, pte); |
252 | else | 357 | else |
253 | /* | 358 | /* |
254 | * On a write fault, fold the dirty bit into accessed_dirty by | 359 | * On a write fault, fold the dirty bit into accessed_dirty. |
255 | * shifting it one place right. | 360 | * For modes without A/D bits support accessed_dirty will be |
361 | * always clear. | ||
256 | */ | 362 | */ |
257 | accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT); | 363 | accessed_dirty &= pte >> |
364 | (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); | ||
258 | 365 | ||
259 | if (unlikely(!accessed_dirty)) { | 366 | if (unlikely(!accessed_dirty)) { |
260 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); | 367 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); |
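
The rewritten fold above relies on the dirty bit sitting one position above the accessed bit: shifting the leaf PTE right by (DIRTY_SHIFT - ACCESSED_SHIFT) lets one accumulated mask require "accessed at every level, and dirty at the leaf on a write fault". For A/D-less modes the mask starts at zero and the update path bails out early. A standalone worked example (x86 legacy bits: Accessed = bit 5, Dirty = bit 6).

/* Worked example of the dirty->accessed fold on a write fault. */
#include <stdint.h>
#include <stdio.h>

#define ACCESSED_SHIFT 5
#define DIRTY_SHIFT    6
#define ACCESSED_MASK  (1u << ACCESSED_SHIFT)
#define DIRTY_MASK     (1u << DIRTY_SHIFT)

static unsigned walk(const unsigned *ptes, int n, int write_fault)
{
        unsigned accessed_dirty = ACCESSED_MASK;
        int i;

        for (i = 0; i < n; i++)
                accessed_dirty &= ptes[i];      /* all levels must be Accessed */

        if (write_fault)
                /* shifting Dirty onto the Accessed position folds the
                 * "leaf must also be Dirty" requirement into the same mask */
                accessed_dirty &= ptes[n - 1] >> (DIRTY_SHIFT - ACCESSED_SHIFT);

        return accessed_dirty;  /* zero => the A/D bits need updating */
}

int main(void)
{
        unsigned ptes[2] = { ACCESSED_MASK, ACCESSED_MASK };   /* clean leaf */

        printf("read : %u\n", walk(ptes, 2, 0));        /* non-zero, no update */
        printf("write: %u\n", walk(ptes, 2, 1));        /* zero, must set Dirty */
        return 0;
}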
@@ -279,6 +386,25 @@ error: | |||
279 | walker->fault.vector = PF_VECTOR; | 386 | walker->fault.vector = PF_VECTOR; |
280 | walker->fault.error_code_valid = true; | 387 | walker->fault.error_code_valid = true; |
281 | walker->fault.error_code = errcode; | 388 | walker->fault.error_code = errcode; |
389 | |||
390 | #if PTTYPE == PTTYPE_EPT | ||
391 | /* | ||
392 | * Use PFERR_RSVD_MASK in error_code to tell if an EPT | ||
393 | * misconfiguration needs to be injected. The detection is | ||
394 | * done by is_rsvd_bits_set() above. | ||
395 | * | ||
396 | * We set up the value of exit_qualification to inject: | ||
397 | * [2:0] - Derived from [2:0] of the real exit_qualification at EPT violation | ||
398 | * [5:3] - Calculated by the page walk of the guest EPT page tables | ||
399 | * [8:7] - Derived from [8:7] of the real exit_qualification | ||
400 | * | ||
401 | * The other bits are set to 0. | ||
402 | */ | ||
403 | if (!(errcode & PFERR_RSVD_MASK)) { | ||
404 | vcpu->arch.exit_qualification &= 0x187; | ||
405 | vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3; | ||
406 | } | ||
407 | #endif | ||
282 | walker->fault.address = addr; | 408 | walker->fault.address = addr; |
283 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; | 409 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; |
284 | 410 | ||
@@ -293,6 +419,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
293 | access); | 419 | access); |
294 | } | 420 | } |
295 | 421 | ||
422 | #if PTTYPE != PTTYPE_EPT | ||
296 | static int FNAME(walk_addr_nested)(struct guest_walker *walker, | 423 | static int FNAME(walk_addr_nested)(struct guest_walker *walker, |
297 | struct kvm_vcpu *vcpu, gva_t addr, | 424 | struct kvm_vcpu *vcpu, gva_t addr, |
298 | u32 access) | 425 | u32 access) |
@@ -300,6 +427,7 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker, | |||
300 | return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, | 427 | return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, |
301 | addr, access); | 428 | addr, access); |
302 | } | 429 | } |
430 | #endif | ||
303 | 431 | ||
304 | static bool | 432 | static bool |
305 | FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 433 | FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
@@ -309,14 +437,14 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
309 | gfn_t gfn; | 437 | gfn_t gfn; |
310 | pfn_t pfn; | 438 | pfn_t pfn; |
311 | 439 | ||
312 | if (prefetch_invalid_gpte(vcpu, sp, spte, gpte)) | 440 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
313 | return false; | 441 | return false; |
314 | 442 | ||
315 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 443 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
316 | 444 | ||
317 | gfn = gpte_to_gfn(gpte); | 445 | gfn = gpte_to_gfn(gpte); |
318 | pte_access = sp->role.access & gpte_access(vcpu, gpte); | 446 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
319 | protect_clean_gpte(&pte_access, gpte); | 447 | FNAME(protect_clean_gpte)(&pte_access, gpte); |
320 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, | 448 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, |
321 | no_dirty_log && (pte_access & ACC_WRITE_MASK)); | 449 | no_dirty_log && (pte_access & ACC_WRITE_MASK)); |
322 | if (is_error_pfn(pfn)) | 450 | if (is_error_pfn(pfn)) |
@@ -446,7 +574,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
446 | goto out_gpte_changed; | 574 | goto out_gpte_changed; |
447 | 575 | ||
448 | if (sp) | 576 | if (sp) |
449 | link_shadow_page(it.sptep, sp); | 577 | link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); |
450 | } | 578 | } |
451 | 579 | ||
452 | for (; | 580 | for (; |
@@ -466,7 +594,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
466 | 594 | ||
467 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, | 595 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, |
468 | true, direct_access, it.sptep); | 596 | true, direct_access, it.sptep); |
469 | link_shadow_page(it.sptep, sp); | 597 | link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); |
470 | } | 598 | } |
471 | 599 | ||
472 | clear_sp_write_flooding_count(it.sptep); | 600 | clear_sp_write_flooding_count(it.sptep); |
@@ -727,6 +855,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | |||
727 | return gpa; | 855 | return gpa; |
728 | } | 856 | } |
729 | 857 | ||
858 | #if PTTYPE != PTTYPE_EPT | ||
730 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | 859 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, |
731 | u32 access, | 860 | u32 access, |
732 | struct x86_exception *exception) | 861 | struct x86_exception *exception) |
@@ -745,6 +874,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
745 | 874 | ||
746 | return gpa; | 875 | return gpa; |
747 | } | 876 | } |
877 | #endif | ||
748 | 878 | ||
749 | /* | 879 | /* |
750 | * Using the cached information from sp->gfns is safe because: | 880 | * Using the cached information from sp->gfns is safe because: |
@@ -785,15 +915,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
785 | sizeof(pt_element_t))) | 915 | sizeof(pt_element_t))) |
786 | return -EINVAL; | 916 | return -EINVAL; |
787 | 917 | ||
788 | if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) { | 918 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
789 | vcpu->kvm->tlbs_dirty++; | 919 | vcpu->kvm->tlbs_dirty++; |
790 | continue; | 920 | continue; |
791 | } | 921 | } |
792 | 922 | ||
793 | gfn = gpte_to_gfn(gpte); | 923 | gfn = gpte_to_gfn(gpte); |
794 | pte_access = sp->role.access; | 924 | pte_access = sp->role.access; |
795 | pte_access &= gpte_access(vcpu, gpte); | 925 | pte_access &= FNAME(gpte_access)(vcpu, gpte); |
796 | protect_clean_gpte(&pte_access, gpte); | 926 | FNAME(protect_clean_gpte)(&pte_access, gpte); |
797 | 927 | ||
798 | if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, | 928 | if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, |
799 | &nr_present)) | 929 | &nr_present)) |
@@ -830,3 +960,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
830 | #undef gpte_to_gfn | 960 | #undef gpte_to_gfn |
831 | #undef gpte_to_gfn_lvl | 961 | #undef gpte_to_gfn_lvl |
832 | #undef CMPXCHG | 962 | #undef CMPXCHG |
963 | #undef PT_GUEST_ACCESSED_MASK | ||
964 | #undef PT_GUEST_DIRTY_MASK | ||
965 | #undef PT_GUEST_DIRTY_SHIFT | ||
966 | #undef PT_GUEST_ACCESSED_SHIFT | ||
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index c53e797e7369..5c4f63151b4d 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -160,7 +160,7 @@ static void stop_counter(struct kvm_pmc *pmc) | |||
160 | 160 | ||
161 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | 161 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, |
162 | unsigned config, bool exclude_user, bool exclude_kernel, | 162 | unsigned config, bool exclude_user, bool exclude_kernel, |
163 | bool intr) | 163 | bool intr, bool in_tx, bool in_tx_cp) |
164 | { | 164 | { |
165 | struct perf_event *event; | 165 | struct perf_event *event; |
166 | struct perf_event_attr attr = { | 166 | struct perf_event_attr attr = { |
@@ -173,6 +173,10 @@ static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | |||
173 | .exclude_kernel = exclude_kernel, | 173 | .exclude_kernel = exclude_kernel, |
174 | .config = config, | 174 | .config = config, |
175 | }; | 175 | }; |
176 | if (in_tx) | ||
177 | attr.config |= HSW_IN_TX; | ||
178 | if (in_tx_cp) | ||
179 | attr.config |= HSW_IN_TX_CHECKPOINTED; | ||
176 | 180 | ||
177 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); | 181 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); |
178 | 182 | ||
@@ -226,7 +230,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
226 | 230 | ||
227 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | | 231 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | |
228 | ARCH_PERFMON_EVENTSEL_INV | | 232 | ARCH_PERFMON_EVENTSEL_INV | |
229 | ARCH_PERFMON_EVENTSEL_CMASK))) { | 233 | ARCH_PERFMON_EVENTSEL_CMASK | |
234 | HSW_IN_TX | | ||
235 | HSW_IN_TX_CHECKPOINTED))) { | ||
230 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | 236 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, |
231 | unit_mask); | 237 | unit_mask); |
232 | if (config != PERF_COUNT_HW_MAX) | 238 | if (config != PERF_COUNT_HW_MAX) |
@@ -239,7 +245,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
239 | reprogram_counter(pmc, type, config, | 245 | reprogram_counter(pmc, type, config, |
240 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), | 246 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), |
241 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), | 247 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), |
242 | eventsel & ARCH_PERFMON_EVENTSEL_INT); | 248 | eventsel & ARCH_PERFMON_EVENTSEL_INT, |
249 | (eventsel & HSW_IN_TX), | ||
250 | (eventsel & HSW_IN_TX_CHECKPOINTED)); | ||
243 | } | 251 | } |
244 | 252 | ||
245 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | 253 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) |
@@ -256,7 +264,7 @@ static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | |||
256 | arch_events[fixed_pmc_events[idx]].event_type, | 264 | arch_events[fixed_pmc_events[idx]].event_type, |
257 | !(en & 0x2), /* exclude user */ | 265 | !(en & 0x2), /* exclude user */ |
258 | !(en & 0x1), /* exclude kernel */ | 266 | !(en & 0x1), /* exclude kernel */ |
259 | pmi); | 267 | pmi, false, false); |
260 | } | 268 | } |
261 | 269 | ||
262 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) | 270 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) |
@@ -408,7 +416,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
408 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | 416 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { |
409 | if (data == pmc->eventsel) | 417 | if (data == pmc->eventsel) |
410 | return 0; | 418 | return 0; |
411 | if (!(data & 0xffffffff00200000ull)) { | 419 | if (!(data & pmu->reserved_bits)) { |
412 | reprogram_gp_counter(pmc, data); | 420 | reprogram_gp_counter(pmc, data); |
413 | return 0; | 421 | return 0; |
414 | } | 422 | } |
@@ -450,6 +458,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
450 | pmu->counter_bitmask[KVM_PMC_GP] = 0; | 458 | pmu->counter_bitmask[KVM_PMC_GP] = 0; |
451 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | 459 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; |
452 | pmu->version = 0; | 460 | pmu->version = 0; |
461 | pmu->reserved_bits = 0xffffffff00200000ull; | ||
453 | 462 | ||
454 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | 463 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); |
455 | if (!entry) | 464 | if (!entry) |
@@ -478,6 +487,12 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
478 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | | 487 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | |
479 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); | 488 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); |
480 | pmu->global_ctrl_mask = ~pmu->global_ctrl; | 489 | pmu->global_ctrl_mask = ~pmu->global_ctrl; |
490 | |||
491 | entry = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
492 | if (entry && | ||
493 | (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && | ||
494 | (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) | ||
495 | pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; | ||
481 | } | 496 | } |
482 | 497 | ||
483 | void kvm_pmu_init(struct kvm_vcpu *vcpu) | 498 | void kvm_pmu_init(struct kvm_vcpu *vcpu) |
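
The PMU hunks let a guest set the Haswell TSX eventsel bits only when CPUID advertises HLE or RTM: reserved_bits starts with those bits marked reserved, and the XOR clears them when the feature is present, so the existing `data & pmu->reserved_bits` test keeps rejecting them otherwise. A standalone sketch of that gate (HSW_IN_TX and HSW_IN_TX_CHECKPOINTED are the architectural eventsel bits 32/33 named in the hunk; the struct and helpers are illustrative).

/* Reserved-bit gate on PERFEVTSEL writes. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HSW_IN_TX               (1ull << 32)
#define HSW_IN_TX_CHECKPOINTED  (1ull << 33)

struct pmu {
        uint64_t reserved_bits;
};

static void cpuid_update(struct pmu *pmu, bool has_hle_or_rtm)
{
        pmu->reserved_bits = 0xffffffff00200000ull;
        if (has_hle_or_rtm)
                /* XOR clears the two TSX bits that the default sets */
                pmu->reserved_bits ^= HSW_IN_TX | HSW_IN_TX_CHECKPOINTED;
}

static bool eventsel_write_ok(const struct pmu *pmu, uint64_t data)
{
        return (data & pmu->reserved_bits) == 0;
}

int main(void)
{
        struct pmu pmu;

        cpuid_update(&pmu, false);
        printf("no TSX : %d\n", eventsel_write_ok(&pmu, HSW_IN_TX | 0xc0));

        cpuid_update(&pmu, true);
        printf("TSX    : %d\n", eventsel_write_ok(&pmu, HSW_IN_TX | 0xc0));
        return 0;
}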
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 064d0be67ecc..1f1da43ff2a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -373,6 +373,7 @@ struct nested_vmx { | |||
373 | * we must keep them pinned while L2 runs. | 373 | * we must keep them pinned while L2 runs. |
374 | */ | 374 | */ |
375 | struct page *apic_access_page; | 375 | struct page *apic_access_page; |
376 | u64 msr_ia32_feature_control; | ||
376 | }; | 377 | }; |
377 | 378 | ||
378 | #define POSTED_INTR_ON 0 | 379 | #define POSTED_INTR_ON 0 |
@@ -711,10 +712,10 @@ static void nested_release_page_clean(struct page *page) | |||
711 | kvm_release_page_clean(page); | 712 | kvm_release_page_clean(page); |
712 | } | 713 | } |
713 | 714 | ||
715 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | ||
714 | static u64 construct_eptp(unsigned long root_hpa); | 716 | static u64 construct_eptp(unsigned long root_hpa); |
715 | static void kvm_cpu_vmxon(u64 addr); | 717 | static void kvm_cpu_vmxon(u64 addr); |
716 | static void kvm_cpu_vmxoff(void); | 718 | static void kvm_cpu_vmxoff(void); |
717 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
718 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 719 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
719 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 720 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
720 | struct kvm_segment *var, int seg); | 721 | struct kvm_segment *var, int seg); |
@@ -1039,12 +1040,16 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) | |||
1039 | (vmcs12->secondary_vm_exec_control & bit); | 1040 | (vmcs12->secondary_vm_exec_control & bit); |
1040 | } | 1041 | } |
1041 | 1042 | ||
1042 | static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12, | 1043 | static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) |
1043 | struct kvm_vcpu *vcpu) | ||
1044 | { | 1044 | { |
1045 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; | 1045 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; |
1046 | } | 1046 | } |
1047 | 1047 | ||
1048 | static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) | ||
1049 | { | ||
1050 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); | ||
1051 | } | ||
1052 | |||
1048 | static inline bool is_exception(u32 intr_info) | 1053 | static inline bool is_exception(u32 intr_info) |
1049 | { | 1054 | { |
1050 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 1055 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
@@ -2155,6 +2160,7 @@ static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | |||
2155 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | 2160 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; |
2156 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | 2161 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; |
2157 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; | 2162 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; |
2163 | static u32 nested_vmx_ept_caps; | ||
2158 | static __init void nested_vmx_setup_ctls_msrs(void) | 2164 | static __init void nested_vmx_setup_ctls_msrs(void) |
2159 | { | 2165 | { |
2160 | /* | 2166 | /* |
@@ -2190,14 +2196,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2190 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and | 2196 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and |
2191 | * 17 must be 1. | 2197 | * 17 must be 1. |
2192 | */ | 2198 | */ |
2199 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, | ||
2200 | nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); | ||
2193 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2201 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; |
2194 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ | 2202 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ |
2203 | nested_vmx_exit_ctls_high &= | ||
2195 | #ifdef CONFIG_X86_64 | 2204 | #ifdef CONFIG_X86_64 |
2196 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2205 | VM_EXIT_HOST_ADDR_SPACE_SIZE | |
2197 | #else | ||
2198 | nested_vmx_exit_ctls_high = 0; | ||
2199 | #endif | 2206 | #endif |
2200 | nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2207 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; |
2208 | nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2209 | VM_EXIT_LOAD_IA32_EFER); | ||
2201 | 2210 | ||
2202 | /* entry controls */ | 2211 | /* entry controls */ |
2203 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2212 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
@@ -2205,8 +2214,12 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2205 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ | 2214 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ |
2206 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 2215 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; |
2207 | nested_vmx_entry_ctls_high &= | 2216 | nested_vmx_entry_ctls_high &= |
2208 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; | 2217 | #ifdef CONFIG_X86_64 |
2209 | nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 2218 | VM_ENTRY_IA32E_MODE | |
2219 | #endif | ||
2220 | VM_ENTRY_LOAD_IA32_PAT; | ||
2221 | nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2222 | VM_ENTRY_LOAD_IA32_EFER); | ||
2210 | 2223 | ||
2211 | /* cpu-based controls */ | 2224 | /* cpu-based controls */ |
2212 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2225 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
@@ -2241,6 +2254,22 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2241 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2254 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2242 | SECONDARY_EXEC_WBINVD_EXITING; | 2255 | SECONDARY_EXEC_WBINVD_EXITING; |
2243 | 2256 | ||
2257 | if (enable_ept) { | ||
2258 | /* nested EPT: emulate EPT also to L1 */ | ||
2259 | nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; | ||
2260 | nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | | ||
2261 | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; | ||
2262 | nested_vmx_ept_caps &= vmx_capability.ept; | ||
2263 | /* | ||
2264 | * Since invept is completely emulated, we support both global | ||
2265 | * and context invalidation independently of what the host cpu | ||
2266 | * supports. | ||
2267 | */ | ||
2268 | nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | | ||
2269 | VMX_EPT_EXTENT_CONTEXT_BIT; | ||
2270 | } else | ||
2271 | nested_vmx_ept_caps = 0; | ||
2272 | |||
2244 | /* miscellaneous data */ | 2273 | /* miscellaneous data */ |
2245 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | 2274 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); |
2246 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | 2275 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | |
@@ -2282,8 +2311,11 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2282 | 2311 | ||
2283 | switch (msr_index) { | 2312 | switch (msr_index) { |
2284 | case MSR_IA32_FEATURE_CONTROL: | 2313 | case MSR_IA32_FEATURE_CONTROL: |
2285 | *pdata = 0; | 2314 | if (nested_vmx_allowed(vcpu)) { |
2286 | break; | 2315 | *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; |
2316 | break; | ||
2317 | } | ||
2318 | return 0; | ||
2287 | case MSR_IA32_VMX_BASIC: | 2319 | case MSR_IA32_VMX_BASIC: |
2288 | /* | 2320 | /* |
2289 | * This MSR reports some information about VMX support. We | 2321 | * This MSR reports some information about VMX support. We |
@@ -2346,8 +2378,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2346 | nested_vmx_secondary_ctls_high); | 2378 | nested_vmx_secondary_ctls_high); |
2347 | break; | 2379 | break; |
2348 | case MSR_IA32_VMX_EPT_VPID_CAP: | 2380 | case MSR_IA32_VMX_EPT_VPID_CAP: |
2349 | /* Currently, no nested ept or nested vpid */ | 2381 | /* Currently, no nested vpid support */ |
2350 | *pdata = 0; | 2382 | *pdata = nested_vmx_ept_caps; |
2351 | break; | 2383 | break; |
2352 | default: | 2384 | default: |
2353 | return 0; | 2385 | return 0; |
@@ -2356,14 +2388,24 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2356 | return 1; | 2388 | return 1; |
2357 | } | 2389 | } |
2358 | 2390 | ||
2359 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 2391 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2360 | { | 2392 | { |
2393 | u32 msr_index = msr_info->index; | ||
2394 | u64 data = msr_info->data; | ||
2395 | bool host_initialized = msr_info->host_initiated; | ||
2396 | |||
2361 | if (!nested_vmx_allowed(vcpu)) | 2397 | if (!nested_vmx_allowed(vcpu)) |
2362 | return 0; | 2398 | return 0; |
2363 | 2399 | ||
2364 | if (msr_index == MSR_IA32_FEATURE_CONTROL) | 2400 | if (msr_index == MSR_IA32_FEATURE_CONTROL) { |
2365 | /* TODO: the right thing. */ | 2401 | if (!host_initialized && |
2402 | to_vmx(vcpu)->nested.msr_ia32_feature_control | ||
2403 | & FEATURE_CONTROL_LOCKED) | ||
2404 | return 0; | ||
2405 | to_vmx(vcpu)->nested.msr_ia32_feature_control = data; | ||
2366 | return 1; | 2406 | return 1; |
2407 | } | ||
2408 | |||
2367 | /* | 2409 | /* |
2368 | * No need to treat VMX capability MSRs specially: If we don't handle | 2410 | * No need to treat VMX capability MSRs specially: If we don't handle |
2369 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) | 2411 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) |
@@ -2494,7 +2536,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2494 | return 1; | 2536 | return 1; |
2495 | /* Otherwise falls through */ | 2537 | /* Otherwise falls through */ |
2496 | default: | 2538 | default: |
2497 | if (vmx_set_vmx_msr(vcpu, msr_index, data)) | 2539 | if (vmx_set_vmx_msr(vcpu, msr_info)) |
2498 | break; | 2540 | break; |
2499 | msr = find_msr_entry(vmx, msr_index); | 2541 | msr = find_msr_entry(vmx, msr_index); |
2500 | if (msr) { | 2542 | if (msr) { |
@@ -5302,9 +5344,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
5302 | 5344 | ||
5303 | /* It is a write fault? */ | 5345 | /* It is a write fault? */ |
5304 | error_code = exit_qualification & (1U << 1); | 5346 | error_code = exit_qualification & (1U << 1); |
5347 | /* It is a fetch fault? */ | ||
5348 | error_code |= (exit_qualification & (1U << 2)) << 2; | ||
5305 | /* ept page table is present? */ | 5349 | /* ept page table is present? */ |
5306 | error_code |= (exit_qualification >> 3) & 0x1; | 5350 | error_code |= (exit_qualification >> 3) & 0x1; |
5307 | 5351 | ||
5352 | vcpu->arch.exit_qualification = exit_qualification; | ||
5353 | |||
5308 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | 5354 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); |
5309 | } | 5355 | } |
5310 | 5356 | ||
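
handle_ept_violation() now also folds the instruction-fetch bit of the exit qualification into the synthesized page-fault error code: qualification bit 1 (write) stays at bit 1, bit 2 (fetch) is shifted up to the fetch position (bit 4), and bit 3 (translation present) lands in bit 0. A standalone restatement of that mapping.

/* Fold an EPT-violation exit qualification into #PF-style error-code bits. */
#include <stdint.h>
#include <stdio.h>

static uint32_t ept_exitqual_to_error_code(uint64_t exit_qualification)
{
        uint32_t error_code;

        error_code  = exit_qualification & (1u << 1);         /* write fault */
        error_code |= (exit_qualification & (1u << 2)) << 2;  /* fetch -> bit 4 */
        error_code |= (exit_qualification >> 3) & 0x1;        /* page present */
        return error_code;
}

int main(void)
{
        /* fetch fault (bit 2) on a present translation (bit 3) */
        printf("0x%x\n", ept_exitqual_to_error_code((1u << 2) | (1u << 3)));
        return 0;
}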
@@ -5438,7 +5484,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5438 | 5484 | ||
5439 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); | 5485 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); |
5440 | 5486 | ||
5441 | if (err == EMULATE_DO_MMIO) { | 5487 | if (err == EMULATE_USER_EXIT) { |
5488 | ++vcpu->stat.mmio_exits; | ||
5442 | ret = 0; | 5489 | ret = 0; |
5443 | goto out; | 5490 | goto out; |
5444 | } | 5491 | } |
@@ -5567,8 +5614,47 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | |||
5567 | free_loaded_vmcs(&vmx->vmcs01); | 5614 | free_loaded_vmcs(&vmx->vmcs01); |
5568 | } | 5615 | } |
5569 | 5616 | ||
5617 | /* | ||
5618 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
5619 | * set the success or error code of an emulated VMX instruction, as specified | ||
5620 | * by Vol 2B, VMX Instruction Reference, "Conventions". | ||
5621 | */ | ||
5622 | static void nested_vmx_succeed(struct kvm_vcpu *vcpu) | ||
5623 | { | ||
5624 | vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) | ||
5625 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5626 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); | ||
5627 | } | ||
5628 | |||
5629 | static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) | ||
5630 | { | ||
5631 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5632 | & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | | ||
5633 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5634 | | X86_EFLAGS_CF); | ||
5635 | } | ||
5636 | |||
5570 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | 5637 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, |
5571 | u32 vm_instruction_error); | 5638 | u32 vm_instruction_error) |
5639 | { | ||
5640 | if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { | ||
5641 | /* | ||
5642 | * failValid writes the error number to the current VMCS, which | ||
5643 | * can't be done if there isn't a current VMCS. | ||
5644 | */ | ||
5645 | nested_vmx_failInvalid(vcpu); | ||
5646 | return; | ||
5647 | } | ||
5648 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5649 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5650 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5651 | | X86_EFLAGS_ZF); | ||
5652 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | ||
5653 | /* | ||
5654 | * We don't need to force a shadow sync because | ||
5655 | * VM_INSTRUCTION_ERROR is not shadowed | ||
5656 | */ | ||
5657 | } | ||
5572 | 5658 | ||
5573 | /* | 5659 | /* |
5574 | * Emulate the VMXON instruction. | 5660 | * Emulate the VMXON instruction. |
@@ -5583,6 +5669,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5583 | struct kvm_segment cs; | 5669 | struct kvm_segment cs; |
5584 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5670 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5585 | struct vmcs *shadow_vmcs; | 5671 | struct vmcs *shadow_vmcs; |
5672 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | ||
5673 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | ||
5586 | 5674 | ||
5587 | /* The Intel VMX Instruction Reference lists a bunch of bits that | 5675 | /* The Intel VMX Instruction Reference lists a bunch of bits that |
5588 | * are prerequisite to running VMXON, most notably cr4.VMXE must be | 5676 | * are prerequisite to running VMXON, most notably cr4.VMXE must be |
@@ -5611,6 +5699,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5611 | skip_emulated_instruction(vcpu); | 5699 | skip_emulated_instruction(vcpu); |
5612 | return 1; | 5700 | return 1; |
5613 | } | 5701 | } |
5702 | |||
5703 | if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES) | ||
5704 | != VMXON_NEEDED_FEATURES) { | ||
5705 | kvm_inject_gp(vcpu, 0); | ||
5706 | return 1; | ||
5707 | } | ||
5708 | |||
5614 | if (enable_shadow_vmcs) { | 5709 | if (enable_shadow_vmcs) { |
5615 | shadow_vmcs = alloc_vmcs(); | 5710 | shadow_vmcs = alloc_vmcs(); |
5616 | if (!shadow_vmcs) | 5711 | if (!shadow_vmcs) |
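
Taken together, the vmx_set_vmx_msr() and handle_vmon() hunks model IA32_FEATURE_CONTROL for L1: guest writes are refused once the lock bit is set (unless host-initiated), and VMXON fails unless both the lock bit and the "VMX outside SMX" enable bit are present, matching the #GP real hardware raises. A hedged standalone model of that state machine; the bit positions follow the architectural MSR layout, everything else is illustrative.

/* Toy model of the IA32_FEATURE_CONTROL gating added above. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FC_LOCKED               (1ull << 0)
#define FC_VMXON_OUTSIDE_SMX    (1ull << 2)

struct nested_state {
        uint64_t feature_control;
};

/* guest WRMSR: refused once the lock bit is set; host writes may override */
static bool wrmsr_feature_control(struct nested_state *n, uint64_t data,
                                  bool host_initiated)
{
        if (!host_initiated && (n->feature_control & FC_LOCKED))
                return false;   /* real hardware would #GP here */
        n->feature_control = data;
        return true;
}

/* VMXON: needs both the lock bit and the outside-SMX enable bit */
static bool vmxon_allowed(const struct nested_state *n)
{
        const uint64_t needed = FC_LOCKED | FC_VMXON_OUTSIDE_SMX;

        return (n->feature_control & needed) == needed;
}

int main(void)
{
        struct nested_state n = { 0 };

        printf("before wrmsr: %d\n", vmxon_allowed(&n));
        wrmsr_feature_control(&n, FC_LOCKED | FC_VMXON_OUTSIDE_SMX, false);
        printf("after wrmsr : %d\n", vmxon_allowed(&n));
        printf("relock write: %d\n", wrmsr_feature_control(&n, 0, false));
        return 0;
}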
@@ -5628,6 +5723,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5628 | vmx->nested.vmxon = true; | 5723 | vmx->nested.vmxon = true; |
5629 | 5724 | ||
5630 | skip_emulated_instruction(vcpu); | 5725 | skip_emulated_instruction(vcpu); |
5726 | nested_vmx_succeed(vcpu); | ||
5631 | return 1; | 5727 | return 1; |
5632 | } | 5728 | } |
5633 | 5729 | ||
@@ -5712,6 +5808,7 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) | |||
5712 | return 1; | 5808 | return 1; |
5713 | free_nested(to_vmx(vcpu)); | 5809 | free_nested(to_vmx(vcpu)); |
5714 | skip_emulated_instruction(vcpu); | 5810 | skip_emulated_instruction(vcpu); |
5811 | nested_vmx_succeed(vcpu); | ||
5715 | return 1; | 5812 | return 1; |
5716 | } | 5813 | } |
5717 | 5814 | ||
@@ -5768,48 +5865,6 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, | |||
5768 | return 0; | 5865 | return 0; |
5769 | } | 5866 | } |
5770 | 5867 | ||
5771 | /* | ||
5772 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
5773 | * set the success or error code of an emulated VMX instruction, as specified | ||
5774 | * by Vol 2B, VMX Instruction Reference, "Conventions". | ||
5775 | */ | ||
5776 | static void nested_vmx_succeed(struct kvm_vcpu *vcpu) | ||
5777 | { | ||
5778 | vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) | ||
5779 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5780 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); | ||
5781 | } | ||
5782 | |||
5783 | static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) | ||
5784 | { | ||
5785 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5786 | & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | | ||
5787 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5788 | | X86_EFLAGS_CF); | ||
5789 | } | ||
5790 | |||
5791 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | ||
5792 | u32 vm_instruction_error) | ||
5793 | { | ||
5794 | if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { | ||
5795 | /* | ||
5796 | * failValid writes the error number to the current VMCS, which | ||
5797 | * can't be done there isn't a current VMCS. | ||
5798 | */ | ||
5799 | nested_vmx_failInvalid(vcpu); | ||
5800 | return; | ||
5801 | } | ||
5802 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5803 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5804 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5805 | | X86_EFLAGS_ZF); | ||
5806 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | ||
5807 | /* | ||
5808 | * We don't need to force a shadow sync because | ||
5809 | * VM_INSTRUCTION_ERROR is not shadowed | ||
5810 | */ | ||
5811 | } | ||
5812 | |||
5813 | /* Emulate the VMCLEAR instruction */ | 5868 | /* Emulate the VMCLEAR instruction */ |
5814 | static int handle_vmclear(struct kvm_vcpu *vcpu) | 5869 | static int handle_vmclear(struct kvm_vcpu *vcpu) |
5815 | { | 5870 | { |
@@ -5972,8 +6027,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
5972 | unsigned long field; | 6027 | unsigned long field; |
5973 | u64 field_value; | 6028 | u64 field_value; |
5974 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | 6029 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; |
5975 | unsigned long *fields = (unsigned long *)shadow_read_write_fields; | 6030 | const unsigned long *fields = shadow_read_write_fields; |
5976 | int num_fields = max_shadow_read_write_fields; | 6031 | const int num_fields = max_shadow_read_write_fields; |
5977 | 6032 | ||
5978 | vmcs_load(shadow_vmcs); | 6033 | vmcs_load(shadow_vmcs); |
5979 | 6034 | ||
@@ -6002,12 +6057,11 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
6002 | 6057 | ||
6003 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | 6058 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) |
6004 | { | 6059 | { |
6005 | unsigned long *fields[] = { | 6060 | const unsigned long *fields[] = { |
6006 | (unsigned long *)shadow_read_write_fields, | 6061 | shadow_read_write_fields, |
6007 | (unsigned long *)shadow_read_only_fields | 6062 | shadow_read_only_fields |
6008 | }; | 6063 | }; |
6009 | int num_lists = ARRAY_SIZE(fields); | 6064 | const int max_fields[] = { |
6010 | int max_fields[] = { | ||
6011 | max_shadow_read_write_fields, | 6065 | max_shadow_read_write_fields, |
6012 | max_shadow_read_only_fields | 6066 | max_shadow_read_only_fields |
6013 | }; | 6067 | }; |
@@ -6018,7 +6072,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | |||
6018 | 6072 | ||
6019 | vmcs_load(shadow_vmcs); | 6073 | vmcs_load(shadow_vmcs); |
6020 | 6074 | ||
6021 | for (q = 0; q < num_lists; q++) { | 6075 | for (q = 0; q < ARRAY_SIZE(fields); q++) { |
6022 | for (i = 0; i < max_fields[q]; i++) { | 6076 | for (i = 0; i < max_fields[q]; i++) { |
6023 | field = fields[q][i]; | 6077 | field = fields[q][i]; |
6024 | vmcs12_read_any(&vmx->vcpu, field, &field_value); | 6078 | vmcs12_read_any(&vmx->vcpu, field, &field_value); |
@@ -6248,6 +6302,74 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) | |||
6248 | return 1; | 6302 | return 1; |
6249 | } | 6303 | } |
6250 | 6304 | ||
6305 | /* Emulate the INVEPT instruction */ | ||
6306 | static int handle_invept(struct kvm_vcpu *vcpu) | ||
6307 | { | ||
6308 | u32 vmx_instruction_info, types; | ||
6309 | unsigned long type; | ||
6310 | gva_t gva; | ||
6311 | struct x86_exception e; | ||
6312 | struct { | ||
6313 | u64 eptp, gpa; | ||
6314 | } operand; | ||
6315 | u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK; | ||
6316 | |||
6317 | if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || | ||
6318 | !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { | ||
6319 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
6320 | return 1; | ||
6321 | } | ||
6322 | |||
6323 | if (!nested_vmx_check_permission(vcpu)) | ||
6324 | return 1; | ||
6325 | |||
6326 | if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { | ||
6327 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
6328 | return 1; | ||
6329 | } | ||
6330 | |||
6331 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
6332 | type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); | ||
6333 | |||
6334 | types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; | ||
6335 | |||
6336 | if (!(types & (1UL << type))) { | ||
6337 | nested_vmx_failValid(vcpu, | ||
6338 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | ||
6339 | return 1; | ||
6340 | } | ||
6341 | |||
6342 | /* According to the Intel VMX instruction reference, the memory | ||
6343 | * operand is read even if it isn't needed (e.g., for type==global) | ||
6344 | */ | ||
6345 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | ||
6346 | vmx_instruction_info, &gva)) | ||
6347 | return 1; | ||
6348 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, | ||
6349 | sizeof(operand), &e)) { | ||
6350 | kvm_inject_page_fault(vcpu, &e); | ||
6351 | return 1; | ||
6352 | } | ||
6353 | |||
6354 | switch (type) { | ||
6355 | case VMX_EPT_EXTENT_CONTEXT: | ||
6356 | if ((operand.eptp & eptp_mask) != | ||
6357 | (nested_ept_get_cr3(vcpu) & eptp_mask)) | ||
6358 | break; | ||
6359 | case VMX_EPT_EXTENT_GLOBAL: | ||
6360 | kvm_mmu_sync_roots(vcpu); | ||
6361 | kvm_mmu_flush_tlb(vcpu); | ||
6362 | nested_vmx_succeed(vcpu); | ||
6363 | break; | ||
6364 | default: | ||
6365 | BUG_ON(1); | ||
6366 | break; | ||
6367 | } | ||
6368 | |||
6369 | skip_emulated_instruction(vcpu); | ||
6370 | return 1; | ||
6371 | } | ||
6372 | |||
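handle_invept() accepts an extent type only when the corresponding capability bit is advertised: the supported-type mask is carved out of the EPT/VPID capability word and indexed by the type the guest supplied in a register. A small stand-alone sketch of just that check; the constant values here are assumptions mirroring the layout this code relies on (single-context and global extents reported at bits 25 and 26 of the capability word, i.e. a shift of 24), not values copied from the headers:

#include <stdio.h>
#include <stdint.h>

/* Assumed values, matching the layout the code above relies on. */
#define VMX_EPT_EXTENT_SHIFT	24
#define VMX_EPT_EXTENT_CONTEXT	1UL
#define VMX_EPT_EXTENT_GLOBAL	2UL

/* Return 1 if the requested INVEPT type is advertised in the caps word. */
static int invept_type_supported(uint32_t ept_caps, unsigned long type)
{
	uint32_t types = (ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;

	return (types & (1UL << type)) != 0;
}

int main(void)
{
	/* Caps word advertising only global-context INVEPT (bit 26 set). */
	uint32_t caps = 1u << 26;

	printf("single-context supported: %d\n",
	       invept_type_supported(caps, VMX_EPT_EXTENT_CONTEXT));
	printf("global supported:         %d\n",
	       invept_type_supported(caps, VMX_EPT_EXTENT_GLOBAL));
	return 0;
}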
6251 | /* | 6373 | /* |
6252 | * The exit handlers return 1 if the exit was handled fully and guest execution | 6374 | * The exit handlers return 1 if the exit was handled fully and guest execution |
6253 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 6375 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -6292,6 +6414,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
6292 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | 6414 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, |
6293 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, | 6415 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, |
6294 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, | 6416 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, |
6417 | [EXIT_REASON_INVEPT] = handle_invept, | ||
6295 | }; | 6418 | }; |
6296 | 6419 | ||
6297 | static const int kvm_vmx_max_exit_handlers = | 6420 | static const int kvm_vmx_max_exit_handlers = |
@@ -6518,6 +6641,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6518 | case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: | 6641 | case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: |
6519 | case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: | 6642 | case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: |
6520 | case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: | 6643 | case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: |
6644 | case EXIT_REASON_INVEPT: | ||
6521 | /* | 6645 | /* |
6522 | * VMX instructions trap unconditionally. This allows L1 to | 6646 | * VMX instructions trap unconditionally. This allows L1 to |
6523 | * emulate them for its L2 guest, i.e., allows 3-level nesting! | 6647 | * emulate them for its L2 guest, i.e., allows 3-level nesting! |
@@ -6550,7 +6674,20 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6550 | return nested_cpu_has2(vmcs12, | 6674 | return nested_cpu_has2(vmcs12, |
6551 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 6675 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); |
6552 | case EXIT_REASON_EPT_VIOLATION: | 6676 | case EXIT_REASON_EPT_VIOLATION: |
6677 | /* | ||
6678 | * L0 always deals with the EPT violation. If nested EPT is | ||
6679 | * used, and the nested mmu code discovers that the address is | ||
6680 | * missing in the guest EPT table (EPT12), the EPT violation | ||
6681 | * will be injected with nested_ept_inject_page_fault() | ||
6682 | */ | ||
6683 | return 0; | ||
6553 | case EXIT_REASON_EPT_MISCONFIG: | 6684 | case EXIT_REASON_EPT_MISCONFIG: |
6685 | /* | ||
6686 | * L2 never directly uses L1's EPT, but rather L0's own EPT | ||
6687 | * table (shadow on EPT) or a merged EPT table that L0 built | ||
6688 | * (EPT on EPT). So any problems with the structure of the | ||
6689 | * table are L0's fault. | ||
6690 | */ | ||
6554 | return 0; | 6691 | return 0; |
6555 | case EXIT_REASON_PREEMPTION_TIMER: | 6692 | case EXIT_REASON_PREEMPTION_TIMER: |
6556 | return vmcs12->pin_based_vm_exec_control & | 6693 | return vmcs12->pin_based_vm_exec_control & |
@@ -6638,7 +6775,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
6638 | 6775 | ||
6639 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && | 6776 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && |
6640 | !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( | 6777 | !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( |
6641 | get_vmcs12(vcpu), vcpu)))) { | 6778 | get_vmcs12(vcpu))))) { |
6642 | if (vmx_interrupt_allowed(vcpu)) { | 6779 | if (vmx_interrupt_allowed(vcpu)) { |
6643 | vmx->soft_vnmi_blocked = 0; | 6780 | vmx->soft_vnmi_blocked = 0; |
6644 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | 6781 | } else if (vmx->vnmi_blocked_time > 1000000000LL && |
@@ -7326,6 +7463,48 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
7326 | entry->ecx |= bit(X86_FEATURE_VMX); | 7463 | entry->ecx |= bit(X86_FEATURE_VMX); |
7327 | } | 7464 | } |
7328 | 7465 | ||
7466 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | ||
7467 | struct x86_exception *fault) | ||
7468 | { | ||
7469 | struct vmcs12 *vmcs12; | ||
7470 | nested_vmx_vmexit(vcpu); | ||
7471 | vmcs12 = get_vmcs12(vcpu); | ||
7472 | |||
7473 | if (fault->error_code & PFERR_RSVD_MASK) | ||
7474 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; | ||
7475 | else | ||
7476 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; | ||
7477 | vmcs12->exit_qualification = vcpu->arch.exit_qualification; | ||
7478 | vmcs12->guest_physical_address = fault->address; | ||
7479 | } | ||
7480 | |||
7481 | /* Callbacks for nested_ept_init_mmu_context: */ | ||
7482 | |||
7483 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | ||
7484 | { | ||
7485 | /* return the page table to be shadowed - in our case, EPT12 */ | ||
7486 | return get_vmcs12(vcpu)->ept_pointer; | ||
7487 | } | ||
7488 | |||
7489 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | ||
7490 | { | ||
7491 | int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, | ||
7492 | nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); | ||
7493 | |||
7494 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | ||
7495 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; | ||
7496 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | ||
7497 | |||
7498 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | ||
7499 | |||
7500 | return r; | ||
7501 | } | ||
7502 | |||
7503 | static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) | ||
7504 | { | ||
7505 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
7506 | } | ||
7507 | |||
7329 | /* | 7508 | /* |
7330 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested | 7509 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested |
7331 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it | 7510 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it |
@@ -7388,7 +7567,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7388 | vmcs12->guest_interruptibility_info); | 7567 | vmcs12->guest_interruptibility_info); |
7389 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 7568 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
7390 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); | 7569 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
7391 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); | 7570 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
7392 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 7571 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
7393 | vmcs12->guest_pending_dbg_exceptions); | 7572 | vmcs12->guest_pending_dbg_exceptions); |
7394 | vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); | 7573 | vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); |
@@ -7508,15 +7687,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7508 | vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; | 7687 | vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; |
7509 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | 7688 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); |
7510 | 7689 | ||
7511 | /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */ | 7690 | /* L2->L1 exit controls are emulated - the hardware exit is to L0 so |
7512 | vmcs_write32(VM_EXIT_CONTROLS, | 7691 | * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER |
7513 | vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); | 7692 | * bits are further modified by vmx_set_efer() below. |
7514 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | | 7693 | */ |
7694 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); | ||
7695 | |||
7696 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are | ||
7697 | * emulated by vmx_set_efer(), below. | ||
7698 | */ | ||
7699 | vmcs_write32(VM_ENTRY_CONTROLS, | ||
7700 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & | ||
7701 | ~VM_ENTRY_IA32E_MODE) | | ||
7515 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | 7702 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); |
7516 | 7703 | ||
7517 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) | 7704 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) { |
7518 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); | 7705 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); |
7519 | else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | 7706 | vcpu->arch.pat = vmcs12->guest_ia32_pat; |
7707 | } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | ||
7520 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); | 7708 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); |
7521 | 7709 | ||
7522 | 7710 | ||
@@ -7538,6 +7726,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7538 | vmx_flush_tlb(vcpu); | 7726 | vmx_flush_tlb(vcpu); |
7539 | } | 7727 | } |
7540 | 7728 | ||
7729 | if (nested_cpu_has_ept(vmcs12)) { | ||
7730 | kvm_mmu_unload(vcpu); | ||
7731 | nested_ept_init_mmu_context(vcpu); | ||
7732 | } | ||
7733 | |||
7541 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | 7734 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) |
7542 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | 7735 | vcpu->arch.efer = vmcs12->guest_ia32_efer; |
7543 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | 7736 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) |
@@ -7565,6 +7758,16 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7565 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); | 7758 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); |
7566 | kvm_mmu_reset_context(vcpu); | 7759 | kvm_mmu_reset_context(vcpu); |
7567 | 7760 | ||
7761 | /* | ||
7762 | * L1 may access L2's PDPTRs, so save them to construct vmcs12 | ||
7763 | */ | ||
7764 | if (enable_ept) { | ||
7765 | vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); | ||
7766 | vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); | ||
7767 | vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); | ||
7768 | vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); | ||
7769 | } | ||
7770 | |||
7568 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); | 7771 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); |
7569 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); | 7772 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); |
7570 | } | 7773 | } |
@@ -7887,6 +8090,22 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7887 | vmcs12->guest_pending_dbg_exceptions = | 8090 | vmcs12->guest_pending_dbg_exceptions = |
7888 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 8091 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
7889 | 8092 | ||
8093 | /* | ||
8094 | * In some cases (usually, nested EPT), L2 is allowed to change its | ||
8095 | * own CR3 without exiting. If it has changed it, we must keep it. | ||
8096 | * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined | ||
8097 | * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12. | ||
8098 | * | ||
8099 | * Additionally, restore L2's PDPTR to vmcs12. | ||
8100 | */ | ||
8101 | if (enable_ept) { | ||
8102 | vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); | ||
8103 | vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); | ||
8104 | vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); | ||
8105 | vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); | ||
8106 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); | ||
8107 | } | ||
8108 | |||
7890 | vmcs12->vm_entry_controls = | 8109 | vmcs12->vm_entry_controls = |
7891 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 8110 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
7892 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | 8111 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); |
@@ -7948,6 +8167,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7948 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | 8167 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
7949 | struct vmcs12 *vmcs12) | 8168 | struct vmcs12 *vmcs12) |
7950 | { | 8169 | { |
8170 | struct kvm_segment seg; | ||
8171 | |||
7951 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 8172 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
7952 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 8173 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
7953 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | 8174 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) |
@@ -7982,7 +8203,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
7982 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); | 8203 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); |
7983 | kvm_set_cr4(vcpu, vmcs12->host_cr4); | 8204 | kvm_set_cr4(vcpu, vmcs12->host_cr4); |
7984 | 8205 | ||
7985 | /* shadow page tables on either EPT or shadow page tables */ | 8206 | if (nested_cpu_has_ept(vmcs12)) |
8207 | nested_ept_uninit_mmu_context(vcpu); | ||
8208 | |||
7986 | kvm_set_cr3(vcpu, vmcs12->host_cr3); | 8209 | kvm_set_cr3(vcpu, vmcs12->host_cr3); |
7987 | kvm_mmu_reset_context(vcpu); | 8210 | kvm_mmu_reset_context(vcpu); |
7988 | 8211 | ||
@@ -8001,23 +8224,61 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8001 | vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); | 8224 | vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); |
8002 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); | 8225 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); |
8003 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); | 8226 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); |
8004 | vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base); | 8227 | |
8005 | vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base); | 8228 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { |
8006 | vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base); | ||
8007 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector); | ||
8008 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector); | ||
8009 | vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector); | ||
8010 | vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector); | ||
8011 | vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector); | ||
8012 | vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector); | ||
8013 | vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector); | ||
8014 | |||
8015 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) | ||
8016 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); | 8229 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); |
8230 | vcpu->arch.pat = vmcs12->host_ia32_pat; | ||
8231 | } | ||
8017 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | 8232 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) |
8018 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, | 8233 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, |
8019 | vmcs12->host_ia32_perf_global_ctrl); | 8234 | vmcs12->host_ia32_perf_global_ctrl); |
8020 | 8235 | ||
8236 | /* Set L1 segment info according to Intel SDM | ||
8237 | 27.5.2 Loading Host Segment and Descriptor-Table Registers */ | ||
8238 | seg = (struct kvm_segment) { | ||
8239 | .base = 0, | ||
8240 | .limit = 0xFFFFFFFF, | ||
8241 | .selector = vmcs12->host_cs_selector, | ||
8242 | .type = 11, | ||
8243 | .present = 1, | ||
8244 | .s = 1, | ||
8245 | .g = 1 | ||
8246 | }; | ||
8247 | if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | ||
8248 | seg.l = 1; | ||
8249 | else | ||
8250 | seg.db = 1; | ||
8251 | vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); | ||
8252 | seg = (struct kvm_segment) { | ||
8253 | .base = 0, | ||
8254 | .limit = 0xFFFFFFFF, | ||
8255 | .type = 3, | ||
8256 | .present = 1, | ||
8257 | .s = 1, | ||
8258 | .db = 1, | ||
8259 | .g = 1 | ||
8260 | }; | ||
8261 | seg.selector = vmcs12->host_ds_selector; | ||
8262 | vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); | ||
8263 | seg.selector = vmcs12->host_es_selector; | ||
8264 | vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); | ||
8265 | seg.selector = vmcs12->host_ss_selector; | ||
8266 | vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); | ||
8267 | seg.selector = vmcs12->host_fs_selector; | ||
8268 | seg.base = vmcs12->host_fs_base; | ||
8269 | vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); | ||
8270 | seg.selector = vmcs12->host_gs_selector; | ||
8271 | seg.base = vmcs12->host_gs_base; | ||
8272 | vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); | ||
8273 | seg = (struct kvm_segment) { | ||
8274 | .base = vmcs12->host_tr_base, | ||
8275 | .limit = 0x67, | ||
8276 | .selector = vmcs12->host_tr_selector, | ||
8277 | .type = 11, | ||
8278 | .present = 1 | ||
8279 | }; | ||
8280 | vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); | ||
8281 | |||
8021 | kvm_set_dr(vcpu, 7, 0x400); | 8282 | kvm_set_dr(vcpu, 7, 0x400); |
8022 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | 8283 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); |
8023 | } | 8284 | } |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d21bce505315..e5ca72a5cdb6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -682,17 +682,6 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
682 | */ | 682 | */ |
683 | } | 683 | } |
684 | 684 | ||
685 | /* | ||
686 | * Does the new cr3 value map to physical memory? (Note, we | ||
687 | * catch an invalid cr3 even in real-mode, because it would | ||
688 | * cause trouble later on when we turn on paging anyway.) | ||
689 | * | ||
690 | * A real CPU would silently accept an invalid cr3 and would | ||
691 | * attempt to use it - with largely undefined (and often hard | ||
692 | * to debug) behavior on the guest side. | ||
693 | */ | ||
694 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | ||
695 | return 1; | ||
696 | vcpu->arch.cr3 = cr3; | 685 | vcpu->arch.cr3 = cr3; |
697 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | 686 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); |
698 | vcpu->arch.mmu.new_cr3(vcpu); | 687 | vcpu->arch.mmu.new_cr3(vcpu); |
@@ -850,7 +839,8 @@ static u32 msrs_to_save[] = { | |||
850 | #ifdef CONFIG_X86_64 | 839 | #ifdef CONFIG_X86_64 |
851 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 840 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
852 | #endif | 841 | #endif |
853 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA | 842 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, |
843 | MSR_IA32_FEATURE_CONTROL | ||
854 | }; | 844 | }; |
855 | 845 | ||
856 | static unsigned num_msrs_to_save; | 846 | static unsigned num_msrs_to_save; |
@@ -1457,6 +1447,29 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) | |||
1457 | #endif | 1447 | #endif |
1458 | } | 1448 | } |
1459 | 1449 | ||
1450 | static void kvm_gen_update_masterclock(struct kvm *kvm) | ||
1451 | { | ||
1452 | #ifdef CONFIG_X86_64 | ||
1453 | int i; | ||
1454 | struct kvm_vcpu *vcpu; | ||
1455 | struct kvm_arch *ka = &kvm->arch; | ||
1456 | |||
1457 | spin_lock(&ka->pvclock_gtod_sync_lock); | ||
1458 | kvm_make_mclock_inprogress_request(kvm); | ||
1459 | /* no guest entries from this point */ | ||
1460 | pvclock_update_vm_gtod_copy(kvm); | ||
1461 | |||
1462 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1463 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
1464 | |||
1465 | /* guest entries allowed */ | ||
1466 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1467 | clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); | ||
1468 | |||
1469 | spin_unlock(&ka->pvclock_gtod_sync_lock); | ||
1470 | #endif | ||
1471 | } | ||
1472 | |||
1460 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1473 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
1461 | { | 1474 | { |
1462 | unsigned long flags, this_tsc_khz; | 1475 | unsigned long flags, this_tsc_khz; |
@@ -3806,6 +3819,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3806 | delta = user_ns.clock - now_ns; | 3819 | delta = user_ns.clock - now_ns; |
3807 | local_irq_enable(); | 3820 | local_irq_enable(); |
3808 | kvm->arch.kvmclock_offset = delta; | 3821 | kvm->arch.kvmclock_offset = delta; |
3822 | kvm_gen_update_masterclock(kvm); | ||
3809 | break; | 3823 | break; |
3810 | } | 3824 | } |
3811 | case KVM_GET_CLOCK: { | 3825 | case KVM_GET_CLOCK: { |
@@ -4955,6 +4969,97 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | |||
4955 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); | 4969 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); |
4956 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); | 4970 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); |
4957 | 4971 | ||
4972 | static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, | ||
4973 | unsigned long *db) | ||
4974 | { | ||
4975 | u32 dr6 = 0; | ||
4976 | int i; | ||
4977 | u32 enable, rwlen; | ||
4978 | |||
4979 | enable = dr7; | ||
4980 | rwlen = dr7 >> 16; | ||
4981 | for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4) | ||
4982 | if ((enable & 3) && (rwlen & 15) == type && db[i] == addr) | ||
4983 | dr6 |= (1 << i); | ||
4984 | return dr6; | ||
4985 | } | ||
4986 | |||
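kvm_vcpu_check_hw_bp() walks the four architectural breakpoints: DR7 keeps a 2-bit local/global enable pair per breakpoint in its low byte and a 4-bit R/W+LEN field per breakpoint in its high 16 bits, and a breakpoint contributes its bit to the returned DR6 value only when it is enabled, its type matches, and its address matches. A stand-alone sketch of the same walk with a worked call (the function and array names are local to the example):

#include <stdio.h>
#include <stdint.h>

/*
 * Same walk as kvm_vcpu_check_hw_bp() above: DR7 bits 0-7 carry the
 * local/global enable pair for each of the four breakpoints, bits 16-31
 * carry a 4-bit R/W+LEN field per breakpoint.
 */
static uint32_t check_hw_bp(unsigned long addr, uint32_t type, uint32_t dr7,
			    const unsigned long *db)
{
	uint32_t dr6 = 0, enable = dr7, rwlen = dr7 >> 16;
	int i;

	for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
		if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
			dr6 |= 1u << i;
	return dr6;
}

int main(void)
{
	unsigned long db[4] = { 0x1000, 0x2000, 0x3000, 0x4000 };
	/* Enable breakpoints 0 and 2 (local enables), type 0 = execute. */
	uint32_t dr7 = (1u << 0) | (1u << 4);

	/* Hits DR0 -> DR6 bit 0 is set. */
	printf("dr6 = %#x\n", check_hw_bp(0x1000, 0, dr7, db));
	/* DR1 matches the address but is not enabled -> no bit set. */
	printf("dr6 = %#x\n", check_hw_bp(0x2000, 0, dr7, db));
	return 0;
}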
4987 | static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) | ||
4988 | { | ||
4989 | struct kvm_run *kvm_run = vcpu->run; | ||
4990 | |||
4991 | /* | ||
4992 | * Use the "raw" value to see if TF was passed to the processor. | ||
4993 | * Note that the new value of the flags has not been saved yet. | ||
4994 | * | ||
4995 | * This is correct even for TF set by the guest, because "the | ||
4996 | * processor will not generate this exception after the instruction | ||
4997 | * that sets the TF flag". | ||
4998 | */ | ||
4999 | unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); | ||
5000 | |||
5001 | if (unlikely(rflags & X86_EFLAGS_TF)) { | ||
5002 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | ||
5003 | kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; | ||
5004 | kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; | ||
5005 | kvm_run->debug.arch.exception = DB_VECTOR; | ||
5006 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | ||
5007 | *r = EMULATE_USER_EXIT; | ||
5008 | } else { | ||
5009 | vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF; | ||
5010 | /* | ||
5011 | * "Certain debug exceptions may clear bit 0-3. The | ||
5012 | * remaining contents of the DR6 register are never | ||
5013 | * cleared by the processor". | ||
5014 | */ | ||
5015 | vcpu->arch.dr6 &= ~15; | ||
5016 | vcpu->arch.dr6 |= DR6_BS; | ||
5017 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
5018 | } | ||
5019 | } | ||
5020 | } | ||
5021 | |||
5022 | static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) | ||
5023 | { | ||
5024 | struct kvm_run *kvm_run = vcpu->run; | ||
5025 | unsigned long eip = vcpu->arch.emulate_ctxt.eip; | ||
5026 | u32 dr6 = 0; | ||
5027 | |||
5028 | if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && | ||
5029 | (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { | ||
5030 | dr6 = kvm_vcpu_check_hw_bp(eip, 0, | ||
5031 | vcpu->arch.guest_debug_dr7, | ||
5032 | vcpu->arch.eff_db); | ||
5033 | |||
5034 | if (dr6 != 0) { | ||
5035 | kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; | ||
5036 | kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + | ||
5037 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
5038 | |||
5039 | kvm_run->debug.arch.exception = DB_VECTOR; | ||
5040 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | ||
5041 | *r = EMULATE_USER_EXIT; | ||
5042 | return true; | ||
5043 | } | ||
5044 | } | ||
5045 | |||
5046 | if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { | ||
5047 | dr6 = kvm_vcpu_check_hw_bp(eip, 0, | ||
5048 | vcpu->arch.dr7, | ||
5049 | vcpu->arch.db); | ||
5050 | |||
5051 | if (dr6 != 0) { | ||
5052 | vcpu->arch.dr6 &= ~15; | ||
5053 | vcpu->arch.dr6 |= dr6; | ||
5054 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
5055 | *r = EMULATE_DONE; | ||
5056 | return true; | ||
5057 | } | ||
5058 | } | ||
5059 | |||
5060 | return false; | ||
5061 | } | ||
5062 | |||
4958 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, | 5063 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
4959 | unsigned long cr2, | 5064 | unsigned long cr2, |
4960 | int emulation_type, | 5065 | int emulation_type, |
@@ -4975,6 +5080,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4975 | 5080 | ||
4976 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 5081 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
4977 | init_emulate_ctxt(vcpu); | 5082 | init_emulate_ctxt(vcpu); |
5083 | |||
5084 | /* | ||
5085 | * We will reenter on the same instruction since | ||
5086 | * we do not set complete_userspace_io. This does not | ||
5087 | * handle watchpoints yet; those would be handled in | ||
5088 | * the emulate_ops. | ||
5089 | */ | ||
5090 | if (kvm_vcpu_check_breakpoint(vcpu, &r)) | ||
5091 | return r; | ||
5092 | |||
4978 | ctxt->interruptibility = 0; | 5093 | ctxt->interruptibility = 0; |
4979 | ctxt->have_exception = false; | 5094 | ctxt->have_exception = false; |
4980 | ctxt->perm_ok = false; | 5095 | ctxt->perm_ok = false; |
@@ -5031,17 +5146,18 @@ restart: | |||
5031 | inject_emulated_exception(vcpu); | 5146 | inject_emulated_exception(vcpu); |
5032 | r = EMULATE_DONE; | 5147 | r = EMULATE_DONE; |
5033 | } else if (vcpu->arch.pio.count) { | 5148 | } else if (vcpu->arch.pio.count) { |
5034 | if (!vcpu->arch.pio.in) | 5149 | if (!vcpu->arch.pio.in) { |
5150 | /* FIXME: return into emulator if single-stepping. */ | ||
5035 | vcpu->arch.pio.count = 0; | 5151 | vcpu->arch.pio.count = 0; |
5036 | else { | 5152 | } else { |
5037 | writeback = false; | 5153 | writeback = false; |
5038 | vcpu->arch.complete_userspace_io = complete_emulated_pio; | 5154 | vcpu->arch.complete_userspace_io = complete_emulated_pio; |
5039 | } | 5155 | } |
5040 | r = EMULATE_DO_MMIO; | 5156 | r = EMULATE_USER_EXIT; |
5041 | } else if (vcpu->mmio_needed) { | 5157 | } else if (vcpu->mmio_needed) { |
5042 | if (!vcpu->mmio_is_write) | 5158 | if (!vcpu->mmio_is_write) |
5043 | writeback = false; | 5159 | writeback = false; |
5044 | r = EMULATE_DO_MMIO; | 5160 | r = EMULATE_USER_EXIT; |
5045 | vcpu->arch.complete_userspace_io = complete_emulated_mmio; | 5161 | vcpu->arch.complete_userspace_io = complete_emulated_mmio; |
5046 | } else if (r == EMULATION_RESTART) | 5162 | } else if (r == EMULATION_RESTART) |
5047 | goto restart; | 5163 | goto restart; |
@@ -5050,10 +5166,12 @@ restart: | |||
5050 | 5166 | ||
5051 | if (writeback) { | 5167 | if (writeback) { |
5052 | toggle_interruptibility(vcpu, ctxt->interruptibility); | 5168 | toggle_interruptibility(vcpu, ctxt->interruptibility); |
5053 | kvm_set_rflags(vcpu, ctxt->eflags); | ||
5054 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5169 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5055 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 5170 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
5056 | kvm_rip_write(vcpu, ctxt->eip); | 5171 | kvm_rip_write(vcpu, ctxt->eip); |
5172 | if (r == EMULATE_DONE) | ||
5173 | kvm_vcpu_check_singlestep(vcpu, &r); | ||
5174 | kvm_set_rflags(vcpu, ctxt->eflags); | ||
5057 | } else | 5175 | } else |
5058 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; | 5176 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; |
5059 | 5177 | ||
@@ -5347,7 +5465,7 @@ static struct notifier_block pvclock_gtod_notifier = { | |||
5347 | int kvm_arch_init(void *opaque) | 5465 | int kvm_arch_init(void *opaque) |
5348 | { | 5466 | { |
5349 | int r; | 5467 | int r; |
5350 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; | 5468 | struct kvm_x86_ops *ops = opaque; |
5351 | 5469 | ||
5352 | if (kvm_x86_ops) { | 5470 | if (kvm_x86_ops) { |
5353 | printk(KERN_ERR "kvm: already loaded the other module\n"); | 5471 | printk(KERN_ERR "kvm: already loaded the other module\n"); |
@@ -5495,6 +5613,23 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
5495 | return 1; | 5613 | return 1; |
5496 | } | 5614 | } |
5497 | 5615 | ||
5616 | /* | ||
5617 | * kvm_pv_kick_cpu_op: Kick a vcpu. | ||
5618 | * | ||
5619 | * @apicid - apicid of vcpu to be kicked. | ||
5620 | */ | ||
5621 | static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | ||
5622 | { | ||
5623 | struct kvm_lapic_irq lapic_irq; | ||
5624 | |||
5625 | lapic_irq.shorthand = 0; | ||
5626 | lapic_irq.dest_mode = 0; | ||
5627 | lapic_irq.dest_id = apicid; | ||
5628 | |||
5629 | lapic_irq.delivery_mode = APIC_DM_REMRD; | ||
5630 | kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); | ||
5631 | } | ||
5632 | |||
5498 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 5633 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
5499 | { | 5634 | { |
5500 | unsigned long nr, a0, a1, a2, a3, ret; | 5635 | unsigned long nr, a0, a1, a2, a3, ret; |
@@ -5528,6 +5663,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
5528 | case KVM_HC_VAPIC_POLL_IRQ: | 5663 | case KVM_HC_VAPIC_POLL_IRQ: |
5529 | ret = 0; | 5664 | ret = 0; |
5530 | break; | 5665 | break; |
5666 | case KVM_HC_KICK_CPU: | ||
5667 | kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); | ||
5668 | ret = 0; | ||
5669 | break; | ||
5531 | default: | 5670 | default: |
5532 | ret = -KVM_ENOSYS; | 5671 | ret = -KVM_ENOSYS; |
5533 | break; | 5672 | break; |
@@ -5689,29 +5828,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) | |||
5689 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5828 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5690 | } | 5829 | } |
5691 | 5830 | ||
5692 | static void kvm_gen_update_masterclock(struct kvm *kvm) | ||
5693 | { | ||
5694 | #ifdef CONFIG_X86_64 | ||
5695 | int i; | ||
5696 | struct kvm_vcpu *vcpu; | ||
5697 | struct kvm_arch *ka = &kvm->arch; | ||
5698 | |||
5699 | spin_lock(&ka->pvclock_gtod_sync_lock); | ||
5700 | kvm_make_mclock_inprogress_request(kvm); | ||
5701 | /* no guest entries from this point */ | ||
5702 | pvclock_update_vm_gtod_copy(kvm); | ||
5703 | |||
5704 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
5705 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
5706 | |||
5707 | /* guest entries allowed */ | ||
5708 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
5709 | clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); | ||
5710 | |||
5711 | spin_unlock(&ka->pvclock_gtod_sync_lock); | ||
5712 | #endif | ||
5713 | } | ||
5714 | |||
5715 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | 5831 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
5716 | { | 5832 | { |
5717 | u64 eoi_exit_bitmap[4]; | 5833 | u64 eoi_exit_bitmap[4]; |
@@ -5950,6 +6066,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5950 | kvm_apic_accept_events(vcpu); | 6066 | kvm_apic_accept_events(vcpu); |
5951 | switch(vcpu->arch.mp_state) { | 6067 | switch(vcpu->arch.mp_state) { |
5952 | case KVM_MP_STATE_HALTED: | 6068 | case KVM_MP_STATE_HALTED: |
6069 | vcpu->arch.pv.pv_unhalted = false; | ||
5953 | vcpu->arch.mp_state = | 6070 | vcpu->arch.mp_state = |
5954 | KVM_MP_STATE_RUNNABLE; | 6071 | KVM_MP_STATE_RUNNABLE; |
5955 | case KVM_MP_STATE_RUNNABLE: | 6072 | case KVM_MP_STATE_RUNNABLE: |
@@ -6061,6 +6178,8 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) | |||
6061 | 6178 | ||
6062 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { | 6179 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { |
6063 | vcpu->mmio_needed = 0; | 6180 | vcpu->mmio_needed = 0; |
6181 | |||
6182 | /* FIXME: return into emulator if single-stepping. */ | ||
6064 | if (vcpu->mmio_is_write) | 6183 | if (vcpu->mmio_is_write) |
6065 | return 1; | 6184 | return 1; |
6066 | vcpu->mmio_read_completed = 1; | 6185 | vcpu->mmio_read_completed = 1; |
@@ -6249,7 +6368,12 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | |||
6249 | struct kvm_mp_state *mp_state) | 6368 | struct kvm_mp_state *mp_state) |
6250 | { | 6369 | { |
6251 | kvm_apic_accept_events(vcpu); | 6370 | kvm_apic_accept_events(vcpu); |
6252 | mp_state->mp_state = vcpu->arch.mp_state; | 6371 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && |
6372 | vcpu->arch.pv.pv_unhalted) | ||
6373 | mp_state->mp_state = KVM_MP_STATE_RUNNABLE; | ||
6374 | else | ||
6375 | mp_state->mp_state = vcpu->arch.mp_state; | ||
6376 | |||
6253 | return 0; | 6377 | return 0; |
6254 | } | 6378 | } |
6255 | 6379 | ||
@@ -6770,6 +6894,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6770 | BUG_ON(vcpu->kvm == NULL); | 6894 | BUG_ON(vcpu->kvm == NULL); |
6771 | kvm = vcpu->kvm; | 6895 | kvm = vcpu->kvm; |
6772 | 6896 | ||
6897 | vcpu->arch.pv.pv_unhalted = false; | ||
6773 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 6898 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
6774 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) | 6899 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) |
6775 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 6900 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
@@ -7019,6 +7144,15 @@ out_free: | |||
7019 | return -ENOMEM; | 7144 | return -ENOMEM; |
7020 | } | 7145 | } |
7021 | 7146 | ||
7147 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
7148 | { | ||
7149 | /* | ||
7150 | * memslots->generation has been incremented. | ||
7151 | * mmio generation may have reached its maximum value. | ||
7152 | */ | ||
7153 | kvm_mmu_invalidate_mmio_sptes(kvm); | ||
7154 | } | ||
7155 | |||
7022 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 7156 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
7023 | struct kvm_memory_slot *memslot, | 7157 | struct kvm_memory_slot *memslot, |
7024 | struct kvm_userspace_memory_region *mem, | 7158 | struct kvm_userspace_memory_region *mem, |
@@ -7079,11 +7213,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
7079 | */ | 7213 | */ |
7080 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | 7214 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) |
7081 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 7215 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
7082 | /* | ||
7083 | * If memory slot is created, or moved, we need to clear all | ||
7084 | * mmio sptes. | ||
7085 | */ | ||
7086 | kvm_mmu_invalidate_mmio_sptes(kvm); | ||
7087 | } | 7216 | } |
7088 | 7217 | ||
7089 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 7218 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
@@ -7103,6 +7232,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
7103 | !vcpu->arch.apf.halted) | 7232 | !vcpu->arch.apf.halted) |
7104 | || !list_empty_careful(&vcpu->async_pf.done) | 7233 | || !list_empty_careful(&vcpu->async_pf.done) |
7105 | || kvm_apic_has_events(vcpu) | 7234 | || kvm_apic_has_events(vcpu) |
7235 | || vcpu->arch.pv.pv_unhalted | ||
7106 | || atomic_read(&vcpu->arch.nmi_queued) || | 7236 | || atomic_read(&vcpu->arch.nmi_queued) || |
7107 | (kvm_arch_interrupt_allowed(vcpu) && | 7237 | (kvm_arch_interrupt_allowed(vcpu) && |
7108 | kvm_cpu_has_interrupt(vcpu)); | 7238 | kvm_cpu_has_interrupt(vcpu)); |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index c74436e687bf..72074d528400 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -85,15 +85,18 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
85 | cycle_t ret; | 85 | cycle_t ret; |
86 | u64 last; | 86 | u64 last; |
87 | u32 version; | 87 | u32 version; |
88 | u32 migrate_count; | ||
89 | u8 flags; | 88 | u8 flags; |
90 | unsigned cpu, cpu1; | 89 | unsigned cpu, cpu1; |
91 | 90 | ||
92 | 91 | ||
93 | /* | 92 | /* |
94 | * When looping to get a consistent (time-info, tsc) pair, we | 93 | * Note: hypervisor must guarantee that: |
95 | * also need to deal with the possibility we can switch vcpus, | 94 | * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. |
96 | * so make sure we always re-fetch time-info for the current vcpu. | 95 | * 2. that per-CPU pvclock time info is updated if the |
96 | * underlying CPU changes. | ||
97 | * 3. that version is increased whenever underlying CPU | ||
98 | * changes. | ||
99 | * | ||
97 | */ | 100 | */ |
98 | do { | 101 | do { |
99 | cpu = __getcpu() & VGETCPU_CPU_MASK; | 102 | cpu = __getcpu() & VGETCPU_CPU_MASK; |
@@ -104,8 +107,6 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
104 | 107 | ||
105 | pvti = get_pvti(cpu); | 108 | pvti = get_pvti(cpu); |
106 | 109 | ||
107 | migrate_count = pvti->migrate_count; | ||
108 | |||
109 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); | 110 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); |
110 | 111 | ||
111 | /* | 112 | /* |
@@ -117,8 +118,7 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
117 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; | 118 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; |
118 | } while (unlikely(cpu != cpu1 || | 119 | } while (unlikely(cpu != cpu1 || |
119 | (pvti->pvti.version & 1) || | 120 | (pvti->pvti.version & 1) || |
120 | pvti->pvti.version != version || | 121 | pvti->pvti.version != version)); |
121 | pvti->migrate_count != migrate_count)); | ||
122 | 122 | ||
123 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) | 123 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) |
124 | *mode = VCLOCK_NONE; | 124 | *mode = VCLOCK_NONE; |
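
The loop above is the usual version-retry pattern: re-read while the version is odd (an update is in flight) or has changed between the two reads, with the extra CPU re-check forcing another pass if the task migrated mid-read. A minimal single-threaded sketch of that pattern, leaving out the read barriers and the CPU re-check a real reader needs (names and values are illustrative):

#include <stdio.h>
#include <stdint.h>

/* Toy "time info" record; a writer would bump version around each update. */
struct pvti {
	volatile uint32_t version;	/* odd while an update is in flight */
	volatile uint64_t value;
};

static uint64_t read_stable(const struct pvti *p)
{
	uint32_t version;
	uint64_t v;

	do {
		version = p->version;
		v = p->value;
		/* a real reader would also re-check the CPU id here */
	} while ((version & 1) || version != p->version);

	return v;
}

int main(void)
{
	struct pvti p = { .version = 2, .value = 123456789ULL };

	printf("value = %llu\n", (unsigned long long)read_stable(&p));
	return 0;
}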
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 5daa2599ed48..e373671652b0 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig | |||
@@ -200,11 +200,9 @@ config DMA_SHARED_BUFFER | |||
200 | APIs extension; the file's descriptor can then be passed on to other | 200 | APIs extension; the file's descriptor can then be passed on to other |
201 | driver. | 201 | driver. |
202 | 202 | ||
203 | config CMA | 203 | config DMA_CMA |
204 | bool "Contiguous Memory Allocator" | 204 | bool "DMA Contiguous Memory Allocator" |
205 | depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK | 205 | depends on HAVE_DMA_CONTIGUOUS && CMA |
206 | select MIGRATION | ||
207 | select MEMORY_ISOLATION | ||
208 | help | 206 | help |
209 | This enables the Contiguous Memory Allocator which allows drivers | 207 | This enables the Contiguous Memory Allocator which allows drivers |
210 | to allocate big physically-contiguous blocks of memory for use with | 208 | to allocate big physically-contiguous blocks of memory for use with |
@@ -213,17 +211,7 @@ config CMA | |||
213 | For more information see <include/linux/dma-contiguous.h>. | 211 | For more information see <include/linux/dma-contiguous.h>. |
214 | If unsure, say "n". | 212 | If unsure, say "n". |
215 | 213 | ||
216 | if CMA | 214 | if DMA_CMA |
217 | |||
218 | config CMA_DEBUG | ||
219 | bool "CMA debug messages (DEVELOPMENT)" | ||
220 | depends on DEBUG_KERNEL | ||
221 | help | ||
222 | Turns on debug messages in CMA. This produces KERN_DEBUG | ||
223 | messages for every CMA call as well as various messages while | ||
224 | processing calls such as dma_alloc_from_contiguous(). | ||
225 | This option does not affect warning and error messages. | ||
226 | |||
227 | comment "Default contiguous memory area size:" | 215 | comment "Default contiguous memory area size:" |
228 | 216 | ||
229 | config CMA_SIZE_MBYTES | 217 | config CMA_SIZE_MBYTES |
diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 48029aa477d9..94e8a80e87f8 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile | |||
@@ -6,7 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ | |||
6 | attribute_container.o transport_class.o \ | 6 | attribute_container.o transport_class.o \ |
7 | topology.o | 7 | topology.o |
8 | obj-$(CONFIG_DEVTMPFS) += devtmpfs.o | 8 | obj-$(CONFIG_DEVTMPFS) += devtmpfs.o |
9 | obj-$(CONFIG_CMA) += dma-contiguous.o | 9 | obj-$(CONFIG_DMA_CMA) += dma-contiguous.o |
10 | obj-y += power/ | 10 | obj-y += power/ |
11 | obj-$(CONFIG_HAS_DMA) += dma-mapping.o | 11 | obj-$(CONFIG_HAS_DMA) += dma-mapping.o |
12 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o | 12 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o |
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 343744e4809c..7e2d15837b02 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
@@ -26,7 +26,7 @@ | |||
26 | #include <linux/types.h> | 26 | #include <linux/types.h> |
27 | #include <linux/irqchip/arm-gic.h> | 27 | #include <linux/irqchip/arm-gic.h> |
28 | 28 | ||
29 | #define VGIC_NR_IRQS 128 | 29 | #define VGIC_NR_IRQS 256 |
30 | #define VGIC_NR_SGIS 16 | 30 | #define VGIC_NR_SGIS 16 |
31 | #define VGIC_NR_PPIS 16 | 31 | #define VGIC_NR_PPIS 16 |
32 | #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) | 32 | #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) |
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 01b5c84be828..00141d3325fe 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h | |||
@@ -57,7 +57,7 @@ struct cma; | |||
57 | struct page; | 57 | struct page; |
58 | struct device; | 58 | struct device; |
59 | 59 | ||
60 | #ifdef CONFIG_CMA | 60 | #ifdef CONFIG_DMA_CMA |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * There is always at least global CMA area and a few optional device | 63 | * There is always at least global CMA area and a few optional device |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a63d83ebd151..ca645a01d37a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -85,6 +85,12 @@ static inline bool is_noslot_pfn(pfn_t pfn) | |||
85 | return pfn == KVM_PFN_NOSLOT; | 85 | return pfn == KVM_PFN_NOSLOT; |
86 | } | 86 | } |
87 | 87 | ||
88 | /* | ||
89 | * architectures with KVM_HVA_ERR_BAD other than PAGE_OFFSET (e.g. s390) | ||
90 | * provide their own defines and kvm_is_error_hva | ||
91 | */ | ||
92 | #ifndef KVM_HVA_ERR_BAD | ||
93 | |||
88 | #define KVM_HVA_ERR_BAD (PAGE_OFFSET) | 94 | #define KVM_HVA_ERR_BAD (PAGE_OFFSET) |
89 | #define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE) | 95 | #define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE) |
90 | 96 | ||
@@ -93,6 +99,8 @@ static inline bool kvm_is_error_hva(unsigned long addr) | |||
93 | return addr >= PAGE_OFFSET; | 99 | return addr >= PAGE_OFFSET; |
94 | } | 100 | } |
95 | 101 | ||
102 | #endif | ||
103 | |||
96 | #define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) | 104 | #define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) |
97 | 105 | ||
98 | static inline bool is_error_page(struct page *page) | 106 | static inline bool is_error_page(struct page *page) |
@@ -160,8 +168,12 @@ enum kvm_bus { | |||
160 | 168 | ||
161 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 169 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
162 | int len, const void *val); | 170 | int len, const void *val); |
171 | int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||
172 | int len, const void *val, long cookie); | ||
163 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, | 173 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, |
164 | void *val); | 174 | void *val); |
175 | int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||
176 | int len, void *val, long cookie); | ||
165 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 177 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
166 | int len, struct kvm_io_device *dev); | 178 | int len, struct kvm_io_device *dev); |
167 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 179 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
@@ -499,6 +511,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
499 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | 511 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, |
500 | struct kvm_memory_slot *dont); | 512 | struct kvm_memory_slot *dont); |
501 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); | 513 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); |
514 | void kvm_arch_memslots_updated(struct kvm *kvm); | ||
502 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 515 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
503 | struct kvm_memory_slot *memslot, | 516 | struct kvm_memory_slot *memslot, |
504 | struct kvm_userspace_memory_region *mem, | 517 | struct kvm_userspace_memory_region *mem, |
diff --git a/include/linux/sched.h b/include/linux/sched.h index f79ced719435..ce1e1c0aaa33 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -107,14 +107,6 @@ extern unsigned long this_cpu_load(void); | |||
107 | extern void calc_global_load(unsigned long ticks); | 107 | extern void calc_global_load(unsigned long ticks); |
108 | extern void update_cpu_load_nohz(void); | 108 | extern void update_cpu_load_nohz(void); |
109 | 109 | ||
110 | /* Notifier for when a task gets migrated to a new CPU */ | ||
111 | struct task_migration_notifier { | ||
112 | struct task_struct *task; | ||
113 | int from_cpu; | ||
114 | int to_cpu; | ||
115 | }; | ||
116 | extern void register_task_migration_notifier(struct notifier_block *n); | ||
117 | |||
118 | extern unsigned long get_parent_ip(unsigned long addr); | 110 | extern unsigned long get_parent_ip(unsigned long addr); |
119 | 111 | ||
120 | extern void dump_cpu_task(int cpu); | 112 | extern void dump_cpu_task(int cpu); |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index acccd08be6c7..99c25338ede8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -667,6 +667,7 @@ struct kvm_ppc_smmu_info { | |||
667 | #define KVM_CAP_PPC_RTAS 91 | 667 | #define KVM_CAP_PPC_RTAS 91 |
668 | #define KVM_CAP_IRQ_XICS 92 | 668 | #define KVM_CAP_IRQ_XICS 92 |
669 | #define KVM_CAP_ARM_EL1_32BIT 93 | 669 | #define KVM_CAP_ARM_EL1_32BIT 93 |
670 | #define KVM_CAP_SPAPR_MULTITCE 94 | ||
670 | 671 | ||
671 | #ifdef KVM_CAP_IRQ_ROUTING | 672 | #ifdef KVM_CAP_IRQ_ROUTING |
672 | 673 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 725aa067ad63..5ac63c9a995a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -978,13 +978,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
978 | rq->skip_clock_update = 1; | 978 | rq->skip_clock_update = 1; |
979 | } | 979 | } |
980 | 980 | ||
981 | static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); | ||
982 | |||
983 | void register_task_migration_notifier(struct notifier_block *n) | ||
984 | { | ||
985 | atomic_notifier_chain_register(&task_migration_notifier, n); | ||
986 | } | ||
987 | |||
988 | #ifdef CONFIG_SMP | 981 | #ifdef CONFIG_SMP |
989 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 982 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
990 | { | 983 | { |
@@ -1015,18 +1008,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1015 | trace_sched_migrate_task(p, new_cpu); | 1008 | trace_sched_migrate_task(p, new_cpu); |
1016 | 1009 | ||
1017 | if (task_cpu(p) != new_cpu) { | 1010 | if (task_cpu(p) != new_cpu) { |
1018 | struct task_migration_notifier tmn; | ||
1019 | |||
1020 | if (p->sched_class->migrate_task_rq) | 1011 | if (p->sched_class->migrate_task_rq) |
1021 | p->sched_class->migrate_task_rq(p, new_cpu); | 1012 | p->sched_class->migrate_task_rq(p, new_cpu); |
1022 | p->se.nr_migrations++; | 1013 | p->se.nr_migrations++; |
1023 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); | 1014 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); |
1024 | |||
1025 | tmn.task = p; | ||
1026 | tmn.from_cpu = task_cpu(p); | ||
1027 | tmn.to_cpu = new_cpu; | ||
1028 | |||
1029 | atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); | ||
1030 | } | 1015 | } |
1031 | 1016 | ||
1032 | __set_task_cpu(p, new_cpu); | 1017 | __set_task_cpu(p, new_cpu); |
diff --git a/mm/Kconfig b/mm/Kconfig index 8028dcc6615c..6cdd27043303 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -478,6 +478,30 @@ config FRONTSWAP | |||
478 | 478 | ||
479 | If unsure, say Y to enable frontswap. | 479 | If unsure, say Y to enable frontswap. |
480 | 480 | ||
481 | config CMA | ||
482 | bool "Contiguous Memory Allocator" | ||
483 | depends on HAVE_MEMBLOCK | ||
484 | select MIGRATION | ||
485 | select MEMORY_ISOLATION | ||
486 | help | ||
487 | This enables the Contiguous Memory Allocator which allows other | ||
488 | subsystems to allocate big physically-contiguous blocks of memory. | ||
489 | CMA reserves a region of memory and allows only movable pages to | ||
490 | be allocated from it. This way, the kernel can use the memory for | ||
491 | pagecache and when a subsystem requests a contiguous area, the | ||
492 | allocated pages are migrated away to serve the contiguous request. | ||
493 | |||
494 | If unsure, say "n". | ||
495 | |||
496 | config CMA_DEBUG | ||
497 | bool "CMA debug messages (DEVELOPMENT)" | ||
498 | depends on DEBUG_KERNEL && CMA | ||
499 | help | ||
500 | Turns on debug messages in CMA. This produces KERN_DEBUG | ||
501 | messages for every CMA call as well as various messages while | ||
502 | processing calls such as dma_alloc_from_contiguous(). | ||
503 | This option does not affect warning and error messages. | ||
504 | |||
481 | config ZBUD | 505 | config ZBUD |
482 | tristate | 506 | tristate |
483 | default n | 507 | default n |
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 17c5ac7d10ed..685fc72fc751 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c | |||
@@ -149,7 +149,7 @@ static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) | |||
149 | { | 149 | { |
150 | offset >>= 2; | 150 | offset >>= 2; |
151 | BUG_ON(offset > (VGIC_NR_IRQS / 4)); | 151 | BUG_ON(offset > (VGIC_NR_IRQS / 4)); |
152 | if (offset < 4) | 152 | if (offset < 8) |
153 | return x->percpu[cpuid] + offset; | 153 | return x->percpu[cpuid] + offset; |
154 | else | 154 | else |
155 | return x->shared + offset - 8; | 155 | return x->shared + offset - 8; |
@@ -432,19 +432,13 @@ static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, | |||
432 | static u32 vgic_get_target_reg(struct kvm *kvm, int irq) | 432 | static u32 vgic_get_target_reg(struct kvm *kvm, int irq) |
433 | { | 433 | { |
434 | struct vgic_dist *dist = &kvm->arch.vgic; | 434 | struct vgic_dist *dist = &kvm->arch.vgic; |
435 | struct kvm_vcpu *vcpu; | 435 | int i; |
436 | int i, c; | ||
437 | unsigned long *bmap; | ||
438 | u32 val = 0; | 436 | u32 val = 0; |
439 | 437 | ||
440 | irq -= VGIC_NR_PRIVATE_IRQS; | 438 | irq -= VGIC_NR_PRIVATE_IRQS; |
441 | 439 | ||
442 | kvm_for_each_vcpu(c, vcpu, kvm) { | 440 | for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) |
443 | bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); | 441 | val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); |
444 | for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) | ||
445 | if (test_bit(irq + i, bmap)) | ||
446 | val |= 1 << (c + i * 8); | ||
447 | } | ||
448 | 442 | ||
449 | return val; | 443 | return val; |
450 | } | 444 | } |
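
The rewritten vgic_get_target_reg() builds a GICD_ITARGETSR word directly from the irq_spi_cpu[] array: each of the four SPIs covered by the register gets a single target bit, placed at position target-CPU + lane*8 inside its 8-bit field. A stand-alone sketch with a worked example; GICD_IRQS_PER_ITARGETSR is assumed to be 4 (the value the GICv2 register layout implies) and the other names are local to the example:

#include <stdio.h>
#include <stdint.h>

#define GICD_IRQS_PER_ITARGETSR 4	/* four 8-bit target fields per register */

/* Build one ITARGETSR word from the per-SPI target-CPU array. */
static uint32_t get_target_reg(const uint8_t *irq_spi_cpu, int first_spi)
{
	uint32_t val = 0;
	int i;

	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
		val |= 1u << (irq_spi_cpu[first_spi + i] + i * 8);
	return val;
}

int main(void)
{
	/* SPIs 0..3 targeting CPUs 0, 1, 0, 3 respectively. */
	uint8_t irq_spi_cpu[4] = { 0, 1, 0, 3 };

	/* Expect 0x08010201: bits 0, 9, 16 and 27 set. */
	printf("ITARGETSR = %#010x\n", get_target_reg(irq_spi_cpu, 0));
	return 0;
}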
@@ -547,8 +541,12 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, | |||
547 | struct kvm_exit_mmio *mmio, phys_addr_t offset) | 541 | struct kvm_exit_mmio *mmio, phys_addr_t offset) |
548 | { | 542 | { |
549 | u32 val; | 543 | u32 val; |
550 | u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, | 544 | u32 *reg; |
551 | vcpu->vcpu_id, offset >> 1); | 545 | |
546 | offset >>= 1; | ||
547 | reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, | ||
548 | vcpu->vcpu_id, offset); | ||
549 | |||
552 | if (offset & 2) | 550 | if (offset & 2) |
553 | val = *reg >> 16; | 551 | val = *reg >> 16; |
554 | else | 552 | else |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1580dd4ace4e..bf040c4e02b3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -102,28 +102,8 @@ static bool largepages_enabled = true; | |||
102 | 102 | ||
103 | bool kvm_is_mmio_pfn(pfn_t pfn) | 103 | bool kvm_is_mmio_pfn(pfn_t pfn) |
104 | { | 104 | { |
105 | if (pfn_valid(pfn)) { | 105 | if (pfn_valid(pfn)) |
106 | int reserved; | 106 | return PageReserved(pfn_to_page(pfn)); |
107 | struct page *tail = pfn_to_page(pfn); | ||
108 | struct page *head = compound_trans_head(tail); | ||
109 | reserved = PageReserved(head); | ||
110 | if (head != tail) { | ||
111 | /* | ||
112 | * "head" is not a dangling pointer | ||
113 | * (compound_trans_head takes care of that) | ||
114 | * but the hugepage may have been splitted | ||
115 | * from under us (and we may not hold a | ||
116 | * reference count on the head page so it can | ||
117 | * be reused before we run PageReferenced), so | ||
118 | * we've to check PageTail before returning | ||
119 | * what we just read. | ||
120 | */ | ||
121 | smp_rmb(); | ||
122 | if (PageTail(tail)) | ||
123 | return reserved; | ||
124 | } | ||
125 | return PageReserved(tail); | ||
126 | } | ||
127 | 107 | ||
128 | return true; | 108 | return true; |
129 | } | 109 | } |
@@ -731,7 +711,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
731 | update_memslots(slots, new, kvm->memslots->generation); | 711 | update_memslots(slots, new, kvm->memslots->generation); |
732 | rcu_assign_pointer(kvm->memslots, slots); | 712 | rcu_assign_pointer(kvm->memslots, slots); |
733 | synchronize_srcu_expedited(&kvm->srcu); | 713 | synchronize_srcu_expedited(&kvm->srcu); |
734 | return old_memslots; | 714 | |
715 | kvm_arch_memslots_updated(kvm); | ||
716 | |||
717 | return old_memslots; | ||
735 | } | 718 | } |
736 | 719 | ||
737 | /* | 720 | /* |
@@ -1893,7 +1876,7 @@ static struct file_operations kvm_vcpu_fops = { | |||
1893 | */ | 1876 | */ |
1894 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) | 1877 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) |
1895 | { | 1878 | { |
1896 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); | 1879 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); |
1897 | } | 1880 | } |
1898 | 1881 | ||
1899 | /* | 1882 | /* |
@@ -2302,7 +2285,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, | |||
2302 | return ret; | 2285 | return ret; |
2303 | } | 2286 | } |
2304 | 2287 | ||
2305 | ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); | 2288 | ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); |
2306 | if (ret < 0) { | 2289 | if (ret < 0) { |
2307 | ops->destroy(dev); | 2290 | ops->destroy(dev); |
2308 | return ret; | 2291 | return ret; |
@@ -2586,7 +2569,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) | |||
2586 | return r; | 2569 | return r; |
2587 | } | 2570 | } |
2588 | #endif | 2571 | #endif |
2589 | r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); | 2572 | r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC); |
2590 | if (r < 0) | 2573 | if (r < 0) |
2591 | kvm_put_kvm(kvm); | 2574 | kvm_put_kvm(kvm); |
2592 | 2575 | ||
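
The three anon_inode_getfd() hunks above add O_CLOEXEC, so the VM, vcpu and device file descriptors that KVM creates are closed automatically across exec() instead of leaking into child processes. A minimal userspace sketch that checks the flag on a freshly created VM fd, assuming /dev/kvm is accessible and the running kernel includes this change:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd, vm_fd, fdflags;

	kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm_fd < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* The returned fd comes from anon_inode_getfd() in the kernel. */
	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0UL);
	if (vm_fd < 0) {
		perror("KVM_CREATE_VM");
		return 1;
	}

	fdflags = fcntl(vm_fd, F_GETFD);
	printf("vm fd %d: FD_CLOEXEC %s\n", vm_fd,
	       (fdflags & FD_CLOEXEC) ? "set" : "not set");

	close(vm_fd);
	close(kvm_fd);
	return 0;
}
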
@@ -2812,11 +2795,9 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
2812 | kfree(bus); | 2795 | kfree(bus); |
2813 | } | 2796 | } |
2814 | 2797 | ||
2815 | static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | 2798 | static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, |
2799 | const struct kvm_io_range *r2) | ||
2816 | { | 2800 | { |
2817 | const struct kvm_io_range *r1 = p1; | ||
2818 | const struct kvm_io_range *r2 = p2; | ||
2819 | |||
2820 | if (r1->addr < r2->addr) | 2801 | if (r1->addr < r2->addr) |
2821 | return -1; | 2802 | return -1; |
2822 | if (r1->addr + r1->len > r2->addr + r2->len) | 2803 | if (r1->addr + r1->len > r2->addr + r2->len) |
@@ -2824,6 +2805,11 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | |||
2824 | return 0; | 2805 | return 0; |
2825 | } | 2806 | } |
2826 | 2807 | ||
2808 | static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | ||
2809 | { | ||
2810 | return kvm_io_bus_cmp(p1, p2); | ||
2811 | } | ||
2812 | |||
2827 | static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | 2813 | static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, |
2828 | gpa_t addr, int len) | 2814 | gpa_t addr, int len) |
2829 | { | 2815 | { |
@@ -2857,17 +2843,54 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, | |||
2857 | 2843 | ||
2858 | off = range - bus->range; | 2844 | off = range - bus->range; |
2859 | 2845 | ||
2860 | while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) | 2846 | while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0) |
2861 | off--; | 2847 | off--; |
2862 | 2848 | ||
2863 | return off; | 2849 | return off; |
2864 | } | 2850 | } |
2865 | 2851 | ||
2852 | static int __kvm_io_bus_write(struct kvm_io_bus *bus, | ||
2853 | struct kvm_io_range *range, const void *val) | ||
2854 | { | ||
2855 | int idx; | ||
2856 | |||
2857 | idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); | ||
2858 | if (idx < 0) | ||
2859 | return -EOPNOTSUPP; | ||
2860 | |||
2861 | while (idx < bus->dev_count && | ||
2862 | kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { | ||
2863 | if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, | ||
2864 | range->len, val)) | ||
2865 | return idx; | ||
2866 | idx++; | ||
2867 | } | ||
2868 | |||
2869 | return -EOPNOTSUPP; | ||
2870 | } | ||
2871 | |||
2866 | /* kvm_io_bus_write - called under kvm->slots_lock */ | 2872 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
2867 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2873 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2868 | int len, const void *val) | 2874 | int len, const void *val) |
2869 | { | 2875 | { |
2870 | int idx; | 2876 | struct kvm_io_bus *bus; |
2877 | struct kvm_io_range range; | ||
2878 | int r; | ||
2879 | |||
2880 | range = (struct kvm_io_range) { | ||
2881 | .addr = addr, | ||
2882 | .len = len, | ||
2883 | }; | ||
2884 | |||
2885 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||
2886 | r = __kvm_io_bus_write(bus, &range, val); | ||
2887 | return r < 0 ? r : 0; | ||
2888 | } | ||
2889 | |||
2890 | /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ | ||
2891 | int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||
2892 | int len, const void *val, long cookie) | ||
2893 | { | ||
2871 | struct kvm_io_bus *bus; | 2894 | struct kvm_io_bus *bus; |
2872 | struct kvm_io_range range; | 2895 | struct kvm_io_range range; |
2873 | 2896 | ||
@@ -2877,14 +2900,35 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
2877 | }; | 2900 | }; |
2878 | 2901 | ||
2879 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2902 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
2880 | idx = kvm_io_bus_get_first_dev(bus, addr, len); | 2903 | |
2904 | /* First try the device referenced by cookie. */ | ||
2905 | if ((cookie >= 0) && (cookie < bus->dev_count) && | ||
2906 | (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) | ||
2907 | if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, | ||
2908 | val)) | ||
2909 | return cookie; | ||
2910 | |||
2911 | /* | ||
2912 | * cookie contained garbage; fall back to search and return the | ||
2913 | * correct cookie value. | ||
2914 | */ | ||
2915 | return __kvm_io_bus_write(bus, &range, val); | ||
2916 | } | ||
2917 | |||
2918 | static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, | ||
2919 | void *val) | ||
2920 | { | ||
2921 | int idx; | ||
2922 | |||
2923 | idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); | ||
2881 | if (idx < 0) | 2924 | if (idx < 0) |
2882 | return -EOPNOTSUPP; | 2925 | return -EOPNOTSUPP; |
2883 | 2926 | ||
2884 | while (idx < bus->dev_count && | 2927 | while (idx < bus->dev_count && |
2885 | kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | 2928 | kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { |
2886 | if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val)) | 2929 | if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, |
2887 | return 0; | 2930 | range->len, val)) |
2931 | return idx; | ||
2888 | idx++; | 2932 | idx++; |
2889 | } | 2933 | } |
2890 | 2934 | ||
@@ -2895,9 +2939,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
2895 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2939 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2896 | int len, void *val) | 2940 | int len, void *val) |
2897 | { | 2941 | { |
2898 | int idx; | ||
2899 | struct kvm_io_bus *bus; | 2942 | struct kvm_io_bus *bus; |
2900 | struct kvm_io_range range; | 2943 | struct kvm_io_range range; |
2944 | int r; | ||
2901 | 2945 | ||
2902 | range = (struct kvm_io_range) { | 2946 | range = (struct kvm_io_range) { |
2903 | .addr = addr, | 2947 | .addr = addr, |
@@ -2905,18 +2949,36 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
2905 | }; | 2949 | }; |
2906 | 2950 | ||
2907 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2951 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
2908 | idx = kvm_io_bus_get_first_dev(bus, addr, len); | 2952 | r = __kvm_io_bus_read(bus, &range, val); |
2909 | if (idx < 0) | 2953 | return r < 0 ? r : 0; |
2910 | return -EOPNOTSUPP; | 2954 | } |
2911 | 2955 | ||
2912 | while (idx < bus->dev_count && | 2956 | /* kvm_io_bus_read_cookie - called under kvm->slots_lock */ |
2913 | kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | 2957 | int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2914 | if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val)) | 2958 | int len, void *val, long cookie) |
2915 | return 0; | 2959 | { |
2916 | idx++; | 2960 | struct kvm_io_bus *bus; |
2917 | } | 2961 | struct kvm_io_range range; |
2918 | 2962 | ||
2919 | return -EOPNOTSUPP; | 2963 | range = (struct kvm_io_range) { |
2964 | .addr = addr, | ||
2965 | .len = len, | ||
2966 | }; | ||
2967 | |||
2968 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||
2969 | |||
2970 | /* First try the device referenced by cookie. */ | ||
2971 | if ((cookie >= 0) && (cookie < bus->dev_count) && | ||
2972 | (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) | ||
2973 | if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len, | ||
2974 | val)) | ||
2975 | return cookie; | ||
2976 | |||
2977 | /* | ||
2978 | * cookie contained garbage; fall back to search and return the | ||
2979 | * correct cookie value. | ||
2980 | */ | ||
2981 | return __kvm_io_bus_read(bus, &range, val); | ||
2920 | } | 2982 | } |
2921 | 2983 | ||
2922 | /* Caller must hold slots_lock. */ | 2984 | /* Caller must hold slots_lock. */ |
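
Taken together, the io-bus hunks make __kvm_io_bus_write()/__kvm_io_bus_read() return the index of the device that handled the access rather than 0, and the new kvm_io_bus_write_cookie()/kvm_io_bus_read_cookie() entry points retry that cached index before falling back to the full lookup, which in turn hands back a fresh cookie. A minimal sketch of that caching idea over a plain array; the device table, match helper and function names are invented for illustration and only the lookup pattern mirrors the kernel code above.

#include <stdint.h>
#include <stdio.h>

struct dev { uint64_t addr; int len; };

static struct dev bus[] = { { 0x1000, 4 }, { 0x2000, 4 }, { 0x3000, 4 } };
#define DEV_COUNT 3

static int dev_match(const struct dev *d, uint64_t addr, int len)
{
	return addr >= d->addr && addr + len <= d->addr + d->len;
}

/* Full scan; returns the matching index (the new cookie) or -1. */
static long bus_write_slow(uint64_t addr, int len)
{
	long idx;

	for (idx = 0; idx < DEV_COUNT; idx++)
		if (dev_match(&bus[idx], addr, len))
			return idx;	/* caller may cache this as a cookie */
	return -1;
}

/* Fast path: trust the cookie if it still points at a matching device. */
static long bus_write_cookie(uint64_t addr, int len, long cookie)
{
	if (cookie >= 0 && cookie < DEV_COUNT &&
	    dev_match(&bus[cookie], addr, len))
		return cookie;
	/* cookie contained garbage; fall back to the full search */
	return bus_write_slow(addr, len);
}

int main(void)
{
	long cookie = bus_write_slow(0x2000, 4);		/* first access */
	long again = bus_write_cookie(0x2000, 4, cookie);	/* fast path hit */
	long stale = bus_write_cookie(0x3000, 4, cookie);	/* cookie misses */

	printf("cookie=%ld again=%ld stale=%ld\n", cookie, again, stale);
	return 0;
}
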