author     Alexander Graf <agraf@suse.de>  2013-04-17 09:20:38 -0400
committer  Alexander Graf <agraf@suse.de>  2013-04-17 09:20:38 -0400
commit     fca7567c30a45962401d8d0707e6b6d7adf90f9a (patch)
tree       8b67f90c33e9e67f7b7c61916796193a057e368c /arch
parent     fbfba342a719b49d9cd0837202cf5365ba46ca9b (diff)
parent     79558f112fc0352e057f7b5e158e3d88b8b62c60 (diff)
Merge commit 'origin/next' into kvm-ppc-next
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/kvm/arm.c                  |   3
-rw-r--r--  arch/ia64/kvm/lapic.h               |   6
-rw-r--r--  arch/s390/kvm/intercept.c           |  12
-rw-r--r--  arch/s390/kvm/kvm-s390.c            |  32
-rw-r--r--  arch/s390/kvm/kvm-s390.h            |  12
-rw-r--r--  arch/s390/kvm/priv.c                | 203
-rw-r--r--  arch/x86/include/asm/entry_arch.h   |   4
-rw-r--r--  arch/x86/include/asm/hardirq.h      |   3
-rw-r--r--  arch/x86/include/asm/hw_irq.h       |   1
-rw-r--r--  arch/x86/include/asm/irq_vectors.h  |   5
-rw-r--r--  arch/x86/include/asm/kvm_host.h     |  10
-rw-r--r--  arch/x86/include/asm/vmx.h          |   4
-rw-r--r--  arch/x86/kernel/entry_64.S          |   5
-rw-r--r--  arch/x86/kernel/irq.c               |  22
-rw-r--r--  arch/x86/kernel/irqinit.c           |   4
-rw-r--r--  arch/x86/kvm/emulate.c              |  27
-rw-r--r--  arch/x86/kvm/i8254.c                |   4
-rw-r--r--  arch/x86/kvm/lapic.c                | 141
-rw-r--r--  arch/x86/kvm/lapic.h                |  13
-rw-r--r--  arch/x86/kvm/mmu.c                  |  11
-rw-r--r--  arch/x86/kvm/paging_tmpl.h          |   1
-rw-r--r--  arch/x86/kvm/pmu.c                  |  14
-rw-r--r--  arch/x86/kvm/svm.c                  |  12
-rw-r--r--  arch/x86/kvm/vmx.c                  | 352
-rw-r--r--  arch/x86/kvm/x86.c                  |  49
25 files changed, 591 insertions(+), 359 deletions(-)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e4ad0bb01843..678596f699f3 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -805,7 +805,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
805 return 0; 805 return 0;
806} 806}
807 807
808int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) 808int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
809 bool line_status)
809{ 810{
810 u32 irq = irq_level->irq; 811 u32 irq = irq_level->irq;
811 unsigned int irq_type, vcpu_idx, irq_num; 812 unsigned int irq_type, vcpu_idx, irq_num;
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index c3e2935b6db4..c5f92a926a9a 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
27#define kvm_apic_present(x) (true) 27#define kvm_apic_present(x) (true)
28#define kvm_lapic_enabled(x) (true) 28#define kvm_lapic_enabled(x) (true)
29 29
30static inline bool kvm_apic_vid_enabled(void)
31{
32 /* IA64 has no apicv supporting, do nothing here */
33 return false;
34}
35
36#endif 30#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index c6ba4dfd7f1e..b7d1b2edeeb3 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -45,10 +45,8 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
45 do { 45 do {
46 rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], 46 rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
47 (u64 __user *) useraddr); 47 (u64 __user *) useraddr);
48 if (rc) { 48 if (rc)
49 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 49 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
50 break;
51 }
52 useraddr += 8; 50 useraddr += 8;
53 if (reg == reg3) 51 if (reg == reg3)
54 break; 52 break;
@@ -79,10 +77,8 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
79 reg = reg1; 77 reg = reg1;
80 do { 78 do {
81 rc = get_guest(vcpu, val, (u32 __user *) useraddr); 79 rc = get_guest(vcpu, val, (u32 __user *) useraddr);
82 if (rc) { 80 if (rc)
83 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 81 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
84 break;
85 }
86 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; 82 vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
87 vcpu->arch.sie_block->gcr[reg] |= val; 83 vcpu->arch.sie_block->gcr[reg] |= val;
88 useraddr += 4; 84 useraddr += 4;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 33161b4a8280..c1c7c683fa26 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -149,6 +149,9 @@ int kvm_dev_ioctl_check_extension(long ext)
149 case KVM_CAP_MAX_VCPUS: 149 case KVM_CAP_MAX_VCPUS:
150 r = KVM_MAX_VCPUS; 150 r = KVM_MAX_VCPUS;
151 break; 151 break;
152 case KVM_CAP_NR_MEMSLOTS:
153 r = KVM_USER_MEM_SLOTS;
154 break;
152 case KVM_CAP_S390_COW: 155 case KVM_CAP_S390_COW:
153 r = MACHINE_HAS_ESOP; 156 r = MACHINE_HAS_ESOP;
154 break; 157 break;
@@ -633,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
633 } else { 636 } else {
634 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 637 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
635 trace_kvm_s390_sie_fault(vcpu); 638 trace_kvm_s390_sie_fault(vcpu);
636 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 639 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
637 rc = 0;
638 } 640 }
639 } 641 }
640 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 642 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
@@ -978,18 +980,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
978 struct kvm_userspace_memory_region *mem, 980 struct kvm_userspace_memory_region *mem,
979 enum kvm_mr_change change) 981 enum kvm_mr_change change)
980{ 982{
981 /* A few sanity checks. We can have exactly one memory slot which has 983 /* A few sanity checks. We can have memory slots which have to be
982 to start at guest virtual zero and which has to be located at a 984 located/ended at a segment boundary (1MB). The memory in userland is
983 page boundary in userland and which has to end at a page boundary. 985 ok to be fragmented into various different vmas. It is okay to mmap()
984 The memory in userland is ok to be fragmented into various different 986 and munmap() stuff in this slot after doing this call at any time */
985 vmas. It is okay to mmap() and munmap() stuff in this slot after
986 doing this call at any time */
987
988 if (mem->slot)
989 return -EINVAL;
990
991 if (mem->guest_phys_addr)
992 return -EINVAL;
993 987
994 if (mem->userspace_addr & 0xffffful) 988 if (mem->userspace_addr & 0xffffful)
995 return -EINVAL; 989 return -EINVAL;
@@ -1007,6 +1001,16 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
1007{ 1001{
1008 int rc; 1002 int rc;
1009 1003
1004 /* If the basics of the memslot do not change, we do not want
1005 * to update the gmap. Every update causes several unnecessary
1006 * segment translation exceptions. This is usually handled just
1007 * fine by the normal fault handler + gmap, but it will also
1008 * cause faults on the prefix page of running guest CPUs.
1009 */
1010 if (old->userspace_addr == mem->userspace_addr &&
1011 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
1012 old->npages * PAGE_SIZE == mem->memory_size)
1013 return;
1010 1014
1011 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, 1015 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
1012 mem->guest_phys_addr, mem->memory_size); 1016 mem->guest_phys_addr, mem->memory_size);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 4d89d64a8161..efc14f687265 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
110void kvm_s390_tasklet(unsigned long parm); 110void kvm_s390_tasklet(unsigned long parm);
111void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); 111void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
112void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); 112void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
113int kvm_s390_inject_vm(struct kvm *kvm, 113int __must_check kvm_s390_inject_vm(struct kvm *kvm,
114 struct kvm_s390_interrupt *s390int); 114 struct kvm_s390_interrupt *s390int);
115int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, 115int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
116 struct kvm_s390_interrupt *s390int); 116 struct kvm_s390_interrupt *s390int);
117int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); 117int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
118int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); 118int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
119struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, 119struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
120 u64 cr6, u64 schid); 120 u64 cr6, u64 schid);
121 121
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 7db2ad076f31..6bbd7b5a0bbe 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -14,6 +14,7 @@
14#include <linux/kvm.h> 14#include <linux/kvm.h>
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/errno.h> 16#include <linux/errno.h>
17#include <linux/compat.h>
17#include <asm/asm-offsets.h> 18#include <asm/asm-offsets.h>
18#include <asm/current.h> 19#include <asm/current.h>
19#include <asm/debug.h> 20#include <asm/debug.h>
@@ -36,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
36 operand2 = kvm_s390_get_base_disp_s(vcpu); 37 operand2 = kvm_s390_get_base_disp_s(vcpu);
37 38
38 /* must be word boundary */ 39 /* must be word boundary */
39 if (operand2 & 3) { 40 if (operand2 & 3)
40 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 41 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
41 goto out;
42 }
43 42
44 /* get the value */ 43 /* get the value */
45 if (get_guest(vcpu, address, (u32 __user *) operand2)) { 44 if (get_guest(vcpu, address, (u32 __user *) operand2))
46 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 45 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
47 goto out;
48 }
49 46
50 address = address & 0x7fffe000u; 47 address = address & 0x7fffe000u;
51 48
52 /* make sure that the new value is valid memory */ 49 /* make sure that the new value is valid memory */
53 if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || 50 if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
54 (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { 51 (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)))
55 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 52 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
56 goto out;
57 }
58 53
59 kvm_s390_set_prefix(vcpu, address); 54 kvm_s390_set_prefix(vcpu, address);
60 55
61 VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); 56 VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
62 trace_kvm_s390_handle_prefix(vcpu, 1, address); 57 trace_kvm_s390_handle_prefix(vcpu, 1, address);
63out:
64 return 0; 58 return 0;
65} 59}
66 60
@@ -74,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
74 operand2 = kvm_s390_get_base_disp_s(vcpu); 68 operand2 = kvm_s390_get_base_disp_s(vcpu);
75 69
76 /* must be word boundary */ 70 /* must be word boundary */
77 if (operand2 & 3) { 71 if (operand2 & 3)
78 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 72 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
79 goto out;
80 }
81 73
82 address = vcpu->arch.sie_block->prefix; 74 address = vcpu->arch.sie_block->prefix;
83 address = address & 0x7fffe000u; 75 address = address & 0x7fffe000u;
84 76
85 /* get the value */ 77 /* get the value */
86 if (put_guest(vcpu, address, (u32 __user *)operand2)) { 78 if (put_guest(vcpu, address, (u32 __user *)operand2))
87 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 79 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
88 goto out;
89 }
90 80
91 VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); 81 VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
92 trace_kvm_s390_handle_prefix(vcpu, 0, address); 82 trace_kvm_s390_handle_prefix(vcpu, 0, address);
93out:
94 return 0; 83 return 0;
95} 84}
96 85
97static int handle_store_cpu_address(struct kvm_vcpu *vcpu) 86static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
98{ 87{
99 u64 useraddr; 88 u64 useraddr;
100 int rc;
101 89
102 vcpu->stat.instruction_stap++; 90 vcpu->stat.instruction_stap++;
103 91
104 useraddr = kvm_s390_get_base_disp_s(vcpu); 92 useraddr = kvm_s390_get_base_disp_s(vcpu);
105 93
106 if (useraddr & 1) { 94 if (useraddr & 1)
107 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 95 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
108 goto out;
109 }
110 96
111 rc = put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr); 97 if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr))
112 if (rc) { 98 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
113 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
114 goto out;
115 }
116 99
117 VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); 100 VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
118 trace_kvm_s390_handle_stap(vcpu, useraddr); 101 trace_kvm_s390_handle_stap(vcpu, useraddr);
119out:
120 return 0; 102 return 0;
121} 103}
122 104
@@ -135,10 +117,8 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
135 int cc; 117 int cc;
136 118
137 addr = kvm_s390_get_base_disp_s(vcpu); 119 addr = kvm_s390_get_base_disp_s(vcpu);
138 if (addr & 3) { 120 if (addr & 3)
139 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 121 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
140 goto out;
141 }
142 cc = 0; 122 cc = 0;
143 inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); 123 inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
144 if (!inti) 124 if (!inti)
@@ -167,7 +147,6 @@ no_interrupt:
167 /* Set condition code and we're done. */ 147 /* Set condition code and we're done. */
168 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 148 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
169 vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; 149 vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
170out:
171 return 0; 150 return 0;
172} 151}
173 152
@@ -237,12 +216,9 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
237 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), 216 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
238 &facility_list, sizeof(facility_list)); 217 &facility_list, sizeof(facility_list));
239 if (rc) 218 if (rc)
240 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 219 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
241 else { 220 VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list);
242 VCPU_EVENT(vcpu, 5, "store facility list value %x", 221 trace_kvm_s390_handle_stfl(vcpu, facility_list);
243 facility_list);
244 trace_kvm_s390_handle_stfl(vcpu, facility_list);
245 }
246 return 0; 222 return 0;
247} 223}
248 224
@@ -255,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu)
255 231
256#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) 232#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
257#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL 233#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
258#define PSW_ADDR_24 0x00000000000fffffUL 234#define PSW_ADDR_24 0x0000000000ffffffUL
259#define PSW_ADDR_31 0x000000007fffffffUL 235#define PSW_ADDR_31 0x000000007fffffffUL
260 236
237static int is_valid_psw(psw_t *psw) {
238 if (psw->mask & PSW_MASK_UNASSIGNED)
239 return 0;
240 if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
241 if (psw->addr & ~PSW_ADDR_31)
242 return 0;
243 }
244 if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
245 return 0;
246 if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA)
247 return 0;
248 return 1;
249}
250
261int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) 251int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
262{ 252{
263 u64 addr; 253 psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
264 psw_compat_t new_psw; 254 psw_compat_t new_psw;
255 u64 addr;
265 256
266 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 257 if (gpsw->mask & PSW_MASK_PSTATE)
267 return kvm_s390_inject_program_int(vcpu, 258 return kvm_s390_inject_program_int(vcpu,
268 PGM_PRIVILEGED_OPERATION); 259 PGM_PRIVILEGED_OPERATION);
269
270 addr = kvm_s390_get_base_disp_s(vcpu); 260 addr = kvm_s390_get_base_disp_s(vcpu);
271 261 if (addr & 7)
272 if (addr & 7) { 262 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
273 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 263 if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
274 goto out; 264 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
275 } 265 if (!(new_psw.mask & PSW32_MASK_BASE))
276 266 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
277 if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { 267 gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
278 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 268 gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
279 goto out; 269 gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
280 } 270 if (!is_valid_psw(gpsw))
281 271 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
282 if (!(new_psw.mask & PSW32_MASK_BASE)) {
283 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
284 goto out;
285 }
286
287 vcpu->arch.sie_block->gpsw.mask =
288 (new_psw.mask & ~PSW32_MASK_BASE) << 32;
289 vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
290
291 if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
292 (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
293 (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
294 ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
295 PSW_MASK_EA)) {
296 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
297 goto out;
298 }
299
300 handle_new_psw(vcpu); 272 handle_new_psw(vcpu);
301out:
302 return 0; 273 return 0;
303} 274}
304 275
305static int handle_lpswe(struct kvm_vcpu *vcpu) 276static int handle_lpswe(struct kvm_vcpu *vcpu)
306{ 277{
307 u64 addr;
308 psw_t new_psw; 278 psw_t new_psw;
279 u64 addr;
309 280
310 addr = kvm_s390_get_base_disp_s(vcpu); 281 addr = kvm_s390_get_base_disp_s(vcpu);
311 282 if (addr & 7)
312 if (addr & 7) { 283 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
313 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 284 if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
314 goto out; 285 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
315 } 286 vcpu->arch.sie_block->gpsw = new_psw;
316 287 if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
317 if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { 288 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
318 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
319 goto out;
320 }
321
322 vcpu->arch.sie_block->gpsw.mask = new_psw.mask;
323 vcpu->arch.sie_block->gpsw.addr = new_psw.addr;
324
325 if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) ||
326 (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
327 PSW_MASK_BA) &&
328 (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) ||
329 (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) &&
330 (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) ||
331 ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) ==
332 PSW_MASK_EA)) {
333 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
334 goto out;
335 }
336
337 handle_new_psw(vcpu); 289 handle_new_psw(vcpu);
338out:
339 return 0; 290 return 0;
340} 291}
341 292
342static int handle_stidp(struct kvm_vcpu *vcpu) 293static int handle_stidp(struct kvm_vcpu *vcpu)
343{ 294{
344 u64 operand2; 295 u64 operand2;
345 int rc;
346 296
347 vcpu->stat.instruction_stidp++; 297 vcpu->stat.instruction_stidp++;
348 298
349 operand2 = kvm_s390_get_base_disp_s(vcpu); 299 operand2 = kvm_s390_get_base_disp_s(vcpu);
350 300
351 if (operand2 & 7) { 301 if (operand2 & 7)
352 kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 302 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
353 goto out;
354 }
355 303
356 rc = put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2); 304 if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2))
357 if (rc) { 305 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
358 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
359 goto out;
360 }
361 306
362 VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); 307 VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
363out:
364 return 0; 308 return 0;
365} 309}
366 310
@@ -400,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
400 int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; 344 int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
401 int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; 345 int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
402 int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; 346 int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
347 unsigned long mem = 0;
403 u64 operand2; 348 u64 operand2;
404 unsigned long mem; 349 int rc = 0;
405 350
406 vcpu->stat.instruction_stsi++; 351 vcpu->stat.instruction_stsi++;
407 VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); 352 VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -420,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
420 case 2: 365 case 2:
421 mem = get_zeroed_page(GFP_KERNEL); 366 mem = get_zeroed_page(GFP_KERNEL);
422 if (!mem) 367 if (!mem)
423 goto out_fail; 368 goto out_no_data;
424 if (stsi((void *) mem, fc, sel1, sel2)) 369 if (stsi((void *) mem, fc, sel1, sel2))
425 goto out_mem; 370 goto out_no_data;
426 break; 371 break;
427 case 3: 372 case 3:
428 if (sel1 != 2 || sel2 != 2) 373 if (sel1 != 2 || sel2 != 2)
429 goto out_fail; 374 goto out_no_data;
430 mem = get_zeroed_page(GFP_KERNEL); 375 mem = get_zeroed_page(GFP_KERNEL);
431 if (!mem) 376 if (!mem)
432 goto out_fail; 377 goto out_no_data;
433 handle_stsi_3_2_2(vcpu, (void *) mem); 378 handle_stsi_3_2_2(vcpu, (void *) mem);
434 break; 379 break;
435 default: 380 default:
436 goto out_fail; 381 goto out_no_data;
437 } 382 }
438 383
439 if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { 384 if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
440 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 385 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
441 goto out_mem; 386 goto out_exception;
442 } 387 }
443 trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); 388 trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
444 free_page(mem); 389 free_page(mem);
445 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 390 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
446 vcpu->run->s.regs.gprs[0] = 0; 391 vcpu->run->s.regs.gprs[0] = 0;
447 return 0; 392 return 0;
448out_mem: 393out_no_data:
449 free_page(mem);
450out_fail:
451 /* condition code 3 */ 394 /* condition code 3 */
452 vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; 395 vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
453 return 0; 396out_exception:
397 free_page(mem);
398 return rc;
454} 399}
455 400
456static const intercept_handler_t b2_handlers[256] = { 401static const intercept_handler_t b2_handlers[256] = {
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 40afa0005c69..9bd4ecac72be 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
19 19
20BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) 20BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
21 21
22#ifdef CONFIG_HAVE_KVM
23BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
24#endif
25
22/* 26/*
23 * every pentium local APIC has two 'local interrupts', with a 27 * every pentium local APIC has two 'local interrupts', with a
24 * soft-definable vector attached to both interrupts, one of 28 * soft-definable vector attached to both interrupts, one of
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 81f04cee5f74..ab0ae1aa6d0a 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -12,6 +12,9 @@ typedef struct {
12 unsigned int irq_spurious_count; 12 unsigned int irq_spurious_count;
13 unsigned int icr_read_retry_count; 13 unsigned int icr_read_retry_count;
14#endif 14#endif
15#ifdef CONFIG_HAVE_KVM
16 unsigned int kvm_posted_intr_ipis;
17#endif
15 unsigned int x86_platform_ipis; /* arch dependent */ 18 unsigned int x86_platform_ipis; /* arch dependent */
16 unsigned int apic_perf_irqs; 19 unsigned int apic_perf_irqs;
17 unsigned int apic_irq_work_irqs; 20 unsigned int apic_irq_work_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 10a78c3d3d5a..1da97efad08a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -28,6 +28,7 @@
28/* Interrupt handlers registered during init_IRQ */ 28/* Interrupt handlers registered during init_IRQ */
29extern void apic_timer_interrupt(void); 29extern void apic_timer_interrupt(void);
30extern void x86_platform_ipi(void); 30extern void x86_platform_ipi(void);
31extern void kvm_posted_intr_ipi(void);
31extern void error_interrupt(void); 32extern void error_interrupt(void);
32extern void irq_work_interrupt(void); 33extern void irq_work_interrupt(void);
33 34
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index aac5fa62a86c..5702d7e3111d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,11 @@
102 */ 102 */
103#define X86_PLATFORM_IPI_VECTOR 0xf7 103#define X86_PLATFORM_IPI_VECTOR 0xf7
104 104
105/* Vector for KVM to deliver posted interrupt IPI */
106#ifdef CONFIG_HAVE_KVM
107#define POSTED_INTR_VECTOR 0xf2
108#endif
109
105/* 110/*
106 * IRQ work vector: 111 * IRQ work vector:
107 */ 112 */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b5a64621d5af..599f98b612d4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -94,9 +94,6 @@
94 94
95#define ASYNC_PF_PER_VCPU 64 95#define ASYNC_PF_PER_VCPU 64
96 96
97extern raw_spinlock_t kvm_lock;
98extern struct list_head vm_list;
99
100struct kvm_vcpu; 97struct kvm_vcpu;
101struct kvm; 98struct kvm;
102struct kvm_async_pf; 99struct kvm_async_pf;
@@ -704,6 +701,8 @@ struct kvm_x86_ops {
704 void (*hwapic_isr_update)(struct kvm *kvm, int isr); 701 void (*hwapic_isr_update)(struct kvm *kvm, int isr);
705 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); 702 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
706 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); 703 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
704 void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
705 void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
707 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 706 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
708 int (*get_tdp_level)(void); 707 int (*get_tdp_level)(void);
709 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 708 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -730,6 +729,7 @@ struct kvm_x86_ops {
730 int (*check_intercept)(struct kvm_vcpu *vcpu, 729 int (*check_intercept)(struct kvm_vcpu *vcpu,
731 struct x86_instruction_info *info, 730 struct x86_instruction_info *info,
732 enum x86_intercept_stage stage); 731 enum x86_intercept_stage stage);
732 void (*handle_external_intr)(struct kvm_vcpu *vcpu);
733}; 733};
734 734
735struct kvm_arch_async_pf { 735struct kvm_arch_async_pf {
@@ -798,6 +798,7 @@ enum emulation_result {
798#define EMULTYPE_TRAP_UD (1 << 1) 798#define EMULTYPE_TRAP_UD (1 << 1)
799#define EMULTYPE_SKIP (1 << 2) 799#define EMULTYPE_SKIP (1 << 2)
800#define EMULTYPE_RETRY (1 << 3) 800#define EMULTYPE_RETRY (1 << 3)
801#define EMULTYPE_NO_REEXECUTE (1 << 4)
801int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, 802int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
802 int emulation_type, void *insn, int insn_len); 803 int emulation_type, void *insn, int insn_len);
803 804
@@ -975,7 +976,6 @@ enum {
975 * Trap the fault and ignore the instruction if that happens. 976 * Trap the fault and ignore the instruction if that happens.
976 */ 977 */
977asmlinkage void kvm_spurious_fault(void); 978asmlinkage void kvm_spurious_fault(void);
978extern bool kvm_rebooting;
979 979
980#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ 980#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
981 "666: " insn "\n\t" \ 981 "666: " insn "\n\t" \
@@ -1030,7 +1030,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
1030void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); 1030void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
1031bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); 1031bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
1032int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); 1032int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
1033int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); 1033int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
1034int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); 1034int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
1035void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); 1035void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
1036void kvm_deliver_pmi(struct kvm_vcpu *vcpu); 1036void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index fc1c3134473b..6f07f1999138 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -71,6 +71,7 @@
71#define PIN_BASED_NMI_EXITING 0x00000008 71#define PIN_BASED_NMI_EXITING 0x00000008
72#define PIN_BASED_VIRTUAL_NMIS 0x00000020 72#define PIN_BASED_VIRTUAL_NMIS 0x00000020
73#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 73#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
74#define PIN_BASED_POSTED_INTR 0x00000080
74 75
75#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 76#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
76 77
@@ -102,6 +103,7 @@
102/* VMCS Encodings */ 103/* VMCS Encodings */
103enum vmcs_field { 104enum vmcs_field {
104 VIRTUAL_PROCESSOR_ID = 0x00000000, 105 VIRTUAL_PROCESSOR_ID = 0x00000000,
106 POSTED_INTR_NV = 0x00000002,
105 GUEST_ES_SELECTOR = 0x00000800, 107 GUEST_ES_SELECTOR = 0x00000800,
106 GUEST_CS_SELECTOR = 0x00000802, 108 GUEST_CS_SELECTOR = 0x00000802,
107 GUEST_SS_SELECTOR = 0x00000804, 109 GUEST_SS_SELECTOR = 0x00000804,
@@ -136,6 +138,8 @@ enum vmcs_field {
136 VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, 138 VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
137 APIC_ACCESS_ADDR = 0x00002014, 139 APIC_ACCESS_ADDR = 0x00002014,
138 APIC_ACCESS_ADDR_HIGH = 0x00002015, 140 APIC_ACCESS_ADDR_HIGH = 0x00002015,
141 POSTED_INTR_DESC_ADDR = 0x00002016,
142 POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
139 EPT_POINTER = 0x0000201a, 143 EPT_POINTER = 0x0000201a,
140 EPT_POINTER_HIGH = 0x0000201b, 144 EPT_POINTER_HIGH = 0x0000201b,
141 EOI_EXIT_BITMAP0 = 0x0000201c, 145 EOI_EXIT_BITMAP0 = 0x0000201c,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c1d01e6ca790..727208941030 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
1166apicinterrupt X86_PLATFORM_IPI_VECTOR \ 1166apicinterrupt X86_PLATFORM_IPI_VECTOR \
1167 x86_platform_ipi smp_x86_platform_ipi 1167 x86_platform_ipi smp_x86_platform_ipi
1168 1168
1169#ifdef CONFIG_HAVE_KVM
1170apicinterrupt POSTED_INTR_VECTOR \
1171 kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
1172#endif
1173
1169apicinterrupt THRESHOLD_APIC_VECTOR \ 1174apicinterrupt THRESHOLD_APIC_VECTOR \
1170 threshold_interrupt smp_threshold_interrupt 1175 threshold_interrupt smp_threshold_interrupt
1171apicinterrupt THERMAL_APIC_VECTOR \ 1176apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index e4595f105910..6ae6ea1d27d9 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -228,6 +228,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
228 set_irq_regs(old_regs); 228 set_irq_regs(old_regs);
229} 229}
230 230
231#ifdef CONFIG_HAVE_KVM
232/*
233 * Handler for POSTED_INTERRUPT_VECTOR.
234 */
235void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
236{
237 struct pt_regs *old_regs = set_irq_regs(regs);
238
239 ack_APIC_irq();
240
241 irq_enter();
242
243 exit_idle();
244
245 inc_irq_stat(kvm_posted_intr_ipis);
246
247 irq_exit();
248
249 set_irq_regs(old_regs);
250}
251#endif
252
231EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); 253EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
232 254
233#ifdef CONFIG_HOTPLUG_CPU 255#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7dc4e459c2b3..a2a1fbc594ff 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -172,6 +172,10 @@ static void __init apic_intr_init(void)
172 172
173 /* IPI for X86 platform specific use */ 173 /* IPI for X86 platform specific use */
174 alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); 174 alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
175#ifdef CONFIG_HAVE_KVM
176 /* IPI for KVM to deliver posted interrupt */
177 alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
178#endif
175 179
176 /* IPI vectors for APIC spurious and error interrupts */ 180 /* IPI vectors for APIC spurious and error interrupts */
177 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 181 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a335cc6cde72..46f63b8d09f4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -132,8 +132,9 @@
132#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ 132#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
133#define No64 (1<<28) 133#define No64 (1<<28)
134#define PageTable (1 << 29) /* instruction used to write page table */ 134#define PageTable (1 << 29) /* instruction used to write page table */
135#define NotImpl (1 << 30) /* instruction is not implemented */
135/* Source 2 operand type */ 136/* Source 2 operand type */
136#define Src2Shift (30) 137#define Src2Shift (31)
137#define Src2None (OpNone << Src2Shift) 138#define Src2None (OpNone << Src2Shift)
138#define Src2CL (OpCL << Src2Shift) 139#define Src2CL (OpCL << Src2Shift)
139#define Src2ImmByte (OpImmByte << Src2Shift) 140#define Src2ImmByte (OpImmByte << Src2Shift)
@@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1578 1579
1579 memset(&seg_desc, 0, sizeof seg_desc); 1580 memset(&seg_desc, 0, sizeof seg_desc);
1580 1581
1581 if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) 1582 if (ctxt->mode == X86EMUL_MODE_REAL) {
1582 || ctxt->mode == X86EMUL_MODE_REAL) { 1583 /* set real mode segment descriptor (keep limit etc. for
1583 /* set real mode segment descriptor */ 1584 * unreal mode) */
1584 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); 1585 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1585 set_desc_base(&seg_desc, selector << 4); 1586 set_desc_base(&seg_desc, selector << 4);
1586 goto load; 1587 goto load;
1588 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1589 /* VM86 needs a clean new segment descriptor */
1590 set_desc_base(&seg_desc, selector << 4);
1591 set_desc_limit(&seg_desc, 0xffff);
1592 seg_desc.type = 3;
1593 seg_desc.p = 1;
1594 seg_desc.s = 1;
1595 seg_desc.dpl = 3;
1596 goto load;
1587 } 1597 }
1588 1598
1589 rpl = selector & 3; 1599 rpl = selector & 3;
@@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3615#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } 3625#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
3616#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ 3626#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
3617 .check_perm = (_p) } 3627 .check_perm = (_p) }
3618#define N D(0) 3628#define N D(NotImpl)
3619#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } 3629#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3620#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } 3630#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3621#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } 3631#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
@@ -3713,7 +3723,7 @@ static const struct opcode group5[] = {
3713 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), 3723 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
3714 I(SrcMem | Stack, em_grp45), 3724 I(SrcMem | Stack, em_grp45),
3715 I(SrcMemFAddr | ImplicitOps, em_grp45), 3725 I(SrcMemFAddr | ImplicitOps, em_grp45),
3716 I(SrcMem | Stack, em_grp45), N, 3726 I(SrcMem | Stack, em_grp45), D(Undefined),
3717}; 3727};
3718 3728
3719static const struct opcode group6[] = { 3729static const struct opcode group6[] = {
@@ -4373,7 +4383,7 @@ done_prefixes:
4373 ctxt->intercept = opcode.intercept; 4383 ctxt->intercept = opcode.intercept;
4374 4384
4375 /* Unrecognised? */ 4385 /* Unrecognised? */
4376 if (ctxt->d == 0 || (ctxt->d & Undefined)) 4386 if (ctxt->d == 0 || (ctxt->d & NotImpl))
4377 return EMULATION_FAILED; 4387 return EMULATION_FAILED;
4378 4388
4379 if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) 4389 if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
@@ -4511,7 +4521,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4511 4521
4512 ctxt->mem_read.pos = 0; 4522 ctxt->mem_read.pos = 0;
4513 4523
4514 if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) { 4524 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
4525 (ctxt->d & Undefined)) {
4515 rc = emulate_ud(ctxt); 4526 rc = emulate_ud(ctxt);
4516 goto done; 4527 goto done;
4517 } 4528 }
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c1d30b2fc9bb..412a5aa0ef94 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work)
290 } 290 }
291 spin_unlock(&ps->inject_lock); 291 spin_unlock(&ps->inject_lock);
292 if (inject) { 292 if (inject) {
293 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 293 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
294 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 294 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
295 295
296 /* 296 /*
297 * Provides NMI watchdog support via Virtual Wire mode. 297 * Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a8e9369f41c5..e29883c604ff 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
94 return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 94 return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
95} 95}
96 96
97bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
98{
99 struct kvm_lapic *apic = vcpu->arch.apic;
100
101 return apic_test_vector(vector, apic->regs + APIC_ISR) ||
102 apic_test_vector(vector, apic->regs + APIC_IRR);
103}
104
97static inline void apic_set_vector(int vec, void *bitmap) 105static inline void apic_set_vector(int vec, void *bitmap)
98{ 106{
99 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 107 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
145 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 153 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
146} 154}
147 155
148void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
149 struct kvm_lapic_irq *irq,
150 u64 *eoi_exit_bitmap)
151{
152 struct kvm_lapic **dst;
153 struct kvm_apic_map *map;
154 unsigned long bitmap = 1;
155 int i;
156
157 rcu_read_lock();
158 map = rcu_dereference(vcpu->kvm->arch.apic_map);
159
160 if (unlikely(!map)) {
161 __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
162 goto out;
163 }
164
165 if (irq->dest_mode == 0) { /* physical mode */
166 if (irq->delivery_mode == APIC_DM_LOWEST ||
167 irq->dest_id == 0xff) {
168 __set_bit(irq->vector,
169 (unsigned long *)eoi_exit_bitmap);
170 goto out;
171 }
172 dst = &map->phys_map[irq->dest_id & 0xff];
173 } else {
174 u32 mda = irq->dest_id << (32 - map->ldr_bits);
175
176 dst = map->logical_map[apic_cluster_id(map, mda)];
177
178 bitmap = apic_logical_id(map, mda);
179 }
180
181 for_each_set_bit(i, &bitmap, 16) {
182 if (!dst[i])
183 continue;
184 if (dst[i]->vcpu == vcpu) {
185 __set_bit(irq->vector,
186 (unsigned long *)eoi_exit_bitmap);
187 break;
188 }
189 }
190
191out:
192 rcu_read_unlock();
193}
194
195static void recalculate_apic_map(struct kvm *kvm) 156static void recalculate_apic_map(struct kvm *kvm)
196{ 157{
197 struct kvm_apic_map *new, *old = NULL; 158 struct kvm_apic_map *new, *old = NULL;
@@ -256,7 +217,7 @@ out:
256 if (old) 217 if (old)
257 kfree_rcu(old, rcu); 218 kfree_rcu(old, rcu);
258 219
259 kvm_ioapic_make_eoibitmap_request(kvm); 220 kvm_vcpu_request_scan_ioapic(kvm);
260} 221}
261 222
262static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) 223static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap)
357 return count; 318 return count;
358} 319}
359 320
321void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
322{
323 u32 i, pir_val;
324 struct kvm_lapic *apic = vcpu->arch.apic;
325
326 for (i = 0; i <= 7; i++) {
327 pir_val = xchg(&pir[i], 0);
328 if (pir_val)
329 *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
330 }
331}
332EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
333
360static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) 334static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
361{ 335{
362 apic->irr_pending = true; 336 apic->irr_pending = true;
@@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
379 if (!apic->irr_pending) 353 if (!apic->irr_pending)
380 return -1; 354 return -1;
381 355
356 kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
382 result = apic_search_irr(apic); 357 result = apic_search_irr(apic);
383 ASSERT(result == -1 || result >= 16); 358 ASSERT(result == -1 || result >= 16);
384 359
@@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
431} 406}
432 407
433static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 408static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
434 int vector, int level, int trig_mode); 409 int vector, int level, int trig_mode,
410 unsigned long *dest_map);
435 411
436int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) 412int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
413 unsigned long *dest_map)
437{ 414{
438 struct kvm_lapic *apic = vcpu->arch.apic; 415 struct kvm_lapic *apic = vcpu->arch.apic;
439 416
440 return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, 417 return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
441 irq->level, irq->trig_mode); 418 irq->level, irq->trig_mode, dest_map);
442} 419}
443 420
444static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) 421static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
@@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
505 return result; 482 return result;
506} 483}
507 484
485void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
486{
487 struct kvm_lapic *apic = vcpu->arch.apic;
488 int i;
489
490 for (i = 0; i < 8; i++)
491 apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
492}
493
508static void apic_update_ppr(struct kvm_lapic *apic) 494static void apic_update_ppr(struct kvm_lapic *apic)
509{ 495{
510 u32 tpr, isrv, ppr, old_ppr; 496 u32 tpr, isrv, ppr, old_ppr;
@@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
611} 597}
612 598
613bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 599bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
614 struct kvm_lapic_irq *irq, int *r) 600 struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
615{ 601{
616 struct kvm_apic_map *map; 602 struct kvm_apic_map *map;
617 unsigned long bitmap = 1; 603 unsigned long bitmap = 1;
@@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
622 *r = -1; 608 *r = -1;
623 609
624 if (irq->shorthand == APIC_DEST_SELF) { 610 if (irq->shorthand == APIC_DEST_SELF) {
625 *r = kvm_apic_set_irq(src->vcpu, irq); 611 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
626 return true; 612 return true;
627 } 613 }
628 614
@@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
667 continue; 653 continue;
668 if (*r < 0) 654 if (*r < 0)
669 *r = 0; 655 *r = 0;
670 *r += kvm_apic_set_irq(dst[i]->vcpu, irq); 656 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
671 } 657 }
672 658
673 ret = true; 659 ret = true;
@@ -681,7 +667,8 @@ out:
681 * Return 1 if successfully added and 0 if discarded. 667 * Return 1 if successfully added and 0 if discarded.
682 */ 668 */
683static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 669static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
684 int vector, int level, int trig_mode) 670 int vector, int level, int trig_mode,
671 unsigned long *dest_map)
685{ 672{
686 int result = 0; 673 int result = 0;
687 struct kvm_vcpu *vcpu = apic->vcpu; 674 struct kvm_vcpu *vcpu = apic->vcpu;
@@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
694 if (unlikely(!apic_enabled(apic))) 681 if (unlikely(!apic_enabled(apic)))
695 break; 682 break;
696 683
697 if (trig_mode) { 684 if (dest_map)
698 apic_debug("level trig mode for vector %d", vector); 685 __set_bit(vcpu->vcpu_id, dest_map);
699 apic_set_vector(vector, apic->regs + APIC_TMR);
700 } else
701 apic_clear_vector(vector, apic->regs + APIC_TMR);
702 686
703 result = !apic_test_and_set_irr(vector, apic); 687 if (kvm_x86_ops->deliver_posted_interrupt) {
704 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 688 result = 1;
705 trig_mode, vector, !result); 689 kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
706 if (!result) { 690 } else {
707 if (trig_mode) 691 result = !apic_test_and_set_irr(vector, apic);
708 apic_debug("level trig mode repeatedly for "
709 "vector %d", vector);
710 break;
711 }
712 692
713 kvm_make_request(KVM_REQ_EVENT, vcpu); 693 if (!result) {
714 kvm_vcpu_kick(vcpu); 694 if (trig_mode)
695 apic_debug("level trig mode repeatedly "
696 "for vector %d", vector);
697 goto out;
698 }
699
700 kvm_make_request(KVM_REQ_EVENT, vcpu);
701 kvm_vcpu_kick(vcpu);
702 }
703out:
704 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
705 trig_mode, vector, !result);
715 break; 706 break;
716 707
717 case APIC_DM_REMRD: 708 case APIC_DM_REMRD:
@@ -786,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
786 trigger_mode = IOAPIC_LEVEL_TRIG; 777 trigger_mode = IOAPIC_LEVEL_TRIG;
787 else 778 else
788 trigger_mode = IOAPIC_EDGE_TRIG; 779 trigger_mode = IOAPIC_EDGE_TRIG;
789 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); 780 kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
790 } 781 }
791} 782}
792 783
@@ -852,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
852 irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, 843 irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
853 irq.vector); 844 irq.vector);
854 845
855 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); 846 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
856} 847}
857 848
858static u32 apic_get_tmcct(struct kvm_lapic *apic) 849static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -1488,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
1488 vector = reg & APIC_VECTOR_MASK; 1479 vector = reg & APIC_VECTOR_MASK;
1489 mode = reg & APIC_MODE_MASK; 1480 mode = reg & APIC_MODE_MASK;
1490 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; 1481 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
1491 return __apic_accept_irq(apic, mode, vector, 1, trig_mode); 1482 return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
1483 NULL);
1492 } 1484 }
1493 return 0; 1485 return 0;
1494} 1486}
@@ -1658,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1658 apic->highest_isr_cache = -1; 1650 apic->highest_isr_cache = -1;
1659 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); 1651 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
1660 kvm_make_request(KVM_REQ_EVENT, vcpu); 1652 kvm_make_request(KVM_REQ_EVENT, vcpu);
1653 kvm_rtc_eoi_tracking_restore_one(vcpu);
1661} 1654}
1662 1655
1663void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) 1656void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2c721b986eec..c730ac9fe801 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -53,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
53u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); 53u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
54void kvm_apic_set_version(struct kvm_vcpu *vcpu); 54void kvm_apic_set_version(struct kvm_vcpu *vcpu);
55 55
56void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
57void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
56int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); 58int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
57int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); 59int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
58int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); 60int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
61 unsigned long *dest_map);
59int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); 62int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
60 63
61bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 64bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
62 struct kvm_lapic_irq *irq, int *r); 65 struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
63 66
64u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); 67u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
65void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); 68void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
@@ -160,13 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
160 return ldr & map->lid_mask; 163 return ldr & map->lid_mask;
161} 164}
162 165
163void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
164 struct kvm_lapic_irq *irq,
165 u64 *eoi_bitmap);
166
167static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) 166static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
168{ 167{
169 return vcpu->arch.apic->pending_events; 168 return vcpu->arch.apic->pending_events;
170} 169}
171 170
171bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
172
172#endif 173#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 633e30cfbd63..004cc87b781c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1501,15 +1501,11 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
1501 mmu_spte_clear_no_track(parent_pte); 1501 mmu_spte_clear_no_track(parent_pte);
1502} 1502}
1503 1503
1504static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
1505
1506static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, 1504static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
1507 u64 *parent_pte, int direct) 1505 u64 *parent_pte, int direct)
1508{ 1506{
1509 struct kvm_mmu_page *sp; 1507 struct kvm_mmu_page *sp;
1510 1508
1511 make_mmu_pages_available(vcpu);
1512
1513 sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); 1509 sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
1514 sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); 1510 sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
1515 if (!direct) 1511 if (!direct)
@@ -2806,6 +2802,7 @@ exit:
2806 2802
2807static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, 2803static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
2808 gva_t gva, pfn_t *pfn, bool write, bool *writable); 2804 gva_t gva, pfn_t *pfn, bool write, bool *writable);
2805static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
2809 2806
2810static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, 2807static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
2811 gfn_t gfn, bool prefault) 2808 gfn_t gfn, bool prefault)
@@ -2847,6 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
2847 spin_lock(&vcpu->kvm->mmu_lock); 2844 spin_lock(&vcpu->kvm->mmu_lock);
2848 if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) 2845 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
2849 goto out_unlock; 2846 goto out_unlock;
2847 make_mmu_pages_available(vcpu);
2850 if (likely(!force_pt_level)) 2848 if (likely(!force_pt_level))
2851 transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); 2849 transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
2852 r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, 2850 r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
@@ -2924,6 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
2924 2922
2925 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { 2923 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
2926 spin_lock(&vcpu->kvm->mmu_lock); 2924 spin_lock(&vcpu->kvm->mmu_lock);
2925 make_mmu_pages_available(vcpu);
2927 sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 2926 sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
2928 1, ACC_ALL, NULL); 2927 1, ACC_ALL, NULL);
2929 ++sp->root_count; 2928 ++sp->root_count;
@@ -2935,6 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
2935 2934
2936 ASSERT(!VALID_PAGE(root)); 2935 ASSERT(!VALID_PAGE(root));
2937 spin_lock(&vcpu->kvm->mmu_lock); 2936 spin_lock(&vcpu->kvm->mmu_lock);
2937 make_mmu_pages_available(vcpu);
2938 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), 2938 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
2939 i << 30, 2939 i << 30,
2940 PT32_ROOT_LEVEL, 1, ACC_ALL, 2940 PT32_ROOT_LEVEL, 1, ACC_ALL,
@@ -2973,6 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
2973 ASSERT(!VALID_PAGE(root)); 2973 ASSERT(!VALID_PAGE(root));
2974 2974
2975 spin_lock(&vcpu->kvm->mmu_lock); 2975 spin_lock(&vcpu->kvm->mmu_lock);
2976 make_mmu_pages_available(vcpu);
2976 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, 2977 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
2977 0, ACC_ALL, NULL); 2978 0, ACC_ALL, NULL);
2978 root = __pa(sp->spt); 2979 root = __pa(sp->spt);
@@ -3006,6 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
3006 return 1; 3007 return 1;
3007 } 3008 }
3008 spin_lock(&vcpu->kvm->mmu_lock); 3009 spin_lock(&vcpu->kvm->mmu_lock);
3010 make_mmu_pages_available(vcpu);
3009 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 3011 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
3010 PT32_ROOT_LEVEL, 0, 3012 PT32_ROOT_LEVEL, 0,
3011 ACC_ALL, NULL); 3013 ACC_ALL, NULL);
@@ -3311,6 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
3311 spin_lock(&vcpu->kvm->mmu_lock); 3313 spin_lock(&vcpu->kvm->mmu_lock);
3312 if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) 3314 if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
3313 goto out_unlock; 3315 goto out_unlock;
3316 make_mmu_pages_available(vcpu);
3314 if (likely(!force_pt_level)) 3317 if (likely(!force_pt_level))
3315 transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); 3318 transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
3316 r = __direct_map(vcpu, gpa, write, map_writable, 3319 r = __direct_map(vcpu, gpa, write, map_writable,
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index af143f065532..da20860b457a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -627,6 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
627 goto out_unlock; 627 goto out_unlock;
628 628
629 kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); 629 kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
630 make_mmu_pages_available(vcpu);
630 if (!force_pt_level) 631 if (!force_pt_level)
631 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); 632 transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
632 r = FNAME(fetch)(vcpu, addr, &walker, write_fault, 633 r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
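
Every hunk above adds the same call in the same place: make_mmu_pages_available() runs right after mmu_lock is taken (and after the mmu_notifier retry check, where the path has one) and before any shadow page is consumed, so the fault and root-allocation paths never find the page cache empty while holding the lock. As a rough standalone illustration of that ordering (all toy_* names are invented; only make_mmu_pages_available and mmu_lock come from the patch):

#include <pthread.h>
#include <stdio.h>

#define MIN_FREE_PAGES 5

static pthread_mutex_t toy_mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static int toy_free_pages = 2;          /* pretend the cache is nearly empty */

/* stand-in for make_mmu_pages_available(): top up the cache under mmu_lock */
static void toy_make_pages_available(void)
{
    while (toy_free_pages < MIN_FREE_PAGES)
        toy_free_pages++;               /* the real code reclaims old shadow pages */
}

/* stand-in for kvm_mmu_get_page(): consumes one page from the cache */
static int toy_get_shadow_page(void)
{
    if (toy_free_pages == 0)
        return -1;
    return --toy_free_pages;
}

static int toy_page_fault(void)
{
    int r;

    pthread_mutex_lock(&toy_mmu_lock);
    /* a real fault path would re-check the mmu_notifier sequence here */
    toy_make_pages_available();         /* reserve before mapping, under the lock */
    r = toy_get_shadow_page();
    pthread_mutex_unlock(&toy_mmu_lock);
    return r;
}

int main(void)
{
    printf("mapped with page index %d\n", toy_page_fault());
    return 0;
}
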
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cfc258a6bf97..c53e797e7369 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
360 return 1; 360 return 1;
361} 361}
362 362
363int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) 363int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
364{ 364{
365 struct kvm_pmu *pmu = &vcpu->arch.pmu; 365 struct kvm_pmu *pmu = &vcpu->arch.pmu;
366 struct kvm_pmc *pmc; 366 struct kvm_pmc *pmc;
367 u32 index = msr_info->index;
368 u64 data = msr_info->data;
367 369
368 switch (index) { 370 switch (index) {
369 case MSR_CORE_PERF_FIXED_CTR_CTRL: 371 case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
375 } 377 }
376 break; 378 break;
377 case MSR_CORE_PERF_GLOBAL_STATUS: 379 case MSR_CORE_PERF_GLOBAL_STATUS:
380 if (msr_info->host_initiated) {
381 pmu->global_status = data;
382 return 0;
383 }
378 break; /* RO MSR */ 384 break; /* RO MSR */
379 case MSR_CORE_PERF_GLOBAL_CTRL: 385 case MSR_CORE_PERF_GLOBAL_CTRL:
380 if (pmu->global_ctrl == data) 386 if (pmu->global_ctrl == data)
@@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
386 break; 392 break;
387 case MSR_CORE_PERF_GLOBAL_OVF_CTRL: 393 case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
388 if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { 394 if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
389 pmu->global_status &= ~data; 395 if (!msr_info->host_initiated)
396 pmu->global_status &= ~data;
390 pmu->global_ovf_ctrl = data; 397 pmu->global_ovf_ctrl = data;
391 return 0; 398 return 0;
392 } 399 }
@@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
394 default: 401 default:
395 if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || 402 if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
396 (pmc = get_fixed_pmc(pmu, index))) { 403 (pmc = get_fixed_pmc(pmu, index))) {
397 data = (s64)(s32)data; 404 if (!msr_info->host_initiated)
405 data = (s64)(s32)data;
398 pmc->counter += data - read_pmc(pmc); 406 pmc->counter += data - read_pmc(pmc);
399 return 0; 407 return 0;
400 } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { 408 } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
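
The switch from an (index, data) pair to struct msr_data lets kvm_pmu_set_msr() tell guest WRMSRs apart from host-initiated writes such as state restore: guest counter writes keep the 32-bit sign-extension and GLOBAL_STATUS stays read-only, while host writes are taken verbatim so saved PMU state round-trips. A small self-contained sketch of just that split (the toy_* names are invented; only the (s64)(s32) cast mirrors the hunk, and the real code adjusts the running counter rather than assigning it):

#include <stdint.h>
#include <stdio.h>

struct toy_pmc { uint64_t counter; };

static void toy_write_counter(struct toy_pmc *pmc, uint64_t data,
                              int host_initiated)
{
    if (!host_initiated)
        data = (uint64_t)(int64_t)(int32_t)data; /* guest writes: 32-bit sign-extend */
    pmc->counter = data;                         /* host restore: take the value as-is */
}

int main(void)
{
    struct toy_pmc pmc = { 0 };

    toy_write_counter(&pmc, 0xffffffffull, 0);   /* guest: becomes 0xffffffffffffffff */
    printf("guest write: %#llx\n", (unsigned long long)pmc.counter);

    toy_write_counter(&pmc, 0xffffffffull, 1);   /* host: stays 0x00000000ffffffff */
    printf("host  write: %#llx\n", (unsigned long long)pmc.counter);
    return 0;
}
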
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7a46c1f46861..d6713e18bbc1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3577,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
3577 return; 3577 return;
3578} 3578}
3579 3579
3580static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
3581{
3582 return;
3583}
3584
3580static int svm_nmi_allowed(struct kvm_vcpu *vcpu) 3585static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3581{ 3586{
3582 struct vcpu_svm *svm = to_svm(vcpu); 3587 struct vcpu_svm *svm = to_svm(vcpu);
@@ -4233,6 +4238,11 @@ out:
4233 return ret; 4238 return ret;
4234} 4239}
4235 4240
4241static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
4242{
4243 local_irq_enable();
4244}
4245
4236static struct kvm_x86_ops svm_x86_ops = { 4246static struct kvm_x86_ops svm_x86_ops = {
4237 .cpu_has_kvm_support = has_svm, 4247 .cpu_has_kvm_support = has_svm,
4238 .disabled_by_bios = is_disabled, 4248 .disabled_by_bios = is_disabled,
@@ -4300,6 +4310,7 @@ static struct kvm_x86_ops svm_x86_ops = {
4300 .vm_has_apicv = svm_vm_has_apicv, 4310 .vm_has_apicv = svm_vm_has_apicv,
4301 .load_eoi_exitmap = svm_load_eoi_exitmap, 4311 .load_eoi_exitmap = svm_load_eoi_exitmap,
4302 .hwapic_isr_update = svm_hwapic_isr_update, 4312 .hwapic_isr_update = svm_hwapic_isr_update,
4313 .sync_pir_to_irr = svm_sync_pir_to_irr,
4303 4314
4304 .set_tss_addr = svm_set_tss_addr, 4315 .set_tss_addr = svm_set_tss_addr,
4305 .get_tdp_level = get_npt_level, 4316 .get_tdp_level = get_npt_level,
@@ -4328,6 +4339,7 @@ static struct kvm_x86_ops svm_x86_ops = {
4328 .set_tdp_cr3 = set_tdp_cr3, 4339 .set_tdp_cr3 = set_tdp_cr3,
4329 4340
4330 .check_intercept = svm_check_intercept, 4341 .check_intercept = svm_check_intercept,
4342 .handle_external_intr = svm_handle_external_intr,
4331}; 4343};
4332 4344
4333static int __init svm_init(void) 4345static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 03f574641852..5a87a58af49d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
84static bool __read_mostly fasteoi = 1; 84static bool __read_mostly fasteoi = 1;
85module_param(fasteoi, bool, S_IRUGO); 85module_param(fasteoi, bool, S_IRUGO);
86 86
87static bool __read_mostly enable_apicv_reg_vid; 87static bool __read_mostly enable_apicv = 1;
88module_param(enable_apicv, bool, S_IRUGO);
88 89
89/* 90/*
90 * If nested=1, nested virtualization is supported, i.e., guests may use 91 * If nested=1, nested virtualization is supported, i.e., guests may use
@@ -366,6 +367,31 @@ struct nested_vmx {
366 struct page *apic_access_page; 367 struct page *apic_access_page;
367}; 368};
368 369
370#define POSTED_INTR_ON 0
371/* Posted-Interrupt Descriptor */
372struct pi_desc {
373 u32 pir[8]; /* Posted interrupt requested */
374 u32 control; /* bit 0 of control is outstanding notification bit */
375 u32 rsvd[7];
376} __aligned(64);
377
378static bool pi_test_and_set_on(struct pi_desc *pi_desc)
379{
380 return test_and_set_bit(POSTED_INTR_ON,
381 (unsigned long *)&pi_desc->control);
382}
383
384static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
385{
386 return test_and_clear_bit(POSTED_INTR_ON,
387 (unsigned long *)&pi_desc->control);
388}
389
390static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
391{
392 return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
393}
394
369struct vcpu_vmx { 395struct vcpu_vmx {
370 struct kvm_vcpu vcpu; 396 struct kvm_vcpu vcpu;
371 unsigned long host_rsp; 397 unsigned long host_rsp;
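
The new struct pi_desc is the 64-byte, 64-byte-aligned posted-interrupt descriptor that the CPU itself reads: 256 request bits (one per vector) plus an outstanding-notification (ON) bit in the control word, all manipulated with atomic test-and-set so senders and hardware never race. A standalone sketch of the same bit discipline using compiler atomics (the pi_* naming follows the patch; everything else here is illustrative):

#include <stdio.h>

struct toy_pi_desc {
    unsigned int pir[8];    /* one request bit per vector, 0..255 */
    unsigned int control;   /* bit 0: outstanding notification (ON) */
    unsigned int rsvd[7];
} __attribute__((aligned(64)));

/* returns the previous value of the requested bit, like test_and_set_bit() */
static int toy_test_and_set_bit(unsigned int nr, unsigned int *addr)
{
    unsigned int mask = 1u << (nr % 32);
    unsigned int old = __atomic_fetch_or(&addr[nr / 32], mask, __ATOMIC_SEQ_CST);
    return (old & mask) != 0;
}

static int toy_pi_set_pir(struct toy_pi_desc *pi, unsigned int vector)
{
    return toy_test_and_set_bit(vector, pi->pir);
}

static int toy_pi_set_on(struct toy_pi_desc *pi)
{
    return toy_test_and_set_bit(0, &pi->control);
}

int main(void)
{
    struct toy_pi_desc pi = { { 0 } };

    printf("pir bit for vector 33 was set before: %d\n", toy_pi_set_pir(&pi, 33));
    printf("ON was set before                   : %d\n", toy_pi_set_on(&pi));
    printf("ON was set before (second attempt)  : %d\n", toy_pi_set_on(&pi));
    return 0;
}
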
@@ -378,6 +404,7 @@ struct vcpu_vmx {
378 struct shared_msr_entry *guest_msrs; 404 struct shared_msr_entry *guest_msrs;
379 int nmsrs; 405 int nmsrs;
380 int save_nmsrs; 406 int save_nmsrs;
407 unsigned long host_idt_base;
381#ifdef CONFIG_X86_64 408#ifdef CONFIG_X86_64
382 u64 msr_host_kernel_gs_base; 409 u64 msr_host_kernel_gs_base;
383 u64 msr_guest_kernel_gs_base; 410 u64 msr_guest_kernel_gs_base;
@@ -429,6 +456,9 @@ struct vcpu_vmx {
429 456
430 bool rdtscp_enabled; 457 bool rdtscp_enabled;
431 458
459 /* Posted interrupt descriptor */
460 struct pi_desc pi_desc;
461
432 /* Support for a guest hypervisor (nested VMX) */ 462 /* Support for a guest hypervisor (nested VMX) */
433 struct nested_vmx nested; 463 struct nested_vmx nested;
434}; 464};
@@ -626,6 +656,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
626 struct kvm_segment *var, int seg); 656 struct kvm_segment *var, int seg);
627static bool guest_state_valid(struct kvm_vcpu *vcpu); 657static bool guest_state_valid(struct kvm_vcpu *vcpu);
628static u32 vmx_segment_access_rights(struct kvm_segment *var); 658static u32 vmx_segment_access_rights(struct kvm_segment *var);
659static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
629 660
630static DEFINE_PER_CPU(struct vmcs *, vmxarea); 661static DEFINE_PER_CPU(struct vmcs *, vmxarea);
631static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 662static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -784,6 +815,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
784 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; 815 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
785} 816}
786 817
818static inline bool cpu_has_vmx_posted_intr(void)
819{
820 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
821}
822
823static inline bool cpu_has_vmx_apicv(void)
824{
825 return cpu_has_vmx_apic_register_virt() &&
826 cpu_has_vmx_virtual_intr_delivery() &&
827 cpu_has_vmx_posted_intr();
828}
829
787static inline bool cpu_has_vmx_flexpriority(void) 830static inline bool cpu_has_vmx_flexpriority(void)
788{ 831{
789 return cpu_has_vmx_tpr_shadow() && 832 return cpu_has_vmx_tpr_shadow() &&
@@ -2551,12 +2594,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2551 u32 _vmexit_control = 0; 2594 u32 _vmexit_control = 0;
2552 u32 _vmentry_control = 0; 2595 u32 _vmentry_control = 0;
2553 2596
2554 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
2555 opt = PIN_BASED_VIRTUAL_NMIS;
2556 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
2557 &_pin_based_exec_control) < 0)
2558 return -EIO;
2559
2560 min = CPU_BASED_HLT_EXITING | 2597 min = CPU_BASED_HLT_EXITING |
2561#ifdef CONFIG_X86_64 2598#ifdef CONFIG_X86_64
2562 CPU_BASED_CR8_LOAD_EXITING | 2599 CPU_BASED_CR8_LOAD_EXITING |
@@ -2627,11 +2664,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2627#ifdef CONFIG_X86_64 2664#ifdef CONFIG_X86_64
2628 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 2665 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
2629#endif 2666#endif
2630 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; 2667 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
2668 VM_EXIT_ACK_INTR_ON_EXIT;
2631 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 2669 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
2632 &_vmexit_control) < 0) 2670 &_vmexit_control) < 0)
2633 return -EIO; 2671 return -EIO;
2634 2672
2673 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
2674 opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
2675 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
2676 &_pin_based_exec_control) < 0)
2677 return -EIO;
2678
2679 if (!(_cpu_based_2nd_exec_control &
2680 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
2681 !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
2682 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
2683
2635 min = 0; 2684 min = 0;
2636 opt = VM_ENTRY_LOAD_IA32_PAT; 2685 opt = VM_ENTRY_LOAD_IA32_PAT;
2637 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 2686 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
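
Moving the pin-based probe after the secondary and VM-exit probes lets setup_vmcs_config() strip PIN_BASED_POSTED_INTR whenever virtual-interrupt delivery or acknowledge-interrupt-on-exit did not survive adjustment, since posting is only usable when the CPU can inject into the virtual APIC and the host can pick up the notification vector on exit. A compact sketch of that dependency filtering with made-up flag values (the real encodings live in asm/vmx.h):

#include <stdio.h>

/* illustrative bit values only, not the architectural encodings */
#define TOY_PIN_POSTED_INTR     (1u << 7)
#define TOY_SEC_VIRT_INTR_DELIV (1u << 9)
#define TOY_EXIT_ACK_INTR       (1u << 15)

static unsigned int toy_filter_pin_based(unsigned int pin_based,
                                         unsigned int secondary,
                                         unsigned int vmexit)
{
    if (!(secondary & TOY_SEC_VIRT_INTR_DELIV) ||
        !(vmexit & TOY_EXIT_ACK_INTR))
        pin_based &= ~TOY_PIN_POSTED_INTR;      /* posting needs both features */
    return pin_based;
}

int main(void)
{
    unsigned int pin = TOY_PIN_POSTED_INTR;

    printf("with both features : %#x\n",
           toy_filter_pin_based(pin, TOY_SEC_VIRT_INTR_DELIV, TOY_EXIT_ACK_INTR));
    printf("missing ack-on-exit: %#x\n",
           toy_filter_pin_based(pin, TOY_SEC_VIRT_INTR_DELIV, 0));
    return 0;
}
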
@@ -2810,14 +2859,16 @@ static __init int hardware_setup(void)
2810 if (!cpu_has_vmx_ple()) 2859 if (!cpu_has_vmx_ple())
2811 ple_gap = 0; 2860 ple_gap = 0;
2812 2861
2813 if (!cpu_has_vmx_apic_register_virt() || 2862 if (!cpu_has_vmx_apicv())
2814 !cpu_has_vmx_virtual_intr_delivery()) 2863 enable_apicv = 0;
2815 enable_apicv_reg_vid = 0;
2816 2864
2817 if (enable_apicv_reg_vid) 2865 if (enable_apicv)
2818 kvm_x86_ops->update_cr8_intercept = NULL; 2866 kvm_x86_ops->update_cr8_intercept = NULL;
2819 else 2867 else {
2820 kvm_x86_ops->hwapic_irr_update = NULL; 2868 kvm_x86_ops->hwapic_irr_update = NULL;
2869 kvm_x86_ops->deliver_posted_interrupt = NULL;
2870 kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
2871 }
2821 2872
2822 if (nested) 2873 if (nested)
2823 nested_vmx_setup_ctls_msrs(); 2874 nested_vmx_setup_ctls_msrs();
@@ -3873,13 +3924,57 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
3873 msr, MSR_TYPE_W); 3924 msr, MSR_TYPE_W);
3874} 3925}
3875 3926
3927static int vmx_vm_has_apicv(struct kvm *kvm)
3928{
3929 return enable_apicv && irqchip_in_kernel(kvm);
3930}
3931
3932/*
3933 * Send an interrupt to a vcpu via the posted-interrupt mechanism.
3934 * 1. If the target vcpu is running (non-root mode), send a posted-interrupt
3935 *    notification and the hardware will sync PIR to vIRR atomically.
3936 * 2. If the target vcpu is not running (root mode), kick it so it picks up
3937 *    the interrupt from PIR on the next vmentry.

3938 */
3939static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
3940{
3941 struct vcpu_vmx *vmx = to_vmx(vcpu);
3942 int r;
3943
3944 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
3945 return;
3946
3947 r = pi_test_and_set_on(&vmx->pi_desc);
3948 kvm_make_request(KVM_REQ_EVENT, vcpu);
3949 if (!r && (vcpu->mode == IN_GUEST_MODE))
3950 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
3951 POSTED_INTR_VECTOR);
3952 else
3953 kvm_vcpu_kick(vcpu);
3954}
3955
3956static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
3957{
3958 struct vcpu_vmx *vmx = to_vmx(vcpu);
3959
3960 if (!pi_test_and_clear_on(&vmx->pi_desc))
3961 return;
3962
3963 kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
3964}
3965
3966static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
3967{
3968 return;
3969}
3970
3876/* 3971/*
3877 * Set up the vmcs's constant host-state fields, i.e., host-state fields that 3972 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
3878 * will not change in the lifetime of the guest. 3973 * will not change in the lifetime of the guest.
3879 * Note that host-state that does change is set elsewhere. E.g., host-state 3974 * Note that host-state that does change is set elsewhere. E.g., host-state
3880 * that is set differently for each CPU is set in vmx_vcpu_load(), not here. 3975 * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
3881 */ 3976 */
3882static void vmx_set_constant_host_state(void) 3977static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
3883{ 3978{
3884 u32 low32, high32; 3979 u32 low32, high32;
3885 unsigned long tmpl; 3980 unsigned long tmpl;
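
vmx_sync_pir_to_irr() is the consumer side of posting: on the vcpu's own path it clears the ON bit first and, only if it was set, folds the PIR words into the virtual APIC's IRR, so a notification posted after the clear is guaranteed to be noticed on the next pass. A rough standalone model of that sequence (kvm_apic_update_irr is only imitated here by moving and OR-ing the words; the toy_* names are invented):

#include <stdio.h>

struct toy_pi {
    unsigned int pir[8];
    unsigned int control;   /* bit 0: ON */
};

/* returns non-zero if ON was set, clearing it atomically */
static int toy_test_and_clear_on(struct toy_pi *pi)
{
    unsigned int old = __atomic_fetch_and(&pi->control, ~1u, __ATOMIC_SEQ_CST);
    return old & 1u;
}

/* stand-in for kvm_apic_update_irr(): move the posted bits into the vIRR image */
static void toy_update_irr(unsigned int *irr, unsigned int *pir)
{
    for (int i = 0; i < 8; i++)
        irr[i] |= __atomic_exchange_n(&pir[i], 0, __ATOMIC_SEQ_CST);
}

static void toy_sync_pir_to_irr(struct toy_pi *pi, unsigned int *irr)
{
    if (!toy_test_and_clear_on(pi))
        return;                 /* nothing was posted since the last sync */
    toy_update_irr(irr, pi->pir);
}

int main(void)
{
    struct toy_pi pi = { .pir = { 0 }, .control = 1 };
    unsigned int irr[8] = { 0 };

    pi.pir[1] |= 1u << 1;       /* pretend vector 33 was posted */
    toy_sync_pir_to_irr(&pi, irr);
    printf("irr[1] = %#x\n", irr[1]);
    return 0;
}
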
@@ -3907,6 +4002,7 @@ static void vmx_set_constant_host_state(void)
3907 4002
3908 native_store_idt(&dt); 4003 native_store_idt(&dt);
3909 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ 4004 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
4005 vmx->host_idt_base = dt.address;
3910 4006
3911 vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ 4007 vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
3912 4008
@@ -3932,6 +4028,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
3932 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); 4028 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
3933} 4029}
3934 4030
4031static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
4032{
4033 u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
4034
4035 if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
4036 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
4037 return pin_based_exec_ctrl;
4038}
4039
3935static u32 vmx_exec_control(struct vcpu_vmx *vmx) 4040static u32 vmx_exec_control(struct vcpu_vmx *vmx)
3936{ 4041{
3937 u32 exec_control = vmcs_config.cpu_based_exec_ctrl; 4042 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -3949,11 +4054,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
3949 return exec_control; 4054 return exec_control;
3950} 4055}
3951 4056
3952static int vmx_vm_has_apicv(struct kvm *kvm)
3953{
3954 return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
3955}
3956
3957static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) 4057static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
3958{ 4058{
3959 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; 4059 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
@@ -4009,8 +4109,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4009 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ 4109 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
4010 4110
4011 /* Control */ 4111 /* Control */
4012 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, 4112 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
4013 vmcs_config.pin_based_exec_ctrl);
4014 4113
4015 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); 4114 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
4016 4115
@@ -4019,13 +4118,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4019 vmx_secondary_exec_control(vmx)); 4118 vmx_secondary_exec_control(vmx));
4020 } 4119 }
4021 4120
4022 if (enable_apicv_reg_vid) { 4121 if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
4023 vmcs_write64(EOI_EXIT_BITMAP0, 0); 4122 vmcs_write64(EOI_EXIT_BITMAP0, 0);
4024 vmcs_write64(EOI_EXIT_BITMAP1, 0); 4123 vmcs_write64(EOI_EXIT_BITMAP1, 0);
4025 vmcs_write64(EOI_EXIT_BITMAP2, 0); 4124 vmcs_write64(EOI_EXIT_BITMAP2, 0);
4026 vmcs_write64(EOI_EXIT_BITMAP3, 0); 4125 vmcs_write64(EOI_EXIT_BITMAP3, 0);
4027 4126
4028 vmcs_write16(GUEST_INTR_STATUS, 0); 4127 vmcs_write16(GUEST_INTR_STATUS, 0);
4128
4129 vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
4130 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
4029 } 4131 }
4030 4132
4031 if (ple_gap) { 4133 if (ple_gap) {
@@ -4039,7 +4141,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4039 4141
4040 vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ 4142 vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
4041 vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ 4143 vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
4042 vmx_set_constant_host_state(); 4144 vmx_set_constant_host_state(vmx);
4043#ifdef CONFIG_X86_64 4145#ifdef CONFIG_X86_64
4044 rdmsrl(MSR_FS_BASE, a); 4146 rdmsrl(MSR_FS_BASE, a);
4045 vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ 4147 vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -4167,6 +4269,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4167 vmcs_write64(APIC_ACCESS_ADDR, 4269 vmcs_write64(APIC_ACCESS_ADDR,
4168 page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); 4270 page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
4169 4271
4272 if (vmx_vm_has_apicv(vcpu->kvm))
4273 memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
4274
4170 if (vmx->vpid != 0) 4275 if (vmx->vpid != 0)
4171 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 4276 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
4172 4277
@@ -4325,16 +4430,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4325 4430
4326static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 4431static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4327{ 4432{
4328 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { 4433 if (is_guest_mode(vcpu)) {
4329 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 4434 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4330 if (to_vmx(vcpu)->nested.nested_run_pending || 4435
4331 (vmcs12->idt_vectoring_info_field & 4436 if (to_vmx(vcpu)->nested.nested_run_pending)
4332 VECTORING_INFO_VALID_MASK))
4333 return 0; 4437 return 0;
4334 nested_vmx_vmexit(vcpu); 4438 if (nested_exit_on_intr(vcpu)) {
4335 vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; 4439 nested_vmx_vmexit(vcpu);
4336 vmcs12->vm_exit_intr_info = 0; 4440 vmcs12->vm_exit_reason =
4337 /* fall through to normal code, but now in L1, not L2 */ 4441 EXIT_REASON_EXTERNAL_INTERRUPT;
4442 vmcs12->vm_exit_intr_info = 0;
4443 /*
4444 * fall through to normal code, but now in L1, not L2
4445 */
4446 }
4338 } 4447 }
4339 4448
4340 return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && 4449 return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
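
The reworked vmx_interrupt_allowed() checks nested_run_pending before anything else, so a queued VMLAUNCH/VMRESUME is never cancelled by an interrupt, and only then reflects the interrupt to L1 when L1 asked for external-interrupt exits. A toy decision function sketching that ordering (the field names are invented stand-ins for the real vcpu state):

#include <stdio.h>

struct toy_vcpu {
    int in_l2;                  /* is_guest_mode() */
    int nested_run_pending;     /* VMLAUNCH/VMRESUME not yet completed */
    int l1_wants_intr_exits;    /* nested_exit_on_intr() */
    int rflags_if;              /* guest IF flag */
    int intr_window_open;       /* no STI/MOV-SS blocking */
};

static int toy_interrupt_allowed(struct toy_vcpu *v)
{
    if (v->in_l2) {
        if (v->nested_run_pending)
            return 0;           /* never preempt an in-flight nested entry */
        if (v->l1_wants_intr_exits)
            v->in_l2 = 0;       /* emulate a VM exit to L1 for the interrupt */
    }
    return v->rflags_if && v->intr_window_open;
}

int main(void)
{
    struct toy_vcpu v = { .in_l2 = 1, .nested_run_pending = 1,
                          .l1_wants_intr_exits = 1, .rflags_if = 1,
                          .intr_window_open = 1 };

    printf("while nested run pending: %d\n", toy_interrupt_allowed(&v));
    v.nested_run_pending = 0;
    printf("after the nested entry  : %d\n", toy_interrupt_allowed(&v));
    return 0;
}
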
@@ -5189,7 +5298,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5189 if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) 5298 if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
5190 return 1; 5299 return 1;
5191 5300
5192 err = emulate_instruction(vcpu, 0); 5301 err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
5193 5302
5194 if (err == EMULATE_DO_MMIO) { 5303 if (err == EMULATE_DO_MMIO) {
5195 ret = 0; 5304 ret = 0;
@@ -6112,14 +6221,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6112 case EXIT_REASON_TRIPLE_FAULT: 6221 case EXIT_REASON_TRIPLE_FAULT:
6113 return 1; 6222 return 1;
6114 case EXIT_REASON_PENDING_INTERRUPT: 6223 case EXIT_REASON_PENDING_INTERRUPT:
6224 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
6115 case EXIT_REASON_NMI_WINDOW: 6225 case EXIT_REASON_NMI_WINDOW:
6116 /* 6226 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
6117 * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit
6118 * (aka Interrupt Window Exiting) only when L1 turned it on,
6119 * so if we got a PENDING_INTERRUPT exit, this must be for L1.
6120 * Same for NMI Window Exiting.
6121 */
6122 return 1;
6123 case EXIT_REASON_TASK_SWITCH: 6227 case EXIT_REASON_TASK_SWITCH:
6124 return 1; 6228 return 1;
6125 case EXIT_REASON_CPUID: 6229 case EXIT_REASON_CPUID:
@@ -6370,6 +6474,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6370 6474
6371static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 6475static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6372{ 6476{
6477 if (!vmx_vm_has_apicv(vcpu->kvm))
6478 return;
6479
6373 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); 6480 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6374 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); 6481 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6375 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); 6482 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
@@ -6400,6 +6507,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
6400 } 6507 }
6401} 6508}
6402 6509
6510static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
6511{
6512 u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6513
6514 /*
6515	 * If this exit was caused by an external interrupt, the IF bit is set in
6516	 * the rflags/eflags image on the interrupt stack frame, so interrupts are
6517	 * re-enabled when the interrupt handler returns.
6518 */
6519 if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
6520 == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
6521 unsigned int vector;
6522 unsigned long entry;
6523 gate_desc *desc;
6524 struct vcpu_vmx *vmx = to_vmx(vcpu);
6525#ifdef CONFIG_X86_64
6526 unsigned long tmp;
6527#endif
6528
6529 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
6530 desc = (gate_desc *)vmx->host_idt_base + vector;
6531 entry = gate_offset(*desc);
6532 asm volatile(
6533#ifdef CONFIG_X86_64
6534 "mov %%" _ASM_SP ", %[sp]\n\t"
6535 "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
6536 "push $%c[ss]\n\t"
6537 "push %[sp]\n\t"
6538#endif
6539 "pushf\n\t"
6540 "orl $0x200, (%%" _ASM_SP ")\n\t"
6541 __ASM_SIZE(push) " $%c[cs]\n\t"
6542 "call *%[entry]\n\t"
6543 :
6544#ifdef CONFIG_X86_64
6545 [sp]"=&r"(tmp)
6546#endif
6547 :
6548 [entry]"r"(entry),
6549 [ss]"i"(__KERNEL_DS),
6550 [cs]"i"(__KERNEL_CS)
6551 );
6552 } else
6553 local_irq_enable();
6554}
6555
6403static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) 6556static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
6404{ 6557{
6405 u32 exit_intr_info; 6558 u32 exit_intr_info;
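
With VM_EXIT_ACK_INTR_ON_EXIT set the CPU acknowledges the external interrupt as part of the VM exit, so the host never takes its normal IRQ trap; vmx_handle_external_intr() therefore looks the vector up in the saved host IDT and calls the handler by hand on a hand-built interrupt frame, and simply re-enables interrupts for every other exit reason. A heavily simplified user-space analogue of just the dispatch decision (a table of function pointers stands in for the IDT; the bit layout of exit_intr_info is only mimicked):

#include <stdio.h>

#define TOY_NR_VECTORS 256

typedef void (*toy_handler_t)(void);

static toy_handler_t toy_idt[TOY_NR_VECTORS];

static void toy_timer_handler(void) { puts("timer interrupt handled"); }
static void toy_enable_irqs(void)   { puts("local_irq_enable()"); }

/* exit_intr_info mimicked loosely: low byte = vector, bit 31 = valid */
static void toy_handle_external_intr(unsigned int exit_intr_info,
                                     int was_external_interrupt)
{
    if (was_external_interrupt && (exit_intr_info & 0x80000000u)) {
        unsigned int vector = exit_intr_info & 0xff;
        toy_idt[vector]();      /* call the host handler directly */
    } else {
        toy_enable_irqs();      /* nothing pending, just re-enable interrupts */
    }
}

int main(void)
{
    toy_idt[0xec] = toy_timer_handler;
    toy_handle_external_intr(0x800000ecu, 1);
    toy_handle_external_intr(0, 0);
    return 0;
}
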
@@ -6498,8 +6651,6 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
6498 6651
6499static void vmx_complete_interrupts(struct vcpu_vmx *vmx) 6652static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
6500{ 6653{
6501 if (is_guest_mode(&vmx->vcpu))
6502 return;
6503 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, 6654 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
6504 VM_EXIT_INSTRUCTION_LEN, 6655 VM_EXIT_INSTRUCTION_LEN,
6505 IDT_VECTORING_ERROR_CODE); 6656 IDT_VECTORING_ERROR_CODE);
@@ -6507,8 +6658,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
6507 6658
6508static void vmx_cancel_injection(struct kvm_vcpu *vcpu) 6659static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
6509{ 6660{
6510 if (is_guest_mode(vcpu))
6511 return;
6512 __vmx_complete_interrupts(vcpu, 6661 __vmx_complete_interrupts(vcpu,
6513 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), 6662 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
6514 VM_ENTRY_INSTRUCTION_LEN, 6663 VM_ENTRY_INSTRUCTION_LEN,
@@ -6540,21 +6689,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6540 struct vcpu_vmx *vmx = to_vmx(vcpu); 6689 struct vcpu_vmx *vmx = to_vmx(vcpu);
6541 unsigned long debugctlmsr; 6690 unsigned long debugctlmsr;
6542 6691
6543 if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
6544 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6545 if (vmcs12->idt_vectoring_info_field &
6546 VECTORING_INFO_VALID_MASK) {
6547 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
6548 vmcs12->idt_vectoring_info_field);
6549 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
6550 vmcs12->vm_exit_instruction_len);
6551 if (vmcs12->idt_vectoring_info_field &
6552 VECTORING_INFO_DELIVER_CODE_MASK)
6553 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
6554 vmcs12->idt_vectoring_error_code);
6555 }
6556 }
6557
6558 /* Record the guest's net vcpu time for enforced NMI injections. */ 6692 /* Record the guest's net vcpu time for enforced NMI injections. */
6559 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) 6693 if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
6560 vmx->entry_time = ktime_get(); 6694 vmx->entry_time = ktime_get();
@@ -6713,17 +6847,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
6713 6847
6714 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 6848 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
6715 6849
6716 if (is_guest_mode(vcpu)) {
6717 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6718 vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
6719 if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
6720 vmcs12->idt_vectoring_error_code =
6721 vmcs_read32(IDT_VECTORING_ERROR_CODE);
6722 vmcs12->vm_exit_instruction_len =
6723 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
6724 }
6725 }
6726
6727 vmx->loaded_vmcs->launched = 1; 6850 vmx->loaded_vmcs->launched = 1;
6728 6851
6729 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 6852 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6785,10 +6908,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
6785 put_cpu(); 6908 put_cpu();
6786 if (err) 6909 if (err)
6787 goto free_vmcs; 6910 goto free_vmcs;
6788 if (vm_need_virtualize_apic_accesses(kvm)) 6911 if (vm_need_virtualize_apic_accesses(kvm)) {
6789 err = alloc_apic_access_page(kvm); 6912 err = alloc_apic_access_page(kvm);
6790 if (err) 6913 if (err)
6791 goto free_vmcs; 6914 goto free_vmcs;
6915 }
6792 6916
6793 if (enable_ept) { 6917 if (enable_ept) {
6794 if (!kvm->arch.ept_identity_map_addr) 6918 if (!kvm->arch.ept_identity_map_addr)
@@ -7071,7 +7195,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7071 * Other fields are different per CPU, and will be set later when 7195 * Other fields are different per CPU, and will be set later when
7072 * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. 7196 * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
7073 */ 7197 */
7074 vmx_set_constant_host_state(); 7198 vmx_set_constant_host_state(vmx);
7075 7199
7076 /* 7200 /*
7077 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before 7201 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -7330,6 +7454,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7330 vcpu->arch.cr4_guest_owned_bits)); 7454 vcpu->arch.cr4_guest_owned_bits));
7331} 7455}
7332 7456
7457static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
7458 struct vmcs12 *vmcs12)
7459{
7460 u32 idt_vectoring;
7461 unsigned int nr;
7462
7463 if (vcpu->arch.exception.pending) {
7464 nr = vcpu->arch.exception.nr;
7465 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
7466
7467 if (kvm_exception_is_soft(nr)) {
7468 vmcs12->vm_exit_instruction_len =
7469 vcpu->arch.event_exit_inst_len;
7470 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
7471 } else
7472 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
7473
7474 if (vcpu->arch.exception.has_error_code) {
7475 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
7476 vmcs12->idt_vectoring_error_code =
7477 vcpu->arch.exception.error_code;
7478 }
7479
7480 vmcs12->idt_vectoring_info_field = idt_vectoring;
7481 } else if (vcpu->arch.nmi_pending) {
7482 vmcs12->idt_vectoring_info_field =
7483 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
7484 } else if (vcpu->arch.interrupt.pending) {
7485 nr = vcpu->arch.interrupt.nr;
7486 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
7487
7488 if (vcpu->arch.interrupt.soft) {
7489 idt_vectoring |= INTR_TYPE_SOFT_INTR;
7490 vmcs12->vm_entry_instruction_len =
7491 vcpu->arch.event_exit_inst_len;
7492 } else
7493 idt_vectoring |= INTR_TYPE_EXT_INTR;
7494
7495 vmcs12->idt_vectoring_info_field = idt_vectoring;
7496 }
7497}
7498
7333/* 7499/*
7334 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits 7500 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
7335 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), 7501 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
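
vmcs12_save_pending_event() rebuilds idt_vectoring_info_field from KVM's software queues (pending exception, NMI, or external/soft interrupt) rather than copying the hardware field, so an event that was meant for L2 is handed back to L1 in architectural form. A standalone encoder for the usual VMX interruption-information layout as I understand it (vector in bits 7:0, type in bits 10:8, deliver-error-code in bit 11, valid in bit 31); treat the constants as an illustration, not a reference:

#include <stdio.h>

#define TOY_VALID            (1u << 31)
#define TOY_DELIVER_ERR_CODE (1u << 11)
#define TOY_TYPE_EXT_INTR    (0u << 8)
#define TOY_TYPE_NMI         (2u << 8)
#define TOY_TYPE_HARD_EXCPT  (3u << 8)

static unsigned int toy_encode_event(unsigned int vector, unsigned int type,
                                     int has_error_code)
{
    unsigned int info = vector | type | TOY_VALID;

    if (has_error_code)
        info |= TOY_DELIVER_ERR_CODE;
    return info;
}

int main(void)
{
    /* a pending #PF (vector 14, hardware exception, with error code) */
    printf("idt_vectoring_info = %#x\n",
           toy_encode_event(14, TOY_TYPE_HARD_EXCPT, 1));
    /* a pending NMI (vector 2) */
    printf("idt_vectoring_info = %#x\n",
           toy_encode_event(2, TOY_TYPE_NMI, 0));
    return 0;
}
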
@@ -7402,7 +7568,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7402 /* TODO: These cannot have changed unless we have MSR bitmaps and 7568 /* TODO: These cannot have changed unless we have MSR bitmaps and
7403 * the relevant bit asks not to trap the change */ 7569 * the relevant bit asks not to trap the change */
7404 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 7570 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
7405 if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT) 7571 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
7406 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); 7572 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
7407 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 7573 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
7408 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 7574 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
@@ -7414,16 +7580,34 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7414 vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 7580 vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7415 7581
7416 vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 7582 vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
7417 vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 7583 if ((vmcs12->vm_exit_intr_info &
7418 vmcs12->idt_vectoring_info_field = to_vmx(vcpu)->idt_vectoring_info; 7584 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
7419 vmcs12->idt_vectoring_error_code = 7585 (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
7420 vmcs_read32(IDT_VECTORING_ERROR_CODE); 7586 vmcs12->vm_exit_intr_error_code =
7587 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
7588 vmcs12->idt_vectoring_info_field = 0;
7421 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 7589 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
7422 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 7590 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
7423 7591
7424 /* clear vm-entry fields which are to be cleared on exit */ 7592 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
7425 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) 7593 /* vm_entry_intr_info_field is cleared on exit. Emulate this
7594 * instead of reading the real value. */
7426 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; 7595 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
7596
7597 /*
7598	 * Transfer the event that L0 or L1 may have wanted to inject into
7599	 * L2 into the IDT_VECTORING_INFO_FIELD.
7600 */
7601 vmcs12_save_pending_event(vcpu, vmcs12);
7602 }
7603
7604 /*
7605 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
7606 * preserved above and would only end up incorrectly in L1.
7607 */
7608 vcpu->arch.nmi_injected = false;
7609 kvm_clear_exception_queue(vcpu);
7610 kvm_clear_interrupt_queue(vcpu);
7427} 7611}
7428 7612
7429/* 7613/*
@@ -7523,6 +7707,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
7523 int cpu; 7707 int cpu;
7524 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 7708 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7525 7709
7710 /* trying to cancel vmlaunch/vmresume is a bug */
7711 WARN_ON_ONCE(vmx->nested.nested_run_pending);
7712
7526 leave_guest_mode(vcpu); 7713 leave_guest_mode(vcpu);
7527 prepare_vmcs12(vcpu, vmcs12); 7714 prepare_vmcs12(vcpu, vmcs12);
7528 7715
@@ -7657,6 +7844,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
7657 .load_eoi_exitmap = vmx_load_eoi_exitmap, 7844 .load_eoi_exitmap = vmx_load_eoi_exitmap,
7658 .hwapic_irr_update = vmx_hwapic_irr_update, 7845 .hwapic_irr_update = vmx_hwapic_irr_update,
7659 .hwapic_isr_update = vmx_hwapic_isr_update, 7846 .hwapic_isr_update = vmx_hwapic_isr_update,
7847 .sync_pir_to_irr = vmx_sync_pir_to_irr,
7848 .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
7660 7849
7661 .set_tss_addr = vmx_set_tss_addr, 7850 .set_tss_addr = vmx_set_tss_addr,
7662 .get_tdp_level = get_ept_level, 7851 .get_tdp_level = get_ept_level,
@@ -7685,6 +7874,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
7685 .set_tdp_cr3 = vmx_set_cr3, 7874 .set_tdp_cr3 = vmx_set_cr3,
7686 7875
7687 .check_intercept = vmx_check_intercept, 7876 .check_intercept = vmx_check_intercept,
7877 .handle_external_intr = vmx_handle_external_intr,
7688}; 7878};
7689 7879
7690static int __init vmx_init(void) 7880static int __init vmx_init(void)
@@ -7741,7 +7931,7 @@ static int __init vmx_init(void)
7741 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), 7931 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
7742 __alignof__(struct vcpu_vmx), THIS_MODULE); 7932 __alignof__(struct vcpu_vmx), THIS_MODULE);
7743 if (r) 7933 if (r)
7744 goto out3; 7934 goto out5;
7745 7935
7746#ifdef CONFIG_KEXEC 7936#ifdef CONFIG_KEXEC
7747 rcu_assign_pointer(crash_vmclear_loaded_vmcss, 7937 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
@@ -7759,7 +7949,7 @@ static int __init vmx_init(void)
7759 memcpy(vmx_msr_bitmap_longmode_x2apic, 7949 memcpy(vmx_msr_bitmap_longmode_x2apic,
7760 vmx_msr_bitmap_longmode, PAGE_SIZE); 7950 vmx_msr_bitmap_longmode, PAGE_SIZE);
7761 7951
7762 if (enable_apicv_reg_vid) { 7952 if (enable_apicv) {
7763 for (msr = 0x800; msr <= 0x8ff; msr++) 7953 for (msr = 0x800; msr <= 0x8ff; msr++)
7764 vmx_disable_intercept_msr_read_x2apic(msr); 7954 vmx_disable_intercept_msr_read_x2apic(msr);
7765 7955
@@ -7789,6 +7979,8 @@ static int __init vmx_init(void)
7789 7979
7790 return 0; 7980 return 0;
7791 7981
7982out5:
7983 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
7792out4: 7984out4:
7793 free_page((unsigned long)vmx_msr_bitmap_longmode); 7985 free_page((unsigned long)vmx_msr_bitmap_longmode);
7794out3: 7986out3:
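
The new out5 label exists because vmx_msr_bitmap_longmode_x2apic is allocated before kvm_init() is called, so a kvm_init() failure has one more resource to unwind; the usual kernel idiom is one label per allocation, placed so the cleanup runs in reverse order of setup. A minimal sketch of that staged-unwind pattern with invented resources:

#include <stdio.h>
#include <stdlib.h>

static void *res_a, *res_b, *res_c;

static int toy_module_init(void)
{
    res_a = malloc(32);
    if (!res_a)
        goto out;
    res_b = malloc(32);
    if (!res_b)
        goto out_free_a;
    res_c = malloc(32);
    if (!res_c)
        goto out_free_b;        /* a late failure unwinds every earlier step */
    return 0;

out_free_b:
    free(res_b);
out_free_a:
    free(res_a);
out:
    return -1;
}

int main(void)
{
    int r = toy_module_init();

    printf("init %s\n", r ? "failed" : "succeeded");
    if (!r) {                   /* demo only: release what init kept around */
        free(res_c);
        free(res_b);
        free(res_a);
    }
    return 0;
}
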
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2aaba814f1c8..50e2e10b8041 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -261,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
261} 261}
262EXPORT_SYMBOL_GPL(kvm_set_apic_base); 262EXPORT_SYMBOL_GPL(kvm_set_apic_base);
263 263
264asmlinkage void kvm_spurious_fault(void)
265{
266 /* Fault while not rebooting. We want the trace. */
267 BUG();
268}
269EXPORT_SYMBOL_GPL(kvm_spurious_fault);
270
264#define EXCPT_BENIGN 0 271#define EXCPT_BENIGN 0
265#define EXCPT_CONTRIBUTORY 1 272#define EXCPT_CONTRIBUTORY 1
266#define EXCPT_PF 2 273#define EXCPT_PF 2
@@ -2040,7 +2047,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2040 case MSR_P6_EVNTSEL0: 2047 case MSR_P6_EVNTSEL0:
2041 case MSR_P6_EVNTSEL1: 2048 case MSR_P6_EVNTSEL1:
2042 if (kvm_pmu_msr(vcpu, msr)) 2049 if (kvm_pmu_msr(vcpu, msr))
2043 return kvm_pmu_set_msr(vcpu, msr, data); 2050 return kvm_pmu_set_msr(vcpu, msr_info);
2044 2051
2045 if (pr || data != 0) 2052 if (pr || data != 0)
2046 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " 2053 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
@@ -2086,7 +2093,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2086 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 2093 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2087 return xen_hvm_config(vcpu, data); 2094 return xen_hvm_config(vcpu, data);
2088 if (kvm_pmu_msr(vcpu, msr)) 2095 if (kvm_pmu_msr(vcpu, msr))
2089 return kvm_pmu_set_msr(vcpu, msr, data); 2096 return kvm_pmu_set_msr(vcpu, msr_info);
2090 if (!ignore_msrs) { 2097 if (!ignore_msrs) {
2091 vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", 2098 vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
2092 msr, data); 2099 msr, data);
@@ -2685,6 +2692,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2685static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2692static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2686 struct kvm_lapic_state *s) 2693 struct kvm_lapic_state *s)
2687{ 2694{
2695 kvm_x86_ops->sync_pir_to_irr(vcpu);
2688 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); 2696 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2689 2697
2690 return 0; 2698 return 0;
@@ -3484,13 +3492,15 @@ out:
3484 return r; 3492 return r;
3485} 3493}
3486 3494
3487int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) 3495int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
3496 bool line_status)
3488{ 3497{
3489 if (!irqchip_in_kernel(kvm)) 3498 if (!irqchip_in_kernel(kvm))
3490 return -ENXIO; 3499 return -ENXIO;
3491 3500
3492 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 3501 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3493 irq_event->irq, irq_event->level); 3502 irq_event->irq, irq_event->level,
3503 line_status);
3494 return 0; 3504 return 0;
3495} 3505}
3496 3506
@@ -4758,11 +4768,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4758} 4768}
4759 4769
4760static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, 4770static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
4761 bool write_fault_to_shadow_pgtable) 4771 bool write_fault_to_shadow_pgtable,
4772 int emulation_type)
4762{ 4773{
4763 gpa_t gpa = cr2; 4774 gpa_t gpa = cr2;
4764 pfn_t pfn; 4775 pfn_t pfn;
4765 4776
4777 if (emulation_type & EMULTYPE_NO_REEXECUTE)
4778 return false;
4779
4766 if (!vcpu->arch.mmu.direct_map) { 4780 if (!vcpu->arch.mmu.direct_map) {
4767 /* 4781 /*
4768 * Write permission should be allowed since only 4782 * Write permission should be allowed since only
@@ -4905,8 +4919,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4905 if (r != EMULATION_OK) { 4919 if (r != EMULATION_OK) {
4906 if (emulation_type & EMULTYPE_TRAP_UD) 4920 if (emulation_type & EMULTYPE_TRAP_UD)
4907 return EMULATE_FAIL; 4921 return EMULATE_FAIL;
4908 if (reexecute_instruction(vcpu, cr2, 4922 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
4909 write_fault_to_spt)) 4923 emulation_type))
4910 return EMULATE_DONE; 4924 return EMULATE_DONE;
4911 if (emulation_type & EMULTYPE_SKIP) 4925 if (emulation_type & EMULTYPE_SKIP)
4912 return EMULATE_FAIL; 4926 return EMULATE_FAIL;
@@ -4936,7 +4950,8 @@ restart:
4936 return EMULATE_DONE; 4950 return EMULATE_DONE;
4937 4951
4938 if (r == EMULATION_FAILED) { 4952 if (r == EMULATION_FAILED) {
4939 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) 4953 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
4954 emulation_type))
4940 return EMULATE_DONE; 4955 return EMULATE_DONE;
4941 4956
4942 return handle_emulation_failure(vcpu); 4957 return handle_emulation_failure(vcpu);
@@ -5647,14 +5662,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
5647#endif 5662#endif
5648} 5663}
5649 5664
5650static void update_eoi_exitmap(struct kvm_vcpu *vcpu) 5665static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5651{ 5666{
5652 u64 eoi_exit_bitmap[4]; 5667 u64 eoi_exit_bitmap[4];
5668 u32 tmr[8];
5669
5670 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
5671 return;
5653 5672
5654 memset(eoi_exit_bitmap, 0, 32); 5673 memset(eoi_exit_bitmap, 0, 32);
5674 memset(tmr, 0, 32);
5655 5675
5656 kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); 5676 kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
5657 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); 5677 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
5678 kvm_apic_update_tmr(vcpu, tmr);
5658} 5679}
5659 5680
5660static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5681static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
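
vcpu_scan_ioapic() extends the old EOI-exitmap update: besides the bitmap of vectors that must force an EOI exit, it now also derives a trigger-mode register (TMR) image from the in-kernel IOAPIC so level-triggered entries aimed at this vcpu are reflected in the virtual APIC before entry. A rough sketch of such a scan with an invented redirection-entry layout (the conditions in the real kvm_ioapic_scan_entry() are more involved):

#include <stdio.h>

#define TOY_IOAPIC_PINS 24

struct toy_rte {
    unsigned char vector;
    unsigned char level_triggered;  /* 1 = level, 0 = edge */
    unsigned char targets_this_vcpu;
};

static void toy_scan_ioapic(const struct toy_rte *rte,
                            unsigned long long eoi_exit_bitmap[4],
                            unsigned int tmr[8])
{
    for (int pin = 0; pin < TOY_IOAPIC_PINS; pin++) {
        unsigned int v = rte[pin].vector;

        if (!rte[pin].targets_this_vcpu)
            continue;
        /* routed vectors need an EOI exit so the IOAPIC can be notified */
        eoi_exit_bitmap[v / 64] |= 1ull << (v % 64);
        if (rte[pin].level_triggered)
            tmr[v / 32] |= 1u << (v % 32);
    }
}

int main(void)
{
    struct toy_rte rte[TOY_IOAPIC_PINS] = { { 0 } };
    unsigned long long eoi[4] = { 0 };
    unsigned int tmr[8] = { 0 };

    rte[2] = (struct toy_rte){ .vector = 0x21, .level_triggered = 1,
                               .targets_this_vcpu = 1 };
    toy_scan_ioapic(rte, eoi, tmr);
    printf("eoi[0]=%#llx tmr[1]=%#x\n", eoi[0], tmr[1]);
    return 0;
}
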
@@ -5710,8 +5731,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5710 kvm_handle_pmu_event(vcpu); 5731 kvm_handle_pmu_event(vcpu);
5711 if (kvm_check_request(KVM_REQ_PMI, vcpu)) 5732 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5712 kvm_deliver_pmi(vcpu); 5733 kvm_deliver_pmi(vcpu);
5713 if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) 5734 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
5714 update_eoi_exitmap(vcpu); 5735 vcpu_scan_ioapic(vcpu);
5715 } 5736 }
5716 5737
5717 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 5738 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -5806,7 +5827,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5806 5827
5807 vcpu->mode = OUTSIDE_GUEST_MODE; 5828 vcpu->mode = OUTSIDE_GUEST_MODE;
5808 smp_wmb(); 5829 smp_wmb();
5809 local_irq_enable(); 5830
5831	 /* Interrupts are enabled by handle_external_intr() */
5832 kvm_x86_ops->handle_external_intr(vcpu);
5810 5833
5811 ++vcpu->stat.exits; 5834 ++vcpu->stat.exits;
5812 5835