aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c1098
1 files changed, 831 insertions, 267 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1e1bc9d412d..e46282a56565 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -38,6 +38,7 @@
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h> 39#include <linux/cpufreq.h>
40#include <linux/user-return-notifier.h> 40#include <linux/user-return-notifier.h>
41#include <linux/srcu.h>
41#include <trace/events/kvm.h> 42#include <trace/events/kvm.h>
42#undef TRACE_INCLUDE_FILE 43#undef TRACE_INCLUDE_FILE
43#define CREATE_TRACE_POINTS 44#define CREATE_TRACE_POINTS
@@ -93,16 +94,16 @@ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
93 94
94struct kvm_shared_msrs_global { 95struct kvm_shared_msrs_global {
95 int nr; 96 int nr;
96 struct kvm_shared_msr { 97 u32 msrs[KVM_NR_SHARED_MSRS];
97 u32 msr;
98 u64 value;
99 } msrs[KVM_NR_SHARED_MSRS];
100}; 98};
101 99
102struct kvm_shared_msrs { 100struct kvm_shared_msrs {
103 struct user_return_notifier urn; 101 struct user_return_notifier urn;
104 bool registered; 102 bool registered;
105 u64 current_value[KVM_NR_SHARED_MSRS]; 103 struct kvm_shared_msr_values {
104 u64 host;
105 u64 curr;
106 } values[KVM_NR_SHARED_MSRS];
106}; 107};
107 108
108static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; 109static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
@@ -147,53 +148,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
147static void kvm_on_user_return(struct user_return_notifier *urn) 148static void kvm_on_user_return(struct user_return_notifier *urn)
148{ 149{
149 unsigned slot; 150 unsigned slot;
150 struct kvm_shared_msr *global;
151 struct kvm_shared_msrs *locals 151 struct kvm_shared_msrs *locals
152 = container_of(urn, struct kvm_shared_msrs, urn); 152 = container_of(urn, struct kvm_shared_msrs, urn);
153 struct kvm_shared_msr_values *values;
153 154
154 for (slot = 0; slot < shared_msrs_global.nr; ++slot) { 155 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
155 global = &shared_msrs_global.msrs[slot]; 156 values = &locals->values[slot];
156 if (global->value != locals->current_value[slot]) { 157 if (values->host != values->curr) {
157 wrmsrl(global->msr, global->value); 158 wrmsrl(shared_msrs_global.msrs[slot], values->host);
158 locals->current_value[slot] = global->value; 159 values->curr = values->host;
159 } 160 }
160 } 161 }
161 locals->registered = false; 162 locals->registered = false;
162 user_return_notifier_unregister(urn); 163 user_return_notifier_unregister(urn);
163} 164}
164 165
165void kvm_define_shared_msr(unsigned slot, u32 msr) 166static void shared_msr_update(unsigned slot, u32 msr)
166{ 167{
167 int cpu; 168 struct kvm_shared_msrs *smsr;
168 u64 value; 169 u64 value;
169 170
171 smsr = &__get_cpu_var(shared_msrs);
172 /* only read, and nobody should modify it at this time,
173 * so don't need lock */
174 if (slot >= shared_msrs_global.nr) {
175 printk(KERN_ERR "kvm: invalid MSR slot!");
176 return;
177 }
178 rdmsrl_safe(msr, &value);
179 smsr->values[slot].host = value;
180 smsr->values[slot].curr = value;
181}
182
183void kvm_define_shared_msr(unsigned slot, u32 msr)
184{
170 if (slot >= shared_msrs_global.nr) 185 if (slot >= shared_msrs_global.nr)
171 shared_msrs_global.nr = slot + 1; 186 shared_msrs_global.nr = slot + 1;
172 shared_msrs_global.msrs[slot].msr = msr; 187 shared_msrs_global.msrs[slot] = msr;
173 rdmsrl_safe(msr, &value); 188 /* we need ensured the shared_msr_global have been updated */
174 shared_msrs_global.msrs[slot].value = value; 189 smp_wmb();
175 for_each_online_cpu(cpu)
176 per_cpu(shared_msrs, cpu).current_value[slot] = value;
177} 190}
178EXPORT_SYMBOL_GPL(kvm_define_shared_msr); 191EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
179 192
180static void kvm_shared_msr_cpu_online(void) 193static void kvm_shared_msr_cpu_online(void)
181{ 194{
182 unsigned i; 195 unsigned i;
183 struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
184 196
185 for (i = 0; i < shared_msrs_global.nr; ++i) 197 for (i = 0; i < shared_msrs_global.nr; ++i)
186 locals->current_value[i] = shared_msrs_global.msrs[i].value; 198 shared_msr_update(i, shared_msrs_global.msrs[i]);
187} 199}
188 200
189void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 201void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
190{ 202{
191 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 203 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
192 204
193 if (((value ^ smsr->current_value[slot]) & mask) == 0) 205 if (((value ^ smsr->values[slot].curr) & mask) == 0)
194 return; 206 return;
195 smsr->current_value[slot] = value; 207 smsr->values[slot].curr = value;
196 wrmsrl(shared_msrs_global.msrs[slot].msr, value); 208 wrmsrl(shared_msrs_global.msrs[slot], value);
197 if (!smsr->registered) { 209 if (!smsr->registered) {
198 smsr->urn.on_user_return = kvm_on_user_return; 210 smsr->urn.on_user_return = kvm_on_user_return;
199 user_return_notifier_register(&smsr->urn); 211 user_return_notifier_register(&smsr->urn);
@@ -257,12 +269,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
257} 269}
258EXPORT_SYMBOL_GPL(kvm_set_apic_base); 270EXPORT_SYMBOL_GPL(kvm_set_apic_base);
259 271
272#define EXCPT_BENIGN 0
273#define EXCPT_CONTRIBUTORY 1
274#define EXCPT_PF 2
275
276static int exception_class(int vector)
277{
278 switch (vector) {
279 case PF_VECTOR:
280 return EXCPT_PF;
281 case DE_VECTOR:
282 case TS_VECTOR:
283 case NP_VECTOR:
284 case SS_VECTOR:
285 case GP_VECTOR:
286 return EXCPT_CONTRIBUTORY;
287 default:
288 break;
289 }
290 return EXCPT_BENIGN;
291}
292
293static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
294 unsigned nr, bool has_error, u32 error_code)
295{
296 u32 prev_nr;
297 int class1, class2;
298
299 if (!vcpu->arch.exception.pending) {
300 queue:
301 vcpu->arch.exception.pending = true;
302 vcpu->arch.exception.has_error_code = has_error;
303 vcpu->arch.exception.nr = nr;
304 vcpu->arch.exception.error_code = error_code;
305 return;
306 }
307
308 /* to check exception */
309 prev_nr = vcpu->arch.exception.nr;
310 if (prev_nr == DF_VECTOR) {
311 /* triple fault -> shutdown */
312 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
313 return;
314 }
315 class1 = exception_class(prev_nr);
316 class2 = exception_class(nr);
317 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
318 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
319 /* generate double fault per SDM Table 5-5 */
320 vcpu->arch.exception.pending = true;
321 vcpu->arch.exception.has_error_code = true;
322 vcpu->arch.exception.nr = DF_VECTOR;
323 vcpu->arch.exception.error_code = 0;
324 } else
325 /* replace previous exception with a new one in a hope
326 that instruction re-execution will regenerate lost
327 exception */
328 goto queue;
329}
330
260void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 331void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
261{ 332{
262 WARN_ON(vcpu->arch.exception.pending); 333 kvm_multiple_exception(vcpu, nr, false, 0);
263 vcpu->arch.exception.pending = true;
264 vcpu->arch.exception.has_error_code = false;
265 vcpu->arch.exception.nr = nr;
266} 334}
267EXPORT_SYMBOL_GPL(kvm_queue_exception); 335EXPORT_SYMBOL_GPL(kvm_queue_exception);
268 336
@@ -270,25 +338,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
270 u32 error_code) 338 u32 error_code)
271{ 339{
272 ++vcpu->stat.pf_guest; 340 ++vcpu->stat.pf_guest;
273
274 if (vcpu->arch.exception.pending) {
275 switch(vcpu->arch.exception.nr) {
276 case DF_VECTOR:
277 /* triple fault -> shutdown */
278 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
279 return;
280 case PF_VECTOR:
281 vcpu->arch.exception.nr = DF_VECTOR;
282 vcpu->arch.exception.error_code = 0;
283 return;
284 default:
285 /* replace previous exception with a new one in a hope
286 that instruction re-execution will regenerate lost
287 exception */
288 vcpu->arch.exception.pending = false;
289 break;
290 }
291 }
292 vcpu->arch.cr2 = addr; 341 vcpu->arch.cr2 = addr;
293 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 342 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
294} 343}
@@ -301,11 +350,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
301 350
302void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 351void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
303{ 352{
304 WARN_ON(vcpu->arch.exception.pending); 353 kvm_multiple_exception(vcpu, nr, true, error_code);
305 vcpu->arch.exception.pending = true;
306 vcpu->arch.exception.has_error_code = true;
307 vcpu->arch.exception.nr = nr;
308 vcpu->arch.exception.error_code = error_code;
309} 354}
310EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 355EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
311 356
@@ -383,12 +428,18 @@ out:
383 428
384void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 429void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
385{ 430{
386 if (cr0 & CR0_RESERVED_BITS) { 431 cr0 |= X86_CR0_ET;
432
433#ifdef CONFIG_X86_64
434 if (cr0 & 0xffffffff00000000UL) {
387 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 435 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
388 cr0, vcpu->arch.cr0); 436 cr0, kvm_read_cr0(vcpu));
389 kvm_inject_gp(vcpu, 0); 437 kvm_inject_gp(vcpu, 0);
390 return; 438 return;
391 } 439 }
440#endif
441
442 cr0 &= ~CR0_RESERVED_BITS;
392 443
393 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 444 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
394 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); 445 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
@@ -405,7 +456,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
405 456
406 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 457 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
407#ifdef CONFIG_X86_64 458#ifdef CONFIG_X86_64
408 if ((vcpu->arch.shadow_efer & EFER_LME)) { 459 if ((vcpu->arch.efer & EFER_LME)) {
409 int cs_db, cs_l; 460 int cs_db, cs_l;
410 461
411 if (!is_pae(vcpu)) { 462 if (!is_pae(vcpu)) {
@@ -443,13 +494,13 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
443 494
444void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 495void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
445{ 496{
446 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 497 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f));
447} 498}
448EXPORT_SYMBOL_GPL(kvm_lmsw); 499EXPORT_SYMBOL_GPL(kvm_lmsw);
449 500
450void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 501void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
451{ 502{
452 unsigned long old_cr4 = vcpu->arch.cr4; 503 unsigned long old_cr4 = kvm_read_cr4(vcpu);
453 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 504 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
454 505
455 if (cr4 & CR4_RESERVED_BITS) { 506 if (cr4 & CR4_RESERVED_BITS) {
@@ -575,9 +626,11 @@ static inline u32 bit(int bitno)
575 * kvm-specific. Those are put in the beginning of the list. 626 * kvm-specific. Those are put in the beginning of the list.
576 */ 627 */
577 628
578#define KVM_SAVE_MSRS_BEGIN 2 629#define KVM_SAVE_MSRS_BEGIN 5
579static u32 msrs_to_save[] = { 630static u32 msrs_to_save[] = {
580 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 631 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
632 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
633 HV_X64_MSR_APIC_ASSIST_PAGE,
581 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 634 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
582 MSR_K6_STAR, 635 MSR_K6_STAR,
583#ifdef CONFIG_X86_64 636#ifdef CONFIG_X86_64
@@ -602,7 +655,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
602 } 655 }
603 656
604 if (is_paging(vcpu) 657 if (is_paging(vcpu)
605 && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { 658 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
606 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); 659 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
607 kvm_inject_gp(vcpu, 0); 660 kvm_inject_gp(vcpu, 0);
608 return; 661 return;
@@ -633,9 +686,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
633 kvm_x86_ops->set_efer(vcpu, efer); 686 kvm_x86_ops->set_efer(vcpu, efer);
634 687
635 efer &= ~EFER_LMA; 688 efer &= ~EFER_LMA;
636 efer |= vcpu->arch.shadow_efer & EFER_LMA; 689 efer |= vcpu->arch.efer & EFER_LMA;
637 690
638 vcpu->arch.shadow_efer = efer; 691 vcpu->arch.efer = efer;
639 692
640 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 693 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
641 kvm_mmu_reset_context(vcpu); 694 kvm_mmu_reset_context(vcpu);
@@ -957,6 +1010,100 @@ out:
957 return r; 1010 return r;
958} 1011}
959 1012
1013static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1014{
1015 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1016}
1017
1018static bool kvm_hv_msr_partition_wide(u32 msr)
1019{
1020 bool r = false;
1021 switch (msr) {
1022 case HV_X64_MSR_GUEST_OS_ID:
1023 case HV_X64_MSR_HYPERCALL:
1024 r = true;
1025 break;
1026 }
1027
1028 return r;
1029}
1030
1031static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1032{
1033 struct kvm *kvm = vcpu->kvm;
1034
1035 switch (msr) {
1036 case HV_X64_MSR_GUEST_OS_ID:
1037 kvm->arch.hv_guest_os_id = data;
1038 /* setting guest os id to zero disables hypercall page */
1039 if (!kvm->arch.hv_guest_os_id)
1040 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1041 break;
1042 case HV_X64_MSR_HYPERCALL: {
1043 u64 gfn;
1044 unsigned long addr;
1045 u8 instructions[4];
1046
1047 /* if guest os id is not set hypercall should remain disabled */
1048 if (!kvm->arch.hv_guest_os_id)
1049 break;
1050 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1051 kvm->arch.hv_hypercall = data;
1052 break;
1053 }
1054 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1055 addr = gfn_to_hva(kvm, gfn);
1056 if (kvm_is_error_hva(addr))
1057 return 1;
1058 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1059 ((unsigned char *)instructions)[3] = 0xc3; /* ret */
1060 if (copy_to_user((void __user *)addr, instructions, 4))
1061 return 1;
1062 kvm->arch.hv_hypercall = data;
1063 break;
1064 }
1065 default:
1066 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1067 "data 0x%llx\n", msr, data);
1068 return 1;
1069 }
1070 return 0;
1071}
1072
1073static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1074{
1075 switch (msr) {
1076 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1077 unsigned long addr;
1078
1079 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1080 vcpu->arch.hv_vapic = data;
1081 break;
1082 }
1083 addr = gfn_to_hva(vcpu->kvm, data >>
1084 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1085 if (kvm_is_error_hva(addr))
1086 return 1;
1087 if (clear_user((void __user *)addr, PAGE_SIZE))
1088 return 1;
1089 vcpu->arch.hv_vapic = data;
1090 break;
1091 }
1092 case HV_X64_MSR_EOI:
1093 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1094 case HV_X64_MSR_ICR:
1095 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1096 case HV_X64_MSR_TPR:
1097 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1098 default:
1099 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1100 "data 0x%llx\n", msr, data);
1101 return 1;
1102 }
1103
1104 return 0;
1105}
1106
960int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1107int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
961{ 1108{
962 switch (msr) { 1109 switch (msr) {
@@ -1071,6 +1218,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1071 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1218 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1072 "0x%x data 0x%llx\n", msr, data); 1219 "0x%x data 0x%llx\n", msr, data);
1073 break; 1220 break;
1221 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1222 if (kvm_hv_msr_partition_wide(msr)) {
1223 int r;
1224 mutex_lock(&vcpu->kvm->lock);
1225 r = set_msr_hyperv_pw(vcpu, msr, data);
1226 mutex_unlock(&vcpu->kvm->lock);
1227 return r;
1228 } else
1229 return set_msr_hyperv(vcpu, msr, data);
1230 break;
1074 default: 1231 default:
1075 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 1232 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1076 return xen_hvm_config(vcpu, data); 1233 return xen_hvm_config(vcpu, data);
@@ -1170,6 +1327,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1170 return 0; 1327 return 0;
1171} 1328}
1172 1329
1330static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1331{
1332 u64 data = 0;
1333 struct kvm *kvm = vcpu->kvm;
1334
1335 switch (msr) {
1336 case HV_X64_MSR_GUEST_OS_ID:
1337 data = kvm->arch.hv_guest_os_id;
1338 break;
1339 case HV_X64_MSR_HYPERCALL:
1340 data = kvm->arch.hv_hypercall;
1341 break;
1342 default:
1343 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1344 return 1;
1345 }
1346
1347 *pdata = data;
1348 return 0;
1349}
1350
1351static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1352{
1353 u64 data = 0;
1354
1355 switch (msr) {
1356 case HV_X64_MSR_VP_INDEX: {
1357 int r;
1358 struct kvm_vcpu *v;
1359 kvm_for_each_vcpu(r, v, vcpu->kvm)
1360 if (v == vcpu)
1361 data = r;
1362 break;
1363 }
1364 case HV_X64_MSR_EOI:
1365 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1366 case HV_X64_MSR_ICR:
1367 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1368 case HV_X64_MSR_TPR:
1369 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1370 default:
1371 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1372 return 1;
1373 }
1374 *pdata = data;
1375 return 0;
1376}
1377
1173int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1378int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1174{ 1379{
1175 u64 data; 1380 u64 data;
@@ -1221,7 +1426,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1221 data |= (((uint64_t)4ULL) << 40); 1426 data |= (((uint64_t)4ULL) << 40);
1222 break; 1427 break;
1223 case MSR_EFER: 1428 case MSR_EFER:
1224 data = vcpu->arch.shadow_efer; 1429 data = vcpu->arch.efer;
1225 break; 1430 break;
1226 case MSR_KVM_WALL_CLOCK: 1431 case MSR_KVM_WALL_CLOCK:
1227 data = vcpu->kvm->arch.wall_clock; 1432 data = vcpu->kvm->arch.wall_clock;
@@ -1236,6 +1441,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1236 case MSR_IA32_MCG_STATUS: 1441 case MSR_IA32_MCG_STATUS:
1237 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1442 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1238 return get_msr_mce(vcpu, msr, pdata); 1443 return get_msr_mce(vcpu, msr, pdata);
1444 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1445 if (kvm_hv_msr_partition_wide(msr)) {
1446 int r;
1447 mutex_lock(&vcpu->kvm->lock);
1448 r = get_msr_hyperv_pw(vcpu, msr, pdata);
1449 mutex_unlock(&vcpu->kvm->lock);
1450 return r;
1451 } else
1452 return get_msr_hyperv(vcpu, msr, pdata);
1453 break;
1239 default: 1454 default:
1240 if (!ignore_msrs) { 1455 if (!ignore_msrs) {
1241 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 1456 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1261,15 +1476,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1261 int (*do_msr)(struct kvm_vcpu *vcpu, 1476 int (*do_msr)(struct kvm_vcpu *vcpu,
1262 unsigned index, u64 *data)) 1477 unsigned index, u64 *data))
1263{ 1478{
1264 int i; 1479 int i, idx;
1265 1480
1266 vcpu_load(vcpu); 1481 vcpu_load(vcpu);
1267 1482
1268 down_read(&vcpu->kvm->slots_lock); 1483 idx = srcu_read_lock(&vcpu->kvm->srcu);
1269 for (i = 0; i < msrs->nmsrs; ++i) 1484 for (i = 0; i < msrs->nmsrs; ++i)
1270 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1485 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1271 break; 1486 break;
1272 up_read(&vcpu->kvm->slots_lock); 1487 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1273 1488
1274 vcpu_put(vcpu); 1489 vcpu_put(vcpu);
1275 1490
@@ -1351,6 +1566,11 @@ int kvm_dev_ioctl_check_extension(long ext)
1351 case KVM_CAP_XEN_HVM: 1566 case KVM_CAP_XEN_HVM:
1352 case KVM_CAP_ADJUST_CLOCK: 1567 case KVM_CAP_ADJUST_CLOCK:
1353 case KVM_CAP_VCPU_EVENTS: 1568 case KVM_CAP_VCPU_EVENTS:
1569 case KVM_CAP_HYPERV:
1570 case KVM_CAP_HYPERV_VAPIC:
1571 case KVM_CAP_HYPERV_SPIN:
1572 case KVM_CAP_PCI_SEGMENT:
1573 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1354 r = 1; 1574 r = 1;
1355 break; 1575 break;
1356 case KVM_CAP_COALESCED_MMIO: 1576 case KVM_CAP_COALESCED_MMIO:
@@ -1464,8 +1684,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1464 1684
1465void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1685void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1466{ 1686{
1467 kvm_x86_ops->vcpu_put(vcpu);
1468 kvm_put_guest_fpu(vcpu); 1687 kvm_put_guest_fpu(vcpu);
1688 kvm_x86_ops->vcpu_put(vcpu);
1469} 1689}
1470 1690
1471static int is_efer_nx(void) 1691static int is_efer_nx(void)
@@ -1530,6 +1750,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1530 cpuid_fix_nx_cap(vcpu); 1750 cpuid_fix_nx_cap(vcpu);
1531 r = 0; 1751 r = 0;
1532 kvm_apic_set_version(vcpu); 1752 kvm_apic_set_version(vcpu);
1753 kvm_x86_ops->cpuid_update(vcpu);
1533 1754
1534out_free: 1755out_free:
1535 vfree(cpuid_entries); 1756 vfree(cpuid_entries);
@@ -1552,6 +1773,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1552 goto out; 1773 goto out;
1553 vcpu->arch.cpuid_nent = cpuid->nent; 1774 vcpu->arch.cpuid_nent = cpuid->nent;
1554 kvm_apic_set_version(vcpu); 1775 kvm_apic_set_version(vcpu);
1776 kvm_x86_ops->cpuid_update(vcpu);
1555 return 0; 1777 return 0;
1556 1778
1557out: 1779out:
@@ -1594,12 +1816,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1594 u32 index, int *nent, int maxnent) 1816 u32 index, int *nent, int maxnent)
1595{ 1817{
1596 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 1818 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
1597 unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
1598#ifdef CONFIG_X86_64 1819#ifdef CONFIG_X86_64
1820 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
1821 ? F(GBPAGES) : 0;
1599 unsigned f_lm = F(LM); 1822 unsigned f_lm = F(LM);
1600#else 1823#else
1824 unsigned f_gbpages = 0;
1601 unsigned f_lm = 0; 1825 unsigned f_lm = 0;
1602#endif 1826#endif
1827 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
1603 1828
1604 /* cpuid 1.edx */ 1829 /* cpuid 1.edx */
1605 const u32 kvm_supported_word0_x86_features = 1830 const u32 kvm_supported_word0_x86_features =
@@ -1619,7 +1844,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1619 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1844 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1620 F(PAT) | F(PSE36) | 0 /* Reserved */ | 1845 F(PAT) | F(PSE36) | 0 /* Reserved */ |
1621 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | 1846 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
1622 F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | 1847 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
1623 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1848 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1624 /* cpuid 1.ecx */ 1849 /* cpuid 1.ecx */
1625 const u32 kvm_supported_word4_x86_features = 1850 const u32 kvm_supported_word4_x86_features =
@@ -1866,7 +2091,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1866 return 0; 2091 return 0;
1867 if (mce->status & MCI_STATUS_UC) { 2092 if (mce->status & MCI_STATUS_UC) {
1868 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || 2093 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
1869 !(vcpu->arch.cr4 & X86_CR4_MCE)) { 2094 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
1870 printk(KERN_DEBUG "kvm: set_mce: " 2095 printk(KERN_DEBUG "kvm: set_mce: "
1871 "injects mce exception while " 2096 "injects mce exception while "
1872 "previous one is in progress!\n"); 2097 "previous one is in progress!\n");
@@ -2160,14 +2385,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
2160 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) 2385 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
2161 return -EINVAL; 2386 return -EINVAL;
2162 2387
2163 down_write(&kvm->slots_lock); 2388 mutex_lock(&kvm->slots_lock);
2164 spin_lock(&kvm->mmu_lock); 2389 spin_lock(&kvm->mmu_lock);
2165 2390
2166 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 2391 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
2167 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 2392 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
2168 2393
2169 spin_unlock(&kvm->mmu_lock); 2394 spin_unlock(&kvm->mmu_lock);
2170 up_write(&kvm->slots_lock); 2395 mutex_unlock(&kvm->slots_lock);
2171 return 0; 2396 return 0;
2172} 2397}
2173 2398
@@ -2176,13 +2401,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
2176 return kvm->arch.n_alloc_mmu_pages; 2401 return kvm->arch.n_alloc_mmu_pages;
2177} 2402}
2178 2403
2404gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2405{
2406 int i;
2407 struct kvm_mem_alias *alias;
2408 struct kvm_mem_aliases *aliases;
2409
2410 aliases = rcu_dereference(kvm->arch.aliases);
2411
2412 for (i = 0; i < aliases->naliases; ++i) {
2413 alias = &aliases->aliases[i];
2414 if (alias->flags & KVM_ALIAS_INVALID)
2415 continue;
2416 if (gfn >= alias->base_gfn
2417 && gfn < alias->base_gfn + alias->npages)
2418 return alias->target_gfn + gfn - alias->base_gfn;
2419 }
2420 return gfn;
2421}
2422
2179gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 2423gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2180{ 2424{
2181 int i; 2425 int i;
2182 struct kvm_mem_alias *alias; 2426 struct kvm_mem_alias *alias;
2427 struct kvm_mem_aliases *aliases;
2183 2428
2184 for (i = 0; i < kvm->arch.naliases; ++i) { 2429 aliases = rcu_dereference(kvm->arch.aliases);
2185 alias = &kvm->arch.aliases[i]; 2430
2431 for (i = 0; i < aliases->naliases; ++i) {
2432 alias = &aliases->aliases[i];
2186 if (gfn >= alias->base_gfn 2433 if (gfn >= alias->base_gfn
2187 && gfn < alias->base_gfn + alias->npages) 2434 && gfn < alias->base_gfn + alias->npages)
2188 return alias->target_gfn + gfn - alias->base_gfn; 2435 return alias->target_gfn + gfn - alias->base_gfn;
@@ -2200,6 +2447,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2200{ 2447{
2201 int r, n; 2448 int r, n;
2202 struct kvm_mem_alias *p; 2449 struct kvm_mem_alias *p;
2450 struct kvm_mem_aliases *aliases, *old_aliases;
2203 2451
2204 r = -EINVAL; 2452 r = -EINVAL;
2205 /* General sanity checks */ 2453 /* General sanity checks */
@@ -2216,26 +2464,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2216 < alias->target_phys_addr) 2464 < alias->target_phys_addr)
2217 goto out; 2465 goto out;
2218 2466
2219 down_write(&kvm->slots_lock); 2467 r = -ENOMEM;
2220 spin_lock(&kvm->mmu_lock); 2468 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2469 if (!aliases)
2470 goto out;
2471
2472 mutex_lock(&kvm->slots_lock);
2221 2473
2222 p = &kvm->arch.aliases[alias->slot]; 2474 /* invalidate any gfn reference in case of deletion/shrinking */
2475 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2476 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
2477 old_aliases = kvm->arch.aliases;
2478 rcu_assign_pointer(kvm->arch.aliases, aliases);
2479 synchronize_srcu_expedited(&kvm->srcu);
2480 kvm_mmu_zap_all(kvm);
2481 kfree(old_aliases);
2482
2483 r = -ENOMEM;
2484 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2485 if (!aliases)
2486 goto out_unlock;
2487
2488 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2489
2490 p = &aliases->aliases[alias->slot];
2223 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 2491 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
2224 p->npages = alias->memory_size >> PAGE_SHIFT; 2492 p->npages = alias->memory_size >> PAGE_SHIFT;
2225 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 2493 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
2494 p->flags &= ~(KVM_ALIAS_INVALID);
2226 2495
2227 for (n = KVM_ALIAS_SLOTS; n > 0; --n) 2496 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
2228 if (kvm->arch.aliases[n - 1].npages) 2497 if (aliases->aliases[n - 1].npages)
2229 break; 2498 break;
2230 kvm->arch.naliases = n; 2499 aliases->naliases = n;
2231 2500
2232 spin_unlock(&kvm->mmu_lock); 2501 old_aliases = kvm->arch.aliases;
2233 kvm_mmu_zap_all(kvm); 2502 rcu_assign_pointer(kvm->arch.aliases, aliases);
2234 2503 synchronize_srcu_expedited(&kvm->srcu);
2235 up_write(&kvm->slots_lock); 2504 kfree(old_aliases);
2236 2505 r = 0;
2237 return 0;
2238 2506
2507out_unlock:
2508 mutex_unlock(&kvm->slots_lock);
2239out: 2509out:
2240 return r; 2510 return r;
2241} 2511}
@@ -2273,18 +2543,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2273 r = 0; 2543 r = 0;
2274 switch (chip->chip_id) { 2544 switch (chip->chip_id) {
2275 case KVM_IRQCHIP_PIC_MASTER: 2545 case KVM_IRQCHIP_PIC_MASTER:
2276 spin_lock(&pic_irqchip(kvm)->lock); 2546 raw_spin_lock(&pic_irqchip(kvm)->lock);
2277 memcpy(&pic_irqchip(kvm)->pics[0], 2547 memcpy(&pic_irqchip(kvm)->pics[0],
2278 &chip->chip.pic, 2548 &chip->chip.pic,
2279 sizeof(struct kvm_pic_state)); 2549 sizeof(struct kvm_pic_state));
2280 spin_unlock(&pic_irqchip(kvm)->lock); 2550 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2281 break; 2551 break;
2282 case KVM_IRQCHIP_PIC_SLAVE: 2552 case KVM_IRQCHIP_PIC_SLAVE:
2283 spin_lock(&pic_irqchip(kvm)->lock); 2553 raw_spin_lock(&pic_irqchip(kvm)->lock);
2284 memcpy(&pic_irqchip(kvm)->pics[1], 2554 memcpy(&pic_irqchip(kvm)->pics[1],
2285 &chip->chip.pic, 2555 &chip->chip.pic,
2286 sizeof(struct kvm_pic_state)); 2556 sizeof(struct kvm_pic_state));
2287 spin_unlock(&pic_irqchip(kvm)->lock); 2557 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2288 break; 2558 break;
2289 case KVM_IRQCHIP_IOAPIC: 2559 case KVM_IRQCHIP_IOAPIC:
2290 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); 2560 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
@@ -2364,29 +2634,62 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2364int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2634int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2365 struct kvm_dirty_log *log) 2635 struct kvm_dirty_log *log)
2366{ 2636{
2367 int r; 2637 int r, n, i;
2368 int n;
2369 struct kvm_memory_slot *memslot; 2638 struct kvm_memory_slot *memslot;
2370 int is_dirty = 0; 2639 unsigned long is_dirty = 0;
2640 unsigned long *dirty_bitmap = NULL;
2371 2641
2372 down_write(&kvm->slots_lock); 2642 mutex_lock(&kvm->slots_lock);
2373 2643
2374 r = kvm_get_dirty_log(kvm, log, &is_dirty); 2644 r = -EINVAL;
2375 if (r) 2645 if (log->slot >= KVM_MEMORY_SLOTS)
2646 goto out;
2647
2648 memslot = &kvm->memslots->memslots[log->slot];
2649 r = -ENOENT;
2650 if (!memslot->dirty_bitmap)
2651 goto out;
2652
2653 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
2654
2655 r = -ENOMEM;
2656 dirty_bitmap = vmalloc(n);
2657 if (!dirty_bitmap)
2376 goto out; 2658 goto out;
2659 memset(dirty_bitmap, 0, n);
2660
2661 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
2662 is_dirty = memslot->dirty_bitmap[i];
2377 2663
2378 /* If nothing is dirty, don't bother messing with page tables. */ 2664 /* If nothing is dirty, don't bother messing with page tables. */
2379 if (is_dirty) { 2665 if (is_dirty) {
2666 struct kvm_memslots *slots, *old_slots;
2667
2380 spin_lock(&kvm->mmu_lock); 2668 spin_lock(&kvm->mmu_lock);
2381 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2669 kvm_mmu_slot_remove_write_access(kvm, log->slot);
2382 spin_unlock(&kvm->mmu_lock); 2670 spin_unlock(&kvm->mmu_lock);
2383 memslot = &kvm->memslots[log->slot]; 2671
2384 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2672 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
2385 memset(memslot->dirty_bitmap, 0, n); 2673 if (!slots)
2674 goto out_free;
2675
2676 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
2677 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
2678
2679 old_slots = kvm->memslots;
2680 rcu_assign_pointer(kvm->memslots, slots);
2681 synchronize_srcu_expedited(&kvm->srcu);
2682 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
2683 kfree(old_slots);
2386 } 2684 }
2685
2387 r = 0; 2686 r = 0;
2687 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
2688 r = -EFAULT;
2689out_free:
2690 vfree(dirty_bitmap);
2388out: 2691out:
2389 up_write(&kvm->slots_lock); 2692 mutex_unlock(&kvm->slots_lock);
2390 return r; 2693 return r;
2391} 2694}
2392 2695
@@ -2469,6 +2772,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
2469 if (vpic) { 2772 if (vpic) {
2470 r = kvm_ioapic_init(kvm); 2773 r = kvm_ioapic_init(kvm);
2471 if (r) { 2774 if (r) {
2775 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
2776 &vpic->dev);
2472 kfree(vpic); 2777 kfree(vpic);
2473 goto create_irqchip_unlock; 2778 goto create_irqchip_unlock;
2474 } 2779 }
@@ -2480,10 +2785,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
2480 r = kvm_setup_default_irq_routing(kvm); 2785 r = kvm_setup_default_irq_routing(kvm);
2481 if (r) { 2786 if (r) {
2482 mutex_lock(&kvm->irq_lock); 2787 mutex_lock(&kvm->irq_lock);
2483 kfree(kvm->arch.vpic); 2788 kvm_ioapic_destroy(kvm);
2484 kfree(kvm->arch.vioapic); 2789 kvm_destroy_pic(kvm);
2485 kvm->arch.vpic = NULL;
2486 kvm->arch.vioapic = NULL;
2487 mutex_unlock(&kvm->irq_lock); 2790 mutex_unlock(&kvm->irq_lock);
2488 } 2791 }
2489 create_irqchip_unlock: 2792 create_irqchip_unlock:
@@ -2499,7 +2802,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2499 sizeof(struct kvm_pit_config))) 2802 sizeof(struct kvm_pit_config)))
2500 goto out; 2803 goto out;
2501 create_pit: 2804 create_pit:
2502 down_write(&kvm->slots_lock); 2805 mutex_lock(&kvm->slots_lock);
2503 r = -EEXIST; 2806 r = -EEXIST;
2504 if (kvm->arch.vpit) 2807 if (kvm->arch.vpit)
2505 goto create_pit_unlock; 2808 goto create_pit_unlock;
@@ -2508,7 +2811,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2508 if (kvm->arch.vpit) 2811 if (kvm->arch.vpit)
2509 r = 0; 2812 r = 0;
2510 create_pit_unlock: 2813 create_pit_unlock:
2511 up_write(&kvm->slots_lock); 2814 mutex_unlock(&kvm->slots_lock);
2512 break; 2815 break;
2513 case KVM_IRQ_LINE_STATUS: 2816 case KVM_IRQ_LINE_STATUS:
2514 case KVM_IRQ_LINE: { 2817 case KVM_IRQ_LINE: {
@@ -2725,7 +3028,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
2725 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3028 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
2726 return 0; 3029 return 0;
2727 3030
2728 return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); 3031 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
2729} 3032}
2730 3033
2731static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) 3034static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -2734,17 +3037,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2734 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3037 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
2735 return 0; 3038 return 0;
2736 3039
2737 return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); 3040 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
2738} 3041}
2739 3042
2740static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3043gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
2741 struct kvm_vcpu *vcpu) 3044{
3045 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3046 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3047}
3048
3049 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3050{
3051 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3052 access |= PFERR_FETCH_MASK;
3053 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3054}
3055
3056gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3057{
3058 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3059 access |= PFERR_WRITE_MASK;
3060 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3061}
3062
3063/* uses this to access any guest's mapped memory without checking CPL */
3064gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3065{
3066 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
3067}
3068
3069static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3070 struct kvm_vcpu *vcpu, u32 access,
3071 u32 *error)
2742{ 3072{
2743 void *data = val; 3073 void *data = val;
2744 int r = X86EMUL_CONTINUE; 3074 int r = X86EMUL_CONTINUE;
2745 3075
2746 while (bytes) { 3076 while (bytes) {
2747 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3077 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
2748 unsigned offset = addr & (PAGE_SIZE-1); 3078 unsigned offset = addr & (PAGE_SIZE-1);
2749 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); 3079 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
2750 int ret; 3080 int ret;
@@ -2767,14 +3097,37 @@ out:
2767 return r; 3097 return r;
2768} 3098}
2769 3099
3100/* used for instruction fetching */
3101static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
3102 struct kvm_vcpu *vcpu, u32 *error)
3103{
3104 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3105 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3106 access | PFERR_FETCH_MASK, error);
3107}
3108
3109static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
3110 struct kvm_vcpu *vcpu, u32 *error)
3111{
3112 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3113 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3114 error);
3115}
3116
3117static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3118 struct kvm_vcpu *vcpu, u32 *error)
3119{
3120 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3121}
3122
2770static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3123static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
2771 struct kvm_vcpu *vcpu) 3124 struct kvm_vcpu *vcpu, u32 *error)
2772{ 3125{
2773 void *data = val; 3126 void *data = val;
2774 int r = X86EMUL_CONTINUE; 3127 int r = X86EMUL_CONTINUE;
2775 3128
2776 while (bytes) { 3129 while (bytes) {
2777 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3130 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error);
2778 unsigned offset = addr & (PAGE_SIZE-1); 3131 unsigned offset = addr & (PAGE_SIZE-1);
2779 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3132 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
2780 int ret; 3133 int ret;
@@ -2804,6 +3157,7 @@ static int emulator_read_emulated(unsigned long addr,
2804 struct kvm_vcpu *vcpu) 3157 struct kvm_vcpu *vcpu)
2805{ 3158{
2806 gpa_t gpa; 3159 gpa_t gpa;
3160 u32 error_code;
2807 3161
2808 if (vcpu->mmio_read_completed) { 3162 if (vcpu->mmio_read_completed) {
2809 memcpy(val, vcpu->mmio_data, bytes); 3163 memcpy(val, vcpu->mmio_data, bytes);
@@ -2813,17 +3167,20 @@ static int emulator_read_emulated(unsigned long addr,
2813 return X86EMUL_CONTINUE; 3167 return X86EMUL_CONTINUE;
2814 } 3168 }
2815 3169
2816 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3170 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
3171
3172 if (gpa == UNMAPPED_GVA) {
3173 kvm_inject_page_fault(vcpu, addr, error_code);
3174 return X86EMUL_PROPAGATE_FAULT;
3175 }
2817 3176
2818 /* For APIC access vmexit */ 3177 /* For APIC access vmexit */
2819 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3178 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
2820 goto mmio; 3179 goto mmio;
2821 3180
2822 if (kvm_read_guest_virt(addr, val, bytes, vcpu) 3181 if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
2823 == X86EMUL_CONTINUE) 3182 == X86EMUL_CONTINUE)
2824 return X86EMUL_CONTINUE; 3183 return X86EMUL_CONTINUE;
2825 if (gpa == UNMAPPED_GVA)
2826 return X86EMUL_PROPAGATE_FAULT;
2827 3184
2828mmio: 3185mmio:
2829 /* 3186 /*
@@ -2862,11 +3219,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
2862 struct kvm_vcpu *vcpu) 3219 struct kvm_vcpu *vcpu)
2863{ 3220{
2864 gpa_t gpa; 3221 gpa_t gpa;
3222 u32 error_code;
2865 3223
2866 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3224 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
2867 3225
2868 if (gpa == UNMAPPED_GVA) { 3226 if (gpa == UNMAPPED_GVA) {
2869 kvm_inject_page_fault(vcpu, addr, 2); 3227 kvm_inject_page_fault(vcpu, addr, error_code);
2870 return X86EMUL_PROPAGATE_FAULT; 3228 return X86EMUL_PROPAGATE_FAULT;
2871 } 3229 }
2872 3230
@@ -2930,7 +3288,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
2930 char *kaddr; 3288 char *kaddr;
2931 u64 val; 3289 u64 val;
2932 3290
2933 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3291 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
2934 3292
2935 if (gpa == UNMAPPED_GVA || 3293 if (gpa == UNMAPPED_GVA ||
2936 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3294 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -2967,35 +3325,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
2967 3325
2968int emulate_clts(struct kvm_vcpu *vcpu) 3326int emulate_clts(struct kvm_vcpu *vcpu)
2969{ 3327{
2970 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); 3328 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
3329 kvm_x86_ops->fpu_activate(vcpu);
2971 return X86EMUL_CONTINUE; 3330 return X86EMUL_CONTINUE;
2972} 3331}
2973 3332
2974int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3333int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
2975{ 3334{
2976 struct kvm_vcpu *vcpu = ctxt->vcpu; 3335 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest);
2977
2978 switch (dr) {
2979 case 0 ... 3:
2980 *dest = kvm_x86_ops->get_dr(vcpu, dr);
2981 return X86EMUL_CONTINUE;
2982 default:
2983 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
2984 return X86EMUL_UNHANDLEABLE;
2985 }
2986} 3336}
2987 3337
2988int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3338int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
2989{ 3339{
2990 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3340 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
2991 int exception;
2992 3341
2993 kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); 3342 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask);
2994 if (exception) {
2995 /* FIXME: better handling */
2996 return X86EMUL_UNHANDLEABLE;
2997 }
2998 return X86EMUL_CONTINUE;
2999} 3343}
3000 3344
3001void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3345void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -3009,7 +3353,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3009 3353
3010 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 3354 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
3011 3355
3012 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); 3356 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
3013 3357
3014 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 3358 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
3015 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 3359 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -3017,7 +3361,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3017EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3361EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
3018 3362
3019static struct x86_emulate_ops emulate_ops = { 3363static struct x86_emulate_ops emulate_ops = {
3020 .read_std = kvm_read_guest_virt, 3364 .read_std = kvm_read_guest_virt_system,
3365 .fetch = kvm_fetch_guest_virt,
3021 .read_emulated = emulator_read_emulated, 3366 .read_emulated = emulator_read_emulated,
3022 .write_emulated = emulator_write_emulated, 3367 .write_emulated = emulator_write_emulated,
3023 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3368 .cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -3060,8 +3405,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3060 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3405 vcpu->arch.emulate_ctxt.vcpu = vcpu;
3061 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3406 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
3062 vcpu->arch.emulate_ctxt.mode = 3407 vcpu->arch.emulate_ctxt.mode =
3408 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
3063 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3409 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
3064 ? X86EMUL_MODE_REAL : cs_l 3410 ? X86EMUL_MODE_VM86 : cs_l
3065 ? X86EMUL_MODE_PROT64 : cs_db 3411 ? X86EMUL_MODE_PROT64 : cs_db
3066 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3412 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3067 3413
@@ -3153,12 +3499,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
3153 gva_t q = vcpu->arch.pio.guest_gva; 3499 gva_t q = vcpu->arch.pio.guest_gva;
3154 unsigned bytes; 3500 unsigned bytes;
3155 int ret; 3501 int ret;
3502 u32 error_code;
3156 3503
3157 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 3504 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
3158 if (vcpu->arch.pio.in) 3505 if (vcpu->arch.pio.in)
3159 ret = kvm_write_guest_virt(q, p, bytes, vcpu); 3506 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
3160 else 3507 else
3161 ret = kvm_read_guest_virt(q, p, bytes, vcpu); 3508 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
3509
3510 if (ret == X86EMUL_PROPAGATE_FAULT)
3511 kvm_inject_page_fault(vcpu, q, error_code);
3512
3162 return ret; 3513 return ret;
3163} 3514}
3164 3515
@@ -3179,7 +3530,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
3179 if (io->in) { 3530 if (io->in) {
3180 r = pio_copy_data(vcpu); 3531 r = pio_copy_data(vcpu);
3181 if (r) 3532 if (r)
3182 return r; 3533 goto out;
3183 } 3534 }
3184 3535
3185 delta = 1; 3536 delta = 1;
@@ -3206,7 +3557,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
3206 kvm_register_write(vcpu, VCPU_REGS_RSI, val); 3557 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
3207 } 3558 }
3208 } 3559 }
3209 3560out:
3210 io->count -= io->cur_count; 3561 io->count -= io->cur_count;
3211 io->cur_count = 0; 3562 io->cur_count = 0;
3212 3563
@@ -3219,11 +3570,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3219 int r; 3570 int r;
3220 3571
3221 if (vcpu->arch.pio.in) 3572 if (vcpu->arch.pio.in)
3222 r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3573 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3223 vcpu->arch.pio.size, pd); 3574 vcpu->arch.pio.size, pd);
3224 else 3575 else
3225 r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3576 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3226 vcpu->arch.pio.size, pd); 3577 vcpu->arch.pio.port, vcpu->arch.pio.size,
3578 pd);
3227 return r; 3579 return r;
3228} 3580}
3229 3581
@@ -3234,7 +3586,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
3234 int i, r = 0; 3586 int i, r = 0;
3235 3587
3236 for (i = 0; i < io->cur_count; i++) { 3588 for (i = 0; i < io->cur_count; i++) {
3237 if (kvm_io_bus_write(&vcpu->kvm->pio_bus, 3589 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3238 io->port, io->size, pd)) { 3590 io->port, io->size, pd)) {
3239 r = -EOPNOTSUPP; 3591 r = -EOPNOTSUPP;
3240 break; 3592 break;
@@ -3248,6 +3600,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3248{ 3600{
3249 unsigned long val; 3601 unsigned long val;
3250 3602
3603 trace_kvm_pio(!in, port, size, 1);
3604
3251 vcpu->run->exit_reason = KVM_EXIT_IO; 3605 vcpu->run->exit_reason = KVM_EXIT_IO;
3252 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3606 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3253 vcpu->run->io.size = vcpu->arch.pio.size = size; 3607 vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3259,11 +3613,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3259 vcpu->arch.pio.down = 0; 3613 vcpu->arch.pio.down = 0;
3260 vcpu->arch.pio.rep = 0; 3614 vcpu->arch.pio.rep = 0;
3261 3615
3262 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, 3616 if (!vcpu->arch.pio.in) {
3263 size, 1); 3617 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3264 3618 memcpy(vcpu->arch.pio_data, &val, 4);
3265 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3619 }
3266 memcpy(vcpu->arch.pio_data, &val, 4);
3267 3620
3268 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3621 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3269 complete_pio(vcpu); 3622 complete_pio(vcpu);
@@ -3280,6 +3633,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3280 unsigned now, in_page; 3633 unsigned now, in_page;
3281 int ret = 0; 3634 int ret = 0;
3282 3635
3636 trace_kvm_pio(!in, port, size, count);
3637
3283 vcpu->run->exit_reason = KVM_EXIT_IO; 3638 vcpu->run->exit_reason = KVM_EXIT_IO;
3284 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3639 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3285 vcpu->run->io.size = vcpu->arch.pio.size = size; 3640 vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3291,9 +3646,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3291 vcpu->arch.pio.down = down; 3646 vcpu->arch.pio.down = down;
3292 vcpu->arch.pio.rep = rep; 3647 vcpu->arch.pio.rep = rep;
3293 3648
3294 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
3295 size, count);
3296
3297 if (!count) { 3649 if (!count) {
3298 kvm_x86_ops->skip_emulated_instruction(vcpu); 3650 kvm_x86_ops->skip_emulated_instruction(vcpu);
3299 return 1; 3651 return 1;
@@ -3325,10 +3677,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3325 if (!vcpu->arch.pio.in) { 3677 if (!vcpu->arch.pio.in) {
3326 /* string PIO write */ 3678 /* string PIO write */
3327 ret = pio_copy_data(vcpu); 3679 ret = pio_copy_data(vcpu);
3328 if (ret == X86EMUL_PROPAGATE_FAULT) { 3680 if (ret == X86EMUL_PROPAGATE_FAULT)
3329 kvm_inject_gp(vcpu, 0);
3330 return 1; 3681 return 1;
3331 }
3332 if (ret == 0 && !pio_string_write(vcpu)) { 3682 if (ret == 0 && !pio_string_write(vcpu)) {
3333 complete_pio(vcpu); 3683 complete_pio(vcpu);
3334 if (vcpu->arch.pio.count == 0) 3684 if (vcpu->arch.pio.count == 0)
@@ -3487,11 +3837,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
3487 return a0 | ((gpa_t)a1 << 32); 3837 return a0 | ((gpa_t)a1 << 32);
3488} 3838}
3489 3839
3840int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
3841{
3842 u64 param, ingpa, outgpa, ret;
3843 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
3844 bool fast, longmode;
3845 int cs_db, cs_l;
3846
3847 /*
3848 * hypercall generates UD from non zero cpl and real mode
3849 * per HYPER-V spec
3850 */
3851 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
3852 kvm_queue_exception(vcpu, UD_VECTOR);
3853 return 0;
3854 }
3855
3856 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3857 longmode = is_long_mode(vcpu) && cs_l == 1;
3858
3859 if (!longmode) {
3860 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
3861 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
3862 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
3863 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
3864 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
3865 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
3866 }
3867#ifdef CONFIG_X86_64
3868 else {
3869 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
3870 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
3871 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
3872 }
3873#endif
3874
3875 code = param & 0xffff;
3876 fast = (param >> 16) & 0x1;
3877 rep_cnt = (param >> 32) & 0xfff;
3878 rep_idx = (param >> 48) & 0xfff;
3879
3880 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
3881
3882 switch (code) {
3883 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
3884 kvm_vcpu_on_spin(vcpu);
3885 break;
3886 default:
3887 res = HV_STATUS_INVALID_HYPERCALL_CODE;
3888 break;
3889 }
3890
3891 ret = res | (((u64)rep_done & 0xfff) << 32);
3892 if (longmode) {
3893 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
3894 } else {
3895 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
3896 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
3897 }
3898
3899 return 1;
3900}
3901
3490int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 3902int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
3491{ 3903{
3492 unsigned long nr, a0, a1, a2, a3, ret; 3904 unsigned long nr, a0, a1, a2, a3, ret;
3493 int r = 1; 3905 int r = 1;
3494 3906
3907 if (kvm_hv_hypercall_enabled(vcpu->kvm))
3908 return kvm_hv_hypercall(vcpu);
3909
3495 nr = kvm_register_read(vcpu, VCPU_REGS_RAX); 3910 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
3496 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); 3911 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
3497 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); 3912 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -3534,10 +3949,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
3534int kvm_fix_hypercall(struct kvm_vcpu *vcpu) 3949int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3535{ 3950{
3536 char instruction[3]; 3951 char instruction[3];
3537 int ret = 0;
3538 unsigned long rip = kvm_rip_read(vcpu); 3952 unsigned long rip = kvm_rip_read(vcpu);
3539 3953
3540
3541 /* 3954 /*
3542 * Blow out the MMU to ensure that no other VCPU has an active mapping 3955 * Blow out the MMU to ensure that no other VCPU has an active mapping
3543 * to ensure that the updated hypercall appears atomically across all 3956 * to ensure that the updated hypercall appears atomically across all
@@ -3546,11 +3959,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3546 kvm_mmu_zap_all(vcpu->kvm); 3959 kvm_mmu_zap_all(vcpu->kvm);
3547 3960
3548 kvm_x86_ops->patch_hypercall(vcpu, instruction); 3961 kvm_x86_ops->patch_hypercall(vcpu, instruction);
3549 if (emulator_write_emulated(rip, instruction, 3, vcpu)
3550 != X86EMUL_CONTINUE)
3551 ret = -EFAULT;
3552 3962
3553 return ret; 3963 return emulator_write_emulated(rip, instruction, 3, vcpu);
3554} 3964}
3555 3965
3556static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3966static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -3583,10 +3993,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3583{ 3993{
3584 unsigned long value; 3994 unsigned long value;
3585 3995
3586 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
3587 switch (cr) { 3996 switch (cr) {
3588 case 0: 3997 case 0:
3589 value = vcpu->arch.cr0; 3998 value = kvm_read_cr0(vcpu);
3590 break; 3999 break;
3591 case 2: 4000 case 2:
3592 value = vcpu->arch.cr2; 4001 value = vcpu->arch.cr2;
@@ -3595,7 +4004,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3595 value = vcpu->arch.cr3; 4004 value = vcpu->arch.cr3;
3596 break; 4005 break;
3597 case 4: 4006 case 4:
3598 value = vcpu->arch.cr4; 4007 value = kvm_read_cr4(vcpu);
3599 break; 4008 break;
3600 case 8: 4009 case 8:
3601 value = kvm_get_cr8(vcpu); 4010 value = kvm_get_cr8(vcpu);
@@ -3613,7 +4022,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3613{ 4022{
3614 switch (cr) { 4023 switch (cr) {
3615 case 0: 4024 case 0:
3616 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 4025 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3617 *rflags = kvm_get_rflags(vcpu); 4026 *rflags = kvm_get_rflags(vcpu);
3618 break; 4027 break;
3619 case 2: 4028 case 2:
@@ -3623,7 +4032,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3623 kvm_set_cr3(vcpu, val); 4032 kvm_set_cr3(vcpu, val);
3624 break; 4033 break;
3625 case 4: 4034 case 4:
3626 kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); 4035 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3627 break; 4036 break;
3628 case 8: 4037 case 8:
3629 kvm_set_cr8(vcpu, val & 0xfUL); 4038 kvm_set_cr8(vcpu, val & 0xfUL);
@@ -3690,6 +4099,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
3690 } 4099 }
3691 return best; 4100 return best;
3692} 4101}
4102EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
3693 4103
3694int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) 4104int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
3695{ 4105{
@@ -3773,14 +4183,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
3773static void vapic_exit(struct kvm_vcpu *vcpu) 4183static void vapic_exit(struct kvm_vcpu *vcpu)
3774{ 4184{
3775 struct kvm_lapic *apic = vcpu->arch.apic; 4185 struct kvm_lapic *apic = vcpu->arch.apic;
4186 int idx;
3776 4187
3777 if (!apic || !apic->vapic_addr) 4188 if (!apic || !apic->vapic_addr)
3778 return; 4189 return;
3779 4190
3780 down_read(&vcpu->kvm->slots_lock); 4191 idx = srcu_read_lock(&vcpu->kvm->srcu);
3781 kvm_release_page_dirty(apic->vapic_page); 4192 kvm_release_page_dirty(apic->vapic_page);
3782 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4193 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
3783 up_read(&vcpu->kvm->slots_lock); 4194 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3784} 4195}
3785 4196
3786static void update_cr8_intercept(struct kvm_vcpu *vcpu) 4197static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -3876,12 +4287,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3876 r = 0; 4287 r = 0;
3877 goto out; 4288 goto out;
3878 } 4289 }
4290 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
4291 vcpu->fpu_active = 0;
4292 kvm_x86_ops->fpu_deactivate(vcpu);
4293 }
3879 } 4294 }
3880 4295
3881 preempt_disable(); 4296 preempt_disable();
3882 4297
3883 kvm_x86_ops->prepare_guest_switch(vcpu); 4298 kvm_x86_ops->prepare_guest_switch(vcpu);
3884 kvm_load_guest_fpu(vcpu); 4299 if (vcpu->fpu_active)
4300 kvm_load_guest_fpu(vcpu);
3885 4301
3886 local_irq_disable(); 4302 local_irq_disable();
3887 4303
@@ -3909,7 +4325,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3909 kvm_lapic_sync_to_vapic(vcpu); 4325 kvm_lapic_sync_to_vapic(vcpu);
3910 } 4326 }
3911 4327
3912 up_read(&vcpu->kvm->slots_lock); 4328 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3913 4329
3914 kvm_guest_enter(); 4330 kvm_guest_enter();
3915 4331
@@ -3951,7 +4367,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3951 4367
3952 preempt_enable(); 4368 preempt_enable();
3953 4369
3954 down_read(&vcpu->kvm->slots_lock); 4370 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3955 4371
3956 /* 4372 /*
3957 * Profile KVM exit RIPs: 4373 * Profile KVM exit RIPs:
@@ -3973,6 +4389,7 @@ out:
3973static int __vcpu_run(struct kvm_vcpu *vcpu) 4389static int __vcpu_run(struct kvm_vcpu *vcpu)
3974{ 4390{
3975 int r; 4391 int r;
4392 struct kvm *kvm = vcpu->kvm;
3976 4393
3977 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { 4394 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
3978 pr_debug("vcpu %d received sipi with vector # %x\n", 4395 pr_debug("vcpu %d received sipi with vector # %x\n",
@@ -3984,7 +4401,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
3984 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4401 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3985 } 4402 }
3986 4403
3987 down_read(&vcpu->kvm->slots_lock); 4404 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3988 vapic_enter(vcpu); 4405 vapic_enter(vcpu);
3989 4406
3990 r = 1; 4407 r = 1;
@@ -3992,9 +4409,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
3992 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 4409 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
3993 r = vcpu_enter_guest(vcpu); 4410 r = vcpu_enter_guest(vcpu);
3994 else { 4411 else {
3995 up_read(&vcpu->kvm->slots_lock); 4412 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
3996 kvm_vcpu_block(vcpu); 4413 kvm_vcpu_block(vcpu);
3997 down_read(&vcpu->kvm->slots_lock); 4414 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3998 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4415 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
3999 { 4416 {
4000 switch(vcpu->arch.mp_state) { 4417 switch(vcpu->arch.mp_state) {
@@ -4029,13 +4446,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
4029 ++vcpu->stat.signal_exits; 4446 ++vcpu->stat.signal_exits;
4030 } 4447 }
4031 if (need_resched()) { 4448 if (need_resched()) {
4032 up_read(&vcpu->kvm->slots_lock); 4449 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4033 kvm_resched(vcpu); 4450 kvm_resched(vcpu);
4034 down_read(&vcpu->kvm->slots_lock); 4451 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4035 } 4452 }
4036 } 4453 }
4037 4454
4038 up_read(&vcpu->kvm->slots_lock); 4455 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4039 post_kvm_run_save(vcpu); 4456 post_kvm_run_save(vcpu);
4040 4457
4041 vapic_exit(vcpu); 4458 vapic_exit(vcpu);
@@ -4074,10 +4491,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4074 vcpu->mmio_read_completed = 1; 4491 vcpu->mmio_read_completed = 1;
4075 vcpu->mmio_needed = 0; 4492 vcpu->mmio_needed = 0;
4076 4493
4077 down_read(&vcpu->kvm->slots_lock); 4494 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4078 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4495 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
4079 EMULTYPE_NO_DECODE); 4496 EMULTYPE_NO_DECODE);
4080 up_read(&vcpu->kvm->slots_lock); 4497 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4081 if (r == EMULATE_DO_MMIO) { 4498 if (r == EMULATE_DO_MMIO) {
4082 /* 4499 /*
4083 * Read-modify-write. Back to userspace. 4500 * Read-modify-write. Back to userspace.
@@ -4204,13 +4621,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4204 sregs->gdt.limit = dt.limit; 4621 sregs->gdt.limit = dt.limit;
4205 sregs->gdt.base = dt.base; 4622 sregs->gdt.base = dt.base;
4206 4623
4207 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 4624 sregs->cr0 = kvm_read_cr0(vcpu);
4208 sregs->cr0 = vcpu->arch.cr0;
4209 sregs->cr2 = vcpu->arch.cr2; 4625 sregs->cr2 = vcpu->arch.cr2;
4210 sregs->cr3 = vcpu->arch.cr3; 4626 sregs->cr3 = vcpu->arch.cr3;
4211 sregs->cr4 = vcpu->arch.cr4; 4627 sregs->cr4 = kvm_read_cr4(vcpu);
4212 sregs->cr8 = kvm_get_cr8(vcpu); 4628 sregs->cr8 = kvm_get_cr8(vcpu);
4213 sregs->efer = vcpu->arch.shadow_efer; 4629 sregs->efer = vcpu->arch.efer;
4214 sregs->apic_base = kvm_get_apic_base(vcpu); 4630 sregs->apic_base = kvm_get_apic_base(vcpu);
4215 4631
4216 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); 4632 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
@@ -4298,14 +4714,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4298{ 4714{
4299 struct descriptor_table dtable; 4715 struct descriptor_table dtable;
4300 u16 index = selector >> 3; 4716 u16 index = selector >> 3;
4717 int ret;
4718 u32 err;
4719 gva_t addr;
4301 4720
4302 get_segment_descriptor_dtable(vcpu, selector, &dtable); 4721 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4303 4722
4304 if (dtable.limit < index * 8 + 7) { 4723 if (dtable.limit < index * 8 + 7) {
4305 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 4724 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
4306 return 1; 4725 return X86EMUL_PROPAGATE_FAULT;
4307 } 4726 }
4308 return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4727 addr = dtable.base + index * 8;
4728 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
4729 vcpu, &err);
4730 if (ret == X86EMUL_PROPAGATE_FAULT)
4731 kvm_inject_page_fault(vcpu, addr, err);
4732
4733 return ret;
4309} 4734}
4310 4735
4311/* allowed just for 8 bytes segments */ 4736/* allowed just for 8 bytes segments */
@@ -4319,15 +4744,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4319 4744
4320 if (dtable.limit < index * 8 + 7) 4745 if (dtable.limit < index * 8 + 7)
4321 return 1; 4746 return 1;
4322 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4747 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
4748}
4749
4750static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
4751 struct desc_struct *seg_desc)
4752{
4753 u32 base_addr = get_desc_base(seg_desc);
4754
4755 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
4323} 4756}
4324 4757
4325static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, 4758static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
4326 struct desc_struct *seg_desc) 4759 struct desc_struct *seg_desc)
4327{ 4760{
4328 u32 base_addr = get_desc_base(seg_desc); 4761 u32 base_addr = get_desc_base(seg_desc);
4329 4762
4330 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); 4763 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
4331} 4764}
4332 4765
4333static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) 4766static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -4338,18 +4771,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
4338 return kvm_seg.selector; 4771 return kvm_seg.selector;
4339} 4772}
4340 4773
4341static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
4342 u16 selector,
4343 struct kvm_segment *kvm_seg)
4344{
4345 struct desc_struct seg_desc;
4346
4347 if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
4348 return 1;
4349 seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
4350 return 0;
4351}
4352
4353static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) 4774static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
4354{ 4775{
4355 struct kvm_segment segvar = { 4776 struct kvm_segment segvar = {
@@ -4367,7 +4788,7 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
4367 .unusable = 0, 4788 .unusable = 0,
4368 }; 4789 };
4369 kvm_x86_ops->set_segment(vcpu, &segvar, seg); 4790 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
4370 return 0; 4791 return X86EMUL_CONTINUE;
4371} 4792}
4372 4793
4373static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) 4794static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
@@ -4377,24 +4798,112 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4377 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); 4798 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4378} 4799}
4379 4800
4380int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4801int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
4381 int type_bits, int seg)
4382{ 4802{
4383 struct kvm_segment kvm_seg; 4803 struct kvm_segment kvm_seg;
4804 struct desc_struct seg_desc;
4805 u8 dpl, rpl, cpl;
4806 unsigned err_vec = GP_VECTOR;
4807 u32 err_code = 0;
4808 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
4809 int ret;
4384 4810
4385 if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) 4811 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
4386 return kvm_load_realmode_segment(vcpu, selector, seg); 4812 return kvm_load_realmode_segment(vcpu, selector, seg);
4387 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
4388 return 1;
4389 kvm_seg.type |= type_bits;
4390 4813
4391 if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && 4814 /* NULL selector is not valid for TR, CS and SS */
4392 seg != VCPU_SREG_LDTR) 4815 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
4393 if (!kvm_seg.s) 4816 && null_selector)
4394 kvm_seg.unusable = 1; 4817 goto exception;
4818
4819 /* TR should be in GDT only */
4820 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
4821 goto exception;
4822
4823 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
4824 if (ret)
4825 return ret;
4826
4827 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
4828
4829 if (null_selector) { /* for NULL selector skip all following checks */
4830 kvm_seg.unusable = 1;
4831 goto load;
4832 }
4833
4834 err_code = selector & 0xfffc;
4835 err_vec = GP_VECTOR;
4395 4836
4837 /* can't load system descriptor into segment selector */
4838 if (seg <= VCPU_SREG_GS && !kvm_seg.s)
4839 goto exception;
4840
4841 if (!kvm_seg.present) {
4842 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
4843 goto exception;
4844 }
4845
4846 rpl = selector & 3;
4847 dpl = kvm_seg.dpl;
4848 cpl = kvm_x86_ops->get_cpl(vcpu);
4849
4850 switch (seg) {
4851 case VCPU_SREG_SS:
4852 /*
4853 * segment is not a writable data segment or segment
4854 * selector's RPL != CPL or segment descriptor's DPL != CPL
4855 */
4856 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
4857 goto exception;
4858 break;
4859 case VCPU_SREG_CS:
4860 if (!(kvm_seg.type & 8))
4861 goto exception;
4862
4863 if (kvm_seg.type & 4) {
4864 /* conforming */
4865 if (dpl > cpl)
4866 goto exception;
4867 } else {
4868 /* nonconforming */
4869 if (rpl > cpl || dpl != cpl)
4870 goto exception;
4871 }
4872 /* CS(RPL) <- CPL */
4873 selector = (selector & 0xfffc) | cpl;
4874 break;
4875 case VCPU_SREG_TR:
4876 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
4877 goto exception;
4878 break;
4879 case VCPU_SREG_LDTR:
4880 if (kvm_seg.s || kvm_seg.type != 2)
4881 goto exception;
4882 break;
4883 default: /* DS, ES, FS, or GS */
4884 /*
4885 * segment is not a data or readable code segment or
4886 * ((segment is a data or nonconforming code segment)
4887 * and (both RPL and CPL > DPL))
4888 */
4889 if ((kvm_seg.type & 0xa) == 0x8 ||
4890 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
4891 goto exception;
4892 break;
4893 }
4894
4895 if (!kvm_seg.unusable && kvm_seg.s) {
4896 /* mark segment as accessed */
4897 kvm_seg.type |= 1;
4898 seg_desc.type |= 1;
4899 save_guest_segment_descriptor(vcpu, selector, &seg_desc);
4900 }
4901load:
4396 kvm_set_segment(vcpu, &kvm_seg, seg); 4902 kvm_set_segment(vcpu, &kvm_seg, seg);
4397 return 0; 4903 return X86EMUL_CONTINUE;
4904exception:
4905 kvm_queue_exception_e(vcpu, err_vec, err_code);
4906 return X86EMUL_PROPAGATE_FAULT;
4398} 4907}
4399 4908
4400static void save_state_to_tss32(struct kvm_vcpu *vcpu, 4909static void save_state_to_tss32(struct kvm_vcpu *vcpu,
@@ -4420,6 +4929,14 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4420 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4929 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4421} 4930}
4422 4931
4932static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
4933{
4934 struct kvm_segment kvm_seg;
4935 kvm_get_segment(vcpu, &kvm_seg, seg);
4936 kvm_seg.selector = sel;
4937 kvm_set_segment(vcpu, &kvm_seg, seg);
4938}
4939
4423static int load_state_from_tss32(struct kvm_vcpu *vcpu, 4940static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4424 struct tss_segment_32 *tss) 4941 struct tss_segment_32 *tss)
4425{ 4942{
@@ -4437,25 +4954,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4437 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); 4954 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
4438 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); 4955 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
4439 4956
4440 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) 4957 /*
4958 * SDM says that segment selectors are loaded before segment
4959 * descriptors
4960 */
4961 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
4962 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
4963 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
4964 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
4965 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
4966 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
4967 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
4968
4969 /*
4970 * Now load segment descriptors. If a fault happens at this stage
4971 * it is handled in the context of the new task
4972 */
4973 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
4441 return 1; 4974 return 1;
4442 4975
4443 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 4976 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4444 return 1; 4977 return 1;
4445 4978
4446 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 4979 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4447 return 1; 4980 return 1;
4448 4981
4449 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 4982 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4450 return 1; 4983 return 1;
4451 4984
4452 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 4985 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4453 return 1; 4986 return 1;
4454 4987
4455 if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) 4988 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
4456 return 1; 4989 return 1;
4457 4990
4458 if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) 4991 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
4459 return 1; 4992 return 1;
4460 return 0; 4993 return 0;
4461} 4994}
@@ -4495,19 +5028,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
4495 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); 5028 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
4496 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); 5029 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
4497 5030
4498 if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) 5031 /*
5032 * SDM says that segment selectors are loaded before segment
5033 * descriptors
5034 */
5035 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
5036 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5037 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5038 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5039 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5040
5041 /*
5042 * Now load segment descriptors. If a fault happens at this stage
5043 * it is handled in the context of the new task
5044 */
5045 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
4499 return 1; 5046 return 1;
4500 5047
4501 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 5048 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4502 return 1; 5049 return 1;
4503 5050
4504 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 5051 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4505 return 1; 5052 return 1;
4506 5053
4507 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 5054 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4508 return 1; 5055 return 1;
4509 5056
4510 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 5057 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4511 return 1; 5058 return 1;
4512 return 0; 5059 return 0;
4513} 5060}
@@ -4529,7 +5076,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
4529 sizeof tss_segment_16)) 5076 sizeof tss_segment_16))
4530 goto out; 5077 goto out;
4531 5078
4532 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 5079 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
4533 &tss_segment_16, sizeof tss_segment_16)) 5080 &tss_segment_16, sizeof tss_segment_16))
4534 goto out; 5081 goto out;
4535 5082
@@ -4537,7 +5084,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
4537 tss_segment_16.prev_task_link = old_tss_sel; 5084 tss_segment_16.prev_task_link = old_tss_sel;
4538 5085
4539 if (kvm_write_guest(vcpu->kvm, 5086 if (kvm_write_guest(vcpu->kvm,
4540 get_tss_base_addr(vcpu, nseg_desc), 5087 get_tss_base_addr_write(vcpu, nseg_desc),
4541 &tss_segment_16.prev_task_link, 5088 &tss_segment_16.prev_task_link,
4542 sizeof tss_segment_16.prev_task_link)) 5089 sizeof tss_segment_16.prev_task_link))
4543 goto out; 5090 goto out;
@@ -4568,7 +5115,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
4568 sizeof tss_segment_32)) 5115 sizeof tss_segment_32))
4569 goto out; 5116 goto out;
4570 5117
4571 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 5118 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
4572 &tss_segment_32, sizeof tss_segment_32)) 5119 &tss_segment_32, sizeof tss_segment_32))
4573 goto out; 5120 goto out;
4574 5121
@@ -4576,7 +5123,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
4576 tss_segment_32.prev_task_link = old_tss_sel; 5123 tss_segment_32.prev_task_link = old_tss_sel;
4577 5124
4578 if (kvm_write_guest(vcpu->kvm, 5125 if (kvm_write_guest(vcpu->kvm,
4579 get_tss_base_addr(vcpu, nseg_desc), 5126 get_tss_base_addr_write(vcpu, nseg_desc),
4580 &tss_segment_32.prev_task_link, 5127 &tss_segment_32.prev_task_link,
4581 sizeof tss_segment_32.prev_task_link)) 5128 sizeof tss_segment_32.prev_task_link))
4582 goto out; 5129 goto out;
@@ -4599,7 +5146,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4599 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5146 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
4600 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5147 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
4601 5148
4602 old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); 5149 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
4603 5150
4604 /* FIXME: Handle errors. Failure to read either TSS or their 5151 /* FIXME: Handle errors. Failure to read either TSS or their
4605 * descriptors should generate a pagefault. 5152 * descriptors should generate a pagefault.
@@ -4658,7 +5205,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4658 &nseg_desc); 5205 &nseg_desc);
4659 } 5206 }
4660 5207
4661 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); 5208 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
4662 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 5209 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
4663 tr_seg.type = 11; 5210 tr_seg.type = 11;
4664 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); 5211 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
@@ -4689,17 +5236,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4689 5236
4690 kvm_set_cr8(vcpu, sregs->cr8); 5237 kvm_set_cr8(vcpu, sregs->cr8);
4691 5238
4692 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; 5239 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
4693 kvm_x86_ops->set_efer(vcpu, sregs->efer); 5240 kvm_x86_ops->set_efer(vcpu, sregs->efer);
4694 kvm_set_apic_base(vcpu, sregs->apic_base); 5241 kvm_set_apic_base(vcpu, sregs->apic_base);
4695 5242
4696 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 5243 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
4697
4698 mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
4699 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 5244 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
4700 vcpu->arch.cr0 = sregs->cr0; 5245 vcpu->arch.cr0 = sregs->cr0;
4701 5246
4702 mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; 5247 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
4703 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5248 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
4704 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5249 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
4705 load_pdptrs(vcpu, vcpu->arch.cr3); 5250 load_pdptrs(vcpu, vcpu->arch.cr3);
@@ -4734,7 +5279,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4734 /* Older userspace won't unhalt the vcpu on reset. */ 5279 /* Older userspace won't unhalt the vcpu on reset. */
4735 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && 5280 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
4736 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && 5281 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
4737 !(vcpu->arch.cr0 & X86_CR0_PE)) 5282 !is_protmode(vcpu))
4738 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5283 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4739 5284
4740 vcpu_put(vcpu); 5285 vcpu_put(vcpu);
@@ -4832,11 +5377,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4832{ 5377{
4833 unsigned long vaddr = tr->linear_address; 5378 unsigned long vaddr = tr->linear_address;
4834 gpa_t gpa; 5379 gpa_t gpa;
5380 int idx;
4835 5381
4836 vcpu_load(vcpu); 5382 vcpu_load(vcpu);
4837 down_read(&vcpu->kvm->slots_lock); 5383 idx = srcu_read_lock(&vcpu->kvm->srcu);
4838 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); 5384 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
4839 up_read(&vcpu->kvm->slots_lock); 5385 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4840 tr->physical_address = gpa; 5386 tr->physical_address = gpa;
4841 tr->valid = gpa != UNMAPPED_GVA; 5387 tr->valid = gpa != UNMAPPED_GVA;
4842 tr->writeable = 1; 5388 tr->writeable = 1;
@@ -4917,14 +5463,14 @@ EXPORT_SYMBOL_GPL(fx_init);
4917 5463
4918void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5464void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
4919{ 5465{
4920 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) 5466 if (vcpu->guest_fpu_loaded)
4921 return; 5467 return;
4922 5468
4923 vcpu->guest_fpu_loaded = 1; 5469 vcpu->guest_fpu_loaded = 1;
4924 kvm_fx_save(&vcpu->arch.host_fx_image); 5470 kvm_fx_save(&vcpu->arch.host_fx_image);
4925 kvm_fx_restore(&vcpu->arch.guest_fx_image); 5471 kvm_fx_restore(&vcpu->arch.guest_fx_image);
5472 trace_kvm_fpu(1);
4926} 5473}
4927EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
4928 5474
4929void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5475void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
4930{ 5476{
@@ -4935,8 +5481,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
4935 kvm_fx_save(&vcpu->arch.guest_fx_image); 5481 kvm_fx_save(&vcpu->arch.guest_fx_image);
4936 kvm_fx_restore(&vcpu->arch.host_fx_image); 5482 kvm_fx_restore(&vcpu->arch.host_fx_image);
4937 ++vcpu->stat.fpu_reload; 5483 ++vcpu->stat.fpu_reload;
5484 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
5485 trace_kvm_fpu(0);
4938} 5486}
4939EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
4940 5487
4941void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 5488void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
4942{ 5489{
@@ -5088,11 +5635,13 @@ fail:
5088 5635
5089void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 5636void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
5090{ 5637{
5638 int idx;
5639
5091 kfree(vcpu->arch.mce_banks); 5640 kfree(vcpu->arch.mce_banks);
5092 kvm_free_lapic(vcpu); 5641 kvm_free_lapic(vcpu);
5093 down_read(&vcpu->kvm->slots_lock); 5642 idx = srcu_read_lock(&vcpu->kvm->srcu);
5094 kvm_mmu_destroy(vcpu); 5643 kvm_mmu_destroy(vcpu);
5095 up_read(&vcpu->kvm->slots_lock); 5644 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5096 free_page((unsigned long)vcpu->arch.pio_data); 5645 free_page((unsigned long)vcpu->arch.pio_data);
5097} 5646}
5098 5647
@@ -5103,6 +5652,12 @@ struct kvm *kvm_arch_create_vm(void)
5103 if (!kvm) 5652 if (!kvm)
5104 return ERR_PTR(-ENOMEM); 5653 return ERR_PTR(-ENOMEM);
5105 5654
5655 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
5656 if (!kvm->arch.aliases) {
5657 kfree(kvm);
5658 return ERR_PTR(-ENOMEM);
5659 }
5660
5106 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5661 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
5107 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 5662 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
5108 5663
@@ -5159,16 +5714,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
5159 put_page(kvm->arch.apic_access_page); 5714 put_page(kvm->arch.apic_access_page);
5160 if (kvm->arch.ept_identity_pagetable) 5715 if (kvm->arch.ept_identity_pagetable)
5161 put_page(kvm->arch.ept_identity_pagetable); 5716 put_page(kvm->arch.ept_identity_pagetable);
5717 cleanup_srcu_struct(&kvm->srcu);
5718 kfree(kvm->arch.aliases);
5162 kfree(kvm); 5719 kfree(kvm);
5163} 5720}
5164 5721
5165int kvm_arch_set_memory_region(struct kvm *kvm, 5722int kvm_arch_prepare_memory_region(struct kvm *kvm,
5166 struct kvm_userspace_memory_region *mem, 5723 struct kvm_memory_slot *memslot,
5167 struct kvm_memory_slot old, 5724 struct kvm_memory_slot old,
5725 struct kvm_userspace_memory_region *mem,
5168 int user_alloc) 5726 int user_alloc)
5169{ 5727{
5170 int npages = mem->memory_size >> PAGE_SHIFT; 5728 int npages = memslot->npages;
5171 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
5172 5729
5173 /*To keep backward compatibility with older userspace, 5730 /*To keep backward compatibility with older userspace,
5174 *x86 needs to handle !user_alloc case. 5731 *x86 needs to handle !user_alloc case.
@@ -5188,26 +5745,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
5188 if (IS_ERR((void *)userspace_addr)) 5745 if (IS_ERR((void *)userspace_addr))
5189 return PTR_ERR((void *)userspace_addr); 5746 return PTR_ERR((void *)userspace_addr);
5190 5747
5191 /* set userspace_addr atomically for kvm_hva_to_rmapp */
5192 spin_lock(&kvm->mmu_lock);
5193 memslot->userspace_addr = userspace_addr; 5748 memslot->userspace_addr = userspace_addr;
5194 spin_unlock(&kvm->mmu_lock);
5195 } else {
5196 if (!old.user_alloc && old.rmap) {
5197 int ret;
5198
5199 down_write(&current->mm->mmap_sem);
5200 ret = do_munmap(current->mm, old.userspace_addr,
5201 old.npages * PAGE_SIZE);
5202 up_write(&current->mm->mmap_sem);
5203 if (ret < 0)
5204 printk(KERN_WARNING
5205 "kvm_vm_ioctl_set_memory_region: "
5206 "failed to munmap memory\n");
5207 }
5208 } 5749 }
5209 } 5750 }
5210 5751
5752
5753 return 0;
5754}
5755
5756void kvm_arch_commit_memory_region(struct kvm *kvm,
5757 struct kvm_userspace_memory_region *mem,
5758 struct kvm_memory_slot old,
5759 int user_alloc)
5760{
5761
5762 int npages = mem->memory_size >> PAGE_SHIFT;
5763
5764 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
5765 int ret;
5766
5767 down_write(&current->mm->mmap_sem);
5768 ret = do_munmap(current->mm, old.userspace_addr,
5769 old.npages * PAGE_SIZE);
5770 up_write(&current->mm->mmap_sem);
5771 if (ret < 0)
5772 printk(KERN_WARNING
5773 "kvm_vm_ioctl_set_memory_region: "
5774 "failed to munmap memory\n");
5775 }
5776
5211 spin_lock(&kvm->mmu_lock); 5777 spin_lock(&kvm->mmu_lock);
5212 if (!kvm->arch.n_requested_mmu_pages) { 5778 if (!kvm->arch.n_requested_mmu_pages) {
5213 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 5779 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -5216,8 +5782,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
5216 5782
5217 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 5783 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
5218 spin_unlock(&kvm->mmu_lock); 5784 spin_unlock(&kvm->mmu_lock);
5219
5220 return 0;
5221} 5785}
5222 5786
5223void kvm_arch_flush_shadow(struct kvm *kvm) 5787void kvm_arch_flush_shadow(struct kvm *kvm)