Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--	arch/x86/kvm/x86.c	377
1 file changed, 293 insertions, 84 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 77c9d8673dc..73c6a4268bf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -44,6 +44,7 @@
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
 #include <linux/hash.h>
+#include <linux/pci.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -347,6 +348,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 	vcpu->arch.cr2 = fault->address;
 	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
 }
+EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
 
 void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
@@ -579,6 +581,22 @@ static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
 }
 
+static bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_SMEP));
+}
+
+static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
+}
+
 static void update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -598,14 +616,20 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
-	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
-
+	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
+				   X86_CR4_PAE | X86_CR4_SMEP;
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
 
 	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
 		return 1;
 
+	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
+		return 1;
+
+	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS))
+		return 1;
+
 	if (is_long_mode(vcpu)) {
 		if (!(cr4 & X86_CR4_PAE))
 			return 1;
@@ -615,11 +639,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 				 kvm_read_cr3(vcpu)))
 		return 1;
 
-	if (cr4 & X86_CR4_VMXE)
+	if (kvm_x86_ops->set_cr4(vcpu, cr4))
 		return 1;
 
-	kvm_x86_ops->set_cr4(vcpu, cr4);
-
 	if ((cr4 ^ old_cr4) & pdptr_bits)
 		kvm_mmu_reset_context(vcpu);
 
@@ -787,12 +809,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	8
+#define KVM_SAVE_MSRS_BEGIN	9
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
-	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
+	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -1388,7 +1410,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			return 1;
 		kvm_x86_ops->patch_hypercall(vcpu, instructions);
 		((unsigned char *)instructions)[3] = 0xc3; /* ret */
-		if (copy_to_user((void __user *)addr, instructions, 4))
+		if (__copy_to_user((void __user *)addr, instructions, 4))
 			return 1;
 		kvm->arch.hv_hypercall = data;
 		break;
@@ -1415,7 +1437,7 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
 		if (kvm_is_error_hva(addr))
 			return 1;
-		if (clear_user((void __user *)addr, PAGE_SIZE))
+		if (__clear_user((void __user *)addr, PAGE_SIZE))
 			return 1;
 		vcpu->arch.hv_vapic = data;
 		break;
@@ -1467,6 +1489,35 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void accumulate_steal_time(struct kvm_vcpu *vcpu)
+{
+	u64 delta;
+
+	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+		return;
+
+	delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
+	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+	vcpu->arch.st.accum_steal = delta;
+}
+
+static void record_steal_time(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+		return;
+
+	if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
+		return;
+
+	vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
+	vcpu->arch.st.steal.version += 2;
+	vcpu->arch.st.accum_steal = 0;
+
+	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	switch (msr) {
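record_steal_time() bumps the version field by two around each update of the steal counter, so a guest can detect a torn read by sampling version before and after the payload. A minimal guest-side reader sketch, not part of this patch, assuming the kvm_steal_time layout with version and steal fields:

	static u64 read_steal(struct kvm_steal_time *st)
	{
		u32 version;
		u64 steal;

		do {
			version = st->version;
			rmb();		/* read version before the payload */
			steal = st->steal;
			rmb();		/* read payload before re-checking version */
		} while (version != st->version);

		return steal;
	}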
@@ -1549,6 +1600,33 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		if (kvm_pv_enable_async_pf(vcpu, data))
 			return 1;
 		break;
+	case MSR_KVM_STEAL_TIME:
+
+		if (unlikely(!sched_info_on()))
+			return 1;
+
+		if (data & KVM_STEAL_RESERVED_MASK)
+			return 1;
+
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
+						data & KVM_STEAL_VALID_BITS))
+			return 1;
+
+		vcpu->arch.st.msr_val = data;
+
+		if (!(data & KVM_MSR_ENABLED))
+			break;
+
+		vcpu->arch.st.last_steal = current->sched_info.run_delay;
+
+		preempt_disable();
+		accumulate_steal_time(vcpu);
+		preempt_enable();
+
+		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+
+		break;
+
 	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
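To turn the new MSR on, a guest writes the guest-physical address of its kvm_steal_time area with the KVM_MSR_ENABLED bit set, mirroring the MSR_KVM_ASYNC_PF_EN convention. A rough guest-side sketch, not part of this patch; the alignment simply keeps the address clear of the bits checked by KVM_STEAL_RESERVED_MASK:

	static struct kvm_steal_time steal_time __aligned(64);

	static void kvm_enable_steal_time(void)
	{
		u64 pa = __pa(&steal_time) | KVM_MSR_ENABLED;

		wrmsrl(MSR_KVM_STEAL_TIME, pa);
	}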
@@ -1834,6 +1912,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_KVM_ASYNC_PF_EN:
 		data = vcpu->arch.apf.msr_val;
 		break;
+	case MSR_KVM_STEAL_TIME:
+		data = vcpu->arch.st.msr_val;
+		break;
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_TYPE:
 	case MSR_IA32_MCG_CAP:
@@ -2015,7 +2096,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = 0;
 		break;
 	case KVM_CAP_IOMMU:
-		r = iommu_found();
+		r = iommu_present(&pci_bus_type);
 		break;
 	case KVM_CAP_MCE:
 		r = KVM_MAX_MCE_BANKS;
@@ -2145,6 +2226,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		kvm_migrate_timers(vcpu);
 		vcpu->cpu = cpu;
 	}
+
+	accumulate_steal_time(vcpu);
+	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2283,6 +2367,13 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	entry->flags = 0;
 }
 
+static bool supported_xcr0_bit(unsigned bit)
+{
+	u64 mask = ((u64)1 << bit);
+
+	return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+}
+
 #define F(x) bit(X86_FEATURE_##x)
 
 static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
@@ -2328,7 +2419,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* Reserved, DCA */ | F(XMM4_1) |
 		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
 		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
-		F(F16C);
+		F(F16C) | F(RDRAND);
 	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =
 		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
@@ -2342,6 +2433,10 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
 		F(PMM) | F(PMM_EN);
 
+	/* cpuid 7.0.ebx */
+	const u32 kvm_supported_word9_x86_features =
+		F(SMEP) | F(FSGSBASE) | F(ERMS);
+
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
 	do_cpuid_1_ent(entry, function, index);
@@ -2376,7 +2471,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		}
 		break;
 	}
-	/* function 4 and 0xb have additional index. */
+	/* function 4 has additional index. */
 	case 4: {
 		int i, cache_type;
 
@@ -2393,6 +2488,22 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		}
 		break;
 	}
+	case 7: {
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		/* Mask ebx against host capbability word 9 */
+		if (index == 0) {
+			entry->ebx &= kvm_supported_word9_x86_features;
+			cpuid_mask(&entry->ebx, 9);
+		} else
+			entry->ebx = 0;
+		entry->eax = 0;
+		entry->ecx = 0;
+		entry->edx = 0;
+		break;
+	}
+	case 9:
+		break;
+	/* function 0xb has additional index. */
 	case 0xb: {
 		int i, level_type;
 
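The effect of the new leaf-7 masking is visible to userspace through the existing KVM_GET_SUPPORTED_CPUID ioctl on /dev/kvm; a minimal query sketch, not part of this patch, with error handling omitted and the entry count picked arbitrarily:

	#include <fcntl.h>
	#include <linux/kvm.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		struct kvm_cpuid2 *cpuid;

		cpuid = calloc(1, sizeof(*cpuid) + 100 * sizeof(struct kvm_cpuid_entry2));
		cpuid->nent = 100;
		ioctl(kvm, KVM_GET_SUPPORTED_CPUID, cpuid);

		for (unsigned i = 0; i < cpuid->nent; i++)
			if (cpuid->entries[i].function == 7 && cpuid->entries[i].index == 0)
				printf("leaf 7 ebx: %#x\n", cpuid->entries[i].ebx);
		return 0;
	}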
@@ -2410,16 +2521,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			break;
 		}
 	case 0xd: {
-		int i;
+		int idx, i;
 
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		for (i = 1; *nent < maxnent && i < 64; ++i) {
-			if (entry[i].eax == 0)
+		for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) {
+			do_cpuid_1_ent(&entry[i], function, idx);
+			if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
 				continue;
-			do_cpuid_1_ent(&entry[i], function, i);
 			entry[i].flags |=
 			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 			++*nent;
+			++i;
 		}
 		break;
 	}
@@ -2438,6 +2550,10 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
 			     (1 << KVM_FEATURE_ASYNC_PF) |
 			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+
+		if (sched_info_on())
+			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
+
 		entry->ebx = 0;
 		entry->ecx = 0;
 		entry->edx = 0;
@@ -2451,6 +2567,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->ecx &= kvm_supported_word6_x86_features;
 		cpuid_mask(&entry->ecx, 6);
 		break;
+	case 0x80000008: {
+		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
+		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+		unsigned phys_as = entry->eax & 0xff;
+
+		if (!g_phys_as)
+			g_phys_as = phys_as;
+		entry->eax = g_phys_as | (virt_as << 8);
+		entry->ebx = entry->edx = 0;
+		break;
+	}
+	case 0x80000019:
+		entry->ecx = entry->edx = 0;
+		break;
+	case 0x8000001a:
+		break;
+	case 0x8000001d:
+		break;
 	/*Add support for Centaur's CPUID instruction*/
 	case 0xC0000000:
 		/*Just support up to 0xC0000004 now*/
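Worked example for the new 0x80000008 case above: on a host whose CPUID 0x80000008 returns EAX = 0x3028 (40 physical bits, 48 virtual bits, guest-physical field zero), g_phys_as falls back to phys_as = 40, virt_as stays at max(48, 48) = 48, and the leaf is reported to the guest as EAX = 40 | (48 << 8) = 0x3028 with EBX and EDX cleared.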
@@ -2460,10 +2594,16 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		entry->edx &= kvm_supported_word5_x86_features;
 		cpuid_mask(&entry->edx, 5);
 		break;
+	case 3: /* Processor serial number */
+	case 5: /* MONITOR/MWAIT */
+	case 6: /* Thermal management */
+	case 0xA: /* Architectural Performance Monitoring */
+	case 0x80000007: /* Advanced power management */
 	case 0xC0000002:
 	case 0xC0000003:
 	case 0xC0000004:
-		/*Now nothing to do, reserved for the future*/
+	default:
+		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
 		break;
 	}
 
@@ -3817,7 +3957,7 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
 					  exception);
 }
 
-static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
 			       gva_t addr, void *val, unsigned int bytes,
 			       struct x86_exception *exception)
 {
@@ -3827,6 +3967,7 @@ static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
 					  exception);
 }
+EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
 
 static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 				      gva_t addr, void *val, unsigned int bytes,
@@ -3836,7 +3977,7 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 }
 
-static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 				       gva_t addr, void *val,
 				       unsigned int bytes,
 				       struct x86_exception *exception)
@@ -3868,6 +4009,42 @@ static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 out:
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+
+static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
+				gpa_t *gpa, struct x86_exception *exception,
+				bool write)
+{
+	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+
+	if (vcpu_match_mmio_gva(vcpu, gva) &&
+	    check_write_user_access(vcpu, write, access,
+	    vcpu->arch.access)) {
+		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
+			(gva & (PAGE_SIZE - 1));
+		trace_vcpu_match_mmio(gva, *gpa, write, false);
+		return 1;
+	}
+
+	if (write)
+		access |= PFERR_WRITE_MASK;
+
+	*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+
+	if (*gpa == UNMAPPED_GVA)
+		return -1;
+
+	/* For APIC access vmexit */
+	if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+		return 1;
+
+	if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
+		trace_vcpu_match_mmio(gva, *gpa, write, true);
+		return 1;
+	}
+
+	return 0;
+}
 
 static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
 				  unsigned long addr,
@@ -3876,8 +4053,8 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
 				  struct x86_exception *exception)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	gpa_t gpa;
-	int handled;
+	int handled, ret;
 
 	if (vcpu->mmio_read_completed) {
 		memcpy(val, vcpu->mmio_data, bytes);
@@ -3887,13 +4064,12 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
 		return X86EMUL_CONTINUE;
 	}
 
-	gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception);
+	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, false);
 
-	if (gpa == UNMAPPED_GVA)
+	if (ret < 0)
 		return X86EMUL_PROPAGATE_FAULT;
 
-	/* For APIC access vmexit */
-	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+	if (ret)
 		goto mmio;
 
 	if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
@@ -3944,16 +4120,16 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 					   struct x86_exception *exception,
 					   struct kvm_vcpu *vcpu)
 {
 	gpa_t gpa;
-	int handled;
+	int handled, ret;
 
-	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
+	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, true);
 
-	if (gpa == UNMAPPED_GVA)
+	if (ret < 0)
 		return X86EMUL_PROPAGATE_FAULT;
 
 	/* For APIC access vmexit */
-	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+	if (ret)
 		goto mmio;
 
 	if (emulator_write_phys(vcpu, gpa, val, bytes))
@@ -4473,9 +4649,24 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
 		kvm_queue_exception(vcpu, ctxt->exception.vector);
 }
 
+static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
+			      const unsigned long *regs)
+{
+	memset(&ctxt->twobyte, 0,
+	       (void *)&ctxt->regs - (void *)&ctxt->twobyte);
+	memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
+
+	ctxt->fetch.start = 0;
+	ctxt->fetch.end = 0;
+	ctxt->io_read.pos = 0;
+	ctxt->io_read.end = 0;
+	ctxt->mem_read.pos = 0;
+	ctxt->mem_read.end = 0;
+}
+
 static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 {
-	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int cs_db, cs_l;
 
 	/*
@@ -4488,40 +4679,38 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
-	vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
-	vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
-	vcpu->arch.emulate_ctxt.mode =
-		(!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
-		(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
-		? X86EMUL_MODE_VM86 : cs_l
-		? X86EMUL_MODE_PROT64 : cs_db
-		? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-	vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu);
-	memset(c, 0, sizeof(struct decode_cache));
-	memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+	ctxt->eflags = kvm_get_rflags(vcpu);
+	ctxt->eip = kvm_rip_read(vcpu);
+	ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
+		     (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
+		     cs_l ? X86EMUL_MODE_PROT64 :
+		     cs_db ? X86EMUL_MODE_PROT32 :
+		     X86EMUL_MODE_PROT16;
+	ctxt->guest_mode = is_guest_mode(vcpu);
+
+	init_decode_cache(ctxt, vcpu->arch.regs);
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 {
-	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int ret;
 
 	init_emulate_ctxt(vcpu);
 
-	vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
-	vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
-	vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip +
-		inc_eip;
-	ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
+	ctxt->op_bytes = 2;
+	ctxt->ad_bytes = 2;
+	ctxt->_eip = ctxt->eip + inc_eip;
+	ret = emulate_int_real(ctxt, irq);
 
 	if (ret != X86EMUL_CONTINUE)
 		return EMULATE_FAIL;
 
-	vcpu->arch.emulate_ctxt.eip = c->eip;
-	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
-	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-	kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+	ctxt->eip = ctxt->_eip;
+	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+	kvm_rip_write(vcpu, ctxt->eip);
+	kvm_set_rflags(vcpu, ctxt->eflags);
 
 	if (irq == NMI_VECTOR)
 		vcpu->arch.nmi_pending = false;
@@ -4582,21 +4771,21 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    int insn_len)
 {
 	int r;
-	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	bool writeback = true;
 
 	kvm_clear_exception_queue(vcpu);
 
 	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
 		init_emulate_ctxt(vcpu);
-		vcpu->arch.emulate_ctxt.interruptibility = 0;
-		vcpu->arch.emulate_ctxt.have_exception = false;
-		vcpu->arch.emulate_ctxt.perm_ok = false;
+		ctxt->interruptibility = 0;
+		ctxt->have_exception = false;
+		ctxt->perm_ok = false;
 
-		vcpu->arch.emulate_ctxt.only_vendor_specific_insn
+		ctxt->only_vendor_specific_insn
 			= emulation_type & EMULTYPE_TRAP_UD;
 
-		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
+		r = x86_decode_insn(ctxt, insn, insn_len);
 
 		trace_kvm_emulate_insn_start(vcpu);
 		++vcpu->stat.insn_emulation;
@@ -4612,7 +4801,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	}
 
 	if (emulation_type & EMULTYPE_SKIP) {
-		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
+		kvm_rip_write(vcpu, ctxt->_eip);
 		return EMULATE_DONE;
 	}
 
@@ -4620,11 +4809,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	   changes registers values during IO operation */
 	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
 		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
-		memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+		memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
 	}
 
 restart:
-	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
+	r = x86_emulate_insn(ctxt);
 
 	if (r == EMULATION_INTERCEPTED)
 		return EMULATE_DONE;
@@ -4636,7 +4825,7 @@ restart:
 		return handle_emulation_failure(vcpu);
 	}
 
-	if (vcpu->arch.emulate_ctxt.have_exception) {
+	if (ctxt->have_exception) {
 		inject_emulated_exception(vcpu);
 		r = EMULATE_DONE;
 	} else if (vcpu->arch.pio.count) {
@@ -4655,13 +4844,12 @@ restart:
 		r = EMULATE_DONE;
 
 	if (writeback) {
-		toggle_interruptibility(vcpu,
-			vcpu->arch.emulate_ctxt.interruptibility);
-		kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+		toggle_interruptibility(vcpu, ctxt->interruptibility);
+		kvm_set_rflags(vcpu, ctxt->eflags);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
-		kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+		kvm_rip_write(vcpu, ctxt->eip);
 	} else
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
 
@@ -4878,6 +5066,30 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
 
+static void kvm_set_mmio_spte_mask(void)
+{
+	u64 mask;
+	int maxphyaddr = boot_cpu_data.x86_phys_bits;
+
+	/*
+	 * Set the reserved bits and the present bit of an paging-structure
+	 * entry to generate page fault with PFER.RSV = 1.
+	 */
+	mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr;
+	mask |= 1ull;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * If reserved bit is not supported, clear the present bit to disable
+	 * mmio page fault.
+	 */
+	if (maxphyaddr == 52)
+		mask &= ~1ull;
+#endif
+
+	kvm_mmu_set_mmio_spte_mask(mask);
+}
+
 int kvm_arch_init(void *opaque)
 {
 	int r;
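As a quick check of the kvm_set_mmio_spte_mask() computation above, for a CPU with 40 physical address bits:

	mask = ((1ull << (62 - 40 + 1)) - 1) << 40;	/* bits 62:40 */
	mask |= 1ull;					/* present bit */
	/* mask == 0x7fffff0000000001 */

i.e. the reserved physical-address bits 62:40 plus the present bit, so a guest access through such an spte faults with the reserved-bit error code set.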
@@ -4904,10 +5116,10 @@ int kvm_arch_init(void *opaque)
 	if (r)
 		goto out;
 
+	kvm_set_mmio_spte_mask();
 	kvm_init_msr_list();
 
 	kvm_x86_ops = ops;
-	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
@@ -5082,8 +5294,7 @@ int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 
 	kvm_x86_ops->patch_hypercall(vcpu, instruction);
 
-	return emulator_write_emulated(&vcpu->arch.emulate_ctxt,
-				       rip, instruction, 3, NULL);
+	return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
 }
 
 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
@@ -5384,6 +5595,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 1;
 			goto out;
 		}
+		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
+			record_steal_time(vcpu);
+
 	}
 
 	r = kvm_mmu_reload(vcpu);
@@ -5671,8 +5885,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	 * that usually, but some bad designed PV devices (vmware
 	 * backdoor interface) need this to work
 	 */
-	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
-	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 	vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 	}
 	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
@@ -5801,21 +6015,20 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
 		    bool has_error_code, u32 error_code)
 {
-	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int ret;
 
 	init_emulate_ctxt(vcpu);
 
-	ret = emulator_task_switch(&vcpu->arch.emulate_ctxt,
-				   tss_selector, reason, has_error_code,
-				   error_code);
+	ret = emulator_task_switch(ctxt, tss_selector, reason,
+				   has_error_code, error_code);
 
 	if (ret)
 		return EMULATE_FAIL;
 
-	memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
-	kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-	kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+	kvm_rip_write(vcpu, ctxt->eip);
+	kvm_set_rflags(vcpu, ctxt->eflags);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	return EMULATE_DONE;
 }
@@ -6093,12 +6306,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	if (r == 0)
 		r = kvm_mmu_setup(vcpu);
 	vcpu_put(vcpu);
-	if (r < 0)
-		goto free_vcpu;
 
-	return 0;
-free_vcpu:
-	kvm_x86_ops->vcpu_free(vcpu);
 	return r;
 }
 
@@ -6126,6 +6334,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	vcpu->arch.apf.msr_val = 0;
+	vcpu->arch.st.msr_val = 0;
 
 	kvmclock_reset(vcpu);
 