author    Gleb Natapov <gleb@redhat.com>  2010-10-14 05:22:50 -0400
committer Avi Kivity <avi@redhat.com>     2011-01-12 04:23:12 -0500
commit    344d9588a9df06182684168be4f1408b55c7da3e
tree      16890e3f0f10ac767265e650a1d6d38b78780040
parent    ca3f10172eea9b95bbb66487656f3c3e93855702

KVM: Add PV MSR to enable asynchronous page faults delivery.

Guest enables async PF vcpu functionality using this MSR.

Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
 Documentation/kvm/cpuid.txt     |  3
 Documentation/kvm/msr.txt       | 35
 arch/x86/include/asm/kvm_host.h |  2
 arch/x86/include/asm/kvm_para.h |  4
 arch/x86/kvm/x86.c              | 38
 include/linux/kvm.h             |  1
 include/linux/kvm_host.h        |  1
 virt/kvm/async_pf.c             | 20
 8 files changed, 101 insertions(+), 3 deletions(-)
diff --git a/Documentation/kvm/cpuid.txt b/Documentation/kvm/cpuid.txt
index 14a12ea92b7f..882068538c9c 100644
--- a/Documentation/kvm/cpuid.txt
+++ b/Documentation/kvm/cpuid.txt
@@ -36,6 +36,9 @@ KVM_FEATURE_MMU_OP || 2 || deprecated.
 KVM_FEATURE_CLOCKSOURCE2           ||     3 || kvmclock available at msrs
                                    ||       || 0x4b564d00 and 0x4b564d01
 ------------------------------------------------------------------------------
+KVM_FEATURE_ASYNC_PF               ||     4 || async pf can be enabled by
+                                   ||       || writing to msr 0x4b564d02
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
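
For reference, a minimal guest-side probe of this feature bit might look as
follows. This is a sketch only: it assumes the standard KVM CPUID signature
leaf (0x40000000, "KVMKVMKVM") and the features leaf 0x40000001 reporting the
flags in eax; the function name is illustrative and not part of this patch.

#include <stdbool.h>
#include <cpuid.h>			/* GCC/clang __cpuid() macro */

#define KVM_CPUID_SIGNATURE	0x40000000
#define KVM_CPUID_FEATURES	0x40000001
#define KVM_FEATURE_ASYNC_PF	4

static bool kvm_async_pf_supported(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* "KVMKVMKVM\0\0\0" is reported in ebx/ecx/edx of the signature leaf. */
	__cpuid(KVM_CPUID_SIGNATURE, eax, ebx, ecx, edx);
	if (ebx != 0x4b4d564b || ecx != 0x564b4d56 || edx != 0x0000004d)
		return false;

	__cpuid(KVM_CPUID_FEATURES, eax, ebx, ecx, edx);
	return eax & (1u << KVM_FEATURE_ASYNC_PF);
}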
diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt
index 8ddcfe84c09a..e67b4a8783df 100644
--- a/Documentation/kvm/msr.txt
+++ b/Documentation/kvm/msr.txt
@@ -3,7 +3,6 @@ Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
 =====================================================
 
 KVM makes use of some custom MSRs to service some requests.
-At present, this facility is only used by kvmclock.
 
 Custom MSRs have a range reserved for them, that goes from
 0x4b564d00 to 0x4b564dff. There are MSRs outside this area,
@@ -151,3 +150,37 @@ MSR_KVM_SYSTEM_TIME: 0x12
 		return PRESENT;
 	} else
 		return NON_PRESENT;
+
+MSR_KVM_ASYNC_PF_EN: 0x4b564d02
+	data: Bits 63-6 hold the 64-byte aligned physical address of a
+	64 byte memory area which must be in guest RAM and must be
+	zeroed. Bits 5-1 are reserved and should be zero. Bit 0 is 1
+	when asynchronous page faults are enabled on the vcpu, 0 when
+	disabled.
+
+	The first 4 bytes of the 64 byte memory location will be written
+	to by the hypervisor at the time of asynchronous page fault (APF)
+	injection to indicate the type of asynchronous page fault. A value
+	of 1 means that the page referred to by the page fault is not
+	present. A value of 2 means that the page is now available. Disabling
+	interrupts inhibits APFs. The guest must not enable interrupts
+	before the reason is read, or it may be overwritten by another
+	APF. Since APF uses the same exception vector as a regular page
+	fault, the guest must reset the reason to 0 before it does
+	anything that can generate a normal page fault. If the APF
+	reason is 0 during a page fault, it means that this is a regular
+	page fault.
+
+	During delivery of a type 1 APF, cr2 contains a token that will
+	be used to notify the guest when the missing page becomes
+	available. When the page becomes available, a type 2 APF is sent
+	with cr2 set to the token associated with the page. There is a
+	special token, 0xffffffff, which tells the vcpu that it should
+	wake up all processes waiting for APFs and that no individual
+	type 2 APFs will be sent.
+
+	If APF is disabled while there are outstanding APFs, they will
+	not be delivered.
+
+	Currently a type 2 APF will always be delivered on the same vcpu
+	as the type 1 was, but the guest should not rely on that.
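
To make the enable sequence above concrete, here is a minimal guest-kernel
style sketch: hand the hypervisor a zeroed, 64-byte aligned per-vcpu reason
area and set bit 0. This patch only adds the host side; the per-cpu variable
and function names below are illustrative, not part of the patch.

#include <linux/percpu.h>
#include <asm/kvm_para.h>	/* MSR_KVM_ASYNC_PF_EN, KVM_ASYNC_PF_ENABLED */
#include <asm/msr.h>		/* wrmsrl() */

/* 64-byte, 64-byte aligned, zero-initialized reason area, one per vcpu. */
struct kvm_apf_reason_area {
	u32 reason;
	u8  pad[60];
};
static DEFINE_PER_CPU(struct kvm_apf_reason_area, apf_reason) __aligned(64);

static void kvm_apf_enable_this_cpu(void)
{
	u64 pa = __pa(this_cpu_ptr(&apf_reason));	/* bits 63-6: area address */

	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);	/* bit 0: enable */
}

static void kvm_apf_disable_this_cpu(void)
{
	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);	/* clearing bit 0 disables APF delivery */
}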
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c3076bcf5ef7..0d7039804b4c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -419,6 +419,8 @@ struct kvm_vcpu_arch {
 	struct {
 		bool halted;
 		gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+		struct gfn_to_hva_cache data;
+		u64 msr_val;
 	} apf;
 };
 
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index e3faaaf4301e..8662ae0a035c 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -20,6 +20,7 @@
  * are available. The use of 0x11 and 0x12 is deprecated
  */
 #define KVM_FEATURE_CLOCKSOURCE2	3
+#define KVM_FEATURE_ASYNC_PF		4
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -32,9 +33,12 @@
 /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
 #define MSR_KVM_WALL_CLOCK_NEW	0x4b564d00
 #define MSR_KVM_SYSTEM_TIME_NEW	0x4b564d01
+#define MSR_KVM_ASYNC_PF_EN	0x4b564d02
 
 #define KVM_MAX_MMU_OP_BATCH	32
 
+#define KVM_ASYNC_PF_ENABLED	(1 << 0)
+
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE	1
 #define KVM_MMU_OP_FLUSH_TLB	2
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd254779d1cc..063c07296764 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -783,12 +783,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	7
+#define KVM_SAVE_MSRS_BEGIN	8
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
-	HV_X64_MSR_APIC_ASSIST_PAGE,
+	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -1425,6 +1425,29 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	return 0;
 }
 
+static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
+{
+	gpa_t gpa = data & ~0x3f;
+
+	/* Bits 1:5 are reserved, should be zero */
+	if (data & 0x3e)
+		return 1;
+
+	vcpu->arch.apf.msr_val = data;
+
+	if (!(data & KVM_ASYNC_PF_ENABLED)) {
+		kvm_clear_async_pf_completion_queue(vcpu);
+		kvm_async_pf_hash_reset(vcpu);
+		return 0;
+	}
+
+	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
+		return 1;
+
+	kvm_async_pf_wakeup_all(vcpu);
+	return 0;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	switch (msr) {
@@ -1506,6 +1529,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		}
 		break;
 	}
+	case MSR_KVM_ASYNC_PF_EN:
+		if (kvm_pv_enable_async_pf(vcpu, data))
+			return 1;
+		break;
 	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
@@ -1782,6 +1809,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_KVM_SYSTEM_TIME_NEW:
 		data = vcpu->arch.time;
 		break;
+	case MSR_KVM_ASYNC_PF_EN:
+		data = vcpu->arch.apf.msr_val;
+		break;
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_TYPE:
 	case MSR_IA32_MCG_CAP:
@@ -1929,6 +1959,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 	case KVM_CAP_XSAVE:
+	case KVM_CAP_ASYNC_PF:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -5792,6 +5823,8 @@ free_vcpu:
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.apf.msr_val = 0;
+
 	vcpu_load(vcpu);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
@@ -5811,6 +5844,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.dr7 = DR7_FIXED_1;
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	vcpu->arch.apf.msr_val = 0;
 
 	kvm_clear_async_pf_completion_queue(vcpu);
 	kvm_async_pf_hash_reset(vcpu);
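
Because MSR_KVM_ASYNC_PF_EN is added to msrs_to_save above, userspace can save
and restore the guest's async PF configuration (for example across migration)
with the existing KVM_GET_MSRS/KVM_SET_MSRS ioctls. A hedged userspace sketch,
assuming a valid vcpu fd; the wrapper function name is illustrative:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define MSR_KVM_ASYNC_PF_EN 0x4b564d02	/* mirrors asm/kvm_para.h */

static int read_async_pf_msr(int vcpu_fd, uint64_t *val)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} msrs;

	memset(&msrs, 0, sizeof(msrs));
	msrs.hdr.nmsrs = 1;
	msrs.entry.index = MSR_KVM_ASYNC_PF_EN;

	/* KVM_GET_MSRS returns the number of MSRs successfully read. */
	if (ioctl(vcpu_fd, KVM_GET_MSRS, &msrs) != 1)
		return -1;

	*val = msrs.entry.data;
	return 0;
}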
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 919ae53adc5c..ea2dc1a2e13d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -540,6 +540,7 @@ struct kvm_ppc_pvinfo {
 #endif
 #define KVM_CAP_PPC_GET_PVINFO 57
 #define KVM_CAP_PPC_IRQ_LEVEL 58
+#define KVM_CAP_ASYNC_PF 59
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
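
Userspace can probe for this capability before assuming the MSR exists. A
minimal sketch using the standard KVM_CHECK_EXTENSION ioctl on /dev/kvm (the
helper name is illustrative):

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int host_has_async_pf(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	int r;

	if (kvm_fd < 0)
		return 0;
	/* KVM_CHECK_EXTENSION returns > 0 if the capability is supported. */
	r = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ASYNC_PF);
	close(kvm_fd);
	return r > 0;
}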
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e6748204cd56..ee4314e15ead 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -93,6 +93,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
 void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
 int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 		       struct kvm_arch_async_pf *arch);
+int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
 struct kvm_vcpu {
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index e97eae965a4c..1f59498561b2 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -190,3 +190,23 @@ retry_sync:
 	kmem_cache_free(async_pf_cache, work);
 	return 0;
 }
+
+int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
+{
+	struct kvm_async_pf *work;
+
+	if (!list_empty(&vcpu->async_pf.done))
+		return 0;
+
+	work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
+	if (!work)
+		return -ENOMEM;
+
+	work->page = bad_page;
+	get_page(bad_page);
+	INIT_LIST_HEAD(&work->queue); /* for list_del to work */
+
+	list_add_tail(&work->link, &vcpu->async_pf.done);
+	vcpu->async_pf.queued++;
+	return 0;
+}
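
The special work item queued by kvm_async_pf_wakeup_all() corresponds to the
"wake up all" case documented in msr.txt above (a type 2 APF with token
0xffffffff). A self-contained sketch of the dispatch the guest is expected to
perform (reason values 1 and 2 come from that documentation; the names and the
enum are illustrative only):

#include <stdint.h>

#define KVM_PV_REASON_PAGE_NOT_PRESENT	1
#define KVM_PV_REASON_PAGE_READY	2
#define KVM_PV_APF_BROADCAST_TOKEN	0xffffffffu

enum apf_action { APF_REGULAR_FAULT, APF_WAIT_FOR_PAGE, APF_WAKE_ONE, APF_WAKE_ALL };

static enum apf_action classify_apf(volatile uint32_t *reason, uint32_t token)
{
	uint32_t r = *reason;

	*reason = 0;	/* must be cleared before anything that can fault again */

	switch (r) {
	case KVM_PV_REASON_PAGE_NOT_PRESENT:
		return APF_WAIT_FOR_PAGE;	/* cr2 token identifies the page */
	case KVM_PV_REASON_PAGE_READY:
		return token == KVM_PV_APF_BROADCAST_TOKEN ? APF_WAKE_ALL : APF_WAKE_ONE;
	default:
		return APF_REGULAR_FAULT;	/* reason 0: ordinary #PF */
	}
}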