diff options
author | Gleb Natapov <gleb@redhat.com> | 2010-10-14 05:22:50 -0400 |
---|---|---|
committer | Avi Kivity <avi@redhat.com> | 2011-01-12 04:23:12 -0500 |
commit | 344d9588a9df06182684168be4f1408b55c7da3e (patch) | |
tree | 16890e3f0f10ac767265e650a1d6d38b78780040 | |
parent | ca3f10172eea9b95bbb66487656f3c3e93855702 (diff) |
KVM: Add PV MSR to enable asynchronous page faults delivery.
Guest enables async PF vcpu functionality using this MSR.
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
-rw-r--r-- | Documentation/kvm/cpuid.txt | 3 | ||||
-rw-r--r-- | Documentation/kvm/msr.txt | 35 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_para.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 38 | ||||
-rw-r--r-- | include/linux/kvm.h | 1 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 1 | ||||
-rw-r--r-- | virt/kvm/async_pf.c | 20 |
8 files changed, 101 insertions, 3 deletions
diff --git a/Documentation/kvm/cpuid.txt b/Documentation/kvm/cpuid.txt index 14a12ea92b7f..882068538c9c 100644 --- a/Documentation/kvm/cpuid.txt +++ b/Documentation/kvm/cpuid.txt | |||
@@ -36,6 +36,9 @@ KVM_FEATURE_MMU_OP || 2 || deprecated. | |||
36 | KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs | 36 | KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs |
37 | || || 0x4b564d00 and 0x4b564d01 | 37 | || || 0x4b564d00 and 0x4b564d01 |
38 | ------------------------------------------------------------------------------ | 38 | ------------------------------------------------------------------------------ |
39 | KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by | ||
40 | || || writing to msr 0x4b564d02 | ||
41 | ------------------------------------------------------------------------------ | ||
39 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side | 42 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side |
40 | || || per-cpu warps are expected in | 43 | || || per-cpu warps are expected in |
41 | || || kvmclock. | 44 | || || kvmclock. |
diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt index 8ddcfe84c09a..e67b4a8783df 100644 --- a/Documentation/kvm/msr.txt +++ b/Documentation/kvm/msr.txt | |||
@@ -3,7 +3,6 @@ Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 | |||
3 | ===================================================== | 3 | ===================================================== |
4 | 4 | ||
5 | KVM makes use of some custom MSRs to service some requests. | 5 | KVM makes use of some custom MSRs to service some requests. |
6 | At present, this facility is only used by kvmclock. | ||
7 | 6 | ||
8 | Custom MSRs have a range reserved for them, that goes from | 7 | Custom MSRs have a range reserved for them, that goes from |
9 | 0x4b564d00 to 0x4b564dff. There are MSRs outside this area, | 8 | 0x4b564d00 to 0x4b564dff. There are MSRs outside this area, |
@@ -151,3 +150,37 @@ MSR_KVM_SYSTEM_TIME: 0x12 | |||
151 | return PRESENT; | 150 | return PRESENT; |
152 | } else | 151 | } else |
153 | return NON_PRESENT; | 152 | return NON_PRESENT; |
153 | |||
154 | MSR_KVM_ASYNC_PF_EN: 0x4b564d02 | ||
155 | data: Bits 63-6 hold the 64-byte aligned physical address of a | ||
156 | 64-byte memory area which must be in guest RAM and must be | ||
157 | zeroed. Bits 5-1 are reserved and should be zero. Bit 0 is 1 | ||
158 | when asynchronous page faults are enabled on the vcpu, 0 when | ||
159 | disabled. | ||
160 | |||
161 | The first 4 bytes of the 64-byte memory location will be written to by | ||
162 | the hypervisor at the time of asynchronous page fault (APF) | ||
163 | injection to indicate the type of asynchronous page fault. Value | ||
164 | of 1 means that the page referred to by the page fault is not | ||
165 | present. Value 2 means that the page is now available. Disabling | ||
166 | interrupts inhibits APFs. The guest must not enable interrupts | ||
167 | before the reason is read, or it may be overwritten by another | ||
168 | APF. Since APF uses the same exception vector as a regular page | ||
169 | fault, the guest must reset the reason to 0 before it does | ||
170 | something that can generate a normal page fault. If the APF | ||
171 | reason is 0 during a page fault, it means that this is a regular | ||
172 | page fault. | ||
173 | |||
174 | During delivery of a type 1 APF, cr2 contains a token that will | ||
175 | be used to notify the guest when the missing page becomes | ||
176 | available. When the page becomes available, a type 2 APF is sent with | ||
177 | cr2 set to the token associated with the page. There is a special | ||
178 | token, 0xffffffff, which tells the vcpu that it should wake | ||
179 | up all processes waiting for APFs and that no individual type 2 APFs | ||
180 | will be sent. | ||
181 | |||
182 | If APF is disabled while there are outstanding APFs, they will | ||
183 | not be delivered. | ||
184 | |||
185 | Currently a type 2 APF is always delivered on the same vcpu as the | ||
186 | type 1 APF was, but the guest should not rely on that. | ||
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c3076bcf5ef7..0d7039804b4c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -419,6 +419,8 @@ struct kvm_vcpu_arch { | |||
419 | struct { | 419 | struct { |
420 | bool halted; | 420 | bool halted; |
421 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; | 421 | gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; |
422 | struct gfn_to_hva_cache data; | ||
423 | u64 msr_val; | ||
422 | } apf; | 424 | } apf; |
423 | }; | 425 | }; |
424 | 426 | ||
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index e3faaaf4301e..8662ae0a035c 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -20,6 +20,7 @@ | |||
20 | * are available. The use of 0x11 and 0x12 is deprecated | 20 | * are available. The use of 0x11 and 0x12 is deprecated |
21 | */ | 21 | */ |
22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | 22 | #define KVM_FEATURE_CLOCKSOURCE2 3 |
23 | #define KVM_FEATURE_ASYNC_PF 4 | ||
23 | 24 | ||
24 | /* The last 8 bits are used to indicate how to interpret the flags field | 25 | /* The last 8 bits are used to indicate how to interpret the flags field |
25 | * in pvclock structure. If no bits are set, all flags are ignored. | 26 | * in pvclock structure. If no bits are set, all flags are ignored. |
@@ -32,9 +33,12 @@ | |||
32 | /* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */ | 33 | /* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */ |
33 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 | 34 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 |
34 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | 35 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 |
36 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 | ||
35 | 37 | ||
36 | #define KVM_MAX_MMU_OP_BATCH 32 | 38 | #define KVM_MAX_MMU_OP_BATCH 32 |
37 | 39 | ||
40 | #define KVM_ASYNC_PF_ENABLED (1 << 0) | ||
41 | |||
38 | /* Operations for KVM_HC_MMU_OP */ | 42 | /* Operations for KVM_HC_MMU_OP */ |
39 | #define KVM_MMU_OP_WRITE_PTE 1 | 43 | #define KVM_MMU_OP_WRITE_PTE 1 |
40 | #define KVM_MMU_OP_FLUSH_TLB 2 | 44 | #define KVM_MMU_OP_FLUSH_TLB 2 |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd254779d1cc..063c07296764 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -783,12 +783,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr); | |||
783 | * kvm-specific. Those are put in the beginning of the list. | 783 | * kvm-specific. Those are put in the beginning of the list. |
784 | */ | 784 | */ |
785 | 785 | ||
786 | #define KVM_SAVE_MSRS_BEGIN 7 | 786 | #define KVM_SAVE_MSRS_BEGIN 8 |
787 | static u32 msrs_to_save[] = { | 787 | static u32 msrs_to_save[] = { |
788 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 788 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
789 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 789 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
790 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 790 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
791 | HV_X64_MSR_APIC_ASSIST_PAGE, | 791 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, |
792 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 792 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
793 | MSR_STAR, | 793 | MSR_STAR, |
794 | #ifdef CONFIG_X86_64 | 794 | #ifdef CONFIG_X86_64 |
@@ -1425,6 +1425,29 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1425 | return 0; | 1425 | return 0; |
1426 | } | 1426 | } |
1427 | 1427 | ||
1428 | static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | ||
1429 | { | ||
1430 | gpa_t gpa = data & ~0x3f; | ||
1431 | |||
1432 | /* Bits 5:1 are reserved and should be zero */ | ||
1433 | if (data & 0x3e) | ||
1434 | return 1; | ||
1435 | |||
1436 | vcpu->arch.apf.msr_val = data; | ||
1437 | |||
1438 | if (!(data & KVM_ASYNC_PF_ENABLED)) { | ||
1439 | kvm_clear_async_pf_completion_queue(vcpu); | ||
1440 | kvm_async_pf_hash_reset(vcpu); | ||
1441 | return 0; | ||
1442 | } | ||
1443 | |||
1444 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) | ||
1445 | return 1; | ||
1446 | |||
1447 | kvm_async_pf_wakeup_all(vcpu); | ||
1448 | return 0; | ||
1449 | } | ||
1450 | |||
1428 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1451 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1429 | { | 1452 | { |
1430 | switch (msr) { | 1453 | switch (msr) { |
@@ -1506,6 +1529,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1506 | } | 1529 | } |
1507 | break; | 1530 | break; |
1508 | } | 1531 | } |
1532 | case MSR_KVM_ASYNC_PF_EN: | ||
1533 | if (kvm_pv_enable_async_pf(vcpu, data)) | ||
1534 | return 1; | ||
1535 | break; | ||
1509 | case MSR_IA32_MCG_CTL: | 1536 | case MSR_IA32_MCG_CTL: |
1510 | case MSR_IA32_MCG_STATUS: | 1537 | case MSR_IA32_MCG_STATUS: |
1511 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1538 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
@@ -1782,6 +1809,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1782 | case MSR_KVM_SYSTEM_TIME_NEW: | 1809 | case MSR_KVM_SYSTEM_TIME_NEW: |
1783 | data = vcpu->arch.time; | 1810 | data = vcpu->arch.time; |
1784 | break; | 1811 | break; |
1812 | case MSR_KVM_ASYNC_PF_EN: | ||
1813 | data = vcpu->arch.apf.msr_val; | ||
1814 | break; | ||
1785 | case MSR_IA32_P5_MC_ADDR: | 1815 | case MSR_IA32_P5_MC_ADDR: |
1786 | case MSR_IA32_P5_MC_TYPE: | 1816 | case MSR_IA32_P5_MC_TYPE: |
1787 | case MSR_IA32_MCG_CAP: | 1817 | case MSR_IA32_MCG_CAP: |
@@ -1929,6 +1959,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1929 | case KVM_CAP_DEBUGREGS: | 1959 | case KVM_CAP_DEBUGREGS: |
1930 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1960 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1931 | case KVM_CAP_XSAVE: | 1961 | case KVM_CAP_XSAVE: |
1962 | case KVM_CAP_ASYNC_PF: | ||
1932 | r = 1; | 1963 | r = 1; |
1933 | break; | 1964 | break; |
1934 | case KVM_CAP_COALESCED_MMIO: | 1965 | case KVM_CAP_COALESCED_MMIO: |
@@ -5792,6 +5823,8 @@ free_vcpu: | |||
5792 | 5823 | ||
5793 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 5824 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
5794 | { | 5825 | { |
5826 | vcpu->arch.apf.msr_val = 0; | ||
5827 | |||
5795 | vcpu_load(vcpu); | 5828 | vcpu_load(vcpu); |
5796 | kvm_mmu_unload(vcpu); | 5829 | kvm_mmu_unload(vcpu); |
5797 | vcpu_put(vcpu); | 5830 | vcpu_put(vcpu); |
@@ -5811,6 +5844,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
5811 | vcpu->arch.dr7 = DR7_FIXED_1; | 5844 | vcpu->arch.dr7 = DR7_FIXED_1; |
5812 | 5845 | ||
5813 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5846 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5847 | vcpu->arch.apf.msr_val = 0; | ||
5814 | 5848 | ||
5815 | kvm_clear_async_pf_completion_queue(vcpu); | 5849 | kvm_clear_async_pf_completion_queue(vcpu); |
5816 | kvm_async_pf_hash_reset(vcpu); | 5850 | kvm_async_pf_hash_reset(vcpu); |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 919ae53adc5c..ea2dc1a2e13d 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -540,6 +540,7 @@ struct kvm_ppc_pvinfo { | |||
540 | #endif | 540 | #endif |
541 | #define KVM_CAP_PPC_GET_PVINFO 57 | 541 | #define KVM_CAP_PPC_GET_PVINFO 57 |
542 | #define KVM_CAP_PPC_IRQ_LEVEL 58 | 542 | #define KVM_CAP_PPC_IRQ_LEVEL 58 |
543 | #define KVM_CAP_ASYNC_PF 59 | ||
543 | 544 | ||
544 | #ifdef KVM_CAP_IRQ_ROUTING | 545 | #ifdef KVM_CAP_IRQ_ROUTING |
545 | 546 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e6748204cd56..ee4314e15ead 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -93,6 +93,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); | |||
93 | void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); | 93 | void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); |
94 | int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, | 94 | int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, |
95 | struct kvm_arch_async_pf *arch); | 95 | struct kvm_arch_async_pf *arch); |
96 | int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); | ||
96 | #endif | 97 | #endif |
97 | 98 | ||
98 | struct kvm_vcpu { | 99 | struct kvm_vcpu { |
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index e97eae965a4c..1f59498561b2 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c | |||
@@ -190,3 +190,23 @@ retry_sync: | |||
190 | kmem_cache_free(async_pf_cache, work); | 190 | kmem_cache_free(async_pf_cache, work); |
191 | return 0; | 191 | return 0; |
192 | } | 192 | } |
193 | |||
194 | int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) | ||
195 | { | ||
196 | struct kvm_async_pf *work; | ||
197 | |||
198 | if (!list_empty(&vcpu->async_pf.done)) | ||
199 | return 0; | ||
200 | |||
201 | work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC); | ||
202 | if (!work) | ||
203 | return -ENOMEM; | ||
204 | |||
205 | work->page = bad_page; | ||
206 | get_page(bad_page); | ||
207 | INIT_LIST_HEAD(&work->queue); /* for list_del to work */ | ||
208 | |||
209 | list_add_tail(&work->link, &vcpu->async_pf.done); | ||
210 | vcpu->async_pf.queued++; | ||
211 | return 0; | ||
212 | } | ||