| author | Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> | 2013-06-07 04:51:26 -0400 |
| --- | --- | --- |
| committer | Gleb Natapov <gleb@redhat.com> | 2013-06-27 07:20:36 -0400 |
| commit | f8f559422b6c6a05469dfde614b67789b6142cb5 (patch) | |
| tree | 6a20c82079581d5af9a9e8d4ca83bc2266e01f21 /arch/x86/kvm/mmu.c | |
| parent | b37fbea6cefc3a8ff7b6cfec9867432d1a10046d (diff) | |
KVM: MMU: fast invalidate all mmio sptes
This patch introduces a very simple and scalable way to invalidate all
mmio sptes: it need not walk any shadow pages or hold mmu-lock.
KVM maintains a global mmio valid generation-number, stored in
kvm->memslots.generation, and every mmio spte stores the current global
generation-number into its available bits when it is created.
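As a concrete illustration of the bit layout, here is a minimal standalone sketch of the packing, reusing the shift constants and helper names that appear in the diff below (the userspace types and the test harness are additions for illustration; the kernel's extractor also clears shadow_mmio_mask and the other non-generation bits first, which this sketch simply assumes has been done):

```c
#include <assert.h>
#include <stdint.h>

/* Shift constants as in arch/x86/kvm/mmu.c (see the diff below). */
#define MMIO_SPTE_GEN_LOW_SHIFT		3	/* low 9 bits -> spte bits 3..11    */
#define MMIO_SPTE_GEN_HIGH_SHIFT	52	/* high 10 bits -> spte bits 52..61 */
#define MMIO_GEN_LOW_SHIFT		9
#define MMIO_GEN_LOW_MASK		((1u << MMIO_GEN_LOW_SHIFT) - 1)

/* Scatter a 19-bit generation-number across the two available-bit ranges. */
static uint64_t generation_mmio_spte_mask(unsigned int gen)
{
	uint64_t mask;

	mask  = (uint64_t)(gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
	mask |= ((uint64_t)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
	return mask;
}

/*
 * Recover the generation-number; assumes the non-generation bits
 * (shadow_mmio_mask, access bits, gfn) have already been cleared.
 */
static unsigned int get_mmio_spte_generation(uint64_t spte)
{
	unsigned int gen;

	gen  = (unsigned int)((spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK);
	gen |= (unsigned int)((spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT);
	return gen;
}

int main(void)
{
	unsigned int gen = 0x5fdb3 & ((1u << 19) - 1);	/* any 19-bit value */

	/* The pack/unpack pair round-trips the full 19-bit generation. */
	assert(get_mmio_spte_generation(generation_mmio_spte_mask(gen)) == gen);
	return 0;
}
```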
When KVM needs to zap all mmio sptes, it simply increases the global
generation-number. When a guest does an mmio access, KVM intercepts the
MMIO #PF, walks the shadow page table, and gets the mmio spte. If the
generation-number on the spte does not equal the global generation-number,
the fault falls back to the normal #PF handler, which updates the mmio
spte.
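The invalidation protocol itself is tiny. This hypothetical userspace toy (all names here are invented for illustration and are not kernel API) shows why bumping one counter invalidates every cached entry without visiting any of them:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define GEN_MASK	((1u << 19) - 1)	/* 19-bit generation space */

/* Plays the role of kvm_memslots(kvm)->generation. */
static unsigned int global_gen;

/* Creation stamps the entry with the generation current at that time. */
static uint32_t make_cached_entry(void)
{
	return global_gen & GEN_MASK;
}

/* Validity is a single comparison; no walk over cached entries needed. */
static bool entry_is_valid(uint32_t entry)
{
	return (entry & GEN_MASK) == (global_gen & GEN_MASK);
}

int main(void)
{
	uint32_t e = make_cached_entry();

	assert(entry_is_valid(e));
	global_gen++;			/* "zap" everything in O(1) */
	assert(!entry_is_valid(e));	/* stale entry now fails the check */
	return 0;
}
```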
Since 19 bits are used to store the generation-number in an mmio spte, we
zap all mmio sptes when the generation-number wraps around.
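For scale: MMIO_MAX_GEN = (1 << 19) - 1 = 524287, so the full zap in kvm_mmu_invalidate_mmio_sptes() (see the last hunk below) triggers at most once per roughly half a million memslot updates. A quick sanity check of the threshold arithmetic:

```c
#include <assert.h>

#define MMIO_GEN_SHIFT	19
#define MMIO_MAX_GEN	((1 << MMIO_GEN_SHIFT) - 1)

int main(void)
{
	assert(MMIO_MAX_GEN == 524287);		/* 2^19 - 1 */
	assert(MMIO_MAX_GEN - 1 == 524286);	/* zap threshold used in the patch */
	return 0;
}
```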
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm/mmu.c')

| file | mode | changes |
| --- | --- | --- |
| arch/x86/kvm/mmu.c | -rw-r--r-- | 54 |

1 file changed, 46 insertions(+), 8 deletions(-)
```diff
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 476d155834b9..3e893cd90389 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -205,9 +205,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 #define MMIO_SPTE_GEN_LOW_SHIFT		3
 #define MMIO_SPTE_GEN_HIGH_SHIFT	52
 
+#define MMIO_GEN_SHIFT			19
 #define MMIO_GEN_LOW_SHIFT		9
 #define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 1)
-#define MMIO_MAX_GEN			((1 << 19) - 1)
+#define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
+#define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)
 
 static u64 generation_mmio_spte_mask(unsigned int gen)
 {
@@ -231,17 +233,23 @@ static unsigned int get_mmio_spte_generation(u64 spte)
 	return gen;
 }
 
+static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+{
+	return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
+}
+
 static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
 			   unsigned access)
 {
 	struct kvm_mmu_page *sp = page_header(__pa(sptep));
-	u64 mask = generation_mmio_spte_mask(0);
+	unsigned int gen = kvm_current_mmio_generation(kvm);
+	u64 mask = generation_mmio_spte_mask(gen);
 
 	access &= ACC_WRITE_MASK | ACC_USER_MASK;
 	mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
 	sp->mmio_cached = true;
 
-	trace_mark_mmio_spte(sptep, gfn, access, 0);
+	trace_mark_mmio_spte(sptep, gfn, access, gen);
 	mmu_spte_set(sptep, mask);
 }
 
@@ -273,6 +281,12 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
 	return false;
 }
 
+static bool check_mmio_spte(struct kvm *kvm, u64 spte)
+{
+	return likely(get_mmio_spte_generation(spte) ==
+			kvm_current_mmio_generation(kvm));
+}
+
 static inline u64 rsvd_bits(int s, int e)
 {
 	return ((1ULL << (e - s + 1)) - 1) << s;
@@ -3237,6 +3251,9 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 		gfn_t gfn = get_mmio_spte_gfn(spte);
 		unsigned access = get_mmio_spte_access(spte);
 
+		if (!check_mmio_spte(vcpu->kvm, spte))
+			return RET_MMIO_PF_INVALID;
+
 		if (direct)
 			addr = 0;
 
@@ -3278,8 +3295,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 
 	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
 
-	if (unlikely(error_code & PFERR_RSVD_MASK))
-		return handle_mmio_page_fault(vcpu, gva, error_code, true);
+	if (unlikely(error_code & PFERR_RSVD_MASK)) {
+		r = handle_mmio_page_fault(vcpu, gva, error_code, true);
+
+		if (likely(r != RET_MMIO_PF_INVALID))
+			return r;
+	}
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -3355,8 +3376,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-	if (unlikely(error_code & PFERR_RSVD_MASK))
-		return handle_mmio_page_fault(vcpu, gpa, error_code, true);
+	if (unlikely(error_code & PFERR_RSVD_MASK)) {
+		r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
+
+		if (likely(r != RET_MMIO_PF_INVALID))
+			return r;
+	}
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -4329,7 +4354,7 @@ void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
 	spin_unlock(&kvm->mmu_lock);
 }
 
-void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
+static void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
 	LIST_HEAD(invalid_list);
@@ -4352,6 +4377,19 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
 	return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
 }
 
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
+{
+	/*
+	 * The very rare case: if the generation-number is round,
+	 * zap all shadow pages.
+	 *
+	 * The max value is MMIO_MAX_GEN - 1 since it is not called
+	 * when mark memslot invalid.
+	 */
+	if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1)))
+		kvm_mmu_zap_mmio_sptes(kvm);
+}
+
 static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct kvm *kvm;
```
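Note: the diffstat above is limited to mmu.c, so the caller of the newly exported kvm_mmu_invalidate_mmio_sptes() is not shown; presumably the rest of the commit wires it into the memslot-update path, where kvm->memslots->generation is bumped, replacing the external kvm_mmu_zap_mmio_sptes() call that this patch makes static.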