path: root/arch/x86/kvm/mmu.c
author     Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>   2011-07-11 15:32:13 -0400
committer  Avi Kivity <avi@redhat.com>                     2011-07-24 04:50:38 -0400
commit     c2a2ac2b563ccc3a69540965b5a994c19e3817d7 (patch)
tree       83e3c35da3b9e696f77d08d6ab757bae606dd975 /arch/x86/kvm/mmu.c
parent     603e0651cfc8562b103454d7ded71f3ad1eb3a37 (diff)
KVM: MMU: lockless walking shadow page table
Use RCU to protect shadow page tables that are queued to be freed, so that we can walk them safely without holding mmu_lock. The lockless walk should run fast and is needed by the MMIO page fault path.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
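For orientation, the usage pattern this patch enables is sketched below; it mirrors the kvm_mmu_get_spte_hierarchy() hunk at the end of the diff. This is an illustrative sketch, not part of the patch: the helper name walk_addr_lockless() is hypothetical, and it only shows how walk_shadow_page_lockless_begin()/walk_shadow_page_lockless_end() bracket a for_each_shadow_entry_lockless() loop.

/*
 * Illustrative sketch only (not part of this patch): a hypothetical caller
 * checking whether a last-level spte is present for a guest address, using
 * the lockless walk primitives introduced by this commit.
 */
static bool walk_addr_lockless(struct kvm_vcpu *vcpu, u64 addr)
{
	struct kvm_shadow_walk_iterator iterator;
	u64 spte;
	bool present = false;

	/* rcu_read_lock() plus reader_counter increment */
	walk_shadow_page_lockless_begin(vcpu);

	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
		if (!is_shadow_present_pte(spte))
			break;
		if (is_last_spte(spte, iterator.level)) {
			/* reached a leaf spte mapping this address */
			present = true;
			break;
		}
	}

	/* reader_counter decrement plus rcu_read_unlock() */
	walk_shadow_page_lockless_end(vcpu);

	return present;
}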
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--   arch/x86/kvm/mmu.c   132
1 file changed, 124 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a22b5fe92167..374530a478a4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -182,6 +182,12 @@ struct kvm_shadow_walk_iterator {
 	     shadow_walk_okay(&(_walker));			\
 	     shadow_walk_next(&(_walker)))
 
+#define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte)	\
+	for (shadow_walk_init(&(_walker), _vcpu, _addr);		\
+	     shadow_walk_okay(&(_walker)) &&				\
+		({ spte = mmu_spte_get_lockless(_walker.sptep); 1; });	\
+	     __shadow_walk_next(&(_walker), spte))
+
 static struct kmem_cache *pte_list_desc_cache;
 static struct kmem_cache *mmu_page_header_cache;
 static struct percpu_counter kvm_total_used_mmu_pages;
@@ -274,6 +280,11 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
 {
 	return xchg(sptep, spte);
 }
+
+static u64 __get_spte_lockless(u64 *sptep)
+{
+	return ACCESS_ONCE(*sptep);
+}
 #else
 union split_spte {
 	struct {
@@ -283,6 +294,18 @@ union split_spte {
 	u64 spte;
 };
 
+static void count_spte_clear(u64 *sptep, u64 spte)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+	if (is_shadow_present_pte(spte))
+		return;
+
+	/* Ensure the spte is completely set before we increase the count */
+	smp_wmb();
+	sp->clear_spte_count++;
+}
+
 static void __set_spte(u64 *sptep, u64 spte)
 {
 	union split_spte *ssptep, sspte;
@@ -318,6 +341,7 @@ static void __update_clear_spte_fast(u64 *sptep, u64 spte)
 	smp_wmb();
 
 	ssptep->spte_high = sspte.spte_high;
+	count_spte_clear(sptep, spte);
 }
 
 static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
@@ -330,9 +354,40 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
 	/* xchg acts as a barrier before the setting of the high bits */
 	orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low);
 	orig.spte_high = ssptep->spte_high = sspte.spte_high;
+	count_spte_clear(sptep, spte);
 
 	return orig.spte;
 }
+
+/*
+ * The idea of the lightweight way to read the spte on x86_32 guests comes
+ * from gup_get_pte() (arch/x86/mm/gup.c).
+ * The difference is that we cannot catch the spte TLB flush once we leave
+ * guest mode, so we emulate it by increasing clear_spte_count whenever an
+ * spte is cleared.
+ */
+static u64 __get_spte_lockless(u64 *sptep)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(sptep));
+	union split_spte spte, *orig = (union split_spte *)sptep;
+	int count;
+
+retry:
+	count = sp->clear_spte_count;
+	smp_rmb();
+
+	spte.spte_low = orig->spte_low;
+	smp_rmb();
+
+	spte.spte_high = orig->spte_high;
+	smp_rmb();
+
+	if (unlikely(spte.spte_low != orig->spte_low ||
+		     count != sp->clear_spte_count))
+		goto retry;
+
+	return spte.spte;
+}
 #endif
 
 static bool spte_has_volatile_bits(u64 spte)
@@ -435,6 +490,28 @@ static void mmu_spte_clear_no_track(u64 *sptep)
 	__update_clear_spte_fast(sptep, 0ull);
 }
 
+static u64 mmu_spte_get_lockless(u64 *sptep)
+{
+	return __get_spte_lockless(sptep);
+}
+
+static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
+{
+	rcu_read_lock();
+	atomic_inc(&vcpu->kvm->arch.reader_counter);
+
+	/* Increase the counter before walking shadow page table */
+	smp_mb__after_atomic_inc();
+}
+
+static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
+{
+	/* Decrease the counter after walking shadow page table finished */
+	smp_mb__before_atomic_dec();
+	atomic_dec(&vcpu->kvm->arch.reader_counter);
+	rcu_read_unlock();
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  struct kmem_cache *base_cache, int min)
 {
@@ -1597,17 +1674,23 @@ static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
 	return true;
 }
 
-static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
+			       u64 spte)
 {
-	if (is_last_spte(*iterator->sptep, iterator->level)) {
+	if (is_last_spte(spte, iterator->level)) {
 		iterator->level = 0;
 		return;
 	}
 
-	iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
+	iterator->shadow_addr = spte & PT64_BASE_ADDR_MASK;
 	--iterator->level;
 }
 
+static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+{
+	return __shadow_walk_next(iterator, *iterator->sptep);
+}
+
 static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp)
 {
 	u64 spte;
@@ -1754,6 +1837,30 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	return ret;
 }
 
+static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
+{
+	struct kvm_mmu_page *sp;
+
+	list_for_each_entry(sp, invalid_list, link)
+		kvm_mmu_isolate_page(sp);
+}
+
+static void free_pages_rcu(struct rcu_head *head)
+{
+	struct kvm_mmu_page *next, *sp;
+
+	sp = container_of(head, struct kvm_mmu_page, rcu);
+	while (sp) {
+		if (!list_empty(&sp->link))
+			next = list_first_entry(&sp->link,
+						struct kvm_mmu_page, link);
+		else
+			next = NULL;
+		kvm_mmu_free_page(sp);
+		sp = next;
+	}
+}
+
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list)
 {
@@ -1764,6 +1871,14 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 
 	kvm_flush_remote_tlbs(kvm);
 
+	if (atomic_read(&kvm->arch.reader_counter)) {
+		kvm_mmu_isolate_pages(invalid_list);
+		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
+		list_del_init(invalid_list);
+		call_rcu(&sp->rcu, free_pages_rcu);
+		return;
+	}
+
 	do {
 		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
 		WARN_ON(!sp->role.invalid || sp->root_count);
@@ -3784,16 +3899,17 @@ out:
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
 {
 	struct kvm_shadow_walk_iterator iterator;
+	u64 spte;
 	int nr_sptes = 0;
 
-	spin_lock(&vcpu->kvm->mmu_lock);
-	for_each_shadow_entry(vcpu, addr, iterator) {
-		sptes[iterator.level-1] = *iterator.sptep;
+	walk_shadow_page_lockless_begin(vcpu);
+	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
+		sptes[iterator.level-1] = spte;
 		nr_sptes++;
-		if (!is_shadow_present_pte(*iterator.sptep))
+		if (!is_shadow_present_pte(spte))
 			break;
 	}
-	spin_unlock(&vcpu->kvm->mmu_lock);
+	walk_shadow_page_lockless_end(vcpu);
 
 	return nr_sptes;
 }