author		Changbin Du <changbin.du@intel.com>	2018-01-30 06:19:54 -0500
committer	Zhenyu Wang <zhenyuw@linux.intel.com>	2018-03-06 00:19:21 -0500
commit		b6c126a39345f7286bb25135efd9154419127427 (patch)
tree		e36ba0c57197cdbf65da730c5b84e066e19b5d50
parent		e502a2af4c358d14ecf8fce51bf4988ebb4d10b4 (diff)
drm/i915/gvt: Manage shadow pages with radix tree
We don't know in advance how many page tables will be shadowed; the number
varies considerably with the guest load, so a radix tree is a better choice
for us. The page frame number is used as the key, and most of the key bits
are common across entries.

Here is some performance data (duration in us) of looking up an element:

Before: (aka. ppgtt_find_shadow_page)
0.308 0.292 0.246 0.432 0.143 ... 0.311 0.225 0.382 0.199 0.325

After: (aka. intel_vgpu_find_spt_by_mfn)
0.106 0.106 0.107 0.106 0.105 0.107 ... 0.107 0.109 0.105 0.108

This time I didn't collect early data for the hash table; the numbers above
were measured after the desktop is shown. As with the last change, the
overall benchmark is almost unchanged, but we get better scalability.

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r--	drivers/gpu/drm/i915/gvt/gtt.c	50
-rw-r--r--	drivers/gpu/drm/i915/gvt/gtt.h	4
2 files changed, 27 insertions, 27 deletions
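For context, here is a minimal sketch of the kernel radix-tree API pattern
the patch switches to. The demo_* names and struct are hypothetical
illustrations; only the radix-tree calls themselves (INIT_RADIX_TREE,
radix_tree_insert/lookup/delete, radix_tree_for_each_slot,
radix_tree_deref_slot) are the ones the patch uses:

#include <linux/radix-tree.h>
#include <linux/gfp.h>

/* Hypothetical table keyed by machine frame number (mfn). */
struct demo_table {
	struct radix_tree_root tree;
};

static void demo_init(struct demo_table *t)
{
	INIT_RADIX_TREE(&t->tree, GFP_KERNEL);
}

static int demo_insert(struct demo_table *t, unsigned long mfn, void *item)
{
	/* Returns 0 on success, -EEXIST or -ENOMEM on failure. */
	return radix_tree_insert(&t->tree, mfn, item);
}

static void *demo_lookup(struct demo_table *t, unsigned long mfn)
{
	/* Descends the tree by key bits; cost grows with tree height only. */
	return radix_tree_lookup(&t->tree, mfn);
}

static void demo_remove(struct demo_table *t, unsigned long mfn)
{
	radix_tree_delete(&t->tree, mfn);
}

/* Visit every populated slot, as ppgtt_free_all_spt() does below.
 * Caller must exclude concurrent modification (e.g. hold the owning lock). */
static void demo_visit_all(struct demo_table *t)
{
	struct radix_tree_iter iter;
	void **slot;

	radix_tree_for_each_slot(slot, &t->tree, &iter, 0) {
		void *item = radix_tree_deref_slot(slot);
		(void)item;	/* operate on item here */
	}
}

Compared with the fixed 256-bucket hash table being removed
(INTEL_GVT_GTT_HASH_BITS is 8), lookups no longer walk collision chains,
which is presumably where the roughly 3x lookup-time improvement in the
commit message comes from.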
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 13eb0572afdb..d204532022bf 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -640,8 +640,8 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
 
 	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
 		       PCI_DMA_BIDIRECTIONAL);
-	if (!hlist_unhashed(&spt->node))
-		hash_del(&spt->node);
+
+	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
 
 	if (spt->guest_page.oos_page)
 		detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
@@ -654,12 +654,14 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
 
 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
 {
-	struct hlist_node *n;
 	struct intel_vgpu_ppgtt_spt *spt;
-	int i;
+	struct radix_tree_iter iter;
+	void **slot;
 
-	hash_for_each_safe(vgpu->gtt.spt_hash_table, i, n, spt, node)
+	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
+		spt = radix_tree_deref_slot(slot);
 		ppgtt_free_spt(spt);
+	}
 }
 
 static int ppgtt_handle_guest_write_page_table_bytes(
@@ -697,16 +699,10 @@ static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
 }
 
 /* Find the spt by shadow page mfn. */
-static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
+static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
 		struct intel_vgpu *vgpu, unsigned long mfn)
 {
-	struct intel_vgpu_ppgtt_spt *spt;
-
-	hash_for_each_possible(vgpu->gtt.spt_hash_table, spt, node, mfn) {
-		if (spt->shadow_page.mfn == mfn)
-			return spt;
-	}
-	return NULL;
+	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
 }
 
 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
@@ -741,8 +737,8 @@ retry:
 			 0, 4096, PCI_DMA_BIDIRECTIONAL);
 	if (dma_mapping_error(kdev, daddr)) {
 		gvt_vgpu_err("fail to map dma addr\n");
-		free_spt(spt);
-		return ERR_PTR(-EINVAL);
+		ret = -EINVAL;
+		goto err_free_spt;
 	}
 	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
 	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
@@ -755,17 +751,23 @@ retry:
 
 	ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn,
 			ppgtt_write_protection_handler, spt);
-	if (ret) {
-		free_spt(spt);
-		dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err_unmap_dma;
 
-	INIT_HLIST_NODE(&spt->node);
-	hash_add(vgpu->gtt.spt_hash_table, &spt->node, spt->shadow_page.mfn);
+	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
+	if (ret)
+		goto err_unreg_page_track;
 
 	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
 	return spt;
+
+err_unreg_page_track:
+	intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn);
+err_unmap_dma:
+	dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+err_free_spt:
+	free_spt(spt);
+	return ERR_PTR(ret);
 }
 
 #define pt_entry_size_shift(spt) \
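The reworked error handling above follows the usual kernel goto-unwind
idiom: acquire resources in order and release them in reverse through a
chain of labels, so the cleanup is written once instead of being duplicated
in every failure branch. A minimal standalone sketch of that idiom
(hypothetical names, plain C rather than the driver code):

#include <stdlib.h>

/* Acquire in order a -> b -> c; unwind in reverse via label fallthrough. */
static int setup_three(void **out_a, void **out_b, void **out_c)
{
	void *a, *b, *c;
	int ret = -1;

	a = malloc(16);
	if (!a)
		goto err;
	b = malloc(16);
	if (!b)
		goto err_free_a;
	c = malloc(16);
	if (!c)
		goto err_free_b;

	*out_a = a;
	*out_b = b;
	*out_c = c;
	return 0;

err_free_b:
	free(b);
err_free_a:
	free(a);
err:
	return ret;
}

Each failure point jumps to the label that frees exactly what has been
acquired so far, which is the structure ppgtt_alloc_spt() now has.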
@@ -1994,7 +1996,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
 
-	hash_init(gtt->spt_hash_table);
+	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
 
 	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
 	INIT_LIST_HEAD(&gtt->oos_page_list_head);
@@ -2024,7 +2026,7 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
 	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
 		gvt_err("vgpu ppgtt mm is not fully destoried\n");
 
-	if (GEM_WARN_ON(!hlist_empty(vgpu->gtt.spt_hash_table))) {
+	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
 		gvt_err("Why we still has spt not freed?\n");
 		ppgtt_free_all_spt(vgpu);
 	}
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 695ab3bd4a69..e831507e17c3 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -39,7 +39,6 @@
 
 struct intel_vgpu_mm;
 
-#define INTEL_GVT_GTT_HASH_BITS 8
 #define INTEL_GVT_INVALID_ADDR (~0UL)
 
 struct intel_gvt_gtt_entry {
@@ -186,7 +185,7 @@ struct intel_vgpu_gtt {
 	struct intel_vgpu_mm *ggtt_mm;
 	unsigned long active_ppgtt_mm_bitmap;
 	struct list_head ppgtt_mm_list_head;
-	DECLARE_HASHTABLE(spt_hash_table, INTEL_GVT_GTT_HASH_BITS);
+	struct radix_tree_root spt_tree;
 	struct list_head oos_page_list_head;
 	struct list_head post_shadow_list_head;
 	struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX];
@@ -217,7 +216,6 @@ struct intel_vgpu_oos_page {
 struct intel_vgpu_ppgtt_spt {
 	atomic_t refcount;
 	struct intel_vgpu *vgpu;
-	struct hlist_node node;
 
 	struct {
 		intel_gvt_gtt_type_t type;