aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alex Deucher <alexander.deucher@amd.com> 2014-07-30 11:49:56 -0400
committer Alex Deucher <alexander.deucher@amd.com> 2014-08-05 08:53:51 -0400
commit 0aea5e4aa299c465afafc77883ea2c19475036b1 (patch)
tree 43afd9cf7bc0f0bd284f3e0089d4c546b9512fd7
parent c265f24d5ca3bf2877e857b93b0246098767e6a9 (diff)
drm/radeon: use an interval tree to manage the VMA v2
Scales much better than scanning the address range linearly.

v2: store pfn instead of address

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/Kconfig 1
-rw-r--r-- drivers/gpu/drm/radeon/radeon.h 7
-rw-r--r-- drivers/gpu/drm/radeon/radeon_gem.c 4
-rw-r--r-- drivers/gpu/drm/radeon/radeon_trace.h 4
-rw-r--r-- drivers/gpu/drm/radeon/radeon_vm.c 97
5 files changed, 52 insertions, 61 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 31894c8c1773..b066bb3ca01a 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -114,6 +114,7 @@ config DRM_RADEON
114 select POWER_SUPPLY 114 select POWER_SUPPLY
115 select HWMON 115 select HWMON
116 select BACKLIGHT_CLASS_DEVICE 116 select BACKLIGHT_CLASS_DEVICE
117 select INTERVAL_TREE
117 help 118 help
118 Choose this option if you have an ATI Radeon graphics card. There 119 Choose this option if you have an ATI Radeon graphics card. There
119 are both PCI and AGP versions. You don't need to choose this to 120 are both PCI and AGP versions. You don't need to choose this to
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 31dda41394d8..56fc7d2da149 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -64,6 +64,7 @@
64#include <linux/wait.h> 64#include <linux/wait.h>
65#include <linux/list.h> 65#include <linux/list.h>
66#include <linux/kref.h> 66#include <linux/kref.h>
67#include <linux/interval_tree.h>
67 68
68#include <ttm/ttm_bo_api.h> 69#include <ttm/ttm_bo_api.h>
69#include <ttm/ttm_bo_driver.h> 70#include <ttm/ttm_bo_driver.h>
@@ -447,14 +448,12 @@ struct radeon_mman {
447struct radeon_bo_va { 448struct radeon_bo_va {
448 /* protected by bo being reserved */ 449 /* protected by bo being reserved */
449 struct list_head bo_list; 450 struct list_head bo_list;
450 uint64_t soffset;
451 uint64_t eoffset;
452 uint32_t flags; 451 uint32_t flags;
453 uint64_t addr; 452 uint64_t addr;
454 unsigned ref_count; 453 unsigned ref_count;
455 454
456 /* protected by vm mutex */ 455 /* protected by vm mutex */
457 struct list_head vm_list; 456 struct interval_tree_node it;
458 struct list_head vm_status; 457 struct list_head vm_status;
459 458
460 /* constant after initialization */ 459 /* constant after initialization */
@@ -877,7 +876,7 @@ struct radeon_vm_pt {
877}; 876};
878 877
879struct radeon_vm { 878struct radeon_vm {
880 struct list_head va; 879 struct rb_root va;
881 unsigned id; 880 unsigned id;
882 881
883 /* BOs moved, but not yet updated in the PT */ 882 /* BOs moved, but not yet updated in the PT */
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 99e4e0cd72a6..bfd7e1b0ff3f 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
496 496
497 switch (args->operation) { 497 switch (args->operation) {
498 case RADEON_VA_MAP: 498 case RADEON_VA_MAP:
499 if (bo_va->soffset) { 499 if (bo_va->it.start) {
500 args->operation = RADEON_VA_RESULT_VA_EXIST; 500 args->operation = RADEON_VA_RESULT_VA_EXIST;
501 args->offset = bo_va->soffset; 501 args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
502 goto out; 502 goto out;
503 } 503 }
504 r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags); 504 r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h
index cd781f34bd8d..9db74a96ef61 100644
--- a/drivers/gpu/drm/radeon/radeon_trace.h
+++ b/drivers/gpu/drm/radeon/radeon_trace.h
@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update,
72 ), 72 ),
73 73
74 TP_fast_assign( 74 TP_fast_assign(
75 __entry->soffset = bo_va->soffset; 75 __entry->soffset = bo_va->it.start;
76 __entry->eoffset = bo_va->eoffset; 76 __entry->eoffset = bo_va->it.last + 1;
77 __entry->flags = bo_va->flags; 77 __entry->flags = bo_va->flags;
78 ), 78 ),
79 TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", 79 TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 906c8ae867ac..39bc5c2b02d1 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
326 } 326 }
327 bo_va->vm = vm; 327 bo_va->vm = vm;
328 bo_va->bo = bo; 328 bo_va->bo = bo;
329 bo_va->soffset = 0; 329 bo_va->it.start = 0;
330 bo_va->eoffset = 0; 330 bo_va->it.last = 0;
331 bo_va->flags = 0; 331 bo_va->flags = 0;
332 bo_va->addr = 0; 332 bo_va->addr = 0;
333 bo_va->ref_count = 1; 333 bo_va->ref_count = 1;
334 INIT_LIST_HEAD(&bo_va->bo_list); 334 INIT_LIST_HEAD(&bo_va->bo_list);
335 INIT_LIST_HEAD(&bo_va->vm_list);
336 INIT_LIST_HEAD(&bo_va->vm_status); 335 INIT_LIST_HEAD(&bo_va->vm_status);
337 336
338 mutex_lock(&vm->mutex); 337 mutex_lock(&vm->mutex);
339 list_add(&bo_va->vm_list, &vm->va);
340 list_add_tail(&bo_va->bo_list, &bo->va); 338 list_add_tail(&bo_va->bo_list, &bo->va);
341 mutex_unlock(&vm->mutex); 339 mutex_unlock(&vm->mutex);
342 340
@@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
420 uint32_t flags) 418 uint32_t flags)
421{ 419{
422 uint64_t size = radeon_bo_size(bo_va->bo); 420 uint64_t size = radeon_bo_size(bo_va->bo);
423 uint64_t eoffset, last_offset = 0;
424 struct radeon_vm *vm = bo_va->vm; 421 struct radeon_vm *vm = bo_va->vm;
425 struct radeon_bo_va *tmp;
426 struct list_head *head;
427 unsigned last_pfn, pt_idx; 422 unsigned last_pfn, pt_idx;
423 uint64_t eoffset;
428 int r; 424 int r;
429 425
430 if (soffset) { 426 if (soffset) {
@@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
446 } 442 }
447 443
448 mutex_lock(&vm->mutex); 444 mutex_lock(&vm->mutex);
449 head = &vm->va; 445 if (bo_va->it.start || bo_va->it.last) {
450 last_offset = 0; 446 if (bo_va->addr) {
451 list_for_each_entry(tmp, &vm->va, vm_list) { 447 /* add a clone of the bo_va to clear the old address */
452 if (bo_va == tmp) { 448 struct radeon_bo_va *tmp;
453 /* skip over currently modified bo */ 449 tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
454 continue; 450 tmp->it.start = bo_va->it.start;
451 tmp->it.last = bo_va->it.last;
452 tmp->vm = vm;
453 tmp->addr = bo_va->addr;
454 list_add(&tmp->vm_status, &vm->freed);
455 } 455 }
456 456
457 if (soffset >= last_offset && eoffset <= tmp->soffset) { 457 interval_tree_remove(&bo_va->it, &vm->va);
458 /* bo can be added before this one */ 458 bo_va->it.start = 0;
459 break; 459 bo_va->it.last = 0;
460 }
461 if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
462 /* bo and tmp overlap, invalid offset */
463 dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
464 bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
465 (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
466 mutex_unlock(&vm->mutex);
467 return -EINVAL;
468 }
469 last_offset = tmp->eoffset;
470 head = &tmp->vm_list;
471 } 460 }
472 461
473 if (bo_va->soffset) { 462 soffset /= RADEON_GPU_PAGE_SIZE;
474 /* add a clone of the bo_va to clear the old address */ 463 eoffset /= RADEON_GPU_PAGE_SIZE;
475 tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); 464 if (soffset || eoffset) {
476 if (!tmp) { 465 struct interval_tree_node *it;
466 it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
467 if (it) {
468 struct radeon_bo_va *tmp;
469 tmp = container_of(it, struct radeon_bo_va, it);
470 /* bo and tmp overlap, invalid offset */
471 dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
472 "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
473 soffset, tmp->bo, tmp->it.start, tmp->it.last);
477 mutex_unlock(&vm->mutex); 474 mutex_unlock(&vm->mutex);
478 return -ENOMEM; 475 return -EINVAL;
479 } 476 }
480 tmp->soffset = bo_va->soffset; 477 bo_va->it.start = soffset;
481 tmp->eoffset = bo_va->eoffset; 478 bo_va->it.last = eoffset - 1;
482 tmp->vm = vm; 479 interval_tree_insert(&bo_va->it, &vm->va);
483 list_add(&tmp->vm_status, &vm->freed);
484 } 480 }
485 481
486 bo_va->soffset = soffset;
487 bo_va->eoffset = eoffset;
488 bo_va->flags = flags; 482 bo_va->flags = flags;
489 bo_va->addr = 0; 483 bo_va->addr = 0;
490 list_move(&bo_va->vm_list, head);
491 484
492 soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; 485 soffset >>= radeon_vm_block_size;
493 eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; 486 eoffset >>= radeon_vm_block_size;
494 487
495 BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); 488 BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
496 489
@@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
778 unsigned count = 0; 771 unsigned count = 0;
779 uint64_t addr; 772 uint64_t addr;
780 773
781 start = start / RADEON_GPU_PAGE_SIZE;
782 end = end / RADEON_GPU_PAGE_SIZE;
783
784 /* walk over the address space and update the page tables */ 774 /* walk over the address space and update the page tables */
785 for (addr = start; addr < end; ) { 775 for (addr = start; addr < end; ) {
786 uint64_t pt_idx = addr >> radeon_vm_block_size; 776 uint64_t pt_idx = addr >> radeon_vm_block_size;
@@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
847 uint64_t addr; 837 uint64_t addr;
848 int r; 838 int r;
849 839
850 if (!bo_va->soffset) { 840 if (!bo_va->it.start) {
851 dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", 841 dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
852 bo_va->bo, vm); 842 bo_va->bo, vm);
853 return -EINVAL; 843 return -EINVAL;
@@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
881 871
882 trace_radeon_vm_bo_update(bo_va); 872 trace_radeon_vm_bo_update(bo_va);
883 873
884 nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE; 874 nptes = bo_va->it.last - bo_va->it.start + 1;
885 875
886 /* padding, etc. */ 876 /* padding, etc. */
887 ndw = 64; 877 ndw = 64;
@@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
906 return r; 896 return r;
907 ib.length_dw = 0; 897 ib.length_dw = 0;
908 898
909 radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, 899 radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
910 addr, radeon_vm_page_flags(bo_va->flags)); 900 bo_va->it.last + 1, addr,
901 radeon_vm_page_flags(bo_va->flags));
911 902
912 radeon_semaphore_sync_to(ib.semaphore, vm->fence); 903 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
913 r = radeon_ib_schedule(rdev, &ib, NULL); 904 r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
993 list_del(&bo_va->bo_list); 984 list_del(&bo_va->bo_list);
994 985
995 mutex_lock(&vm->mutex); 986 mutex_lock(&vm->mutex);
996 list_del(&bo_va->vm_list); 987 interval_tree_remove(&bo_va->it, &vm->va);
997 list_del(&bo_va->vm_status); 988 list_del(&bo_va->vm_status);
998 989
999 if (bo_va->addr) { 990 if (bo_va->addr) {
@@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
1051 vm->last_flush = NULL; 1042 vm->last_flush = NULL;
1052 vm->last_id_use = NULL; 1043 vm->last_id_use = NULL;
1053 mutex_init(&vm->mutex); 1044 mutex_init(&vm->mutex);
1054 INIT_LIST_HEAD(&vm->va); 1045 vm->va = RB_ROOT;
1055 INIT_LIST_HEAD(&vm->invalidated); 1046 INIT_LIST_HEAD(&vm->invalidated);
1056 INIT_LIST_HEAD(&vm->freed); 1047 INIT_LIST_HEAD(&vm->freed);
1057 1048
@@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
1096 struct radeon_bo_va *bo_va, *tmp; 1087 struct radeon_bo_va *bo_va, *tmp;
1097 int i, r; 1088 int i, r;
1098 1089
1099 if (!list_empty(&vm->va)) { 1090 if (!RB_EMPTY_ROOT(&vm->va)) {
1100 dev_err(rdev->dev, "still active bo inside vm\n"); 1091 dev_err(rdev->dev, "still active bo inside vm\n");
1101 } 1092 }
1102 list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { 1093 rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
1103 list_del_init(&bo_va->vm_list); 1094 interval_tree_remove(&bo_va->it, &vm->va);
1104 r = radeon_bo_reserve(bo_va->bo, false); 1095 r = radeon_bo_reserve(bo_va->bo, false);
1105 if (!r) { 1096 if (!r) {
1106 list_del_init(&bo_va->bo_list); 1097 list_del_init(&bo_va->bo_list);