diff options
author | Alex Deucher <alexander.deucher@amd.com> | 2014-07-30 11:49:56 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2014-08-05 08:53:51 -0400 |
commit | 0aea5e4aa299c465afafc77883ea2c19475036b1 (patch) | |
tree | 43afd9cf7bc0f0bd284f3e0089d4c546b9512fd7 | |
parent | c265f24d5ca3bf2877e857b93b0246098767e6a9 (diff) |
drm/radeon: use an interval tree to manage the VMA v2
Scales much better than scanning the address range linearly.
v2: store pfn instead of address
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/Kconfig | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_gem.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_trace.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_vm.c | 97 |
5 files changed, 52 insertions, 61 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 31894c8c1773..b066bb3ca01a 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig | |||
@@ -114,6 +114,7 @@ config DRM_RADEON | |||
114 | select POWER_SUPPLY | 114 | select POWER_SUPPLY |
115 | select HWMON | 115 | select HWMON |
116 | select BACKLIGHT_CLASS_DEVICE | 116 | select BACKLIGHT_CLASS_DEVICE |
117 | select INTERVAL_TREE | ||
117 | help | 118 | help |
118 | Choose this option if you have an ATI Radeon graphics card. There | 119 | Choose this option if you have an ATI Radeon graphics card. There |
119 | are both PCI and AGP versions. You don't need to choose this to | 120 | are both PCI and AGP versions. You don't need to choose this to |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 31dda41394d8..56fc7d2da149 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -64,6 +64,7 @@ | |||
64 | #include <linux/wait.h> | 64 | #include <linux/wait.h> |
65 | #include <linux/list.h> | 65 | #include <linux/list.h> |
66 | #include <linux/kref.h> | 66 | #include <linux/kref.h> |
67 | #include <linux/interval_tree.h> | ||
67 | 68 | ||
68 | #include <ttm/ttm_bo_api.h> | 69 | #include <ttm/ttm_bo_api.h> |
69 | #include <ttm/ttm_bo_driver.h> | 70 | #include <ttm/ttm_bo_driver.h> |
@@ -447,14 +448,12 @@ struct radeon_mman { | |||
447 | struct radeon_bo_va { | 448 | struct radeon_bo_va { |
448 | /* protected by bo being reserved */ | 449 | /* protected by bo being reserved */ |
449 | struct list_head bo_list; | 450 | struct list_head bo_list; |
450 | uint64_t soffset; | ||
451 | uint64_t eoffset; | ||
452 | uint32_t flags; | 451 | uint32_t flags; |
453 | uint64_t addr; | 452 | uint64_t addr; |
454 | unsigned ref_count; | 453 | unsigned ref_count; |
455 | 454 | ||
456 | /* protected by vm mutex */ | 455 | /* protected by vm mutex */ |
457 | struct list_head vm_list; | 456 | struct interval_tree_node it; |
458 | struct list_head vm_status; | 457 | struct list_head vm_status; |
459 | 458 | ||
460 | /* constant after initialization */ | 459 | /* constant after initialization */ |
@@ -877,7 +876,7 @@ struct radeon_vm_pt { | |||
877 | }; | 876 | }; |
878 | 877 | ||
879 | struct radeon_vm { | 878 | struct radeon_vm { |
880 | struct list_head va; | 879 | struct rb_root va; |
881 | unsigned id; | 880 | unsigned id; |
882 | 881 | ||
883 | /* BOs moved, but not yet updated in the PT */ | 882 | /* BOs moved, but not yet updated in the PT */ |
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 99e4e0cd72a6..bfd7e1b0ff3f 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c | |||
@@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, | |||
496 | 496 | ||
497 | switch (args->operation) { | 497 | switch (args->operation) { |
498 | case RADEON_VA_MAP: | 498 | case RADEON_VA_MAP: |
499 | if (bo_va->soffset) { | 499 | if (bo_va->it.start) { |
500 | args->operation = RADEON_VA_RESULT_VA_EXIST; | 500 | args->operation = RADEON_VA_RESULT_VA_EXIST; |
501 | args->offset = bo_va->soffset; | 501 | args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE; |
502 | goto out; | 502 | goto out; |
503 | } | 503 | } |
504 | r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags); | 504 | r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags); |
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index cd781f34bd8d..9db74a96ef61 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h | |||
@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update, | |||
72 | ), | 72 | ), |
73 | 73 | ||
74 | TP_fast_assign( | 74 | TP_fast_assign( |
75 | __entry->soffset = bo_va->soffset; | 75 | __entry->soffset = bo_va->it.start; |
76 | __entry->eoffset = bo_va->eoffset; | 76 | __entry->eoffset = bo_va->it.last + 1; |
77 | __entry->flags = bo_va->flags; | 77 | __entry->flags = bo_va->flags; |
78 | ), | 78 | ), |
79 | TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", | 79 | TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", |
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 906c8ae867ac..39bc5c2b02d1 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c | |||
@@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, | |||
326 | } | 326 | } |
327 | bo_va->vm = vm; | 327 | bo_va->vm = vm; |
328 | bo_va->bo = bo; | 328 | bo_va->bo = bo; |
329 | bo_va->soffset = 0; | 329 | bo_va->it.start = 0; |
330 | bo_va->eoffset = 0; | 330 | bo_va->it.last = 0; |
331 | bo_va->flags = 0; | 331 | bo_va->flags = 0; |
332 | bo_va->addr = 0; | 332 | bo_va->addr = 0; |
333 | bo_va->ref_count = 1; | 333 | bo_va->ref_count = 1; |
334 | INIT_LIST_HEAD(&bo_va->bo_list); | 334 | INIT_LIST_HEAD(&bo_va->bo_list); |
335 | INIT_LIST_HEAD(&bo_va->vm_list); | ||
336 | INIT_LIST_HEAD(&bo_va->vm_status); | 335 | INIT_LIST_HEAD(&bo_va->vm_status); |
337 | 336 | ||
338 | mutex_lock(&vm->mutex); | 337 | mutex_lock(&vm->mutex); |
339 | list_add(&bo_va->vm_list, &vm->va); | ||
340 | list_add_tail(&bo_va->bo_list, &bo->va); | 338 | list_add_tail(&bo_va->bo_list, &bo->va); |
341 | mutex_unlock(&vm->mutex); | 339 | mutex_unlock(&vm->mutex); |
342 | 340 | ||
@@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, | |||
420 | uint32_t flags) | 418 | uint32_t flags) |
421 | { | 419 | { |
422 | uint64_t size = radeon_bo_size(bo_va->bo); | 420 | uint64_t size = radeon_bo_size(bo_va->bo); |
423 | uint64_t eoffset, last_offset = 0; | ||
424 | struct radeon_vm *vm = bo_va->vm; | 421 | struct radeon_vm *vm = bo_va->vm; |
425 | struct radeon_bo_va *tmp; | ||
426 | struct list_head *head; | ||
427 | unsigned last_pfn, pt_idx; | 422 | unsigned last_pfn, pt_idx; |
423 | uint64_t eoffset; | ||
428 | int r; | 424 | int r; |
429 | 425 | ||
430 | if (soffset) { | 426 | if (soffset) { |
@@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, | |||
446 | } | 442 | } |
447 | 443 | ||
448 | mutex_lock(&vm->mutex); | 444 | mutex_lock(&vm->mutex); |
449 | head = &vm->va; | 445 | if (bo_va->it.start || bo_va->it.last) { |
450 | last_offset = 0; | 446 | if (bo_va->addr) { |
451 | list_for_each_entry(tmp, &vm->va, vm_list) { | 447 | /* add a clone of the bo_va to clear the old address */ |
452 | if (bo_va == tmp) { | 448 | struct radeon_bo_va *tmp; |
453 | /* skip over currently modified bo */ | 449 | tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
454 | continue; | 450 | tmp->it.start = bo_va->it.start; |
451 | tmp->it.last = bo_va->it.last; | ||
452 | tmp->vm = vm; | ||
453 | tmp->addr = bo_va->addr; | ||
454 | list_add(&tmp->vm_status, &vm->freed); | ||
455 | } | 455 | } |
456 | 456 | ||
457 | if (soffset >= last_offset && eoffset <= tmp->soffset) { | 457 | interval_tree_remove(&bo_va->it, &vm->va); |
458 | /* bo can be added before this one */ | 458 | bo_va->it.start = 0; |
459 | break; | 459 | bo_va->it.last = 0; |
460 | } | ||
461 | if (eoffset > tmp->soffset && soffset < tmp->eoffset) { | ||
462 | /* bo and tmp overlap, invalid offset */ | ||
463 | dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", | ||
464 | bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, | ||
465 | (unsigned)tmp->soffset, (unsigned)tmp->eoffset); | ||
466 | mutex_unlock(&vm->mutex); | ||
467 | return -EINVAL; | ||
468 | } | ||
469 | last_offset = tmp->eoffset; | ||
470 | head = &tmp->vm_list; | ||
471 | } | 460 | } |
472 | 461 | ||
473 | if (bo_va->soffset) { | 462 | soffset /= RADEON_GPU_PAGE_SIZE; |
474 | /* add a clone of the bo_va to clear the old address */ | 463 | eoffset /= RADEON_GPU_PAGE_SIZE; |
475 | tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); | 464 | if (soffset || eoffset) { |
476 | if (!tmp) { | 465 | struct interval_tree_node *it; |
466 | it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1); | ||
467 | if (it) { | ||
468 | struct radeon_bo_va *tmp; | ||
469 | tmp = container_of(it, struct radeon_bo_va, it); | ||
470 | /* bo and tmp overlap, invalid offset */ | ||
471 | dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with " | ||
472 | "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, | ||
473 | soffset, tmp->bo, tmp->it.start, tmp->it.last); | ||
477 | mutex_unlock(&vm->mutex); | 474 | mutex_unlock(&vm->mutex); |
478 | return -ENOMEM; | 475 | return -EINVAL; |
479 | } | 476 | } |
480 | tmp->soffset = bo_va->soffset; | 477 | bo_va->it.start = soffset; |
481 | tmp->eoffset = bo_va->eoffset; | 478 | bo_va->it.last = eoffset - 1; |
482 | tmp->vm = vm; | 479 | interval_tree_insert(&bo_va->it, &vm->va); |
483 | list_add(&tmp->vm_status, &vm->freed); | ||
484 | } | 480 | } |
485 | 481 | ||
486 | bo_va->soffset = soffset; | ||
487 | bo_va->eoffset = eoffset; | ||
488 | bo_va->flags = flags; | 482 | bo_va->flags = flags; |
489 | bo_va->addr = 0; | 483 | bo_va->addr = 0; |
490 | list_move(&bo_va->vm_list, head); | ||
491 | 484 | ||
492 | soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; | 485 | soffset >>= radeon_vm_block_size; |
493 | eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; | 486 | eoffset >>= radeon_vm_block_size; |
494 | 487 | ||
495 | BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); | 488 | BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); |
496 | 489 | ||
@@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, | |||
778 | unsigned count = 0; | 771 | unsigned count = 0; |
779 | uint64_t addr; | 772 | uint64_t addr; |
780 | 773 | ||
781 | start = start / RADEON_GPU_PAGE_SIZE; | ||
782 | end = end / RADEON_GPU_PAGE_SIZE; | ||
783 | |||
784 | /* walk over the address space and update the page tables */ | 774 | /* walk over the address space and update the page tables */ |
785 | for (addr = start; addr < end; ) { | 775 | for (addr = start; addr < end; ) { |
786 | uint64_t pt_idx = addr >> radeon_vm_block_size; | 776 | uint64_t pt_idx = addr >> radeon_vm_block_size; |
@@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, | |||
847 | uint64_t addr; | 837 | uint64_t addr; |
848 | int r; | 838 | int r; |
849 | 839 | ||
850 | if (!bo_va->soffset) { | 840 | if (!bo_va->it.start) { |
851 | dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", | 841 | dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", |
852 | bo_va->bo, vm); | 842 | bo_va->bo, vm); |
853 | return -EINVAL; | 843 | return -EINVAL; |
@@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, | |||
881 | 871 | ||
882 | trace_radeon_vm_bo_update(bo_va); | 872 | trace_radeon_vm_bo_update(bo_va); |
883 | 873 | ||
884 | nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE; | 874 | nptes = bo_va->it.last - bo_va->it.start + 1; |
885 | 875 | ||
886 | /* padding, etc. */ | 876 | /* padding, etc. */ |
887 | ndw = 64; | 877 | ndw = 64; |
@@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev, | |||
906 | return r; | 896 | return r; |
907 | ib.length_dw = 0; | 897 | ib.length_dw = 0; |
908 | 898 | ||
909 | radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, | 899 | radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start, |
910 | addr, radeon_vm_page_flags(bo_va->flags)); | 900 | bo_va->it.last + 1, addr, |
901 | radeon_vm_page_flags(bo_va->flags)); | ||
911 | 902 | ||
912 | radeon_semaphore_sync_to(ib.semaphore, vm->fence); | 903 | radeon_semaphore_sync_to(ib.semaphore, vm->fence); |
913 | r = radeon_ib_schedule(rdev, &ib, NULL); | 904 | r = radeon_ib_schedule(rdev, &ib, NULL); |
@@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, | |||
993 | list_del(&bo_va->bo_list); | 984 | list_del(&bo_va->bo_list); |
994 | 985 | ||
995 | mutex_lock(&vm->mutex); | 986 | mutex_lock(&vm->mutex); |
996 | list_del(&bo_va->vm_list); | 987 | interval_tree_remove(&bo_va->it, &vm->va); |
997 | list_del(&bo_va->vm_status); | 988 | list_del(&bo_va->vm_status); |
998 | 989 | ||
999 | if (bo_va->addr) { | 990 | if (bo_va->addr) { |
@@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) | |||
1051 | vm->last_flush = NULL; | 1042 | vm->last_flush = NULL; |
1052 | vm->last_id_use = NULL; | 1043 | vm->last_id_use = NULL; |
1053 | mutex_init(&vm->mutex); | 1044 | mutex_init(&vm->mutex); |
1054 | INIT_LIST_HEAD(&vm->va); | 1045 | vm->va = RB_ROOT; |
1055 | INIT_LIST_HEAD(&vm->invalidated); | 1046 | INIT_LIST_HEAD(&vm->invalidated); |
1056 | INIT_LIST_HEAD(&vm->freed); | 1047 | INIT_LIST_HEAD(&vm->freed); |
1057 | 1048 | ||
@@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) | |||
1096 | struct radeon_bo_va *bo_va, *tmp; | 1087 | struct radeon_bo_va *bo_va, *tmp; |
1097 | int i, r; | 1088 | int i, r; |
1098 | 1089 | ||
1099 | if (!list_empty(&vm->va)) { | 1090 | if (!RB_EMPTY_ROOT(&vm->va)) { |
1100 | dev_err(rdev->dev, "still active bo inside vm\n"); | 1091 | dev_err(rdev->dev, "still active bo inside vm\n"); |
1101 | } | 1092 | } |
1102 | list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { | 1093 | rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) { |
1103 | list_del_init(&bo_va->vm_list); | 1094 | interval_tree_remove(&bo_va->it, &vm->va); |
1104 | r = radeon_bo_reserve(bo_va->bo, false); | 1095 | r = radeon_bo_reserve(bo_va->bo, false); |
1105 | if (!r) { | 1096 | if (!r) { |
1106 | list_del_init(&bo_va->bo_list); | 1097 | list_del_init(&bo_va->bo_list); |