diff options
| author | Alex Deucher <alexander.deucher@amd.com> | 2014-07-30 11:49:56 -0400 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2014-08-05 08:53:51 -0400 |
| commit | 0aea5e4aa299c465afafc77883ea2c19475036b1 (patch) | |
| tree | 43afd9cf7bc0f0bd284f3e0089d4c546b9512fd7 /drivers | |
| parent | c265f24d5ca3bf2877e857b93b0246098767e6a9 (diff) | |
drm/radeon: use an interval tree to manage the VMA v2
Scales much better than scanning the address range linearly.
v2: store pfn instead of address
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/gpu/drm/Kconfig | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 7 | ||||
| -rw-r--r-- | drivers/gpu/drm/radeon/radeon_gem.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/radeon/radeon_trace.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/radeon/radeon_vm.c | 97 |
5 files changed, 52 insertions, 61 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 31894c8c1773..b066bb3ca01a 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig | |||
| @@ -114,6 +114,7 @@ config DRM_RADEON | |||
| 114 | select POWER_SUPPLY | 114 | select POWER_SUPPLY |
| 115 | select HWMON | 115 | select HWMON |
| 116 | select BACKLIGHT_CLASS_DEVICE | 116 | select BACKLIGHT_CLASS_DEVICE |
| 117 | select INTERVAL_TREE | ||
| 117 | help | 118 | help |
| 118 | Choose this option if you have an ATI Radeon graphics card. There | 119 | Choose this option if you have an ATI Radeon graphics card. There |
| 119 | are both PCI and AGP versions. You don't need to choose this to | 120 | are both PCI and AGP versions. You don't need to choose this to |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 31dda41394d8..56fc7d2da149 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
| @@ -64,6 +64,7 @@ | |||
| 64 | #include <linux/wait.h> | 64 | #include <linux/wait.h> |
| 65 | #include <linux/list.h> | 65 | #include <linux/list.h> |
| 66 | #include <linux/kref.h> | 66 | #include <linux/kref.h> |
| 67 | #include <linux/interval_tree.h> | ||
| 67 | 68 | ||
| 68 | #include <ttm/ttm_bo_api.h> | 69 | #include <ttm/ttm_bo_api.h> |
| 69 | #include <ttm/ttm_bo_driver.h> | 70 | #include <ttm/ttm_bo_driver.h> |
| @@ -447,14 +448,12 @@ struct radeon_mman { | |||
| 447 | struct radeon_bo_va { | 448 | struct radeon_bo_va { |
| 448 | /* protected by bo being reserved */ | 449 | /* protected by bo being reserved */ |
| 449 | struct list_head bo_list; | 450 | struct list_head bo_list; |
| 450 | uint64_t soffset; | ||
| 451 | uint64_t eoffset; | ||
| 452 | uint32_t flags; | 451 | uint32_t flags; |
| 453 | uint64_t addr; | 452 | uint64_t addr; |
| 454 | unsigned ref_count; | 453 | unsigned ref_count; |
| 455 | 454 | ||
| 456 | /* protected by vm mutex */ | 455 | /* protected by vm mutex */ |
| 457 | struct list_head vm_list; | 456 | struct interval_tree_node it; |
| 458 | struct list_head vm_status; | 457 | struct list_head vm_status; |
| 459 | 458 | ||
| 460 | /* constant after initialization */ | 459 | /* constant after initialization */ |
| @@ -877,7 +876,7 @@ struct radeon_vm_pt { | |||
| 877 | }; | 876 | }; |
| 878 | 877 | ||
| 879 | struct radeon_vm { | 878 | struct radeon_vm { |
| 880 | struct list_head va; | 879 | struct rb_root va; |
| 881 | unsigned id; | 880 | unsigned id; |
| 882 | 881 | ||
| 883 | /* BOs moved, but not yet updated in the PT */ | 882 | /* BOs moved, but not yet updated in the PT */ |
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 99e4e0cd72a6..bfd7e1b0ff3f 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c | |||
| @@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, | |||
| 496 | 496 | ||
| 497 | switch (args->operation) { | 497 | switch (args->operation) { |
| 498 | case RADEON_VA_MAP: | 498 | case RADEON_VA_MAP: |
| 499 | if (bo_va->soffset) { | 499 | if (bo_va->it.start) { |
| 500 | args->operation = RADEON_VA_RESULT_VA_EXIST; | 500 | args->operation = RADEON_VA_RESULT_VA_EXIST; |
| 501 | args->offset = bo_va->soffset; | 501 | args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE; |
| 502 | goto out; | 502 | goto out; |
| 503 | } | 503 | } |
| 504 | r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags); | 504 | r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags); |
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index cd781f34bd8d..9db74a96ef61 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h | |||
| @@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update, | |||
| 72 | ), | 72 | ), |
| 73 | 73 | ||
| 74 | TP_fast_assign( | 74 | TP_fast_assign( |
| 75 | __entry->soffset = bo_va->soffset; | 75 | __entry->soffset = bo_va->it.start; |
| 76 | __entry->eoffset = bo_va->eoffset; | 76 | __entry->eoffset = bo_va->it.last + 1; |
| 77 | __entry->flags = bo_va->flags; | 77 | __entry->flags = bo_va->flags; |
| 78 | ), | 78 | ), |
| 79 | TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", | 79 | TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", |
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 906c8ae867ac..39bc5c2b02d1 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c | |||
| @@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, | |||
| 326 | } | 326 | } |
| 327 | bo_va->vm = vm; | 327 | bo_va->vm = vm; |
| 328 | bo_va->bo = bo; | 328 | bo_va->bo = bo; |
| 329 | bo_va->soffset = 0; | 329 | bo_va->it.start = 0; |
| 330 | bo_va->eoffset = 0; | 330 | bo_va->it.last = 0; |
| 331 | bo_va->flags = 0; | 331 | bo_va->flags = 0; |
| 332 | bo_va->addr = 0; | 332 | bo_va->addr = 0; |
| 333 | bo_va->ref_count = 1; | 333 | bo_va->ref_count = 1; |
| 334 | INIT_LIST_HEAD(&bo_va->bo_list); | 334 | INIT_LIST_HEAD(&bo_va->bo_list); |
| 335 | INIT_LIST_HEAD(&bo_va->vm_list); | ||
| 336 | INIT_LIST_HEAD(&bo_va->vm_status); | 335 | INIT_LIST_HEAD(&bo_va->vm_status); |
| 337 | 336 | ||
| 338 | mutex_lock(&vm->mutex); | 337 | mutex_lock(&vm->mutex); |
| 339 | list_add(&bo_va->vm_list, &vm->va); | ||
| 340 | list_add_tail(&bo_va->bo_list, &bo->va); | 338 | list_add_tail(&bo_va->bo_list, &bo->va); |
| 341 | mutex_unlock(&vm->mutex); | 339 | mutex_unlock(&vm->mutex); |
| 342 | 340 | ||
| @@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, | |||
| 420 | uint32_t flags) | 418 | uint32_t flags) |
| 421 | { | 419 | { |
| 422 | uint64_t size = radeon_bo_size(bo_va->bo); | 420 | uint64_t size = radeon_bo_size(bo_va->bo); |
| 423 | uint64_t eoffset, last_offset = 0; | ||
| 424 | struct radeon_vm *vm = bo_va->vm; | 421 | struct radeon_vm *vm = bo_va->vm; |
| 425 | struct radeon_bo_va *tmp; | ||
| 426 | struct list_head *head; | ||
| 427 | unsigned last_pfn, pt_idx; | 422 | unsigned last_pfn, pt_idx; |
| 423 | uint64_t eoffset; | ||
| 428 | int r; | 424 | int r; |
| 429 | 425 | ||
| 430 | if (soffset) { | 426 | if (soffset) { |
| @@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, | |||
| 446 | } | 442 | } |
| 447 | 443 | ||
| 448 | mutex_lock(&vm->mutex); | 444 | mutex_lock(&vm->mutex); |
| 449 | head = &vm->va; | 445 | if (bo_va->it.start || bo_va->it.last) { |
| 450 | last_offset = 0; | 446 | if (bo_va->addr) { |
| 451 | list_for_each_entry(tmp, &vm->va, vm_list) { | 447 | /* add a clone of the bo_va to clear the old address */ |
| 452 | if (bo_va == tmp) { | 448 | struct radeon_bo_va *tmp; |
| 453 | /* skip over currently modified bo */ | 449 | tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
| 454 | continue; | 450 | tmp->it.start = bo_va->it.start; |
| 451 | tmp->it.last = bo_va->it.last; | ||
| 452 | tmp->vm = vm; | ||
| 453 | tmp->addr = bo_va->addr; | ||
| 454 | list_add(&tmp->vm_status, &vm->freed); | ||
| 455 | } | 455 | } |
| 456 | 456 | ||
| 457 | if (soffset >= last_offset && eoffset <= tmp->soffset) { | 457 | interval_tree_remove(&bo_va->it, &vm->va); |
| 458 | /* bo can be added before this one */ | 458 | bo_va->it.start = 0; |
| 459 | break; | 459 | bo_va->it.last = 0; |
| 460 | } | ||
| 461 | if (eoffset > tmp->soffset && soffset < tmp->eoffset) { | ||
| 462 | /* bo and tmp overlap, invalid offset */ | ||
| 463 | dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", | ||
| 464 | bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, | ||
| 465 | (unsigned)tmp->soffset, (unsigned)tmp->eoffset); | ||
| 466 | mutex_unlock(&vm->mutex); | ||
| 467 | return -EINVAL; | ||
| 468 | } | ||
| 469 | last_offset = tmp->eoffset; | ||
| 470 | head = &tmp->vm_list; | ||
| 471 | } | 460 | } |
| 472 | 461 | ||
| 473 | if (bo_va->soffset) { | 462 | soffset /= RADEON_GPU_PAGE_SIZE; |
| 474 | /* add a clone of the bo_va to clear the old address */ | 463 | eoffset /= RADEON_GPU_PAGE_SIZE; |
| 475 | tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); | 464 | if (soffset || eoffset) { |
| 476 | if (!tmp) { | 465 | struct interval_tree_node *it; |
| 466 | it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1); | ||
| 467 | if (it) { | ||
| 468 | struct radeon_bo_va *tmp; | ||
| 469 | tmp = container_of(it, struct radeon_bo_va, it); | ||
| 470 | /* bo and tmp overlap, invalid offset */ | ||
| 471 | dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with " | ||
| 472 | "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, | ||
| 473 | soffset, tmp->bo, tmp->it.start, tmp->it.last); | ||
| 477 | mutex_unlock(&vm->mutex); | 474 | mutex_unlock(&vm->mutex); |
| 478 | return -ENOMEM; | 475 | return -EINVAL; |
| 479 | } | 476 | } |
| 480 | tmp->soffset = bo_va->soffset; | 477 | bo_va->it.start = soffset; |
| 481 | tmp->eoffset = bo_va->eoffset; | 478 | bo_va->it.last = eoffset - 1; |
| 482 | tmp->vm = vm; | 479 | interval_tree_insert(&bo_va->it, &vm->va); |
| 483 | list_add(&tmp->vm_status, &vm->freed); | ||
| 484 | } | 480 | } |
| 485 | 481 | ||
| 486 | bo_va->soffset = soffset; | ||
| 487 | bo_va->eoffset = eoffset; | ||
| 488 | bo_va->flags = flags; | 482 | bo_va->flags = flags; |
| 489 | bo_va->addr = 0; | 483 | bo_va->addr = 0; |
| 490 | list_move(&bo_va->vm_list, head); | ||
| 491 | 484 | ||
| 492 | soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; | 485 | soffset >>= radeon_vm_block_size; |
| 493 | eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; | 486 | eoffset >>= radeon_vm_block_size; |
| 494 | 487 | ||
| 495 | BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); | 488 | BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); |
| 496 | 489 | ||
| @@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, | |||
| 778 | unsigned count = 0; | 771 | unsigned count = 0; |
| 779 | uint64_t addr; | 772 | uint64_t addr; |
| 780 | 773 | ||
| 781 | start = start / RADEON_GPU_PAGE_SIZE; | ||
| 782 | end = end / RADEON_GPU_PAGE_SIZE; | ||
| 783 | |||
| 784 | /* walk over the address space and update the page tables */ | 774 | /* walk over the address space and update the page tables */ |
| 785 | for (addr = start; addr < end; ) { | 775 | for (addr = start; addr < end; ) { |
| 786 | uint64_t pt_idx = addr >> radeon_vm_block_size; | 776 | uint64_t pt_idx = addr >> radeon_vm_block_size; |
| @@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, | |||
| 847 | uint64_t addr; | 837 | uint64_t addr; |
| 848 | int r; | 838 | int r; |
| 849 | 839 | ||
| 850 | if (!bo_va->soffset) { | 840 | if (!bo_va->it.start) { |
| 851 | dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", | 841 | dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", |
| 852 | bo_va->bo, vm); | 842 | bo_va->bo, vm); |
| 853 | return -EINVAL; | 843 | return -EINVAL; |
| @@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, | |||
| 881 | 871 | ||
| 882 | trace_radeon_vm_bo_update(bo_va); | 872 | trace_radeon_vm_bo_update(bo_va); |
| 883 | 873 | ||
| 884 | nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE; | 874 | nptes = bo_va->it.last - bo_va->it.start + 1; |
| 885 | 875 | ||
| 886 | /* padding, etc. */ | 876 | /* padding, etc. */ |
| 887 | ndw = 64; | 877 | ndw = 64; |
| @@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev, | |||
| 906 | return r; | 896 | return r; |
| 907 | ib.length_dw = 0; | 897 | ib.length_dw = 0; |
| 908 | 898 | ||
| 909 | radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, | 899 | radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start, |
| 910 | addr, radeon_vm_page_flags(bo_va->flags)); | 900 | bo_va->it.last + 1, addr, |
| 901 | radeon_vm_page_flags(bo_va->flags)); | ||
| 911 | 902 | ||
| 912 | radeon_semaphore_sync_to(ib.semaphore, vm->fence); | 903 | radeon_semaphore_sync_to(ib.semaphore, vm->fence); |
| 913 | r = radeon_ib_schedule(rdev, &ib, NULL); | 904 | r = radeon_ib_schedule(rdev, &ib, NULL); |
| @@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, | |||
| 993 | list_del(&bo_va->bo_list); | 984 | list_del(&bo_va->bo_list); |
| 994 | 985 | ||
| 995 | mutex_lock(&vm->mutex); | 986 | mutex_lock(&vm->mutex); |
| 996 | list_del(&bo_va->vm_list); | 987 | interval_tree_remove(&bo_va->it, &vm->va); |
| 997 | list_del(&bo_va->vm_status); | 988 | list_del(&bo_va->vm_status); |
| 998 | 989 | ||
| 999 | if (bo_va->addr) { | 990 | if (bo_va->addr) { |
| @@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) | |||
| 1051 | vm->last_flush = NULL; | 1042 | vm->last_flush = NULL; |
| 1052 | vm->last_id_use = NULL; | 1043 | vm->last_id_use = NULL; |
| 1053 | mutex_init(&vm->mutex); | 1044 | mutex_init(&vm->mutex); |
| 1054 | INIT_LIST_HEAD(&vm->va); | 1045 | vm->va = RB_ROOT; |
| 1055 | INIT_LIST_HEAD(&vm->invalidated); | 1046 | INIT_LIST_HEAD(&vm->invalidated); |
| 1056 | INIT_LIST_HEAD(&vm->freed); | 1047 | INIT_LIST_HEAD(&vm->freed); |
| 1057 | 1048 | ||
| @@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) | |||
| 1096 | struct radeon_bo_va *bo_va, *tmp; | 1087 | struct radeon_bo_va *bo_va, *tmp; |
| 1097 | int i, r; | 1088 | int i, r; |
| 1098 | 1089 | ||
| 1099 | if (!list_empty(&vm->va)) { | 1090 | if (!RB_EMPTY_ROOT(&vm->va)) { |
| 1100 | dev_err(rdev->dev, "still active bo inside vm\n"); | 1091 | dev_err(rdev->dev, "still active bo inside vm\n"); |
| 1101 | } | 1092 | } |
| 1102 | list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { | 1093 | rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) { |
| 1103 | list_del_init(&bo_va->vm_list); | 1094 | interval_tree_remove(&bo_va->it, &vm->va); |
| 1104 | r = radeon_bo_reserve(bo_va->bo, false); | 1095 | r = radeon_bo_reserve(bo_va->bo, false); |
| 1105 | if (!r) { | 1096 | if (!r) { |
| 1106 | list_del_init(&bo_va->bo_list); | 1097 | list_del_init(&bo_va->bo_list); |
