author     Mark Brown <broonie@kernel.org>  2015-10-12 13:09:27 -0400
committer  Mark Brown <broonie@kernel.org>  2015-10-12 13:09:27 -0400
commit     79828b4fa835f73cdaf4bffa48696abdcbea9d02 (patch)
tree       5e0fa7156acb75ba603022bc807df8f2fedb97a8 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent     721b51fcf91898299d96f4b72cb9434cda29dce6 (diff)
parent     8c1a9d6323abf0fb1e5dad96cf3f1c783505ea5a (diff)
Merge remote-tracking branch 'asoc/fix/rt5645' into asoc-fix-rt5645
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  305
1 file changed, 196 insertions(+), 109 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9a4e3b63f1cb..f68b7cdc370a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -127,16 +127,16 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
- * @ring: ring we want to submit job to
  * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
  *
- * Allocate an id for the vm (cayman+).
- * Returns the fence we need to sync to (if any).
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
  *
- * Global and local mutex must be locked!
+ * Global mutex must be locked!
  */
-struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
-				       struct amdgpu_vm *vm)
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		      struct amdgpu_sync *sync)
 {
 	struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {};
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
@@ -148,7 +148,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 	/* check if the id is still valid */
 	if (vm_id->id && vm_id->last_id_use &&
 	    vm_id->last_id_use == adev->vm_manager.active[vm_id->id])
-		return NULL;
+		return 0;
 
 	/* we definately need to flush */
 	vm_id->pd_gpu_addr = ~0ll;
@@ -161,7 +161,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 			/* found a free one */
 			vm_id->id = i;
 			trace_amdgpu_vm_grab_id(i, ring->idx);
-			return NULL;
+			return 0;
 		}
 
 		if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) {
@@ -172,15 +172,19 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 
 	for (i = 0; i < 2; ++i) {
 		if (choices[i]) {
+			struct amdgpu_fence *fence;
+
+			fence = adev->vm_manager.active[choices[i]];
 			vm_id->id = choices[i];
+
 			trace_amdgpu_vm_grab_id(choices[i], ring->idx);
-			return adev->vm_manager.active[choices[i]];
+			return amdgpu_sync_fence(ring->adev, sync, &fence->base);
 		}
 	}
 
 	/* should never happen */
 	BUG();
-	return NULL;
+	return -EINVAL;
 }
 
 /**
@@ -196,17 +200,29 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
  */
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
 		     struct amdgpu_vm *vm,
-		     struct amdgpu_fence *updates)
+		     struct fence *updates)
 {
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
+	struct fence *flushed_updates = vm_id->flushed_updates;
+	bool is_earlier = false;
 
-	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
-	    amdgpu_fence_is_earlier(vm_id->flushed_updates, updates)) {
+	if (flushed_updates && updates) {
+		BUG_ON(flushed_updates->context != updates->context);
+		is_earlier = (updates->seqno - flushed_updates->seqno <=
+			      INT_MAX) ? true : false;
+	}
+
+	if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
+	    is_earlier) {
 
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
-		amdgpu_fence_unref(&vm_id->flushed_updates);
-		vm_id->flushed_updates = amdgpu_fence_ref(updates);
+		if (is_earlier) {
+			vm_id->flushed_updates = fence_get(updates);
+			fence_put(flushed_updates);
+		}
+		if (!flushed_updates)
+			vm_id->flushed_updates = fence_get(updates);
 		vm_id->pd_gpu_addr = pd_addr;
 		amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
 	}
@@ -300,6 +316,15 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 	}
 }
 
+int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
+{
+	int i;
+	for (i = 0; i < sched_job->num_ibs; i++)
+		amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
+	kfree(sched_job->ibs);
+	return 0;
+}
+
 /**
  * amdgpu_vm_clear_bo - initially clear the page dir/table
  *
@@ -310,7 +335,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_bo *bo)
 {
 	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
-	struct amdgpu_ib ib;
+	struct fence *fence = NULL;
+	struct amdgpu_ib *ib;
 	unsigned entries;
 	uint64_t addr;
 	int r;
@@ -330,24 +356,33 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	addr = amdgpu_bo_gpu_offset(bo);
 	entries = amdgpu_bo_size(bo) / 8;
 
-	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, &ib);
-	if (r)
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
 		goto error_unreserve;
 
-	ib.length_dw = 0;
-
-	amdgpu_vm_update_pages(adev, &ib, addr, 0, entries, 0, 0, 0);
-	amdgpu_vm_pad_ib(adev, &ib);
-	WARN_ON(ib.length_dw > 64);
-
-	r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
+	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
 	if (r)
 		goto error_free;
 
-	amdgpu_bo_fence(bo, ib.fence, true);
+	ib->length_dw = 0;
 
+	amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
+	amdgpu_vm_pad_ib(adev, ib);
+	WARN_ON(ib->length_dw > 64);
+	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+						 &amdgpu_vm_free_job,
+						 AMDGPU_FENCE_OWNER_VM,
+						 &fence);
+	if (!r)
+		amdgpu_bo_fence(bo, fence, true);
+	fence_put(fence);
+	if (amdgpu_enable_scheduler) {
+		amdgpu_bo_unreserve(bo);
+		return 0;
+	}
 error_free:
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
 
 error_unreserve:
 	amdgpu_bo_unreserve(bo);
@@ -400,7 +435,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
 	uint64_t last_pde = ~0, last_pt = ~0;
 	unsigned count = 0, pt_idx, ndw;
-	struct amdgpu_ib ib;
+	struct amdgpu_ib *ib;
+	struct fence *fence = NULL;
+
 	int r;
 
 	/* padding, etc. */
@@ -413,10 +450,14 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	if (ndw > 0xfffff)
 		return -ENOMEM;
 
-	r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
+		return -ENOMEM;
+
+	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
 	if (r)
 		return r;
-	ib.length_dw = 0;
+	ib->length_dw = 0;
 
 	/* walk over the address space and update the page directory */
 	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -436,7 +477,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, &ib, last_pde,
+				amdgpu_vm_update_pages(adev, ib, last_pde,
 						       last_pt, count, incr,
 						       AMDGPU_PTE_VALID, 0);
 			}
@@ -450,23 +491,37 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, &ib, last_pde, last_pt, count,
+		amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
 				       incr, AMDGPU_PTE_VALID, 0);
 
-	if (ib.length_dw != 0) {
-		amdgpu_vm_pad_ib(adev, &ib);
-		amdgpu_sync_resv(adev, &ib.sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
-		WARN_ON(ib.length_dw > ndw);
-		r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
-		if (r) {
-			amdgpu_ib_free(adev, &ib);
-			return r;
-		}
-		amdgpu_bo_fence(pd, ib.fence, true);
+	if (ib->length_dw != 0) {
+		amdgpu_vm_pad_ib(adev, ib);
+		amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+		WARN_ON(ib->length_dw > ndw);
+		r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+							 &amdgpu_vm_free_job,
+							 AMDGPU_FENCE_OWNER_VM,
+							 &fence);
+		if (r)
+			goto error_free;
+
+		amdgpu_bo_fence(pd, fence, true);
+		fence_put(vm->page_directory_fence);
+		vm->page_directory_fence = fence_get(fence);
+		fence_put(fence);
+	}
+
+	if (!amdgpu_enable_scheduler || ib->length_dw == 0) {
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 	}
-	amdgpu_ib_free(adev, &ib);
 
 	return 0;
+
+error_free:
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
+	return r;
 }
 
 /**
@@ -572,9 +627,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
 	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 	uint64_t last_pte = ~0, last_dst = ~0;
+	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned count = 0;
 	uint64_t addr;
 
+	/* sync to everything on unmapping */
+	if (!(flags & AMDGPU_PTE_VALID))
+		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
@@ -583,8 +643,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		uint64_t pte;
 		int r;
 
-		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv,
-				 AMDGPU_FENCE_OWNER_VM);
+		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
 		r = reservation_object_reserve_shared(pt->tbo.resv);
 		if (r)
 			return r;
@@ -640,7 +699,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  */
 static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm,
 				uint64_t start, uint64_t end,
-				struct amdgpu_fence *fence)
+				struct fence *fence)
 {
 	unsigned i;
 
@@ -670,12 +729,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       struct amdgpu_vm *vm,
 				       struct amdgpu_bo_va_mapping *mapping,
 				       uint64_t addr, uint32_t gtt_flags,
-				       struct amdgpu_fence **fence)
+				       struct fence **fence)
 {
 	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
 	unsigned nptes, ncmds, ndw;
 	uint32_t flags = gtt_flags;
-	struct amdgpu_ib ib;
+	struct amdgpu_ib *ib;
+	struct fence *f = NULL;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -722,46 +782,54 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (ndw > 0xfffff)
 		return -ENOMEM;
 
-	r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
-	if (r)
-		return r;
-	ib.length_dw = 0;
-
-	if (!(flags & AMDGPU_PTE_VALID)) {
-		unsigned i;
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
+		return -ENOMEM;
 
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_fence *f = vm->ids[i].last_id_use;
-			amdgpu_sync_fence(&ib.sync, f);
-		}
-	}
+	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
+	if (r) {
+		kfree(ib);
+		return r;
+	}
 
-	r = amdgpu_vm_update_ptes(adev, vm, &ib, mapping->it.start,
+	ib->length_dw = 0;
+
+	r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
 				  mapping->it.last + 1, addr + mapping->offset,
 				  flags, gtt_flags);
 
 	if (r) {
-		amdgpu_ib_free(adev, &ib);
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 		return r;
 	}
 
-	amdgpu_vm_pad_ib(adev, &ib);
-	WARN_ON(ib.length_dw > ndw);
+	amdgpu_vm_pad_ib(adev, ib);
+	WARN_ON(ib->length_dw > ndw);
+	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+						 &amdgpu_vm_free_job,
+						 AMDGPU_FENCE_OWNER_VM,
+						 &f);
+	if (r)
+		goto error_free;
 
-	r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
-	if (r) {
-		amdgpu_ib_free(adev, &ib);
-		return r;
-	}
 	amdgpu_vm_fence_pts(vm, mapping->it.start,
-			    mapping->it.last + 1, ib.fence);
+			    mapping->it.last + 1, f);
 	if (fence) {
-		amdgpu_fence_unref(fence);
-		*fence = amdgpu_fence_ref(ib.fence);
+		fence_put(*fence);
+		*fence = fence_get(f);
+	}
+	fence_put(f);
+	if (!amdgpu_enable_scheduler) {
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 	}
-	amdgpu_ib_free(adev, &ib);
-
 	return 0;
+
+error_free:
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
+	return r;
 }
 
 /**
@@ -794,21 +862,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		addr = 0;
 	}
 
-	if (addr == bo_va->addr)
-		return 0;
-
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
 
-	list_for_each_entry(mapping, &bo_va->mappings, list) {
+	spin_lock(&vm->status_lock);
+	if (!list_empty(&bo_va->vm_status))
+		list_splice_init(&bo_va->valids, &bo_va->invalids);
+	spin_unlock(&vm->status_lock);
+
+	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
 						flags, &bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
 
-	bo_va->addr = addr;
 	spin_lock(&vm->status_lock);
+	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	list_del_init(&bo_va->vm_status);
+	if (!mem)
+		list_add(&bo_va->vm_status, &vm->cleared);
 	spin_unlock(&vm->status_lock);
 
 	return 0;
@@ -861,7 +933,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
 			     struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = NULL;
-	int r;
+	int r = 0;
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->invalidated)) {
@@ -878,8 +950,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	if (bo_va)
-		amdgpu_sync_fence(sync, bo_va->last_pt_update);
-	return 0;
+		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
+
+	return r;
 }
 
 /**
@@ -907,10 +980,10 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	}
 	bo_va->vm = vm;
 	bo_va->bo = bo;
-	bo_va->addr = 0;
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->bo_list);
-	INIT_LIST_HEAD(&bo_va->mappings);
+	INIT_LIST_HEAD(&bo_va->valids);
+	INIT_LIST_HEAD(&bo_va->invalids);
 	INIT_LIST_HEAD(&bo_va->vm_status);
 
 	mutex_lock(&vm->mutex);
@@ -999,12 +1072,10 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->mappings);
+	list_add(&mapping->list, &bo_va->invalids);
 	interval_tree_insert(&mapping->it, &vm->va);
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
-	bo_va->addr = 0;
-
 	/* Make sure the page tables are allocated */
 	saddr >>= amdgpu_vm_block_size;
 	eaddr >>= amdgpu_vm_block_size;
@@ -1028,7 +1099,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
 				     AMDGPU_GPU_PAGE_SIZE, true,
-				     AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &pt);
+				     AMDGPU_GEM_DOMAIN_VRAM,
+				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+				     NULL, &pt);
 		if (r)
 			goto error_free;
 
@@ -1085,17 +1158,27 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_vm *vm = bo_va->vm;
+	bool valid = true;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	list_for_each_entry(mapping, &bo_va->mappings, list) {
+	list_for_each_entry(mapping, &bo_va->valids, list) {
 		if (mapping->it.start == saddr)
 			break;
 	}
 
-	if (&mapping->list == &bo_va->mappings) {
-		amdgpu_bo_unreserve(bo_va->bo);
-		return -ENOENT;
+	if (&mapping->list == &bo_va->valids) {
+		valid = false;
+
+		list_for_each_entry(mapping, &bo_va->invalids, list) {
+			if (mapping->it.start == saddr)
+				break;
+		}
+
+		if (&mapping->list == &bo_va->invalids) {
+			amdgpu_bo_unreserve(bo_va->bo);
+			return -ENOENT;
+		}
 	}
 
 	mutex_lock(&vm->mutex);
@@ -1103,12 +1186,10 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	interval_tree_remove(&mapping->it, &vm->va);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
-	if (bo_va->addr) {
-		/* clear the old address */
+	if (valid)
 		list_add(&mapping->list, &vm->freed);
-	} else {
+	else
 		kfree(mapping);
-	}
 	mutex_unlock(&vm->mutex);
 	amdgpu_bo_unreserve(bo_va->bo);
 
@@ -1139,16 +1220,19 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 	list_del(&bo_va->vm_status);
 	spin_unlock(&vm->status_lock);
 
-	list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) {
+	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
 		interval_tree_remove(&mapping->it, &vm->va);
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-		if (bo_va->addr)
-			list_add(&mapping->list, &vm->freed);
-		else
-			kfree(mapping);
+		list_add(&mapping->list, &vm->freed);
+	}
+	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
+		list_del(&mapping->list);
+		interval_tree_remove(&mapping->it, &vm->va);
+		kfree(mapping);
 	}
-	amdgpu_fence_unref(&bo_va->last_pt_update);
+
+	fence_put(bo_va->last_pt_update);
 	kfree(bo_va);
 
 	mutex_unlock(&vm->mutex);
@@ -1169,12 +1253,10 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	struct amdgpu_bo_va *bo_va;
 
 	list_for_each_entry(bo_va, &bo->va, bo_list) {
-		if (bo_va->addr) {
-			spin_lock(&bo_va->vm->status_lock);
-			list_del(&bo_va->vm_status);
+		spin_lock(&bo_va->vm->status_lock);
+		if (list_empty(&bo_va->vm_status))
 			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
-			spin_unlock(&bo_va->vm->status_lock);
-		}
+		spin_unlock(&bo_va->vm->status_lock);
 	}
 }
 
@@ -1202,6 +1284,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	vm->va = RB_ROOT;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
+	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
 
 	pd_size = amdgpu_vm_directory_size(adev);
@@ -1215,8 +1298,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		return -ENOMEM;
 	}
 
+	vm->page_directory_fence = NULL;
+
 	r = amdgpu_bo_create(adev, pd_size, align, true,
-			     AMDGPU_GEM_DOMAIN_VRAM, 0,
+			     AMDGPU_GEM_DOMAIN_VRAM,
+			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 			     NULL, &vm->page_directory);
 	if (r)
 		return r;
@@ -1263,9 +1349,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	kfree(vm->page_tables);
 
 	amdgpu_bo_unref(&vm->page_directory);
+	fence_put(vm->page_directory_fence);
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		amdgpu_fence_unref(&vm->ids[i].flushed_updates);
+		fence_put(vm->ids[i].flushed_updates);
 		amdgpu_fence_unref(&vm->ids[i].last_id_use);
 	}
 