author    Mark Brown <broonie@kernel.org>  2015-10-12 13:09:27 -0400
committer Mark Brown <broonie@kernel.org>  2015-10-12 13:09:27 -0400
commit    79828b4fa835f73cdaf4bffa48696abdcbea9d02 (patch)
tree      5e0fa7156acb75ba603022bc807df8f2fedb97a8 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent    721b51fcf91898299d96f4b72cb9434cda29dce6 (diff)
parent    8c1a9d6323abf0fb1e5dad96cf3f1c783505ea5a (diff)
Merge remote-tracking branch 'asoc/fix/rt5645' into asoc-fix-rt5645
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  305
1 file changed, 196 insertions(+), 109 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9a4e3b63f1cb..f68b7cdc370a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -127,16 +127,16 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
- * @ring: ring we want to submit job to
  * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
  *
- * Allocate an id for the vm (cayman+).
- * Returns the fence we need to sync to (if any).
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
  *
- * Global and local mutex must be locked!
+ * Global mutex must be locked!
  */
-struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
-                                       struct amdgpu_vm *vm)
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+                      struct amdgpu_sync *sync)
 {
         struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {};
         struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
@@ -148,7 +148,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
         /* check if the id is still valid */
         if (vm_id->id && vm_id->last_id_use &&
             vm_id->last_id_use == adev->vm_manager.active[vm_id->id])
-                return NULL;
+                return 0;
 
         /* we definately need to flush */
         vm_id->pd_gpu_addr = ~0ll;
@@ -161,7 +161,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
                         /* found a free one */
                         vm_id->id = i;
                         trace_amdgpu_vm_grab_id(i, ring->idx);
-                        return NULL;
+                        return 0;
                 }
 
                 if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) {
@@ -172,15 +172,19 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 
         for (i = 0; i < 2; ++i) {
                 if (choices[i]) {
+                        struct amdgpu_fence *fence;
+
+                        fence = adev->vm_manager.active[choices[i]];
                         vm_id->id = choices[i];
+
                         trace_amdgpu_vm_grab_id(choices[i], ring->idx);
-                        return adev->vm_manager.active[choices[i]];
+                        return amdgpu_sync_fence(ring->adev, sync, &fence->base);
                 }
         }
 
         /* should never happen */
         BUG();
-        return NULL;
+        return -EINVAL;
 }
 
 /**
@@ -196,17 +200,29 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
  */
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
                      struct amdgpu_vm *vm,
-                     struct amdgpu_fence *updates)
+                     struct fence *updates)
 {
         uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
         struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
+        struct fence *flushed_updates = vm_id->flushed_updates;
+        bool is_earlier = false;
 
-        if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
-            amdgpu_fence_is_earlier(vm_id->flushed_updates, updates)) {
+        if (flushed_updates && updates) {
+                BUG_ON(flushed_updates->context != updates->context);
+                is_earlier = (updates->seqno - flushed_updates->seqno <=
+                              INT_MAX) ? true : false;
+        }
+
+        if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
+            is_earlier) {
 
                 trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
-                amdgpu_fence_unref(&vm_id->flushed_updates);
-                vm_id->flushed_updates = amdgpu_fence_ref(updates);
+                if (is_earlier) {
+                        vm_id->flushed_updates = fence_get(updates);
+                        fence_put(flushed_updates);
+                }
+                if (!flushed_updates)
+                        vm_id->flushed_updates = fence_get(updates);
                 vm_id->pd_gpu_addr = pd_addr;
                 amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
         }
@@ -300,6 +316,15 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
         }
 }
 
+int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
+{
+        int i;
+        for (i = 0; i < sched_job->num_ibs; i++)
+                amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
+        kfree(sched_job->ibs);
+        return 0;
+}
+
 /**
  * amdgpu_vm_clear_bo - initially clear the page dir/table
  *
@@ -310,7 +335,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                               struct amdgpu_bo *bo)
 {
         struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
-        struct amdgpu_ib ib;
+        struct fence *fence = NULL;
+        struct amdgpu_ib *ib;
         unsigned entries;
         uint64_t addr;
         int r;
@@ -330,24 +356,33 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
         addr = amdgpu_bo_gpu_offset(bo);
         entries = amdgpu_bo_size(bo) / 8;
 
-        r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, &ib);
-        if (r)
+        ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+        if (!ib)
                 goto error_unreserve;
 
-        ib.length_dw = 0;
-
-        amdgpu_vm_update_pages(adev, &ib, addr, 0, entries, 0, 0, 0);
-        amdgpu_vm_pad_ib(adev, &ib);
-        WARN_ON(ib.length_dw > 64);
-
-        r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
+        r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
         if (r)
                 goto error_free;
 
-        amdgpu_bo_fence(bo, ib.fence, true);
+        ib->length_dw = 0;
 
+        amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
+        amdgpu_vm_pad_ib(adev, ib);
+        WARN_ON(ib->length_dw > 64);
+        r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+                                                 &amdgpu_vm_free_job,
+                                                 AMDGPU_FENCE_OWNER_VM,
+                                                 &fence);
+        if (!r)
+                amdgpu_bo_fence(bo, fence, true);
+        fence_put(fence);
+        if (amdgpu_enable_scheduler) {
+                amdgpu_bo_unreserve(bo);
+                return 0;
+        }
 error_free:
-        amdgpu_ib_free(adev, &ib);
+        amdgpu_ib_free(adev, ib);
+        kfree(ib);
 
 error_unreserve:
         amdgpu_bo_unreserve(bo);
@@ -400,7 +435,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
         uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
         uint64_t last_pde = ~0, last_pt = ~0;
         unsigned count = 0, pt_idx, ndw;
-        struct amdgpu_ib ib;
+        struct amdgpu_ib *ib;
+        struct fence *fence = NULL;
+
         int r;
 
         /* padding, etc. */
@@ -413,10 +450,14 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
         if (ndw > 0xfffff)
                 return -ENOMEM;
 
-        r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
+        ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+        if (!ib)
+                return -ENOMEM;
+
+        r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
         if (r)
                 return r;
-        ib.length_dw = 0;
+        ib->length_dw = 0;
 
         /* walk over the address space and update the page directory */
         for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -436,7 +477,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
                     ((last_pt + incr * count) != pt)) {
 
                         if (count) {
-                                amdgpu_vm_update_pages(adev, &ib, last_pde,
+                                amdgpu_vm_update_pages(adev, ib, last_pde,
                                                        last_pt, count, incr,
                                                        AMDGPU_PTE_VALID, 0);
                         }
@@ -450,23 +491,37 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
         }
 
         if (count)
-                amdgpu_vm_update_pages(adev, &ib, last_pde, last_pt, count,
+                amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
                                         incr, AMDGPU_PTE_VALID, 0);
 
-        if (ib.length_dw != 0) {
-                amdgpu_vm_pad_ib(adev, &ib);
-                amdgpu_sync_resv(adev, &ib.sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
-                WARN_ON(ib.length_dw > ndw);
-                r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
-                if (r) {
-                        amdgpu_ib_free(adev, &ib);
-                        return r;
-                }
-                amdgpu_bo_fence(pd, ib.fence, true);
+        if (ib->length_dw != 0) {
+                amdgpu_vm_pad_ib(adev, ib);
+                amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+                WARN_ON(ib->length_dw > ndw);
+                r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+                                                         &amdgpu_vm_free_job,
+                                                         AMDGPU_FENCE_OWNER_VM,
+                                                         &fence);
+                if (r)
+                        goto error_free;
+
+                amdgpu_bo_fence(pd, fence, true);
+                fence_put(vm->page_directory_fence);
+                vm->page_directory_fence = fence_get(fence);
+                fence_put(fence);
+        }
+
+        if (!amdgpu_enable_scheduler || ib->length_dw == 0) {
+                amdgpu_ib_free(adev, ib);
+                kfree(ib);
         }
-        amdgpu_ib_free(adev, &ib);
 
         return 0;
+
+error_free:
+        amdgpu_ib_free(adev, ib);
+        kfree(ib);
+        return r;
 }
 
 /**
@@ -572,9 +627,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
         uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
         uint64_t last_pte = ~0, last_dst = ~0;
+        void *owner = AMDGPU_FENCE_OWNER_VM;
         unsigned count = 0;
         uint64_t addr;
 
+        /* sync to everything on unmapping */
+        if (!(flags & AMDGPU_PTE_VALID))
+                owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+
         /* walk over the address space and update the page tables */
         for (addr = start; addr < end; ) {
                 uint64_t pt_idx = addr >> amdgpu_vm_block_size;
@@ -583,8 +643,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
                 uint64_t pte;
                 int r;
 
-                amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv,
-                                 AMDGPU_FENCE_OWNER_VM);
+                amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
                 r = reservation_object_reserve_shared(pt->tbo.resv);
                 if (r)
                         return r;
@@ -640,7 +699,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  */
 static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm,
                                 uint64_t start, uint64_t end,
-                                struct amdgpu_fence *fence)
+                                struct fence *fence)
 {
         unsigned i;
 
@@ -670,12 +729,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                                        struct amdgpu_vm *vm,
                                        struct amdgpu_bo_va_mapping *mapping,
                                        uint64_t addr, uint32_t gtt_flags,
-                                       struct amdgpu_fence **fence)
+                                       struct fence **fence)
 {
         struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
         unsigned nptes, ncmds, ndw;
         uint32_t flags = gtt_flags;
-        struct amdgpu_ib ib;
+        struct amdgpu_ib *ib;
+        struct fence *f = NULL;
         int r;
 
         /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -722,46 +782,54 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
         if (ndw > 0xfffff)
                 return -ENOMEM;
 
-        r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
-        if (r)
-                return r;
-        ib.length_dw = 0;
-
-        if (!(flags & AMDGPU_PTE_VALID)) {
-                unsigned i;
+        ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+        if (!ib)
+                return -ENOMEM;
 
-                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                        struct amdgpu_fence *f = vm->ids[i].last_id_use;
-                        amdgpu_sync_fence(&ib.sync, f);
-                }
-        }
+        r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
+        if (r) {
+                kfree(ib);
+                return r;
+        }
 
-        r = amdgpu_vm_update_ptes(adev, vm, &ib, mapping->it.start,
+        ib->length_dw = 0;
+
+        r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
                                   mapping->it.last + 1, addr + mapping->offset,
                                   flags, gtt_flags);
 
         if (r) {
-                amdgpu_ib_free(adev, &ib);
+                amdgpu_ib_free(adev, ib);
+                kfree(ib);
                 return r;
         }
 
-        amdgpu_vm_pad_ib(adev, &ib);
-        WARN_ON(ib.length_dw > ndw);
+        amdgpu_vm_pad_ib(adev, ib);
+        WARN_ON(ib->length_dw > ndw);
+        r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+                                                 &amdgpu_vm_free_job,
+                                                 AMDGPU_FENCE_OWNER_VM,
+                                                 &f);
+        if (r)
+                goto error_free;
 
-        r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
-        if (r) {
-                amdgpu_ib_free(adev, &ib);
-                return r;
-        }
         amdgpu_vm_fence_pts(vm, mapping->it.start,
-                            mapping->it.last + 1, ib.fence);
+                            mapping->it.last + 1, f);
         if (fence) {
-                amdgpu_fence_unref(fence);
-                *fence = amdgpu_fence_ref(ib.fence);
+                fence_put(*fence);
+                *fence = fence_get(f);
+        }
+        fence_put(f);
+        if (!amdgpu_enable_scheduler) {
+                amdgpu_ib_free(adev, ib);
+                kfree(ib);
         }
-        amdgpu_ib_free(adev, &ib);
-
         return 0;
+
+error_free:
+        amdgpu_ib_free(adev, ib);
+        kfree(ib);
+        return r;
 }
 
 /**
@@ -794,21 +862,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                 addr = 0;
         }
 
-        if (addr == bo_va->addr)
-                return 0;
-
         flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
 
-        list_for_each_entry(mapping, &bo_va->mappings, list) {
+        spin_lock(&vm->status_lock);
+        if (!list_empty(&bo_va->vm_status))
+                list_splice_init(&bo_va->valids, &bo_va->invalids);
+        spin_unlock(&vm->status_lock);
+
+        list_for_each_entry(mapping, &bo_va->invalids, list) {
                 r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
                                                 flags, &bo_va->last_pt_update);
                 if (r)
                         return r;
         }
 
-        bo_va->addr = addr;
         spin_lock(&vm->status_lock);
+        list_splice_init(&bo_va->invalids, &bo_va->valids);
         list_del_init(&bo_va->vm_status);
+        if (!mem)
+                list_add(&bo_va->vm_status, &vm->cleared);
         spin_unlock(&vm->status_lock);
 
         return 0;
@@ -861,7 +933,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
                              struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 {
         struct amdgpu_bo_va *bo_va = NULL;
-        int r;
+        int r = 0;
 
         spin_lock(&vm->status_lock);
         while (!list_empty(&vm->invalidated)) {
@@ -878,8 +950,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
         spin_unlock(&vm->status_lock);
 
         if (bo_va)
-                amdgpu_sync_fence(sync, bo_va->last_pt_update);
-        return 0;
+                r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
+
+        return r;
 }
 
 /**
@@ -907,10 +980,10 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
         }
         bo_va->vm = vm;
         bo_va->bo = bo;
-        bo_va->addr = 0;
         bo_va->ref_count = 1;
         INIT_LIST_HEAD(&bo_va->bo_list);
-        INIT_LIST_HEAD(&bo_va->mappings);
+        INIT_LIST_HEAD(&bo_va->valids);
+        INIT_LIST_HEAD(&bo_va->invalids);
         INIT_LIST_HEAD(&bo_va->vm_status);
 
         mutex_lock(&vm->mutex);
@@ -999,12 +1072,10 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
         mapping->offset = offset;
         mapping->flags = flags;
 
-        list_add(&mapping->list, &bo_va->mappings);
+        list_add(&mapping->list, &bo_va->invalids);
         interval_tree_insert(&mapping->it, &vm->va);
         trace_amdgpu_vm_bo_map(bo_va, mapping);
 
-        bo_va->addr = 0;
-
         /* Make sure the page tables are allocated */
         saddr >>= amdgpu_vm_block_size;
         eaddr >>= amdgpu_vm_block_size;
@@ -1028,7 +1099,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
                 r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
                                      AMDGPU_GPU_PAGE_SIZE, true,
-                                     AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &pt);
+                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+                                     NULL, &pt);
                 if (r)
                         goto error_free;
 
@@ -1085,17 +1158,27 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 {
         struct amdgpu_bo_va_mapping *mapping;
         struct amdgpu_vm *vm = bo_va->vm;
+        bool valid = true;
 
         saddr /= AMDGPU_GPU_PAGE_SIZE;
 
-        list_for_each_entry(mapping, &bo_va->mappings, list) {
+        list_for_each_entry(mapping, &bo_va->valids, list) {
                 if (mapping->it.start == saddr)
                         break;
         }
 
-        if (&mapping->list == &bo_va->mappings) {
-                amdgpu_bo_unreserve(bo_va->bo);
-                return -ENOENT;
+        if (&mapping->list == &bo_va->valids) {
+                valid = false;
+
+                list_for_each_entry(mapping, &bo_va->invalids, list) {
+                        if (mapping->it.start == saddr)
+                                break;
+                }
+
+                if (&mapping->list == &bo_va->invalids) {
+                        amdgpu_bo_unreserve(bo_va->bo);
+                        return -ENOENT;
+                }
         }
 
         mutex_lock(&vm->mutex);
@@ -1103,12 +1186,10 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
         interval_tree_remove(&mapping->it, &vm->va);
         trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
-        if (bo_va->addr) {
-                /* clear the old address */
+        if (valid)
                 list_add(&mapping->list, &vm->freed);
-        } else {
+        else
                 kfree(mapping);
-        }
         mutex_unlock(&vm->mutex);
         amdgpu_bo_unreserve(bo_va->bo);
 
@@ -1139,16 +1220,19 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
         list_del(&bo_va->vm_status);
         spin_unlock(&vm->status_lock);
 
-        list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) {
+        list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
                 list_del(&mapping->list);
                 interval_tree_remove(&mapping->it, &vm->va);
                 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-                if (bo_va->addr)
-                        list_add(&mapping->list, &vm->freed);
-                else
-                        kfree(mapping);
+                list_add(&mapping->list, &vm->freed);
+        }
+        list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
+                list_del(&mapping->list);
+                interval_tree_remove(&mapping->it, &vm->va);
+                kfree(mapping);
         }
-        amdgpu_fence_unref(&bo_va->last_pt_update);
+
+        fence_put(bo_va->last_pt_update);
         kfree(bo_va);
 
         mutex_unlock(&vm->mutex);
@@ -1169,12 +1253,10 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
         struct amdgpu_bo_va *bo_va;
 
         list_for_each_entry(bo_va, &bo->va, bo_list) {
-                if (bo_va->addr) {
-                        spin_lock(&bo_va->vm->status_lock);
-                        list_del(&bo_va->vm_status);
+                spin_lock(&bo_va->vm->status_lock);
+                if (list_empty(&bo_va->vm_status))
                         list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
-                        spin_unlock(&bo_va->vm->status_lock);
-                }
+                spin_unlock(&bo_va->vm->status_lock);
         }
 }
 
@@ -1202,6 +1284,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
         vm->va = RB_ROOT;
         spin_lock_init(&vm->status_lock);
         INIT_LIST_HEAD(&vm->invalidated);
+        INIT_LIST_HEAD(&vm->cleared);
         INIT_LIST_HEAD(&vm->freed);
 
         pd_size = amdgpu_vm_directory_size(adev);
@@ -1215,8 +1298,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
                 return -ENOMEM;
         }
 
+        vm->page_directory_fence = NULL;
+
         r = amdgpu_bo_create(adev, pd_size, align, true,
-                             AMDGPU_GEM_DOMAIN_VRAM, 0,
+                             AMDGPU_GEM_DOMAIN_VRAM,
+                             AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
                              NULL, &vm->page_directory);
         if (r)
                 return r;
@@ -1263,9 +1349,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
         kfree(vm->page_tables);
 
         amdgpu_bo_unref(&vm->page_directory);
+        fence_put(vm->page_directory_fence);
 
         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                amdgpu_fence_unref(&vm->ids[i].flushed_updates);
+                fence_put(vm->ids[i].flushed_updates);
                 amdgpu_fence_unref(&vm->ids[i].last_id_use);
         }
 