author	Christian König <christian.koenig@amd.com>	2015-11-30 08:19:26 -0500
committer	Alex Deucher <alexander.deucher@amd.com>	2016-02-10 14:17:05 -0500
commit	9ab21462894ddcb5463211cefaab18334fdde244 (patch)
tree	82f9e20226ecbabc757003b1fd197b867b673674 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent	b07c9d2a73f4b956ee141005e7dfbada4e51c52c (diff)
drm/amdgpu: use BOs GART instance for mapping addresses v4
That allows the VM code to use GART BOs from other driver instances.

v2: don't use copy optimization for foreign GARTs, that won't work.
v3: some more comment cleanups
v4: agd: rebase on upstream

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
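In short: instead of inferring "system pages" from the AMDGPU_PTE_SYSTEM flag and always reaching for adev->gart, the VM update helpers now take an explicit struct amdgpu_gart pointer, so a BO backed by another amdgpu instance's GART can be mapped through that instance. A reduced sketch of the new dispatch in amdgpu_vm_update_pages(), paraphrased from the hunks below (the trailing VRAM branch is elided because it is untouched by this patch):

static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
				   struct amdgpu_gart *gtt,
				   uint32_t gtt_flags,
				   struct amdgpu_ib *ib,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint32_t flags)
{
	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);

	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
		/* our own GART with matching flags: copy PTEs straight
		 * out of the GART table (copy optimization)
		 */
		uint64_t src = gtt->table_addr + (addr >> 12) * 8;

		amdgpu_vm_copy_pte(adev, ib, pe, src, count);
	} else if (gtt) {
		/* foreign GART (or flags differ): write PTEs from that
		 * instance's DMA address array, no copy optimization
		 * (see the v2 note above)
		 */
		dma_addr_t *pages_addr = gtt->pages_addr;

		amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
				    count, incr, flags);
	}
	/* else: VRAM mapping, unchanged by this patch and not shown here */
}

Callers that update page directories or map VRAM pass gtt = NULL, while amdgpu_vm_bo_update() selects gtt = &bo_va->bo->adev->gart for TTM_PL_TT placements; that per-BO selection is what lets GART BOs from other driver instances be mapped.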
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 114
1 file changed, 68 insertions, 46 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ae3b275f2a38..b371a60837eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -276,31 +276,34 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_update_pages - helper to call the right asic function
  *
  * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw access flags
  * @ib: indirect buffer to fill with commands
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
  * @incr: increase next addr by incr bytes
  * @flags: hw access flags
- * @gtt_flags: GTT hw access flags
  *
  * Traces the parameters and calls the right asic functions
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
+				   struct amdgpu_gart *gtt,
+				   uint32_t gtt_flags,
 				   struct amdgpu_ib *ib,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
-				   uint32_t flags, uint32_t gtt_flags)
+				   uint32_t flags)
 {
 	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 
-	if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) {
-		uint64_t src = adev->gart.table_addr + (addr >> 12) * 8;
+	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
+		uint64_t src = gtt->table_addr + (addr >> 12) * 8;
 		amdgpu_vm_copy_pte(adev, ib, pe, src, count);
 
-	} else if (flags & AMDGPU_PTE_SYSTEM) {
-		dma_addr_t *pages_addr = adev->gart.pages_addr;
+	} else if (gtt) {
+		dma_addr_t *pages_addr = gtt->pages_addr;
 		amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
 				    count, incr, flags);
 
@@ -362,7 +365,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 
 	ib->length_dw = 0;
 
-	amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
+	amdgpu_vm_update_pages(adev, NULL, 0, ib, addr, 0, entries, 0, 0);
+
 	amdgpu_vm_pad_ib(adev, ib);
 	WARN_ON(ib->length_dw > 64);
 	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
@@ -475,9 +479,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, ib, last_pde,
-						       last_pt, count, incr,
-						       AMDGPU_PTE_VALID, 0);
+				amdgpu_vm_update_pages(adev, NULL, 0, ib,
+						       last_pde, last_pt,
+						       count, incr,
+						       AMDGPU_PTE_VALID);
 			}
 
 			count = 1;
@@ -489,8 +494,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
-				       incr, AMDGPU_PTE_VALID, 0);
+		amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
+				       count, incr, AMDGPU_PTE_VALID);
 
 	if (ib->length_dw != 0) {
 		amdgpu_vm_pad_ib(adev, ib);
@@ -526,20 +531,22 @@ error_free:
  * amdgpu_vm_frag_ptes - add fragment information to PTEs
  *
  * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw mapping flags
  * @ib: IB for the update
  * @pe_start: first PTE to handle
  * @pe_end: last PTE to handle
  * @addr: addr those PTEs should point to
  * @flags: hw mapping flags
- * @gtt_flags: GTT hw mapping flags
  *
  * Global and local mutex must be locked!
  */
 static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+				struct amdgpu_gart *gtt,
+				uint32_t gtt_flags,
 				struct amdgpu_ib *ib,
 				uint64_t pe_start, uint64_t pe_end,
-				uint64_t addr, uint32_t flags,
-				uint32_t gtt_flags)
+				uint64_t addr, uint32_t flags)
 {
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
@@ -570,35 +577,34 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 	unsigned count;
 
 	/* system pages are non continuously */
-	if ((flags & AMDGPU_PTE_SYSTEM) || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end)) {
+	if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 
 		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, ib, pe_start, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+		amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
+				       addr, count, AMDGPU_GPU_PAGE_SIZE,
+				       flags);
 		return;
 	}
 
 	/* handle the 4K area at the beginning */
 	if (pe_start != frag_start) {
 		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, ib, pe_start, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+		amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
+				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 	}
 
 	/* handle the area in the middle */
 	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(adev, ib, frag_start, addr, count,
-			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags,
-			       gtt_flags);
+	amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
+			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != pe_end) {
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(adev, ib, frag_end, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+		amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
+				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 }
 
@@ -606,6 +612,8 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw mapping flags
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
@@ -617,11 +625,12 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * Global and local mutex must be locked!
  */
 static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+				 struct amdgpu_gart *gtt,
+				 uint32_t gtt_flags,
 				 struct amdgpu_vm *vm,
 				 struct amdgpu_ib *ib,
 				 uint64_t start, uint64_t end,
-				 uint64_t dst, uint32_t flags,
-				 uint32_t gtt_flags)
+				 uint64_t dst, uint32_t flags)
 {
 	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 	uint64_t last_pte = ~0, last_dst = ~0;
@@ -657,10 +666,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		if ((last_pte + 8 * count) != pte) {
 
 			if (count) {
-				amdgpu_vm_frag_ptes(adev, ib, last_pte,
-						    last_pte + 8 * count,
-						    last_dst, flags,
-						    gtt_flags);
+				amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+						    last_pte, last_pte + 8 * count,
+						    last_dst, flags);
 			}
 
 			count = nptes;
@@ -675,9 +683,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 	}
 
 	if (count) {
-		amdgpu_vm_frag_ptes(adev, ib, last_pte,
-				    last_pte + 8 * count,
-				    last_dst, flags, gtt_flags);
+		amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+				    last_pte, last_pte + 8 * count,
+				    last_dst, flags);
 	}
 
 	return 0;
@@ -687,6 +695,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
  * @vm: requested vm
  * @mapping: mapped range and flags to use for the update
  * @addr: addr to set the area to
@@ -699,10 +708,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  * Object have to be reserved and mutex must be locked!
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				       struct amdgpu_gart *gtt,
+				       uint32_t gtt_flags,
 				       struct amdgpu_vm *vm,
 				       struct amdgpu_bo_va_mapping *mapping,
-				       uint64_t addr, uint32_t gtt_flags,
-				       struct fence **fence)
+				       uint64_t addr, struct fence **fence)
 {
 	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
 	unsigned nptes, ncmds, ndw;
@@ -732,11 +742,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
-	if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) {
+	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
 
-	} else if (flags & AMDGPU_PTE_SYSTEM) {
+	} else if (gtt) {
 		/* header for write data commands */
 		ndw += ncmds * 4;
 
@@ -763,9 +773,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	ib->length_dw = 0;
 
-	r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
-				  mapping->it.last + 1, addr + mapping->offset,
-				  flags, gtt_flags);
+	r = amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib,
+				  mapping->it.start, mapping->it.last + 1,
+				  addr + mapping->offset, flags);
 
 	if (r) {
 		amdgpu_ib_free(adev, ib);
@@ -814,14 +824,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 {
 	struct amdgpu_vm *vm = bo_va->vm;
 	struct amdgpu_bo_va_mapping *mapping;
+	struct amdgpu_gart *gtt = NULL;
 	uint32_t flags;
 	uint64_t addr;
 	int r;
 
 	if (mem) {
 		addr = (u64)mem->start << PAGE_SHIFT;
-		if (mem->mem_type != TTM_PL_TT)
+		switch (mem->mem_type) {
+		case TTM_PL_TT:
+			gtt = &bo_va->bo->adev->gart;
+			break;
+
+		case TTM_PL_VRAM:
 			addr += adev->vm_manager.vram_base_offset;
+			break;
+
+		default:
+			break;
+		}
 	} else {
 		addr = 0;
 	}
@@ -834,8 +855,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
-						flags, &bo_va->last_pt_update);
+		r = amdgpu_vm_bo_update_mapping(adev, gtt, flags, vm, mapping, addr,
+						&bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -881,7 +902,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 				struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 		spin_unlock(&vm->freed_lock);
-		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL);
+		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, vm, mapping,
+						0, NULL);
 		kfree(mapping);
 		if (r)
 			return r;