Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c	| 284
1 file changed, 150 insertions, 134 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 06f24322e7c3..ded57dd538e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -116,38 +116,43 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
116 | } | 116 | } |
117 | 117 | ||
118 | /** | 118 | /** |
119 | * amdgpu_vm_get_bos - add the vm BOs to a duplicates list | 119 | * amdgpu_vm_validate_pt_bos - validate the page table BOs |
120 | * | 120 | * |
121 | * @adev: amdgpu device pointer | 121 | * @adev: amdgpu device pointer |
122 | * @vm: vm providing the BOs | 122 | * @vm: vm providing the BOs |
123 | * @duplicates: head of duplicates list | 123 | * @validate: callback to do the validation |
124 | * @param: parameter for the validation callback | ||
124 | * | 125 | * |
125 | * Add the page directory to the BO duplicates list | 126 | * Validate the page table BOs on command submission if neccessary. |
126 | * for command submission. | ||
127 | */ | 127 | */ |
128 | void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 128 | int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
129 | struct list_head *duplicates) | 129 | int (*validate)(void *p, struct amdgpu_bo *bo), |
130 | void *param) | ||
130 | { | 131 | { |
131 | uint64_t num_evictions; | 132 | uint64_t num_evictions; |
132 | unsigned i; | 133 | unsigned i; |
134 | int r; | ||
133 | 135 | ||
134 | /* We only need to validate the page tables | 136 | /* We only need to validate the page tables |
135 | * if they aren't already valid. | 137 | * if they aren't already valid. |
136 | */ | 138 | */ |
137 | num_evictions = atomic64_read(&adev->num_evictions); | 139 | num_evictions = atomic64_read(&adev->num_evictions); |
138 | if (num_evictions == vm->last_eviction_counter) | 140 | if (num_evictions == vm->last_eviction_counter) |
139 | return; | 141 | return 0; |
140 | 142 | ||
141 | /* add the vm page table to the list */ | 143 | /* add the vm page table to the list */ |
142 | for (i = 0; i <= vm->max_pde_used; ++i) { | 144 | for (i = 0; i <= vm->max_pde_used; ++i) { |
143 | struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry; | 145 | struct amdgpu_bo *bo = vm->page_tables[i].bo; |
144 | 146 | ||
145 | if (!entry->robj) | 147 | if (!bo) |
146 | continue; | 148 | continue; |
147 | 149 | ||
148 | list_add(&entry->tv.head, duplicates); | 150 | r = validate(param, bo); |
151 | if (r) | ||
152 | return r; | ||
149 | } | 153 | } |
150 | 154 | ||
155 | return 0; | ||
151 | } | 156 | } |
152 | 157 | ||
153 | /** | 158 | /** |
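Note on the hunk above: amdgpu_vm_get_pt_bos() used to hand the page tables back to the caller on a duplicates list; amdgpu_vm_validate_pt_bos() instead walks them itself and invokes a caller-supplied callback, returning the first error it hits. A minimal caller-side sketch of the new interface follows; the callback body and its use of ttm_bo_validate() are illustrative assumptions, not taken from this patch:

static int my_validate_cb(void *param, struct amdgpu_bo *bo)
{
	/* for example: re-validate the BO in its current placement */
	return ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
}

	/* ... during command submission ... */
	r = amdgpu_vm_validate_pt_bos(adev, vm, my_validate_cb, NULL);
	if (r)
		return r;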
@@ -166,12 +171,12 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, | |||
166 | 171 | ||
167 | spin_lock(&glob->lru_lock); | 172 | spin_lock(&glob->lru_lock); |
168 | for (i = 0; i <= vm->max_pde_used; ++i) { | 173 | for (i = 0; i <= vm->max_pde_used; ++i) { |
169 | struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry; | 174 | struct amdgpu_bo *bo = vm->page_tables[i].bo; |
170 | 175 | ||
171 | if (!entry->robj) | 176 | if (!bo) |
172 | continue; | 177 | continue; |
173 | 178 | ||
174 | ttm_bo_move_to_lru_tail(&entry->robj->tbo); | 179 | ttm_bo_move_to_lru_tail(&bo->tbo); |
175 | } | 180 | } |
176 | spin_unlock(&glob->lru_lock); | 181 | spin_unlock(&glob->lru_lock); |
177 | } | 182 | } |
@@ -341,9 +346,9 @@ error: | |||
341 | static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) | 346 | static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) |
342 | { | 347 | { |
343 | struct amdgpu_device *adev = ring->adev; | 348 | struct amdgpu_device *adev = ring->adev; |
344 | const struct amdgpu_ip_block_version *ip_block; | 349 | const struct amdgpu_ip_block *ip_block; |
345 | 350 | ||
346 | if (ring->type != AMDGPU_RING_TYPE_COMPUTE) | 351 | if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) |
347 | /* only compute rings */ | 352 | /* only compute rings */ |
348 | return false; | 353 | return false; |
349 | 354 | ||
@@ -351,10 +356,10 @@ static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) | |||
351 | if (!ip_block) | 356 | if (!ip_block) |
352 | return false; | 357 | return false; |
353 | 358 | ||
354 | if (ip_block->major <= 7) { | 359 | if (ip_block->version->major <= 7) { |
355 | /* gfx7 has no workaround */ | 360 | /* gfx7 has no workaround */ |
356 | return true; | 361 | return true; |
357 | } else if (ip_block->major == 8) { | 362 | } else if (ip_block->version->major == 8) { |
358 | if (adev->gfx.mec_fw_version >= 673) | 363 | if (adev->gfx.mec_fw_version >= 673) |
359 | /* gfx8 is fixed in MEC firmware 673 */ | 364 | /* gfx8 is fixed in MEC firmware 673 */ |
360 | return false; | 365 | return false; |
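The two hunks above only chase one extra pointer: the GFX IP lookup (elided by the diff context) now returns the per-device struct amdgpu_ip_block, which wraps the shared version descriptor, so the major revision moves from ip_block->major to ip_block->version->major. A sketch of the resulting check, assuming the elided call is amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX):

	const struct amdgpu_ip_block *ip_block;

	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (!ip_block)
		return false;

	if (ip_block->version->major <= 7)
		return true;				/* gfx7 has no workaround */
	if (ip_block->version->major == 8)
		return adev->gfx.mec_fw_version < 673;	/* gfx8 fixed in MEC fw 673 */
	return false;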
@@ -612,16 +617,26 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) | |||
612 | return result; | 617 | return result; |
613 | } | 618 | } |
614 | 619 | ||
615 | static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, | 620 | /* |
616 | struct amdgpu_vm *vm, | 621 | * amdgpu_vm_update_pdes - make sure that page directory is valid |
617 | bool shadow) | 622 | * |
623 | * @adev: amdgpu_device pointer | ||
624 | * @vm: requested vm | ||
625 | * @start: start of GPU address range | ||
626 | * @end: end of GPU address range | ||
627 | * | ||
628 | * Allocates new page tables if necessary | ||
629 | * and updates the page directory. | ||
630 | * Returns 0 for success, error for failure. | ||
631 | */ | ||
632 | int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, | ||
633 | struct amdgpu_vm *vm) | ||
618 | { | 634 | { |
635 | struct amdgpu_bo *shadow; | ||
619 | struct amdgpu_ring *ring; | 636 | struct amdgpu_ring *ring; |
620 | struct amdgpu_bo *pd = shadow ? vm->page_directory->shadow : | 637 | uint64_t pd_addr, shadow_addr; |
621 | vm->page_directory; | ||
622 | uint64_t pd_addr; | ||
623 | uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; | 638 | uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; |
624 | uint64_t last_pde = ~0, last_pt = ~0; | 639 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; |
625 | unsigned count = 0, pt_idx, ndw; | 640 | unsigned count = 0, pt_idx, ndw; |
626 | struct amdgpu_job *job; | 641 | struct amdgpu_job *job; |
627 | struct amdgpu_pte_update_params params; | 642 | struct amdgpu_pte_update_params params; |
@@ -629,15 +644,8 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, | |||
629 | 644 | ||
630 | int r; | 645 | int r; |
631 | 646 | ||
632 | if (!pd) | ||
633 | return 0; | ||
634 | |||
635 | r = amdgpu_ttm_bind(&pd->tbo, &pd->tbo.mem); | ||
636 | if (r) | ||
637 | return r; | ||
638 | |||
639 | pd_addr = amdgpu_bo_gpu_offset(pd); | ||
640 | ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); | 647 | ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); |
648 | shadow = vm->page_directory->shadow; | ||
641 | 649 | ||
642 | /* padding, etc. */ | 650 | /* padding, etc. */ |
643 | ndw = 64; | 651 | ndw = 64; |
@@ -645,6 +653,17 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, | |||
645 | /* assume the worst case */ | 653 | /* assume the worst case */ |
646 | ndw += vm->max_pde_used * 6; | 654 | ndw += vm->max_pde_used * 6; |
647 | 655 | ||
656 | pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); | ||
657 | if (shadow) { | ||
658 | r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); | ||
659 | if (r) | ||
660 | return r; | ||
661 | shadow_addr = amdgpu_bo_gpu_offset(shadow); | ||
662 | ndw *= 2; | ||
663 | } else { | ||
664 | shadow_addr = 0; | ||
665 | } | ||
666 | |||
648 | r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); | 667 | r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); |
649 | if (r) | 668 | if (r) |
650 | return r; | 669 | return r; |
@@ -655,30 +674,26 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, | |||
655 | 674 | ||
656 | /* walk over the address space and update the page directory */ | 675 | /* walk over the address space and update the page directory */ |
657 | for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { | 676 | for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { |
658 | struct amdgpu_bo *bo = vm->page_tables[pt_idx].entry.robj; | 677 | struct amdgpu_bo *bo = vm->page_tables[pt_idx].bo; |
659 | uint64_t pde, pt; | 678 | uint64_t pde, pt; |
660 | 679 | ||
661 | if (bo == NULL) | 680 | if (bo == NULL) |
662 | continue; | 681 | continue; |
663 | 682 | ||
664 | if (bo->shadow) { | 683 | if (bo->shadow) { |
665 | struct amdgpu_bo *shadow = bo->shadow; | 684 | struct amdgpu_bo *pt_shadow = bo->shadow; |
666 | 685 | ||
667 | r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); | 686 | r = amdgpu_ttm_bind(&pt_shadow->tbo, |
687 | &pt_shadow->tbo.mem); | ||
668 | if (r) | 688 | if (r) |
669 | return r; | 689 | return r; |
670 | } | 690 | } |
671 | 691 | ||
672 | pt = amdgpu_bo_gpu_offset(bo); | 692 | pt = amdgpu_bo_gpu_offset(bo); |
673 | if (!shadow) { | 693 | if (vm->page_tables[pt_idx].addr == pt) |
674 | if (vm->page_tables[pt_idx].addr == pt) | 694 | continue; |
675 | continue; | 695 | |
676 | vm->page_tables[pt_idx].addr = pt; | 696 | vm->page_tables[pt_idx].addr = pt; |
677 | } else { | ||
678 | if (vm->page_tables[pt_idx].shadow_addr == pt) | ||
679 | continue; | ||
680 | vm->page_tables[pt_idx].shadow_addr = pt; | ||
681 | } | ||
682 | 697 | ||
683 | pde = pd_addr + pt_idx * 8; | 698 | pde = pd_addr + pt_idx * 8; |
684 | if (((last_pde + 8 * count) != pde) || | 699 | if (((last_pde + 8 * count) != pde) || |
@@ -686,6 +701,13 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, | |||
686 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { | 701 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { |
687 | 702 | ||
688 | if (count) { | 703 | if (count) { |
704 | if (shadow) | ||
705 | amdgpu_vm_do_set_ptes(¶ms, | ||
706 | last_shadow, | ||
707 | last_pt, count, | ||
708 | incr, | ||
709 | AMDGPU_PTE_VALID); | ||
710 | |||
689 | amdgpu_vm_do_set_ptes(¶ms, last_pde, | 711 | amdgpu_vm_do_set_ptes(¶ms, last_pde, |
690 | last_pt, count, incr, | 712 | last_pt, count, incr, |
691 | AMDGPU_PTE_VALID); | 713 | AMDGPU_PTE_VALID); |
@@ -693,34 +715,44 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, | |||
693 | 715 | ||
694 | count = 1; | 716 | count = 1; |
695 | last_pde = pde; | 717 | last_pde = pde; |
718 | last_shadow = shadow_addr + pt_idx * 8; | ||
696 | last_pt = pt; | 719 | last_pt = pt; |
697 | } else { | 720 | } else { |
698 | ++count; | 721 | ++count; |
699 | } | 722 | } |
700 | } | 723 | } |
701 | 724 | ||
702 | if (count) | 725 | if (count) { |
726 | if (vm->page_directory->shadow) | ||
727 | amdgpu_vm_do_set_ptes(¶ms, last_shadow, last_pt, | ||
728 | count, incr, AMDGPU_PTE_VALID); | ||
729 | |||
703 | amdgpu_vm_do_set_ptes(¶ms, last_pde, last_pt, | 730 | amdgpu_vm_do_set_ptes(¶ms, last_pde, last_pt, |
704 | count, incr, AMDGPU_PTE_VALID); | 731 | count, incr, AMDGPU_PTE_VALID); |
732 | } | ||
705 | 733 | ||
706 | if (params.ib->length_dw != 0) { | 734 | if (params.ib->length_dw == 0) { |
707 | amdgpu_ring_pad_ib(ring, params.ib); | 735 | amdgpu_job_free(job); |
708 | amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv, | 736 | return 0; |
737 | } | ||
738 | |||
739 | amdgpu_ring_pad_ib(ring, params.ib); | ||
740 | amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, | ||
741 | AMDGPU_FENCE_OWNER_VM); | ||
742 | if (shadow) | ||
743 | amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, | ||
709 | AMDGPU_FENCE_OWNER_VM); | 744 | AMDGPU_FENCE_OWNER_VM); |
710 | WARN_ON(params.ib->length_dw > ndw); | ||
711 | r = amdgpu_job_submit(job, ring, &vm->entity, | ||
712 | AMDGPU_FENCE_OWNER_VM, &fence); | ||
713 | if (r) | ||
714 | goto error_free; | ||
715 | 745 | ||
716 | amdgpu_bo_fence(pd, fence, true); | 746 | WARN_ON(params.ib->length_dw > ndw); |
717 | fence_put(vm->page_directory_fence); | 747 | r = amdgpu_job_submit(job, ring, &vm->entity, |
718 | vm->page_directory_fence = fence_get(fence); | 748 | AMDGPU_FENCE_OWNER_VM, &fence); |
719 | fence_put(fence); | 749 | if (r) |
750 | goto error_free; | ||
720 | 751 | ||
721 | } else { | 752 | amdgpu_bo_fence(vm->page_directory, fence, true); |
722 | amdgpu_job_free(job); | 753 | fence_put(vm->page_directory_fence); |
723 | } | 754 | vm->page_directory_fence = fence_get(fence); |
755 | fence_put(fence); | ||
724 | 756 | ||
725 | return 0; | 757 | return 0; |
726 | 758 | ||
@@ -729,29 +761,6 @@ error_free: | |||
729 | return r; | 761 | return r; |
730 | } | 762 | } |
731 | 763 | ||
732 | /* | ||
733 | * amdgpu_vm_update_pdes - make sure that page directory is valid | ||
734 | * | ||
735 | * @adev: amdgpu_device pointer | ||
736 | * @vm: requested vm | ||
737 | * @start: start of GPU address range | ||
738 | * @end: end of GPU address range | ||
739 | * | ||
740 | * Allocates new page tables if necessary | ||
741 | * and updates the page directory. | ||
742 | * Returns 0 for success, error for failure. | ||
743 | */ | ||
744 | int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, | ||
745 | struct amdgpu_vm *vm) | ||
746 | { | ||
747 | int r; | ||
748 | |||
749 | r = amdgpu_vm_update_pd_or_shadow(adev, vm, true); | ||
750 | if (r) | ||
751 | return r; | ||
752 | return amdgpu_vm_update_pd_or_shadow(adev, vm, false); | ||
753 | } | ||
754 | |||
755 | /** | 764 | /** |
756 | * amdgpu_vm_update_ptes - make sure that page tables are valid | 765 | * amdgpu_vm_update_ptes - make sure that page tables are valid |
757 | * | 766 | * |
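Taken together, the page-directory hunks fold the old two-pass update into one submission: the command budget (ndw) is doubled when a shadow exists, every run of PDEs is written twice from the same IB (shadow first, then the real directory), both reservation objects are synced, and a single fence lands on vm->page_directory. The wrapper removed in the last hunk used to get the same effect with two jobs; a sketch of the before/after call pattern, taken from the removed lines:

	/* old: two separate submissions */
	r = amdgpu_vm_update_pd_or_shadow(adev, vm, true);	/* shadow PD */
	if (r)
		return r;
	r = amdgpu_vm_update_pd_or_shadow(adev, vm, false);	/* real PD */

	/* new: one call, one IB, one fence on vm->page_directory */
	r = amdgpu_vm_update_page_directory(adev, vm);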
@@ -781,11 +790,11 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
781 | /* initialize the variables */ | 790 | /* initialize the variables */ |
782 | addr = start; | 791 | addr = start; |
783 | pt_idx = addr >> amdgpu_vm_block_size; | 792 | pt_idx = addr >> amdgpu_vm_block_size; |
784 | pt = vm->page_tables[pt_idx].entry.robj; | 793 | pt = vm->page_tables[pt_idx].bo; |
785 | if (params->shadow) { | 794 | if (params->shadow) { |
786 | if (!pt->shadow) | 795 | if (!pt->shadow) |
787 | return; | 796 | return; |
788 | pt = vm->page_tables[pt_idx].entry.robj->shadow; | 797 | pt = pt->shadow; |
789 | } | 798 | } |
790 | if ((addr & ~mask) == (end & ~mask)) | 799 | if ((addr & ~mask) == (end & ~mask)) |
791 | nptes = end - addr; | 800 | nptes = end - addr; |
@@ -804,11 +813,11 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
804 | /* walk over the address space and update the page tables */ | 813 | /* walk over the address space and update the page tables */ |
805 | while (addr < end) { | 814 | while (addr < end) { |
806 | pt_idx = addr >> amdgpu_vm_block_size; | 815 | pt_idx = addr >> amdgpu_vm_block_size; |
807 | pt = vm->page_tables[pt_idx].entry.robj; | 816 | pt = vm->page_tables[pt_idx].bo; |
808 | if (params->shadow) { | 817 | if (params->shadow) { |
809 | if (!pt->shadow) | 818 | if (!pt->shadow) |
810 | return; | 819 | return; |
811 | pt = vm->page_tables[pt_idx].entry.robj->shadow; | 820 | pt = pt->shadow; |
812 | } | 821 | } |
813 | 822 | ||
814 | if ((addr & ~mask) == (end & ~mask)) | 823 | if ((addr & ~mask) == (end & ~mask)) |
@@ -1065,8 +1074,8 @@ error_free: | |||
1065 | * @pages_addr: DMA addresses to use for mapping | 1074 | * @pages_addr: DMA addresses to use for mapping |
1066 | * @vm: requested vm | 1075 | * @vm: requested vm |
1067 | * @mapping: mapped range and flags to use for the update | 1076 | * @mapping: mapped range and flags to use for the update |
1068 | * @addr: addr to set the area to | ||
1069 | * @flags: HW flags for the mapping | 1077 | * @flags: HW flags for the mapping |
1078 | * @nodes: array of drm_mm_nodes with the MC addresses | ||
1070 | * @fence: optional resulting fence | 1079 | * @fence: optional resulting fence |
1071 | * | 1080 | * |
1072 | * Split the mapping into smaller chunks so that each update fits | 1081 | * Split the mapping into smaller chunks so that each update fits |
@@ -1079,12 +1088,11 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1079 | dma_addr_t *pages_addr, | 1088 | dma_addr_t *pages_addr, |
1080 | struct amdgpu_vm *vm, | 1089 | struct amdgpu_vm *vm, |
1081 | struct amdgpu_bo_va_mapping *mapping, | 1090 | struct amdgpu_bo_va_mapping *mapping, |
1082 | uint32_t flags, uint64_t addr, | 1091 | uint32_t flags, |
1092 | struct drm_mm_node *nodes, | ||
1083 | struct fence **fence) | 1093 | struct fence **fence) |
1084 | { | 1094 | { |
1085 | const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE; | 1095 | uint64_t pfn, src = 0, start = mapping->it.start; |
1086 | |||
1087 | uint64_t src = 0, start = mapping->it.start; | ||
1088 | int r; | 1096 | int r; |
1089 | 1097 | ||
1090 | /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here | 1098 | /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here |
@@ -1097,23 +1105,40 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1097 | 1105 | ||
1098 | trace_amdgpu_vm_bo_update(mapping); | 1106 | trace_amdgpu_vm_bo_update(mapping); |
1099 | 1107 | ||
1100 | if (pages_addr) { | 1108 | pfn = mapping->offset >> PAGE_SHIFT; |
1101 | if (flags == gtt_flags) | 1109 | if (nodes) { |
1102 | src = adev->gart.table_addr + (addr >> 12) * 8; | 1110 | while (pfn >= nodes->size) { |
1103 | addr = 0; | 1111 | pfn -= nodes->size; |
1112 | ++nodes; | ||
1113 | } | ||
1104 | } | 1114 | } |
1105 | addr += mapping->offset; | ||
1106 | 1115 | ||
1107 | if (!pages_addr || src) | 1116 | do { |
1108 | return amdgpu_vm_bo_update_mapping(adev, exclusive, | 1117 | uint64_t max_entries; |
1109 | src, pages_addr, vm, | 1118 | uint64_t addr, last; |
1110 | start, mapping->it.last, | 1119 | |
1111 | flags, addr, fence); | 1120 | if (nodes) { |
1121 | addr = nodes->start << PAGE_SHIFT; | ||
1122 | max_entries = (nodes->size - pfn) * | ||
1123 | (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); | ||
1124 | } else { | ||
1125 | addr = 0; | ||
1126 | max_entries = S64_MAX; | ||
1127 | } | ||
1112 | 1128 | ||
1113 | while (start != mapping->it.last + 1) { | 1129 | if (pages_addr) { |
1114 | uint64_t last; | 1130 | if (flags == gtt_flags) |
1131 | src = adev->gart.table_addr + | ||
1132 | (addr >> AMDGPU_GPU_PAGE_SHIFT) * 8; | ||
1133 | else | ||
1134 | max_entries = min(max_entries, 16ull * 1024ull); | ||
1135 | addr = 0; | ||
1136 | } else if (flags & AMDGPU_PTE_VALID) { | ||
1137 | addr += adev->vm_manager.vram_base_offset; | ||
1138 | } | ||
1139 | addr += pfn << PAGE_SHIFT; | ||
1115 | 1140 | ||
1116 | last = min((uint64_t)mapping->it.last, start + max_size - 1); | 1141 | last = min((uint64_t)mapping->it.last, start + max_entries - 1); |
1117 | r = amdgpu_vm_bo_update_mapping(adev, exclusive, | 1142 | r = amdgpu_vm_bo_update_mapping(adev, exclusive, |
1118 | src, pages_addr, vm, | 1143 | src, pages_addr, vm, |
1119 | start, last, flags, addr, | 1144 | start, last, flags, addr, |
@@ -1121,9 +1146,14 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1121 | if (r) | 1146 | if (r) |
1122 | return r; | 1147 | return r; |
1123 | 1148 | ||
1149 | pfn += last - start + 1; | ||
1150 | if (nodes && nodes->size == pfn) { | ||
1151 | pfn = 0; | ||
1152 | ++nodes; | ||
1153 | } | ||
1124 | start = last + 1; | 1154 | start = last + 1; |
1125 | addr += max_size * AMDGPU_GPU_PAGE_SIZE; | 1155 | |
1126 | } | 1156 | } while (unlikely(start != mapping->it.last + 1)); |
1127 | 1157 | ||
1128 | return 0; | 1158 | return 0; |
1129 | } | 1159 | } |
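The rewritten amdgpu_vm_bo_split_mapping() above no longer assumes the BO occupies one contiguous MC range: it walks the drm_mm_node array backing the allocation, first skipping whole nodes that lie before the mapping offset, then emitting at most one node's worth of pages per update. A standalone, hypothetical illustration of that walk (map_range() and pages_left are made-up names for the sketch):

	struct drm_mm_node *node = nodes;
	uint64_t pfn = mapping->offset >> PAGE_SHIFT;

	/* skip the VRAM chunks that lie entirely before the mapping offset */
	while (node && pfn >= node->size) {
		pfn -= node->size;
		++node;
	}

	while (pages_left) {
		/* cover at most the remainder of the current chunk per update */
		uint64_t addr = (node->start + pfn) << PAGE_SHIFT;
		uint64_t count = node->size - pfn;

		if (count > pages_left)
			count = pages_left;

		map_range(addr, count);		/* hypothetical helper */

		pages_left -= count;
		pfn += count;
		if (pfn == node->size) {	/* advance to the next chunk */
			pfn = 0;
			++node;
		}
	}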
@@ -1147,40 +1177,30 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1147 | dma_addr_t *pages_addr = NULL; | 1177 | dma_addr_t *pages_addr = NULL; |
1148 | uint32_t gtt_flags, flags; | 1178 | uint32_t gtt_flags, flags; |
1149 | struct ttm_mem_reg *mem; | 1179 | struct ttm_mem_reg *mem; |
1180 | struct drm_mm_node *nodes; | ||
1150 | struct fence *exclusive; | 1181 | struct fence *exclusive; |
1151 | uint64_t addr; | ||
1152 | int r; | 1182 | int r; |
1153 | 1183 | ||
1154 | if (clear) { | 1184 | if (clear) { |
1155 | mem = NULL; | 1185 | mem = NULL; |
1156 | addr = 0; | 1186 | nodes = NULL; |
1157 | exclusive = NULL; | 1187 | exclusive = NULL; |
1158 | } else { | 1188 | } else { |
1159 | struct ttm_dma_tt *ttm; | 1189 | struct ttm_dma_tt *ttm; |
1160 | 1190 | ||
1161 | mem = &bo_va->bo->tbo.mem; | 1191 | mem = &bo_va->bo->tbo.mem; |
1162 | addr = (u64)mem->start << PAGE_SHIFT; | 1192 | nodes = mem->mm_node; |
1163 | switch (mem->mem_type) { | 1193 | if (mem->mem_type == TTM_PL_TT) { |
1164 | case TTM_PL_TT: | ||
1165 | ttm = container_of(bo_va->bo->tbo.ttm, struct | 1194 | ttm = container_of(bo_va->bo->tbo.ttm, struct |
1166 | ttm_dma_tt, ttm); | 1195 | ttm_dma_tt, ttm); |
1167 | pages_addr = ttm->dma_address; | 1196 | pages_addr = ttm->dma_address; |
1168 | break; | ||
1169 | |||
1170 | case TTM_PL_VRAM: | ||
1171 | addr += adev->vm_manager.vram_base_offset; | ||
1172 | break; | ||
1173 | |||
1174 | default: | ||
1175 | break; | ||
1176 | } | 1197 | } |
1177 | |||
1178 | exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); | 1198 | exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); |
1179 | } | 1199 | } |
1180 | 1200 | ||
1181 | flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); | 1201 | flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); |
1182 | gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && | 1202 | gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && |
1183 | adev == bo_va->bo->adev) ? flags : 0; | 1203 | adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? flags : 0; |
1184 | 1204 | ||
1185 | spin_lock(&vm->status_lock); | 1205 | spin_lock(&vm->status_lock); |
1186 | if (!list_empty(&bo_va->vm_status)) | 1206 | if (!list_empty(&bo_va->vm_status)) |
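Two related cleanups in the hunk above: the placement-specific address math moves into the split helper, so amdgpu_vm_bo_update() only picks the drm_mm_node array and, for GTT, the per-page DMA address table; and the cached bo->adev pointer gives way to deriving the device from the TTM bdev. A sketch of what amdgpu_ttm_adev() is assumed to look like (a container_of-style accessor added elsewhere in this series, reproduced here from memory):

static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
{
	struct amdgpu_mman *mman = container_of(bdev, struct amdgpu_mman, bdev);

	return container_of(mman, struct amdgpu_device, mman);
}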
@@ -1190,7 +1210,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1190 | list_for_each_entry(mapping, &bo_va->invalids, list) { | 1210 | list_for_each_entry(mapping, &bo_va->invalids, list) { |
1191 | r = amdgpu_vm_bo_split_mapping(adev, exclusive, | 1211 | r = amdgpu_vm_bo_split_mapping(adev, exclusive, |
1192 | gtt_flags, pages_addr, vm, | 1212 | gtt_flags, pages_addr, vm, |
1193 | mapping, flags, addr, | 1213 | mapping, flags, nodes, |
1194 | &bo_va->last_pt_update); | 1214 | &bo_va->last_pt_update); |
1195 | if (r) | 1215 | if (r) |
1196 | return r; | 1216 | return r; |
@@ -1405,18 +1425,17 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1405 | /* walk over the address space and allocate the page tables */ | 1425 | /* walk over the address space and allocate the page tables */ |
1406 | for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { | 1426 | for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { |
1407 | struct reservation_object *resv = vm->page_directory->tbo.resv; | 1427 | struct reservation_object *resv = vm->page_directory->tbo.resv; |
1408 | struct amdgpu_bo_list_entry *entry; | ||
1409 | struct amdgpu_bo *pt; | 1428 | struct amdgpu_bo *pt; |
1410 | 1429 | ||
1411 | entry = &vm->page_tables[pt_idx].entry; | 1430 | if (vm->page_tables[pt_idx].bo) |
1412 | if (entry->robj) | ||
1413 | continue; | 1431 | continue; |
1414 | 1432 | ||
1415 | r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, | 1433 | r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, |
1416 | AMDGPU_GPU_PAGE_SIZE, true, | 1434 | AMDGPU_GPU_PAGE_SIZE, true, |
1417 | AMDGPU_GEM_DOMAIN_VRAM, | 1435 | AMDGPU_GEM_DOMAIN_VRAM, |
1418 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | | 1436 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | |
1419 | AMDGPU_GEM_CREATE_SHADOW, | 1437 | AMDGPU_GEM_CREATE_SHADOW | |
1438 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | ||
1420 | NULL, resv, &pt); | 1439 | NULL, resv, &pt); |
1421 | if (r) | 1440 | if (r) |
1422 | goto error_free; | 1441 | goto error_free; |
@@ -1442,11 +1461,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
1442 | } | 1461 | } |
1443 | } | 1462 | } |
1444 | 1463 | ||
1445 | entry->robj = pt; | 1464 | vm->page_tables[pt_idx].bo = pt; |
1446 | entry->priority = 0; | ||
1447 | entry->tv.bo = &entry->robj->tbo; | ||
1448 | entry->tv.shared = true; | ||
1449 | entry->user_pages = NULL; | ||
1450 | vm->page_tables[pt_idx].addr = 0; | 1465 | vm->page_tables[pt_idx].addr = 0; |
1451 | } | 1466 | } |
1452 | 1467 | ||
@@ -1626,7 +1641,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1626 | r = amdgpu_bo_create(adev, pd_size, align, true, | 1641 | r = amdgpu_bo_create(adev, pd_size, align, true, |
1627 | AMDGPU_GEM_DOMAIN_VRAM, | 1642 | AMDGPU_GEM_DOMAIN_VRAM, |
1628 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | | 1643 | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | |
1629 | AMDGPU_GEM_CREATE_SHADOW, | 1644 | AMDGPU_GEM_CREATE_SHADOW | |
1645 | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, | ||
1630 | NULL, NULL, &vm->page_directory); | 1646 | NULL, NULL, &vm->page_directory); |
1631 | if (r) | 1647 | if (r) |
1632 | goto error_free_sched_entity; | 1648 | goto error_free_sched_entity; |
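Both the page-table and page-directory allocations above now also request AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS. That is consistent with the node-walking change earlier in the patch: once ordinary VRAM BOs may be split across several drm_mm_nodes, any BO whose GPU address is read as a single base offset (the PD and PTs, via amdgpu_bo_gpu_offset()) has to stay contiguous. Sketch of the resulting creation call, mirroring the flags added in the two hunks:

	r = amdgpu_bo_create(adev, size, align, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			     AMDGPU_GEM_CREATE_SHADOW |
			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
			     NULL, resv, &bo);
	if (r)
		return r;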
@@ -1697,7 +1713,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
1697 | } | 1713 | } |
1698 | 1714 | ||
1699 | for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) { | 1715 | for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) { |
1700 | struct amdgpu_bo *pt = vm->page_tables[i].entry.robj; | 1716 | struct amdgpu_bo *pt = vm->page_tables[i].bo; |
1701 | 1717 | ||
1702 | if (!pt) | 1718 | if (!pt) |
1703 | continue; | 1719 | continue; |