author     Dave Airlie <airlied@redhat.com>   2017-09-27 18:37:02 -0400
committer  Dave Airlie <airlied@redhat.com>   2017-09-27 18:37:02 -0400
commit     754270c7c56292e97d0eff924a5d5d83f92add07 (patch)
tree       8ee52859dbc5e1712b22a0bcb73cadf01d9d0688 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent     9afafdbfbf5e8fca4dabd817939b61f1e766e64c (diff)
parent     6f87a895709eecc1542fe947e349364ad061ac00 (diff)
Merge branch 'drm-next-4.15' of git://people.freedesktop.org/~agd5f/linux into drm-next
First feature pull for 4.15. Highlights:
- Per VM BO support
- Lots of powerplay cleanups
- Powerplay support for CI
- pasid mgr for kfd
- interrupt infrastructure for recoverable page faults
- SR-IOV fixes
- initial GPU reset for vega10
- prime mmap support
- ttm page table debugging improvements
- lots of bug fixes
* 'drm-next-4.15' of git://people.freedesktop.org/~agd5f/linux: (232 commits)
drm/amdgpu: clarify license in amdgpu_trace_points.c
drm/amdgpu: Add gem_prime_mmap support
drm/amd/powerplay: delete dead code in smumgr
drm/amd/powerplay: delete SMUM_FIELD_MASK
drm/amd/powerplay: delete SMUM_WAIT_INDIRECT_FIELD
drm/amd/powerplay: delete SMUM_READ_FIELD
drm/amd/powerplay: delete SMUM_SET_FIELD
drm/amd/powerplay: delete SMUM_READ_VFPF_INDIRECT_FIELD
drm/amd/powerplay: delete SMUM_WRITE_VFPF_INDIRECT_FIELD
drm/amd/powerplay: delete SMUM_WRITE_FIELD
drm/amd/powerplay: delete SMU_WRITE_INDIRECT_FIELD
drm/amd/powerplay: move macros to hwmgr.h
drm/amd/powerplay: move PHM_WAIT_VFPF_INDIRECT_FIELD to hwmgr.h
drm/amd/powerplay: move SMUM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL to hwmgr.h
drm/amd/powerplay: move SMUM_WAIT_INDIRECT_FIELD_UNEQUAL to hwmgr.h
drm/amd/powerplay: add new helper functions in hwmgr.h
drm/amd/powerplay: use SMU_IND_INDEX/DATA_11 pair
drm/amd/powerplay: refine powerplay code.
drm/amd/powerplay: delete dead code in hwmgr.h
drm/amd/powerplay: refine interface in struct pp_smumgr_func
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  598
1 file changed, 382 insertions, 216 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bd20ff018512..bbcc67038203 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -27,12 +27,59 @@ | |||
27 | */ | 27 | */ |
28 | #include <linux/dma-fence-array.h> | 28 | #include <linux/dma-fence-array.h> |
29 | #include <linux/interval_tree_generic.h> | 29 | #include <linux/interval_tree_generic.h> |
30 | #include <linux/idr.h> | ||
30 | #include <drm/drmP.h> | 31 | #include <drm/drmP.h> |
31 | #include <drm/amdgpu_drm.h> | 32 | #include <drm/amdgpu_drm.h> |
32 | #include "amdgpu.h" | 33 | #include "amdgpu.h" |
33 | #include "amdgpu_trace.h" | 34 | #include "amdgpu_trace.h" |
34 | 35 | ||
35 | /* | 36 | /* |
37 | * PASID manager | ||
38 | * | ||
39 | * PASIDs are global address space identifiers that can be shared | ||
40 | * between the GPU, an IOMMU and the driver. VMs on different devices | ||
41 | * may use the same PASID if they share the same address | ||
42 | * space. Therefore PASIDs are allocated using a global IDA. VMs are | ||
43 | * looked up from the PASID per amdgpu_device. | ||
44 | */ | ||
45 | static DEFINE_IDA(amdgpu_vm_pasid_ida); | ||
46 | |||
47 | /** | ||
48 | * amdgpu_vm_alloc_pasid - Allocate a PASID | ||
49 | * @bits: Maximum width of the PASID in bits, must be at least 1 | ||
50 | * | ||
51 | * Allocates a PASID of the given width while keeping smaller PASIDs | ||
52 | * available if possible. | ||
53 | * | ||
54 | * Returns a positive integer on success. Returns %-EINVAL if bits==0. | ||
55 | * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on | ||
56 | * memory allocation failure. | ||
57 | */ | ||
58 | int amdgpu_vm_alloc_pasid(unsigned int bits) | ||
59 | { | ||
60 | int pasid = -EINVAL; | ||
61 | |||
62 | for (bits = min(bits, 31U); bits > 0; bits--) { | ||
63 | pasid = ida_simple_get(&amdgpu_vm_pasid_ida, | ||
64 | 1U << (bits - 1), 1U << bits, | ||
65 | GFP_KERNEL); | ||
66 | if (pasid != -ENOSPC) | ||
67 | break; | ||
68 | } | ||
69 | |||
70 | return pasid; | ||
71 | } | ||
72 | |||
73 | /** | ||
74 | * amdgpu_vm_free_pasid - Free a PASID | ||
75 | * @pasid: PASID to free | ||
76 | */ | ||
77 | void amdgpu_vm_free_pasid(unsigned int pasid) | ||
78 | { | ||
79 | ida_simple_remove(&amdgpu_vm_pasid_ida, pasid); | ||
80 | } | ||
81 | |||
82 | /* | ||
36 | * GPUVM | 83 | * GPUVM |
37 | * GPUVM is similar to the legacy gart on older asics, however | 84 | * GPUVM is similar to the legacy gart on older asics, however |
38 | * rather than there being a single global gart table | 85 | * rather than there being a single global gart table |
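Editor's note: the new amdgpu_vm_alloc_pasid() above walks the requested width downward and asks the IDA for a value in the range [1 << (bits - 1), 1 << bits), so wide requests are satisfied from the top half first and small PASIDs remain free for devices that can only handle a narrow PASID. A minimal userspace sketch of the same strategy, using a toy bitmap allocator instead of the kernel IDA (all names here are illustrative, not the driver's API):

```c
#include <stdio.h>

#define MAX_PASID (1u << 16)           /* toy limit; the kernel clamps to 31 bits */
static unsigned char used[MAX_PASID];  /* stand-in for the kernel IDA             */

/* Return the smallest free id in [lo, hi), or -1 if the range is full. */
static int range_get(unsigned lo, unsigned hi)
{
    for (unsigned id = lo; id < hi && id < MAX_PASID; id++) {
        if (!used[id]) {
            used[id] = 1;
            return (int)id;
        }
    }
    return -1;
}

/* Same shape as amdgpu_vm_alloc_pasid(): try the widest range first so
 * that small ids stay available for callers that need fewer bits. */
static int alloc_pasid(unsigned bits)
{
    for (bits = bits > 16 ? 16 : bits; bits > 0; bits--) {
        int pasid = range_get(1u << (bits - 1), 1u << bits);
        if (pasid >= 0)
            return pasid;
    }
    return -1; /* nothing left, akin to -ENOSPC */
}

int main(void)
{
    printf("16-bit request -> %d\n", alloc_pasid(16)); /* 32768 */
    printf("16-bit request -> %d\n", alloc_pasid(16)); /* 32769 */
    printf(" 4-bit request -> %d\n", alloc_pasid(4));  /* 8     */
    return 0;
}
```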
@@ -140,7 +187,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
140 | struct list_head *validated, | 187 | struct list_head *validated, |
141 | struct amdgpu_bo_list_entry *entry) | 188 | struct amdgpu_bo_list_entry *entry) |
142 | { | 189 | { |
143 | entry->robj = vm->root.bo; | 190 | entry->robj = vm->root.base.bo; |
144 | entry->priority = 0; | 191 | entry->priority = 0; |
145 | entry->tv.bo = &entry->robj->tbo; | 192 | entry->tv.bo = &entry->robj->tbo; |
146 | entry->tv.shared = true; | 193 | entry->tv.shared = true; |
@@ -149,86 +196,80 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
149 | } | 196 | } |
150 | 197 | ||
151 | /** | 198 | /** |
152 | * amdgpu_vm_validate_layer - validate a single page table level | 199 | * amdgpu_vm_validate_pt_bos - validate the page table BOs |
153 | * | 200 | * |
154 | * @parent: parent page table level | 201 | * @adev: amdgpu device pointer |
202 | * @vm: vm providing the BOs | ||
155 | * @validate: callback to do the validation | 203 | * @validate: callback to do the validation |
156 | * @param: parameter for the validation callback | 204 | * @param: parameter for the validation callback |
157 | * | 205 | * |
158 | * Validate the page table BOs on command submission if neccessary. | 206 | * Validate the page table BOs on command submission if neccessary. |
159 | */ | 207 | */ |
160 | static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, | 208 | int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
161 | int (*validate)(void *, struct amdgpu_bo *), | 209 | int (*validate)(void *p, struct amdgpu_bo *bo), |
162 | void *param, bool use_cpu_for_update, | 210 | void *param) |
163 | struct ttm_bo_global *glob) | ||
164 | { | 211 | { |
165 | unsigned i; | 212 | struct ttm_bo_global *glob = adev->mman.bdev.glob; |
166 | int r; | 213 | int r; |
167 | 214 | ||
168 | if (use_cpu_for_update) { | 215 | spin_lock(&vm->status_lock); |
169 | r = amdgpu_bo_kmap(parent->bo, NULL); | 216 | while (!list_empty(&vm->evicted)) { |
170 | if (r) | 217 | struct amdgpu_vm_bo_base *bo_base; |
171 | return r; | 218 | struct amdgpu_bo *bo; |
172 | } | ||
173 | |||
174 | if (!parent->entries) | ||
175 | return 0; | ||
176 | 219 | ||
177 | for (i = 0; i <= parent->last_entry_used; ++i) { | 220 | bo_base = list_first_entry(&vm->evicted, |
178 | struct amdgpu_vm_pt *entry = &parent->entries[i]; | 221 | struct amdgpu_vm_bo_base, |
222 | vm_status); | ||
223 | spin_unlock(&vm->status_lock); | ||
179 | 224 | ||
180 | if (!entry->bo) | 225 | bo = bo_base->bo; |
181 | continue; | 226 | BUG_ON(!bo); |
227 | if (bo->parent) { | ||
228 | r = validate(param, bo); | ||
229 | if (r) | ||
230 | return r; | ||
182 | 231 | ||
183 | r = validate(param, entry->bo); | 232 | spin_lock(&glob->lru_lock); |
184 | if (r) | 233 | ttm_bo_move_to_lru_tail(&bo->tbo); |
185 | return r; | 234 | if (bo->shadow) |
235 | ttm_bo_move_to_lru_tail(&bo->shadow->tbo); | ||
236 | spin_unlock(&glob->lru_lock); | ||
237 | } | ||
186 | 238 | ||
187 | spin_lock(&glob->lru_lock); | 239 | if (bo->tbo.type == ttm_bo_type_kernel && |
188 | ttm_bo_move_to_lru_tail(&entry->bo->tbo); | 240 | vm->use_cpu_for_update) { |
189 | if (entry->bo->shadow) | 241 | r = amdgpu_bo_kmap(bo, NULL); |
190 | ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); | 242 | if (r) |
191 | spin_unlock(&glob->lru_lock); | 243 | return r; |
244 | } | ||
192 | 245 | ||
193 | /* | 246 | spin_lock(&vm->status_lock); |
194 | * Recurse into the sub directory. This is harmless because we | 247 | if (bo->tbo.type != ttm_bo_type_kernel) |
195 | * have only a maximum of 5 layers. | 248 | list_move(&bo_base->vm_status, &vm->moved); |
196 | */ | 249 | else |
197 | r = amdgpu_vm_validate_level(entry, validate, param, | 250 | list_move(&bo_base->vm_status, &vm->relocated); |
198 | use_cpu_for_update, glob); | ||
199 | if (r) | ||
200 | return r; | ||
201 | } | 251 | } |
252 | spin_unlock(&vm->status_lock); | ||
202 | 253 | ||
203 | return r; | 254 | return 0; |
204 | } | 255 | } |
205 | 256 | ||
206 | /** | 257 | /** |
207 | * amdgpu_vm_validate_pt_bos - validate the page table BOs | 258 | * amdgpu_vm_ready - check VM is ready for updates |
208 | * | 259 | * |
209 | * @adev: amdgpu device pointer | 260 | * @vm: VM to check |
210 | * @vm: vm providing the BOs | ||
211 | * @validate: callback to do the validation | ||
212 | * @param: parameter for the validation callback | ||
213 | * | 261 | * |
214 | * Validate the page table BOs on command submission if neccessary. | 262 | * Check if all VM PDs/PTs are ready for updates |
215 | */ | 263 | */ |
216 | int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 264 | bool amdgpu_vm_ready(struct amdgpu_vm *vm) |
217 | int (*validate)(void *p, struct amdgpu_bo *bo), | ||
218 | void *param) | ||
219 | { | 265 | { |
220 | uint64_t num_evictions; | 266 | bool ready; |
221 | 267 | ||
222 | /* We only need to validate the page tables | 268 | spin_lock(&vm->status_lock); |
223 | * if they aren't already valid. | 269 | ready = list_empty(&vm->evicted); |
224 | */ | 270 | spin_unlock(&vm->status_lock); |
225 | num_evictions = atomic64_read(&adev->num_evictions); | ||
226 | if (num_evictions == vm->last_eviction_counter) | ||
227 | return 0; | ||
228 | 271 | ||
229 | return amdgpu_vm_validate_level(&vm->root, validate, param, | 272 | return ready; |
230 | vm->use_cpu_for_update, | ||
231 | adev->mman.bdev.glob); | ||
232 | } | 273 | } |
233 | 274 | ||
234 | /** | 275 | /** |
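Editor's note: the rewritten amdgpu_vm_validate_pt_bos() above drains vm->evicted under status_lock, drops the lock while validating each BO, then re-files the BO on vm->relocated (page-table BOs, ttm_bo_type_kernel) or vm->moved (regular BOs) so later passes know which updates are still pending. A simplified userspace model of that drain pattern, with hand-rolled lists instead of the kernel's list.h (names are illustrative only):

```c
#include <pthread.h>
#include <stdio.h>

/* Toy model of the evicted/moved/relocated bookkeeping introduced above. */
struct toy_bo {
    struct toy_bo *next;
    int is_page_table;      /* models bo->tbo.type == ttm_bo_type_kernel */
    const char *name;
};

static struct toy_bo *evicted, *moved, *relocated;
static pthread_mutex_t status_lock = PTHREAD_MUTEX_INITIALIZER;

static void push(struct toy_bo **list, struct toy_bo *bo)
{
    bo->next = *list;
    *list = bo;
}

static int validate(struct toy_bo *bo)
{
    /* Stand-in for the driver's validate callback; assume it succeeds. */
    printf("validated %s\n", bo->name);
    return 0;
}

/* Same shape as amdgpu_vm_validate_pt_bos(): drop the lock for the slow
 * work, then re-file the BO so later passes know what still needs updating. */
static int validate_evicted(void)
{
    pthread_mutex_lock(&status_lock);
    while (evicted) {
        struct toy_bo *bo = evicted;
        evicted = bo->next;
        pthread_mutex_unlock(&status_lock);

        int r = validate(bo);
        if (r)
            return r;

        pthread_mutex_lock(&status_lock);
        if (bo->is_page_table)
            push(&relocated, bo);   /* directory entries must be rewritten */
        else
            push(&moved, bo);       /* its PTEs must be rewritten          */
    }
    pthread_mutex_unlock(&status_lock);
    return 0;
}

int main(void)
{
    struct toy_bo a = { NULL, 0, "vertex buffer" };
    struct toy_bo b = { NULL, 1, "page table"    };
    push(&evicted, &a);
    push(&evicted, &b);
    return validate_evicted();
}
```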
@@ -294,11 +335,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
294 | 335 | ||
295 | /* walk over the address space and allocate the page tables */ | 336 | /* walk over the address space and allocate the page tables */ |
296 | for (pt_idx = from; pt_idx <= to; ++pt_idx) { | 337 | for (pt_idx = from; pt_idx <= to; ++pt_idx) { |
297 | struct reservation_object *resv = vm->root.bo->tbo.resv; | 338 | struct reservation_object *resv = vm->root.base.bo->tbo.resv; |
298 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; | 339 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; |
299 | struct amdgpu_bo *pt; | 340 | struct amdgpu_bo *pt; |
300 | 341 | ||
301 | if (!entry->bo) { | 342 | if (!entry->base.bo) { |
302 | r = amdgpu_bo_create(adev, | 343 | r = amdgpu_bo_create(adev, |
303 | amdgpu_vm_bo_size(adev, level), | 344 | amdgpu_vm_bo_size(adev, level), |
304 | AMDGPU_GPU_PAGE_SIZE, true, | 345 | AMDGPU_GPU_PAGE_SIZE, true, |
@@ -319,9 +360,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, | |||
319 | /* Keep a reference to the root directory to avoid | 360 | /* Keep a reference to the root directory to avoid |
320 | * freeing them up in the wrong order. | 361 | * freeing them up in the wrong order. |
321 | */ | 362 | */ |
322 | pt->parent = amdgpu_bo_ref(vm->root.bo); | 363 | pt->parent = amdgpu_bo_ref(parent->base.bo); |
323 | 364 | ||
324 | entry->bo = pt; | 365 | entry->base.vm = vm; |
366 | entry->base.bo = pt; | ||
367 | list_add_tail(&entry->base.bo_list, &pt->va); | ||
368 | spin_lock(&vm->status_lock); | ||
369 | list_add(&entry->base.vm_status, &vm->relocated); | ||
370 | spin_unlock(&vm->status_lock); | ||
325 | entry->addr = 0; | 371 | entry->addr = 0; |
326 | } | 372 | } |
327 | 373 | ||
@@ -988,7 +1034,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
988 | int r; | 1034 | int r; |
989 | 1035 | ||
990 | amdgpu_sync_create(&sync); | 1036 | amdgpu_sync_create(&sync); |
991 | amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); | 1037 | amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); |
992 | r = amdgpu_sync_wait(&sync, true); | 1038 | r = amdgpu_sync_wait(&sync, true); |
993 | amdgpu_sync_free(&sync); | 1039 | amdgpu_sync_free(&sync); |
994 | 1040 | ||
@@ -1007,18 +1053,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
1007 | */ | 1053 | */ |
1008 | static int amdgpu_vm_update_level(struct amdgpu_device *adev, | 1054 | static int amdgpu_vm_update_level(struct amdgpu_device *adev, |
1009 | struct amdgpu_vm *vm, | 1055 | struct amdgpu_vm *vm, |
1010 | struct amdgpu_vm_pt *parent, | 1056 | struct amdgpu_vm_pt *parent) |
1011 | unsigned level) | ||
1012 | { | 1057 | { |
1013 | struct amdgpu_bo *shadow; | 1058 | struct amdgpu_bo *shadow; |
1014 | struct amdgpu_ring *ring = NULL; | 1059 | struct amdgpu_ring *ring = NULL; |
1015 | uint64_t pd_addr, shadow_addr = 0; | 1060 | uint64_t pd_addr, shadow_addr = 0; |
1016 | uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); | ||
1017 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; | 1061 | uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; |
1018 | unsigned count = 0, pt_idx, ndw = 0; | 1062 | unsigned count = 0, pt_idx, ndw = 0; |
1019 | struct amdgpu_job *job; | 1063 | struct amdgpu_job *job; |
1020 | struct amdgpu_pte_update_params params; | 1064 | struct amdgpu_pte_update_params params; |
1021 | struct dma_fence *fence = NULL; | 1065 | struct dma_fence *fence = NULL; |
1066 | uint32_t incr; | ||
1022 | 1067 | ||
1023 | int r; | 1068 | int r; |
1024 | 1069 | ||
@@ -1027,10 +1072,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1027 | 1072 | ||
1028 | memset(¶ms, 0, sizeof(params)); | 1073 | memset(¶ms, 0, sizeof(params)); |
1029 | params.adev = adev; | 1074 | params.adev = adev; |
1030 | shadow = parent->bo->shadow; | 1075 | shadow = parent->base.bo->shadow; |
1031 | 1076 | ||
1032 | if (vm->use_cpu_for_update) { | 1077 | if (vm->use_cpu_for_update) { |
1033 | pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); | 1078 | pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); |
1034 | r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); | 1079 | r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); |
1035 | if (unlikely(r)) | 1080 | if (unlikely(r)) |
1036 | return r; | 1081 | return r; |
@@ -1046,7 +1091,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1046 | /* assume the worst case */ | 1091 | /* assume the worst case */ |
1047 | ndw += parent->last_entry_used * 6; | 1092 | ndw += parent->last_entry_used * 6; |
1048 | 1093 | ||
1049 | pd_addr = amdgpu_bo_gpu_offset(parent->bo); | 1094 | pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); |
1050 | 1095 | ||
1051 | if (shadow) { | 1096 | if (shadow) { |
1052 | shadow_addr = amdgpu_bo_gpu_offset(shadow); | 1097 | shadow_addr = amdgpu_bo_gpu_offset(shadow); |
@@ -1066,12 +1111,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1066 | 1111 | ||
1067 | /* walk over the address space and update the directory */ | 1112 | /* walk over the address space and update the directory */ |
1068 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | 1113 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { |
1069 | struct amdgpu_bo *bo = parent->entries[pt_idx].bo; | 1114 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; |
1115 | struct amdgpu_bo *bo = entry->base.bo; | ||
1070 | uint64_t pde, pt; | 1116 | uint64_t pde, pt; |
1071 | 1117 | ||
1072 | if (bo == NULL) | 1118 | if (bo == NULL) |
1073 | continue; | 1119 | continue; |
1074 | 1120 | ||
1121 | spin_lock(&vm->status_lock); | ||
1122 | list_del_init(&entry->base.vm_status); | ||
1123 | spin_unlock(&vm->status_lock); | ||
1124 | |||
1075 | pt = amdgpu_bo_gpu_offset(bo); | 1125 | pt = amdgpu_bo_gpu_offset(bo); |
1076 | pt = amdgpu_gart_get_vm_pde(adev, pt); | 1126 | pt = amdgpu_gart_get_vm_pde(adev, pt); |
1077 | /* Don't update huge pages here */ | 1127 | /* Don't update huge pages here */ |
@@ -1082,6 +1132,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1082 | parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; | 1132 | parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; |
1083 | 1133 | ||
1084 | pde = pd_addr + pt_idx * 8; | 1134 | pde = pd_addr + pt_idx * 8; |
1135 | incr = amdgpu_bo_size(bo); | ||
1085 | if (((last_pde + 8 * count) != pde) || | 1136 | if (((last_pde + 8 * count) != pde) || |
1086 | ((last_pt + incr * count) != pt) || | 1137 | ((last_pt + incr * count) != pt) || |
1087 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { | 1138 | (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { |
@@ -1109,7 +1160,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1109 | } | 1160 | } |
1110 | 1161 | ||
1111 | if (count) { | 1162 | if (count) { |
1112 | if (vm->root.bo->shadow) | 1163 | if (vm->root.base.bo->shadow) |
1113 | params.func(¶ms, last_shadow, last_pt, | 1164 | params.func(¶ms, last_shadow, last_pt, |
1114 | count, incr, AMDGPU_PTE_VALID); | 1165 | count, incr, AMDGPU_PTE_VALID); |
1115 | 1166 | ||
@@ -1122,7 +1173,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1122 | amdgpu_job_free(job); | 1173 | amdgpu_job_free(job); |
1123 | } else { | 1174 | } else { |
1124 | amdgpu_ring_pad_ib(ring, params.ib); | 1175 | amdgpu_ring_pad_ib(ring, params.ib); |
1125 | amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, | 1176 | amdgpu_sync_resv(adev, &job->sync, |
1177 | parent->base.bo->tbo.resv, | ||
1126 | AMDGPU_FENCE_OWNER_VM); | 1178 | AMDGPU_FENCE_OWNER_VM); |
1127 | if (shadow) | 1179 | if (shadow) |
1128 | amdgpu_sync_resv(adev, &job->sync, | 1180 | amdgpu_sync_resv(adev, &job->sync, |
@@ -1135,26 +1187,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, | |||
1135 | if (r) | 1187 | if (r) |
1136 | goto error_free; | 1188 | goto error_free; |
1137 | 1189 | ||
1138 | amdgpu_bo_fence(parent->bo, fence, true); | 1190 | amdgpu_bo_fence(parent->base.bo, fence, true); |
1139 | dma_fence_put(vm->last_dir_update); | 1191 | dma_fence_put(vm->last_update); |
1140 | vm->last_dir_update = dma_fence_get(fence); | 1192 | vm->last_update = fence; |
1141 | dma_fence_put(fence); | ||
1142 | } | 1193 | } |
1143 | } | 1194 | } |
1144 | /* | ||
1145 | * Recurse into the subdirectories. This recursion is harmless because | ||
1146 | * we only have a maximum of 5 layers. | ||
1147 | */ | ||
1148 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | ||
1149 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; | ||
1150 | |||
1151 | if (!entry->bo) | ||
1152 | continue; | ||
1153 | |||
1154 | r = amdgpu_vm_update_level(adev, vm, entry, level + 1); | ||
1155 | if (r) | ||
1156 | return r; | ||
1157 | } | ||
1158 | 1195 | ||
1159 | return 0; | 1196 | return 0; |
1160 | 1197 | ||
@@ -1170,7 +1207,8 @@ error_free: | |||
1170 | * | 1207 | * |
1171 | * Mark all PD level as invalid after an error. | 1208 | * Mark all PD level as invalid after an error. |
1172 | */ | 1209 | */ |
1173 | static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) | 1210 | static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, |
1211 | struct amdgpu_vm_pt *parent) | ||
1174 | { | 1212 | { |
1175 | unsigned pt_idx; | 1213 | unsigned pt_idx; |
1176 | 1214 | ||
@@ -1181,11 +1219,15 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) | |||
1181 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { | 1219 | for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { |
1182 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; | 1220 | struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; |
1183 | 1221 | ||
1184 | if (!entry->bo) | 1222 | if (!entry->base.bo) |
1185 | continue; | 1223 | continue; |
1186 | 1224 | ||
1187 | entry->addr = ~0ULL; | 1225 | entry->addr = ~0ULL; |
1188 | amdgpu_vm_invalidate_level(entry); | 1226 | spin_lock(&vm->status_lock); |
1227 | if (list_empty(&entry->base.vm_status)) | ||
1228 | list_add(&entry->base.vm_status, &vm->relocated); | ||
1229 | spin_unlock(&vm->status_lock); | ||
1230 | amdgpu_vm_invalidate_level(vm, entry); | ||
1189 | } | 1231 | } |
1190 | } | 1232 | } |
1191 | 1233 | ||
@@ -1203,9 +1245,38 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, | |||
1203 | { | 1245 | { |
1204 | int r; | 1246 | int r; |
1205 | 1247 | ||
1206 | r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); | 1248 | spin_lock(&vm->status_lock); |
1207 | if (r) | 1249 | while (!list_empty(&vm->relocated)) { |
1208 | amdgpu_vm_invalidate_level(&vm->root); | 1250 | struct amdgpu_vm_bo_base *bo_base; |
1251 | struct amdgpu_bo *bo; | ||
1252 | |||
1253 | bo_base = list_first_entry(&vm->relocated, | ||
1254 | struct amdgpu_vm_bo_base, | ||
1255 | vm_status); | ||
1256 | spin_unlock(&vm->status_lock); | ||
1257 | |||
1258 | bo = bo_base->bo->parent; | ||
1259 | if (bo) { | ||
1260 | struct amdgpu_vm_bo_base *parent; | ||
1261 | struct amdgpu_vm_pt *pt; | ||
1262 | |||
1263 | parent = list_first_entry(&bo->va, | ||
1264 | struct amdgpu_vm_bo_base, | ||
1265 | bo_list); | ||
1266 | pt = container_of(parent, struct amdgpu_vm_pt, base); | ||
1267 | |||
1268 | r = amdgpu_vm_update_level(adev, vm, pt); | ||
1269 | if (r) { | ||
1270 | amdgpu_vm_invalidate_level(vm, &vm->root); | ||
1271 | return r; | ||
1272 | } | ||
1273 | spin_lock(&vm->status_lock); | ||
1274 | } else { | ||
1275 | spin_lock(&vm->status_lock); | ||
1276 | list_del_init(&bo_base->vm_status); | ||
1277 | } | ||
1278 | } | ||
1279 | spin_unlock(&vm->status_lock); | ||
1209 | 1280 | ||
1210 | if (vm->use_cpu_for_update) { | 1281 | if (vm->use_cpu_for_update) { |
1211 | /* Flush HDP */ | 1282 | /* Flush HDP */ |
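Editor's note: the update_directories loop above finds the parent page directory of a relocated entry by taking the first amdgpu_vm_bo_base on the parent BO's va list and using container_of() to recover the enclosing amdgpu_vm_pt. A self-contained sketch of that pointer arithmetic (the structures below are simplified stand-ins, not the driver's):

```c
#include <stddef.h>
#include <stdio.h>

/* container_of(): given a pointer to a member, recover the enclosing
 * struct.  This is how the hunk above turns a bo_base found on the
 * parent BO's list back into its amdgpu_vm_pt. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct vm_bo_base {
    int on_relocated_list;
};

struct vm_pt {
    unsigned long addr;
    struct vm_bo_base base;   /* embedded, like amdgpu_vm_pt::base */
};

int main(void)
{
    struct vm_pt pt = { .addr = 0x1000, .base = { 1 } };
    struct vm_bo_base *base = &pt.base;             /* what the list gives us */
    struct vm_pt *back = container_of(base, struct vm_pt, base);

    printf("recovered addr = 0x%lx\n", back->addr); /* 0x1000 */
    return back == &pt ? 0 : 1;
}
```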
@@ -1236,7 +1307,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, | |||
1236 | *entry = &p->vm->root; | 1307 | *entry = &p->vm->root; |
1237 | while ((*entry)->entries) { | 1308 | while ((*entry)->entries) { |
1238 | idx = addr >> (p->adev->vm_manager.block_size * level--); | 1309 | idx = addr >> (p->adev->vm_manager.block_size * level--); |
1239 | idx %= amdgpu_bo_size((*entry)->bo) / 8; | 1310 | idx %= amdgpu_bo_size((*entry)->base.bo) / 8; |
1240 | *parent = *entry; | 1311 | *parent = *entry; |
1241 | *entry = &(*entry)->entries[idx]; | 1312 | *entry = &(*entry)->entries[idx]; |
1242 | } | 1313 | } |
@@ -1272,7 +1343,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, | |||
1272 | p->src || | 1343 | p->src || |
1273 | !(flags & AMDGPU_PTE_VALID)) { | 1344 | !(flags & AMDGPU_PTE_VALID)) { |
1274 | 1345 | ||
1275 | dst = amdgpu_bo_gpu_offset(entry->bo); | 1346 | dst = amdgpu_bo_gpu_offset(entry->base.bo); |
1276 | dst = amdgpu_gart_get_vm_pde(p->adev, dst); | 1347 | dst = amdgpu_gart_get_vm_pde(p->adev, dst); |
1277 | flags = AMDGPU_PTE_VALID; | 1348 | flags = AMDGPU_PTE_VALID; |
1278 | } else { | 1349 | } else { |
@@ -1298,18 +1369,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, | |||
1298 | tmp = p->pages_addr; | 1369 | tmp = p->pages_addr; |
1299 | p->pages_addr = NULL; | 1370 | p->pages_addr = NULL; |
1300 | 1371 | ||
1301 | pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); | 1372 | pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); |
1302 | pde = pd_addr + (entry - parent->entries) * 8; | 1373 | pde = pd_addr + (entry - parent->entries) * 8; |
1303 | amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); | 1374 | amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); |
1304 | 1375 | ||
1305 | p->pages_addr = tmp; | 1376 | p->pages_addr = tmp; |
1306 | } else { | 1377 | } else { |
1307 | if (parent->bo->shadow) { | 1378 | if (parent->base.bo->shadow) { |
1308 | pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); | 1379 | pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); |
1309 | pde = pd_addr + (entry - parent->entries) * 8; | 1380 | pde = pd_addr + (entry - parent->entries) * 8; |
1310 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); | 1381 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); |
1311 | } | 1382 | } |
1312 | pd_addr = amdgpu_bo_gpu_offset(parent->bo); | 1383 | pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); |
1313 | pde = pd_addr + (entry - parent->entries) * 8; | 1384 | pde = pd_addr + (entry - parent->entries) * 8; |
1314 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); | 1385 | amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); |
1315 | } | 1386 | } |
@@ -1360,7 +1431,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
1360 | if (entry->addr & AMDGPU_PDE_PTE) | 1431 | if (entry->addr & AMDGPU_PDE_PTE) |
1361 | continue; | 1432 | continue; |
1362 | 1433 | ||
1363 | pt = entry->bo; | 1434 | pt = entry->base.bo; |
1364 | if (use_cpu_update) { | 1435 | if (use_cpu_update) { |
1365 | pe_start = (unsigned long)amdgpu_bo_kptr(pt); | 1436 | pe_start = (unsigned long)amdgpu_bo_kptr(pt); |
1366 | } else { | 1437 | } else { |
@@ -1396,8 +1467,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1396 | uint64_t start, uint64_t end, | 1467 | uint64_t start, uint64_t end, |
1397 | uint64_t dst, uint64_t flags) | 1468 | uint64_t dst, uint64_t flags) |
1398 | { | 1469 | { |
1399 | int r; | ||
1400 | |||
1401 | /** | 1470 | /** |
1402 | * The MC L1 TLB supports variable sized pages, based on a fragment | 1471 | * The MC L1 TLB supports variable sized pages, based on a fragment |
1403 | * field in the PTE. When this field is set to a non-zero value, page | 1472 | * field in the PTE. When this field is set to a non-zero value, page |
@@ -1416,39 +1485,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1416 | * Userspace can support this by aligning virtual base address and | 1485 | * Userspace can support this by aligning virtual base address and |
1417 | * allocation size to the fragment size. | 1486 | * allocation size to the fragment size. |
1418 | */ | 1487 | */ |
1419 | unsigned pages_per_frag = params->adev->vm_manager.fragment_size; | 1488 | unsigned max_frag = params->adev->vm_manager.fragment_size; |
1420 | uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); | 1489 | int r; |
1421 | uint64_t frag_align = 1 << pages_per_frag; | ||
1422 | |||
1423 | uint64_t frag_start = ALIGN(start, frag_align); | ||
1424 | uint64_t frag_end = end & ~(frag_align - 1); | ||
1425 | 1490 | ||
1426 | /* system pages are non continuously */ | 1491 | /* system pages are non continuously */ |
1427 | if (params->src || !(flags & AMDGPU_PTE_VALID) || | 1492 | if (params->src || !(flags & AMDGPU_PTE_VALID)) |
1428 | (frag_start >= frag_end)) | ||
1429 | return amdgpu_vm_update_ptes(params, start, end, dst, flags); | 1493 | return amdgpu_vm_update_ptes(params, start, end, dst, flags); |
1430 | 1494 | ||
1431 | /* handle the 4K area at the beginning */ | 1495 | while (start != end) { |
1432 | if (start != frag_start) { | 1496 | uint64_t frag_flags, frag_end; |
1433 | r = amdgpu_vm_update_ptes(params, start, frag_start, | 1497 | unsigned frag; |
1434 | dst, flags); | 1498 | |
1499 | /* This intentionally wraps around if no bit is set */ | ||
1500 | frag = min((unsigned)ffs(start) - 1, | ||
1501 | (unsigned)fls64(end - start) - 1); | ||
1502 | if (frag >= max_frag) { | ||
1503 | frag_flags = AMDGPU_PTE_FRAG(max_frag); | ||
1504 | frag_end = end & ~((1ULL << max_frag) - 1); | ||
1505 | } else { | ||
1506 | frag_flags = AMDGPU_PTE_FRAG(frag); | ||
1507 | frag_end = start + (1 << frag); | ||
1508 | } | ||
1509 | |||
1510 | r = amdgpu_vm_update_ptes(params, start, frag_end, dst, | ||
1511 | flags | frag_flags); | ||
1435 | if (r) | 1512 | if (r) |
1436 | return r; | 1513 | return r; |
1437 | dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; | ||
1438 | } | ||
1439 | 1514 | ||
1440 | /* handle the area in the middle */ | 1515 | dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; |
1441 | r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, | 1516 | start = frag_end; |
1442 | flags | frag_flags); | ||
1443 | if (r) | ||
1444 | return r; | ||
1445 | |||
1446 | /* handle the 4K area at the end */ | ||
1447 | if (frag_end != end) { | ||
1448 | dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; | ||
1449 | r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); | ||
1450 | } | 1517 | } |
1451 | return r; | 1518 | |
1519 | return 0; | ||
1452 | } | 1520 | } |
1453 | 1521 | ||
1454 | /** | 1522 | /** |
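Editor's note: the rewritten fragment loop above replaces the fixed begin/middle/end split with a per-step choice: each iteration uses the largest power-of-two fragment that is aligned to the current start (ffs), fits in the remaining range (fls64), and does not exceed the hardware maximum. A runnable sketch of just that computation (helper names are mine, not the kernel's):

```c
#include <stdio.h>
#include <stdint.h>

/* Highest set bit, 1-based, like the kernel's fls64(). */
static unsigned fls64_(uint64_t v)
{
    return v ? 64 - (unsigned)__builtin_clzll(v) : 0;
}

/* Lowest set bit, 1-based, like ffs(); 0 when no bit is set. */
static unsigned ffs64_(uint64_t v)
{
    return v ? (unsigned)__builtin_ctzll(v) + 1 : 0;
}

/* Same fragment selection as the rewritten amdgpu_vm_frag_ptes(). */
static void split_range(uint64_t start, uint64_t end, unsigned max_frag)
{
    while (start != end) {
        /* wraps to a huge value when start == 0, i.e. "any alignment" */
        unsigned align = ffs64_(start) - 1;
        unsigned fit   = fls64_(end - start) - 1;
        unsigned frag  = align < fit ? align : fit;
        uint64_t frag_end;

        if (frag >= max_frag) {
            frag = max_frag;
            frag_end = end & ~((1ULL << max_frag) - 1);
        } else {
            frag_end = start + (1ULL << frag);
        }
        printf("[%llu, %llu) uses %u-page fragments\n",
               (unsigned long long)start, (unsigned long long)frag_end,
               1u << frag);
        start = frag_end;
    }
}

int main(void)
{
    split_range(3, 100, 4);   /* carve [3, 100) into aligned pieces */
    return 0;
}
```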
@@ -1456,7 +1524,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1456 | * | 1524 | * |
1457 | * @adev: amdgpu_device pointer | 1525 | * @adev: amdgpu_device pointer |
1458 | * @exclusive: fence we need to sync to | 1526 | * @exclusive: fence we need to sync to |
1459 | * @src: address where to copy page table entries from | ||
1460 | * @pages_addr: DMA addresses to use for mapping | 1527 | * @pages_addr: DMA addresses to use for mapping |
1461 | * @vm: requested vm | 1528 | * @vm: requested vm |
1462 | * @start: start of mapped range | 1529 | * @start: start of mapped range |
@@ -1470,7 +1537,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1470 | */ | 1537 | */ |
1471 | static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | 1538 | static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, |
1472 | struct dma_fence *exclusive, | 1539 | struct dma_fence *exclusive, |
1473 | uint64_t src, | ||
1474 | dma_addr_t *pages_addr, | 1540 | dma_addr_t *pages_addr, |
1475 | struct amdgpu_vm *vm, | 1541 | struct amdgpu_vm *vm, |
1476 | uint64_t start, uint64_t last, | 1542 | uint64_t start, uint64_t last, |
@@ -1488,7 +1554,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1488 | memset(¶ms, 0, sizeof(params)); | 1554 | memset(¶ms, 0, sizeof(params)); |
1489 | params.adev = adev; | 1555 | params.adev = adev; |
1490 | params.vm = vm; | 1556 | params.vm = vm; |
1491 | params.src = src; | ||
1492 | 1557 | ||
1493 | /* sync to everything on unmapping */ | 1558 | /* sync to everything on unmapping */ |
1494 | if (!(flags & AMDGPU_PTE_VALID)) | 1559 | if (!(flags & AMDGPU_PTE_VALID)) |
@@ -1517,10 +1582,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1517 | nptes = last - start + 1; | 1582 | nptes = last - start + 1; |
1518 | 1583 | ||
1519 | /* | 1584 | /* |
1520 | * reserve space for one command every (1 << BLOCK_SIZE) | 1585 | * reserve space for two commands every (1 << BLOCK_SIZE) |
1521 | * entries or 2k dwords (whatever is smaller) | 1586 | * entries or 2k dwords (whatever is smaller) |
1587 | * | ||
1588 | * The second command is for the shadow pagetables. | ||
1522 | */ | 1589 | */ |
1523 | ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1; | 1590 | ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; |
1524 | 1591 | ||
1525 | /* padding, etc. */ | 1592 | /* padding, etc. */ |
1526 | ndw = 64; | 1593 | ndw = 64; |
@@ -1528,15 +1595,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1528 | /* one PDE write for each huge page */ | 1595 | /* one PDE write for each huge page */ |
1529 | ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; | 1596 | ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; |
1530 | 1597 | ||
1531 | if (src) { | 1598 | if (pages_addr) { |
1532 | /* only copy commands needed */ | ||
1533 | ndw += ncmds * 7; | ||
1534 | |||
1535 | params.func = amdgpu_vm_do_copy_ptes; | ||
1536 | |||
1537 | } else if (pages_addr) { | ||
1538 | /* copy commands needed */ | 1599 | /* copy commands needed */ |
1539 | ndw += ncmds * 7; | 1600 | ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; |
1540 | 1601 | ||
1541 | /* and also PTEs */ | 1602 | /* and also PTEs */ |
1542 | ndw += nptes * 2; | 1603 | ndw += nptes * 2; |
@@ -1545,10 +1606,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1545 | 1606 | ||
1546 | } else { | 1607 | } else { |
1547 | /* set page commands needed */ | 1608 | /* set page commands needed */ |
1548 | ndw += ncmds * 10; | 1609 | ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; |
1549 | 1610 | ||
1550 | /* two extra commands for begin/end of fragment */ | 1611 | /* extra commands for begin/end fragments */ |
1551 | ndw += 2 * 10; | 1612 | ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw |
1613 | * adev->vm_manager.fragment_size; | ||
1552 | 1614 | ||
1553 | params.func = amdgpu_vm_do_set_ptes; | 1615 | params.func = amdgpu_vm_do_set_ptes; |
1554 | } | 1616 | } |
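Editor's note: the sizing hunks above double ncmds to cover the shadow page tables and replace the hard-coded 7/10 dwords per command with the per-ASIC copy_pte_num_dw / set_pte_pde_num_dw values. A rough model of the resulting dword budget; the dword counts, block size, and page counts below are placeholders, not values for any specific chip:

```c
#include <stdio.h>

/* Rough model of the ndw sizing in amdgpu_vm_bo_update_mapping(). */
static unsigned estimate_ndw(unsigned long long nptes, unsigned block_size,
                             int have_pages_addr,
                             unsigned copy_pte_num_dw, unsigned set_pte_pde_num_dw,
                             unsigned fragment_size)
{
    unsigned shift = block_size < 11 ? block_size : 11;
    /* one command per (1 << shift) PTEs, times two for the shadow PTs */
    unsigned long long ncmds = ((nptes >> shift) + 1) * 2;
    unsigned long long ndw = 64;                       /* padding, etc. */

    ndw += ((nptes >> block_size) + 1) * 6;            /* huge-page PDE writes */

    if (have_pages_addr) {
        ndw += ncmds * copy_pte_num_dw;                /* copy commands        */
        ndw += nptes * 2;                              /* the PTEs themselves  */
    } else {
        ndw += ncmds * set_pte_pde_num_dw;             /* set-page commands    */
        ndw += 2 * set_pte_pde_num_dw * fragment_size; /* begin/end fragments  */
    }
    return (unsigned)ndw;
}

int main(void)
{
    /* 1 MiB mapping of 4 KiB pages = 256 PTEs; all parameters illustrative. */
    printf("system pages: %u dw\n", estimate_ndw(256, 9, 1, 7, 10, 9));
    printf("vram pages:   %u dw\n", estimate_ndw(256, 9, 0, 7, 10, 9));
    return 0;
}
```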
@@ -1559,7 +1621,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1559 | 1621 | ||
1560 | params.ib = &job->ibs[0]; | 1622 | params.ib = &job->ibs[0]; |
1561 | 1623 | ||
1562 | if (!src && pages_addr) { | 1624 | if (pages_addr) { |
1563 | uint64_t *pte; | 1625 | uint64_t *pte; |
1564 | unsigned i; | 1626 | unsigned i; |
1565 | 1627 | ||
@@ -1580,12 +1642,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1580 | if (r) | 1642 | if (r) |
1581 | goto error_free; | 1643 | goto error_free; |
1582 | 1644 | ||
1583 | r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv, | 1645 | r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, |
1584 | owner); | 1646 | owner); |
1585 | if (r) | 1647 | if (r) |
1586 | goto error_free; | 1648 | goto error_free; |
1587 | 1649 | ||
1588 | r = reservation_object_reserve_shared(vm->root.bo->tbo.resv); | 1650 | r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); |
1589 | if (r) | 1651 | if (r) |
1590 | goto error_free; | 1652 | goto error_free; |
1591 | 1653 | ||
@@ -1600,14 +1662,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1600 | if (r) | 1662 | if (r) |
1601 | goto error_free; | 1663 | goto error_free; |
1602 | 1664 | ||
1603 | amdgpu_bo_fence(vm->root.bo, f, true); | 1665 | amdgpu_bo_fence(vm->root.base.bo, f, true); |
1604 | dma_fence_put(*fence); | 1666 | dma_fence_put(*fence); |
1605 | *fence = f; | 1667 | *fence = f; |
1606 | return 0; | 1668 | return 0; |
1607 | 1669 | ||
1608 | error_free: | 1670 | error_free: |
1609 | amdgpu_job_free(job); | 1671 | amdgpu_job_free(job); |
1610 | amdgpu_vm_invalidate_level(&vm->root); | 1672 | amdgpu_vm_invalidate_level(vm, &vm->root); |
1611 | return r; | 1673 | return r; |
1612 | } | 1674 | } |
1613 | 1675 | ||
@@ -1636,7 +1698,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1636 | struct drm_mm_node *nodes, | 1698 | struct drm_mm_node *nodes, |
1637 | struct dma_fence **fence) | 1699 | struct dma_fence **fence) |
1638 | { | 1700 | { |
1639 | uint64_t pfn, src = 0, start = mapping->start; | 1701 | uint64_t pfn, start = mapping->start; |
1640 | int r; | 1702 | int r; |
1641 | 1703 | ||
1642 | /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here | 1704 | /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here |
@@ -1691,8 +1753,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
1691 | addr += pfn << PAGE_SHIFT; | 1753 | addr += pfn << PAGE_SHIFT; |
1692 | 1754 | ||
1693 | last = min((uint64_t)mapping->last, start + max_entries - 1); | 1755 | last = min((uint64_t)mapping->last, start + max_entries - 1); |
1694 | r = amdgpu_vm_bo_update_mapping(adev, exclusive, | 1756 | r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm, |
1695 | src, pages_addr, vm, | ||
1696 | start, last, flags, addr, | 1757 | start, last, flags, addr, |
1697 | fence); | 1758 | fence); |
1698 | if (r) | 1759 | if (r) |
@@ -1730,7 +1791,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1730 | dma_addr_t *pages_addr = NULL; | 1791 | dma_addr_t *pages_addr = NULL; |
1731 | struct ttm_mem_reg *mem; | 1792 | struct ttm_mem_reg *mem; |
1732 | struct drm_mm_node *nodes; | 1793 | struct drm_mm_node *nodes; |
1733 | struct dma_fence *exclusive; | 1794 | struct dma_fence *exclusive, **last_update; |
1734 | uint64_t flags; | 1795 | uint64_t flags; |
1735 | int r; | 1796 | int r; |
1736 | 1797 | ||
@@ -1756,38 +1817,43 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, | |||
1756 | else | 1817 | else |
1757 | flags = 0x0; | 1818 | flags = 0x0; |
1758 | 1819 | ||
1759 | spin_lock(&vm->status_lock); | 1820 | if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) |
1760 | if (!list_empty(&bo_va->base.vm_status)) | 1821 | last_update = &vm->last_update; |
1822 | else | ||
1823 | last_update = &bo_va->last_pt_update; | ||
1824 | |||
1825 | if (!clear && bo_va->base.moved) { | ||
1826 | bo_va->base.moved = false; | ||
1761 | list_splice_init(&bo_va->valids, &bo_va->invalids); | 1827 | list_splice_init(&bo_va->valids, &bo_va->invalids); |
1762 | spin_unlock(&vm->status_lock); | 1828 | |
1829 | } else if (bo_va->cleared != clear) { | ||
1830 | list_splice_init(&bo_va->valids, &bo_va->invalids); | ||
1831 | } | ||
1763 | 1832 | ||
1764 | list_for_each_entry(mapping, &bo_va->invalids, list) { | 1833 | list_for_each_entry(mapping, &bo_va->invalids, list) { |
1765 | r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, | 1834 | r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, |
1766 | mapping, flags, nodes, | 1835 | mapping, flags, nodes, |
1767 | &bo_va->last_pt_update); | 1836 | last_update); |
1768 | if (r) | 1837 | if (r) |
1769 | return r; | 1838 | return r; |
1770 | } | 1839 | } |
1771 | 1840 | ||
1772 | if (trace_amdgpu_vm_bo_mapping_enabled()) { | 1841 | if (vm->use_cpu_for_update) { |
1773 | list_for_each_entry(mapping, &bo_va->valids, list) | 1842 | /* Flush HDP */ |
1774 | trace_amdgpu_vm_bo_mapping(mapping); | 1843 | mb(); |
1775 | 1844 | amdgpu_gart_flush_gpu_tlb(adev, 0); | |
1776 | list_for_each_entry(mapping, &bo_va->invalids, list) | ||
1777 | trace_amdgpu_vm_bo_mapping(mapping); | ||
1778 | } | 1845 | } |
1779 | 1846 | ||
1780 | spin_lock(&vm->status_lock); | 1847 | spin_lock(&vm->status_lock); |
1781 | list_splice_init(&bo_va->invalids, &bo_va->valids); | ||
1782 | list_del_init(&bo_va->base.vm_status); | 1848 | list_del_init(&bo_va->base.vm_status); |
1783 | if (clear) | ||
1784 | list_add(&bo_va->base.vm_status, &vm->cleared); | ||
1785 | spin_unlock(&vm->status_lock); | 1849 | spin_unlock(&vm->status_lock); |
1786 | 1850 | ||
1787 | if (vm->use_cpu_for_update) { | 1851 | list_splice_init(&bo_va->invalids, &bo_va->valids); |
1788 | /* Flush HDP */ | 1852 | bo_va->cleared = clear; |
1789 | mb(); | 1853 | |
1790 | amdgpu_gart_flush_gpu_tlb(adev, 0); | 1854 | if (trace_amdgpu_vm_bo_mapping_enabled()) { |
1855 | list_for_each_entry(mapping, &bo_va->valids, list) | ||
1856 | trace_amdgpu_vm_bo_mapping(mapping); | ||
1791 | } | 1857 | } |
1792 | 1858 | ||
1793 | return 0; | 1859 | return 0; |
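Editor's note: the rewritten amdgpu_vm_bo_update() above picks where the resulting fence is stored: clears and per-VM BOs (those sharing the root page directory's reservation object) update vm->last_update, while independently reserved BOs keep using their own bo_va->last_pt_update. A tiny sketch of that selection; the structures and helper are simplified stand-ins:

```c
#include <stdbool.h>
#include <stdio.h>

struct resv       { int id; };
struct fence_slot { const char *owner; };

struct toy_vm    { struct resv *root_resv; struct fence_slot last_update; };
struct toy_bo_va { struct resv *bo_resv;   struct fence_slot last_pt_update; };

/* Mirrors the hunk above: clears and per-VM BOs (same reservation object
 * as the root page directory) track their fence in the VM itself. */
static struct fence_slot *pick_fence_slot(struct toy_vm *vm,
                                          struct toy_bo_va *bo_va, bool clear)
{
    if (clear || bo_va->bo_resv == vm->root_resv)
        return &vm->last_update;
    return &bo_va->last_pt_update;
}

int main(void)
{
    struct resv shared = {1}, separate = {2};
    struct toy_vm vm = { &shared, { "vm->last_update" } };
    struct toy_bo_va per_vm  = { &shared,   { "bo_va->last_pt_update" } };
    struct toy_bo_va regular = { &separate, { "bo_va->last_pt_update" } };

    printf("per-VM BO  -> %s\n", pick_fence_slot(&vm, &per_vm, false)->owner);
    printf("regular BO -> %s\n", pick_fence_slot(&vm, &regular, false)->owner);
    printf("clear      -> %s\n", pick_fence_slot(&vm, &regular, true)->owner);
    return 0;
}
```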
@@ -1895,7 +1961,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, | |||
1895 | */ | 1961 | */ |
1896 | static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | 1962 | static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) |
1897 | { | 1963 | { |
1898 | struct reservation_object *resv = vm->root.bo->tbo.resv; | 1964 | struct reservation_object *resv = vm->root.base.bo->tbo.resv; |
1899 | struct dma_fence *excl, **shared; | 1965 | struct dma_fence *excl, **shared; |
1900 | unsigned i, shared_count; | 1966 | unsigned i, shared_count; |
1901 | int r; | 1967 | int r; |
@@ -1953,7 +2019,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
1953 | if (vm->pte_support_ats) | 2019 | if (vm->pte_support_ats) |
1954 | init_pte_value = AMDGPU_PTE_SYSTEM; | 2020 | init_pte_value = AMDGPU_PTE_SYSTEM; |
1955 | 2021 | ||
1956 | r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, | 2022 | r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, |
1957 | mapping->start, mapping->last, | 2023 | mapping->start, mapping->last, |
1958 | init_pte_value, 0, &f); | 2024 | init_pte_value, 0, &f); |
1959 | amdgpu_vm_free_mapping(adev, vm, mapping, f); | 2025 | amdgpu_vm_free_mapping(adev, vm, mapping, f); |
@@ -1975,29 +2041,35 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
1975 | } | 2041 | } |
1976 | 2042 | ||
1977 | /** | 2043 | /** |
1978 | * amdgpu_vm_clear_moved - clear moved BOs in the PT | 2044 | * amdgpu_vm_handle_moved - handle moved BOs in the PT |
1979 | * | 2045 | * |
1980 | * @adev: amdgpu_device pointer | 2046 | * @adev: amdgpu_device pointer |
1981 | * @vm: requested vm | 2047 | * @vm: requested vm |
2048 | * @sync: sync object to add fences to | ||
1982 | * | 2049 | * |
1983 | * Make sure all moved BOs are cleared in the PT. | 2050 | * Make sure all BOs which are moved are updated in the PTs. |
1984 | * Returns 0 for success. | 2051 | * Returns 0 for success. |
1985 | * | 2052 | * |
1986 | * PTs have to be reserved and mutex must be locked! | 2053 | * PTs have to be reserved! |
1987 | */ | 2054 | */ |
1988 | int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 2055 | int amdgpu_vm_handle_moved(struct amdgpu_device *adev, |
1989 | struct amdgpu_sync *sync) | 2056 | struct amdgpu_vm *vm) |
1990 | { | 2057 | { |
1991 | struct amdgpu_bo_va *bo_va = NULL; | 2058 | bool clear; |
1992 | int r = 0; | 2059 | int r = 0; |
1993 | 2060 | ||
1994 | spin_lock(&vm->status_lock); | 2061 | spin_lock(&vm->status_lock); |
1995 | while (!list_empty(&vm->moved)) { | 2062 | while (!list_empty(&vm->moved)) { |
2063 | struct amdgpu_bo_va *bo_va; | ||
2064 | |||
1996 | bo_va = list_first_entry(&vm->moved, | 2065 | bo_va = list_first_entry(&vm->moved, |
1997 | struct amdgpu_bo_va, base.vm_status); | 2066 | struct amdgpu_bo_va, base.vm_status); |
1998 | spin_unlock(&vm->status_lock); | 2067 | spin_unlock(&vm->status_lock); |
1999 | 2068 | ||
2000 | r = amdgpu_vm_bo_update(adev, bo_va, true); | 2069 | /* Per VM BOs never need to bo cleared in the page tables */ |
2070 | clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv; | ||
2071 | |||
2072 | r = amdgpu_vm_bo_update(adev, bo_va, clear); | ||
2001 | if (r) | 2073 | if (r) |
2002 | return r; | 2074 | return r; |
2003 | 2075 | ||
@@ -2005,9 +2077,6 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2005 | } | 2077 | } |
2006 | spin_unlock(&vm->status_lock); | 2078 | spin_unlock(&vm->status_lock); |
2007 | 2079 | ||
2008 | if (bo_va) | ||
2009 | r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update); | ||
2010 | |||
2011 | return r; | 2080 | return r; |
2012 | } | 2081 | } |
2013 | 2082 | ||
@@ -2049,6 +2118,39 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | |||
2049 | return bo_va; | 2118 | return bo_va; |
2050 | } | 2119 | } |
2051 | 2120 | ||
2121 | |||
2122 | /** | ||
2123 | * amdgpu_vm_bo_insert_mapping - insert a new mapping | ||
2124 | * | ||
2125 | * @adev: amdgpu_device pointer | ||
2126 | * @bo_va: bo_va to store the address | ||
2127 | * @mapping: the mapping to insert | ||
2128 | * | ||
2129 | * Insert a new mapping into all structures. | ||
2130 | */ | ||
2131 | static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, | ||
2132 | struct amdgpu_bo_va *bo_va, | ||
2133 | struct amdgpu_bo_va_mapping *mapping) | ||
2134 | { | ||
2135 | struct amdgpu_vm *vm = bo_va->base.vm; | ||
2136 | struct amdgpu_bo *bo = bo_va->base.bo; | ||
2137 | |||
2138 | mapping->bo_va = bo_va; | ||
2139 | list_add(&mapping->list, &bo_va->invalids); | ||
2140 | amdgpu_vm_it_insert(mapping, &vm->va); | ||
2141 | |||
2142 | if (mapping->flags & AMDGPU_PTE_PRT) | ||
2143 | amdgpu_vm_prt_get(adev); | ||
2144 | |||
2145 | if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { | ||
2146 | spin_lock(&vm->status_lock); | ||
2147 | if (list_empty(&bo_va->base.vm_status)) | ||
2148 | list_add(&bo_va->base.vm_status, &vm->moved); | ||
2149 | spin_unlock(&vm->status_lock); | ||
2150 | } | ||
2151 | trace_amdgpu_vm_bo_map(bo_va, mapping); | ||
2152 | } | ||
2153 | |||
2052 | /** | 2154 | /** |
2053 | * amdgpu_vm_bo_map - map bo inside a vm | 2155 | * amdgpu_vm_bo_map - map bo inside a vm |
2054 | * | 2156 | * |
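Editor's note: the new amdgpu_vm_bo_insert_map() above (like several other hunks in this patch) only queues a bo_base on a status list when list_empty() is true for the element itself. With intrusive lists, an element that is off every list points to itself, so that check doubles as "already queued?". A self-contained sketch with a hand-rolled list, not the kernel's list.h:

```c
#include <stdio.h>

/* Tiny intrusive doubly linked list, written out here so the example is
 * self-contained; the kernel's list.h works the same way. */
struct list_head { struct list_head *prev, *next; };

static void init_node(struct list_head *n) { n->prev = n->next = n; }
static int  is_queued(struct list_head *n) { return n->next != n; }

static void list_add(struct list_head *n, struct list_head *head)
{
    n->next = head->next;
    n->prev = head;
    head->next->prev = n;
    head->next = n;
}

int main(void)
{
    struct list_head moved;      /* models vm->moved             */
    struct list_head vm_status;  /* models bo_va->base.vm_status */

    init_node(&moved);
    init_node(&vm_status);

    /* Same guard as the hunk above: queue only once. */
    if (!is_queued(&vm_status))
        list_add(&vm_status, &moved);
    if (!is_queued(&vm_status))
        list_add(&vm_status, &moved);   /* second attempt is a no-op */

    printf("queued entries: %d\n", moved.next != &moved &&
                                   moved.next->next == &moved ? 1 : 0);
    return 0;
}
```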
@@ -2100,17 +2202,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
2100 | if (!mapping) | 2202 | if (!mapping) |
2101 | return -ENOMEM; | 2203 | return -ENOMEM; |
2102 | 2204 | ||
2103 | INIT_LIST_HEAD(&mapping->list); | ||
2104 | mapping->start = saddr; | 2205 | mapping->start = saddr; |
2105 | mapping->last = eaddr; | 2206 | mapping->last = eaddr; |
2106 | mapping->offset = offset; | 2207 | mapping->offset = offset; |
2107 | mapping->flags = flags; | 2208 | mapping->flags = flags; |
2108 | 2209 | ||
2109 | list_add(&mapping->list, &bo_va->invalids); | 2210 | amdgpu_vm_bo_insert_map(adev, bo_va, mapping); |
2110 | amdgpu_vm_it_insert(mapping, &vm->va); | ||
2111 | |||
2112 | if (flags & AMDGPU_PTE_PRT) | ||
2113 | amdgpu_vm_prt_get(adev); | ||
2114 | 2211 | ||
2115 | return 0; | 2212 | return 0; |
2116 | } | 2213 | } |
@@ -2137,7 +2234,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2137 | { | 2234 | { |
2138 | struct amdgpu_bo_va_mapping *mapping; | 2235 | struct amdgpu_bo_va_mapping *mapping; |
2139 | struct amdgpu_bo *bo = bo_va->base.bo; | 2236 | struct amdgpu_bo *bo = bo_va->base.bo; |
2140 | struct amdgpu_vm *vm = bo_va->base.vm; | ||
2141 | uint64_t eaddr; | 2237 | uint64_t eaddr; |
2142 | int r; | 2238 | int r; |
2143 | 2239 | ||
@@ -2171,11 +2267,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
2171 | mapping->offset = offset; | 2267 | mapping->offset = offset; |
2172 | mapping->flags = flags; | 2268 | mapping->flags = flags; |
2173 | 2269 | ||
2174 | list_add(&mapping->list, &bo_va->invalids); | 2270 | amdgpu_vm_bo_insert_map(adev, bo_va, mapping); |
2175 | amdgpu_vm_it_insert(mapping, &vm->va); | ||
2176 | |||
2177 | if (flags & AMDGPU_PTE_PRT) | ||
2178 | amdgpu_vm_prt_get(adev); | ||
2179 | 2271 | ||
2180 | return 0; | 2272 | return 0; |
2181 | } | 2273 | } |
@@ -2221,6 +2313,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
2221 | 2313 | ||
2222 | list_del(&mapping->list); | 2314 | list_del(&mapping->list); |
2223 | amdgpu_vm_it_remove(mapping, &vm->va); | 2315 | amdgpu_vm_it_remove(mapping, &vm->va); |
2316 | mapping->bo_va = NULL; | ||
2224 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | 2317 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); |
2225 | 2318 | ||
2226 | if (valid) | 2319 | if (valid) |
@@ -2306,6 +2399,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, | |||
2306 | if (tmp->last > eaddr) | 2399 | if (tmp->last > eaddr) |
2307 | tmp->last = eaddr; | 2400 | tmp->last = eaddr; |
2308 | 2401 | ||
2402 | tmp->bo_va = NULL; | ||
2309 | list_add(&tmp->list, &vm->freed); | 2403 | list_add(&tmp->list, &vm->freed); |
2310 | trace_amdgpu_vm_bo_unmap(NULL, tmp); | 2404 | trace_amdgpu_vm_bo_unmap(NULL, tmp); |
2311 | } | 2405 | } |
@@ -2332,6 +2426,19 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, | |||
2332 | } | 2426 | } |
2333 | 2427 | ||
2334 | /** | 2428 | /** |
2429 | * amdgpu_vm_bo_lookup_mapping - find mapping by address | ||
2430 | * | ||
2431 | * @vm: the requested VM | ||
2432 | * | ||
2433 | * Find a mapping by it's address. | ||
2434 | */ | ||
2435 | struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, | ||
2436 | uint64_t addr) | ||
2437 | { | ||
2438 | return amdgpu_vm_it_iter_first(&vm->va, addr, addr); | ||
2439 | } | ||
2440 | |||
2441 | /** | ||
2335 | * amdgpu_vm_bo_rmv - remove a bo to a specific vm | 2442 | * amdgpu_vm_bo_rmv - remove a bo to a specific vm |
2336 | * | 2443 | * |
2337 | * @adev: amdgpu_device pointer | 2444 | * @adev: amdgpu_device pointer |
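Editor's note: the new amdgpu_vm_bo_lookup_mapping() above is a thin wrapper around the VM's interval tree: it returns the first mapping whose inclusive [start, last] range contains the given address. A model of the lookup semantics using a plain array and linear scan instead of the rbtree-based interval tree (names are illustrative):

```c
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Simplified stand-in for amdgpu_bo_va_mapping: an inclusive [start, last]
 * range of GPU pages. */
struct toy_mapping {
    uint64_t start, last;
    const char *name;
};

static const struct toy_mapping *lookup_mapping(const struct toy_mapping *m,
                                                size_t n, uint64_t addr)
{
    for (size_t i = 0; i < n; i++)
        if (m[i].start <= addr && addr <= m[i].last)
            return &m[i];
    return NULL;   /* no mapping covers this address */
}

int main(void)
{
    const struct toy_mapping maps[] = {
        { 0x1000, 0x1fff, "vertex buffer" },
        { 0x4000, 0x7fff, "texture"       },
    };
    const struct toy_mapping *hit = lookup_mapping(maps, 2, 0x4020);

    printf("0x4020 -> %s\n", hit ? hit->name : "unmapped");
    return 0;
}
```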
@@ -2356,6 +2463,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
2356 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { | 2463 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { |
2357 | list_del(&mapping->list); | 2464 | list_del(&mapping->list); |
2358 | amdgpu_vm_it_remove(mapping, &vm->va); | 2465 | amdgpu_vm_it_remove(mapping, &vm->va); |
2466 | mapping->bo_va = NULL; | ||
2359 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | 2467 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); |
2360 | list_add(&mapping->list, &vm->freed); | 2468 | list_add(&mapping->list, &vm->freed); |
2361 | } | 2469 | } |
@@ -2380,15 +2488,36 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
2380 | * Mark @bo as invalid. | 2488 | * Mark @bo as invalid. |
2381 | */ | 2489 | */ |
2382 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | 2490 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, |
2383 | struct amdgpu_bo *bo) | 2491 | struct amdgpu_bo *bo, bool evicted) |
2384 | { | 2492 | { |
2385 | struct amdgpu_vm_bo_base *bo_base; | 2493 | struct amdgpu_vm_bo_base *bo_base; |
2386 | 2494 | ||
2387 | list_for_each_entry(bo_base, &bo->va, bo_list) { | 2495 | list_for_each_entry(bo_base, &bo->va, bo_list) { |
2496 | struct amdgpu_vm *vm = bo_base->vm; | ||
2497 | |||
2498 | bo_base->moved = true; | ||
2499 | if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { | ||
2500 | spin_lock(&bo_base->vm->status_lock); | ||
2501 | if (bo->tbo.type == ttm_bo_type_kernel) | ||
2502 | list_move(&bo_base->vm_status, &vm->evicted); | ||
2503 | else | ||
2504 | list_move_tail(&bo_base->vm_status, | ||
2505 | &vm->evicted); | ||
2506 | spin_unlock(&bo_base->vm->status_lock); | ||
2507 | continue; | ||
2508 | } | ||
2509 | |||
2510 | if (bo->tbo.type == ttm_bo_type_kernel) { | ||
2511 | spin_lock(&bo_base->vm->status_lock); | ||
2512 | if (list_empty(&bo_base->vm_status)) | ||
2513 | list_add(&bo_base->vm_status, &vm->relocated); | ||
2514 | spin_unlock(&bo_base->vm->status_lock); | ||
2515 | continue; | ||
2516 | } | ||
2517 | |||
2388 | spin_lock(&bo_base->vm->status_lock); | 2518 | spin_lock(&bo_base->vm->status_lock); |
2389 | if (list_empty(&bo_base->vm_status)) | 2519 | if (list_empty(&bo_base->vm_status)) |
2390 | list_add(&bo_base->vm_status, | 2520 | list_add(&bo_base->vm_status, &vm->moved); |
2391 | &bo_base->vm->moved); | ||
2392 | spin_unlock(&bo_base->vm->status_lock); | 2521 | spin_unlock(&bo_base->vm->status_lock); |
2393 | } | 2522 | } |
2394 | } | 2523 | } |
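Editor's note: the extended amdgpu_vm_bo_invalidate() above routes a BO onto one of three per-VM lists: evicted per-VM BOs go on vm->evicted, page-table BOs that merely moved go on vm->relocated, and everything else goes on vm->moved. A small decision-table sketch of that routing (the enum and helper are mine, not the driver's):

```c
#include <stdbool.h>
#include <stdio.h>

enum toy_list { EVICTED, RELOCATED, MOVED };

/* Mirrors the routing in the hunk above.  'is_page_table' models
 * bo->tbo.type == ttm_bo_type_kernel and 'per_vm' models the BO sharing
 * the root page directory's reservation object. */
static enum toy_list route_invalidated_bo(bool evicted, bool per_vm,
                                          bool is_page_table)
{
    if (evicted && per_vm)
        return EVICTED;            /* must be validated again first         */
    if (is_page_table)
        return RELOCATED;          /* parent directory entries need rewrite */
    return MOVED;                  /* its own PTEs need rewrite             */
}

int main(void)
{
    static const char *names[] = { "evicted", "relocated", "moved" };

    printf("evicted per-VM PT -> %s\n",
           names[route_invalidated_bo(true,  true,  true)]);
    printf("moved page table  -> %s\n",
           names[route_invalidated_bo(false, true,  true)]);
    printf("moved regular BO  -> %s\n",
           names[route_invalidated_bo(false, false, false)]);
    return 0;
}
```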
@@ -2458,7 +2587,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_ | |||
2458 | * Init @vm fields. | 2587 | * Init @vm fields. |
2459 | */ | 2588 | */ |
2460 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | 2589 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, |
2461 | int vm_context) | 2590 | int vm_context, unsigned int pasid) |
2462 | { | 2591 | { |
2463 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, | 2592 | const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, |
2464 | AMDGPU_VM_PTE_COUNT(adev) * 8); | 2593 | AMDGPU_VM_PTE_COUNT(adev) * 8); |
@@ -2474,8 +2603,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2474 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | 2603 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) |
2475 | vm->reserved_vmid[i] = NULL; | 2604 | vm->reserved_vmid[i] = NULL; |
2476 | spin_lock_init(&vm->status_lock); | 2605 | spin_lock_init(&vm->status_lock); |
2606 | INIT_LIST_HEAD(&vm->evicted); | ||
2607 | INIT_LIST_HEAD(&vm->relocated); | ||
2477 | INIT_LIST_HEAD(&vm->moved); | 2608 | INIT_LIST_HEAD(&vm->moved); |
2478 | INIT_LIST_HEAD(&vm->cleared); | ||
2479 | INIT_LIST_HEAD(&vm->freed); | 2609 | INIT_LIST_HEAD(&vm->freed); |
2480 | 2610 | ||
2481 | /* create scheduler entity for page table updates */ | 2611 | /* create scheduler entity for page table updates */ |
@@ -2506,7 +2636,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2506 | vm->use_cpu_for_update ? "CPU" : "SDMA"); | 2636 | vm->use_cpu_for_update ? "CPU" : "SDMA"); |
2507 | WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), | 2637 | WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), |
2508 | "CPU update of VM recommended only for large BAR system\n"); | 2638 | "CPU update of VM recommended only for large BAR system\n"); |
2509 | vm->last_dir_update = NULL; | 2639 | vm->last_update = NULL; |
2510 | 2640 | ||
2511 | flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | | 2641 | flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | |
2512 | AMDGPU_GEM_CREATE_VRAM_CLEARED; | 2642 | AMDGPU_GEM_CREATE_VRAM_CLEARED; |
@@ -2519,30 +2649,46 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
2519 | r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, | 2649 | r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, |
2520 | AMDGPU_GEM_DOMAIN_VRAM, | 2650 | AMDGPU_GEM_DOMAIN_VRAM, |
2521 | flags, | 2651 | flags, |
2522 | NULL, NULL, init_pde_value, &vm->root.bo); | 2652 | NULL, NULL, init_pde_value, &vm->root.base.bo); |
2523 | if (r) | 2653 | if (r) |
2524 | goto error_free_sched_entity; | 2654 | goto error_free_sched_entity; |
2525 | 2655 | ||
2526 | r = amdgpu_bo_reserve(vm->root.bo, false); | 2656 | vm->root.base.vm = vm; |
2527 | if (r) | 2657 | list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); |
2528 | goto error_free_root; | 2658 | INIT_LIST_HEAD(&vm->root.base.vm_status); |
2529 | |||
2530 | vm->last_eviction_counter = atomic64_read(&adev->num_evictions); | ||
2531 | 2659 | ||
2532 | if (vm->use_cpu_for_update) { | 2660 | if (vm->use_cpu_for_update) { |
2533 | r = amdgpu_bo_kmap(vm->root.bo, NULL); | 2661 | r = amdgpu_bo_reserve(vm->root.base.bo, false); |
2534 | if (r) | 2662 | if (r) |
2535 | goto error_free_root; | 2663 | goto error_free_root; |
2664 | |||
2665 | r = amdgpu_bo_kmap(vm->root.base.bo, NULL); | ||
2666 | amdgpu_bo_unreserve(vm->root.base.bo); | ||
2667 | if (r) | ||
2668 | goto error_free_root; | ||
2669 | } | ||
2670 | |||
2671 | if (pasid) { | ||
2672 | unsigned long flags; | ||
2673 | |||
2674 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | ||
2675 | r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, | ||
2676 | GFP_ATOMIC); | ||
2677 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | ||
2678 | if (r < 0) | ||
2679 | goto error_free_root; | ||
2680 | |||
2681 | vm->pasid = pasid; | ||
2536 | } | 2682 | } |
2537 | 2683 | ||
2538 | amdgpu_bo_unreserve(vm->root.bo); | 2684 | INIT_KFIFO(vm->faults); |
2539 | 2685 | ||
2540 | return 0; | 2686 | return 0; |
2541 | 2687 | ||
2542 | error_free_root: | 2688 | error_free_root: |
2543 | amdgpu_bo_unref(&vm->root.bo->shadow); | 2689 | amdgpu_bo_unref(&vm->root.base.bo->shadow); |
2544 | amdgpu_bo_unref(&vm->root.bo); | 2690 | amdgpu_bo_unref(&vm->root.base.bo); |
2545 | vm->root.bo = NULL; | 2691 | vm->root.base.bo = NULL; |
2546 | 2692 | ||
2547 | error_free_sched_entity: | 2693 | error_free_sched_entity: |
2548 | amd_sched_entity_fini(&ring->sched, &vm->entity); | 2694 | amd_sched_entity_fini(&ring->sched, &vm->entity); |
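Editor's note: in the amdgpu_vm_init() hunk above, a caller-supplied PASID is recorded in the per-device IDR under a spinlock (hence GFP_ATOMIC), with idr_alloc(..., pasid, pasid + 1, ...) reserving exactly that id so a fault handler can later map a PASID back to its VM. A toy model using a direct-mapped table instead of the kernel IDR (names and sizes are illustrative):

```c
#include <stdio.h>
#include <stddef.h>

/* Toy stand-in for adev->vm_manager.pasid_idr: a direct-mapped table.
 * The real code uses an IDR under pasid_lock because the lookup also
 * happens from interrupt context. */
#define TOY_MAX_PASID 64

struct toy_vm { const char *name; };
static struct toy_vm *pasid_table[TOY_MAX_PASID];

/* Reserve exactly 'pasid' for 'vm', like idr_alloc(idr, vm, pasid, pasid + 1). */
static int register_pasid(unsigned pasid, struct toy_vm *vm)
{
    if (pasid >= TOY_MAX_PASID || pasid_table[pasid])
        return -1;                /* out of range or already taken */
    pasid_table[pasid] = vm;
    return (int)pasid;
}

static struct toy_vm *lookup_pasid(unsigned pasid)
{
    return pasid < TOY_MAX_PASID ? pasid_table[pasid] : NULL;
}

int main(void)
{
    struct toy_vm vm = { "compute process VM" };

    register_pasid(5, &vm);
    struct toy_vm *hit = lookup_pasid(5);   /* what a fault handler would do */

    printf("pasid 5 -> %s\n", hit ? hit->name : "unknown");
    return 0;
}
```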
@@ -2561,9 +2707,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) | |||
2561 | { | 2707 | { |
2562 | unsigned i; | 2708 | unsigned i; |
2563 | 2709 | ||
2564 | if (level->bo) { | 2710 | if (level->base.bo) { |
2565 | amdgpu_bo_unref(&level->bo->shadow); | 2711 | list_del(&level->base.bo_list); |
2566 | amdgpu_bo_unref(&level->bo); | 2712 | list_del(&level->base.vm_status); |
2713 | amdgpu_bo_unref(&level->base.bo->shadow); | ||
2714 | amdgpu_bo_unref(&level->base.bo); | ||
2567 | } | 2715 | } |
2568 | 2716 | ||
2569 | if (level->entries) | 2717 | if (level->entries) |
@@ -2586,8 +2734,21 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
2586 | { | 2734 | { |
2587 | struct amdgpu_bo_va_mapping *mapping, *tmp; | 2735 | struct amdgpu_bo_va_mapping *mapping, *tmp; |
2588 | bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; | 2736 | bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; |
2737 | u64 fault; | ||
2589 | int i; | 2738 | int i; |
2590 | 2739 | ||
2740 | /* Clear pending page faults from IH when the VM is destroyed */ | ||
2741 | while (kfifo_get(&vm->faults, &fault)) | ||
2742 | amdgpu_ih_clear_fault(adev, fault); | ||
2743 | |||
2744 | if (vm->pasid) { | ||
2745 | unsigned long flags; | ||
2746 | |||
2747 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | ||
2748 | idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); | ||
2749 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | ||
2750 | } | ||
2751 | |||
2591 | amd_sched_entity_fini(vm->entity.sched, &vm->entity); | 2752 | amd_sched_entity_fini(vm->entity.sched, &vm->entity); |
2592 | 2753 | ||
2593 | if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { | 2754 | if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { |
@@ -2610,7 +2771,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
2610 | } | 2771 | } |
2611 | 2772 | ||
2612 | amdgpu_vm_free_levels(&vm->root); | 2773 | amdgpu_vm_free_levels(&vm->root); |
2613 | dma_fence_put(vm->last_dir_update); | 2774 | dma_fence_put(vm->last_update); |
2614 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | 2775 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) |
2615 | amdgpu_vm_free_reserved_vmid(adev, vm, i); | 2776 | amdgpu_vm_free_reserved_vmid(adev, vm, i); |
2616 | } | 2777 | } |
@@ -2668,6 +2829,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) | |||
2668 | adev->vm_manager.vm_update_mode = 0; | 2829 | adev->vm_manager.vm_update_mode = 0; |
2669 | #endif | 2830 | #endif |
2670 | 2831 | ||
2832 | idr_init(&adev->vm_manager.pasid_idr); | ||
2833 | spin_lock_init(&adev->vm_manager.pasid_lock); | ||
2671 | } | 2834 | } |
2672 | 2835 | ||
2673 | /** | 2836 | /** |
@@ -2681,6 +2844,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) | |||
2681 | { | 2844 | { |
2682 | unsigned i, j; | 2845 | unsigned i, j; |
2683 | 2846 | ||
2847 | WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); | ||
2848 | idr_destroy(&adev->vm_manager.pasid_idr); | ||
2849 | |||
2684 | for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { | 2850 | for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { |
2685 | struct amdgpu_vm_id_manager *id_mgr = | 2851 | struct amdgpu_vm_id_manager *id_mgr = |
2686 | &adev->vm_manager.id_mgr[i]; | 2852 | &adev->vm_manager.id_mgr[i]; |