author     Christian König <christian.koenig@amd.com>    2016-06-01 04:47:36 -0400
committer  Alex Deucher <alexander.deucher@amd.com>      2016-07-07 14:51:23 -0400
commit     1fbb2e929902ab6e161ebcfb2f4d6de1c4613473
tree       12738147113fb6731365826c1bd5e80d1cdf4079
parent     8d76001e0058aca129f599810d4f60e36fb36c5b
drm/amdgpu: use a fence array for VMID management
Just wait for any fence to become available, instead
of waiting for the last entry of the LRU.
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
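
The heart of the change, shown here as a standalone sketch for readability (the helper name wait_for_any_vmid() and its exact signature are illustrative only; the real logic is inlined in amdgpu_vm_grab_id() in the diff below): every VMID keeps a sync object of fences from jobs still using it. Instead of blocking on the LRU tail, amdgpu_vm_grab_id() now collects the first unsignaled fence of each VMID, wraps them in a fence array created with signal_on_any = true, and adds that array to the job's sync object, so the job resumes as soon as any VMID becomes reusable.

/* Illustrative sketch, not part of the patch: wait_for_any_vmid() is a
 * made-up name for what amdgpu_vm_grab_id() does inline when no VMID is
 * idle and none of the VM's previous VMIDs can be reused. */
static int wait_for_any_vmid(struct amdgpu_device *adev,
                             struct amdgpu_ring *ring,
                             struct amdgpu_sync *sync,
                             struct fence **fences, unsigned num)
{
        u64 context = adev->vm_manager.fence_context + ring->idx;
        unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
        struct fence_array *array;
        unsigned i;
        int r;

        /* fence_array_create() consumes one reference per fence and takes
         * over the (kmalloc'ed) fences[] storage on success, so grab a
         * reference for it; the caller keeps its own references. */
        for (i = 0; i < num; ++i)
                fence_get(fences[i]);

        /* signal_on_any = true: the array signals as soon as ANY of the
         * per-VMID fences signals, i.e. as soon as some VMID is free again,
         * not when the oldest LRU entry finally retires. */
        array = fence_array_create(num, fences, context, seqno, true);
        if (!array) {
                for (i = 0; i < num; ++i)
                        fence_put(fences[i]);
                return -ENOMEM;
        }

        /* Let the job wait on the composite fence; drop our reference,
         * amdgpu_sync_fence() took its own. */
        r = amdgpu_sync_fence(adev, sync, &array->base);
        fence_put(&array->base);
        return r;
}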
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu.h        10
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     2
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c   69
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    155
4 files changed, 117 insertions, 119 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 922a20c972b1..c13023dded8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -597,10 +597,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
                      struct amdgpu_sync *sync,
                      struct reservation_object *resv,
                      void *owner);
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync,
-                         struct amdgpu_ring *ring);
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-                             struct fence *fence);
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+                                     struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
@@ -909,6 +907,10 @@ struct amdgpu_vm_manager {
         struct list_head                ids_lru;
         struct amdgpu_vm_id             ids[AMDGPU_NUM_VM];
 
+        /* Handling of VM fences */
+        u64                             fence_context;
+        unsigned                        seqno[AMDGPU_MAX_RINGS];
+
         uint32_t                        max_pfn;
         /* vram base address for page table entry  */
         u64                             vram_base_offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e395bbebb3ad..b50a8450fcae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -166,7 +166,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
         }
         job = to_amdgpu_job(sched_job);
 
-        BUG_ON(!amdgpu_sync_is_idle(&job->sync, NULL));
+        BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
 
         trace_amdgpu_sched_run_job(job);
         r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a2766d72b2da..5c8d3022fb87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -223,16 +223,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_sync_is_idle - test if all fences are signaled
+ * amdgpu_sync_peek_fence - get the next fence not signaled yet
  *
  * @sync: the sync object
  * @ring: optional ring to use for test
  *
- * Returns true if all fences in the sync object are signaled or scheduled to
- * the ring (if provided).
+ * Returns the next fence not signaled yet without removing it from the sync
+ * object.
  */
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync,
-                         struct amdgpu_ring *ring)
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+                                     struct amdgpu_ring *ring)
 {
         struct amdgpu_sync_entry *e;
         struct hlist_node *tmp;
@@ -246,68 +246,25 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync,
                         /* For fences from the same ring it is sufficient
                          * when they are scheduled.
                          */
-                        if (s_fence->sched == &ring->sched &&
-                            fence_is_signaled(&s_fence->scheduled))
-                                continue;
-                }
-
+                        if (s_fence->sched == &ring->sched) {
+                                if (fence_is_signaled(&s_fence->scheduled))
+                                        continue;
+
+                                return &s_fence->scheduled;
+                        }
+                }
+
                 if (fence_is_signaled(f)) {
                         hash_del(&e->node);
                         fence_put(f);
                         kmem_cache_free(amdgpu_sync_slab, e);
                         continue;
                 }
 
-                return false;
+                return f;
         }
 
-        return true;
+        return NULL;
 }
 
-/**
- * amdgpu_sync_cycle_fences - move fences from one sync object into another
- *
- * @dst: the destination sync object
- * @src: the source sync object
- * @fence: fence to add to source
- *
- * Remove all fences from source and put them into destination and add
- * fence as new one into source.
- */
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-                             struct fence *fence)
-{
-        struct amdgpu_sync_entry *e, *newone;
-        struct hlist_node *tmp;
-        int i;
-
-        /* Allocate the new entry before moving the old ones */
-        newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
-        if (!newone)
-                return -ENOMEM;
-
-        hash_for_each_safe(src->fences, i, tmp, e, node) {
-                struct fence *f = e->fence;
-
-                hash_del(&e->node);
-                if (fence_is_signaled(f)) {
-                        fence_put(f);
-                        kmem_cache_free(amdgpu_sync_slab, e);
-                        continue;
-                }
-
-                if (amdgpu_sync_add_later(dst, f)) {
-                        kmem_cache_free(amdgpu_sync_slab, e);
-                        continue;
-                }
-
-                hash_add(dst->fences, &e->node, f->context);
-        }
-
-        hash_add(src->fences, &newone->node, fence->context);
-        newone->fence = fence_get(fence);
-
-        return 0;
-}
-
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f38978d8834c..06eb60950cef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -25,6 +25,7 @@
  * Alex Deucher
  * Jerome Glisse
  */
+#include <linux/fence-array.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -180,82 +181,116 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
         struct amdgpu_device *adev = ring->adev;
         struct fence *updates = sync->last_vm_update;
         struct amdgpu_vm_id *id, *idle;
-        unsigned i = ring->idx;
-        int r;
+        struct fence **fences;
+        unsigned i;
+        int r = 0;
+
+        fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
+                               GFP_KERNEL);
+        if (!fences)
+                return -ENOMEM;
 
         mutex_lock(&adev->vm_manager.lock);
 
         /* Check if we have an idle VMID */
+        i = 0;
         list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
-                if (amdgpu_sync_is_idle(&idle->active, ring))
+                fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+                if (!fences[i])
                         break;
-
+                ++i;
         }
 
-        /* If we can't find a idle VMID to use, just wait for the oldest */
+        /* If we can't find a idle VMID to use, wait till one becomes available */
         if (&idle->list == &adev->vm_manager.ids_lru) {
-                id = list_first_entry(&adev->vm_manager.ids_lru,
-                                      struct amdgpu_vm_id,
-                                      list);
-        } else {
-                /* Check if we can use a VMID already assigned to this VM */
-                do {
-                        struct fence *flushed;
-
-                        id = vm->ids[i++];
-                        if (i == AMDGPU_MAX_RINGS)
-                                i = 0;
-
-                        /* Check all the prerequisites to using this VMID */
-                        if (!id)
-                                continue;
-
-                        if (atomic64_read(&id->owner) != vm->client_id)
-                                continue;
-
-                        if (pd_addr != id->pd_gpu_addr)
-                                continue;
-
-                        if (id->last_user != ring && (!id->last_flush ||
-                            !fence_is_signaled(id->last_flush)))
-                                continue;
-
-                        flushed = id->flushed_updates;
-                        if (updates && (!flushed ||
-                            fence_is_later(updates, flushed)))
-                                continue;
-
-                        /* Good we can use this VMID */
-                        if (id->last_user == ring) {
-                                r = amdgpu_sync_fence(ring->adev, sync,
-                                                      id->first);
-                                if (r)
-                                        goto error;
-                        }
-
-                        /* And remember this submission as user of the VMID */
-                        r = amdgpu_sync_fence(ring->adev, &id->active, fence);
-                        if (r)
-                                goto error;
-
-                        list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-                        vm->ids[ring->idx] = id;
-
-                        *vm_id = id - adev->vm_manager.ids;
-                        *vm_pd_addr = AMDGPU_VM_NO_FLUSH;
-                        trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
-                                                *vm_pd_addr);
-
-                        mutex_unlock(&adev->vm_manager.lock);
-                        return 0;
-
-                } while (i != ring->idx);
-
-                /* Still no ID to use? Then use the idle one found earlier */
-                id = idle;
-        }
-
-        r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
+                u64 fence_context = adev->vm_manager.fence_context + ring->idx;
+                unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
+                struct fence_array *array;
+                unsigned j;
+
+                for (j = 0; j < i; ++j)
+                        fence_get(fences[j]);
+
+                array = fence_array_create(i, fences, fence_context,
+                                           seqno, true);
+                if (!array) {
+                        for (j = 0; j < i; ++j)
+                                fence_put(fences[j]);
+                        kfree(fences);
+                        r = -ENOMEM;
+                        goto error;
+                }
+
+
+                r = amdgpu_sync_fence(ring->adev, sync, &array->base);
+                fence_put(&array->base);
+                if (r)
+                        goto error;
+
+                mutex_unlock(&adev->vm_manager.lock);
+                return 0;
+
+        }
+        kfree(fences);
+
+        /* Check if we can use a VMID already assigned to this VM */
+        i = ring->idx;
+        do {
+                struct fence *flushed;
+
+                id = vm->ids[i++];
+                if (i == AMDGPU_MAX_RINGS)
+                        i = 0;
+
+                /* Check all the prerequisites to using this VMID */
+                if (!id)
+                        continue;
+
+                if (atomic64_read(&id->owner) != vm->client_id)
+                        continue;
+
+                if (pd_addr != id->pd_gpu_addr)
+                        continue;
+
+                if (id->last_user != ring &&
+                    (!id->last_flush || !fence_is_signaled(id->last_flush)))
+                        continue;
+
+                flushed = id->flushed_updates;
+                if (updates &&
+                    (!flushed || fence_is_later(updates, flushed)))
+                        continue;
+
+                /* Good we can use this VMID */
+                if (id->last_user == ring) {
+                        r = amdgpu_sync_fence(ring->adev, sync,
+                                              id->first);
+                        if (r)
+                                goto error;
+                }
+
+                /* And remember this submission as user of the VMID */
+                r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+                if (r)
+                        goto error;
+
+                list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+                vm->ids[ring->idx] = id;
+
+                *vm_id = id - adev->vm_manager.ids;
+                *vm_pd_addr = AMDGPU_VM_NO_FLUSH;
+                trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+
+                mutex_unlock(&adev->vm_manager.lock);
+                return 0;
+
+        } while (i != ring->idx);
+
+        /* Still no ID to use? Then use the idle one found earlier */
+        id = idle;
+
+        /* Remember this submission as user of the VMID */
+        r = amdgpu_sync_fence(ring->adev, &id->active, fence);
         if (r)
                 goto error;
 
@@ -1515,6 +1550,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
                               &adev->vm_manager.ids_lru);
         }
 
+        adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+        for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+                adev->vm_manager.seqno[i] = 0;
+
         atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
         atomic64_set(&adev->vm_manager.client_counter, 0);
 }
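
For reference, a minimal caller-side sketch of the new amdgpu_sync_peek_fence() helper (the fences[]/num_fences bookkeeping mirrors amdgpu_vm_grab_id() above; grab_idle_vmid() is a hypothetical placeholder, not something this patch adds):

        struct fence *busy;

        /* NULL means every fence in the sync object has signaled (or, for
         * fences from @ring, has at least been scheduled): this VMID is
         * idle and can be grabbed right away. */
        busy = amdgpu_sync_peek_fence(&id->active, ring);
        if (!busy)
                return grab_idle_vmid(id);      /* hypothetical helper */

        /* Otherwise busy is the first fence still blocking this VMID;
         * collect it so a signal-on-any fence array can wait for whichever
         * VMID frees up first. */
        fences[num_fences++] = busy;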