author		Christian König <christian.koenig@amd.com>	2016-06-01 04:47:36 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2016-07-07 14:51:23 -0400
commit		1fbb2e929902ab6e161ebcfb2f4d6de1c4613473 (patch)
tree		12738147113fb6731365826c1bd5e80d1cdf4079
parent		8d76001e0058aca129f599810d4f60e36fb36c5b (diff)
drm/amdgpu: use a fence array for VMID management
Just wait for any fence to become available, instead of waiting
for the last entry of the LRU.

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
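The idea in miniature: each still-busy VMID contributes its first not-yet-signaled fence (via amdgpu_sync_peek_fence), and those fences are wrapped into a fence array created with signal_on_any = true, so the wait finishes as soon as any one VMID frees up instead of when the LRU tail does. The snippet below is a standalone userspace model of that "signal on any" aggregation, not part of the patch; all names in it (model_fence, model_array_signaled, ...) are illustrative only.

/*
 * Standalone model (not kernel code) of the "signal on any" aggregation the
 * patch relies on: one pending fence per busy VMID goes into an array that
 * counts as signaled as soon as ANY member signals, mirroring the
 * fence_array_create(..., signal_on_any = true) call in amdgpu_vm_grab_id().
 */
#include <stdbool.h>
#include <stdio.h>

struct model_fence {
	unsigned int seqno;
	bool signaled;
};

/* Aggregate over several fences with "any" semantics. */
struct model_fence_array {
	struct model_fence **fences;
	unsigned int count;
};

static bool model_array_signaled(const struct model_fence_array *array)
{
	/* Signaled as soon as any single member is signaled. */
	for (unsigned int i = 0; i < array->count; ++i)
		if (array->fences[i]->signaled)
			return true;
	return false;
}

int main(void)
{
	struct model_fence a = { .seqno = 1, .signaled = false };
	struct model_fence b = { .seqno = 2, .signaled = false };
	struct model_fence *members[] = { &a, &b };
	struct model_fence_array any = { .fences = members, .count = 2 };

	printf("before: %d\n", model_array_signaled(&any));	/* 0 */
	b.signaled = true;	/* any one VMID freeing up is enough */
	printf("after:  %d\n", model_array_signaled(&any));	/* 1 */
	return 0;
}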
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu.h	 10
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_job.c	  2
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c	 69
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c	155
4 files changed, 117 insertions(+), 119 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 922a20c972b1..c13023dded8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -597,10 +597,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync,
-			 struct amdgpu_ring *ring);
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence);
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
@@ -909,6 +907,10 @@ struct amdgpu_vm_manager {
 	struct list_head			ids_lru;
 	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];
 
+	/* Handling of VM fences */
+	u64					fence_context;
+	unsigned				seqno[AMDGPU_MAX_RINGS];
+
 	uint32_t				max_pfn;
 	/* vram base address for page table entry */
 	u64					vram_base_offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e395bbebb3ad..b50a8450fcae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -166,7 +166,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	}
 	job = to_amdgpu_job(sched_job);
 
-	BUG_ON(!amdgpu_sync_is_idle(&job->sync, NULL));
+	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
 
 	trace_amdgpu_sched_run_job(job);
 	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a2766d72b2da..5c8d3022fb87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -223,16 +223,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_sync_is_idle - test if all fences are signaled
+ * amdgpu_sync_peek_fence - get the next fence not signaled yet
  *
  * @sync: the sync object
  * @ring: optional ring to use for test
  *
- * Returns true if all fences in the sync object are signaled or scheduled to
- * the ring (if provided).
+ * Returns the next fence not signaled yet without removing it from the sync
+ * object.
  */
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync,
-			 struct amdgpu_ring *ring)
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
@@ -246,68 +246,25 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync,
 		/* For fences from the same ring it is sufficient
 		 * when they are scheduled.
 		 */
-		if (s_fence->sched == &ring->sched &&
-		    fence_is_signaled(&s_fence->scheduled))
-			continue;
-		}
+		if (s_fence->sched == &ring->sched) {
+			if (fence_is_signaled(&s_fence->scheduled))
+				continue;
 
-		if (fence_is_signaled(f)) {
-			hash_del(&e->node);
-			fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
+			return &s_fence->scheduled;
+		}
 		}
 
-		return false;
-	}
-
-	return true;
-}
-
-/**
- * amdgpu_sync_cycle_fences - move fences from one sync object into another
- *
- * @dst: the destination sync object
- * @src: the source sync object
- * @fence: fence to add to source
- *
- * Remove all fences from source and put them into destination and add
- * fence as new one into source.
- */
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence)
-{
-	struct amdgpu_sync_entry *e, *newone;
-	struct hlist_node *tmp;
-	int i;
-
-	/* Allocate the new entry before moving the old ones */
-	newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
-	if (!newone)
-		return -ENOMEM;
-
-	hash_for_each_safe(src->fences, i, tmp, e, node) {
-		struct fence *f = e->fence;
-
-		hash_del(&e->node);
 		if (fence_is_signaled(f)) {
+			hash_del(&e->node);
 			fence_put(f);
 			kmem_cache_free(amdgpu_sync_slab, e);
 			continue;
 		}
 
-		if (amdgpu_sync_add_later(dst, f)) {
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		hash_add(dst->fences, &e->node, f->context);
+		return f;
 	}
 
-	hash_add(src->fences, &newone->node, fence->context);
-	newone->fence = fence_get(fence);
-
-	return 0;
+	return NULL;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f38978d8834c..06eb60950cef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -25,6 +25,7 @@
  * Alex Deucher
  * Jerome Glisse
  */
+#include <linux/fence-array.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -180,82 +181,116 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	struct amdgpu_device *adev = ring->adev;
 	struct fence *updates = sync->last_vm_update;
 	struct amdgpu_vm_id *id, *idle;
-	unsigned i = ring->idx;
-	int r;
+	struct fence **fences;
+	unsigned i;
+	int r = 0;
+
+	fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
+			       GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
 
 	mutex_lock(&adev->vm_manager.lock);
 
 	/* Check if we have an idle VMID */
+	i = 0;
 	list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
-		if (amdgpu_sync_is_idle(&idle->active, ring))
+		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+		if (!fences[i])
 			break;
-
+		++i;
 	}
 
-	/* If we can't find a idle VMID to use, just wait for the oldest */
+	/* If we can't find a idle VMID to use, wait till one becomes available */
 	if (&idle->list == &adev->vm_manager.ids_lru) {
-		id = list_first_entry(&adev->vm_manager.ids_lru,
-				      struct amdgpu_vm_id,
-				      list);
-	} else {
-		/* Check if we can use a VMID already assigned to this VM */
-		do {
-			struct fence *flushed;
-
-			id = vm->ids[i++];
-			if (i == AMDGPU_MAX_RINGS)
-				i = 0;
-
-			/* Check all the prerequisites to using this VMID */
-			if (!id)
-				continue;
-
-			if (atomic64_read(&id->owner) != vm->client_id)
-				continue;
-
-			if (pd_addr != id->pd_gpu_addr)
-				continue;
-
-			if (id->last_user != ring && (!id->last_flush ||
-				!fence_is_signaled(id->last_flush)))
-				continue;
-
-			flushed = id->flushed_updates;
-			if (updates && (!flushed ||
-				fence_is_later(updates, flushed)))
-				continue;
-
-			/* Good we can use this VMID */
-			if (id->last_user == ring) {
-				r = amdgpu_sync_fence(ring->adev, sync,
-						      id->first);
-				if (r)
-					goto error;
-			}
-
-			/* And remember this submission as user of the VMID */
-			r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
+		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
+		struct fence_array *array;
+		unsigned j;
+
+		for (j = 0; j < i; ++j)
+			fence_get(fences[j]);
+
+		array = fence_array_create(i, fences, fence_context,
+					   seqno, true);
+		if (!array) {
+			for (j = 0; j < i; ++j)
+				fence_put(fences[j]);
+			kfree(fences);
+			r = -ENOMEM;
+			goto error;
+		}
+
+
+		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
+		fence_put(&array->base);
+		if (r)
+			goto error;
+
+		mutex_unlock(&adev->vm_manager.lock);
+		return 0;
+
+	}
+	kfree(fences);
+
+	/* Check if we can use a VMID already assigned to this VM */
+	i = ring->idx;
+	do {
+		struct fence *flushed;
+
+		id = vm->ids[i++];
+		if (i == AMDGPU_MAX_RINGS)
+			i = 0;
+
+		/* Check all the prerequisites to using this VMID */
+		if (!id)
+			continue;
+
+		if (atomic64_read(&id->owner) != vm->client_id)
+			continue;
+
+		if (pd_addr != id->pd_gpu_addr)
+			continue;
+
+		if (id->last_user != ring &&
+		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
+			continue;
+
+		flushed = id->flushed_updates;
+		if (updates &&
+		    (!flushed || fence_is_later(updates, flushed)))
+			continue;
+
+		/* Good we can use this VMID */
+		if (id->last_user == ring) {
+			r = amdgpu_sync_fence(ring->adev, sync,
+					      id->first);
 			if (r)
 				goto error;
+		}
+
+		/* And remember this submission as user of the VMID */
+		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+		if (r)
+			goto error;
 
 		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 		vm->ids[ring->idx] = id;
 
 		*vm_id = id - adev->vm_manager.ids;
 		*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
-					*vm_pd_addr);
+		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
 
 		mutex_unlock(&adev->vm_manager.lock);
 		return 0;
 
 	} while (i != ring->idx);
 
 	/* Still no ID to use? Then use the idle one found earlier */
 	id = idle;
-	}
 
-	r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
+	/* Remember this submission as user of the VMID */
+	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 	if (r)
 		goto error;
 
@@ -1515,6 +1550,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 			      &adev->vm_manager.ids_lru);
 	}
 
+	adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		adev->vm_manager.seqno[i] = 0;
+
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 }