aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2018-08-21 05:11:36 -0400
committerAlex Deucher <alexander.deucher@amd.com>2018-08-27 16:10:07 -0400
commit7876fa4f55fda4a57348832f4a668279ed2b2fc4 (patch)
tree3a2c8d39c5a86fff4f5f71906b26c4aa69be7a6c /drivers/gpu/drm/amd/amdgpu
parent07e6d3f03b5ff7b93af37932ee0f4b775812274f (diff)
drm/amdgpu: add ring soft recovery v4
Instead of hammering hard on the GPU, try a soft recovery first. Changes: v2: reorder code a bit; v3: increase timeout to 10ms, increment GPU reset counter; v4: squash in compile fix (Christian). Signed-off-by: Christian König <christian.koenig@amd.com>; Reviewed-by: Huang Rui <ray.huang@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h4
3 files changed, 35 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index facc0f08d804..34e54d41f5ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
33 struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); 33 struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
34 struct amdgpu_job *job = to_amdgpu_job(s_job); 34 struct amdgpu_job *job = to_amdgpu_job(s_job);
35 35
36 if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
37 DRM_ERROR("ring %s timeout, but soft recovered\n",
38 s_job->sched->name);
39 return;
40 }
41
36 DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", 42 DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
37 job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), 43 job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
38 ring->fence_drv.sync_seq); 44 ring->fence_drv.sync_seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 5dfd26be1eec..b70e85ec147d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -383,6 +383,31 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
383 amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); 383 amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
384} 384}
385 385
386/**
387 * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
388 *
389 * @ring: ring to try the recovery on
390 * @vmid: VMID we try to get going again
391 * @fence: timedout fence
392 *
393 * Tries to get a ring proceeding again when it is stuck.
394 */
395bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
396 struct dma_fence *fence)
397{
398 ktime_t deadline = ktime_add_us(ktime_get(), 10000);
399
400 if (!ring->funcs->soft_recovery)
401 return false;
402
403 atomic_inc(&ring->adev->gpu_reset_counter);
404 while (!dma_fence_is_signaled(fence) &&
405 ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
406 ring->funcs->soft_recovery(ring, vmid);
407
408 return dma_fence_is_signaled(fence);
409}
410
386/* 411/*
387 * Debugfs info 412 * Debugfs info
388 */ 413 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 409fdd9b9710..9cc239968e40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -168,6 +168,8 @@ struct amdgpu_ring_funcs {
168 /* priority functions */ 168 /* priority functions */
169 void (*set_priority) (struct amdgpu_ring *ring, 169 void (*set_priority) (struct amdgpu_ring *ring,
170 enum drm_sched_priority priority); 170 enum drm_sched_priority priority);
171 /* Try to soft recover the ring to make the fence signal */
172 void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
171}; 173};
172 174
173struct amdgpu_ring { 175struct amdgpu_ring {
@@ -260,6 +262,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring);
260void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, 262void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
261 uint32_t reg0, uint32_t val0, 263 uint32_t reg0, uint32_t val0,
262 uint32_t reg1, uint32_t val1); 264 uint32_t reg1, uint32_t val1);
265bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
266 struct dma_fence *fence);
263 267
264static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) 268static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
265{ 269{