author    Junwei Zhang <Jerry.Zhang@amd.com>          2015-10-09 20:48:42 -0400
committer Alex Deucher <alexander.deucher@amd.com>    2015-10-14 16:16:42 -0400
commit    2440ff2c9151120c8ae27de6565b11831ee07e08 (patch)
tree      2e36ced49bf5a0441ac7ba4c4d573c0c065bc358 /drivers/gpu/drm/amd
parent    d6c10f6b81e340e7d2d337183517a6da739d5ba2 (diff)
drm/amdgpu: add timer to fence to detect scheduler lockup
Change-Id: I67e987db0efdca28faa80b332b75571192130d33
Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: David Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c      | 14
-rw-r--r--  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c  | 43
-rw-r--r--  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h  |  7
3 files changed, 61 insertions, 3 deletions
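
In short, the commit arms a per-job watchdog: when a job is handed to the hardware, the scheduler schedules a delayed work item with the configured timeout and links the job's fence into sched->fence_list; if the fence signals in time the work is cancelled and the fence unlinked, and if the work fires first the handler reports the lockup and drops the pending fences. The fragment below is a minimal, self-contained sketch of that delayed-work pattern with hypothetical names (my_job, my_arm_watchdog, my_job_completed); it is not the driver code, which additionally keeps the per-scheduler fence list shown in the diff.

/*
 * Minimal sketch of the watchdog pattern applied by this commit.
 * Names are hypothetical; only the delayed-work mechanics are shown.
 */
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/printk.h>
#include <linux/kernel.h>

struct my_job {
	struct delayed_work dwork;	/* fires if the job never completes */
	const char *name;
};

/* Runs only if the delay elapses before the job completes. */
static void my_watchdog_func(struct work_struct *work)
{
	struct my_job *job = container_of(work, struct my_job, dwork.work);

	pr_err("job %s timed out, scheduler may be locked up\n", job->name);
}

/* Arm the watchdog when the job is submitted to the hardware. */
static void my_arm_watchdog(struct my_job *job, unsigned int timeout_ms)
{
	INIT_DELAYED_WORK(&job->dwork, my_watchdog_func);
	schedule_delayed_work(&job->dwork, msecs_to_jiffies(timeout_ms));
}

/* Disarm it when the job's fence signals in time. */
static void my_job_completed(struct my_job *job)
{
	cancel_delayed_work_sync(&job->dwork);
}
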
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index b3fc26c59787..fcad7e060938 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -628,8 +628,20 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 	init_waitqueue_head(&ring->fence_drv.fence_queue);
 
 	if (amdgpu_enable_scheduler) {
+		long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+		if (timeout == 0) {
+			/*
+			 * FIXME:
+			 * Delayed workqueue cannot use it directly,
+			 * so the scheduler will not use delayed workqueue if
+			 * MAX_SCHEDULE_TIMEOUT is set.
+			 * Currently keep it simple and silly.
+			 */
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		}
 		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
-				   amdgpu_sched_hw_submission, ring->name);
+				   amdgpu_sched_hw_submission,
+				   timeout, ring->name);
 		if (r) {
 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
 				  ring->name);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 3697eeeecf82..a413dee7cd19 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -327,19 +327,49 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 	struct amd_sched_fence *s_fence =
 		container_of(cb, struct amd_sched_fence, cb);
 	struct amd_gpu_scheduler *sched = s_fence->sched;
+	unsigned long flags;
 
 	atomic_dec(&sched->hw_rq_count);
 	amd_sched_fence_signal(s_fence);
+	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
+		cancel_delayed_work_sync(&s_fence->dwork);
+		spin_lock_irqsave(&sched->fence_list_lock, flags);
+		list_del_init(&s_fence->list);
+		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+	}
 	fence_put(&s_fence->base);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
+static void amd_sched_fence_work_func(struct work_struct *work)
+{
+	struct amd_sched_fence *s_fence =
+		container_of(work, struct amd_sched_fence, dwork.work);
+	struct amd_gpu_scheduler *sched = s_fence->sched;
+	struct amd_sched_fence *entity, *tmp;
+	unsigned long flags;
+
+	DRM_ERROR("[%s] scheduler is timeout!\n", sched->name);
+
+	/* Clean all pending fences */
+	list_for_each_entry_safe(entity, tmp, &sched->fence_list, list) {
+		DRM_ERROR(" fence no %d\n", entity->base.seqno);
+		cancel_delayed_work_sync(&entity->dwork);
+		spin_lock_irqsave(&sched->fence_list_lock, flags);
+		list_del_init(&entity->list);
+		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+		fence_put(&entity->base);
+	}
+}
+
 static int amd_sched_main(void *param)
 {
 	struct sched_param sparam = {.sched_priority = 1};
 	struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
 	int r, count;
 
+	spin_lock_init(&sched->fence_list_lock);
+	INIT_LIST_HEAD(&sched->fence_list);
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
@@ -347,6 +377,7 @@ static int amd_sched_main(void *param)
 		struct amd_sched_fence *s_fence;
 		struct amd_sched_job *sched_job;
 		struct fence *fence;
+		unsigned long flags;
 
 		wait_event_interruptible(sched->wake_up_worker,
 			kthread_should_stop() ||
@@ -357,6 +388,15 @@ static int amd_sched_main(void *param)
 
 		entity = sched_job->s_entity;
 		s_fence = sched_job->s_fence;
+
+		if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
+			INIT_DELAYED_WORK(&s_fence->dwork, amd_sched_fence_work_func);
+			schedule_delayed_work(&s_fence->dwork, sched->timeout);
+			spin_lock_irqsave(&sched->fence_list_lock, flags);
+			list_add_tail(&s_fence->list, &sched->fence_list);
+			spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+		}
+
 		atomic_inc(&sched->hw_rq_count);
 		fence = sched->ops->run_job(sched_job);
 		if (fence) {
@@ -392,11 +432,12 @@ static int amd_sched_main(void *param)
 */
 int amd_sched_init(struct amd_gpu_scheduler *sched,
 		   struct amd_sched_backend_ops *ops,
-		   unsigned hw_submission, const char *name)
+		   unsigned hw_submission, long timeout, const char *name)
 {
 	sched->ops = ops;
 	sched->hw_submission_limit = hw_submission;
 	sched->name = name;
+	sched->timeout = timeout;
 	amd_sched_rq_init(&sched->sched_rq);
 	amd_sched_rq_init(&sched->kernel_rq);
 
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 80b64dc22214..929e9aced041 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -68,6 +68,8 @@ struct amd_sched_fence {
 	struct amd_gpu_scheduler *sched;
 	spinlock_t lock;
 	void *owner;
+	struct delayed_work dwork;
+	struct list_head list;
 };
 
 struct amd_sched_job {
@@ -103,18 +105,21 @@ struct amd_sched_backend_ops {
 struct amd_gpu_scheduler {
 	struct amd_sched_backend_ops *ops;
 	uint32_t hw_submission_limit;
+	long timeout;
 	const char *name;
 	struct amd_sched_rq sched_rq;
 	struct amd_sched_rq kernel_rq;
 	wait_queue_head_t wake_up_worker;
 	wait_queue_head_t job_scheduled;
 	atomic_t hw_rq_count;
+	struct list_head fence_list;
+	spinlock_t fence_list_lock;
 	struct task_struct *thread;
 };
 
 int amd_sched_init(struct amd_gpu_scheduler *sched,
 		   struct amd_sched_backend_ops *ops,
-		   uint32_t hw_submission, const char *name);
+		   uint32_t hw_submission, long timeout, const char *name);
 void amd_sched_fini(struct amd_gpu_scheduler *sched);
 
 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,