| author | Junwei Zhang <Jerry.Zhang@amd.com> | 2015-10-09 20:48:42 -0400 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2015-10-14 16:16:42 -0400 |
| commit | 2440ff2c9151120c8ae27de6565b11831ee07e08 (patch) | |
| tree | 2e36ced49bf5a0441ac7ba4c4d573c0c065bc358 /drivers/gpu/drm/amd | |
| parent | d6c10f6b81e340e7d2d337183517a6da739d5ba2 (diff) | |
drm/amdgpu: add timer to fence to detect scheduler lockup
Change-Id: I67e987db0efdca28faa80b332b75571192130d33
Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: David Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 14 |
| -rw-r--r-- | drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 43 |
| -rw-r--r-- | drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 |
3 files changed, 61 insertions, 3 deletions
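
In short: when the GPU scheduler is enabled and `amdgpu_lockup_timeout` is non-zero, every fence handed to the hardware now arms a delayed work item. If the fence signals in time, `amd_sched_process_job()` cancels the work and unlinks the fence; if the timer fires first, `amd_sched_fence_work_func()` reports the lockup and drains the list of still-pending fences. A timeout of zero is mapped to `MAX_SCHEDULE_TIMEOUT`, which the scheduler treats as "watchdog disabled".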
```diff
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index b3fc26c59787..fcad7e060938 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -628,8 +628,20 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 	init_waitqueue_head(&ring->fence_drv.fence_queue);
 
 	if (amdgpu_enable_scheduler) {
+		long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+		if (timeout == 0) {
+			/*
+			 * FIXME:
+			 * Delayed workqueue cannot use it directly,
+			 * so the scheduler will not use delayed workqueue if
+			 * MAX_SCHEDULE_TIMEOUT is set.
+			 * Currently keep it simple and silly.
+			 */
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		}
 		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
-				   amdgpu_sched_hw_submission, ring->name);
+				   amdgpu_sched_hw_submission,
+				   timeout, ring->name);
 		if (r) {
 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
 				  ring->name);
```
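
The timeout plumbing above is easy to misread: `amdgpu_lockup_timeout` is given in milliseconds, and a converted value of 0 means "no lockup detection", which the patch encodes as `MAX_SCHEDULE_TIMEOUT` so the scheduler never arms the delayed work. Below is a minimal userspace sketch of that mapping; `HZ` and the simplified `msecs_to_jiffies()` are stand-ins for the kernel definitions (the real helper rounds up), while `MAX_SCHEDULE_TIMEOUT` really is `LONG_MAX` in the kernel:

```c
#include <limits.h>
#include <stdio.h>

#define HZ 250                        /* assumed tick rate for the example */
#define MAX_SCHEDULE_TIMEOUT LONG_MAX /* matches the kernel definition */

/* Simplified stand-in for the kernel's msecs_to_jiffies(), which also
 * rounds up and guards against overflow. */
static long msecs_to_jiffies(unsigned int ms)
{
        return ((long)ms * HZ) / 1000;
}

/* Mirrors the timeout selection in amdgpu_fence_driver_init_ring():
 * 0 ms means "lockup detection disabled". */
static long pick_sched_timeout(unsigned int lockup_timeout_ms)
{
        long timeout = msecs_to_jiffies(lockup_timeout_ms);

        if (timeout == 0)
                timeout = MAX_SCHEDULE_TIMEOUT;
        return timeout;
}

int main(void)
{
        printf("%ld\n", pick_sched_timeout(0));     /* LONG_MAX: disabled */
        printf("%ld\n", pick_sched_timeout(10000)); /* 2500 jiffies at HZ=250 */
        return 0;
}
```

With `HZ = 250`, a 10000 ms lockup timeout becomes 2500 jiffies, while 0 maps to `LONG_MAX`, the sentinel the scheduler checks before arming any delayed work.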
```diff
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 3697eeeecf82..a413dee7cd19 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -327,19 +327,49 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 	struct amd_sched_fence *s_fence =
 		container_of(cb, struct amd_sched_fence, cb);
 	struct amd_gpu_scheduler *sched = s_fence->sched;
+	unsigned long flags;
 
 	atomic_dec(&sched->hw_rq_count);
 	amd_sched_fence_signal(s_fence);
+	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
+		cancel_delayed_work_sync(&s_fence->dwork);
+		spin_lock_irqsave(&sched->fence_list_lock, flags);
+		list_del_init(&s_fence->list);
+		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+	}
 	fence_put(&s_fence->base);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
+static void amd_sched_fence_work_func(struct work_struct *work)
+{
+	struct amd_sched_fence *s_fence =
+		container_of(work, struct amd_sched_fence, dwork.work);
+	struct amd_gpu_scheduler *sched = s_fence->sched;
+	struct amd_sched_fence *entity, *tmp;
+	unsigned long flags;
+
+	DRM_ERROR("[%s] scheduler is timeout!\n", sched->name);
+
+	/* Clean all pending fences */
+	list_for_each_entry_safe(entity, tmp, &sched->fence_list, list) {
+		DRM_ERROR(" fence no %d\n", entity->base.seqno);
+		cancel_delayed_work_sync(&entity->dwork);
+		spin_lock_irqsave(&sched->fence_list_lock, flags);
+		list_del_init(&entity->list);
+		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+		fence_put(&entity->base);
+	}
+}
+
 static int amd_sched_main(void *param)
 {
 	struct sched_param sparam = {.sched_priority = 1};
 	struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
 	int r, count;
 
+	spin_lock_init(&sched->fence_list_lock);
+	INIT_LIST_HEAD(&sched->fence_list);
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
@@ -347,6 +377,7 @@ static int amd_sched_main(void *param)
 		struct amd_sched_fence *s_fence;
 		struct amd_sched_job *sched_job;
 		struct fence *fence;
+		unsigned long flags;
 
 		wait_event_interruptible(sched->wake_up_worker,
 					 kthread_should_stop() ||
@@ -357,6 +388,15 @@
 
 		entity = sched_job->s_entity;
 		s_fence = sched_job->s_fence;
+
+		if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
+			INIT_DELAYED_WORK(&s_fence->dwork, amd_sched_fence_work_func);
+			schedule_delayed_work(&s_fence->dwork, sched->timeout);
+			spin_lock_irqsave(&sched->fence_list_lock, flags);
+			list_add_tail(&s_fence->list, &sched->fence_list);
+			spin_unlock_irqrestore(&sched->fence_list_lock, flags);
+		}
+
 		atomic_inc(&sched->hw_rq_count);
 		fence = sched->ops->run_job(sched_job);
 		if (fence) {
@@ -392,11 +432,12 @@ static int amd_sched_main(void *param)
  */
 int amd_sched_init(struct amd_gpu_scheduler *sched,
 		   struct amd_sched_backend_ops *ops,
-		   unsigned hw_submission, const char *name)
+		   unsigned hw_submission, long timeout, const char *name)
 {
 	sched->ops = ops;
 	sched->hw_submission_limit = hw_submission;
 	sched->name = name;
+	sched->timeout = timeout;
 	amd_sched_rq_init(&sched->sched_rq);
 	amd_sched_rq_init(&sched->kernel_rq);
 
```
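
The scheme in `gpu_scheduler.c` is a classic per-job watchdog: arm a timer when the job is pushed to the ring, cancel it when the fence signals, and treat expiry as a lockup. Below is a compact userspace model of that race, a sketch only: it assumes a 2-second timeout and substitutes a condition variable plus a thread for the delayed workqueue; `struct model_fence` and `watchdog()` are illustrative names, not driver API:

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct model_fence {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        bool signaled;
        int seqno;
};

/* Watchdog: wait up to 2 s for the fence to signal; if it does not,
 * report a lockup, as amd_sched_fence_work_func() does from the
 * delayed workqueue in the real driver. */
static void *watchdog(void *arg)
{
        struct model_fence *f = arg;
        struct timespec deadline;

        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += 2; /* assumed 2 s timeout */

        pthread_mutex_lock(&f->lock);
        while (!f->signaled) {
                if (pthread_cond_timedwait(&f->cond, &f->lock, &deadline)) {
                        printf("scheduler is timeout! fence no %d\n", f->seqno);
                        break;
                }
        }
        pthread_mutex_unlock(&f->lock);
        return NULL;
}

int main(void)
{
        struct model_fence f = {
                PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
                false, 1
        };
        pthread_t tid;

        pthread_create(&tid, NULL, watchdog, &f);

        sleep(1); /* the "hardware" finishes in time; try 3 to see the timeout */

        /* Analogue of amd_sched_fence_signal() + cancel_delayed_work_sync():
         * signalling wakes the watchdog, which then exits cleanly. */
        pthread_mutex_lock(&f.lock);
        f.signaled = true;
        pthread_cond_signal(&f.cond);
        pthread_mutex_unlock(&f.lock);

        pthread_join(tid, NULL);
        puts("fence signaled before the deadline; watchdog cancelled");
        return 0;
}
```

Build with `cc -pthread`; raising the `sleep()` past the deadline exercises the timeout branch, mirroring the lockup path in `amd_sched_fence_work_func()`.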
```diff
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 80b64dc22214..929e9aced041 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -68,6 +68,8 @@ struct amd_sched_fence {
 	struct amd_gpu_scheduler *sched;
 	spinlock_t lock;
 	void *owner;
+	struct delayed_work dwork;
+	struct list_head list;
 };
 
 struct amd_sched_job {
@@ -103,18 +105,21 @@ struct amd_sched_backend_ops {
 struct amd_gpu_scheduler {
 	struct amd_sched_backend_ops *ops;
 	uint32_t hw_submission_limit;
+	long timeout;
 	const char *name;
 	struct amd_sched_rq sched_rq;
 	struct amd_sched_rq kernel_rq;
 	wait_queue_head_t wake_up_worker;
 	wait_queue_head_t job_scheduled;
 	atomic_t hw_rq_count;
+	struct list_head fence_list;
+	spinlock_t fence_list_lock;
 	struct task_struct *thread;
 };
 
 int amd_sched_init(struct amd_gpu_scheduler *sched,
 		   struct amd_sched_backend_ops *ops,
-		   uint32_t hw_submission, const char *name);
+		   uint32_t hw_submission, long timeout, const char *name);
 void amd_sched_fini(struct amd_gpu_scheduler *sched);
 
 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
```
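
One detail the header change enables: `dwork` and `list` are embedded in `struct amd_sched_fence` itself, so the timeout handler recovers the owning fence from the work pointer via `container_of()` (in the patch, `container_of(work, struct amd_sched_fence, dwork.work)`) with no fence-to-timer lookup table. Here is a standalone demo of that pattern; the stand-in types and the flattened `dwork` member are simplifications for the example:

```c
#include <stddef.h>
#include <stdio.h>

/* Classic offsetof-based container_of(), as in the kernel. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct delayed_work { int pending; }; /* one-field stand-in */

/* Stand-in for struct amd_sched_fence: the watchdog state lives
 * inside the fence object itself. */
struct sched_fence_model {
        int seqno;
        struct delayed_work dwork;
};

/* The handler receives only the embedded member and recovers the
 * owning fence, as amd_sched_fence_work_func() does. */
static void timeout_handler(struct delayed_work *work)
{
        struct sched_fence_model *f =
                container_of(work, struct sched_fence_model, dwork);

        printf("fence no %d timed out\n", f->seqno);
}

int main(void)
{
        struct sched_fence_model f = { .seqno = 42, .dwork = { 0 } };

        timeout_handler(&f.dwork); /* handler sees only the member pointer */
        return 0;
}
```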
