aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRob Clark <robdclark@gmail.com>2015-06-07 13:46:04 -0400
committerRob Clark <robdclark@gmail.com>2015-06-11 13:11:06 -0400
commit1a370be9ac51129e40b0ed7fa71d2b2b92bc47e5 (patch)
tree678bad05754937920c7d09fb66dd5a356ed9f172
parent56c2da8338d5cdfc0695eeed96ebe03cf2ac0321 (diff)
drm/msm: restart queued submits after hang
Track the list of in-flight submits. If the gpu hangs, retire up to and including the offending submit, and then re-submit the remainder. This way, for concurrently running piglit tests (for example), one failing test doesn't cause unrelated tests to fail simply because its submit was queued up after one that triggered a hang. Signed-off-by: Rob Clark <robdclark@gmail.com>
-rw-r--r--drivers/gpu/drm/msm/msm_gem.h1
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c1
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c49
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h2
4 files changed, 49 insertions, 4 deletions
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 85d481e29276..6fc59bfeedeb 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -96,6 +96,7 @@ static inline uint32_t msm_gem_fence(struct msm_gem_object *msm_obj,
96struct msm_gem_submit { 96struct msm_gem_submit {
97 struct drm_device *dev; 97 struct drm_device *dev;
98 struct msm_gpu *gpu; 98 struct msm_gpu *gpu;
99 struct list_head node; /* node in gpu submit_list */
99 struct list_head bo_list; 100 struct list_head bo_list;
100 struct ww_acquire_ctx ticket; 101 struct ww_acquire_ctx ticket;
101 uint32_t fence; 102 uint32_t fence;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index cd0554f68316..6d7cd3fe21e7 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -314,7 +314,6 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool fail)
314 } 314 }
315 315
316 ww_acquire_fini(&submit->ticket); 316 ww_acquire_fini(&submit->ticket);
317 kfree(submit);
318} 317}
319 318
320int msm_ioctl_gem_submit(struct drm_device *dev, void *data, 319int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 4016aef56c50..8f70d9248ac5 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -265,6 +265,8 @@ static void inactive_start(struct msm_gpu *gpu)
265 * Hangcheck detection for locked gpu: 265 * Hangcheck detection for locked gpu:
266 */ 266 */
267 267
268static void retire_submits(struct msm_gpu *gpu, uint32_t fence);
269
268static void recover_worker(struct work_struct *work) 270static void recover_worker(struct work_struct *work)
269{ 271{
270 struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); 272 struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
@@ -274,8 +276,19 @@ static void recover_worker(struct work_struct *work)
274 276
275 mutex_lock(&dev->struct_mutex); 277 mutex_lock(&dev->struct_mutex);
276 if (msm_gpu_active(gpu)) { 278 if (msm_gpu_active(gpu)) {
279 struct msm_gem_submit *submit;
280 uint32_t fence = gpu->funcs->last_fence(gpu);
281
282 /* retire completed submits, plus the one that hung: */
283 retire_submits(gpu, fence + 1);
284
277 inactive_cancel(gpu); 285 inactive_cancel(gpu);
278 gpu->funcs->recover(gpu); 286 gpu->funcs->recover(gpu);
287
288 /* replay the remaining submits after the one that hung: */
289 list_for_each_entry(submit, &gpu->submit_list, node) {
290 gpu->funcs->submit(gpu, submit, NULL);
291 }
279 } 292 }
280 mutex_unlock(&dev->struct_mutex); 293 mutex_unlock(&dev->struct_mutex);
281 294
@@ -418,6 +431,27 @@ out:
418 * Cmdstream submission/retirement: 431 * Cmdstream submission/retirement:
419 */ 432 */
420 433
434static void retire_submits(struct msm_gpu *gpu, uint32_t fence)
435{
436 struct drm_device *dev = gpu->dev;
437
438 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
439
440 while (!list_empty(&gpu->submit_list)) {
441 struct msm_gem_submit *submit;
442
443 submit = list_first_entry(&gpu->submit_list,
444 struct msm_gem_submit, node);
445
446 if (submit->fence <= fence) {
447 list_del(&submit->node);
448 kfree(submit);
449 } else {
450 break;
451 }
452 }
453}
454
421static void retire_worker(struct work_struct *work) 455static void retire_worker(struct work_struct *work)
422{ 456{
423 struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); 457 struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
@@ -428,6 +462,8 @@ static void retire_worker(struct work_struct *work)
428 462
429 mutex_lock(&dev->struct_mutex); 463 mutex_lock(&dev->struct_mutex);
430 464
465 retire_submits(gpu, fence);
466
431 while (!list_empty(&gpu->active_list)) { 467 while (!list_empty(&gpu->active_list)) {
432 struct msm_gem_object *obj; 468 struct msm_gem_object *obj;
433 469
@@ -467,21 +503,22 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
467 struct msm_drm_private *priv = dev->dev_private; 503 struct msm_drm_private *priv = dev->dev_private;
468 int i, ret; 504 int i, ret;
469 505
506 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
507
470 submit->fence = ++priv->next_fence; 508 submit->fence = ++priv->next_fence;
471 509
472 gpu->submitted_fence = submit->fence; 510 gpu->submitted_fence = submit->fence;
473 511
474 inactive_cancel(gpu); 512 inactive_cancel(gpu);
475 513
514 list_add_tail(&submit->node, &gpu->submit_list);
515
476 msm_rd_dump_submit(submit); 516 msm_rd_dump_submit(submit);
477 517
478 gpu->submitted_fence = submit->fence; 518 gpu->submitted_fence = submit->fence;
479 519
480 update_sw_cntrs(gpu); 520 update_sw_cntrs(gpu);
481 521
482 ret = gpu->funcs->submit(gpu, submit, ctx);
483 priv->lastctx = ctx;
484
485 for (i = 0; i < submit->nr_bos; i++) { 522 for (i = 0; i < submit->nr_bos; i++) {
486 struct msm_gem_object *msm_obj = submit->bos[i].obj; 523 struct msm_gem_object *msm_obj = submit->bos[i].obj;
487 524
@@ -505,6 +542,10 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
505 if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) 542 if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
506 msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence); 543 msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
507 } 544 }
545
546 ret = gpu->funcs->submit(gpu, submit, ctx);
547 priv->lastctx = ctx;
548
508 hangcheck_timer_reset(gpu); 549 hangcheck_timer_reset(gpu);
509 550
510 return ret; 551 return ret;
@@ -545,6 +586,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
545 INIT_WORK(&gpu->inactive_work, inactive_worker); 586 INIT_WORK(&gpu->inactive_work, inactive_worker);
546 INIT_WORK(&gpu->recover_work, recover_worker); 587 INIT_WORK(&gpu->recover_work, recover_worker);
547 588
589 INIT_LIST_HEAD(&gpu->submit_list);
590
548 setup_timer(&gpu->inactive_timer, inactive_handler, 591 setup_timer(&gpu->inactive_timer, inactive_handler,
549 (unsigned long)gpu); 592 (unsigned long)gpu);
550 setup_timer(&gpu->hangcheck_timer, hangcheck_handler, 593 setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 7b3ec21d5d77..2bbe85a3d6f6 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -119,6 +119,8 @@ struct msm_gpu {
119 struct timer_list hangcheck_timer; 119 struct timer_list hangcheck_timer;
120 uint32_t hangcheck_fence; 120 uint32_t hangcheck_fence;
121 struct work_struct recover_work; 121 struct work_struct recover_work;
122
123 struct list_head submit_list;
122}; 124};
123 125
124static inline bool msm_gpu_active(struct msm_gpu *gpu) 126static inline bool msm_gpu_active(struct msm_gpu *gpu)