aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c124
1 files changed, 88 insertions, 36 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 34ff2459ceea..9d73d2216adc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -332,7 +332,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
332 const struct i915_error_state_file_priv *error_priv) 332 const struct i915_error_state_file_priv *error_priv)
333{ 333{
334 struct drm_device *dev = error_priv->dev; 334 struct drm_device *dev = error_priv->dev;
335 struct drm_i915_private *dev_priv = dev->dev_private; 335 struct drm_i915_private *dev_priv = to_i915(dev);
336 struct drm_i915_error_state *error = error_priv->error; 336 struct drm_i915_error_state *error = error_priv->error;
337 struct drm_i915_error_object *obj; 337 struct drm_i915_error_object *obj;
338 int i, j, offset, elt; 338 int i, j, offset, elt;
@@ -463,6 +463,18 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
463 } 463 }
464 } 464 }
465 465
466 if (error->ring[i].num_waiters) {
467 err_printf(m, "%s --- %d waiters\n",
468 dev_priv->engine[i].name,
469 error->ring[i].num_waiters);
470 for (j = 0; j < error->ring[i].num_waiters; j++) {
471 err_printf(m, " seqno 0x%08x for %s [%d]\n",
472 error->ring[i].waiters[j].seqno,
473 error->ring[i].waiters[j].comm,
474 error->ring[i].waiters[j].pid);
475 }
476 }
477
466 if ((obj = error->ring[i].ringbuffer)) { 478 if ((obj = error->ring[i].ringbuffer)) {
467 err_printf(m, "%s --- ringbuffer = 0x%08x\n", 479 err_printf(m, "%s --- ringbuffer = 0x%08x\n",
468 dev_priv->engine[i].name, 480 dev_priv->engine[i].name,
@@ -488,7 +500,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
488 hws_page[elt+1], 500 hws_page[elt+1],
489 hws_page[elt+2], 501 hws_page[elt+2],
490 hws_page[elt+3]); 502 hws_page[elt+3]);
491 offset += 16; 503 offset += 16;
492 } 504 }
493 } 505 }
494 506
@@ -605,8 +617,9 @@ static void i915_error_state_free(struct kref *error_ref)
605 i915_error_object_free(error->ring[i].ringbuffer); 617 i915_error_object_free(error->ring[i].ringbuffer);
606 i915_error_object_free(error->ring[i].hws_page); 618 i915_error_object_free(error->ring[i].hws_page);
607 i915_error_object_free(error->ring[i].ctx); 619 i915_error_object_free(error->ring[i].ctx);
608 kfree(error->ring[i].requests);
609 i915_error_object_free(error->ring[i].wa_ctx); 620 i915_error_object_free(error->ring[i].wa_ctx);
621 kfree(error->ring[i].requests);
622 kfree(error->ring[i].waiters);
610 } 623 }
611 624
612 i915_error_object_free(error->semaphore_obj); 625 i915_error_object_free(error->semaphore_obj);
@@ -892,6 +905,48 @@ static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv,
892 } 905 }
893} 906}
894 907
908static void engine_record_waiters(struct intel_engine_cs *engine,
909 struct drm_i915_error_ring *ering)
910{
911 struct intel_breadcrumbs *b = &engine->breadcrumbs;
912 struct drm_i915_error_waiter *waiter;
913 struct rb_node *rb;
914 int count;
915
916 ering->num_waiters = 0;
917 ering->waiters = NULL;
918
919 spin_lock(&b->lock);
920 count = 0;
921 for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
922 count++;
923 spin_unlock(&b->lock);
924
925 waiter = NULL;
926 if (count)
927 waiter = kmalloc_array(count,
928 sizeof(struct drm_i915_error_waiter),
929 GFP_ATOMIC);
930 if (!waiter)
931 return;
932
933 ering->waiters = waiter;
934
935 spin_lock(&b->lock);
936 for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
937 struct intel_wait *w = container_of(rb, typeof(*w), node);
938
939 strcpy(waiter->comm, w->tsk->comm);
940 waiter->pid = w->tsk->pid;
941 waiter->seqno = w->seqno;
942 waiter++;
943
944 if (++ering->num_waiters == count)
945 break;
946 }
947 spin_unlock(&b->lock);
948}
949
895static void i915_record_ring_state(struct drm_i915_private *dev_priv, 950static void i915_record_ring_state(struct drm_i915_private *dev_priv,
896 struct drm_i915_error_state *error, 951 struct drm_i915_error_state *error,
897 struct intel_engine_cs *engine, 952 struct intel_engine_cs *engine,
@@ -926,10 +981,10 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv,
926 ering->instdone = I915_READ(GEN2_INSTDONE); 981 ering->instdone = I915_READ(GEN2_INSTDONE);
927 } 982 }
928 983
929 ering->waiting = waitqueue_active(&engine->irq_queue); 984 ering->waiting = intel_engine_has_waiter(engine);
930 ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); 985 ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
931 ering->acthd = intel_ring_get_active_head(engine); 986 ering->acthd = intel_ring_get_active_head(engine);
932 ering->seqno = engine->get_seqno(engine); 987 ering->seqno = intel_engine_get_seqno(engine);
933 ering->last_seqno = engine->last_submitted_seqno; 988 ering->last_seqno = engine->last_submitted_seqno;
934 ering->start = I915_READ_START(engine); 989 ering->start = I915_READ_START(engine);
935 ering->head = I915_READ_HEAD(engine); 990 ering->head = I915_READ_HEAD(engine);
@@ -1022,7 +1077,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1022 1077
1023 for (i = 0; i < I915_NUM_ENGINES; i++) { 1078 for (i = 0; i < I915_NUM_ENGINES; i++) {
1024 struct intel_engine_cs *engine = &dev_priv->engine[i]; 1079 struct intel_engine_cs *engine = &dev_priv->engine[i];
1025 struct intel_ringbuffer *rbuf;
1026 1080
1027 error->ring[i].pid = -1; 1081 error->ring[i].pid = -1;
1028 1082
@@ -1032,14 +1086,15 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1032 error->ring[i].valid = true; 1086 error->ring[i].valid = true;
1033 1087
1034 i915_record_ring_state(dev_priv, error, engine, &error->ring[i]); 1088 i915_record_ring_state(dev_priv, error, engine, &error->ring[i]);
1089 engine_record_waiters(engine, &error->ring[i]);
1035 1090
1036 request = i915_gem_find_active_request(engine); 1091 request = i915_gem_find_active_request(engine);
1037 if (request) { 1092 if (request) {
1038 struct i915_address_space *vm; 1093 struct i915_address_space *vm;
1094 struct intel_ringbuffer *rb;
1039 1095
1040 vm = request->ctx && request->ctx->ppgtt ? 1096 vm = request->ctx->ppgtt ?
1041 &request->ctx->ppgtt->base : 1097 &request->ctx->ppgtt->base : &ggtt->base;
1042 &ggtt->base;
1043 1098
1044 /* We need to copy these to an anonymous buffer 1099 /* We need to copy these to an anonymous buffer
1045 * as the simplest method to avoid being overwritten 1100 * as the simplest method to avoid being overwritten
@@ -1066,26 +1121,17 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
1066 } 1121 }
1067 rcu_read_unlock(); 1122 rcu_read_unlock();
1068 } 1123 }
1069 }
1070
1071 if (i915.enable_execlists) {
1072 /* TODO: This is only a small fix to keep basic error
1073 * capture working, but we need to add more information
1074 * for it to be useful (e.g. dump the context being
1075 * executed).
1076 */
1077 if (request)
1078 rbuf = request->ctx->engine[engine->id].ringbuf;
1079 else
1080 rbuf = dev_priv->kernel_context->engine[engine->id].ringbuf;
1081 } else
1082 rbuf = engine->buffer;
1083 1124
1084 error->ring[i].cpu_ring_head = rbuf->head; 1125 error->simulated |=
1085 error->ring[i].cpu_ring_tail = rbuf->tail; 1126 request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
1086 1127
1087 error->ring[i].ringbuffer = 1128 rb = request->ringbuf;
1088 i915_error_ggtt_object_create(dev_priv, rbuf->obj); 1129 error->ring[i].cpu_ring_head = rb->head;
1130 error->ring[i].cpu_ring_tail = rb->tail;
1131 error->ring[i].ringbuffer =
1132 i915_error_ggtt_object_create(dev_priv,
1133 rb->obj);
1134 }
1089 1135
1090 error->ring[i].hws_page = 1136 error->ring[i].hws_page =
1091 i915_error_ggtt_object_create(dev_priv, 1137 i915_error_ggtt_object_create(dev_priv,
@@ -1230,7 +1276,7 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
1230static void i915_capture_reg_state(struct drm_i915_private *dev_priv, 1276static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
1231 struct drm_i915_error_state *error) 1277 struct drm_i915_error_state *error)
1232{ 1278{
1233 struct drm_device *dev = dev_priv->dev; 1279 struct drm_device *dev = &dev_priv->drm;
1234 int i; 1280 int i;
1235 1281
1236 /* General organization 1282 /* General organization
@@ -1355,6 +1401,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
1355 struct drm_i915_error_state *error; 1401 struct drm_i915_error_state *error;
1356 unsigned long flags; 1402 unsigned long flags;
1357 1403
1404 if (READ_ONCE(dev_priv->gpu_error.first_error))
1405 return;
1406
1358 /* Account for pipe specific data like PIPE*STAT */ 1407 /* Account for pipe specific data like PIPE*STAT */
1359 error = kzalloc(sizeof(*error), GFP_ATOMIC); 1408 error = kzalloc(sizeof(*error), GFP_ATOMIC);
1360 if (!error) { 1409 if (!error) {
@@ -1378,12 +1427,14 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
1378 i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); 1427 i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
1379 DRM_INFO("%s\n", error->error_msg); 1428 DRM_INFO("%s\n", error->error_msg);
1380 1429
1381 spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); 1430 if (!error->simulated) {
1382 if (dev_priv->gpu_error.first_error == NULL) { 1431 spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
1383 dev_priv->gpu_error.first_error = error; 1432 if (!dev_priv->gpu_error.first_error) {
1384 error = NULL; 1433 dev_priv->gpu_error.first_error = error;
1434 error = NULL;
1435 }
1436 spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
1385 } 1437 }
1386 spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
1387 1438
1388 if (error) { 1439 if (error) {
1389 i915_error_state_free(&error->ref); 1440 i915_error_state_free(&error->ref);
@@ -1395,7 +1446,8 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
1395 DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); 1446 DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n");
1396 DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); 1447 DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
1397 DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); 1448 DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n");
1398 DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", dev_priv->dev->primary->index); 1449 DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n",
1450 dev_priv->drm.primary->index);
1399 warned = true; 1451 warned = true;
1400 } 1452 }
1401} 1453}
@@ -1403,7 +1455,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
1403void i915_error_state_get(struct drm_device *dev, 1455void i915_error_state_get(struct drm_device *dev,
1404 struct i915_error_state_file_priv *error_priv) 1456 struct i915_error_state_file_priv *error_priv)
1405{ 1457{
1406 struct drm_i915_private *dev_priv = dev->dev_private; 1458 struct drm_i915_private *dev_priv = to_i915(dev);
1407 1459
1408 spin_lock_irq(&dev_priv->gpu_error.lock); 1460 spin_lock_irq(&dev_priv->gpu_error.lock);
1409 error_priv->error = dev_priv->gpu_error.first_error; 1461 error_priv->error = dev_priv->gpu_error.first_error;
@@ -1421,7 +1473,7 @@ void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
1421 1473
1422void i915_destroy_error_state(struct drm_device *dev) 1474void i915_destroy_error_state(struct drm_device *dev)
1423{ 1475{
1424 struct drm_i915_private *dev_priv = dev->dev_private; 1476 struct drm_i915_private *dev_priv = to_i915(dev);
1425 struct drm_i915_error_state *error; 1477 struct drm_i915_error_state *error;
1426 1478
1427 spin_lock_irq(&dev_priv->gpu_error.lock); 1479 spin_lock_irq(&dev_priv->gpu_error.lock);