diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.c | 124 |
1 files changed, 88 insertions, 36 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 34ff2459ceea..9d73d2216adc 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c | |||
@@ -332,7 +332,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, | |||
332 | const struct i915_error_state_file_priv *error_priv) | 332 | const struct i915_error_state_file_priv *error_priv) |
333 | { | 333 | { |
334 | struct drm_device *dev = error_priv->dev; | 334 | struct drm_device *dev = error_priv->dev; |
335 | struct drm_i915_private *dev_priv = dev->dev_private; | 335 | struct drm_i915_private *dev_priv = to_i915(dev); |
336 | struct drm_i915_error_state *error = error_priv->error; | 336 | struct drm_i915_error_state *error = error_priv->error; |
337 | struct drm_i915_error_object *obj; | 337 | struct drm_i915_error_object *obj; |
338 | int i, j, offset, elt; | 338 | int i, j, offset, elt; |
@@ -463,6 +463,18 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, | |||
463 | } | 463 | } |
464 | } | 464 | } |
465 | 465 | ||
466 | if (error->ring[i].num_waiters) { | ||
467 | err_printf(m, "%s --- %d waiters\n", | ||
468 | dev_priv->engine[i].name, | ||
469 | error->ring[i].num_waiters); | ||
470 | for (j = 0; j < error->ring[i].num_waiters; j++) { | ||
471 | err_printf(m, " seqno 0x%08x for %s [%d]\n", | ||
472 | error->ring[i].waiters[j].seqno, | ||
473 | error->ring[i].waiters[j].comm, | ||
474 | error->ring[i].waiters[j].pid); | ||
475 | } | ||
476 | } | ||
477 | |||
466 | if ((obj = error->ring[i].ringbuffer)) { | 478 | if ((obj = error->ring[i].ringbuffer)) { |
467 | err_printf(m, "%s --- ringbuffer = 0x%08x\n", | 479 | err_printf(m, "%s --- ringbuffer = 0x%08x\n", |
468 | dev_priv->engine[i].name, | 480 | dev_priv->engine[i].name, |
@@ -488,7 +500,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, | |||
488 | hws_page[elt+1], | 500 | hws_page[elt+1], |
489 | hws_page[elt+2], | 501 | hws_page[elt+2], |
490 | hws_page[elt+3]); | 502 | hws_page[elt+3]); |
491 | offset += 16; | 503 | offset += 16; |
492 | } | 504 | } |
493 | } | 505 | } |
494 | 506 | ||
@@ -605,8 +617,9 @@ static void i915_error_state_free(struct kref *error_ref) | |||
605 | i915_error_object_free(error->ring[i].ringbuffer); | 617 | i915_error_object_free(error->ring[i].ringbuffer); |
606 | i915_error_object_free(error->ring[i].hws_page); | 618 | i915_error_object_free(error->ring[i].hws_page); |
607 | i915_error_object_free(error->ring[i].ctx); | 619 | i915_error_object_free(error->ring[i].ctx); |
608 | kfree(error->ring[i].requests); | ||
609 | i915_error_object_free(error->ring[i].wa_ctx); | 620 | i915_error_object_free(error->ring[i].wa_ctx); |
621 | kfree(error->ring[i].requests); | ||
622 | kfree(error->ring[i].waiters); | ||
610 | } | 623 | } |
611 | 624 | ||
612 | i915_error_object_free(error->semaphore_obj); | 625 | i915_error_object_free(error->semaphore_obj); |
@@ -892,6 +905,48 @@ static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv, | |||
892 | } | 905 | } |
893 | } | 906 | } |
894 | 907 | ||
908 | static void engine_record_waiters(struct intel_engine_cs *engine, | ||
909 | struct drm_i915_error_ring *ering) | ||
910 | { | ||
911 | struct intel_breadcrumbs *b = &engine->breadcrumbs; | ||
912 | struct drm_i915_error_waiter *waiter; | ||
913 | struct rb_node *rb; | ||
914 | int count; | ||
915 | |||
916 | ering->num_waiters = 0; | ||
917 | ering->waiters = NULL; | ||
918 | |||
919 | spin_lock(&b->lock); | ||
920 | count = 0; | ||
921 | for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) | ||
922 | count++; | ||
923 | spin_unlock(&b->lock); | ||
924 | |||
925 | waiter = NULL; | ||
926 | if (count) | ||
927 | waiter = kmalloc_array(count, | ||
928 | sizeof(struct drm_i915_error_waiter), | ||
929 | GFP_ATOMIC); | ||
930 | if (!waiter) | ||
931 | return; | ||
932 | |||
933 | ering->waiters = waiter; | ||
934 | |||
935 | spin_lock(&b->lock); | ||
936 | for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { | ||
937 | struct intel_wait *w = container_of(rb, typeof(*w), node); | ||
938 | |||
939 | strcpy(waiter->comm, w->tsk->comm); | ||
940 | waiter->pid = w->tsk->pid; | ||
941 | waiter->seqno = w->seqno; | ||
942 | waiter++; | ||
943 | |||
944 | if (++ering->num_waiters == count) | ||
945 | break; | ||
946 | } | ||
947 | spin_unlock(&b->lock); | ||
948 | } | ||
949 | |||
895 | static void i915_record_ring_state(struct drm_i915_private *dev_priv, | 950 | static void i915_record_ring_state(struct drm_i915_private *dev_priv, |
896 | struct drm_i915_error_state *error, | 951 | struct drm_i915_error_state *error, |
897 | struct intel_engine_cs *engine, | 952 | struct intel_engine_cs *engine, |
@@ -926,10 +981,10 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, | |||
926 | ering->instdone = I915_READ(GEN2_INSTDONE); | 981 | ering->instdone = I915_READ(GEN2_INSTDONE); |
927 | } | 982 | } |
928 | 983 | ||
929 | ering->waiting = waitqueue_active(&engine->irq_queue); | 984 | ering->waiting = intel_engine_has_waiter(engine); |
930 | ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); | 985 | ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); |
931 | ering->acthd = intel_ring_get_active_head(engine); | 986 | ering->acthd = intel_ring_get_active_head(engine); |
932 | ering->seqno = engine->get_seqno(engine); | 987 | ering->seqno = intel_engine_get_seqno(engine); |
933 | ering->last_seqno = engine->last_submitted_seqno; | 988 | ering->last_seqno = engine->last_submitted_seqno; |
934 | ering->start = I915_READ_START(engine); | 989 | ering->start = I915_READ_START(engine); |
935 | ering->head = I915_READ_HEAD(engine); | 990 | ering->head = I915_READ_HEAD(engine); |
@@ -1022,7 +1077,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, | |||
1022 | 1077 | ||
1023 | for (i = 0; i < I915_NUM_ENGINES; i++) { | 1078 | for (i = 0; i < I915_NUM_ENGINES; i++) { |
1024 | struct intel_engine_cs *engine = &dev_priv->engine[i]; | 1079 | struct intel_engine_cs *engine = &dev_priv->engine[i]; |
1025 | struct intel_ringbuffer *rbuf; | ||
1026 | 1080 | ||
1027 | error->ring[i].pid = -1; | 1081 | error->ring[i].pid = -1; |
1028 | 1082 | ||
@@ -1032,14 +1086,15 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, | |||
1032 | error->ring[i].valid = true; | 1086 | error->ring[i].valid = true; |
1033 | 1087 | ||
1034 | i915_record_ring_state(dev_priv, error, engine, &error->ring[i]); | 1088 | i915_record_ring_state(dev_priv, error, engine, &error->ring[i]); |
1089 | engine_record_waiters(engine, &error->ring[i]); | ||
1035 | 1090 | ||
1036 | request = i915_gem_find_active_request(engine); | 1091 | request = i915_gem_find_active_request(engine); |
1037 | if (request) { | 1092 | if (request) { |
1038 | struct i915_address_space *vm; | 1093 | struct i915_address_space *vm; |
1094 | struct intel_ringbuffer *rb; | ||
1039 | 1095 | ||
1040 | vm = request->ctx && request->ctx->ppgtt ? | 1096 | vm = request->ctx->ppgtt ? |
1041 | &request->ctx->ppgtt->base : | 1097 | &request->ctx->ppgtt->base : &ggtt->base; |
1042 | &ggtt->base; | ||
1043 | 1098 | ||
1044 | /* We need to copy these to an anonymous buffer | 1099 | /* We need to copy these to an anonymous buffer |
1045 | * as the simplest method to avoid being overwritten | 1100 | * as the simplest method to avoid being overwritten |
@@ -1066,26 +1121,17 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, | |||
1066 | } | 1121 | } |
1067 | rcu_read_unlock(); | 1122 | rcu_read_unlock(); |
1068 | } | 1123 | } |
1069 | } | ||
1070 | |||
1071 | if (i915.enable_execlists) { | ||
1072 | /* TODO: This is only a small fix to keep basic error | ||
1073 | * capture working, but we need to add more information | ||
1074 | * for it to be useful (e.g. dump the context being | ||
1075 | * executed). | ||
1076 | */ | ||
1077 | if (request) | ||
1078 | rbuf = request->ctx->engine[engine->id].ringbuf; | ||
1079 | else | ||
1080 | rbuf = dev_priv->kernel_context->engine[engine->id].ringbuf; | ||
1081 | } else | ||
1082 | rbuf = engine->buffer; | ||
1083 | 1124 | ||
1084 | error->ring[i].cpu_ring_head = rbuf->head; | 1125 | error->simulated |= |
1085 | error->ring[i].cpu_ring_tail = rbuf->tail; | 1126 | request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE; |
1086 | 1127 | ||
1087 | error->ring[i].ringbuffer = | 1128 | rb = request->ringbuf; |
1088 | i915_error_ggtt_object_create(dev_priv, rbuf->obj); | 1129 | error->ring[i].cpu_ring_head = rb->head; |
1130 | error->ring[i].cpu_ring_tail = rb->tail; | ||
1131 | error->ring[i].ringbuffer = | ||
1132 | i915_error_ggtt_object_create(dev_priv, | ||
1133 | rb->obj); | ||
1134 | } | ||
1089 | 1135 | ||
1090 | error->ring[i].hws_page = | 1136 | error->ring[i].hws_page = |
1091 | i915_error_ggtt_object_create(dev_priv, | 1137 | i915_error_ggtt_object_create(dev_priv, |
@@ -1230,7 +1276,7 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, | |||
1230 | static void i915_capture_reg_state(struct drm_i915_private *dev_priv, | 1276 | static void i915_capture_reg_state(struct drm_i915_private *dev_priv, |
1231 | struct drm_i915_error_state *error) | 1277 | struct drm_i915_error_state *error) |
1232 | { | 1278 | { |
1233 | struct drm_device *dev = dev_priv->dev; | 1279 | struct drm_device *dev = &dev_priv->drm; |
1234 | int i; | 1280 | int i; |
1235 | 1281 | ||
1236 | /* General organization | 1282 | /* General organization |
@@ -1355,6 +1401,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, | |||
1355 | struct drm_i915_error_state *error; | 1401 | struct drm_i915_error_state *error; |
1356 | unsigned long flags; | 1402 | unsigned long flags; |
1357 | 1403 | ||
1404 | if (READ_ONCE(dev_priv->gpu_error.first_error)) | ||
1405 | return; | ||
1406 | |||
1358 | /* Account for pipe specific data like PIPE*STAT */ | 1407 | /* Account for pipe specific data like PIPE*STAT */ |
1359 | error = kzalloc(sizeof(*error), GFP_ATOMIC); | 1408 | error = kzalloc(sizeof(*error), GFP_ATOMIC); |
1360 | if (!error) { | 1409 | if (!error) { |
@@ -1378,12 +1427,14 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, | |||
1378 | i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); | 1427 | i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); |
1379 | DRM_INFO("%s\n", error->error_msg); | 1428 | DRM_INFO("%s\n", error->error_msg); |
1380 | 1429 | ||
1381 | spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); | 1430 | if (!error->simulated) { |
1382 | if (dev_priv->gpu_error.first_error == NULL) { | 1431 | spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); |
1383 | dev_priv->gpu_error.first_error = error; | 1432 | if (!dev_priv->gpu_error.first_error) { |
1384 | error = NULL; | 1433 | dev_priv->gpu_error.first_error = error; |
1434 | error = NULL; | ||
1435 | } | ||
1436 | spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); | ||
1385 | } | 1437 | } |
1386 | spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); | ||
1387 | 1438 | ||
1388 | if (error) { | 1439 | if (error) { |
1389 | i915_error_state_free(&error->ref); | 1440 | i915_error_state_free(&error->ref); |
@@ -1395,7 +1446,8 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, | |||
1395 | DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); | 1446 | DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); |
1396 | DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); | 1447 | DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); |
1397 | DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); | 1448 | DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n"); |
1398 | DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", dev_priv->dev->primary->index); | 1449 | DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n", |
1450 | dev_priv->drm.primary->index); | ||
1399 | warned = true; | 1451 | warned = true; |
1400 | } | 1452 | } |
1401 | } | 1453 | } |
@@ -1403,7 +1455,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, | |||
1403 | void i915_error_state_get(struct drm_device *dev, | 1455 | void i915_error_state_get(struct drm_device *dev, |
1404 | struct i915_error_state_file_priv *error_priv) | 1456 | struct i915_error_state_file_priv *error_priv) |
1405 | { | 1457 | { |
1406 | struct drm_i915_private *dev_priv = dev->dev_private; | 1458 | struct drm_i915_private *dev_priv = to_i915(dev); |
1407 | 1459 | ||
1408 | spin_lock_irq(&dev_priv->gpu_error.lock); | 1460 | spin_lock_irq(&dev_priv->gpu_error.lock); |
1409 | error_priv->error = dev_priv->gpu_error.first_error; | 1461 | error_priv->error = dev_priv->gpu_error.first_error; |
@@ -1421,7 +1473,7 @@ void i915_error_state_put(struct i915_error_state_file_priv *error_priv) | |||
1421 | 1473 | ||
1422 | void i915_destroy_error_state(struct drm_device *dev) | 1474 | void i915_destroy_error_state(struct drm_device *dev) |
1423 | { | 1475 | { |
1424 | struct drm_i915_private *dev_priv = dev->dev_private; | 1476 | struct drm_i915_private *dev_priv = to_i915(dev); |
1425 | struct drm_i915_error_state *error; | 1477 | struct drm_i915_error_state *error; |
1426 | 1478 | ||
1427 | spin_lock_irq(&dev_priv->gpu_error.lock); | 1479 | spin_lock_irq(&dev_priv->gpu_error.lock); |