aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2014-02-25 10:11:24 -0500
committer Daniel Vetter <daniel.vetter@ffwll.ch> 2014-03-05 15:30:24 -0500
commit ab0e7ff9f2d0bfe139a2ed5bb6a36f8cbd4e0886 (patch)
tree 66a0a24af927cffbb34a25d0c0190d1a4507cecc /drivers/gpu/drm/i915
parent 8d9fc7fd2de6edc3b9c3f828a701bfa6891987e7 (diff)
drm/i915: Record pid/comm of hanging task
After finding the guilty batch and request, we can use it to find the process that submitted the batch and then add the culprit into the error state.

This is a slightly different approach from Ben's in that instead of adding the extra information into the struct i915_hw_context, we use the information already captured in struct drm_file which is then referenced from the request.

v2: Also capture the workaround buffer for gen2, so that we can compare its contents against the intended batch for the active request.
v3: Rebase (Mika)
v4: Check for null context (Chris)
    checkpatch warnings fixed

Link: http://lists.freedesktop.org/archives/intel-gfx/2013-August/032280.html
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> (v4)
Acked-by: Ben Widawsky <ben@bwidawsk.net>
Cc: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h | 6
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 1
-rw-r--r-- drivers/gpu/drm/i915/i915_gpu_error.c | 136
3 files changed, 86 insertions, 57 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fe4427be2e03..826fcaef25c1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -360,7 +360,7 @@ struct drm_i915_error_state {
360 int page_count; 360 int page_count;
361 u32 gtt_offset; 361 u32 gtt_offset;
362 u32 *pages[0]; 362 u32 *pages[0];
363 } *ringbuffer, *batchbuffer, *ctx, *hws_page; 363 } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
364 364
365 struct drm_i915_error_request { 365 struct drm_i915_error_request {
366 long jiffies; 366 long jiffies;
@@ -375,6 +375,9 @@ struct drm_i915_error_state {
375 u32 pp_dir_base; 375 u32 pp_dir_base;
376 }; 376 };
377 } vm_info; 377 } vm_info;
378
379 pid_t pid;
380 char comm[TASK_COMM_LEN];
378 } ring[I915_NUM_RINGS]; 381 } ring[I915_NUM_RINGS];
379 struct drm_i915_error_buffer { 382 struct drm_i915_error_buffer {
380 u32 size; 383 u32 size;
@@ -1797,6 +1800,7 @@ struct drm_i915_gem_request {
1797 1800
1798struct drm_i915_file_private { 1801struct drm_i915_file_private {
1799 struct drm_i915_private *dev_priv; 1802 struct drm_i915_private *dev_priv;
1803 struct drm_file *file;
1800 1804
1801 struct { 1805 struct {
1802 spinlock_t lock; 1806 spinlock_t lock;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c5a182be2eb0..6e17b45db850 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4857,6 +4857,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4857 4857
4858 file->driver_priv = file_priv; 4858 file->driver_priv = file_priv;
4859 file_priv->dev_priv = dev->dev_private; 4859 file_priv->dev_priv = dev->dev_private;
4860 file_priv->file = file;
4860 4861
4861 spin_lock_init(&file_priv->mm.lock); 4862 spin_lock_init(&file_priv->mm.lock);
4862 INIT_LIST_HEAD(&file_priv->mm.request_list); 4863 INIT_LIST_HEAD(&file_priv->mm.request_list);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index eed1b34eaf47..8b02498ee963 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -301,13 +301,28 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
301 va_end(args); 301 va_end(args);
302} 302}
303 303
304static void print_error_obj(struct drm_i915_error_state_buf *m,
305 struct drm_i915_error_object *obj)
306{
307 int page, offset, elt;
308
309 for (page = offset = 0; page < obj->page_count; page++) {
310 for (elt = 0; elt < PAGE_SIZE/4; elt++) {
311 err_printf(m, "%08x : %08x\n", offset,
312 obj->pages[page][elt]);
313 offset += 4;
314 }
315 }
316}
317
304int i915_error_state_to_str(struct drm_i915_error_state_buf *m, 318int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
305 const struct i915_error_state_file_priv *error_priv) 319 const struct i915_error_state_file_priv *error_priv)
306{ 320{
307 struct drm_device *dev = error_priv->dev; 321 struct drm_device *dev = error_priv->dev;
308 drm_i915_private_t *dev_priv = dev->dev_private; 322 drm_i915_private_t *dev_priv = dev->dev_private;
309 struct drm_i915_error_state *error = error_priv->error; 323 struct drm_i915_error_state *error = error_priv->error;
310 int i, j, page, offset, elt; 324 int i, j, offset, elt;
325 int max_hangcheck_score;
311 326
312 if (!error) { 327 if (!error) {
313 err_printf(m, "no error state collected\n"); 328 err_printf(m, "no error state collected\n");
@@ -317,6 +332,20 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
317 err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, 332 err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
318 error->time.tv_usec); 333 error->time.tv_usec);
319 err_printf(m, "Kernel: " UTS_RELEASE "\n"); 334 err_printf(m, "Kernel: " UTS_RELEASE "\n");
335 max_hangcheck_score = 0;
336 for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
337 if (error->ring[i].hangcheck_score > max_hangcheck_score)
338 max_hangcheck_score = error->ring[i].hangcheck_score;
339 }
340 for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
341 if (error->ring[i].hangcheck_score == max_hangcheck_score &&
342 error->ring[i].pid != -1) {
343 err_printf(m, "Active process (on ring %s): %s [%d]\n",
344 ring_str(i),
345 error->ring[i].comm,
346 error->ring[i].pid);
347 }
348 }
320 err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device); 349 err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device);
321 err_printf(m, "EIR: 0x%08x\n", error->eir); 350 err_printf(m, "EIR: 0x%08x\n", error->eir);
322 err_printf(m, "IER: 0x%08x\n", error->ier); 351 err_printf(m, "IER: 0x%08x\n", error->ier);
@@ -359,18 +388,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
359 for (i = 0; i < ARRAY_SIZE(error->ring); i++) { 388 for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
360 struct drm_i915_error_object *obj; 389 struct drm_i915_error_object *obj;
361 390
362 if ((obj = error->ring[i].batchbuffer)) { 391 obj = error->ring[i].batchbuffer;
363 err_printf(m, "%s --- gtt_offset = 0x%08x\n", 392 if (obj) {
364 dev_priv->ring[i].name, 393 err_puts(m, dev_priv->ring[i].name);
394 if (error->ring[i].pid != -1)
395 err_printf(m, " (submitted by %s [%d])",
396 error->ring[i].comm,
397 error->ring[i].pid);
398 err_printf(m, " --- gtt_offset = 0x%08x\n",
365 obj->gtt_offset); 399 obj->gtt_offset);
366 offset = 0; 400 print_error_obj(m, obj);
367 for (page = 0; page < obj->page_count; page++) { 401 }
368 for (elt = 0; elt < PAGE_SIZE/4; elt++) { 402
369 err_printf(m, "%08x : %08x\n", offset, 403 obj = error->ring[i].wa_batchbuffer;
370 obj->pages[page][elt]); 404 if (obj) {
371 offset += 4; 405 err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n",
372 } 406 dev_priv->ring[i].name, obj->gtt_offset);
373 } 407 print_error_obj(m, obj);
374 } 408 }
375 409
376 if (error->ring[i].num_requests) { 410 if (error->ring[i].num_requests) {
@@ -389,15 +423,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
389 err_printf(m, "%s --- ringbuffer = 0x%08x\n", 423 err_printf(m, "%s --- ringbuffer = 0x%08x\n",
390 dev_priv->ring[i].name, 424 dev_priv->ring[i].name,
391 obj->gtt_offset); 425 obj->gtt_offset);
392 offset = 0; 426 print_error_obj(m, obj);
393 for (page = 0; page < obj->page_count; page++) {
394 for (elt = 0; elt < PAGE_SIZE/4; elt++) {
395 err_printf(m, "%08x : %08x\n",
396 offset,
397 obj->pages[page][elt]);
398 offset += 4;
399 }
400 }
401 } 427 }
402 428
403 if ((obj = error->ring[i].hws_page)) { 429 if ((obj = error->ring[i].hws_page)) {
@@ -713,39 +739,6 @@ static void i915_gem_record_fences(struct drm_device *dev,
713 } 739 }
714} 740}
715 741
716static struct drm_i915_error_object *
717i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
718 struct intel_ring_buffer *ring)
719{
720 struct drm_i915_gem_request *request;
721
722 if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
723 struct drm_i915_gem_object *obj;
724 u32 acthd = I915_READ(ACTHD);
725
726 if (WARN_ON(ring->id != RCS))
727 return NULL;
728
729 obj = ring->scratch.obj;
730 if (obj != NULL &&
731 acthd >= i915_gem_obj_ggtt_offset(obj) &&
732 acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size)
733 return i915_error_ggtt_object_create(dev_priv, obj);
734 }
735
736 request = i915_gem_find_active_request(ring);
737 if (request == NULL)
738 return NULL;
739
740 /* We need to copy these to an anonymous buffer as the simplest
741 * method to avoid being overwritten by userspace.
742 */
743 return i915_error_object_create(dev_priv, request->batch_obj,
744 request->ctx ?
745 request->ctx->vm :
746 &dev_priv->gtt.base);
747}
748
749static void i915_record_ring_state(struct drm_device *dev, 742static void i915_record_ring_state(struct drm_device *dev,
750 struct intel_ring_buffer *ring, 743 struct intel_ring_buffer *ring,
751 struct drm_i915_error_ring *ering) 744 struct drm_i915_error_ring *ering)
@@ -894,8 +887,39 @@ static void i915_gem_record_rings(struct drm_device *dev,
894 887
895 i915_record_ring_state(dev, ring, &error->ring[i]); 888 i915_record_ring_state(dev, ring, &error->ring[i]);
896 889
897 error->ring[i].batchbuffer = 890 error->ring[i].pid = -1;
898 i915_error_first_batchbuffer(dev_priv, ring); 891 request = i915_gem_find_active_request(ring);
892 if (request) {
893 /* We need to copy these to an anonymous buffer
894 * as the simplest method to avoid being overwritten
895 * by userspace.
896 */
897 error->ring[i].batchbuffer =
898 i915_error_object_create(dev_priv,
899 request->batch_obj,
900 request->ctx ?
901 request->ctx->vm :
902 &dev_priv->gtt.base);
903
904 if (HAS_BROKEN_CS_TLB(dev_priv->dev) &&
905 ring->scratch.obj)
906 error->ring[i].wa_batchbuffer =
907 i915_error_ggtt_object_create(dev_priv,
908 ring->scratch.obj);
909
910 if (request->file_priv) {
911 struct task_struct *task;
912
913 rcu_read_lock();
914 task = pid_task(request->file_priv->file->pid,
915 PIDTYPE_PID);
916 if (task) {
917 strcpy(error->ring[i].comm, task->comm);
918 error->ring[i].pid = task->pid;
919 }
920 rcu_read_unlock();
921 }
922 }
899 923
900 error->ring[i].ringbuffer = 924 error->ring[i].ringbuffer =
901 i915_error_ggtt_object_create(dev_priv, ring->obj); 925 i915_error_ggtt_object_create(dev_priv, ring->obj);