aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2014-01-27 08:52:34 -0500
committer Daniel Vetter <daniel.vetter@ffwll.ch> 2014-01-27 11:10:33 -0500
commit 372fbb8e3927fc76b0f842d8eb8a798a71d8960f (patch)
tree 1a3b00181c187f3be15c82835d3e16f3aa6e3728
parent 22accca01713b13dac386ca90b787aadf88f6551 (diff)
drm/i915: Decouple GPU error reporting from ring initialisation
Currently we report through our error state only the rings that have been initialised (as detected by ring->obj). This check is done after the GPU reset and ring re-initialisation, which means that the software state may not be the same as when we captured the hardware error and we may not print out any of the vital information for debugging the hang.

This (and the implied object leak) is a regression from

commit 3d57e5bd1284f44e325f3a52d966259ed42f9e05
Author: Ben Widawsky <ben@bwidawsk.net>
Date: Mon Oct 14 10:01:36 2013 -0700

    drm/i915: Do a fuller init after reset

Note that we are already starting to get bug reports with incomplete error states from 3.13, which also hampers debugging userspace driver issues.

v2: Prevent a NULL dereference on 830gm/845g after a GPU reset where the scratch obj may be NULL.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
References: https://bugs.freedesktop.org/show_bug.cgi?id=74094
Cc: stable@vger.kernel.org # please don't delay since it's a vital support/debug feature for the intel gfx stack in general
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
[danvet: Add a bit of fluff to make it clear we need this expedited in stable.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 1
-rw-r--r-- drivers/gpu/drm/i915/i915_gpu_error.c 22
2 files changed, 16 insertions, 7 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ff6f870d6621..98322053eb2a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -330,6 +330,7 @@ struct drm_i915_error_state {
330 u64 fence[I915_MAX_NUM_FENCES]; 330 u64 fence[I915_MAX_NUM_FENCES];
331 struct timeval time; 331 struct timeval time;
332 struct drm_i915_error_ring { 332 struct drm_i915_error_ring {
333 bool valid;
333 struct drm_i915_error_object { 334 struct drm_i915_error_object {
334 int page_count; 335 int page_count;
335 u32 gtt_offset; 336 u32 gtt_offset;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a707cca692e4..d7fd2fd2f0a5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -239,6 +239,9 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
239 unsigned ring) 239 unsigned ring)
240{ 240{
241 BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */ 241 BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */
242 if (!error->ring[ring].valid)
243 return;
244
242 err_printf(m, "%s command stream:\n", ring_str(ring)); 245 err_printf(m, "%s command stream:\n", ring_str(ring));
243 err_printf(m, " HEAD: 0x%08x\n", error->head[ring]); 246 err_printf(m, " HEAD: 0x%08x\n", error->head[ring]);
244 err_printf(m, " TAIL: 0x%08x\n", error->tail[ring]); 247 err_printf(m, " TAIL: 0x%08x\n", error->tail[ring]);
@@ -293,7 +296,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
293 struct drm_device *dev = error_priv->dev; 296 struct drm_device *dev = error_priv->dev;
294 drm_i915_private_t *dev_priv = dev->dev_private; 297 drm_i915_private_t *dev_priv = dev->dev_private;
295 struct drm_i915_error_state *error = error_priv->error; 298 struct drm_i915_error_state *error = error_priv->error;
296 struct intel_ring_buffer *ring;
297 int i, j, page, offset, elt; 299 int i, j, page, offset, elt;
298 300
299 if (!error) { 301 if (!error) {
@@ -328,7 +330,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
328 if (INTEL_INFO(dev)->gen == 7) 330 if (INTEL_INFO(dev)->gen == 7)
329 err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); 331 err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
330 332
331 for_each_ring(ring, dev_priv, i) 333 for (i = 0; i < ARRAY_SIZE(error->ring); i++)
332 i915_ring_error_state(m, dev, error, i); 334 i915_ring_error_state(m, dev, error, i);
333 335
334 if (error->active_bo) 336 if (error->active_bo)
@@ -385,8 +387,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
385 } 387 }
386 } 388 }
387 389
388 obj = error->ring[i].ctx; 390 if ((obj = error->ring[i].ctx)) {
389 if (obj) {
390 err_printf(m, "%s --- HW Context = 0x%08x\n", 391 err_printf(m, "%s --- HW Context = 0x%08x\n",
391 dev_priv->ring[i].name, 392 dev_priv->ring[i].name,
392 obj->gtt_offset); 393 obj->gtt_offset);
@@ -667,7 +668,8 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
667 return NULL; 668 return NULL;
668 669
669 obj = ring->scratch.obj; 670 obj = ring->scratch.obj;
670 if (acthd >= i915_gem_obj_ggtt_offset(obj) && 671 if (obj != NULL &&
672 acthd >= i915_gem_obj_ggtt_offset(obj) &&
671 acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size) 673 acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size)
672 return i915_error_object_create(dev_priv, obj); 674 return i915_error_object_create(dev_priv, obj);
673 } 675 }
@@ -775,11 +777,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
775 struct drm_i915_error_state *error) 777 struct drm_i915_error_state *error)
776{ 778{
777 struct drm_i915_private *dev_priv = dev->dev_private; 779 struct drm_i915_private *dev_priv = dev->dev_private;
778 struct intel_ring_buffer *ring;
779 struct drm_i915_gem_request *request; 780 struct drm_i915_gem_request *request;
780 int i, count; 781 int i, count;
781 782
782 for_each_ring(ring, dev_priv, i) { 783 for (i = 0; i < I915_NUM_RINGS; i++) {
784 struct intel_ring_buffer *ring = &dev_priv->ring[i];
785
786 if (ring->dev == NULL)
787 continue;
788
789 error->ring[i].valid = true;
790
783 i915_record_ring_state(dev, error, ring); 791 i915_record_ring_state(dev, error, ring);
784 792
785 error->ring[i].batchbuffer = 793 error->ring[i].batchbuffer =