aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2016-09-09 09:11:51 -0400
committerChris Wilson <chris@chris-wilson.co.uk>2016-09-09 09:23:04 -0400
commit221fe7994554cc3985fc5d761ed7e44dcae0fa52 (patch)
tree797b1e03563c410b3fcb2e700f628f5ad5cbf29f /drivers/gpu/drm
parent22dd3bb9190566c7c3b80edb7dea67d1e21d0f91 (diff)
drm/i915: Perform a direct reset of the GPU from the waiter
If a waiter is holding the struct_mutex, then the reset worker cannot reset the GPU until the waiter returns. We do not want to return -EAGAIN from i915_wait_request as that breaks delicate operations like i915_vma_unbind() which often cannot be restarted easily, and returning -EIO is just as useless (and has in the past proven dangerous). The remaining WARN_ON(i915_wait_request) serve as a valuable reminder that handling errors from an indefinite wait is tricky. We can keep the current semantic that, once we know a reset is complete, so is the request, by performing the reset ourselves if we hold the mutex. uevent emission is still handled by the reset worker, so it may appear slightly out of order with respect to the actual reset (and concurrent use of the device). Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20160909131201.16673-11-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c11
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h15
-rw-r--r--drivers/gpu/drm/i915/i915_gem_request.c29
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c2
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c3
5 files changed, 40 insertions, 20 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 47a676d859db..ff4173e6e298 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1729,6 +1729,8 @@ int i915_resume_switcheroo(struct drm_device *dev)
1729 * Reset the chip. Useful if a hang is detected. Returns zero on successful 1729 * Reset the chip. Useful if a hang is detected. Returns zero on successful
1730 * reset or otherwise an error code. 1730 * reset or otherwise an error code.
1731 * 1731 *
1732 * Caller must hold the struct_mutex.
1733 *
1732 * Procedure is fairly simple: 1734 * Procedure is fairly simple:
1733 * - reset the chip using the reset reg 1735 * - reset the chip using the reset reg
1734 * - re-init context state 1736 * - re-init context state
@@ -1743,7 +1745,10 @@ int i915_reset(struct drm_i915_private *dev_priv)
1743 struct i915_gpu_error *error = &dev_priv->gpu_error; 1745 struct i915_gpu_error *error = &dev_priv->gpu_error;
1744 int ret; 1746 int ret;
1745 1747
1746 mutex_lock(&dev->struct_mutex); 1748 lockdep_assert_held(&dev->struct_mutex);
1749
1750 if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
1751 return test_bit(I915_WEDGED, &error->flags) ? -EIO : 0;
1747 1752
1748 /* Clear any previous failed attempts at recovery. Time to try again. */ 1753 /* Clear any previous failed attempts at recovery. Time to try again. */
1749 __clear_bit(I915_WEDGED, &error->flags); 1754 __clear_bit(I915_WEDGED, &error->flags);
@@ -1784,9 +1789,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
1784 goto error; 1789 goto error;
1785 } 1790 }
1786 1791
1787 clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
1788 mutex_unlock(&dev->struct_mutex);
1789
1790 /* 1792 /*
1791 * rps/rc6 re-init is necessary to restore state lost after the 1793 * rps/rc6 re-init is necessary to restore state lost after the
1792 * reset and the re-install of gt irqs. Skip for ironlake per 1794 * reset and the re-install of gt irqs. Skip for ironlake per
@@ -1800,7 +1802,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
1800 1802
1801error: 1803error:
1802 set_bit(I915_WEDGED, &error->flags); 1804 set_bit(I915_WEDGED, &error->flags);
1803 mutex_unlock(&dev->struct_mutex);
1804 return ret; 1805 return ret;
1805} 1806}
1806 1807
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 20b7743f8ec5..15f1977e356a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3863,7 +3863,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
3863 schedule_timeout_uninterruptible(remaining_jiffies); 3863 schedule_timeout_uninterruptible(remaining_jiffies);
3864 } 3864 }
3865} 3865}
3866static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) 3866
3867static inline bool
3868__i915_request_irq_complete(struct drm_i915_gem_request *req)
3867{ 3869{
3868 struct intel_engine_cs *engine = req->engine; 3870 struct intel_engine_cs *engine = req->engine;
3869 3871
@@ -3925,17 +3927,6 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
3925 return true; 3927 return true;
3926 } 3928 }
3927 3929
3928 /* We need to check whether any gpu reset happened in between
3929 * the request being submitted and now. If a reset has occurred,
3930 * the seqno will have been advance past ours and our request
3931 * is complete. If we are in the process of handling a reset,
3932 * the request is effectively complete as the rendering will
3933 * be discarded, but we need to return in order to drop the
3934 * struct_mutex.
3935 */
3936 if (i915_reset_in_progress(&req->i915->gpu_error))
3937 return true;
3938
3939 return false; 3930 return false;
3940} 3931}
3941 3932
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5f89801e6a16..64c370681a81 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -533,6 +533,16 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
533 engine->submit_request(request); 533 engine->submit_request(request);
534} 534}
535 535
536static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
537{
538 unsigned long flags;
539
540 spin_lock_irqsave(&q->lock, flags);
541 if (list_empty(&wait->task_list))
542 __add_wait_queue(q, wait);
543 spin_unlock_irqrestore(&q->lock, flags);
544}
545
536static unsigned long local_clock_us(unsigned int *cpu) 546static unsigned long local_clock_us(unsigned int *cpu)
537{ 547{
538 unsigned long t; 548 unsigned long t;
@@ -710,6 +720,25 @@ wakeup:
710 if (__i915_request_irq_complete(req)) 720 if (__i915_request_irq_complete(req))
711 break; 721 break;
712 722
723 /* If the GPU is hung, and we hold the lock, reset the GPU
724 * and then check for completion. On a full reset, the engine's
725 * HW seqno will be advanced passed us and we are complete.
726 * If we do a partial reset, we have to wait for the GPU to
727 * resume and update the breadcrumb.
728 *
729 * If we don't hold the mutex, we can just wait for the worker
730 * to come along and update the breadcrumb (either directly
731 * itself, or indirectly by recovering the GPU).
732 */
733 if (flags & I915_WAIT_LOCKED &&
734 i915_reset_in_progress(&req->i915->gpu_error)) {
735 __set_current_state(TASK_RUNNING);
736 i915_reset(req->i915);
737 reset_wait_queue(&req->i915->gpu_error.wait_queue,
738 &reset);
739 continue;
740 }
741
713 /* Only spin if we know the GPU is processing this request */ 742 /* Only spin if we know the GPU is processing this request */
714 if (i915_spin_request(req, state, 2)) 743 if (i915_spin_request(req, state, 2))
715 break; 744 break;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ed172d7beecb..2c7cb5041511 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2521,7 +2521,9 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
2521 * pending state and not properly drop locks, resulting in 2521 * pending state and not properly drop locks, resulting in
2522 * deadlocks with the reset work. 2522 * deadlocks with the reset work.
2523 */ 2523 */
2524 mutex_lock(&dev_priv->drm.struct_mutex);
2524 ret = i915_reset(dev_priv); 2525 ret = i915_reset(dev_priv);
2526 mutex_unlock(&dev_priv->drm.struct_mutex);
2525 2527
2526 intel_finish_reset(dev_priv); 2528 intel_finish_reset(dev_priv);
2527 2529
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8687a84a7ff3..d2d85fc869e1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2229,9 +2229,6 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
2229 if (ret) 2229 if (ret)
2230 return ret; 2230 return ret;
2231 2231
2232 if (i915_reset_in_progress(&target->i915->gpu_error))
2233 return -EAGAIN;
2234
2235 i915_gem_request_retire_upto(target); 2232 i915_gem_request_retire_upto(target);
2236 2233
2237 intel_ring_update_space(ring); 2234 intel_ring_update_space(ring);