aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
author Ben Gamari <bgamari.foss@gmail.com> 2009-09-14 17:48:44 -0400
committer Jesse Barnes <jbarnes@virtuousgeek.org> 2009-09-17 17:36:01 -0400
commit f65d94211e2bcba17faf05a6a3809af0e4217767 (patch)
tree a040777b0311b5c5a7a26f64c7d8ae9a6ac872f1 /drivers/gpu
parent 22be172423b0007a02a06d70db8aeb4d9e64c6b3 (diff)
drm/i915: Add hangcheck timer
We set a periodic timer to check on the GPU, resetting it every time a batch is completed. If the timer elapses, we check acthd. If acthd hasn't changed in two timer periods, we assume the chip is wedged. This is implemented in such a way that it leaves the option open to employ adaptive timer intervals in the future. One could wait until several timer periods have elapsed before declaring the chip dead. If the chip comes back after several periods but before the "dead" threshold, the timer interval or dead threshold could be raised. It is important to note that while checking for active requests, we need to account for the fact that requests are removed from the list (i.e. retired) in a deferred work queue handler. This means that merely checking for an empty request_list is insufficient; the list could be non-empty yet the GPU still idle, causing the hangcheck timer to incorrectly mark the GPU as wedged (it took me a while to figure that out---sigh...) Signed-off-by: Ben Gamari <bgamari.foss@gmail.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/i915/i915_dma.c | 3
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h | 7
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 8
-rw-r--r-- drivers/gpu/drm/i915/i915_irq.c | 49
4 files changed, 65 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 7a73b2941eb..08a5048335e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1447,6 +1447,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
1447 if (!IS_IGDNG(dev)) 1447 if (!IS_IGDNG(dev))
1448 intel_opregion_init(dev, 0); 1448 intel_opregion_init(dev, 0);
1449 1449
1450 setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
1451 (unsigned long) dev);
1450 return 0; 1452 return 0;
1451 1453
1452out_workqueue_free: 1454out_workqueue_free:
@@ -1467,6 +1469,7 @@ int i915_driver_unload(struct drm_device *dev)
1467 struct drm_i915_private *dev_priv = dev->dev_private; 1469 struct drm_i915_private *dev_priv = dev->dev_private;
1468 1470
1469 destroy_workqueue(dev_priv->wq); 1471 destroy_workqueue(dev_priv->wq);
1472 del_timer_sync(&dev_priv->hangcheck_timer);
1470 1473
1471 io_mapping_free(dev_priv->mm.gtt_mapping); 1474 io_mapping_free(dev_priv->mm.gtt_mapping);
1472 if (dev_priv->mm.gtt_mtrr >= 0) { 1475 if (dev_priv->mm.gtt_mtrr >= 0) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 933d832aeff..afbcaa9866f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -203,6 +203,12 @@ typedef struct drm_i915_private {
203 unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds; 203 unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds;
204 int vblank_pipe; 204 int vblank_pipe;
205 205
206 /* For hangcheck timer */
207#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */
208 struct timer_list hangcheck_timer;
209 int hangcheck_count;
210 uint32_t last_acthd;
211
206 bool cursor_needs_physical; 212 bool cursor_needs_physical;
207 213
208 struct drm_mm vram; 214 struct drm_mm vram;
@@ -620,6 +626,7 @@ extern int i915_emit_box(struct drm_device *dev,
620 int i, int DR1, int DR4); 626 int i, int DR1, int DR4);
621 627
622/* i915_irq.c */ 628/* i915_irq.c */
629void i915_hangcheck_elapsed(unsigned long data);
623extern int i915_irq_emit(struct drm_device *dev, void *data, 630extern int i915_irq_emit(struct drm_device *dev, void *data,
624 struct drm_file *file_priv); 631 struct drm_file *file_priv);
625extern int i915_irq_wait(struct drm_device *dev, void *data, 632extern int i915_irq_wait(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c70e91b51f6..579b3b04ff1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1584,8 +1584,11 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1584 1584
1585 } 1585 }
1586 1586
1587 if (was_empty && !dev_priv->mm.suspended) 1587 if (!dev_priv->mm.suspended) {
1588 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); 1588 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1589 if (was_empty)
1590 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1591 }
1589 return seqno; 1592 return seqno;
1590} 1593}
1591 1594
@@ -3896,6 +3899,7 @@ i915_gem_idle(struct drm_device *dev)
3896 * We need to replace this with a semaphore, or something. 3899 * We need to replace this with a semaphore, or something.
3897 */ 3900 */
3898 dev_priv->mm.suspended = 1; 3901 dev_priv->mm.suspended = 1;
3902 del_timer(&dev_priv->hangcheck_timer);
3899 3903
3900 /* Cancel the retire work handler, wait for it to finish if running 3904 /* Cancel the retire work handler, wait for it to finish if running
3901 */ 3905 */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6c89f2ff249..77e42e719d7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -601,6 +601,8 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
601 if (iir & I915_USER_INTERRUPT) { 601 if (iir & I915_USER_INTERRUPT) {
602 dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); 602 dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
603 DRM_WAKEUP(&dev_priv->irq_queue); 603 DRM_WAKEUP(&dev_priv->irq_queue);
604 dev_priv->hangcheck_count = 0;
605 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
604 } 606 }
605 607
606 if (pipea_stats & vblank_status) { 608 if (pipea_stats & vblank_status) {
@@ -880,6 +882,53 @@ int i915_vblank_swap(struct drm_device *dev, void *data,
880 return -EINVAL; 882 return -EINVAL;
881} 883}
882 884
/*
 * Return the request at the tail of dev_priv->mm.request_list, i.e. the
 * entry reached through the list head's ->prev pointer.
 *
 * NOTE(review): no emptiness check is made here. On an empty list the
 * head's ->prev would presumably refer back to the head itself, so the
 * result would not be a real request. The caller in
 * i915_hangcheck_elapsed() tests list_empty() before calling this; any
 * other caller should do the same -- confirm.
 */
885struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) {
886 drm_i915_private_t *dev_priv = dev->dev_private;
887 return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list);
888}
889
890/**
891 * This is called when the chip hasn't reported back with completed
892 * batchbuffers in a long time. The first time this is called we simply record
893 * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses
894 * again, we assume the chip is wedged and try to fix it.
 *
 * @data: the struct drm_device pointer, passed as an unsigned long (it is
 *        cast straight back to struct drm_device * below).
 *
 * Exit paths:
 *  - nothing outstanding (request list empty, or the current GEM seqno has
 *    passed the tail request's seqno): hangcheck_count is cleared and the
 *    timer is NOT re-armed by this function;
 *  - ACTHD equals last_acthd and hangcheck_count > 0: the GPU is reported
 *    hung, mm.wedged is set, i915_handle_error() is called and the timer
 *    is NOT re-armed by this function;
 *  - otherwise: the timer is re-armed DRM_I915_HANGCHECK_PERIOD jiffies
 *    ahead, hangcheck_count is cleared (ACTHD moved) or incremented
 *    (ACTHD unchanged), and ACTHD is stored in last_acthd.
895 */
896void i915_hangcheck_elapsed(unsigned long data)
897{
898 struct drm_device *dev = (struct drm_device *)data;
899 drm_i915_private_t *dev_priv = dev->dev_private;
900 uint32_t acthd;
901
 /* Sample ACTHD; devices matching IS_I965G() use the ACTHD_I965 register. */
902 if (!IS_I965G(dev))
903 acthd = I915_READ(ACTHD);
904 else
905 acthd = I915_READ(ACTHD_I965);
906
907 /* If all work is done then ACTHD is not expected to advance, so an
 * unchanged ACTHD says nothing about a hang: clear the count and return
 * without re-arming the timer. list_empty() is tested first so that
 * i915_get_tail_request() is only evaluated on a non-empty list. */
908 if (list_empty(&dev_priv->mm.request_list) ||
909 i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) {
910 dev_priv->hangcheck_count = 0;
911 return;
912 }
913
 /* ACTHD matches the value stored at the previous expiry and an earlier
 * expiry has already counted it as unchanged: treat the GPU as hung. */
914 if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) {
915 DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
916 dev_priv->mm.wedged = true; /* Hopefully this is atomic */
917 i915_handle_error(dev);
918 return;
919 }
920
921 /* Re-arm the timer in case the chip hangs without another request being added */
922 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
923
 /* Count consecutive expiries with an unchanged ACTHD; any movement resets it. */
924 if (acthd != dev_priv->last_acthd)
925 dev_priv->hangcheck_count = 0;
926 else
927 dev_priv->hangcheck_count++;
928
 /* Remember this sample for comparison at the next expiry. */
929 dev_priv->last_acthd = acthd;
930}
931
883/* drm_dma.h hooks 932/* drm_dma.h hooks
884*/ 933*/
885static void igdng_irq_preinstall(struct drm_device *dev) 934static void igdng_irq_preinstall(struct drm_device *dev)