aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_drv.h
diff options
context:
space:
mode:
author: Mika Kuoppala <mika.kuoppala@linux.intel.com> 2013-08-30 09:19:28 -0400
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2013-09-06 11:55:50 -0400
commit: be62acb4cce1389a28296852737e3917d9cc5b25 (patch)
tree: 7675ef0ebe4cad2d72f1ec94124f73fa97a42472 /drivers/gpu/drm/i915/i915_drv.h
parent: bf13e81b904a37d94d83dd6c3b53a147719a3ead (diff)
drm/i915: ban badly behaving contexts
Now that we have a mechanism in place to track which context was guilty of hanging the gpu, it is possible to punish bad behaviour. If a context has recently submitted a faulty batchbuffer guilty of a gpu hang and submits another batch which hangs the gpu in quick succession, ban it permanently. If a ctx is banned, no more batchbuffers will be queued for execution. There is no need for the global wedge machinery anymore and it would be unwise to wedge the whole gpu if we have multiple hanging batches queued for execution. Instead just ban the guilty ones and carry on. v2: Store guilty ban status bool in gpu_error instead of pointers that might become dangling before hang is declared. v3: Use return value for banned status instead of stashing state into gpu_error (Chris Wilson) v4: - rebase on top of fixed hang stats api - add define for ban period - rename commit and improve commit msg v5: - rely on context banning instead of wedging the gpu - beautification and fix for ban calculation (Chris) Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_drv.h')
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 11
1 files changed, 9 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e357995a6aad..c5f0abaa9a22 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -586,6 +586,12 @@ struct i915_ctx_hang_stats {
586 586
587 /* This context had batch active when hang was declared */ 587 /* This context had batch active when hang was declared */
588 unsigned batch_active; 588 unsigned batch_active;
589
590 /* Time when this context was last blamed for a GPU reset */
591 unsigned long guilty_ts;
592
593 /* This context is banned to submit more work */
594 bool banned;
589}; 595};
590 596
591/* This must match up with the value previously used for execbuf2.rsvd1. */ 597/* This must match up with the value previously used for execbuf2.rsvd1. */
@@ -987,6 +993,9 @@ struct i915_gpu_error {
987 /* For hangcheck timer */ 993 /* For hangcheck timer */
988#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ 994#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
989#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) 995#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
996 /* Hang gpu twice in this window and your context gets banned */
997#define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
998
990 struct timer_list hangcheck_timer; 999 struct timer_list hangcheck_timer;
991 1000
992 /* For reset and error_state handling. */ 1001 /* For reset and error_state handling. */
@@ -995,8 +1004,6 @@ struct i915_gpu_error {
995 struct drm_i915_error_state *first_error; 1004 struct drm_i915_error_state *first_error;
996 struct work_struct work; 1005 struct work_struct work;
997 1006
998 unsigned long last_reset;
999
1000 /** 1007 /**
1001 * State variable and reset counter controlling the reset flow 1008 * State variable and reset counter controlling the reset flow
1002 * 1009 *