aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2018-04-06 18:03:53 -0400
committer Chris Wilson <chris@chris-wilson.co.uk> 2018-04-06 18:43:47 -0400
commit bba0869b18e44ff2f713c98575ddad8c7c5e9b10 (patch)
tree 0643eac57745fbd5c143016ec3a395e34b329bfe
parent e4d2006f8f040825fa371e774a5debacdbf20b08 (diff)
drm/i915: Treat i915_reset_engine() as guilty until proven innocent
If we are resetting just one engine, we know it has stalled. So we can pass the stalled parameter directly to i915_gem_reset_engine(), which alleviates the necessity to poke at the generic engine->hangcheck.stalled magic variable, leaving that under control of hangcheck as its name implies.

Other than simplifying by removing the indirect parameter along this path, this allows us to introduce new reset mechanisms that run independently of hangcheck.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Michel Thierry <michel.thierry@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180406220354.18911-1-chris@chris-wilson.co.uk
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.c 2
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 3
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 36
-rw-r--r-- drivers/gpu/drm/i915/selftests/intel_hangcheck.c 9
4 files changed, 20 insertions, 30 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 684060ed8db6..7ce229c6f424 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2050,7 +2050,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
2050 * active request and can drop it, adjust head to skip the offending 2050 * active request and can drop it, adjust head to skip the offending
2051 * request to resume executing remaining requests in the queue. 2051 * request to resume executing remaining requests in the queue.
2052 */ 2052 */
2053 i915_gem_reset_engine(engine, active_request); 2053 i915_gem_reset_engine(engine, active_request, true);
2054 2054
2055 /* 2055 /*
2056 * The engine and its registers (and workarounds in case of render) 2056 * The engine and its registers (and workarounds in case of render)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5373b171bb96..6b3f2f651def 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3132,7 +3132,8 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
3132void i915_gem_set_wedged(struct drm_i915_private *dev_priv); 3132void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
3133bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); 3133bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
3134void i915_gem_reset_engine(struct intel_engine_cs *engine, 3134void i915_gem_reset_engine(struct intel_engine_cs *engine,
3135 struct i915_request *request); 3135 struct i915_request *request,
3136 bool stalled);
3136 3137
3137void i915_gem_init_mmio(struct drm_i915_private *i915); 3138void i915_gem_init_mmio(struct drm_i915_private *i915);
3138int __must_check i915_gem_init(struct drm_i915_private *dev_priv); 3139int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a69dc19a0bdb..306d7a805eb7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2990,20 +2990,6 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
2990 return active; 2990 return active;
2991} 2991}
2992 2992
2993static bool engine_stalled(struct intel_engine_cs *engine)
2994{
2995 if (!engine->hangcheck.stalled)
2996 return false;
2997
2998 /* Check for possible seqno movement after hang declaration */
2999 if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) {
3000 DRM_DEBUG_DRIVER("%s pardoned\n", engine->name);
3001 return false;
3002 }
3003
3004 return true;
3005}
3006
3007/* 2993/*
3008 * Ensure irq handler finishes, and not run again. 2994 * Ensure irq handler finishes, and not run again.
3009 * Also return the active request so that we only search for it once. 2995 * Also return the active request so that we only search for it once.
@@ -3142,7 +3128,8 @@ static void engine_skip_context(struct i915_request *request)
3142/* Returns the request if it was guilty of the hang */ 3128/* Returns the request if it was guilty of the hang */
3143static struct i915_request * 3129static struct i915_request *
3144i915_gem_reset_request(struct intel_engine_cs *engine, 3130i915_gem_reset_request(struct intel_engine_cs *engine,
3145 struct i915_request *request) 3131 struct i915_request *request,
3132 bool stalled)
3146{ 3133{
3147 /* The guilty request will get skipped on a hung engine. 3134 /* The guilty request will get skipped on a hung engine.
3148 * 3135 *
@@ -3165,7 +3152,15 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
3165 * subsequent hangs. 3152 * subsequent hangs.
3166 */ 3153 */
3167 3154
3168 if (engine_stalled(engine)) { 3155 if (i915_request_completed(request)) {
3156 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
3157 engine->name, request->global_seqno,
3158 request->fence.context, request->fence.seqno,
3159 intel_engine_get_seqno(engine));
3160 stalled = false;
3161 }
3162
3163 if (stalled) {
3169 i915_gem_context_mark_guilty(request->ctx); 3164 i915_gem_context_mark_guilty(request->ctx);
3170 skip_request(request); 3165 skip_request(request);
3171 3166
@@ -3196,7 +3191,8 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
3196} 3191}
3197 3192
3198void i915_gem_reset_engine(struct intel_engine_cs *engine, 3193void i915_gem_reset_engine(struct intel_engine_cs *engine,
3199 struct i915_request *request) 3194 struct i915_request *request,
3195 bool stalled)
3200{ 3196{
3201 /* 3197 /*
3202 * Make sure this write is visible before we re-enable the interrupt 3198 * Make sure this write is visible before we re-enable the interrupt
@@ -3206,7 +3202,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
3206 smp_store_mb(engine->irq_posted, 0); 3202 smp_store_mb(engine->irq_posted, 0);
3207 3203
3208 if (request) 3204 if (request)
3209 request = i915_gem_reset_request(engine, request); 3205 request = i915_gem_reset_request(engine, request, stalled);
3210 3206
3211 if (request) { 3207 if (request) {
3212 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", 3208 DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
@@ -3229,7 +3225,9 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
3229 for_each_engine(engine, dev_priv, id) { 3225 for_each_engine(engine, dev_priv, id) {
3230 struct i915_gem_context *ctx; 3226 struct i915_gem_context *ctx;
3231 3227
3232 i915_gem_reset_engine(engine, engine->hangcheck.active_request); 3228 i915_gem_reset_engine(engine,
3229 engine->hangcheck.active_request,
3230 engine->hangcheck.stalled);
3233 ctx = fetch_and_zero(&engine->last_retired_context); 3231 ctx = fetch_and_zero(&engine->last_retired_context);
3234 if (ctx) 3232 if (ctx)
3235 engine->context_unpin(engine, ctx); 3233 engine->context_unpin(engine, ctx);
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 8650853c8cb3..acfb4dcc9fb5 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -522,9 +522,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
522 i915_request_put(rq); 522 i915_request_put(rq);
523 } 523 }
524 524
525 engine->hangcheck.stalled = true;
526 engine->hangcheck.seqno = seqno;
527
528 err = i915_reset_engine(engine, NULL); 525 err = i915_reset_engine(engine, NULL);
529 if (err) { 526 if (err) {
530 pr_err("i915_reset_engine failed\n"); 527 pr_err("i915_reset_engine failed\n");
@@ -545,8 +542,6 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
545 err = -EINVAL; 542 err = -EINVAL;
546 break; 543 break;
547 } 544 }
548
549 engine->hangcheck.stalled = false;
550 } while (time_before(jiffies, end_time)); 545 } while (time_before(jiffies, end_time));
551 clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); 546 clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
552 547
@@ -764,9 +759,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
764 seqno = rq->global_seqno - 1; 759 seqno = rq->global_seqno - 1;
765 } 760 }
766 761
767 engine->hangcheck.stalled = true;
768 engine->hangcheck.seqno = seqno;
769
770 err = i915_reset_engine(engine, NULL); 762 err = i915_reset_engine(engine, NULL);
771 if (err) { 763 if (err) {
772 pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", 764 pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
@@ -774,7 +766,6 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
774 break; 766 break;
775 } 767 }
776 768
777 engine->hangcheck.stalled = false;
778 count++; 769 count++;
779 770
780 if (rq) { 771 if (rq) {