aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2017-12-13 08:43:47 -0500
committer Chris Wilson <chris@chris-wilson.co.uk> 2017-12-13 13:53:35 -0500
commit 6ca9a2beb54a385c8cc5e39a9d2ef49945896c64 (patch)
tree d50c2b914ece7713444ecdae37903b9c622ec8dc
parent 31c70f97bec3107805df0f491485e7eacbc3a3ae (diff)
drm/i915: Unwind i915_gem_init() failure
Since Michal introduced new user controllable errors other than -EIO during i915_gem_init(), we need to actually unwind on the error path as we have to abort the module load (and we expect to do so cleanly!). As we now tear down key state and then mark the driver as wedged (on EIO), we have to be careful to not allow ourselves to resume and unwedge, thus attempting to use the uninitialised driver. v2: Try not to free driver state for the suppressed EIO v3: Use load-fault-injection to test both error/recovery paths. References: 8620eb1dbbf2 ("drm/i915/uc: Don't use -EIO to report missing firmware") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com> Reviewed-by: Michał Winiarski <michal.winiarski@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171213134347.4608-1-chris@chris-wilson.co.uk
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 80
1 files changed, 66 insertions, 14 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8c3d801696b7..13fa26238e89 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4865,7 +4865,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
4865 i915_gem_restore_gtt_mappings(i915); 4865 i915_gem_restore_gtt_mappings(i915);
4866 i915_gem_restore_fences(i915); 4866 i915_gem_restore_fences(i915);
4867 4867
4868 /* As we didn't flush the kernel context before suspend, we cannot 4868 /*
4869 * As we didn't flush the kernel context before suspend, we cannot
4869 * guarantee that the context image is complete. So let's just reset 4870 * guarantee that the context image is complete. So let's just reset
4870 * it and start again. 4871 * it and start again.
4871 */ 4872 */
@@ -4886,8 +4887,10 @@ out_unlock:
4886 return; 4887 return;
4887 4888
4888err_wedged: 4889err_wedged:
4889 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 4890 if (!i915_terminally_wedged(&i915->gpu_error)) {
4890 i915_gem_set_wedged(i915); 4891 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
4892 i915_gem_set_wedged(i915);
4893 }
4891 goto out_unlock; 4894 goto out_unlock;
4892} 4895}
4893 4896
@@ -5170,22 +5173,28 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
5170 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5173 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5171 5174
5172 ret = i915_gem_init_ggtt(dev_priv); 5175 ret = i915_gem_init_ggtt(dev_priv);
5173 if (ret) 5176 if (ret) {
5174 goto out_unlock; 5177 GEM_BUG_ON(ret == -EIO);
5178 goto err_unlock;
5179 }
5175 5180
5176 ret = i915_gem_contexts_init(dev_priv); 5181 ret = i915_gem_contexts_init(dev_priv);
5177 if (ret) 5182 if (ret) {
5178 goto out_unlock; 5183 GEM_BUG_ON(ret == -EIO);
5184 goto err_ggtt;
5185 }
5179 5186
5180 ret = intel_engines_init(dev_priv); 5187 ret = intel_engines_init(dev_priv);
5181 if (ret) 5188 if (ret) {
5182 goto out_unlock; 5189 GEM_BUG_ON(ret == -EIO);
5190 goto err_context;
5191 }
5183 5192
5184 intel_init_gt_powersave(dev_priv); 5193 intel_init_gt_powersave(dev_priv);
5185 5194
5186 ret = i915_gem_init_hw(dev_priv); 5195 ret = i915_gem_init_hw(dev_priv);
5187 if (ret) 5196 if (ret)
5188 goto out_unlock; 5197 goto err_pm;
5189 5198
5190 /* 5199 /*
5191 * Despite its name intel_init_clock_gating applies both display 5200 * Despite its name intel_init_clock_gating applies both display
@@ -5199,9 +5208,53 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
5199 intel_init_clock_gating(dev_priv); 5208 intel_init_clock_gating(dev_priv);
5200 5209
5201 ret = __intel_engines_record_defaults(dev_priv); 5210 ret = __intel_engines_record_defaults(dev_priv);
5202out_unlock: 5211 if (ret)
5212 goto err_init_hw;
5213
5214 if (i915_inject_load_failure()) {
5215 ret = -ENODEV;
5216 goto err_init_hw;
5217 }
5218
5219 if (i915_inject_load_failure()) {
5220 ret = -EIO;
5221 goto err_init_hw;
5222 }
5223
5224 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5225 mutex_unlock(&dev_priv->drm.struct_mutex);
5226
5227 return 0;
5228
5229 /*
5230 * Unwinding is complicated by that we want to handle -EIO to mean
5231 * disable GPU submission but keep KMS alive. We want to mark the
5232 * HW as irreversibly wedged, but keep enough state around that the
5233 * driver doesn't explode during runtime.
5234 */
5235err_init_hw:
5236 i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
5237 i915_gem_contexts_lost(dev_priv);
5238 intel_uc_fini_hw(dev_priv);
5239err_pm:
5240 if (ret != -EIO) {
5241 intel_cleanup_gt_powersave(dev_priv);
5242 i915_gem_cleanup_engines(dev_priv);
5243 }
5244err_context:
5245 if (ret != -EIO)
5246 i915_gem_contexts_fini(dev_priv);
5247err_ggtt:
5248err_unlock:
5249 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5250 mutex_unlock(&dev_priv->drm.struct_mutex);
5251
5252 if (ret != -EIO)
5253 i915_gem_cleanup_userptr(dev_priv);
5254
5203 if (ret == -EIO) { 5255 if (ret == -EIO) {
5204 /* Allow engine initialisation to fail by marking the GPU as 5256 /*
5257 * Allow engine initialisation to fail by marking the GPU as
5205 * wedged. But we only want to do this where the GPU is angry, 5258 * wedged. But we only want to do this where the GPU is angry,
5206 * for all other failure, such as an allocation failure, bail. 5259 * for all other failure, such as an allocation failure, bail.
5207 */ 5260 */
@@ -5211,9 +5264,8 @@ out_unlock:
5211 } 5264 }
5212 ret = 0; 5265 ret = 0;
5213 } 5266 }
5214 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5215 mutex_unlock(&dev_priv->drm.struct_mutex);
5216 5267
5268 i915_gem_drain_freed_objects(dev_priv);
5217 return ret; 5269 return ret;
5218} 5270}
5219 5271