diff options
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.c | 13 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 30 |
7 files changed, 47 insertions, 32 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7ce229c6f424..f770be18b2d7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c | |||
@@ -1866,6 +1866,8 @@ static int i915_resume_switcheroo(struct drm_device *dev) | |||
1866 | /** | 1866 | /** |
1867 | * i915_reset - reset chip after a hang | 1867 | * i915_reset - reset chip after a hang |
1868 | * @i915: #drm_i915_private to reset | 1868 | * @i915: #drm_i915_private to reset |
1869 | * @stalled_mask: mask of the stalled engines with the guilty requests | ||
1870 | * @reason: user error message for why we are resetting | ||
1869 | * | 1871 | * |
1870 | * Reset the chip. Useful if a hang is detected. Marks the device as wedged | 1872 | * Reset the chip. Useful if a hang is detected. Marks the device as wedged |
1871 | * on failure. | 1873 | * on failure. |
@@ -1880,7 +1882,9 @@ static int i915_resume_switcheroo(struct drm_device *dev) | |||
1880 | * - re-init interrupt state | 1882 | * - re-init interrupt state |
1881 | * - re-init display | 1883 | * - re-init display |
1882 | */ | 1884 | */ |
1883 | void i915_reset(struct drm_i915_private *i915) | 1885 | void i915_reset(struct drm_i915_private *i915, |
1886 | unsigned int stalled_mask, | ||
1887 | const char *reason) | ||
1884 | { | 1888 | { |
1885 | struct i915_gpu_error *error = &i915->gpu_error; | 1889 | struct i915_gpu_error *error = &i915->gpu_error; |
1886 | int ret; | 1890 | int ret; |
@@ -1899,9 +1903,8 @@ void i915_reset(struct drm_i915_private *i915) | |||
1899 | if (!i915_gem_unset_wedged(i915)) | 1903 | if (!i915_gem_unset_wedged(i915)) |
1900 | goto wakeup; | 1904 | goto wakeup; |
1901 | 1905 | ||
1902 | if (error->reason) | 1906 | if (reason) |
1903 | dev_notice(i915->drm.dev, | 1907 | dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); |
1904 | "Resetting chip for %s\n", error->reason); | ||
1905 | error->reset_count++; | 1908 | error->reset_count++; |
1906 | 1909 | ||
1907 | disable_irq(i915->drm.irq); | 1910 | disable_irq(i915->drm.irq); |
@@ -1944,7 +1947,7 @@ void i915_reset(struct drm_i915_private *i915) | |||
1944 | goto error; | 1947 | goto error; |
1945 | } | 1948 | } |
1946 | 1949 | ||
1947 | i915_gem_reset(i915); | 1950 | i915_gem_reset(i915, stalled_mask); |
1948 | intel_overlay_reset(i915); | 1951 | intel_overlay_reset(i915); |
1949 | 1952 | ||
1950 | /* | 1953 | /* |
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6b3f2f651def..9bca104c409e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h | |||
@@ -2701,8 +2701,11 @@ extern void i915_driver_unload(struct drm_device *dev); | |||
2701 | extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); | 2701 | extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); |
2702 | extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); | 2702 | extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); |
2703 | 2703 | ||
2704 | extern void i915_reset(struct drm_i915_private *i915); | 2704 | extern void i915_reset(struct drm_i915_private *i915, |
2705 | extern int i915_reset_engine(struct intel_engine_cs *engine, const char *msg); | 2705 | unsigned int stalled_mask, |
2706 | const char *reason); | ||
2707 | extern int i915_reset_engine(struct intel_engine_cs *engine, | ||
2708 | const char *reason); | ||
2706 | 2709 | ||
2707 | extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); | 2710 | extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); |
2708 | extern int intel_reset_guc(struct drm_i915_private *dev_priv); | 2711 | extern int intel_reset_guc(struct drm_i915_private *dev_priv); |
@@ -3126,7 +3129,8 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, | |||
3126 | struct i915_request * | 3129 | struct i915_request * |
3127 | i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); | 3130 | i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); |
3128 | int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); | 3131 | int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); |
3129 | void i915_gem_reset(struct drm_i915_private *dev_priv); | 3132 | void i915_gem_reset(struct drm_i915_private *dev_priv, |
3133 | unsigned int stalled_mask); | ||
3130 | void i915_gem_reset_finish_engine(struct intel_engine_cs *engine); | 3134 | void i915_gem_reset_finish_engine(struct intel_engine_cs *engine); |
3131 | void i915_gem_reset_finish(struct drm_i915_private *dev_priv); | 3135 | void i915_gem_reset_finish(struct drm_i915_private *dev_priv); |
3132 | void i915_gem_set_wedged(struct drm_i915_private *dev_priv); | 3136 | void i915_gem_set_wedged(struct drm_i915_private *dev_priv); |
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 306d7a805eb7..28ab0beff86c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c | |||
@@ -3213,7 +3213,8 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, | |||
3213 | engine->reset_hw(engine, request); | 3213 | engine->reset_hw(engine, request); |
3214 | } | 3214 | } |
3215 | 3215 | ||
3216 | void i915_gem_reset(struct drm_i915_private *dev_priv) | 3216 | void i915_gem_reset(struct drm_i915_private *dev_priv, |
3217 | unsigned int stalled_mask) | ||
3217 | { | 3218 | { |
3218 | struct intel_engine_cs *engine; | 3219 | struct intel_engine_cs *engine; |
3219 | enum intel_engine_id id; | 3220 | enum intel_engine_id id; |
@@ -3227,7 +3228,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) | |||
3227 | 3228 | ||
3228 | i915_gem_reset_engine(engine, | 3229 | i915_gem_reset_engine(engine, |
3229 | engine->hangcheck.active_request, | 3230 | engine->hangcheck.active_request, |
3230 | engine->hangcheck.stalled); | 3231 | stalled_mask & ENGINE_MASK(id)); |
3231 | ctx = fetch_and_zero(&engine->last_retired_context); | 3232 | ctx = fetch_and_zero(&engine->last_retired_context); |
3232 | if (ctx) | 3233 | if (ctx) |
3233 | engine->context_unpin(engine, ctx); | 3234 | engine->context_unpin(engine, ctx); |
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index ac5760673cc9..c05b6034d718 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h | |||
@@ -269,6 +269,9 @@ struct i915_gpu_error { | |||
269 | /** Number of times an engine has been reset */ | 269 | /** Number of times an engine has been reset */ |
270 | u32 reset_engine_count[I915_NUM_ENGINES]; | 270 | u32 reset_engine_count[I915_NUM_ENGINES]; |
271 | 271 | ||
272 | /** Set of stalled engines with guilty requests, in the current reset */ | ||
273 | u32 stalled_mask; | ||
274 | |||
272 | /** Reason for the current *global* reset */ | 275 | /** Reason for the current *global* reset */ |
273 | const char *reason; | 276 | const char *reason; |
274 | 277 | ||
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c2f878ace0ea..b03d18561b55 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c | |||
@@ -2961,7 +2961,8 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) | |||
2961 | } | 2961 | } |
2962 | 2962 | ||
2963 | static void i915_reset_device(struct drm_i915_private *dev_priv, | 2963 | static void i915_reset_device(struct drm_i915_private *dev_priv, |
2964 | const char *msg) | 2964 | u32 engine_mask, |
2965 | const char *reason) | ||
2965 | { | 2966 | { |
2966 | struct i915_gpu_error *error = &dev_priv->gpu_error; | 2967 | struct i915_gpu_error *error = &dev_priv->gpu_error; |
2967 | struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; | 2968 | struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; |
@@ -2979,9 +2980,11 @@ static void i915_reset_device(struct drm_i915_private *dev_priv, | |||
2979 | i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { | 2980 | i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { |
2980 | intel_prepare_reset(dev_priv); | 2981 | intel_prepare_reset(dev_priv); |
2981 | 2982 | ||
2982 | error->reason = msg; | 2983 | error->reason = reason; |
2984 | error->stalled_mask = engine_mask; | ||
2983 | 2985 | ||
2984 | /* Signal that locked waiters should reset the GPU */ | 2986 | /* Signal that locked waiters should reset the GPU */ |
2987 | smp_mb__before_atomic(); | ||
2985 | set_bit(I915_RESET_HANDOFF, &error->flags); | 2988 | set_bit(I915_RESET_HANDOFF, &error->flags); |
2986 | wake_up_all(&error->wait_queue); | 2989 | wake_up_all(&error->wait_queue); |
2987 | 2990 | ||
@@ -2990,7 +2993,7 @@ static void i915_reset_device(struct drm_i915_private *dev_priv, | |||
2990 | */ | 2993 | */ |
2991 | do { | 2994 | do { |
2992 | if (mutex_trylock(&dev_priv->drm.struct_mutex)) { | 2995 | if (mutex_trylock(&dev_priv->drm.struct_mutex)) { |
2993 | i915_reset(dev_priv); | 2996 | i915_reset(dev_priv, engine_mask, reason); |
2994 | mutex_unlock(&dev_priv->drm.struct_mutex); | 2997 | mutex_unlock(&dev_priv->drm.struct_mutex); |
2995 | } | 2998 | } |
2996 | } while (wait_on_bit_timeout(&error->flags, | 2999 | } while (wait_on_bit_timeout(&error->flags, |
@@ -2998,6 +3001,7 @@ static void i915_reset_device(struct drm_i915_private *dev_priv, | |||
2998 | TASK_UNINTERRUPTIBLE, | 3001 | TASK_UNINTERRUPTIBLE, |
2999 | 1)); | 3002 | 1)); |
3000 | 3003 | ||
3004 | error->stalled_mask = 0; | ||
3001 | error->reason = NULL; | 3005 | error->reason = NULL; |
3002 | 3006 | ||
3003 | intel_finish_reset(dev_priv); | 3007 | intel_finish_reset(dev_priv); |
@@ -3122,7 +3126,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, | |||
3122 | TASK_UNINTERRUPTIBLE); | 3126 | TASK_UNINTERRUPTIBLE); |
3123 | } | 3127 | } |
3124 | 3128 | ||
3125 | i915_reset_device(dev_priv, msg); | 3129 | i915_reset_device(dev_priv, engine_mask, msg); |
3126 | 3130 | ||
3127 | for_each_engine(engine, dev_priv, tmp) { | 3131 | for_each_engine(engine, dev_priv, tmp) { |
3128 | clear_bit(I915_RESET_ENGINE + engine->id, | 3132 | clear_bit(I915_RESET_ENGINE + engine->id, |
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a9d0bde16443..629f3e860592 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c | |||
@@ -1185,11 +1185,13 @@ static bool __i915_spin_request(const struct i915_request *rq, | |||
1185 | 1185 | ||
1186 | static bool __i915_wait_request_check_and_reset(struct i915_request *request) | 1186 | static bool __i915_wait_request_check_and_reset(struct i915_request *request) |
1187 | { | 1187 | { |
1188 | if (likely(!i915_reset_handoff(&request->i915->gpu_error))) | 1188 | struct i915_gpu_error *error = &request->i915->gpu_error; |
1189 | |||
1190 | if (likely(!i915_reset_handoff(error))) | ||
1189 | return false; | 1191 | return false; |
1190 | 1192 | ||
1191 | __set_current_state(TASK_RUNNING); | 1193 | __set_current_state(TASK_RUNNING); |
1192 | i915_reset(request->i915); | 1194 | i915_reset(request->i915, error->stalled_mask, error->reason); |
1193 | return true; | 1195 | return true; |
1194 | } | 1196 | } |
1195 | 1197 | ||
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index acfb4dcc9fb5..24f913f26a7b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c | |||
@@ -437,7 +437,7 @@ static int igt_global_reset(void *arg) | |||
437 | mutex_lock(&i915->drm.struct_mutex); | 437 | mutex_lock(&i915->drm.struct_mutex); |
438 | reset_count = i915_reset_count(&i915->gpu_error); | 438 | reset_count = i915_reset_count(&i915->gpu_error); |
439 | 439 | ||
440 | i915_reset(i915); | 440 | i915_reset(i915, ALL_ENGINES, NULL); |
441 | 441 | ||
442 | if (i915_reset_count(&i915->gpu_error) == reset_count) { | 442 | if (i915_reset_count(&i915->gpu_error) == reset_count) { |
443 | pr_err("No GPU reset recorded!\n"); | 443 | pr_err("No GPU reset recorded!\n"); |
@@ -881,17 +881,18 @@ static int igt_reset_engines(void *arg) | |||
881 | return 0; | 881 | return 0; |
882 | } | 882 | } |
883 | 883 | ||
884 | static u32 fake_hangcheck(struct i915_request *rq) | 884 | static u32 fake_hangcheck(struct i915_request *rq, u32 mask) |
885 | { | 885 | { |
886 | u32 reset_count; | 886 | struct i915_gpu_error *error = &rq->i915->gpu_error; |
887 | u32 reset_count = i915_reset_count(error); | ||
887 | 888 | ||
888 | rq->engine->hangcheck.stalled = true; | 889 | error->stalled_mask = mask; |
889 | rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); | ||
890 | 890 | ||
891 | reset_count = i915_reset_count(&rq->i915->gpu_error); | 891 | /* set_bit() must be after we have setup the backchannel (mask) */ |
892 | smp_mb__before_atomic(); | ||
893 | set_bit(I915_RESET_HANDOFF, &error->flags); | ||
892 | 894 | ||
893 | set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags); | 895 | wake_up_all(&error->wait_queue); |
894 | wake_up_all(&rq->i915->gpu_error.wait_queue); | ||
895 | 896 | ||
896 | return reset_count; | 897 | return reset_count; |
897 | } | 898 | } |
@@ -939,7 +940,7 @@ static int igt_wait_reset(void *arg) | |||
939 | goto out_rq; | 940 | goto out_rq; |
940 | } | 941 | } |
941 | 942 | ||
942 | reset_count = fake_hangcheck(rq); | 943 | reset_count = fake_hangcheck(rq, ALL_ENGINES); |
943 | 944 | ||
944 | timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); | 945 | timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); |
945 | if (timeout < 0) { | 946 | if (timeout < 0) { |
@@ -1075,9 +1076,9 @@ static int igt_reset_queue(void *arg) | |||
1075 | goto fini; | 1076 | goto fini; |
1076 | } | 1077 | } |
1077 | 1078 | ||
1078 | reset_count = fake_hangcheck(prev); | 1079 | reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); |
1079 | 1080 | ||
1080 | i915_reset(i915); | 1081 | i915_reset(i915, ENGINE_MASK(id), NULL); |
1081 | 1082 | ||
1082 | GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, | 1083 | GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, |
1083 | &i915->gpu_error.flags)); | 1084 | &i915->gpu_error.flags)); |
@@ -1150,7 +1151,7 @@ static int igt_handle_error(void *arg) | |||
1150 | if (!intel_has_reset_engine(i915)) | 1151 | if (!intel_has_reset_engine(i915)) |
1151 | return 0; | 1152 | return 0; |
1152 | 1153 | ||
1153 | if (!intel_engine_can_store_dword(i915->engine[RCS])) | 1154 | if (!engine || !intel_engine_can_store_dword(engine)) |
1154 | return 0; | 1155 | return 0; |
1155 | 1156 | ||
1156 | mutex_lock(&i915->drm.struct_mutex); | 1157 | mutex_lock(&i915->drm.struct_mutex); |
@@ -1186,10 +1187,7 @@ static int igt_handle_error(void *arg) | |||
1186 | /* Temporarily disable error capture */ | 1187 | /* Temporarily disable error capture */ |
1187 | error = xchg(&i915->gpu_error.first_error, (void *)-1); | 1188 | error = xchg(&i915->gpu_error.first_error, (void *)-1); |
1188 | 1189 | ||
1189 | engine->hangcheck.stalled = true; | 1190 | i915_handle_error(i915, ENGINE_MASK(engine->id), 0, NULL); |
1190 | engine->hangcheck.seqno = intel_engine_get_seqno(engine); | ||
1191 | |||
1192 | i915_handle_error(i915, intel_engine_flag(engine), 0, NULL); | ||
1193 | 1191 | ||
1194 | xchg(&i915->gpu_error.first_error, error); | 1192 | xchg(&i915->gpu_error.first_error, error); |
1195 | 1193 | ||