aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
author: Rob Clark <robdclark@gmail.com> 2013-08-24 14:20:38 -0400
committer: Rob Clark <robdclark@gmail.com> 2013-08-24 14:57:19 -0400
commit: bd6f82d8289422f618b98451a43887f452b3423e (patch)
tree: e4416dba680a3e88236f9e8b568ff2544c6e1779 /drivers/gpu/drm
parent: 7198e6b03155f6dadecadba004eb83b81a6ffe4c (diff)
drm/msm: add basic hangcheck/recovery mechanism
A basic, no-frills recovery mechanism in case the gpu gets wedged. We could try to be a bit more fancy and restart the next submit after the one that got wedged, but for now keep it simple. This is enough to recover things if, for example, the gpu hangs midway through a piglit run.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- drivers/gpu/drm/msm/adreno/a3xx_gpu.c 1
-rw-r--r-- drivers/gpu/drm/msm/adreno/adreno_gpu.c 26
-rw-r--r-- drivers/gpu/drm/msm/adreno/adreno_gpu.h 3
-rw-r--r-- drivers/gpu/drm/msm/msm_gpu.c 52
-rw-r--r-- drivers/gpu/drm/msm/msm_gpu.h 10
5 files changed, 87 insertions, 5 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 13d61bbed302..035bd13dc8bd 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -371,6 +371,7 @@ static const struct adreno_gpu_funcs funcs = {
371 .hw_init = a3xx_hw_init, 371 .hw_init = a3xx_hw_init,
372 .pm_suspend = msm_gpu_pm_suspend, 372 .pm_suspend = msm_gpu_pm_suspend,
373 .pm_resume = msm_gpu_pm_resume, 373 .pm_resume = msm_gpu_pm_resume,
374 .recover = adreno_recover,
374 .last_fence = adreno_last_fence, 375 .last_fence = adreno_last_fence,
375 .submit = adreno_submit, 376 .submit = adreno_submit,
376 .flush = adreno_flush, 377 .flush = adreno_flush,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 282163ee3fa5..a60584763b61 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -111,6 +111,28 @@ uint32_t adreno_last_fence(struct msm_gpu *gpu)
111 return adreno_gpu->memptrs->fence; 111 return adreno_gpu->memptrs->fence;
112} 112}
113 113
114void adreno_recover(struct msm_gpu *gpu)
115{
116 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
117 struct drm_device *dev = gpu->dev;
118 int ret;
119
120 gpu->funcs->pm_suspend(gpu);
121
122 /* reset ringbuffer: */
123 gpu->rb->cur = gpu->rb->start;
124
125 /* reset completed fence seqno, just discard anything pending: */
126 adreno_gpu->memptrs->fence = gpu->submitted_fence;
127
128 gpu->funcs->pm_resume(gpu);
129 ret = gpu->funcs->hw_init(gpu);
130 if (ret) {
131 dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
132 /* hmm, oh well? */
133 }
134}
135
114int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, 136int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
115 struct msm_file_private *ctx) 137 struct msm_file_private *ctx)
116{ 138{
@@ -119,8 +141,6 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
119 struct msm_ringbuffer *ring = gpu->rb; 141 struct msm_ringbuffer *ring = gpu->rb;
120 unsigned i, ibs = 0; 142 unsigned i, ibs = 0;
121 143
122 adreno_gpu->last_fence = submit->fence;
123
124 for (i = 0; i < submit->nr_cmds; i++) { 144 for (i = 0; i < submit->nr_cmds; i++) {
125 switch (submit->cmd[i].type) { 145 switch (submit->cmd[i].type) {
126 case MSM_SUBMIT_CMD_IB_TARGET_BUF: 146 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
@@ -225,7 +245,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
225 adreno_gpu->rev.patchid); 245 adreno_gpu->rev.patchid);
226 246
227 seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, 247 seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
228 adreno_gpu->last_fence); 248 gpu->submitted_fence);
229 seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr); 249 seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr);
230 seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr); 250 seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr);
231 seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); 251 seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 6b49c4f27fec..f73abfba7c22 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -54,8 +54,6 @@ struct adreno_gpu {
54 uint32_t revn; /* numeric revision name */ 54 uint32_t revn; /* numeric revision name */
55 const struct adreno_gpu_funcs *funcs; 55 const struct adreno_gpu_funcs *funcs;
56 56
57 uint32_t last_fence;
58
59 /* firmware: */ 57 /* firmware: */
60 const struct firmware *pm4, *pfp; 58 const struct firmware *pm4, *pfp;
61 59
@@ -99,6 +97,7 @@ static inline bool adreno_is_a330(struct adreno_gpu *gpu)
99int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); 97int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
100int adreno_hw_init(struct msm_gpu *gpu); 98int adreno_hw_init(struct msm_gpu *gpu);
101uint32_t adreno_last_fence(struct msm_gpu *gpu); 99uint32_t adreno_last_fence(struct msm_gpu *gpu);
100void adreno_recover(struct msm_gpu *gpu);
102int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, 101int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
103 struct msm_file_private *ctx); 102 struct msm_file_private *ctx);
104void adreno_flush(struct msm_gpu *gpu); 103void adreno_flush(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 7c6541e4a7ec..e1e1ec9321ff 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -203,6 +203,51 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu)
203} 203}
204 204
205/* 205/*
206 * Hangcheck detection for locked gpu:
207 */
208
209static void recover_worker(struct work_struct *work)
210{
211 struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
212 struct drm_device *dev = gpu->dev;
213
214 dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
215
216 mutex_lock(&dev->struct_mutex);
217 gpu->funcs->recover(gpu);
218 mutex_unlock(&dev->struct_mutex);
219
220 msm_gpu_retire(gpu);
221}
222
223static void hangcheck_timer_reset(struct msm_gpu *gpu)
224{
225 DBG("%s", gpu->name);
226 mod_timer(&gpu->hangcheck_timer,
227 round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
228}
229
230static void hangcheck_handler(unsigned long data)
231{
232 struct msm_gpu *gpu = (struct msm_gpu *)data;
233 uint32_t fence = gpu->funcs->last_fence(gpu);
234
235 if (fence != gpu->hangcheck_fence) {
236 /* some progress has been made.. ya! */
237 gpu->hangcheck_fence = fence;
238 } else if (fence < gpu->submitted_fence) {
239 /* no progress and not done.. hung! */
240 struct msm_drm_private *priv = gpu->dev->dev_private;
241 gpu->hangcheck_fence = fence;
242 queue_work(priv->wq, &gpu->recover_work);
243 }
244
245 /* if still more pending work, reset the hangcheck timer: */
246 if (gpu->submitted_fence > gpu->hangcheck_fence)
247 hangcheck_timer_reset(gpu);
248}
249
250/*
206 * Cmdstream submission/retirement: 251 * Cmdstream submission/retirement:
207 */ 252 */
208 253
@@ -254,6 +299,8 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
254 299
255 submit->fence = ++priv->next_fence; 300 submit->fence = ++priv->next_fence;
256 301
302 gpu->submitted_fence = submit->fence;
303
257 ret = gpu->funcs->submit(gpu, submit, ctx); 304 ret = gpu->funcs->submit(gpu, submit, ctx);
258 priv->lastctx = ctx; 305 priv->lastctx = ctx;
259 306
@@ -276,6 +323,7 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
276 323
277 msm_gem_move_to_active(&msm_obj->base, gpu, submit->fence); 324 msm_gem_move_to_active(&msm_obj->base, gpu, submit->fence);
278 } 325 }
326 hangcheck_timer_reset(gpu);
279 mutex_unlock(&dev->struct_mutex); 327 mutex_unlock(&dev->struct_mutex);
280 328
281 return ret; 329 return ret;
@@ -307,6 +355,10 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
307 355
308 INIT_LIST_HEAD(&gpu->active_list); 356 INIT_LIST_HEAD(&gpu->active_list);
309 INIT_WORK(&gpu->retire_work, retire_worker); 357 INIT_WORK(&gpu->retire_work, retire_worker);
358 INIT_WORK(&gpu->recover_work, recover_worker);
359
360 setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
361 (unsigned long)gpu);
310 362
311 BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks)); 363 BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));
312 364
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 8d2cd6c2226b..8cd829e520bb 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -51,6 +51,7 @@ struct msm_gpu_funcs {
51 void (*idle)(struct msm_gpu *gpu); 51 void (*idle)(struct msm_gpu *gpu);
52 irqreturn_t (*irq)(struct msm_gpu *irq); 52 irqreturn_t (*irq)(struct msm_gpu *irq);
53 uint32_t (*last_fence)(struct msm_gpu *gpu); 53 uint32_t (*last_fence)(struct msm_gpu *gpu);
54 void (*recover)(struct msm_gpu *gpu);
54 void (*destroy)(struct msm_gpu *gpu); 55 void (*destroy)(struct msm_gpu *gpu);
55#ifdef CONFIG_DEBUG_FS 56#ifdef CONFIG_DEBUG_FS
56 /* show GPU status in debugfs: */ 57 /* show GPU status in debugfs: */
@@ -69,6 +70,8 @@ struct msm_gpu {
69 /* list of GEM active objects: */ 70 /* list of GEM active objects: */
70 struct list_head active_list; 71 struct list_head active_list;
71 72
73 uint32_t submitted_fence;
74
72 /* worker for handling active-list retiring: */ 75 /* worker for handling active-list retiring: */
73 struct work_struct retire_work; 76 struct work_struct retire_work;
74 77
@@ -83,6 +86,13 @@ struct msm_gpu {
83 struct clk *ebi1_clk, *grp_clks[5]; 86 struct clk *ebi1_clk, *grp_clks[5];
84 uint32_t fast_rate, slow_rate, bus_freq; 87 uint32_t fast_rate, slow_rate, bus_freq;
85 uint32_t bsc; 88 uint32_t bsc;
89
90 /* Hang Detection: */
91#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
92#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
93 struct timer_list hangcheck_timer;
94 uint32_t hangcheck_fence;
95 struct work_struct recover_work;
86}; 96};
87 97
88static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) 98static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)