diff options
author | Jesse Barnes <jbarnes@virtuousgeek.org> | 2009-07-11 16:48:03 -0400 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2009-07-13 18:33:37 -0400 |
commit | 8a90523639f49dc4b4fa7ae47bb9c8ed73ea8577 (patch) | |
tree | 6b7142b32b16c2f87e38420568a39f92968692b3 /drivers | |
parent | 832cc28d5bc676331e6376d940ae45d5937aa688 (diff) |
drm/i915: refactor error detection & collection
This patch refactors the existing error detection and collection code,
placing most of it in i915_handle_error(). Additionally, we introduce a
work queue for scheduling post-crash tasks such as generating a uevent.
Using the uevent facility, userspace should be able to capture a
post-mortem dump for diagnostics.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ben Gamari <bgamari.foss@gmail.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_debugfs.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 232 |
3 files changed, 161 insertions, 74 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d08752875885..b05b44dd3bf6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h | |||
@@ -229,6 +229,7 @@ typedef struct drm_i915_private { | |||
229 | 229 | ||
230 | spinlock_t error_lock; | 230 | spinlock_t error_lock; |
231 | struct drm_i915_error_state *first_error; | 231 | struct drm_i915_error_state *first_error; |
232 | struct work_struct error_work; | ||
232 | 233 | ||
233 | /* Register state */ | 234 | /* Register state */ |
234 | u8 saveLBB; | 235 | u8 saveLBB; |
diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c index 9a44bfcb8139..cb3b97405fbf 100644 --- a/drivers/gpu/drm/i915/i915_gem_debugfs.c +++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c | |||
@@ -343,6 +343,8 @@ static int i915_error_state(struct seq_file *m, void *unused) | |||
343 | 343 | ||
344 | error = dev_priv->first_error; | 344 | error = dev_priv->first_error; |
345 | 345 | ||
346 | seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, | ||
347 | error->time.tv_usec); | ||
346 | seq_printf(m, "EIR: 0x%08x\n", error->eir); | 348 | seq_printf(m, "EIR: 0x%08x\n", error->eir); |
347 | seq_printf(m, " PGTBL_ER: 0x%08x\n", error->pgtbl_er); | 349 | seq_printf(m, " PGTBL_ER: 0x%08x\n", error->pgtbl_er); |
348 | seq_printf(m, " INSTPM: 0x%08x\n", error->instpm); | 350 | seq_printf(m, " INSTPM: 0x%08x\n", error->instpm); |
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7ba23a69a0c0..f340b3fd54e6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c | |||
@@ -290,6 +290,35 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) | |||
290 | return ret; | 290 | return ret; |
291 | } | 291 | } |
292 | 292 | ||
293 | /** | ||
294 | * i915_error_work_func - do process context error handling work | ||
295 | * @work: work struct | ||
296 | * | ||
297 | * Fire an error uevent so userspace can see that a hang or error | ||
298 | * was detected. | ||
299 | */ | ||
300 | static void i915_error_work_func(struct work_struct *work) | ||
301 | { | ||
302 | drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, | ||
303 | error_work); | ||
304 | struct drm_device *dev = dev_priv->dev; | ||
305 | char *event_string = "ERROR=1"; | ||
306 | char *envp[] = { event_string, NULL }; | ||
307 | |||
308 | DRM_DEBUG("generating error event\n"); | ||
309 | |||
310 | kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp); | ||
311 | } | ||
312 | |||
313 | /** | ||
314 | * i915_capture_error_state - capture an error record for later analysis | ||
315 | * @dev: drm device | ||
316 | * | ||
317 | * Should be called when an error is detected (either a hang or an error | ||
318 | * interrupt) to capture error state from the time of the error. Fills | ||
319 | * out a structure which becomes available in debugfs for user level tools | ||
320 | * to pick up. | ||
321 | */ | ||
293 | static void i915_capture_error_state(struct drm_device *dev) | 322 | static void i915_capture_error_state(struct drm_device *dev) |
294 | { | 323 | { |
295 | struct drm_i915_private *dev_priv = dev->dev_private; | 324 | struct drm_i915_private *dev_priv = dev->dev_private; |
@@ -325,12 +354,137 @@ static void i915_capture_error_state(struct drm_device *dev) | |||
325 | error->acthd = I915_READ(ACTHD_I965); | 354 | error->acthd = I915_READ(ACTHD_I965); |
326 | } | 355 | } |
327 | 356 | ||
357 | do_gettimeofday(&error->time); | ||
358 | |||
328 | dev_priv->first_error = error; | 359 | dev_priv->first_error = error; |
329 | 360 | ||
330 | out: | 361 | out: |
331 | spin_unlock_irqrestore(&dev_priv->error_lock, flags); | 362 | spin_unlock_irqrestore(&dev_priv->error_lock, flags); |
332 | } | 363 | } |
333 | 364 | ||
365 | /** | ||
366 | * i915_handle_error - handle an error interrupt | ||
367 | * @dev: drm device | ||
368 | * | ||
369 | * Do some basic checking of register state at error interrupt time and | ||
370 | * dump it to the syslog. Also call i915_capture_error_state() to make | ||
371 | * sure we get a record and make it available in debugfs. Fire a uevent | ||
372 | * so userspace knows something bad happened (should trigger collection | ||
373 | * of a ring dump etc.). | ||
374 | */ | ||
375 | static void i915_handle_error(struct drm_device *dev) | ||
376 | { | ||
377 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
378 | u32 eir = I915_READ(EIR); | ||
379 | u32 pipea_stats = I915_READ(PIPEASTAT); | ||
380 | u32 pipeb_stats = I915_READ(PIPEBSTAT); | ||
381 | |||
382 | i915_capture_error_state(dev); | ||
383 | |||
384 | printk(KERN_ERR "render error detected, EIR: 0x%08x\n", | ||
385 | eir); | ||
386 | |||
387 | if (IS_G4X(dev)) { | ||
388 | if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) { | ||
389 | u32 ipeir = I915_READ(IPEIR_I965); | ||
390 | |||
391 | printk(KERN_ERR " IPEIR: 0x%08x\n", | ||
392 | I915_READ(IPEIR_I965)); | ||
393 | printk(KERN_ERR " IPEHR: 0x%08x\n", | ||
394 | I915_READ(IPEHR_I965)); | ||
395 | printk(KERN_ERR " INSTDONE: 0x%08x\n", | ||
396 | I915_READ(INSTDONE_I965)); | ||
397 | printk(KERN_ERR " INSTPS: 0x%08x\n", | ||
398 | I915_READ(INSTPS)); | ||
399 | printk(KERN_ERR " INSTDONE1: 0x%08x\n", | ||
400 | I915_READ(INSTDONE1)); | ||
401 | printk(KERN_ERR " ACTHD: 0x%08x\n", | ||
402 | I915_READ(ACTHD_I965)); | ||
403 | I915_WRITE(IPEIR_I965, ipeir); | ||
404 | (void)I915_READ(IPEIR_I965); | ||
405 | } | ||
406 | if (eir & GM45_ERROR_PAGE_TABLE) { | ||
407 | u32 pgtbl_err = I915_READ(PGTBL_ER); | ||
408 | printk(KERN_ERR "page table error\n"); | ||
409 | printk(KERN_ERR " PGTBL_ER: 0x%08x\n", | ||
410 | pgtbl_err); | ||
411 | I915_WRITE(PGTBL_ER, pgtbl_err); | ||
412 | (void)I915_READ(PGTBL_ER); | ||
413 | } | ||
414 | } | ||
415 | |||
416 | if (IS_I9XX(dev)) { | ||
417 | if (eir & I915_ERROR_PAGE_TABLE) { | ||
418 | u32 pgtbl_err = I915_READ(PGTBL_ER); | ||
419 | printk(KERN_ERR "page table error\n"); | ||
420 | printk(KERN_ERR " PGTBL_ER: 0x%08x\n", | ||
421 | pgtbl_err); | ||
422 | I915_WRITE(PGTBL_ER, pgtbl_err); | ||
423 | (void)I915_READ(PGTBL_ER); | ||
424 | } | ||
425 | } | ||
426 | |||
427 | if (eir & I915_ERROR_MEMORY_REFRESH) { | ||
428 | printk(KERN_ERR "memory refresh error\n"); | ||
429 | printk(KERN_ERR "PIPEASTAT: 0x%08x\n", | ||
430 | pipea_stats); | ||
431 | printk(KERN_ERR "PIPEBSTAT: 0x%08x\n", | ||
432 | pipeb_stats); | ||
433 | /* pipestat has already been acked */ | ||
434 | } | ||
435 | if (eir & I915_ERROR_INSTRUCTION) { | ||
436 | printk(KERN_ERR "instruction error\n"); | ||
437 | printk(KERN_ERR " INSTPM: 0x%08x\n", | ||
438 | I915_READ(INSTPM)); | ||
439 | if (!IS_I965G(dev)) { | ||
440 | u32 ipeir = I915_READ(IPEIR); | ||
441 | |||
442 | printk(KERN_ERR " IPEIR: 0x%08x\n", | ||
443 | I915_READ(IPEIR)); | ||
444 | printk(KERN_ERR " IPEHR: 0x%08x\n", | ||
445 | I915_READ(IPEHR)); | ||
446 | printk(KERN_ERR " INSTDONE: 0x%08x\n", | ||
447 | I915_READ(INSTDONE)); | ||
448 | printk(KERN_ERR " ACTHD: 0x%08x\n", | ||
449 | I915_READ(ACTHD)); | ||
450 | I915_WRITE(IPEIR, ipeir); | ||
451 | (void)I915_READ(IPEIR); | ||
452 | } else { | ||
453 | u32 ipeir = I915_READ(IPEIR_I965); | ||
454 | |||
455 | printk(KERN_ERR " IPEIR: 0x%08x\n", | ||
456 | I915_READ(IPEIR_I965)); | ||
457 | printk(KERN_ERR " IPEHR: 0x%08x\n", | ||
458 | I915_READ(IPEHR_I965)); | ||
459 | printk(KERN_ERR " INSTDONE: 0x%08x\n", | ||
460 | I915_READ(INSTDONE_I965)); | ||
461 | printk(KERN_ERR " INSTPS: 0x%08x\n", | ||
462 | I915_READ(INSTPS)); | ||
463 | printk(KERN_ERR " INSTDONE1: 0x%08x\n", | ||
464 | I915_READ(INSTDONE1)); | ||
465 | printk(KERN_ERR " ACTHD: 0x%08x\n", | ||
466 | I915_READ(ACTHD_I965)); | ||
467 | I915_WRITE(IPEIR_I965, ipeir); | ||
468 | (void)I915_READ(IPEIR_I965); | ||
469 | } | ||
470 | } | ||
471 | |||
472 | I915_WRITE(EIR, eir); | ||
473 | (void)I915_READ(EIR); | ||
474 | eir = I915_READ(EIR); | ||
475 | if (eir) { | ||
476 | /* | ||
477 | * some errors might have become stuck, | ||
478 | * mask them. | ||
479 | */ | ||
480 | DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir); | ||
481 | I915_WRITE(EMR, I915_READ(EMR) | eir); | ||
482 | I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); | ||
483 | } | ||
484 | |||
485 | schedule_work(&dev_priv->error_work); | ||
486 | } | ||
487 | |||
334 | irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) | 488 | irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) |
335 | { | 489 | { |
336 | struct drm_device *dev = (struct drm_device *) arg; | 490 | struct drm_device *dev = (struct drm_device *) arg; |
@@ -372,6 +526,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) | |||
372 | pipea_stats = I915_READ(PIPEASTAT); | 526 | pipea_stats = I915_READ(PIPEASTAT); |
373 | pipeb_stats = I915_READ(PIPEBSTAT); | 527 | pipeb_stats = I915_READ(PIPEBSTAT); |
374 | 528 | ||
529 | if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) | ||
530 | i915_handle_error(dev); | ||
531 | |||
375 | /* | 532 | /* |
376 | * Clear the PIPE(A|B)STAT regs before the IIR | 533 | * Clear the PIPE(A|B)STAT regs before the IIR |
377 | */ | 534 | */ |
@@ -409,80 +566,6 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) | |||
409 | I915_READ(PORT_HOTPLUG_STAT); | 566 | I915_READ(PORT_HOTPLUG_STAT); |
410 | } | 567 | } |
411 | 568 | ||
412 | if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) { | ||
413 | u32 eir = I915_READ(EIR); | ||
414 | |||
415 | i915_capture_error_state(dev); | ||
416 | |||
417 | printk(KERN_ERR "render error detected, EIR: 0x%08x\n", | ||
418 | eir); | ||
419 | if (eir & I915_ERROR_PAGE_TABLE) { | ||
420 | u32 pgtbl_err = I915_READ(PGTBL_ER); | ||
421 | printk(KERN_ERR "page table error\n"); | ||
422 | printk(KERN_ERR " PGTBL_ER: 0x%08x\n", | ||
423 | pgtbl_err); | ||
424 | I915_WRITE(PGTBL_ER, pgtbl_err); | ||
425 | (void)I915_READ(PGTBL_ER); | ||
426 | } | ||
427 | if (eir & I915_ERROR_MEMORY_REFRESH) { | ||
428 | printk(KERN_ERR "memory refresh error\n"); | ||
429 | printk(KERN_ERR "PIPEASTAT: 0x%08x\n", | ||
430 | pipea_stats); | ||
431 | printk(KERN_ERR "PIPEBSTAT: 0x%08x\n", | ||
432 | pipeb_stats); | ||
433 | /* pipestat has already been acked */ | ||
434 | } | ||
435 | if (eir & I915_ERROR_INSTRUCTION) { | ||
436 | printk(KERN_ERR "instruction error\n"); | ||
437 | printk(KERN_ERR " INSTPM: 0x%08x\n", | ||
438 | I915_READ(INSTPM)); | ||
439 | if (!IS_I965G(dev)) { | ||
440 | u32 ipeir = I915_READ(IPEIR); | ||
441 | |||
442 | printk(KERN_ERR " IPEIR: 0x%08x\n", | ||
443 | I915_READ(IPEIR)); | ||
444 | printk(KERN_ERR " IPEHR: 0x%08x\n", | ||
445 | I915_READ(IPEHR)); | ||
446 | printk(KERN_ERR " INSTDONE: 0x%08x\n", | ||
447 | I915_READ(INSTDONE)); | ||
448 | printk(KERN_ERR " ACTHD: 0x%08x\n", | ||
449 | I915_READ(ACTHD)); | ||
450 | I915_WRITE(IPEIR, ipeir); | ||
451 | (void)I915_READ(IPEIR); | ||
452 | } else { | ||
453 | u32 ipeir = I915_READ(IPEIR_I965); | ||
454 | |||
455 | printk(KERN_ERR " IPEIR: 0x%08x\n", | ||
456 | I915_READ(IPEIR_I965)); | ||
457 | printk(KERN_ERR " IPEHR: 0x%08x\n", | ||
458 | I915_READ(IPEHR_I965)); | ||
459 | printk(KERN_ERR " INSTDONE: 0x%08x\n", | ||
460 | I915_READ(INSTDONE_I965)); | ||
461 | printk(KERN_ERR " INSTPS: 0x%08x\n", | ||
462 | I915_READ(INSTPS)); | ||
463 | printk(KERN_ERR " INSTDONE1: 0x%08x\n", | ||
464 | I915_READ(INSTDONE1)); | ||
465 | printk(KERN_ERR " ACTHD: 0x%08x\n", | ||
466 | I915_READ(ACTHD_I965)); | ||
467 | I915_WRITE(IPEIR_I965, ipeir); | ||
468 | (void)I915_READ(IPEIR_I965); | ||
469 | } | ||
470 | } | ||
471 | |||
472 | I915_WRITE(EIR, eir); | ||
473 | (void)I915_READ(EIR); | ||
474 | eir = I915_READ(EIR); | ||
475 | if (eir) { | ||
476 | /* | ||
477 | * some errors might have become stuck, | ||
478 | * mask them. | ||
479 | */ | ||
480 | DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir); | ||
481 | I915_WRITE(EMR, I915_READ(EMR) | eir); | ||
482 | I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); | ||
483 | } | ||
484 | } | ||
485 | |||
486 | I915_WRITE(IIR, iir); | 569 | I915_WRITE(IIR, iir); |
487 | new_iir = I915_READ(IIR); /* Flush posted writes */ | 570 | new_iir = I915_READ(IIR); /* Flush posted writes */ |
488 | 571 | ||
@@ -830,6 +913,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev) | |||
830 | atomic_set(&dev_priv->irq_received, 0); | 913 | atomic_set(&dev_priv->irq_received, 0); |
831 | 914 | ||
832 | INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); | 915 | INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); |
916 | INIT_WORK(&dev_priv->error_work, i915_error_work_func); | ||
833 | 917 | ||
834 | if (IS_IGDNG(dev)) { | 918 | if (IS_IGDNG(dev)) { |
835 | igdng_irq_preinstall(dev); | 919 | igdng_irq_preinstall(dev); |