author     Chris Wilson <chris@chris-wilson.co.uk>    2010-02-18 05:24:56 -0500
committer  Eric Anholt <eric@anholt.net>              2010-02-22 12:01:39 -0500
commit     9df30794f609d9412f14cfd0eb7b45dd64d0b14e (patch)
tree       17e2658af3fe1af83c6a89ca13c3c93752bdfd13 /drivers/gpu/drm/i915/i915_irq.c
parent     7b9c5abee98c54f85bcc04bd4d7ec8d5094c73f4 (diff)
drm/i915: Record batch buffer following GPU error
In order to improve our diagnostic capabilities following a GPU hang
and subsequent reset, we need to record the batch buffer that triggered
the error. We assume that the current batch buffer, plus a few details
about what else is on the active list, will be sufficient -- at the very
least an improvement over nothing.
The extra information is stored in /debug/dri/.../i915_error_state
following an error, and may be decoded using
intel_gpu_tools/tools/intel_error_decode.
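For example, with debugfs mounted at /sys/kernel/debug, a typical session
might look like the following (hypothetical: the card minor number "0" and
the exact intel_error_decode invocation depend on your setup):

    # Dump the error state captured after the hang (card minor 0 assumed).
    cat /sys/kernel/debug/dri/0/i915_error_state > /tmp/error_state
    # Decode the recorded batch buffer and ringbuffer contents.
    intel_error_decode /tmp/error_state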
v2: Avoid excessive work under spinlocks.
v3: Include ringbuffer for later analysis.
v4: Use kunmap correctly and record more buffer state.
v5: Search ringbuffer for current batch buffer.
v6: Use a work fn for the impossible IRQ error case.
v7: Avoid non-atomic paths whilst in IRQ context.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_irq.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c | 224
1 file changed, 217 insertions, 7 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1a56ae7b5a78..ba1d8314c1ce 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -432,6 +432,121 @@ static void i915_error_work_func(struct work_struct *work)
         }
 }
 
+static struct drm_i915_error_object *
+i915_error_object_create(struct drm_device *dev,
+                         struct drm_gem_object *src)
+{
+        struct drm_i915_error_object *dst;
+        struct drm_i915_gem_object *src_priv;
+        int page, page_count;
+
+        if (src == NULL)
+                return NULL;
+
+        src_priv = src->driver_private;
+        if (src_priv->pages == NULL)
+                return NULL;
+
+        page_count = src->size / PAGE_SIZE;
+
+        dst = kmalloc(sizeof(*dst) + page_count * sizeof (u32 *), GFP_ATOMIC);
+        if (dst == NULL)
+                return NULL;
+
+        for (page = 0; page < page_count; page++) {
+                void *s, *d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
+                if (d == NULL)
+                        goto unwind;
+                s = kmap_atomic(src_priv->pages[page], KM_USER0);
+                memcpy(d, s, PAGE_SIZE);
+                kunmap_atomic(s, KM_USER0);
+                dst->pages[page] = d;
+        }
+        dst->page_count = page_count;
+        dst->gtt_offset = src_priv->gtt_offset;
+
+        return dst;
+
+unwind:
+        while (page--)
+                kfree(dst->pages[page]);
+        kfree(dst);
+        return NULL;
+}
+
+static void
+i915_error_object_free(struct drm_i915_error_object *obj)
+{
+        int page;
+
+        if (obj == NULL)
+                return;
+
+        for (page = 0; page < obj->page_count; page++)
+                kfree(obj->pages[page]);
+
+        kfree(obj);
+}
+
+static void
+i915_error_state_free(struct drm_device *dev,
+                      struct drm_i915_error_state *error)
+{
+        i915_error_object_free(error->batchbuffer[0]);
+        i915_error_object_free(error->batchbuffer[1]);
+        i915_error_object_free(error->ringbuffer);
+        kfree(error->active_bo);
+        kfree(error);
+}
+
+static u32
+i915_get_bbaddr(struct drm_device *dev, u32 *ring)
+{
+        u32 cmd;
+
+        if (IS_I830(dev) || IS_845G(dev))
+                cmd = MI_BATCH_BUFFER;
+        else if (IS_I965G(dev))
+                cmd = (MI_BATCH_BUFFER_START | (2 << 6) |
+                       MI_BATCH_NON_SECURE_I965);
+        else
+                cmd = (MI_BATCH_BUFFER_START | (2 << 6));
+
+        return ring[0] == cmd ? ring[1] : 0;
+}
+
+static u32
+i915_ringbuffer_last_batch(struct drm_device *dev)
+{
+        struct drm_i915_private *dev_priv = dev->dev_private;
+        u32 head, bbaddr;
+        u32 *ring;
+
+        /* Locate the current position in the ringbuffer and walk back
+         * to find the most recently dispatched batch buffer.
+         */
+        bbaddr = 0;
+        head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+        ring = (u32 *)(dev_priv->ring.virtual_start + head);
+
+        while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
+                bbaddr = i915_get_bbaddr(dev, ring);
+                if (bbaddr)
+                        break;
+        }
+
+        if (bbaddr == 0) {
+                ring = (u32 *)(dev_priv->ring.virtual_start + dev_priv->ring.Size);
+                while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
+                        bbaddr = i915_get_bbaddr(dev, ring);
+                        if (bbaddr)
+                                break;
+                }
+        }
+
+        return bbaddr;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -444,19 +559,26 @@ static void i915_error_work_func(struct work_struct *work)
 static void i915_capture_error_state(struct drm_device *dev)
 {
         struct drm_i915_private *dev_priv = dev->dev_private;
+        struct drm_i915_gem_object *obj_priv;
         struct drm_i915_error_state *error;
+        struct drm_gem_object *batchbuffer[2];
         unsigned long flags;
+        u32 bbaddr;
+        int count;
 
         spin_lock_irqsave(&dev_priv->error_lock, flags);
-        if (dev_priv->first_error)
-                goto out;
+        error = dev_priv->first_error;
+        spin_unlock_irqrestore(&dev_priv->error_lock, flags);
+        if (error)
+                return;
 
         error = kmalloc(sizeof(*error), GFP_ATOMIC);
         if (!error) {
-                DRM_DEBUG_DRIVER("out ot memory, not capturing error state\n");
-                goto out;
+                DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
+                return;
         }
 
+        error->seqno = i915_get_gem_seqno(dev);
         error->eir = I915_READ(EIR);
         error->pgtbl_er = I915_READ(PGTBL_ER);
         error->pipeastat = I915_READ(PIPEASTAT);
@@ -467,6 +589,7 @@ static void i915_capture_error_state(struct drm_device *dev)
                 error->ipehr = I915_READ(IPEHR);
                 error->instdone = I915_READ(INSTDONE);
                 error->acthd = I915_READ(ACTHD);
+                error->bbaddr = 0;
         } else {
                 error->ipeir = I915_READ(IPEIR_I965);
                 error->ipehr = I915_READ(IPEHR_I965);
@@ -474,14 +597,101 @@ static void i915_capture_error_state(struct drm_device *dev)
                 error->instps = I915_READ(INSTPS);
                 error->instdone1 = I915_READ(INSTDONE1);
                 error->acthd = I915_READ(ACTHD_I965);
+                error->bbaddr = I915_READ64(BB_ADDR);
         }
 
-        do_gettimeofday(&error->time);
+        bbaddr = i915_ringbuffer_last_batch(dev);
+
+        /* Grab the current batchbuffer, most likely to have crashed. */
+        batchbuffer[0] = NULL;
+        batchbuffer[1] = NULL;
+        count = 0;
+        list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+                struct drm_gem_object *obj = obj_priv->obj;
+
+                if (batchbuffer[0] == NULL &&
+                    bbaddr >= obj_priv->gtt_offset &&
+                    bbaddr < obj_priv->gtt_offset + obj->size)
+                        batchbuffer[0] = obj;
+
+                if (batchbuffer[1] == NULL &&
+                    error->acthd >= obj_priv->gtt_offset &&
+                    error->acthd < obj_priv->gtt_offset + obj->size &&
+                    batchbuffer[0] != obj)
+                        batchbuffer[1] = obj;
+
+                count++;
+        }
 
-        dev_priv->first_error = error;
+        /* We need to copy these to an anonymous buffer as the simplest
+         * method to avoid being overwritten by userspace.
+         */
+        error->batchbuffer[0] = i915_error_object_create(dev, batchbuffer[0]);
+        error->batchbuffer[1] = i915_error_object_create(dev, batchbuffer[1]);
+
+        /* Record the ringbuffer */
+        error->ringbuffer = i915_error_object_create(dev, dev_priv->ring.ring_obj);
+
+        /* Record buffers on the active list. */
+        error->active_bo = NULL;
+        error->active_bo_count = 0;
+
+        if (count)
+                error->active_bo = kmalloc(sizeof(*error->active_bo)*count,
+                                           GFP_ATOMIC);
+
+        if (error->active_bo) {
+                int i = 0;
+                list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+                        struct drm_gem_object *obj = obj_priv->obj;
+
+                        error->active_bo[i].size = obj->size;
+                        error->active_bo[i].name = obj->name;
+                        error->active_bo[i].seqno = obj_priv->last_rendering_seqno;
+                        error->active_bo[i].gtt_offset = obj_priv->gtt_offset;
+                        error->active_bo[i].read_domains = obj->read_domains;
+                        error->active_bo[i].write_domain = obj->write_domain;
+                        error->active_bo[i].fence_reg = obj_priv->fence_reg;
+                        error->active_bo[i].pinned = 0;
+                        if (obj_priv->pin_count > 0)
+                                error->active_bo[i].pinned = 1;
+                        if (obj_priv->user_pin_count > 0)
+                                error->active_bo[i].pinned = -1;
+                        error->active_bo[i].tiling = obj_priv->tiling_mode;
+                        error->active_bo[i].dirty = obj_priv->dirty;
+                        error->active_bo[i].purgeable = obj_priv->madv != I915_MADV_WILLNEED;
+
+                        if (++i == count)
+                                break;
+                }
+                error->active_bo_count = i;
+        }
+
+        do_gettimeofday(&error->time);
 
-out:
+        spin_lock_irqsave(&dev_priv->error_lock, flags);
+        if (dev_priv->first_error == NULL) {
+                dev_priv->first_error = error;
+                error = NULL;
+        }
         spin_unlock_irqrestore(&dev_priv->error_lock, flags);
+
+        if (error)
+                i915_error_state_free(dev, error);
+}
+
+void i915_destroy_error_state(struct drm_device *dev)
+{
+        struct drm_i915_private *dev_priv = dev->dev_private;
+        struct drm_i915_error_state *error;
+
+        spin_lock(&dev_priv->error_lock);
+        error = dev_priv->first_error;
+        dev_priv->first_error = NULL;
+        spin_unlock(&dev_priv->error_lock);
+
+        if (error)
+                i915_error_state_free(dev, error);
 }
 
 /**