author		Chris Wilson <chris@chris-wilson.co.uk>	2010-02-18 05:24:56 -0500
committer	Eric Anholt <eric@anholt.net>	2010-02-22 12:01:39 -0500
commit		9df30794f609d9412f14cfd0eb7b45dd64d0b14e (patch)
tree		17e2658af3fe1af83c6a89ca13c3c93752bdfd13 /drivers/gpu/drm
parent		7b9c5abee98c54f85bcc04bd4d7ec8d5094c73f4 (diff)
drm/i915: Record batch buffer following GPU error
In order to improve our diagnostic capabilities following a GPU hang
and subsequent reset, we need to record the batch buffer that triggered
the error. We assume that the current batch buffer, plus a few details
about what else is on the active list, will be sufficient -- at the
very least an improvement over nothing. The extra information is stored
in /debug/dri/.../i915_error_state following an error, and may be
decoded using intel_gpu_tools/tools/intel_error_decode.

v2: Avoid excessive work under spinlocks.
v3: Include ringbuffer for later analysis.
v4: Use kunmap correctly and record more buffer state.
v5: Search ringbuffer for current batch buffer.
v6: Use a work fn for the impossible IRQ error case.
v7: Avoid non-atomic paths whilst in IRQ context.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
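As a usage sketch (not part of the patch itself): the captured record is plain
text in debugfs, so it can be saved with ordinary file I/O before being fed to
intel_error_decode. The path below is an assumption for illustration -- it
takes DRM minor 0 and a debugfs mount at /debug; the minor number elided as
"..." above depends on the device.

#include <stdio.h>

/* Hypothetical path: substitute the real debugfs mount point and DRM minor. */
#define ERROR_STATE "/debug/dri/0/i915_error_state"

int main(void)
{
	char line[256];
	FILE *f = fopen(ERROR_STATE, "r");

	if (f == NULL) {
		perror(ERROR_STATE);
		return 1;
	}

	/* Dump the captured record; decode it offline with intel_error_decode. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}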
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--	drivers/gpu/drm/i915/i915_debugfs.c	85
-rw-r--r--	drivers/gpu/drm/i915/i915_dma.c		2
-rw-r--r--	drivers/gpu/drm/i915/i915_drv.h		21
-rw-r--r--	drivers/gpu/drm/i915/i915_irq.c		224
-rw-r--r--	drivers/gpu/drm/i915/i915_reg.h		1
5 files changed, 326 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 56095b3d28ce..5eed46312442 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -350,6 +350,36 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static const char *pin_flag(int pinned)
+{
+	if (pinned > 0)
+		return " P";
+	else if (pinned < 0)
+		return " p";
+	else
+		return "";
+}
+
+static const char *tiling_flag(int tiling)
+{
+	switch (tiling) {
+	default:
+	case I915_TILING_NONE: return "";
+	case I915_TILING_X: return " X";
+	case I915_TILING_Y: return " Y";
+	}
+}
+
+static const char *dirty_flag(int dirty)
+{
+	return dirty ? " dirty" : "";
+}
+
+static const char *purgeable_flag(int purgeable)
+{
+	return purgeable ? " purgeable" : "";
+}
+
 static int i915_error_state(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -357,6 +387,7 @@ static int i915_error_state(struct seq_file *m, void *unused)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_error_state *error;
 	unsigned long flags;
+	int i, page, offset, elt;
 
 	spin_lock_irqsave(&dev_priv->error_lock, flags);
 	if (!dev_priv->first_error) {
@@ -368,6 +399,7 @@ static int i915_error_state(struct seq_file *m, void *unused)
 
 	seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
 		   error->time.tv_usec);
+	seq_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
 	seq_printf(m, "EIR: 0x%08x\n", error->eir);
 	seq_printf(m, "  PGTBL_ER: 0x%08x\n", error->pgtbl_er);
 	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
@@ -379,6 +411,59 @@ static int i915_error_state(struct seq_file *m, void *unused)
 		seq_printf(m, "  INSTPS: 0x%08x\n", error->instps);
 		seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
 	}
+	seq_printf(m, "seqno: 0x%08x\n", error->seqno);
+
+	if (error->active_bo_count) {
+		seq_printf(m, "Buffers [%d]:\n", error->active_bo_count);
+
+		for (i = 0; i < error->active_bo_count; i++) {
+			seq_printf(m, "  %08x %8zd %08x %08x %08x%s%s%s%s",
+				   error->active_bo[i].gtt_offset,
+				   error->active_bo[i].size,
+				   error->active_bo[i].read_domains,
+				   error->active_bo[i].write_domain,
+				   error->active_bo[i].seqno,
+				   pin_flag(error->active_bo[i].pinned),
+				   tiling_flag(error->active_bo[i].tiling),
+				   dirty_flag(error->active_bo[i].dirty),
+				   purgeable_flag(error->active_bo[i].purgeable));
+
+			if (error->active_bo[i].name)
+				seq_printf(m, " (name: %d)", error->active_bo[i].name);
+			if (error->active_bo[i].fence_reg != I915_FENCE_REG_NONE)
+				seq_printf(m, " (fence: %d)", error->active_bo[i].fence_reg);
+
+			seq_printf(m, "\n");
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(error->batchbuffer); i++) {
+		if (error->batchbuffer[i]) {
+			struct drm_i915_error_object *obj = error->batchbuffer[i];
+
+			seq_printf(m, "--- gtt_offset = 0x%08x\n", obj->gtt_offset);
+			offset = 0;
+			for (page = 0; page < obj->page_count; page++) {
+				for (elt = 0; elt < PAGE_SIZE/4; elt++) {
+					seq_printf(m, "%08x : %08x\n", offset, obj->pages[page][elt]);
+					offset += 4;
+				}
+			}
+		}
+	}
+
+	if (error->ringbuffer) {
+		struct drm_i915_error_object *obj = error->ringbuffer;
+
+		seq_printf(m, "--- ringbuffer = 0x%08x\n", obj->gtt_offset);
+		offset = 0;
+		for (page = 0; page < obj->page_count; page++) {
+			for (elt = 0; elt < PAGE_SIZE/4; elt++) {
+				seq_printf(m, "%08x : %08x\n", offset, obj->pages[page][elt]);
+				offset += 4;
+			}
+		}
+	}
 
 out:
 	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
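For reference, each buffer record printed above is "gtt_offset size
read_domains write_domain seqno" followed by optional flag suffixes from the
helpers at the top of this file's hunk (" P" kernel-pinned, " p" user-pinned,
" X"/" Y" tiling, " dirty", " purgeable"). A minimal standalone sketch of that
suffix convention, reusing two of the helper bodies and feeding them example
values only:

#include <stdio.h>

/* pin_flag/dirty_flag as defined in i915_debugfs.c above. */
static const char *pin_flag(int pinned)
{
	if (pinned > 0)
		return " P";	/* pinned by the kernel */
	else if (pinned < 0)
		return " p";	/* pinned by userspace */
	else
		return "";
}

static const char *dirty_flag(int dirty)
{
	return dirty ? " dirty" : "";
}

int main(void)
{
	/* Example values: a kernel-pinned, dirty 4 KiB buffer at 0x00100000. */
	printf("  %08x %8zu%s%s\n", 0x00100000, (size_t)4096,
	       pin_flag(1), dirty_flag(1));
	return 0;
}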
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 47805a41395e..dbfe07c90cbc 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1644,6 +1644,8 @@ int i915_driver_unload(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	i915_destroy_error_state(dev);
+
 	destroy_workqueue(dev_priv->wq);
 	del_timer_sync(&dev_priv->hangcheck_timer);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 40b0da37b1f1..ec06d4865a5f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -150,7 +150,27 @@ struct drm_i915_error_state {
 	u32 instps;
 	u32 instdone1;
 	u32 seqno;
+	u64 bbaddr;
 	struct timeval time;
+	struct drm_i915_error_object {
+		int page_count;
+		u32 gtt_offset;
+		u32 *pages[0];
+	} *ringbuffer, *batchbuffer[2];
+	struct drm_i915_error_buffer {
+		size_t size;
+		u32 name;
+		u32 seqno;
+		u32 gtt_offset;
+		u32 read_domains;
+		u32 write_domain;
+		u32 fence_reg;
+		s32 pinned:2;
+		u32 tiling:2;
+		u32 dirty:1;
+		u32 purgeable:1;
+	} *active_bo;
+	u32 active_bo_count;
 };
 
 struct drm_i915_display_funcs {
@@ -778,6 +798,7 @@ extern int i965_reset(struct drm_device *dev, u8 flags);
 
 /* i915_irq.c */
 void i915_hangcheck_elapsed(unsigned long data);
+void i915_destroy_error_state(struct drm_device *dev);
 extern int i915_irq_emit(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 extern int i915_irq_wait(struct drm_device *dev, void *data,
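The drm_i915_error_object added above ends in the kernel's zero-length-array
idiom (u32 *pages[0]), so i915_error_object_create in the next file can size a
single allocation holding the header plus one pointer per copied page. A
minimal userspace sketch of that sizing arithmetic -- the names mirror the
struct above but the code is illustrative, not part of the patch:

#include <stdlib.h>
#include <string.h>

typedef unsigned int u32;

struct error_object {
	int page_count;
	u32 gtt_offset;
	u32 *pages[0];	/* flexible tail: page_count pointers follow */
};

static struct error_object *object_alloc(int page_count)
{
	/* One allocation covers the header and the pointer table,
	 * just as the kmalloc in i915_error_object_create does. */
	struct error_object *obj =
		malloc(sizeof(*obj) + page_count * sizeof(u32 *));
	if (obj == NULL)
		return NULL;
	obj->page_count = page_count;
	memset(obj->pages, 0, page_count * sizeof(u32 *));
	return obj;
}

int main(void)
{
	struct error_object *obj = object_alloc(8);
	free(obj);
	return obj ? 0 : 1;
}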
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1a56ae7b5a78..ba1d8314c1ce 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -432,6 +432,121 @@ static void i915_error_work_func(struct work_struct *work)
 	}
 }
 
+static struct drm_i915_error_object *
+i915_error_object_create(struct drm_device *dev,
+			 struct drm_gem_object *src)
+{
+	struct drm_i915_error_object *dst;
+	struct drm_i915_gem_object *src_priv;
+	int page, page_count;
+
+	if (src == NULL)
+		return NULL;
+
+	src_priv = src->driver_private;
+	if (src_priv->pages == NULL)
+		return NULL;
+
+	page_count = src->size / PAGE_SIZE;
+
+	dst = kmalloc(sizeof(*dst) + page_count * sizeof (u32 *), GFP_ATOMIC);
+	if (dst == NULL)
+		return NULL;
+
+	for (page = 0; page < page_count; page++) {
+		void *s, *d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
+		if (d == NULL)
+			goto unwind;
+		s = kmap_atomic(src_priv->pages[page], KM_USER0);
+		memcpy(d, s, PAGE_SIZE);
+		kunmap_atomic(s, KM_USER0);
+		dst->pages[page] = d;
+	}
+	dst->page_count = page_count;
+	dst->gtt_offset = src_priv->gtt_offset;
+
+	return dst;
+
+unwind:
+	while (page--)
+		kfree(dst->pages[page]);
+	kfree(dst);
+	return NULL;
+}
+
+static void
+i915_error_object_free(struct drm_i915_error_object *obj)
+{
+	int page;
+
+	if (obj == NULL)
+		return;
+
+	for (page = 0; page < obj->page_count; page++)
+		kfree(obj->pages[page]);
+
+	kfree(obj);
+}
+
+static void
+i915_error_state_free(struct drm_device *dev,
+		      struct drm_i915_error_state *error)
+{
+	i915_error_object_free(error->batchbuffer[0]);
+	i915_error_object_free(error->batchbuffer[1]);
+	i915_error_object_free(error->ringbuffer);
+	kfree(error->active_bo);
+	kfree(error);
+}
+
+static u32
+i915_get_bbaddr(struct drm_device *dev, u32 *ring)
+{
+	u32 cmd;
+
+	if (IS_I830(dev) || IS_845G(dev))
+		cmd = MI_BATCH_BUFFER;
+	else if (IS_I965G(dev))
+		cmd = (MI_BATCH_BUFFER_START | (2 << 6) |
+		       MI_BATCH_NON_SECURE_I965);
+	else
+		cmd = (MI_BATCH_BUFFER_START | (2 << 6));
+
+	return ring[0] == cmd ? ring[1] : 0;
+}
+
+static u32
+i915_ringbuffer_last_batch(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 head, bbaddr;
+	u32 *ring;
+
+	/* Locate the current position in the ringbuffer and walk back
+	 * to find the most recently dispatched batch buffer.
+	 */
+	bbaddr = 0;
+	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+	ring = (u32 *)(dev_priv->ring.virtual_start + head);
+
+	while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
+		bbaddr = i915_get_bbaddr(dev, ring);
+		if (bbaddr)
+			break;
+	}
+
+	if (bbaddr == 0) {
+		ring = (u32 *)(dev_priv->ring.virtual_start + dev_priv->ring.Size);
+		while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
+			bbaddr = i915_get_bbaddr(dev, ring);
+			if (bbaddr)
+				break;
+		}
+	}
+
+	return bbaddr;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -444,19 +559,26 @@ static void i915_error_work_func(struct work_struct *work)
 static void i915_capture_error_state(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv;
 	struct drm_i915_error_state *error;
+	struct drm_gem_object *batchbuffer[2];
 	unsigned long flags;
+	u32 bbaddr;
+	int count;
 
 	spin_lock_irqsave(&dev_priv->error_lock, flags);
-	if (dev_priv->first_error)
-		goto out;
+	error = dev_priv->first_error;
+	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
+	if (error)
+		return;
 
 	error = kmalloc(sizeof(*error), GFP_ATOMIC);
 	if (!error) {
-		DRM_DEBUG_DRIVER("out ot memory, not capturing error state\n");
-		goto out;
+		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
+		return;
 	}
 
+	error->seqno = i915_get_gem_seqno(dev);
 	error->eir = I915_READ(EIR);
 	error->pgtbl_er = I915_READ(PGTBL_ER);
 	error->pipeastat = I915_READ(PIPEASTAT);
@@ -467,6 +589,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->ipehr = I915_READ(IPEHR);
 		error->instdone = I915_READ(INSTDONE);
 		error->acthd = I915_READ(ACTHD);
+		error->bbaddr = 0;
 	} else {
 		error->ipeir = I915_READ(IPEIR_I965);
 		error->ipehr = I915_READ(IPEHR_I965);
@@ -474,14 +597,101 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->instps = I915_READ(INSTPS);
 		error->instdone1 = I915_READ(INSTDONE1);
 		error->acthd = I915_READ(ACTHD_I965);
+		error->bbaddr = I915_READ64(BB_ADDR);
 	}
 
-	do_gettimeofday(&error->time);
+	bbaddr = i915_ringbuffer_last_batch(dev);
+
+	/* Grab the current batchbuffer, most likely to have crashed. */
+	batchbuffer[0] = NULL;
+	batchbuffer[1] = NULL;
+	count = 0;
+	list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+		struct drm_gem_object *obj = obj_priv->obj;
+
+		if (batchbuffer[0] == NULL &&
+		    bbaddr >= obj_priv->gtt_offset &&
+		    bbaddr < obj_priv->gtt_offset + obj->size)
+			batchbuffer[0] = obj;
+
+		if (batchbuffer[1] == NULL &&
+		    error->acthd >= obj_priv->gtt_offset &&
+		    error->acthd < obj_priv->gtt_offset + obj->size &&
+		    batchbuffer[0] != obj)
+			batchbuffer[1] = obj;
+
+		count++;
+	}
 
-	dev_priv->first_error = error;
+	/* We need to copy these to an anonymous buffer as the simplest
+	 * method to avoid being overwritten by userspace.
+	 */
+	error->batchbuffer[0] = i915_error_object_create(dev, batchbuffer[0]);
+	error->batchbuffer[1] = i915_error_object_create(dev, batchbuffer[1]);
+
+	/* Record the ringbuffer */
+	error->ringbuffer = i915_error_object_create(dev, dev_priv->ring.ring_obj);
+
+	/* Record buffers on the active list. */
+	error->active_bo = NULL;
+	error->active_bo_count = 0;
+
+	if (count)
+		error->active_bo = kmalloc(sizeof(*error->active_bo)*count,
+					   GFP_ATOMIC);
+
+	if (error->active_bo) {
+		int i = 0;
+		list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+			struct drm_gem_object *obj = obj_priv->obj;
+
+			error->active_bo[i].size = obj->size;
+			error->active_bo[i].name = obj->name;
+			error->active_bo[i].seqno = obj_priv->last_rendering_seqno;
+			error->active_bo[i].gtt_offset = obj_priv->gtt_offset;
+			error->active_bo[i].read_domains = obj->read_domains;
+			error->active_bo[i].write_domain = obj->write_domain;
+			error->active_bo[i].fence_reg = obj_priv->fence_reg;
+			error->active_bo[i].pinned = 0;
+			if (obj_priv->pin_count > 0)
+				error->active_bo[i].pinned = 1;
+			if (obj_priv->user_pin_count > 0)
+				error->active_bo[i].pinned = -1;
+			error->active_bo[i].tiling = obj_priv->tiling_mode;
+			error->active_bo[i].dirty = obj_priv->dirty;
+			error->active_bo[i].purgeable = obj_priv->madv != I915_MADV_WILLNEED;
+
+			if (++i == count)
+				break;
+		}
+		error->active_bo_count = i;
+	}
+
+	do_gettimeofday(&error->time);
 
-out:
+	spin_lock_irqsave(&dev_priv->error_lock, flags);
+	if (dev_priv->first_error == NULL) {
+		dev_priv->first_error = error;
+		error = NULL;
+	}
 	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
+
+	if (error)
+		i915_error_state_free(dev, error);
+}
+
+void i915_destroy_error_state(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_error_state *error;
+
+	spin_lock(&dev_priv->error_lock);
+	error = dev_priv->first_error;
+	dev_priv->first_error = NULL;
+	spin_unlock(&dev_priv->error_lock);
+
+	if (error)
+		i915_error_state_free(dev, error);
 }
 
 /**
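Worth noting in i915_capture_error_state above (the v2 and v7 items from the
changelog): the spinlock is held only for the initial first_error check and
the final publish, so all the GFP_ATOMIC allocation and page copying runs
unlocked, and a racing capture that loses the publish simply frees its own
record. A condensed sketch of that publish-or-free pattern, with a pthread
mutex standing in for error_lock and a dummy struct standing in for the real
error state:

#include <pthread.h>
#include <stdlib.h>

struct error_state { int dummy; };

static pthread_mutex_t error_lock = PTHREAD_MUTEX_INITIALIZER;
static struct error_state *first_error;

static void capture_error_state(void)
{
	struct error_state *error;

	/* Cheap locked peek: bail out early if a record already exists. */
	pthread_mutex_lock(&error_lock);
	error = first_error;
	pthread_mutex_unlock(&error_lock);
	if (error)
		return;

	/* Expensive capture work happens with no lock held. */
	error = malloc(sizeof(*error));
	if (!error)
		return;

	/* Publish only if we are still first; otherwise free our copy. */
	pthread_mutex_lock(&error_lock);
	if (first_error == NULL) {
		first_error = error;
		error = NULL;
	}
	pthread_mutex_unlock(&error_lock);

	if (error)
		free(error);
}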
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d344c031f188..eff8d850a758 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -328,6 +328,7 @@
 #define CM0_COLOR_EVICT_DISABLE (1<<3)
 #define CM0_DEPTH_WRITE_DISABLE (1<<1)
 #define CM0_RC_OP_FLUSH_DISABLE (1<<0)
+#define BB_ADDR		0x02140 /* 8 bytes */
 #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
 
 