Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
 drivers/gpu/drm/i915/intel_ringbuffer.c | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 215 insertions(+), 41 deletions(-)
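
The patch un-statics several ring helpers (intel_ring_initialized(), __intel_ring_space(), intel_ring_space(), intel_ring_stopped(), intel_init_pipe_control()/intel_fini_pipe_control(), intel_alloc_ringbuffer_obj()/intel_destroy_ringbuffer_obj()) so that the logical-ring (execlists) code introduced elsewhere in this series can reuse them. The sketch below illustrates the kind of caller this enables; it is not part of the patch, and the surrounding function, the I915_NUM_RINGS loop and the dev_priv->ring[] access are assumptions used only for illustration.

/* Illustrative sketch only -- not part of this patch. */
static void sketch_dump_ring_space(struct drm_i915_private *dev_priv)
{
	int i;

	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct intel_engine_cs *ring = &dev_priv->ring[i];

		/* Skip engines whose ringbuffer was never set up; with
		 * execlists enabled the new helper checks the default
		 * context's ringbuffer object instead of ring->buffer.
		 */
		if (!intel_ring_initialized(ring) || !ring->buffer)
			continue;

		/* intel_ring_space() reports the bytes free between HEAD
		 * and TAIL, less the I915_RING_FREE_SPACE safety margin.
		 */
		DRM_DEBUG_DRIVER("%s: %d bytes free\n",
				 ring->name, intel_ring_space(ring->buffer));
	}
}
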
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 47a126a0493f..0a80e419b589 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -33,14 +33,24 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
- * but keeps the logic simple. Indeed, the whole purpose of this macro is just
- * to give some inclination as to some of the magic values used in the various
- * workarounds!
- */
-#define CACHELINE_BYTES 64
+bool
+intel_ring_initialized(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+
+	if (!dev)
+		return false;
+
+	if (i915.enable_execlists) {
+		struct intel_context *dctx = ring->default_context;
+		struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;
+
+		return ringbuf->obj;
+	} else
+		return ring->buffer && ring->buffer->obj;
+}
 
-static inline int __ring_space(int head, int tail, int size)
+int __intel_ring_space(int head, int tail, int size)
 {
 	int space = head - (tail + I915_RING_FREE_SPACE);
 	if (space < 0)
@@ -48,12 +58,13 @@ static inline int __ring_space(int head, int tail, int size)
 	return space;
 }
 
-static inline int ring_space(struct intel_ringbuffer *ringbuf)
+int intel_ring_space(struct intel_ringbuffer *ringbuf)
 {
-	return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
+	return __intel_ring_space(ringbuf->head & HEAD_ADDR,
+				  ringbuf->tail, ringbuf->size);
 }
 
-static bool intel_ring_stopped(struct intel_engine_cs *ring)
+bool intel_ring_stopped(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
@@ -433,7 +444,14 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
 		return ret;
 	}
 
-	return gen8_emit_pipe_control(ring, flags, scratch_addr);
+	ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
+	if (ret)
+		return ret;
+
+	if (!invalidate_domains && flush_domains)
+		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
+
+	return 0;
 }
 
 static void ring_write_tail(struct intel_engine_cs *ring,
@@ -476,9 +494,14 @@ static bool stop_ring(struct intel_engine_cs *ring)
 
 	if (!IS_GEN2(ring->dev)) {
 		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
-		if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
-			DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
-			return false;
+		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
+			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
+			/* Sometimes we observe that the idle flag is not
+			 * set even though the ring is empty. So double
+			 * check before giving up.
+			 */
+			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
+				return false;
 		}
 	}
 
@@ -540,6 +563,14 @@ static int init_ring_common(struct intel_engine_cs *ring)
 	 * also enforces ordering), otherwise the hw might lose the new ring
 	 * register values. */
 	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
+
+	/* WaClearRingBufHeadRegAtInit:ctg,elk */
+	if (I915_READ_HEAD(ring))
+		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
+			  ring->name, I915_READ_HEAD(ring));
+	I915_WRITE_HEAD(ring, 0);
+	(void)I915_READ_HEAD(ring);
+
 	I915_WRITE_CTL(ring,
 			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
 			| RING_VALID);
@@ -563,7 +594,7 @@ static int init_ring_common(struct intel_engine_cs *ring)
 	else {
 		ringbuf->head = I915_READ_HEAD(ring);
 		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		ringbuf->last_retired_head = -1;
 	}
 
@@ -575,8 +606,25 @@ out:
 	return ret;
 }
 
-static int
-init_pipe_control(struct intel_engine_cs *ring)
+void
+intel_fini_pipe_control(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+
+	if (ring->scratch.obj == NULL)
+		return;
+
+	if (INTEL_INFO(dev)->gen >= 5) {
+		kunmap(sg_page(ring->scratch.obj->pages->sgl));
+		i915_gem_object_ggtt_unpin(ring->scratch.obj);
+	}
+
+	drm_gem_object_unreference(&ring->scratch.obj->base);
+	ring->scratch.obj = NULL;
+}
+
+int
+intel_init_pipe_control(struct intel_engine_cs *ring)
 {
 	int ret;
 
@@ -617,6 +665,135 @@ err:
 	return ret;
 }
 
+static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
+				      u32 addr, u32 value)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
+		return;
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, addr);
+	intel_ring_emit(ring, value);
+
+	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
+	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
+	/* value is updated with the status of remaining bits of this
+	 * register when it is read from debugfs file
+	 */
+	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
+	dev_priv->num_wa_regs++;
+
+	return;
+}
+
+static int bdw_init_workarounds(struct intel_engine_cs *ring)
+{
+	int ret;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * workarounds applied in this fn are part of register state context,
+	 * they need to be re-initialized following a gpu reset, suspend/resume
+	 * or module reload.
+	 */
+	dev_priv->num_wa_regs = 0;
+	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
+
+	/*
+	 * update the number of dwords required based on the
+	 * actual number of workarounds applied
+	 */
+	ret = intel_ring_begin(ring, 18);
+	if (ret)
+		return ret;
+
+	/* WaDisablePartialInstShootdown:bdw */
+	/* WaDisableThreadStallDopClockGating:bdw */
+	/* FIXME: Unclear whether we really need this on production bdw. */
+	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
+					      | STALL_DOP_GATING_DISABLE));
+
+	/* WaDisableDopClockGating:bdw May not be needed for production */
+	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
+			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
+			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+
+	/* Use Force Non-Coherent whenever executing a 3D context. This is a
+	 * workaround for a possible hang in the unlikely event a TLB
+	 * invalidation occurs during a PSD flush.
+	 */
+	intel_ring_emit_wa(ring, HDC_CHICKEN0,
+			   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
+
+	/* Wa4x4STCOptimizationDisable:bdw */
+	intel_ring_emit_wa(ring, CACHE_MODE_1,
+			   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+
+	/*
+	 * BSpec recommends 8x4 when MSAA is used,
+	 * however in practice 16x4 seems fastest.
+	 *
+	 * Note that PS/WM thread counts depend on the WIZ hashing
+	 * disable bit, which we don't touch here, but it's good
+	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+	 */
+	intel_ring_emit_wa(ring, GEN7_GT_MODE,
+			   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
+	intel_ring_advance(ring);
+
+	DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
+			 dev_priv->num_wa_regs);
+
+	return 0;
+}
+
+static int chv_init_workarounds(struct intel_engine_cs *ring)
+{
+	int ret;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * workarounds applied in this fn are part of register state context,
+	 * they need to be re-initialized following a gpu reset, suspend/resume
+	 * or module reload.
+	 */
+	dev_priv->num_wa_regs = 0;
+	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
+
+	ret = intel_ring_begin(ring, 12);
+	if (ret)
+		return ret;
+
+	/* WaDisablePartialInstShootdown:chv */
+	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
+
+	/* WaDisableThreadStallDopClockGating:chv */
+	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+			   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+
+	/* WaDisableDopClockGating:chv (pre-production hw) */
+	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
+			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+	/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
+	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
+			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 static int init_render_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -651,7 +828,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
 				   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 
 	if (INTEL_INFO(dev)->gen >= 5) {
-		ret = init_pipe_control(ring);
+		ret = intel_init_pipe_control(ring);
 		if (ret)
 			return ret;
 	}
@@ -686,16 +863,7 @@ static void render_ring_cleanup(struct intel_engine_cs *ring)
 		dev_priv->semaphore_obj = NULL;
 	}
 
-	if (ring->scratch.obj == NULL)
-		return;
-
-	if (INTEL_INFO(dev)->gen >= 5) {
-		kunmap(sg_page(ring->scratch.obj->pages->sgl));
-		i915_gem_object_ggtt_unpin(ring->scratch.obj);
-	}
-
-	drm_gem_object_unreference(&ring->scratch.obj->base);
-	ring->scratch.obj = NULL;
+	intel_fini_pipe_control(ring);
 }
 
 static int gen8_rcs_signal(struct intel_engine_cs *signaller,
@@ -1526,7 +1694,7 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
 	return 0;
 }
 
-static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
 	if (!ringbuf->obj)
 		return;
@@ -1537,8 +1705,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 	ringbuf->obj = NULL;
 }
 
-static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
-				      struct intel_ringbuffer *ringbuf)
+int intel_alloc_ringbuffer_obj(struct drm_device *dev,
+			       struct intel_ringbuffer *ringbuf)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;
@@ -1600,7 +1768,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	INIT_LIST_HEAD(&ring->execlist_queue);
 	ringbuf->size = 32 * PAGE_SIZE;
+	ringbuf->ring = ring;
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
 
 	init_waitqueue_head(&ring->irq_queue);
@@ -1683,13 +1853,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 		ringbuf->head = ringbuf->last_retired_head;
 		ringbuf->last_retired_head = -1;
 
-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		if (ringbuf->space >= n)
 			return 0;
 	}
 
 	list_for_each_entry(request, &ring->request_list, list) {
-		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
+		if (__intel_ring_space(request->tail, ringbuf->tail,
+				       ringbuf->size) >= n) {
 			seqno = request->seqno;
 			break;
 		}
@@ -1706,7 +1877,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 	ringbuf->head = ringbuf->last_retired_head;
 	ringbuf->last_retired_head = -1;
 
-	ringbuf->space = ring_space(ringbuf);
+	ringbuf->space = intel_ring_space(ringbuf);
 	return 0;
 }
 
@@ -1735,7 +1906,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	trace_i915_ring_wait_begin(ring);
 	do {
 		ringbuf->head = I915_READ_HEAD(ring);
-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		if (ringbuf->space >= n) {
 			ret = 0;
 			break;
@@ -1787,7 +1958,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 		iowrite32(MI_NOOP, virt++);
 
 	ringbuf->tail = 0;
-	ringbuf->space = ring_space(ringbuf);
+	ringbuf->space = intel_ring_space(ringbuf);
 
 	return 0;
 }
@@ -1992,9 +2163,7 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 			      u64 offset, u32 len,
 			      unsigned flags)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL &&
-		!(flags & I915_DISPATCH_SECURE);
+	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
 	int ret;
 
 	ret = intel_ring_begin(ring, 4);
@@ -2023,8 +2192,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 		return ret;
 
 	intel_ring_emit(ring,
-			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
-			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
+			MI_BATCH_BUFFER_START |
+			(flags & I915_DISPATCH_SECURE ?
+			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
 	/* bit0-7 is the length on GEN6+ */
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
@@ -2123,6 +2293,10 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 			dev_priv->semaphore_obj = obj;
 		}
 	}
+	if (IS_CHERRYVIEW(dev))
+		ring->init_context = chv_init_workarounds;
+	else
+		ring->init_context = bdw_init_workarounds;
 	ring->add_request = gen6_add_request;
 	ring->flush = gen8_render_ring_flush;
 	ring->irq_get = gen8_ring_get_irq;
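
The ring free-space bookkeeping touched by several hunks above (ring_space() renamed to intel_ring_space()) boils down to head/tail arithmetic with wrap-around. Below is a minimal standalone sketch of that computation, not part of the patch; the 64-byte I915_RING_FREE_SPACE slack and the numbers in the worked example are assumptions used only for illustration.

/* Standalone sketch of the __intel_ring_space() computation (illustration
 * only; assumes a 64-byte I915_RING_FREE_SPACE safety margin).
 */
static int sketch_ring_space(int head, int tail, int size)
{
	/* Free space runs from TAIL up to HEAD, minus some slack so the
	 * two pointers never become equal on a completely full ring.
	 */
	int space = head - (tail + 64 /* I915_RING_FREE_SPACE */);

	if (space < 0)	/* TAIL is ahead of HEAD: wrap around the buffer */
		space += size;
	return space;
}

/* Worked example: head = 0x100, tail = 0x3000, size = 32 * 4096 = 0x20000:
 * 0x100 - (0x3000 + 0x40) = -0x2f40; after wrapping, 0x20000 - 0x2f40 =
 * 0x1d0c0 bytes are reported free.
 */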