aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
authorArun Siluvery <arun.siluvery@linux.intel.com>2014-08-26 09:44:50 -0400
committerDaniel Vetter <daniel.vetter@ffwll.ch>2014-09-03 05:04:42 -0400
commit86d7f23842f1bce3ab5e8c8d0c676112bbc4c99b (patch)
tree3cb49838c8b3d1faa087ac682b5bf8aaaf6ab807 /drivers/gpu/drm/i915/intel_ringbuffer.c
parentc5ad011d7d256ecbe173324029e992817194d2b0 (diff)
drm/i915/bdw: Apply workarounds in render ring init function
For BDW, workarounds are currently initialized in init_clock_gating(), but they are lost during reset, suspend/resume, etc. This patch moves the WAs that are part of the register state context to the render ring init function; otherwise the default context ends up with incorrect values, as they don't get initialized until the init_clock_gating function runs. v2: Add workarounds to golden render state. This method has its own issues: first of all, it is different for each gen, and it is generated using a tool, so adding new workarounds and maintaining them across gens is not a straightforward process. v3: Use LRIs to emit these workarounds (Ville). Instead of modifying the golden render state, the same LRIs are emitted from within the driver. v4: Use abstract name when exporting gen specific routines (Chris). For: VIZ-4092 Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c79
1 files changed, 79 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index de7654623acc..1d5bfdb4fe97 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -657,6 +657,84 @@ err:
657 return ret; 657 return ret;
658} 658}
659 659
/*
 * Emit a single workaround into the ring as three consecutive dwords:
 * an MI_LOAD_REGISTER_IMM(1) command, then @addr, then @value.
 *
 * This helper does not reserve ring space itself; in this file the caller
 * (gen8_init_workarounds) reserves it with intel_ring_begin() first.
 */
660static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
661 u32 addr, u32 value)
662{
663 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
664 intel_ring_emit(ring, addr);
665 intel_ring_emit(ring, value);
666}
667
/*
 * Emit the gen8 workaround register writes into @ring.
 *
 * Reserves 24 dwords with intel_ring_begin() -- eight intel_ring_emit_wa()
 * calls at three dwords each -- emits the writes, then calls
 * intel_ring_advance().
 *
 * Returns the error code from intel_ring_begin() if the reservation fails,
 * 0 otherwise.
 */
668static int gen8_init_workarounds(struct intel_engine_cs *ring)
669{
670 int ret;
671
672 /*
673 * workarounds applied in this fn are part of register state context,
674 * they need to be re-initialized following a gpu reset, suspend/resume,
675 * module reload.
676 */
677
678 /*
679 * update the number of dwords required based on the
680 * actual number of workarounds applied
681 */
682 ret = intel_ring_begin(ring, 24);
683 if (ret)
684 return ret;
685
686 /* WaDisablePartialInstShootdown:bdw */
687 /* WaDisableThreadStallDopClockGating:bdw */
688 /* FIXME: Unclear whether we really need this on production bdw. */
689 intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
690 _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
691 | STALL_DOP_GATING_DISABLE));
692
693 /* WaDisableDopClockGating:bdw May not be needed for production */
694 intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
695 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
696
697 /*
698 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
699 * pre-production hardware
700 */
701 intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
702 _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
703 | GEN8_SAMPLER_POWER_BYPASS_DIS));
704
705 intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1,
706 _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
707
708 intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2,
709 _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
710
711 /* Use Force Non-Coherent whenever executing a 3D context. This is a
712 * workaround for a possible hang in the unlikely event a TLB
713 * invalidation occurs during a PSD flush.
714 */
715 intel_ring_emit_wa(ring, HDC_CHICKEN0,
716 _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
717
718 /* Wa4x4STCOptimizationDisable:bdw */
719 intel_ring_emit_wa(ring, CACHE_MODE_1,
720 _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
721
722 /*
723 * BSpec recommends 8x4 when MSAA is used,
724 * however in practice 16x4 seems fastest.
725 *
726 * Note that PS/WM thread counts depend on the WIZ hashing
727 * disable bit, which we don't touch here, but it's good
728 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
729 */
730 intel_ring_emit_wa(ring, GEN7_GT_MODE,
731 GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
732
733 intel_ring_advance(ring);
734
735 return 0;
736}
737
660static int init_render_ring(struct intel_engine_cs *ring) 738static int init_render_ring(struct intel_engine_cs *ring)
661{ 739{
662 struct drm_device *dev = ring->dev; 740 struct drm_device *dev = ring->dev;
@@ -2143,6 +2221,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
2143 dev_priv->semaphore_obj = obj; 2221 dev_priv->semaphore_obj = obj;
2144 } 2222 }
2145 } 2223 }
2224 ring->init_context = gen8_init_workarounds;
2146 ring->add_request = gen6_add_request; 2225 ring->add_request = gen6_add_request;
2147 ring->flush = gen8_render_ring_flush; 2226 ring->flush = gen8_render_ring_flush;
2148 ring->irq_get = gen8_ring_get_irq; 2227 ring->irq_get = gen8_ring_get_irq;