author	Mika Kuoppala <mika.kuoppala@linux.intel.com>	2014-10-07 10:21:26 -0400
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2014-10-24 10:34:07 -0400
commit	7225342ab501befdb64bcec76ded41f5897c0855 (patch)
tree	5243e7eb5a347031451c320aaae58af69bd64a42 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent	8eff426233e61332686acea22a4199288047ec64 (diff)
drm/i915: Build workaround list in ring initialization
If we build the workaround list in ring initialization and decouple it
from the actual writing of values, we gain the ability to decide where
and how we want to apply the values. The advantage of this will become
more clear when we need to initialize workarounds on older gens where
it is not possible to write all the registers through ring LRIs.

v2: rebase on newest bdw workarounds

Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Cc: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Arun Siluvery <arun.siluvery@linux.intel.com>
[danvet: Resolve tiny conflict in comments and ocd alignments a bit.]
[danvet2: Remove bogus force_wake_get call spotted by Paulo and QA.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c | 184
1 file changed, 104 insertions(+), 80 deletions(-)
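The interesting part of the diff below is the split between building the
workaround table (wa_add() and the WA_* macros) and applying it later
(intel_ring_workarounds_emit(), which replays the table as one
MI_LOAD_REGISTER_IMM per entry). A minimal userspace sketch of that same
build-then-replay pattern; all names in it (wa_table, wa_emit, the
register addresses) are illustrative only, not the kernel's API:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_WA_REGS 16  /* stand-in for I915_MAX_WA_REGS */

    struct wa_reg { uint32_t addr, value, mask; };

    static struct wa_reg wa_table[MAX_WA_REGS];
    static int wa_count;

    /* Build phase: record the register write instead of performing it. */
    static int wa_add(uint32_t addr, uint32_t value, uint32_t mask)
    {
            if (wa_count >= MAX_WA_REGS)
                    return -1;  /* the kernel version returns -ENOSPC */
            wa_table[wa_count++] = (struct wa_reg){ addr, value, mask };
            return 0;
    }

    /* Apply phase: replay the table through whatever mechanism the
     * hardware allows; the real code emits ring LRIs, here we print.
     * Re-runnable after reset/suspend without rebuilding the list. */
    static void wa_emit(void)
    {
            for (int i = 0; i < wa_count; i++)
                    printf("LRI 0x%04x <- 0x%08x (mask 0x%04x)\n",
                           wa_table[i].addr, wa_table[i].value,
                           wa_table[i].mask);
    }

    int main(void)
    {
            wa_add(0xe4f0, 0x00040004, 0x0004);  /* made-up addr/value */
            wa_add(0x7300, 0x00100010, 0x0010);
            wa_emit();
            return 0;
    }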
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 5ebe46a05a05..5f935d4dfb6a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -665,79 +665,107 @@ err:
 	return ret;
 }
 
-static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
-				       u32 addr, u32 value)
+static int intel_ring_workarounds_emit(struct intel_engine_cs *ring)
 {
+	int ret, i;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_workarounds *w = &dev_priv->workarounds;
 
-	if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
-		return;
+	if (WARN_ON(w->count == 0))
+		return 0;
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, addr);
-	intel_ring_emit(ring, value);
+	ring->gpu_caches_dirty = true;
+	ret = intel_ring_flush_all_caches(ring);
+	if (ret)
+		return ret;
 
-	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
-	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
-	/* value is updated with the status of remaining bits of this
-	 * register when it is read from debugfs file
-	 */
-	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
-	dev_priv->num_wa_regs++;
+	ret = intel_ring_begin(ring, w->count * 3);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < w->count; i++) {
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, w->reg[i].addr);
+		intel_ring_emit(ring, w->reg[i].value);
+	}
+
+	intel_ring_advance(ring);
+
+	ring->gpu_caches_dirty = true;
+	ret = intel_ring_flush_all_caches(ring);
+	if (ret)
+		return ret;
+
+	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
 
-	return;
+	return 0;
+}
+
+static int wa_add(struct drm_i915_private *dev_priv,
+		  const u32 addr, const u32 val, const u32 mask)
+{
+	const u32 idx = dev_priv->workarounds.count;
+
+	if (WARN_ON(idx >= I915_MAX_WA_REGS))
+		return -ENOSPC;
+
+	dev_priv->workarounds.reg[idx].addr = addr;
+	dev_priv->workarounds.reg[idx].value = val;
+	dev_priv->workarounds.reg[idx].mask = mask;
+
+	dev_priv->workarounds.count++;
+
+	return 0;
 }
 
+#define WA_REG(addr, val, mask) { \
+		const int r = wa_add(dev_priv, (addr), (val), (mask)); \
+		if (r) \
+			return r; \
+	}
+
+#define WA_SET_BIT_MASKED(addr, mask) \
+	WA_REG(addr, _MASKED_BIT_ENABLE(mask), (mask) & 0xffff)
+
+#define WA_CLR_BIT_MASKED(addr, mask) \
+	WA_REG(addr, _MASKED_BIT_DISABLE(mask), (mask) & 0xffff)
+
+#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask)
+#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask)
+
+#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff)
+
 static int bdw_init_workarounds(struct intel_engine_cs *ring)
 {
-	int ret;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	/*
-	 * workarounds applied in this fn are part of register state context,
-	 * they need to be re-initialized followed by gpu reset, suspend/resume,
-	 * module reload.
-	 */
-	dev_priv->num_wa_regs = 0;
-	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-	/*
-	 * update the number of dwords required based on the
-	 * actual number of workarounds applied
-	 */
-	ret = intel_ring_begin(ring, 18);
-	if (ret)
-		return ret;
-
 	/* WaDisablePartialInstShootdown:bdw */
 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
-					      | STALL_DOP_GATING_DISABLE));
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
+			  STALL_DOP_GATING_DISABLE);
 
 	/* WaDisableDopClockGating:bdw */
-	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
-			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+			  DOP_CLOCK_GATING_DISABLE);
 
-	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
-			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+			  GEN8_SAMPLER_POWER_BYPASS_DIS);
 
 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
 	 * workaround for for a possible hang in the unlikely event a TLB
 	 * invalidation occurs during a PSD flush.
 	 */
 	/* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
-	intel_ring_emit_wa(ring, HDC_CHICKEN0,
-			   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT |
-					      (IS_BDW_GT3(dev) ?
-					       HDC_FENCE_DEST_SLM_DISABLE : 0)
-					      ));
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_NON_COHERENT |
+			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 
 	/* Wa4x4STCOptimizationDisable:bdw */
-	intel_ring_emit_wa(ring, CACHE_MODE_1,
-			   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+	WA_SET_BIT_MASKED(CACHE_MODE_1,
+			  GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 
 	/*
 	 * BSpec recommends 8x4 when MSAA is used,
@@ -747,52 +775,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
 	 * disable bit, which we don't touch here, but it's good
 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 	 */
-	intel_ring_emit_wa(ring, GEN7_GT_MODE,
-			   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
-	intel_ring_advance(ring);
-
-	DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
-			 dev_priv->num_wa_regs);
+	WA_SET_BIT_MASKED(GEN7_GT_MODE,
+			  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
 
 	return 0;
 }
 
 static int chv_init_workarounds(struct intel_engine_cs *ring)
 {
-	int ret;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	/*
-	 * workarounds applied in this fn are part of register state context,
-	 * they need to be re-initialized followed by gpu reset, suspend/resume,
-	 * module reload.
-	 */
-	dev_priv->num_wa_regs = 0;
-	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-	ret = intel_ring_begin(ring, 12);
-	if (ret)
-		return ret;
-
 	/* WaDisablePartialInstShootdown:chv */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
 	/* WaDisableThreadStallDopClockGating:chv */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-			   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  STALL_DOP_GATING_DISABLE);
 
 	/* WaDisableDopClockGating:chv (pre-production hw) */
-	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
-			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+			  DOP_CLOCK_GATING_DISABLE);
 
 	/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
-	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
-			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+			  GEN8_SAMPLER_POWER_BYPASS_DIS);
 
-	intel_ring_advance(ring);
+	return 0;
+}
+
+static int init_workarounds_ring(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	WARN_ON(ring->id != RCS);
+
+	dev_priv->workarounds.count = 0;
+
+	if (IS_BROADWELL(dev))
+		return bdw_init_workarounds(ring);
+
+	if (IS_CHERRYVIEW(dev))
+		return chv_init_workarounds(ring);
 
 	return 0;
 }
@@ -852,7 +878,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
 	if (HAS_L3_DPF(dev))
 		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
 
-	return ret;
+	return init_workarounds_ring(ring);
 }
 
 static void render_ring_cleanup(struct intel_engine_cs *ring)
@@ -2298,10 +2324,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 			dev_priv->semaphore_obj = obj;
 		}
 	}
-	if (IS_CHERRYVIEW(dev))
-		ring->init_context = chv_init_workarounds;
-	else
-		ring->init_context = bdw_init_workarounds;
+
+	ring->init_context = intel_ring_workarounds_emit;
 	ring->add_request = gen6_add_request;
 	ring->flush = gen8_render_ring_flush;
 	ring->irq_get = gen8_ring_get_irq;
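
A note on the WA_SET_BIT_MASKED/WA_CLR_BIT_MASKED macros above: the
registers they target are "masked" registers, where the upper 16 bits of
each write select which of the lower 16 bits actually change; that is
why only (mask) & 0xffff is stored as the mask checked later via
debugfs. A sketch of the semantics, assuming the usual i915_reg.h
definitions of the masked-bit helpers (masked_write() is an illustrative
model of the hardware behaviour, not kernel code):

    #include <stdint.h>

    /* As in i915_reg.h: high word = per-bit write enable. */
    #define _MASKED_BIT_ENABLE(a)   (((a) << 16) | (a))  /* set bits */
    #define _MASKED_BIT_DISABLE(a)  ((a) << 16)          /* clear bits */

    /* Model of writing 'val' to a masked register holding 'reg'. */
    static uint32_t masked_write(uint32_t reg, uint32_t val)
    {
            uint32_t enable = val >> 16;  /* bits allowed to change */

            return (reg & ~enable) | (val & enable);
    }

Writing _MASKED_BIT_ENABLE(bit) therefore sets just that bit and leaves
every other bit untouched, which is what lets the LRI replay in
intel_ring_workarounds_emit() avoid read-modify-write cycles on the
ring.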