Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

 drivers/gpu/drm/i915/intel_ringbuffer.c | 119
 1 file changed, 53 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 16a4eada60a1..654ae991ea13 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -717,7 +717,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_workarounds *w = &dev_priv->workarounds;
 
-	if (WARN_ON_ONCE(w->count == 0))
+	if (w->count == 0)
 		return 0;
 
 	ring->gpu_caches_dirty = true;
@@ -800,42 +800,29 @@ static int wa_add(struct drm_i915_private *dev_priv,
 
 #define WA_WRITE(addr, val)	WA_REG(addr, 0xffffffff, val)
 
-static int bdw_init_workarounds(struct intel_engine_cs *ring)
+static int gen8_init_workarounds(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
 
-	/* WaDisableAsyncFlipPerfMode:bdw */
+	/* WaDisableAsyncFlipPerfMode:bdw,chv */
 	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
 
-	/* WaDisablePartialInstShootdown:bdw */
-	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
+	/* WaDisablePartialInstShootdown:bdw,chv */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
-			  STALL_DOP_GATING_DISABLE);
-
-	/* WaDisableDopClockGating:bdw */
-	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
-			  DOP_CLOCK_GATING_DISABLE);
-
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-			  GEN8_SAMPLER_POWER_BYPASS_DIS);
+			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
 	 * workaround for for a possible hang in the unlikely event a TLB
 	 * invalidation occurs during a PSD flush.
 	 */
+	/* WaForceEnableNonCoherent:bdw,chv */
+	/* WaHdcDisableFetchWhenMasked:bdw,chv */
 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  /* WaForceEnableNonCoherent:bdw */
-			  HDC_FORCE_NON_COHERENT |
-			  /* WaForceContextSaveRestoreNonCoherent:bdw */
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-			  /* WaHdcDisableFetchWhenMasked:bdw */
 			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
-			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
-			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+			  HDC_FORCE_NON_COHERENT);
 
 	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
 	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
@@ -843,13 +830,12 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
 	 * stalling waiting for the earlier ones to write to Hierarchical Z
 	 * buffer."
 	 *
-	 * This optimization is off by default for Broadwell; turn it on.
+	 * This optimization is off by default for BDW and CHV; turn it on.
 	 */
 	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
 
-	/* Wa4x4STCOptimizationDisable:bdw */
-	WA_SET_BIT_MASKED(CACHE_MODE_1,
-			  GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+	/* Wa4x4STCOptimizationDisable:bdw,chv */
+	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 
 	/*
 	 * BSpec recommends 8x4 when MSAA is used,
@@ -866,56 +852,51 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
 	return 0;
 }
 
-static int chv_init_workarounds(struct intel_engine_cs *ring)
+static int bdw_init_workarounds(struct intel_engine_cs *ring)
 {
+	int ret;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+	ret = gen8_init_workarounds(ring);
+	if (ret)
+		return ret;
 
-	/* WaDisableAsyncFlipPerfMode:chv */
-	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
-	/* WaDisablePartialInstShootdown:chv */
-	/* WaDisableThreadStallDopClockGating:chv */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
-			  STALL_DOP_GATING_DISABLE);
+	/* WaDisableDopClockGating:bdw */
+	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+			  DOP_CLOCK_GATING_DISABLE);
+
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+			  GEN8_SAMPLER_POWER_BYPASS_DIS);
 
-	/* Use Force Non-Coherent whenever executing a 3D context. This is a
-	 * workaround for a possible hang in the unlikely event a TLB
-	 * invalidation occurs during a PSD flush.
-	 */
-	/* WaForceEnableNonCoherent:chv */
-	/* WaHdcDisableFetchWhenMasked:chv */
 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_FORCE_NON_COHERENT |
-			  HDC_DONOT_FETCH_MEM_WHEN_MASKED);
+			  /* WaForceContextSaveRestoreNonCoherent:bdw */
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
+			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 
-	/* According to the CACHE_MODE_0 default value documentation, some
-	 * CHV platforms disable this optimization by default. Turn it on.
-	 */
-	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+	return 0;
+}
 
-	/* Wa4x4STCOptimizationDisable:chv */
-	WA_SET_BIT_MASKED(CACHE_MODE_1,
-			  GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+static int chv_init_workarounds(struct intel_engine_cs *ring)
+{
+	int ret;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	ret = gen8_init_workarounds(ring);
+	if (ret)
+		return ret;
+
+	/* WaDisableThreadStallDopClockGating:chv */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
 	/* Improve HiZ throughput on CHV. */
 	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
 
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-			    GEN6_WIZ_HASHING_MASK,
-			    GEN6_WIZ_HASHING_16x4);
-
 	return 0;
 }
 
@@ -961,10 +942,9 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
 	}
 
 	/* Wa4x4STCOptimizationDisable:skl,bxt */
-	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
 	/* WaDisablePartialResolveInVc:skl,bxt */
-	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
+	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
+					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
 
 	/* WaCcsTlbPrefetchDisable:skl,bxt */
 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
@@ -1041,10 +1021,13 @@ static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
 
 static int skl_init_workarounds(struct intel_engine_cs *ring)
 {
+	int ret;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	gen9_init_workarounds(ring);
+	ret = gen9_init_workarounds(ring);
+	if (ret)
+		return ret;
 
 	/* WaDisablePowerCompilerClockGating:skl */
 	if (INTEL_REVID(dev) == SKL_REVID_B0)
@@ -1081,10 +1064,13 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
 
 static int bxt_init_workarounds(struct intel_engine_cs *ring)
 {
+	int ret;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	gen9_init_workarounds(ring);
+	ret = gen9_init_workarounds(ring);
+	if (ret)
+		return ret;
 
 	/* WaDisableThreadStallDopClockGating:bxt */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
@@ -2637,6 +2623,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 			GEN8_RING_SEMAPHORE_INIT;
 		}
 	} else if (INTEL_INFO(dev)->gen >= 6) {
+		ring->init_context = intel_rcs_ctx_init;
 		ring->add_request = gen6_add_request;
 		ring->flush = gen7_render_ring_flush;
 		if (INTEL_INFO(dev)->gen == 6)
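
The structure this diff introduces — a shared gen8_init_workarounds() that bdw_init_workarounds() and chv_init_workarounds() call before applying their platform-specific bits, with the return value propagated rather than ignored (the skl/bxt hunks fix the same ignored-return problem for gen9_init_workarounds()) — is sketched below in a minimal, self-contained form. The engine struct and workaround counts are simplified stand-ins for illustration only, not the driver's real i915 types.

/*
 * Minimal sketch of the common-init-plus-platform-hook pattern:
 * each platform initializer delegates to the shared generation
 * initializer first, checks its return value, then adds only the
 * platform-specific workarounds.
 */
#include <stdio.h>

struct engine {
	const char *name;
	unsigned int wa_count;	/* number of workarounds recorded */
};

static int gen8_init_workarounds(struct engine *e)
{
	/* workarounds common to all gen8 platforms would go here */
	e->wa_count += 5;
	return 0;		/* 0 on success, negative errno on failure */
}

static int bdw_init_workarounds(struct engine *e)
{
	int ret = gen8_init_workarounds(e);
	if (ret)
		return ret;	/* propagate instead of ignoring, as the diff does */

	e->wa_count += 3;	/* BDW-only workarounds */
	return 0;
}

static int chv_init_workarounds(struct engine *e)
{
	int ret = gen8_init_workarounds(e);
	if (ret)
		return ret;

	e->wa_count += 2;	/* CHV-only workarounds */
	return 0;
}

int main(void)
{
	struct engine e = { .name = "render", .wa_count = 0 };

	if (bdw_init_workarounds(&e))
		return 1;
	printf("%s: %u workarounds applied\n", e.name, e.wa_count);
	return 0;
}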