diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 119 |
1 file changed, 53 insertions, 66 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 16a4eada60a1..654ae991ea13 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c | |||
@@ -717,7 +717,7 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) | |||
717 | struct drm_i915_private *dev_priv = dev->dev_private; | 717 | struct drm_i915_private *dev_priv = dev->dev_private; |
718 | struct i915_workarounds *w = &dev_priv->workarounds; | 718 | struct i915_workarounds *w = &dev_priv->workarounds; |
719 | 719 | ||
720 | if (WARN_ON_ONCE(w->count == 0)) | 720 | if (w->count == 0) |
721 | return 0; | 721 | return 0; |
722 | 722 | ||
723 | ring->gpu_caches_dirty = true; | 723 | ring->gpu_caches_dirty = true; |
@@ -800,42 +800,29 @@ static int wa_add(struct drm_i915_private *dev_priv, | |||
800 | 800 | ||
801 | #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) | 801 | #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) |
802 | 802 | ||
803 | static int bdw_init_workarounds(struct intel_engine_cs *ring) | 803 | static int gen8_init_workarounds(struct intel_engine_cs *ring) |
804 | { | 804 | { |
805 | struct drm_device *dev = ring->dev; | 805 | struct drm_device *dev = ring->dev; |
806 | struct drm_i915_private *dev_priv = dev->dev_private; | 806 | struct drm_i915_private *dev_priv = dev->dev_private; |
807 | 807 | ||
808 | WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); | 808 | WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); |
809 | 809 | ||
810 | /* WaDisableAsyncFlipPerfMode:bdw */ | 810 | /* WaDisableAsyncFlipPerfMode:bdw,chv */ |
811 | WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); | 811 | WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); |
812 | 812 | ||
813 | /* WaDisablePartialInstShootdown:bdw */ | 813 | /* WaDisablePartialInstShootdown:bdw,chv */ |
814 | /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ | ||
815 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, | 814 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, |
816 | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | | 815 | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); |
817 | STALL_DOP_GATING_DISABLE); | ||
818 | |||
819 | /* WaDisableDopClockGating:bdw */ | ||
820 | WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, | ||
821 | DOP_CLOCK_GATING_DISABLE); | ||
822 | |||
823 | WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, | ||
824 | GEN8_SAMPLER_POWER_BYPASS_DIS); | ||
825 | 816 | ||
826 | /* Use Force Non-Coherent whenever executing a 3D context. This is a | 817 | /* Use Force Non-Coherent whenever executing a 3D context. This is a |
827 | * workaround for a possible hang in the unlikely event a TLB | 818 | * workaround for a possible hang in the unlikely event a TLB |
828 | * invalidation occurs during a PSD flush. | 819 | * invalidation occurs during a PSD flush. |
829 | */ | 820 | */ |
821 | /* WaForceEnableNonCoherent:bdw,chv */ | ||
822 | /* WaHdcDisableFetchWhenMasked:bdw,chv */ | ||
830 | WA_SET_BIT_MASKED(HDC_CHICKEN0, | 823 | WA_SET_BIT_MASKED(HDC_CHICKEN0, |
831 | /* WaForceEnableNonCoherent:bdw */ | ||
832 | HDC_FORCE_NON_COHERENT | | ||
833 | /* WaForceContextSaveRestoreNonCoherent:bdw */ | ||
834 | HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | | ||
835 | /* WaHdcDisableFetchWhenMasked:bdw */ | ||
836 | HDC_DONOT_FETCH_MEM_WHEN_MASKED | | 824 | HDC_DONOT_FETCH_MEM_WHEN_MASKED | |
837 | /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ | 825 | HDC_FORCE_NON_COHERENT); |
838 | (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); | ||
839 | 826 | ||
840 | /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: | 827 | /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: |
841 | * "The Hierarchical Z RAW Stall Optimization allows non-overlapping | 828 | * "The Hierarchical Z RAW Stall Optimization allows non-overlapping |
@@ -843,13 +830,12 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) | |||
843 | * stalling waiting for the earlier ones to write to Hierarchical Z | 830 | * stalling waiting for the earlier ones to write to Hierarchical Z |
844 | * buffer." | 831 | * buffer." |
845 | * | 832 | * |
846 | * This optimization is off by default for Broadwell; turn it on. | 833 | * This optimization is off by default for BDW and CHV; turn it on. |
847 | */ | 834 | */ |
848 | WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); | 835 | WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); |
849 | 836 | ||
850 | /* Wa4x4STCOptimizationDisable:bdw */ | 837 | /* Wa4x4STCOptimizationDisable:bdw,chv */ |
851 | WA_SET_BIT_MASKED(CACHE_MODE_1, | 838 | WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); |
852 | GEN8_4x4_STC_OPTIMIZATION_DISABLE); | ||
853 | 839 | ||
854 | /* | 840 | /* |
855 | * BSpec recommends 8x4 when MSAA is used, | 841 | * BSpec recommends 8x4 when MSAA is used, |
@@ -866,56 +852,51 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) | |||
866 | return 0; | 852 | return 0; |
867 | } | 853 | } |
868 | 854 | ||
869 | static int chv_init_workarounds(struct intel_engine_cs *ring) | 855 | static int bdw_init_workarounds(struct intel_engine_cs *ring) |
870 | { | 856 | { |
857 | int ret; | ||
871 | struct drm_device *dev = ring->dev; | 858 | struct drm_device *dev = ring->dev; |
872 | struct drm_i915_private *dev_priv = dev->dev_private; | 859 | struct drm_i915_private *dev_priv = dev->dev_private; |
873 | 860 | ||
874 | WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); | 861 | ret = gen8_init_workarounds(ring); |
862 | if (ret) | ||
863 | return ret; | ||
875 | 864 | ||
876 | /* WaDisableAsyncFlipPerfMode:chv */ | 865 | /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ |
877 | WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); | 866 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); |
878 | 867 | ||
879 | /* WaDisablePartialInstShootdown:chv */ | 868 | /* WaDisableDopClockGating:bdw */ |
880 | /* WaDisableThreadStallDopClockGating:chv */ | 869 | WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, |
881 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, | 870 | DOP_CLOCK_GATING_DISABLE); |
882 | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | | 871 | |
883 | STALL_DOP_GATING_DISABLE); | 872 | WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, |
873 | GEN8_SAMPLER_POWER_BYPASS_DIS); | ||
884 | 874 | ||
885 | /* Use Force Non-Coherent whenever executing a 3D context. This is a | ||
886 | * workaround for a possible hang in the unlikely event a TLB | ||
887 | * invalidation occurs during a PSD flush. | ||
888 | */ | ||
889 | /* WaForceEnableNonCoherent:chv */ | ||
890 | /* WaHdcDisableFetchWhenMasked:chv */ | ||
891 | WA_SET_BIT_MASKED(HDC_CHICKEN0, | 875 | WA_SET_BIT_MASKED(HDC_CHICKEN0, |
892 | HDC_FORCE_NON_COHERENT | | 876 | /* WaForceContextSaveRestoreNonCoherent:bdw */ |
893 | HDC_DONOT_FETCH_MEM_WHEN_MASKED); | 877 | HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | |
878 | /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ | ||
879 | (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); | ||
894 | 880 | ||
895 | /* According to the CACHE_MODE_0 default value documentation, some | 881 | return 0; |
896 | * CHV platforms disable this optimization by default. Turn it on. | 882 | } |
897 | */ | ||
898 | WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); | ||
899 | 883 | ||
900 | /* Wa4x4STCOptimizationDisable:chv */ | 884 | static int chv_init_workarounds(struct intel_engine_cs *ring) |
901 | WA_SET_BIT_MASKED(CACHE_MODE_1, | 885 | { |
902 | GEN8_4x4_STC_OPTIMIZATION_DISABLE); | 886 | int ret; |
887 | struct drm_device *dev = ring->dev; | ||
888 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
889 | |||
890 | ret = gen8_init_workarounds(ring); | ||
891 | if (ret) | ||
892 | return ret; | ||
893 | |||
894 | /* WaDisableThreadStallDopClockGating:chv */ | ||
895 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); | ||
903 | 896 | ||
904 | /* Improve HiZ throughput on CHV. */ | 897 | /* Improve HiZ throughput on CHV. */ |
905 | WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); | 898 | WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); |
906 | 899 | ||
907 | /* | ||
908 | * BSpec recommends 8x4 when MSAA is used, | ||
909 | * however in practice 16x4 seems fastest. | ||
910 | * | ||
911 | * Note that PS/WM thread counts depend on the WIZ hashing | ||
912 | * disable bit, which we don't touch here, but it's good | ||
913 | * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). | ||
914 | */ | ||
915 | WA_SET_FIELD_MASKED(GEN7_GT_MODE, | ||
916 | GEN6_WIZ_HASHING_MASK, | ||
917 | GEN6_WIZ_HASHING_16x4); | ||
918 | |||
919 | return 0; | 900 | return 0; |
920 | } | 901 | } |
921 | 902 | ||
@@ -961,10 +942,9 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) | |||
961 | } | 942 | } |
962 | 943 | ||
963 | /* Wa4x4STCOptimizationDisable:skl,bxt */ | 944 | /* Wa4x4STCOptimizationDisable:skl,bxt */ |
964 | WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); | ||
965 | |||
966 | /* WaDisablePartialResolveInVc:skl,bxt */ | 945 | /* WaDisablePartialResolveInVc:skl,bxt */ |
967 | WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); | 946 | WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | |
947 | GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); | ||
968 | 948 | ||
969 | /* WaCcsTlbPrefetchDisable:skl,bxt */ | 949 | /* WaCcsTlbPrefetchDisable:skl,bxt */ |
970 | WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, | 950 | WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, |
@@ -1041,10 +1021,13 @@ static int skl_tune_iz_hashing(struct intel_engine_cs *ring) | |||
1041 | 1021 | ||
1042 | static int skl_init_workarounds(struct intel_engine_cs *ring) | 1022 | static int skl_init_workarounds(struct intel_engine_cs *ring) |
1043 | { | 1023 | { |
1024 | int ret; | ||
1044 | struct drm_device *dev = ring->dev; | 1025 | struct drm_device *dev = ring->dev; |
1045 | struct drm_i915_private *dev_priv = dev->dev_private; | 1026 | struct drm_i915_private *dev_priv = dev->dev_private; |
1046 | 1027 | ||
1047 | gen9_init_workarounds(ring); | 1028 | ret = gen9_init_workarounds(ring); |
1029 | if (ret) | ||
1030 | return ret; | ||
1048 | 1031 | ||
1049 | /* WaDisablePowerCompilerClockGating:skl */ | 1032 | /* WaDisablePowerCompilerClockGating:skl */ |
1050 | if (INTEL_REVID(dev) == SKL_REVID_B0) | 1033 | if (INTEL_REVID(dev) == SKL_REVID_B0) |
@@ -1081,10 +1064,13 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) | |||
1081 | 1064 | ||
1082 | static int bxt_init_workarounds(struct intel_engine_cs *ring) | 1065 | static int bxt_init_workarounds(struct intel_engine_cs *ring) |
1083 | { | 1066 | { |
1067 | int ret; | ||
1084 | struct drm_device *dev = ring->dev; | 1068 | struct drm_device *dev = ring->dev; |
1085 | struct drm_i915_private *dev_priv = dev->dev_private; | 1069 | struct drm_i915_private *dev_priv = dev->dev_private; |
1086 | 1070 | ||
1087 | gen9_init_workarounds(ring); | 1071 | ret = gen9_init_workarounds(ring); |
1072 | if (ret) | ||
1073 | return ret; | ||
1088 | 1074 | ||
1089 | /* WaDisableThreadStallDopClockGating:bxt */ | 1075 | /* WaDisableThreadStallDopClockGating:bxt */ |
1090 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, | 1076 | WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, |
@@ -2637,6 +2623,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) | |||
2637 | GEN8_RING_SEMAPHORE_INIT; | 2623 | GEN8_RING_SEMAPHORE_INIT; |
2638 | } | 2624 | } |
2639 | } else if (INTEL_INFO(dev)->gen >= 6) { | 2625 | } else if (INTEL_INFO(dev)->gen >= 6) { |
2626 | ring->init_context = intel_rcs_ctx_init; | ||
2640 | ring->add_request = gen6_add_request; | 2627 | ring->add_request = gen6_add_request; |
2641 | ring->flush = gen7_render_ring_flush; | 2628 | ring->flush = gen7_render_ring_flush; |
2642 | if (INTEL_INFO(dev)->gen == 6) | 2629 | if (INTEL_INFO(dev)->gen == 6) |