author     Oscar Mateo <oscar.mateo@intel.com>  2018-04-10 12:12:46 -0400
committer  Chris Wilson <chris@chris-wilson.co.uk>  2018-04-11 17:47:01 -0400
commit     7d3c425fefb91da7e984a43ba27dff6cdd53758a (patch)
tree       d323829a482b08f44eef90acbf9ffb1368a06ed3
parent     15c83c436424adf3fe0365e9085a82da1190c95e (diff)
drm/i915: Move a bunch of workaround-related code to its own file
This has grown to be a sizable amount of code, so move it to its own file
before we try to refactor anything. For the moment, we are leaving behind
the WA BB code and the WAs that get applied (incorrectly) in
init_clock_gating, but we will deal with it later.

v2: Use intel_ prefix for code that deals with the hardware (Chris)
v3: Rebased
v4:
  - Rebased
  - New license header
v5:
  - Rebased
  - Added some organisational notes to the file (Chris)
v6: Include DOC section in the documentation build (Jani)

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
[ickle: appease checkpatch, mostly]
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/1523376767-18480-1-git-send-email-oscar.mateo@intel.com
 Documentation/gpu/i915.rst               |   6
 drivers/gpu/drm/i915/Makefile            |   3
 drivers/gpu/drm/i915/intel_engine_cs.c   | 634
 drivers/gpu/drm/i915/intel_lrc.c         |   1
 drivers/gpu/drm/i915/intel_ringbuffer.c  |   1
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   3
 drivers/gpu/drm/i915/intel_workarounds.c | 686
 drivers/gpu/drm/i915/intel_workarounds.h |  13
 8 files changed, 709 insertions(+), 638 deletions(-)
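
Editor's note: most of the moved WA_* macros build on i915's masked-register
convention, where the upper 16 bits of a written value select which of the
lower 16 bits the hardware actually latches, so individual bits can be set or
cleared without a read-modify-write cycle. The sketch below models that
convention; it is simplified from the real _MASKED_BIT_ENABLE()/_MASKED_FIELD()
helpers in i915_reg.h, and apply_masked_write() is an illustrative name, not a
driver function.

    #include <stdint.h>

    /* Simplified model of the i915 masked-register write convention. */
    #define MASKED_FIELD(mask, value)  (((mask) << 16) | (value))
    #define MASKED_BIT_ENABLE(a)       MASKED_FIELD((a), (a))  /* set bit(s) */
    #define MASKED_BIT_DISABLE(a)      MASKED_FIELD((a), 0)    /* clear bit(s) */

    /* What the hardware conceptually does when such a value is written. */
    static uint32_t apply_masked_write(uint32_t old, uint32_t wr)
    {
            uint32_t mask = wr >> 16;    /* which low bits may change */
            uint32_t val  = wr & 0xffff; /* their new values */

            return (old & ~mask) | (val & mask);
    }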
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 34d22f275708..055df45596c1 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -58,6 +58,12 @@ Intel GVT-g Host Support(vGPU device model)
 .. kernel-doc:: drivers/gpu/drm/i915/intel_gvt.c
    :internal:
 
+Workarounds
+-----------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c
+   :doc: Hardware workarounds
+
 Display Hardware Handling
 =========================
 
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0c79c19223af..9bee52a949a9 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -43,7 +43,8 @@ i915-y := i915_drv.o \
           intel_csr.o \
           intel_device_info.o \
           intel_pm.o \
-          intel_runtime_pm.o
+          intel_runtime_pm.o \
+          intel_workarounds.o
 
 i915-$(CONFIG_COMPAT) += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index a217b3fe5f0b..68898d58dd1e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -903,640 +903,6 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
         }
 }
 
-static int wa_add(struct drm_i915_private *dev_priv,
-                  i915_reg_t addr,
-                  const u32 mask, const u32 val)
-{
-        const u32 idx = dev_priv->workarounds.count;
-
-        if (WARN_ON(idx >= I915_MAX_WA_REGS))
-                return -ENOSPC;
-
-        dev_priv->workarounds.reg[idx].addr = addr;
-        dev_priv->workarounds.reg[idx].value = val;
-        dev_priv->workarounds.reg[idx].mask = mask;
-
-        dev_priv->workarounds.count++;
-
-        return 0;
-}
-
-#define WA_REG(addr, mask, val) do { \
-                const int r = wa_add(dev_priv, (addr), (mask), (val)); \
-                if (r) \
-                        return r; \
-        } while (0)
-
-#define WA_SET_BIT_MASKED(addr, mask) \
-        WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
-
-#define WA_CLR_BIT_MASKED(addr, mask) \
-        WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
-
-#define WA_SET_FIELD_MASKED(addr, mask, value) \
-        WA_REG(addr, mask, _MASKED_FIELD(mask, value))
-
-static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
-                                 i915_reg_t reg)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        struct i915_workarounds *wa = &dev_priv->workarounds;
-        const uint32_t index = wa->hw_whitelist_count[engine->id];
-
-        if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
-                return -EINVAL;
-
-        I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
-                   i915_mmio_reg_offset(reg));
-        wa->hw_whitelist_count[engine->id]++;
-
-        return 0;
-}
-
-static int gen8_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-
-        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
-
-        /* WaDisableAsyncFlipPerfMode:bdw,chv */
-        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
-
-        /* WaDisablePartialInstShootdown:bdw,chv */
-        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
-        /* Use Force Non-Coherent whenever executing a 3D context. This is a
-         * workaround for a possible hang in the unlikely event a TLB
-         * invalidation occurs during a PSD flush.
-         */
-        /* WaForceEnableNonCoherent:bdw,chv */
-        /* WaHdcDisableFetchWhenMasked:bdw,chv */
-        WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
-                          HDC_FORCE_NON_COHERENT);
-
-        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
-         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
-         *  polygons in the same 8x4 pixel/sample area to be processed without
-         *  stalling waiting for the earlier ones to write to Hierarchical Z
-         *  buffer."
-         *
-         * This optimization is off by default for BDW and CHV; turn it on.
-         */
-        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
-
-        /* Wa4x4STCOptimizationDisable:bdw,chv */
-        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
-        /*
-         * BSpec recommends 8x4 when MSAA is used,
-         * however in practice 16x4 seems fastest.
-         *
-         * Note that PS/WM thread counts depend on the WIZ hashing
-         * disable bit, which we don't touch here, but it's good
-         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-         */
-        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-                            GEN6_WIZ_HASHING_MASK,
-                            GEN6_WIZ_HASHING_16x4);
-
-        return 0;
-}
-
-static int bdw_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        ret = gen8_init_workarounds(engine);
-        if (ret)
-                return ret;
-
-        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
-        /* WaDisableDopClockGating:bdw
-         *
-         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
-         * to disable EUTC clock gating.
-         */
-        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
-                          DOP_CLOCK_GATING_DISABLE);
-
-        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-                          GEN8_SAMPLER_POWER_BYPASS_DIS);
-
-        WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                          /* WaForceContextSaveRestoreNonCoherent:bdw */
-                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
-                          (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
-
-        return 0;
-}
-
-static int chv_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        ret = gen8_init_workarounds(engine);
-        if (ret)
-                return ret;
-
-        /* WaDisableThreadStallDopClockGating:chv */
-        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
-        /* Improve HiZ throughput on CHV. */
-        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
-
-        return 0;
-}
-
-static int gen9_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
-        I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
-
-        /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
-        I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
-                   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
-
-        /* WaDisableKillLogic:bxt,skl,kbl */
-        if (!IS_COFFEELAKE(dev_priv))
-                I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-                           ECOCHK_DIS_TLB);
-
-        if (HAS_LLC(dev_priv)) {
-                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
-                 *
-                 * Must match Display Engine. See
-                 * WaCompressedResourceDisplayNewHashMode.
-                 */
-                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
-                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
-                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
-
-                I915_WRITE(MMCD_MISC_CTRL,
-                           I915_READ(MMCD_MISC_CTRL) |
-                           MMCD_PCLA |
-                           MMCD_HOTSPOT_EN);
-        }
-
-        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
-        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
-        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-                          FLOW_CONTROL_ENABLE |
-                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
-        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
-        if (!IS_COFFEELAKE(dev_priv))
-                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
-
-        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
-        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
-        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
-                          GEN9_ENABLE_YV12_BUGFIX |
-                          GEN9_ENABLE_GPGPU_PREEMPTION);
-
-        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
-        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
-        WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
-                                         GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
-
-        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
-        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
-                          GEN9_CCS_TLB_PREFETCH_ENABLE);
-
-        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
-        WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
-
-        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
-         * both tied to WaForceContextSaveRestoreNonCoherent
-         * in some hsds for skl. We keep the tie for all gen9. The
-         * documentation is a bit hazy and so we want to get common behaviour,
-         * even though there is no clear evidence we would need both on kbl/bxt.
-         * This area has been a source of system hangs so we play it safe
-         * and mimic the skl regardless of what bspec says.
-         *
-         * Use Force Non-Coherent whenever executing a 3D context. This
-         * is a workaround for a possible hang in the unlikely event
-         * a TLB invalidation occurs during a PSD flush.
-         */
-
-        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
-        WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                          HDC_FORCE_NON_COHERENT);
-
-        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
-        I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-                   BDW_DISABLE_HDC_INVALIDATION);
-
-        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
-        if (IS_SKYLAKE(dev_priv) ||
-            IS_KABYLAKE(dev_priv) ||
-            IS_COFFEELAKE(dev_priv))
-                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-                                  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
-        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
-        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
-
-        /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
-        if (IS_GEN9_LP(dev_priv)) {
-                u32 val = I915_READ(GEN8_L3SQCREG1);
-
-                val &= ~L3_PRIO_CREDITS_MASK;
-                val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
-                I915_WRITE(GEN8_L3SQCREG1, val);
-        }
-
-        /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
-        I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
-                                    GEN8_LQSC_FLUSH_COHERENT_LINES));
-
-        /*
-         * Supporting preemption with fine-granularity requires changes in the
-         * batch buffer programming. Since we can't break old userspace, we
-         * need to set our default preemption level to a safe value. Userspace
-         * is still able to use more fine-grained preemption levels, since in
-         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
-         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
-         * not real HW workarounds, but merely a way to start using preemption
-         * while maintaining the old contract with userspace.
-         */
-
-        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
-        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
-        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
-        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
-                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
-        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
-        ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
-        if (ret)
-                return ret;
-
-        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
-        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-        ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
-        if (ret)
-                return ret;
-
-        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
-        ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
-        if (ret)
-                return ret;
-
-        return 0;
-}
-
-static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        u8 vals[3] = { 0, 0, 0 };
-        unsigned int i;
-
-        for (i = 0; i < 3; i++) {
-                u8 ss;
-
-                /*
-                 * Only consider slices where one, and only one, subslice has 7
-                 * EUs
-                 */
-                if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
-                        continue;
-
-                /*
-                 * subslice_7eu[i] != 0 (because of the check above) and
-                 * ss_max == 4 (maximum number of subslices possible per slice)
-                 *
-                 * ->    0 <= ss <= 3;
-                 */
-                ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
-                vals[i] = 3 - ss;
-        }
-
-        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
-                return 0;
-
-        /* Tune IZ hashing. See intel_device_info_runtime_init() */
-        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-                            GEN9_IZ_HASHING_MASK(2) |
-                            GEN9_IZ_HASHING_MASK(1) |
-                            GEN9_IZ_HASHING_MASK(0),
-                            GEN9_IZ_HASHING(2, vals[2]) |
-                            GEN9_IZ_HASHING(1, vals[1]) |
-                            GEN9_IZ_HASHING(0, vals[0]));
-
-        return 0;
-}
-
-static int skl_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        ret = gen9_init_workarounds(engine);
-        if (ret)
-                return ret;
-
-        /* WaEnableGapsTsvCreditFix:skl */
-        I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-                                   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-        /* WaDisableGafsUnitClkGating:skl */
-        I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-                                  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-        /* WaInPlaceDecompressionHang:skl */
-        if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
-                I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-                           (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-        /* WaDisableLSQCROPERFforOCL:skl */
-        ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-        if (ret)
-                return ret;
-
-        return skl_tune_iz_hashing(engine);
-}
-
-static int bxt_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        ret = gen9_init_workarounds(engine);
-        if (ret)
-                return ret;
-
-        /* WaDisableThreadStallDopClockGating:bxt */
-        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-                          STALL_DOP_GATING_DISABLE);
-
-        /* WaDisablePooledEuLoadBalancingFix:bxt */
-        I915_WRITE(FF_SLICE_CS_CHICKEN2,
-                   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
-
-        /* WaToEnableHwFixForPushConstHWBug:bxt */
-        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-        /* WaInPlaceDecompressionHang:bxt */
-        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-                   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-        return 0;
-}
-
-static int cnl_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
-        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
-                I915_WRITE(GAMT_CHKN_BIT_REG,
-                           (I915_READ(GAMT_CHKN_BIT_REG) |
-                            GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
-
-        /* WaForceContextSaveRestoreNonCoherent:cnl */
-        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
-                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
-
-        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl (pre-prod) */
-        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
-                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
-        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
-        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
-        if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
-                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
-        /* WaInPlaceDecompressionHang:cnl */
-        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-                   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-        /* WaPushConstantDereferenceHoldDisable:cnl */
-        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
-
-        /* FtrEnableFastAnisoL1BankingFix:cnl */
-        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
-
-        /* WaDisable3DMidCmdPreemption:cnl */
-        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
-        /* WaDisableGPGPUMidCmdPreemption:cnl */
-        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
-                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
-        /* WaEnablePreemptionGranularityControlByUMD:cnl */
-        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-        ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
-        if (ret)
-                return ret;
-
-        /* WaDisableEarlyEOT:cnl */
-        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
-
-        return 0;
-}
-
-static int kbl_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        ret = gen9_init_workarounds(engine);
-        if (ret)
-                return ret;
-
-        /* WaEnableGapsTsvCreditFix:kbl */
-        I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-                                   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-        /* WaDisableDynamicCreditSharing:kbl */
-        if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-                I915_WRITE(GAMT_CHKN_BIT_REG,
-                           (I915_READ(GAMT_CHKN_BIT_REG) |
-                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));
-
-        /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
-        if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
-                WA_SET_BIT_MASKED(HDC_CHICKEN0,
-                                  HDC_FENCE_DEST_SLM_DISABLE);
-
-        /* WaToEnableHwFixForPushConstHWBug:kbl */
-        if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
-                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-        /* WaDisableGafsUnitClkGating:kbl */
-        I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-                                  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-        /* WaDisableSbeCacheDispatchPortSharing:kbl */
-        WA_SET_BIT_MASKED(
-                GEN7_HALF_SLICE_CHICKEN1,
-                GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
-        /* WaInPlaceDecompressionHang:kbl */
-        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-                   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-        /* WaDisableLSQCROPERFforOCL:kbl */
-        ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-        if (ret)
-                return ret;
-
-        return 0;
-}
-
-static int glk_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        ret = gen9_init_workarounds(engine);
-        if (ret)
-                return ret;
-
-        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
-        ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
-        if (ret)
-                return ret;
-
-        /* WaToEnableHwFixForPushConstHWBug:glk */
-        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-        return 0;
-}
-
-static int cfl_init_workarounds(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int ret;
-
-        ret = gen9_init_workarounds(engine);
-        if (ret)
-                return ret;
-
-        /* WaEnableGapsTsvCreditFix:cfl */
-        I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-                                   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-        /* WaToEnableHwFixForPushConstHWBug:cfl */
-        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-        /* WaDisableGafsUnitClkGating:cfl */
-        I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-                                  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-        /* WaDisableSbeCacheDispatchPortSharing:cfl */
-        WA_SET_BIT_MASKED(
-                GEN7_HALF_SLICE_CHICKEN1,
-                GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
-        /* WaInPlaceDecompressionHang:cfl */
-        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-                   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-        return 0;
-}
-
-int init_workarounds_ring(struct intel_engine_cs *engine)
-{
-        struct drm_i915_private *dev_priv = engine->i915;
-        int err;
-
-        if (GEM_WARN_ON(engine->id != RCS))
-                return -EINVAL;
-
-        dev_priv->workarounds.count = 0;
-        dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
-
-        if (IS_BROADWELL(dev_priv))
-                err = bdw_init_workarounds(engine);
-        else if (IS_CHERRYVIEW(dev_priv))
-                err = chv_init_workarounds(engine);
-        else if (IS_SKYLAKE(dev_priv))
-                err = skl_init_workarounds(engine);
-        else if (IS_BROXTON(dev_priv))
-                err = bxt_init_workarounds(engine);
-        else if (IS_KABYLAKE(dev_priv))
-                err = kbl_init_workarounds(engine);
-        else if (IS_GEMINILAKE(dev_priv))
-                err = glk_init_workarounds(engine);
-        else if (IS_COFFEELAKE(dev_priv))
-                err = cfl_init_workarounds(engine);
-        else if (IS_CANNONLAKE(dev_priv))
-                err = cnl_init_workarounds(engine);
-        else
-                err = 0;
-        if (err)
-                return err;
-
-        DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
-                         engine->name, dev_priv->workarounds.count);
-        return 0;
-}
-
-int intel_ring_workarounds_emit(struct i915_request *rq)
-{
-        struct i915_workarounds *w = &rq->i915->workarounds;
-        u32 *cs;
-        int ret, i;
-
-        if (w->count == 0)
-                return 0;
-
-        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
-        if (ret)
-                return ret;
-
-        cs = intel_ring_begin(rq, w->count * 2 + 2);
-        if (IS_ERR(cs))
-                return PTR_ERR(cs);
-
-        *cs++ = MI_LOAD_REGISTER_IMM(w->count);
-        for (i = 0; i < w->count; i++) {
-                *cs++ = i915_mmio_reg_offset(w->reg[i].addr);
-                *cs++ = w->reg[i].value;
-        }
-        *cs++ = MI_NOOP;
-
-        intel_ring_advance(rq, cs);
-
-        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
-        if (ret)
-                return ret;
-
-        return 0;
-}
-
 static bool ring_is_idle(struct intel_engine_cs *engine)
 {
         struct drm_i915_private *dev_priv = engine->i915;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 665d9e82e954..03b9d5ae883a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -139,6 +139,7 @@
139#include "i915_gem_render_state.h" 139#include "i915_gem_render_state.h"
140#include "intel_lrc_reg.h" 140#include "intel_lrc_reg.h"
141#include "intel_mocs.h" 141#include "intel_mocs.h"
142#include "intel_workarounds.h"
142 143
143#define RING_EXECLIST_QFULL (1 << 0x2) 144#define RING_EXECLIST_QFULL (1 << 0x2)
144#define RING_EXECLIST1_VALID (1 << 0x3) 145#define RING_EXECLIST1_VALID (1 << 0x3)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 04d9d9a946a7..36acc32374e4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -36,6 +36,7 @@
 #include "i915_gem_render_state.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_workarounds.h"
 
 /* Rough estimate of the typical request size, performing a flush,
  * set-context and then emitting the batch.
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 256d58487559..717041640135 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -885,9 +885,6 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
         return READ_ONCE(engine->timeline->seqno);
 }
 
-int init_workarounds_ring(struct intel_engine_cs *engine);
-int intel_ring_workarounds_emit(struct i915_request *rq);
-
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
                                struct intel_instdone *instdone);
 
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
new file mode 100644
index 000000000000..d60a37700f84
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -0,0 +1,686 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_workarounds.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * This file is intended as a central place to implement most [1]_ of the
+ * required workarounds for hardware to work as originally intended. They fall
+ * in five basic categories depending on how/when they are applied:
+ *
+ * - Workarounds that touch registers that are saved/restored to/from the HW
+ *   context image. The list is emitted (via Load Register Immediate commands)
+ *   every time a new context is created.
+ * - GT workarounds. The list of these WAs is applied whenever these registers
+ *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
+ * - Display workarounds. The list is applied during display clock-gating
+ *   initialization.
+ * - Workarounds that whitelist a privileged register, so that UMDs can manage
+ *   them directly. This is just a special case of an MMIO workaround (as we
+ *   write the list of these to-be-whitelisted registers to some special HW
+ *   registers).
+ * - Workaround batchbuffers, that get executed automatically by the hardware
+ *   on every HW context restore.
+ *
+ * .. [1] Please notice that there are other WAs that, due to their nature,
+ *    cannot be applied from a central place. Those are peppered around the
+ *    rest of the code, as needed.
+ *
+ * .. [2] Technically, some registers are power-context saved & restored, so
+ *    they survive a suspend/resume. In practice, writing them again is not
+ *    too costly and simplifies things. We can revisit this in the future.
+ *
+ * Layout
+ * ''''''
+ *
+ * Keep things in this file ordered by WA type, as per the above (context, GT,
+ * display, register whitelist, batchbuffer). Then, inside each type, keep the
+ * following order:
+ *
+ * - Infrastructure functions and macros
+ * - WAs per platform in standard gen/chrono order
+ * - Public functions to init or apply the given workaround type.
+ */
+
+static int wa_add(struct drm_i915_private *dev_priv,
+                  i915_reg_t addr,
+                  const u32 mask, const u32 val)
+{
+        const unsigned int idx = dev_priv->workarounds.count;
+
+        if (WARN_ON(idx >= I915_MAX_WA_REGS))
+                return -ENOSPC;
+
+        dev_priv->workarounds.reg[idx].addr = addr;
+        dev_priv->workarounds.reg[idx].value = val;
+        dev_priv->workarounds.reg[idx].mask = mask;
+
+        dev_priv->workarounds.count++;
+
+        return 0;
+}
+
+#define WA_REG(addr, mask, val) do { \
+                const int r = wa_add(dev_priv, (addr), (mask), (val)); \
+                if (r) \
+                        return r; \
+        } while (0)
+
+#define WA_SET_BIT_MASKED(addr, mask) \
+        WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
+
+#define WA_CLR_BIT_MASKED(addr, mask) \
+        WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
+
+#define WA_SET_FIELD_MASKED(addr, mask, value) \
+        WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
+
+static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
+                                 i915_reg_t reg)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        struct i915_workarounds *wa = &dev_priv->workarounds;
+        const unsigned int index = wa->hw_whitelist_count[engine->id];
+
+        if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
+                return -EINVAL;
+
+        I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
+                   i915_mmio_reg_offset(reg));
+        wa->hw_whitelist_count[engine->id]++;
+
+        return 0;
+}
+
+static int gen8_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+
+        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+        /* WaDisableAsyncFlipPerfMode:bdw,chv */
+        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
+        /* WaDisablePartialInstShootdown:bdw,chv */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+        /* Use Force Non-Coherent whenever executing a 3D context. This is a
+         * workaround for a possible hang in the unlikely event a TLB
+         * invalidation occurs during a PSD flush.
+         */
+        /* WaForceEnableNonCoherent:bdw,chv */
+        /* WaHdcDisableFetchWhenMasked:bdw,chv */
+        WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
+                          HDC_FORCE_NON_COHERENT);
+
+        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
+         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
+         *  polygons in the same 8x4 pixel/sample area to be processed without
+         *  stalling waiting for the earlier ones to write to Hierarchical Z
+         *  buffer."
+         *
+         * This optimization is off by default for BDW and CHV; turn it on.
+         */
+        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+
+        /* Wa4x4STCOptimizationDisable:bdw,chv */
+        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+
+        /*
+         * BSpec recommends 8x4 when MSAA is used,
+         * however in practice 16x4 seems fastest.
+         *
+         * Note that PS/WM thread counts depend on the WIZ hashing
+         * disable bit, which we don't touch here, but it's good
+         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+         */
+        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+                            GEN6_WIZ_HASHING_MASK,
+                            GEN6_WIZ_HASHING_16x4);
+
+        return 0;
+}
+
+static int bdw_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        ret = gen8_init_workarounds(engine);
+        if (ret)
+                return ret;
+
+        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+        /* WaDisableDopClockGating:bdw
+         *
+         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+         * to disable EUTC clock gating.
+         */
+        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+                          DOP_CLOCK_GATING_DISABLE);
+
+        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+                          GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+        WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                          /* WaForceContextSaveRestoreNonCoherent:bdw */
+                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
+                          (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+
+        return 0;
+}
+
+static int chv_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        ret = gen8_init_workarounds(engine);
+        if (ret)
+                return ret;
+
+        /* WaDisableThreadStallDopClockGating:chv */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+        /* Improve HiZ throughput on CHV. */
+        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
+
+        return 0;
+}
+
+static int gen9_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
+        I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
+                   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
+
+        /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
+        I915_WRITE(BDW_SCRATCH1,
+                   I915_READ(BDW_SCRATCH1) |
+                   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+
+        /* WaDisableKillLogic:bxt,skl,kbl */
+        if (!IS_COFFEELAKE(dev_priv))
+                I915_WRITE(GAM_ECOCHK,
+                           I915_READ(GAM_ECOCHK) | ECOCHK_DIS_TLB);
+
+        if (HAS_LLC(dev_priv)) {
+                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+                 *
+                 * Must match Display Engine. See
+                 * WaCompressedResourceDisplayNewHashMode.
+                 */
+                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
+                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+
+                I915_WRITE(MMCD_MISC_CTRL,
+                           I915_READ(MMCD_MISC_CTRL) |
+                           MMCD_PCLA |
+                           MMCD_HOTSPOT_EN);
+        }
+
+        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
+        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                          FLOW_CONTROL_ENABLE |
+                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
+        if (!IS_COFFEELAKE(dev_priv))
+                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
+
+        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
+        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
+        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+                          GEN9_ENABLE_YV12_BUGFIX |
+                          GEN9_ENABLE_GPGPU_PREEMPTION);
+
+        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
+        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
+        WA_SET_BIT_MASKED(CACHE_MODE_1,
+                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
+                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
+
+        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
+        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+                          GEN9_CCS_TLB_PREFETCH_ENABLE);
+
+        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
+        WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
+
+        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+         * both tied to WaForceContextSaveRestoreNonCoherent
+         * in some hsds for skl. We keep the tie for all gen9. The
+         * documentation is a bit hazy and so we want to get common behaviour,
+         * even though there is no clear evidence we would need both on kbl/bxt.
+         * This area has been a source of system hangs so we play it safe
+         * and mimic the skl regardless of what bspec says.
+         *
+         * Use Force Non-Coherent whenever executing a 3D context. This
+         * is a workaround for a possible hang in the unlikely event
+         * a TLB invalidation occurs during a PSD flush.
+         */
+
+        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
+        WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                          HDC_FORCE_NON_COHERENT);
+
+        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
+        I915_WRITE(GAM_ECOCHK,
+                   I915_READ(GAM_ECOCHK) | BDW_DISABLE_HDC_INVALIDATION);
+
+        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
+        if (IS_SKYLAKE(dev_priv) ||
+            IS_KABYLAKE(dev_priv) ||
+            IS_COFFEELAKE(dev_priv))
+                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+                                  GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
+        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+
+        /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
+        if (IS_GEN9_LP(dev_priv)) {
+                u32 val = I915_READ(GEN8_L3SQCREG1);
+
+                val &= ~L3_PRIO_CREDITS_MASK;
+                val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
+                I915_WRITE(GEN8_L3SQCREG1, val);
+        }
+
+        /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
+        I915_WRITE(GEN8_L3SQCREG4,
+                   I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+        /*
+         * Supporting preemption with fine-granularity requires changes in the
+         * batch buffer programming. Since we can't break old userspace, we
+         * need to set our default preemption level to a safe value. Userspace
+         * is still able to use more fine-grained preemption levels, since in
+         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
+         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
+         * not real HW workarounds, but merely a way to start using preemption
+         * while maintaining the old contract with userspace.
+         */
+
+        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
+        ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
+        if (ret)
+                return ret;
+
+        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+        ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
+        if (ret)
+                return ret;
+
+        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
+        ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
+        if (ret)
+                return ret;
+
+        return 0;
+}
+
+static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        u8 vals[3] = { 0, 0, 0 };
+        unsigned int i;
+
+        for (i = 0; i < 3; i++) {
+                u8 ss;
+
+                /*
+                 * Only consider slices where one, and only one, subslice has 7
+                 * EUs
+                 */
+                if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
+                        continue;
+
+                /*
+                 * subslice_7eu[i] != 0 (because of the check above) and
+                 * ss_max == 4 (maximum number of subslices possible per slice)
+                 *
+                 * ->    0 <= ss <= 3;
+                 */
+                ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
+                vals[i] = 3 - ss;
+        }
+
+        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
+                return 0;
+
+        /* Tune IZ hashing. See intel_device_info_runtime_init() */
+        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+                            GEN9_IZ_HASHING_MASK(2) |
+                            GEN9_IZ_HASHING_MASK(1) |
+                            GEN9_IZ_HASHING_MASK(0),
+                            GEN9_IZ_HASHING(2, vals[2]) |
+                            GEN9_IZ_HASHING(1, vals[1]) |
+                            GEN9_IZ_HASHING(0, vals[0]));
+
+        return 0;
+}
+
+static int skl_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        ret = gen9_init_workarounds(engine);
+        if (ret)
+                return ret;
+
+        /* WaEnableGapsTsvCreditFix:skl */
+        I915_WRITE(GEN8_GARBCNTL,
+                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+        /* WaDisableGafsUnitClkGating:skl */
+        I915_WRITE(GEN7_UCGCTL4,
+                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+        /* WaInPlaceDecompressionHang:skl */
+        if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
+                I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+                           I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+                           GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+        /* WaDisableLSQCROPERFforOCL:skl */
+        ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+        if (ret)
+                return ret;
+
+        return skl_tune_iz_hashing(engine);
+}
+
+static int bxt_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        ret = gen9_init_workarounds(engine);
+        if (ret)
+                return ret;
+
+        /* WaDisableThreadStallDopClockGating:bxt */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                          STALL_DOP_GATING_DISABLE);
+
+        /* WaDisablePooledEuLoadBalancingFix:bxt */
+        I915_WRITE(FF_SLICE_CS_CHICKEN2,
+                   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
+
+        /* WaToEnableHwFixForPushConstHWBug:bxt */
+        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+        /* WaInPlaceDecompressionHang:bxt */
+        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+        return 0;
+}
+
+static int cnl_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
+        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
+                I915_WRITE(GAMT_CHKN_BIT_REG,
+                           I915_READ(GAMT_CHKN_BIT_REG) |
+                           GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
+
+        /* WaForceContextSaveRestoreNonCoherent:cnl */
+        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
+                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
+
+        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl (pre-prod) */
+        if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
+                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
+
+        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
+        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
+        if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
+                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
+
+        /* WaInPlaceDecompressionHang:cnl */
+        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+        /* WaPushConstantDereferenceHoldDisable:cnl */
+        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
+
+        /* FtrEnableFastAnisoL1BankingFix:cnl */
+        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
+
+        /* WaDisable3DMidCmdPreemption:cnl */
+        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+        /* WaDisableGPGPUMidCmdPreemption:cnl */
+        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+        /* WaEnablePreemptionGranularityControlByUMD:cnl */
+        I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+                   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+        ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
+        if (ret)
+                return ret;
+
+        /* WaDisableEarlyEOT:cnl */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
+
+        return 0;
+}
+
+static int kbl_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        ret = gen9_init_workarounds(engine);
+        if (ret)
+                return ret;
+
+        /* WaEnableGapsTsvCreditFix:kbl */
+        I915_WRITE(GEN8_GARBCNTL,
+                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+        /* WaDisableDynamicCreditSharing:kbl */
+        if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+                I915_WRITE(GAMT_CHKN_BIT_REG,
+                           I915_READ(GAMT_CHKN_BIT_REG) |
+                           GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+
+        /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
+        if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
+                WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                                  HDC_FENCE_DEST_SLM_DISABLE);
+
+        /* WaToEnableHwFixForPushConstHWBug:kbl */
+        if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
+                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+        /* WaDisableGafsUnitClkGating:kbl */
+        I915_WRITE(GEN7_UCGCTL4,
+                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+        /* WaDisableSbeCacheDispatchPortSharing:kbl */
+        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+        /* WaInPlaceDecompressionHang:kbl */
+        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+        /* WaDisableLSQCROPERFforOCL:kbl */
+        ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+        if (ret)
+                return ret;
+
+        return 0;
+}
+
+static int glk_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        ret = gen9_init_workarounds(engine);
+        if (ret)
+                return ret;
+
+        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+        ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+        if (ret)
+                return ret;
+
+        /* WaToEnableHwFixForPushConstHWBug:glk */
+        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+        return 0;
+}
+
+static int cfl_init_workarounds(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int ret;
+
+        ret = gen9_init_workarounds(engine);
+        if (ret)
+                return ret;
+
+        /* WaEnableGapsTsvCreditFix:cfl */
+        I915_WRITE(GEN8_GARBCNTL,
+                   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+        /* WaToEnableHwFixForPushConstHWBug:cfl */
+        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+        /* WaDisableGafsUnitClkGating:cfl */
+        I915_WRITE(GEN7_UCGCTL4,
+                   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+        /* WaDisableSbeCacheDispatchPortSharing:cfl */
+        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+        /* WaInPlaceDecompressionHang:cfl */
+        I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+                   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+        return 0;
+}
+
+int init_workarounds_ring(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *dev_priv = engine->i915;
+        int err;
+
+        if (GEM_WARN_ON(engine->id != RCS))
+                return -EINVAL;
+
+        dev_priv->workarounds.count = 0;
+        dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
+
+        if (IS_BROADWELL(dev_priv))
+                err = bdw_init_workarounds(engine);
+        else if (IS_CHERRYVIEW(dev_priv))
+                err = chv_init_workarounds(engine);
+        else if (IS_SKYLAKE(dev_priv))
+                err = skl_init_workarounds(engine);
+        else if (IS_BROXTON(dev_priv))
+                err = bxt_init_workarounds(engine);
+        else if (IS_KABYLAKE(dev_priv))
+                err = kbl_init_workarounds(engine);
+        else if (IS_GEMINILAKE(dev_priv))
+                err = glk_init_workarounds(engine);
+        else if (IS_COFFEELAKE(dev_priv))
+                err = cfl_init_workarounds(engine);
+        else if (IS_CANNONLAKE(dev_priv))
+                err = cnl_init_workarounds(engine);
+        else
+                err = 0;
+        if (err)
+                return err;
+
+        DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
+                         engine->name, dev_priv->workarounds.count);
+        return 0;
+}
+
+int intel_ring_workarounds_emit(struct i915_request *rq)
+{
+        struct i915_workarounds *w = &rq->i915->workarounds;
+        u32 *cs;
+        int ret, i;
+
+        if (w->count == 0)
+                return 0;
+
+        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
+        if (ret)
+                return ret;
+
+        cs = intel_ring_begin(rq, w->count * 2 + 2);
+        if (IS_ERR(cs))
+                return PTR_ERR(cs);
+
+        *cs++ = MI_LOAD_REGISTER_IMM(w->count);
+        for (i = 0; i < w->count; i++) {
+                *cs++ = i915_mmio_reg_offset(w->reg[i].addr);
+                *cs++ = w->reg[i].value;
+        }
+        *cs++ = MI_NOOP;
+
+        intel_ring_advance(rq, cs);
+
+        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
+        if (ret)
+                return ret;
+
+        return 0;
+}
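
Editor's note on the ring-space accounting in intel_ring_workarounds_emit()
above: it reserves w->count * 2 + 2 dwords, i.e. one MI_LOAD_REGISTER_IMM
header, an (offset, value) dword pair per workaround, and one trailing
MI_NOOP, which rounds the packet up to an even dword count so the ring tail
stays qword-aligned. A small stand-alone sketch of that arithmetic
(lri_dwords() is an illustrative helper, not part of the patch):

    #include <assert.h>

    /* Dword budget for the LRI packet emitted above. */
    static unsigned int lri_dwords(unsigned int count)
    {
            return 1 +         /* MI_LOAD_REGISTER_IMM(count) header */
                   2 * count + /* one (register offset, value) pair per WA */
                   1;          /* MI_NOOP pad: keeps the total even */
    }

    int main(void)
    {
            assert(lri_dwords(3) == 3 * 2 + 2); /* matches w->count * 2 + 2 */
            assert(lri_dwords(3) % 2 == 0);     /* even number of dwords */
            return 0;
    }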
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h
new file mode 100644
index 000000000000..2afea73aeeae
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_workarounds.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef _I915_WORKAROUNDS_H_
+#define _I915_WORKAROUNDS_H_
+
+int init_workarounds_ring(struct intel_engine_cs *engine);
+int intel_ring_workarounds_emit(struct i915_request *rq);
+
+#endif
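
Editor's note: for orientation, a hedged sketch of how the two entry points
exported by the new header are meant to be wired up: init_workarounds_ring()
records the per-device workaround list once for the render engine, and
intel_ring_workarounds_emit() replays it into a request, typically during
render context initialisation. Both caller functions below are illustrative
names, not code from this patch:

    #include "i915_drv.h"
    #include "intel_workarounds.h"

    /* Illustrative engine setup: build the workaround list (RCS only). */
    static int example_render_engine_init(struct intel_engine_cs *engine)
    {
            return init_workarounds_ring(engine);
    }

    /* Illustrative context init: replay the recorded list into a request. */
    static int example_rcs_ctx_init(struct i915_request *rq)
    {
            int ret;

            ret = intel_ring_workarounds_emit(rq);
            if (ret)
                    return ret;

            return 0;
    }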