aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Lionel Landwerlin <lionel.g.landwerlin@intel.com>, 2018-03-06 07:28:52 -0500
committer: Lionel Landwerlin <lionel.g.landwerlin@intel.com>, 2018-03-08 05:06:20 -0500
commit: 8cc7669355136f8952779e6f60053c1284d59c4d (patch)
tree: 0ec74efcaac56cdceafe31e48de5a81bb1225cd2
parent: 401d0ae326c92185f1727b0f12834197536265ae (diff)
drm/i915: store all subslice masks
Up to now, the subslice mask was assumed to be uniform across slices. But starting with Cannonlake, slices can be asymmetric (for example, slice0 has a different number of subslices than slice1+). This change stores the subslice masks for all slices rather than having a single mask that applies to all slices. v2: Rework how we store total numbers in sseu_dev_info (Tvrtko) Fix CHV eu masks, was reading disabled as enabled (Tvrtko) Readability changes (Tvrtko) Add EU index helper (Tvrtko) v3: Turn ALIGN(v, 8) / 8 into DIV_ROUND_UP(v, BITS_PER_BYTE) (Tvrtko) Reuse sseu_eu_idx() for setting eu_mask on CHV (Tvrtko) Reformat debug prints for subslices (Tvrtko) v4: Change eu_mask helper into sseu_set_eus() (Tvrtko) v5: With Haswell reporting masks & counts, bump sseu_*_eus() functions to use u16 (Lionel) v6: Fix sseu_get_eus() for > 8 EUs per subslice (Lionel) v7: Change debugfs labels for number of subslices per slice, will need a small igt/pm_sseu change (Lionel) Drop subslice_total field from sseu_dev_info, rely on sseu_subslice_total() to recompute the value instead (Lionel) v8: Remove unused function compute_subslice_total() (Lionel) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20180306122857.27317-2-lionel.g.landwerlin@intel.com
-rw-r--r-- drivers/gpu/drm/i915/i915_debugfs.c 28
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.c 2
-rw-r--r-- drivers/gpu/drm/i915/intel_device_info.c 208
-rw-r--r-- drivers/gpu/drm/i915/intel_device_info.h 62
-rw-r--r-- drivers/gpu/drm/i915/intel_lrc.c 2
-rw-r--r-- drivers/gpu/drm/i915/intel_ringbuffer.h 2
6 files changed, 237 insertions, 67 deletions
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e838c765b251..229d4d605cd9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4323,7 +4323,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
4323 continue; 4323 continue;
4324 4324
4325 sseu->slice_mask = BIT(0); 4325 sseu->slice_mask = BIT(0);
4326 sseu->subslice_mask |= BIT(ss); 4326 sseu->subslice_mask[0] |= BIT(ss);
4327 eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + 4327 eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
4328 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + 4328 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
4329 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) + 4329 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -4370,7 +4370,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
4370 continue; 4370 continue;
4371 4371
4372 sseu->slice_mask |= BIT(s); 4372 sseu->slice_mask |= BIT(s);
4373 sseu->subslice_mask = info->sseu.subslice_mask; 4373 sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
4374 4374
4375 for (ss = 0; ss < ss_max; ss++) { 4375 for (ss = 0; ss < ss_max; ss++) {
4376 unsigned int eu_cnt; 4376 unsigned int eu_cnt;
@@ -4425,8 +4425,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
4425 sseu->slice_mask |= BIT(s); 4425 sseu->slice_mask |= BIT(s);
4426 4426
4427 if (IS_GEN9_BC(dev_priv)) 4427 if (IS_GEN9_BC(dev_priv))
4428 sseu->subslice_mask = 4428 sseu->subslice_mask[s] =
4429 INTEL_INFO(dev_priv)->sseu.subslice_mask; 4429 INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
4430 4430
4431 for (ss = 0; ss < ss_max; ss++) { 4431 for (ss = 0; ss < ss_max; ss++) {
4432 unsigned int eu_cnt; 4432 unsigned int eu_cnt;
@@ -4436,7 +4436,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
4436 /* skip disabled subslice */ 4436 /* skip disabled subslice */
4437 continue; 4437 continue;
4438 4438
4439 sseu->subslice_mask |= BIT(ss); 4439 sseu->subslice_mask[s] |= BIT(ss);
4440 } 4440 }
4441 4441
4442 eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & 4442 eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@@ -4458,9 +4458,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
4458 sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; 4458 sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
4459 4459
4460 if (sseu->slice_mask) { 4460 if (sseu->slice_mask) {
4461 sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
4462 sseu->eu_per_subslice = 4461 sseu->eu_per_subslice =
4463 INTEL_INFO(dev_priv)->sseu.eu_per_subslice; 4462 INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
4463 for (s = 0; s < fls(sseu->slice_mask); s++) {
4464 sseu->subslice_mask[s] =
4465 INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
4466 }
4464 sseu->eu_total = sseu->eu_per_subslice * 4467 sseu->eu_total = sseu->eu_per_subslice *
4465 sseu_subslice_total(sseu); 4468 sseu_subslice_total(sseu);
4466 4469
@@ -4479,6 +4482,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
4479{ 4482{
4480 struct drm_i915_private *dev_priv = node_to_i915(m->private); 4483 struct drm_i915_private *dev_priv = node_to_i915(m->private);
4481 const char *type = is_available_info ? "Available" : "Enabled"; 4484 const char *type = is_available_info ? "Available" : "Enabled";
4485 int s;
4482 4486
4483 seq_printf(m, " %s Slice Mask: %04x\n", type, 4487 seq_printf(m, " %s Slice Mask: %04x\n", type,
4484 sseu->slice_mask); 4488 sseu->slice_mask);
@@ -4486,10 +4490,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
4486 hweight8(sseu->slice_mask)); 4490 hweight8(sseu->slice_mask));
4487 seq_printf(m, " %s Subslice Total: %u\n", type, 4491 seq_printf(m, " %s Subslice Total: %u\n", type,
4488 sseu_subslice_total(sseu)); 4492 sseu_subslice_total(sseu));
4489 seq_printf(m, " %s Subslice Mask: %04x\n", type, 4493 for (s = 0; s < fls(sseu->slice_mask); s++) {
4490 sseu->subslice_mask); 4494 seq_printf(m, " %s Slice%i subslices: %u\n", type,
4491 seq_printf(m, " %s Subslice Per Slice: %u\n", type, 4495 s, hweight8(sseu->subslice_mask[s]));
4492 hweight8(sseu->subslice_mask)); 4496 }
4493 seq_printf(m, " %s EU Total: %u\n", type, 4497 seq_printf(m, " %s EU Total: %u\n", type,
4494 sseu->eu_total); 4498 sseu->eu_total);
4495 seq_printf(m, " %s EU Per Subslice: %u\n", type, 4499 seq_printf(m, " %s EU Per Subslice: %u\n", type,
@@ -4523,6 +4527,10 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
4523 4527
4524 seq_puts(m, "SSEU Device Status\n"); 4528 seq_puts(m, "SSEU Device Status\n");
4525 memset(&sseu, 0, sizeof(sseu)); 4529 memset(&sseu, 0, sizeof(sseu));
4530 sseu.max_slices = INTEL_INFO(dev_priv)->sseu.max_slices;
4531 sseu.max_subslices = INTEL_INFO(dev_priv)->sseu.max_subslices;
4532 sseu.max_eus_per_subslice =
4533 INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice;
4526 4534
4527 intel_runtime_pm_get(dev_priv); 4535 intel_runtime_pm_get(dev_priv);
4528 4536
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d61b51c0bf0b..c594ff5e57d0 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -428,7 +428,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
428 return -ENODEV; 428 return -ENODEV;
429 break; 429 break;
430 case I915_PARAM_SUBSLICE_MASK: 430 case I915_PARAM_SUBSLICE_MASK:
431 value = INTEL_INFO(dev_priv)->sseu.subslice_mask; 431 value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
432 if (!value) 432 if (!value)
433 return -ENODEV; 433 return -ENODEV;
434 break; 434 break;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index cadc5f81ed72..b29a35d441b4 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -81,12 +81,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
81 81
82static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) 82static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
83{ 83{
84 int s;
85
84 drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); 86 drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
85 drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); 87 drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
86 drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); 88 drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
87 drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); 89 for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
88 drm_printf(p, "subslice per slice: %u\n", 90 drm_printf(p, "slice%d %u subslices mask=%04x\n",
89 hweight8(sseu->subslice_mask)); 91 s, hweight8(sseu->subslice_mask[s]),
92 sseu->subslice_mask[s]);
93 }
90 drm_printf(p, "EU total: %u\n", sseu->eu_total); 94 drm_printf(p, "EU total: %u\n", sseu->eu_total);
91 drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); 95 drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
92 drm_printf(p, "has slice power gating: %s\n", 96 drm_printf(p, "has slice power gating: %s\n",
@@ -120,22 +124,76 @@ void intel_device_info_dump(const struct intel_device_info *info,
120 intel_device_info_dump_flags(info, p); 124 intel_device_info_dump_flags(info, p);
121} 125}
122 126
127static u16 compute_eu_total(const struct sseu_dev_info *sseu)
128{
129 u16 i, total = 0;
130
131 for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
132 total += hweight8(sseu->eu_mask[i]);
133
134 return total;
135}
136
123static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) 137static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
124{ 138{
125 struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; 139 struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
126 const u32 fuse2 = I915_READ(GEN8_FUSE2); 140 const u32 fuse2 = I915_READ(GEN8_FUSE2);
141 int s, ss;
142 const int eu_mask = 0xff;
143 u32 subslice_mask, eu_en;
127 144
128 sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> 145 sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
129 GEN10_F2_S_ENA_SHIFT; 146 GEN10_F2_S_ENA_SHIFT;
130 sseu->subslice_mask = (1 << 4) - 1; 147 sseu->max_slices = 6;
131 sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> 148 sseu->max_subslices = 4;
132 GEN10_F2_SS_DIS_SHIFT); 149 sseu->max_eus_per_subslice = 8;
133 150
134 sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); 151 subslice_mask = (1 << 4) - 1;
135 sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); 152 subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
136 sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); 153 GEN10_F2_SS_DIS_SHIFT);
137 sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & 154
138 GEN10_EU_DIS_SS_MASK)); 155 /*
156 * Slice0 can have up to 3 subslices, but there are only 2 in
157 * slice1/2.
158 */
159 sseu->subslice_mask[0] = subslice_mask;
160 for (s = 1; s < sseu->max_slices; s++)
161 sseu->subslice_mask[s] = subslice_mask & 0x3;
162
163 /* Slice0 */
164 eu_en = ~I915_READ(GEN8_EU_DISABLE0);
165 for (ss = 0; ss < sseu->max_subslices; ss++)
166 sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
167 /* Slice1 */
168 sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
169 eu_en = ~I915_READ(GEN8_EU_DISABLE1);
170 sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
171 /* Slice2 */
172 sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
173 sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
174 /* Slice3 */
175 sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
176 eu_en = ~I915_READ(GEN8_EU_DISABLE2);
177 sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
178 /* Slice4 */
179 sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
180 sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
181 /* Slice5 */
182 sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
183 eu_en = ~I915_READ(GEN10_EU_DISABLE3);
184 sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
185
186 /* Do a second pass where we mark the subslices disabled if all their
187 * eus are off.
188 */
189 for (s = 0; s < sseu->max_slices; s++) {
190 for (ss = 0; ss < sseu->max_subslices; ss++) {
191 if (sseu_get_eus(sseu, s, ss) == 0)
192 sseu->subslice_mask[s] &= ~BIT(ss);
193 }
194 }
195
196 sseu->eu_total = compute_eu_total(sseu);
139 197
140 /* 198 /*
141 * CNL is expected to always have a uniform distribution 199 * CNL is expected to always have a uniform distribution
@@ -156,26 +214,39 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
156static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) 214static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
157{ 215{
158 struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; 216 struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
159 u32 fuse, eu_dis; 217 u32 fuse;
160 218
161 fuse = I915_READ(CHV_FUSE_GT); 219 fuse = I915_READ(CHV_FUSE_GT);
162 220
163 sseu->slice_mask = BIT(0); 221 sseu->slice_mask = BIT(0);
222 sseu->max_slices = 1;
223 sseu->max_subslices = 2;
224 sseu->max_eus_per_subslice = 8;
164 225
165 if (!(fuse & CHV_FGT_DISABLE_SS0)) { 226 if (!(fuse & CHV_FGT_DISABLE_SS0)) {
166 sseu->subslice_mask |= BIT(0); 227 u8 disabled_mask =
167 eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | 228 ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
168 CHV_FGT_EU_DIS_SS0_R1_MASK); 229 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
169 sseu->eu_total += 8 - hweight32(eu_dis); 230 (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
231 CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
232
233 sseu->subslice_mask[0] |= BIT(0);
234 sseu_set_eus(sseu, 0, 0, ~disabled_mask);
170 } 235 }
171 236
172 if (!(fuse & CHV_FGT_DISABLE_SS1)) { 237 if (!(fuse & CHV_FGT_DISABLE_SS1)) {
173 sseu->subslice_mask |= BIT(1); 238 u8 disabled_mask =
174 eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | 239 ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
175 CHV_FGT_EU_DIS_SS1_R1_MASK); 240 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
176 sseu->eu_total += 8 - hweight32(eu_dis); 241 (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
242 CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
243
244 sseu->subslice_mask[0] |= BIT(1);
245 sseu_set_eus(sseu, 0, 1, ~disabled_mask);
177 } 246 }
178 247
248 sseu->eu_total = compute_eu_total(sseu);
249
179 /* 250 /*
180 * CHV expected to always have a uniform distribution of EU 251 * CHV expected to always have a uniform distribution of EU
181 * across subslices. 252 * across subslices.
@@ -197,41 +268,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
197{ 268{
198 struct intel_device_info *info = mkwrite_device_info(dev_priv); 269 struct intel_device_info *info = mkwrite_device_info(dev_priv);
199 struct sseu_dev_info *sseu = &info->sseu; 270 struct sseu_dev_info *sseu = &info->sseu;
200 int s_max = 3, ss_max = 4, eu_max = 8;
201 int s, ss; 271 int s, ss;
202 u32 fuse2, eu_disable; 272 u32 fuse2, eu_disable, subslice_mask;
203 u8 eu_mask = 0xff; 273 const u8 eu_mask = 0xff;
204 274
205 fuse2 = I915_READ(GEN8_FUSE2); 275 fuse2 = I915_READ(GEN8_FUSE2);
206 sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; 276 sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
207 277
278 /* BXT has a single slice and at most 3 subslices. */
279 sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
280 sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
281 sseu->max_eus_per_subslice = 8;
282
208 /* 283 /*
209 * The subslice disable field is global, i.e. it applies 284 * The subslice disable field is global, i.e. it applies
210 * to each of the enabled slices. 285 * to each of the enabled slices.
211 */ 286 */
212 sseu->subslice_mask = (1 << ss_max) - 1; 287 subslice_mask = (1 << sseu->max_subslices) - 1;
213 sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> 288 subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
214 GEN9_F2_SS_DIS_SHIFT); 289 GEN9_F2_SS_DIS_SHIFT);
215 290
216 /* 291 /*
217 * Iterate through enabled slices and subslices to 292 * Iterate through enabled slices and subslices to
218 * count the total enabled EU. 293 * count the total enabled EU.
219 */ 294 */
220 for (s = 0; s < s_max; s++) { 295 for (s = 0; s < sseu->max_slices; s++) {
221 if (!(sseu->slice_mask & BIT(s))) 296 if (!(sseu->slice_mask & BIT(s)))
222 /* skip disabled slice */ 297 /* skip disabled slice */
223 continue; 298 continue;
224 299
300 sseu->subslice_mask[s] = subslice_mask;
301
225 eu_disable = I915_READ(GEN9_EU_DISABLE(s)); 302 eu_disable = I915_READ(GEN9_EU_DISABLE(s));
226 for (ss = 0; ss < ss_max; ss++) { 303 for (ss = 0; ss < sseu->max_subslices; ss++) {
227 int eu_per_ss; 304 int eu_per_ss;
305 u8 eu_disabled_mask;
228 306
229 if (!(sseu->subslice_mask & BIT(ss))) 307 if (!(sseu->subslice_mask[s] & BIT(ss)))
230 /* skip disabled subslice */ 308 /* skip disabled subslice */
231 continue; 309 continue;
232 310
233 eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & 311 eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask;
234 eu_mask); 312
313 sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
314
315 eu_per_ss = sseu->max_eus_per_subslice -
316 hweight8(eu_disabled_mask);
235 317
236 /* 318 /*
237 * Record which subslice(s) has(have) 7 EUs. we 319 * Record which subslice(s) has(have) 7 EUs. we
@@ -240,11 +322,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
240 */ 322 */
241 if (eu_per_ss == 7) 323 if (eu_per_ss == 7)
242 sseu->subslice_7eu[s] |= BIT(ss); 324 sseu->subslice_7eu[s] |= BIT(ss);
243
244 sseu->eu_total += eu_per_ss;
245 } 325 }
246 } 326 }
247 327
328 sseu->eu_total = compute_eu_total(sseu);
329
248 /* 330 /*
249 * SKL is expected to always have a uniform distribution 331 * SKL is expected to always have a uniform distribution
250 * of EU across subslices with the exception that any one 332 * of EU across subslices with the exception that any one
@@ -270,8 +352,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
270 sseu->has_eu_pg = sseu->eu_per_subslice > 2; 352 sseu->has_eu_pg = sseu->eu_per_subslice > 2;
271 353
272 if (IS_GEN9_LP(dev_priv)) { 354 if (IS_GEN9_LP(dev_priv)) {
273#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) 355#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss)))
274 info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; 356 info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
275 357
276 sseu->min_eu_in_pool = 0; 358 sseu->min_eu_in_pool = 0;
277 if (info->has_pooled_eu) { 359 if (info->has_pooled_eu) {
@@ -289,19 +371,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
289static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) 371static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
290{ 372{
291 struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; 373 struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
292 const int s_max = 3, ss_max = 3, eu_max = 8;
293 int s, ss; 374 int s, ss;
294 u32 fuse2, eu_disable[3]; /* s_max */ 375 u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
295 376
296 fuse2 = I915_READ(GEN8_FUSE2); 377 fuse2 = I915_READ(GEN8_FUSE2);
297 sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; 378 sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
379 sseu->max_slices = 3;
380 sseu->max_subslices = 3;
381 sseu->max_eus_per_subslice = 8;
382
298 /* 383 /*
299 * The subslice disable field is global, i.e. it applies 384 * The subslice disable field is global, i.e. it applies
300 * to each of the enabled slices. 385 * to each of the enabled slices.
301 */ 386 */
302 sseu->subslice_mask = GENMASK(ss_max - 1, 0); 387 subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
303 sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> 388 subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
304 GEN8_F2_SS_DIS_SHIFT); 389 GEN8_F2_SS_DIS_SHIFT);
305 390
306 eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK; 391 eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
307 eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) | 392 eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
@@ -315,30 +400,38 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
315 * Iterate through enabled slices and subslices to 400 * Iterate through enabled slices and subslices to
316 * count the total enabled EU. 401 * count the total enabled EU.
317 */ 402 */
318 for (s = 0; s < s_max; s++) { 403 for (s = 0; s < sseu->max_slices; s++) {
319 if (!(sseu->slice_mask & BIT(s))) 404 if (!(sseu->slice_mask & BIT(s)))
320 /* skip disabled slice */ 405 /* skip disabled slice */
321 continue; 406 continue;
322 407
323 for (ss = 0; ss < ss_max; ss++) { 408 sseu->subslice_mask[s] = subslice_mask;
409
410 for (ss = 0; ss < sseu->max_subslices; ss++) {
411 u8 eu_disabled_mask;
324 u32 n_disabled; 412 u32 n_disabled;
325 413
326 if (!(sseu->subslice_mask & BIT(ss))) 414 if (!(sseu->subslice_mask[ss] & BIT(ss)))
327 /* skip disabled subslice */ 415 /* skip disabled subslice */
328 continue; 416 continue;
329 417
330 n_disabled = hweight8(eu_disable[s] >> (ss * eu_max)); 418 eu_disabled_mask =
419 eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
420
421 sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
422
423 n_disabled = hweight8(eu_disabled_mask);
331 424
332 /* 425 /*
333 * Record which subslices have 7 EUs. 426 * Record which subslices have 7 EUs.
334 */ 427 */
335 if (eu_max - n_disabled == 7) 428 if (sseu->max_eus_per_subslice - n_disabled == 7)
336 sseu->subslice_7eu[s] |= 1 << ss; 429 sseu->subslice_7eu[s] |= 1 << ss;
337
338 sseu->eu_total += eu_max - n_disabled;
339 } 430 }
340 } 431 }
341 432
433 sseu->eu_total = compute_eu_total(sseu);
434
342 /* 435 /*
343 * BDW is expected to always have a uniform distribution of EU across 436 * BDW is expected to always have a uniform distribution of EU across
344 * subslices with the exception that any one EU in any one subslice may 437 * subslices with the exception that any one EU in any one subslice may
@@ -362,6 +455,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
362 struct intel_device_info *info = mkwrite_device_info(dev_priv); 455 struct intel_device_info *info = mkwrite_device_info(dev_priv);
363 struct sseu_dev_info *sseu = &info->sseu; 456 struct sseu_dev_info *sseu = &info->sseu;
364 u32 fuse1; 457 u32 fuse1;
458 int s, ss;
365 459
366 /* 460 /*
367 * There isn't a register to tell us how many slices/subslices. We 461 * There isn't a register to tell us how many slices/subslices. We
@@ -373,18 +467,22 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
373 /* fall through */ 467 /* fall through */
374 case 1: 468 case 1:
375 sseu->slice_mask = BIT(0); 469 sseu->slice_mask = BIT(0);
376 sseu->subslice_mask = BIT(0); 470 sseu->subslice_mask[0] = BIT(0);
377 break; 471 break;
378 case 2: 472 case 2:
379 sseu->slice_mask = BIT(0); 473 sseu->slice_mask = BIT(0);
380 sseu->subslice_mask = BIT(0) | BIT(1); 474 sseu->subslice_mask[0] = BIT(0) | BIT(1);
381 break; 475 break;
382 case 3: 476 case 3:
383 sseu->slice_mask = BIT(0) | BIT(1); 477 sseu->slice_mask = BIT(0) | BIT(1);
384 sseu->subslice_mask = BIT(0) | BIT(1); 478 sseu->subslice_mask[0] = BIT(0) | BIT(1);
479 sseu->subslice_mask[1] = BIT(0) | BIT(1);
385 break; 480 break;
386 } 481 }
387 482
483 sseu->max_slices = hweight8(sseu->slice_mask);
484 sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
485
388 fuse1 = I915_READ(HSW_PAVP_FUSE1); 486 fuse1 = I915_READ(HSW_PAVP_FUSE1);
389 switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { 487 switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
390 default: 488 default:
@@ -401,8 +499,16 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
401 sseu->eu_per_subslice = 6; 499 sseu->eu_per_subslice = 6;
402 break; 500 break;
403 } 501 }
502 sseu->max_eus_per_subslice = sseu->eu_per_subslice;
503
504 for (s = 0; s < sseu->max_slices; s++) {
505 for (ss = 0; ss < sseu->max_subslices; ss++) {
506 sseu_set_eus(sseu, s, ss,
507 (1UL << sseu->eu_per_subslice) - 1);
508 }
509 }
404 510
405 sseu->eu_total = sseu_subslice_total(sseu) * sseu->eu_per_subslice; 511 sseu->eu_total = compute_eu_total(sseu);
406 512
407 /* No powergating for you. */ 513 /* No powergating for you. */
408 sseu->has_slice_pg = 0; 514 sseu->has_slice_pg = 0;
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 7cc5a8e649b5..4bc7b06a789e 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -113,10 +113,13 @@ enum intel_platform {
113 func(supports_tv); \ 113 func(supports_tv); \
114 func(has_ipc); 114 func(has_ipc);
115 115
116#define GEN_MAX_SLICES (6) /* CNL upper bound */
117#define GEN_MAX_SUBSLICES (7)
118
116struct sseu_dev_info { 119struct sseu_dev_info {
117 u8 slice_mask; 120 u8 slice_mask;
118 u8 subslice_mask; 121 u8 subslice_mask[GEN_MAX_SUBSLICES];
119 u8 eu_total; 122 u16 eu_total;
120 u8 eu_per_subslice; 123 u8 eu_per_subslice;
121 u8 min_eu_in_pool; 124 u8 min_eu_in_pool;
122 /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ 125 /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
@@ -124,6 +127,17 @@ struct sseu_dev_info {
124 u8 has_slice_pg:1; 127 u8 has_slice_pg:1;
125 u8 has_subslice_pg:1; 128 u8 has_subslice_pg:1;
126 u8 has_eu_pg:1; 129 u8 has_eu_pg:1;
130
131 /* Topology fields */
132 u8 max_slices;
133 u8 max_subslices;
134 u8 max_eus_per_subslice;
135
136 /* We don't have more than 8 eus per subslice at the moment and as we
137 * store eus enabled using bits, no need to multiply by eus per
138 * subslice.
139 */
140 u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
127}; 141};
128 142
129typedef u8 intel_ring_mask_t; 143typedef u8 intel_ring_mask_t;
@@ -176,7 +190,49 @@ struct intel_driver_caps {
176 190
177static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) 191static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
178{ 192{
179 return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); 193 unsigned int i, total = 0;
194
195 for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
196 total += hweight8(sseu->subslice_mask[i]);
197
198 return total;
199}
200
201static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
202 int slice, int subslice)
203{
204 int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
205 BITS_PER_BYTE);
206 int slice_stride = sseu->max_subslices * subslice_stride;
207
208 return slice * slice_stride + subslice * subslice_stride;
209}
210
211static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu,
212 int slice, int subslice)
213{
214 int i, offset = sseu_eu_idx(sseu, slice, subslice);
215 u16 eu_mask = 0;
216
217 for (i = 0;
218 i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
219 eu_mask |= ((u16) sseu->eu_mask[offset + i]) <<
220 (i * BITS_PER_BYTE);
221 }
222
223 return eu_mask;
224}
225
226static inline void sseu_set_eus(struct sseu_dev_info *sseu,
227 int slice, int subslice, u16 eu_mask)
228{
229 int i, offset = sseu_eu_idx(sseu, slice, subslice);
230
231 for (i = 0;
232 i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
233 sseu->eu_mask[offset + i] =
234 (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
235 }
180} 236}
181 237
182const char *intel_platform_name(enum intel_platform platform); 238const char *intel_platform_name(enum intel_platform platform);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 999d5f2539d4..d763dfb51190 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2248,7 +2248,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
2248 2248
2249 if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { 2249 if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
2250 rpcs |= GEN8_RPCS_SS_CNT_ENABLE; 2250 rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
2251 rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << 2251 rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
2252 GEN8_RPCS_SS_CNT_SHIFT; 2252 GEN8_RPCS_SS_CNT_SHIFT;
2253 rpcs |= GEN8_RPCS_ENABLE; 2253 rpcs |= GEN8_RPCS_ENABLE;
2254 } 2254 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d8ddea0174ca..0320c2c4cfba 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -92,7 +92,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
92 92
93#define instdone_subslice_mask(dev_priv__) \ 93#define instdone_subslice_mask(dev_priv__) \
94 (INTEL_GEN(dev_priv__) == 7 ? \ 94 (INTEL_GEN(dev_priv__) == 7 ? \
95 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) 95 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
96 96
97#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ 97#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
98 for ((slice__) = 0, (subslice__) = 0; \ 98 for ((slice__) = 0, (subslice__) = 0; \