aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTvrtko Ursulin <tvrtko.ursulin@intel.com>2018-09-03 07:30:07 -0400
committerTvrtko Ursulin <tvrtko.ursulin@intel.com>2018-09-04 09:49:46 -0400
commitb212f0a470eeb62a8eaa95f51b3cdbc457f687a8 (patch)
tree9af7a428631b794bce912a62758aa8f940d6d9cf
parent06348d3086a3b34f2db6c7692b4327fb7fc0b6c7 (diff)
drm/i915/icl: Fix context RPCS programming
There are two issues with the current RPCS programming for Icelake: Expansion of the slice count bitfield has been missed, as well as the required programming workaround for the subslice count bitfield size limitation. 1) Bitfield width for configuring the active slice count has grown so we need to program the GEN8_R_PWR_CLK_STATE accordingly. Current code was always requesting eight times the number of slices (due to writing to a bitfield starting three bits higher than it should). These requests were luckily a) capped by the hardware to the available number of slices, and b) we haven't yet exported the code to ask for reduced slice configurations. Due to both of the above there was no impact from this incorrect programming but we should still fix it. 2) Due to the subslice count bitfield being only three bits wide and furthermore capped to a maximum documented value of four, a special programming workaround is needed to enable more than four subslices. With this programming the driver has to consider the GT configuration as 2x4x8, while the hardware internally translates this to 1x8x8. A limitation stemming from this is that either a subslice count between one and four can be selected, or a subslice count equaling the total number of subslices in all selected slices. In other words, odd subslice counts greater than four are impossible, as are odd subslice counts greater than a single slice subslice count. This also had no impact in the current code base due to breakage from 1) always requesting more than one slice. While fixing this we also add some asserts to flag up any future bitfield overflows. v2: * Use a local in all branches for clarity. 
(Lionel) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Bspec: 12247 Reported-by: tony.ye@intel.com Suggested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: tony.ye@intel.com Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180903113007.2643-1-tvrtko.ursulin@linux.intel.com
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h2
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c87
2 files changed, 76 insertions, 13 deletions
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f2321785cbd6..09bc8e730ee1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -344,6 +344,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
344#define GEN8_RPCS_S_CNT_ENABLE (1 << 18) 344#define GEN8_RPCS_S_CNT_ENABLE (1 << 18)
345#define GEN8_RPCS_S_CNT_SHIFT 15 345#define GEN8_RPCS_S_CNT_SHIFT 15
346#define GEN8_RPCS_S_CNT_MASK (0x7 << GEN8_RPCS_S_CNT_SHIFT) 346#define GEN8_RPCS_S_CNT_MASK (0x7 << GEN8_RPCS_S_CNT_SHIFT)
347#define GEN11_RPCS_S_CNT_SHIFT 12
348#define GEN11_RPCS_S_CNT_MASK (0x3f << GEN11_RPCS_S_CNT_SHIFT)
347#define GEN8_RPCS_SS_CNT_ENABLE (1 << 11) 349#define GEN8_RPCS_SS_CNT_ENABLE (1 << 11)
348#define GEN8_RPCS_SS_CNT_SHIFT 8 350#define GEN8_RPCS_SS_CNT_SHIFT 8
349#define GEN8_RPCS_SS_CNT_MASK (0x7 << GEN8_RPCS_SS_CNT_SHIFT) 351#define GEN8_RPCS_SS_CNT_MASK (0x7 << GEN8_RPCS_SS_CNT_SHIFT)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f8ceb9c99dd6..def467c2451b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2480,6 +2480,9 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
2480static u32 2480static u32
2481make_rpcs(struct drm_i915_private *dev_priv) 2481make_rpcs(struct drm_i915_private *dev_priv)
2482{ 2482{
2483 bool subslice_pg = INTEL_INFO(dev_priv)->sseu.has_subslice_pg;
2484 u8 slices = hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask);
2485 u8 subslices = hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]);
2483 u32 rpcs = 0; 2486 u32 rpcs = 0;
2484 2487
2485 /* 2488 /*
@@ -2490,30 +2493,88 @@ make_rpcs(struct drm_i915_private *dev_priv)
2490 return 0; 2493 return 0;
2491 2494
2492 /* 2495 /*
2496 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
2497 * wide and Icelake has up to eight subslices, special programming is
2498 * needed in order to correctly enable all subslices.
2499 *
2500 * According to documentation software must consider the configuration
2501 * as 2x4x8 and hardware will translate this to 1x8x8.
2502 *
2503 * Furthermore, even though SScount is three bits, maximum documented
2504 * value for it is four. From this some rules/restrictions follow:
2505 *
2506 * 1.
2507 * If enabled subslice count is greater than four, two whole slices must
2508 * be enabled instead.
2509 *
2510 * 2.
2511 * When more than one slice is enabled, hardware ignores the subslice
2512 * count altogether.
2513 *
2514 * From these restrictions it follows that it is not possible to enable
2515 * a count of subslices between the SScount maximum of four restriction,
2516 * and the maximum available number on a particular SKU. Either all
2517 * subslices are enabled, or a count between one and four on the first
2518 * slice.
2519 */
2520 if (IS_GEN11(dev_priv) && slices == 1 && subslices >= 4) {
2521 GEM_BUG_ON(subslices & 1);
2522
2523 subslice_pg = false;
2524 slices *= 2;
2525 }
2526
2527 /*
2493 * Starting in Gen9, render power gating can leave 2528 * Starting in Gen9, render power gating can leave
2494 * slice/subslice/EU in a partially enabled state. We 2529 * slice/subslice/EU in a partially enabled state. We
2495 * must make an explicit request through RPCS for full 2530 * must make an explicit request through RPCS for full
2496 * enablement. 2531 * enablement.
2497 */ 2532 */
2498 if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) { 2533 if (INTEL_INFO(dev_priv)->sseu.has_slice_pg) {
2499 rpcs |= GEN8_RPCS_S_CNT_ENABLE; 2534 u32 mask, val = slices;
2500 rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.slice_mask) << 2535
2501 GEN8_RPCS_S_CNT_SHIFT; 2536 if (INTEL_GEN(dev_priv) >= 11) {
2502 rpcs |= GEN8_RPCS_ENABLE; 2537 mask = GEN11_RPCS_S_CNT_MASK;
2538 val <<= GEN11_RPCS_S_CNT_SHIFT;
2539 } else {
2540 mask = GEN8_RPCS_S_CNT_MASK;
2541 val <<= GEN8_RPCS_S_CNT_SHIFT;
2542 }
2543
2544 GEM_BUG_ON(val & ~mask);
2545 val &= mask;
2546
2547 rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
2503 } 2548 }
2504 2549
2505 if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { 2550 if (subslice_pg) {
2506 rpcs |= GEN8_RPCS_SS_CNT_ENABLE; 2551 u32 val = subslices;
2507 rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << 2552
2508 GEN8_RPCS_SS_CNT_SHIFT; 2553 val <<= GEN8_RPCS_SS_CNT_SHIFT;
2509 rpcs |= GEN8_RPCS_ENABLE; 2554
2555 GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
2556 val &= GEN8_RPCS_SS_CNT_MASK;
2557
2558 rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
2510 } 2559 }
2511 2560
2512 if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) { 2561 if (INTEL_INFO(dev_priv)->sseu.has_eu_pg) {
2513 rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << 2562 u32 val;
2514 GEN8_RPCS_EU_MIN_SHIFT; 2563
2515 rpcs |= INTEL_INFO(dev_priv)->sseu.eu_per_subslice << 2564 val = INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
2516 GEN8_RPCS_EU_MAX_SHIFT; 2565 GEN8_RPCS_EU_MIN_SHIFT;
2566 GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
2567 val &= GEN8_RPCS_EU_MIN_MASK;
2568
2569 rpcs |= val;
2570
2571 val = INTEL_INFO(dev_priv)->sseu.eu_per_subslice <<
2572 GEN8_RPCS_EU_MAX_SHIFT;
2573 GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
2574 val &= GEN8_RPCS_EU_MAX_MASK;
2575
2576 rpcs |= val;
2577
2517 rpcs |= GEN8_RPCS_ENABLE; 2578 rpcs |= GEN8_RPCS_ENABLE;
2518 } 2579 }
2519 2580