aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Damien Lespiau <damien.lespiau@intel.com> 2015-02-14 13:30:29 -0500
committer Daniel Vetter <daniel.vetter@ffwll.ch> 2015-02-23 18:07:05 -0500
commit b76687910693b1f6c32a3251a8291d67363bba34 (patch)
tree 6452da876111dc6dbd93be1913ec0df3186e3f5b /drivers
parent cb0a08c1ed7daa16d13876e3e1b8787d95b25b0e (diff)
drm/i915/skl: Tune IZ hashing when subslices are unbalanced
When one EU is disabled in a particular subslice, we can tune how the work is spread between subslices to improve EU utilization. v2: - Use a bitfield to record which subslice(s) has(have) 7 EUs. That will also make the machinery work if several subslices have 7 EUs. (Jeff Mcgee) - Only apply the different hashing algorithm if the slice is effectively unbalanced by checking there's a single subslice with 7 EUs. (Jeff Mcgee) v3: Fix typo in comment (Jeff Mcgee) Issue: VIZ-3845 Cc: Jeff Mcgee <jeff.mcgee@intel.com> Reviewed-by: Jeff Mcgee <jeff.mcgee@intel.com> Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/drm/i915/i915_dma.c 17
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 2
-rw-r--r-- drivers/gpu/drm/i915/i915_reg.h 2
-rw-r--r-- drivers/gpu/drm/i915/intel_ringbuffer.c 45
4 files changed, 62 insertions, 4 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 9a365b40b50e..f9992ca11d10 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -650,13 +650,24 @@ static void intel_device_info_runtime_init(struct drm_device *dev)
650 continue; 650 continue;
651 651
652 for (ss = 0; ss < ss_max; ss++) { 652 for (ss = 0; ss < ss_max; ss++) {
653 u32 n_disabled;
654
653 if (ss_disable & (0x1 << ss)) 655 if (ss_disable & (0x1 << ss))
654 /* skip disabled subslice */ 656 /* skip disabled subslice */
655 continue; 657 continue;
656 658
657 info->eu_total += eu_max - 659 n_disabled = hweight8(eu_disable[s] >>
658 hweight8(eu_disable[s] >> 660 (ss * eu_max));
659 (ss * eu_max)); 661
662 /*
663 * Record which subslice(s) has(have) 7 EUs. we
664 * can tune the hash used to spread work among
665 * subslices if they are unbalanced.
666 */
667 if (eu_max - n_disabled == 7)
668 info->subslice_7eu[s] |= 1 << ss;
669
670 info->eu_total += eu_max - n_disabled;
660 } 671 }
661 } 672 }
662 673
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 61d41abde2e9..4280d0b292da 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -700,6 +700,8 @@ struct intel_device_info {
700 u8 subslice_per_slice; 700 u8 subslice_per_slice;
701 u8 eu_total; 701 u8 eu_total;
702 u8 eu_per_subslice; 702 u8 eu_per_subslice;
703 /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
704 u8 subslice_7eu[3];
703 u8 has_slice_pg:1; 705 u8 has_slice_pg:1;
704 u8 has_subslice_pg:1; 706 u8 has_subslice_pg:1;
705 u8 has_eu_pg:1; 707 u8 has_eu_pg:1;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5fab90c84c5d..c2124119692d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1351,6 +1351,8 @@ enum skl_disp_power_wells {
1351#define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0) 1351#define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0)
1352#define GEN6_WIZ_HASHING_MASK GEN6_WIZ_HASHING(1, 1) 1352#define GEN6_WIZ_HASHING_MASK GEN6_WIZ_HASHING(1, 1)
1353#define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) 1353#define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5)
/*
 * Per-slice IZ hashing control: each slice owns a 2-bit field located at
 * bit offset (slice * 2).
 *
 * GEN9_IZ_HASHING_MASK(slice) - mask covering the 2-bit field of @slice
 * GEN9_IZ_HASHING(slice, val) - @val shifted into the field of @slice
 *
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. "s + 1") expand with the intended precedence.
 */
#define   GEN9_IZ_HASHING_MASK(slice)		(0x3 << ((slice) * 2))
#define   GEN9_IZ_HASHING(slice, val)		((val) << ((slice) * 2))
1354 1356
1355#define GFX_MODE 0x02520 1357#define GFX_MODE 0x02520
1356#define GFX_MODE_GEN7 0x0229c 1358#define GFX_MODE_GEN7 0x0229c
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4570fe172b79..665985d5fcf4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1002,6 +1002,49 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
1002 return 0; 1002 return 0;
1003} 1003}
1004 1004
1005static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
1006{
1007 struct drm_device *dev = ring->dev;
1008 struct drm_i915_private *dev_priv = dev->dev_private;
1009 u8 vals[3] = { 0, 0, 0 };
1010 unsigned int i;
1011
1012 for (i = 0; i < 3; i++) {
1013 u8 ss;
1014
1015 /*
1016 * Only consider slices where one, and only one, subslice has 7
1017 * EUs
1018 */
1019 if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
1020 continue;
1021
1022 /*
1023 * subslice_7eu[i] != 0 (because of the check above) and
1024 * ss_max == 4 (maximum number of subslices possible per slice)
1025 *
1026 * -> 0 <= ss <= 3;
1027 */
1028 ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
1029 vals[i] = 3 - ss;
1030 }
1031
1032 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1033 return 0;
1034
1035 /* Tune IZ hashing. See intel_device_info_runtime_init() */
1036 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1037 GEN9_IZ_HASHING_MASK(2) |
1038 GEN9_IZ_HASHING_MASK(1) |
1039 GEN9_IZ_HASHING_MASK(0),
1040 GEN9_IZ_HASHING(2, vals[2]) |
1041 GEN9_IZ_HASHING(1, vals[1]) |
1042 GEN9_IZ_HASHING(0, vals[0]));
1043
1044 return 0;
1045}
1046
1047
1005static int skl_init_workarounds(struct intel_engine_cs *ring) 1048static int skl_init_workarounds(struct intel_engine_cs *ring)
1006{ 1049{
1007 struct drm_device *dev = ring->dev; 1050 struct drm_device *dev = ring->dev;
@@ -1014,7 +1057,7 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
1014 WA_SET_BIT_MASKED(HIZ_CHICKEN, 1057 WA_SET_BIT_MASKED(HIZ_CHICKEN,
1015 BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); 1058 BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1016 1059
1017 return 0; 1060 return skl_tune_iz_hashing(ring);
1018} 1061}
1019 1062
1020int init_workarounds_ring(struct intel_engine_cs *ring) 1063int init_workarounds_ring(struct intel_engine_cs *ring)