diff options
author | Damien Lespiau <damien.lespiau@intel.com> | 2015-02-14 13:30:29 -0500 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2015-02-23 18:07:05 -0500 |
commit | b76687910693b1f6c32a3251a8291d67363bba34 (patch) | |
tree | 6452da876111dc6dbd93be1913ec0df3186e3f5b /drivers | |
parent | cb0a08c1ed7daa16d13876e3e1b8787d95b25b0e (diff) |
drm/i915/skl: Tune IZ hashing when subslices are unbalanced
When one EU is disabled in a particular subslice, we can tune how the
work is spread between subslices to improve EU utilization.
v2: - Use a bitfield to record which subslice(s) has(have) 7 EUs. That
will also make the machinery work if several subslices have 7 EUs.
(Jeff Mcgee)
- Only apply the different hashing algorithm if the slice is
effectively unbalanced by checking there's a single subslice with
7 EUs. (Jeff Mcgee)
v3: Fix typo in comment (Jeff Mcgee)
Issue: VIZ-3845
Cc: Jeff Mcgee <jeff.mcgee@intel.com>
Reviewed-by: Jeff Mcgee <jeff.mcgee@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/i915/i915_dma.c | 17 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_reg.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 45 |
4 files changed, 62 insertions, 4 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9a365b40b50e..f9992ca11d10 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c | |||
@@ -650,13 +650,24 @@ static void intel_device_info_runtime_init(struct drm_device *dev) | |||
650 | continue; | 650 | continue; |
651 | 651 | ||
652 | for (ss = 0; ss < ss_max; ss++) { | 652 | for (ss = 0; ss < ss_max; ss++) { |
653 | u32 n_disabled; | ||
654 | |||
653 | if (ss_disable & (0x1 << ss)) | 655 | if (ss_disable & (0x1 << ss)) |
654 | /* skip disabled subslice */ | 656 | /* skip disabled subslice */ |
655 | continue; | 657 | continue; |
656 | 658 | ||
657 | info->eu_total += eu_max - | 659 | n_disabled = hweight8(eu_disable[s] >> |
658 | hweight8(eu_disable[s] >> | 660 | (ss * eu_max)); |
659 | (ss * eu_max)); | 661 | |
662 | /* | ||
663 | * Record which subslice(s) has(have) 7 EUs. we | ||
664 | * can tune the hash used to spread work among | ||
665 | * subslices if they are unbalanced. | ||
666 | */ | ||
667 | if (eu_max - n_disabled == 7) | ||
668 | info->subslice_7eu[s] |= 1 << ss; | ||
669 | |||
670 | info->eu_total += eu_max - n_disabled; | ||
660 | } | 671 | } |
661 | } | 672 | } |
662 | 673 | ||
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 61d41abde2e9..4280d0b292da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h | |||
@@ -700,6 +700,8 @@ struct intel_device_info { | |||
700 | u8 subslice_per_slice; | 700 | u8 subslice_per_slice; |
701 | u8 eu_total; | 701 | u8 eu_total; |
702 | u8 eu_per_subslice; | 702 | u8 eu_per_subslice; |
703 | /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ | ||
704 | u8 subslice_7eu[3]; | ||
703 | u8 has_slice_pg:1; | 705 | u8 has_slice_pg:1; |
704 | u8 has_subslice_pg:1; | 706 | u8 has_subslice_pg:1; |
705 | u8 has_eu_pg:1; | 707 | u8 has_eu_pg:1; |
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5fab90c84c5d..c2124119692d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h | |||
@@ -1351,6 +1351,8 @@ enum skl_disp_power_wells { | |||
1351 | #define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0) | 1351 | #define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0) |
1352 | #define GEN6_WIZ_HASHING_MASK GEN6_WIZ_HASHING(1, 1) | 1352 | #define GEN6_WIZ_HASHING_MASK GEN6_WIZ_HASHING(1, 1) |
1353 | #define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) | 1353 | #define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) |
1354 | #define GEN9_IZ_HASHING_MASK(slice) (0x3 << (slice * 2)) | ||
1355 | #define GEN9_IZ_HASHING(slice, val) ((val) << (slice * 2)) | ||
1354 | 1356 | ||
1355 | #define GFX_MODE 0x02520 | 1357 | #define GFX_MODE 0x02520 |
1356 | #define GFX_MODE_GEN7 0x0229c | 1358 | #define GFX_MODE_GEN7 0x0229c |
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4570fe172b79..665985d5fcf4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c | |||
@@ -1002,6 +1002,49 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) | |||
1002 | return 0; | 1002 | return 0; |
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | static int skl_tune_iz_hashing(struct intel_engine_cs *ring) | ||
1006 | { | ||
1007 | struct drm_device *dev = ring->dev; | ||
1008 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
1009 | u8 vals[3] = { 0, 0, 0 }; | ||
1010 | unsigned int i; | ||
1011 | |||
1012 | for (i = 0; i < 3; i++) { | ||
1013 | u8 ss; | ||
1014 | |||
1015 | /* | ||
1016 | * Only consider slices where one, and only one, subslice has 7 | ||
1017 | * EUs | ||
1018 | */ | ||
1019 | if (hweight8(dev_priv->info.subslice_7eu[i]) != 1) | ||
1020 | continue; | ||
1021 | |||
1022 | /* | ||
1023 | * subslice_7eu[i] != 0 (because of the check above) and | ||
1024 | * ss_max == 4 (maximum number of subslices possible per slice) | ||
1025 | * | ||
1026 | * -> 0 <= ss <= 3; | ||
1027 | */ | ||
1028 | ss = ffs(dev_priv->info.subslice_7eu[i]) - 1; | ||
1029 | vals[i] = 3 - ss; | ||
1030 | } | ||
1031 | |||
1032 | if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) | ||
1033 | return 0; | ||
1034 | |||
1035 | /* Tune IZ hashing. See intel_device_info_runtime_init() */ | ||
1036 | WA_SET_FIELD_MASKED(GEN7_GT_MODE, | ||
1037 | GEN9_IZ_HASHING_MASK(2) | | ||
1038 | GEN9_IZ_HASHING_MASK(1) | | ||
1039 | GEN9_IZ_HASHING_MASK(0), | ||
1040 | GEN9_IZ_HASHING(2, vals[2]) | | ||
1041 | GEN9_IZ_HASHING(1, vals[1]) | | ||
1042 | GEN9_IZ_HASHING(0, vals[0])); | ||
1043 | |||
1044 | return 0; | ||
1045 | } | ||
1046 | |||
1047 | |||
1005 | static int skl_init_workarounds(struct intel_engine_cs *ring) | 1048 | static int skl_init_workarounds(struct intel_engine_cs *ring) |
1006 | { | 1049 | { |
1007 | struct drm_device *dev = ring->dev; | 1050 | struct drm_device *dev = ring->dev; |
@@ -1014,7 +1057,7 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) | |||
1014 | WA_SET_BIT_MASKED(HIZ_CHICKEN, | 1057 | WA_SET_BIT_MASKED(HIZ_CHICKEN, |
1015 | BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); | 1058 | BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); |
1016 | 1059 | ||
1017 | return 0; | 1060 | return skl_tune_iz_hashing(ring); |
1018 | } | 1061 | } |
1019 | 1062 | ||
1020 | int init_workarounds_ring(struct intel_engine_cs *ring) | 1063 | int init_workarounds_ring(struct intel_engine_cs *ring) |