aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2017-03-13 13:06:17 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2017-04-18 01:11:46 -0400
commit82dafcb93b0f7cd7563cbe6460a20b090a0d62d7 (patch)
tree83bc1b5f6202ffef05d75fa284d79a377afb8531
parent954ce087072cceb25a9106ba85d98e3219e4af7a (diff)
drm/i915: Stop using RP_DOWN_EI on Baytrail
commit 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad upstream. On Baytrail, we manually calculate busyness over the evaluation interval to avoid issues with miscalculations with RC6 enabled. However, it turns out that the DOWN_EI interrupt generator is completely bust - it operates in two modes, continuous or never. Neither of which is conducive to good behaviour. Stop unmasking the DOWN_EI interrupt and just compute everything from the UP_EI which does seem to correspond to the desired interval. v2: Fixup gen6_rps_pm_mask() as well v3: Inline vlv_c0_above() to combine the now identical elapsed calculation for up/down and simplify the threshold testing Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@chris-wilson.co.uk (cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac) Signed-off-by: Jani Nikula <jani.nikula@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c73
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c5
3 files changed, 32 insertions, 48 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 107146c82776..e0d72457b23c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1225,7 +1225,7 @@ struct intel_gen6_power_mgmt {
1225 unsigned boosts; 1225 unsigned boosts;
1226 1226
1227 /* manual wa residency calculations */ 1227 /* manual wa residency calculations */
1228 struct intel_rps_ei up_ei, down_ei; 1228 struct intel_rps_ei ei;
1229 1229
1230 /* 1230 /*
1231 * Protects RPS/RC6 register access and PCU communication. 1231 * Protects RPS/RC6 register access and PCU communication.
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c0c336286129..02908e37c228 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -990,68 +990,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
990 ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); 990 ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
991} 991}
992 992
993static bool vlv_c0_above(struct drm_i915_private *dev_priv,
994 const struct intel_rps_ei *old,
995 const struct intel_rps_ei *now,
996 int threshold)
997{
998 u64 time, c0;
999 unsigned int mul = 100;
1000
1001 if (old->cz_clock == 0)
1002 return false;
1003
1004 if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
1005 mul <<= 8;
1006
1007 time = now->cz_clock - old->cz_clock;
1008 time *= threshold * dev_priv->czclk_freq;
1009
1010 /* Workload can be split between render + media, e.g. SwapBuffers
1011 * being blitted in X after being rendered in mesa. To account for
1012 * this we need to combine both engines into our activity counter.
1013 */
1014 c0 = now->render_c0 - old->render_c0;
1015 c0 += now->media_c0 - old->media_c0;
1016 c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
1017
1018 return c0 >= time;
1019}
1020
1021void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) 993void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
1022{ 994{
1023 vlv_c0_read(dev_priv, &dev_priv->rps.down_ei); 995 memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
1024 dev_priv->rps.up_ei = dev_priv->rps.down_ei;
1025} 996}
1026 997
1027static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) 998static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
1028{ 999{
1000 const struct intel_rps_ei *prev = &dev_priv->rps.ei;
1029 struct intel_rps_ei now; 1001 struct intel_rps_ei now;
1030 u32 events = 0; 1002 u32 events = 0;
1031 1003
1032 if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) 1004 if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
1033 return 0; 1005 return 0;
1034 1006
1035 vlv_c0_read(dev_priv, &now); 1007 vlv_c0_read(dev_priv, &now);
1036 if (now.cz_clock == 0) 1008 if (now.cz_clock == 0)
1037 return 0; 1009 return 0;
1038 1010
1039 if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) { 1011 if (prev->cz_clock) {
1040 if (!vlv_c0_above(dev_priv, 1012 u64 time, c0;
1041 &dev_priv->rps.down_ei, &now, 1013 unsigned int mul;
1042 dev_priv->rps.down_threshold))
1043 events |= GEN6_PM_RP_DOWN_THRESHOLD;
1044 dev_priv->rps.down_ei = now;
1045 }
1046 1014
1047 if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { 1015 mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */
1048 if (vlv_c0_above(dev_priv, 1016 if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
1049 &dev_priv->rps.up_ei, &now, 1017 mul <<= 8;
1050 dev_priv->rps.up_threshold)) 1018
1051 events |= GEN6_PM_RP_UP_THRESHOLD; 1019 time = now.cz_clock - prev->cz_clock;
1052 dev_priv->rps.up_ei = now; 1020 time *= dev_priv->czclk_freq;
1021
1022 /* Workload can be split between render + media,
1023 * e.g. SwapBuffers being blitted in X after being rendered in
1024 * mesa. To account for this we need to combine both engines
1025 * into our activity counter.
1026 */
1027 c0 = now.render_c0 - prev->render_c0;
1028 c0 += now.media_c0 - prev->media_c0;
1029 c0 *= mul;
1030
1031 if (c0 > time * dev_priv->rps.up_threshold)
1032 events = GEN6_PM_RP_UP_THRESHOLD;
1033 else if (c0 < time * dev_priv->rps.down_threshold)
1034 events = GEN6_PM_RP_DOWN_THRESHOLD;
1053 } 1035 }
1054 1036
1037 dev_priv->rps.ei = now;
1055 return events; 1038 return events;
1056} 1039}
1057 1040
@@ -4490,7 +4473,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
4490 /* Let's track the enabled rps events */ 4473 /* Let's track the enabled rps events */
4491 if (IS_VALLEYVIEW(dev_priv)) 4474 if (IS_VALLEYVIEW(dev_priv))
4492 /* WaGsvRC0ResidencyMethod:vlv */ 4475 /* WaGsvRC0ResidencyMethod:vlv */
4493 dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED; 4476 dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
4494 else 4477 else
4495 dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; 4478 dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
4496 4479
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e590d0f960f8..2c6d59d4b6d3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4940,8 +4940,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
4940{ 4940{
4941 u32 mask = 0; 4941 u32 mask = 0;
4942 4942
4943 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
4943 if (val > dev_priv->rps.min_freq_softlimit) 4944 if (val > dev_priv->rps.min_freq_softlimit)
4944 mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 4945 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
4945 if (val < dev_priv->rps.max_freq_softlimit) 4946 if (val < dev_priv->rps.max_freq_softlimit)
4946 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 4947 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
4947 4948
@@ -5041,7 +5042,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
5041{ 5042{
5042 mutex_lock(&dev_priv->rps.hw_lock); 5043 mutex_lock(&dev_priv->rps.hw_lock);
5043 if (dev_priv->rps.enabled) { 5044 if (dev_priv->rps.enabled) {
5044 if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) 5045 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
5045 gen6_rps_reset_ei(dev_priv); 5046 gen6_rps_reset_ei(dev_priv);
5046 I915_WRITE(GEN6_PMINTRMSK, 5047 I915_WRITE(GEN6_PMINTRMSK,
5047 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 5048 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));