aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2017-03-13 13:06:17 -0400
committer: Jani Nikula <jani.nikula@intel.com> 2017-03-14 06:29:43 -0400
commit: 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad (patch)
tree: 5cfe7e02a2164f1cbf63bcf40e25ff6562ce80e0
parent: 0f5418e564ac6452b9086295646e602a9addc4bf (diff)
drm/i915: Stop using RP_DOWN_EI on Baytrail
On Baytrail, we manually calculate busyness over the evaluation interval to avoid issues with miscalculations with RC6 enabled. However, it turns out that the DOWN_EI interrupt generator is completely bust - it operates in two modes, continuous or never, neither of which is conducive to good behaviour. Stop unmasking the DOWN_EI interrupt and just compute everything from the UP_EI which does seem to correspond to the desired interval. v2: Fixup gen6_rps_pm_mask() as well v3: Inline vlv_c0_above() to combine the now identical elapsed calculation for up/down and simplify the threshold testing Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: <stable@vger.kernel.org> # v4.1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@chris-wilson.co.uk (cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac) Signed-off-by: Jani Nikula <jani.nikula@intel.com>
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 2
-rw-r--r-- drivers/gpu/drm/i915/i915_irq.c 73
-rw-r--r-- drivers/gpu/drm/i915/intel_pm.c 5
3 files changed, 32 insertions, 48 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 80be09831a52..1e53c31b6826 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1325,7 +1325,7 @@ struct intel_gen6_power_mgmt {
1325 unsigned boosts; 1325 unsigned boosts;
1326 1326
1327 /* manual wa residency calculations */ 1327 /* manual wa residency calculations */
1328 struct intel_rps_ei up_ei, down_ei; 1328 struct intel_rps_ei ei;
1329 1329
1330 /* 1330 /*
1331 * Protects RPS/RC6 register access and PCU communication. 1331 * Protects RPS/RC6 register access and PCU communication.
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4fc8973744b4..b6c886ac901b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1046,68 +1046,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
1046 ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); 1046 ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
1047} 1047}
1048 1048
1049static bool vlv_c0_above(struct drm_i915_private *dev_priv,
1050 const struct intel_rps_ei *old,
1051 const struct intel_rps_ei *now,
1052 int threshold)
1053{
1054 u64 time, c0;
1055 unsigned int mul = 100;
1056
1057 if (old->cz_clock == 0)
1058 return false;
1059
1060 if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
1061 mul <<= 8;
1062
1063 time = now->cz_clock - old->cz_clock;
1064 time *= threshold * dev_priv->czclk_freq;
1065
1066 /* Workload can be split between render + media, e.g. SwapBuffers
1067 * being blitted in X after being rendered in mesa. To account for
1068 * this we need to combine both engines into our activity counter.
1069 */
1070 c0 = now->render_c0 - old->render_c0;
1071 c0 += now->media_c0 - old->media_c0;
1072 c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
1073
1074 return c0 >= time;
1075}
1076
1077void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) 1049void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
1078{ 1050{
1079 vlv_c0_read(dev_priv, &dev_priv->rps.down_ei); 1051 memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
1080 dev_priv->rps.up_ei = dev_priv->rps.down_ei;
1081} 1052}
1082 1053
1083static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) 1054static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
1084{ 1055{
1056 const struct intel_rps_ei *prev = &dev_priv->rps.ei;
1085 struct intel_rps_ei now; 1057 struct intel_rps_ei now;
1086 u32 events = 0; 1058 u32 events = 0;
1087 1059
1088 if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) 1060 if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
1089 return 0; 1061 return 0;
1090 1062
1091 vlv_c0_read(dev_priv, &now); 1063 vlv_c0_read(dev_priv, &now);
1092 if (now.cz_clock == 0) 1064 if (now.cz_clock == 0)
1093 return 0; 1065 return 0;
1094 1066
1095 if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) { 1067 if (prev->cz_clock) {
1096 if (!vlv_c0_above(dev_priv, 1068 u64 time, c0;
1097 &dev_priv->rps.down_ei, &now, 1069 unsigned int mul;
1098 dev_priv->rps.down_threshold))
1099 events |= GEN6_PM_RP_DOWN_THRESHOLD;
1100 dev_priv->rps.down_ei = now;
1101 }
1102 1070
1103 if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { 1071 mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */
1104 if (vlv_c0_above(dev_priv, 1072 if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
1105 &dev_priv->rps.up_ei, &now, 1073 mul <<= 8;
1106 dev_priv->rps.up_threshold)) 1074
1107 events |= GEN6_PM_RP_UP_THRESHOLD; 1075 time = now.cz_clock - prev->cz_clock;
1108 dev_priv->rps.up_ei = now; 1076 time *= dev_priv->czclk_freq;
1077
1078 /* Workload can be split between render + media,
1079 * e.g. SwapBuffers being blitted in X after being rendered in
1080 * mesa. To account for this we need to combine both engines
1081 * into our activity counter.
1082 */
1083 c0 = now.render_c0 - prev->render_c0;
1084 c0 += now.media_c0 - prev->media_c0;
1085 c0 *= mul;
1086
1087 if (c0 > time * dev_priv->rps.up_threshold)
1088 events = GEN6_PM_RP_UP_THRESHOLD;
1089 else if (c0 < time * dev_priv->rps.down_threshold)
1090 events = GEN6_PM_RP_DOWN_THRESHOLD;
1109 } 1091 }
1110 1092
1093 dev_priv->rps.ei = now;
1111 return events; 1094 return events;
1112} 1095}
1113 1096
@@ -4228,7 +4211,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
4228 /* Let's track the enabled rps events */ 4211 /* Let's track the enabled rps events */
4229 if (IS_VALLEYVIEW(dev_priv)) 4212 if (IS_VALLEYVIEW(dev_priv))
4230 /* WaGsvRC0ResidencyMethod:vlv */ 4213 /* WaGsvRC0ResidencyMethod:vlv */
4231 dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED; 4214 dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
4232 else 4215 else
4233 dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; 4216 dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
4234 4217
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 940bab22d464..6a29784d2b41 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4928,8 +4928,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
4928{ 4928{
4929 u32 mask = 0; 4929 u32 mask = 0;
4930 4930
4931 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
4931 if (val > dev_priv->rps.min_freq_softlimit) 4932 if (val > dev_priv->rps.min_freq_softlimit)
4932 mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 4933 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
4933 if (val < dev_priv->rps.max_freq_softlimit) 4934 if (val < dev_priv->rps.max_freq_softlimit)
4934 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 4935 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
4935 4936
@@ -5039,7 +5040,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
5039{ 5040{
5040 mutex_lock(&dev_priv->rps.hw_lock); 5041 mutex_lock(&dev_priv->rps.hw_lock);
5041 if (dev_priv->rps.enabled) { 5042 if (dev_priv->rps.enabled) {
5042 if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) 5043 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
5043 gen6_rps_reset_ei(dev_priv); 5044 gen6_rps_reset_ei(dev_priv);
5044 I915_WRITE(GEN6_PMINTRMSK, 5045 I915_WRITE(GEN6_PMINTRMSK,
5045 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 5046 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));