diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2017-03-13 13:06:17 -0400 |
---|---|---|
committer | Jani Nikula <jani.nikula@intel.com> | 2017-03-14 06:29:43 -0400 |
commit | 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad (patch) | |
tree | 5cfe7e02a2164f1cbf63bcf40e25ff6562ce80e0 | |
parent | 0f5418e564ac6452b9086295646e602a9addc4bf (diff) |
drm/i915: Stop using RP_DOWN_EI on Baytrail
On Baytrail, we manually calculate busyness over the evaluation interval
to avoid issues with miscalculations with RC6 enabled. However, it turns
out that the DOWN_EI interrupt generator is completely bust - it
operates in two modes, continuous or never, neither of which is
conducive to good behaviour. Stop unmasking the DOWN_EI interrupt and just
compute everything from the UP_EI which does seem to correspond to the
desired interval.
v2: Fixup gen6_rps_pm_mask() as well
v3: Inline vlv_c0_above() to combine the now identical elapsed
calculation for up/down and simplify the threshold testing
Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.1+
Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@chris-wilson.co.uk
(cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 73 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 5 |
3 files changed, 32 insertions, 48 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 80be09831a52..1e53c31b6826 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h | |||
@@ -1325,7 +1325,7 @@ struct intel_gen6_power_mgmt { | |||
1325 | unsigned boosts; | 1325 | unsigned boosts; |
1326 | 1326 | ||
1327 | /* manual wa residency calculations */ | 1327 | /* manual wa residency calculations */ |
1328 | struct intel_rps_ei up_ei, down_ei; | 1328 | struct intel_rps_ei ei; |
1329 | 1329 | ||
1330 | /* | 1330 | /* |
1331 | * Protects RPS/RC6 register access and PCU communication. | 1331 | * Protects RPS/RC6 register access and PCU communication. |
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4fc8973744b4..b6c886ac901b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c | |||
@@ -1046,68 +1046,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, | |||
1046 | ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); | 1046 | ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); |
1047 | } | 1047 | } |
1048 | 1048 | ||
1049 | static bool vlv_c0_above(struct drm_i915_private *dev_priv, | ||
1050 | const struct intel_rps_ei *old, | ||
1051 | const struct intel_rps_ei *now, | ||
1052 | int threshold) | ||
1053 | { | ||
1054 | u64 time, c0; | ||
1055 | unsigned int mul = 100; | ||
1056 | |||
1057 | if (old->cz_clock == 0) | ||
1058 | return false; | ||
1059 | |||
1060 | if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) | ||
1061 | mul <<= 8; | ||
1062 | |||
1063 | time = now->cz_clock - old->cz_clock; | ||
1064 | time *= threshold * dev_priv->czclk_freq; | ||
1065 | |||
1066 | /* Workload can be split between render + media, e.g. SwapBuffers | ||
1067 | * being blitted in X after being rendered in mesa. To account for | ||
1068 | * this we need to combine both engines into our activity counter. | ||
1069 | */ | ||
1070 | c0 = now->render_c0 - old->render_c0; | ||
1071 | c0 += now->media_c0 - old->media_c0; | ||
1072 | c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC; | ||
1073 | |||
1074 | return c0 >= time; | ||
1075 | } | ||
1076 | |||
1077 | void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) | 1049 | void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) |
1078 | { | 1050 | { |
1079 | vlv_c0_read(dev_priv, &dev_priv->rps.down_ei); | 1051 | memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); |
1080 | dev_priv->rps.up_ei = dev_priv->rps.down_ei; | ||
1081 | } | 1052 | } |
1082 | 1053 | ||
1083 | static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) | 1054 | static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) |
1084 | { | 1055 | { |
1056 | const struct intel_rps_ei *prev = &dev_priv->rps.ei; | ||
1085 | struct intel_rps_ei now; | 1057 | struct intel_rps_ei now; |
1086 | u32 events = 0; | 1058 | u32 events = 0; |
1087 | 1059 | ||
1088 | if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) | 1060 | if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) |
1089 | return 0; | 1061 | return 0; |
1090 | 1062 | ||
1091 | vlv_c0_read(dev_priv, &now); | 1063 | vlv_c0_read(dev_priv, &now); |
1092 | if (now.cz_clock == 0) | 1064 | if (now.cz_clock == 0) |
1093 | return 0; | 1065 | return 0; |
1094 | 1066 | ||
1095 | if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) { | 1067 | if (prev->cz_clock) { |
1096 | if (!vlv_c0_above(dev_priv, | 1068 | u64 time, c0; |
1097 | &dev_priv->rps.down_ei, &now, | 1069 | unsigned int mul; |
1098 | dev_priv->rps.down_threshold)) | ||
1099 | events |= GEN6_PM_RP_DOWN_THRESHOLD; | ||
1100 | dev_priv->rps.down_ei = now; | ||
1101 | } | ||
1102 | 1070 | ||
1103 | if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { | 1071 | mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */ |
1104 | if (vlv_c0_above(dev_priv, | 1072 | if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) |
1105 | &dev_priv->rps.up_ei, &now, | 1073 | mul <<= 8; |
1106 | dev_priv->rps.up_threshold)) | 1074 | |
1107 | events |= GEN6_PM_RP_UP_THRESHOLD; | 1075 | time = now.cz_clock - prev->cz_clock; |
1108 | dev_priv->rps.up_ei = now; | 1076 | time *= dev_priv->czclk_freq; |
1077 | |||
1078 | /* Workload can be split between render + media, | ||
1079 | * e.g. SwapBuffers being blitted in X after being rendered in | ||
1080 | * mesa. To account for this we need to combine both engines | ||
1081 | * into our activity counter. | ||
1082 | */ | ||
1083 | c0 = now.render_c0 - prev->render_c0; | ||
1084 | c0 += now.media_c0 - prev->media_c0; | ||
1085 | c0 *= mul; | ||
1086 | |||
1087 | if (c0 > time * dev_priv->rps.up_threshold) | ||
1088 | events = GEN6_PM_RP_UP_THRESHOLD; | ||
1089 | else if (c0 < time * dev_priv->rps.down_threshold) | ||
1090 | events = GEN6_PM_RP_DOWN_THRESHOLD; | ||
1109 | } | 1091 | } |
1110 | 1092 | ||
1093 | dev_priv->rps.ei = now; | ||
1111 | return events; | 1094 | return events; |
1112 | } | 1095 | } |
1113 | 1096 | ||
@@ -4228,7 +4211,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) | |||
4228 | /* Let's track the enabled rps events */ | 4211 | /* Let's track the enabled rps events */ |
4229 | if (IS_VALLEYVIEW(dev_priv)) | 4212 | if (IS_VALLEYVIEW(dev_priv)) |
4230 | /* WaGsvRC0ResidencyMethod:vlv */ | 4213 | /* WaGsvRC0ResidencyMethod:vlv */ |
4231 | dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED; | 4214 | dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; |
4232 | else | 4215 | else |
4233 | dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; | 4216 | dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; |
4234 | 4217 | ||
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 940bab22d464..6a29784d2b41 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c | |||
@@ -4928,8 +4928,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) | |||
4928 | { | 4928 | { |
4929 | u32 mask = 0; | 4929 | u32 mask = 0; |
4930 | 4930 | ||
4931 | /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ | ||||
4931 | if (val > dev_priv->rps.min_freq_softlimit) | 4932 | if (val > dev_priv->rps.min_freq_softlimit) |
4932 | mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; | 4933 | mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; |
4933 | if (val < dev_priv->rps.max_freq_softlimit) | 4934 | if (val < dev_priv->rps.max_freq_softlimit) |
4934 | mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; | 4935 | mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; |
4935 | 4936 | ||
@@ -5039,7 +5040,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) | |||
5039 | { | 5040 | { |
5040 | mutex_lock(&dev_priv->rps.hw_lock); | 5041 | mutex_lock(&dev_priv->rps.hw_lock); |
5041 | if (dev_priv->rps.enabled) { | 5042 | if (dev_priv->rps.enabled) { |
5042 | if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) | 5043 | if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) |
5043 | gen6_rps_reset_ei(dev_priv); | 5044 | gen6_rps_reset_ei(dev_priv); |
5044 | I915_WRITE(GEN6_PMINTRMSK, | 5045 | I915_WRITE(GEN6_PMINTRMSK, |
5045 | gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); | 5046 | gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); |