author		Chris Wilson <chris@chris-wilson.co.uk>	2015-03-18 05:48:22 -0400
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2015-03-20 06:48:14 -0400
commit		43cf3bf084ba097463d67e756ff821505bdaa69d (patch)
tree		54529985290904a480f7830763f632db9e9b7d6e
parent		aed242ff7ebb697e4dff912bd4dc7ec7192f7581 (diff)
drm/i915: Improved w/a for rps on Baytrail
Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
Author: Deepak S <deepak.s@linux.intel.com>
Date:   Thu Jul 3 17:33:01 2014 -0400

    drm/i915/vlv: WA for Turbo and RC6 to work together.

Other than code clarity, the major improvement is to disable the extra
interrupts generated when idle. However, the reclocking remains rather
slow under the new manual regime, in particular it fails to downclock
as quickly as desired.

The second major improvement is that for certain workloads, like games,
we need to combine render+media activity counters as the work of
displaying the frame is split across the engines and both need to be
taken into account when deciding the global GPU frequency as memory
cycles are shared.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Deepak S <deepak.s@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
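For illustration only, below is a minimal standalone sketch of the residency test this patch introduces in vlv_c0_above(): the render and media C0 counters are summed, scaled into the same units as the elapsed CZ timestamp, and compared against a percentage threshold of the elapsed time. The struct name, the c0_above() helper, and the mem_freq_mhz/cz_clock_to_msec parameters are simplified assumptions for the sketch, not the kernel code itself.

```c
#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for the kernel's struct intel_rps_ei sample. */
struct rps_ei {
	uint32_t cz_clock;   /* CZ timestamp at sample time */
	uint32_t render_c0;  /* render engine C0 residency counter */
	uint32_t media_c0;   /* media engine C0 residency counter */
};

/*
 * Simplified model of the check in vlv_c0_above(): combine render + media
 * activity, scale both sides into comparable units, and report whether the
 * combined C0 residency exceeded "threshold" percent of the elapsed time.
 * mem_freq_mhz and cz_clock_to_msec stand in for dev_priv->mem_freq and
 * VLV_CZ_CLOCK_TO_MILLI_SEC in the patch below.
 */
static bool c0_above(const struct rps_ei *old, const struct rps_ei *now,
		     unsigned int mem_freq_mhz, unsigned int threshold,
		     unsigned int cz_clock_to_msec)
{
	uint64_t time, c0;

	if (old->cz_clock == 0)
		return false; /* no previous sample yet */

	time = (uint64_t)(now->cz_clock - old->cz_clock);
	time *= threshold * mem_freq_mhz;

	/* Work may be split across render + media, so sum both counters. */
	c0 = (uint64_t)(now->render_c0 - old->render_c0);
	c0 += now->media_c0 - old->media_c0;
	c0 *= 100 * cz_clock_to_msec * 4 / 1000;

	return c0 >= time;
}
```

In the patch, an up/down evaluation-interval interrupt that passes (or fails) this test is simply translated into the existing GEN6_PM_RP_UP_THRESHOLD / GEN6_PM_RP_DOWN_THRESHOLD events, which the unmodified gen6_pm_rps_work() adjuster then acts on.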
-rw-r--r--	drivers/gpu/drm/i915/i915_irq.c		| 155
-rw-r--r--	drivers/gpu/drm/i915/i915_reg.h		|   4
-rw-r--r--	drivers/gpu/drm/i915/intel_display.c	|   2
-rw-r--r--	drivers/gpu/drm/i915/intel_drv.h	|   2
-rw-r--r--	drivers/gpu/drm/i915/intel_pm.c		|  22
5 files changed, 81 insertions, 104 deletions
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 49ad5fb82ace..8d8d33d068dd 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -997,129 +997,84 @@ static void notify_ring(struct drm_device *dev,
 	wake_up_all(&ring->irq_queue);
 }
 
-static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
-			    struct intel_rps_ei *rps_ei)
-{
-	u32 cz_ts, cz_freq_khz;
-	u32 render_count, media_count;
-	u32 elapsed_render, elapsed_media, elapsed_time;
-	u32 residency = 0;
-
-	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
-	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
-
-	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
-	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
-
-	if (rps_ei->cz_clock == 0) {
-		rps_ei->cz_clock = cz_ts;
-		rps_ei->render_c0 = render_count;
-		rps_ei->media_c0 = media_count;
-
-		return dev_priv->rps.cur_freq;
-	}
-
-	elapsed_time = cz_ts - rps_ei->cz_clock;
-	rps_ei->cz_clock = cz_ts;
-
-	elapsed_render = render_count - rps_ei->render_c0;
-	rps_ei->render_c0 = render_count;
-
-	elapsed_media = media_count - rps_ei->media_c0;
-	rps_ei->media_c0 = media_count;
-
-	/* Convert all the counters into common unit of milli sec */
-	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
-	elapsed_render /= cz_freq_khz;
-	elapsed_media /= cz_freq_khz;
-
-	/*
-	 * Calculate overall C0 residency percentage
-	 * only if elapsed time is non zero
-	 */
-	if (elapsed_time) {
-		residency =
-			((max(elapsed_render, elapsed_media) * 100)
-				/ elapsed_time);
-	}
-
-	return residency;
-}
-
-/**
- * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
- * busy-ness calculated from C0 counters of render & media power wells
- * @dev_priv: DRM device private
- *
- */
-static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
-{
-	u32 residency_C0_up = 0, residency_C0_down = 0;
-	int new_delay, adj;
-
-	dev_priv->rps.ei_interrupt_count++;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
-
-
-	if (dev_priv->rps.up_ei.cz_clock == 0) {
-		vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
-		vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
-		return dev_priv->rps.cur_freq;
-	}
-
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+			struct intel_rps_ei *ei)
+{
+	ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
+
+static bool vlv_c0_above(struct drm_i915_private *dev_priv,
+			 const struct intel_rps_ei *old,
+			 const struct intel_rps_ei *now,
+			 int threshold)
+{
+	u64 time, c0;
+
+	if (old->cz_clock == 0)
+		return false;
+
+	time = now->cz_clock - old->cz_clock;
+	time *= threshold * dev_priv->mem_freq;
+
+	/* Workload can be split between render + media, e.g. SwapBuffers
+	 * being blitted in X after being rendered in mesa. To account for
+	 * this we need to combine both engines into our activity counter.
+	 */
+	c0 = now->render_c0 - old->render_c0;
+	c0 += now->media_c0 - old->media_c0;
+	c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
+
+	return c0 >= time;
+}
+
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
+{
+	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
+	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+	dev_priv->rps.ei_interrupt_count = 0;
+}
+
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps_ei now;
+	u32 events = 0;
+
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+		return 0;
+
+	vlv_c0_read(dev_priv, &now);
+	if (now.cz_clock == 0)
+		return 0;
 
 	/*
 	 * To down throttle, C0 residency should be less than down threshold
 	 * for continous EI intervals. So calculate down EI counters
 	 * once in VLV_INT_COUNT_FOR_DOWN_EI
 	 */
-	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
-
+	if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
+		pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
 		dev_priv->rps.ei_interrupt_count = 0;
-
-		residency_C0_down = vlv_c0_residency(dev_priv,
-						     &dev_priv->rps.down_ei);
-	} else {
-		residency_C0_up = vlv_c0_residency(dev_priv,
-						   &dev_priv->rps.up_ei);
 	}
 
-	new_delay = dev_priv->rps.cur_freq;
-
-	adj = dev_priv->rps.last_adj;
-	/* C0 residency is greater than UP threshold. Increase Frequency */
-	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
-		if (adj > 0)
-			adj *= 2;
-		else
-			adj = 1;
-
-		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
-			new_delay = dev_priv->rps.cur_freq + adj;
-
-		/*
-		 * For better performance, jump directly
-		 * to RPe if we're below it.
-		 */
-		if (new_delay < dev_priv->rps.efficient_freq)
-			new_delay = dev_priv->rps.efficient_freq;
-
-	} else if (!dev_priv->rps.ei_interrupt_count &&
-			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
-		if (adj < 0)
-			adj *= 2;
-		else
-			adj = -1;
-		/*
-		 * This means, C0 residency is less than down threshold over
-		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
-		 */
-		if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
-			new_delay = dev_priv->rps.cur_freq + adj;
+	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
+		if (!vlv_c0_above(dev_priv,
+				  &dev_priv->rps.down_ei, &now,
+				  VLV_RP_DOWN_EI_THRESHOLD))
+			events |= GEN6_PM_RP_DOWN_THRESHOLD;
+		dev_priv->rps.down_ei = now;
 	}
 
-	return new_delay;
+	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		if (vlv_c0_above(dev_priv,
+				 &dev_priv->rps.up_ei, &now,
+				 VLV_RP_UP_EI_THRESHOLD))
+			events |= GEN6_PM_RP_UP_THRESHOLD;
+		dev_priv->rps.up_ei = now;
+	}
+
+	return events;
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
@@ -1149,6 +1104,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
 	mutex_lock(&dev_priv->rps.hw_lock);
 
+	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
+
 	adj = dev_priv->rps.last_adj;
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
 		if (adj > 0)
@@ -1171,8 +1128,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_freq_softlimit;
 		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
-		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index cc8ebabc488d..2d76c566d843 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6220,8 +6220,8 @@ enum skl_disp_power_wells {
 
 #define GEN6_GT_GFX_RC6p		0x13810C
 #define GEN6_GT_GFX_RC6pp		0x138110
-#define VLV_RENDER_C0_COUNT_REG		0x138118
-#define VLV_MEDIA_C0_COUNT_REG		0x13811C
+#define VLV_RENDER_C0_COUNT		0x138118
+#define VLV_MEDIA_C0_COUNT		0x13811C
 
 #define GEN6_PCODE_MAILBOX		0x138124
 #define GEN6_PCODE_READY		(1<<31)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 90b460cf2b57..f1c0295f69e5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9201,6 +9201,8 @@ void intel_mark_busy(struct drm_device *dev)
 
 	intel_runtime_pm_get(dev_priv);
 	i915_update_gfx_val(dev_priv);
+	if (INTEL_INFO(dev)->gen >= 6)
+		gen6_rps_busy(dev_priv);
 	dev_priv->mm.busy = true;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a1baaa188b0a..8bb18e507f5f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1242,6 +1242,8 @@ void intel_disable_gt_powersave(struct drm_device *dev);
 void intel_suspend_gt_powersave(struct drm_device *dev);
 void intel_reset_gt_powersave(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
+void gen6_rps_busy(struct drm_i915_private *dev_priv);
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index beab305e320d..68c9cc252d36 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4041,6 +4041,18 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 }
 
+void gen6_rps_busy(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->rps.hw_lock);
+	if (dev_priv->rps.enabled) {
+		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+			gen6_rps_reset_ei(dev_priv);
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+	}
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -4052,15 +4064,21 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 		else
 			gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
 		dev_priv->rps.last_adj = 0;
+		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
 void gen6_rps_boost(struct drm_i915_private *dev_priv)
 {
+	u32 val;
+
 	mutex_lock(&dev_priv->rps.hw_lock);
-	if (dev_priv->rps.enabled) {
-		intel_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
+	val = dev_priv->rps.max_freq_softlimit;
+	if (dev_priv->rps.enabled &&
+	    dev_priv->mm.busy &&
+	    dev_priv->rps.cur_freq < val) {
+		intel_set_rps(dev_priv->dev, val);
 		dev_priv->rps.last_adj = 0;
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);