aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLyude <cpaul@redhat.com>2016-04-13 10:58:31 -0400
committerDaniel Vetter <daniel.vetter@ffwll.ch>2016-04-22 12:51:54 -0400
commit82922da39190199260a726d7081a8ea4873e5fd6 (patch)
tree0e03b3cda5449475eee2b95c56d5c8e59d25f29c
parente1083ff35157185b01bc0a99cb19b7cbae0fc9fa (diff)
drm/dp_helper: Retry aux transactions on all errors
This is part of a patch series to migrate all of the workarounds for commonly seen behavior from bad sinks in intel_dp_dpcd_read_wake() to drm's DP helper. We cannot rely on sinks NACKing or deferring when they can't receive transactions, nor can we rely on any other sort of consistent error to know when we should stop retrying. As such, we need to just retry unconditionally on errors. We also make sure here to return the error we encountered during the first transaction, since it's possible that retrying the transaction might return a different error then we had originally. This, along with the previous patch, work around a weird bug with the ThinkPad T560's and it's dock. When resuming the laptop, it appears that there's a short period of time where we're unable to complete any aux transactions, as they all immediately timeout. The only machine I'm able to reproduce this on is the T560 as other production Skylake models seem to be fine. The period during which AUX transactions fail appears to be around 22ms long. AFAIK, the dock for the T560 never actually turns off, the only difference is that it's in SST mode at the start of the resume process, so it's unclear as to why it would need so much time to come back up. There's been a discussion on this issue going on for a while on the intel-gfx mailing list about this that has, in addition to including developers from Intel, also had the correspondence of one of the hardware engineers for Intel: http://www.spinics.net/lists/intel-gfx/msg88831.html http://www.spinics.net/lists/intel-gfx/msg88410.html We've already looked into a couple of possible explanations for the problem: - Calling intel_dp_mst_resume() before right fix. intel_runtime_pm_enable_interrupts(). This was the first fix I tried, and while it worked it definitely wasn't the right fix. This worked because DP aux transactions don't actually require interrupts to work: static uint32_t intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = dev->dev_private; i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg; uint32_t status; bool done; #define C (((status = I915_READ_NOTRACE(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0) if (has_aux_irq) done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, msecs_to_jiffies_timeout(10)); else done = wait_for_atomic(C, 10) == 0; if (!done) DRM_ERROR("dp aux hw did not signal timeout (has irq: %i)!\n", has_aux_irq); #undef C return status; } When there's no interrupts enabled, we end up timing out on the wait_event_timeout() call, which causes us to check the DP status register once to see if the transaction was successful or not. Since this adds a 10ms delay to each aux transaction, it ends up adding a long enough delay to the resume process for aux transactions to become functional again. This gave us the illusion that enabling interrupts had something to do with making things work again, and put me on the wrong track for a while. - Interrupts occurring when we try to perform the aux transactions required to put the dock back into MST mode. This isn't the problem, as the only interrupts I've observed that come during this timeout period are from the snd_hda_intel driver, and disabling that driver doesn't appear to change the behavior at all. - Skylake's PSR block causing issues by performing aux transactions while we try to bring the dock out of MST mode. Disabling PSR through i915's command line options doesn't seem to change the behavior either, nor does preventing the DMC firmware from being loaded. Since this investigation went on for about 2 weeks, we decided it would be better for the time being to just workaround this issue by making sure AUX transactions wait a short period of time before retrying. Signed-off-by: Lyude <cpaul@redhat.com> Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1460559513-32280-3-git-send-email-cpaul@redhat.com
-rw-r--r--drivers/gpu/drm/drm_dp_helper.c42
1 files changed, 22 insertions, 20 deletions
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 7dd330ae0e81..540c3e43a8ea 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -178,8 +178,8 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
178 unsigned int offset, void *buffer, size_t size) 178 unsigned int offset, void *buffer, size_t size)
179{ 179{
180 struct drm_dp_aux_msg msg; 180 struct drm_dp_aux_msg msg;
181 unsigned int retry; 181 unsigned int retry, native_reply;
182 int err = 0; 182 int err = 0, ret = 0;
183 183
184 memset(&msg, 0, sizeof(msg)); 184 memset(&msg, 0, sizeof(msg));
185 msg.address = offset; 185 msg.address = offset;
@@ -196,37 +196,39 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
196 * sufficient, bump to 32 which makes Dell 4k monitors happier. 196 * sufficient, bump to 32 which makes Dell 4k monitors happier.
197 */ 197 */
198 for (retry = 0; retry < 32; retry++) { 198 for (retry = 0; retry < 32; retry++) {
199 if (err != 0 && err != -ETIMEDOUT) { 199 if (ret != 0 && ret != -ETIMEDOUT) {
200 usleep_range(AUX_RETRY_INTERVAL, 200 usleep_range(AUX_RETRY_INTERVAL,
201 AUX_RETRY_INTERVAL + 100); 201 AUX_RETRY_INTERVAL + 100);
202 } 202 }
203 203
204 err = aux->transfer(aux, &msg); 204 ret = aux->transfer(aux, &msg);
205 if (err < 0) {
206 if (err == -EBUSY)
207 continue;
208
209 goto unlock;
210 }
211 205
212 switch (msg.reply & DP_AUX_NATIVE_REPLY_MASK) { 206 if (ret > 0) {
213 case DP_AUX_NATIVE_REPLY_ACK: 207 native_reply = msg.reply & DP_AUX_NATIVE_REPLY_MASK;
214 if (err < size) 208 if (native_reply == DP_AUX_NATIVE_REPLY_ACK) {
215 err = -EPROTO; 209 if (ret == size)
216 goto unlock; 210 goto unlock;
217 211
218 case DP_AUX_NATIVE_REPLY_NACK: 212 ret = -EPROTO;
219 err = -EIO; 213 } else
220 goto unlock; 214 ret = -EIO;
221 } 215 }
216
217 /*
218 * We want the error we return to be the error we received on
219 * the first transaction, since we may get a different error the
220 * next time we retry
221 */
222 if (!err)
223 err = ret;
222 } 224 }
223 225
224 DRM_DEBUG_KMS("too many retries, giving up\n"); 226 DRM_DEBUG_KMS("too many retries, giving up\n");
225 err = -EIO; 227 ret = err;
226 228
227unlock: 229unlock:
228 mutex_unlock(&aux->hw_mutex); 230 mutex_unlock(&aux->hw_mutex);
229 return err; 231 return ret;
230} 232}
231 233
232/** 234/**