diff options
author | Christian König <deathsimple@vodafone.de> | 2012-05-02 09:11:13 -0400 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2012-05-03 04:16:12 -0400 |
commit | 36abacaed34bc1f5bcb11ca611dd3a06c5c0ef39 (patch) | |
tree | 56a71439985e3938ca6476b53aa9a1c3111d187a /drivers/gpu/drm/radeon/radeon_fence.c | |
parent | 7bd560e8852deab1977ca73d9820d8d36a283d5e (diff) |
drm/radeon: rework gpu lockup detection and processing
Previously, multiple rings could trigger multiple GPU
resets at the same time.
Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_fence.c')
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_fence.c | 150 |
1 files changed, 76 insertions, 74 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 36c411fd0fed..1a9765aae710 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c | |||
@@ -74,6 +74,10 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) | |||
74 | radeon_fence_ring_emit(rdev, fence->ring, fence); | 74 | radeon_fence_ring_emit(rdev, fence->ring, fence); |
75 | trace_radeon_fence_emit(rdev->ddev, fence->seq); | 75 | trace_radeon_fence_emit(rdev->ddev, fence->seq); |
76 | fence->emitted = true; | 76 | fence->emitted = true; |
77 | /* are we the first fence on a previusly idle ring? */ | ||
78 | if (list_empty(&rdev->fence_drv[fence->ring].emitted)) { | ||
79 | rdev->fence_drv[fence->ring].last_activity = jiffies; | ||
80 | } | ||
77 | list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted); | 81 | list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted); |
78 | write_unlock_irqrestore(&rdev->fence_lock, irq_flags); | 82 | write_unlock_irqrestore(&rdev->fence_lock, irq_flags); |
79 | return 0; | 83 | return 0; |
@@ -85,34 +89,14 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring) | |||
85 | struct list_head *i, *n; | 89 | struct list_head *i, *n; |
86 | uint32_t seq; | 90 | uint32_t seq; |
87 | bool wake = false; | 91 | bool wake = false; |
88 | unsigned long cjiffies; | ||
89 | 92 | ||
90 | seq = radeon_fence_read(rdev, ring); | 93 | seq = radeon_fence_read(rdev, ring); |
91 | if (seq != rdev->fence_drv[ring].last_seq) { | 94 | if (seq == rdev->fence_drv[ring].last_seq) |
92 | rdev->fence_drv[ring].last_seq = seq; | ||
93 | rdev->fence_drv[ring].last_jiffies = jiffies; | ||
94 | rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT; | ||
95 | } else { | ||
96 | cjiffies = jiffies; | ||
97 | if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) { | ||
98 | cjiffies -= rdev->fence_drv[ring].last_jiffies; | ||
99 | if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) { | ||
100 | /* update the timeout */ | ||
101 | rdev->fence_drv[ring].last_timeout -= cjiffies; | ||
102 | } else { | ||
103 | /* the 500ms timeout is elapsed we should test | ||
104 | * for GPU lockup | ||
105 | */ | ||
106 | rdev->fence_drv[ring].last_timeout = 1; | ||
107 | } | ||
108 | } else { | ||
109 | /* wrap around update last jiffies, we will just wait | ||
110 | * a little longer | ||
111 | */ | ||
112 | rdev->fence_drv[ring].last_jiffies = cjiffies; | ||
113 | } | ||
114 | return false; | 95 | return false; |
115 | } | 96 | |
97 | rdev->fence_drv[ring].last_seq = seq; | ||
98 | rdev->fence_drv[ring].last_activity = jiffies; | ||
99 | |||
116 | n = NULL; | 100 | n = NULL; |
117 | list_for_each(i, &rdev->fence_drv[ring].emitted) { | 101 | list_for_each(i, &rdev->fence_drv[ring].emitted) { |
118 | fence = list_entry(i, struct radeon_fence, list); | 102 | fence = list_entry(i, struct radeon_fence, list); |
@@ -207,66 +191,84 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) | |||
207 | struct radeon_device *rdev; | 191 | struct radeon_device *rdev; |
208 | unsigned long irq_flags, timeout; | 192 | unsigned long irq_flags, timeout; |
209 | u32 seq; | 193 | u32 seq; |
210 | int r; | 194 | int i, r; |
195 | bool signaled; | ||
211 | 196 | ||
212 | if (fence == NULL) { | 197 | if (fence == NULL) { |
213 | WARN(1, "Querying an invalid fence : %p !\n", fence); | 198 | WARN(1, "Querying an invalid fence : %p !\n", fence); |
214 | return 0; | 199 | return -EINVAL; |
215 | } | 200 | } |
201 | |||
216 | rdev = fence->rdev; | 202 | rdev = fence->rdev; |
217 | if (radeon_fence_signaled(fence)) { | 203 | signaled = radeon_fence_signaled(fence); |
218 | return 0; | 204 | while (!signaled) { |
219 | } | 205 | read_lock_irqsave(&rdev->fence_lock, irq_flags); |
220 | timeout = rdev->fence_drv[fence->ring].last_timeout; | 206 | timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT; |
221 | retry: | 207 | if (time_after(rdev->fence_drv[fence->ring].last_activity, timeout)) { |
222 | /* save current sequence used to check for GPU lockup */ | 208 | /* the normal case, timeout is somewhere before last_activity */ |
223 | seq = rdev->fence_drv[fence->ring].last_seq; | 209 | timeout = rdev->fence_drv[fence->ring].last_activity - timeout; |
224 | trace_radeon_fence_wait_begin(rdev->ddev, seq); | 210 | } else { |
225 | if (intr) { | 211 | /* either jiffies wrapped around, or no fence was signaled in the last 500ms |
212 | * anyway we will just wait for the minimum amount and then check for a lockup */ | ||
213 | timeout = 1; | ||
214 | } | ||
215 | /* save current sequence value used to check for GPU lockups */ | ||
216 | seq = rdev->fence_drv[fence->ring].last_seq; | ||
217 | read_unlock_irqrestore(&rdev->fence_lock, irq_flags); | ||
218 | |||
219 | trace_radeon_fence_wait_begin(rdev->ddev, seq); | ||
226 | radeon_irq_kms_sw_irq_get(rdev, fence->ring); | 220 | radeon_irq_kms_sw_irq_get(rdev, fence->ring); |
227 | r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue, | 221 | if (intr) { |
228 | radeon_fence_signaled(fence), timeout); | 222 | r = wait_event_interruptible_timeout( |
223 | rdev->fence_drv[fence->ring].queue, | ||
224 | (signaled = radeon_fence_signaled(fence)), timeout); | ||
225 | } else { | ||
226 | r = wait_event_timeout( | ||
227 | rdev->fence_drv[fence->ring].queue, | ||
228 | (signaled = radeon_fence_signaled(fence)), timeout); | ||
229 | } | ||
229 | radeon_irq_kms_sw_irq_put(rdev, fence->ring); | 230 | radeon_irq_kms_sw_irq_put(rdev, fence->ring); |
230 | if (unlikely(r < 0)) { | 231 | if (unlikely(r < 0)) { |
231 | return r; | 232 | return r; |
232 | } | 233 | } |
233 | } else { | 234 | trace_radeon_fence_wait_end(rdev->ddev, seq); |
234 | radeon_irq_kms_sw_irq_get(rdev, fence->ring); | 235 | |
235 | r = wait_event_timeout(rdev->fence_drv[fence->ring].queue, | 236 | if (unlikely(!signaled)) { |
236 | radeon_fence_signaled(fence), timeout); | 237 | /* we were interrupted for some reason and fence |
237 | radeon_irq_kms_sw_irq_put(rdev, fence->ring); | 238 | * isn't signaled yet, resume waiting */ |
238 | } | 239 | if (r) { |
239 | trace_radeon_fence_wait_end(rdev->ddev, seq); | 240 | continue; |
240 | if (unlikely(!radeon_fence_signaled(fence))) { | 241 | } |
241 | /* we were interrupted for some reason and fence isn't | 242 | |
242 | * isn't signaled yet, resume wait | 243 | write_lock_irqsave(&rdev->fence_lock, irq_flags); |
243 | */ | 244 | /* check if sequence value has changed since last_activity */ |
244 | if (r) { | 245 | if (seq != rdev->fence_drv[fence->ring].last_seq) { |
245 | timeout = r; | 246 | write_unlock_irqrestore(&rdev->fence_lock, irq_flags); |
246 | goto retry; | 247 | continue; |
247 | } | 248 | } |
248 | /* don't protect read access to rdev->fence_drv[t].last_seq | 249 | |
249 | * if we experiencing a lockup the value doesn't change | 250 | /* change sequence value on all rings, so nobody else things there is a lockup */ |
250 | */ | 251 | for (i = 0; i < RADEON_NUM_RINGS; ++i) |
251 | if (seq == rdev->fence_drv[fence->ring].last_seq && | 252 | rdev->fence_drv[i].last_seq -= 0x10000; |
252 | radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) { | 253 | write_unlock_irqrestore(&rdev->fence_lock, irq_flags); |
253 | 254 | ||
254 | /* good news we believe it's a lockup */ | 255 | if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) { |
255 | printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", | 256 | |
256 | fence->seq, seq); | 257 | /* good news we believe it's a lockup */ |
257 | 258 | printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", | |
258 | /* mark the ring as not ready any more */ | 259 | fence->seq, seq); |
259 | rdev->ring[fence->ring].ready = false; | 260 | |
260 | r = radeon_gpu_reset(rdev); | 261 | /* mark the ring as not ready any more */ |
261 | if (r) | 262 | rdev->ring[fence->ring].ready = false; |
262 | return r; | 263 | r = radeon_gpu_reset(rdev); |
264 | if (r) | ||
265 | return r; | ||
266 | |||
267 | write_lock_irqsave(&rdev->fence_lock, irq_flags); | ||
268 | rdev->fence_drv[fence->ring].last_activity = jiffies; | ||
269 | write_unlock_irqrestore(&rdev->fence_lock, irq_flags); | ||
270 | } | ||
263 | } | 271 | } |
264 | timeout = RADEON_FENCE_JIFFIES_TIMEOUT; | ||
265 | write_lock_irqsave(&rdev->fence_lock, irq_flags); | ||
266 | rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT; | ||
267 | rdev->fence_drv[fence->ring].last_jiffies = jiffies; | ||
268 | write_unlock_irqrestore(&rdev->fence_lock, irq_flags); | ||
269 | goto retry; | ||
270 | } | 272 | } |
271 | return 0; | 273 | return 0; |
272 | } | 274 | } |