Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_fence.c')
 -rw-r--r--  drivers/gpu/drm/radeon/radeon_fence.c | 621
 1 file changed, 371 insertions(+), 250 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 4bd36a354fbe..11f5f402d22c 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -63,98 +63,82 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
 
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 {
-	unsigned long irq_flags;
-
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (fence->emitted) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+	/* we are protected by the ring emission mutex */
+	if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		return 0;
 	}
-	fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
-	if (!rdev->ring[fence->ring].ready)
-		/* FIXME: cp is not running assume everythings is done right
-		 * away
-		 */
-		radeon_fence_write(rdev, fence->seq, fence->ring);
-	else
-		radeon_fence_ring_emit(rdev, fence->ring, fence);
-
+	fence->seq = ++rdev->fence_drv[fence->ring].seq;
+	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
-	fence->emitted = true;
-	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
-static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
+void radeon_fence_process(struct radeon_device *rdev, int ring)
 {
-	struct radeon_fence *fence;
-	struct list_head *i, *n;
-	uint32_t seq;
+	uint64_t seq, last_seq;
+	unsigned count_loop = 0;
 	bool wake = false;
-	unsigned long cjiffies;
 
-	seq = radeon_fence_read(rdev, ring);
-	if (seq != rdev->fence_drv[ring].last_seq) {
-		rdev->fence_drv[ring].last_seq = seq;
-		rdev->fence_drv[ring].last_jiffies = jiffies;
-		rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-	} else {
-		cjiffies = jiffies;
-		if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) {
-			cjiffies -= rdev->fence_drv[ring].last_jiffies;
-			if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) {
-				/* update the timeout */
-				rdev->fence_drv[ring].last_timeout -= cjiffies;
-			} else {
-				/* the 500ms timeout is elapsed we should test
-				 * for GPU lockup
-				 */
-				rdev->fence_drv[ring].last_timeout = 1;
-			}
-		} else {
-			/* wrap around update last jiffies, we will just wait
-			 * a little longer
-			 */
-			rdev->fence_drv[ring].last_jiffies = cjiffies;
+	/* Note there is a scenario here for an infinite loop but it's
+	 * very unlikely to happen. For it to happen, the current polling
+	 * process needs to be interrupted by another process, and that
+	 * other process needs to update last_seq between the atomic read
+	 * and the xchg of the current process.
+	 *
+	 * Moreover, for this to turn into an infinite loop, new fences
+	 * need to be signaled continuously, i.e. radeon_fence_read needs
+	 * to return a different value each time for both the currently
+	 * polling process and the other process that xchgs last_seq
+	 * between the atomic read and xchg of the current process. And
+	 * the value the other process sets as last seq must be higher
+	 * than the seq value we just read. Which means the current
+	 * process needs to be interrupted after radeon_fence_read and
+	 * before the atomic xchg.
+	 *
+	 * To be even safer we count the number of times we loop and
+	 * bail out after 10 loops, just accepting the fact that we
+	 * might have temporarily set last_seq not to the true last
+	 * seq but to an older one.
+	 */
+	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
+	do {
+		seq = radeon_fence_read(rdev, ring);
+		seq |= last_seq & 0xffffffff00000000LL;
+		if (seq < last_seq) {
+			seq += 0x100000000LL;
 		}
-		return false;
-	}
-	n = NULL;
-	list_for_each(i, &rdev->fence_drv[ring].emitted) {
-		fence = list_entry(i, struct radeon_fence, list);
-		if (fence->seq == seq) {
-			n = i;
+
+		if (seq == last_seq) {
 			break;
 		}
-	}
-	/* all fence previous to this one are considered as signaled */
-	if (n) {
-		i = n;
-		do {
-			n = i->prev;
-			list_move_tail(i, &rdev->fence_drv[ring].signaled);
-			fence = list_entry(i, struct radeon_fence, list);
-			fence->signaled = true;
-			i = n;
-		} while (i != &rdev->fence_drv[ring].emitted);
+		/* If we loop over we don't want to return without
+		 * checking if a fence is signaled, as it means that the
+		 * seq we just read is different from the previous one.
+		 */
 		wake = true;
+		last_seq = seq;
+		if ((count_loop++) > 10) {
+			/* We looped too many times; leave, accepting the
+			 * fact that we might have set an older fence
+			 * seq than the current real last seq as signaled
+			 * by the hw.
+			 */
+			break;
+		}
+	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
+
+	if (wake) {
+		rdev->fence_drv[ring].last_activity = jiffies;
+		wake_up_all(&rdev->fence_queue);
 	}
-	return wake;
 }
 
 static void radeon_fence_destroy(struct kref *kref)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
+	struct radeon_fence *fence;
 
 	fence = container_of(kref, struct radeon_fence, kref);
-	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	list_del(&fence->list);
-	fence->emitted = false;
-	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
-	if (fence->semaphore)
-		radeon_semaphore_free(fence->rdev, fence->semaphore);
+	fence->seq = RADEON_FENCE_NOTEMITED_SEQ;
	kfree(fence);
 }
 
@@ -162,171 +146,342 @@ int radeon_fence_create(struct radeon_device *rdev,
 			struct radeon_fence **fence,
 			int ring)
 {
-	unsigned long irq_flags;
-
 	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
 	if ((*fence) == NULL) {
 		return -ENOMEM;
 	}
 	kref_init(&((*fence)->kref));
 	(*fence)->rdev = rdev;
-	(*fence)->emitted = false;
-	(*fence)->signaled = false;
-	(*fence)->seq = 0;
+	(*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ;
 	(*fence)->ring = ring;
-	(*fence)->semaphore = NULL;
-	INIT_LIST_HEAD(&(*fence)->list);
-
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	list_add_tail(&(*fence)->list, &rdev->fence_drv[ring].created);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
-bool radeon_fence_signaled(struct radeon_fence *fence)
+static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
+				      u64 seq, unsigned ring)
 {
-	unsigned long irq_flags;
-	bool signaled = false;
-
-	if (!fence)
+	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 		return true;
-
-	if (fence->rdev->gpu_lockup)
+	}
+	/* poll new last sequence at least once */
+	radeon_fence_process(rdev, ring);
+	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 		return true;
+	}
+	return false;
+}
 
-	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	signaled = fence->signaled;
-	/* if we are shuting down report all fence as signaled */
-	if (fence->rdev->shutdown) {
-		signaled = true;
+bool radeon_fence_signaled(struct radeon_fence *fence)
+{
+	if (!fence) {
+		return true;
 	}
-	if (!fence->emitted) {
+	if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
 		WARN(1, "Querying an unemitted fence : %p !\n", fence);
-		signaled = true;
+		return true;
 	}
-	if (!signaled) {
-		radeon_fence_poll_locked(fence->rdev, fence->ring);
-		signaled = fence->signaled;
+	if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
+		return true;
+	}
+	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
+		fence->seq = RADEON_FENCE_SIGNALED_SEQ;
+		return true;
 	}
-	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
-	return signaled;
+	return false;
+}
+
+static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
+				 unsigned ring, bool intr, bool lock_ring)
+{
+	unsigned long timeout, last_activity;
+	uint64_t seq;
+	unsigned i;
+	bool signaled;
+	int r;
+
+	while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
+		if (!rdev->ring[ring].ready) {
+			return -EBUSY;
+		}
+
+		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
+		if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
+			/* the normal case, timeout is somewhere before last_activity */
+			timeout = rdev->fence_drv[ring].last_activity - timeout;
+		} else {
+			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
+			 * anyway we will just wait for the minimum amount and then check for a lockup
+			 */
+			timeout = 1;
+		}
+		seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
+		/* Save current last activity value, used to check for GPU lockups */
+		last_activity = rdev->fence_drv[ring].last_activity;
+
+		trace_radeon_fence_wait_begin(rdev->ddev, seq);
+		radeon_irq_kms_sw_irq_get(rdev, ring);
+		if (intr) {
+			r = wait_event_interruptible_timeout(rdev->fence_queue,
+				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
+				timeout);
+		} else {
+			r = wait_event_timeout(rdev->fence_queue,
+				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
+				timeout);
+		}
+		radeon_irq_kms_sw_irq_put(rdev, ring);
+		if (unlikely(r < 0)) {
+			return r;
+		}
+		trace_radeon_fence_wait_end(rdev->ddev, seq);
+
+		if (unlikely(!signaled)) {
+			/* we were interrupted for some reason and fence
+			 * isn't signaled yet, resume waiting */
+			if (r) {
+				continue;
+			}
+
+			/* check if sequence value has changed since last_activity */
+			if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
+				continue;
+			}
+
+			if (lock_ring) {
+				mutex_lock(&rdev->ring_lock);
+			}
+
+			/* test if somebody else has already decided that this is a lockup */
+			if (last_activity != rdev->fence_drv[ring].last_activity) {
+				if (lock_ring) {
+					mutex_unlock(&rdev->ring_lock);
+				}
+				continue;
+			}
+
+			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
+				/* good news we believe it's a lockup */
+				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
+					 target_seq, seq);
+
+				/* change last activity so nobody else thinks there is a lockup */
+				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+					rdev->fence_drv[i].last_activity = jiffies;
+				}
+
+				/* mark the ring as not ready any more */
+				rdev->ring[ring].ready = false;
+				if (lock_ring) {
+					mutex_unlock(&rdev->ring_lock);
+				}
+				return -EDEADLK;
+			}
+
+			if (lock_ring) {
+				mutex_unlock(&rdev->ring_lock);
+			}
+		}
+	}
+	return 0;
 }
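radeon_fence_wait_seq() above never sleeps longer than whatever is left of the RADEON_FENCE_JIFFIES_TIMEOUT window since the ring last showed activity, and falls back to a single jiffy once that window has elapsed (or jiffies wrapped), so the lockup check runs promptly. Below is a self-contained sketch of just that timeout computation, with plain integers standing in for jiffies and time_after() reduced to its signed-difference form; the constant is an illustrative assumption, not the kernel's definition.

#include <stdio.h>

/* Stand-ins for kernel jiffies arithmetic: ticks are unsigned and may
 * wrap, so "after" is decided on the signed difference, like time_after(). */
typedef unsigned long tick_t;
#define FENCE_TIMEOUT_TICKS 500UL	/* illustrative, ~500ms window */

static int tick_after(tick_t a, tick_t b)
{
	return (long)(b - a) < 0;
}

/* How long should the next sleep be? Mirrors radeon_fence_wait_seq():
 * wait for the unexpired part of the activity window, or a single tick
 * if the window already ran out (or the counter wrapped). */
static tick_t wait_budget(tick_t now, tick_t last_activity)
{
	tick_t deadline = now - FENCE_TIMEOUT_TICKS;

	if (tick_after(last_activity, deadline))
		return last_activity - deadline;
	return 1;
}

int main(void)
{
	printf("%lu\n", wait_budget(1000, 800));	/* 300 ticks left in the window */
	printf("%lu\n", wait_budget(1000, 100));	/* window elapsed -> 1 tick */
	return 0;
}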
 
 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
-	struct radeon_device *rdev;
-	unsigned long irq_flags, timeout;
-	u32 seq;
 	int r;
 
 	if (fence == NULL) {
 		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return 0;
+		return -EINVAL;
 	}
-	rdev = fence->rdev;
-	if (radeon_fence_signaled(fence)) {
-		return 0;
+
+	r = radeon_fence_wait_seq(fence->rdev, fence->seq,
+				  fence->ring, intr, true);
+	if (r) {
+		return r;
 	}
-	timeout = rdev->fence_drv[fence->ring].last_timeout;
-retry:
-	/* save current sequence used to check for GPU lockup */
-	seq = rdev->fence_drv[fence->ring].last_seq;
-	trace_radeon_fence_wait_begin(rdev->ddev, seq);
-	if (intr) {
-		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
-		r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue,
-				radeon_fence_signaled(fence), timeout);
-		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
-		if (unlikely(r < 0)) {
-			return r;
+	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
+	return 0;
+}
+
+bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
+{
+	unsigned i;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
+			return true;
 		}
-	} else {
-		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
-		r = wait_event_timeout(rdev->fence_drv[fence->ring].queue,
-			radeon_fence_signaled(fence), timeout);
-		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
 	}
-	trace_radeon_fence_wait_end(rdev->ddev, seq);
-	if (unlikely(!radeon_fence_signaled(fence))) {
-		/* we were interrupted for some reason and fence isn't
-		 * isn't signaled yet, resume wait
-		 */
-		if (r) {
-			timeout = r;
-			goto retry;
+	return false;
+}
+
+static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
+				     u64 *target_seq, bool intr)
+{
+	unsigned long timeout, last_activity, tmp;
+	unsigned i, ring = RADEON_NUM_RINGS;
+	bool signaled;
+	int r;
+
+	for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!target_seq[i]) {
+			continue;
+		}
+
+		/* use the most recent one as indicator */
+		if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
+			last_activity = rdev->fence_drv[i].last_activity;
 		}
-		/* don't protect read access to rdev->fence_drv[t].last_seq
-		 * if we experiencing a lockup the value doesn't change
+
+		/* For lockup detection just pick the lowest ring we are
+		 * actively waiting for
 		 */
-		if (seq == rdev->fence_drv[fence->ring].last_seq &&
-		    radeon_gpu_is_lockup(rdev, &rdev->ring[fence->ring])) {
-			/* good news we believe it's a lockup */
-			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
-			       fence->seq, seq);
-			/* FIXME: what should we do ? marking everyone
-			 * as signaled for now
+		if (i < ring) {
+			ring = i;
+		}
+	}
+
+	/* nothing to wait for ? */
+	if (ring == RADEON_NUM_RINGS) {
+		return 0;
+	}
+
+	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
+		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
+		if (time_after(last_activity, timeout)) {
+			/* the normal case, timeout is somewhere before last_activity */
+			timeout = last_activity - timeout;
+		} else {
+			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
+			 * anyway we will just wait for the minimum amount and then check for a lockup
 			 */
-			rdev->gpu_lockup = true;
-			r = radeon_gpu_reset(rdev);
-			if (r)
-				return r;
-			radeon_fence_write(rdev, fence->seq, fence->ring);
-			rdev->gpu_lockup = false;
+			timeout = 1;
+		}
+
+		trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]);
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			if (target_seq[i]) {
+				radeon_irq_kms_sw_irq_get(rdev, i);
+			}
+		}
+		if (intr) {
+			r = wait_event_interruptible_timeout(rdev->fence_queue,
+				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
+				timeout);
+		} else {
+			r = wait_event_timeout(rdev->fence_queue,
+				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
+				timeout);
+		}
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			if (target_seq[i]) {
+				radeon_irq_kms_sw_irq_put(rdev, i);
+			}
+		}
+		if (unlikely(r < 0)) {
+			return r;
+		}
+		trace_radeon_fence_wait_end(rdev->ddev, target_seq[ring]);
+
+		if (unlikely(!signaled)) {
+			/* we were interrupted for some reason and fence
+			 * isn't signaled yet, resume waiting */
+			if (r) {
+				continue;
+			}
+
+			mutex_lock(&rdev->ring_lock);
+			for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
+				if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
+					tmp = rdev->fence_drv[i].last_activity;
+				}
+			}
+			/* test if somebody else has already decided that this is a lockup */
+			if (last_activity != tmp) {
+				last_activity = tmp;
+				mutex_unlock(&rdev->ring_lock);
+				continue;
+			}
+
+			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
+				/* good news we believe it's a lockup */
+				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n",
+					 target_seq[ring]);
+
+				/* change last activity so nobody else thinks there is a lockup */
+				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+					rdev->fence_drv[i].last_activity = jiffies;
+				}
+
+				/* mark the ring as not ready any more */
+				rdev->ring[ring].ready = false;
+				mutex_unlock(&rdev->ring_lock);
+				return -EDEADLK;
+			}
+			mutex_unlock(&rdev->ring_lock);
 		}
-		timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
-		rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		rdev->fence_drv[fence->ring].last_jiffies = jiffies;
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		goto retry;
 	}
 	return 0;
 }
 
-int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_any(struct radeon_device *rdev,
+			  struct radeon_fence **fences,
+			  bool intr)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
+	uint64_t seq[RADEON_NUM_RINGS];
+	unsigned i;
 	int r;
 
-	if (rdev->gpu_lockup) {
-		return 0;
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		seq[i] = 0;
+
+		if (!fences[i]) {
+			continue;
+		}
+
+		if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
+			/* something was already signaled */
+			return 0;
+		}
+
+		if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) {
+			seq[i] = fences[i]->seq;
+		}
 	}
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (list_empty(&rdev->fence_drv[ring].emitted)) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
+
+	r = radeon_fence_wait_any_seq(rdev, seq, intr);
+	if (r) {
+		return r;
 	}
-	fence = list_entry(rdev->fence_drv[ring].emitted.next,
-			   struct radeon_fence, list);
-	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	r = radeon_fence_wait(fence, false);
-	radeon_fence_unref(&fence);
-	return r;
+	return 0;
 }
 
-int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
-	int r;
-
-	if (rdev->gpu_lockup) {
-		return 0;
+	uint64_t seq;
+
+	/* We are not protected by the ring lock when reading the current
+	 * seq, but it's ok as the worst case is that we return too early
+	 * while we could have waited.
+	 */
+	seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
+	if (seq >= rdev->fence_drv[ring].seq) {
+		/* nothing to wait for, last_seq is
+		   already the last emitted fence */
+		return -ENOENT;
 	}
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (list_empty(&rdev->fence_drv[ring].emitted)) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
-	}
-	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
-			   struct radeon_fence, list);
-	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	r = radeon_fence_wait(fence, false);
-	radeon_fence_unref(&fence);
-	return r;
+	return radeon_fence_wait_seq(rdev, seq, ring, false, false);
+}
+
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+{
+	/* We are not protected by the ring lock when reading the current
+	 * seq, but it's ok as wait empty is called from places where no
+	 * more activity can be scheduled, so there won't be concurrent
+	 * access to the seq value.
+	 */
+	return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq,
+				     ring, false, false);
 }
 
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
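radeon_fence_wait_any() above folds one optional fence per ring into a target_seq[] array, where 0 means "nothing to wait for on that ring": it bails out early when some fence already carries the signaled sentinel and skips fences still carrying the not-emitted sentinel. Below is a compact sketch of that folding step. The sentinel defines and the trimmed-down fence struct are stand-ins chosen to mimic what radeon.h appears to use at this point (0 for signaled, all-ones for not yet emitted); treat them as assumptions, the authoritative values are not part of this diff.

#include <stdint.h>

#define NUM_RINGS	5
#define SEQ_SIGNALED	0ULL		/* stand-in for RADEON_FENCE_SIGNALED_SEQ */
#define SEQ_NOTEMITTED	(~0ULL)		/* stand-in for RADEON_FENCE_NOTEMITED_SEQ */

struct fence {
	uint64_t seq;	/* a real sequence, or one of the sentinels above */
};

/* Build per-ring wait targets from one optional fence per ring.
 * Returns 1 when some fence is already signaled, mirroring the early
 * "return 0" in radeon_fence_wait_any(). */
static int build_targets(struct fence *const fences[NUM_RINGS],
			 uint64_t target_seq[NUM_RINGS])
{
	unsigned i;

	for (i = 0; i < NUM_RINGS; ++i) {
		target_seq[i] = 0;
		if (!fences[i])
			continue;
		if (fences[i]->seq == SEQ_SIGNALED)
			return 1;	/* nothing left to wait for */
		if (fences[i]->seq < SEQ_NOTEMITTED)
			target_seq[i] = fences[i]->seq;
	}
	return 0;
}

Encoding the fence state in the sequence value itself is what lets the rework drop the emitted/signaled booleans and the per-ring fence lists that the removed code maintained.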
@@ -345,49 +500,27 @@ void radeon_fence_unref(struct radeon_fence **fence)
 	}
 }
 
-void radeon_fence_process(struct radeon_device *rdev, int ring)
-{
-	unsigned long irq_flags;
-	bool wake;
-
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	wake = radeon_fence_poll_locked(rdev, ring);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	if (wake) {
-		wake_up_all(&rdev->fence_drv[ring].queue);
-	}
-}
-
-int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
+unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
-	int not_processed = 0;
-
-	read_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (!rdev->fence_drv[ring].initialized) {
-		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
+	uint64_t emitted;
+
+	/* We are not protected by the ring lock when reading the last
+	 * sequence, but it's ok to report a slightly wrong fence count here.
+	 */
+	radeon_fence_process(rdev, ring);
+	emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq);
+	/* to avoid a 32-bit wrap-around */
+	if (emitted > 0x10000000) {
+		emitted = 0x10000000;
 	}
-
-	if (!list_empty(&rdev->fence_drv[ring].emitted)) {
-		struct list_head *ptr;
-		list_for_each(ptr, &rdev->fence_drv[ring].emitted) {
-			/* count up to 3, that's enought info */
-			if (++not_processed >= 3)
-				break;
-		}
-	}
-	read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	return not_processed;
+	return (unsigned)emitted;
 }
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
 	uint64_t index;
 	int r;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 	if (rdev->wb.use_event) {
 		rdev->fence_drv[ring].scratch_reg = 0;
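With the list walking gone, radeon_fence_count_emitted() above is simply the distance between the last emitted sequence and the last sequence seen as signaled, clamped so a transiently bogus reading cannot be reported as billions of outstanding fences. The same arithmetic as a tiny stand-alone helper (the clamp constant mirrors the 0x10000000 used above):

#include <stdint.h>

/* Outstanding fences on a ring: last emitted sequence minus last
 * signaled sequence, clamped as in radeon_fence_count_emitted(). */
static unsigned count_emitted(uint64_t emitted_seq, uint64_t last_signaled)
{
	uint64_t pending = emitted_seq - last_signaled;

	return pending > 0x10000000ULL ? 0x10000000U : (unsigned)pending;
}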
@@ -396,7 +529,6 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
 		if (r) {
 			dev_err(rdev->dev, "fence failed to get scratch register\n");
-			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 			return r;
 		}
 		index = RADEON_WB_SCRATCH_OFFSET +
@@ -405,11 +537,10 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 	}
 	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
-	radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring].seq), ring);
+	radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring);
 	rdev->fence_drv[ring].initialized = true;
-	DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
+	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
 		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
@@ -418,24 +549,20 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].scratch_reg = -1;
 	rdev->fence_drv[ring].cpu_addr = NULL;
 	rdev->fence_drv[ring].gpu_addr = 0;
-	atomic_set(&rdev->fence_drv[ring].seq, 0);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].created);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
-	init_waitqueue_head(&rdev->fence_drv[ring].queue);
+	rdev->fence_drv[ring].seq = 0;
+	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
+	rdev->fence_drv[ring].last_activity = jiffies;
 	rdev->fence_drv[ring].initialized = false;
 }
 
 int radeon_fence_driver_init(struct radeon_device *rdev)
 {
-	unsigned long irq_flags;
 	int ring;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	init_waitqueue_head(&rdev->fence_queue);
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		radeon_fence_driver_init_ring(rdev, ring);
 	}
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	if (radeon_debugfs_fence_init(rdev)) {
 		dev_err(rdev->dev, "fence debugfs file creation failed\n");
 	}
@@ -444,19 +571,18 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
 
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
-	unsigned long irq_flags;
 	int ring;
 
+	mutex_lock(&rdev->ring_lock);
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		if (!rdev->fence_drv[ring].initialized)
 			continue;
-		radeon_fence_wait_last(rdev, ring);
-		wake_up_all(&rdev->fence_drv[ring].queue);
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
+		radeon_fence_wait_empty_locked(rdev, ring);
+		wake_up_all(&rdev->fence_queue);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		rdev->fence_drv[ring].initialized = false;
 	}
+	mutex_unlock(&rdev->ring_lock);
 }
 
 
@@ -469,7 +595,6 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_fence *fence;
 	int i;
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -477,14 +602,10 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "--- ring %d ---\n", i);
-		seq_printf(m, "Last signaled fence 0x%08X\n",
-			   radeon_fence_read(rdev, i));
-		if (!list_empty(&rdev->fence_drv[i].emitted)) {
-			fence = list_entry(rdev->fence_drv[i].emitted.prev,
-					   struct radeon_fence, list);
-			seq_printf(m, "Last emitted fence %p with 0x%08X\n",
-				   fence, fence->seq);
-		}
+		seq_printf(m, "Last signaled fence 0x%016llx\n",
+			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
+		seq_printf(m, "Last emitted 0x%016llx\n",
+			   rdev->fence_drv[i].seq);
 	}
 	return 0;
 }
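Taken together, the per-ring state this rework leaves behind is deliberately small. A conceptual sketch of that bookkeeping follows; it is not the actual struct radeon_fence_driver definition from radeon.h, which also carries the scratch register and writeback addresses used by radeon_fence_read()/radeon_fence_write().

#include <stdint.h>

/* Conceptual per-ring fence bookkeeping after this rework (a sketch,
 * field meanings inferred from the diff above):
 * - seq:           last sequence emitted; only touched under the ring
 *                  emission mutex, so a plain 64-bit value suffices
 * - last_seq:      last sequence seen as signaled; updated locklessly
 *                  through atomic64_xchg() in radeon_fence_process(),
 *                  hence atomic64_t in the kernel code
 * - last_activity: jiffies timestamp of the last time last_seq moved,
 *                  consulted by the wait loops to decide when to run
 *                  the GPU lockup check */
struct ring_fence_state {
	uint64_t seq;
	uint64_t last_seq;		/* atomic64_t in the driver */
	unsigned long last_activity;
};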