Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_fence.c')
-rw-r--r--	drivers/gpu/drm/radeon/radeon_fence.c	621
1 file changed, 371 insertions(+), 250 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 4bd36a354fbe..11f5f402d22c 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -63,98 +63,82 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
 
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 {
-	unsigned long irq_flags;
-
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (fence->emitted) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+	/* we are protected by the ring emission mutex */
+	if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		return 0;
 	}
-	fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
-	if (!rdev->ring[fence->ring].ready)
-		/* FIXME: cp is not running assume everythings is done right
-		 * away
-		 */
-		radeon_fence_write(rdev, fence->seq, fence->ring);
-	else
-		radeon_fence_ring_emit(rdev, fence->ring, fence);
-
+	fence->seq = ++rdev->fence_drv[fence->ring].seq;
+	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
-	fence->emitted = true;
-	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
-static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
+void radeon_fence_process(struct radeon_device *rdev, int ring)
 {
-	struct radeon_fence *fence;
-	struct list_head *i, *n;
-	uint32_t seq;
+	uint64_t seq, last_seq;
+	unsigned count_loop = 0;
 	bool wake = false;
-	unsigned long cjiffies;
 
-	seq = radeon_fence_read(rdev, ring);
-	if (seq != rdev->fence_drv[ring].last_seq) {
-		rdev->fence_drv[ring].last_seq = seq;
-		rdev->fence_drv[ring].last_jiffies = jiffies;
-		rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-	} else {
-		cjiffies = jiffies;
-		if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) {
-			cjiffies -= rdev->fence_drv[ring].last_jiffies;
-			if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) {
-				/* update the timeout */
-				rdev->fence_drv[ring].last_timeout -= cjiffies;
-			} else {
-				/* the 500ms timeout is elapsed we should test
-				 * for GPU lockup
-				 */
-				rdev->fence_drv[ring].last_timeout = 1;
-			}
-		} else {
-			/* wrap around update last jiffies, we will just wait
-			 * a little longer
-			 */
-			rdev->fence_drv[ring].last_jiffies = cjiffies;
+	/* Note there is a scenario here for an infinite loop but it's
+	 * very unlikely to happen. For it to happen, the current polling
+	 * process need to be interrupted by another process and another
+	 * process needs to update the last_seq btw the atomic read and
+	 * xchg of the current process.
+	 *
+	 * More over for this to go in infinite loop there need to be
+	 * continuously new fence signaled ie radeon_fence_read needs
+	 * to return a different value each time for both the currently
+	 * polling process and the other process that xchg the last_seq
+	 * btw atomic read and xchg of the current process. And the
+	 * value the other process set as last seq must be higher than
+	 * the seq value we just read. Which means that current process
+	 * need to be interrupted after radeon_fence_read and before
+	 * atomic xchg.
+	 *
+	 * To be even more safe we count the number of time we loop and
+	 * we bail after 10 loop just accepting the fact that we might
+	 * have temporarly set the last_seq not to the true real last
+	 * seq but to an older one.
+	 */
+	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
+	do {
+		seq = radeon_fence_read(rdev, ring);
+		seq |= last_seq & 0xffffffff00000000LL;
+		if (seq < last_seq) {
+			seq += 0x100000000LL;
 		}
-		return false;
-	}
-	n = NULL;
-	list_for_each(i, &rdev->fence_drv[ring].emitted) {
-		fence = list_entry(i, struct radeon_fence, list);
-		if (fence->seq == seq) {
-			n = i;
+
+		if (seq == last_seq) {
 			break;
 		}
-	}
-	/* all fence previous to this one are considered as signaled */
-	if (n) {
-		i = n;
-		do {
-			n = i->prev;
-			list_move_tail(i, &rdev->fence_drv[ring].signaled);
-			fence = list_entry(i, struct radeon_fence, list);
-			fence->signaled = true;
-			i = n;
-		} while (i != &rdev->fence_drv[ring].emitted);
+		/* If we loop over we don't want to return without
+		 * checking if a fence is signaled as it means that the
+		 * seq we just read is different from the previous on.
+		 */
 		wake = true;
+		last_seq = seq;
+		if ((count_loop++) > 10) {
+			/* We looped over too many time leave with the
+			 * fact that we might have set an older fence
+			 * seq then the current real last seq as signaled
+			 * by the hw.
+			 */
+			break;
+		}
+	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
+
+	if (wake) {
+		rdev->fence_drv[ring].last_activity = jiffies;
+		wake_up_all(&rdev->fence_queue);
 	}
-	return wake;
 }
 
 static void radeon_fence_destroy(struct kref *kref)
 {
-	unsigned long irq_flags;
 	struct radeon_fence *fence;
 
 	fence = container_of(kref, struct radeon_fence, kref);
-	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	list_del(&fence->list);
-	fence->emitted = false;
-	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
-	if (fence->semaphore)
-		radeon_semaphore_free(fence->rdev, fence->semaphore);
+	fence->seq = RADEON_FENCE_NOTEMITED_SEQ;
 	kfree(fence);
 }
 
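The hunk above replaces the per-ring fence lists and the 500ms jiffies bookkeeping with a single monotonically increasing 64-bit sequence per ring; only the low 32 bits are visible in the GPU's fence readback, so radeon_fence_process re-extends each readback against the last known 64-bit value. A minimal standalone sketch of that extension step (illustrative names, not part of the patch; it assumes the GPU never falls a full 2^32 fences behind the CPU):

#include <stdint.h>

/* Fold a 32-bit hardware readback into the 64-bit sequence space,
 * given the last 64-bit sequence the driver already knows about. */
static uint64_t extend_seq(uint32_t hw_seq, uint64_t last_seq)
{
	uint64_t seq = (uint64_t)hw_seq | (last_seq & 0xffffffff00000000ULL);

	/* the 32-bit counter wrapped since last_seq was recorded */
	if (seq < last_seq)
		seq += 0x100000000ULL;
	return seq;
}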
@@ -162,171 +146,342 @@ int radeon_fence_create(struct radeon_device *rdev,
 		       struct radeon_fence **fence,
 		       int ring)
 {
-	unsigned long irq_flags;
-
 	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
 	if ((*fence) == NULL) {
 		return -ENOMEM;
 	}
 	kref_init(&((*fence)->kref));
 	(*fence)->rdev = rdev;
-	(*fence)->emitted = false;
-	(*fence)->signaled = false;
-	(*fence)->seq = 0;
+	(*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ;
 	(*fence)->ring = ring;
-	(*fence)->semaphore = NULL;
-	INIT_LIST_HEAD(&(*fence)->list);
-
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	list_add_tail(&(*fence)->list, &rdev->fence_drv[ring].created);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
-bool radeon_fence_signaled(struct radeon_fence *fence)
+static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
+				      u64 seq, unsigned ring)
 {
-	unsigned long irq_flags;
-	bool signaled = false;
-
-	if (!fence)
+	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 		return true;
-
-	if (fence->rdev->gpu_lockup)
+	}
+	/* poll new last sequence at least once */
+	radeon_fence_process(rdev, ring);
+	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
 		return true;
+	}
+	return false;
+}
 
-	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	signaled = fence->signaled;
-	/* if we are shuting down report all fence as signaled */
-	if (fence->rdev->shutdown) {
-		signaled = true;
+bool radeon_fence_signaled(struct radeon_fence *fence)
+{
+	if (!fence) {
+		return true;
 	}
-	if (!fence->emitted) {
+	if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
 		WARN(1, "Querying an unemitted fence : %p !\n", fence);
-		signaled = true;
+		return true;
 	}
-	if (!signaled) {
-		radeon_fence_poll_locked(fence->rdev, fence->ring);
-		signaled = fence->signaled;
+	if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
+		return true;
+	}
+	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
+		fence->seq = RADEON_FENCE_SIGNALED_SEQ;
+		return true;
 	}
-	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
-	return signaled;
+	return false;
+}
+
+static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
+				 unsigned ring, bool intr, bool lock_ring)
+{
+	unsigned long timeout, last_activity;
+	uint64_t seq;
+	unsigned i;
+	bool signaled;
+	int r;
+
+	while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
+		if (!rdev->ring[ring].ready) {
+			return -EBUSY;
+		}
+
+		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
+		if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
+			/* the normal case, timeout is somewhere before last_activity */
+			timeout = rdev->fence_drv[ring].last_activity - timeout;
+		} else {
+			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
+			 * anyway we will just wait for the minimum amount and then check for a lockup
+			 */
+			timeout = 1;
+		}
+		seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
+		/* Save current last activity valuee, used to check for GPU lockups */
+		last_activity = rdev->fence_drv[ring].last_activity;
+
+		trace_radeon_fence_wait_begin(rdev->ddev, seq);
+		radeon_irq_kms_sw_irq_get(rdev, ring);
+		if (intr) {
+			r = wait_event_interruptible_timeout(rdev->fence_queue,
+				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
+				timeout);
+		} else {
+			r = wait_event_timeout(rdev->fence_queue,
+				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
+				timeout);
+		}
+		radeon_irq_kms_sw_irq_put(rdev, ring);
+		if (unlikely(r < 0)) {
+			return r;
+		}
+		trace_radeon_fence_wait_end(rdev->ddev, seq);
+
+		if (unlikely(!signaled)) {
+			/* we were interrupted for some reason and fence
+			 * isn't signaled yet, resume waiting */
+			if (r) {
+				continue;
+			}
+
+			/* check if sequence value has changed since last_activity */
+			if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
+				continue;
+			}
+
+			if (lock_ring) {
+				mutex_lock(&rdev->ring_lock);
+			}
+
+			/* test if somebody else has already decided that this is a lockup */
+			if (last_activity != rdev->fence_drv[ring].last_activity) {
+				if (lock_ring) {
+					mutex_unlock(&rdev->ring_lock);
+				}
+				continue;
+			}
+
+			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
+				/* good news we believe it's a lockup */
+				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
+					 target_seq, seq);
+
+				/* change last activity so nobody else think there is a lockup */
+				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+					rdev->fence_drv[i].last_activity = jiffies;
+				}
+
+				/* mark the ring as not ready any more */
+				rdev->ring[ring].ready = false;
+				if (lock_ring) {
+					mutex_unlock(&rdev->ring_lock);
+				}
+				return -EDEADLK;
+			}
+
+			if (lock_ring) {
+				mutex_unlock(&rdev->ring_lock);
+			}
+		}
+	}
+	return 0;
 }
 
 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
-	struct radeon_device *rdev;
-	unsigned long irq_flags, timeout;
-	u32 seq;
 	int r;
 
 	if (fence == NULL) {
 		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return 0;
+		return -EINVAL;
 	}
-	rdev = fence->rdev;
-	if (radeon_fence_signaled(fence)) {
-		return 0;
+
+	r = radeon_fence_wait_seq(fence->rdev, fence->seq,
+				  fence->ring, intr, true);
+	if (r) {
+		return r;
 	}
-	timeout = rdev->fence_drv[fence->ring].last_timeout;
-retry:
-	/* save current sequence used to check for GPU lockup */
-	seq = rdev->fence_drv[fence->ring].last_seq;
-	trace_radeon_fence_wait_begin(rdev->ddev, seq);
-	if (intr) {
-		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
-		r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue,
-				radeon_fence_signaled(fence), timeout);
-		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
-		if (unlikely(r < 0)) {
-			return r;
+	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
+	return 0;
+}
+
+bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
+{
+	unsigned i;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
+			return true;
 		}
-	} else {
-		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
-		r = wait_event_timeout(rdev->fence_drv[fence->ring].queue,
-			radeon_fence_signaled(fence), timeout);
-		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
 	}
-	trace_radeon_fence_wait_end(rdev->ddev, seq);
-	if (unlikely(!radeon_fence_signaled(fence))) {
-		/* we were interrupted for some reason and fence isn't
-		 * isn't signaled yet, resume wait
-		 */
-		if (r) {
-			timeout = r;
-			goto retry;
+	return false;
+}
+
+static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
+				     u64 *target_seq, bool intr)
+{
+	unsigned long timeout, last_activity, tmp;
+	unsigned i, ring = RADEON_NUM_RINGS;
+	bool signaled;
+	int r;
+
+	for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!target_seq[i]) {
+			continue;
+		}
+
+		/* use the most recent one as indicator */
+		if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
+			last_activity = rdev->fence_drv[i].last_activity;
 		}
-		/* don't protect read access to rdev->fence_drv[t].last_seq
-		 * if we experiencing a lockup the value doesn't change
+
+		/* For lockup detection just pick the lowest ring we are
+		 * actively waiting for
 		 */
-		if (seq == rdev->fence_drv[fence->ring].last_seq &&
-		    radeon_gpu_is_lockup(rdev, &rdev->ring[fence->ring])) {
-			/* good news we believe it's a lockup */
-			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
-			       fence->seq, seq);
-			/* FIXME: what should we do ? marking everyone
-			 * as signaled for now
+		if (i < ring) {
+			ring = i;
+		}
+	}
+
+	/* nothing to wait for ? */
+	if (ring == RADEON_NUM_RINGS) {
+		return 0;
+	}
+
+	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
+		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
+		if (time_after(last_activity, timeout)) {
+			/* the normal case, timeout is somewhere before last_activity */
+			timeout = last_activity - timeout;
+		} else {
+			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
+			 * anyway we will just wait for the minimum amount and then check for a lockup
 			 */
-			rdev->gpu_lockup = true;
-			r = radeon_gpu_reset(rdev);
-			if (r)
-				return r;
-			radeon_fence_write(rdev, fence->seq, fence->ring);
-			rdev->gpu_lockup = false;
+			timeout = 1;
+		}
+
+		trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]);
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			if (target_seq[i]) {
+				radeon_irq_kms_sw_irq_get(rdev, i);
+			}
+		}
+		if (intr) {
+			r = wait_event_interruptible_timeout(rdev->fence_queue,
+				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
+				timeout);
+		} else {
+			r = wait_event_timeout(rdev->fence_queue,
+				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
+				timeout);
+		}
+		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+			if (target_seq[i]) {
+				radeon_irq_kms_sw_irq_put(rdev, i);
+			}
+		}
+		if (unlikely(r < 0)) {
+			return r;
+		}
+		trace_radeon_fence_wait_end(rdev->ddev, target_seq[ring]);
+
+		if (unlikely(!signaled)) {
+			/* we were interrupted for some reason and fence
+			 * isn't signaled yet, resume waiting */
+			if (r) {
+				continue;
+			}
+
+			mutex_lock(&rdev->ring_lock);
+			for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
+				if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
+					tmp = rdev->fence_drv[i].last_activity;
+				}
+			}
+			/* test if somebody else has already decided that this is a lockup */
+			if (last_activity != tmp) {
+				last_activity = tmp;
+				mutex_unlock(&rdev->ring_lock);
+				continue;
+			}
+
+			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
+				/* good news we believe it's a lockup */
+				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n",
+					 target_seq[ring]);
+
+				/* change last activity so nobody else think there is a lockup */
+				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+					rdev->fence_drv[i].last_activity = jiffies;
+				}
+
+				/* mark the ring as not ready any more */
+				rdev->ring[ring].ready = false;
+				mutex_unlock(&rdev->ring_lock);
+				return -EDEADLK;
+			}
+			mutex_unlock(&rdev->ring_lock);
 		}
-		timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
-		rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		rdev->fence_drv[fence->ring].last_jiffies = jiffies;
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		goto retry;
 	}
 	return 0;
 }
 
-int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_any(struct radeon_device *rdev,
+			  struct radeon_fence **fences,
+			  bool intr)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
+	uint64_t seq[RADEON_NUM_RINGS];
+	unsigned i;
 	int r;
 
-	if (rdev->gpu_lockup) {
-		return 0;
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		seq[i] = 0;
+
+		if (!fences[i]) {
+			continue;
+		}
+
+		if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
+			/* something was allready signaled */
+			return 0;
+		}
+
+		if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) {
+			seq[i] = fences[i]->seq;
+		}
 	}
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (list_empty(&rdev->fence_drv[ring].emitted)) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
+
+	r = radeon_fence_wait_any_seq(rdev, seq, intr);
+	if (r) {
+		return r;
 	}
-	fence = list_entry(rdev->fence_drv[ring].emitted.next,
-			   struct radeon_fence, list);
-	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	r = radeon_fence_wait(fence, false);
-	radeon_fence_unref(&fence);
-	return r;
+	return 0;
 }
 
-int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
-	int r;
-
-	if (rdev->gpu_lockup) {
-		return 0;
+	uint64_t seq;
+
+	/* We are not protected by ring lock when reading current seq but
+	 * it's ok as worst case is we return to early while we could have
+	 * wait.
+	 */
+	seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
+	if (seq >= rdev->fence_drv[ring].seq) {
+		/* nothing to wait for, last_seq is
+		   already the last emited fence */
+		return -ENOENT;
 	}
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (list_empty(&rdev->fence_drv[ring].emitted)) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
-	}
-	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
-			   struct radeon_fence, list);
-	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	r = radeon_fence_wait(fence, false);
-	radeon_fence_unref(&fence);
-	return r;
+	return radeon_fence_wait_seq(rdev, seq, ring, false, false);
+}
+
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+{
+	/* We are not protected by ring lock when reading current seq
+	 * but it's ok as wait empty is call from place where no more
+	 * activity can be scheduled so there won't be concurrent access
+	 * to seq value.
+	 */
+	return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq,
+				     ring, false, false);
 }
 
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
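Both radeon_fence_wait_seq and radeon_fence_wait_any_seq above size their sleep so that at most RADEON_FENCE_JIFFIES_TIMEOUT passes since the ring's last recorded activity before a lockup check runs. A standalone sketch of that timeout choice (illustrative names and a plain counter instead of jiffies/time_after, not part of the patch):

#include <stdint.h>

#define FENCE_TIMEOUT 500	/* stand-in for RADEON_FENCE_JIFFIES_TIMEOUT */

/* How long to sleep before re-checking for a GPU lockup. */
static uint64_t pick_timeout(uint64_t now, uint64_t last_activity)
{
	uint64_t window_start = now - FENCE_TIMEOUT;

	if (last_activity > window_start) {
		/* normal case: part of the window since the last
		 * activity is still left, sleep only that long */
		return last_activity - window_start;
	}
	/* window already elapsed (or the counter wrapped): wait the
	 * minimum amount and let the caller run the lockup check */
	return 1;
}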
@@ -345,49 +500,27 @@ void radeon_fence_unref(struct radeon_fence **fence)
 	}
 }
 
-void radeon_fence_process(struct radeon_device *rdev, int ring)
-{
-	unsigned long irq_flags;
-	bool wake;
-
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	wake = radeon_fence_poll_locked(rdev, ring);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	if (wake) {
-		wake_up_all(&rdev->fence_drv[ring].queue);
-	}
-}
-
-int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
+unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
-	int not_processed = 0;
-
-	read_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (!rdev->fence_drv[ring].initialized) {
-		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
+	uint64_t emitted;
+
+	/* We are not protected by ring lock when reading the last sequence
+	 * but it's ok to report slightly wrong fence count here.
+	 */
+	radeon_fence_process(rdev, ring);
+	emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq);
+	/* to avoid 32bits warp around */
+	if (emitted > 0x10000000) {
+		emitted = 0x10000000;
 	}
-
-	if (!list_empty(&rdev->fence_drv[ring].emitted)) {
-		struct list_head *ptr;
-		list_for_each(ptr, &rdev->fence_drv[ring].emitted) {
-			/* count up to 3, that's enought info */
-			if (++not_processed >= 3)
-				break;
-		}
-	}
-	read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	return not_processed;
+	return (unsigned)emitted;
 }
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
 	uint64_t index;
 	int r;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 	if (rdev->wb.use_event) {
 		rdev->fence_drv[ring].scratch_reg = 0;
@@ -396,7 +529,6 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
 		if (r) {
 			dev_err(rdev->dev, "fence failed to get scratch register\n");
-			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 			return r;
 		}
 		index = RADEON_WB_SCRATCH_OFFSET +
@@ -405,11 +537,10 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 	}
 	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
 	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
-	radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring].seq), ring);
+	radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring);
 	rdev->fence_drv[ring].initialized = true;
-	DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
+	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
 		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
@@ -418,24 +549,20 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].scratch_reg = -1;
 	rdev->fence_drv[ring].cpu_addr = NULL;
 	rdev->fence_drv[ring].gpu_addr = 0;
-	atomic_set(&rdev->fence_drv[ring].seq, 0);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].created);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
-	init_waitqueue_head(&rdev->fence_drv[ring].queue);
+	rdev->fence_drv[ring].seq = 0;
+	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
+	rdev->fence_drv[ring].last_activity = jiffies;
 	rdev->fence_drv[ring].initialized = false;
 }
 
 int radeon_fence_driver_init(struct radeon_device *rdev)
 {
-	unsigned long irq_flags;
 	int ring;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	init_waitqueue_head(&rdev->fence_queue);
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		radeon_fence_driver_init_ring(rdev, ring);
 	}
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	if (radeon_debugfs_fence_init(rdev)) {
 		dev_err(rdev->dev, "fence debugfs file creation failed\n");
 	}
@@ -444,19 +571,18 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
 
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
-	unsigned long irq_flags;
 	int ring;
 
+	mutex_lock(&rdev->ring_lock);
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		if (!rdev->fence_drv[ring].initialized)
 			continue;
-		radeon_fence_wait_last(rdev, ring);
-		wake_up_all(&rdev->fence_drv[ring].queue);
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
+		radeon_fence_wait_empty_locked(rdev, ring);
+		wake_up_all(&rdev->fence_queue);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		rdev->fence_drv[ring].initialized = false;
 	}
+	mutex_unlock(&rdev->ring_lock);
 }
 
 
@@ -469,7 +595,6 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_fence *fence;
 	int i;
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -477,14 +602,10 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "--- ring %d ---\n", i);
-		seq_printf(m, "Last signaled fence 0x%08X\n",
-			   radeon_fence_read(rdev, i));
-		if (!list_empty(&rdev->fence_drv[i].emitted)) {
-			fence = list_entry(rdev->fence_drv[i].emitted.prev,
-					   struct radeon_fence, list);
-			seq_printf(m, "Last emitted fence %p with 0x%08X\n",
-				   fence, fence->seq);
-		}
+		seq_printf(m, "Last signaled fence 0x%016llx\n",
+			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
+		seq_printf(m, "Last emitted 0x%016llx\n",
+			   rdev->fence_drv[i].seq);
 	}
 	return 0;
 }
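With both sequence numbers held as 64-bit values, the debugfs output above and radeon_fence_count_emitted reduce to a subtraction between the last emitted and the last signaled sequence, clamped against a bogus readback. A standalone sketch of that computation (illustrative names, not part of the patch):

#include <stdint.h>

/* Pending fences = emitted minus signaled, clamped like the patch does
 * so a corrupted readback cannot report an absurd count. */
static unsigned count_emitted(uint64_t emitted_seq, uint64_t signaled_seq)
{
	uint64_t pending = emitted_seq - signaled_seq;

	if (pending > 0x10000000)
		pending = 0x10000000;
	return (unsigned)pending;
}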