Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c	| 563
1 file changed, 169 insertions(+), 394 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index a7189a1fa6a1..98500f1756f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -126,7 +126,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
 	(*fence)->ring = ring;
 	(*fence)->owner = owner;
 	fence_init(&(*fence)->base, &amdgpu_fence_ops,
-		&adev->fence_queue.lock, adev->fence_context + ring->idx,
+		&ring->fence_drv.fence_queue.lock,
+		adev->fence_context + ring->idx,
 		(*fence)->seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       (*fence)->seq,
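The hunk above is the heart of this change: a fence is now initialized against a per-ring wait queue (and its spinlock) rather than the device-global adev->fence_queue. A minimal sketch of the driver state this assumes, abbreviated from the fields visible elsewhere in this diff (the real struct lives in amdgpu.h):

    /* Sketch only -- field set abbreviated from the real amdgpu_fence_driver. */
    struct amdgpu_fence_driver {
    	uint64_t		gpu_addr;
    	volatile uint32_t	*cpu_addr;
    	atomic64_t		last_seq;
    	wait_queue_head_t	fence_queue;	/* was one global queue in adev */
    	/* irq_src, irq_type, sync_seq[], lockup_work, ... */
    };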
@@ -136,38 +137,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
 }
 
 /**
- * amdgpu_fence_recreate - recreate a fence from an user fence
- *
- * @ring: ring the fence is associated with
- * @owner: creator of the fence
- * @seq: user fence sequence number
- * @fence: resulting amdgpu fence object
- *
- * Recreates a fence command from the user fence sequence number (all asics).
- * Returns 0 on success, -ENOMEM on failure.
- */
-int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
-			  uint64_t seq, struct amdgpu_fence **fence)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (seq > ring->fence_drv.sync_seq[ring->idx])
-		return -EINVAL;
-
-	*fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
-	if ((*fence) == NULL)
-		return -ENOMEM;
-
-	(*fence)->seq = seq;
-	(*fence)->ring = ring;
-	(*fence)->owner = owner;
-	fence_init(&(*fence)->base, &amdgpu_fence_ops,
-		&adev->fence_queue.lock, adev->fence_context + ring->idx,
-		(*fence)->seq);
-	return 0;
-}
-
-/**
  * amdgpu_fence_check_signaled - callback from fence_queue
  *
  * this function is called with fence_queue lock held, which is also used
@@ -196,9 +165,7 @@ static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int fl
 		else
 			FENCE_TRACE(&fence->base, "was already signaled\n");
 
-		amdgpu_irq_put(adev, fence->ring->fence_drv.irq_src,
-			       fence->ring->fence_drv.irq_type);
-		__remove_wait_queue(&adev->fence_queue, &fence->fence_wake);
+		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
 		fence_put(&fence->base);
 	} else
 		FENCE_TRACE(&fence->base, "pending\n");
@@ -299,14 +266,9 @@ static void amdgpu_fence_check_lockup(struct work_struct *work)
 		return;
 	}
 
-	if (fence_drv->delayed_irq && ring->adev->ddev->irq_enabled) {
-		fence_drv->delayed_irq = false;
-		amdgpu_irq_update(ring->adev, fence_drv->irq_src,
-				  fence_drv->irq_type);
+	if (amdgpu_fence_activity(ring)) {
+		wake_up_all(&ring->fence_drv.fence_queue);
 	}
-
-	if (amdgpu_fence_activity(ring))
-		wake_up_all(&ring->adev->fence_queue);
 	else if (amdgpu_ring_is_lockup(ring)) {
 		/* good news we believe it's a lockup */
 		dev_warn(ring->adev->dev, "GPU lockup (current fence id "
@@ -316,7 +278,7 @@ static void amdgpu_fence_check_lockup(struct work_struct *work)
 
 		/* remember that we need an reset */
 		ring->adev->needs_reset = true;
-		wake_up_all(&ring->adev->fence_queue);
+		wake_up_all(&ring->fence_drv.fence_queue);
 	}
 	up_read(&ring->adev->exclusive_lock);
 }
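When a lockup is detected, the handler both sets adev->needs_reset and wakes the ring's fence_queue. That pairing matters: the sleepers (see amdgpu_fence_ring_wait_seq further down in this diff) block on a condition that also tests needs_reset, so the wakeup lets them re-evaluate and fail fast with -EDEADLK:

    /* Waiter-side condition, quoted from the new helper below. */
    wait_event(ring->fence_drv.fence_queue, (
    	   (signaled = amdgpu_fence_seq_signaled(ring, seq))
    	   || adev->needs_reset));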
@@ -332,62 +294,8 @@ static void amdgpu_fence_check_lockup(struct work_struct *work)
  */
 void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
-	uint64_t seq, last_seq, last_emitted;
-	unsigned count_loop = 0;
-	bool wake = false;
-
-	/* Note there is a scenario here for an infinite loop but it's
-	 * very unlikely to happen. For it to happen, the current polling
-	 * process need to be interrupted by another process and another
-	 * process needs to update the last_seq btw the atomic read and
-	 * xchg of the current process.
-	 *
-	 * More over for this to go in infinite loop there need to be
-	 * continuously new fence signaled ie amdgpu_fence_read needs
-	 * to return a different value each time for both the currently
-	 * polling process and the other process that xchg the last_seq
-	 * btw atomic read and xchg of the current process. And the
-	 * value the other process set as last seq must be higher than
-	 * the seq value we just read. Which means that current process
-	 * need to be interrupted after amdgpu_fence_read and before
-	 * atomic xchg.
-	 *
-	 * To be even more safe we count the number of time we loop and
-	 * we bail after 10 loop just accepting the fact that we might
-	 * have temporarly set the last_seq not to the true real last
-	 * seq but to an older one.
-	 */
-	last_seq = atomic64_read(&ring->fence_drv.last_seq);
-	do {
-		last_emitted = ring->fence_drv.sync_seq[ring->idx];
-		seq = amdgpu_fence_read(ring);
-		seq |= last_seq & 0xffffffff00000000LL;
-		if (seq < last_seq) {
-			seq &= 0xffffffff;
-			seq |= last_emitted & 0xffffffff00000000LL;
-		}
-
-		if (seq <= last_seq || seq > last_emitted) {
-			break;
-		}
-		/* If we loop over we don't want to return without
-		 * checking if a fence is signaled as it means that the
-		 * seq we just read is different from the previous on.
-		 */
-		wake = true;
-		last_seq = seq;
-		if ((count_loop++) > 10) {
-			/* We looped over too many time leave with the
-			 * fact that we might have set an older fence
-			 * seq then the current real last seq as signaled
-			 * by the hw.
-			 */
-			break;
-		}
-	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
-
-	if (wake)
-		wake_up_all(&ring->adev->fence_queue);
+	if (amdgpu_fence_activity(ring))
+		wake_up_all(&ring->fence_drv.fence_queue);
 }
 
 /**
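The removed polling loop is not lost: amdgpu_fence_process() now delegates to amdgpu_fence_activity() (not shown in this diff), which evidently subsumes the same 32-to-64-bit sequence extension. The arithmetic, for reference: the hardware writes only the low 32 bits of the sequence number, so the upper half is inherited from the last known value, and a backwards step means the 32-bit counter wrapped:

    seq = amdgpu_fence_read(ring);           /* low 32 bits from the fence page */
    seq |= last_seq & 0xffffffff00000000LL;  /* splice in the current upper half */
    if (seq < last_seq) {                    /* went backwards: 32-bit wrap */
    	seq &= 0xffffffff;
    	seq |= last_emitted & 0xffffffff00000000LL;
    }

For example, with last_seq = 0x1fffffff0 and a hardware read of 0x00000005, the spliced value 0x100000005 is below last_seq, so the upper half is taken from last_emitted instead, correctly yielding 0x200000005.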
@@ -447,284 +355,49 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
 {
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_ring *ring = fence->ring;
-	struct amdgpu_device *adev = ring->adev;
 
 	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
 		return false;
 
-	if (down_read_trylock(&adev->exclusive_lock)) {
-		amdgpu_irq_get(adev, ring->fence_drv.irq_src,
-			       ring->fence_drv.irq_type);
-		if (amdgpu_fence_activity(ring))
-			wake_up_all_locked(&adev->fence_queue);
-
-		/* did fence get signaled after we enabled the sw irq? */
-		if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) {
-			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
-				       ring->fence_drv.irq_type);
-			up_read(&adev->exclusive_lock);
-			return false;
-		}
-
-		up_read(&adev->exclusive_lock);
-	} else {
-		/* we're probably in a lockup, lets not fiddle too much */
-		if (amdgpu_irq_get_delayed(adev, ring->fence_drv.irq_src,
-					   ring->fence_drv.irq_type))
-			ring->fence_drv.delayed_irq = true;
-		amdgpu_fence_schedule_check(ring);
-	}
-
 	fence->fence_wake.flags = 0;
 	fence->fence_wake.private = NULL;
 	fence->fence_wake.func = amdgpu_fence_check_signaled;
-	__add_wait_queue(&adev->fence_queue, &fence->fence_wake);
+	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
 	fence_get(f);
 	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
 	return true;
 }
 
-/**
- * amdgpu_fence_signaled - check if a fence has signaled
- *
- * @fence: amdgpu fence object
- *
- * Check if the requested fence has signaled (all asics).
- * Returns true if the fence has signaled or false if it has not.
- */
-bool amdgpu_fence_signaled(struct amdgpu_fence *fence)
-{
-	if (!fence)
-		return true;
-
-	if (amdgpu_fence_seq_signaled(fence->ring, fence->seq)) {
-		if (!fence_signal(&fence->base))
-			FENCE_TRACE(&fence->base, "signaled from amdgpu_fence_signaled\n");
-		return true;
-	}
-
-	return false;
-}
-
-/**
- * amdgpu_fence_any_seq_signaled - check if any sequence number is signaled
- *
- * @adev: amdgpu device pointer
- * @seq: sequence numbers
- *
- * Check if the last signaled fence sequnce number is >= the requested
- * sequence number (all asics).
- * Returns true if any has signaled (current value is >= requested value)
- * or false if it has not. Helper function for amdgpu_fence_wait_seq.
- */
-static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq)
-{
-	unsigned i;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		if (!adev->rings[i] || !seq[i])
-			continue;
-
-		if (amdgpu_fence_seq_signaled(adev->rings[i], seq[i]))
-			return true;
-	}
-
-	return false;
-}
-
-/**
- * amdgpu_fence_wait_seq_timeout - wait for a specific sequence numbers
- *
- * @adev: amdgpu device pointer
- * @target_seq: sequence number(s) we want to wait for
- * @intr: use interruptable sleep
- * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
- *
- * Wait for the requested sequence number(s) to be written by any ring
- * (all asics). Sequnce number array is indexed by ring id.
- * @intr selects whether to use interruptable (true) or non-interruptable
- * (false) sleep when waiting for the sequence number. Helper function
- * for amdgpu_fence_wait_*().
- * Returns remaining time if the sequence number has passed, 0 when
- * the wait timeout, or an error for all other cases.
- * -EDEADLK is returned when a GPU lockup has been detected.
- */
-static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
-					  u64 *target_seq, bool intr,
-					  long timeout)
-{
-	uint64_t last_seq[AMDGPU_MAX_RINGS];
-	bool signaled;
-	int i;
-	long r;
-
-	if (timeout == 0) {
-		return amdgpu_fence_any_seq_signaled(adev, target_seq);
-	}
-
-	while (!amdgpu_fence_any_seq_signaled(adev, target_seq)) {
-
-		/* Save current sequence values, used to check for GPU lockups */
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
-
-			if (!ring || !target_seq[i])
-				continue;
-
-			last_seq[i] = atomic64_read(&ring->fence_drv.last_seq);
-			trace_amdgpu_fence_wait_begin(adev->ddev, i, target_seq[i]);
-			amdgpu_irq_get(adev, ring->fence_drv.irq_src,
-				       ring->fence_drv.irq_type);
-		}
-
-		if (intr) {
-			r = wait_event_interruptible_timeout(adev->fence_queue, (
-				(signaled = amdgpu_fence_any_seq_signaled(adev, target_seq))
-				|| adev->needs_reset), AMDGPU_FENCE_JIFFIES_TIMEOUT);
-		} else {
-			r = wait_event_timeout(adev->fence_queue, (
-				(signaled = amdgpu_fence_any_seq_signaled(adev, target_seq))
-				|| adev->needs_reset), AMDGPU_FENCE_JIFFIES_TIMEOUT);
-		}
-
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_ring *ring = adev->rings[i];
-
-			if (!ring || !target_seq[i])
-				continue;
-
-			amdgpu_irq_put(adev, ring->fence_drv.irq_src,
-				       ring->fence_drv.irq_type);
-			trace_amdgpu_fence_wait_end(adev->ddev, i, target_seq[i]);
-		}
-
-		if (unlikely(r < 0))
-			return r;
-
-		if (unlikely(!signaled)) {
-
-			if (adev->needs_reset)
-				return -EDEADLK;
-
-			/* we were interrupted for some reason and fence
-			 * isn't signaled yet, resume waiting */
-			if (r)
-				continue;
-
-			for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-				struct amdgpu_ring *ring = adev->rings[i];
-
-				if (!ring || !target_seq[i])
-					continue;
-
-				if (last_seq[i] != atomic64_read(&ring->fence_drv.last_seq))
-					break;
-			}
-
-			if (i != AMDGPU_MAX_RINGS)
-				continue;
-
-			for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-				if (!adev->rings[i] || !target_seq[i])
-					continue;
-
-				if (amdgpu_ring_is_lockup(adev->rings[i]))
-					break;
-			}
-
-			if (i < AMDGPU_MAX_RINGS) {
-				/* good news we believe it's a lockup */
-				dev_warn(adev->dev, "GPU lockup (waiting for "
-					 "0x%016llx last fence id 0x%016llx on"
-					 " ring %d)\n",
-					 target_seq[i], last_seq[i], i);
-
-				/* remember that we need an reset */
-				adev->needs_reset = true;
-				wake_up_all(&adev->fence_queue);
-				return -EDEADLK;
-			}
-
-			if (timeout < MAX_SCHEDULE_TIMEOUT) {
-				timeout -= AMDGPU_FENCE_JIFFIES_TIMEOUT;
-				if (timeout <= 0) {
-					return 0;
-				}
-			}
-		}
-	}
-	return timeout;
-}
-
-/**
- * amdgpu_fence_wait - wait for a fence to signal
- *
- * @fence: amdgpu fence object
- * @intr: use interruptable sleep
- *
- * Wait for the requested fence to signal (all asics).
- * @intr selects whether to use interruptable (true) or non-interruptable
- * (false) sleep when waiting for the fence.
- * Returns 0 if the fence has passed, error for all other cases.
- */
-int amdgpu_fence_wait(struct amdgpu_fence *fence, bool intr)
-{
-	uint64_t seq[AMDGPU_MAX_RINGS] = {};
-	long r;
-
-	seq[fence->ring->idx] = fence->seq;
-	r = amdgpu_fence_wait_seq_timeout(fence->ring->adev, seq, intr, MAX_SCHEDULE_TIMEOUT);
-	if (r < 0) {
-		return r;
-	}
-
-	r = fence_signal(&fence->base);
-	if (!r)
-		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
-	return 0;
-}
-
-/**
- * amdgpu_fence_wait_any - wait for a fence to signal on any ring
- *
- * @adev: amdgpu device pointer
- * @fences: amdgpu fence object(s)
- * @intr: use interruptable sleep
- *
- * Wait for any requested fence to signal (all asics). Fence
- * array is indexed by ring id. @intr selects whether to use
- * interruptable (true) or non-interruptable (false) sleep when
- * waiting for the fences. Used by the suballocator.
- * Returns 0 if any fence has passed, error for all other cases.
- */
-int amdgpu_fence_wait_any(struct amdgpu_device *adev,
-			  struct amdgpu_fence **fences,
-			  bool intr)
-{
-	uint64_t seq[AMDGPU_MAX_RINGS];
-	unsigned i, num_rings = 0;
-	long r;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		seq[i] = 0;
-
-		if (!fences[i]) {
-			continue;
-		}
-
-		seq[i] = fences[i]->seq;
-		++num_rings;
-	}
-
-	/* nothing to wait for ? */
-	if (num_rings == 0)
-		return -ENOENT;
-
-	r = amdgpu_fence_wait_seq_timeout(adev, seq, intr, MAX_SCHEDULE_TIMEOUT);
-	if (r < 0) {
-		return r;
-	}
-	return 0;
-}
+/*
+ * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
+ * @ring: ring to wait on for the seq number
+ * @seq: seq number wait for
+ *
+ * return value:
+ * 0: seq signaled, and gpu not hang
+ * -EDEADL: GPU hang detected
+ * -EINVAL: some paramter is not valid
+ */
+static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
+{
+	struct amdgpu_device *adev = ring->adev;
+	bool signaled = false;
+
+	BUG_ON(!ring);
+	if (seq > ring->fence_drv.sync_seq[ring->idx])
+		return -EINVAL;
+
+	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
+		return 0;
+
+	wait_event(ring->fence_drv.fence_queue, (
+		   (signaled = amdgpu_fence_seq_signaled(ring, seq))
+		   || adev->needs_reset));
+
+	if (signaled)
+		return 0;
+	else
+		return -EDEADLK;
+}
 
 /**
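Two things worth noting about the replacement helper. First, "-EDEADL" and "paramter" in its comment are typos carried in the committed code; the function actually returns -EDEADLK. Second, it always sleeps uninterruptibly: the intr plumbing of the old amdgpu_fence_wait_seq_timeout() is gone on this path. An interruptible variant, had one been wanted, might look roughly like this (hypothetical sketch, not part of the commit):

    static int amdgpu_fence_ring_wait_seq_intr(struct amdgpu_ring *ring,
    					   uint64_t seq)
    {
    	bool signaled = false;
    	int r;

    	r = wait_event_interruptible(ring->fence_drv.fence_queue,
    			(signaled = amdgpu_fence_seq_signaled(ring, seq)) ||
    			ring->adev->needs_reset);
    	if (r)
    		return r;	/* -ERESTARTSYS on signal */
    	return signaled ? 0 : -EDEADLK;
    }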
@@ -739,19 +412,12 @@ int amdgpu_fence_wait_any(struct amdgpu_device *adev,
  */
 int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
 {
-	uint64_t seq[AMDGPU_MAX_RINGS] = {};
-	long r;
+	uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
 
-	seq[ring->idx] = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
-	if (seq[ring->idx] >= ring->fence_drv.sync_seq[ring->idx]) {
-		/* nothing to wait for, last_seq is
-		   already the last emited fence */
+	if (seq >= ring->fence_drv.sync_seq[ring->idx])
 		return -ENOENT;
-	}
-	r = amdgpu_fence_wait_seq_timeout(ring->adev, seq, false, MAX_SCHEDULE_TIMEOUT);
-	if (r < 0)
-		return r;
-	return 0;
+
+	return amdgpu_fence_ring_wait_seq(ring, seq);
 }
 
 /**
@@ -766,23 +432,12 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
  */
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 {
-	struct amdgpu_device *adev = ring->adev;
-	uint64_t seq[AMDGPU_MAX_RINGS] = {};
-	long r;
+	uint64_t seq = ring->fence_drv.sync_seq[ring->idx];
 
-	seq[ring->idx] = ring->fence_drv.sync_seq[ring->idx];
-	if (!seq[ring->idx])
+	if (!seq)
 		return 0;
 
-	r = amdgpu_fence_wait_seq_timeout(adev, seq, false, MAX_SCHEDULE_TIMEOUT);
-	if (r < 0) {
-		if (r == -EDEADLK)
-			return -EDEADLK;
-
-		dev_err(adev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
-			ring->idx, r);
-	}
-	return 0;
+	return amdgpu_fence_ring_wait_seq(ring, seq);
 }
 
 /**
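Both wrappers now reduce to amdgpu_fence_ring_wait_seq() with different targets: wait_next blocks on last_seq + 1, wait_empty on sync_seq (the last emitted fence).

    /* Example: last_seq == 8, sync_seq[ring->idx] == 10
     *   amdgpu_fence_wait_next()  -> waits for seq 9  (the next fence)
     *   amdgpu_fence_wait_empty() -> waits for seq 10 (ring fully idle)
     */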
@@ -933,9 +588,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
 	}
 	amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
-	ring->fence_drv.initialized = true;
+	amdgpu_irq_get(adev, irq_src, irq_type);
+
 	ring->fence_drv.irq_src = irq_src;
 	ring->fence_drv.irq_type = irq_type;
+	ring->fence_drv.initialized = true;
+
 	dev_info(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
 		 "cpu addr 0x%p\n", ring->idx,
 		 ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
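With this reordering, the fence interrupt reference is taken once when the ring's fence driver is brought up, instead of being bounced per-waiter in enable_signaling (removed above). The reference pairing after the change, as visible across this diff:

    /* amdgpu_fence_driver_start_ring() -> amdgpu_irq_get()
     * amdgpu_fence_driver_suspend()    -> amdgpu_irq_put()  (re-taken in resume)
     * amdgpu_fence_driver_fini()       -> amdgpu_irq_put()
     */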
@@ -966,6 +624,16 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 	INIT_DELAYED_WORK(&ring->fence_drv.lockup_work,
 			  amdgpu_fence_check_lockup);
 	ring->fence_drv.ring = ring;
+
+	if (amdgpu_enable_scheduler) {
+		ring->scheduler = amd_sched_create((void *)ring->adev,
+						   &amdgpu_sched_ops,
+						   ring->idx, 5, 0,
+						   amdgpu_sched_hw_submission);
+		if (!ring->scheduler)
+			DRM_ERROR("Failed to create scheduler on ring %d.\n",
+				  ring->idx);
+	}
 }
 
 /**
@@ -982,7 +650,6 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
  */
 int amdgpu_fence_driver_init(struct amdgpu_device *adev)
 {
-	init_waitqueue_head(&adev->fence_queue);
 	if (amdgpu_debugfs_fence_init(adev))
 		dev_err(adev->dev, "fence debugfs file creation failed\n");
 
@@ -1011,13 +678,78 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 			/* no need to trigger GPU reset as we are unloading */
 			amdgpu_fence_driver_force_completion(adev);
 		}
-		wake_up_all(&adev->fence_queue);
+		wake_up_all(&ring->fence_drv.fence_queue);
+		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
+			       ring->fence_drv.irq_type);
+		if (ring->scheduler)
+			amd_sched_destroy(ring->scheduler);
 		ring->fence_drv.initialized = false;
 	}
 	mutex_unlock(&adev->ring_lock);
 }
 
 /**
+ * amdgpu_fence_driver_suspend - suspend the fence driver
+ * for all possible rings.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Suspend the fence driver for all possible rings (all asics).
+ */
+void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
+{
+	int i, r;
+
+	mutex_lock(&adev->ring_lock);
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		if (!ring || !ring->fence_drv.initialized)
+			continue;
+
+		/* wait for gpu to finish processing current batch */
+		r = amdgpu_fence_wait_empty(ring);
+		if (r) {
+			/* delay GPU reset to resume */
+			amdgpu_fence_driver_force_completion(adev);
+		}
+
+		/* disable the interrupt */
+		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
+			       ring->fence_drv.irq_type);
+	}
+	mutex_unlock(&adev->ring_lock);
+}
+
+/**
+ * amdgpu_fence_driver_resume - resume the fence driver
+ * for all possible rings.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Resume the fence driver for all possible rings (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has using
+ * amdgpu_fence_driver_start_ring().
+ * Returns 0 for success.
+ */
+void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
+{
+	int i;
+
+	mutex_lock(&adev->ring_lock);
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		if (!ring || !ring->fence_drv.initialized)
+			continue;
+
+		/* enable the interrupt */
+		amdgpu_irq_get(adev, ring->fence_drv.irq_src,
+			       ring->fence_drv.irq_type);
+	}
+	mutex_unlock(&adev->ring_lock);
+}
+
+/**
  * amdgpu_fence_driver_force_completion - force all fence waiter to complete
  *
  * @adev: amdgpu device pointer
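The new suspend/resume pair is presumably wired into the device power-management paths; the call sites are outside this file and not part of this diff. The expected call shape, as an assumption-labeled sketch:

    /* Hypothetical caller, e.g. the amdgpu device suspend/resume path;
     * the exact call sites are not shown in this diff. */
    amdgpu_fence_driver_suspend(adev);	/* waits for rings idle, drops irqs */
    /* ... power down, reprogram hardware, power up ... */
    amdgpu_fence_driver_resume(adev);	/* re-enables fence interrupts */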
@@ -1104,6 +836,22 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
 	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
 }
 
+static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences)
+{
+	int idx;
+	struct amdgpu_fence *fence;
+
+	idx = 0;
+	for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+		fence = fences[idx];
+		if (fence) {
+			if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+				return true;
+		}
+	}
+	return false;
+}
+
 struct amdgpu_wait_cb {
 	struct fence_cb base;
 	struct task_struct *task;
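The `idx = 0;` immediately before the for loop (and again in the cleanup loop further down) is redundant: the loop initializer overwrites it. A tightened but behaviorally identical form, for comparison:

    static inline bool amdgpu_test_signaled_any(struct amdgpu_fence **fences)
    {
    	int idx;

    	for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
    		struct amdgpu_fence *fence = fences[idx];

    		if (fence && test_bit(FENCE_FLAG_SIGNALED_BIT,
    				      &fence->base.flags))
    			return true;
    	}
    	return false;
    }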
@@ -1119,14 +867,35 @@ static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
 static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
 					     signed long t)
 {
+	struct amdgpu_fence *array[AMDGPU_MAX_RINGS];
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_device *adev = fence->ring->adev;
-	struct amdgpu_wait_cb cb;
 
-	cb.task = current;
+	memset(&array[0], 0, sizeof(array));
+	array[0] = fence;
+
+	return amdgpu_fence_wait_any(adev, array, intr, t);
+}
+
+/* wait until any fence in array signaled */
+signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
+				  struct amdgpu_fence **array, bool intr, signed long t)
+{
+	long idx = 0;
+	struct amdgpu_wait_cb cb[AMDGPU_MAX_RINGS];
+	struct amdgpu_fence *fence;
+
+	BUG_ON(!array);
 
-	if (fence_add_callback(f, &cb.base, amdgpu_fence_wait_cb))
-		return t;
+	for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+		fence = array[idx];
+		if (fence) {
+			cb[idx].task = current;
+			if (fence_add_callback(&fence->base,
+					&cb[idx].base, amdgpu_fence_wait_cb))
+				return t; /* return if fence is already signaled */
+		}
+	}
 
 	while (t > 0) {
 		if (intr)
@@ -1135,10 +904,10 @@ static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
 		/*
-		 * amdgpu_test_signaled must be called after
+		 * amdgpu_test_signaled_any must be called after
 		 * set_current_state to prevent a race with wake_up_process
 		 */
-		if (amdgpu_test_signaled(fence))
+		if (amdgpu_test_signaled_any(array))
 			break;
 
 		if (adev->needs_reset) {
@@ -1153,7 +922,13 @@ static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
 	}
 
 	__set_current_state(TASK_RUNNING);
-	fence_remove_callback(f, &cb.base);
+
+	idx = 0;
+	for (idx = 0; idx < AMDGPU_MAX_RINGS; ++idx) {
+		fence = array[idx];
+		if (fence)
+			fence_remove_callback(&fence->base, &cb[idx].base);
+	}
 
 	return t;
 }
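The default wait now funnels a single fence through the any-of path: it zeroes an AMDGPU_MAX_RINGS array, drops the fence in slot 0, and calls amdgpu_fence_wait_any(), which arms one callback per non-NULL entry and sleeps until any of them fires (or the timeout or a GPU reset intervenes). A caller-side sketch, where fence_a and fence_b are hypothetical fences the caller holds references on:

    struct amdgpu_fence *array[AMDGPU_MAX_RINGS] = {};
    signed long t;

    array[0] = fence_a;	/* hypothetical */
    array[1] = fence_b;	/* hypothetical */
    t = amdgpu_fence_wait_any(adev, array, true, MAX_SCHEDULE_TIMEOUT);
    if (t < 0)
    	return t;	/* interrupted, or GPU reset required */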