Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_gem.c')
 drivers/gpu/drm/vc4/vc4_gem.c | 161 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 150 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index e9c381c42139..735412e3725a 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 	for (i = 0; i < exec->bo_count; i++) {
 		bo = to_vc4_bo(&exec->bo[i]->base);
 		bo->seqno = seqno;
+
+		reservation_object_add_shared_fence(bo->resv, exec->fence);
 	}
 
 	list_for_each_entry(bo, &exec->unref_list, unref_head) {
@@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 	for (i = 0; i < exec->rcl_write_bo_count; i++) {
 		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
 		bo->write_seqno = seqno;
+
+		reservation_object_add_excl_fence(bo->resv, exec->fence);
+	}
+}
+
+static void
+vc4_unlock_bo_reservations(struct drm_device *dev,
+			   struct vc4_exec_info *exec,
+			   struct ww_acquire_ctx *acquire_ctx)
+{
+	int i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ww_mutex_unlock(&bo->resv->lock);
+	}
+
+	ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list).  They're entirely private
+ * to vc4, so we don't attach dma-buf fences to them.
+ */
+static int
+vc4_lock_bo_reservations(struct drm_device *dev,
+			 struct vc4_exec_info *exec,
+			 struct ww_acquire_ctx *acquire_ctx)
+{
+	int contended_lock = -1;
+	int i, ret;
+	struct vc4_bo *bo;
+
+	ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+	if (contended_lock != -1) {
+		bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+						       acquire_ctx);
+		if (ret) {
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < exec->bo_count; i++) {
+		if (i == contended_lock)
+			continue;
+
+		bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+		if (ret) {
+			int j;
+
+			for (j = 0; j < i; j++) {
+				bo = to_vc4_bo(&exec->bo[j]->base);
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (contended_lock != -1 && contended_lock >= i) {
+				bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (ret == -EDEADLK) {
+				contended_lock = i;
+				goto retry;
+			}
+
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
 	}
+
+	ww_acquire_done(acquire_ctx);
+
+	/* Reserve space for our shared (read-only) fence references,
+	 * before we commit the CL to the hardware.
+	 */
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ret = reservation_object_reserve_shared(bo->resv);
+		if (ret) {
+			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+			return ret;
+		}
+	}
+
+	return 0;
 }
 
 /* Queues a struct vc4_exec_info for execution.  If no job is
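
The retry loop above is the standard deadlock-avoidance dance for
reservation_ww_class: when a lock attempt returns -EDEADLK, every
reservation already held is dropped, the contended lock is reacquired on
the slow path, and the whole set is retried.  Distilled to its skeleton,
with a hypothetical lock[]/n standing in for the exec's BO reservations
(a sketch, not part of the patch):

#include <linux/ww_mutex.h>
#include <linux/reservation.h>

static int lock_all(struct ww_mutex **lock, int n, struct ww_acquire_ctx *ctx)
{
	int contended = -1;
	int i, ret;

	ww_acquire_init(ctx, &reservation_ww_class);
retry:
	if (contended != -1) {
		/* We lost a wound/wait race: sleep until the winner drops
		 * the contended lock, then take it before all the others.
		 */
		ret = ww_mutex_lock_slow_interruptible(lock[contended], ctx);
		if (ret)
			goto done;
	}

	for (i = 0; i < n; i++) {
		if (i == contended)
			continue;

		ret = ww_mutex_lock_interruptible(lock[i], ctx);
		if (ret) {
			int j;

			/* Back off: release everything taken so far,
			 * including a slow-path lock below i.
			 */
			for (j = 0; j < i; j++)
				ww_mutex_unlock(lock[j]);
			if (contended >= i)
				ww_mutex_unlock(lock[contended]);

			if (ret == -EDEADLK) {
				contended = i;
				goto retry;
			}
			goto done;
		}
	}
	ret = 0;
done:
	ww_acquire_done(ctx);
	return ret;
}

The ww_acquire_ctx is what lets the wound/wait algorithm (and lockdep)
treat the whole set as one transaction; ww_acquire_done() marks the point
after which no further locks will be added.
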
@@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
  * then bump the end address. That's a change for a later date,
  * though.
  */
-static void
-vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+static int
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
+		 struct ww_acquire_ctx *acquire_ctx)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	uint64_t seqno;
 	unsigned long irqflags;
+	struct vc4_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+	fence->dev = dev;
 
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
 
 	seqno = ++vc4->emit_seqno;
 	exec->seqno = seqno;
+
+	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
+		       vc4->dma_fence_context, exec->seqno);
+	fence->seqno = exec->seqno;
+	exec->fence = &fence->base;
+
 	vc4_update_bo_seqnos(exec, seqno);
 
+	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+
 	list_add_tail(&exec->head, &vc4->bin_job_list);
 
 	/* If no job was executing, kick ours off.  Otherwise, it'll
@@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
 	}
 
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	return 0;
 }
 
 /**
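
vc4_fence_ops and struct vc4_fence referenced in the hunk above are not
part of this file; they come from vc4_drv.h and vc4_fence.c elsewhere in
the series.  For a single seqno timeline like this one, the ops reduce to
comparing a fence's seqno against the last seqno retired by the V3D IRQ
handler.  Roughly (a sketch of that era's dma_fence_ops; to_vc4_fence()
and finished_seqno are assumed from the rest of the series):

#include <linux/dma-fence.h>

static const char *vc4_fence_get_driver_name(struct dma_fence *fence)
{
	return "vc4";
}

static const char *vc4_fence_get_timeline_name(struct dma_fence *fence)
{
	return "vc4-v3d";
}

static bool vc4_fence_enable_signaling(struct dma_fence *fence)
{
	/* The IRQ handler signals every retired seqno unconditionally,
	 * so nothing extra needs to be armed here.
	 */
	return true;
}

static bool vc4_fence_signaled(struct dma_fence *fence)
{
	struct vc4_fence *f = to_vc4_fence(fence);
	struct vc4_dev *vc4 = to_vc4_dev(f->dev);

	return vc4->finished_seqno >= f->seqno;
}

const struct dma_fence_ops vc4_fence_ops = {
	.get_driver_name = vc4_fence_get_driver_name,
	.get_timeline_name = vc4_fence_get_timeline_name,
	.enable_signaling = vc4_fence_enable_signaling,
	.signaled = vc4_fence_signaled,
	.wait = dma_fence_default_wait,
	.release = dma_fence_free,
};

Because dma_fence_init() above passes &vc4->job_lock and the
vc4->dma_fence_context allocated in vc4_gem_init() below, every job lands
on one timeline ordered by emit_seqno.
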
@@ -705,8 +820,15 @@ static void
 vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
 	unsigned i;
 
+	/* If we got force-completed because of GPU reset rather than
+	 * through our IRQ handler, signal the fence now.
+	 */
+	if (exec->fence)
+		dma_fence_signal(exec->fence);
+
 	if (exec->bo) {
 		for (i = 0; i < exec->bo_count; i++)
 			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
@@ -720,6 +842,11 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 		drm_gem_object_unreference_unlocked(&bo->base.base);
 	}
 
+	/* Free up the allocation of any bin slots we used. */
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	vc4->bin_alloc_used &= ~exec->bin_slots;
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
 	mutex_lock(&vc4->power_lock);
 	if (--vc4->power_refcount == 0) {
 		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
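
bin_alloc_used is a bitmask of binner-memory slots, guarded by job_lock;
the hunk above gives this job's slots back on completion.  The allocation
side lives in vc4_v3d.c, outside this diff; in outline it is a
find-first-zero-bit under the same lock, something like this sketch (the
in-tree version additionally waits for running jobs and retries when the
mask is full):

static int vc4_get_bin_slot_sketch(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	int slot;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	/* ffs() returns a 1-based bit index, or 0 when every slot is
	 * already in use.
	 */
	slot = ffs(~vc4->bin_alloc_used);
	if (slot != 0) {
		slot--;			/* back to a 0-based slot number */
		vc4->bin_alloc_used |= BIT(slot);
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return slot;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	return -ENOMEM;
}
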
@@ -874,6 +1001,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_vc4_submit_cl *args = data;
 	struct vc4_exec_info *exec;
+	struct ww_acquire_ctx acquire_ctx;
 	int ret = 0;
 
 	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
@@ -888,13 +1016,16 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	}
 
 	mutex_lock(&vc4->power_lock);
-	if (vc4->power_refcount++ == 0)
+	if (vc4->power_refcount++ == 0) {
 		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-	mutex_unlock(&vc4->power_lock);
-	if (ret < 0) {
-		kfree(exec);
-		return ret;
+		if (ret < 0) {
+			mutex_unlock(&vc4->power_lock);
+			vc4->power_refcount--;
+			kfree(exec);
+			return ret;
+		}
 	}
+	mutex_unlock(&vc4->power_lock);
 
 	exec->args = args;
 	INIT_LIST_HEAD(&exec->unref_list);
@@ -916,12 +1047,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
+	if (ret)
+		goto fail;
+
 	/* Clear this out of the struct we'll be putting in the queue,
 	 * since it's part of our stack.
 	 */
 	exec->args = NULL;
 
-	vc4_queue_submit(dev, exec);
+	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
+	if (ret)
+		goto fail;
 
 	/* Return the seqno for our job. */
 	args->seqno = vc4->emit_seqno;
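
With fences attached to every BO's reservation object, the seqno returned
here is no longer the only synchronization handle: another driver
importing one of these BOs as a dma-buf can wait on the reservation
object directly.  In the reservation API of this kernel generation that
looks roughly like the following (illustrative caller, not part of the
patch):

#include <linux/reservation.h>

/* Sketch: an importer waiting for vc4 rendering to a shared BO. */
static long wait_for_vc4_rendering(struct vc4_bo *bo)
{
	/* wait_all = true also waits on the shared (read) fences;
	 * intr = true allows the wait to return -ERESTARTSYS.
	 */
	return reservation_object_wait_timeout_rcu(bo->resv, true, true,
						   MAX_SCHEDULE_TIMEOUT);
}

The return value follows the usual convention: negative on error,
remaining jiffies otherwise.
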
@@ -939,6 +1076,8 @@ vc4_gem_init(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+	vc4->dma_fence_context = dma_fence_context_alloc(1);
+
 	INIT_LIST_HEAD(&vc4->bin_job_list);
 	INIT_LIST_HEAD(&vc4->render_job_list);
 	INIT_LIST_HEAD(&vc4->job_done_list);
@@ -968,9 +1107,9 @@ vc4_gem_destroy(struct drm_device *dev)
 	/* V3D should already have disabled its interrupt and cleared
 	 * the overflow allocation registers. Now free the object.
 	 */
-	if (vc4->overflow_mem) {
-		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
-		vc4->overflow_mem = NULL;
+	if (vc4->bin_bo) {
+		drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
+		vc4->bin_bo = NULL;
 	}
 
 	if (vc4->hang_state)