Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_gem.c')
-rw-r--r--	drivers/gpu/drm/vc4/vc4_gem.c	161
1 file changed, 150 insertions, 11 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index e9c381c42139..735412e3725a 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 	for (i = 0; i < exec->bo_count; i++) {
 		bo = to_vc4_bo(&exec->bo[i]->base);
 		bo->seqno = seqno;
+
+		reservation_object_add_shared_fence(bo->resv, exec->fence);
 	}
 
 	list_for_each_entry(bo, &exec->unref_list, unref_head) {
@@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 	for (i = 0; i < exec->rcl_write_bo_count; i++) {
 		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
 		bo->write_seqno = seqno;
+
+		reservation_object_add_excl_fence(bo->resv, exec->fence);
+	}
+}
+
+static void
+vc4_unlock_bo_reservations(struct drm_device *dev,
+			   struct vc4_exec_info *exec,
+			   struct ww_acquire_ctx *acquire_ctx)
+{
+	int i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ww_mutex_unlock(&bo->resv->lock);
+	}
+
+	ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list).  They're entirely private
+ * to vc4, so we don't attach dma-buf fences to them.
+ */
+static int
+vc4_lock_bo_reservations(struct drm_device *dev,
+			 struct vc4_exec_info *exec,
+			 struct ww_acquire_ctx *acquire_ctx)
+{
+	int contended_lock = -1;
+	int i, ret;
+	struct vc4_bo *bo;
+
+	ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+	if (contended_lock != -1) {
+		bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+						       acquire_ctx);
+		if (ret) {
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < exec->bo_count; i++) {
+		if (i == contended_lock)
+			continue;
+
+		bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+		if (ret) {
+			int j;
+
+			for (j = 0; j < i; j++) {
+				bo = to_vc4_bo(&exec->bo[j]->base);
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (contended_lock != -1 && contended_lock >= i) {
+				bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (ret == -EDEADLK) {
+				contended_lock = i;
+				goto retry;
+			}
+
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
 	}
+
+	ww_acquire_done(acquire_ctx);
+
+	/* Reserve space for our shared (read-only) fence references,
+	 * before we commit the CL to the hardware.
+	 */
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ret = reservation_object_reserve_shared(bo->resv);
+		if (ret) {
+			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+			return ret;
+		}
+	}
+
+	return 0;
 }
 
 /* Queues a struct vc4_exec_info for execution.  If no job is
@@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
  * then bump the end address.  That's a change for a later date,
  * though.
  */
-static void
-vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+static int
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
+		 struct ww_acquire_ctx *acquire_ctx)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	uint64_t seqno;
 	unsigned long irqflags;
+	struct vc4_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+	fence->dev = dev;
 
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
 
 	seqno = ++vc4->emit_seqno;
 	exec->seqno = seqno;
+
+	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
+		       vc4->dma_fence_context, exec->seqno);
+	fence->seqno = exec->seqno;
+	exec->fence = &fence->base;
+
 	vc4_update_bo_seqnos(exec, seqno);
 
+	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+
 	list_add_tail(&exec->head, &vc4->bin_job_list);
 
 	/* If no job was executing, kick ours off.  Otherwise, it'll
@@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
 	}
 
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	return 0;
 }
 
 /**
@@ -705,8 +820,15 @@ static void
 vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
 	unsigned i;
 
+	/* If we got force-completed because of GPU reset rather than
+	 * through our IRQ handler, signal the fence now.
+	 */
+	if (exec->fence)
+		dma_fence_signal(exec->fence);
+
 	if (exec->bo) {
 		for (i = 0; i < exec->bo_count; i++)
 			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
@@ -720,6 +842,11 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 		drm_gem_object_unreference_unlocked(&bo->base.base);
 	}
 
+	/* Free up the allocation of any bin slots we used. */
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	vc4->bin_alloc_used &= ~exec->bin_slots;
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
 	mutex_lock(&vc4->power_lock);
 	if (--vc4->power_refcount == 0) {
 		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
@@ -874,6 +1001,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_vc4_submit_cl *args = data;
 	struct vc4_exec_info *exec;
+	struct ww_acquire_ctx acquire_ctx;
 	int ret = 0;
 
 	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
@@ -888,13 +1016,16 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	}
 
 	mutex_lock(&vc4->power_lock);
-	if (vc4->power_refcount++ == 0)
+	if (vc4->power_refcount++ == 0) {
 		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-	mutex_unlock(&vc4->power_lock);
-	if (ret < 0) {
-		kfree(exec);
-		return ret;
+		if (ret < 0) {
+			mutex_unlock(&vc4->power_lock);
+			vc4->power_refcount--;
+			kfree(exec);
+			return ret;
+		}
 	}
+	mutex_unlock(&vc4->power_lock);
 
 	exec->args = args;
 	INIT_LIST_HEAD(&exec->unref_list);
@@ -916,12 +1047,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
+	if (ret)
+		goto fail;
+
 	/* Clear this out of the struct we'll be putting in the queue,
 	 * since it's part of our stack.
 	 */
 	exec->args = NULL;
 
-	vc4_queue_submit(dev, exec);
+	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
+	if (ret)
+		goto fail;
 
 	/* Return the seqno for our job. */
 	args->seqno = vc4->emit_seqno;
@@ -939,6 +1076,8 @@ vc4_gem_init(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+	vc4->dma_fence_context = dma_fence_context_alloc(1);
+
 	INIT_LIST_HEAD(&vc4->bin_job_list);
 	INIT_LIST_HEAD(&vc4->render_job_list);
 	INIT_LIST_HEAD(&vc4->job_done_list);
@@ -968,9 +1107,9 @@ vc4_gem_destroy(struct drm_device *dev)
 	/* V3D should already have disabled its interrupt and cleared
 	 * the overflow allocation registers.  Now free the object.
 	 */
-	if (vc4->overflow_mem) {
-		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
-		vc4->overflow_mem = NULL;
+	if (vc4->bin_bo) {
+		drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
+		vc4->bin_bo = NULL;
 	}
 
 	if (vc4->hang_state)
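
The new vc4_queue_submit() code above allocates a struct vc4_fence and initializes it with vc4_fence_ops and vc4->dma_fence_context, none of which appear in this file: they are defined elsewhere in the driver (vc4_drv.h / vc4_fence.c) and are not part of this diff. For orientation only, the sketch below shows the general shape such a fence type takes against the v4.12-era dma_fence API; the callback bodies, the timeline name, and the include choices here are assumptions for illustration, not the driver's actual definitions.

/* Illustrative sketch only -- not the driver's vc4_fence.c.  A fence of
 * this shape is what vc4_queue_submit() above embeds per job: the core
 * struct dma_fence plus the driver's own bookkeeping.
 */
#include <linux/dma-fence.h>

struct drm_device;	/* provided by the DRM headers in a real build */

struct vc4_fence {
	struct dma_fence base;		/* initialized by dma_fence_init() */
	struct drm_device *dev;
	uint64_t seqno;			/* mirrors exec->seqno */
};

static const char *vc4_fence_get_driver_name(struct dma_fence *fence)
{
	return "vc4";
}

static const char *vc4_fence_get_timeline_name(struct dma_fence *fence)
{
	return "vc4-v3d";	/* assumed name, for illustration */
}

static bool vc4_fence_enable_signaling(struct dma_fence *fence)
{
	/* The job-done path calls dma_fence_signal() unconditionally,
	 * so nothing extra has to be armed here.
	 */
	return true;
}

/* .wait and .enable_signaling are mandatory callbacks in this kernel era. */
static const struct dma_fence_ops vc4_fence_ops = {
	.get_driver_name = vc4_fence_get_driver_name,
	.get_timeline_name = vc4_fence_get_timeline_name,
	.enable_signaling = vc4_fence_enable_signaling,
	.wait = dma_fence_default_wait,
	.release = dma_fence_free,
};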
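
The point of attaching these fences is that other users of a shared BO can synchronize with vc4's rendering through the BO's reservation object rather than through vc4's private seqnos. As a hypothetical usage sketch against the same v4.12-era API (the helper name and the timeout value are made up here, not taken from the kernel):

#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/reservation.h>

/* Hypothetical helper: block until every fence attached to the BO's
 * reservation object has signaled.  wait_all = true covers both the
 * exclusive (write) fence and all shared (read) fences added by
 * vc4_update_bo_seqnos() above.
 */
static int wait_for_vc4_rendering(struct reservation_object *resv)
{
	long ret;

	ret = reservation_object_wait_timeout_rcu(resv,
						  true,	/* wait_all */
						  true,	/* interruptible */
						  msecs_to_jiffies(1000));
	if (ret == 0)
		return -ETIME;		/* timed out */
	if (ret < 0)
		return ret;		/* e.g. -ERESTARTSYS */

	return 0;
}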