about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-11-15 23:42:10 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-11-15 23:42:10 -0500
commit: e60e1ee60630cafef5e430c2ae364877e061d980 (patch)
tree: 816aeef8fe8d4a2c6a1ebbc7a350839bac8dd4c2 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent: 5d352e69c60e54b5f04d6e337a1d2bf0dbf3d94a (diff)
parent: f150891fd9878ef0d9197c4e8451ce67c3bdd014 (diff)
Merge tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "This is the main drm pull request for v4.15. Core: - Atomic object lifetime fixes - Atomic iterator improvements - Sparse/smatch fixes - Legacy kms ioctls to be interruptible - EDID override improvements - fb/gem helper cleanups - Simple outreachy patches - Documentation improvements - Fix dma-buf rcu races - DRM mode object leasing for improving VR use cases. - vgaarb improvements for non-x86 platforms. New driver: - tve200: Faraday Technology TVE200 block. This "TV Encoder" encodes a ITU-T BT.656 stream and can be found in the StorLink SL3516 (later Cortina Systems CS3516) as well as the Grain Media GM8180. New bridges: - SiI9234 support New panels: - S6E63J0X03, OTM8009A, Seiko 43WVF1G, 7" rpi touch panel, Toshiba LT089AC19000, Innolux AT043TN24 i915: - Remove Coffeelake from alpha support - Cannonlake workarounds - Infoframe refactoring for DisplayPort - VBT updates - DisplayPort vswing/emph/buffer translation refactoring - CCS fixes - Restore GPU clock boost on missed vblanks - Scatter list updates for userptr allocations - Gen9+ transition watermarks - Display IPC (Isochronous Priority Control) - Private PAT management - GVT: improved error handling and pci config sanitizing - Execlist refactoring - Transparent Huge Page support - User defined priorities support - HuC/GuC firmware refactoring - DP MST fixes - eDP power sequencing fixes - Use RCU instead of stop_machine - PSR state tracking support - Eviction fixes - BDW DP aux channel timeout fixes - LSPCON fixes - Cannonlake PLL fixes amdgpu: - Per VM BO support - Powerplay cleanups - CI powerplay support - PASID mgr for kfd - SR-IOV fixes - initial GPU reset for vega10 - Prime mmap support - TTM updates - Clock query interface for Raven - Fence to handle ioctl - UVD encode ring support on Polaris - Transparent huge page DMA support - Compute LRU pipe tweaks - BO flag to allow buffers to opt out of implicit sync - CTX priority setting API - VRAM lost infrastructure 
plumbing qxl: - fix flicker since atomic rework amdkfd: - Further improvements from internal AMD tree - Usermode events - Drop radeon support nouveau: - Pascal temperature sensor support - Improved BAR2 handling - MMU rework to support Pascal MMU exynos: - Improved HDMI/mixer support - HDMI audio interface support tegra: - Prep work for tegra186 - Cleanup/fixes msm: - Preemption support for a5xx - Display fixes for 8x96 (snapdragon 820) - Async cursor plane fixes - FW loading rework - GPU debugging improvements vc4: - Prep for DSI panels - fix T-format tiling scanout - New madvise ioctl Rockchip: - LVDS support omapdrm: - omap4 HDMI CEC support etnaviv: - GPU performance counters groundwork sun4i: - refactor driver load + TCON backend - HDMI improvements - A31 support - Misc fixes udl: - Probe/EDID read fixes. tilcdc: - Misc fixes. pl111: - Support more variants adv7511: - Improve EDID handling. - HDMI CEC support sii8620: - Add remote control support" * tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux: (1480 commits) drm/rockchip: analogix_dp: Use mutex rather than spinlock drm/mode_object: fix documentation for object lookups. drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU drm/i915: Move init_clock_gating() back to where it was drm/i915: Prune the reservation shared fence array drm/i915: Idle the GPU before shinking everything drm/i915: Lock llist_del_first() vs llist_del_all() drm/i915: Calculate ironlake intermediate watermarks correctly, v2. 
drm/i915: Disable lazy PPGTT page table optimization for vGPU drm/i915/execlists: Remove the priority "optimisation" drm/i915: Filter out spurious execlists context-switch interrupts drm/amdgpu: use irq-safe lock for kiq->ring_lock drm/amdgpu: bypass lru touch for KIQ ring submission drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories() drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs() drm/amd/powerplay: initialize a variable before using it drm/amd/powerplay: suppress KASAN out of bounds warning in vega10_populate_all_memory_levels drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition drm/vblank: Tune drm_crtc_accurate_vblank_count() WARN down to a debug drm/rockchip: add CONFIG_OF dependency for lvds ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 386
1 files changed, 221 insertions, 165 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index cd664832f9e8..6c78623e1386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -25,6 +25,7 @@
25 * Jerome Glisse <glisse@freedesktop.org> 25 * Jerome Glisse <glisse@freedesktop.org>
26 */ 26 */
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/sync_file.h>
28#include <drm/drmP.h> 29#include <drm/drmP.h>
29#include <drm/amdgpu_drm.h> 30#include <drm/amdgpu_drm.h>
30#include <drm/drm_syncobj.h> 31#include <drm/drm_syncobj.h>
@@ -89,12 +90,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
89 goto free_chunk; 90 goto free_chunk;
90 } 91 }
91 92
93 mutex_lock(&p->ctx->lock);
94
92 /* get chunks */ 95 /* get chunks */
93 chunk_array_user = u64_to_user_ptr(cs->in.chunks); 96 chunk_array_user = u64_to_user_ptr(cs->in.chunks);
94 if (copy_from_user(chunk_array, chunk_array_user, 97 if (copy_from_user(chunk_array, chunk_array_user,
95 sizeof(uint64_t)*cs->in.num_chunks)) { 98 sizeof(uint64_t)*cs->in.num_chunks)) {
96 ret = -EFAULT; 99 ret = -EFAULT;
97 goto put_ctx; 100 goto free_chunk;
98 } 101 }
99 102
100 p->nchunks = cs->in.num_chunks; 103 p->nchunks = cs->in.num_chunks;
@@ -102,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
102 GFP_KERNEL); 105 GFP_KERNEL);
103 if (!p->chunks) { 106 if (!p->chunks) {
104 ret = -ENOMEM; 107 ret = -ENOMEM;
105 goto put_ctx; 108 goto free_chunk;
106 } 109 }
107 110
108 for (i = 0; i < p->nchunks; i++) { 111 for (i = 0; i < p->nchunks; i++) {
@@ -169,6 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
169 if (ret) 172 if (ret)
170 goto free_all_kdata; 173 goto free_all_kdata;
171 174
175 if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
176 ret = -ECANCELED;
177 goto free_all_kdata;
178 }
179
172 if (p->uf_entry.robj) 180 if (p->uf_entry.robj)
173 p->job->uf_addr = uf_offset; 181 p->job->uf_addr = uf_offset;
174 kfree(chunk_array); 182 kfree(chunk_array);
@@ -182,8 +190,6 @@ free_partial_kdata:
182 kfree(p->chunks); 190 kfree(p->chunks);
183 p->chunks = NULL; 191 p->chunks = NULL;
184 p->nchunks = 0; 192 p->nchunks = 0;
185put_ctx:
186 amdgpu_ctx_put(p->ctx);
187free_chunk: 193free_chunk:
188 kfree(chunk_array); 194 kfree(chunk_array);
189 195
@@ -473,11 +479,16 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
473 return -EPERM; 479 return -EPERM;
474 480
475 /* Check if we have user pages and nobody bound the BO already */ 481 /* Check if we have user pages and nobody bound the BO already */
476 if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) { 482 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
477 size_t size = sizeof(struct page *); 483 lobj->user_pages) {
478 484 amdgpu_ttm_placement_from_domain(bo,
479 size *= bo->tbo.ttm->num_pages; 485 AMDGPU_GEM_DOMAIN_CPU);
480 memcpy(bo->tbo.ttm->pages, lobj->user_pages, size); 486 r = ttm_bo_validate(&bo->tbo, &bo->placement, true,
487 false);
488 if (r)
489 return r;
490 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
491 lobj->user_pages);
481 binding_userptr = true; 492 binding_userptr = true;
482 } 493 }
483 494
@@ -502,7 +513,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
502 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 513 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
503 struct amdgpu_bo_list_entry *e; 514 struct amdgpu_bo_list_entry *e;
504 struct list_head duplicates; 515 struct list_head duplicates;
505 bool need_mmap_lock = false;
506 unsigned i, tries = 10; 516 unsigned i, tries = 10;
507 int r; 517 int r;
508 518
@@ -510,9 +520,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
510 520
511 p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); 521 p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
512 if (p->bo_list) { 522 if (p->bo_list) {
513 need_mmap_lock = p->bo_list->first_userptr !=
514 p->bo_list->num_entries;
515 amdgpu_bo_list_get_list(p->bo_list, &p->validated); 523 amdgpu_bo_list_get_list(p->bo_list, &p->validated);
524 if (p->bo_list->first_userptr != p->bo_list->num_entries)
525 p->mn = amdgpu_mn_get(p->adev);
516 } 526 }
517 527
518 INIT_LIST_HEAD(&duplicates); 528 INIT_LIST_HEAD(&duplicates);
@@ -521,9 +531,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
521 if (p->uf_entry.robj) 531 if (p->uf_entry.robj)
522 list_add(&p->uf_entry.tv.head, &p->validated); 532 list_add(&p->uf_entry.tv.head, &p->validated);
523 533
524 if (need_mmap_lock)
525 down_read(&current->mm->mmap_sem);
526
527 while (1) { 534 while (1) {
528 struct list_head need_pages; 535 struct list_head need_pages;
529 unsigned i; 536 unsigned i;
@@ -543,22 +550,24 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
543 INIT_LIST_HEAD(&need_pages); 550 INIT_LIST_HEAD(&need_pages);
544 for (i = p->bo_list->first_userptr; 551 for (i = p->bo_list->first_userptr;
545 i < p->bo_list->num_entries; ++i) { 552 i < p->bo_list->num_entries; ++i) {
553 struct amdgpu_bo *bo;
546 554
547 e = &p->bo_list->array[i]; 555 e = &p->bo_list->array[i];
556 bo = e->robj;
548 557
549 if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm, 558 if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
550 &e->user_invalidated) && e->user_pages) { 559 &e->user_invalidated) && e->user_pages) {
551 560
552 /* We acquired a page array, but somebody 561 /* We acquired a page array, but somebody
553 * invalidated it. Free it and try again 562 * invalidated it. Free it and try again
554 */ 563 */
555 release_pages(e->user_pages, 564 release_pages(e->user_pages,
556 e->robj->tbo.ttm->num_pages); 565 bo->tbo.ttm->num_pages);
557 kvfree(e->user_pages); 566 kvfree(e->user_pages);
558 e->user_pages = NULL; 567 e->user_pages = NULL;
559 } 568 }
560 569
561 if (e->robj->tbo.ttm->state != tt_bound && 570 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
562 !e->user_pages) { 571 !e->user_pages) {
563 list_del(&e->tv.head); 572 list_del(&e->tv.head);
564 list_add(&e->tv.head, &need_pages); 573 list_add(&e->tv.head, &need_pages);
@@ -635,9 +644,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
635 644
636 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, 645 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
637 p->bytes_moved_vis); 646 p->bytes_moved_vis);
638 fpriv->vm.last_eviction_counter =
639 atomic64_read(&p->adev->num_evictions);
640
641 if (p->bo_list) { 647 if (p->bo_list) {
642 struct amdgpu_bo *gds = p->bo_list->gds_obj; 648 struct amdgpu_bo *gds = p->bo_list->gds_obj;
643 struct amdgpu_bo *gws = p->bo_list->gws_obj; 649 struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -678,9 +684,6 @@ error_validate:
678 684
679error_free_pages: 685error_free_pages:
680 686
681 if (need_mmap_lock)
682 up_read(&current->mm->mmap_sem);
683
684 if (p->bo_list) { 687 if (p->bo_list) {
685 for (i = p->bo_list->first_userptr; 688 for (i = p->bo_list->first_userptr;
686 i < p->bo_list->num_entries; ++i) { 689 i < p->bo_list->num_entries; ++i) {
@@ -705,7 +708,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
705 708
706 list_for_each_entry(e, &p->validated, tv.head) { 709 list_for_each_entry(e, &p->validated, tv.head) {
707 struct reservation_object *resv = e->robj->tbo.resv; 710 struct reservation_object *resv = e->robj->tbo.resv;
708 r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); 711 r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
712 amdgpu_bo_explicit_sync(e->robj));
709 713
710 if (r) 714 if (r)
711 return r; 715 return r;
@@ -726,11 +730,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
726{ 730{
727 unsigned i; 731 unsigned i;
728 732
729 if (!error) 733 if (error && backoff)
730 ttm_eu_fence_buffer_objects(&parser->ticket,
731 &parser->validated,
732 parser->fence);
733 else if (backoff)
734 ttm_eu_backoff_reservation(&parser->ticket, 734 ttm_eu_backoff_reservation(&parser->ticket,
735 &parser->validated); 735 &parser->validated);
736 736
@@ -740,8 +740,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
740 740
741 dma_fence_put(parser->fence); 741 dma_fence_put(parser->fence);
742 742
743 if (parser->ctx) 743 if (parser->ctx) {
744 mutex_unlock(&parser->ctx->lock);
744 amdgpu_ctx_put(parser->ctx); 745 amdgpu_ctx_put(parser->ctx);
746 }
745 if (parser->bo_list) 747 if (parser->bo_list)
746 amdgpu_bo_list_put(parser->bo_list); 748 amdgpu_bo_list_put(parser->bo_list);
747 749
@@ -766,10 +768,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
766 if (r) 768 if (r)
767 return r; 769 return r;
768 770
769 r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update);
770 if (r)
771 return r;
772
773 r = amdgpu_vm_clear_freed(adev, vm, NULL); 771 r = amdgpu_vm_clear_freed(adev, vm, NULL);
774 if (r) 772 if (r)
775 return r; 773 return r;
@@ -823,7 +821,13 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
823 821
824 } 822 }
825 823
826 r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync); 824 r = amdgpu_vm_handle_moved(adev, vm);
825 if (r)
826 return r;
827
828 r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
829 if (r)
830 return r;
827 831
828 if (amdgpu_vm_debug && p->bo_list) { 832 if (amdgpu_vm_debug && p->bo_list) {
829 /* Invalidate all BOs to test for userspace bugs */ 833 /* Invalidate all BOs to test for userspace bugs */
@@ -833,7 +837,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
833 if (!bo) 837 if (!bo)
834 continue; 838 continue;
835 839
836 amdgpu_vm_bo_invalidate(adev, bo); 840 amdgpu_vm_bo_invalidate(adev, bo, false);
837 } 841 }
838 } 842 }
839 843
@@ -846,19 +850,63 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
846 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 850 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
847 struct amdgpu_vm *vm = &fpriv->vm; 851 struct amdgpu_vm *vm = &fpriv->vm;
848 struct amdgpu_ring *ring = p->job->ring; 852 struct amdgpu_ring *ring = p->job->ring;
849 int i, r; 853 int r;
850 854
851 /* Only for UVD/VCE VM emulation */ 855 /* Only for UVD/VCE VM emulation */
852 if (ring->funcs->parse_cs) { 856 if (p->job->ring->funcs->parse_cs) {
853 for (i = 0; i < p->job->num_ibs; i++) { 857 unsigned i, j;
854 r = amdgpu_ring_parse_cs(ring, p, i); 858
859 for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
860 struct drm_amdgpu_cs_chunk_ib *chunk_ib;
861 struct amdgpu_bo_va_mapping *m;
862 struct amdgpu_bo *aobj = NULL;
863 struct amdgpu_cs_chunk *chunk;
864 struct amdgpu_ib *ib;
865 uint64_t offset;
866 uint8_t *kptr;
867
868 chunk = &p->chunks[i];
869 ib = &p->job->ibs[j];
870 chunk_ib = chunk->kdata;
871
872 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
873 continue;
874
875 r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
876 &aobj, &m);
877 if (r) {
878 DRM_ERROR("IB va_start is invalid\n");
879 return r;
880 }
881
882 if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
883 (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
884 DRM_ERROR("IB va_start+ib_bytes is invalid\n");
885 return -EINVAL;
886 }
887
888 /* the IB should be reserved at this point */
889 r = amdgpu_bo_kmap(aobj, (void **)&kptr);
890 if (r) {
891 return r;
892 }
893
894 offset = m->start * AMDGPU_GPU_PAGE_SIZE;
895 kptr += chunk_ib->va_start - offset;
896
897 memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
898 amdgpu_bo_kunmap(aobj);
899
900 r = amdgpu_ring_parse_cs(ring, p, j);
855 if (r) 901 if (r)
856 return r; 902 return r;
903
904 j++;
857 } 905 }
858 } 906 }
859 907
860 if (p->job->vm) { 908 if (p->job->vm) {
861 p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo); 909 p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
862 910
863 r = amdgpu_bo_vm_update_pte(p); 911 r = amdgpu_bo_vm_update_pte(p);
864 if (r) 912 if (r)
@@ -920,54 +968,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
920 968
921 parser->job->ring = ring; 969 parser->job->ring = ring;
922 970
923 if (ring->funcs->parse_cs) { 971 r = amdgpu_ib_get(adev, vm,
924 struct amdgpu_bo_va_mapping *m; 972 ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
925 struct amdgpu_bo *aobj = NULL; 973 ib);
926 uint64_t offset; 974 if (r) {
927 uint8_t *kptr; 975 DRM_ERROR("Failed to get ib !\n");
928 976 return r;
929 m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
930 &aobj);
931 if (!aobj) {
932 DRM_ERROR("IB va_start is invalid\n");
933 return -EINVAL;
934 }
935
936 if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
937 (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
938 DRM_ERROR("IB va_start+ib_bytes is invalid\n");
939 return -EINVAL;
940 }
941
942 /* the IB should be reserved at this point */
943 r = amdgpu_bo_kmap(aobj, (void **)&kptr);
944 if (r) {
945 return r;
946 }
947
948 offset = m->start * AMDGPU_GPU_PAGE_SIZE;
949 kptr += chunk_ib->va_start - offset;
950
951 r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
952 if (r) {
953 DRM_ERROR("Failed to get ib !\n");
954 return r;
955 }
956
957 memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
958 amdgpu_bo_kunmap(aobj);
959 } else {
960 r = amdgpu_ib_get(adev, vm, 0, ib);
961 if (r) {
962 DRM_ERROR("Failed to get ib !\n");
963 return r;
964 }
965
966 } 977 }
967 978
968 ib->gpu_addr = chunk_ib->va_start; 979 ib->gpu_addr = chunk_ib->va_start;
969 ib->length_dw = chunk_ib->ib_bytes / 4; 980 ib->length_dw = chunk_ib->ib_bytes / 4;
970 ib->flags = chunk_ib->flags; 981 ib->flags = chunk_ib->flags;
982
971 j++; 983 j++;
972 } 984 }
973 985
@@ -977,7 +989,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
977 parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) 989 parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
978 return -EINVAL; 990 return -EINVAL;
979 991
980 return 0; 992 return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
981} 993}
982 994
983static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, 995static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1131,14 +1143,31 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1131 struct amdgpu_ring *ring = p->job->ring; 1143 struct amdgpu_ring *ring = p->job->ring;
1132 struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; 1144 struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
1133 struct amdgpu_job *job; 1145 struct amdgpu_job *job;
1146 unsigned i;
1147 uint64_t seq;
1148
1134 int r; 1149 int r;
1135 1150
1151 amdgpu_mn_lock(p->mn);
1152 if (p->bo_list) {
1153 for (i = p->bo_list->first_userptr;
1154 i < p->bo_list->num_entries; ++i) {
1155 struct amdgpu_bo *bo = p->bo_list->array[i].robj;
1156
1157 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
1158 amdgpu_mn_unlock(p->mn);
1159 return -ERESTARTSYS;
1160 }
1161 }
1162 }
1163
1136 job = p->job; 1164 job = p->job;
1137 p->job = NULL; 1165 p->job = NULL;
1138 1166
1139 r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp); 1167 r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
1140 if (r) { 1168 if (r) {
1141 amdgpu_job_free(job); 1169 amdgpu_job_free(job);
1170 amdgpu_mn_unlock(p->mn);
1142 return r; 1171 return r;
1143 } 1172 }
1144 1173
@@ -1146,21 +1175,36 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1146 job->fence_ctx = entity->fence_context; 1175 job->fence_ctx = entity->fence_context;
1147 p->fence = dma_fence_get(&job->base.s_fence->finished); 1176 p->fence = dma_fence_get(&job->base.s_fence->finished);
1148 1177
1178 r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
1179 if (r) {
1180 dma_fence_put(p->fence);
1181 dma_fence_put(&job->base.s_fence->finished);
1182 amdgpu_job_free(job);
1183 amdgpu_mn_unlock(p->mn);
1184 return r;
1185 }
1186
1149 amdgpu_cs_post_dependencies(p); 1187 amdgpu_cs_post_dependencies(p);
1150 1188
1151 cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); 1189 cs->out.handle = seq;
1152 job->uf_sequence = cs->out.handle; 1190 job->uf_sequence = seq;
1191
1153 amdgpu_job_free_resources(job); 1192 amdgpu_job_free_resources(job);
1193 amdgpu_ring_priority_get(job->ring,
1194 amd_sched_get_job_priority(&job->base));
1154 1195
1155 trace_amdgpu_cs_ioctl(job); 1196 trace_amdgpu_cs_ioctl(job);
1156 amd_sched_entity_push_job(&job->base); 1197 amd_sched_entity_push_job(&job->base);
1198
1199 ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1200 amdgpu_mn_unlock(p->mn);
1201
1157 return 0; 1202 return 0;
1158} 1203}
1159 1204
1160int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 1205int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1161{ 1206{
1162 struct amdgpu_device *adev = dev->dev_private; 1207 struct amdgpu_device *adev = dev->dev_private;
1163 struct amdgpu_fpriv *fpriv = filp->driver_priv;
1164 union drm_amdgpu_cs *cs = data; 1208 union drm_amdgpu_cs *cs = data;
1165 struct amdgpu_cs_parser parser = {}; 1209 struct amdgpu_cs_parser parser = {};
1166 bool reserved_buffers = false; 1210 bool reserved_buffers = false;
@@ -1168,8 +1212,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1168 1212
1169 if (!adev->accel_working) 1213 if (!adev->accel_working)
1170 return -EBUSY; 1214 return -EBUSY;
1171 if (amdgpu_kms_vram_lost(adev, fpriv))
1172 return -ENODEV;
1173 1215
1174 parser.adev = adev; 1216 parser.adev = adev;
1175 parser.filp = filp; 1217 parser.filp = filp;
@@ -1180,6 +1222,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1180 goto out; 1222 goto out;
1181 } 1223 }
1182 1224
1225 r = amdgpu_cs_ib_fill(adev, &parser);
1226 if (r)
1227 goto out;
1228
1183 r = amdgpu_cs_parser_bos(&parser, data); 1229 r = amdgpu_cs_parser_bos(&parser, data);
1184 if (r) { 1230 if (r) {
1185 if (r == -ENOMEM) 1231 if (r == -ENOMEM)
@@ -1190,9 +1236,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1190 } 1236 }
1191 1237
1192 reserved_buffers = true; 1238 reserved_buffers = true;
1193 r = amdgpu_cs_ib_fill(adev, &parser);
1194 if (r)
1195 goto out;
1196 1239
1197 r = amdgpu_cs_dependencies(adev, &parser); 1240 r = amdgpu_cs_dependencies(adev, &parser);
1198 if (r) { 1241 if (r) {
@@ -1228,16 +1271,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1228{ 1271{
1229 union drm_amdgpu_wait_cs *wait = data; 1272 union drm_amdgpu_wait_cs *wait = data;
1230 struct amdgpu_device *adev = dev->dev_private; 1273 struct amdgpu_device *adev = dev->dev_private;
1231 struct amdgpu_fpriv *fpriv = filp->driver_priv;
1232 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); 1274 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1233 struct amdgpu_ring *ring = NULL; 1275 struct amdgpu_ring *ring = NULL;
1234 struct amdgpu_ctx *ctx; 1276 struct amdgpu_ctx *ctx;
1235 struct dma_fence *fence; 1277 struct dma_fence *fence;
1236 long r; 1278 long r;
1237 1279
1238 if (amdgpu_kms_vram_lost(adev, fpriv))
1239 return -ENODEV;
1240
1241 ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); 1280 ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1242 if (ctx == NULL) 1281 if (ctx == NULL)
1243 return -EINVAL; 1282 return -EINVAL;
@@ -1255,6 +1294,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1255 r = PTR_ERR(fence); 1294 r = PTR_ERR(fence);
1256 else if (fence) { 1295 else if (fence) {
1257 r = dma_fence_wait_timeout(fence, true, timeout); 1296 r = dma_fence_wait_timeout(fence, true, timeout);
1297 if (r > 0 && fence->error)
1298 r = fence->error;
1258 dma_fence_put(fence); 1299 dma_fence_put(fence);
1259 } else 1300 } else
1260 r = 1; 1301 r = 1;
@@ -1302,6 +1343,62 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1302 return fence; 1343 return fence;
1303} 1344}
1304 1345
1346int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1347 struct drm_file *filp)
1348{
1349 struct amdgpu_device *adev = dev->dev_private;
1350 union drm_amdgpu_fence_to_handle *info = data;
1351 struct dma_fence *fence;
1352 struct drm_syncobj *syncobj;
1353 struct sync_file *sync_file;
1354 int fd, r;
1355
1356 fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1357 if (IS_ERR(fence))
1358 return PTR_ERR(fence);
1359
1360 switch (info->in.what) {
1361 case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1362 r = drm_syncobj_create(&syncobj, 0, fence);
1363 dma_fence_put(fence);
1364 if (r)
1365 return r;
1366 r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1367 drm_syncobj_put(syncobj);
1368 return r;
1369
1370 case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1371 r = drm_syncobj_create(&syncobj, 0, fence);
1372 dma_fence_put(fence);
1373 if (r)
1374 return r;
1375 r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
1376 drm_syncobj_put(syncobj);
1377 return r;
1378
1379 case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1380 fd = get_unused_fd_flags(O_CLOEXEC);
1381 if (fd < 0) {
1382 dma_fence_put(fence);
1383 return fd;
1384 }
1385
1386 sync_file = sync_file_create(fence);
1387 dma_fence_put(fence);
1388 if (!sync_file) {
1389 put_unused_fd(fd);
1390 return -ENOMEM;
1391 }
1392
1393 fd_install(fd, sync_file->file);
1394 info->out.handle = fd;
1395 return 0;
1396
1397 default:
1398 return -EINVAL;
1399 }
1400}
1401
1305/** 1402/**
1306 * amdgpu_cs_wait_all_fence - wait on all fences to signal 1403 * amdgpu_cs_wait_all_fence - wait on all fences to signal
1307 * 1404 *
@@ -1336,6 +1433,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1336 1433
1337 if (r == 0) 1434 if (r == 0)
1338 break; 1435 break;
1436
1437 if (fence->error)
1438 return fence->error;
1339 } 1439 }
1340 1440
1341 memset(wait, 0, sizeof(*wait)); 1441 memset(wait, 0, sizeof(*wait));
@@ -1381,6 +1481,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1381 array[i] = fence; 1481 array[i] = fence;
1382 } else { /* NULL, the fence has been already signaled */ 1482 } else { /* NULL, the fence has been already signaled */
1383 r = 1; 1483 r = 1;
1484 first = i;
1384 goto out; 1485 goto out;
1385 } 1486 }
1386 } 1487 }
@@ -1395,7 +1496,7 @@ out:
1395 wait->out.status = (r > 0); 1496 wait->out.status = (r > 0);
1396 wait->out.first_signaled = first; 1497 wait->out.first_signaled = first;
1397 /* set return value 0 to indicate success */ 1498 /* set return value 0 to indicate success */
1398 r = 0; 1499 r = array[first]->error;
1399 1500
1400err_free_fence_array: 1501err_free_fence_array:
1401 for (i = 0; i < fence_count; i++) 1502 for (i = 0; i < fence_count; i++)
@@ -1416,15 +1517,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1416 struct drm_file *filp) 1517 struct drm_file *filp)
1417{ 1518{
1418 struct amdgpu_device *adev = dev->dev_private; 1519 struct amdgpu_device *adev = dev->dev_private;
1419 struct amdgpu_fpriv *fpriv = filp->driver_priv;
1420 union drm_amdgpu_wait_fences *wait = data; 1520 union drm_amdgpu_wait_fences *wait = data;
1421 uint32_t fence_count = wait->in.fence_count; 1521 uint32_t fence_count = wait->in.fence_count;
1422 struct drm_amdgpu_fence *fences_user; 1522 struct drm_amdgpu_fence *fences_user;
1423 struct drm_amdgpu_fence *fences; 1523 struct drm_amdgpu_fence *fences;
1424 int r; 1524 int r;
1425 1525
1426 if (amdgpu_kms_vram_lost(adev, fpriv))
1427 return -ENODEV;
1428 /* Get the fences from userspace */ 1526 /* Get the fences from userspace */
1429 fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), 1527 fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1430 GFP_KERNEL); 1528 GFP_KERNEL);
@@ -1460,78 +1558,36 @@ err_free_fences:
1460 * virtual memory address. Returns allocation structure when found, NULL 1558 * virtual memory address. Returns allocation structure when found, NULL
1461 * otherwise. 1559 * otherwise.
1462 */ 1560 */
1463struct amdgpu_bo_va_mapping * 1561int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1464amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, 1562 uint64_t addr, struct amdgpu_bo **bo,
1465 uint64_t addr, struct amdgpu_bo **bo) 1563 struct amdgpu_bo_va_mapping **map)
1466{ 1564{
1565 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1566 struct amdgpu_vm *vm = &fpriv->vm;
1467 struct amdgpu_bo_va_mapping *mapping; 1567 struct amdgpu_bo_va_mapping *mapping;
1468 unsigned i;
1469
1470 if (!parser->bo_list)
1471 return NULL;
1472
1473 addr /= AMDGPU_GPU_PAGE_SIZE;
1474
1475 for (i = 0; i < parser->bo_list->num_entries; i++) {
1476 struct amdgpu_bo_list_entry *lobj;
1477
1478 lobj = &parser->bo_list->array[i];
1479 if (!lobj->bo_va)
1480 continue;
1481
1482 list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
1483 if (mapping->start > addr ||
1484 addr > mapping->last)
1485 continue;
1486
1487 *bo = lobj->bo_va->base.bo;
1488 return mapping;
1489 }
1490
1491 list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
1492 if (mapping->start > addr ||
1493 addr > mapping->last)
1494 continue;
1495
1496 *bo = lobj->bo_va->base.bo;
1497 return mapping;
1498 }
1499 }
1500
1501 return NULL;
1502}
1503
1504/**
1505 * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
1506 *
1507 * @parser: command submission parser context
1508 *
1509 * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
1510 */
1511int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
1512{
1513 unsigned i;
1514 int r; 1568 int r;
1515 1569
1516 if (!parser->bo_list) 1570 addr /= AMDGPU_GPU_PAGE_SIZE;
1517 return 0;
1518 1571
1519 for (i = 0; i < parser->bo_list->num_entries; i++) { 1572 mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1520 struct amdgpu_bo *bo = parser->bo_list->array[i].robj; 1573 if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1574 return -EINVAL;
1521 1575
1522 r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); 1576 *bo = mapping->bo_va->base.bo;
1523 if (unlikely(r)) 1577 *map = mapping;
1524 return r;
1525 1578
1526 if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) 1579 /* Double check that the BO is reserved by this CS */
1527 continue; 1580 if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
1581 return -EINVAL;
1528 1582
1529 bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 1583 if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1530 amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains); 1584 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1531 r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); 1585 amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
1532 if (unlikely(r)) 1586 r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false,
1587 false);
1588 if (r)
1533 return r; 1589 return r;
1534 } 1590 }
1535 1591
1536 return 0; 1592 return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
1537} 1593}