author     Linus Torvalds <torvalds@linux-foundation.org>  2017-11-15 23:42:10 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-11-15 23:42:10 -0500
commit     e60e1ee60630cafef5e430c2ae364877e061d980
tree       816aeef8fe8d4a2c6a1ebbc7a350839bac8dd4c2  /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent     5d352e69c60e54b5f04d6e337a1d2bf0dbf3d94a
parent     f150891fd9878ef0d9197c4e8451ce67c3bdd014
Merge tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main drm pull request for v4.15.
Core:
- Atomic object lifetime fixes
- Atomic iterator improvements
- Sparse/smatch fixes
- Legacy kms ioctls to be interruptible
- EDID override improvements
- fb/gem helper cleanups
- Simple outreachy patches
- Documentation improvements
- Fix dma-buf rcu races
- DRM mode object leasing for improving VR use cases.
- vgaarb improvements for non-x86 platforms.
New driver:
- tve200: Faraday Technology TVE200 block.
This "TV Encoder" encodes a ITU-T BT.656 stream and can be found in
the StorLink SL3516 (later Cortina Systems CS3516) as well as the
Grain Media GM8180.
New bridges:
- SiI9234 support
New panels:
- S6E63J0X03, OTM8009A, Seiko 43WVF1G, 7" rpi touch panel, Toshiba
LT089AC19000, Innolux AT043TN24
i915:
- Remove Coffeelake from alpha support
- Cannonlake workarounds
- Infoframe refactoring for DisplayPort
- VBT updates
- DisplayPort vswing/emph/buffer translation refactoring
- CCS fixes
- Restore GPU clock boost on missed vblanks
- Scatter list updates for userptr allocations
- Gen9+ transition watermarks
- Display IPC (Isochronous Priority Control)
- Private PAT management
- GVT: improved error handling and pci config sanitizing
- Execlist refactoring
- Transparent Huge Page support
- User defined priorities support
- HuC/GuC firmware refactoring
- DP MST fixes
- eDP power sequencing fixes
- Use RCU instead of stop_machine
- PSR state tracking support
- Eviction fixes
- BDW DP aux channel timeout fixes
- LSPCON fixes
- Cannonlake PLL fixes
amdgpu:
- Per VM BO support
- Powerplay cleanups
- CI powerplay support
- PASID mgr for kfd
- SR-IOV fixes
- initial GPU reset for vega10
- Prime mmap support
- TTM updates
- Clock query interface for Raven
- Fence to handle ioctl
- UVD encode ring support on Polaris
- Transparent huge page DMA support
- Compute LRU pipe tweaks
- BO flag to allow buffers to opt out of implicit sync
- CTX priority setting API
- VRAM lost infrastructure plumbing
qxl:
- fix flicker since atomic rework
amdkfd:
- Further improvements from internal AMD tree
- Usermode events
- Drop radeon support
nouveau:
- Pascal temperature sensor support
- Improved BAR2 handling
- MMU rework to support Pascal MMU
exynos:
- Improved HDMI/mixer support
- HDMI audio interface support
tegra:
- Prep work for tegra186
- Cleanup/fixes
msm:
- Preemption support for a5xx
- Display fixes for 8x96 (snapdragon 820)
- Async cursor plane fixes
- FW loading rework
- GPU debugging improvements
vc4:
- Prep for DSI panels
- fix T-format tiling scanout
- New madvise ioctl
Rockchip:
- LVDS support
omapdrm:
- omap4 HDMI CEC support
etnaviv:
- GPU performance counters groundwork
sun4i:
- refactor driver load + TCON backend
- HDMI improvements
- A31 support
- Misc fixes
udl:
- Probe/EDID read fixes.
tilcdc:
- Misc fixes.
pl111:
- Support more variants
adv7511:
- Improve EDID handling.
- HDMI CEC support
sii8620:
- Add remote control support"
* tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux: (1480 commits)
drm/rockchip: analogix_dp: Use mutex rather than spinlock
drm/mode_object: fix documentation for object lookups.
drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU
drm/i915: Move init_clock_gating() back to where it was
drm/i915: Prune the reservation shared fence array
drm/i915: Idle the GPU before shinking everything
drm/i915: Lock llist_del_first() vs llist_del_all()
drm/i915: Calculate ironlake intermediate watermarks correctly, v2.
drm/i915: Disable lazy PPGTT page table optimization for vGPU
drm/i915/execlists: Remove the priority "optimisation"
drm/i915: Filter out spurious execlists context-switch interrupts
drm/amdgpu: use irq-safe lock for kiq->ring_lock
drm/amdgpu: bypass lru touch for KIQ ring submission
drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories()
drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs()
drm/amd/powerplay: initialize a variable before using it
drm/amd/powerplay: suppress KASAN out of bounds warning in vega10_populate_all_memory_levels
drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition
drm/vblank: Tune drm_crtc_accurate_vblank_count() WARN down to a debug
drm/rockchip: add CONFIG_OF dependency for lvds
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  386
1 file changed, 221 insertions, 165 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index cd664832f9e8..6c78623e1386 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -25,6 +25,7 @@
  * Jerome Glisse <glisse@freedesktop.org>
  */
 #include <linux/pagemap.h>
+#include <linux/sync_file.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
@@ -89,12 +90,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                 goto free_chunk;
         }
 
+        mutex_lock(&p->ctx->lock);
+
         /* get chunks */
         chunk_array_user = u64_to_user_ptr(cs->in.chunks);
         if (copy_from_user(chunk_array, chunk_array_user,
                            sizeof(uint64_t)*cs->in.num_chunks)) {
                 ret = -EFAULT;
-                goto put_ctx;
+                goto free_chunk;
         }
 
         p->nchunks = cs->in.num_chunks;
@@ -102,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                         GFP_KERNEL);
         if (!p->chunks) {
                 ret = -ENOMEM;
-                goto put_ctx;
+                goto free_chunk;
         }
 
         for (i = 0; i < p->nchunks; i++) {
@@ -169,6 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
         if (ret)
                 goto free_all_kdata;
 
+        if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+                ret = -ECANCELED;
+                goto free_all_kdata;
+        }
+
         if (p->uf_entry.robj)
                 p->job->uf_addr = uf_offset;
         kfree(chunk_array);
@@ -182,8 +190,6 @@ free_partial_kdata:
         kfree(p->chunks);
         p->chunks = NULL;
         p->nchunks = 0;
-put_ctx:
-        amdgpu_ctx_put(p->ctx);
 free_chunk:
         kfree(chunk_array);
 
@@ -473,11 +479,16 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
                         return -EPERM;
 
                 /* Check if we have user pages and nobody bound the BO already */
-                if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
-                        size_t size = sizeof(struct page *);
-
-                        size *= bo->tbo.ttm->num_pages;
-                        memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
+                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
+                    lobj->user_pages) {
+                        amdgpu_ttm_placement_from_domain(bo,
+                                                         AMDGPU_GEM_DOMAIN_CPU);
+                        r = ttm_bo_validate(&bo->tbo, &bo->placement, true,
+                                            false);
+                        if (r)
+                                return r;
+                        amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
+                                                     lobj->user_pages);
                         binding_userptr = true;
                 }
 
@@ -502,7 +513,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
         struct amdgpu_bo_list_entry *e;
         struct list_head duplicates;
-        bool need_mmap_lock = false;
         unsigned i, tries = 10;
         int r;
 
@@ -510,9 +520,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
         p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
         if (p->bo_list) {
-                need_mmap_lock = p->bo_list->first_userptr !=
-                        p->bo_list->num_entries;
                 amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+                if (p->bo_list->first_userptr != p->bo_list->num_entries)
+                        p->mn = amdgpu_mn_get(p->adev);
         }
 
         INIT_LIST_HEAD(&duplicates);
@@ -521,9 +531,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
         if (p->uf_entry.robj)
                 list_add(&p->uf_entry.tv.head, &p->validated);
 
-        if (need_mmap_lock)
-                down_read(&current->mm->mmap_sem);
-
         while (1) {
                 struct list_head need_pages;
                 unsigned i;
@@ -543,22 +550,24 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                 INIT_LIST_HEAD(&need_pages);
                 for (i = p->bo_list->first_userptr;
                      i < p->bo_list->num_entries; ++i) {
+                        struct amdgpu_bo *bo;
 
                         e = &p->bo_list->array[i];
+                        bo = e->robj;
 
-                        if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
+                        if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
                                  &e->user_invalidated) && e->user_pages) {
 
                                 /* We acquired a page array, but somebody
                                  * invalidated it. Free it and try again
                                  */
                                 release_pages(e->user_pages,
-                                              e->robj->tbo.ttm->num_pages);
+                                              bo->tbo.ttm->num_pages);
                                 kvfree(e->user_pages);
                                 e->user_pages = NULL;
                         }
 
-                        if (e->robj->tbo.ttm->state != tt_bound &&
+                        if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
                             !e->user_pages) {
                                 list_del(&e->tv.head);
                                 list_add(&e->tv.head, &need_pages);
@@ -635,9 +644,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
         amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
                                      p->bytes_moved_vis);
-        fpriv->vm.last_eviction_counter =
-                atomic64_read(&p->adev->num_evictions);
-
         if (p->bo_list) {
                 struct amdgpu_bo *gds = p->bo_list->gds_obj;
                 struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -678,9 +684,6 @@ error_validate:
 
 error_free_pages:
 
-        if (need_mmap_lock)
-                up_read(&current->mm->mmap_sem);
-
         if (p->bo_list) {
                 for (i = p->bo_list->first_userptr;
                      i < p->bo_list->num_entries; ++i) {
@@ -705,7 +708,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 
         list_for_each_entry(e, &p->validated, tv.head) {
                 struct reservation_object *resv = e->robj->tbo.resv;
-                r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);
+                r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
+                                     amdgpu_bo_explicit_sync(e->robj));
 
                 if (r)
                         return r;
@@ -726,11 +730,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 {
         unsigned i;
 
-        if (!error)
-                ttm_eu_fence_buffer_objects(&parser->ticket,
-                                            &parser->validated,
-                                            parser->fence);
-        else if (backoff)
+        if (error && backoff)
                 ttm_eu_backoff_reservation(&parser->ticket,
                                            &parser->validated);
 
@@ -740,8 +740,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
         dma_fence_put(parser->fence);
 
-        if (parser->ctx)
+        if (parser->ctx) {
+                mutex_unlock(&parser->ctx->lock);
                 amdgpu_ctx_put(parser->ctx);
+        }
         if (parser->bo_list)
                 amdgpu_bo_list_put(parser->bo_list);
 
@@ -766,10 +768,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
         if (r)
                 return r;
 
-        r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update);
-        if (r)
-                return r;
-
         r = amdgpu_vm_clear_freed(adev, vm, NULL);
         if (r)
                 return r;
@@ -823,7 +821,13 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
         }
 
-        r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
+        r = amdgpu_vm_handle_moved(adev, vm);
+        if (r)
+                return r;
+
+        r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
+        if (r)
+                return r;
 
         if (amdgpu_vm_debug && p->bo_list) {
                 /* Invalidate all BOs to test for userspace bugs */
@@ -833,7 +837,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
                         if (!bo)
                                 continue;
 
-                        amdgpu_vm_bo_invalidate(adev, bo);
+                        amdgpu_vm_bo_invalidate(adev, bo, false);
                 }
         }
 
@@ -846,19 +850,63 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
         struct amdgpu_vm *vm = &fpriv->vm;
         struct amdgpu_ring *ring = p->job->ring;
-        int i, r;
+        int r;
 
         /* Only for UVD/VCE VM emulation */
-        if (ring->funcs->parse_cs) {
-                for (i = 0; i < p->job->num_ibs; i++) {
-                        r = amdgpu_ring_parse_cs(ring, p, i);
+        if (p->job->ring->funcs->parse_cs) {
+                unsigned i, j;
+
+                for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+                        struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+                        struct amdgpu_bo_va_mapping *m;
+                        struct amdgpu_bo *aobj = NULL;
+                        struct amdgpu_cs_chunk *chunk;
+                        struct amdgpu_ib *ib;
+                        uint64_t offset;
+                        uint8_t *kptr;
+
+                        chunk = &p->chunks[i];
+                        ib = &p->job->ibs[j];
+                        chunk_ib = chunk->kdata;
+
+                        if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+                                continue;
+
+                        r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
+                                                   &aobj, &m);
+                        if (r) {
+                                DRM_ERROR("IB va_start is invalid\n");
+                                return r;
+                        }
+
+                        if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
+                            (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+                                return -EINVAL;
+                        }
+
+                        /* the IB should be reserved at this point */
+                        r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+                        if (r) {
+                                return r;
+                        }
+
+                        offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+                        kptr += chunk_ib->va_start - offset;
+
+                        memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+                        amdgpu_bo_kunmap(aobj);
+
+                        r = amdgpu_ring_parse_cs(ring, p, j);
                         if (r)
                                 return r;
+
+                        j++;
                 }
         }
 
         if (p->job->vm) {
-                p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+                p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
 
                 r = amdgpu_bo_vm_update_pte(p);
                 if (r)
@@ -920,54 +968,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
                 parser->job->ring = ring;
 
-                if (ring->funcs->parse_cs) {
-                        struct amdgpu_bo_va_mapping *m;
-                        struct amdgpu_bo *aobj = NULL;
-                        uint64_t offset;
-                        uint8_t *kptr;
-
-                        m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
-                                                   &aobj);
-                        if (!aobj) {
-                                DRM_ERROR("IB va_start is invalid\n");
-                                return -EINVAL;
-                        }
-
-                        if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-                            (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-                                return -EINVAL;
-                        }
-
-                        /* the IB should be reserved at this point */
-                        r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-                        if (r) {
-                                return r;
-                        }
-
-                        offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-                        kptr += chunk_ib->va_start - offset;
-
-                        r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
-                        if (r) {
-                                DRM_ERROR("Failed to get ib !\n");
-                                return r;
-                        }
-
-                        memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-                        amdgpu_bo_kunmap(aobj);
-                } else {
-                        r = amdgpu_ib_get(adev, vm, 0, ib);
-                        if (r) {
-                                DRM_ERROR("Failed to get ib !\n");
-                                return r;
-                        }
-
+                r = amdgpu_ib_get(adev, vm,
+                                  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
+                                  ib);
+                if (r) {
+                        DRM_ERROR("Failed to get ib !\n");
+                        return r;
                 }
 
                 ib->gpu_addr = chunk_ib->va_start;
                 ib->length_dw = chunk_ib->ib_bytes / 4;
                 ib->flags = chunk_ib->flags;
+
                 j++;
         }
 
@@ -977,7 +989,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
             parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
                 return -EINVAL;
 
-        return 0;
+        return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1131,14 +1143,31 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
         struct amdgpu_ring *ring = p->job->ring;
         struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
         struct amdgpu_job *job;
+        unsigned i;
+        uint64_t seq;
+
         int r;
 
+        amdgpu_mn_lock(p->mn);
+        if (p->bo_list) {
+                for (i = p->bo_list->first_userptr;
+                     i < p->bo_list->num_entries; ++i) {
+                        struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+
+                        if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+                                amdgpu_mn_unlock(p->mn);
+                                return -ERESTARTSYS;
+                        }
+                }
+        }
+
         job = p->job;
         p->job = NULL;
 
         r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
         if (r) {
                 amdgpu_job_free(job);
+                amdgpu_mn_unlock(p->mn);
                 return r;
         }
 
@@ -1146,21 +1175,36 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
         job->fence_ctx = entity->fence_context;
         p->fence = dma_fence_get(&job->base.s_fence->finished);
 
+        r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
+        if (r) {
+                dma_fence_put(p->fence);
+                dma_fence_put(&job->base.s_fence->finished);
+                amdgpu_job_free(job);
+                amdgpu_mn_unlock(p->mn);
+                return r;
+        }
+
         amdgpu_cs_post_dependencies(p);
 
-        cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
-        job->uf_sequence = cs->out.handle;
+        cs->out.handle = seq;
+        job->uf_sequence = seq;
+
         amdgpu_job_free_resources(job);
+        amdgpu_ring_priority_get(job->ring,
+                                 amd_sched_get_job_priority(&job->base));
 
         trace_amdgpu_cs_ioctl(job);
         amd_sched_entity_push_job(&job->base);
+
+        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
+        amdgpu_mn_unlock(p->mn);
+
         return 0;
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
         struct amdgpu_device *adev = dev->dev_private;
-        struct amdgpu_fpriv *fpriv = filp->driver_priv;
         union drm_amdgpu_cs *cs = data;
         struct amdgpu_cs_parser parser = {};
         bool reserved_buffers = false;
@@ -1168,8 +1212,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
         if (!adev->accel_working)
                 return -EBUSY;
-        if (amdgpu_kms_vram_lost(adev, fpriv))
-                return -ENODEV;
 
         parser.adev = adev;
         parser.filp = filp;
@@ -1180,6 +1222,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                 goto out;
         }
 
+        r = amdgpu_cs_ib_fill(adev, &parser);
+        if (r)
+                goto out;
+
         r = amdgpu_cs_parser_bos(&parser, data);
         if (r) {
                 if (r == -ENOMEM)
@@ -1190,9 +1236,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
         }
 
         reserved_buffers = true;
-        r = amdgpu_cs_ib_fill(adev, &parser);
-        if (r)
-                goto out;
 
         r = amdgpu_cs_dependencies(adev, &parser);
         if (r) {
@@ -1228,16 +1271,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
         union drm_amdgpu_wait_cs *wait = data;
         struct amdgpu_device *adev = dev->dev_private;
-        struct amdgpu_fpriv *fpriv = filp->driver_priv;
         unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
         struct amdgpu_ring *ring = NULL;
         struct amdgpu_ctx *ctx;
         struct dma_fence *fence;
         long r;
 
-        if (amdgpu_kms_vram_lost(adev, fpriv))
-                return -ENODEV;
-
         ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
         if (ctx == NULL)
                 return -EINVAL;
@@ -1255,6 +1294,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
                 r = PTR_ERR(fence);
         else if (fence) {
                 r = dma_fence_wait_timeout(fence, true, timeout);
+                if (r > 0 && fence->error)
+                        r = fence->error;
                 dma_fence_put(fence);
         } else
                 r = 1;
@@ -1302,6 +1343,62 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
         return fence;
 }
 
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *filp)
+{
+        struct amdgpu_device *adev = dev->dev_private;
+        union drm_amdgpu_fence_to_handle *info = data;
+        struct dma_fence *fence;
+        struct drm_syncobj *syncobj;
+        struct sync_file *sync_file;
+        int fd, r;
+
+        fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
+        if (IS_ERR(fence))
+                return PTR_ERR(fence);
+
+        switch (info->in.what) {
+        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
+                r = drm_syncobj_create(&syncobj, 0, fence);
+                dma_fence_put(fence);
+                if (r)
+                        return r;
+                r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
+                drm_syncobj_put(syncobj);
+                return r;
+
+        case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
+                r = drm_syncobj_create(&syncobj, 0, fence);
+                dma_fence_put(fence);
+                if (r)
+                        return r;
+                r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle);
+                drm_syncobj_put(syncobj);
+                return r;
+
+        case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
+                fd = get_unused_fd_flags(O_CLOEXEC);
+                if (fd < 0) {
+                        dma_fence_put(fence);
+                        return fd;
+                }
+
+                sync_file = sync_file_create(fence);
+                dma_fence_put(fence);
+                if (!sync_file) {
+                        put_unused_fd(fd);
+                        return -ENOMEM;
+                }
+
+                fd_install(fd, sync_file->file);
+                info->out.handle = fd;
+                return 0;
+
+        default:
+                return -EINVAL;
+        }
+}
+
 /**
  * amdgpu_cs_wait_all_fence - wait on all fences to signal
  *
@@ -1336,6 +1433,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
 
                 if (r == 0)
                         break;
+
+                if (fence->error)
+                        return fence->error;
         }
 
         memset(wait, 0, sizeof(*wait));
@@ -1381,6 +1481,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
                         array[i] = fence;
                 } else { /* NULL, the fence has been already signaled */
                         r = 1;
+                        first = i;
                         goto out;
                 }
         }
@@ -1395,7 +1496,7 @@ out:
         wait->out.status = (r > 0);
         wait->out.first_signaled = first;
         /* set return value 0 to indicate success */
-        r = 0;
+        r = array[first]->error;
 
 err_free_fence_array:
         for (i = 0; i < fence_count; i++)
@@ -1416,15 +1517,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
                                 struct drm_file *filp)
 {
         struct amdgpu_device *adev = dev->dev_private;
-        struct amdgpu_fpriv *fpriv = filp->driver_priv;
         union drm_amdgpu_wait_fences *wait = data;
         uint32_t fence_count = wait->in.fence_count;
         struct drm_amdgpu_fence *fences_user;
         struct drm_amdgpu_fence *fences;
         int r;
 
-        if (amdgpu_kms_vram_lost(adev, fpriv))
-                return -ENODEV;
         /* Get the fences from userspace */
         fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
                                GFP_KERNEL);
@@ -1460,78 +1558,36 @@ err_free_fences:
  * virtual memory address. Returns allocation structure when found, NULL
  * otherwise.
  */
-struct amdgpu_bo_va_mapping *
-amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
-                       uint64_t addr, struct amdgpu_bo **bo)
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+                           uint64_t addr, struct amdgpu_bo **bo,
+                           struct amdgpu_bo_va_mapping **map)
 {
+        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+        struct amdgpu_vm *vm = &fpriv->vm;
         struct amdgpu_bo_va_mapping *mapping;
-        unsigned i;
-
-        if (!parser->bo_list)
-                return NULL;
-
-        addr /= AMDGPU_GPU_PAGE_SIZE;
-
-        for (i = 0; i < parser->bo_list->num_entries; i++) {
-                struct amdgpu_bo_list_entry *lobj;
-
-                lobj = &parser->bo_list->array[i];
-                if (!lobj->bo_va)
-                        continue;
-
-                list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
-                        if (mapping->start > addr ||
-                            addr > mapping->last)
-                                continue;
-
-                        *bo = lobj->bo_va->base.bo;
-                        return mapping;
-                }
-
-                list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
-                        if (mapping->start > addr ||
-                            addr > mapping->last)
-                                continue;
-
-                        *bo = lobj->bo_va->base.bo;
-                        return mapping;
-                }
-        }
-
-        return NULL;
-}
-
-/**
- * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM
- *
- * @parser: command submission parser context
- *
- * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
- */
-int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
-{
-        unsigned i;
         int r;
 
-        if (!parser->bo_list)
-                return 0;
+        addr /= AMDGPU_GPU_PAGE_SIZE;
 
-        for (i = 0; i < parser->bo_list->num_entries; i++) {
-                struct amdgpu_bo *bo = parser->bo_list->array[i].robj;
+        mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+        if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+                return -EINVAL;
 
-                r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
-                if (unlikely(r))
-                        return r;
+        *bo = mapping->bo_va->base.bo;
+        *map = mapping;
 
-                if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-                        continue;
+        /* Double check that the BO is reserved by this CS */
+        if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+                return -EINVAL;
 
-                bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-                amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains);
-                r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
-                if (unlikely(r))
+        if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
+                (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+                amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+                r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false,
+                                    false);
+                if (r)
                         return r;
-        }
+        }
 
-        return 0;
+        return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
 }