author     Dave Airlie <airlied@redhat.com>   2018-09-20 19:52:34 -0400
committer  Dave Airlie <airlied@redhat.com>   2018-09-20 19:52:53 -0400
commit     36c9c3c91128e2b892c9be0dd9ee9bd82cbe82ad (patch)
tree       687db2e37b7fdcb4bd756a078812d049da18c804 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent     0320ac5188eab5c6e8b92b110d1eae967ac272d2 (diff)
parent     846311ae68f3c78365ebf3dff505c99e7da861cf (diff)
Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/linux into drm-next
This is a new pull for drm-next on top of last week's, with the following changes:
- Fixed 64 bit divide
- Fixed vram type on vega20
- Misc vega20 fixes
- Misc DC fixes
- Fix GDS/GWS/OA domain handling

Previous changes from last week:

amdgpu/kfd:
- Picasso (new APU) support
- Raven2 (new APU) support
- Vega20 enablement
- ACP powergating improvements
- Add ABGR/XBGR display support
- VCN JPEG engine support
- Initial xGMI support
- Use load balancing for engine scheduling
- Lots of new documentation
- Rework and clean up i2c and aux handling in DC
- Add DP YCbCr 4:2:0 support in DC
- Add DMCU firmware loading for Raven (used for ABM and PSR)
- New debugfs features in DC
- LVDS support in DC
- Implement wave kill for gfx/compute (light weight reset for shaders)
- Use AGP aperture to avoid gart mappings when possible
- GPUVM performance improvements
- Bulk moves for more efficient GPUVM LRU handling
- Merge amdgpu and amdkfd into one module
- Enable gfxoff and stutter mode on Raven
- Misc cleanups

Scheduler:
- Load balancing support
- Bug fixes

ttm:
- Bulk move functionality
- Bug fixes

radeon:
- Misc cleanups

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180920150438.12693-1-alexander.deucher@amd.com
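A note on the amdgpu_cs.c changes below: the command-submission paths stop passing an amdgpu_ring around and instead resolve a drm_sched_entity via amdgpu_ctx_get_entity(), which is what lets the scheduler load-balance jobs across rings. The sketch below is illustrative only; the helper name example_lookup_fence is hypothetical, and the snippet is not a buildable unit on its own, but the two amdgpu_ctx_* calls and their arguments are taken from the diff.

/* Illustrative sketch of the lookup pattern introduced in this merge:
 * a scheduler entity, not a hardware ring, is now the handle used to
 * find a submission's fence.
 */
static struct dma_fence *example_lookup_fence(struct amdgpu_ctx *ctx,
					      u32 ip_type, u32 ip_instance,
					      u32 ring, u64 handle)
{
	struct drm_sched_entity *entity;
	int r;

	/* previously: amdgpu_queue_mgr_map(..., &ring) */
	r = amdgpu_ctx_get_entity(ctx, ip_type, ip_instance, ring, &entity);
	if (r)
		return ERR_PTR(r);

	/* previously: amdgpu_ctx_get_fence(ctx, ring, handle) */
	return amdgpu_ctx_get_fence(ctx, entity, handle);
}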
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   334
1 file changed, 166 insertions(+), 168 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8f05e28607e9..b7ec2b0e5a9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -32,38 +32,47 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
 	unsigned long size;
+	int r;
 
 	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
 		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+	p->uf_entry.tv.bo = &bo->tbo;
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
-	if (size != PAGE_SIZE || (data->offset + 8) > size)
-		return -EINVAL;
-
-	*offset = data->offset;
-
 	drm_gem_object_put_unlocked(gobj);
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
-		amdgpu_bo_unref(&p->uf_entry.robj);
-		return -EINVAL;
+	size = amdgpu_bo_size(bo);
+	if (size != PAGE_SIZE || (data->offset + 8) > size) {
+		r = -EINVAL;
+		goto error_unref;
+	}
+
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		r = -EINVAL;
+		goto error_unref;
 	}
 
+	*offset = data->offset;
+
 	return 0;
+
+error_unref:
+	amdgpu_bo_unref(&bo);
+	return r;
 }
 
 static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
@@ -221,7 +230,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		goto free_all_kdata;
 	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 
@@ -450,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
 
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
+		if (bo == validated)
 			break;
 
 		/* We can't move pinned BOs here */
@@ -521,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 	int r;
 
 	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
@@ -596,7 +605,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
 	while (1) {
@@ -612,7 +621,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 		INIT_LIST_HEAD(&need_pages);
 		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = e->robj;
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
 				 &e->user_invalidated) && e->user_pages) {
@@ -631,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				list_del(&e->tv.head);
 				list_add(&e->tv.head, &need_pages);
 
-				amdgpu_bo_unreserve(e->robj);
+				amdgpu_bo_unreserve(bo);
 			}
 		}
 
@@ -650,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 		/* Fill the page arrays for all userptrs. */
 		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
+			struct ttm_tt *ttm = e->tv.bo->ttm;
 
 			e->user_pages = kvmalloc_array(ttm->num_pages,
 						       sizeof(struct page*),
@@ -709,23 +718,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		oa = p->bo_list->oa_obj;
 
 	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
+		e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
 
 	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 	}
 	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 	}
 	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
 		r = amdgpu_ttm_alloc_gart(&uf->tbo);
 		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -741,8 +750,7 @@ error_free_pages:
 		if (!e->user_pages)
 			continue;
 
-		release_pages(e->user_pages,
-			      e->robj->tbo.ttm->num_pages);
+		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
 		kvfree(e->user_pages);
 	}
 
@@ -755,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 	int r;
 
 	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct reservation_object *resv = bo->tbo.resv;
+
 		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
-				     amdgpu_bo_explicit_sync(e->robj));
+				     amdgpu_bo_explicit_sync(bo));
 
 		if (r)
 			return r;
@@ -800,11 +810,16 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
@@ -813,6 +828,71 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	struct amdgpu_bo *bo;
 	int r;
 
+	/* Only for UVD/VCE VM emulation */
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
+		unsigned i, j;
+
+		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			struct amdgpu_cs_chunk *chunk;
+			uint64_t offset, va_start;
+			struct amdgpu_ib *ib;
+			uint8_t *kptr;
+
+			chunk = &p->chunks[i];
+			ib = &p->job->ibs[j];
+			chunk_ib = chunk->kdata;
+
+			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+				continue;
+
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
+			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((va_start + chunk_ib->ib_bytes) >
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += va_start - offset;
+
+			if (ring->funcs->parse_cs) {
+				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+				amdgpu_bo_kunmap(aobj);
+
+				r = amdgpu_ring_parse_cs(ring, p, j);
+				if (r)
+					return r;
+			} else {
+				ib->ptr = (uint32_t *)kptr;
+				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+				amdgpu_bo_kunmap(aobj);
+				if (r)
+					return r;
+			}
+
+			j++;
+		}
+	}
+
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
+
+
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
 		return r;
@@ -845,7 +925,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 		struct dma_fence *f;
 
 		/* ignore duplicates */
-		bo = e->robj;
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
 		if (!bo)
 			continue;
 
@@ -875,101 +955,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
 	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
 		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-			/* ignore duplicates */
-			if (!e->robj)
-				continue;
-
-			amdgpu_vm_bo_invalidate(adev, e->robj, false);
-		}
-	}
-
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->ring;
-	int r;
-
-	/* Only for UVD/VCE VM emulation */
-	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
-		unsigned i, j;
-
-		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
-			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			struct amdgpu_cs_chunk *chunk;
-			uint64_t offset, va_start;
-			struct amdgpu_ib *ib;
-			uint8_t *kptr;
-
-			chunk = &p->chunks[i];
-			ib = &p->job->ibs[j];
-			chunk_ib = chunk->kdata;
-
-			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+			/* ignore duplicates */
+			if (!bo)
 				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
-			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += va_start - offset;
-
-			if (p->ring->funcs->parse_cs) {
-				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-				amdgpu_bo_kunmap(aobj);
-
-				r = amdgpu_ring_parse_cs(ring, p, j);
-				if (r)
-					return r;
-			} else {
-				ib->ptr = (uint32_t *)kptr;
-				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
-				amdgpu_bo_kunmap(aobj);
-				if (r)
-					return r;
-			}
-
-			j++;
+			amdgpu_vm_bo_invalidate(adev, bo, false);
 		}
 	}
 
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-
-		r = amdgpu_bo_vm_update_pte(p);
-		if (r)
-			return r;
-
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
-		if (r)
-			return r;
-	}
-
 	return amdgpu_cs_sync_rings(p);
 }
 
@@ -978,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 {
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
 		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
@@ -1007,8 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			return -EINVAL;
 		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
-					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+					  chunk_ib->ip_instance, chunk_ib->ring,
+					  &entity);
 		if (r)
 			return r;
 
@@ -1016,14 +1022,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			parser->job->preamble_status |=
 				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->ring && parser->ring != ring)
+		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
-		parser->ring = ring;
+		parser->entity = entity;
 
-		r = amdgpu_ib_get(adev, vm,
-				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
-				  ib);
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+				  chunk_ib->ib_bytes : 0, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
 			return r;
@@ -1037,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}
 
 	/* UVD & VCE fw doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
 	if (parser->job->uf_addr && (
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	    ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1058,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
 		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
 		struct dma_fence *fence;
 
 		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 		if (ctx == NULL)
 			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
-					 deps[i].ip_type,
-					 deps[i].ip_instance,
-					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+					  deps[i].ip_instance,
+					  deps[i].ring, &entity);
 		if (r) {
 			amdgpu_ctx_put(ctx);
 			return r;
 		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
+		fence = amdgpu_ctx_get_fence(ctx, entity,
 					     deps[i].handle);
 		if (IS_ERR(fence)) {
 			r = PTR_ERR(fence);
@@ -1194,9 +1200,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ring *ring = p->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	struct drm_sched_entity *entity = p->entity;
 	enum drm_sched_priority priority;
+	struct amdgpu_ring *ring;
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
@@ -1213,7 +1219,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	/* No memory allocation is allowed while holding the mn lock */
 	amdgpu_mn_lock(p->mn);
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = e->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
 			r = -ERESTARTSYS;
@@ -1224,15 +1230,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	amdgpu_cs_post_dependencies(p);
 
 	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@@ -1254,6 +1252,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	ring = to_amdgpu_ring(entity->rq->sched);
 	amdgpu_ring_priority_get(ring, priority);
 
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
@@ -1293,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r)
 		goto out;
 
+	r = amdgpu_cs_dependencies(adev, &parser);
+	if (r) {
+		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+		goto out;
+	}
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1304,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	reserved_buffers = true;
 
-	r = amdgpu_cs_dependencies(adev, &parser);
-	if (r) {
-		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
-		goto out;
-	}
-
 	for (i = 0; i < parser.job->num_ibs; i++)
 		trace_amdgpu_cs(&parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	r = amdgpu_cs_vm_handling(&parser);
 	if (r)
 		goto out;
 
@@ -1337,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
@@ -1348,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
-				 wait->in.ip_type, wait->in.ip_instance,
-				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+				  wait->in.ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return r;
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
@@ -1388,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 					struct drm_file *filp,
 					struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
@@ -1397,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
-				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+				  user->ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return ERR_PTR(r);
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;