author     Dave Airlie <airlied@redhat.com>  2018-09-20 19:52:34 -0400
committer  Dave Airlie <airlied@redhat.com>  2018-09-20 19:52:53 -0400
commit     36c9c3c91128e2b892c9be0dd9ee9bd82cbe82ad (patch)
tree       687db2e37b7fdcb4bd756a078812d049da18c804 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent     0320ac5188eab5c6e8b92b110d1eae967ac272d2 (diff)
parent     846311ae68f3c78365ebf3dff505c99e7da861cf (diff)
Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/linux into drm-next
This is a new pull for drm-next on top of last week's pull, with the following
changes:
- Fixed 64 bit divide
- Fixed vram type on vega20
- Misc vega20 fixes
- Misc DC fixes
- Fix GDS/GWS/OA domain handling
Previous changes from last week:
amdgpu/kfd:
- Picasso (new APU) support
- Raven2 (new APU) support
- Vega20 enablement
- ACP powergating improvements
- Add ABGR/XBGR display support
- VCN JPEG engine support
- Initial xGMI support
- Use load balancing for engine scheduling
- Lots of new documentation
- Rework and clean up i2c and aux handling in DC
- Add DP YCbCr 4:2:0 support in DC
- Add DMCU firmware loading for Raven (used for ABM and PSR)
- New debugfs features in DC
- LVDS support in DC
- Implement wave kill for gfx/compute (lightweight reset for shaders)
- Use AGP aperture to avoid gart mappings when possible
- GPUVM performance improvements
- Bulk moves for more efficient GPUVM LRU handling
- Merge amdgpu and amdkfd into one module
- Enable gfxoff and stutter mode on Raven
- Misc cleanups
Scheduler:
- Load balancing support
- Bug fixes
ttm:
- Bulk move functionality
- Bug fixes
radeon:
- Misc cleanups
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180920150438.12693-1-alexander.deucher@amd.com
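
A recurring pattern in the amdgpu_cs.c changes below is that the cached
amdgpu_bo_list_entry.robj pointer goes away and each user instead recovers the
buffer object from the TTM validation entry with ttm_to_amdgpu_bo(e->tv.bo).
The short C program that follows is only a minimal, self-contained sketch of
the container_of idiom behind that helper; the struct layouts are reduced
stand-ins rather than the real kernel definitions.

#include <stddef.h>
#include <stdio.h>

struct ttm_buffer_object {
	unsigned long num_pages;        /* stand-in TTM state */
};

struct amdgpu_bo {
	int flags;                      /* stand-in driver state */
	struct ttm_buffer_object tbo;   /* embedded TTM object */
};

/* same pointer arithmetic the kernel's container_of() performs */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
{
	return container_of(tbo, struct amdgpu_bo, tbo);
}

int main(void)
{
	struct amdgpu_bo bo = { .flags = 42, .tbo = { .num_pages = 1 } };
	struct ttm_buffer_object *tv_bo = &bo.tbo;  /* what the list entry keeps */

	/* recover the driver BO the way the reworked CS code does */
	printf("flags = %d, pages = %lu\n",
	       ttm_to_amdgpu_bo(tv_bo)->flags,
	       ttm_to_amdgpu_bo(tv_bo)->tbo.num_pages);
	return 0;
}

Because amdgpu_bo embeds its ttm_buffer_object, keeping only the &bo->tbo
pointer in the validation entry loses no information; the driver object is
always one pointer subtraction away.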
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  334
1 file changed, 166 insertions(+), 168 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8f05e28607e9..b7ec2b0e5a9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -32,38 +32,47 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *data,
				      uint32_t *offset)
 {
	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
	unsigned long size;
+	int r;
 
	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+	p->uf_entry.tv.bo = &bo->tbo;
	p->uf_entry.tv.shared = true;
	p->uf_entry.user_pages = NULL;
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
-	if (size != PAGE_SIZE || (data->offset + 8) > size)
-		return -EINVAL;
-
-	*offset = data->offset;
-
	drm_gem_object_put_unlocked(gobj);
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
-		amdgpu_bo_unref(&p->uf_entry.robj);
-		return -EINVAL;
+	size = amdgpu_bo_size(bo);
+	if (size != PAGE_SIZE || (data->offset + 8) > size) {
+		r = -EINVAL;
+		goto error_unref;
+	}
+
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		r = -EINVAL;
+		goto error_unref;
	}
 
+	*offset = data->offset;
+
	return 0;
+
+error_unref:
+	amdgpu_bo_unref(&bo);
+	return r;
 }
 
 static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
@@ -221,7 +230,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
		goto free_all_kdata;
	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
		p->job->uf_addr = uf_offset;
	kfree(chunk_array);
 
@@ -450,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
 
		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
		bool update_bytes_moved_vis;
		uint32_t other;
 
		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
+		if (bo == validated)
			break;
 
		/* We can't move pinned BOs here */
@@ -521,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
	int r;
 
	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
		bool binding_userptr = false;
		struct mm_struct *usermm;
 
@@ -596,7 +605,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
		list_add(&p->uf_entry.tv.head, &p->validated);
 
	while (1) {
@@ -612,7 +621,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
		INIT_LIST_HEAD(&need_pages);
		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = e->robj;
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
				 &e->user_invalidated) && e->user_pages) {
@@ -631,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				list_del(&e->tv.head);
				list_add(&e->tv.head, &need_pages);
 
-				amdgpu_bo_unreserve(e->robj);
+				amdgpu_bo_unreserve(bo);
			}
		}
 
@@ -650,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
		/* Fill the page arrays for all userptrs. */
		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
+			struct ttm_tt *ttm = e->tv.bo->ttm;
 
			e->user_pages = kvmalloc_array(ttm->num_pages,
						       sizeof(struct page*),
@@ -709,23 +718,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
		oa = p->bo_list->oa_obj;
 
	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
+		e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
 
	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
	}
	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
	}
	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -741,8 +750,7 @@ error_free_pages:
		if (!e->user_pages)
			continue;
 
-		release_pages(e->user_pages,
-			      e->robj->tbo.ttm->num_pages);
+		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
		kvfree(e->user_pages);
	}
 
@@ -755,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
	int r;
 
	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct reservation_object *resv = bo->tbo.resv;
+
		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
-				     amdgpu_bo_explicit_sync(e->robj));
+				     amdgpu_bo_explicit_sync(bo));
 
		if (r)
			return r;
@@ -800,11 +810,16 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
@@ -813,6 +828,71 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
	struct amdgpu_bo *bo;
	int r;
 
+	/* Only for UVD/VCE VM emulation */
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
+		unsigned i, j;
+
+		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			struct amdgpu_cs_chunk *chunk;
+			uint64_t offset, va_start;
+			struct amdgpu_ib *ib;
+			uint8_t *kptr;
+
+			chunk = &p->chunks[i];
+			ib = &p->job->ibs[j];
+			chunk_ib = chunk->kdata;
+
+			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+				continue;
+
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
+			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((va_start + chunk_ib->ib_bytes) >
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += va_start - offset;
+
+			if (ring->funcs->parse_cs) {
+				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+				amdgpu_bo_kunmap(aobj);
+
+				r = amdgpu_ring_parse_cs(ring, p, j);
+				if (r)
+					return r;
+			} else {
+				ib->ptr = (uint32_t *)kptr;
+				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+				amdgpu_bo_kunmap(aobj);
+				if (r)
+					return r;
+			}
+
+			j++;
+		}
+	}
+
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
+
+
	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;
@@ -845,7 +925,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
		struct dma_fence *f;
 
		/* ignore duplicates */
-		bo = e->robj;
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
		if (!bo)
			continue;
 
@@ -875,101 +955,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
	if (r)
		return r;
 
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
	if (amdgpu_vm_debug) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-			/* ignore duplicates */
-			if (!e->robj)
-				continue;
-
-			amdgpu_vm_bo_invalidate(adev, e->robj, false);
-		}
-	}
-
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->ring;
-	int r;
-
-	/* Only for UVD/VCE VM emulation */
-	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
-		unsigned i, j;
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
-			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			struct amdgpu_cs_chunk *chunk;
-			uint64_t offset, va_start;
-			struct amdgpu_ib *ib;
-			uint8_t *kptr;
-
-			chunk = &p->chunks[i];
-			ib = &p->job->ibs[j];
-			chunk_ib = chunk->kdata;
-
-			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+			/* ignore duplicates */
+			if (!bo)
				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
-			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += va_start - offset;
-
-			if (p->ring->funcs->parse_cs) {
-				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-				amdgpu_bo_kunmap(aobj);
-
-				r = amdgpu_ring_parse_cs(ring, p, j);
-				if (r)
-					return r;
-			} else {
-				ib->ptr = (uint32_t *)kptr;
-				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
-				amdgpu_bo_kunmap(aobj);
-				if (r)
-					return r;
-			}
-
-			j++;
-		}
+			amdgpu_vm_bo_invalidate(adev, bo, false);
		}
	}
 
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-
-		r = amdgpu_bo_vm_update_pte(p);
-		if (r)
-			return r;
-
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
-		if (r)
-			return r;
-	}
-
	return amdgpu_cs_sync_rings(p);
 }
 
@@ -978,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 {
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
@@ -1007,8 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			return -EINVAL;
		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
-					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+					  chunk_ib->ip_instance, chunk_ib->ring,
+					  &entity);
		if (r)
			return r;
 
@@ -1016,14 +1022,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->ring && parser->ring != ring)
+		if (parser->entity && parser->entity != entity)
			return -EINVAL;
 
-		parser->ring = ring;
+		parser->entity = entity;
 
-		r = amdgpu_ib_get(adev, vm,
-				  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
-				  ib);
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+				  chunk_ib->ib_bytes : 0, ib);
		if (r) {
			DRM_ERROR("Failed to get ib !\n");
			return r;
@@ -1037,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
	}
 
	/* UVD & VCE fw doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
	if (parser->job->uf_addr && (
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	    ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_VCE))
		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1058,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
		struct dma_fence *fence;
 
		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
-					 deps[i].ip_type,
-					 deps[i].ip_instance,
-					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+					  deps[i].ip_instance,
+					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
+		fence = amdgpu_ctx_get_fence(ctx, entity,
					     deps[i].handle);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
@@ -1194,9 +1200,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
 {
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ring *ring = p->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	struct drm_sched_entity *entity = p->entity;
	enum drm_sched_priority priority;
+	struct amdgpu_ring *ring;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_job *job;
	uint64_t seq;
@@ -1213,7 +1219,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
	/* No memory allocation is allowed while holding the mn lock */
	amdgpu_mn_lock(p->mn);
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = e->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
			r = -ERESTARTSYS;
@@ -1224,15 +1230,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
	job->owner = p->filp;
	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
	amdgpu_cs_post_dependencies(p);
 
	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
@@ -1254,6 +1252,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
	ring = to_amdgpu_ring(entity->rq->sched);
	amdgpu_ring_priority_get(ring, priority);
 
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
	amdgpu_mn_unlock(p->mn);
 
@@ -1293,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
	if (r)
		goto out;
 
+	r = amdgpu_cs_dependencies(adev, &parser);
+	if (r) {
+		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+		goto out;
+	}
+
	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
@@ -1304,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
	reserved_buffers = true;
 
-	r = amdgpu_cs_dependencies(adev, &parser);
-	if (r) {
-		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
-		goto out;
-	}
-
	for (i = 0; i < parser.job->num_ibs; i++)
		trace_amdgpu_cs(&parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto out;
 
@@ -1337,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
 {
	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;
@@ -1348,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
	if (ctx == NULL)
		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
-				 wait->in.ip_type, wait->in.ip_instance,
-				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+				  wait->in.ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
@@ -1388,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
					     struct drm_file *filp,
					     struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;
@@ -1397,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
-				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+				  user->ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
	amdgpu_ctx_put(ctx);
 
	return fence;
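
Related to the "Fix GDS/GWS/OA domain handling" item in the pull description,
the GDS/GWS/OA hunk above now stores the job's gds_base/gds_size (and the GWS
and OA equivalents) shifted right by PAGE_SHIFT, i.e. in page-granularity
units rather than bytes. The snippet below is only a small illustrative sketch
of that conversion, assuming 4 KiB pages and made-up byte values; it is not
taken from the kernel sources.

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages */

int main(void)
{
	/* hypothetical GDS BO placement and size, in bytes */
	unsigned long gds_offset_bytes = 0x10000;
	unsigned long gds_size_bytes   = 0x2000;

	/* after this change the job fields carry page counts, not bytes */
	printf("gds_base = %lu pages\n", gds_offset_bytes >> PAGE_SHIFT);
	printf("gds_size = %lu pages\n", gds_size_bytes >> PAGE_SHIFT);
	return 0;
}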