author     Tony Lindgren <tony@atomide.com>    2018-08-28 12:58:03 -0400
committer  Tony Lindgren <tony@atomide.com>    2018-08-28 12:58:03 -0400
commit     ea4d65f14f6aaa53e379b93c5544245ef081b3e7 (patch)
tree       a15485f4f1cf547a52b31fa8e16e14b9579b7200 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent     ce32d59ee2cd036f6e8a6ed17a06a0b0bec5c67c (diff)
parent     496f3347d834aec91c38b45d6249ed00f58ad233 (diff)
Merge branch 'perm-fix' into omap-for-v4.19/fixes-v2
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  259
1 file changed, 149 insertions(+), 110 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9c85a90be293..502b94fb116a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,6 +31,7 @@
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
                                       struct drm_amdgpu_cs_chunk_fence *data,
@@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
         return 0;
 }
 
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+                                      struct drm_amdgpu_bo_list_in *data)
+{
+        int r;
+        struct drm_amdgpu_bo_list_entry *info = NULL;
+
+        r = amdgpu_bo_create_list_entry_array(data, &info);
+        if (r)
+                return r;
+
+        r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+                                  &p->bo_list);
+        if (r)
+                goto error_free;
+
+        kvfree(info);
+        return 0;
+
+error_free:
+        if (info)
+                kvfree(info);
+
+        return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 {
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
         struct amdgpu_vm *vm = &fpriv->vm;
-        union drm_amdgpu_cs *cs = data;
         uint64_t *chunk_array_user;
         uint64_t *chunk_array;
         unsigned size, num_ibs = 0;
@@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
                         break;
 
+                case AMDGPU_CHUNK_ID_BO_HANDLES:
+                        size = sizeof(struct drm_amdgpu_bo_list_in);
+                        if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+                                ret = -EINVAL;
+                                goto free_partial_kdata;
+                        }
+
+                        ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+                        if (ret)
+                                goto free_partial_kdata;
+
+                        break;
+
                 case AMDGPU_CHUNK_ID_DEPENDENCIES:
                 case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
                 case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
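The new AMDGPU_CHUNK_ID_BO_HANDLES chunk lets userspace hand the BO list to the kernel inline with the submission, instead of creating it up front with DRM_IOCTL_AMDGPU_BO_LIST. A minimal userspace sketch of building such a chunk, assuming the uapi structs from amdgpu_drm.h; the helper name and GEM handles are illustrative, not part of this patch:

    /* build_bo_handles_chunk() is a hypothetical helper; the two GEM
     * handles are illustrative and bo_priority is left at 0 (default). */
    #include <stdint.h>
    #include <drm/amdgpu_drm.h>

    static struct drm_amdgpu_cs_chunk
    build_bo_handles_chunk(uint32_t ib_handle, uint32_t dst_handle)
    {
            /* static so the pointers stay valid until the CS ioctl runs */
            static struct drm_amdgpu_bo_list_entry entries[2];
            static struct drm_amdgpu_bo_list_in list_in;
            struct drm_amdgpu_cs_chunk chunk;

            entries[0].bo_handle = ib_handle;
            entries[1].bo_handle = dst_handle;

            list_in.bo_number    = 2;
            list_in.bo_info_size = sizeof(entries[0]);
            list_in.bo_info_ptr  = (uintptr_t)entries;

            chunk.chunk_id   = AMDGPU_CHUNK_ID_BO_HANDLES;
            /* length_dw counts 32-bit words; the size check added above
             * rejects chunks smaller than struct drm_amdgpu_bo_list_in. */
            chunk.length_dw  = sizeof(list_in) / 4;
            chunk.chunk_data = (uintptr_t)&list_in;

            return chunk;
    }

When this chunk is used, cs.in.bo_list_handle must be left at 0; the amdgpu_cs_parser_bos() hunk further down rejects submissions that provide both.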
@@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
         if (p->uf_entry.robj)
                 p->job->uf_addr = uf_offset;
         kfree(chunk_array);
+
+        /* Use this opportunity to fill in task info for the vm */
+        amdgpu_vm_set_task_info(vm);
+
         return 0;
 
 free_all_kdata:
@@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
                 return;
         }
 
-        total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
+        total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
         used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
         free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
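vram_pin_size is now an atomic64_t, so this reader no longer needs a lock. For context, a sketch of the matching update side under that assumption (the function name is hypothetical; the real accounting lives in the pin/unpin paths elsewhere in this series):

    /* Hypothetical illustration of lock-free pin accounting: updates use
     * atomic64_add()/atomic64_sub(), readers sample the counter with
     * atomic64_read() as in the hunk above. */
    static void example_account_vram_pin(struct amdgpu_device *adev,
                                         struct amdgpu_bo *bo, bool pin)
    {
            u64 size = amdgpu_bo_size(bo);

            if (pin)
                    atomic64_add(size, &adev->vram_pin_size);
            else
                    atomic64_sub(size, &adev->vram_pin_size);
    }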
@@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
         *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 
         /* Do the same for visible VRAM if half of it is free */
-        if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+        if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
                 u64 total_vis_vram = adev->gmc.visible_vram_size;
                 u64 used_vis_vram =
                         amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
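amdgpu_gmc_vram_full_visible() comes from the newly included amdgpu_gmc.h. Paraphrased from that header (not part of this diff), it is roughly:

    /* True when the CPU can reach all of VRAM through the BAR, so there is
     * no separate "invisible" VRAM to account for (paraphrased sketch). */
    static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
    {
            WARN_ON(gmc->real_vram_size < gmc->visible_vram_size);
            return (gmc->real_vram_size == gmc->visible_vram_size);
    }

Note the inverted sense at the call sites: the old code tested visible < real ("not fully visible"), so the converted conditions read !amdgpu_gmc_vram_full_visible().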
@@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
          * to move it. Don't move anything if the threshold is zero.
          */
         if (p->bytes_moved < p->bytes_moved_threshold) {
-                if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+                if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
                     (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
                         /* And don't move a CPU_ACCESS_REQUIRED BO to limited
                          * visible VRAM if we've depleted our allowance to do
@@ -377,11 +419,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
         }
 
 retry:
-        amdgpu_ttm_placement_from_domain(bo, domain);
+        amdgpu_bo_placement_from_domain(bo, domain);
         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
         p->bytes_moved += ctx.bytes_moved;
-        if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
             amdgpu_bo_in_cpu_visible_vram(bo))
                 p->bytes_moved_vis += ctx.bytes_moved;
 
@@ -434,9 +476,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
                 /* Good we can try to move this BO somewhere else */
                 update_bytes_moved_vis =
-                        adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+                        !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
                         amdgpu_bo_in_cpu_visible_vram(bo);
-                amdgpu_ttm_placement_from_domain(bo, other);
+                amdgpu_bo_placement_from_domain(bo, other);
                 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
                 p->bytes_moved += ctx.bytes_moved;
                 if (update_bytes_moved_vis)
@@ -490,8 +532,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
                 /* Check if we have user pages and nobody bound the BO already */
                 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
                     lobj->user_pages) {
-                        amdgpu_ttm_placement_from_domain(bo,
-                                                         AMDGPU_GEM_DOMAIN_CPU);
+                        amdgpu_bo_placement_from_domain(bo,
+                                                        AMDGPU_GEM_DOMAIN_CPU);
                         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
                         if (r)
                                 return r;
@@ -519,23 +561,38 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                           union drm_amdgpu_cs *cs)
 {
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+        struct amdgpu_vm *vm = &fpriv->vm;
         struct amdgpu_bo_list_entry *e;
         struct list_head duplicates;
-        unsigned i, tries = 10;
         struct amdgpu_bo *gds;
         struct amdgpu_bo *gws;
         struct amdgpu_bo *oa;
+        unsigned tries = 10;
         int r;
 
         INIT_LIST_HEAD(&p->validated);
 
-        p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
-        if (p->bo_list) {
-                amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-                if (p->bo_list->first_userptr != p->bo_list->num_entries)
-                        p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+        /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+        if (cs->in.bo_list_handle) {
+                if (p->bo_list)
+                        return -EINVAL;
+
+                r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+                                       &p->bo_list);
+                if (r)
+                        return r;
+        } else if (!p->bo_list) {
+                /* Create a empty bo_list when no handle is provided */
+                r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+                                          &p->bo_list);
+                if (r)
+                        return r;
         }
 
+        amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+        if (p->bo_list->first_userptr != p->bo_list->num_entries)
+                p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+
         INIT_LIST_HEAD(&duplicates);
         amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
@@ -544,7 +601,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
         while (1) {
                 struct list_head need_pages;
-                unsigned i;
 
                 r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
                                            &duplicates);
@@ -554,17 +610,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                         goto error_free_pages;
                 }
 
-                /* Without a BO list we don't have userptr BOs */
-                if (!p->bo_list)
-                        break;
-
                 INIT_LIST_HEAD(&need_pages);
-                for (i = p->bo_list->first_userptr;
-                     i < p->bo_list->num_entries; ++i) {
-                        struct amdgpu_bo *bo;
-
-                        e = &p->bo_list->array[i];
-                        bo = e->robj;
+                amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+                        struct amdgpu_bo *bo = e->robj;
 
                         if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
                                  &e->user_invalidated) && e->user_pages) {
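amdgpu_bo_list_for_each_userptr_entry() and its companion below replace the open-coded first_userptr/num_entries loops. Sketched from their usage in this file (see amdgpu_bo_list.h for the authoritative definitions), the iterators expand to roughly:

    /* Plausible shape inferred from the converted loops: walk all entries,
     * or only the userptr tail of the array, with e pointing at a
     * struct amdgpu_bo_list_entry. */
    #define amdgpu_bo_list_for_each_entry(e, list)                  \
            for (e = &(list)->array[0];                             \
                 e != &(list)->array[(list)->num_entries];          \
                 ++e)

    #define amdgpu_bo_list_for_each_userptr_entry(e, list)          \
            for (e = &(list)->array[(list)->first_userptr];         \
                 e != &(list)->array[(list)->num_entries];          \
                 ++e)

Because the parser now always owns a bo_list (an empty one is created in amdgpu_cs_parser_bos() when none is supplied), the old if (!p->bo_list) guards around these loops can simply go away.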
@@ -656,23 +704,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
         amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
                                      p->bytes_moved_vis);
 
-        if (p->bo_list) {
-                struct amdgpu_vm *vm = &fpriv->vm;
-                unsigned i;
+        gds = p->bo_list->gds_obj;
+        gws = p->bo_list->gws_obj;
+        oa = p->bo_list->oa_obj;
 
-                gds = p->bo_list->gds_obj;
-                gws = p->bo_list->gws_obj;
-                oa = p->bo_list->oa_obj;
-                for (i = 0; i < p->bo_list->num_entries; i++) {
-                        struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
-                        p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
-                }
-        } else {
-                gds = p->adev->gds.gds_gfx_bo;
-                gws = p->adev->gds.gws_gfx_bo;
-                oa = p->adev->gds.oa_gfx_bo;
-        }
+        amdgpu_bo_list_for_each_entry(e, p->bo_list)
+                e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
 
         if (gds) {
                 p->job->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -700,18 +737,13 @@ error_validate:
 
 error_free_pages:
 
-        if (p->bo_list) {
-                for (i = p->bo_list->first_userptr;
-                     i < p->bo_list->num_entries; ++i) {
-                        e = &p->bo_list->array[i];
-
-                        if (!e->user_pages)
-                                continue;
+        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+                if (!e->user_pages)
+                        continue;
 
-                        release_pages(e->user_pages,
-                                      e->robj->tbo.ttm->num_pages);
-                        kvfree(e->user_pages);
-                }
+                release_pages(e->user_pages,
+                              e->robj->tbo.ttm->num_pages);
+                kvfree(e->user_pages);
         }
 
         return r;
@@ -773,12 +805,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 {
-        struct amdgpu_device *adev = p->adev;
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+        struct amdgpu_device *adev = p->adev;
         struct amdgpu_vm *vm = &fpriv->vm;
+        struct amdgpu_bo_list_entry *e;
         struct amdgpu_bo_va *bo_va;
         struct amdgpu_bo *bo;
-        int i, r;
+        int r;
 
         r = amdgpu_vm_clear_freed(adev, vm, NULL);
         if (r)
@@ -808,29 +841,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
                 return r;
         }
 
-        if (p->bo_list) {
-                for (i = 0; i < p->bo_list->num_entries; i++) {
-                        struct dma_fence *f;
-
-                        /* ignore duplicates */
-                        bo = p->bo_list->array[i].robj;
-                        if (!bo)
-                                continue;
+        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+                struct dma_fence *f;
 
-                        bo_va = p->bo_list->array[i].bo_va;
-                        if (bo_va == NULL)
-                                continue;
+                /* ignore duplicates */
+                bo = e->robj;
+                if (!bo)
+                        continue;
 
-                        r = amdgpu_vm_bo_update(adev, bo_va, false);
-                        if (r)
-                                return r;
+                bo_va = e->bo_va;
+                if (bo_va == NULL)
+                        continue;
 
-                        f = bo_va->last_pt_update;
-                        r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-                        if (r)
-                                return r;
-                }
+                r = amdgpu_vm_bo_update(adev, bo_va, false);
+                if (r)
+                        return r;
 
+                f = bo_va->last_pt_update;
+                r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+                if (r)
+                        return r;
         }
 
         r = amdgpu_vm_handle_moved(adev, vm);
@@ -845,15 +875,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
         if (r)
                 return r;
 
-        if (amdgpu_vm_debug && p->bo_list) {
+        if (amdgpu_vm_debug) {
                 /* Invalidate all BOs to test for userspace bugs */
-                for (i = 0; i < p->bo_list->num_entries; i++) {
+                amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                         /* ignore duplicates */
-                        bo = p->bo_list->array[i].robj;
-                        if (!bo)
+                        if (!e->robj)
                                 continue;
 
-                        amdgpu_vm_bo_invalidate(adev, bo, false);
+                        amdgpu_vm_bo_invalidate(adev, e->robj, false);
                 }
         }
 
@@ -865,11 +894,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 {
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
         struct amdgpu_vm *vm = &fpriv->vm;
-        struct amdgpu_ring *ring = p->job->ring;
+        struct amdgpu_ring *ring = p->ring;
         int r;
 
         /* Only for UVD/VCE VM emulation */
-        if (p->job->ring->funcs->parse_cs) {
+        if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
                 unsigned i, j;
 
                 for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -910,12 +939,20 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
                         offset = m->start * AMDGPU_GPU_PAGE_SIZE;
                         kptr += va_start - offset;
 
-                        memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-                        amdgpu_bo_kunmap(aobj);
-
-                        r = amdgpu_ring_parse_cs(ring, p, j);
-                        if (r)
-                                return r;
+                        if (p->ring->funcs->parse_cs) {
+                                memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+                                amdgpu_bo_kunmap(aobj);
+
+                                r = amdgpu_ring_parse_cs(ring, p, j);
+                                if (r)
+                                        return r;
+                        } else {
+                                ib->ptr = (uint32_t *)kptr;
+                                r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+                                amdgpu_bo_kunmap(aobj);
+                                if (r)
+                                        return r;
+                        }
 
                         j++;
                 }
@@ -983,10 +1020,10 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                 }
         }
 
-        if (parser->job->ring && parser->job->ring != ring)
+        if (parser->ring && parser->ring != ring)
                 return -EINVAL;
 
-        parser->job->ring = ring;
+        parser->ring = ring;
 
         r = amdgpu_ib_get(adev, vm,
                           ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
@@ -1005,11 +1042,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
         /* UVD & VCE fw doesn't support user fences */
         if (parser->job->uf_addr && (
-            parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-            parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+            parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+            parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
                 return -EINVAL;
 
-        return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+        return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1160,31 +1197,30 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                             union drm_amdgpu_cs *cs)
 {
-        struct amdgpu_ring *ring = p->job->ring;
+        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+        struct amdgpu_ring *ring = p->ring;
         struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+        enum drm_sched_priority priority;
+        struct amdgpu_bo_list_entry *e;
         struct amdgpu_job *job;
-        unsigned i;
         uint64_t seq;
 
         int r;
 
         amdgpu_mn_lock(p->mn);
-        if (p->bo_list) {
-                for (i = p->bo_list->first_userptr;
-                     i < p->bo_list->num_entries; ++i) {
-                        struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
-                        if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-                                amdgpu_mn_unlock(p->mn);
-                                return -ERESTARTSYS;
-                        }
+        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+                struct amdgpu_bo *bo = e->robj;
+
+                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+                        amdgpu_mn_unlock(p->mn);
+                        return -ERESTARTSYS;
                 }
         }
 
         job = p->job;
         p->job = NULL;
 
-        r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
+        r = drm_sched_job_init(&job->base, entity, p->filp);
         if (r) {
                 amdgpu_job_free(job);
                 amdgpu_mn_unlock(p->mn);
@@ -1192,7 +1228,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
         }
 
         job->owner = p->filp;
-        job->fence_ctx = entity->fence_context;
         p->fence = dma_fence_get(&job->base.s_fence->finished);
 
         r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
@@ -1210,11 +1245,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
         job->uf_sequence = seq;
 
         amdgpu_job_free_resources(job);
-        amdgpu_ring_priority_get(job->ring, job->base.s_priority);
 
         trace_amdgpu_cs_ioctl(job);
+        amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
+        priority = job->base.s_priority;
         drm_sched_entity_push_job(&job->base, entity);
 
+        ring = to_amdgpu_ring(entity->rq->sched);
+        amdgpu_ring_priority_get(ring, priority);
+
         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
         amdgpu_mn_unlock(p->mn);
 
@@ -1605,7 +1644,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 
         if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
                 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-                amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+                amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
                 r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
                 if (r)
                         return r;