author     Chunming Zhou <david1.zhou@amd.com>       2015-07-21 02:36:51 -0400
committer  Alex Deucher <alexander.deucher@amd.com>  2015-08-17 16:50:34 -0400
commit     049fc527b4641f99e573b26f1a726a3eadd0cc25 (patch)
tree       7e204a188fdb55c02a0eb1fd7fe39f1e63fd9597 /drivers/gpu/drm/amd
parent     372bc1e18ca961ef51997df235e822aed6283726 (diff)
drm/amdgpu: dispatch jobs in cs
BO validation is moved to the scheduler, except for userptr BOs, which
must be validated in the user process.
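
With amdgpu_enable_scheduler set, the CS ioctl no longer submits IBs
directly: it fills in the prepare_job/run_job/free_job callbacks, pushes
the parser to the ring's scheduler entity via amd_sched_push_job(), and
returns the entity's sequence number as the handle; free_job later runs
from the job_work item, which also frees the parser. The standalone
userspace sketch below models that lifecycle; scheduler_consume() is a
hypothetical stand-in for amd_sched_push_job() plus the job_work handler,
not a kernel API, and locking and error paths are elided.

/* Standalone userspace model of the job lifecycle this patch introduces.
 * scheduler_consume() is a hypothetical stand-in for amd_sched_push_job()
 * plus the job_work handler; locking and error paths are elided. */
#include <stdio.h>
#include <stdlib.h>

struct sched_job {
	int (*prepare_job)(struct sched_job *job);	/* BO validation */
	int (*run_job)(struct sched_job *job);		/* submit, then back off reservations */
	int (*free_job)(struct sched_job *job);		/* deferred teardown */
	int id;
};

static int prepare(struct sched_job *job)
{
	printf("job %d: validate BOs\n", job->id);
	return 0;
}

static int run(struct sched_job *job)
{
	printf("job %d: schedule IBs\n", job->id);
	return 0;
}

static int release(struct sched_job *job)
{
	printf("job %d: drop references\n", job->id);
	return 0;
}

/* models the scheduler consuming the job, then amdgpu_job_work_func() */
static void scheduler_consume(struct sched_job *job)
{
	if (job->prepare_job)
		job->prepare_job(job);	/* deferred BO validation */
	job->run_job(job);
	job->free_job(job);		/* in the patch: from the job_work item */
	free(job);			/* in the patch: kfree(sched_job) */
}

int main(void)
{
	struct sched_job *job = calloc(1, sizeof(*job));

	if (!job)
		return 1;
	job->id = 1;
	job->prepare_job = prepare;	/* left unset when validated in-process (userptr case) */
	job->run_job = run;
	job->free_job = release;
	scheduler_consume(job);
	return 0;
}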
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 256
2 files changed, 200 insertions, 57 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cfc6c786b2f2..becb26317467 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1243,6 +1243,7 @@ struct amdgpu_cs_parser {
 	struct work_struct job_work;
 	int (*prepare_job)(struct amdgpu_cs_parser *sched_job);
 	int (*run_job)(struct amdgpu_cs_parser *sched_job);
+	int (*free_job)(struct amdgpu_cs_parser *sched_job);
 };
 
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index bc0a70415485..f9d4fe985668 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -41,6 +41,11 @@ struct amdgpu_cs_buckets {
 	struct list_head bucket[AMDGPU_CS_NUM_BUCKETS];
 };
 
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser,
+				  int error, bool backoff);
+static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff);
+static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser);
+
 static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b)
 {
 	unsigned i;
@@ -126,12 +131,52 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 	return 0;
 }
 
+static void amdgpu_job_work_func(struct work_struct *work)
+{
+	struct amdgpu_cs_parser *sched_job =
+		container_of(work, struct amdgpu_cs_parser,
+			     job_work);
+	mutex_lock(&sched_job->job_lock);
+	sched_job->free_job(sched_job);
+	mutex_unlock(&sched_job->job_lock);
+	/* after processing job, free memory */
+	kfree(sched_job);
+}
+struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
+						 struct drm_file *filp,
+						 struct amdgpu_ctx *ctx,
+						 struct amdgpu_ib *ibs,
+						 uint32_t num_ibs)
+{
+	struct amdgpu_cs_parser *parser;
+	int i;
+
+	parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
+	if (!parser)
+		return NULL;
+
+	parser->adev = adev;
+	parser->filp = filp;
+	parser->ctx = ctx;
+	parser->ibs = ibs;
+	parser->num_ibs = num_ibs;
+	if (amdgpu_enable_scheduler) {
+		mutex_init(&parser->job_lock);
+		INIT_WORK(&parser->job_work, amdgpu_job_work_func);
+	}
+	for (i = 0; i < num_ibs; i++)
+		ibs[i].ctx = ctx;
+
+	return parser;
+}
+
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array = NULL;
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_bo_list *bo_list = NULL;
 	unsigned size, i;
 	int r = 0;
 
@@ -143,7 +188,17 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		r = -EINVAL;
 		goto out;
 	}
-	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+	bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+	if (bo_list && !bo_list->has_userptr) {
+		p->bo_list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
+		if (!p->bo_list)
+			return -ENOMEM;
+		amdgpu_bo_list_copy(p->adev, p->bo_list, bo_list);
+		amdgpu_bo_list_put(bo_list);
+	} else if (bo_list && bo_list->has_userptr)
+		p->bo_list = bo_list;
+	else
+		p->bo_list = NULL;
 
 	/* get chunks */
 	INIT_LIST_HEAD(&p->validated);
@@ -424,8 +479,26 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
 **/
 static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
 {
-	unsigned i;
+	amdgpu_cs_parser_fini_early(parser, error, backoff);
+	amdgpu_cs_parser_fini_late(parser);
+}
 
+static int amdgpu_cs_parser_run_job(
+	struct amdgpu_cs_parser *sched_job)
+{
+	amdgpu_cs_parser_fini_early(sched_job, 0, true);
+	return 0;
+}
+
+static int amdgpu_cs_parser_free_job(
+	struct amdgpu_cs_parser *sched_job)
+{
+	amdgpu_cs_parser_fini_late(sched_job);
+	return 0;
+}
+
+static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
+{
 	if (!error) {
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
@@ -446,11 +519,19 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
+}
 
+static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
+{
+	unsigned i;
 	if (parser->ctx)
 		amdgpu_ctx_put(parser->ctx);
-	if (parser->bo_list)
-		amdgpu_bo_list_put(parser->bo_list);
+	if (parser->bo_list) {
+		if (!parser->bo_list->has_userptr)
+			amdgpu_bo_list_free(parser->bo_list);
+		else
+			amdgpu_bo_list_put(parser->bo_list);
+	}
 	drm_free_large(parser->vm_bos);
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
@@ -461,6 +542,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 	kfree(parser->ibs);
 	if (parser->uf.bo)
 		drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+
+	if (!amdgpu_enable_scheduler)
+		kfree(parser);
 }
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -533,9 +617,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 		goto out;
 	}
 	amdgpu_cs_sync_rings(parser);
-
-	r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
-			       parser->filp);
+	if (!amdgpu_enable_scheduler)
+		r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
+				       parser->filp);
 
 out:
 	mutex_unlock(&vm->mutex);
@@ -731,35 +815,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 	return 0;
 }
 
-int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
-{
-	struct amdgpu_device *adev = dev->dev_private;
-	union drm_amdgpu_cs *cs = data;
-	struct amdgpu_cs_parser parser;
-	int r, i;
-	bool reserved_buffers = false;
-
-	down_read(&adev->exclusive_lock);
-	if (!adev->accel_working) {
-		up_read(&adev->exclusive_lock);
-		return -EBUSY;
-	}
-	/* initialize parser */
-	memset(&parser, 0, sizeof(struct amdgpu_cs_parser));
-	parser.filp = filp;
-	parser.adev = adev;
-	r = amdgpu_cs_parser_init(&parser, data);
-	if (r) {
-		DRM_ERROR("Failed to initialize parser !\n");
-		amdgpu_cs_parser_fini(&parser, r, false);
-		up_read(&adev->exclusive_lock);
-		r = amdgpu_cs_handle_lockup(adev, r);
-		return r;
-	}
-
-	r = amdgpu_cs_parser_relocs(&parser);
-	if (r) {
-		if (r != -ERESTARTSYS) {
+static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job)
+{
+	int r, i;
+	struct amdgpu_cs_parser *parser = sched_job;
+	struct amdgpu_device *adev = sched_job->adev;
+	bool reserved_buffers = false;
+
+	r = amdgpu_cs_parser_relocs(parser);
+	if (r) {
+		if (r != -ERESTARTSYS) {
 			if (r == -ENOMEM)
 				DRM_ERROR("Not enough memory for command submission!\n");
 			else
@@ -769,33 +834,104 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	if (!r) {
 		reserved_buffers = true;
-		r = amdgpu_cs_ib_fill(adev, &parser);
+		r = amdgpu_cs_ib_fill(adev, parser);
 	}
-
 	if (!r) {
-		r = amdgpu_cs_dependencies(adev, &parser);
+		r = amdgpu_cs_dependencies(adev, parser);
 		if (r)
 			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
 	}
+	if (r) {
+		amdgpu_cs_parser_fini(parser, r, reserved_buffers);
+		return r;
+	}
+
+	for (i = 0; i < parser->num_ibs; i++)
+		trace_amdgpu_cs(parser, i);
+
+	r = amdgpu_cs_ib_vm_chunk(adev, parser);
+	return r;
+}
+
+static struct amdgpu_ring *amdgpu_cs_parser_get_ring(
+	struct amdgpu_device *adev,
+	struct amdgpu_cs_parser *parser)
+{
+	int i, r;
+
+	struct amdgpu_cs_chunk *chunk;
+	struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+	struct amdgpu_ring *ring;
+	for (i = 0; i < parser->nchunks; i++) {
+		chunk = &parser->chunks[i];
+		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
+
+		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+			continue;
+
+		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
+				       chunk_ib->ip_instance, chunk_ib->ring,
+				       &ring);
+		if (r)
+			return NULL;
+		break;
+	}
+	return ring;
+}
+
+int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	union drm_amdgpu_cs *cs = data;
+	struct amdgpu_cs_parser *parser;
+	int r;
+
+	down_read(&adev->exclusive_lock);
+	if (!adev->accel_working) {
+		up_read(&adev->exclusive_lock);
+		return -EBUSY;
+	}
 
+	parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
+	if (!parser)
+		return -ENOMEM;
+	r = amdgpu_cs_parser_init(parser, data);
 	if (r) {
-		amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+		DRM_ERROR("Failed to initialize parser !\n");
+		amdgpu_cs_parser_fini(parser, r, false);
 		up_read(&adev->exclusive_lock);
 		r = amdgpu_cs_handle_lockup(adev, r);
 		return r;
 	}
 
-	for (i = 0; i < parser.num_ibs; i++)
-		trace_amdgpu_cs(&parser, i);
-
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
-	if (r) {
-		goto out;
+	if (amdgpu_enable_scheduler && parser->num_ibs) {
+		struct amdgpu_ring * ring =
+			amdgpu_cs_parser_get_ring(adev, parser);
+		parser->uf.sequence = atomic64_inc_return(
+			&parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq);
+		if ((parser->bo_list && parser->bo_list->has_userptr)) {
+			r = amdgpu_cs_parser_prepare_job(parser);
+			if (r)
+				goto out;
+		} else
+			parser->prepare_job = amdgpu_cs_parser_prepare_job;
+
+		parser->run_job = amdgpu_cs_parser_run_job;
+		parser->free_job = amdgpu_cs_parser_free_job;
+		amd_sched_push_job(ring->scheduler,
+				   &parser->ctx->rings[ring->idx].c_entity,
+				   parser);
+		cs->out.handle = parser->uf.sequence;
+		up_read(&adev->exclusive_lock);
+		return 0;
 	}
+	r = amdgpu_cs_parser_prepare_job(parser);
+	if (r)
+		goto out;
 
-	cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
+	cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
 out:
-	amdgpu_cs_parser_fini(&parser, r, true);
+	amdgpu_cs_parser_fini(parser, r, true);
 	up_read(&adev->exclusive_lock);
 	r = amdgpu_cs_handle_lockup(adev, r);
 	return r;
@@ -829,18 +965,24 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;
-
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
-	if (IS_ERR(fence))
-		r = PTR_ERR(fence);
-
-	else if (fence) {
-		r = fence_wait_timeout(fence, true, timeout);
-		fence_put(fence);
-
-	} else
+	if (amdgpu_enable_scheduler) {
+		r = amd_sched_wait_ts(&ctx->rings[ring->idx].c_entity,
+				      wait->in.handle, true, timeout);
+		if (r)
+			return r;
 		r = 1;
+	} else {
+		fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+		if (IS_ERR(fence))
+			r = PTR_ERR(fence);
+
+		else if (fence) {
+			r = fence_wait_timeout(fence, true, timeout);
+			fence_put(fence);
+
+		} else
+			r = 1;
+	}
 	amdgpu_ctx_put(ctx);
 	if (r < 0)
 		return r;
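
Correspondingly, on the wait side the scheduler path treats the returned
handle as a per-entity sequence number and waits through amd_sched_wait_ts()
instead of fetching a fence. The sketch below models such a sequence wait in
userspace; signal_seq() and wait_seq() are hypothetical illustrations of the
completion and wait sides, not the scheduler's actual API.

/* Userspace model of a sequence-number wait: submission bumps a queued
 * sequence and returns it as the handle; the waiter blocks until the
 * signaled sequence catches up. signal_seq()/wait_seq() are hypothetical
 * illustrations, not the scheduler's API. Build with -lpthread. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static uint64_t last_signaled_v_seq;

static void signal_seq(uint64_t seq)		/* completion side */
{
	pthread_mutex_lock(&lock);
	last_signaled_v_seq = seq;
	pthread_cond_broadcast(&done);
	pthread_mutex_unlock(&lock);
}

static int wait_seq(uint64_t handle)		/* ioctl side */
{
	pthread_mutex_lock(&lock);
	while (last_signaled_v_seq < handle)
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);
	return 1;				/* mirrors "r = 1" for completed */
}

static void *worker(void *arg)
{
	signal_seq(*(uint64_t *)arg);
	return NULL;
}

int main(void)
{
	uint64_t handle = 3;			/* what cs->out.handle would carry */
	pthread_t t;

	pthread_create(&t, NULL, worker, &handle);
	printf("wait returned %d\n", wait_seq(handle));
	pthread_join(t, NULL);
	return 0;
}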