diff options
author | Christian König <christian.koenig@amd.com> | 2015-07-07 11:24:49 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2015-08-17 16:50:14 -0400 |
commit | 21c16bf634e62cf9673946f509b469e7f0953ecf (patch) | |
tree | 91595b4cd4064a7867bbdd32cb37fc090c7ec37d /drivers/gpu/drm/amd/amdgpu | |
parent | 91e1a5207edec9e4f888e44478a9a254186e0ba8 (diff) |
drm/amdgpu: add user fence context map v2
This is a prerequisite for the GPU scheduler to make the order
of submission independent from the order of execution.
v2: properly implement the locking
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 60 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 |
4 files changed, 110 insertions, 30 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 70e783a849ed..0220d98ba8bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h | |||
@@ -415,6 +415,8 @@ struct amdgpu_user_fence { | |||
415 | struct amdgpu_bo *bo; | 415 | struct amdgpu_bo *bo; |
416 | /* write-back address offset to bo start */ | 416 | /* write-back address offset to bo start */ |
417 | uint32_t offset; | 417 | uint32_t offset; |
418 | /* resulting sequence number */ | ||
419 | uint64_t sequence; | ||
418 | }; | 420 | }; |
419 | 421 | ||
420 | int amdgpu_fence_driver_init(struct amdgpu_device *adev); | 422 | int amdgpu_fence_driver_init(struct amdgpu_device *adev); |
@@ -985,9 +987,18 @@ struct amdgpu_vm_manager { | |||
985 | * context related structures | 987 | * context related structures |
986 | */ | 988 | */ |
987 | 989 | ||
990 | #define AMDGPU_CTX_MAX_CS_PENDING 16 | ||
991 | |||
992 | struct amdgpu_ctx_ring { | ||
993 | uint64_t sequence; | ||
994 | struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; | ||
995 | }; | ||
996 | |||
988 | struct amdgpu_ctx { | 997 | struct amdgpu_ctx { |
989 | struct kref refcount; | 998 | struct kref refcount; |
990 | unsigned reset_counter; | 999 | unsigned reset_counter; |
1000 | spinlock_t ring_lock; | ||
1001 | struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; | ||
991 | }; | 1002 | }; |
992 | 1003 | ||
993 | struct amdgpu_ctx_mgr { | 1004 | struct amdgpu_ctx_mgr { |
@@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv); | |||
1007 | struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); | 1018 | struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); |
1008 | int amdgpu_ctx_put(struct amdgpu_ctx *ctx); | 1019 | int amdgpu_ctx_put(struct amdgpu_ctx *ctx); |
1009 | 1020 | ||
1021 | uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | ||
1022 | struct fence *fence); | ||
1023 | struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, | ||
1024 | struct amdgpu_ring *ring, uint64_t seq); | ||
1025 | |||
1010 | int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, | 1026 | int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, |
1011 | struct drm_file *filp); | 1027 | struct drm_file *filp); |
1012 | 1028 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 53e6a10fe9f9..cef8360698be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | |||
@@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, | |||
698 | sizeof(struct drm_amdgpu_cs_chunk_dep); | 698 | sizeof(struct drm_amdgpu_cs_chunk_dep); |
699 | 699 | ||
700 | for (j = 0; j < num_deps; ++j) { | 700 | for (j = 0; j < num_deps; ++j) { |
701 | struct amdgpu_fence *fence; | ||
702 | struct amdgpu_ring *ring; | 701 | struct amdgpu_ring *ring; |
703 | struct amdgpu_ctx *ctx; | 702 | struct amdgpu_ctx *ctx; |
703 | struct fence *fence; | ||
704 | 704 | ||
705 | r = amdgpu_cs_get_ring(adev, deps[j].ip_type, | 705 | r = amdgpu_cs_get_ring(adev, deps[j].ip_type, |
706 | deps[j].ip_instance, | 706 | deps[j].ip_instance, |
@@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, | |||
712 | if (ctx == NULL) | 712 | if (ctx == NULL) |
713 | return -EINVAL; | 713 | return -EINVAL; |
714 | 714 | ||
715 | r = amdgpu_fence_recreate(ring, p->filp, | 715 | fence = amdgpu_ctx_get_fence(ctx, ring, |
716 | deps[j].handle, | 716 | deps[j].handle); |
717 | &fence); | 717 | if (IS_ERR(fence)) { |
718 | if (r) { | 718 | r = PTR_ERR(fence); |
719 | amdgpu_ctx_put(ctx); | 719 | amdgpu_ctx_put(ctx); |
720 | return r; | 720 | return r; |
721 | } | ||
722 | |||
723 | r = amdgpu_sync_fence(adev, &ib->sync, &fence->base); | ||
724 | amdgpu_fence_unref(&fence); | ||
725 | amdgpu_ctx_put(ctx); | ||
726 | 721 | ||
727 | if (r) | 722 | } else if (fence) { |
728 | return r; | 723 | r = amdgpu_sync_fence(adev, &ib->sync, fence); |
724 | fence_put(fence); | ||
725 | amdgpu_ctx_put(ctx); | ||
726 | if (r) | ||
727 | return r; | ||
728 | } | ||
729 | } | 729 | } |
730 | } | 730 | } |
731 | 731 | ||
@@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
773 | r = amdgpu_cs_ib_fill(adev, &parser); | 773 | r = amdgpu_cs_ib_fill(adev, &parser); |
774 | } | 774 | } |
775 | 775 | ||
776 | if (!r) | 776 | if (!r) { |
777 | r = amdgpu_cs_dependencies(adev, &parser); | 777 | r = amdgpu_cs_dependencies(adev, &parser); |
778 | if (r) | ||
779 | DRM_ERROR("Failed in the dependencies handling %d!\n", r); | ||
780 | } | ||
778 | 781 | ||
779 | if (r) { | 782 | if (r) { |
780 | amdgpu_cs_parser_fini(&parser, r, reserved_buffers); | 783 | amdgpu_cs_parser_fini(&parser, r, reserved_buffers); |
@@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
791 | goto out; | 794 | goto out; |
792 | } | 795 | } |
793 | 796 | ||
794 | cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq; | 797 | cs->out.handle = parser.uf.sequence; |
795 | out: | 798 | out: |
796 | amdgpu_cs_parser_fini(&parser, r, true); | 799 | amdgpu_cs_parser_fini(&parser, r, true); |
797 | up_read(&adev->exclusive_lock); | 800 | up_read(&adev->exclusive_lock); |
@@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, | |||
814 | union drm_amdgpu_wait_cs *wait = data; | 817 | union drm_amdgpu_wait_cs *wait = data; |
815 | struct amdgpu_device *adev = dev->dev_private; | 818 | struct amdgpu_device *adev = dev->dev_private; |
816 | unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); | 819 | unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); |
817 | struct amdgpu_fence *fence = NULL; | ||
818 | struct amdgpu_ring *ring = NULL; | 820 | struct amdgpu_ring *ring = NULL; |
819 | struct amdgpu_ctx *ctx; | 821 | struct amdgpu_ctx *ctx; |
822 | struct fence *fence; | ||
820 | long r; | 823 | long r; |
821 | 824 | ||
825 | r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, | ||
826 | wait->in.ring, &ring); | ||
827 | if (r) | ||
828 | return r; | ||
829 | |||
822 | ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); | 830 | ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); |
823 | if (ctx == NULL) | 831 | if (ctx == NULL) |
824 | return -EINVAL; | 832 | return -EINVAL; |
825 | 833 | ||
826 | r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, | 834 | fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); |
827 | wait->in.ring, &ring); | 835 | if (IS_ERR(fence)) |
828 | if (r) { | 836 | r = PTR_ERR(fence); |
829 | amdgpu_ctx_put(ctx); | ||
830 | return r; | ||
831 | } | ||
832 | 837 | ||
833 | r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); | 838 | else if (fence) { |
834 | if (r) { | 839 | r = fence_wait_timeout(fence, true, timeout); |
835 | amdgpu_ctx_put(ctx); | 840 | fence_put(fence); |
836 | return r; | 841 | |
837 | } | 842 | } else |
843 | r = 1; | ||
838 | 844 | ||
839 | r = fence_wait_timeout(&fence->base, true, timeout); | ||
840 | amdgpu_fence_unref(&fence); | ||
841 | amdgpu_ctx_put(ctx); | 845 | amdgpu_ctx_put(ctx); |
842 | if (r < 0) | 846 | if (r < 0) |
843 | return r; | 847 | return r; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e63cfb7fa390..c23bfd8fe414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | |||
@@ -28,17 +28,22 @@ | |||
28 | static void amdgpu_ctx_do_release(struct kref *ref) | 28 | static void amdgpu_ctx_do_release(struct kref *ref) |
29 | { | 29 | { |
30 | struct amdgpu_ctx *ctx; | 30 | struct amdgpu_ctx *ctx; |
31 | unsigned i, j; | ||
31 | 32 | ||
32 | ctx = container_of(ref, struct amdgpu_ctx, refcount); | 33 | ctx = container_of(ref, struct amdgpu_ctx, refcount); |
34 | |||
35 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | ||
36 | for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) | ||
37 | fence_put(ctx->rings[i].fences[j]); | ||
33 | kfree(ctx); | 38 | kfree(ctx); |
34 | } | 39 | } |
35 | 40 | ||
36 | int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, | 41 | int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, |
37 | uint32_t *id) | 42 | uint32_t *id) |
38 | { | 43 | { |
39 | int r; | ||
40 | struct amdgpu_ctx *ctx; | 44 | struct amdgpu_ctx *ctx; |
41 | struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; | 45 | struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; |
46 | int i, r; | ||
42 | 47 | ||
43 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 48 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); |
44 | if (!ctx) | 49 | if (!ctx) |
@@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, | |||
55 | 60 | ||
56 | memset(ctx, 0, sizeof(*ctx)); | 61 | memset(ctx, 0, sizeof(*ctx)); |
57 | kref_init(&ctx->refcount); | 62 | kref_init(&ctx->refcount); |
63 | spin_lock_init(&ctx->ring_lock); | ||
64 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | ||
65 | ctx->rings[i].sequence = 1; | ||
58 | mutex_unlock(&mgr->lock); | 66 | mutex_unlock(&mgr->lock); |
59 | 67 | ||
60 | return 0; | 68 | return 0; |
@@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) | |||
177 | kref_put(&ctx->refcount, amdgpu_ctx_do_release); | 185 | kref_put(&ctx->refcount, amdgpu_ctx_do_release); |
178 | return 0; | 186 | return 0; |
179 | } | 187 | } |
188 | |||
189 | uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, | ||
190 | struct fence *fence) | ||
191 | { | ||
192 | struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; | ||
193 | uint64_t seq = cring->sequence; | ||
194 | unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING; | ||
195 | struct fence *other = cring->fences[idx]; | ||
196 | |||
197 | if (other) { | ||
198 | signed long r; | ||
199 | r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); | ||
200 | if (r < 0) | ||
201 | DRM_ERROR("Error (%ld) waiting for fence!\n", r); | ||
202 | } | ||
203 | |||
204 | fence_get(fence); | ||
205 | |||
206 | spin_lock(&ctx->ring_lock); | ||
207 | cring->fences[idx] = fence; | ||
208 | cring->sequence++; | ||
209 | spin_unlock(&ctx->ring_lock); | ||
210 | |||
211 | fence_put(other); | ||
212 | |||
213 | return seq; | ||
214 | } | ||
215 | |||
216 | struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, | ||
217 | struct amdgpu_ring *ring, uint64_t seq) | ||
218 | { | ||
219 | struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; | ||
220 | struct fence *fence; | ||
221 | |||
222 | spin_lock(&ctx->ring_lock); | ||
223 | if (seq >= cring->sequence) { | ||
224 | spin_unlock(&ctx->ring_lock); | ||
225 | return ERR_PTR(-EINVAL); | ||
226 | } | ||
227 | |||
228 | if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) { | ||
229 | spin_unlock(&ctx->ring_lock); | ||
230 | return NULL; | ||
231 | } | ||
232 | |||
233 | fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]); | ||
234 | spin_unlock(&ctx->ring_lock); | ||
235 | |||
236 | return fence; | ||
237 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2722815eddbb..95d533422a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | |||
@@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, | |||
219 | /* wrap the last IB with fence */ | 219 | /* wrap the last IB with fence */ |
220 | if (ib->user) { | 220 | if (ib->user) { |
221 | uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); | 221 | uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); |
222 | ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, | ||
223 | &ib->fence->base); | ||
222 | addr += ib->user->offset; | 224 | addr += ib->user->offset; |
223 | amdgpu_ring_emit_fence(ring, addr, ib->fence->seq, | 225 | amdgpu_ring_emit_fence(ring, addr, ib->user->sequence, |
224 | AMDGPU_FENCE_FLAG_64BIT); | 226 | AMDGPU_FENCE_FLAG_64BIT); |
225 | } | 227 | } |
226 | 228 | ||