author	Christian König <christian.koenig@amd.com>	2018-08-01 10:00:52 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2018-08-27 12:11:11 -0400
commit	1b1f2fecb699bb4ccc3cb2fafe92950e9bdb39de (patch)
tree	076c814bb4d51ba7bd3283a95cddbe10ac112352 /drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
parent	a245daf3d7a143fb2df16485ad200aa3298eac8c (diff)
drm/amdgpu: rework ctx entity creation
Use a fixed number of entities for each hardware IP. The number of compute
entities is reduced to four, SDMA keeps its two entities and all other
engines just expose one entity.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
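To make the new layout concrete, the sketch below is a stand-alone, user-space illustration of the same scheme: a fixed per-IP entity table, one contiguous allocation sliced per IP so that entities[hw_ip][ring] indexes directly, and the bounds check that replaces the old per-ring switch. The enum values, struct fields and helper names here are simplified stand-ins, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for the kernel's AMDGPU_HW_IP_* enum; only the
 * layout logic of the patch is mirrored here, not the real driver API. */
enum hw_ip {
	HW_IP_GFX, HW_IP_COMPUTE, HW_IP_DMA, HW_IP_UVD, HW_IP_VCE,
	HW_IP_UVD_ENC, HW_IP_VCN_DEC, HW_IP_VCN_ENC, HW_IP_NUM
};

/* Fixed entity count per hardware IP, matching the table the patch adds:
 * four for compute, two for SDMA, one for everything else. */
static const unsigned int num_entities[HW_IP_NUM] = {
	[HW_IP_GFX] = 1, [HW_IP_COMPUTE] = 4, [HW_IP_DMA] = 2,
	[HW_IP_UVD] = 1, [HW_IP_VCE] = 1, [HW_IP_UVD_ENC] = 1,
	[HW_IP_VCN_DEC] = 1, [HW_IP_VCN_ENC] = 1,
};

struct ctx_entity {
	uint64_t sequence;	/* per-entity fence sequence, starts at 1 */
};

struct ctx {
	/* entities[0] owns one contiguous allocation; entities[i] for i > 0
	 * are just offsets into it, one slice per hardware IP. */
	struct ctx_entity *entities[HW_IP_NUM];
};

static unsigned int total_entities(void)
{
	unsigned int i, n = 0;

	for (i = 0; i < HW_IP_NUM; ++i)
		n += num_entities[i];
	return n;
}

static int ctx_init(struct ctx *ctx)
{
	unsigned int i;

	ctx->entities[0] = calloc(total_entities(), sizeof(struct ctx_entity));
	if (!ctx->entities[0])
		return -1;

	for (i = 0; i < total_entities(); ++i)
		ctx->entities[0][i].sequence = 1;

	/* Slice the flat array so entities[hw_ip][ring] indexes directly. */
	for (i = 1; i < HW_IP_NUM; ++i)
		ctx->entities[i] = ctx->entities[i - 1] + num_entities[i - 1];
	return 0;
}

/* Mirrors the shape of amdgpu_ctx_get_entity(): validate, then index. */
static struct ctx_entity *ctx_get_entity(struct ctx *ctx,
					 unsigned int hw_ip, unsigned int ring)
{
	if (hw_ip >= HW_IP_NUM || ring >= num_entities[hw_ip])
		return NULL;
	return &ctx->entities[hw_ip][ring];
}

int main(void)
{
	struct ctx ctx;

	if (ctx_init(&ctx))
		return 1;

	/* The third compute entity sits at flat index 1 (GFX) + 2 = 3. */
	printf("compute[2] flat index: %td\n",
	       ctx_get_entity(&ctx, HW_IP_COMPUTE, 2) - ctx.entities[0]);

	free(ctx.entities[0]);
	return 0;
}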
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 291
1 file changed, 149 insertions(+), 142 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0a6cd1202ee5..987b7f256463 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -27,8 +27,29 @@
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
 
-#define to_amdgpu_ctx_ring(e) \
-	container_of((e), struct amdgpu_ctx_ring, entity)
+#define to_amdgpu_ctx_entity(e) \
+	container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+	[AMDGPU_HW_IP_GFX]	= 1,
+	[AMDGPU_HW_IP_COMPUTE]	= 4,
+	[AMDGPU_HW_IP_DMA]	= 2,
+	[AMDGPU_HW_IP_UVD]	= 1,
+	[AMDGPU_HW_IP_VCE]	= 1,
+	[AMDGPU_HW_IP_UVD_ENC]	= 1,
+	[AMDGPU_HW_IP_VCN_DEC]	= 1,
+	[AMDGPU_HW_IP_VCN_ENC]	= 1,
+};
+
+static int amdgput_ctx_total_num_entities(void)
+{
+	unsigned i, num_entities = 0;
+
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		num_entities += amdgpu_ctx_num_entities[i];
+
+	return num_entities;
+}
 
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
@@ -51,9 +72,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct drm_file *filp,
 			   struct amdgpu_ctx *ctx)
 {
-	struct drm_sched_rq *sdma_rqs[AMDGPU_MAX_RINGS];
-	struct drm_sched_rq *comp_rqs[AMDGPU_MAX_RINGS];
-	unsigned i, j, num_sdma_rqs, num_comp_rqs;
+	unsigned num_entities = amdgput_ctx_total_num_entities();
+	unsigned i, j;
 	int r;
 
 	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -65,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->adev = adev;
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+
+	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
 			      sizeof(struct dma_fence*), GFP_KERNEL);
 	if (!ctx->fences)
 		return -ENOMEM;
 
-	mutex_init(&ctx->lock);
+	ctx->entities[0] = kcalloc(num_entities,
+				   sizeof(struct amdgpu_ctx_entity),
+				   GFP_KERNEL);
+	if (!ctx->entities[0]) {
+		r = -ENOMEM;
+		goto error_free_fences;
+	}
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		ctx->rings[i].sequence = 1;
-		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
+	for (i = 0; i < num_entities; ++i) {
+		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+
+		entity->sequence = 1;
+		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
+	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
+		ctx->entities[i] = ctx->entities[i - 1] +
+				   amdgpu_ctx_num_entities[i - 1];
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 	ctx->reset_counter_query = ctx->reset_counter;
@@ -85,50 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->init_priority = priority;
 	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
-	num_sdma_rqs = 0;
-	num_comp_rqs = 0;
-	for (i = 0; i < adev->num_rings; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		struct drm_sched_rq *rq;
-
-		rq = &ring->sched.sched_rq[priority];
-		if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA)
-			sdma_rqs[num_sdma_rqs++] = rq;
-		else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
-			comp_rqs[num_comp_rqs++] = rq;
-	}
-
-	/* create context entity for each ring */
-	for (i = 0; i < adev->num_rings; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+		unsigned num_rings;
+
+		switch (i) {
+		case AMDGPU_HW_IP_GFX:
+			rings[0] = &adev->gfx.gfx_ring[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_COMPUTE:
+			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
+				rings[j] = &adev->gfx.compute_ring[j];
+			num_rings = adev->gfx.num_compute_rings;
+			break;
+		case AMDGPU_HW_IP_DMA:
+			for (j = 0; j < adev->sdma.num_instances; ++j)
+				rings[j] = &adev->sdma.instance[j].ring;
+			num_rings = adev->sdma.num_instances;
+			break;
+		case AMDGPU_HW_IP_UVD:
+			rings[0] = &adev->uvd.inst[0].ring;
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCE:
+			rings[0] = &adev->vce.ring[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_UVD_ENC:
+			rings[0] = &adev->uvd.inst[0].ring_enc[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_DEC:
+			rings[0] = &adev->vcn.ring_dec;
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_ENC:
+			rings[0] = &adev->vcn.ring_enc[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_JPEG:
+			rings[0] = &adev->vcn.ring_jpeg;
+			num_rings = 1;
+			break;
+		}
 
-		if (ring == &adev->gfx.kiq.ring)
-			continue;
+		for (j = 0; j < num_rings; ++j)
+			rqs[j] = &rings[j]->sched.sched_rq[priority];
 
-		if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
-			r = drm_sched_entity_init(&ctx->rings[i].entity,
-						  sdma_rqs, num_sdma_rqs,
-						  &ctx->guilty);
-		} else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
-			r = drm_sched_entity_init(&ctx->rings[i].entity,
-						  comp_rqs, num_comp_rqs,
-						  &ctx->guilty);
-		} else {
-			struct drm_sched_rq *rq;
-
-			rq = &ring->sched.sched_rq[priority];
-			r = drm_sched_entity_init(&ctx->rings[i].entity,
-						  &rq, 1, &ctx->guilty);
-		}
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
+						  rqs, num_rings, &ctx->guilty);
 		if (r)
-			goto failed;
+			goto error_cleanup_entities;
 	}
 
 	return 0;
 
-failed:
-	for (j = 0; j < i; j++)
-		drm_sched_entity_destroy(&ctx->rings[j].entity);
+error_cleanup_entities:
+	for (i = 0; i < num_entities; ++i)
+		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+	kfree(ctx->entities[0]);
+
+error_free_fences:
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 	return r;
@@ -137,17 +191,18 @@ failed:
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+	for (i = 0; i < num_entities; ++i)
 		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(ctx->rings[i].fences[j]);
+			dma_fence_put(ctx->entities[0][i].fences[j]);
 	kfree(ctx->fences);
-	ctx->fences = NULL;
+	kfree(ctx->entities[0]);
 
 	mutex_destroy(&ctx->lock);
 
@@ -157,9 +212,10 @@ static void amdgpu_ctx_fini(struct kref *ref)
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
-	struct amdgpu_device *adev = ctx->adev;
-	unsigned num_rings = 0;
-	struct amdgpu_ring *out_ring;
+	if (hw_ip >= AMDGPU_HW_IP_NUM) {
+		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+		return -EINVAL;
+	}
 
 	/* Right now all IPs have only one instance - multiple rings. */
 	if (instance != 0) {
@@ -167,52 +223,12 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
-	switch (hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-		out_ring = &adev->gfx.gfx_ring[ring];
-		num_rings = adev->gfx.num_gfx_rings;
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		out_ring = &adev->gfx.compute_ring[ring];
-		num_rings = adev->gfx.num_compute_rings;
-		break;
-	case AMDGPU_HW_IP_DMA:
-		out_ring = &adev->sdma.instance[ring].ring;
-		num_rings = adev->sdma.num_instances;
-		break;
-	case AMDGPU_HW_IP_UVD:
-		out_ring = &adev->uvd.inst[0].ring;
-		num_rings = adev->uvd.num_uvd_inst;
-		break;
-	case AMDGPU_HW_IP_VCE:
-		out_ring = &adev->vce.ring[ring];
-		num_rings = adev->vce.num_rings;
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		out_ring = &adev->uvd.inst[0].ring_enc[ring];
-		num_rings = adev->uvd.num_enc_rings;
-		break;
-	case AMDGPU_HW_IP_VCN_DEC:
-		out_ring = &adev->vcn.ring_dec;
-		num_rings = 1;
-		break;
-	case AMDGPU_HW_IP_VCN_ENC:
-		out_ring = &adev->vcn.ring_enc[ring];
-		num_rings = adev->vcn.num_enc_rings;
-		break;
-	case AMDGPU_HW_IP_VCN_JPEG:
-		out_ring = &adev->vcn.ring_jpeg;
-		num_rings = 1;
-		break;
-	default:
-		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
 		return -EINVAL;
 	}
 
-	if (ring > num_rings)
-		return -EINVAL;
-
-	*entity = &ctx->rings[out_ring->idx].entity;
+	*entity = &ctx->entities[hw_ip][ring].entity;
 	return 0;
 }
 
@@ -252,17 +268,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
+	unsigned num_entities;
 	u32 i;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
 
-	for (i = 0; i < ctx->adev->num_rings; i++) {
+	num_entities = 0;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
+		num_entities += amdgpu_ctx_num_entities[i];
 
-		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-			continue;
-
-		drm_sched_entity_destroy(&ctx->rings[i].entity);
-	}
+	for (i = 0; i < num_entities; i++)
+		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
 
 	amdgpu_ctx_fini(ref);
 }
@@ -422,21 +438,21 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 			 struct drm_sched_entity *entity,
 			 struct dma_fence *fence, uint64_t* handle)
 {
-	struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
-	uint64_t seq = cring->sequence;
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	uint64_t seq = centity->sequence;
 	struct dma_fence *other = NULL;
 	unsigned idx = 0;
 
 	idx = seq & (amdgpu_sched_jobs - 1);
-	other = cring->fences[idx];
+	other = centity->fences[idx];
 	if (other)
 		BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
 	spin_lock(&ctx->ring_lock);
-	cring->fences[idx] = fence;
-	cring->sequence++;
+	centity->fences[idx] = fence;
+	centity->sequence++;
 	spin_unlock(&ctx->ring_lock);
 
 	dma_fence_put(other);
@@ -450,26 +466,26 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				       struct drm_sched_entity *entity,
 				       uint64_t seq)
 {
-	struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->ring_lock);
 
 	if (seq == ~0ull)
-		seq = cring->sequence - 1;
+		seq = centity->sequence - 1;
 
-	if (seq >= cring->sequence) {
+	if (seq >= centity->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return ERR_PTR(-EINVAL);
 	}
 
 
-	if (seq + amdgpu_sched_jobs < cring->sequence) {
+	if (seq + amdgpu_sched_jobs < centity->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return NULL;
 	}
 
-	fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
 	spin_unlock(&ctx->ring_lock);
 
 	return fence;
@@ -478,23 +494,17 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	int i;
-	struct amdgpu_device *adev = ctx->adev;
-	struct drm_sched_entity *entity;
-	struct amdgpu_ring *ring;
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
+	unsigned i;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
 
-	for (i = 0; i < adev->num_rings; i++) {
-		ring = adev->rings[i];
-		entity = &ctx->rings[i].entity;
-
-		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
-			continue;
+	for (i = 0; i < num_entities; i++) {
+		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
 
 		drm_sched_entity_set_priority(entity, ctx_prio);
 	}
@@ -503,9 +513,9 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 			       struct drm_sched_entity *entity)
 {
-	struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
-	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
-	struct dma_fence *other = cring->fences[idx];
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
+	struct dma_fence *other = centity->fences[idx];
 
 	if (other) {
 		signed long r;
@@ -529,6 +539,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 {
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
 	uint32_t id, i;
@@ -544,13 +555,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 			return;
 		}
 
-		for (i = 0; i < ctx->adev->num_rings; i++) {
+		for (i = 0; i < num_entities; i++) {
+			struct drm_sched_entity *entity;
 
-			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-				continue;
-
-			max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
-							  max_wait);
+			entity = &ctx->entities[0][i].entity;
+			max_wait = drm_sched_entity_flush(entity, max_wait);
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -558,6 +567,7 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
 	uint32_t id, i;
@@ -569,16 +579,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 		if (!ctx->adev)
 			return;
 
-		for (i = 0; i < ctx->adev->num_rings; i++) {
-
-			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-				continue;
-
-			if (kref_read(&ctx->refcount) == 1)
-				drm_sched_entity_fini(&ctx->rings[i].entity);
-			else
-				DRM_ERROR("ctx %p is still alive\n", ctx);
+		if (kref_read(&ctx->refcount) != 1) {
+			DRM_ERROR("ctx %p is still alive\n", ctx);
+			continue;
 		}
+
+		for (i = 0; i < num_entities; i++)
+			drm_sched_entity_fini(&ctx->entities[0][i].entity);
 	}
 }
 