aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorAndres Rodriguez <andresx7@gmail.com>2017-02-03 17:31:38 -0500
committerAlex Deucher <alexander.deucher@amd.com>2017-05-31 16:48:56 -0400
commite33fec4835b9d2b1f8f8b1eb7c3415ab75a0faf5 (patch)
treed143f7d1d1744394ebda980e045f36bdc566ad64 /drivers/gpu/drm/amd/amdgpu
parent5e7095625d3ffd74165e5a6f46e894395caeabe8 (diff)
drm/amdgpu: allocate queues horizontally across pipes
Pipes provide better concurrency than queues, therefore we want to make sure that apps use queues from different pipes whenever possible. Optimize for the trivial case where an app will consume rings in order, therefore we don't want adjacent rings to belong to the same pipe.

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c83
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c86
3 files changed, 113 insertions, 69 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4b8710b2f459..30ba29adddd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1803,6 +1803,19 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
1803 return NULL; 1803 return NULL;
1804} 1804}
1805 1805
1806static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
1807 int mec, int pipe, int queue)
1808{
1809 int bit = 0;
1810
1811 bit += mec * adev->gfx.mec.num_pipe_per_mec
1812 * adev->gfx.mec.num_queue_per_pipe;
1813 bit += pipe * adev->gfx.mec.num_queue_per_pipe;
1814 bit += queue;
1815
1816 return test_bit(bit, adev->gfx.mec.queue_bitmap);
1817}
1818
1806/* 1819/*
1807 * ASICs macro. 1820 * ASICs macro.
1808 */ 1821 */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 4e1af6f48737..7e1db63b5388 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4752,11 +4752,42 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4752 adev->gfx.config.gb_addr_config = gb_addr_config; 4752 adev->gfx.config.gb_addr_config = gb_addr_config;
4753} 4753}
4754 4754
4755static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
4756 int mec, int pipe, int queue)
4757{
4758 int r;
4759 unsigned irq_type;
4760 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
4761
4762 /* mec0 is me1 */
4763 ring->me = mec + 1;
4764 ring->pipe = pipe;
4765 ring->queue = queue;
4766
4767 ring->ring_obj = NULL;
4768 ring->use_doorbell = true;
4769 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
4770 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
4771
4772 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4773 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
4774 + ring->pipe;
4775
4776 /* type-2 packets are deprecated on MEC, use type-3 instead */
4777 r = amdgpu_ring_init(adev, ring, 1024,
4778 &adev->gfx.eop_irq, irq_type);
4779 if (r)
4780 return r;
4781
4782
4783 return 0;
4784}
4785
4755static int gfx_v7_0_sw_init(void *handle) 4786static int gfx_v7_0_sw_init(void *handle)
4756{ 4787{
4757 struct amdgpu_ring *ring; 4788 struct amdgpu_ring *ring;
4758 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4789 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4759 int i, r, ring_id; 4790 int i, j, k, r, ring_id;
4760 4791
4761 /* EOP Event */ 4792 /* EOP Event */
4762 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); 4793 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4806,39 +4837,23 @@ static int gfx_v7_0_sw_init(void *handle)
4806 return r; 4837 return r;
4807 } 4838 }
4808 4839
4809 /* set up the compute queues */ 4840 /* set up the compute queues - allocate horizontally across pipes */
4810 for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) { 4841 ring_id = 0;
4811 unsigned irq_type; 4842 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4812 4843 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4813 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4844 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4814 continue; 4845 if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
4815 4846 continue;
4816 ring = &adev->gfx.compute_ring[ring_id]; 4847
4817 4848 r = gfx_v7_0_compute_ring_init(adev,
4818 /* mec0 is me1 */ 4849 ring_id,
4819 ring->me = ((i / adev->gfx.mec.num_queue_per_pipe) 4850 i, k, j);
4820 / adev->gfx.mec.num_pipe_per_mec) 4851 if (r)
4821 + 1; 4852 return r;
4822 ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe) 4853
4823 % adev->gfx.mec.num_pipe_per_mec; 4854 ring_id++;
4824 ring->queue = i % adev->gfx.mec.num_queue_per_pipe; 4855 }
4825 4856 }
4826 ring->ring_obj = NULL;
4827 ring->use_doorbell = true;
4828 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
4829 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
4830
4831 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4832 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
4833 + ring->pipe;
4834
4835 /* type-2 packets are deprecated on MEC, use type-3 instead */
4836 r = amdgpu_ring_init(adev, ring, 1024,
4837 &adev->gfx.eop_irq, irq_type);
4838 if (r)
4839 return r;
4840
4841 ring_id++;
4842 } 4857 }
4843 4858
4844 /* reserve GDS, GWS and OA resource for gfx */ 4859 /* reserve GDS, GWS and OA resource for gfx */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 63961a0f1da6..65c45ae67157 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2139,9 +2139,44 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
2139 return 0; 2139 return 0;
2140} 2140}
2141 2141
2142static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2143 int mec, int pipe, int queue)
2144{
2145 int r;
2146 unsigned irq_type;
2147 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2148
2149 ring = &adev->gfx.compute_ring[ring_id];
2150
2151 /* mec0 is me1 */
2152 ring->me = mec + 1;
2153 ring->pipe = pipe;
2154 ring->queue = queue;
2155
2156 ring->ring_obj = NULL;
2157 ring->use_doorbell = true;
2158 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2159 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2160 + (ring_id * GFX8_MEC_HPD_SIZE);
2161 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2162
2163 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2164 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2165 + ring->pipe;
2166
2167 /* type-2 packets are deprecated on MEC, use type-3 instead */
2168 r = amdgpu_ring_init(adev, ring, 1024,
2169 &adev->gfx.eop_irq, irq_type);
2170 if (r)
2171 return r;
2172
2173
2174 return 0;
2175}
2176
2142static int gfx_v8_0_sw_init(void *handle) 2177static int gfx_v8_0_sw_init(void *handle)
2143{ 2178{
2144 int i, r, ring_id; 2179 int i, j, k, r, ring_id;
2145 struct amdgpu_ring *ring; 2180 struct amdgpu_ring *ring;
2146 struct amdgpu_kiq *kiq; 2181 struct amdgpu_kiq *kiq;
2147 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2182 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2207,43 +2242,24 @@ static int gfx_v8_0_sw_init(void *handle)
2207 return r; 2242 return r;
2208 } 2243 }
2209 2244
2210 /* set up the compute queues */
2211 for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
2212 unsigned irq_type;
2213
2214 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2215 continue;
2216
2217 if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
2218 break;
2219
2220 ring = &adev->gfx.compute_ring[ring_id];
2221
2222 /* mec0 is me1 */
2223 ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
2224 / adev->gfx.mec.num_pipe_per_mec)
2225 + 1;
2226 ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
2227 % adev->gfx.mec.num_pipe_per_mec;
2228 ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
2229
2230 ring->ring_obj = NULL;
2231 ring->use_doorbell = true;
2232 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
2233 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2234 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2235 2245
2236 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2246 /* set up the compute queues - allocate horizontally across pipes */
2237 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2247 ring_id = 0;
2238 + ring->pipe; 2248 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2249 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2250 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2251 if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
2252 continue;
2239 2253
2240 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2254 r = gfx_v8_0_compute_ring_init(adev,
2241 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2255 ring_id,
2242 irq_type); 2256 i, k, j);
2243 if (r) 2257 if (r)
2244 return r; 2258 return r;
2245 2259
2246 ring_id++; 2260 ring_id++;
2261 }
2262 }
2247 } 2263 }
2248 2264
2249 r = gfx_v8_0_kiq_init(adev); 2265 r = gfx_v8_0_kiq_init(adev);