aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndres Rodriguez <andresx7@gmail.com>2018-01-02 15:49:40 -0500
committerAlex Deucher <alexander.deucher@amd.com>2018-02-19 14:17:10 -0500
commit761c77c195bf27d5127bbf4bcee83a112ebe6501 (patch)
treea03b0e2e696f33d6f84bbd1826e4936990e8eaba
parent10cd19c87736c1354ef7c175729433b73a988fb1 (diff)
drm/amdgpu: add high priority compute support for gfx9
We follow the same approach as gfx8. The only changes are the register access macros. Tested on vega10; the execution latency results fall within the expected ranges from the polaris10 data.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c100
1 file changed, 100 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c06479615e8a..ee5464b43e9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3735,6 +3735,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3735 return wptr; 3735 return wptr;
3736} 3736}
3737 3737
3738static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
3739 bool acquire)
3740{
3741 struct amdgpu_device *adev = ring->adev;
3742 int pipe_num, tmp, reg;
3743 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
3744
3745 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
3746
3747 /* first me only has 2 entries, GFX and HP3D */
3748 if (ring->me > 0)
3749 pipe_num -= 2;
3750
3751 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
3752 tmp = RREG32(reg);
3753 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
3754 WREG32(reg, tmp);
3755}
3756
3757static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
3758 struct amdgpu_ring *ring,
3759 bool acquire)
3760{
3761 int i, pipe;
3762 bool reserve;
3763 struct amdgpu_ring *iring;
3764
3765 mutex_lock(&adev->gfx.pipe_reserve_mutex);
3766 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
3767 if (acquire)
3768 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3769 else
3770 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3771
3772 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
3773 /* Clear all reservations - everyone reacquires all resources */
3774 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
3775 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
3776 true);
3777
3778 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
3779 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
3780 true);
3781 } else {
3782 /* Lower all pipes without a current reservation */
3783 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
3784 iring = &adev->gfx.gfx_ring[i];
3785 pipe = amdgpu_gfx_queue_to_bit(adev,
3786 iring->me,
3787 iring->pipe,
3788 0);
3789 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3790 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3791 }
3792
3793 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
3794 iring = &adev->gfx.compute_ring[i];
3795 pipe = amdgpu_gfx_queue_to_bit(adev,
3796 iring->me,
3797 iring->pipe,
3798 0);
3799 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3800 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3801 }
3802 }
3803
3804 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
3805}
3806
3807static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
3808 struct amdgpu_ring *ring,
3809 bool acquire)
3810{
3811 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
3812 uint32_t queue_priority = acquire ? 0xf : 0x0;
3813
3814 mutex_lock(&adev->srbm_mutex);
3815 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3816
3817 WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
3818 WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
3819
3820 soc15_grbm_select(adev, 0, 0, 0, 0);
3821 mutex_unlock(&adev->srbm_mutex);
3822}
3823
3824static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
3825 enum drm_sched_priority priority)
3826{
3827 struct amdgpu_device *adev = ring->adev;
3828 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
3829
3830 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
3831 return;
3832
3833 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
3834 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
3835}
3836
3738static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 3837static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3739{ 3838{
3740 struct amdgpu_device *adev = ring->adev; 3839 struct amdgpu_device *adev = ring->adev;
@@ -4261,6 +4360,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
4261 .test_ib = gfx_v9_0_ring_test_ib, 4360 .test_ib = gfx_v9_0_ring_test_ib,
4262 .insert_nop = amdgpu_ring_insert_nop, 4361 .insert_nop = amdgpu_ring_insert_nop,
4263 .pad_ib = amdgpu_ring_generic_pad_ib, 4362 .pad_ib = amdgpu_ring_generic_pad_ib,
4363 .set_priority = gfx_v9_0_ring_set_priority_compute,
4264}; 4364};
4265 4365
4266static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 4366static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {