aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndres Rodriguez <andresx7@gmail.com>2018-01-02 15:49:40 -0500
committerAlex Deucher <alexander.deucher@amd.com>2018-02-19 14:17:10 -0500
commit761c77c195bf27d5127bbf4bcee83a112ebe6501 (patch)
treea03b0e2e696f33d6f84bbd1826e4936990e8eaba
parent10cd19c87736c1354ef7c175729433b73a988fb1 (diff)
drm/amdgpu: add high priority compute support for gfx9
We follow the same approach as gfx8. The only changes are the register access macros. Tested on vega10; the execution latency results fall within the expected ranges from the polaris10 data.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c100
1 file changed, 100 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c06479615e8a..ee5464b43e9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3735,6 +3735,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3735 return wptr; 3735 return wptr;
3736} 3736}
3737 3737
3738static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
3739 bool acquire)
3740{
3741 struct amdgpu_device *adev = ring->adev;
3742 int pipe_num, tmp, reg;
3743 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
3744
3745 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
3746
3747 /* first me only has 2 entries, GFX and HP3D */
3748 if (ring->me > 0)
3749 pipe_num -= 2;
3750
3751 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
3752 tmp = RREG32(reg);
3753 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
3754 WREG32(reg, tmp);
3755}
3756
3757static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
3758 struct amdgpu_ring *ring,
3759 bool acquire)
3760{
3761 int i, pipe;
3762 bool reserve;
3763 struct amdgpu_ring *iring;
3764
3765 mutex_lock(&adev->gfx.pipe_reserve_mutex);
3766 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
3767 if (acquire)
3768 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3769 else
3770 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3771
3772 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
3773 /* Clear all reservations - everyone reacquires all resources */
3774 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
3775 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
3776 true);
3777
3778 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
3779 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
3780 true);
3781 } else {
3782 /* Lower all pipes without a current reservation */
3783 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
3784 iring = &adev->gfx.gfx_ring[i];
3785 pipe = amdgpu_gfx_queue_to_bit(adev,
3786 iring->me,
3787 iring->pipe,
3788 0);
3789 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3790 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3791 }
3792
3793 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
3794 iring = &adev->gfx.compute_ring[i];
3795 pipe = amdgpu_gfx_queue_to_bit(adev,
3796 iring->me,
3797 iring->pipe,
3798 0);
3799 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3800 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3801 }
3802 }
3803
3804 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
3805}
3806
3807static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
3808 struct amdgpu_ring *ring,
3809 bool acquire)
3810{
3811 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
3812 uint32_t queue_priority = acquire ? 0xf : 0x0;
3813
3814 mutex_lock(&adev->srbm_mutex);
3815 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3816
3817 WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
3818 WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
3819
3820 soc15_grbm_select(adev, 0, 0, 0, 0);
3821 mutex_unlock(&adev->srbm_mutex);
3822}
3823
3824static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
3825 enum drm_sched_priority priority)
3826{
3827 struct amdgpu_device *adev = ring->adev;
3828 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
3829
3830 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
3831 return;
3832
3833 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
3834 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
3835}
3836
3738static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 3837static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3739{ 3838{
3740 struct amdgpu_device *adev = ring->adev; 3839 struct amdgpu_device *adev = ring->adev;
@@ -4261,6 +4360,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
4261 .test_ib = gfx_v9_0_ring_test_ib, 4360 .test_ib = gfx_v9_0_ring_test_ib,
4262 .insert_nop = amdgpu_ring_insert_nop, 4361 .insert_nop = amdgpu_ring_insert_nop,
4263 .pad_ib = amdgpu_ring_generic_pad_ib, 4362 .pad_ib = amdgpu_ring_generic_pad_ib,
4363 .set_priority = gfx_v9_0_ring_set_priority_compute,
4264}; 4364};
4265 4365
4266static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 4366static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {