diff options
author | Pixel Ding <Pixel.Ding@amd.com> | 2017-12-11 03:48:33 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-12-12 14:45:47 -0500 |
commit | 2ffe31deb27579e2f2c9444e01f4d8abf385d145 (patch) | |
tree | 30b712dbe3053fd00219ad99a36eceb2d2b5d5da /drivers/gpu/drm/amd/amdgpu | |
parent | b852f3d3fb5d770f54c10614e6320a29943eb52f (diff) |
drm/amdgpu: use polling mem to set SDMA3 wptr for VF
On Tonga VF, there are two sources that update the wptr register for
sdma3: 1) polling mem and 2) doorbell. When doorbell and polling mem
are both enabled on sdma3, the two sources occasionally collide when
the ucode and the h/w update the wptr register in parallel. The issue
doesn't happen on CP GFX/Compute, since CP drops all doorbell writes
when the VF is inactive. So, for VF, enable polling mem and don't use
the doorbell for SDMA3.
Signed-off-by: Pixel Ding <Pixel.Ding@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 27 |
2 files changed, 20 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 641e3fd7ba3c..010f69084af5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | |||
@@ -186,6 +186,7 @@ struct amdgpu_ring { | |||
186 | uint64_t eop_gpu_addr; | 186 | uint64_t eop_gpu_addr; |
187 | u32 doorbell_index; | 187 | u32 doorbell_index; |
188 | bool use_doorbell; | 188 | bool use_doorbell; |
189 | bool use_pollmem; | ||
189 | unsigned wptr_offs; | 190 | unsigned wptr_offs; |
190 | unsigned fence_offs; | 191 | unsigned fence_offs; |
191 | uint64_t current_ctx; | 192 | uint64_t current_ctx; |
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index c8c93f9dac21..41c60f5ac96e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | |||
@@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) | |||
355 | struct amdgpu_device *adev = ring->adev; | 355 | struct amdgpu_device *adev = ring->adev; |
356 | u32 wptr; | 356 | u32 wptr; |
357 | 357 | ||
358 | if (ring->use_doorbell) { | 358 | if (ring->use_doorbell || ring->use_pollmem) { |
359 | /* XXX check if swapping is necessary on BE */ | 359 | /* XXX check if swapping is necessary on BE */ |
360 | wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; | 360 | wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; |
361 | } else { | 361 | } else { |
@@ -380,10 +380,13 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) | |||
380 | 380 | ||
381 | if (ring->use_doorbell) { | 381 | if (ring->use_doorbell) { |
382 | u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; | 382 | u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; |
383 | |||
384 | /* XXX check if swapping is necessary on BE */ | 383 | /* XXX check if swapping is necessary on BE */ |
385 | WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); | 384 | WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); |
386 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); | 385 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); |
386 | } else if (ring->use_pollmem) { | ||
387 | u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; | ||
388 | |||
389 | WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); | ||
387 | } else { | 390 | } else { |
388 | int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; | 391 | int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; |
389 | 392 | ||
@@ -718,10 +721,14 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) | |||
718 | WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], | 721 | WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], |
719 | upper_32_bits(wptr_gpu_addr)); | 722 | upper_32_bits(wptr_gpu_addr)); |
720 | wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); | 723 | wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); |
721 | if (amdgpu_sriov_vf(adev)) | 724 | if (ring->use_pollmem) |
722 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); | 725 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, |
726 | SDMA0_GFX_RB_WPTR_POLL_CNTL, | ||
727 | ENABLE, 1); | ||
723 | else | 728 | else |
724 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); | 729 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, |
730 | SDMA0_GFX_RB_WPTR_POLL_CNTL, | ||
731 | ENABLE, 0); | ||
725 | WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); | 732 | WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); |
726 | 733 | ||
727 | /* enable DMA RB */ | 734 | /* enable DMA RB */ |
@@ -1203,9 +1210,13 @@ static int sdma_v3_0_sw_init(void *handle) | |||
1203 | for (i = 0; i < adev->sdma.num_instances; i++) { | 1210 | for (i = 0; i < adev->sdma.num_instances; i++) { |
1204 | ring = &adev->sdma.instance[i].ring; | 1211 | ring = &adev->sdma.instance[i].ring; |
1205 | ring->ring_obj = NULL; | 1212 | ring->ring_obj = NULL; |
1206 | ring->use_doorbell = true; | 1213 | if (!amdgpu_sriov_vf(adev)) { |
1207 | ring->doorbell_index = (i == 0) ? | 1214 | ring->use_doorbell = true; |
1208 | AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; | 1215 | ring->doorbell_index = (i == 0) ? |
1216 | AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; | ||
1217 | } else { | ||
1218 | ring->use_pollmem = true; | ||
1219 | } | ||
1209 | 1220 | ||
1210 | sprintf(ring->name, "sdma%d", i); | 1221 | sprintf(ring->name, "sdma%d", i); |
1211 | r = amdgpu_ring_init(adev, ring, 1024, | 1222 | r = amdgpu_ring_init(adev, ring, 1024, |