aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorchangzhu <Changfeng.Zhu@amd.com>2019-10-09 23:02:33 -0400
committerAlex Deucher <alexander.deucher@amd.com>2019-11-06 22:06:23 -0500
commit589b64a7e39720b1784a1a26569c57cb977936ef (patch)
tree24e6d4ac50cca6d0f808f507903449c924284fb2
parent6a299d7aaa97dfde5988d8f9e2fa2c046b5793ff (diff)
drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
The GRBM register interface is now capable of bursting 1 cycle per register wr->wr, wr->rd — much faster than the previous multicycle-per-transaction done interface. This has caused a problem where status registers requiring HW to update have a 1-cycle delay, due to the register update having to go through GRBM. For CP ucode, the dummy read has been realized in CP firmware. It covers only the case of WAIT_REG_MEM operation 1, so gfx_v10_0_wait_reg_mem needs to be called in gfx10. Besides, a warning needs to be added to update the firmware in case it is too old to contain the dummy-read function in CP firmware. For SDMA ucode, the dummy read has not been realized in SDMA firmware. SDMA is moved to gfxhub in gfx10, so a dummy read needs to be added in the driver between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0. Signed-off-by: changzhu <Changfeng.Zhu@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c13
4 files changed, 64 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6ee4021910e2..6d19183b478b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -289,6 +289,7 @@ struct amdgpu_gfx {
289 uint32_t mec2_feature_version; 289 uint32_t mec2_feature_version;
290 bool mec_fw_write_wait; 290 bool mec_fw_write_wait;
291 bool me_fw_write_wait; 291 bool me_fw_write_wait;
292 bool cp_fw_write_wait;
292 struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; 293 struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
293 unsigned num_gfx_rings; 294 unsigned num_gfx_rings;
294 struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; 295 struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8dfc775626a7..53090eae0082 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
564 kfree(adev->gfx.rlc.register_list_format); 564 kfree(adev->gfx.rlc.register_list_format);
565} 565}
566 566
567static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
568{
569 adev->gfx.cp_fw_write_wait = false;
570
571 switch (adev->asic_type) {
572 case CHIP_NAVI10:
573 case CHIP_NAVI12:
574 case CHIP_NAVI14:
575 if ((adev->gfx.me_fw_version >= 0x00000046) &&
576 (adev->gfx.me_feature_version >= 27) &&
577 (adev->gfx.pfp_fw_version >= 0x00000068) &&
578 (adev->gfx.pfp_feature_version >= 27) &&
579 (adev->gfx.mec_fw_version >= 0x0000005b) &&
580 (adev->gfx.mec_feature_version >= 27))
581 adev->gfx.cp_fw_write_wait = true;
582 break;
583 default:
584 break;
585 }
586
587 if (adev->gfx.cp_fw_write_wait == false)
588 DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
589 GRBM requires 1-cycle delay in cp firmware\n");
590}
591
592
567static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 593static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
568{ 594{
569 const struct rlc_firmware_header_v2_1 *rlc_hdr; 595 const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -832,6 +858,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
832 } 858 }
833 } 859 }
834 860
861 gfx_v10_0_check_fw_write_wait(adev);
835out: 862out:
836 if (err) { 863 if (err) {
837 dev_err(adev->dev, 864 dev_err(adev->dev,
@@ -4765,6 +4792,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4765 gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4792 gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4766} 4793}
4767 4794
4795static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4796 uint32_t reg0, uint32_t reg1,
4797 uint32_t ref, uint32_t mask)
4798{
4799 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4800 struct amdgpu_device *adev = ring->adev;
4801 bool fw_version_ok = false;
4802
4803 fw_version_ok = adev->gfx.cp_fw_write_wait;
4804
4805 if (fw_version_ok)
4806 gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4807 ref, mask, 0x20);
4808 else
4809 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4810 ref, mask);
4811}
4812
4768static void 4813static void
4769gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4814gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4770 uint32_t me, uint32_t pipe, 4815 uint32_t me, uint32_t pipe,
@@ -5155,6 +5200,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5155 .emit_tmz = gfx_v10_0_ring_emit_tmz, 5200 .emit_tmz = gfx_v10_0_ring_emit_tmz,
5156 .emit_wreg = gfx_v10_0_ring_emit_wreg, 5201 .emit_wreg = gfx_v10_0_ring_emit_wreg,
5157 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 5202 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
5203 .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
5158}; 5204};
5159 5205
5160static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { 5206static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5188,6 +5234,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
5188 .pad_ib = amdgpu_ring_generic_pad_ib, 5234 .pad_ib = amdgpu_ring_generic_pad_ib,
5189 .emit_wreg = gfx_v10_0_ring_emit_wreg, 5235 .emit_wreg = gfx_v10_0_ring_emit_wreg,
5190 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 5236 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
5237 .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
5191}; 5238};
5192 5239
5193static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 5240static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5218,6 +5265,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
5218 .emit_rreg = gfx_v10_0_ring_emit_rreg, 5265 .emit_rreg = gfx_v10_0_ring_emit_rreg,
5219 .emit_wreg = gfx_v10_0_ring_emit_wreg, 5266 .emit_wreg = gfx_v10_0_ring_emit_wreg,
5220 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, 5267 .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
5268 .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
5221}; 5269};
5222 5270
5223static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 5271static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 354e6200ca9a..5c7d5f73f54f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
344 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), 344 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
345 upper_32_bits(pd_addr)); 345 upper_32_bits(pd_addr));
346 346
347 amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); 347 amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
348 348 hub->vm_inv_eng0_ack + eng,
349 /* wait for the invalidate to complete */ 349 req, 1 << vmid);
350 amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
351 1 << vmid, 1 << vmid);
352 350
353 return pd_addr; 351 return pd_addr;
354} 352}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index f6e81680dd7e..8493bfbbc148 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1173 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); 1173 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
1174} 1174}
1175 1175
1176static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
1177 uint32_t reg0, uint32_t reg1,
1178 uint32_t ref, uint32_t mask)
1179{
1180 amdgpu_ring_emit_wreg(ring, reg0, ref);
1181 /* wait for a cycle to reset vm_inv_eng*_ack */
1182 amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
1183 amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
1184}
1185
1176static int sdma_v5_0_early_init(void *handle) 1186static int sdma_v5_0_early_init(void *handle)
1177{ 1187{
1178 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1188 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
1588 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ 1598 6 + /* sdma_v5_0_ring_emit_pipeline_sync */
1589 /* sdma_v5_0_ring_emit_vm_flush */ 1599 /* sdma_v5_0_ring_emit_vm_flush */
1590 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 1600 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1591 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 1601 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
1592 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ 1602 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
1593 .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ 1603 .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
1594 .emit_ib = sdma_v5_0_ring_emit_ib, 1604 .emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
1602 .pad_ib = sdma_v5_0_ring_pad_ib, 1612 .pad_ib = sdma_v5_0_ring_pad_ib,
1603 .emit_wreg = sdma_v5_0_ring_emit_wreg, 1613 .emit_wreg = sdma_v5_0_ring_emit_wreg,
1604 .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, 1614 .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
1615 .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
1605 .init_cond_exec = sdma_v5_0_ring_init_cond_exec, 1616 .init_cond_exec = sdma_v5_0_ring_init_cond_exec,
1606 .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, 1617 .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
1607 .preempt_ib = sdma_v5_0_ring_preempt_ib, 1618 .preempt_ib = sdma_v5_0_ring_preempt_ib,