author		monk.liu <monk.liu@amd.com>	2015-09-23 01:49:58 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2015-09-23 17:23:45 -0400
commit		5c3422b0b135b46c8dca9c1d909c1ae84f3561bd (patch)
tree		98bb55830306ee8e9869dc5a474de3732ed6a86c /drivers
parent		54ef0b5461c071050c61e501af5544842d61f40a (diff)
drm/amdgpu: sync ce and me with SWITCH_BUFFER(2)
We used to use wait_reg_mem to make the CE wait until the DE finished updating the page tables, but from Tonga onward the CE no longer supports the wait_reg_mem packet, so that logic no longer works. Here is another approach to achieve the same thing: inserting two SWITCH_BUFFER packets at both the front and the end of vm_flush guarantees that the CE does not go on to process IB_const before vm_flush is done.

Inserting two SWITCH_BUFFER packets also works on CI, so remove the legacy method of syncing CE and ME.

v2: insert the double SWITCH_BUFFER at the front of the vm flush as well.

Signed-off-by: monk.liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
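For reference, the synchronization described above reduces to emitting two back-to-back SWITCH_BUFFER packets on the gfx ring. The sketch below shows only that emission; the helper name is hypothetical (it is not part of this patch), and it assumes the amdgpu_ring_write() helper plus the PACKET3()/PACKET3_SWITCH_BUFFER macros already used by the files touched here.

/* Hypothetical helper, for illustration only: emit the double SWITCH_BUFFER
 * that keeps the CE from running ahead of the DE across a VM flush.
 */
static void example_emit_double_switch_buffer(struct amdgpu_ring *ring)
{
	/* each SWITCH_BUFFER is a type-3 packet with a single dummy dword */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}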
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu.h	  2
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c	 55
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c	 58
3 files changed, 23 insertions, 92 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 57b427f958da..6647fb26ef25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1202,8 +1202,6 @@ struct amdgpu_gfx {
 	struct amdgpu_irq_src		priv_inst_irq;
 	/* gfx status */
 	uint32_t			gfx_current_status;
-	/* sync signal for const engine */
-	unsigned			ce_sync_offs;
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 392ec10cc475..e992bf2ff66c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
-
-	/* instruct DE to set a magic number */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(5)));
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1);
-
-	/* let CE wait till condition satisfied */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
-	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |	/* wait */
-				 WAIT_REG_MEM_MEM_SPACE(1) |	/* memory */
-				 WAIT_REG_MEM_FUNCTION(3) |	/* == */
-				 WAIT_REG_MEM_ENGINE(2)));	/* ce */
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1);
-	amdgpu_ring_write(ring, 0xffffffff);
-	amdgpu_ring_write(ring, 4); /* poll interval */
-
-	/* instruct CE to reset wb of ce_sync to zero */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
-				 WRITE_DATA_DST_SEL(5) |
-				 WR_CONFIRM));
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 0);
-}
-
 /*
  * vm
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	if (usepfp) {
+		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+	}
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		amdgpu_ring_write(ring, 0x0);
 
 		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
-		gfx_v7_0_ce_sync_me(ring);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
 	}
 }
 
@@ -4805,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle)
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
-	if (r) {
-		DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
-		return r;
-	}
-
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
@@ -4889,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-	amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
-
 	gfx_v7_0_cp_compute_fini(adev);
 	gfx_v7_0_rlc_fini(adev);
 	gfx_v7_0_mec_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 78e5900d71cd..cb4f68f53f24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle)
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
-	if (r) {
-		DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
-		return r;
-	}
-
 	/* set up the gfx ring */
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
@@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-	amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
-
 	gfx_v8_0_mec_fini(adev);
 
 	return 0;
@@ -4006,41 +3998,6 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
 	return true;
 }
 
-static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
-
-	/* instruct DE to set a magic number */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(5)));
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1);
-
-	/* let CE wait till condition satisfied */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
-	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |	/* wait */
-				 WAIT_REG_MEM_MEM_SPACE(1) |	/* memory */
-				 WAIT_REG_MEM_FUNCTION(3) |	/* == */
-				 WAIT_REG_MEM_ENGINE(2)));	/* ce */
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1);
-	amdgpu_ring_write(ring, 0xffffffff);
-	amdgpu_ring_write(ring, 4); /* poll interval */
-
-	/* instruct CE to reset wb of ce_sync to zero */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
-				 WRITE_DATA_DST_SEL(5) |
-				 WR_CONFIRM));
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 0);
-}
-
 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
@@ -4057,6 +4014,14 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, 4); /* poll interval */
 
+	if (usepfp) {
+		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)) |
@@ -4096,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
-
-		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
-		gfx_v8_0_ce_sync_me(ring);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
 	}
 }
 