diff options
author | monk.liu <monk.liu@amd.com> | 2015-09-23 01:49:58 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2015-09-23 17:23:45 -0400 |
commit | 5c3422b0b135b46c8dca9c1d909c1ae84f3561bd (patch) | |
tree | 98bb55830306ee8e9869dc5a474de3732ed6a86c /drivers | |
parent | 54ef0b5461c071050c61e501af5544842d61f40a (diff) |
drm/amdgpu: sync ce and me with SWITCH_BUFFER(2)
We used to use wait_reg_mem to make the CE wait until the DE finishes page
updating, but from Tonga onward the CE doesn't support the wait_reg_mem packet,
so this logic no longer works.
So here is another approach to do the same thing:
Inserting two SWITCH_BUFFER packets at both the front and the end of vm_flush
guarantees that the CE does not go on to process IB_const before vm_flush is
done.
Inserting two SWITCH_BUFFER packets also works on CI, so remove the legacy
method of syncing CE and ME.
v2:
Insert double SWITCH_BUFFER at front of vm flush as well.
Signed-off-by: monk.liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 55 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 58 |
3 files changed, 23 insertions, 92 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 57b427f958da..6647fb26ef25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h | |||
@@ -1202,8 +1202,6 @@ struct amdgpu_gfx { | |||
1202 | struct amdgpu_irq_src priv_inst_irq; | 1202 | struct amdgpu_irq_src priv_inst_irq; |
1203 | /* gfx status */ | 1203 | /* gfx status */ |
1204 | uint32_t gfx_current_status; | 1204 | uint32_t gfx_current_status; |
1205 | /* sync signal for const engine */ | ||
1206 | unsigned ce_sync_offs; | ||
1207 | /* ce ram size*/ | 1205 | /* ce ram size*/ |
1208 | unsigned ce_ram_size; | 1206 | unsigned ce_ram_size; |
1209 | }; | 1207 | }; |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 392ec10cc475..e992bf2ff66c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | |||
@@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev) | |||
3610 | return 0; | 3610 | return 0; |
3611 | } | 3611 | } |
3612 | 3612 | ||
3613 | static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring) | ||
3614 | { | ||
3615 | struct amdgpu_device *adev = ring->adev; | ||
3616 | u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4; | ||
3617 | |||
3618 | /* instruct DE to set a magic number */ | ||
3619 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); | ||
3620 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | | ||
3621 | WRITE_DATA_DST_SEL(5))); | ||
3622 | amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); | ||
3623 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); | ||
3624 | amdgpu_ring_write(ring, 1); | ||
3625 | |||
3626 | /* let CE wait till condition satisfied */ | ||
3627 | amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); | ||
3628 | amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ | ||
3629 | WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ | ||
3630 | WAIT_REG_MEM_FUNCTION(3) | /* == */ | ||
3631 | WAIT_REG_MEM_ENGINE(2))); /* ce */ | ||
3632 | amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); | ||
3633 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); | ||
3634 | amdgpu_ring_write(ring, 1); | ||
3635 | amdgpu_ring_write(ring, 0xffffffff); | ||
3636 | amdgpu_ring_write(ring, 4); /* poll interval */ | ||
3637 | |||
3638 | /* instruct CE to reset wb of ce_sync to zero */ | ||
3639 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); | ||
3640 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | | ||
3641 | WRITE_DATA_DST_SEL(5) | | ||
3642 | WR_CONFIRM)); | ||
3643 | amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); | ||
3644 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); | ||
3645 | amdgpu_ring_write(ring, 0); | ||
3646 | } | ||
3647 | |||
3648 | /* | 3613 | /* |
3649 | * vm | 3614 | * vm |
3650 | * VMID 0 is the physical GPU addresses as used by the kernel. | 3615 | * VMID 0 is the physical GPU addresses as used by the kernel. |
@@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
3663 | unsigned vm_id, uint64_t pd_addr) | 3628 | unsigned vm_id, uint64_t pd_addr) |
3664 | { | 3629 | { |
3665 | int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); | 3630 | int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); |
3631 | if (usepfp) { | ||
3632 | /* synce CE with ME to prevent CE fetch CEIB before context switch done */ | ||
3633 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); | ||
3634 | amdgpu_ring_write(ring, 0); | ||
3635 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); | ||
3636 | amdgpu_ring_write(ring, 0); | ||
3637 | } | ||
3666 | 3638 | ||
3667 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); | 3639 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); |
3668 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | | 3640 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | |
@@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
3703 | amdgpu_ring_write(ring, 0x0); | 3675 | amdgpu_ring_write(ring, 0x0); |
3704 | 3676 | ||
3705 | /* synce CE with ME to prevent CE fetch CEIB before context switch done */ | 3677 | /* synce CE with ME to prevent CE fetch CEIB before context switch done */ |
3706 | gfx_v7_0_ce_sync_me(ring); | 3678 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); |
3679 | amdgpu_ring_write(ring, 0); | ||
3680 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); | ||
3681 | amdgpu_ring_write(ring, 0); | ||
3707 | } | 3682 | } |
3708 | } | 3683 | } |
3709 | 3684 | ||
@@ -4805,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle) | |||
4805 | return r; | 4780 | return r; |
4806 | } | 4781 | } |
4807 | 4782 | ||
4808 | r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs); | ||
4809 | if (r) { | ||
4810 | DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r); | ||
4811 | return r; | ||
4812 | } | ||
4813 | |||
4814 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { | 4783 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
4815 | ring = &adev->gfx.gfx_ring[i]; | 4784 | ring = &adev->gfx.gfx_ring[i]; |
4816 | ring->ring_obj = NULL; | 4785 | ring->ring_obj = NULL; |
@@ -4889,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle) | |||
4889 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 4858 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
4890 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); | 4859 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); |
4891 | 4860 | ||
4892 | amdgpu_wb_free(adev, adev->gfx.ce_sync_offs); | ||
4893 | |||
4894 | gfx_v7_0_cp_compute_fini(adev); | 4861 | gfx_v7_0_cp_compute_fini(adev); |
4895 | gfx_v7_0_rlc_fini(adev); | 4862 | gfx_v7_0_rlc_fini(adev); |
4896 | gfx_v7_0_mec_fini(adev); | 4863 | gfx_v7_0_mec_fini(adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 78e5900d71cd..cb4f68f53f24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle) | |||
940 | return r; | 940 | return r; |
941 | } | 941 | } |
942 | 942 | ||
943 | r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs); | ||
944 | if (r) { | ||
945 | DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r); | ||
946 | return r; | ||
947 | } | ||
948 | |||
949 | /* set up the gfx ring */ | 943 | /* set up the gfx ring */ |
950 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { | 944 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) { |
951 | ring = &adev->gfx.gfx_ring[i]; | 945 | ring = &adev->gfx.gfx_ring[i]; |
@@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle) | |||
1033 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 1027 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
1034 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); | 1028 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); |
1035 | 1029 | ||
1036 | amdgpu_wb_free(adev, adev->gfx.ce_sync_offs); | ||
1037 | |||
1038 | gfx_v8_0_mec_fini(adev); | 1030 | gfx_v8_0_mec_fini(adev); |
1039 | 1031 | ||
1040 | return 0; | 1032 | return 0; |
@@ -4006,41 +3998,6 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring, | |||
4006 | return true; | 3998 | return true; |
4007 | } | 3999 | } |
4008 | 4000 | ||
4009 | static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring) | ||
4010 | { | ||
4011 | struct amdgpu_device *adev = ring->adev; | ||
4012 | u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4; | ||
4013 | |||
4014 | /* instruct DE to set a magic number */ | ||
4015 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); | ||
4016 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | | ||
4017 | WRITE_DATA_DST_SEL(5))); | ||
4018 | amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); | ||
4019 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); | ||
4020 | amdgpu_ring_write(ring, 1); | ||
4021 | |||
4022 | /* let CE wait till condition satisfied */ | ||
4023 | amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); | ||
4024 | amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ | ||
4025 | WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ | ||
4026 | WAIT_REG_MEM_FUNCTION(3) | /* == */ | ||
4027 | WAIT_REG_MEM_ENGINE(2))); /* ce */ | ||
4028 | amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); | ||
4029 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); | ||
4030 | amdgpu_ring_write(ring, 1); | ||
4031 | amdgpu_ring_write(ring, 0xffffffff); | ||
4032 | amdgpu_ring_write(ring, 4); /* poll interval */ | ||
4033 | |||
4034 | /* instruct CE to reset wb of ce_sync to zero */ | ||
4035 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); | ||
4036 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | | ||
4037 | WRITE_DATA_DST_SEL(5) | | ||
4038 | WR_CONFIRM)); | ||
4039 | amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); | ||
4040 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); | ||
4041 | amdgpu_ring_write(ring, 0); | ||
4042 | } | ||
4043 | |||
4044 | static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | 4001 | static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, |
4045 | unsigned vm_id, uint64_t pd_addr) | 4002 | unsigned vm_id, uint64_t pd_addr) |
4046 | { | 4003 | { |
@@ -4057,6 +4014,14 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
4057 | amdgpu_ring_write(ring, 0xffffffff); | 4014 | amdgpu_ring_write(ring, 0xffffffff); |
4058 | amdgpu_ring_write(ring, 4); /* poll interval */ | 4015 | amdgpu_ring_write(ring, 4); /* poll interval */ |
4059 | 4016 | ||
4017 | if (usepfp) { | ||
4018 | /* synce CE with ME to prevent CE fetch CEIB before context switch done */ | ||
4019 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); | ||
4020 | amdgpu_ring_write(ring, 0); | ||
4021 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); | ||
4022 | amdgpu_ring_write(ring, 0); | ||
4023 | } | ||
4024 | |||
4060 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); | 4025 | amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); |
4061 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | | 4026 | amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | |
4062 | WRITE_DATA_DST_SEL(0)) | | 4027 | WRITE_DATA_DST_SEL(0)) | |
@@ -4096,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, | |||
4096 | /* sync PFP to ME, otherwise we might get invalid PFP reads */ | 4061 | /* sync PFP to ME, otherwise we might get invalid PFP reads */ |
4097 | amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); | 4062 | amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); |
4098 | amdgpu_ring_write(ring, 0x0); | 4063 | amdgpu_ring_write(ring, 0x0); |
4099 | 4064 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); | |
4100 | /* synce CE with ME to prevent CE fetch CEIB before context switch done */ | 4065 | amdgpu_ring_write(ring, 0); |
4101 | gfx_v8_0_ce_sync_me(ring); | 4066 | amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); |
4067 | amdgpu_ring_write(ring, 0); | ||
4102 | } | 4068 | } |
4103 | } | 4069 | } |
4104 | 4070 | ||