diff options
author | Michel Dänzer <michel.daenzer@amd.com> | 2014-07-31 05:43:49 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2014-08-05 08:53:45 -0400 |
commit | 72a9987edcedb89db988079a03c9b9c65b6ec9ac (patch) | |
tree | 80fb28570b7cf060188bdad013f0aa58b4a25fb0 | |
parent | 124764f17473479061942429ada2e5e786d5d6ed (diff) |
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
This ensures the GPU sees all previous CPU writes to VRAM, which makes it
safe:
* For userspace to stream data from CPU to GPU via VRAM instead of GTT
* For IBs to be stored in VRAM instead of GTT
* For ring buffers to be stored in VRAM instead of GTT, if the HDP flush
is performed via MMIO
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/radeon/cik.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/r100.c | 20 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_asic.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_asic.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_drv.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_ring.c | 10 |
7 files changed, 35 insertions, 13 deletions
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 60c276538ba9..afdfe04a9290 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c | |||
@@ -3890,8 +3890,6 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, | |||
3890 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); | 3890 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); |
3891 | radeon_ring_write(ring, fence->seq); | 3891 | radeon_ring_write(ring, fence->seq); |
3892 | radeon_ring_write(ring, 0); | 3892 | radeon_ring_write(ring, 0); |
3893 | /* HDP flush */ | ||
3894 | cik_hdp_flush_cp_ring_emit(rdev, fence->ring); | ||
3895 | } | 3893 | } |
3896 | 3894 | ||
3897 | /** | 3895 | /** |
@@ -3920,8 +3918,6 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, | |||
3920 | radeon_ring_write(ring, upper_32_bits(addr)); | 3918 | radeon_ring_write(ring, upper_32_bits(addr)); |
3921 | radeon_ring_write(ring, fence->seq); | 3919 | radeon_ring_write(ring, fence->seq); |
3922 | radeon_ring_write(ring, 0); | 3920 | radeon_ring_write(ring, 0); |
3923 | /* HDP flush */ | ||
3924 | cik_hdp_flush_cp_ring_emit(rdev, fence->ring); | ||
3925 | } | 3921 | } |
3926 | 3922 | ||
3927 | bool cik_semaphore_ring_emit(struct radeon_device *rdev, | 3923 | bool cik_semaphore_ring_emit(struct radeon_device *rdev, |
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 5fd242795178..04b5940b8923 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c | |||
@@ -837,11 +837,7 @@ void r100_fence_ring_emit(struct radeon_device *rdev, | |||
837 | /* Wait until IDLE & CLEAN */ | 837 | /* Wait until IDLE & CLEAN */ |
838 | radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); | 838 | radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); |
839 | radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); | 839 | radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); |
840 | radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); | 840 | r100_ring_hdp_flush(rdev, ring); |
841 | radeon_ring_write(ring, rdev->config.r100.hdp_cntl | | ||
842 | RADEON_HDP_READ_BUFFER_INVALIDATE); | ||
843 | radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); | ||
844 | radeon_ring_write(ring, rdev->config.r100.hdp_cntl); | ||
845 | /* Emit fence sequence & fire IRQ */ | 841 | /* Emit fence sequence & fire IRQ */ |
846 | radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0)); | 842 | radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0)); |
847 | radeon_ring_write(ring, fence->seq); | 843 | radeon_ring_write(ring, fence->seq); |
@@ -1060,6 +1056,20 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, | |||
1060 | (void)RREG32(RADEON_CP_RB_WPTR); | 1056 | (void)RREG32(RADEON_CP_RB_WPTR); |
1061 | } | 1057 | } |
1062 | 1058 | ||
1059 | /** | ||
1060 | * r100_ring_hdp_flush - flush Host Data Path via the ring buffer | ||
1061 | * @rdev: radeon device structure | ||
1062 | * @ring: ring buffer struct for emitting packets | ||
1063 | */ | ||
1064 | void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring) | ||
1065 | { | ||
1066 | radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); | ||
1067 | radeon_ring_write(ring, rdev->config.r100.hdp_cntl | | ||
1068 | RADEON_HDP_READ_BUFFER_INVALIDATE); | ||
1069 | radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); | ||
1070 | radeon_ring_write(ring, rdev->config.r100.hdp_cntl); | ||
1071 | } | ||
1072 | |||
1063 | static void r100_cp_load_microcode(struct radeon_device *rdev) | 1073 | static void r100_cp_load_microcode(struct radeon_device *rdev) |
1064 | { | 1074 | { |
1065 | const __be32 *fw_data; | 1075 | const __be32 *fw_data; |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index bcc98c1c135e..29731584b6ae 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -1749,6 +1749,7 @@ struct radeon_asic_ring { | |||
1749 | /* command emit functions */ | 1749 | /* command emit functions */ |
1750 | void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); | 1750 | void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); |
1751 | void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); | 1751 | void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); |
1752 | void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring); | ||
1752 | bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, | 1753 | bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, |
1753 | struct radeon_semaphore *semaphore, bool emit_wait); | 1754 | struct radeon_semaphore *semaphore, bool emit_wait); |
1754 | void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); | 1755 | void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); |
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 5781fde5c1ce..c49a01f92b4d 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c | |||
@@ -185,6 +185,7 @@ static struct radeon_asic_ring r100_gfx_ring = { | |||
185 | .get_rptr = &r100_gfx_get_rptr, | 185 | .get_rptr = &r100_gfx_get_rptr, |
186 | .get_wptr = &r100_gfx_get_wptr, | 186 | .get_wptr = &r100_gfx_get_wptr, |
187 | .set_wptr = &r100_gfx_set_wptr, | 187 | .set_wptr = &r100_gfx_set_wptr, |
188 | .hdp_flush = &r100_ring_hdp_flush, | ||
188 | }; | 189 | }; |
189 | 190 | ||
190 | static struct radeon_asic r100_asic = { | 191 | static struct radeon_asic r100_asic = { |
@@ -331,6 +332,7 @@ static struct radeon_asic_ring r300_gfx_ring = { | |||
331 | .get_rptr = &r100_gfx_get_rptr, | 332 | .get_rptr = &r100_gfx_get_rptr, |
332 | .get_wptr = &r100_gfx_get_wptr, | 333 | .get_wptr = &r100_gfx_get_wptr, |
333 | .set_wptr = &r100_gfx_set_wptr, | 334 | .set_wptr = &r100_gfx_set_wptr, |
335 | .hdp_flush = &r100_ring_hdp_flush, | ||
334 | }; | 336 | }; |
335 | 337 | ||
336 | static struct radeon_asic r300_asic = { | 338 | static struct radeon_asic r300_asic = { |
@@ -1987,7 +1989,7 @@ static struct radeon_asic ci_asic = { | |||
1987 | .resume = &cik_resume, | 1989 | .resume = &cik_resume, |
1988 | .asic_reset = &cik_asic_reset, | 1990 | .asic_reset = &cik_asic_reset, |
1989 | .vga_set_state = &r600_vga_set_state, | 1991 | .vga_set_state = &r600_vga_set_state, |
1990 | .mmio_hdp_flush = NULL, | 1992 | .mmio_hdp_flush = &r600_mmio_hdp_flush, |
1991 | .gui_idle = &r600_gui_idle, | 1993 | .gui_idle = &r600_gui_idle, |
1992 | .mc_wait_for_idle = &evergreen_mc_wait_for_idle, | 1994 | .mc_wait_for_idle = &evergreen_mc_wait_for_idle, |
1993 | .get_xclk = &cik_get_xclk, | 1995 | .get_xclk = &cik_get_xclk, |
@@ -2091,7 +2093,7 @@ static struct radeon_asic kv_asic = { | |||
2091 | .resume = &cik_resume, | 2093 | .resume = &cik_resume, |
2092 | .asic_reset = &cik_asic_reset, | 2094 | .asic_reset = &cik_asic_reset, |
2093 | .vga_set_state = &r600_vga_set_state, | 2095 | .vga_set_state = &r600_vga_set_state, |
2094 | .mmio_hdp_flush = NULL, | 2096 | .mmio_hdp_flush = &r600_mmio_hdp_flush, |
2095 | .gui_idle = &r600_gui_idle, | 2097 | .gui_idle = &r600_gui_idle, |
2096 | .mc_wait_for_idle = &evergreen_mc_wait_for_idle, | 2098 | .mc_wait_for_idle = &evergreen_mc_wait_for_idle, |
2097 | .get_xclk = &cik_get_xclk, | 2099 | .get_xclk = &cik_get_xclk, |
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index b8826c655685..3cf6be6666fc 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h | |||
@@ -148,7 +148,8 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev, | |||
148 | struct radeon_ring *ring); | 148 | struct radeon_ring *ring); |
149 | void r100_gfx_set_wptr(struct radeon_device *rdev, | 149 | void r100_gfx_set_wptr(struct radeon_device *rdev, |
150 | struct radeon_ring *ring); | 150 | struct radeon_ring *ring); |
151 | 151 | void r100_ring_hdp_flush(struct radeon_device *rdev, | |
152 | struct radeon_ring *ring); | ||
152 | /* | 153 | /* |
153 | * r200,rv250,rs300,rv280 | 154 | * r200,rv250,rs300,rv280 |
154 | */ | 155 | */ |
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index e9e361084249..54aa293a8175 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c | |||
@@ -82,9 +82,11 @@ | |||
82 | * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN), | 82 | * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN), |
83 | * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG | 83 | * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG |
84 | * 2.39.0 - Add INFO query for number of active CUs | 84 | * 2.39.0 - Add INFO query for number of active CUs |
85 | * 2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting | ||
86 | * CS to GPU | ||
85 | */ | 87 | */ |
86 | #define KMS_DRIVER_MAJOR 2 | 88 | #define KMS_DRIVER_MAJOR 2 |
87 | #define KMS_DRIVER_MINOR 39 | 89 | #define KMS_DRIVER_MINOR 40 |
88 | #define KMS_DRIVER_PATCHLEVEL 0 | 90 | #define KMS_DRIVER_PATCHLEVEL 0 |
89 | int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); | 91 | int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); |
90 | int radeon_driver_unload_kms(struct drm_device *dev); | 92 | int radeon_driver_unload_kms(struct drm_device *dev); |
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 9c86ac947275..5b4e0cf231a0 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c | |||
@@ -183,11 +183,21 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig | |||
183 | */ | 183 | */ |
184 | void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) | 184 | void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) |
185 | { | 185 | { |
186 | /* If we are emitting the HDP flush via the ring buffer, we need to | ||
187 | * do it before padding. | ||
188 | */ | ||
189 | if (rdev->asic->ring[ring->idx]->hdp_flush) | ||
190 | rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring); | ||
186 | /* We pad to match fetch size */ | 191 | /* We pad to match fetch size */ |
187 | while (ring->wptr & ring->align_mask) { | 192 | while (ring->wptr & ring->align_mask) { |
188 | radeon_ring_write(ring, ring->nop); | 193 | radeon_ring_write(ring, ring->nop); |
189 | } | 194 | } |
190 | mb(); | 195 | mb(); |
196 | /* If we are emitting the HDP flush via MMIO, we need to do it after | ||
197 | * all CPU writes to VRAM finished. | ||
198 | */ | ||
199 | if (rdev->asic->mmio_hdp_flush) | ||
200 | rdev->asic->mmio_hdp_flush(rdev); | ||
191 | radeon_ring_set_wptr(rdev, ring); | 201 | radeon_ring_set_wptr(rdev, ring); |
192 | } | 202 | } |
193 | 203 | ||