aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michel Dänzer <michel.daenzer@amd.com> 2014-07-31 05:43:49 -0400
committer Alex Deucher <alexander.deucher@amd.com> 2014-08-05 08:53:45 -0400
commit 72a9987edcedb89db988079a03c9b9c65b6ec9ac (patch)
tree 80fb28570b7cf060188bdad013f0aa58b4a25fb0
parent 124764f17473479061942429ada2e5e786d5d6ed (diff)
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
This ensures the GPU sees all previous CPU writes to VRAM, which makes it safe:

* For userspace to stream data from CPU to GPU via VRAM instead of GTT
* For IBs to be stored in VRAM instead of GTT
* For ring buffers to be stored in VRAM instead of GTT, if the HDP flush is performed via MMIO

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/radeon/cik.c 4
-rw-r--r-- drivers/gpu/drm/radeon/r100.c 20
-rw-r--r-- drivers/gpu/drm/radeon/radeon.h 1
-rw-r--r-- drivers/gpu/drm/radeon/radeon_asic.c 6
-rw-r--r-- drivers/gpu/drm/radeon/radeon_asic.h 3
-rw-r--r-- drivers/gpu/drm/radeon/radeon_drv.c 4
-rw-r--r-- drivers/gpu/drm/radeon/radeon_ring.c 10
7 files changed, 35 insertions, 13 deletions
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 60c276538ba9..afdfe04a9290 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3890,8 +3890,6 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3890 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); 3890 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3891 radeon_ring_write(ring, fence->seq); 3891 radeon_ring_write(ring, fence->seq);
3892 radeon_ring_write(ring, 0); 3892 radeon_ring_write(ring, 0);
3893 /* HDP flush */
3894 cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3895} 3893}
3896 3894
3897/** 3895/**
@@ -3920,8 +3918,6 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3920 radeon_ring_write(ring, upper_32_bits(addr)); 3918 radeon_ring_write(ring, upper_32_bits(addr));
3921 radeon_ring_write(ring, fence->seq); 3919 radeon_ring_write(ring, fence->seq);
3922 radeon_ring_write(ring, 0); 3920 radeon_ring_write(ring, 0);
3923 /* HDP flush */
3924 cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3925} 3921}
3926 3922
3927bool cik_semaphore_ring_emit(struct radeon_device *rdev, 3923bool cik_semaphore_ring_emit(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 5fd242795178..04b5940b8923 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -837,11 +837,7 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
837 /* Wait until IDLE & CLEAN */ 837 /* Wait until IDLE & CLEAN */
838 radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); 838 radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
839 radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); 839 radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
840 radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); 840 r100_ring_hdp_flush(rdev, ring);
841 radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
842 RADEON_HDP_READ_BUFFER_INVALIDATE);
843 radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
844 radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
845 /* Emit fence sequence & fire IRQ */ 841 /* Emit fence sequence & fire IRQ */
846 radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0)); 842 radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
847 radeon_ring_write(ring, fence->seq); 843 radeon_ring_write(ring, fence->seq);
@@ -1060,6 +1056,20 @@ void r100_gfx_set_wptr(struct radeon_device *rdev,
1060 (void)RREG32(RADEON_CP_RB_WPTR); 1056 (void)RREG32(RADEON_CP_RB_WPTR);
1061} 1057}
1062 1058
1059/**
1060 * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
1061 * rdev: radeon device structure
1062 * ring: ring buffer struct for emitting packets
1063 */
1064void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
1065{
1066 radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
1067 radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
1068 RADEON_HDP_READ_BUFFER_INVALIDATE);
1069 radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
1070 radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
1071}
1072
1063static void r100_cp_load_microcode(struct radeon_device *rdev) 1073static void r100_cp_load_microcode(struct radeon_device *rdev)
1064{ 1074{
1065 const __be32 *fw_data; 1075 const __be32 *fw_data;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index bcc98c1c135e..29731584b6ae 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1749,6 +1749,7 @@ struct radeon_asic_ring {
1749 /* command emmit functions */ 1749 /* command emmit functions */
1750 void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); 1750 void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
1751 void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); 1751 void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
1752 void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring);
1752 bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, 1753 bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
1753 struct radeon_semaphore *semaphore, bool emit_wait); 1754 struct radeon_semaphore *semaphore, bool emit_wait);
1754 void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); 1755 void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 5781fde5c1ce..c49a01f92b4d 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -185,6 +185,7 @@ static struct radeon_asic_ring r100_gfx_ring = {
185 .get_rptr = &r100_gfx_get_rptr, 185 .get_rptr = &r100_gfx_get_rptr,
186 .get_wptr = &r100_gfx_get_wptr, 186 .get_wptr = &r100_gfx_get_wptr,
187 .set_wptr = &r100_gfx_set_wptr, 187 .set_wptr = &r100_gfx_set_wptr,
188 .hdp_flush = &r100_ring_hdp_flush,
188}; 189};
189 190
190static struct radeon_asic r100_asic = { 191static struct radeon_asic r100_asic = {
@@ -331,6 +332,7 @@ static struct radeon_asic_ring r300_gfx_ring = {
331 .get_rptr = &r100_gfx_get_rptr, 332 .get_rptr = &r100_gfx_get_rptr,
332 .get_wptr = &r100_gfx_get_wptr, 333 .get_wptr = &r100_gfx_get_wptr,
333 .set_wptr = &r100_gfx_set_wptr, 334 .set_wptr = &r100_gfx_set_wptr,
335 .hdp_flush = &r100_ring_hdp_flush,
334}; 336};
335 337
336static struct radeon_asic r300_asic = { 338static struct radeon_asic r300_asic = {
@@ -1987,7 +1989,7 @@ static struct radeon_asic ci_asic = {
1987 .resume = &cik_resume, 1989 .resume = &cik_resume,
1988 .asic_reset = &cik_asic_reset, 1990 .asic_reset = &cik_asic_reset,
1989 .vga_set_state = &r600_vga_set_state, 1991 .vga_set_state = &r600_vga_set_state,
1990 .mmio_hdp_flush = NULL, 1992 .mmio_hdp_flush = &r600_mmio_hdp_flush,
1991 .gui_idle = &r600_gui_idle, 1993 .gui_idle = &r600_gui_idle,
1992 .mc_wait_for_idle = &evergreen_mc_wait_for_idle, 1994 .mc_wait_for_idle = &evergreen_mc_wait_for_idle,
1993 .get_xclk = &cik_get_xclk, 1995 .get_xclk = &cik_get_xclk,
@@ -2091,7 +2093,7 @@ static struct radeon_asic kv_asic = {
2091 .resume = &cik_resume, 2093 .resume = &cik_resume,
2092 .asic_reset = &cik_asic_reset, 2094 .asic_reset = &cik_asic_reset,
2093 .vga_set_state = &r600_vga_set_state, 2095 .vga_set_state = &r600_vga_set_state,
2094 .mmio_hdp_flush = NULL, 2096 .mmio_hdp_flush = &r600_mmio_hdp_flush,
2095 .gui_idle = &r600_gui_idle, 2097 .gui_idle = &r600_gui_idle,
2096 .mc_wait_for_idle = &evergreen_mc_wait_for_idle, 2098 .mc_wait_for_idle = &evergreen_mc_wait_for_idle,
2097 .get_xclk = &cik_get_xclk, 2099 .get_xclk = &cik_get_xclk,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index b8826c655685..3cf6be6666fc 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -148,7 +148,8 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev,
148 struct radeon_ring *ring); 148 struct radeon_ring *ring);
149void r100_gfx_set_wptr(struct radeon_device *rdev, 149void r100_gfx_set_wptr(struct radeon_device *rdev,
150 struct radeon_ring *ring); 150 struct radeon_ring *ring);
151 151void r100_ring_hdp_flush(struct radeon_device *rdev,
152 struct radeon_ring *ring);
152/* 153/*
153 * r200,rv250,rs300,rv280 154 * r200,rv250,rs300,rv280
154 */ 155 */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index e9e361084249..54aa293a8175 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -82,9 +82,11 @@
82 * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN), 82 * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
83 * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG 83 * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
84 * 2.39.0 - Add INFO query for number of active CUs 84 * 2.39.0 - Add INFO query for number of active CUs
85 * 2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting
86 * CS to GPU
85 */ 87 */
86#define KMS_DRIVER_MAJOR 2 88#define KMS_DRIVER_MAJOR 2
87#define KMS_DRIVER_MINOR 39 89#define KMS_DRIVER_MINOR 40
88#define KMS_DRIVER_PATCHLEVEL 0 90#define KMS_DRIVER_PATCHLEVEL 0
89int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); 91int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
90int radeon_driver_unload_kms(struct drm_device *dev); 92int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 9c86ac947275..5b4e0cf231a0 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -183,11 +183,21 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig
183 */ 183 */
184void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) 184void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
185{ 185{
186 /* If we are emitting the HDP flush via the ring buffer, we need to
187 * do it before padding.
188 */
189 if (rdev->asic->ring[ring->idx]->hdp_flush)
190 rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
186 /* We pad to match fetch size */ 191 /* We pad to match fetch size */
187 while (ring->wptr & ring->align_mask) { 192 while (ring->wptr & ring->align_mask) {
188 radeon_ring_write(ring, ring->nop); 193 radeon_ring_write(ring, ring->nop);
189 } 194 }
190 mb(); 195 mb();
196 /* If we are emitting the HDP flush via MMIO, we need to do it after
197 * all CPU writes to VRAM finished.
198 */
199 if (rdev->asic->mmio_hdp_flush)
200 rdev->asic->mmio_hdp_flush(rdev);
191 radeon_ring_set_wptr(rdev, ring); 201 radeon_ring_set_wptr(rdev, ring);
192} 202}
193 203