about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Marek Olšák <marek.olsak@amd.com> 2018-04-03 13:05:03 -0400
committer Alex Deucher <alexander.deucher@amd.com> 2018-05-15 14:43:32 -0400
commit d240cd9eddd943dbe0267d081697195ff1e90b65 (patch)
tree ff9648c93738e223b8da42489f5d64aa8caa9895
parent 3f188453faf7ba5b59e8064df4afffbc946e25ec (diff)
drm/amdgpu: optionally do a writeback but don't invalidate TC for IB fences
There is a new IB flag that enables this new behavior.
Full invalidation is unnecessary for RELEASE_MEM and doesn't make sense
when draw calls from two adjacent gfx IBs run in parallel. This will be
the new default for Mesa.

v2: bump the version

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc15d.h 1
-rw-r--r-- include/uapi/drm/amdgpu_drm.h 4
8 files changed, 27 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5c0567ad1ba7..7c17a0bc2cd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -75,9 +75,10 @@
75 * - 3.23.0 - Add query for VRAM lost counter 75 * - 3.23.0 - Add query for VRAM lost counter
76 * - 3.24.0 - Add high priority compute support for gfx9 76 * - 3.24.0 - Add high priority compute support for gfx9
77 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). 77 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
78 * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
78 */ 79 */
79#define KMS_DRIVER_MAJOR 3 80#define KMS_DRIVER_MAJOR 3
80#define KMS_DRIVER_MINOR 25 81#define KMS_DRIVER_MINOR 26
81#define KMS_DRIVER_PATCHLEVEL 0 82#define KMS_DRIVER_PATCHLEVEL 0
82 83
83int amdgpu_vram_limit = 0; 84int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 97449e06a242..d09fcab2398f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
131 * Emits a fence command on the requested ring (all asics). 131 * Emits a fence command on the requested ring (all asics).
132 * Returns 0 on success, -ENOMEM on failure. 132 * Returns 0 on success, -ENOMEM on failure.
133 */ 133 */
134int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) 134int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
135 unsigned flags)
135{ 136{
136 struct amdgpu_device *adev = ring->adev; 137 struct amdgpu_device *adev = ring->adev;
137 struct amdgpu_fence *fence; 138 struct amdgpu_fence *fence;
@@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
149 adev->fence_context + ring->idx, 150 adev->fence_context + ring->idx,
150 seq); 151 seq);
151 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 152 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
152 seq, AMDGPU_FENCE_FLAG_INT); 153 seq, flags | AMDGPU_FENCE_FLAG_INT);
153 154
154 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; 155 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
155 /* This function can't be called concurrently anyway, otherwise 156 /* This function can't be called concurrently anyway, otherwise
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 311589e02d17..f70eeed9ed76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
127 struct amdgpu_vm *vm; 127 struct amdgpu_vm *vm;
128 uint64_t fence_ctx; 128 uint64_t fence_ctx;
129 uint32_t status = 0, alloc_size; 129 uint32_t status = 0, alloc_size;
130 unsigned fence_flags = 0;
130 131
131 unsigned i; 132 unsigned i;
132 int r = 0; 133 int r = 0;
@@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
227#endif 228#endif
228 amdgpu_asic_invalidate_hdp(adev, ring); 229 amdgpu_asic_invalidate_hdp(adev, ring);
229 230
230 r = amdgpu_fence_emit(ring, f); 231 if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
232 fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
233
234 r = amdgpu_fence_emit(ring, f, fence_flags);
231 if (r) { 235 if (r) {
232 dev_err(adev->dev, "failed to emit fence (%d)\n", r); 236 dev_err(adev->dev, "failed to emit fence (%d)\n", r);
233 if (job && job->vmid) 237 if (job && job->vmid)
@@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
242 /* wrap the last IB with fence */ 246 /* wrap the last IB with fence */
243 if (job && job->uf_addr) { 247 if (job && job->uf_addr) {
244 amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, 248 amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
245 AMDGPU_FENCE_FLAG_64BIT); 249 fence_flags | AMDGPU_FENCE_FLAG_64BIT);
246 } 250 }
247 251
248 if (patch_offset != ~0 && ring->funcs->patch_cond_exec) 252 if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 08fcdf6f7b53..4f8dac2d36a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -42,6 +42,7 @@
42 42
43#define AMDGPU_FENCE_FLAG_64BIT (1 << 0) 43#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
44#define AMDGPU_FENCE_FLAG_INT (1 << 1) 44#define AMDGPU_FENCE_FLAG_INT (1 << 1)
45#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
45 46
46enum amdgpu_ring_type { 47enum amdgpu_ring_type {
47 AMDGPU_RING_TYPE_GFX, 48 AMDGPU_RING_TYPE_GFX,
@@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
90 unsigned irq_type); 91 unsigned irq_type);
91void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); 92void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
92void amdgpu_fence_driver_resume(struct amdgpu_device *adev); 93void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
93int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); 94int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
95 unsigned flags);
94int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); 96int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
95void amdgpu_fence_process(struct amdgpu_ring *ring); 97void amdgpu_fence_process(struct amdgpu_ring *ring);
96int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); 98int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9ec7c1041df2..9c2195a2896d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -633,7 +633,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
633 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); 633 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
634 634
635 if (vm_flush_needed || pasid_mapping_needed) { 635 if (vm_flush_needed || pasid_mapping_needed) {
636 r = amdgpu_fence_emit(ring, &fence); 636 r = amdgpu_fence_emit(ring, &fence, 0);
637 if (r) 637 if (r)
638 return r; 638 return r;
639 } 639 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6a19e0311a9c..05b2d34110b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3775,13 +3775,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
3775{ 3775{
3776 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 3776 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
3777 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 3777 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
3778 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
3778 3779
3779 /* RELEASE_MEM - flush caches, send int */ 3780 /* RELEASE_MEM - flush caches, send int */
3780 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 3781 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
3781 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 3782 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
3782 EOP_TC_ACTION_EN | 3783 EOP_TC_NC_ACTION_EN) :
3783 EOP_TC_WB_ACTION_EN | 3784 (EOP_TCL1_ACTION_EN |
3784 EOP_TC_MD_ACTION_EN | 3785 EOP_TC_ACTION_EN |
3786 EOP_TC_WB_ACTION_EN |
3787 EOP_TC_MD_ACTION_EN)) |
3785 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3788 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3786 EVENT_INDEX(5))); 3789 EVENT_INDEX(5)));
3787 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 3790 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 7f408f85fdb6..839a144c1645 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -159,6 +159,7 @@
159#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ 159#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
160#define EOP_TCL1_ACTION_EN (1 << 16) 160#define EOP_TCL1_ACTION_EN (1 << 16)
161#define EOP_TC_ACTION_EN (1 << 17) /* L2 */ 161#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
162#define EOP_TC_NC_ACTION_EN (1 << 19)
162#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ 163#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
163 164
164#define DATA_SEL(x) ((x) << 29) 165#define DATA_SEL(x) ((x) << 29)
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index b193e95f1f24..78fe828f2f79 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -526,6 +526,10 @@ union drm_amdgpu_cs {
526/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ 526/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
527#define AMDGPU_IB_FLAG_PREEMPT (1<<2) 527#define AMDGPU_IB_FLAG_PREEMPT (1<<2)
528 528
529/* The IB fence should do the L2 writeback but not invalidate any shader
530 * caches (L2/vL1/sL1/I$). */
531#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
532
529struct drm_amdgpu_cs_chunk_ib { 533struct drm_amdgpu_cs_chunk_ib {
530 __u32 _pad; 534 __u32 _pad;
531 /** AMDGPU_IB_FLAG_* */ 535 /** AMDGPU_IB_FLAG_* */