diff options
author | Marek Olšák <marek.olsak@amd.com> | 2018-04-03 13:05:03 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2018-05-15 14:43:32 -0400 |
commit | d240cd9eddd943dbe0267d081697195ff1e90b65 (patch) | |
tree | ff9648c93738e223b8da42489f5d64aa8caa9895 /drivers/gpu/drm/amd/amdgpu | |
parent | 3f188453faf7ba5b59e8064df4afffbc946e25ec (diff) |
drm/amdgpu: optionally do a writeback but don't invalidate TC for IB fences
A new IB flag, AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE, enables this new behavior.
Full invalidation is unnecessary for RELEASE_MEM and doesn't make sense
when draw calls from two adjacent gfx IBs run in parallel. This will be
the new default for Mesa.
v2: bump the KMS driver minor version (3.25.0 -> 3.26.0)
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/soc15d.h | 1 |
7 files changed, 23 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5c0567ad1ba7..7c17a0bc2cd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | |||
@@ -75,9 +75,10 @@ | |||
75 | * - 3.23.0 - Add query for VRAM lost counter | 75 | * - 3.23.0 - Add query for VRAM lost counter |
76 | * - 3.24.0 - Add high priority compute support for gfx9 | 76 | * - 3.24.0 - Add high priority compute support for gfx9 |
77 | * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). | 77 | * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). |
78 | * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. | ||
78 | */ | 79 | */ |
79 | #define KMS_DRIVER_MAJOR 3 | 80 | #define KMS_DRIVER_MAJOR 3 |
80 | #define KMS_DRIVER_MINOR 25 | 81 | #define KMS_DRIVER_MINOR 26 |
81 | #define KMS_DRIVER_PATCHLEVEL 0 | 82 | #define KMS_DRIVER_PATCHLEVEL 0 |
82 | 83 | ||
83 | int amdgpu_vram_limit = 0; | 84 | int amdgpu_vram_limit = 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 97449e06a242..d09fcab2398f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | |||
@@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) | |||
131 | * Emits a fence command on the requested ring (all asics). | 131 | * Emits a fence command on the requested ring (all asics). |
132 | * Returns 0 on success, -ENOMEM on failure. | 132 | * Returns 0 on success, -ENOMEM on failure. |
133 | */ | 133 | */ |
134 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) | 134 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, |
135 | unsigned flags) | ||
135 | { | 136 | { |
136 | struct amdgpu_device *adev = ring->adev; | 137 | struct amdgpu_device *adev = ring->adev; |
137 | struct amdgpu_fence *fence; | 138 | struct amdgpu_fence *fence; |
@@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) | |||
149 | adev->fence_context + ring->idx, | 150 | adev->fence_context + ring->idx, |
150 | seq); | 151 | seq); |
151 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, | 152 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, |
152 | seq, AMDGPU_FENCE_FLAG_INT); | 153 | seq, flags | AMDGPU_FENCE_FLAG_INT); |
153 | 154 | ||
154 | ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; | 155 | ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; |
155 | /* This function can't be called concurrently anyway, otherwise | 156 | /* This function can't be called concurrently anyway, otherwise |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 311589e02d17..f70eeed9ed76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | |||
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
127 | struct amdgpu_vm *vm; | 127 | struct amdgpu_vm *vm; |
128 | uint64_t fence_ctx; | 128 | uint64_t fence_ctx; |
129 | uint32_t status = 0, alloc_size; | 129 | uint32_t status = 0, alloc_size; |
130 | unsigned fence_flags = 0; | ||
130 | 131 | ||
131 | unsigned i; | 132 | unsigned i; |
132 | int r = 0; | 133 | int r = 0; |
@@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
227 | #endif | 228 | #endif |
228 | amdgpu_asic_invalidate_hdp(adev, ring); | 229 | amdgpu_asic_invalidate_hdp(adev, ring); |
229 | 230 | ||
230 | r = amdgpu_fence_emit(ring, f); | 231 | if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) |
232 | fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; | ||
233 | |||
234 | r = amdgpu_fence_emit(ring, f, fence_flags); | ||
231 | if (r) { | 235 | if (r) { |
232 | dev_err(adev->dev, "failed to emit fence (%d)\n", r); | 236 | dev_err(adev->dev, "failed to emit fence (%d)\n", r); |
233 | if (job && job->vmid) | 237 | if (job && job->vmid) |
@@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
242 | /* wrap the last IB with fence */ | 246 | /* wrap the last IB with fence */ |
243 | if (job && job->uf_addr) { | 247 | if (job && job->uf_addr) { |
244 | amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, | 248 | amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, |
245 | AMDGPU_FENCE_FLAG_64BIT); | 249 | fence_flags | AMDGPU_FENCE_FLAG_64BIT); |
246 | } | 250 | } |
247 | 251 | ||
248 | if (patch_offset != ~0 && ring->funcs->patch_cond_exec) | 252 | if (patch_offset != ~0 && ring->funcs->patch_cond_exec) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 08fcdf6f7b53..4f8dac2d36a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | |||
@@ -42,6 +42,7 @@ | |||
42 | 42 | ||
43 | #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) | 43 | #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) |
44 | #define AMDGPU_FENCE_FLAG_INT (1 << 1) | 44 | #define AMDGPU_FENCE_FLAG_INT (1 << 1) |
45 | #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) | ||
45 | 46 | ||
46 | enum amdgpu_ring_type { | 47 | enum amdgpu_ring_type { |
47 | AMDGPU_RING_TYPE_GFX, | 48 | AMDGPU_RING_TYPE_GFX, |
@@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, | |||
90 | unsigned irq_type); | 91 | unsigned irq_type); |
91 | void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); | 92 | void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); |
92 | void amdgpu_fence_driver_resume(struct amdgpu_device *adev); | 93 | void amdgpu_fence_driver_resume(struct amdgpu_device *adev); |
93 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); | 94 | int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, |
95 | unsigned flags); | ||
94 | int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); | 96 | int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); |
95 | void amdgpu_fence_process(struct amdgpu_ring *ring); | 97 | void amdgpu_fence_process(struct amdgpu_ring *ring); |
96 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); | 98 | int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ec7c1041df2..9c2195a2896d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
@@ -633,7 +633,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ | |||
633 | amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); | 633 | amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); |
634 | 634 | ||
635 | if (vm_flush_needed || pasid_mapping_needed) { | 635 | if (vm_flush_needed || pasid_mapping_needed) { |
636 | r = amdgpu_fence_emit(ring, &fence); | 636 | r = amdgpu_fence_emit(ring, &fence, 0); |
637 | if (r) | 637 | if (r) |
638 | return r; | 638 | return r; |
639 | } | 639 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6a19e0311a9c..05b2d34110b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | |||
@@ -3775,13 +3775,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, | |||
3775 | { | 3775 | { |
3776 | bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; | 3776 | bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; |
3777 | bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; | 3777 | bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; |
3778 | bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; | ||
3778 | 3779 | ||
3779 | /* RELEASE_MEM - flush caches, send int */ | 3780 | /* RELEASE_MEM - flush caches, send int */ |
3780 | amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); | 3781 | amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); |
3781 | amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | | 3782 | amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | |
3782 | EOP_TC_ACTION_EN | | 3783 | EOP_TC_NC_ACTION_EN) : |
3783 | EOP_TC_WB_ACTION_EN | | 3784 | (EOP_TCL1_ACTION_EN | |
3784 | EOP_TC_MD_ACTION_EN | | 3785 | EOP_TC_ACTION_EN | |
3786 | EOP_TC_WB_ACTION_EN | | ||
3787 | EOP_TC_MD_ACTION_EN)) | | ||
3785 | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | | 3788 | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | |
3786 | EVENT_INDEX(5))); | 3789 | EVENT_INDEX(5))); |
3787 | amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); | 3790 | amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 7f408f85fdb6..839a144c1645 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h | |||
@@ -159,6 +159,7 @@ | |||
159 | #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ | 159 | #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ |
160 | #define EOP_TCL1_ACTION_EN (1 << 16) | 160 | #define EOP_TCL1_ACTION_EN (1 << 16) |
161 | #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ | 161 | #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ |
162 | #define EOP_TC_NC_ACTION_EN (1 << 19) | ||
162 | #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ | 163 | #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ |
163 | 164 | ||
164 | #define DATA_SEL(x) ((x) << 29) | 165 | #define DATA_SEL(x) ((x) << 29) |