diff options
author | Alex Deucher <alexander.deucher@amd.com> | 2017-08-22 16:39:30 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-08-29 15:27:44 -0400 |
commit | b249e18df151c9627af808321a8090c0b8d4d834 (patch) | |
tree | e1c9147f0bcc76a27db177edf650d260d93c34bc /drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | |
parent | c3db7b5a5591ede54fad5a4f5ea45f298e5d3470 (diff) |
drm/amdgpu: set sched_hw_submission higher for KIQ (v3)
KIQ doesn't really use the GPU scheduler. The base
drivers generally use the KIQ ring directly rather than
submitting IBs. However, amdgpu_sched_hw_submission
(which defaults to 2) limits the number of outstanding
fences to 2. KFD uses the KIQ for TLB flushes and the
2 fence limit hurts performance when there are several KFD
processes running.
v2: move some expressions to one line;
    change KIQ sched_hw_submission to at least 16
v3: bump the KIQ sched_hw_submission minimum to 256
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 16 |
1 file changed, 12 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6c5646b48d1a..5ce65280b396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | |||
@@ -170,6 +170,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
170 | unsigned irq_type) | 170 | unsigned irq_type) |
171 | { | 171 | { |
172 | int r; | 172 | int r; |
173 | int sched_hw_submission = amdgpu_sched_hw_submission; | ||
174 | |||
175 | /* Set the hw submission limit higher for KIQ because | ||
176 | * it's used for a number of gfx/compute tasks by both | ||
177 | * KFD and KGD which may have outstanding fences and | ||
178 | * it doesn't really use the gpu scheduler anyway; | ||
179 | * KIQ tasks get submitted directly to the ring. | ||
180 | */ | ||
181 | if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) | ||
182 | sched_hw_submission = max(sched_hw_submission, 256); | ||
173 | 183 | ||
174 | if (ring->adev == NULL) { | 184 | if (ring->adev == NULL) { |
175 | if (adev->num_rings >= AMDGPU_MAX_RINGS) | 185 | if (adev->num_rings >= AMDGPU_MAX_RINGS) |
@@ -178,8 +188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
178 | ring->adev = adev; | 188 | ring->adev = adev; |
179 | ring->idx = adev->num_rings++; | 189 | ring->idx = adev->num_rings++; |
180 | adev->rings[ring->idx] = ring; | 190 | adev->rings[ring->idx] = ring; |
181 | r = amdgpu_fence_driver_init_ring(ring, | 191 | r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission); |
182 | amdgpu_sched_hw_submission); | ||
183 | if (r) | 192 | if (r) |
184 | return r; | 193 | return r; |
185 | } | 194 | } |
@@ -218,8 +227,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
218 | return r; | 227 | return r; |
219 | } | 228 | } |
220 | 229 | ||
221 | ring->ring_size = roundup_pow_of_two(max_dw * 4 * | 230 | ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission); |
222 | amdgpu_sched_hw_submission); | ||
223 | 231 | ||
224 | ring->buf_mask = (ring->ring_size / 4) - 1; | 232 | ring->buf_mask = (ring->ring_size / 4) - 1; |
225 | ring->ptr_mask = ring->funcs->support_64bit_ptrs ? | 233 | ring->ptr_mask = ring->funcs->support_64bit_ptrs ? |