Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 152
1 file changed, 104 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d80e5d3a4add..82927570333a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -235,56 +235,115 @@ free_chunk:
 	return ret;
 }
 
-/* Returns how many bytes TTM can move per IB.
+/* Convert microseconds to bytes. */
+static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
+{
+	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	/* Since accum_us is incremented by a million per second, just
+	 * multiply it by the number of MB/s to get the number of bytes.
+	 */
+	return us << adev->mm_stats.log2_max_MBps;
+}
+
+static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
+{
+	if (!adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	return bytes >> adev->mm_stats.log2_max_MBps;
+}
+
+/* Returns how many bytes TTM can move right now. If no bytes can be moved,
+ * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
+ * which means it can go over the threshold once. If that happens, the driver
+ * will be in debt and no other buffer migrations can be done until that debt
+ * is repaid.
+ *
+ * This approach allows moving a buffer of any size (it's important to allow
+ * that).
+ *
+ * The currency is simply time in microseconds and it increases as the clock
+ * ticks. The accumulated microseconds (us) are converted to bytes and
+ * returned.
  */
 static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 {
-	u64 real_vram_size = adev->mc.real_vram_size;
-	u64 vram_usage = atomic64_read(&adev->vram_usage);
+	s64 time_us, increment_us;
+	u64 max_bytes;
+	u64 free_vram, total_vram, used_vram;
 
-	/* This function is based on the current VRAM usage.
+	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
+	 * throttling.
 	 *
-	 * - If all of VRAM is free, allow relocating the number of bytes that
-	 *   is equal to 1/4 of the size of VRAM for this IB.
+	 * It means that in order to get full max MBps, at least 5 IBs per
+	 * second must be submitted and not more than 200ms apart from each
+	 * other.
+	 */
+	const s64 us_upper_bound = 200000;
 
-	 * - If more than one half of VRAM is occupied, only allow relocating
-	 *   1 MB of data for this IB.
-	 *
-	 * - From 0 to one half of used VRAM, the threshold decreases
-	 *   linearly.
-	 *        __________________
-	 * 1/4 of -|\               |
-	 * VRAM    | \              |
-	 *         |  \             |
-	 *         |   \            |
-	 *         |    \           |
-	 *         |     \          |
-	 *         |      \         |
-	 *         |       \________|1 MB
-	 *         |----------------|
-	 *    VRAM 0 %             100 %
-	 *         used            used
-	 *
-	 * Note: It's a threshold, not a limit. The threshold must be crossed
-	 * for buffer relocations to stop, so any buffer of an arbitrary size
-	 * can be moved as long as the threshold isn't crossed before
-	 * the relocation takes place. We don't want to disable buffer
-	 * relocations completely.
+	if (!adev->mm_stats.log2_max_MBps)
+		return 0;
+
+	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
+	used_vram = atomic64_read(&adev->vram_usage);
+	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
+
+	spin_lock(&adev->mm_stats.lock);
+
+	/* Increase the amount of accumulated us. */
+	time_us = ktime_to_us(ktime_get());
+	increment_us = time_us - adev->mm_stats.last_update_us;
+	adev->mm_stats.last_update_us = time_us;
+	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
+				      us_upper_bound);
+
+	/* This prevents the short period of low performance when the VRAM
+	 * usage is low and the driver is in debt or doesn't have enough
+	 * accumulated us to fill VRAM quickly.
 	 *
-	 * The idea is that buffers should be placed in VRAM at creation time
-	 * and TTM should only do a minimum number of relocations during
-	 * command submission. In practice, you need to submit at least
-	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
+	 * The situation can occur in these cases:
+	 * - a lot of VRAM is freed by userspace
+	 * - the presence of a big buffer causes a lot of evictions
+	 *   (solution: split buffers into smaller ones)
 	 *
-	 * Also, things can get pretty crazy under memory pressure and actual
-	 * VRAM usage can change a lot, so playing safe even at 50% does
-	 * consistently increase performance.
+	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
+	 * accum_us to a positive number.
 	 */
+	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
+		s64 min_us;
+
+		/* Be more aggresive on dGPUs. Try to fill a portion of free
+		 * VRAM now.
+		 */
+		if (!(adev->flags & AMD_IS_APU))
+			min_us = bytes_to_us(adev, free_vram / 4);
+		else
+			min_us = 0; /* Reset accum_us on APUs. */
+
+		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
+	}
 
-	u64 half_vram = real_vram_size >> 1;
-	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
-	u64 bytes_moved_threshold = half_free_vram >> 1;
-	return max(bytes_moved_threshold, 1024*1024ull);
+	/* This returns 0 if the driver is in debt to disallow (optional)
+	 * buffer moves.
+	 */
+	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	spin_unlock(&adev->mm_stats.lock);
+	return max_bytes;
+}
+
+/* Report how many bytes have really been moved for the last command
+ * submission. This can result in a debt that can stop buffer migrations
+ * temporarily.
+ */
+static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
+					 u64 num_bytes)
+{
+	spin_lock(&adev->mm_stats.lock);
+	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	spin_unlock(&adev->mm_stats.lock);
 }
 
 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
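The core trick of the new throttle is visible in us_to_bytes() above: mm_stats.accum_us accrues one million units per second of wall-clock time, so shifting the accumulated microseconds left by log2_max_MBps (the log2 of the sustainable MB/s move rate) converts time credit directly into a byte budget. A minimal standalone sketch of that arithmetic, assuming a hypothetical rate of 64 MB/s (log2_max_MBps = 6); this is an illustration, not kernel code:

#include <stdio.h>
#include <stdint.h>

#define LOG2_MAX_MBPS	6	/* assumed: 2^6 = 64 MB/s move budget */
#define US_UPPER_BOUND	200000	/* the same 200 ms cap as the patch */

static uint64_t us_to_bytes(int64_t us)
{
	/* 64 MB/s is 64,000,000 bytes per 1,000,000 us, i.e. 64 bytes
	 * per accumulated microsecond, hence the shift by 6. */
	return us <= 0 ? 0 : (uint64_t)us << LOG2_MAX_MBPS;
}

int main(void)
{
	/* 300 ms passed since the last IB, but credit is capped at 200 ms. */
	int64_t accum_us = 300000;

	if (accum_us > US_UPPER_BOUND)
		accum_us = US_UPPER_BOUND;

	/* 200,000 us * 64 bytes/us = 12,800,000 bytes (~12.2 MiB). */
	printf("move budget: %llu bytes\n",
	       (unsigned long long)us_to_bytes(accum_us));
	return 0;
}

Under those assumptions no single IB can ever be handed more than ~12.8 MB of optional moves, however long the gap since the previous submission.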
@@ -297,15 +356,10 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	if (bo->pin_count)
 		return 0;
 
-	/* Avoid moving this one if we have moved too many buffers
-	 * for this IB already.
-	 *
-	 * Note that this allows moving at least one buffer of
-	 * any size, because it doesn't take the current "bo"
-	 * into account. We don't want to disallow buffer moves
-	 * completely.
+	/* Don't move this buffer if we have depleted our allowance
+	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved <= p->bytes_moved_threshold)
+	if (p->bytes_moved < p->bytes_moved_threshold)
 		domain = bo->prefered_domains;
 	else
 		domain = bo->allowed_domains;
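The comparison change from <= to < in the hunk above is required by the new scheme rather than cosmetic: the old heuristic clamped the threshold to at least 1 MB, whereas amdgpu_cs_get_threshold_for_moves() can now legitimately return 0 when the driver is in debt, and a zero threshold has to deny every optional move. A tiny sketch of the boundary case (plain illustrative C, not kernel code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* With bytes_moved == threshold == 0, the old inclusive test still
	 * permitted a move into the preferred domain; the strict test
	 * denies it, letting a zero threshold mean "driver is in debt". */
	uint64_t bytes_moved = 0, bytes_moved_threshold = 0;

	assert((bytes_moved <= bytes_moved_threshold) == 1); /* old: allowed */
	assert((bytes_moved <  bytes_moved_threshold) == 0); /* new: denied */
	return 0;
}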
@@ -494,6 +548,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
+
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 
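Calling amdgpu_cs_report_moved_bytes() after validation closes the accounting loop: the allowance is computed from accumulated time credit before validation, validation may overshoot it by at most one buffer, and the overshoot is charged back as negative credit that zeroes the threshold for subsequent IBs until wall-clock time catches up. A self-contained sketch of one cycle; the 64 MB/s rate, the starting credit, and the two helpers are hypothetical stand-ins for adev->mm_stats and the functions added above:

#include <stdio.h>
#include <stdint.h>

static int64_t accum_us = 150000;	/* assumed: 150 ms of credit */
static const int log2_max_MBps = 6;	/* assumed: 64 MB/s */

static uint64_t get_threshold(void)	/* cf. amdgpu_cs_get_threshold_for_moves */
{
	return accum_us <= 0 ? 0 : (uint64_t)accum_us << log2_max_MBps;
}

static void report_moved(uint64_t bytes)	/* cf. amdgpu_cs_report_moved_bytes */
{
	accum_us -= (int64_t)(bytes >> log2_max_MBps);
}

int main(void)
{
	/* 1. Before validation: 150,000 us * 64 bytes/us = 9.6 MB allowed. */
	uint64_t threshold = get_threshold();

	/* 2. Validation moves 20 MiB; the last buffer is allowed to cross
	 *    the threshold, so the total may exceed it once. */
	report_moved(20ull * 1024 * 1024);

	/* 3. accum_us is now negative (debt), so the next threshold is 0
	 *    until enough wall-clock time has accumulated again. */
	printf("threshold=%llu bytes, accum_us=%lld\n",
	       (unsigned long long)threshold, (long long)accum_us);
	return 0;
}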