diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 152 |
1 files changed, 104 insertions, 48 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d80e5d3a4add..82927570333a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | |||
@@ -235,56 +235,115 @@ free_chunk: | |||
235 | return ret; | 235 | return ret; |
236 | } | 236 | } |
237 | 237 | ||
238 | /* Returns how many bytes TTM can move per IB. | 238 | /* Convert microseconds to bytes. */ |
239 | static u64 us_to_bytes(struct amdgpu_device *adev, s64 us) | ||
240 | { | ||
241 | if (us <= 0 || !adev->mm_stats.log2_max_MBps) | ||
242 | return 0; | ||
243 | |||
244 | /* Since accum_us is incremented by a million per second, just | ||
245 | * multiply it by the number of MB/s to get the number of bytes. | ||
246 | */ | ||
247 | return us << adev->mm_stats.log2_max_MBps; | ||
248 | } | ||
249 | |||
250 | static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes) | ||
251 | { | ||
252 | if (!adev->mm_stats.log2_max_MBps) | ||
253 | return 0; | ||
254 | |||
255 | return bytes >> adev->mm_stats.log2_max_MBps; | ||
256 | } | ||
257 | |||
258 | /* Returns how many bytes TTM can move right now. If no bytes can be moved, | ||
259 | * it returns 0. If it returns non-zero, it's OK to move at least one buffer, | ||
260 | * which means it can go over the threshold once. If that happens, the driver | ||
261 | * will be in debt and no other buffer migrations can be done until that debt | ||
262 | * is repaid. | ||
263 | * | ||
264 | * This approach allows moving a buffer of any size (it's important to allow | ||
265 | * that). | ||
266 | * | ||
267 | * The currency is simply time in microseconds and it increases as the clock | ||
268 | * ticks. The accumulated microseconds (us) are converted to bytes and | ||
269 | * returned. | ||
239 | */ | 270 | */ |
240 | static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) | 271 | static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) |
241 | { | 272 | { |
242 | u64 real_vram_size = adev->mc.real_vram_size; | 273 | s64 time_us, increment_us; |
243 | u64 vram_usage = atomic64_read(&adev->vram_usage); | 274 | u64 max_bytes; |
275 | u64 free_vram, total_vram, used_vram; | ||
244 | 276 | ||
245 | /* This function is based on the current VRAM usage. | 277 | /* Allow a maximum of 200 accumulated ms. This is basically per-IB |
278 | * throttling. | ||
246 | * | 279 | * |
247 | * - If all of VRAM is free, allow relocating the number of bytes that | 280 | * It means that in order to get full max MBps, at least 5 IBs per |
248 | * is equal to 1/4 of the size of VRAM for this IB. | 281 | * second must be submitted and not more than 200ms apart from each |
282 | * other. | ||
283 | */ | ||
284 | const s64 us_upper_bound = 200000; | ||
249 | 285 | ||
250 | * - If more than one half of VRAM is occupied, only allow relocating | 286 | if (!adev->mm_stats.log2_max_MBps) |
251 | * 1 MB of data for this IB. | 287 | return 0; |
252 | * | 288 | |
253 | * - From 0 to one half of used VRAM, the threshold decreases | 289 | total_vram = adev->mc.real_vram_size - adev->vram_pin_size; |
254 | * linearly. | 290 | used_vram = atomic64_read(&adev->vram_usage); |
255 | * __________________ | 291 | free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; |
256 | * 1/4 of -|\ | | 292 | |
257 | * VRAM | \ | | 293 | spin_lock(&adev->mm_stats.lock); |
258 | * | \ | | 294 | |
259 | * | \ | | 295 | /* Increase the amount of accumulated us. */ |
260 | * | \ | | 296 | time_us = ktime_to_us(ktime_get()); |
261 | * | \ | | 297 | increment_us = time_us - adev->mm_stats.last_update_us; |
262 | * | \ | | 298 | adev->mm_stats.last_update_us = time_us; |
263 | * | \________|1 MB | 299 | adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us, |
264 | * |----------------| | 300 | us_upper_bound); |
265 | * VRAM 0 % 100 % | 301 | |
266 | * used used | 302 | /* This prevents the short period of low performance when the VRAM |
267 | * | 303 | * usage is low and the driver is in debt or doesn't have enough |
268 | * Note: It's a threshold, not a limit. The threshold must be crossed | 304 | * accumulated us to fill VRAM quickly. |
269 | * for buffer relocations to stop, so any buffer of an arbitrary size | ||
270 | * can be moved as long as the threshold isn't crossed before | ||
271 | * the relocation takes place. We don't want to disable buffer | ||
272 | * relocations completely. | ||
273 | * | 305 | * |
274 | * The idea is that buffers should be placed in VRAM at creation time | 306 | * The situation can occur in these cases: |
275 | * and TTM should only do a minimum number of relocations during | 307 | * - a lot of VRAM is freed by userspace |
276 | * command submission. In practice, you need to submit at least | 308 | * - the presence of a big buffer causes a lot of evictions |
277 | * a dozen IBs to move all buffers to VRAM if they are in GTT. | 309 | * (solution: split buffers into smaller ones) |
278 | * | 310 | * |
279 | * Also, things can get pretty crazy under memory pressure and actual | 311 | * If 128 MB or 1/8th of VRAM is free, start filling it now by setting |
280 | * VRAM usage can change a lot, so playing safe even at 50% does | 312 | * accum_us to a positive number. |
281 | * consistently increase performance. | ||
282 | */ | 313 | */ |
314 | if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) { | ||
315 | s64 min_us; | ||
316 | |||
317 | /* Be more aggressive on dGPUs. Try to fill a portion of free | ||
318 | * VRAM now. | ||
319 | */ | ||
320 | if (!(adev->flags & AMD_IS_APU)) | ||
321 | min_us = bytes_to_us(adev, free_vram / 4); | ||
322 | else | ||
323 | min_us = 0; /* Reset accum_us on APUs. */ | ||
324 | |||
325 | adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); | ||
326 | } | ||
283 | 327 | ||
284 | u64 half_vram = real_vram_size >> 1; | 328 | /* This returns 0 if the driver is in debt to disallow (optional) |
285 | u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage; | 329 | * buffer moves. |
286 | u64 bytes_moved_threshold = half_free_vram >> 1; | 330 | */ |
287 | return max(bytes_moved_threshold, 1024*1024ull); | 331 | max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); |
332 | |||
333 | spin_unlock(&adev->mm_stats.lock); | ||
334 | return max_bytes; | ||
335 | } | ||
336 | |||
337 | /* Report how many bytes have really been moved for the last command | ||
338 | * submission. This can result in a debt that can stop buffer migrations | ||
339 | * temporarily. | ||
340 | */ | ||
341 | static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, | ||
342 | u64 num_bytes) | ||
343 | { | ||
344 | spin_lock(&adev->mm_stats.lock); | ||
345 | adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); | ||
346 | spin_unlock(&adev->mm_stats.lock); | ||
288 | } | 347 | } |
289 | 348 | ||
290 | static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, | 349 | static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, |
@@ -297,15 +356,10 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, | |||
297 | if (bo->pin_count) | 356 | if (bo->pin_count) |
298 | return 0; | 357 | return 0; |
299 | 358 | ||
300 | /* Avoid moving this one if we have moved too many buffers | 359 | /* Don't move this buffer if we have depleted our allowance |
301 | * for this IB already. | 360 | * to move it. Don't move anything if the threshold is zero. |
302 | * | ||
303 | * Note that this allows moving at least one buffer of | ||
304 | * any size, because it doesn't take the current "bo" | ||
305 | * into account. We don't want to disallow buffer moves | ||
306 | * completely. | ||
307 | */ | 361 | */ |
308 | if (p->bytes_moved <= p->bytes_moved_threshold) | 362 | if (p->bytes_moved < p->bytes_moved_threshold) |
309 | domain = bo->prefered_domains; | 363 | domain = bo->prefered_domains; |
310 | else | 364 | else |
311 | domain = bo->allowed_domains; | 365 | domain = bo->allowed_domains; |
@@ -494,6 +548,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, | |||
494 | goto error_validate; | 548 | goto error_validate; |
495 | } | 549 | } |
496 | 550 | ||
551 | amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved); | ||
552 | |||
497 | fpriv->vm.last_eviction_counter = | 553 | fpriv->vm.last_eviction_counter = |
498 | atomic64_read(&p->adev->num_evictions); | 554 | atomic64_read(&p->adev->num_evictions); |
499 | 555 | ||