path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
author    Sean Paul <seanpaul@chromium.org>  2017-08-18 10:52:44 -0400
committer Sean Paul <seanpaul@chromium.org>  2017-08-18 10:52:44 -0400
commit    0e8841ec7ee5b1ffe416c3be7743985b1896ec00
tree      9e502f1f39c740ff7417e5078cbda6eedac1c572 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent    36436f4e933b42616c8e9ba4907dccf1329cb318
parent    8824c751eb61ebffb053c291199932845bac88b4

Merge airlied/drm-next into drm-misc-next

Archit requested this backmerge to facilitate merging some patches
depending on changes between -rc2 & -rc5.

Signed-off-by: Sean Paul <seanpaul@chromium.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 102 ++++++++++++++------
1 file changed, 78 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01b265d..c05479ec825a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -54,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 
 	*offset = data->offset;
 
-	drm_gem_object_unreference_unlocked(gobj);
+	drm_gem_object_put_unlocked(gobj);
 
 	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
 		amdgpu_bo_unref(&p->uf_entry.robj);
@@ -90,7 +90,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	}
 
 	/* get chunks */
-	chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks);
+	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
 		ret = -EFAULT;
@@ -110,7 +110,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		struct drm_amdgpu_cs_chunk user_chunk;
 		uint32_t __user *cdata;
 
-		chunk_ptr = (void __user *)(uintptr_t)chunk_array[i];
+		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
 		if (copy_from_user(&user_chunk, chunk_ptr,
 				   sizeof(struct drm_amdgpu_cs_chunk))) {
 			ret = -EFAULT;
@@ -121,7 +121,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		p->chunks[i].length_dw = user_chunk.length_dw;
 
 		size = p->chunks[i].length_dw;
-		cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
+		cdata = u64_to_user_ptr(user_chunk.chunk_data);
 
 		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 		if (p->chunks[i].kdata == NULL) {
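The three hunks above, and the matching one in amdgpu_cs_wait_fences_ioctl() at the end of this diff, replace the open-coded (void __user *)(uintptr_t) double cast with the u64_to_user_ptr() helper. A minimal sketch of the pattern, assuming the helper as defined in include/linux/kernel.h of this era (it wraps the same cast in a typecheck(u64, ...), so callers passing a narrower integer type fail to build); the function and names below are hypothetical:

#include <linux/kernel.h>	/* u64_to_user_ptr() */
#include <linux/uaccess.h>	/* copy_from_user() */

/* Hypothetical ioctl fragment: copy in an array whose address arrives from
 * userspace as a plain u64 field rather than a pointer.
 */
static int fetch_handles(u64 user_addr, u64 *dst, u32 count)
{
	u64 __user *src = u64_to_user_ptr(user_addr);

	if (copy_from_user(dst, src, count * sizeof(u64)))
		return -EFAULT;

	return 0;
}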
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+					      u64 *max_bytes,
+					      u64 *max_vis_bytes)
 {
 	s64 time_us, increment_us;
-	u64 max_bytes;
 	u64 free_vram, total_vram, used_vram;
 
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	 */
 	const s64 us_upper_bound = 200000;
 
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
+	if (!adev->mm_stats.log2_max_MBps) {
+		*max_bytes = 0;
+		*max_vis_bytes = 0;
+		return;
+	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
 	used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 	}
 
-	/* This returns 0 if the driver is in debt to disallow (optional)
+	/* This is set to 0 if the driver is in debt to disallow (optional)
 	 * buffer moves.
 	 */
-	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	/* Do the same for visible VRAM if half of it is free */
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		u64 total_vis_vram = adev->mc.visible_vram_size;
+		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+		if (used_vis_vram < total_vis_vram) {
+			u64 free_vis_vram = total_vis_vram - used_vis_vram;
+			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+							  increment_us, us_upper_bound);
+
+			if (free_vis_vram >= total_vis_vram / 2)
+				adev->mm_stats.accum_us_vis =
+					max(bytes_to_us(adev, free_vis_vram / 2),
+					    adev->mm_stats.accum_us_vis);
+		}
+
+		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+	} else {
+		*max_vis_bytes = 0;
+	}
 
 	spin_unlock(&adev->mm_stats.lock);
-	return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 	spin_unlock(&adev->mm_stats.lock);
 }
 
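Together, the two functions above implement a debt-based throttle with two budgets: one for all buffer moves and one for moves into CPU-visible VRAM. A sketch of the caller pattern, mirroring the amdgpu_cs_parser_bos() hunks further down in this diff:

	u64 bytes_moved_threshold, bytes_moved_vis_threshold;

	/* Read both budgets before validating the BO list. */
	amdgpu_cs_get_threshold_for_moves(adev, &bytes_moved_threshold,
					  &bytes_moved_vis_threshold);

	/* ... validate BOs; every migration accumulates into bytes_moved
	 * and, when it touches CPU-visible VRAM, bytes_moved_vis ...
	 */

	/* Charge the actual traffic. Overspending leaves the accumulators
	 * in debt, so a later submission can see a zero budget until the
	 * time-based credit recovers.
	 */
	amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved_vis);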
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	uint32_t domain;
 	int r;
 
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold)
-		domain = bo->prefered_domains;
-	else
+	if (p->bytes_moved < p->bytes_moved_threshold) {
+		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
+			 * visible VRAM if we've depleted our allowance to do
+			 * that.
+			 */
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+				domain = bo->preferred_domains;
+			else
+				domain = bo->allowed_domains;
+		} else {
+			domain = bo->preferred_domains;
+		}
+	} else {
 		domain = bo->allowed_domains;
+	}
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 		domain = bo->allowed_domains;
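The new branch ladder above reduces to a small decision table: fall back to allowed_domains once the total move budget is spent, and likewise for a CPU_ACCESS_REQUIRED BO once the visible-VRAM budget is spent on a board whose visible VRAM is smaller than total VRAM; otherwise keep preferred_domains. A condensed restatement, with the helper name invented for illustration:

static u32 cs_pick_domain(struct amdgpu_cs_parser *p,
			  struct amdgpu_device *adev,
			  struct amdgpu_bo *bo)
{
	if (p->bytes_moved >= p->bytes_moved_threshold)
		return bo->allowed_domains;

	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
	    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
	    p->bytes_moved_vis >= p->bytes_moved_vis_threshold)
		return bo->allowed_domains;

	return bo->preferred_domains;
}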
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *candidate = p->evictable;
 	struct amdgpu_bo *bo = candidate->robj;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
+	bool update_bytes_moved_vis;
 	uint32_t other;
 
 	/* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 	/* Good we can try to move this BO somewhere else */
 	amdgpu_ttm_placement_from_domain(bo, other);
+	update_bytes_moved_vis =
+		adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+		bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (update_bytes_moved_vis)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r))
 		break;
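Note the asymmetry between the two accounting sites: amdgpu_cs_bo_validate() tests the placement after ttm_bo_validate(), counting bytes moved into visible VRAM, whereas amdgpu_cs_try_evict() latches update_bytes_moved_vis beforehand, counting bytes moved out of it, since bo->tbo.mem already describes the new placement once validation returns. Both use the same visibility test; a sketch, with the helper name invented here:

/* A BO occupies CPU-visible VRAM iff it is resident in VRAM and its first
 * page lies below the BAR-mapped boundary (mem.start is in pages).
 */
static bool bo_in_visible_vram(struct amdgpu_device *adev,
			       struct amdgpu_bo *bo)
{
	return bo->tbo.mem.mem_type == TTM_PL_VRAM &&
	       bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
}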
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
+	p->bytes_moved_vis = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+				     p->bytes_moved_vis);
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 
@@ -1383,7 +1437,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 	if (fences == NULL)
 		return -ENOMEM;
 
-	fences_user = (void __user *)(uintptr_t)(wait->in.fences);
+	fences_user = u64_to_user_ptr(wait->in.fences);
 	if (copy_from_user(fences, fences_user,
 			   sizeof(struct drm_amdgpu_fence) * fence_count)) {
 		r = -EFAULT;