author     Sean Paul <seanpaul@chromium.org>   2017-08-18 10:52:44 -0400
committer  Sean Paul <seanpaul@chromium.org>   2017-08-18 10:52:44 -0400
commit     0e8841ec7ee5b1ffe416c3be7743985b1896ec00 (patch)
tree       9e502f1f39c740ff7417e5078cbda6eedac1c572 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent     36436f4e933b42616c8e9ba4907dccf1329cb318 (diff)
parent     8824c751eb61ebffb053c291199932845bac88b4 (diff)
Merge airlied/drm-next into drm-misc-next
Archit requested this backmerge to facilitate merging some patches
depending on changes between -rc2 & -rc5
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   102
1 file changed, 78 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01b265d..c05479ec825a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -54,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 
 	*offset = data->offset;
 
-	drm_gem_object_unreference_unlocked(gobj);
+	drm_gem_object_put_unlocked(gobj);
 
 	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
 		amdgpu_bo_unref(&p->uf_entry.robj);
@@ -90,7 +90,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	}
 
 	/* get chunks */
-	chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks);
+	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
 		ret = -EFAULT;
@@ -110,7 +110,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		struct drm_amdgpu_cs_chunk user_chunk;
 		uint32_t __user *cdata;
 
-		chunk_ptr = (void __user *)(uintptr_t)chunk_array[i];
+		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
 		if (copy_from_user(&user_chunk, chunk_ptr,
 				   sizeof(struct drm_amdgpu_cs_chunk))) {
 			ret = -EFAULT;
@@ -121,7 +121,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		p->chunks[i].length_dw = user_chunk.length_dw;
 
 		size = p->chunks[i].length_dw;
-		cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
+		cdata = u64_to_user_ptr(user_chunk.chunk_data);
 
 		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 		if (p->chunks[i].kdata == NULL) {
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+					      u64 *max_bytes,
+					      u64 *max_vis_bytes)
 {
 	s64 time_us, increment_us;
-	u64 max_bytes;
 	u64 free_vram, total_vram, used_vram;
 
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	 */
 	const s64 us_upper_bound = 200000;
 
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
+	if (!adev->mm_stats.log2_max_MBps) {
+		*max_bytes = 0;
+		*max_vis_bytes = 0;
+		return;
+	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
 	used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 	}
 
-	/* This returns 0 if the driver is in debt to disallow (optional)
+	/* This is set to 0 if the driver is in debt to disallow (optional)
 	 * buffer moves.
 	 */
-	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	/* Do the same for visible VRAM if half of it is free */
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		u64 total_vis_vram = adev->mc.visible_vram_size;
+		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+		if (used_vis_vram < total_vis_vram) {
+			u64 free_vis_vram = total_vis_vram - used_vis_vram;
+			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+							  increment_us, us_upper_bound);
+
+			if (free_vis_vram >= total_vis_vram / 2)
+				adev->mm_stats.accum_us_vis =
+					max(bytes_to_us(adev, free_vis_vram / 2),
+					    adev->mm_stats.accum_us_vis);
+		}
+
+		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+	} else {
+		*max_vis_bytes = 0;
+	}
 
 	spin_unlock(&adev->mm_stats.lock);
-	return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 	spin_unlock(&adev->mm_stats.lock);
 }
 
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	uint32_t domain;
 	int r;
 
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold)
-		domain = bo->prefered_domains;
-	else
+	if (p->bytes_moved < p->bytes_moved_threshold) {
+		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
+			 * visible VRAM if we've depleted our allowance to do
+			 * that.
+			 */
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+				domain = bo->preferred_domains;
+			else
+				domain = bo->allowed_domains;
+		} else {
+			domain = bo->preferred_domains;
+		}
+	} else {
 		domain = bo->allowed_domains;
+	}
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 		domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo *bo = candidate->robj;
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		u64 initial_bytes_moved;
+		u64 initial_bytes_moved, bytes_moved;
+		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
+		update_bytes_moved_vis =
+			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
 		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-		p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-			initial_bytes_moved;
+		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+			      initial_bytes_moved;
+		p->bytes_moved += bytes_moved;
+		if (update_bytes_moved_vis)
+			p->bytes_moved_vis += bytes_moved;
 
 		if (unlikely(r))
 			break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
+	p->bytes_moved_vis = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+				     p->bytes_moved_vis);
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 
@@ -1383,7 +1437,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 	if (fences == NULL)
 		return -ENOMEM;
 
-	fences_user = (void __user *)(uintptr_t)(wait->in.fences);
+	fences_user = u64_to_user_ptr(wait->in.fences);
 	if (copy_from_user(fences, fences_user,
 			   sizeof(struct drm_amdgpu_fence) * fence_count)) {
 		r = -EFAULT;
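
Note on the threshold mechanism changed above: amdgpu_cs_get_threshold_for_moves() and amdgpu_cs_report_moved_bytes() implement a time-based budget. Microseconds accumulate between submissions (capped at us_upper_bound = 200000), are converted into a byte allowance using the measured log2_max_MBps rate, and the bytes actually moved are charged back afterwards, possibly driving the budget into debt. The new hunks add a second accumulator, accum_us_vis, so migrations into CPU-visible VRAM are throttled separately. The following standalone C sketch models only that accounting; the struct, constants, and helper names are simplified stand-ins (a fixed rate instead of adev->mm_stats.log2_max_MBps, no spinlock, caller-supplied elapsed time) and are not the driver's API.

/* Minimal model of the two-budget move throttling shown in the diff above. */
#include <stdint.h>
#include <stdio.h>

#define LOG2_MAX_MBPS	8		/* assumed rate: 256 MB/s */
#define US_UPPER_BOUND	200000		/* allow at most 200 ms of accumulated budget */

struct mm_stats {
	int64_t accum_us;		/* time budget for all buffer moves */
	int64_t accum_us_vis;		/* separate budget for visible-VRAM moves */
};

/* Multiplying microseconds by MB/s gives bytes directly (the 10^6 factors
 * cancel when a MB is treated as 10^6 bytes); the rate is a power of two,
 * so the conversion is a shift. */
static uint64_t us_to_bytes(int64_t us)
{
	return us <= 0 ? 0 : (uint64_t)us << LOG2_MAX_MBPS;
}

static int64_t bytes_to_us(uint64_t bytes)
{
	return (int64_t)(bytes >> LOG2_MAX_MBPS);
}

/* Analogue of amdgpu_cs_get_threshold_for_moves(): grow both budgets by the
 * time elapsed since the last submission, clamp them, and return the byte
 * thresholds for this submission. */
static void get_thresholds(struct mm_stats *s, int64_t elapsed_us,
			   uint64_t *max_bytes, uint64_t *max_vis_bytes)
{
	s->accum_us += elapsed_us;
	if (s->accum_us > US_UPPER_BOUND)
		s->accum_us = US_UPPER_BOUND;

	s->accum_us_vis += elapsed_us;
	if (s->accum_us_vis > US_UPPER_BOUND)
		s->accum_us_vis = US_UPPER_BOUND;

	/* A budget still in debt (negative) yields 0, disallowing optional moves. */
	*max_bytes = us_to_bytes(s->accum_us);
	*max_vis_bytes = us_to_bytes(s->accum_us_vis);
}

/* Analogue of amdgpu_cs_report_moved_bytes(): charge what was really moved,
 * possibly pushing a budget negative until future elapsed time pays it off. */
static void report_moved(struct mm_stats *s, uint64_t bytes, uint64_t vis_bytes)
{
	s->accum_us -= bytes_to_us(bytes);
	s->accum_us_vis -= bytes_to_us(vis_bytes);
}

int main(void)
{
	struct mm_stats s = { 0, 0 };
	uint64_t max_bytes, max_vis_bytes;

	get_thresholds(&s, 16000, &max_bytes, &max_vis_bytes);	/* ~16 ms since last CS */
	printf("thresholds: %llu bytes total, %llu bytes visible\n",
	       (unsigned long long)max_bytes, (unsigned long long)max_vis_bytes);

	report_moved(&s, max_bytes / 2, max_vis_bytes);		/* visible budget fully spent */
	printf("after CS: accum_us=%lld accum_us_vis=%lld\n",
	       (long long)s.accum_us, (long long)s.accum_us_vis);
	return 0;
}

Note the asymmetry visible in the amdgpu_cs_bo_validate() hunk: the visible-VRAM threshold only gates placement of BOs flagged AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (and only when visible VRAM is smaller than total VRAM), while any move that actually lands inside the visible window is charged to bytes_moved_vis.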