aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
diff options
context:
space:
mode:
author Dave Airlie <airlied@redhat.com> 2017-12-06 15:28:22 -0500
committer Dave Airlie <airlied@redhat.com> 2017-12-06 15:28:22 -0500
commit 9c606cd4117a3c45e04a6616b1a0dbeb18eeee62 (patch)
tree aa6c1db29e1a3f687c81fa03aecd24992a76e993 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent c5dd52f653fa74f8f4771425c6db33609ad21258 (diff)
parent 3997eea57caf542e9327df9b6bb2882a57c4c421 (diff)
Merge branch 'drm-next-4.16' of git://people.freedesktop.org/~agd5f/linux into drm-next
First feature request for 4.16. Highlights:
- RV and Vega header cleanups
- TTM operation context support
- 48 bit GPUVM fixes for Vega/RV
- More smatch fixes
- ECC support for vega10
- Resizeable BAR support
- Multi-display sync support in DC
- SR-IOV fixes
- Various scheduler improvements
- GPU reset fixes and vram lost tracking
- Clean up DC/powerplay interfaces
- DCN display fixes
- Various DC fixes

* 'drm-next-4.16' of git://people.freedesktop.org/~agd5f/linux: (291 commits)
  drm/radeon: Use drm_fb_helper_lastclose() and _poll_changed()
  drm/amdgpu: Use drm_fb_helper_lastclose() and _poll_changed()
  drm/amd/display: Use drm_fb_helper_poll_changed()
  drm/ttm: swap consecutive allocated pooled pages v4
  drm/amdgpu: fix amdgpu_sync_resv v2
  drm/ttm: swap consecutive allocated cached pages v3
  drm/amd/amdgpu: set gtt size according to system memory size only
  drm/amdgpu: Get rid of dep_sync as a seperate object.
  drm/amdgpu: allow specifying vm_block_size for multi level PDs v2
  drm/amdgpu: move validation of the VM size into the VM code
  drm/amdgpu: allow non pot VM size values
  drm/amdgpu: choose number of VM levels based on VM size
  drm/amdgpu: unify VM size handling of Vega10 with older generation
  drm/amdgpu: fix amdgpu_vm_num_entries
  drm/amdgpu: fix VM PD addr shift
  drm/amdgpu: correct vce4.0 fw config for SRIOV (V2)
  drm/amd/display: Don't call dm_log_to_buffer directly in dc_conn_log
  drm/amd/display: Add dm_logger_append_va API
  drm/ttm: Use a static string instead of an array of char *
  drm/amd/display: remove usage of legacy_cursor_update
  ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 126
1 files changed, 77 insertions, 49 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c8c26f21993c..3ecdbdfb04dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -139,6 +139,24 @@ struct amdgpu_prt_cb {
139}; 139};
140 140
141/** 141/**
142 * amdgpu_vm_level_shift - return the addr shift for each level
143 *
144 * @adev: amdgpu_device pointer
145 *
146 * Returns the number of bits the pfn needs to be right shifted for a level.
147 */
148static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
149 unsigned level)
150{
151 if (level != adev->vm_manager.num_level)
152 return 9 * (adev->vm_manager.num_level - level - 1) +
153 adev->vm_manager.block_size;
154 else
155 /* For the page tables on the leaves */
156 return 0;
157}
158
159/**
142 * amdgpu_vm_num_entries - return the number of entries in a PD/PT 160 * amdgpu_vm_num_entries - return the number of entries in a PD/PT
143 * 161 *
144 * @adev: amdgpu_device pointer 162 * @adev: amdgpu_device pointer
@@ -148,17 +166,17 @@ struct amdgpu_prt_cb {
148static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, 166static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
149 unsigned level) 167 unsigned level)
150{ 168{
169 unsigned shift = amdgpu_vm_level_shift(adev, 0);
170
151 if (level == 0) 171 if (level == 0)
152 /* For the root directory */ 172 /* For the root directory */
153 return adev->vm_manager.max_pfn >> 173 return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
154 (adev->vm_manager.block_size * 174 else if (level != adev->vm_manager.num_level)
155 adev->vm_manager.num_level); 175 /* Everything in between */
156 else if (level == adev->vm_manager.num_level) 176 return 512;
177 else
157 /* For the page tables on the leaves */ 178 /* For the page tables on the leaves */
158 return AMDGPU_VM_PTE_COUNT(adev); 179 return AMDGPU_VM_PTE_COUNT(adev);
159 else
160 /* Everything in between */
161 return 1 << adev->vm_manager.block_size;
162} 180}
163 181
164/** 182/**
@@ -288,8 +306,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
288 uint64_t saddr, uint64_t eaddr, 306 uint64_t saddr, uint64_t eaddr,
289 unsigned level) 307 unsigned level)
290{ 308{
291 unsigned shift = (adev->vm_manager.num_level - level) * 309 unsigned shift = amdgpu_vm_level_shift(adev, level);
292 adev->vm_manager.block_size;
293 unsigned pt_idx, from, to; 310 unsigned pt_idx, from, to;
294 int r; 311 int r;
295 u64 flags; 312 u64 flags;
@@ -471,7 +488,7 @@ static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
471 id->pd_gpu_addr = 0; 488 id->pd_gpu_addr = 0;
472 tmp = amdgpu_sync_peek_fence(&id->active, ring); 489 tmp = amdgpu_sync_peek_fence(&id->active, ring);
473 if (tmp) { 490 if (tmp) {
474 r = amdgpu_sync_fence(adev, sync, tmp); 491 r = amdgpu_sync_fence(adev, sync, tmp, false);
475 return r; 492 return r;
476 } 493 }
477 } 494 }
@@ -479,7 +496,7 @@ static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
479 /* Good we can use this VMID. Remember this submission as 496 /* Good we can use this VMID. Remember this submission as
480 * user of the VMID. 497 * user of the VMID.
481 */ 498 */
482 r = amdgpu_sync_fence(ring->adev, &id->active, fence); 499 r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
483 if (r) 500 if (r)
484 goto out; 501 goto out;
485 502
@@ -566,7 +583,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
566 } 583 }
567 584
568 585
569 r = amdgpu_sync_fence(ring->adev, sync, &array->base); 586 r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
570 dma_fence_put(&array->base); 587 dma_fence_put(&array->base);
571 if (r) 588 if (r)
572 goto error; 589 goto error;
@@ -609,7 +626,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
609 /* Good we can use this VMID. Remember this submission as 626 /* Good we can use this VMID. Remember this submission as
610 * user of the VMID. 627 * user of the VMID.
611 */ 628 */
612 r = amdgpu_sync_fence(ring->adev, &id->active, fence); 629 r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
613 if (r) 630 if (r)
614 goto error; 631 goto error;
615 632
@@ -629,7 +646,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
629 id = idle; 646 id = idle;
630 647
631 /* Remember this submission as user of the VMID */ 648 /* Remember this submission as user of the VMID */
632 r = amdgpu_sync_fence(ring->adev, &id->active, fence); 649 r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
633 if (r) 650 if (r)
634 goto error; 651 goto error;
635 652
@@ -1302,18 +1319,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1302 struct amdgpu_vm_pt **entry, 1319 struct amdgpu_vm_pt **entry,
1303 struct amdgpu_vm_pt **parent) 1320 struct amdgpu_vm_pt **parent)
1304{ 1321{
1305 unsigned idx, level = p->adev->vm_manager.num_level; 1322 unsigned level = 0;
1306 1323
1307 *parent = NULL; 1324 *parent = NULL;
1308 *entry = &p->vm->root; 1325 *entry = &p->vm->root;
1309 while ((*entry)->entries) { 1326 while ((*entry)->entries) {
1310 idx = addr >> (p->adev->vm_manager.block_size * level--); 1327 unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++);
1328
1311 idx %= amdgpu_bo_size((*entry)->base.bo) / 8; 1329 idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
1312 *parent = *entry; 1330 *parent = *entry;
1313 *entry = &(*entry)->entries[idx]; 1331 *entry = &(*entry)->entries[idx];
1314 } 1332 }
1315 1333
1316 if (level) 1334 if (level != p->adev->vm_manager.num_level)
1317 *entry = NULL; 1335 *entry = NULL;
1318} 1336}
1319 1337
@@ -1639,7 +1657,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1639 addr = 0; 1657 addr = 0;
1640 } 1658 }
1641 1659
1642 r = amdgpu_sync_fence(adev, &job->sync, exclusive); 1660 r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
1643 if (r) 1661 if (r)
1644 goto error_free; 1662 goto error_free;
1645 1663
@@ -2556,47 +2574,57 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2556} 2574}
2557 2575
2558/** 2576/**
2559 * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
2560 *
2561 * @adev: amdgpu_device pointer
2562 * @fragment_size_default: the default fragment size if it's set auto
2563 */
2564void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
2565 uint32_t fragment_size_default)
2566{
2567 if (amdgpu_vm_fragment_size == -1)
2568 adev->vm_manager.fragment_size = fragment_size_default;
2569 else
2570 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2571}
2572
2573/**
2574 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size 2577 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2575 * 2578 *
2576 * @adev: amdgpu_device pointer 2579 * @adev: amdgpu_device pointer
2577 * @vm_size: the default vm size if it's set auto 2580 * @vm_size: the default vm size if it's set auto
2578 */ 2581 */
2579void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, 2582void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
2580 uint32_t fragment_size_default) 2583 uint32_t fragment_size_default, unsigned max_level,
2584 unsigned max_bits)
2581{ 2585{
2582 /* adjust vm size firstly */ 2586 uint64_t tmp;
2583 if (amdgpu_vm_size == -1)
2584 adev->vm_manager.vm_size = vm_size;
2585 else
2586 adev->vm_manager.vm_size = amdgpu_vm_size;
2587 2587
2588 /* block size depends on vm size */ 2588 /* adjust vm size first */
2589 if (amdgpu_vm_block_size == -1) 2589 if (amdgpu_vm_size != -1) {
2590 unsigned max_size = 1 << (max_bits - 30);
2591
2592 vm_size = amdgpu_vm_size;
2593 if (vm_size > max_size) {
2594 dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2595 amdgpu_vm_size, max_size);
2596 vm_size = max_size;
2597 }
2598 }
2599
2600 adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2601
2602 tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2603 if (amdgpu_vm_block_size != -1)
2604 tmp >>= amdgpu_vm_block_size - 9;
2605 tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2606 adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
2607
2608 /* block size depends on vm size and hw setup*/
2609 if (amdgpu_vm_block_size != -1)
2590 adev->vm_manager.block_size = 2610 adev->vm_manager.block_size =
2591 amdgpu_vm_get_block_size(adev->vm_manager.vm_size); 2611 min((unsigned)amdgpu_vm_block_size, max_bits
2612 - AMDGPU_GPU_PAGE_SHIFT
2613 - 9 * adev->vm_manager.num_level);
2614 else if (adev->vm_manager.num_level > 1)
2615 adev->vm_manager.block_size = 9;
2592 else 2616 else
2593 adev->vm_manager.block_size = amdgpu_vm_block_size; 2617 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2594 2618
2595 amdgpu_vm_set_fragment_size(adev, fragment_size_default); 2619 if (amdgpu_vm_fragment_size == -1)
2620 adev->vm_manager.fragment_size = fragment_size_default;
2621 else
2622 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2596 2623
2597 DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", 2624 DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2598 adev->vm_manager.vm_size, adev->vm_manager.block_size, 2625 vm_size, adev->vm_manager.num_level + 1,
2599 adev->vm_manager.fragment_size); 2626 adev->vm_manager.block_size,
2627 adev->vm_manager.fragment_size);
2600} 2628}
2601 2629
2602/** 2630/**
@@ -2637,7 +2665,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2637 ring = adev->vm_manager.vm_pte_rings[ring_instance]; 2665 ring = adev->vm_manager.vm_pte_rings[ring_instance];
2638 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; 2666 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
2639 r = amd_sched_entity_init(&ring->sched, &vm->entity, 2667 r = amd_sched_entity_init(&ring->sched, &vm->entity,
2640 rq, amdgpu_sched_jobs); 2668 rq, amdgpu_sched_jobs, NULL);
2641 if (r) 2669 if (r)
2642 return r; 2670 return r;
2643 2671