about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
diff options
context:
space:
mode:
author: Roger He <Hongbo.He@amd.com> 2017-08-30 01:01:19 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2017-09-01 12:49:32 -0400
commit: 6849d47cabc36e8f2697043f8c81e7719876dfd3 (patch)
tree: 2401a9f580a77eb6c28ec97b79e81ea5f1208076 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent: 2b9bdfa70faf8c00969f91d3c4548a0df6071b90 (diff)
drm/amdgpu: handle all fragment sizes v4
This can improve performance for some cases.

v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well
v4 (chk): use single loop, make end mask 64bit

Signed-off-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Roger He <Hongbo.He@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55
1 files changed, 26 insertions, 29 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 26eb7dce5fe5..b83e0fa1f269 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1420,8 +1420,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1420 uint64_t start, uint64_t end, 1420 uint64_t start, uint64_t end,
1421 uint64_t dst, uint64_t flags) 1421 uint64_t dst, uint64_t flags)
1422{ 1422{
1423 int r;
1424
1425 /** 1423 /**
1426 * The MC L1 TLB supports variable sized pages, based on a fragment 1424 * The MC L1 TLB supports variable sized pages, based on a fragment
1427 * field in the PTE. When this field is set to a non-zero value, page 1425 * field in the PTE. When this field is set to a non-zero value, page
@@ -1440,39 +1438,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1440 * Userspace can support this by aligning virtual base address and 1438 * Userspace can support this by aligning virtual base address and
1441 * allocation size to the fragment size. 1439 * allocation size to the fragment size.
1442 */ 1440 */
1443 unsigned pages_per_frag = params->adev->vm_manager.fragment_size; 1441 unsigned max_frag = params->adev->vm_manager.fragment_size;
1444 uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); 1442 int r;
1445 uint64_t frag_align = 1 << pages_per_frag;
1446
1447 uint64_t frag_start = ALIGN(start, frag_align);
1448 uint64_t frag_end = end & ~(frag_align - 1);
1449 1443
1450 /* system pages are non continuously */ 1444 /* system pages are non continuously */
1451 if (params->src || !(flags & AMDGPU_PTE_VALID) || 1445 if (params->src || !(flags & AMDGPU_PTE_VALID))
1452 (frag_start >= frag_end))
1453 return amdgpu_vm_update_ptes(params, start, end, dst, flags); 1446 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1454 1447
1455 /* handle the 4K area at the beginning */ 1448 while (start != end) {
1456 if (start != frag_start) { 1449 uint64_t frag_flags, frag_end;
1457 r = amdgpu_vm_update_ptes(params, start, frag_start, 1450 unsigned frag;
1458 dst, flags); 1451
1452 /* This intentionally wraps around if no bit is set */
1453 frag = min((unsigned)ffs(start) - 1,
1454 (unsigned)fls64(end - start) - 1);
1455 if (frag >= max_frag) {
1456 frag_flags = AMDGPU_PTE_FRAG(max_frag);
1457 frag_end = end & ~((1ULL << max_frag) - 1);
1458 } else {
1459 frag_flags = AMDGPU_PTE_FRAG(frag);
1460 frag_end = start + (1 << frag);
1461 }
1462
1463 r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1464 flags | frag_flags);
1459 if (r) 1465 if (r)
1460 return r; 1466 return r;
1461 dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
1462 }
1463
1464 /* handle the area in the middle */
1465 r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
1466 flags | frag_flags);
1467 if (r)
1468 return r;
1469 1467
1470 /* handle the 4K area at the end */ 1468 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1471 if (frag_end != end) { 1469 start = frag_end;
1472 dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
1473 r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
1474 } 1470 }
1475 return r; 1471
1472 return 0;
1476} 1473}
1477 1474
1478/** 1475/**
@@ -1562,8 +1559,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1562 /* set page commands needed */ 1559 /* set page commands needed */
1563 ndw += ncmds * 10; 1560 ndw += ncmds * 10;
1564 1561
1565 /* two extra commands for begin/end of fragment */ 1562 /* extra commands for begin/end fragments */
1566 ndw += 2 * 10; 1563 ndw += 2 * 10 * adev->vm_manager.fragment_size;
1567 1564
1568 params.func = amdgpu_vm_do_set_ptes; 1565 params.func = amdgpu_vm_do_set_ptes;
1569 } 1566 }