diff options
author | Roger He <Hongbo.He@amd.com> | 2017-08-30 01:01:19 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-09-01 12:49:32 -0400 |
commit | 6849d47cabc36e8f2697043f8c81e7719876dfd3 (patch) | |
tree | 2401a9f580a77eb6c28ec97b79e81ea5f1208076 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |
parent | 2b9bdfa70faf8c00969f91d3c4548a0df6071b90 (diff) |
drm/amdgpu: handle all fragment sizes v4
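This can improve performance in some cases: instead of applying only the maximum fragment size to the aligned middle of a range, each piece of the range is now mapped with the largest fragment size that its alignment and length allow, up to the configured maximum.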
v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well
v4 (chk): use single loop, make end mask 64bit
Signed-off-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Roger He <Hongbo.He@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 |
1 files changed, 26 insertions, 29 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 26eb7dce5fe5..b83e0fa1f269 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
@@ -1420,8 +1420,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1420 | uint64_t start, uint64_t end, | 1420 | uint64_t start, uint64_t end, |
1421 | uint64_t dst, uint64_t flags) | 1421 | uint64_t dst, uint64_t flags) |
1422 | { | 1422 | { |
1423 | int r; | ||
1424 | |||
1425 | /** | 1423 | /** |
1426 | * The MC L1 TLB supports variable sized pages, based on a fragment | 1424 | * The MC L1 TLB supports variable sized pages, based on a fragment |
1427 | * field in the PTE. When this field is set to a non-zero value, page | 1425 | * field in the PTE. When this field is set to a non-zero value, page |
@@ -1440,39 +1438,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, | |||
1440 | * Userspace can support this by aligning virtual base address and | 1438 | * Userspace can support this by aligning virtual base address and |
1441 | * allocation size to the fragment size. | 1439 | * allocation size to the fragment size. |
1442 | */ | 1440 | */ |
1443 | unsigned pages_per_frag = params->adev->vm_manager.fragment_size; | 1441 | unsigned max_frag = params->adev->vm_manager.fragment_size; |
1444 | uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); | 1442 | int r; |
1445 | uint64_t frag_align = 1 << pages_per_frag; | ||
1446 | |||
1447 | uint64_t frag_start = ALIGN(start, frag_align); | ||
1448 | uint64_t frag_end = end & ~(frag_align - 1); | ||
1449 | 1443 | ||
1450 | /* system pages are non continuously */ | 1444 | /* system pages are non continuously */ |
1451 | if (params->src || !(flags & AMDGPU_PTE_VALID) || | 1445 | if (params->src || !(flags & AMDGPU_PTE_VALID)) |
1452 | (frag_start >= frag_end)) | ||
1453 | return amdgpu_vm_update_ptes(params, start, end, dst, flags); | 1446 | return amdgpu_vm_update_ptes(params, start, end, dst, flags); |
1454 | 1447 | ||
1455 | /* handle the 4K area at the beginning */ | 1448 | while (start != end) { |
1456 | if (start != frag_start) { | 1449 | uint64_t frag_flags, frag_end; |
1457 | r = amdgpu_vm_update_ptes(params, start, frag_start, | 1450 | unsigned frag; |
1458 | dst, flags); | 1451 | |
1452 | /* This intentionally wraps around if no bit is set */ | ||
1453 | frag = min((unsigned)ffs(start) - 1, | ||
1454 | (unsigned)fls64(end - start) - 1); | ||
1455 | if (frag >= max_frag) { | ||
1456 | frag_flags = AMDGPU_PTE_FRAG(max_frag); | ||
1457 | frag_end = end & ~((1ULL << max_frag) - 1); | ||
1458 | } else { | ||
1459 | frag_flags = AMDGPU_PTE_FRAG(frag); | ||
1460 | frag_end = start + (1 << frag); | ||
1461 | } | ||
1462 | |||
1463 | r = amdgpu_vm_update_ptes(params, start, frag_end, dst, | ||
1464 | flags | frag_flags); | ||
1459 | if (r) | 1465 | if (r) |
1460 | return r; | 1466 | return r; |
1461 | dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; | ||
1462 | } | ||
1463 | |||
1464 | /* handle the area in the middle */ | ||
1465 | r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, | ||
1466 | flags | frag_flags); | ||
1467 | if (r) | ||
1468 | return r; | ||
1469 | 1467 | ||
1470 | /* handle the 4K area at the end */ | 1468 | dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; |
1471 | if (frag_end != end) { | 1469 | start = frag_end; |
1472 | dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; | ||
1473 | r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); | ||
1474 | } | 1470 | } |
1475 | return r; | 1471 | |
1472 | return 0; | ||
1476 | } | 1473 | } |
1477 | 1474 | ||
1478 | /** | 1475 | /** |
@@ -1562,8 +1559,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
1562 | /* set page commands needed */ | 1559 | /* set page commands needed */ |
1563 | ndw += ncmds * 10; | 1560 | ndw += ncmds * 10; |
1564 | 1561 | ||
1565 | /* two extra commands for begin/end of fragment */ | 1562 | /* extra commands for begin/end fragments */ |
1566 | ndw += 2 * 10; | 1563 | ndw += 2 * 10 * adev->vm_manager.fragment_size; |
1567 | 1564 | ||
1568 | params.func = amdgpu_vm_do_set_ptes; | 1565 | params.func = amdgpu_vm_do_set_ptes; |
1569 | } | 1566 | } |