diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-09-16 04:28:18 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-10-13 11:09:16 -0400 |
commit | 8728da1c6e76566ebc4717399d1f247200125595 (patch) | |
tree | 86a3b5c581998e0a9575de7a1c292e648adae73d /drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |
parent | de17750cf975005d5f3db8a0195f9a04961cc74e (diff) |
gpu: nvgpu: compact pte buffers
The lowest page table level may hold very few entries for mappings of
large pages, but a new page is allocated for each list of entries at the
lowest level, wasting memory and hurting performance. Compact these so that the
new "allocation" of ptes is appended at the end of the previous
allocation, if there is space.
A 4 KB page is still the smallest size requested from the allocator; any
possible overhead in the allocator (e.g., internally allocating big
pages only) is not taken into account.
Bug 1736604
Change-Id: I03fb795cbc06c869fcf5f1b92def89a04583ee83
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1221841
(cherry picked from commit fa92017ed48e1d5f48c1a12c512641c6ce9924af)
Reviewed-on: http://git-master/r/1234996
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index ee2bb61e..fe10b046 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -183,6 +183,7 @@ struct gk20a_comptags { | |||
183 | struct gk20a_mm_entry { | 183 | struct gk20a_mm_entry { |
184 | /* backing for */ | 184 | /* backing for */ |
185 | struct mem_desc mem; | 185 | struct mem_desc mem; |
186 | u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */ | ||
186 | int pgsz; | 187 | int pgsz; |
187 | struct gk20a_mm_entry *entries; | 188 | struct gk20a_mm_entry *entries; |
188 | int num_entries; | 189 | int num_entries; |
@@ -631,6 +632,10 @@ u32 __gk20a_aperture_mask(struct gk20a *g, enum gk20a_aperture aperture, | |||
631 | u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem, | 632 | u32 gk20a_aperture_mask(struct gk20a *g, struct mem_desc *mem, |
632 | u32 sysmem_mask, u32 vidmem_mask); | 633 | u32 sysmem_mask, u32 vidmem_mask); |
633 | 634 | ||
635 | void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry, | ||
636 | size_t w, size_t data); | ||
637 | u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry); | ||
638 | |||
634 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 639 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
635 | u64 map_offset, | 640 | u64 map_offset, |
636 | struct sg_table *sgt, | 641 | struct sg_table *sgt, |