author    Konsta Holtta <kholtta@nvidia.com>  2016-09-19 02:24:13 -0400
committer Deepak Nibade <dnibade@nvidia.com>  2016-12-27 04:56:49 -0500
commit    4afc6a1659ec058fd44953ccff7a1030275bcc92 (patch)
tree      a6953c8895f1ab917f3a5eca29f4568a6702749c /drivers/gpu/nvgpu/gp10b/mm_gp10b.c
parent    49c3fb25822565a9078961cdef1222aaa8c7e89a (diff)
gpu: nvgpu: compact pte buffers
The lowest page table level may hold very few entries for mappings of
large pages, but a new page is allocated for each list of entries at the
lowest level, wasting memory and performance. Compact these so that the
new "allocation" of ptes is appended at the end of the previous
allocation, if there is space.

Bug 1736604

Change-Id: I4c7c4cad9019de202325750aee6034076e7e61c2
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1222810
(cherry picked from commit 97303ecc946c17150496486a2f52bd481311dbf7)
Reviewed-on: http://git-master/r/1234995
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
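The change works by letting a lowest-level PTE block share the backing page of the previous block when that page still has room, instead of always allocating a fresh page. The diff below only switches this file's PDE/PTE accesses from raw gk20a_mem_wr32()/entry_addr() to the offset-aware gk20a_pde_addr()/gk20a_pde_wr32() helpers; the allocator-side compaction itself lives in the common gk20a MM code and is not shown here. A minimal sketch of the append-if-there-is-room idea, with hypothetical names (backing_page, pte_block, pte_block_alloc, pte_block_wr32) standing in for the real structures:

/*
 * Sketch only -- illustrates the "append the new pte allocation to the end
 * of the previous one if there is space" idea from the commit message.
 * All names here are hypothetical; the real helpers (gk20a_pde_addr(),
 * gk20a_pde_wr32()) are defined in the common gk20a MM code, not in this diff.
 */
#include <stdint.h>

struct backing_page {
	uint32_t *words;	/* one page worth of PTE memory */
	uint32_t size_words;	/* capacity in 32-bit words */
	uint32_t used_words;	/* words already handed out */
};

struct pte_block {
	struct backing_page *page;	/* backing page, possibly shared */
	uint32_t woffset;		/* word offset of this block in page */
};

/*
 * Carve a new PTE block out of an existing page if it still has room.
 * Returns -1 when the page is full, in which case the caller would fall
 * back to allocating a fresh page (as the old code always did).
 */
static int pte_block_alloc(struct backing_page *page, uint32_t num_words,
			   struct pte_block *out)
{
	if (page->used_words + num_words > page->size_words)
		return -1;

	out->page = page;
	out->woffset = page->used_words;
	page->used_words += num_words;
	return 0;
}

/*
 * Writes add the block's offset, mirroring how gk20a_pde_wr32() is assumed
 * to offset into a shared allocation instead of always writing at word 0.
 */
static void pte_block_wr32(struct pte_block *b, uint32_t w, uint32_t data)
{
	b->page->words[b->woffset + w] = data;
}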
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c  39
1 file changed, 14 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 03bab121..1e073ab2 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -151,18 +151,6 @@ static u32 pte3_from_index(u32 i)
 	return i * gmmu_new_pte__size_v() / sizeof(u32);
 }
 
-static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-	u64 addr;
-
-	if (g->mm.has_physical_mode)
-		addr = sg_phys(entry->mem.sgt->sgl);
-	else
-		addr = gk20a_mem_get_base_addr(g, &entry->mem, 0);
-
-	return addr;
-}
-
 static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 		struct gk20a_mm_entry *parent,
 		u32 i, u32 gmmu_pgsz_idx,
@@ -176,15 +164,13 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 pte_addr = 0;
-	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 pde;
 
 	gk20a_dbg_fn("");
 
-	pte_addr = entry_addr(g, pte) >> gmmu_new_pde_address_shift_v();
-	pde_addr = entry_addr(g, parent);
+	pte_addr = gk20a_pde_addr(g, pte) >> gmmu_new_pde_address_shift_v();
 
 	pde_v[0] |= gk20a_aperture_mask(g, &pte->mem,
 			gmmu_new_pde_aperture_sys_mem_ncoh_f(),
@@ -194,8 +180,8 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[1] |= pte_addr >> 24;
 	pde = pde3_from_index(i);
 
-	gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]);
-	gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]);
+	gk20a_pde_wr32(g, parent, pde + 0, pde_v[0]);
+	gk20a_pde_wr32(g, parent, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -232,12 +218,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid) {
-		pte_addr_small = entry_addr(g, entry)
+		pte_addr_small = gk20a_pde_addr(g, entry)
 				>> gmmu_new_dual_pde_address_shift_v();
 	}
 
 	if (big_valid)
-		pte_addr_big = entry_addr(g, entry)
+		pte_addr_big = gk20a_pde_addr(g, entry)
 			>> gmmu_new_dual_pde_address_big_shift_v();
 
 	if (small_valid) {
@@ -260,10 +246,10 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 
 	pde = pde0_from_index(i);
 
-	gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]);
-	gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]);
+	gk20a_pde_wr32(g, pte, pde + 0, pde_v[0]);
+	gk20a_pde_wr32(g, pte, pde + 1, pde_v[1]);
+	gk20a_pde_wr32(g, pte, pde + 2, pde_v[2]);
+	gk20a_pde_wr32(g, pte, pde + 3, pde_v[3]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
 		  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
@@ -286,6 +272,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 	u64 ctag_granularity = g->ops.fb.compression_page_size(g);
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
+	u32 pte_i;
 
 	if (*iova) {
 		u32 pte_valid = unmapped_pte ?
@@ -331,8 +318,10 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]);
-	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]);
+	pte_i = pte3_from_index(i);
+
+	gk20a_pde_wr32(g, pte, pte_i + 0, pte_w[0]);
+	gk20a_pde_wr32(g, pte, pte_i + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;