path: root/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c  309
1 file changed, 145 insertions(+), 164 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index d7391c6d..c3867e9d 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -14,6 +14,7 @@
  */
 
 #include <nvgpu/dma.h>
+#include <nvgpu/gmmu.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -149,206 +150,186 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
         return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-static u32 pde3_from_index(u32 i)
-{
-        return i * gmmu_new_pde__size_v() / sizeof(u32);
-}
-
-static u32 pte3_from_index(u32 i)
-{
-        return i * gmmu_new_pte__size_v() / sizeof(u32);
-}
-
-static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
-                        struct gk20a_mm_entry *parent,
-                        u32 i, u32 gmmu_pgsz_idx,
-                        struct scatterlist **sgl,
-                        u64 *offset,
-                        u64 *iova,
-                        u32 kind_v, u64 *ctag,
-                        bool cacheable, bool unmapped_pte,
-                        int rw_flag, bool sparse, bool priv,
-                        enum nvgpu_aperture aperture)
+static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
+                                    const struct gk20a_mmu_level *l,
+                                    struct nvgpu_gmmu_pd *pd,
+                                    u32 pd_idx,
+                                    u64 virt_addr,
+                                    u64 phys_addr,
+                                    struct nvgpu_gmmu_attrs *attrs)
 {
         struct gk20a *g = gk20a_from_vm(vm);
-        u64 pte_addr = 0;
-        struct gk20a_mm_entry *pte = parent->entries + i;
+        u32 pd_offset = pd_offset_from_index(l, pd_idx);
         u32 pde_v[2] = {0, 0};
-        u32 pde;
-
-        gk20a_dbg_fn("");
 
-        pte_addr = gk20a_pde_addr(g, pte) >> gmmu_new_pde_address_shift_v();
+        phys_addr >>= gmmu_new_pde_address_shift_v();
 
-        pde_v[0] |= nvgpu_aperture_mask(g, &pte->mem,
+        pde_v[0] |= nvgpu_aperture_mask(g, &pd->mem,
                         gmmu_new_pde_aperture_sys_mem_ncoh_f(),
                         gmmu_new_pde_aperture_video_memory_f());
-        pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
+        pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
         pde_v[0] |= gmmu_new_pde_vol_true_f();
-        pde_v[1] |= pte_addr >> 24;
-        pde = pde3_from_index(i);
-
-        gk20a_pde_wr32(g, parent, pde + 0, pde_v[0]);
-        gk20a_pde_wr32(g, parent, pde + 1, pde_v[1]);
-
-        gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
-                  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
-        gk20a_dbg_fn("done");
-        return 0;
+        pde_v[1] |= phys_addr >> 24;
+
+        pd_write(g, pd, pd_offset + 0, pde_v[0]);
+        pd_write(g, pd, pd_offset + 1, pde_v[1]);
+
+        pte_dbg(g, attrs,
+                "PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | "
+                "GPU %#-12llx phys %#-12llx "
+                "[0x%08x, 0x%08x]",
+                pd_idx, l->entry_size, pd_offset,
+                virt_addr, phys_addr,
+                pde_v[1], pde_v[0]);
 }
 
-static u32 pde0_from_index(u32 i)
-{
-        return i * gmmu_new_dual_pde__size_v() / sizeof(u32);
-}
-
-static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
-                        struct gk20a_mm_entry *pte,
-                        u32 i, u32 gmmu_pgsz_idx,
-                        struct scatterlist **sgl,
-                        u64 *offset,
-                        u64 *iova,
-                        u32 kind_v, u64 *ctag,
-                        bool cacheable, bool unmapped_pte,
-                        int rw_flag, bool sparse, bool priv,
-                        enum nvgpu_aperture aperture)
+static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
+                                    const struct gk20a_mmu_level *l,
+                                    struct nvgpu_gmmu_pd *pd,
+                                    u32 pd_idx,
+                                    u64 virt_addr,
+                                    u64 phys_addr,
+                                    struct nvgpu_gmmu_attrs *attrs)
 {
         struct gk20a *g = gk20a_from_vm(vm);
         bool small_valid, big_valid;
-        u32 pte_addr_small = 0, pte_addr_big = 0;
-        struct gk20a_mm_entry *entry = pte->entries + i;
+        u32 small_addr = 0, big_addr = 0;
+        u32 pd_offset = pd_offset_from_index(l, pd_idx);
         u32 pde_v[4] = {0, 0, 0, 0};
-        u32 pde;
-
-        gk20a_dbg_fn("");
 
-        small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small;
-        big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big;
+        small_valid = attrs->pgsz == gmmu_page_size_small;
+        big_valid = attrs->pgsz == gmmu_page_size_big;
 
-        if (small_valid) {
-                pte_addr_small = gk20a_pde_addr(g, entry)
-                        >> gmmu_new_dual_pde_address_shift_v();
-        }
+        if (small_valid)
+                small_addr = phys_addr >> gmmu_new_dual_pde_address_shift_v();
 
         if (big_valid)
-                pte_addr_big = gk20a_pde_addr(g, entry)
-                        >> gmmu_new_dual_pde_address_big_shift_v();
+                big_addr = phys_addr >> gmmu_new_dual_pde_address_big_shift_v();
 
         if (small_valid) {
-                pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
-                pde_v[2] |= nvgpu_aperture_mask(g, &entry->mem,
+                pde_v[2] |=
+                        gmmu_new_dual_pde_address_small_sys_f(small_addr);
+                pde_v[2] |= nvgpu_aperture_mask(g, &pd->mem,
                         gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
                         gmmu_new_dual_pde_aperture_small_video_memory_f());
                 pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
-                pde_v[3] |= pte_addr_small >> 24;
+                pde_v[3] |= small_addr >> 24;
         }
 
         if (big_valid) {
-                pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
+                pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(big_addr);
                 pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
-                pde_v[0] |= nvgpu_aperture_mask(g, &entry->mem,
+                pde_v[0] |= nvgpu_aperture_mask(g, &pd->mem,
                         gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
                         gmmu_new_dual_pde_aperture_big_video_memory_f());
-                pde_v[1] |= pte_addr_big >> 28;
+                pde_v[1] |= big_addr >> 28;
         }
 
-        pde = pde0_from_index(i);
-
-        gk20a_pde_wr32(g, pte, pde + 0, pde_v[0]);
-        gk20a_pde_wr32(g, pte, pde + 1, pde_v[1]);
-        gk20a_pde_wr32(g, pte, pde + 2, pde_v[2]);
-        gk20a_pde_wr32(g, pte, pde + 3, pde_v[3]);
-
-        gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
-                  i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
-        gk20a_dbg_fn("done");
-        return 0;
+        pd_write(g, pd, pd_offset + 0, pde_v[0]);
+        pd_write(g, pd, pd_offset + 1, pde_v[1]);
+        pd_write(g, pd, pd_offset + 2, pde_v[2]);
+        pd_write(g, pd, pd_offset + 3, pde_v[3]);
+
+        pte_dbg(g, attrs,
+                "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
+                "GPU %#-12llx phys %#-12llx "
+                "[0x%08x, 0x%08x, 0x%08x, 0x%08x]",
+                pd_idx, l->entry_size, pd_offset,
+                small_valid ? 'S' : '-',
+                big_valid ? 'B' : '-',
+                virt_addr, phys_addr,
+                pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
 }
 
-static int update_gmmu_pte_locked(struct vm_gk20a *vm,
-                        struct gk20a_mm_entry *pte,
-                        u32 i, u32 gmmu_pgsz_idx,
-                        struct scatterlist **sgl,
-                        u64 *offset,
-                        u64 *iova,
-                        u32 kind_v, u64 *ctag,
-                        bool cacheable, bool unmapped_pte,
-                        int rw_flag, bool sparse, bool priv,
-                        enum nvgpu_aperture aperture)
+static void __update_pte(struct vm_gk20a *vm,
+                         u32 *pte_w,
+                         u64 phys_addr,
+                         struct nvgpu_gmmu_attrs *attrs)
 {
-        struct gk20a *g = vm->mm->g;
-        u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
+        struct gk20a *g = gk20a_from_vm(vm);
         u64 ctag_granularity = g->ops.fb.compression_page_size(g);
-        u32 pte_w[2] = {0, 0}; /* invalid pte */
-        u32 pte_i;
-
-        if (*iova) {
-                u32 pte_valid = unmapped_pte ?
-                        gmmu_new_pte_valid_false_f() :
-                        gmmu_new_pte_valid_true_f();
-                u32 iova_v = *iova >> gmmu_new_pte_address_shift_v();
-                u32 pte_addr = aperture == APERTURE_SYSMEM ?
-                        gmmu_new_pte_address_sys_f(iova_v) :
-                        gmmu_new_pte_address_vid_f(iova_v);
-                u32 pte_tgt = __nvgpu_aperture_mask(g, aperture,
-                        gmmu_new_pte_aperture_sys_mem_ncoh_f(),
-                        gmmu_new_pte_aperture_video_memory_f());
-
-                pte_w[0] = pte_valid | pte_addr | pte_tgt;
-
-                if (priv)
-                        pte_w[0] |= gmmu_new_pte_privilege_true_f();
-
-                pte_w[1] = *iova >> (24 + gmmu_new_pte_address_shift_v()) |
-                        gmmu_new_pte_kind_f(kind_v) |
-                        gmmu_new_pte_comptagline_f((u32)(*ctag / ctag_granularity));
-
-                if (rw_flag == gk20a_mem_flag_read_only)
-                        pte_w[0] |= gmmu_new_pte_read_only_true_f();
-                if (unmapped_pte && !cacheable)
-                        pte_w[0] |= gmmu_new_pte_read_only_true_f();
-                else if (!cacheable)
-                        pte_w[0] |= gmmu_new_pte_vol_true_f();
-
-                gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
-                        " ctag=%d vol=%d"
-                        " [0x%08x, 0x%08x]",
-                        i, *iova,
-                        kind_v, (u32)(*ctag / ctag_granularity), !cacheable,
-                        pte_w[1], pte_w[0]);
-
-                if (*ctag)
-                        *ctag += page_size;
-        } else if (sparse) {
-                pte_w[0] = gmmu_new_pte_valid_false_f();
+        u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
+        u32 pte_valid = attrs->valid ?
+                gmmu_new_pte_valid_true_f() :
+                gmmu_new_pte_valid_false_f();
+        u32 phys_shifted = phys_addr >> gmmu_new_pte_address_shift_v();
+        u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ?
+                gmmu_new_pte_address_sys_f(phys_shifted) :
+                gmmu_new_pte_address_vid_f(phys_shifted);
+        u32 pte_tgt = __nvgpu_aperture_mask(g, attrs->aperture,
+                gmmu_new_pte_aperture_sys_mem_ncoh_f(),
+                gmmu_new_pte_aperture_video_memory_f());
+
+        pte_w[0] = pte_valid | pte_addr | pte_tgt;
+
+        if (attrs->priv)
+                pte_w[0] |= gmmu_new_pte_privilege_true_f();
+
+        pte_w[1] = phys_addr >> (24 + gmmu_new_pte_address_shift_v()) |
+                gmmu_new_pte_kind_f(attrs->kind_v) |
+                gmmu_new_pte_comptagline_f((u32)(attrs->ctag /
+                                                 ctag_granularity));
+
+        if (attrs->rw_flag == gk20a_mem_flag_read_only)
+                pte_w[0] |= gmmu_new_pte_read_only_true_f();
+
+        if (!attrs->valid && !attrs->cacheable)
+                pte_w[0] |= gmmu_new_pte_read_only_true_f();
+        else if (!attrs->cacheable)
                 pte_w[0] |= gmmu_new_pte_vol_true_f();
-        } else {
-                gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
-        }
-
-        pte_i = pte3_from_index(i);
-
-        gk20a_pde_wr32(g, pte, pte_i + 0, pte_w[0]);
-        gk20a_pde_wr32(g, pte, pte_i + 1, pte_w[1]);
+
+        if (attrs->ctag)
+                attrs->ctag += page_size;
+
+}
 
-        if (*iova) {
-                *iova += page_size;
-                *offset += page_size;
-                if (*sgl && *offset + page_size > (*sgl)->length) {
-                        u64 new_iova;
-                        *sgl = sg_next(*sgl);
-                        if (*sgl) {
-                                new_iova = sg_phys(*sgl);
-                                gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-                                          new_iova, (*sgl)->length);
-                                if (new_iova) {
-                                        *offset = 0;
-                                        *iova = new_iova;
-                                }
-                        }
-                }
-        }
-        return 0;
+static void __update_pte_sparse(u32 *pte_w)
+{
+        pte_w[0] = gmmu_new_pte_valid_false_f();
+        pte_w[0] |= gmmu_new_pte_vol_true_f();
+}
+
+static void update_gmmu_pte_locked(struct vm_gk20a *vm,
+                                   const struct gk20a_mmu_level *l,
+                                   struct nvgpu_gmmu_pd *pd,
+                                   u32 pd_idx,
+                                   u64 virt_addr,
+                                   u64 phys_addr,
+                                   struct nvgpu_gmmu_attrs *attrs)
+{
+        struct gk20a *g = vm->mm->g;
+        u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
+        u32 pd_offset = pd_offset_from_index(l, pd_idx);
+        u32 pte_w[2] = {0, 0};
+
+        if (phys_addr)
+                __update_pte(vm, pte_w, phys_addr, attrs);
+        else if (attrs->sparse)
+                __update_pte_sparse(pte_w);
+
+        pte_dbg(g, attrs,
+                "vm=%s "
+                "PTE: i=%-4u size=%-2u offs=%-4u | "
+                "GPU %#-12llx phys %#-12llx "
+                "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
+                "ctag=0x%08x "
+                "[0x%08x, 0x%08x]",
+                vm->name,
+                pd_idx, l->entry_size, pd_offset,
+                virt_addr, phys_addr,
+                page_size >> 10,
+                nvgpu_gmmu_perm_str(attrs->rw_flag),
+                attrs->kind_v,
+                nvgpu_aperture_str(attrs->aperture),
+                attrs->valid ? 'V' : '-',
+                attrs->cacheable ? 'C' : '-',
+                attrs->sparse ? 'S' : '-',
+                attrs->priv ? 'P' : '-',
+                (u32)attrs->ctag / g->ops.fb.compression_page_size(g),
+                pte_w[1], pte_w[0]);
+
+        pd_write(g, pd, pd_offset + 0, pte_w[0]);
+        pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
 static const struct gk20a_mmu_level gp10b_mm_levels[] = {
@@ -384,7 +365,7 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 static void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
                 struct vm_gk20a *vm)
 {
-        u64 pdb_addr = gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0);
+        u64 pdb_addr = nvgpu_mem_get_base_addr(g, &vm->pdb.mem, 0);
         u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
         u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
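
Notes

The rewrite above ports gp10b's page-table programming onto nvgpu's common
GMMU walker: every level-update callback now receives the same argument list
(the level descriptor, the destination page directory, the entry index, the
virtual and physical address, and an nvgpu_gmmu_attrs bundle that replaces
the old scatterlist/kind/ctag/flag parameters), returns void, and writes its
words through pd_write() at an offset computed by pd_offset_from_index().
A minimal sketch of how the common walker plausibly dispatches into these
callbacks; the update_entry hook and entry_size field do exist on struct
gk20a_mmu_level, but the wrapper function itself is illustrative, not code
from this tree:

    /* Illustrative dispatch only -- the real walk loop lives in the
     * common GMMU code pulled in by <nvgpu/gmmu.h>, not in mm_gp10b.c. */
    static void program_entry(struct vm_gk20a *vm,
                              const struct gk20a_mmu_level *l,
                              struct nvgpu_gmmu_pd *pd,
                              u32 pd_idx, u64 virt_addr, u64 phys_addr,
                              struct nvgpu_gmmu_attrs *attrs)
    {
            /* Each callback derives its own 32-bit word offset from
             * pd_idx and writes 2 (PDE3, PTE) or 4 (dual PDE0) words. */
            l->update_entry(vm, l, pd, pd_idx, virt_addr, phys_addr, attrs);
    }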
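
The nvgpu_gmmu_attrs fields this file depends on can be read directly off
the accesses in the diff. A reconstruction for reference; the canonical
definition lives in the common headers (brought in by the new #include of
<nvgpu/gmmu.h>) and may name or order its members differently:

    /* Reconstructed from usage above -- an assumption, not the real layout. */
    struct nvgpu_gmmu_attrs {
            u32                 pgsz;      /* index into vm->gmmu_page_sizes[] */
            u32                 kind_v;    /* value for the PTE kind field */
            u64                 ctag;      /* comptag offset; advanced by one
                                            * page_size per PTE written */
            bool                cacheable; /* !cacheable => volatile mapping */
            int                 rw_flag;   /* gk20a_mem_flag_* access flags */
            bool                sparse;    /* sparse: valid=false + vol=true */
            bool                priv;      /* privileged PTE */
            bool                valid;     /* PTE valid bit */
            enum nvgpu_aperture aperture;  /* APERTURE_SYSMEM or video memory */
    };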
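
As a worked example of the two-word PDE packing in update_gmmu_pde3_locked:
a next-level page table at physical address 0x1_2345_6000 is first shifted
right by the PDE address shift (12 bits on this family, via
gmmu_new_pde_address_shift_v()), giving 0x123456. The low bits of the
shifted value are placed into word 0 by gmmu_new_pde_address_sys_f(), and
everything from bit 24 upward spills into word 1 (phys_addr >> 24), which is
how a 32-bit entry word addresses a physical range wider than 32 bits. A
standalone, compilable sketch of the same arithmetic; the 24-bit field at
bit 8 of word 0 is an assumption standing in for the generated hw header:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t phys_addr = 0x123456000ULL;    /* 4k-aligned PD base */
            uint32_t pde_v[2] = {0, 0};

            phys_addr >>= 12;     /* gmmu_new_pde_address_shift_v() */
            /* gmmu_new_pde_address_sys_f(): assumed 24-bit field at bit 8 */
            pde_v[0] |= ((uint32_t)phys_addr & 0xffffffu) << 8;
            pde_v[1] |= (uint32_t)(phys_addr >> 24);  /* high address bits */

            /* Prints: PDE = [0x00000000, 0x12345600] */
            printf("PDE = [0x%08x, 0x%08x]\n", pde_v[1], pde_v[0]);
            return 0;
    }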