Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r--	drivers/gpu/nvgpu/gp10b/mm_gp10b.c	446
1 file changed, 446 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
new file mode 100644
index 00000000..4b985af4
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -0,0 +1,446 @@
/*
 * GP10B MMU
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/mm.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>

#include "gk20a/gk20a.h"
#include "gm20b/mm_gm20b.h"
#include "mm_gp10b.h"
#include "rpfb_gp10b.h"

#include <nvgpu/hw/gp10b/hw_fb_gp10b.h>
#include <nvgpu/hw/gp10b/hw_ram_gp10b.h>
#include <nvgpu/hw/gp10b/hw_bus_gp10b.h>
#include <nvgpu/hw/gp10b/hw_gmmu_gp10b.h>

u32 gp10b_mm_get_default_big_page_size(void)
{
	return SZ_64K;
}

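/*
 * Physical addresses with this bit set are routed through the SMMU
 * (IOMMU) rather than used as raw physical addresses.
 */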
u32 gp10b_mm_get_iommu_bit(struct gk20a *g)
{
	return 36;
}

int gp10b_init_mm_setup_hw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
	int err = 0;

	gk20a_dbg_fn("");

	g->ops.fb.set_mmu_page_size(g);

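	/* The flush target address is programmed in 256-byte units. */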
	gk20a_writel(g, fb_niso_flush_sysmem_addr_r(),
		     nvgpu_mem_get_addr(g, &g->mm.sysmem_flush) >> 8ULL);

	g->ops.bus.bar1_bind(g, inst_block);

	if (g->ops.mm.init_bar2_mm_hw_setup) {
		err = g->ops.mm.init_bar2_mm_hw_setup(g);
		if (err)
			return err;
	}

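	/* If the first FB flush fails, retry once before giving up. */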
	if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
		return -EBUSY;

	err = gp10b_replayable_pagefault_buffer_init(g);

	gk20a_dbg_fn("done");
	return err;
}

int gb10b_init_bar2_vm(struct gk20a *g)
{
	int err;
	struct mm_gk20a *mm = &g->mm;
	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
	u32 big_page_size = g->ops.mm.get_default_big_page_size();

	/* BAR2 aperture size is 32MB */
	mm->bar2.aperture_size = 32 << 20;
	gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size);

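	/* The BAR2 VM spans the 32 MB aperture with a 4K low hole at the bottom. */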
	mm->bar2.vm = nvgpu_vm_init(g, big_page_size, SZ_4K,
				    mm->bar2.aperture_size - SZ_4K,
				    mm->bar2.aperture_size, false, false, "bar2");
	if (!mm->bar2.vm)
		return -ENOMEM;

	/* allocate instance mem for bar2 */
	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		goto clean_up_va;

	g->ops.mm.init_inst_block(inst_block, mm->bar2.vm, big_page_size);

	return 0;

clean_up_va:
	nvgpu_vm_put(mm->bar2.vm);
	return err;
}

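/*
 * Program the BAR2 block register: point it at the BAR2 instance block
 * and enable virtual (GMMU-translated) BAR2 accesses.
 */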
int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
	u64 inst_pa = nvgpu_inst_block_addr(g, inst_block);

	gk20a_dbg_fn("");

	g->ops.fb.set_mmu_page_size(g);

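	/* The block pointer field takes the instance block address in 4K units. */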
	inst_pa = (u32)(inst_pa >> bus_bar2_block_ptr_shift_v());
	gk20a_dbg_info("bar2 inst block ptr: 0x%08x", (u32)inst_pa);

	gk20a_writel(g, bus_bar2_block_r(),
		     nvgpu_aperture_mask(g, inst_block,
					 bus_bar2_block_target_sys_mem_ncoh_f(),
					 bus_bar2_block_target_vid_mem_f()) |
		     bus_bar2_block_mode_virtual_f() |
		     bus_bar2_block_ptr_f(inst_pa));

	gk20a_dbg_fn("done");
	return 0;
}

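/*
 * Write an 8-byte PDE (two 32-bit words) for one of the upper
 * page-directory levels of the new (Pascal) page table format.
 */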
static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
				    const struct gk20a_mmu_level *l,
				    struct nvgpu_gmmu_pd *pd,
				    u32 pd_idx,
				    u64 virt_addr,
				    u64 phys_addr,
				    struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pde_v[2] = {0, 0};

	phys_addr >>= gmmu_new_pde_address_shift_v();

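	/* Bits 23:0 of the shifted address land in word 0, the rest in word 1. */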
	pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
					gmmu_new_pde_aperture_sys_mem_ncoh_f(),
					gmmu_new_pde_aperture_video_memory_f());
	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
	pde_v[0] |= gmmu_new_pde_vol_true_f();
	pde_v[1] |= phys_addr >> 24;

	pd_write(g, pd, pd_offset + 0, pde_v[0]);
	pd_write(g, pd, pd_offset + 1, pde_v[1]);

	pte_dbg(g, attrs,
		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | "
		"GPU %#-12llx phys %#-12llx "
		"[0x%08x, 0x%08x]",
		pd_idx, l->entry_size, pd_offset,
		virt_addr, phys_addr,
		pde_v[1], pde_v[0]);
}

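/*
 * PDE0 is a 16-byte dual entry: words 0/1 describe the big-page table,
 * words 2/3 the small-page table.
 */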
static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
				    const struct gk20a_mmu_level *l,
				    struct nvgpu_gmmu_pd *pd,
				    u32 pd_idx,
				    u64 virt_addr,
				    u64 phys_addr,
				    struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	bool small_valid, big_valid;
	u32 small_addr = 0, big_addr = 0;
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pde_v[4] = {0, 0, 0, 0};

	small_valid = attrs->pgsz == gmmu_page_size_small;
	big_valid = attrs->pgsz == gmmu_page_size_big;

	if (small_valid)
		small_addr = phys_addr >> gmmu_new_dual_pde_address_shift_v();

	if (big_valid)
		big_addr = phys_addr >> gmmu_new_dual_pde_address_big_shift_v();

	if (small_valid) {
		pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(small_addr);
		pde_v[2] |= nvgpu_aperture_mask(g, pd->mem,
			gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
			gmmu_new_dual_pde_aperture_small_video_memory_f());
		pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
		pde_v[3] |= small_addr >> 24;
	}

	if (big_valid) {
		pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(big_addr);
		pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
		pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
			gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
			gmmu_new_dual_pde_aperture_big_video_memory_f());
		pde_v[1] |= big_addr >> 28;
	}

	pd_write(g, pd, pd_offset + 0, pde_v[0]);
	pd_write(g, pd, pd_offset + 1, pde_v[1]);
	pd_write(g, pd, pd_offset + 2, pde_v[2]);
	pd_write(g, pd, pd_offset + 3, pde_v[3]);

	pte_dbg(g, attrs,
		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
		"GPU %#-12llx phys %#-12llx "
		"[0x%08x, 0x%08x, 0x%08x, 0x%08x]",
		pd_idx, l->entry_size, pd_offset,
		small_valid ? 'S' : '-',
		big_valid ? 'B' : '-',
		virt_addr, phys_addr,
		pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
}

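/* Assemble the two 32-bit words of a new-format PTE for a present mapping. */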
static void __update_pte(struct vm_gk20a *vm,
			 u32 *pte_w,
			 u64 phys_addr,
			 struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = gk20a_from_vm(vm);
	u64 ctag_granularity = g->ops.fb.compression_page_size(g);
	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
	u32 pte_valid = attrs->valid ?
		gmmu_new_pte_valid_true_f() :
		gmmu_new_pte_valid_false_f();
	u32 phys_shifted = phys_addr >> gmmu_new_pte_address_shift_v();
	u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ?
		gmmu_new_pte_address_sys_f(phys_shifted) :
		gmmu_new_pte_address_vid_f(phys_shifted);
	u32 pte_tgt = __nvgpu_aperture_mask(g,
		attrs->aperture,
		attrs->coherent ?
			gmmu_new_pte_aperture_sys_mem_coh_f() :
			gmmu_new_pte_aperture_sys_mem_ncoh_f(),
		gmmu_new_pte_aperture_video_memory_f());

	pte_w[0] = pte_valid | pte_addr | pte_tgt;

	if (attrs->priv)
		pte_w[0] |= gmmu_new_pte_privilege_true_f();

	pte_w[1] = phys_addr >> (24 + gmmu_new_pte_address_shift_v()) |
		gmmu_new_pte_kind_f(attrs->kind_v) |
		gmmu_new_pte_comptagline_f((u32)(attrs->ctag /
						 ctag_granularity));

	if (attrs->rw_flag == gk20a_mem_flag_read_only)
		pte_w[0] |= gmmu_new_pte_read_only_true_f();

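	/*
	 * An invalid, non-cacheable PTE is marked read-only; a valid
	 * non-cacheable PTE is instead marked volatile (uncached).
	 */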
	if (!attrs->valid && !attrs->cacheable)
		pte_w[0] |= gmmu_new_pte_read_only_true_f();
	else if (!attrs->cacheable)
		pte_w[0] |= gmmu_new_pte_vol_true_f();

	if (attrs->ctag)
		attrs->ctag += page_size;
}

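/* Sparse mappings are encoded as PTEs that are invalid but volatile. */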
static void __update_pte_sparse(u32 *pte_w)
{
	pte_w[0] = gmmu_new_pte_valid_false_f();
	pte_w[0] |= gmmu_new_pte_vol_true_f();
}

static void update_gmmu_pte_locked(struct vm_gk20a *vm,
				   const struct gk20a_mmu_level *l,
				   struct nvgpu_gmmu_pd *pd,
				   u32 pd_idx,
				   u64 virt_addr,
				   u64 phys_addr,
				   struct nvgpu_gmmu_attrs *attrs)
{
	struct gk20a *g = vm->mm->g;
	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
	u32 pd_offset = pd_offset_from_index(l, pd_idx);
	u32 pte_w[2] = {0, 0};

	if (phys_addr)
		__update_pte(vm, pte_w, phys_addr, attrs);
	else if (attrs->sparse)
		__update_pte_sparse(pte_w);

	pte_dbg(g, attrs,
		"vm=%s "
		"PTE: i=%-4u size=%-2u | "
		"GPU %#-12llx phys %#-12llx "
		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c "
		"ctag=0x%08x "
		"[0x%08x, 0x%08x]",
		vm->name,
		pd_idx, l->entry_size,
		virt_addr, phys_addr,
		page_size >> 10,
		nvgpu_gmmu_perm_str(attrs->rw_flag),
		attrs->kind_v,
		nvgpu_aperture_str(attrs->aperture),
		attrs->cacheable ? 'C' : 'v',
		attrs->sparse ? 'S' : '-',
		attrs->priv ? 'P' : '-',
		attrs->coherent ? 'c' : '-',
		attrs->valid ? 'V' : '-',
		(u32)attrs->ctag / g->ops.fb.compression_page_size(g),
		pte_w[1], pte_w[0]);

	pd_write(g, pd, pd_offset + 0, pte_w[0]);
	pd_write(g, pd, pd_offset + 1, pte_w[1]);
}

#define GP10B_PDE0_ENTRY_SIZE	16

/*
 * Calculate the page size programmed into a PDE0 entry.
 *
 * Pascal+ implements a 5-level page table structure in which only the
 * last level has a different number of entries, depending on whether it
 * holds big pages or small pages.
 */
static enum gmmu_pgsz_gk20a gp10b_get_pde0_pgsz(struct gk20a *g,
		struct nvgpu_gmmu_pd *pd, u32 pd_idx)
{
	u32 pde_base = pd->mem_offs / sizeof(u32);
	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
	u32 i;
	enum gmmu_pgsz_gk20a pgsz = gmmu_nr_page_sizes;

	if (!pd->mem)
		return pgsz;

	nvgpu_mem_begin(g, pd->mem);
	for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++)
		pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_base + i);
	nvgpu_mem_end(g, pd->mem);

	/*
	 * Check that both the aperture and the address are set. Note the
	 * bitwise OR of the two aperture field values, matching the
	 * big-page check below.
	 */
	if (pde_v[2] & (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() |
			gmmu_new_dual_pde_aperture_small_video_memory_f())) {
		u64 addr = ((u64) (pde_v[2] &
				   gmmu_new_dual_pde_address_small_sys_f(~0)) <<
			    gmmu_new_dual_pde_address_shift_v()) |
			   ((u64) pde_v[3] << 32);

		if (addr)
			pgsz = gmmu_page_size_small;
	}

	if (pde_v[0] & (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() |
			gmmu_new_dual_pde_aperture_big_video_memory_f())) {
		u64 addr = ((u64) (pde_v[0] &
				   gmmu_new_dual_pde_address_big_sys_f(~0)) <<
			    gmmu_new_dual_pde_address_big_shift_v()) |
			   ((u64) pde_v[1] << 32);

		if (addr) {
			/*
			 * If small is already set, MM somehow allowed both
			 * small and big to be set; the PDE is not valid and
			 * may be corrupted.
			 */
			if (pgsz == gmmu_page_size_small) {
				nvgpu_err(g,
					"both small and big apertures enabled");
				return gmmu_nr_page_sizes;
			}
		}
		pgsz = gmmu_page_size_big;
	}

	return pgsz;
}

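/*
 * Five translation levels: four page-directory levels followed by the
 * PTE level. Only the PTE level's low bit differs by page size: bit 12
 * for 4K small pages, bit 16 for 64K big pages.
 */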
static const struct gk20a_mmu_level gp10b_mm_levels[] = {
	{.hi_bit = {48, 48},
	 .lo_bit = {47, 47},
	 .update_entry = update_gmmu_pde3_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {46, 46},
	 .lo_bit = {38, 38},
	 .update_entry = update_gmmu_pde3_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {37, 37},
	 .lo_bit = {29, 29},
	 .update_entry = update_gmmu_pde3_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pde_pgsz},
	{.hi_bit = {28, 28},
	 .lo_bit = {21, 21},
	 .update_entry = update_gmmu_pde0_locked,
	 .entry_size = GP10B_PDE0_ENTRY_SIZE,
	 .get_pgsz = gp10b_get_pde0_pgsz},
	{.hi_bit = {20, 20},
	 .lo_bit = {12, 16},
	 .update_entry = update_gmmu_pte_locked,
	 .entry_size = 8,
	 .get_pgsz = gk20a_get_pte_pgsz},
	{.update_entry = NULL}
};

const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
						      u32 big_page_size)
{
	return gp10b_mm_levels;
}

void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
		       struct vm_gk20a *vm)
{
	u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
	u32 pdb_addr_hi = u64_hi32(pdb_addr);

	gk20a_dbg_info("pde pa=0x%llx", pdb_addr);

	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
		nvgpu_aperture_mask(g, vm->pdb.mem,
			ram_in_page_dir_base_target_sys_mem_ncoh_f(),
			ram_in_page_dir_base_target_vid_mem_f()) |
		ram_in_page_dir_base_vol_true_f() |
		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
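		/* bit 10: use the new ("ver2") page table format */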
		1 << 10);

	nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
		ram_in_page_dir_base_hi_f(pdb_addr_hi));
}

void gp10b_remove_bar2_vm(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;

	gp10b_replayable_pagefault_buffer_deinit(g);
	nvgpu_free_inst_block(g, &mm->bar2.inst_block);
	nvgpu_vm_put(mm->bar2.vm);
}