diff options
Diffstat (limited to 'drivers/gpu')
25 files changed, 724 insertions, 664 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index ce4f67b0..e689aa7f 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu | |||
@@ -65,6 +65,7 @@ nvgpu-y := \ | |||
65 | common/mm/vm_area.o \ | 65 | common/mm/vm_area.o \ |
66 | common/mm/nvgpu_mem.o \ | 66 | common/mm/nvgpu_mem.o \ |
67 | common/mm/comptags.o \ | 67 | common/mm/comptags.o \ |
68 | common/mm/mm.o \ | ||
68 | common/bus.o \ | 69 | common/bus.o \ |
69 | common/enabled.o \ | 70 | common/enabled.o \ |
70 | common/pramin.o \ | 71 | common/pramin.o \ |
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c index 56edc11b..c8831a97 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c | |||
@@ -1372,7 +1372,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset) | |||
1372 | err = g->ops.dbg_session_ops.perfbuffer_disable(g); | 1372 | err = g->ops.dbg_session_ops.perfbuffer_disable(g); |
1373 | 1373 | ||
1374 | nvgpu_vm_unmap_buffer(vm, offset, NULL); | 1374 | nvgpu_vm_unmap_buffer(vm, offset, NULL); |
1375 | gk20a_free_inst_block(g, &mm->perfbuf.inst_block); | 1375 | nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); |
1376 | nvgpu_vm_put(vm); | 1376 | nvgpu_vm_put(vm); |
1377 | 1377 | ||
1378 | g->perfbuf.owner = NULL; | 1378 | g->perfbuf.owner = NULL; |
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index c6f10a69..a2546e9d 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c | |||
@@ -25,8 +25,8 @@ | |||
25 | #include <nvgpu/bug.h> | 25 | #include <nvgpu/bug.h> |
26 | #include <nvgpu/log2.h> | 26 | #include <nvgpu/log2.h> |
27 | #include <nvgpu/barrier.h> | 27 | #include <nvgpu/barrier.h> |
28 | 28 | #include <nvgpu/mm.h> | |
29 | #include "gk20a/mm_gk20a.h" | 29 | #include <nvgpu/vm.h> |
30 | 30 | ||
31 | #include "buddy_allocator_priv.h" | 31 | #include "buddy_allocator_priv.h" |
32 | 32 | ||
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c new file mode 100644 index 00000000..1027ed28 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/mm.c | |||
@@ -0,0 +1,426 @@ | |||
1 | /* | ||
2 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
3 | * copy of this software and associated documentation files (the "Software"), | ||
4 | * to deal in the Software without restriction, including without limitation | ||
5 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
6 | * and/or sell copies of the Software, and to permit persons to whom the | ||
7 | * Software is furnished to do so, subject to the following conditions: | ||
8 | * | ||
9 | * The above copyright notice and this permission notice shall be included in | ||
10 | * all copies or substantial portions of the Software. | ||
11 | * | ||
12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
18 | * DEALINGS IN THE SOFTWARE. | ||
19 | */ | ||
20 | |||
21 | #include <nvgpu/mm.h> | ||
22 | #include <nvgpu/vm.h> | ||
23 | #include <nvgpu/dma.h> | ||
24 | #include <nvgpu/vm_area.h> | ||
25 | #include <nvgpu/gmmu.h> | ||
26 | #include <nvgpu/vidmem.h> | ||
27 | #include <nvgpu/semaphore.h> | ||
28 | #include <nvgpu/pramin.h> | ||
29 | #include <nvgpu/enabled.h> | ||
30 | |||
31 | #include "gk20a/gk20a.h" | ||
32 | |||
33 | /* | ||
34 | * Attempt to find a reserved memory area to determine PTE size for the passed | ||
35 | * mapping. If no reserved area can be found use small pages. | ||
36 | */ | ||
37 | enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, | ||
38 | u64 base, u64 size) | ||
39 | { | ||
40 | struct nvgpu_vm_area *vm_area; | ||
41 | |||
42 | vm_area = nvgpu_vm_area_find(vm, base); | ||
43 | if (!vm_area) | ||
44 | return gmmu_page_size_small; | ||
45 | |||
46 | return vm_area->pgsz_idx; | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * This is for when the address space does not support unified address spaces. | ||
51 | */ | ||
52 | static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm, | ||
53 | u64 base, u64 size) | ||
54 | { | ||
55 | if (!base) { | ||
56 | if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) | ||
57 | return gmmu_page_size_big; | ||
58 | return gmmu_page_size_small; | ||
59 | } else { | ||
60 | if (base < __nv_gmmu_va_small_page_limit()) | ||
61 | return gmmu_page_size_small; | ||
62 | else | ||
63 | return gmmu_page_size_big; | ||
64 | } | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * This determines the PTE size for a given alloc. Used by both the GVA space | ||
69 | * allocator and the mm core code so that agreement can be reached on how to | ||
70 | * map allocations. | ||
71 | * | ||
72 | * The page size of a buffer is this: | ||
73 | * | ||
74 | * o If the VM doesn't support large pages then obviously small pages | ||
75 | * must be used. | ||
76 | * o If the base address is non-zero (fixed address map): | ||
77 | * - Attempt to find a reserved memory area and use the page size | ||
78 | * based on that. | ||
79 | * - If no reserved page size is available, default to small pages. | ||
80 | * o If the base is zero: | ||
81 | * - If the size is larger than or equal to the big page size, use big | ||
82 | * pages. | ||
83 | * - Otherwise use small pages. | ||
84 | */ | ||
85 | enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size) | ||
86 | { | ||
87 | struct gk20a *g = gk20a_from_vm(vm); | ||
88 | |||
89 | if (!vm->big_pages) | ||
90 | return gmmu_page_size_small; | ||
91 | |||
92 | if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) | ||
93 | return __get_pte_size_split_addr(vm, base, size); | ||
94 | |||
95 | if (base) | ||
96 | return __get_pte_size_fixed_map(vm, base, size); | ||
97 | |||
98 | if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) | ||
99 | return gmmu_page_size_big; | ||
100 | return gmmu_page_size_small; | ||
101 | } | ||
102 | |||
103 | int nvgpu_mm_suspend(struct gk20a *g) | ||
104 | { | ||
105 | nvgpu_info(g, "MM suspend running..."); | ||
106 | |||
107 | nvgpu_vidmem_thread_pause_sync(&g->mm); | ||
108 | |||
109 | g->ops.mm.cbc_clean(g); | ||
110 | g->ops.mm.l2_flush(g, false); | ||
111 | |||
112 | nvgpu_info(g, "MM suspend done!"); | ||
113 | |||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block) | ||
118 | { | ||
119 | if (g->mm.has_physical_mode) | ||
120 | return nvgpu_mem_get_phys_addr(g, inst_block); | ||
121 | else | ||
122 | return nvgpu_mem_get_addr(g, inst_block); | ||
123 | } | ||
124 | |||
/*
 * Release the DMA memory behind an instance block. Blocks that
 * nvgpu_mem_is_valid() reports as not valid are left untouched.
 */
void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	if (!nvgpu_mem_is_valid(inst_block))
		return;

	nvgpu_dma_free(g, inst_block);
}
130 | |||
131 | static int nvgpu_alloc_sysmem_flush(struct gk20a *g) | ||
132 | { | ||
133 | return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); | ||
134 | } | ||
135 | |||
136 | static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm) | ||
137 | { | ||
138 | struct gk20a *g = gk20a_from_mm(mm); | ||
139 | |||
140 | if (mm->vidmem.ce_ctx_id != (u32)~0) | ||
141 | gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); | ||
142 | |||
143 | mm->vidmem.ce_ctx_id = (u32)~0; | ||
144 | |||
145 | nvgpu_vm_put(mm->ce.vm); | ||
146 | } | ||
147 | |||
148 | static void nvgpu_remove_mm_support(struct mm_gk20a *mm) | ||
149 | { | ||
150 | struct gk20a *g = gk20a_from_mm(mm); | ||
151 | |||
152 | if (g->ops.mm.fault_info_mem_destroy) | ||
153 | g->ops.mm.fault_info_mem_destroy(g); | ||
154 | |||
155 | if (g->ops.mm.remove_bar2_vm) | ||
156 | g->ops.mm.remove_bar2_vm(g); | ||
157 | |||
158 | if (g->ops.mm.is_bar1_supported(g)) { | ||
159 | nvgpu_free_inst_block(g, &mm->bar1.inst_block); | ||
160 | nvgpu_vm_put(mm->bar1.vm); | ||
161 | } | ||
162 | |||
163 | nvgpu_free_inst_block(g, &mm->pmu.inst_block); | ||
164 | nvgpu_free_inst_block(g, &mm->hwpm.inst_block); | ||
165 | nvgpu_vm_put(mm->pmu.vm); | ||
166 | nvgpu_vm_put(mm->cde.vm); | ||
167 | |||
168 | nvgpu_semaphore_sea_destroy(g); | ||
169 | nvgpu_vidmem_destroy(g); | ||
170 | nvgpu_pd_cache_fini(g); | ||
171 | } | ||
172 | |||
173 | /* pmu vm, share channel_vm interfaces */ | ||
174 | static int nvgpu_init_system_vm(struct mm_gk20a *mm) | ||
175 | { | ||
176 | int err; | ||
177 | struct gk20a *g = gk20a_from_mm(mm); | ||
178 | struct nvgpu_mem *inst_block = &mm->pmu.inst_block; | ||
179 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
180 | u32 low_hole, aperture_size; | ||
181 | |||
182 | /* | ||
183 | * No user region - so we will pass that as zero sized. | ||
184 | */ | ||
185 | low_hole = SZ_4K * 16; | ||
186 | aperture_size = GK20A_PMU_VA_SIZE * 2; | ||
187 | |||
188 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; | ||
189 | nvgpu_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size); | ||
190 | |||
191 | mm->pmu.vm = nvgpu_vm_init(g, big_page_size, | ||
192 | low_hole, | ||
193 | aperture_size - low_hole, | ||
194 | aperture_size, | ||
195 | true, | ||
196 | false, | ||
197 | "system"); | ||
198 | if (!mm->pmu.vm) | ||
199 | return -ENOMEM; | ||
200 | |||
201 | err = g->ops.mm.alloc_inst_block(g, inst_block); | ||
202 | if (err) | ||
203 | goto clean_up_vm; | ||
204 | g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size); | ||
205 | |||
206 | return 0; | ||
207 | |||
208 | clean_up_vm: | ||
209 | nvgpu_vm_put(mm->pmu.vm); | ||
210 | return err; | ||
211 | } | ||
212 | |||
213 | static int nvgpu_init_hwpm(struct mm_gk20a *mm) | ||
214 | { | ||
215 | int err; | ||
216 | struct gk20a *g = gk20a_from_mm(mm); | ||
217 | struct nvgpu_mem *inst_block = &mm->hwpm.inst_block; | ||
218 | |||
219 | err = g->ops.mm.alloc_inst_block(g, inst_block); | ||
220 | if (err) | ||
221 | return err; | ||
222 | g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0); | ||
223 | |||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | static int nvgpu_init_cde_vm(struct mm_gk20a *mm) | ||
228 | { | ||
229 | struct gk20a *g = gk20a_from_mm(mm); | ||
230 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
231 | |||
232 | mm->cde.vm = nvgpu_vm_init(g, big_page_size, | ||
233 | big_page_size << 10, | ||
234 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
235 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
236 | false, false, "cde"); | ||
237 | if (!mm->cde.vm) | ||
238 | return -ENOMEM; | ||
239 | return 0; | ||
240 | } | ||
241 | |||
242 | static int nvgpu_init_ce_vm(struct mm_gk20a *mm) | ||
243 | { | ||
244 | struct gk20a *g = gk20a_from_mm(mm); | ||
245 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
246 | |||
247 | mm->ce.vm = nvgpu_vm_init(g, big_page_size, | ||
248 | big_page_size << 10, | ||
249 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
250 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
251 | false, false, "ce"); | ||
252 | if (!mm->ce.vm) | ||
253 | return -ENOMEM; | ||
254 | return 0; | ||
255 | } | ||
256 | |||
/*
 * Create the CE context recorded in g->mm.vidmem.ce_ctx_id.
 *
 * Only does anything when CONFIG_GK20A_VIDMEM is defined, vidmem has a
 * non-zero size and no context id has been assigned yet ((u32)~0 means
 * "unset"). A creation failure is logged but not reported to the caller.
 */
void nvgpu_init_mm_ce_context(struct gk20a *g)
{
#if defined(CONFIG_GK20A_VIDMEM)
	struct mm_gk20a *mm = &g->mm;

	if (!mm->vidmem.size || mm->vidmem.ce_ctx_id != (u32)~0)
		return;

	mm->vidmem.ce_ctx_id =
		gk20a_ce_create_context_with_cb(g,
			gk20a_fifo_get_fast_ce_runlist_id(g),
			-1,
			-1,
			-1,
			NULL);

	if (mm->vidmem.ce_ctx_id == (u32)~0)
		nvgpu_err(g,
			"Failed to allocate CE context for vidmem page clearing support");
#endif
}
275 | |||
276 | static int nvgpu_init_mm_reset_enable_hw(struct gk20a *g) | ||
277 | { | ||
278 | if (g->ops.fb.reset) | ||
279 | g->ops.fb.reset(g); | ||
280 | |||
281 | if (g->ops.clock_gating.slcg_fb_load_gating_prod) | ||
282 | g->ops.clock_gating.slcg_fb_load_gating_prod(g, | ||
283 | g->slcg_enabled); | ||
284 | if (g->ops.clock_gating.slcg_ltc_load_gating_prod) | ||
285 | g->ops.clock_gating.slcg_ltc_load_gating_prod(g, | ||
286 | g->slcg_enabled); | ||
287 | if (g->ops.clock_gating.blcg_fb_load_gating_prod) | ||
288 | g->ops.clock_gating.blcg_fb_load_gating_prod(g, | ||
289 | g->blcg_enabled); | ||
290 | if (g->ops.clock_gating.blcg_ltc_load_gating_prod) | ||
291 | g->ops.clock_gating.blcg_ltc_load_gating_prod(g, | ||
292 | g->blcg_enabled); | ||
293 | |||
294 | if (g->ops.fb.init_fs_state) | ||
295 | g->ops.fb.init_fs_state(g); | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
300 | static int nvgpu_init_bar1_vm(struct mm_gk20a *mm) | ||
301 | { | ||
302 | int err; | ||
303 | struct gk20a *g = gk20a_from_mm(mm); | ||
304 | struct nvgpu_mem *inst_block = &mm->bar1.inst_block; | ||
305 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
306 | |||
307 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | ||
308 | nvgpu_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size); | ||
309 | mm->bar1.vm = nvgpu_vm_init(g, | ||
310 | big_page_size, | ||
311 | SZ_4K, | ||
312 | mm->bar1.aperture_size - SZ_4K, | ||
313 | mm->bar1.aperture_size, | ||
314 | true, false, | ||
315 | "bar1"); | ||
316 | if (!mm->bar1.vm) | ||
317 | return -ENOMEM; | ||
318 | |||
319 | err = g->ops.mm.alloc_inst_block(g, inst_block); | ||
320 | if (err) | ||
321 | goto clean_up_vm; | ||
322 | g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size); | ||
323 | |||
324 | return 0; | ||
325 | |||
326 | clean_up_vm: | ||
327 | nvgpu_vm_put(mm->bar1.vm); | ||
328 | return err; | ||
329 | } | ||
330 | |||
331 | static int nvgpu_init_mm_setup_sw(struct gk20a *g) | ||
332 | { | ||
333 | struct mm_gk20a *mm = &g->mm; | ||
334 | int err; | ||
335 | |||
336 | if (mm->sw_ready) { | ||
337 | nvgpu_info(g, "skip init"); | ||
338 | return 0; | ||
339 | } | ||
340 | |||
341 | mm->g = g; | ||
342 | nvgpu_mutex_init(&mm->l2_op_lock); | ||
343 | |||
344 | /*TBD: make channel vm size configurable */ | ||
345 | mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - | ||
346 | NV_MM_DEFAULT_KERNEL_SIZE; | ||
347 | mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; | ||
348 | |||
349 | nvgpu_info(g, "channel vm size: user %dMB kernel %dMB", | ||
350 | (int)(mm->channel.user_size >> 20), | ||
351 | (int)(mm->channel.kernel_size >> 20)); | ||
352 | |||
353 | nvgpu_init_pramin(mm); | ||
354 | |||
355 | mm->vidmem.ce_ctx_id = (u32)~0; | ||
356 | |||
357 | err = nvgpu_vidmem_init(mm); | ||
358 | if (err) | ||
359 | return err; | ||
360 | |||
361 | /* | ||
362 | * this requires fixed allocations in vidmem which must be | ||
363 | * allocated before all other buffers | ||
364 | */ | ||
365 | if (g->ops.pmu.alloc_blob_space | ||
366 | && !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { | ||
367 | err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob); | ||
368 | if (err) | ||
369 | return err; | ||
370 | } | ||
371 | |||
372 | err = nvgpu_alloc_sysmem_flush(g); | ||
373 | if (err) | ||
374 | return err; | ||
375 | |||
376 | if (g->ops.mm.is_bar1_supported(g)) { | ||
377 | err = nvgpu_init_bar1_vm(mm); | ||
378 | if (err) | ||
379 | return err; | ||
380 | } | ||
381 | if (g->ops.mm.init_bar2_vm) { | ||
382 | err = g->ops.mm.init_bar2_vm(g); | ||
383 | if (err) | ||
384 | return err; | ||
385 | } | ||
386 | err = nvgpu_init_system_vm(mm); | ||
387 | if (err) | ||
388 | return err; | ||
389 | |||
390 | err = nvgpu_init_hwpm(mm); | ||
391 | if (err) | ||
392 | return err; | ||
393 | |||
394 | err = nvgpu_init_cde_vm(mm); | ||
395 | if (err) | ||
396 | return err; | ||
397 | |||
398 | err = nvgpu_init_ce_vm(mm); | ||
399 | if (err) | ||
400 | return err; | ||
401 | |||
402 | mm->remove_support = nvgpu_remove_mm_support; | ||
403 | mm->remove_ce_support = nvgpu_remove_mm_ce_support; | ||
404 | |||
405 | mm->sw_ready = true; | ||
406 | |||
407 | return 0; | ||
408 | } | ||
409 | |||
410 | int nvgpu_init_mm_support(struct gk20a *g) | ||
411 | { | ||
412 | u32 err; | ||
413 | |||
414 | err = nvgpu_init_mm_reset_enable_hw(g); | ||
415 | if (err) | ||
416 | return err; | ||
417 | |||
418 | err = nvgpu_init_mm_setup_sw(g); | ||
419 | if (err) | ||
420 | return err; | ||
421 | |||
422 | if (g->ops.mm.init_mm_setup_hw) | ||
423 | err = g->ops.mm.init_mm_setup_hw(g); | ||
424 | |||
425 | return err; | ||
426 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c index 938c4b00..9b031bbf 100644 --- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <nvgpu/log.h> | 24 | #include <nvgpu/log.h> |
25 | #include <nvgpu/soc.h> | 25 | #include <nvgpu/soc.h> |
26 | #include <nvgpu/bus.h> | 26 | #include <nvgpu/bus.h> |
27 | #include <nvgpu/mm.h> | ||
27 | 28 | ||
28 | #include "gk20a.h" | 29 | #include "gk20a.h" |
29 | #include "bus_gk20a.h" | 30 | #include "bus_gk20a.h" |
@@ -137,8 +138,8 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value) | |||
137 | 138 | ||
138 | int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) | 139 | int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) |
139 | { | 140 | { |
140 | u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); | 141 | u64 iova = nvgpu_inst_block_addr(g, bar1_inst); |
141 | u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); | 142 | u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v()); |
142 | 143 | ||
143 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); | 144 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); |
144 | 145 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 725ae278..e3896981 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <nvgpu/kmem.h> | 29 | #include <nvgpu/kmem.h> |
30 | #include <nvgpu/lock.h> | 30 | #include <nvgpu/lock.h> |
31 | #include <nvgpu/dma.h> | 31 | #include <nvgpu/dma.h> |
32 | #include <nvgpu/mm.h> | ||
32 | 33 | ||
33 | #include "gk20a.h" | 34 | #include "gk20a.h" |
34 | #include "css_gr_gk20a.h" | 35 | #include "css_gr_gk20a.h" |
@@ -183,7 +184,7 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch, | |||
183 | gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size); | 184 | gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size); |
184 | 185 | ||
185 | /* this field is aligned to 4K */ | 186 | /* this field is aligned to 4K */ |
186 | inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; | 187 | inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12; |
187 | 188 | ||
188 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | 189 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK |
189 | * should be written last */ | 190 | * should be written last */ |
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 8c39ecb7..802ccd76 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <nvgpu/log.h> | 26 | #include <nvgpu/log.h> |
27 | #include <nvgpu/vm.h> | 27 | #include <nvgpu/vm.h> |
28 | #include <nvgpu/atomic.h> | 28 | #include <nvgpu/atomic.h> |
29 | #include <nvgpu/mm.h> | ||
29 | 30 | ||
30 | #include "gk20a.h" | 31 | #include "gk20a.h" |
31 | #include "gk20a/platform_gk20a.h" | 32 | #include "gk20a/platform_gk20a.h" |
@@ -305,7 +306,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) | |||
305 | return err; | 306 | return err; |
306 | } | 307 | } |
307 | 308 | ||
308 | err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block); | 309 | err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block); |
309 | if (err) | 310 | if (err) |
310 | return err; | 311 | return err; |
311 | 312 | ||
@@ -322,8 +323,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size) | |||
322 | gk20a_writel(g, perf_pmasys_outsize_r(), size); | 323 | gk20a_writel(g, perf_pmasys_outsize_r(), size); |
323 | 324 | ||
324 | /* this field is aligned to 4K */ | 325 | /* this field is aligned to 4K */ |
325 | inst_pa_page = gk20a_mm_inst_block_addr(g, | 326 | inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; |
326 | &mm->perfbuf.inst_block) >> 12; | ||
327 | 327 | ||
328 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK | 328 | /* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK |
329 | * should be written last */ | 329 | * should be written last */ |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 7fd1793c..12d7dcb9 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <nvgpu/circ_buf.h> | 32 | #include <nvgpu/circ_buf.h> |
33 | #include <nvgpu/thread.h> | 33 | #include <nvgpu/thread.h> |
34 | #include <nvgpu/barrier.h> | 34 | #include <nvgpu/barrier.h> |
35 | #include <nvgpu/mm.h> | ||
35 | 36 | ||
36 | #include "ctxsw_trace_gk20a.h" | 37 | #include "ctxsw_trace_gk20a.h" |
37 | #include "fecs_trace_gk20a.h" | 38 | #include "fecs_trace_gk20a.h" |
@@ -93,7 +94,7 @@ static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) | |||
93 | 94 | ||
94 | static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) | 95 | static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) |
95 | { | 96 | { |
96 | return (u32) (gk20a_mm_inst_block_addr(g, &ch->inst_block) >> 12LL); | 97 | return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL); |
97 | } | 98 | } |
98 | 99 | ||
99 | static inline int gk20a_fecs_trace_num_ts(void) | 100 | static inline int gk20a_fecs_trace_num_ts(void) |
@@ -633,12 +634,12 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
633 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | 634 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, |
634 | "chid=%d context_ptr=%x inst_block=%llx", | 635 | "chid=%d context_ptr=%x inst_block=%llx", |
635 | ch->chid, context_ptr, | 636 | ch->chid, context_ptr, |
636 | gk20a_mm_inst_block_addr(g, &ch->inst_block)); | 637 | nvgpu_inst_block_addr(g, &ch->inst_block)); |
637 | 638 | ||
638 | if (!trace) | 639 | if (!trace) |
639 | return -ENOMEM; | 640 | return -ENOMEM; |
640 | 641 | ||
641 | pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf); | 642 | pa = nvgpu_inst_block_addr(g, &trace->trace_buf); |
642 | if (!pa) | 643 | if (!pa) |
643 | return -ENOMEM; | 644 | return -ENOMEM; |
644 | aperture = nvgpu_aperture_mask(g, &trace->trace_buf, | 645 | aperture = nvgpu_aperture_mask(g, &trace->trace_buf, |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 03ca6984..fc71c358 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -24,6 +24,7 @@ | |||
24 | 24 | ||
25 | #include <trace/events/gk20a.h> | 25 | #include <trace/events/gk20a.h> |
26 | 26 | ||
27 | #include <nvgpu/mm.h> | ||
27 | #include <nvgpu/dma.h> | 28 | #include <nvgpu/dma.h> |
28 | #include <nvgpu/timers.h> | 29 | #include <nvgpu/timers.h> |
29 | #include <nvgpu/semaphore.h> | 30 | #include <nvgpu/semaphore.h> |
@@ -1058,7 +1059,7 @@ gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr) | |||
1058 | if (!ch) | 1059 | if (!ch) |
1059 | continue; | 1060 | continue; |
1060 | 1061 | ||
1061 | ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block); | 1062 | ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block); |
1062 | if (inst_ptr == ch_inst_ptr) | 1063 | if (inst_ptr == ch_inst_ptr) |
1063 | return ch; | 1064 | return ch; |
1064 | 1065 | ||
@@ -1659,10 +1660,10 @@ static bool gk20a_fifo_handle_mmu_fault( | |||
1659 | ch->chid); | 1660 | ch->chid); |
1660 | } | 1661 | } |
1661 | } else if (mmfault_info.inst_ptr == | 1662 | } else if (mmfault_info.inst_ptr == |
1662 | gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) { | 1663 | nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) { |
1663 | nvgpu_err(g, "mmu fault from bar1"); | 1664 | nvgpu_err(g, "mmu fault from bar1"); |
1664 | } else if (mmfault_info.inst_ptr == | 1665 | } else if (mmfault_info.inst_ptr == |
1665 | gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) { | 1666 | nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) { |
1666 | nvgpu_err(g, "mmu fault from pmu"); | 1667 | nvgpu_err(g, "mmu fault from pmu"); |
1667 | } else | 1668 | } else |
1668 | nvgpu_err(g, "couldn't locate channel for mmu fault"); | 1669 | nvgpu_err(g, "couldn't locate channel for mmu fault"); |
@@ -3973,12 +3974,12 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) | |||
3973 | 3974 | ||
3974 | gk20a_dbg_fn(""); | 3975 | gk20a_dbg_fn(""); |
3975 | 3976 | ||
3976 | err = gk20a_alloc_inst_block(g, &ch->inst_block); | 3977 | err = g->ops.mm.alloc_inst_block(g, &ch->inst_block); |
3977 | if (err) | 3978 | if (err) |
3978 | return err; | 3979 | return err; |
3979 | 3980 | ||
3980 | gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", | 3981 | gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx", |
3981 | ch->chid, gk20a_mm_inst_block_addr(g, &ch->inst_block)); | 3982 | ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block)); |
3982 | 3983 | ||
3983 | gk20a_dbg_fn("done"); | 3984 | gk20a_dbg_fn("done"); |
3984 | return 0; | 3985 | return 0; |
@@ -3986,7 +3987,7 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch) | |||
3986 | 3987 | ||
3987 | void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch) | 3988 | void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch) |
3988 | { | 3989 | { |
3989 | gk20a_free_inst_block(g, &ch->inst_block); | 3990 | nvgpu_free_inst_block(g, &ch->inst_block); |
3990 | } | 3991 | } |
3991 | 3992 | ||
3992 | u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c) | 3993 | u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c) |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 2bc7d9a8..ea5d55a4 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <nvgpu/kref.h> | 36 | #include <nvgpu/kref.h> |
37 | 37 | ||
38 | struct gk20a_debug_output; | 38 | struct gk20a_debug_output; |
39 | struct mmu_fault_info; | ||
39 | 40 | ||
40 | #define MAX_RUNLIST_BUFFERS 2 | 41 | #define MAX_RUNLIST_BUFFERS 2 |
41 | 42 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 2d09c0bb..e3c2397c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <nvgpu/gmmu.h> | 34 | #include <nvgpu/gmmu.h> |
35 | #include <nvgpu/ltc.h> | 35 | #include <nvgpu/ltc.h> |
36 | #include <nvgpu/vidmem.h> | 36 | #include <nvgpu/vidmem.h> |
37 | #include <nvgpu/mm.h> | ||
37 | 38 | ||
38 | #include <trace/events/gk20a.h> | 39 | #include <trace/events/gk20a.h> |
39 | 40 | ||
@@ -107,7 +108,7 @@ int gk20a_prepare_poweroff(struct gk20a *g) | |||
107 | ret |= nvgpu_pmu_destroy(g); | 108 | ret |= nvgpu_pmu_destroy(g); |
108 | 109 | ||
109 | ret |= gk20a_gr_suspend(g); | 110 | ret |= gk20a_gr_suspend(g); |
110 | ret |= gk20a_mm_suspend(g); | 111 | ret |= nvgpu_mm_suspend(g); |
111 | ret |= gk20a_fifo_suspend(g); | 112 | ret |= gk20a_fifo_suspend(g); |
112 | 113 | ||
113 | gk20a_ce_suspend(g); | 114 | gk20a_ce_suspend(g); |
@@ -213,7 +214,7 @@ int gk20a_finalize_poweron(struct gk20a *g) | |||
213 | goto done; | 214 | goto done; |
214 | } | 215 | } |
215 | 216 | ||
216 | err = gk20a_init_mm_support(g); | 217 | err = nvgpu_init_mm_support(g); |
217 | if (err) { | 218 | if (err) { |
218 | nvgpu_err(g, "failed to init gk20a mm"); | 219 | nvgpu_err(g, "failed to init gk20a mm"); |
219 | goto done; | 220 | goto done; |
@@ -314,7 +315,7 @@ int gk20a_finalize_poweron(struct gk20a *g) | |||
314 | 315 | ||
315 | gk20a_init_ce_support(g); | 316 | gk20a_init_ce_support(g); |
316 | 317 | ||
317 | gk20a_init_mm_ce_context(g); | 318 | nvgpu_init_mm_ce_context(g); |
318 | 319 | ||
319 | if (g->ops.xve.available_speeds) { | 320 | if (g->ops.xve.available_speeds) { |
320 | u32 speed; | 321 | u32 speed; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 92bcb618..9c09e85f 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -47,6 +47,7 @@ struct nvgpu_warpstate; | |||
47 | #include <nvgpu/thread.h> | 47 | #include <nvgpu/thread.h> |
48 | #include <nvgpu/io.h> | 48 | #include <nvgpu/io.h> |
49 | 49 | ||
50 | #include <nvgpu/mm.h> | ||
50 | #include <nvgpu/as.h> | 51 | #include <nvgpu/as.h> |
51 | #include <nvgpu/log.h> | 52 | #include <nvgpu/log.h> |
52 | #include <nvgpu/pramin.h> | 53 | #include <nvgpu/pramin.h> |
@@ -756,6 +757,8 @@ struct gpu_ops { | |||
756 | u64 (*gpu_phys_addr)(struct gk20a *g, | 757 | u64 (*gpu_phys_addr)(struct gk20a *g, |
757 | struct nvgpu_gmmu_attrs *attrs, u64 phys); | 758 | struct nvgpu_gmmu_attrs *attrs, u64 phys); |
758 | size_t (*get_vidmem_size)(struct gk20a *g); | 759 | size_t (*get_vidmem_size)(struct gk20a *g); |
760 | int (*alloc_inst_block)(struct gk20a *g, | ||
761 | struct nvgpu_mem *inst_block); | ||
759 | void (*init_inst_block)(struct nvgpu_mem *inst_block, | 762 | void (*init_inst_block)(struct nvgpu_mem *inst_block, |
760 | struct vm_gk20a *vm, u32 big_page_size); | 763 | struct vm_gk20a *vm, u32 big_page_size); |
761 | bool (*mmu_fault_pending)(struct gk20a *g); | 764 | bool (*mmu_fault_pending)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index d6732453..6d370250 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <nvgpu/enabled.h> | 37 | #include <nvgpu/enabled.h> |
38 | #include <nvgpu/debug.h> | 38 | #include <nvgpu/debug.h> |
39 | #include <nvgpu/barrier.h> | 39 | #include <nvgpu/barrier.h> |
40 | #include <nvgpu/mm.h> | ||
40 | 41 | ||
41 | #include "gk20a.h" | 42 | #include "gk20a.h" |
42 | #include "kind_gk20a.h" | 43 | #include "kind_gk20a.h" |
@@ -731,7 +732,7 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g, | |||
731 | 732 | ||
732 | static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) | 733 | static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) |
733 | { | 734 | { |
734 | u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block) | 735 | u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block) |
735 | >> ram_in_base_shift_v()); | 736 | >> ram_in_base_shift_v()); |
736 | u32 aperture = nvgpu_aperture_mask(g, inst_block, | 737 | u32 aperture = nvgpu_aperture_mask(g, inst_block, |
737 | gr_fecs_current_ctx_target_sys_mem_ncoh_f(), | 738 | gr_fecs_current_ctx_target_sys_mem_ncoh_f(), |
@@ -744,7 +745,7 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) | |||
744 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | 745 | static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, |
745 | struct channel_gk20a *c) | 746 | struct channel_gk20a *c) |
746 | { | 747 | { |
747 | u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block) | 748 | u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block) |
748 | >> ram_in_base_shift_v()); | 749 | >> ram_in_base_shift_v()); |
749 | u32 data = fecs_current_ctx_data(g, &c->inst_block); | 750 | u32 data = fecs_current_ctx_data(g, &c->inst_block); |
750 | u32 ret; | 751 | u32 ret; |
@@ -1980,7 +1981,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) | |||
1980 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | 1981 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; |
1981 | int err; | 1982 | int err; |
1982 | 1983 | ||
1983 | err = gk20a_alloc_inst_block(g, &ucode_info->inst_blk_desc); | 1984 | err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc); |
1984 | if (err) | 1985 | if (err) |
1985 | return err; | 1986 | return err; |
1986 | 1987 | ||
@@ -2154,7 +2155,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) | |||
2154 | 2155 | ||
2155 | gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); | 2156 | gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); |
2156 | 2157 | ||
2157 | inst_ptr = gk20a_mm_inst_block_addr(g, &ucode_info->inst_blk_desc); | 2158 | inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); |
2158 | gk20a_writel(g, gr_fecs_new_ctx_r(), | 2159 | gk20a_writel(g, gr_fecs_new_ctx_r(), |
2159 | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | | 2160 | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | |
2160 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, | 2161 | nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, |
@@ -5455,7 +5456,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( | |||
5455 | if (!gk20a_channel_get(ch)) | 5456 | if (!gk20a_channel_get(ch)) |
5456 | continue; | 5457 | continue; |
5457 | 5458 | ||
5458 | if ((u32)(gk20a_mm_inst_block_addr(g, &ch->inst_block) >> | 5459 | if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >> |
5459 | ram_in_base_shift_v()) == | 5460 | ram_in_base_shift_v()) == |
5460 | gr_fecs_current_ctx_ptr_v(curr_ctx)) { | 5461 | gr_fecs_current_ctx_ptr_v(curr_ctx)) { |
5461 | tsgid = ch->tsgid; | 5462 | tsgid = ch->tsgid; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index d96fa4e1..a17d6bb6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A memory management | ||
3 | * | ||
4 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. |
5 | * | 3 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
@@ -24,6 +22,7 @@ | |||
24 | 22 | ||
25 | #include <trace/events/gk20a.h> | 23 | #include <trace/events/gk20a.h> |
26 | 24 | ||
25 | #include <nvgpu/mm.h> | ||
27 | #include <nvgpu/vm.h> | 26 | #include <nvgpu/vm.h> |
28 | #include <nvgpu/vm_area.h> | 27 | #include <nvgpu/vm_area.h> |
29 | #include <nvgpu/dma.h> | 28 | #include <nvgpu/dma.h> |
@@ -88,161 +87,6 @@ | |||
88 | * | 87 | * |
89 | */ | 88 | */ |
90 | 89 | ||
91 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); | ||
92 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); | ||
93 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); | ||
94 | static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm); | ||
95 | static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm); | ||
96 | |||
97 | static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) | ||
98 | { | ||
99 | gk20a_dbg_fn(""); | ||
100 | if (g->ops.fb.reset) | ||
101 | g->ops.fb.reset(g); | ||
102 | |||
103 | if (g->ops.clock_gating.slcg_fb_load_gating_prod) | ||
104 | g->ops.clock_gating.slcg_fb_load_gating_prod(g, | ||
105 | g->slcg_enabled); | ||
106 | if (g->ops.clock_gating.slcg_ltc_load_gating_prod) | ||
107 | g->ops.clock_gating.slcg_ltc_load_gating_prod(g, | ||
108 | g->slcg_enabled); | ||
109 | if (g->ops.clock_gating.blcg_fb_load_gating_prod) | ||
110 | g->ops.clock_gating.blcg_fb_load_gating_prod(g, | ||
111 | g->blcg_enabled); | ||
112 | if (g->ops.clock_gating.blcg_ltc_load_gating_prod) | ||
113 | g->ops.clock_gating.blcg_ltc_load_gating_prod(g, | ||
114 | g->blcg_enabled); | ||
115 | |||
116 | if (g->ops.fb.init_fs_state) | ||
117 | g->ops.fb.init_fs_state(g); | ||
118 | |||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm) | ||
123 | { | ||
124 | struct gk20a *g = gk20a_from_mm(mm); | ||
125 | |||
126 | if (mm->vidmem.ce_ctx_id != (u32)~0) | ||
127 | gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); | ||
128 | |||
129 | mm->vidmem.ce_ctx_id = (u32)~0; | ||
130 | |||
131 | nvgpu_vm_put(mm->ce.vm); | ||
132 | } | ||
133 | |||
134 | static void gk20a_remove_mm_support(struct mm_gk20a *mm) | ||
135 | { | ||
136 | struct gk20a *g = gk20a_from_mm(mm); | ||
137 | |||
138 | if (g->ops.mm.fault_info_mem_destroy) | ||
139 | g->ops.mm.fault_info_mem_destroy(g); | ||
140 | |||
141 | if (g->ops.mm.remove_bar2_vm) | ||
142 | g->ops.mm.remove_bar2_vm(g); | ||
143 | |||
144 | if (g->ops.mm.is_bar1_supported(g)) { | ||
145 | gk20a_free_inst_block(g, &mm->bar1.inst_block); | ||
146 | nvgpu_vm_put(mm->bar1.vm); | ||
147 | } | ||
148 | |||
149 | gk20a_free_inst_block(g, &mm->pmu.inst_block); | ||
150 | gk20a_free_inst_block(g, &mm->hwpm.inst_block); | ||
151 | nvgpu_vm_put(mm->pmu.vm); | ||
152 | nvgpu_vm_put(mm->cde.vm); | ||
153 | |||
154 | nvgpu_semaphore_sea_destroy(g); | ||
155 | nvgpu_vidmem_destroy(g); | ||
156 | nvgpu_pd_cache_fini(g); | ||
157 | } | ||
158 | |||
159 | static int gk20a_alloc_sysmem_flush(struct gk20a *g) | ||
160 | { | ||
161 | return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush); | ||
162 | } | ||
163 | |||
164 | int gk20a_init_mm_setup_sw(struct gk20a *g) | ||
165 | { | ||
166 | struct mm_gk20a *mm = &g->mm; | ||
167 | int err; | ||
168 | |||
169 | gk20a_dbg_fn(""); | ||
170 | |||
171 | if (mm->sw_ready) { | ||
172 | gk20a_dbg_fn("skip init"); | ||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | mm->g = g; | ||
177 | nvgpu_mutex_init(&mm->l2_op_lock); | ||
178 | |||
179 | /*TBD: make channel vm size configurable */ | ||
180 | mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE - | ||
181 | NV_MM_DEFAULT_KERNEL_SIZE; | ||
182 | mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; | ||
183 | |||
184 | gk20a_dbg_info("channel vm size: user %dMB kernel %dMB", | ||
185 | (int)(mm->channel.user_size >> 20), | ||
186 | (int)(mm->channel.kernel_size >> 20)); | ||
187 | |||
188 | nvgpu_init_pramin(mm); | ||
189 | |||
190 | mm->vidmem.ce_ctx_id = (u32)~0; | ||
191 | |||
192 | err = nvgpu_vidmem_init(mm); | ||
193 | if (err) | ||
194 | return err; | ||
195 | |||
196 | /* | ||
197 | * this requires fixed allocations in vidmem which must be | ||
198 | * allocated before all other buffers | ||
199 | */ | ||
200 | if (g->ops.pmu.alloc_blob_space | ||
201 | && !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) { | ||
202 | err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob); | ||
203 | if (err) | ||
204 | return err; | ||
205 | } | ||
206 | |||
207 | err = gk20a_alloc_sysmem_flush(g); | ||
208 | if (err) | ||
209 | return err; | ||
210 | |||
211 | if (g->ops.mm.is_bar1_supported(g)) { | ||
212 | err = gk20a_init_bar1_vm(mm); | ||
213 | if (err) | ||
214 | return err; | ||
215 | } | ||
216 | if (g->ops.mm.init_bar2_vm) { | ||
217 | err = g->ops.mm.init_bar2_vm(g); | ||
218 | if (err) | ||
219 | return err; | ||
220 | } | ||
221 | err = gk20a_init_system_vm(mm); | ||
222 | if (err) | ||
223 | return err; | ||
224 | |||
225 | err = gk20a_init_hwpm(mm); | ||
226 | if (err) | ||
227 | return err; | ||
228 | |||
229 | err = gk20a_init_cde_vm(mm); | ||
230 | if (err) | ||
231 | return err; | ||
232 | |||
233 | err = gk20a_init_ce_vm(mm); | ||
234 | if (err) | ||
235 | return err; | ||
236 | |||
237 | mm->remove_support = gk20a_remove_mm_support; | ||
238 | mm->remove_ce_support = gk20a_remove_mm_ce_support; | ||
239 | |||
240 | mm->sw_ready = true; | ||
241 | |||
242 | gk20a_dbg_fn("done"); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | /* make sure gk20a_init_mm_support is called before */ | 90 | /* make sure gk20a_init_mm_support is called before */ |
247 | int gk20a_init_mm_setup_hw(struct gk20a *g) | 91 | int gk20a_init_mm_setup_hw(struct gk20a *g) |
248 | { | 92 | { |
@@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) | |||
274 | return 0; | 118 | return 0; |
275 | } | 119 | } |
276 | 120 | ||
277 | int gk20a_init_mm_support(struct gk20a *g) | ||
278 | { | ||
279 | u32 err; | ||
280 | |||
281 | err = gk20a_init_mm_reset_enable_hw(g); | ||
282 | if (err) | ||
283 | return err; | ||
284 | |||
285 | err = gk20a_init_mm_setup_sw(g); | ||
286 | if (err) | ||
287 | return err; | ||
288 | |||
289 | if (g->ops.mm.init_mm_setup_hw) | ||
290 | err = g->ops.mm.init_mm_setup_hw(g); | ||
291 | |||
292 | return err; | ||
293 | } | ||
294 | |||
295 | void gk20a_init_mm_ce_context(struct gk20a *g) | ||
296 | { | ||
297 | #if defined(CONFIG_GK20A_VIDMEM) | ||
298 | if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) { | ||
299 | g->mm.vidmem.ce_ctx_id = | ||
300 | gk20a_ce_create_context_with_cb(g, | ||
301 | gk20a_fifo_get_fast_ce_runlist_id(g), | ||
302 | -1, | ||
303 | -1, | ||
304 | -1, | ||
305 | NULL); | ||
306 | |||
307 | if (g->mm.vidmem.ce_ctx_id == (u32)~0) | ||
308 | nvgpu_err(g, | ||
309 | "Failed to allocate CE context for vidmem page clearing support"); | ||
310 | } | ||
311 | #endif | ||
312 | } | ||
313 | |||
314 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) | 121 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) |
315 | { | 122 | { |
316 | return vm->mmu_levels[0].lo_bit[0]; | 123 | return vm->mmu_levels[0].lo_bit[0]; |
@@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { | |||
505 | {.update_entry = NULL} | 312 | {.update_entry = NULL} |
506 | }; | 313 | }; |
507 | 314 | ||
508 | /* | ||
509 | * Attempt to find a reserved memory area to determine PTE size for the passed | ||
510 | * mapping. If no reserved area can be found use small pages. | ||
511 | */ | ||
512 | enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, | ||
513 | u64 base, u64 size) | ||
514 | { | ||
515 | struct nvgpu_vm_area *vm_area; | ||
516 | |||
517 | vm_area = nvgpu_vm_area_find(vm, base); | ||
518 | if (!vm_area) | ||
519 | return gmmu_page_size_small; | ||
520 | |||
521 | return vm_area->pgsz_idx; | ||
522 | } | ||
523 | |||
524 | /* | ||
525 | * This is for when the address space does not support unified address spaces. | ||
526 | */ | ||
527 | static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm, | ||
528 | u64 base, u64 size) | ||
529 | { | ||
530 | if (!base) { | ||
531 | if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) | ||
532 | return gmmu_page_size_big; | ||
533 | return gmmu_page_size_small; | ||
534 | } else { | ||
535 | if (base < __nv_gmmu_va_small_page_limit()) | ||
536 | return gmmu_page_size_small; | ||
537 | else | ||
538 | return gmmu_page_size_big; | ||
539 | } | ||
540 | } | ||
541 | |||
542 | /* | ||
543 | * This determines the PTE size for a given alloc. Used by both the GVA space | ||
544 | * allocator and the mm core code so that agreement can be reached on how to | ||
545 | * map allocations. | ||
546 | * | ||
547 | * The page size of a buffer is this: | ||
548 | * | ||
549 | * o If the VM doesn't support large pages then obviously small pages | ||
550 | * must be used. | ||
551 | * o If the base address is non-zero (fixed address map): | ||
552 | * - Attempt to find a reserved memory area and use the page size | ||
553 | * based on that. | ||
554 | * - If no reserved page size is available, default to small pages. | ||
555 | * o If the base is zero: | ||
556 | * - If the size is larger than or equal to the big page size, use big | ||
557 | * pages. | ||
558 | * - Otherwise use small pages. | ||
559 | */ | ||
560 | enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size) | ||
561 | { | ||
562 | struct gk20a *g = gk20a_from_vm(vm); | ||
563 | |||
564 | if (!vm->big_pages) | ||
565 | return gmmu_page_size_small; | ||
566 | |||
567 | if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES)) | ||
568 | return __get_pte_size_split_addr(vm, base, size); | ||
569 | |||
570 | if (base) | ||
571 | return __get_pte_size_fixed_map(vm, base, size); | ||
572 | |||
573 | if (size >= vm->gmmu_page_sizes[gmmu_page_size_big]) | ||
574 | return gmmu_page_size_big; | ||
575 | return gmmu_page_size_small; | ||
576 | } | ||
577 | |||
578 | int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) | 315 | int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) |
579 | { | 316 | { |
580 | int err = 0; | 317 | int err = 0; |
@@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, | |||
599 | return __gk20a_vm_bind_channel(as_share->vm, ch); | 336 | return __gk20a_vm_bind_channel(as_share->vm, ch); |
600 | } | 337 | } |
601 | 338 | ||
602 | int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) | ||
603 | { | ||
604 | int err; | ||
605 | |||
606 | gk20a_dbg_fn(""); | ||
607 | |||
608 | err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); | ||
609 | if (err) { | ||
610 | nvgpu_err(g, "%s: memory allocation failed", __func__); | ||
611 | return err; | ||
612 | } | ||
613 | |||
614 | gk20a_dbg_fn("done"); | ||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) | ||
619 | { | ||
620 | if (inst_block->size) | ||
621 | nvgpu_dma_free(g, inst_block); | ||
622 | } | ||
623 | |||
624 | u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block) | ||
625 | { | ||
626 | if (g->mm.has_physical_mode) | ||
627 | return nvgpu_mem_get_phys_addr(g, inst_block); | ||
628 | else | ||
629 | return nvgpu_mem_get_addr(g, inst_block); | ||
630 | } | ||
631 | |||
632 | static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | ||
633 | { | ||
634 | int err; | ||
635 | struct gk20a *g = gk20a_from_mm(mm); | ||
636 | struct nvgpu_mem *inst_block = &mm->bar1.inst_block; | ||
637 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
638 | |||
639 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | ||
640 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); | ||
641 | mm->bar1.vm = nvgpu_vm_init(g, | ||
642 | big_page_size, | ||
643 | SZ_4K, | ||
644 | mm->bar1.aperture_size - SZ_4K, | ||
645 | mm->bar1.aperture_size, | ||
646 | true, false, | ||
647 | "bar1"); | ||
648 | if (!mm->bar1.vm) | ||
649 | return -ENOMEM; | ||
650 | |||
651 | err = gk20a_alloc_inst_block(g, inst_block); | ||
652 | if (err) | ||
653 | goto clean_up_vm; | ||
654 | g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size); | ||
655 | |||
656 | return 0; | ||
657 | |||
658 | clean_up_vm: | ||
659 | nvgpu_vm_put(mm->bar1.vm); | ||
660 | return err; | ||
661 | } | ||
662 | |||
663 | /* pmu vm, share channel_vm interfaces */ | ||
664 | static int gk20a_init_system_vm(struct mm_gk20a *mm) | ||
665 | { | ||
666 | int err; | ||
667 | struct gk20a *g = gk20a_from_mm(mm); | ||
668 | struct nvgpu_mem *inst_block = &mm->pmu.inst_block; | ||
669 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
670 | u32 low_hole, aperture_size; | ||
671 | |||
672 | /* | ||
673 | * No user region - so we will pass that as zero sized. | ||
674 | */ | ||
675 | low_hole = SZ_4K * 16; | ||
676 | aperture_size = GK20A_PMU_VA_SIZE * 2; | ||
677 | |||
678 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; | ||
679 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); | ||
680 | |||
681 | mm->pmu.vm = nvgpu_vm_init(g, big_page_size, | ||
682 | low_hole, | ||
683 | aperture_size - low_hole, | ||
684 | aperture_size, | ||
685 | true, | ||
686 | false, | ||
687 | "system"); | ||
688 | if (!mm->pmu.vm) | ||
689 | return -ENOMEM; | ||
690 | |||
691 | err = gk20a_alloc_inst_block(g, inst_block); | ||
692 | if (err) | ||
693 | goto clean_up_vm; | ||
694 | g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size); | ||
695 | |||
696 | return 0; | ||
697 | |||
698 | clean_up_vm: | ||
699 | nvgpu_vm_put(mm->pmu.vm); | ||
700 | return err; | ||
701 | } | ||
702 | |||
703 | static int gk20a_init_hwpm(struct mm_gk20a *mm) | ||
704 | { | ||
705 | int err; | ||
706 | struct gk20a *g = gk20a_from_mm(mm); | ||
707 | struct nvgpu_mem *inst_block = &mm->hwpm.inst_block; | ||
708 | |||
709 | err = gk20a_alloc_inst_block(g, inst_block); | ||
710 | if (err) | ||
711 | return err; | ||
712 | g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0); | ||
713 | |||
714 | return 0; | ||
715 | } | ||
716 | |||
717 | static int gk20a_init_cde_vm(struct mm_gk20a *mm) | ||
718 | { | ||
719 | struct gk20a *g = gk20a_from_mm(mm); | ||
720 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
721 | |||
722 | mm->cde.vm = nvgpu_vm_init(g, big_page_size, | ||
723 | big_page_size << 10, | ||
724 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
725 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
726 | false, false, "cde"); | ||
727 | if (!mm->cde.vm) | ||
728 | return -ENOMEM; | ||
729 | return 0; | ||
730 | } | ||
731 | |||
732 | static int gk20a_init_ce_vm(struct mm_gk20a *mm) | ||
733 | { | ||
734 | struct gk20a *g = gk20a_from_mm(mm); | ||
735 | u32 big_page_size = g->ops.mm.get_default_big_page_size(); | ||
736 | |||
737 | mm->ce.vm = nvgpu_vm_init(g, big_page_size, | ||
738 | big_page_size << 10, | ||
739 | NV_MM_DEFAULT_KERNEL_SIZE, | ||
740 | NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, | ||
741 | false, false, "ce"); | ||
742 | if (!mm->ce.vm) | ||
743 | return -ENOMEM; | ||
744 | return 0; | ||
745 | } | ||
746 | |||
747 | void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, | 339 | void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, |
748 | struct vm_gk20a *vm) | 340 | struct vm_gk20a *vm) |
749 | { | 341 | { |
@@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, | |||
770 | struct gk20a *g = gk20a_from_vm(vm); | 362 | struct gk20a *g = gk20a_from_vm(vm); |
771 | 363 | ||
772 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", | 364 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", |
773 | gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); | 365 | nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va); |
774 | 366 | ||
775 | g->ops.mm.init_pdb(g, inst_block, vm); | 367 | g->ops.mm.init_pdb(g, inst_block, vm); |
776 | 368 | ||
@@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, | |||
784 | g->ops.mm.set_big_page_size(g, inst_block, big_page_size); | 376 | g->ops.mm.set_big_page_size(g, inst_block, big_page_size); |
785 | } | 377 | } |
786 | 378 | ||
379 | int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) | ||
380 | { | ||
381 | int err; | ||
382 | |||
383 | gk20a_dbg_fn(""); | ||
384 | |||
385 | err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block); | ||
386 | if (err) { | ||
387 | nvgpu_err(g, "%s: memory allocation failed", __func__); | ||
388 | return err; | ||
389 | } | ||
390 | |||
391 | gk20a_dbg_fn("done"); | ||
392 | return 0; | ||
393 | } | ||
394 | |||
787 | int gk20a_mm_fb_flush(struct gk20a *g) | 395 | int gk20a_mm_fb_flush(struct gk20a *g) |
788 | { | 396 | { |
789 | struct mm_gk20a *mm = &g->mm; | 397 | struct mm_gk20a *mm = &g->mm; |
@@ -992,19 +600,6 @@ hw_was_off: | |||
992 | gk20a_idle_nosuspend(g); | 600 | gk20a_idle_nosuspend(g); |
993 | } | 601 | } |
994 | 602 | ||
995 | int gk20a_mm_suspend(struct gk20a *g) | ||
996 | { | ||
997 | gk20a_dbg_fn(""); | ||
998 | |||
999 | nvgpu_vidmem_thread_pause_sync(&g->mm); | ||
1000 | |||
1001 | g->ops.mm.cbc_clean(g); | ||
1002 | g->ops.mm.l2_flush(g, false); | ||
1003 | |||
1004 | gk20a_dbg_fn("done"); | ||
1005 | return 0; | ||
1006 | } | ||
1007 | |||
1008 | u32 gk20a_mm_get_iommu_bit(struct gk20a *g) | 603 | u32 gk20a_mm_get_iommu_bit(struct gk20a *g) |
1009 | { | 604 | { |
1010 | return 34; | 605 | return 34; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 15876b10..434fc422 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -35,11 +35,6 @@ | |||
35 | #include <nvgpu/list.h> | 35 | #include <nvgpu/list.h> |
36 | #include <nvgpu/rbtree.h> | 36 | #include <nvgpu/rbtree.h> |
37 | #include <nvgpu/kref.h> | 37 | #include <nvgpu/kref.h> |
38 | #include <nvgpu/atomic.h> | ||
39 | #include <nvgpu/cond.h> | ||
40 | #include <nvgpu/thread.h> | ||
41 | |||
42 | struct nvgpu_pd_cache; | ||
43 | 38 | ||
44 | #ifdef CONFIG_ARM64 | 39 | #ifdef CONFIG_ARM64 |
45 | #define outer_flush_range(a, b) | 40 | #define outer_flush_range(a, b) |
@@ -138,218 +133,23 @@ struct priv_cmd_entry { | |||
138 | struct gk20a; | 133 | struct gk20a; |
139 | struct channel_gk20a; | 134 | struct channel_gk20a; |
140 | 135 | ||
141 | int gk20a_init_mm_support(struct gk20a *g); | ||
142 | int gk20a_init_mm_setup_sw(struct gk20a *g); | ||
143 | int gk20a_init_mm_setup_hw(struct gk20a *g); | ||
144 | void gk20a_init_mm_ce_context(struct gk20a *g); | ||
145 | |||
146 | int gk20a_mm_fb_flush(struct gk20a *g); | 136 | int gk20a_mm_fb_flush(struct gk20a *g); |
147 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); | 137 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); |
148 | void gk20a_mm_cbc_clean(struct gk20a *g); | 138 | void gk20a_mm_cbc_clean(struct gk20a *g); |
149 | void gk20a_mm_l2_invalidate(struct gk20a *g); | 139 | void gk20a_mm_l2_invalidate(struct gk20a *g); |
150 | 140 | ||
151 | #define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */ | ||
152 | |||
153 | struct mmu_fault_info { | ||
154 | u64 inst_ptr; | ||
155 | u32 inst_aperture; | ||
156 | u64 fault_addr; | ||
157 | u32 fault_addr_aperture; | ||
158 | u32 timestamp_lo; | ||
159 | u32 timestamp_hi; | ||
160 | u32 mmu_engine_id; | ||
161 | u32 gpc_id; | ||
162 | u32 client_type; | ||
163 | u32 client_id; | ||
164 | u32 fault_type; | ||
165 | u32 access_type; | ||
166 | u32 protected_mode; | ||
167 | u32 replayable_fault; | ||
168 | u32 replay_fault_en; | ||
169 | u32 valid; | ||
170 | u32 faulted_pbdma; | ||
171 | u32 faulted_engine; | ||
172 | u32 faulted_subid; | ||
173 | u32 chid; | ||
174 | struct channel_gk20a *refch; | ||
175 | const char *client_type_desc; | ||
176 | const char *fault_type_desc; | ||
177 | const char *client_id_desc; | ||
178 | }; | ||
179 | |||
180 | struct mm_gk20a { | ||
181 | struct gk20a *g; | ||
182 | |||
183 | /* GPU VA default sizes address spaces for channels */ | ||
184 | struct { | ||
185 | u64 user_size; /* userspace-visible GPU VA region */ | ||
186 | u64 kernel_size; /* kernel-only GPU VA region */ | ||
187 | } channel; | ||
188 | |||
189 | struct { | ||
190 | u32 aperture_size; | ||
191 | struct vm_gk20a *vm; | ||
192 | struct nvgpu_mem inst_block; | ||
193 | } bar1; | ||
194 | |||
195 | struct { | ||
196 | u32 aperture_size; | ||
197 | struct vm_gk20a *vm; | ||
198 | struct nvgpu_mem inst_block; | ||
199 | } bar2; | ||
200 | |||
201 | struct { | ||
202 | u32 aperture_size; | ||
203 | struct vm_gk20a *vm; | ||
204 | struct nvgpu_mem inst_block; | ||
205 | } pmu; | ||
206 | |||
207 | struct { | ||
208 | /* using pmu vm currently */ | ||
209 | struct nvgpu_mem inst_block; | ||
210 | } hwpm; | ||
211 | |||
212 | struct { | ||
213 | struct vm_gk20a *vm; | ||
214 | struct nvgpu_mem inst_block; | ||
215 | } perfbuf; | ||
216 | |||
217 | struct { | ||
218 | struct vm_gk20a *vm; | ||
219 | } cde; | ||
220 | |||
221 | struct { | ||
222 | struct vm_gk20a *vm; | ||
223 | } ce; | ||
224 | |||
225 | struct nvgpu_pd_cache *pd_cache; | ||
226 | |||
227 | struct nvgpu_mutex l2_op_lock; | ||
228 | struct nvgpu_mutex tlb_lock; | ||
229 | struct nvgpu_mutex priv_lock; | ||
230 | |||
231 | struct nvgpu_mem bar2_desc; | ||
232 | |||
233 | #ifdef CONFIG_TEGRA_19x_GPU | ||
234 | struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM]; | ||
235 | unsigned int hw_fault_buf_status[FAULT_TYPE_NUM]; | ||
236 | struct mmu_fault_info *fault_info[FAULT_TYPE_NUM]; | ||
237 | struct nvgpu_mutex hub_isr_mutex; | ||
238 | u32 hub_intr_types; | ||
239 | #endif | ||
240 | /* | ||
241 | * Separate function to cleanup the CE since it requires a channel to | ||
242 | * be closed which must happen before fifo cleanup. | ||
243 | */ | ||
244 | void (*remove_ce_support)(struct mm_gk20a *mm); | ||
245 | void (*remove_support)(struct mm_gk20a *mm); | ||
246 | bool sw_ready; | ||
247 | int physical_bits; | ||
248 | bool use_full_comp_tag_line; | ||
249 | bool ltc_enabled_current; | ||
250 | bool ltc_enabled_target; | ||
251 | bool bypass_smmu; | ||
252 | bool disable_bigpage; | ||
253 | bool has_physical_mode; | ||
254 | |||
255 | struct nvgpu_mem sysmem_flush; | ||
256 | |||
257 | u32 pramin_window; | ||
258 | struct nvgpu_spinlock pramin_window_lock; | ||
259 | bool force_pramin; /* via debugfs */ | ||
260 | |||
261 | struct { | ||
262 | size_t size; | ||
263 | u64 base; | ||
264 | size_t bootstrap_size; | ||
265 | u64 bootstrap_base; | ||
266 | |||
267 | struct nvgpu_allocator allocator; | ||
268 | struct nvgpu_allocator bootstrap_allocator; | ||
269 | |||
270 | u32 ce_ctx_id; | ||
271 | volatile bool cleared; | ||
272 | struct nvgpu_mutex first_clear_mutex; | ||
273 | |||
274 | struct nvgpu_list_node clear_list_head; | ||
275 | struct nvgpu_mutex clear_list_mutex; | ||
276 | |||
277 | struct nvgpu_cond clearing_thread_cond; | ||
278 | struct nvgpu_thread clearing_thread; | ||
279 | struct nvgpu_mutex clearing_thread_lock; | ||
280 | nvgpu_atomic_t pause_count; | ||
281 | |||
282 | nvgpu_atomic64_t bytes_pending; | ||
283 | } vidmem; | ||
284 | }; | ||
285 | |||
286 | int gk20a_mm_init(struct mm_gk20a *mm); | ||
287 | |||
288 | #define gk20a_from_mm(mm) ((mm)->g) | ||
289 | #define gk20a_from_vm(vm) ((vm)->mm->g) | ||
290 | |||
291 | #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g) | 141 | #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g) |
292 | 142 | ||
293 | #define DEFAULT_ALLOC_ALIGNMENT (4*1024) | 143 | void gk20a_mm_ltc_isr(struct gk20a *g); |
294 | |||
295 | static inline int bar1_aperture_size_mb_gk20a(void) | ||
296 | { | ||
297 | return 16; /* 16MB is more than enough atm. */ | ||
298 | } | ||
299 | |||
300 | /* The maximum GPU VA range supported */ | ||
301 | #define NV_GMMU_VA_RANGE 38 | ||
302 | |||
303 | /* The default userspace-visible GPU VA size */ | ||
304 | #define NV_MM_DEFAULT_USER_SIZE (1ULL << 37) | ||
305 | |||
306 | /* The default kernel-reserved GPU VA size */ | ||
307 | #define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) | ||
308 | |||
309 | /* | ||
310 | * When not using unified address spaces, the bottom 56GB of the space are used | ||
311 | * for small pages, and the remaining high memory is used for large pages. | ||
312 | */ | ||
313 | static inline u64 __nv_gmmu_va_small_page_limit(void) | ||
314 | { | ||
315 | return ((u64)SZ_1G * 56); | ||
316 | } | ||
317 | |||
318 | enum nvgpu_flush_op { | ||
319 | NVGPU_FLUSH_DEFAULT, | ||
320 | NVGPU_FLUSH_FB, | ||
321 | NVGPU_FLUSH_L2_INV, | ||
322 | NVGPU_FLUSH_L2_FLUSH, | ||
323 | NVGPU_FLUSH_CBC_CLEAN, | ||
324 | }; | ||
325 | 144 | ||
326 | enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, | 145 | bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g); |
327 | u64 base, u64 size); | ||
328 | enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); | ||
329 | 146 | ||
330 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ | 147 | int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g); |
331 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ | ||
332 | bus_bar1_block_ptr_s()) | ||
333 | #else | ||
334 | #define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v() | ||
335 | #endif | ||
336 | 148 | ||
337 | int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); | 149 | int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); |
338 | void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); | ||
339 | void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, | 150 | void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, |
340 | u32 big_page_size); | 151 | u32 big_page_size); |
341 | u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem); | 152 | int gk20a_init_mm_setup_hw(struct gk20a *g); |
342 | |||
343 | void gk20a_mm_dump_vm(struct vm_gk20a *vm, | ||
344 | u64 va_begin, u64 va_end, char *label); | ||
345 | |||
346 | int gk20a_mm_suspend(struct gk20a *g); | ||
347 | |||
348 | void gk20a_mm_ltc_isr(struct gk20a *g); | ||
349 | |||
350 | bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g); | ||
351 | |||
352 | int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g); | ||
353 | 153 | ||
354 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 154 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
355 | u64 map_offset, | 155 | u64 map_offset, |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index e4dd6a59..2b954e1a 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <nvgpu/bug.h> | 30 | #include <nvgpu/bug.h> |
31 | #include <nvgpu/firmware.h> | 31 | #include <nvgpu/firmware.h> |
32 | #include <nvgpu/falcon.h> | 32 | #include <nvgpu/falcon.h> |
33 | #include <nvgpu/mm.h> | ||
33 | 34 | ||
34 | #include "gk20a.h" | 35 | #include "gk20a.h" |
35 | #include "gr_gk20a.h" | 36 | #include "gr_gk20a.h" |
@@ -181,7 +182,7 @@ int pmu_bootstrap(struct nvgpu_pmu *pmu) | |||
181 | pwr_falcon_itfen_ctxen_enable_f()); | 182 | pwr_falcon_itfen_ctxen_enable_f()); |
182 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | 183 | gk20a_writel(g, pwr_pmu_new_instblk_r(), |
183 | pwr_pmu_new_instblk_ptr_f( | 184 | pwr_pmu_new_instblk_ptr_f( |
184 | gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 185 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
185 | pwr_pmu_new_instblk_valid_f(1) | | 186 | pwr_pmu_new_instblk_valid_f(1) | |
186 | pwr_pmu_new_instblk_target_sys_coh_f()); | 187 | pwr_pmu_new_instblk_target_sys_coh_f()); |
187 | 188 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 7029b477..557948e1 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c | |||
@@ -21,7 +21,6 @@ | |||
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <nvgpu/types.h> | 23 | #include <nvgpu/types.h> |
24 | |||
25 | #include <nvgpu/dma.h> | 24 | #include <nvgpu/dma.h> |
26 | #include <nvgpu/gmmu.h> | 25 | #include <nvgpu/gmmu.h> |
27 | #include <nvgpu/timers.h> | 26 | #include <nvgpu/timers.h> |
@@ -33,6 +32,7 @@ | |||
33 | #include <nvgpu/pmu.h> | 32 | #include <nvgpu/pmu.h> |
34 | #include <nvgpu/falcon.h> | 33 | #include <nvgpu/falcon.h> |
35 | #include <nvgpu/enabled.h> | 34 | #include <nvgpu/enabled.h> |
35 | #include <nvgpu/mm.h> | ||
36 | 36 | ||
37 | #include "gk20a/gk20a.h" | 37 | #include "gk20a/gk20a.h" |
38 | #include "gk20a/pmu_gk20a.h" | 38 | #include "gk20a/pmu_gk20a.h" |
@@ -1170,7 +1170,7 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu, | |||
1170 | pwr_falcon_itfen_ctxen_enable_f()); | 1170 | pwr_falcon_itfen_ctxen_enable_f()); |
1171 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | 1171 | gk20a_writel(g, pwr_pmu_new_instblk_r(), |
1172 | pwr_pmu_new_instblk_ptr_f( | 1172 | pwr_pmu_new_instblk_ptr_f( |
1173 | gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 1173 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
1174 | pwr_pmu_new_instblk_valid_f(1) | | 1174 | pwr_pmu_new_instblk_valid_f(1) | |
1175 | pwr_pmu_new_instblk_target_sys_coh_f()); | 1175 | pwr_pmu_new_instblk_target_sys_coh_f()); |
1176 | 1176 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c index b8d42f7a..34c8d4b7 100644 --- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c | |||
@@ -24,6 +24,7 @@ | |||
24 | 24 | ||
25 | #include <nvgpu/timers.h> | 25 | #include <nvgpu/timers.h> |
26 | #include <nvgpu/bus.h> | 26 | #include <nvgpu/bus.h> |
27 | #include <nvgpu/mm.h> | ||
27 | 28 | ||
28 | #include "bus_gm20b.h" | 29 | #include "bus_gm20b.h" |
29 | #include "gk20a/gk20a.h" | 30 | #include "gk20a/gk20a.h" |
@@ -35,8 +36,8 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst) | |||
35 | { | 36 | { |
36 | struct nvgpu_timeout timeout; | 37 | struct nvgpu_timeout timeout; |
37 | int err = 0; | 38 | int err = 0; |
38 | u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst); | 39 | u64 iova = nvgpu_inst_block_addr(g, bar1_inst); |
39 | u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a()); | 40 | u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v()); |
40 | 41 | ||
41 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); | 42 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v); |
42 | 43 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index f4ddd92f..0762e8bd 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <nvgpu/log.h> | 31 | #include <nvgpu/log.h> |
32 | #include <nvgpu/atomic.h> | 32 | #include <nvgpu/atomic.h> |
33 | #include <nvgpu/barrier.h> | 33 | #include <nvgpu/barrier.h> |
34 | #include <nvgpu/mm.h> | ||
34 | 35 | ||
35 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> | 36 | #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> |
36 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> | 37 | #include <nvgpu/hw/gm20b/hw_ram_gm20b.h> |
@@ -42,7 +43,7 @@ void channel_gm20b_bind(struct channel_gk20a *c) | |||
42 | { | 43 | { |
43 | struct gk20a *g = c->g; | 44 | struct gk20a *g = c->g; |
44 | 45 | ||
45 | u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block) | 46 | u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block) |
46 | >> ram_in_base_shift_v(); | 47 | >> ram_in_base_shift_v(); |
47 | 48 | ||
48 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", | 49 | gk20a_dbg_info("bind channel %d inst ptr 0x%08x", |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 269fd7f1..d081fb24 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -442,6 +442,7 @@ static const struct gpu_ops gm20b_ops = { | |||
442 | .init_pdb = gk20a_mm_init_pdb, | 442 | .init_pdb = gk20a_mm_init_pdb, |
443 | .init_mm_setup_hw = gk20a_init_mm_setup_hw, | 443 | .init_mm_setup_hw = gk20a_init_mm_setup_hw, |
444 | .is_bar1_supported = gm20b_mm_is_bar1_supported, | 444 | .is_bar1_supported = gm20b_mm_is_bar1_supported, |
445 | .alloc_inst_block = gk20a_alloc_inst_block, | ||
445 | .init_inst_block = gk20a_init_inst_block, | 446 | .init_inst_block = gk20a_init_inst_block, |
446 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, | 447 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, |
447 | .get_kind_invalid = gm20b_get_kind_invalid, | 448 | .get_kind_invalid = gm20b_get_kind_invalid, |
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 1246ee7f..59f72e13 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c | |||
@@ -524,6 +524,7 @@ static const struct gpu_ops gp106_ops = { | |||
524 | .init_pdb = gp10b_mm_init_pdb, | 524 | .init_pdb = gp10b_mm_init_pdb, |
525 | .init_mm_setup_hw = gp10b_init_mm_setup_hw, | 525 | .init_mm_setup_hw = gp10b_init_mm_setup_hw, |
526 | .is_bar1_supported = gm20b_mm_is_bar1_supported, | 526 | .is_bar1_supported = gm20b_mm_is_bar1_supported, |
527 | .alloc_inst_block = gk20a_alloc_inst_block, | ||
527 | .init_inst_block = gk20a_init_inst_block, | 528 | .init_inst_block = gk20a_init_inst_block, |
528 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, | 529 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, |
529 | .init_bar2_vm = gb10b_init_bar2_vm, | 530 | .init_bar2_vm = gb10b_init_bar2_vm, |
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c index 9f0fe375..26ded39e 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include <nvgpu/pmu.h> | 23 | #include <nvgpu/pmu.h> |
24 | #include <nvgpu/falcon.h> | 24 | #include <nvgpu/falcon.h> |
25 | #include <nvgpu/mm.h> | ||
25 | 26 | ||
26 | #include "gk20a/gk20a.h" | 27 | #include "gk20a/gk20a.h" |
27 | #include "sec2_gp106.h" | 28 | #include "sec2_gp106.h" |
@@ -88,7 +89,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu, | |||
88 | 89 | ||
89 | gk20a_writel(g, psec_falcon_nxtctx_r(), | 90 | gk20a_writel(g, psec_falcon_nxtctx_r(), |
90 | pwr_pmu_new_instblk_ptr_f( | 91 | pwr_pmu_new_instblk_ptr_f( |
91 | gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 92 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
92 | pwr_pmu_new_instblk_valid_f(1) | | 93 | pwr_pmu_new_instblk_valid_f(1) | |
93 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, | 94 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, |
94 | pwr_pmu_new_instblk_target_sys_coh_f(), | 95 | pwr_pmu_new_instblk_target_sys_coh_f(), |
@@ -154,7 +155,7 @@ void init_pmu_setup_hw1(struct gk20a *g) | |||
154 | pwr_falcon_itfen_ctxen_enable_f()); | 155 | pwr_falcon_itfen_ctxen_enable_f()); |
155 | gk20a_writel(g, pwr_pmu_new_instblk_r(), | 156 | gk20a_writel(g, pwr_pmu_new_instblk_r(), |
156 | pwr_pmu_new_instblk_ptr_f( | 157 | pwr_pmu_new_instblk_ptr_f( |
157 | gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | | 158 | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | |
158 | pwr_pmu_new_instblk_valid_f(1) | | 159 | pwr_pmu_new_instblk_valid_f(1) | |
159 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, | 160 | nvgpu_aperture_mask(g, &mm->pmu.inst_block, |
160 | pwr_pmu_new_instblk_target_sys_coh_f(), | 161 | pwr_pmu_new_instblk_target_sys_coh_f(), |
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index b80722b8..a10df740 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c | |||
@@ -484,6 +484,7 @@ static const struct gpu_ops gp10b_ops = { | |||
484 | .init_pdb = gp10b_mm_init_pdb, | 484 | .init_pdb = gp10b_mm_init_pdb, |
485 | .init_mm_setup_hw = gp10b_init_mm_setup_hw, | 485 | .init_mm_setup_hw = gp10b_init_mm_setup_hw, |
486 | .is_bar1_supported = gm20b_mm_is_bar1_supported, | 486 | .is_bar1_supported = gm20b_mm_is_bar1_supported, |
487 | .alloc_inst_block = gk20a_alloc_inst_block, | ||
487 | .init_inst_block = gk20a_init_inst_block, | 488 | .init_inst_block = gk20a_init_inst_block, |
488 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, | 489 | .mmu_fault_pending = gk20a_fifo_mmu_fault_pending, |
489 | .init_bar2_vm = gb10b_init_bar2_vm, | 490 | .init_bar2_vm = gb10b_init_bar2_vm, |
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 06a9b929..dc746153 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -22,6 +22,7 @@ | |||
22 | * DEALINGS IN THE SOFTWARE. | 22 | * DEALINGS IN THE SOFTWARE. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <nvgpu/mm.h> | ||
25 | #include <nvgpu/dma.h> | 26 | #include <nvgpu/dma.h> |
26 | #include <nvgpu/gmmu.h> | 27 | #include <nvgpu/gmmu.h> |
27 | 28 | ||
@@ -95,7 +96,7 @@ int gb10b_init_bar2_vm(struct gk20a *g) | |||
95 | return -ENOMEM; | 96 | return -ENOMEM; |
96 | 97 | ||
97 | /* allocate instance mem for bar2 */ | 98 | /* allocate instance mem for bar2 */ |
98 | err = gk20a_alloc_inst_block(g, inst_block); | 99 | err = g->ops.mm.alloc_inst_block(g, inst_block); |
99 | if (err) | 100 | if (err) |
100 | goto clean_up_va; | 101 | goto clean_up_va; |
101 | 102 | ||
@@ -112,7 +113,7 @@ int gb10b_init_bar2_mm_hw_setup(struct gk20a *g) | |||
112 | { | 113 | { |
113 | struct mm_gk20a *mm = &g->mm; | 114 | struct mm_gk20a *mm = &g->mm; |
114 | struct nvgpu_mem *inst_block = &mm->bar2.inst_block; | 115 | struct nvgpu_mem *inst_block = &mm->bar2.inst_block; |
115 | u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block); | 116 | u64 inst_pa = nvgpu_inst_block_addr(g, inst_block); |
116 | 117 | ||
117 | gk20a_dbg_fn(""); | 118 | gk20a_dbg_fn(""); |
118 | 119 | ||
@@ -374,6 +375,6 @@ void gp10b_remove_bar2_vm(struct gk20a *g) | |||
374 | struct mm_gk20a *mm = &g->mm; | 375 | struct mm_gk20a *mm = &g->mm; |
375 | 376 | ||
376 | gp10b_replayable_pagefault_buffer_deinit(g); | 377 | gp10b_replayable_pagefault_buffer_deinit(g); |
377 | gk20a_free_inst_block(g, &mm->bar2.inst_block); | 378 | nvgpu_free_inst_block(g, &mm->bar2.inst_block); |
378 | nvgpu_vm_put(mm->bar2.vm); | 379 | nvgpu_vm_put(mm->bar2.vm); |
379 | } | 380 | } |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h new file mode 100644 index 00000000..13b33d9f --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h | |||
@@ -0,0 +1,220 @@ | |||
1 | /* | ||
2 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
3 | * copy of this software and associated documentation files (the "Software"), | ||
4 | * to deal in the Software without restriction, including without limitation | ||
5 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
6 | * and/or sell copies of the Software, and to permit persons to whom the | ||
7 | * Software is furnished to do so, subject to the following conditions: | ||
8 | * | ||
9 | * The above copyright notice and this permission notice shall be included in | ||
10 | * all copies or substantial portions of the Software. | ||
11 | * | ||
12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
15 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
17 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
18 | * DEALINGS IN THE SOFTWARE. | ||
19 | */ | ||
20 | |||
21 | #ifndef __NVGPU_MM_H__ | ||
22 | #define __NVGPU_MM_H__ | ||
23 | |||
24 | #include <nvgpu/types.h> | ||
25 | #include <nvgpu/cond.h> | ||
26 | #include <nvgpu/thread.h> | ||
27 | #include <nvgpu/lock.h> | ||
28 | #include <nvgpu/atomic.h> | ||
29 | #include <nvgpu/nvgpu_mem.h> | ||
30 | #include <nvgpu/allocator.h> | ||
31 | #include <nvgpu/list.h> | ||
32 | |||
33 | struct gk20a; | ||
34 | struct vm_gk20a; | ||
35 | struct nvgpu_mem; | ||
36 | struct nvgpu_pd_cache; | ||
37 | |||
38 | #define FAULT_TYPE_NUM 2 /* replay and nonreplay faults */ | ||
39 | |||
40 | struct mmu_fault_info { | ||
41 | u64 inst_ptr; | ||
42 | u32 inst_aperture; | ||
43 | u64 fault_addr; | ||
44 | u32 fault_addr_aperture; | ||
45 | u32 timestamp_lo; | ||
46 | u32 timestamp_hi; | ||
47 | u32 mmu_engine_id; | ||
48 | u32 gpc_id; | ||
49 | u32 client_type; | ||
50 | u32 client_id; | ||
51 | u32 fault_type; | ||
52 | u32 access_type; | ||
53 | u32 protected_mode; | ||
54 | u32 replayable_fault; | ||
55 | u32 replay_fault_en; | ||
56 | u32 valid; | ||
57 | u32 faulted_pbdma; | ||
58 | u32 faulted_engine; | ||
59 | u32 faulted_subid; | ||
60 | u32 chid; | ||
61 | struct channel_gk20a *refch; | ||
62 | const char *client_type_desc; | ||
63 | const char *fault_type_desc; | ||
64 | const char *client_id_desc; | ||
65 | }; | ||
66 | |||
67 | enum nvgpu_flush_op { | ||
68 | NVGPU_FLUSH_DEFAULT, | ||
69 | NVGPU_FLUSH_FB, | ||
70 | NVGPU_FLUSH_L2_INV, | ||
71 | NVGPU_FLUSH_L2_FLUSH, | ||
72 | NVGPU_FLUSH_CBC_CLEAN, | ||
73 | }; | ||
74 | |||
75 | struct mm_gk20a { | ||
76 | struct gk20a *g; | ||
77 | |||
78 | /* Default GPU VA address space sizes for channels */ | |
79 | struct { | ||
80 | u64 user_size; /* userspace-visible GPU VA region */ | ||
81 | u64 kernel_size; /* kernel-only GPU VA region */ | ||
82 | } channel; | ||
83 | |||
84 | struct { | ||
85 | u32 aperture_size; | ||
86 | struct vm_gk20a *vm; | ||
87 | struct nvgpu_mem inst_block; | ||
88 | } bar1; | ||
89 | |||
90 | struct { | ||
91 | u32 aperture_size; | ||
92 | struct vm_gk20a *vm; | ||
93 | struct nvgpu_mem inst_block; | ||
94 | } bar2; | ||
95 | |||
96 | struct { | ||
97 | u32 aperture_size; | ||
98 | struct vm_gk20a *vm; | ||
99 | struct nvgpu_mem inst_block; | ||
100 | } pmu; | ||
101 | |||
102 | struct { | ||
103 | /* using pmu vm currently */ | ||
104 | struct nvgpu_mem inst_block; | ||
105 | } hwpm; | ||
106 | |||
107 | struct { | ||
108 | struct vm_gk20a *vm; | ||
109 | struct nvgpu_mem inst_block; | ||
110 | } perfbuf; | ||
111 | |||
112 | struct { | ||
113 | struct vm_gk20a *vm; | ||
114 | } cde; | ||
115 | |||
116 | struct { | ||
117 | struct vm_gk20a *vm; | ||
118 | } ce; | ||
119 | |||
120 | struct nvgpu_pd_cache *pd_cache; | ||
121 | |||
122 | struct nvgpu_mutex l2_op_lock; | ||
123 | struct nvgpu_mutex tlb_lock; | ||
124 | struct nvgpu_mutex priv_lock; | ||
125 | |||
126 | struct nvgpu_mem bar2_desc; | ||
127 | |||
128 | #ifdef CONFIG_TEGRA_19x_GPU | ||
129 | struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM]; | ||
130 | unsigned int hw_fault_buf_status[FAULT_TYPE_NUM]; | ||
131 | struct mmu_fault_info *fault_info[FAULT_TYPE_NUM]; | ||
132 | struct nvgpu_mutex hub_isr_mutex; | ||
133 | u32 hub_intr_types; | ||
134 | #endif | ||
135 | /* | ||
136 | * Separate function to clean up the CE since it requires a channel to | |
137 | * be closed which must happen before fifo cleanup. | ||
138 | */ | ||
139 | void (*remove_ce_support)(struct mm_gk20a *mm); | ||
140 | void (*remove_support)(struct mm_gk20a *mm); | ||
141 | bool sw_ready; | ||
142 | int physical_bits; | ||
143 | bool use_full_comp_tag_line; | ||
144 | bool ltc_enabled_current; | ||
145 | bool ltc_enabled_target; | ||
146 | bool bypass_smmu; | ||
147 | bool disable_bigpage; | ||
148 | bool has_physical_mode; | ||
149 | |||
150 | struct nvgpu_mem sysmem_flush; | ||
151 | |||
152 | u32 pramin_window; | ||
153 | struct nvgpu_spinlock pramin_window_lock; | ||
154 | bool force_pramin; /* via debugfs */ | ||
155 | |||
156 | struct { | ||
157 | size_t size; | ||
158 | u64 base; | ||
159 | size_t bootstrap_size; | ||
160 | u64 bootstrap_base; | ||
161 | |||
162 | struct nvgpu_allocator allocator; | ||
163 | struct nvgpu_allocator bootstrap_allocator; | ||
164 | |||
165 | u32 ce_ctx_id; | ||
166 | volatile bool cleared; | ||
167 | struct nvgpu_mutex first_clear_mutex; | ||
168 | |||
169 | struct nvgpu_list_node clear_list_head; | ||
170 | struct nvgpu_mutex clear_list_mutex; | ||
171 | |||
172 | struct nvgpu_cond clearing_thread_cond; | ||
173 | struct nvgpu_thread clearing_thread; | ||
174 | struct nvgpu_mutex clearing_thread_lock; | ||
175 | nvgpu_atomic_t pause_count; | ||
176 | |||
177 | nvgpu_atomic64_t bytes_pending; | ||
178 | } vidmem; | ||
179 | }; | ||
180 | |||
181 | #define gk20a_from_mm(mm) ((mm)->g) | ||
182 | #define gk20a_from_vm(vm) ((vm)->mm->g) | ||
183 | |||
184 | static inline int bar1_aperture_size_mb_gk20a(void) | ||
185 | { | ||
186 | return 16; /* 16MB is more than enough atm. */ | ||
187 | } | ||
188 | |||
189 | /* The maximum GPU VA range supported */ | ||
190 | #define NV_GMMU_VA_RANGE 38 | ||
191 | |||
192 | /* The default userspace-visible GPU VA size */ | ||
193 | #define NV_MM_DEFAULT_USER_SIZE (1ULL << 37) | ||
194 | |||
195 | /* The default kernel-reserved GPU VA size */ | ||
196 | #define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32) | ||
197 | |||
198 | /* | ||
199 | * When not using unified address spaces, the bottom 56GB of the space is used | |
200 | * for small pages, and the remaining high memory is used for large pages. | ||
201 | */ | ||
202 | static inline u64 __nv_gmmu_va_small_page_limit(void) | ||
203 | { | ||
204 | return ((u64)SZ_1G * 56); | ||
205 | } | ||
206 | |||
207 | enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm, | ||
208 | u64 base, u64 size); | ||
209 | enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size); | ||
210 | |||
211 | void nvgpu_init_mm_ce_context(struct gk20a *g); | ||
212 | int nvgpu_init_mm_support(struct gk20a *g); | ||
213 | int nvgpu_init_mm_setup_hw(struct gk20a *g); | ||
214 | |||
215 | u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem); | ||
216 | void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block); | ||
217 | |||
218 | int nvgpu_mm_suspend(struct gk20a *g); | ||
219 | |||
220 | #endif | ||