author		Alex Waterman <alexw@nvidia.com>	2017-10-06 14:30:29 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-10-24 18:16:49 -0400
commit		2a285d0607a20694476399f5719e74dbc26fcd58 (patch)
tree		ef0246e3ca7b933ce3ea4c74061f61cc2e394b8b /drivers/gpu/nvgpu/common/mm
parent		748331cbab1c7af26ab1fbae5ead2cdaff22806a (diff)
gpu: nvgpu: Cleanup generic MM code in gk20a/mm_gk20a.c
Move much of the remaining generic MM code to a new common location: common/mm/mm.c. Also add a corresponding <nvgpu/mm.h> header. This mostly consists of init and cleanup code that handles the common MM data structures: the VIDMEM code, the address spaces for the various engines, etc.

A few more in-depth changes were made as well:

1. alloc_inst_block() has been added to the MM HAL. This used to be defined directly in the gk20a code, but it used a register; if that register were ever to change on a future chip, the function would need to become a HAL anyway. This change preempts that, and for now all chips simply point the HAL at the gk20a version.

2. Rename as much as possible: global functions are, for the most part, prefixed with nvgpu_ (there are a few exceptions which I have yet to decide what to do with). Static functions are renamed to match their functionality as closely as possible, since in some cases function behavior and function name had diverged.

JIRA NVGPU-30

Change-Id: Ic948f1ecc2f7976eba4bb7169a44b7226bb7c0b5
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1574499
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
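For illustration, a minimal sketch of the indirection point added in (1): a chip whose instance-block sizing register diverges from gk20a could plug its own allocator into the new HAL. The gxxx_* names and the size helper below are hypothetical; only g->ops.mm.alloc_inst_block, nvgpu_dma_alloc() and struct nvgpu_mem come from the existing code.

#include <nvgpu/dma.h>

#include "gk20a/gk20a.h"

/* Hypothetical chip-specific override of the alloc_inst_block HAL. */
static int gxxx_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	/* gxxx_ram_in_alloc_size() stands in for the chip's register-derived size. */
	return nvgpu_dma_alloc(g, gxxx_ram_in_alloc_size(), inst_block);
}

/*
 * Wired up in that chip's HAL init; today every chip points this at the
 * gk20a implementation instead:
 *
 *	g->ops.mm.alloc_inst_block = gxxx_alloc_inst_block;
 */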
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm')
-rw-r--r--	drivers/gpu/nvgpu/common/mm/buddy_allocator.c	  4
-rw-r--r--	drivers/gpu/nvgpu/common/mm/mm.c		426
2 files changed, 428 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index c6f10a69..a2546e9d 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -25,8 +25,8 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/log2.h>
 #include <nvgpu/barrier.h>
-
-#include "gk20a/mm_gk20a.h"
+#include <nvgpu/mm.h>
+#include <nvgpu/vm.h>
 
 #include "buddy_allocator_priv.h"
 
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
new file mode 100644
index 00000000..1027ed28
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -0,0 +1,426 @@
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/mm.h>
#include <nvgpu/vm.h>
#include <nvgpu/dma.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/pramin.h>
#include <nvgpu/enabled.h>

#include "gk20a/gk20a.h"

/*
 * Attempt to find a reserved memory area to determine PTE size for the passed
 * mapping. If no reserved area can be found use small pages.
 */
enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
					      u64 base, u64 size)
{
	struct nvgpu_vm_area *vm_area;

	vm_area = nvgpu_vm_area_find(vm, base);
	if (!vm_area)
		return gmmu_page_size_small;

	return vm_area->pgsz_idx;
}

/*
 * This is for when the address space does not support unified address spaces.
 */
static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
						      u64 base, u64 size)
{
	if (!base) {
		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
			return gmmu_page_size_big;
		return gmmu_page_size_small;
	} else {
		if (base < __nv_gmmu_va_small_page_limit())
			return gmmu_page_size_small;
		else
			return gmmu_page_size_big;
	}
}

/*
 * This determines the PTE size for a given alloc. Used by both the GVA space
 * allocator and the mm core code so that agreement can be reached on how to
 * map allocations.
 *
 * The page size of a buffer is this:
 *
 *   o  If the VM doesn't support large pages then obviously small pages
 *      must be used.
 *   o  If the base address is non-zero (fixed address map):
 *      -  Attempt to find a reserved memory area and use the page size
 *         based on that.
 *      -  If no reserved page size is available, default to small pages.
 *   o  If the base is zero:
 *      -  If the size is larger than or equal to the big page size, use big
 *         pages.
 *      -  Otherwise use small pages.
 */
enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
{
	struct gk20a *g = gk20a_from_vm(vm);

	if (!vm->big_pages)
		return gmmu_page_size_small;

	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
		return __get_pte_size_split_addr(vm, base, size);

	if (base)
		return __get_pte_size_fixed_map(vm, base, size);

	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
		return gmmu_page_size_big;
	return gmmu_page_size_small;
}

int nvgpu_mm_suspend(struct gk20a *g)
{
	nvgpu_info(g, "MM suspend running...");

	nvgpu_vidmem_thread_pause_sync(&g->mm);

	g->ops.mm.cbc_clean(g);
	g->ops.mm.l2_flush(g, false);

	nvgpu_info(g, "MM suspend done!");

	return 0;
}

u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	if (g->mm.has_physical_mode)
		return nvgpu_mem_get_phys_addr(g, inst_block);
	else
		return nvgpu_mem_get_addr(g, inst_block);
}

void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
{
	if (nvgpu_mem_is_valid(inst_block))
		nvgpu_dma_free(g, inst_block);
}

static int nvgpu_alloc_sysmem_flush(struct gk20a *g)
{
	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
}

static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm)
{
	struct gk20a *g = gk20a_from_mm(mm);

	if (mm->vidmem.ce_ctx_id != (u32)~0)
		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);

	mm->vidmem.ce_ctx_id = (u32)~0;

	nvgpu_vm_put(mm->ce.vm);
}

static void nvgpu_remove_mm_support(struct mm_gk20a *mm)
{
	struct gk20a *g = gk20a_from_mm(mm);

	if (g->ops.mm.fault_info_mem_destroy)
		g->ops.mm.fault_info_mem_destroy(g);

	if (g->ops.mm.remove_bar2_vm)
		g->ops.mm.remove_bar2_vm(g);

	if (g->ops.mm.is_bar1_supported(g)) {
		nvgpu_free_inst_block(g, &mm->bar1.inst_block);
		nvgpu_vm_put(mm->bar1.vm);
	}

	nvgpu_free_inst_block(g, &mm->pmu.inst_block);
	nvgpu_free_inst_block(g, &mm->hwpm.inst_block);
	nvgpu_vm_put(mm->pmu.vm);
	nvgpu_vm_put(mm->cde.vm);

	nvgpu_semaphore_sea_destroy(g);
	nvgpu_vidmem_destroy(g);
	nvgpu_pd_cache_fini(g);
}

/* pmu vm, share channel_vm interfaces */
static int nvgpu_init_system_vm(struct mm_gk20a *mm)
{
	int err;
	struct gk20a *g = gk20a_from_mm(mm);
	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
	u32 big_page_size = g->ops.mm.get_default_big_page_size();
	u32 low_hole, aperture_size;

	/*
	 * No user region - so we will pass that as zero sized.
	 */
	low_hole = SZ_4K * 16;
	aperture_size = GK20A_PMU_VA_SIZE * 2;

	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
	nvgpu_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size);

	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
				   low_hole,
				   aperture_size - low_hole,
				   aperture_size,
				   true,
				   false,
				   "system");
	if (!mm->pmu.vm)
		return -ENOMEM;

	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		goto clean_up_vm;
	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);

	return 0;

clean_up_vm:
	nvgpu_vm_put(mm->pmu.vm);
	return err;
}

static int nvgpu_init_hwpm(struct mm_gk20a *mm)
{
	int err;
	struct gk20a *g = gk20a_from_mm(mm);
	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;

	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		return err;
	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);

	return 0;
}

static int nvgpu_init_cde_vm(struct mm_gk20a *mm)
{
	struct gk20a *g = gk20a_from_mm(mm);
	u32 big_page_size = g->ops.mm.get_default_big_page_size();

	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
				   big_page_size << 10,
				   NV_MM_DEFAULT_KERNEL_SIZE,
				   NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
				   false, false, "cde");
	if (!mm->cde.vm)
		return -ENOMEM;
	return 0;
}

static int nvgpu_init_ce_vm(struct mm_gk20a *mm)
{
	struct gk20a *g = gk20a_from_mm(mm);
	u32 big_page_size = g->ops.mm.get_default_big_page_size();

	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
				  big_page_size << 10,
				  NV_MM_DEFAULT_KERNEL_SIZE,
				  NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
				  false, false, "ce");
	if (!mm->ce.vm)
		return -ENOMEM;
	return 0;
}

void nvgpu_init_mm_ce_context(struct gk20a *g)
{
#if defined(CONFIG_GK20A_VIDMEM)
	if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
		g->mm.vidmem.ce_ctx_id =
			gk20a_ce_create_context_with_cb(g,
				gk20a_fifo_get_fast_ce_runlist_id(g),
				-1,
				-1,
				-1,
				NULL);

		if (g->mm.vidmem.ce_ctx_id == (u32)~0)
			nvgpu_err(g,
				"Failed to allocate CE context for vidmem page clearing support");
	}
#endif
}

static int nvgpu_init_mm_reset_enable_hw(struct gk20a *g)
{
	if (g->ops.fb.reset)
		g->ops.fb.reset(g);

	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
				g->slcg_enabled);
	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
				g->slcg_enabled);
	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
				g->blcg_enabled);
	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
				g->blcg_enabled);

	if (g->ops.fb.init_fs_state)
		g->ops.fb.init_fs_state(g);

	return 0;
}

static int nvgpu_init_bar1_vm(struct mm_gk20a *mm)
{
	int err;
	struct gk20a *g = gk20a_from_mm(mm);
	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
	u32 big_page_size = g->ops.mm.get_default_big_page_size();

	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
	nvgpu_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size);
	mm->bar1.vm = nvgpu_vm_init(g,
				    big_page_size,
				    SZ_4K,
				    mm->bar1.aperture_size - SZ_4K,
				    mm->bar1.aperture_size,
				    true, false,
				    "bar1");
	if (!mm->bar1.vm)
		return -ENOMEM;

	err = g->ops.mm.alloc_inst_block(g, inst_block);
	if (err)
		goto clean_up_vm;
	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);

	return 0;

clean_up_vm:
	nvgpu_vm_put(mm->bar1.vm);
	return err;
}

static int nvgpu_init_mm_setup_sw(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	int err;

	if (mm->sw_ready) {
		nvgpu_info(g, "skip init");
		return 0;
	}

	mm->g = g;
	nvgpu_mutex_init(&mm->l2_op_lock);

	/*TBD: make channel vm size configurable */
	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
		NV_MM_DEFAULT_KERNEL_SIZE;
	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;

	nvgpu_info(g, "channel vm size: user %dMB kernel %dMB",
		   (int)(mm->channel.user_size >> 20),
		   (int)(mm->channel.kernel_size >> 20));

	nvgpu_init_pramin(mm);

	mm->vidmem.ce_ctx_id = (u32)~0;

	err = nvgpu_vidmem_init(mm);
	if (err)
		return err;

	/*
	 * this requires fixed allocations in vidmem which must be
	 * allocated before all other buffers
	 */
	if (g->ops.pmu.alloc_blob_space
			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
		if (err)
			return err;
	}

	err = nvgpu_alloc_sysmem_flush(g);
	if (err)
		return err;

	if (g->ops.mm.is_bar1_supported(g)) {
		err = nvgpu_init_bar1_vm(mm);
		if (err)
			return err;
	}
	if (g->ops.mm.init_bar2_vm) {
		err = g->ops.mm.init_bar2_vm(g);
		if (err)
			return err;
	}
	err = nvgpu_init_system_vm(mm);
	if (err)
		return err;

	err = nvgpu_init_hwpm(mm);
	if (err)
		return err;

	err = nvgpu_init_cde_vm(mm);
	if (err)
		return err;

	err = nvgpu_init_ce_vm(mm);
	if (err)
		return err;

	mm->remove_support = nvgpu_remove_mm_support;
	mm->remove_ce_support = nvgpu_remove_mm_ce_support;

	mm->sw_ready = true;

	return 0;
}

int nvgpu_init_mm_support(struct gk20a *g)
{
	u32 err;

	err = nvgpu_init_mm_reset_enable_hw(g);
	if (err)
		return err;

	err = nvgpu_init_mm_setup_sw(g);
	if (err)
		return err;

	if (g->ops.mm.init_mm_setup_hw)
		err = g->ops.mm.init_mm_setup_hw(g);

	return err;
}
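As a usage note, the entry points collected in this file are expected to be driven from the OS/platform layer roughly as sketched below. The example_* wrappers are hypothetical, and the sketch assumes nvgpu_init_mm_support(), nvgpu_init_mm_ce_context() and nvgpu_mm_suspend() are exported via the new <nvgpu/mm.h>; nothing else is taken from this change.

#include <nvgpu/mm.h>

#include "gk20a/gk20a.h"

/* Hypothetical power-on path. */
static int example_mm_poweron(struct gk20a *g)
{
	/* Resets FB, loads the gating prods, builds the common SW state,
	 * then runs the per-chip HW setup HAL. */
	int err = nvgpu_init_mm_support(g);

	if (err)
		return err;

	/* Once CE itself is up, create the context used for vidmem clears. */
	nvgpu_init_mm_ce_context(g);

	return 0;
}

/* Hypothetical suspend path: pause vidmem clears, clean CBC and flush L2. */
static int example_mm_suspend(struct gk20a *g)
{
	return nvgpu_mm_suspend(g);
}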