author    Alex Waterman <alexw@nvidia.com>  2017-10-06 14:30:29 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-10-24 18:16:49 -0400
commit    2a285d0607a20694476399f5719e74dbc26fcd58 (patch)
tree      ef0246e3ca7b933ce3ea4c74061f61cc2e394b8b /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent    748331cbab1c7af26ab1fbae5ead2cdaff22806a (diff)
gpu: nvgpu: Cleanup generic MM code in gk20a/mm_gk20a.c
Move much of the remaining generic MM code to a new common location:
common/mm/mm.c. Also add a corresponding <nvgpu/mm.h> header. This mostly
consists of init and cleanup code to handle the common MM data structures
like the VIDMEM code, address spaces for various engines, etc.

A few more in-depth changes were made as well:

  1. alloc_inst_block() has been added to the MM HAL. This used to be
     defined directly in the gk20a code, but it used a register; as a
     result, if that register hypothetically changes in the future, it
     would need to become a HAL anyway. This patch preempts that and for
     now just defines all HALs to use the gk20a version.

  2. Rename as much as possible: global functions are, for the most part,
     prepended with nvgpu (there are a few exceptions which I have yet to
     decide what to do with). Static functions are renamed to be as
     consistent with their functionality as possible, since in some cases
     function effect and function name have diverged.

JIRA NVGPU-30

Change-Id: Ic948f1ecc2f7976eba4bb7169a44b7226bb7c0b5
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1574499
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
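As a rough illustration of the HAL change described in item 1 (not part of this diff), the wiring might look like the sketch below. The struct and field names used here are assumptions; only gk20a_alloc_inst_block() itself appears in this change.

/*
 * Illustrative sketch only: the shape of an alloc_inst_block() HAL hook.
 * The struct and field names are assumed, not taken from the nvgpu tree.
 */
struct gk20a;
struct nvgpu_mem;

/* The gk20a implementation that every chip's HAL is pointed at for now. */
int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);

struct nvgpu_mm_hal_sketch {
	int (*alloc_inst_block)(struct gk20a *g, struct nvgpu_mem *inst_block);
};

/* A per-chip HAL init would simply reuse the gk20a version. */
static inline void sketch_init_mm_hal(struct nvgpu_mm_hal_sketch *mm)
{
	mm->alloc_inst_block = gk20a_alloc_inst_block;
}

Callers would then go through g->ops rather than calling the gk20a function directly, so a future register change only requires overriding this HAL entry for the affected chips.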
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  441
1 files changed, 18 insertions, 423 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d96fa4e1..a17d6bb6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,6 +1,4 @@
 /*
- * GK20A memory management
- *
  * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,6 +22,7 @@
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
 #include <nvgpu/dma.h>
@@ -88,161 +87,6 @@
  *
  */
 
-static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
-
-static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-	if (g->ops.fb.reset)
-		g->ops.fb.reset(g);
-
-	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
-		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
-		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
-		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
-				g->blcg_enabled);
-	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
-		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
-				g->blcg_enabled);
-
-	if (g->ops.fb.init_fs_state)
-		g->ops.fb.init_fs_state(g);
-
-	return 0;
-}
-
-static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (mm->vidmem.ce_ctx_id != (u32)~0)
-		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	nvgpu_vm_put(mm->ce.vm);
-}
-
-static void gk20a_remove_mm_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (g->ops.mm.fault_info_mem_destroy)
-		g->ops.mm.fault_info_mem_destroy(g);
-
-	if (g->ops.mm.remove_bar2_vm)
-		g->ops.mm.remove_bar2_vm(g);
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		gk20a_free_inst_block(g, &mm->bar1.inst_block);
-		nvgpu_vm_put(mm->bar1.vm);
-	}
-
-	gk20a_free_inst_block(g, &mm->pmu.inst_block);
-	gk20a_free_inst_block(g, &mm->hwpm.inst_block);
-	nvgpu_vm_put(mm->pmu.vm);
-	nvgpu_vm_put(mm->cde.vm);
-
-	nvgpu_semaphore_sea_destroy(g);
-	nvgpu_vidmem_destroy(g);
-	nvgpu_pd_cache_fini(g);
-}
-
-static int gk20a_alloc_sysmem_flush(struct gk20a *g)
-{
-	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
-}
-
-int gk20a_init_mm_setup_sw(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (mm->sw_ready) {
-		gk20a_dbg_fn("skip init");
-		return 0;
-	}
-
-	mm->g = g;
-	nvgpu_mutex_init(&mm->l2_op_lock);
-
-	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
-		NV_MM_DEFAULT_KERNEL_SIZE;
-	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
-
-	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
-		       (int)(mm->channel.user_size >> 20),
-		       (int)(mm->channel.kernel_size >> 20));
-
-	nvgpu_init_pramin(mm);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	err = nvgpu_vidmem_init(mm);
-	if (err)
-		return err;
-
-	/*
-	 * this requires fixed allocations in vidmem which must be
-	 * allocated before all other buffers
-	 */
-	if (g->ops.pmu.alloc_blob_space
-			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
-		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
-		if (err)
-			return err;
-	}
-
-	err = gk20a_alloc_sysmem_flush(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		err = gk20a_init_bar1_vm(mm);
-		if (err)
-			return err;
-	}
-	if (g->ops.mm.init_bar2_vm) {
-		err = g->ops.mm.init_bar2_vm(g);
-		if (err)
-			return err;
-	}
-	err = gk20a_init_system_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_hwpm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_cde_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_ce_vm(mm);
-	if (err)
-		return err;
-
-	mm->remove_support = gk20a_remove_mm_support;
-	mm->remove_ce_support = gk20a_remove_mm_ce_support;
-
-	mm->sw_ready = true;
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 /* make sure gk20a_init_mm_support is called before */
 int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
@@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-int gk20a_init_mm_support(struct gk20a *g)
-{
-	u32 err;
-
-	err = gk20a_init_mm_reset_enable_hw(g);
-	if (err)
-		return err;
-
-	err = gk20a_init_mm_setup_sw(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.init_mm_setup_hw)
-		err = g->ops.mm.init_mm_setup_hw(g);
-
-	return err;
-}
-
-void gk20a_init_mm_ce_context(struct gk20a *g)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-	if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
-		g->mm.vidmem.ce_ctx_id =
-			gk20a_ce_create_context_with_cb(g,
-					gk20a_fifo_get_fast_ce_runlist_id(g),
-					-1,
-					-1,
-					-1,
-					NULL);
-
-		if (g->mm.vidmem.ce_ctx_id == (u32)~0)
-			nvgpu_err(g,
-				"Failed to allocate CE context for vidmem page clearing support");
-	}
-#endif
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.update_entry = NULL}
 };
 
-/*
- * Attempt to find a reserved memory area to determine PTE size for the passed
- * mapping. If no reserved area can be found use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-					      u64 base, u64 size)
-{
-	struct nvgpu_vm_area *vm_area;
-
-	vm_area = nvgpu_vm_area_find(vm, base);
-	if (!vm_area)
-		return gmmu_page_size_small;
-
-	return vm_area->pgsz_idx;
-}
-
-/*
- * This is for when the address space does not support unified address spaces.
- */
-static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
-						      u64 base, u64 size)
-{
-	if (!base) {
-		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-			return gmmu_page_size_big;
-		return gmmu_page_size_small;
-	} else {
-		if (base < __nv_gmmu_va_small_page_limit())
-			return gmmu_page_size_small;
-		else
-			return gmmu_page_size_big;
-	}
-}
-
-/*
- * This determines the PTE size for a given alloc. Used by both the GVA space
- * allocator and the mm core code so that agreement can be reached on how to
- * map allocations.
- *
- * The page size of a buffer is this:
- *
- *   o  If the VM doesn't support large pages then obviously small pages
- *      must be used.
- *   o  If the base address is non-zero (fixed address map):
- *      - Attempt to find a reserved memory area and use the page size
- *        based on that.
- *      - If no reserved page size is available, default to small pages.
- *   o  If the base is zero:
- *      - If the size is larger than or equal to the big page size, use big
- *        pages.
- *      - Otherwise use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	if (!vm->big_pages)
-		return gmmu_page_size_small;
-
-	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
-		return __get_pte_size_split_addr(vm, base, size);
-
-	if (base)
-		return __get_pte_size_fixed_map(vm, base, size);
-
-	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-		return gmmu_page_size_big;
-	return gmmu_page_size_small;
-}
-
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 {
 	int err = 0;
@@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 	return __gk20a_vm_bind_channel(as_share->vm, ch);
 }
 
-int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
-	if (err) {
-		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		return err;
-	}
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
-void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (inst_block->size)
-		nvgpu_dma_free(g, inst_block);
-}
-
-u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (g->mm.has_physical_mode)
-		return nvgpu_mem_get_phys_addr(g, inst_block);
-	else
-		return nvgpu_mem_get_addr(g, inst_block);
-}
-
-static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
-	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	mm->bar1.vm = nvgpu_vm_init(g,
-				    big_page_size,
-				    SZ_4K,
-				    mm->bar1.aperture_size - SZ_4K,
-				    mm->bar1.aperture_size,
-				    true, false,
-				    "bar1");
-	if (!mm->bar1.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->bar1.vm);
-	return err;
-}
-
-/* pmu vm, share channel_vm interfaces */
-static int gk20a_init_system_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-	u32 low_hole, aperture_size;
-
-	/*
-	 * No user region - so we will pass that as zero sized.
-	 */
-	low_hole = SZ_4K * 16;
-	aperture_size = GK20A_PMU_VA_SIZE * 2;
-
-	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
-	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
-
-	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
-				   low_hole,
-				   aperture_size - low_hole,
-				   aperture_size,
-				   true,
-				   false,
-				   "system");
-	if (!mm->pmu.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->pmu.vm);
-	return err;
-}
-
-static int gk20a_init_hwpm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		return err;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
-
-	return 0;
-}
-
-static int gk20a_init_cde_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
-				   big_page_size << 10,
-				   NV_MM_DEFAULT_KERNEL_SIZE,
-				   NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				   false, false, "cde");
-	if (!mm->cde.vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static int gk20a_init_ce_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
-				  big_page_size << 10,
-				  NV_MM_DEFAULT_KERNEL_SIZE,
-				  NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				  false, false, "ce");
-	if (!mm->ce.vm)
-		return -ENOMEM;
-	return 0;
-}
-
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		       struct vm_gk20a *vm)
 {
@@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
-		gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
+		nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
 
 	g->ops.mm.init_pdb(g, inst_block, vm);
 
@@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
 }
 
+int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
+	if (err) {
+		nvgpu_err(g, "%s: memory allocation failed", __func__);
+		return err;
+	}
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
 int gk20a_mm_fb_flush(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -992,19 +600,6 @@ hw_was_off:
 	gk20a_idle_nosuspend(g);
 }
 
-int gk20a_mm_suspend(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-
-	nvgpu_vidmem_thread_pause_sync(&g->mm);
-
-	g->ops.mm.cbc_clean(g);
-	g->ops.mm.l2_flush(g, false);
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
 {
 	return 34;