Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  441
1 file changed, 18 insertions(+), 423 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d96fa4e1..a17d6bb6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,6 +1,4 @@
 /*
- * GK20A memory management
- *
  * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,6 +22,7 @@
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
 #include <nvgpu/dma.h>
@@ -88,161 +87,6 @@
  *
  */
 
-static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
-
-static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-	if (g->ops.fb.reset)
-		g->ops.fb.reset(g);
-
-	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
-		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
-		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
-		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
-				g->blcg_enabled);
-	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
-		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
-				g->blcg_enabled);
-
-	if (g->ops.fb.init_fs_state)
-		g->ops.fb.init_fs_state(g);
-
-	return 0;
-}
-
-static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (mm->vidmem.ce_ctx_id != (u32)~0)
-		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	nvgpu_vm_put(mm->ce.vm);
-}
-
-static void gk20a_remove_mm_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (g->ops.mm.fault_info_mem_destroy)
-		g->ops.mm.fault_info_mem_destroy(g);
-
-	if (g->ops.mm.remove_bar2_vm)
-		g->ops.mm.remove_bar2_vm(g);
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		gk20a_free_inst_block(g, &mm->bar1.inst_block);
-		nvgpu_vm_put(mm->bar1.vm);
-	}
-
-	gk20a_free_inst_block(g, &mm->pmu.inst_block);
-	gk20a_free_inst_block(g, &mm->hwpm.inst_block);
-	nvgpu_vm_put(mm->pmu.vm);
-	nvgpu_vm_put(mm->cde.vm);
-
-	nvgpu_semaphore_sea_destroy(g);
-	nvgpu_vidmem_destroy(g);
-	nvgpu_pd_cache_fini(g);
-}
-
-static int gk20a_alloc_sysmem_flush(struct gk20a *g)
-{
-	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
-}
-
-int gk20a_init_mm_setup_sw(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (mm->sw_ready) {
-		gk20a_dbg_fn("skip init");
-		return 0;
-	}
-
-	mm->g = g;
-	nvgpu_mutex_init(&mm->l2_op_lock);
-
-	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
-		NV_MM_DEFAULT_KERNEL_SIZE;
-	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
-
-	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
-		       (int)(mm->channel.user_size >> 20),
-		       (int)(mm->channel.kernel_size >> 20));
-
-	nvgpu_init_pramin(mm);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	err = nvgpu_vidmem_init(mm);
-	if (err)
-		return err;
-
-	/*
-	 * this requires fixed allocations in vidmem which must be
-	 * allocated before all other buffers
-	 */
-	if (g->ops.pmu.alloc_blob_space
-			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
-		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
-		if (err)
-			return err;
-	}
-
-	err = gk20a_alloc_sysmem_flush(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		err = gk20a_init_bar1_vm(mm);
-		if (err)
-			return err;
-	}
-	if (g->ops.mm.init_bar2_vm) {
-		err = g->ops.mm.init_bar2_vm(g);
-		if (err)
-			return err;
-	}
-	err = gk20a_init_system_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_hwpm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_cde_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_ce_vm(mm);
-	if (err)
-		return err;
-
-	mm->remove_support = gk20a_remove_mm_support;
-	mm->remove_ce_support = gk20a_remove_mm_ce_support;
-
-	mm->sw_ready = true;
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 /* make sure gk20a_init_mm_support is called before */
 int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
@@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-int gk20a_init_mm_support(struct gk20a *g)
-{
-	u32 err;
-
-	err = gk20a_init_mm_reset_enable_hw(g);
-	if (err)
-		return err;
-
-	err = gk20a_init_mm_setup_sw(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.init_mm_setup_hw)
-		err = g->ops.mm.init_mm_setup_hw(g);
-
-	return err;
-}
-
-void gk20a_init_mm_ce_context(struct gk20a *g)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-	if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
-		g->mm.vidmem.ce_ctx_id =
-			gk20a_ce_create_context_with_cb(g,
-				gk20a_fifo_get_fast_ce_runlist_id(g),
-				-1,
-				-1,
-				-1,
-				NULL);
-
-		if (g->mm.vidmem.ce_ctx_id == (u32)~0)
-			nvgpu_err(g,
-				"Failed to allocate CE context for vidmem page clearing support");
-	}
-#endif
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.update_entry = NULL}
 };
 
-/*
- * Attempt to find a reserved memory area to determine PTE size for the passed
- * mapping. If no reserved area can be found use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-					      u64 base, u64 size)
-{
-	struct nvgpu_vm_area *vm_area;
-
-	vm_area = nvgpu_vm_area_find(vm, base);
-	if (!vm_area)
-		return gmmu_page_size_small;
-
-	return vm_area->pgsz_idx;
-}
-
-/*
- * This is for when the address space does not support unified address spaces.
- */
-static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
-					u64 base, u64 size)
-{
-	if (!base) {
-		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-			return gmmu_page_size_big;
-		return gmmu_page_size_small;
-	} else {
-		if (base < __nv_gmmu_va_small_page_limit())
-			return gmmu_page_size_small;
-		else
-			return gmmu_page_size_big;
-	}
-}
-
-/*
- * This determines the PTE size for a given alloc. Used by both the GVA space
- * allocator and the mm core code so that agreement can be reached on how to
- * map allocations.
- *
- * The page size of a buffer is this:
- *
- *   o  If the VM doesn't support large pages then obviously small pages
- *      must be used.
- *   o  If the base address is non-zero (fixed address map):
- *      - Attempt to find a reserved memory area and use the page size
- *        based on that.
- *      - If no reserved page size is available, default to small pages.
- *   o  If the base is zero:
- *      - If the size is larger than or equal to the big page size, use big
- *        pages.
- *      - Otherwise use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	if (!vm->big_pages)
-		return gmmu_page_size_small;
-
-	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
-		return __get_pte_size_split_addr(vm, base, size);
-
-	if (base)
-		return __get_pte_size_fixed_map(vm, base, size);
-
-	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-		return gmmu_page_size_big;
-	return gmmu_page_size_small;
-}
-
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 {
 	int err = 0;
@@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 	return __gk20a_vm_bind_channel(as_share->vm, ch);
 }
 
-int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
-	if (err) {
-		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		return err;
-	}
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
-void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (inst_block->size)
-		nvgpu_dma_free(g, inst_block);
-}
-
-u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (g->mm.has_physical_mode)
-		return nvgpu_mem_get_phys_addr(g, inst_block);
-	else
-		return nvgpu_mem_get_addr(g, inst_block);
-}
-
-static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
-	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	mm->bar1.vm = nvgpu_vm_init(g,
-				    big_page_size,
-				    SZ_4K,
-				    mm->bar1.aperture_size - SZ_4K,
-				    mm->bar1.aperture_size,
-				    true, false,
-				    "bar1");
-	if (!mm->bar1.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->bar1.vm);
-	return err;
-}
-
-/* pmu vm, share channel_vm interfaces */
-static int gk20a_init_system_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-	u32 low_hole, aperture_size;
-
-	/*
-	 * No user region - so we will pass that as zero sized.
-	 */
-	low_hole = SZ_4K * 16;
-	aperture_size = GK20A_PMU_VA_SIZE * 2;
-
-	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
-	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
-
-	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
-				   low_hole,
-				   aperture_size - low_hole,
-				   aperture_size,
-				   true,
-				   false,
-				   "system");
-	if (!mm->pmu.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->pmu.vm);
-	return err;
-}
-
-static int gk20a_init_hwpm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		return err;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
-
-	return 0;
-}
-
-static int gk20a_init_cde_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
-				   big_page_size << 10,
-				   NV_MM_DEFAULT_KERNEL_SIZE,
-				   NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				   false, false, "cde");
-	if (!mm->cde.vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static int gk20a_init_ce_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
-				  big_page_size << 10,
-				  NV_MM_DEFAULT_KERNEL_SIZE,
-				  NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				  false, false, "ce");
-	if (!mm->ce.vm)
-		return -ENOMEM;
-	return 0;
-}
-
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		       struct vm_gk20a *vm)
 {
@@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
-		gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
+		nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
 
 	g->ops.mm.init_pdb(g, inst_block, vm);
 
@@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 		g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
 }
 
+int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
+	if (err) {
+		nvgpu_err(g, "%s: memory allocation failed", __func__);
+		return err;
+	}
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
 int gk20a_mm_fb_flush(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -992,19 +600,6 @@ hw_was_off:
 	gk20a_idle_nosuspend(g);
 }
 
-int gk20a_mm_suspend(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-
-	nvgpu_vidmem_thread_pause_sync(&g->mm);
-
-	g->ops.mm.cbc_clean(g);
-	g->ops.mm.l2_flush(g, false);
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
 {
 	return 34;