diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-10-16 08:15:11 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:11:46 -0400 |
commit | 2eb6dcb4694c8b83e22c50d9fd4d3fdd85b93c46 (patch) | |
tree | 0a2d10c9873b81fd6a6821959874d4345cc6bfad /drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |
parent | ecc6f27fd13e7560d124faf67d114b93d47b73de (diff) |
gpu: nvgpu: Implement 64k large page support
Implement support for the 64kB large page size. Add an API to create an
address space via IOCTL so that we can accept flags, and assign one
flag for enabling the 64kB large page size.
Also add APIs to set the per-context large page size. This is possible
only on Maxwell, so return an error if the caller tries to set the large
page size on Kepler.
The default large page size is still 128kB.
Change-Id: I20b51c8f6d4a984acae8411ace3de9000c78e82f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 41 |
1 files changed, 24 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 70f4294b..e7fdb336 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -327,17 +327,7 @@ static int gk20a_init_mm_setup_hw(struct gk20a *g) | |||
327 | 327 | ||
328 | gk20a_dbg_fn(""); | 328 | gk20a_dbg_fn(""); |
329 | 329 | ||
330 | /* set large page size in fb | 330 | g->ops.fb.set_mmu_page_size(g); |
331 | * note this is very early on, can we defer it ? */ | ||
332 | { | ||
333 | u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); | ||
334 | |||
335 | fb_mmu_ctrl = (fb_mmu_ctrl & | ||
336 | ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) | | ||
337 | fb_mmu_ctrl_vm_pg_size_128kb_f(); | ||
338 | |||
339 | gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); | ||
340 | } | ||
341 | 331 | ||
342 | inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a()); | 332 | inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a()); |
343 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); | 333 | gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa); |
@@ -2173,6 +2163,7 @@ void gk20a_vm_put(struct vm_gk20a *vm) | |||
2173 | 2163 | ||
2174 | static int gk20a_init_vm(struct mm_gk20a *mm, | 2164 | static int gk20a_init_vm(struct mm_gk20a *mm, |
2175 | struct vm_gk20a *vm, | 2165 | struct vm_gk20a *vm, |
2166 | u32 big_page_size, | ||
2176 | u64 low_hole, | 2167 | u64 low_hole, |
2177 | u64 aperture_size, | 2168 | u64 aperture_size, |
2178 | bool big_pages, | 2169 | bool big_pages, |
@@ -2184,7 +2175,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm, | |||
2184 | size_t vma_size; | 2175 | size_t vma_size; |
2185 | 2176 | ||
2186 | /* note: keep the page sizes sorted lowest to highest here */ | 2177 | /* note: keep the page sizes sorted lowest to highest here */ |
2187 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; | 2178 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size }; |
2188 | 2179 | ||
2189 | vm->mm = mm; | 2180 | vm->mm = mm; |
2190 | 2181 | ||
@@ -2331,7 +2322,7 @@ clean_up_pdes: | |||
2331 | } | 2322 | } |
2332 | 2323 | ||
2333 | /* address space interfaces for the gk20a module */ | 2324 | /* address space interfaces for the gk20a module */ |
2334 | int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) | 2325 | int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size) |
2335 | { | 2326 | { |
2336 | struct gk20a_as *as = as_share->as; | 2327 | struct gk20a_as *as = as_share->as; |
2337 | struct gk20a *g = gk20a_from_as(as); | 2328 | struct gk20a *g = gk20a_from_as(as); |
@@ -2351,8 +2342,15 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) | |||
2351 | vm->enable_ctag = true; | 2342 | vm->enable_ctag = true; |
2352 | 2343 | ||
2353 | snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); | 2344 | snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id); |
2354 | err = gk20a_init_vm(mm, vm, | 2345 | |
2355 | SZ_128K << 10, mm->channel.size, true, name); | 2346 | if (big_page_size && !g->ops.mm.set_big_page_size) |
2347 | return -EINVAL; | ||
2348 | if (big_page_size == 0) | ||
2349 | big_page_size = | ||
2350 | gk20a_get_platform(g->dev)->default_big_page_size; | ||
2351 | |||
2352 | err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10, | ||
2353 | mm->channel.size, true, name); | ||
2356 | 2354 | ||
2357 | return 0; | 2355 | return 0; |
2358 | } | 2356 | } |
@@ -2709,10 +2707,12 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | |||
2709 | struct device *d = dev_from_gk20a(g); | 2707 | struct device *d = dev_from_gk20a(g); |
2710 | struct inst_desc *inst_block = &mm->bar1.inst_block; | 2708 | struct inst_desc *inst_block = &mm->bar1.inst_block; |
2711 | dma_addr_t iova; | 2709 | dma_addr_t iova; |
2710 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; | ||
2712 | 2711 | ||
2713 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; | 2712 | mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; |
2714 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); | 2713 | gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); |
2715 | gk20a_init_vm(mm, vm, SZ_4K, mm->bar1.aperture_size, false, "bar1"); | 2714 | gk20a_init_vm(mm, vm, big_page_size, SZ_4K, |
2715 | mm->bar1.aperture_size, false, "bar1"); | ||
2716 | 2716 | ||
2717 | gk20a_dbg_info("pde pa=0x%llx", | 2717 | gk20a_dbg_info("pde pa=0x%llx", |
2718 | (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); | 2718 | (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl)); |
@@ -2761,6 +2761,9 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) | |||
2761 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | 2761 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), |
2762 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); | 2762 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); |
2763 | 2763 | ||
2764 | if (g->ops.mm.set_big_page_size) | ||
2765 | g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); | ||
2766 | |||
2764 | gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa); | 2767 | gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa); |
2765 | return 0; | 2768 | return 0; |
2766 | 2769 | ||
@@ -2789,11 +2792,12 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) | |||
2789 | struct device *d = dev_from_gk20a(g); | 2792 | struct device *d = dev_from_gk20a(g); |
2790 | struct inst_desc *inst_block = &mm->pmu.inst_block; | 2793 | struct inst_desc *inst_block = &mm->pmu.inst_block; |
2791 | dma_addr_t iova; | 2794 | dma_addr_t iova; |
2795 | u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; | ||
2792 | 2796 | ||
2793 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; | 2797 | mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; |
2794 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); | 2798 | gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); |
2795 | 2799 | ||
2796 | gk20a_init_vm(mm, vm, | 2800 | gk20a_init_vm(mm, vm, big_page_size, |
2797 | SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system"); | 2801 | SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system"); |
2798 | 2802 | ||
2799 | gk20a_dbg_info("pde pa=0x%llx", | 2803 | gk20a_dbg_info("pde pa=0x%llx", |
@@ -2842,6 +2846,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) | |||
2842 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | 2846 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), |
2843 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); | 2847 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); |
2844 | 2848 | ||
2849 | if (g->ops.mm.set_big_page_size) | ||
2850 | g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); | ||
2851 | |||
2845 | return 0; | 2852 | return 0; |
2846 | 2853 | ||
2847 | clean_up_inst_block: | 2854 | clean_up_inst_block: |