Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 76
1 file changed, 55 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 738df2af..7a02d68e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A memory management
  *
- * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -323,7 +323,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 	if (err)
 		return err;
 
 	/*
 	 * offset needs to be at the start of a page/cacheline boundary;
 	 * prune the preceding ctaglines that were allocated for alignment.
 	 */
@@ -1290,12 +1290,6 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	int ctag_granularity = g->ops.fb.compression_page_size(g);
 	u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
 
-	if (clear_ctags && ctag_offset) {
-		/* init/clear the ctag buffer */
-		g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-				    ctag_offset, ctag_offset + ctag_lines - 1);
-	}
-
 	/* Allocate (or validate when map_offset != 0) the virtual address. */
 	if (!map_offset) {
 		map_offset = gk20a_vm_alloc_va(vm, size,
@@ -1651,17 +1645,14 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 		bfr.kind_v = bfr.uc_kind_v;
 	} else {
 		gk20a_get_comptags(d, dmabuf, &comptags);
-		clear_ctags = true;
-
-		if (comptags.lines < comptags.allocated_lines) {
-			/* clear tail-padding comptags */
-			u32 ctagmin = comptags.offset + comptags.lines;
-			u32 ctagmax = comptags.offset +
-				comptags.allocated_lines - 1;
 
+		if (g->ops.ltc.cbc_ctrl)
 			g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-					    ctagmin, ctagmax);
-		}
+					    comptags.offset,
+					    comptags.offset +
+					    comptags.allocated_lines - 1);
+		else
+			clear_ctags = true;
 	}
 }
 
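The two hunks above rework comptag clearing: the map-time clear in gk20a_locked_gmmu_map is removed, and gk20a_vm_map now clears the entire allocated ctagline range up front whenever the ltc.cbc_ctrl hook is present, keeping clear_ctags only as a fallback for chips without that hook. A minimal userspace sketch of the resulting policy, assuming the field names from the hunk; cbc_clear() is a hypothetical stand-in for g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, ...):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct comptags { uint32_t offset, lines, allocated_lines; };

    /* Stand-in for g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, min, max). */
    static void cbc_clear(uint32_t min, uint32_t max)
    {
        printf("clearing ctaglines [%u..%u]\n", min, max);
    }

    /* Returns the value clear_ctags should take after the allocation. */
    static bool clear_on_alloc(const struct comptags *ct, bool have_cbc_ctrl)
    {
        if (have_cbc_ctrl) {
            /* Clear the whole allocation, alignment padding included. */
            cbc_clear(ct->offset, ct->offset + ct->allocated_lines - 1);
            return false;
        }
        return true; /* no hook: leave clearing to a later stage */
    }

    int main(void)
    {
        struct comptags ct = { .offset = 128, .lines = 10, .allocated_lines = 16 };
        printf("clear_ctags = %d\n", clear_on_alloc(&ct, true));
        return 0;
    }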
@@ -2815,6 +2806,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
 		kernel_vma_start, kernel_vma_limit;
 	u32 pde_lo, pde_hi;
+	struct gk20a *g = mm->g;
 
 	/* note: this must match gmmu_pgsz_gk20a enum */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
@@ -2904,6 +2896,31 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		goto clean_up_pdes;
 	}
 
+	/*
+	 * Attempt to make a separate VM for fixed allocations.
+	 */
+	if (g->separate_fixed_allocs &&
+	    small_vma_start < small_vma_limit) {
+		if (g->separate_fixed_allocs >= small_vma_limit)
+			goto clean_up_pdes;
+
+		snprintf(alloc_name, sizeof(alloc_name),
+			 "gk20a_%s-fixed", name);
+
+		err = __gk20a_allocator_init(&vm->fixed,
+					     vm, alloc_name,
+					     small_vma_start,
+					     g->separate_fixed_allocs,
+					     SZ_4K,
+					     GPU_BALLOC_MAX_ORDER,
+					     GPU_BALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_ptes;
+
+		/* Make sure to update the user vma size. */
+		small_vma_start = g->separate_fixed_allocs;
+	}
+
 	if (small_vma_start < small_vma_limit) {
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
 			 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
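This block carves a dedicated allocator out of the low end of the user VA range. Judging by the bounds check against small_vma_limit and the final assignment, g->separate_fixed_allocs is treated as a split address: the fixed allocator covers [small_vma_start, separate_fixed_allocs) and the small-page VMA is moved up to begin at the split. A sketch of that arithmetic under the same assumption (the return convention is mine):

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Split [*small_start, small_limit) at 'split'. Returns 1 when a fixed
     * range is carved out, 0 when the feature is off, -1 for a bad split.
     */
    static int carve_fixed_range(uint64_t *small_start, uint64_t small_limit,
                                 uint64_t split,
                                 uint64_t *fixed_start, uint64_t *fixed_limit)
    {
        if (!split || *small_start >= small_limit)
            return 0;
        if (split >= small_limit)
            return -1; /* mirrors the goto clean_up_pdes above */
        *fixed_start = *small_start;
        *fixed_limit = split;
        *small_start = split; /* "update the user vma size" */
        return 1;
    }

    int main(void)
    {
        uint64_t small = 0x100000, fs, fl;
        if (carve_fixed_range(&small, 0x40000000, 0x10000000, &fs, &fl) == 1)
            printf("fixed [%#jx, %#jx), small starts at %#jx\n",
                   (uintmax_t)fs, (uintmax_t)fl, (uintmax_t)small);
        return 0;
    }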
@@ -3066,14 +3083,17 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	}
 
 	vma = &vm->vma[pgsz_idx];
-	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
+	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
+		if (vm->fixed.init)
+			vma = &vm->fixed;
 		vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
 						 (u64)args->pages *
 						 (u64)args->page_size);
-	else
+	} else {
 		vaddr_start = gk20a_balloc(vma,
 					   (u64)args->pages *
 					   (u64)args->page_size);
+	}
 
 	if (!vaddr_start) {
 		kfree(va_node);
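With the carve-out in place, fixed-offset space requests are routed to vm->fixed whenever it was initialized, while ordinary requests keep using the per-page-size VMA. The selection is a simple two-level preference; a sketch, where only the init flag is taken from the diff:

    struct allocator { int init; /* nonzero once the allocator is usable */ };

    struct vm {
        struct allocator vma[3]; /* small, big, kernel page sizes */
        struct allocator fixed;  /* optional fixed-offset carve-out */
    };

    static struct allocator *pick_vma(struct vm *vm, int pgsz_idx,
                                      int wants_fixed_offset)
    {
        if (wants_fixed_offset && vm->fixed.init)
            return &vm->fixed;
        return &vm->vma[pgsz_idx];
    }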
@@ -3140,7 +3160,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
 			gmmu_page_size_big : gmmu_page_size_small;
 
-	vma = &vm->vma[pgsz_idx];
+	if (vm->fixed.init)
+		vma = &vm->fixed;
+	else
+		vma = &vm->vma[pgsz_idx];
 	gk20a_bfree(vma, args->offset);
 
 	mutex_lock(&vm->update_gmmu_lock);
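Note the asymmetry with the allocation path: the free path does not look at the FIXED_OFFSET flag, so once vm->fixed.init is set every space free is handed to the fixed allocator. Presumably the debugfs knob is only enabled in configurations where all alloc-space reservations are fixed-offset; a non-fixed reservation freed here would otherwise go to the wrong allocator.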
@@ -3330,6 +3353,8 @@ void gk20a_deinit_vm(struct vm_gk20a *vm)
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	if (vm->vma[gmmu_page_size_small].init)
 		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (vm->fixed.init)
+		gk20a_allocator_destroy(&vm->fixed);
 
 	gk20a_vm_free_entries(vm, &vm->pdb, 0);
 }
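Teardown mirrors the other VMAs: the carve-out is destroyed only when its init flag shows it was actually created, so VMs brought up without separate_fixed_allocs are unaffected.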
@@ -3843,6 +3868,16 @@ clean_up:
 	return err;
 }
 
+void gk20a_mm_debugfs_init(struct platform_device *pdev)
+{
+	struct gk20a_platform *platform = platform_get_drvdata(pdev);
+	struct dentry *gpu_root = platform->debugfs;
+	struct gk20a *g = gk20a_get_platform(pdev)->g;
+
+	debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root,
+			   &g->separate_fixed_allocs);
+}
+
 void gk20a_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
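The new gk20a_mm_debugfs_init exposes g->separate_fixed_allocs as a writable hex value via debugfs_create_x64, so the split address can be set at runtime before an address space is created. A hypothetical userspace usage sketch; the debugfs mount point and per-GPU directory name are assumptions, not taken from the diff:

    #include <stdio.h>

    int main(void)
    {
        /* Path is illustrative; the real directory is platform->debugfs. */
        FILE *f = fopen("/sys/kernel/debug/gpu.0/separate_fixed_allocs", "w");

        if (!f) {
            perror("separate_fixed_allocs");
            return 1;
        }
        /* x64 debugfs attributes accept standard hex input. */
        fprintf(f, "0x%llx\n", 0x10000000ULL);
        return fclose(f) ? 1 : 0;
    }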
@@ -3863,4 +3898,3 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.init_pdb = gk20a_mm_init_pdb;
 	gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
 }
-