author    Alex Waterman <alexw@nvidia.com>  2016-03-11 18:57:42 -0500
committer Terje Bergstrom <tbergstrom@nvidia.com>  2016-03-25 16:19:17 -0400
commit    fbc21ed2ee1859dfe1814914a7b9367eee6b9f3d
tree      4792971895aca65f74a5480e82d67e7f312151a8
parent    907a43831513c2863e2f7fefc2d051f396d02354
gpu: nvgpu: split address space for fixed allocs
Allow a special address space node to be split out from the user
address space for fixed allocations. A debugfs node,

  /d/<gpu>/separate_fixed_allocs

controls this feature. To enable it:

  # echo <SPLIT_ADDR> > /d/<gpu>/separate_fixed_allocs

where <SPLIT_ADDR> is the address in the GVA range at which to make the
split. The split is then applied to all subsequently created address
space ranges until the feature is turned off. To turn it off, echo 0x0
into the same debugfs node.

Change-Id: I21a3f051c635a90a6bfa8deae53a54db400876f9
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1030303
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
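For concreteness, the sketch below (standalone C, not driver code) walks
through the range arithmetic that gk20a_init_vm() performs when the node
is set: the fixed allocator takes [small_vma_start, <SPLIT_ADDR>) and
the normal small-page VMA is moved up to begin at <SPLIT_ADDR>. All
addresses here are illustrative assumptions, not values from the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t small_vma_start = 0x0000000000100000ULL; /* assumed user VA base  */
	uint64_t small_vma_limit = 0x0000004000000000ULL; /* assumed user VA limit */
	uint64_t split           = 0x0000000100000000ULL; /* <SPLIT_ADDR> from debugfs */

	/* Mirrors the sanity check in the patch: the split must land
	 * strictly inside the small-page user range. */
	if (!split || split >= small_vma_limit) {
		printf("no split: fixed allocs share the normal user VMA\n");
		return 0;
	}

	printf("fixed VMA: [0x%llx, 0x%llx)\n",
	       (unsigned long long)small_vma_start, (unsigned long long)split);
	printf("small VMA: [0x%llx, 0x%llx)\n",
	       (unsigned long long)split, (unsigned long long)small_vma_limit);
	return 0;
}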
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c |  6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c    |  1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h    |  8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 53
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  5
5 files changed, 66 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index b6b38541..0571ca1f 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -279,13 +279,15 @@ static int gk20a_as_ioctl_get_va_regions(
 
 	for (i = 0; i < write_entries; ++i) {
 		struct nvgpu_as_va_region region;
+		struct gk20a_allocator *vma = vm->fixed.init ?
+			&vm->fixed : &vm->vma[i];
 
 		memset(&region, 0, sizeof(struct nvgpu_as_va_region));
 
 		region.page_size = vm->gmmu_page_sizes[i];
-		region.offset = vm->vma[i].base;
+		region.offset = vma->base;
 		/* No __aeabi_uldivmod() on some platforms... */
-		region.pages = (vm->vma[i].end - vm->vma[i].start) >>
+		region.pages = (vma->end - vma->start) >>
 			ilog2(region.page_size);
 
 		if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
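As an aside, the retained comment about __aeabi_uldivmod() explains why
region.pages is computed with a shift: 32-bit ARM kernels do not link
the libgcc 64-bit division helper, and page sizes are powers of two, so
shifting by ilog2(page_size) is an exact substitute for division. A
standalone sketch of the idiom (pages_in_range() is a made-up name;
__builtin_ctz stands in for the kernel's ilog2()):

#include <stdint.h>
#include <stdio.h>

/* Equivalent of the driver's (end - start) >> ilog2(page_size) idiom;
 * valid only when page_size is a power of two. */
static inline uint64_t pages_in_range(uint64_t start, uint64_t end,
				      uint32_t page_size)
{
	return (end - start) >> __builtin_ctz(page_size);
}

int main(void)
{
	/* 1 MiB of 4 KiB pages -> prints 256. */
	printf("%llu\n", (unsigned long long)pages_in_range(0, 1 << 20, 4096));
	return 0;
}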
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 735bf90b..a382ccd1 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1565,6 +1565,7 @@ static int gk20a_probe(struct platform_device *dev)
 	gk20a_pmu_debugfs_init(dev);
 	gk20a_cde_debugfs_init(dev);
 	gk20a_alloc_debugfs_init(dev);
+	gk20a_mm_debugfs_init(dev);
 #endif
 
 	gk20a_init_gr(gk20a);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 541e7b50..8a1f82bc 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -591,6 +591,14 @@ struct gk20a {
 	int dbg_powergating_disabled_refcount; /*refcount for pg disable */
 	int dbg_timeout_disabled_refcount; /*refcount for timeout disable */
 
+	/*
+	 * When set subsequent VMAs will separate fixed and non-fixed
+	 * allocations. This avoids conflicts with fixed and non-fixed allocs
+	 * for some tests. The value in separate_fixed_allocs is used to
+	 * determine the split boundary.
+	 */
+	u64 separate_fixed_allocs;
+
 	void (*remove_support)(struct platform_device *);
 
 	u64 pg_ingating_time_us;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 660fbb6f..7a02d68e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -323,7 +323,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 	if (err)
 		return err;
 
-	/*
+	/*
 	 * offset needs to be at the start of a page/cacheline boundary;
 	 * prune the preceding ctaglines that were allocated for alignment.
 	 */
@@ -2806,6 +2806,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
 		kernel_vma_start, kernel_vma_limit;
 	u32 pde_lo, pde_hi;
+	struct gk20a *g = mm->g;
 
 	/* note: this must match gmmu_pgsz_gk20a enum */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
@@ -2895,6 +2896,31 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		goto clean_up_pdes;
 	}
 
+	/*
+	 * Attempt to make a separate VM for fixed allocations.
+	 */
+	if (g->separate_fixed_allocs &&
+	    small_vma_start < small_vma_limit) {
+		if (g->separate_fixed_allocs >= small_vma_limit)
+			goto clean_up_pdes;
+
+		snprintf(alloc_name, sizeof(alloc_name),
+			 "gk20a_%s-fixed", name);
+
+		err = __gk20a_allocator_init(&vm->fixed,
+					     vm, alloc_name,
+					     small_vma_start,
+					     g->separate_fixed_allocs,
+					     SZ_4K,
+					     GPU_BALLOC_MAX_ORDER,
+					     GPU_BALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_ptes;
+
+		/* Make sure to update the user vma size. */
+		small_vma_start = g->separate_fixed_allocs;
+	}
+
 	if (small_vma_start < small_vma_limit) {
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
 			 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
@@ -3057,14 +3083,17 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	}
 
 	vma = &vm->vma[pgsz_idx];
-	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
+	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
+		if (vm->fixed.init)
+			vma = &vm->fixed;
 		vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
 						 (u64)args->pages *
 						 (u64)args->page_size);
-	else
+	} else {
 		vaddr_start = gk20a_balloc(vma,
 					   (u64)args->pages *
 					   (u64)args->page_size);
+	}
 
 	if (!vaddr_start) {
 		kfree(va_node);
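For context, this allocation path is reached from userspace through the
ALLOC_SPACE ioctl on the address-space device. A hedged sketch of such a
call follows: the flag name and the o_a.offset field are taken straight
from the hunk above, while the uapi header, device node, and exact
struct layout are assumptions about nvgpu's uapi of this era:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* assumed location of the nvgpu uapi */

int main(void)
{
	struct nvgpu_as_alloc_space_args args = {
		.pages = 256,
		.page_size = 4096,
		.flags = NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
	};
	int fd = open("/dev/nvhost-as-gpu", O_RDWR);	/* assumed node */

	if (fd < 0)
		return 1;
	/* With the split active, the requested offset must lie below
	 * <SPLIT_ADDR> so the reservation fits inside vm->fixed. */
	args.o_a.offset = 0x10000000ULL;
	if (ioctl(fd, NVGPU_AS_IOCTL_ALLOC_SPACE, &args) < 0)
		perror("NVGPU_AS_IOCTL_ALLOC_SPACE");
	close(fd);
	return 0;
}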
@@ -3131,7 +3160,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
 			gmmu_page_size_big : gmmu_page_size_small;
 
-	vma = &vm->vma[pgsz_idx];
+	if (vm->fixed.init)
+		vma = &vm->fixed;
+	else
+		vma = &vm->vma[pgsz_idx];
 	gk20a_bfree(vma, args->offset);
 
 	mutex_lock(&vm->update_gmmu_lock);
@@ -3321,6 +3353,8 @@ void gk20a_deinit_vm(struct vm_gk20a *vm)
 		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	if (vm->vma[gmmu_page_size_small].init)
 		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (vm->fixed.init)
+		gk20a_allocator_destroy(&vm->fixed);
 
 	gk20a_vm_free_entries(vm, &vm->pdb, 0);
 }
@@ -3834,6 +3868,16 @@ clean_up:
 	return err;
 }
 
+void gk20a_mm_debugfs_init(struct platform_device *pdev)
+{
+	struct gk20a_platform *platform = platform_get_drvdata(pdev);
+	struct dentry *gpu_root = platform->debugfs;
+	struct gk20a *g = gk20a_get_platform(pdev)->g;
+
+	debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root,
+			   &g->separate_fixed_allocs);
+}
+
 void gk20a_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
@@ -3854,4 +3898,3 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.init_pdb = gk20a_mm_init_pdb;
 	gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw;
 }
-
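Taken together, a test harness would set the debugfs node first and only
then create the address spaces it wants split. A minimal sketch,
assuming debugfs is mounted at /d and a gpu.0 instance name (the commit
message only specifies the /d/<gpu>/separate_fixed_allocs form):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Write one value to the node; path and instance name are assumed. */
static int write_node(const char *val)
{
	int fd = open("/d/gpu.0/separate_fixed_allocs", O_WRONLY);

	if (fd < 0)
		return -1;
	dprintf(fd, "%s\n", val);
	return close(fd);
}

int main(void)
{
	/* Split every subsequently created address space at 4 GiB; keep
	 * the 0x prefix since the attribute parses with base
	 * auto-detection. */
	if (write_node("0x100000000"))
		return 1;

	/* ... create address spaces / run fixed-alloc tests here ... */

	return write_node("0x0") ? 1 : 0;	/* turn the feature back off */
}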
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b8b0ca49..b151c180 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -259,6 +259,10 @@ struct vm_gk20a {
 	struct gk20a_mm_entry pdb;
 
 	struct gk20a_allocator vma[gmmu_nr_page_sizes];
+
+	/* If necessary, split fixed from non-fixed. */
+	struct gk20a_allocator fixed;
+
 	struct rb_root mapped_buffers;
 
 	struct list_head reserved_va_list;
@@ -279,6 +283,7 @@ struct channel_gk20a;
 int gk20a_init_mm_support(struct gk20a *g);
 int gk20a_init_mm_setup_sw(struct gk20a *g);
 int gk20a_init_mm_setup_hw(struct gk20a *g);
+void gk20a_mm_debugfs_init(struct platform_device *pdev);
 
 int gk20a_mm_fb_flush(struct gk20a *g);
 void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);