diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-03-11 18:57:42 -0500 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-03-25 16:19:17 -0400 |
commit | fbc21ed2ee1859dfe1814914a7b9367eee6b9f3d (patch) | |
tree | 4792971895aca65f74a5480e82d67e7f312151a8 /drivers | |
parent | 907a43831513c2863e2f7fefc2d051f396d02354 (diff) |
gpu: nvgpu: split address space for fixed allocs
Allow a special address space node to be split out from the
user address space for fixed allocations. A debugfs node,
/d/<gpu>/separate_fixed_allocs
Controls this feature. To enable it:
# echo <SPLIT_ADDR> > /d/<gpu>/separate_fixed_allocs
Where <SPLIT_ADDR> is the address to do the split on in the
GVA address range. This will cause the split to be made in
all subsequent address space ranges that get created until it
is turned off. To turn this off just echo 0x0 into the same
debugfs node.
Change-Id: I21a3f051c635a90a6bfa8deae53a54db400876f9
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1030303
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/as_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 53 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 5 |
5 files changed, 66 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index b6b38541..0571ca1f 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c | |||
@@ -279,13 +279,15 @@ static int gk20a_as_ioctl_get_va_regions( | |||
279 | 279 | ||
280 | for (i = 0; i < write_entries; ++i) { | 280 | for (i = 0; i < write_entries; ++i) { |
281 | struct nvgpu_as_va_region region; | 281 | struct nvgpu_as_va_region region; |
282 | struct gk20a_allocator *vma = vm->fixed.init ? | ||
283 | &vm->fixed : &vm->vma[i]; | ||
282 | 284 | ||
283 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); | 285 | memset(®ion, 0, sizeof(struct nvgpu_as_va_region)); |
284 | 286 | ||
285 | region.page_size = vm->gmmu_page_sizes[i]; | 287 | region.page_size = vm->gmmu_page_sizes[i]; |
286 | region.offset = vm->vma[i].base; | 288 | region.offset = vma->base; |
287 | /* No __aeabi_uldivmod() on some platforms... */ | 289 | /* No __aeabi_uldivmod() on some platforms... */ |
288 | region.pages = (vm->vma[i].end - vm->vma[i].start) >> | 290 | region.pages = (vma->end - vma->start) >> |
289 | ilog2(region.page_size); | 291 | ilog2(region.page_size); |
290 | 292 | ||
291 | if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) | 293 | if (copy_to_user(user_region_ptr + i, ®ion, sizeof(region))) |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 735bf90b..a382ccd1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -1565,6 +1565,7 @@ static int gk20a_probe(struct platform_device *dev) | |||
1565 | gk20a_pmu_debugfs_init(dev); | 1565 | gk20a_pmu_debugfs_init(dev); |
1566 | gk20a_cde_debugfs_init(dev); | 1566 | gk20a_cde_debugfs_init(dev); |
1567 | gk20a_alloc_debugfs_init(dev); | 1567 | gk20a_alloc_debugfs_init(dev); |
1568 | gk20a_mm_debugfs_init(dev); | ||
1568 | #endif | 1569 | #endif |
1569 | 1570 | ||
1570 | gk20a_init_gr(gk20a); | 1571 | gk20a_init_gr(gk20a); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 541e7b50..8a1f82bc 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -591,6 +591,14 @@ struct gk20a { | |||
591 | int dbg_powergating_disabled_refcount; /*refcount for pg disable */ | 591 | int dbg_powergating_disabled_refcount; /*refcount for pg disable */ |
592 | int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ | 592 | int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ |
593 | 593 | ||
594 | /* | ||
595 | * When set subsequent VMAs will separate fixed and non-fixed | ||
596 | * allocations. This avoids conflicts with fixed and non-fixed allocs | ||
597 | * for some tests. The value in separate_fixed_allocs is used to | ||
598 | * determine the split boundary. | ||
599 | */ | ||
600 | u64 separate_fixed_allocs; | ||
601 | |||
594 | void (*remove_support)(struct platform_device *); | 602 | void (*remove_support)(struct platform_device *); |
595 | 603 | ||
596 | u64 pg_ingating_time_us; | 604 | u64 pg_ingating_time_us; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 660fbb6f..7a02d68e 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -323,7 +323,7 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
323 | if (err) | 323 | if (err) |
324 | return err; | 324 | return err; |
325 | 325 | ||
326 | /* | 326 | /* |
327 | * offset needs to be at the start of a page/cacheline boundary; | 327 | * offset needs to be at the start of a page/cacheline boundary; |
328 | * prune the preceding ctaglines that were allocated for alignment. | 328 | * prune the preceding ctaglines that were allocated for alignment. |
329 | */ | 329 | */ |
@@ -2806,6 +2806,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2806 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, | 2806 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, |
2807 | kernel_vma_start, kernel_vma_limit; | 2807 | kernel_vma_start, kernel_vma_limit; |
2808 | u32 pde_lo, pde_hi; | 2808 | u32 pde_lo, pde_hi; |
2809 | struct gk20a *g = mm->g; | ||
2809 | 2810 | ||
2810 | /* note: this must match gmmu_pgsz_gk20a enum */ | 2811 | /* note: this must match gmmu_pgsz_gk20a enum */ |
2811 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; | 2812 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K }; |
@@ -2895,6 +2896,31 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2895 | goto clean_up_pdes; | 2896 | goto clean_up_pdes; |
2896 | } | 2897 | } |
2897 | 2898 | ||
2899 | /* | ||
2900 | * Attempt to make a separate VM for fixed allocations. | ||
2901 | */ | ||
2902 | if (g->separate_fixed_allocs && | ||
2903 | small_vma_start < small_vma_limit) { | ||
2904 | if (g->separate_fixed_allocs >= small_vma_limit) | ||
2905 | goto clean_up_pdes; | ||
2906 | |||
2907 | snprintf(alloc_name, sizeof(alloc_name), | ||
2908 | "gk20a_%s-fixed", name); | ||
2909 | |||
2910 | err = __gk20a_allocator_init(&vm->fixed, | ||
2911 | vm, alloc_name, | ||
2912 | small_vma_start, | ||
2913 | g->separate_fixed_allocs, | ||
2914 | SZ_4K, | ||
2915 | GPU_BALLOC_MAX_ORDER, | ||
2916 | GPU_BALLOC_GVA_SPACE); | ||
2917 | if (err) | ||
2918 | goto clean_up_ptes; | ||
2919 | |||
2920 | /* Make sure to update the user vma size. */ | ||
2921 | small_vma_start = g->separate_fixed_allocs; | ||
2922 | } | ||
2923 | |||
2898 | if (small_vma_start < small_vma_limit) { | 2924 | if (small_vma_start < small_vma_limit) { |
2899 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, | 2925 | snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name, |
2900 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); | 2926 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); |
@@ -3057,14 +3083,17 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
3057 | } | 3083 | } |
3058 | 3084 | ||
3059 | vma = &vm->vma[pgsz_idx]; | 3085 | vma = &vm->vma[pgsz_idx]; |
3060 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) | 3086 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) { |
3087 | if (vm->fixed.init) | ||
3088 | vma = &vm->fixed; | ||
3061 | vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, | 3089 | vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset, |
3062 | (u64)args->pages * | 3090 | (u64)args->pages * |
3063 | (u64)args->page_size); | 3091 | (u64)args->page_size); |
3064 | else | 3092 | } else { |
3065 | vaddr_start = gk20a_balloc(vma, | 3093 | vaddr_start = gk20a_balloc(vma, |
3066 | (u64)args->pages * | 3094 | (u64)args->pages * |
3067 | (u64)args->page_size); | 3095 | (u64)args->page_size); |
3096 | } | ||
3068 | 3097 | ||
3069 | if (!vaddr_start) { | 3098 | if (!vaddr_start) { |
3070 | kfree(va_node); | 3099 | kfree(va_node); |
@@ -3131,7 +3160,10 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
3131 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? | 3160 | pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ? |
3132 | gmmu_page_size_big : gmmu_page_size_small; | 3161 | gmmu_page_size_big : gmmu_page_size_small; |
3133 | 3162 | ||
3134 | vma = &vm->vma[pgsz_idx]; | 3163 | if (vm->fixed.init) |
3164 | vma = &vm->fixed; | ||
3165 | else | ||
3166 | vma = &vm->vma[pgsz_idx]; | ||
3135 | gk20a_bfree(vma, args->offset); | 3167 | gk20a_bfree(vma, args->offset); |
3136 | 3168 | ||
3137 | mutex_lock(&vm->update_gmmu_lock); | 3169 | mutex_lock(&vm->update_gmmu_lock); |
@@ -3321,6 +3353,8 @@ void gk20a_deinit_vm(struct vm_gk20a *vm) | |||
3321 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | 3353 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); |
3322 | if (vm->vma[gmmu_page_size_small].init) | 3354 | if (vm->vma[gmmu_page_size_small].init) |
3323 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 3355 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); |
3356 | if (vm->fixed.init) | ||
3357 | gk20a_allocator_destroy(&vm->fixed); | ||
3324 | 3358 | ||
3325 | gk20a_vm_free_entries(vm, &vm->pdb, 0); | 3359 | gk20a_vm_free_entries(vm, &vm->pdb, 0); |
3326 | } | 3360 | } |
@@ -3834,6 +3868,16 @@ clean_up: | |||
3834 | return err; | 3868 | return err; |
3835 | } | 3869 | } |
3836 | 3870 | ||
3871 | void gk20a_mm_debugfs_init(struct platform_device *pdev) | ||
3872 | { | ||
3873 | struct gk20a_platform *platform = platform_get_drvdata(pdev); | ||
3874 | struct dentry *gpu_root = platform->debugfs; | ||
3875 | struct gk20a *g = gk20a_get_platform(pdev)->g; | ||
3876 | |||
3877 | debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, | ||
3878 | &g->separate_fixed_allocs); | ||
3879 | } | ||
3880 | |||
3837 | void gk20a_init_mm(struct gpu_ops *gops) | 3881 | void gk20a_init_mm(struct gpu_ops *gops) |
3838 | { | 3882 | { |
3839 | gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; | 3883 | gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; |
@@ -3854,4 +3898,3 @@ void gk20a_init_mm(struct gpu_ops *gops) | |||
3854 | gops->mm.init_pdb = gk20a_mm_init_pdb; | 3898 | gops->mm.init_pdb = gk20a_mm_init_pdb; |
3855 | gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; | 3899 | gops->mm.init_mm_setup_hw = gk20a_init_mm_setup_hw; |
3856 | } | 3900 | } |
3857 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b8b0ca49..b151c180 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -259,6 +259,10 @@ struct vm_gk20a { | |||
259 | struct gk20a_mm_entry pdb; | 259 | struct gk20a_mm_entry pdb; |
260 | 260 | ||
261 | struct gk20a_allocator vma[gmmu_nr_page_sizes]; | 261 | struct gk20a_allocator vma[gmmu_nr_page_sizes]; |
262 | |||
263 | /* If necessary, split fixed from non-fixed. */ | ||
264 | struct gk20a_allocator fixed; | ||
265 | |||
262 | struct rb_root mapped_buffers; | 266 | struct rb_root mapped_buffers; |
263 | 267 | ||
264 | struct list_head reserved_va_list; | 268 | struct list_head reserved_va_list; |
@@ -279,6 +283,7 @@ struct channel_gk20a; | |||
279 | int gk20a_init_mm_support(struct gk20a *g); | 283 | int gk20a_init_mm_support(struct gk20a *g); |
280 | int gk20a_init_mm_setup_sw(struct gk20a *g); | 284 | int gk20a_init_mm_setup_sw(struct gk20a *g); |
281 | int gk20a_init_mm_setup_hw(struct gk20a *g); | 285 | int gk20a_init_mm_setup_hw(struct gk20a *g); |
286 | void gk20a_mm_debugfs_init(struct platform_device *pdev); | ||
282 | 287 | ||
283 | int gk20a_mm_fb_flush(struct gk20a *g); | 288 | int gk20a_mm_fb_flush(struct gk20a *g); |
284 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); | 289 | void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); |