author		Alex Waterman <alexw@nvidia.com>	2017-04-25 18:56:12 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-05-19 18:34:12 -0400
commit		29cc82844e03b6f9f0e6801169b6fa0e72d56628
tree		f616b6c651ce80765ee344aa33ca204c555e67f2 /drivers/gpu/nvgpu/gk20a
parent		014ace5a85f274de7debb4c6168d69c803445e19
gpu: nvgpu: Split vm_area management into vm code
The vm_reserved_va_node struct is essentially a special VM area that
can be used for sparse mappings and fixed mappings. Its name is
somewhat confusing, since "node" typically denotes a list item; though
this struct is part of a list, it is much more than a list item. It
has therefore been renamed to nvgpu_vm_area to capture its actual use
more accurately.
All of the vm area management code also moves into a new file devoted
solely to vm_area management.
Also add a brief overview of the VM architecture. This should help
others follow the hierarchy of ownership and lifetimes in the rather
complex MM code.
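
For reference, a minimal sketch of what the renamed struct presumably
looks like after this change. It is reconstructed from the removed
vm_reserved_va_node below; apart from the renamed list link
(vm->vm_area_list) and the pgsz_idx/sparse members visible in this
diff, the field names are carried-over assumptions:

struct nvgpu_vm_area {
	u32 pgsz_idx;		/* Page size index used for this area. */
	u64 vaddr_start;	/* GPU VA at which the area begins. */
	u64 size;		/* Size of the area in bytes. */
	bool sparse;		/* True if backed by a sparse mapping. */

	/* Buffers currently mapped into this area. */
	struct nvgpu_list_node buffer_list_head;

	/* Link in the owning vm->vm_area_list. */
	struct nvgpu_list_node vm_area_list;
};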
JIRA NVGPU-12
JIRA NVGPU-30
Change-Id: If85e1cf868031d0dc265e7bed50b58a2aed2602e
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1477744
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 241
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  21
 2 files changed, 14 insertions(+), 248 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 84919d50..5051f028 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -26,6 +26,7 @@
 #include <trace/events/gk20a.h>
 
 #include <nvgpu/vm.h>
+#include <nvgpu/vm_area.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
@@ -1065,19 +1066,6 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm,
 	return ret;
 }
 
-static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
-						       u64 addr)
-{
-	struct vm_reserved_va_node *va_node;
-	nvgpu_list_for_each_entry(va_node, &vm->reserved_va_list,
-				  vm_reserved_va_node, reserved_va_list)
-		if (addr >= va_node->vaddr_start &&
-		    addr < (u64)va_node->vaddr_start + (u64)va_node->size)
-			return va_node;
-
-	return NULL;
-}
-
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 			 struct nvgpu_mapped_buf ***mapped_buffers,
 			 int *num_buffers)
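
The lookup removed above survives as nvgpu_vm_area_find(), called from
__get_pte_size_fixed_map() further down in this diff. Presumably the
body moves to the new vm_area code essentially unchanged apart from
the rename; a sketch under that assumption, with field names carried
over from the old struct:

struct nvgpu_vm_area *nvgpu_vm_area_find(struct vm_gk20a *vm, u64 addr)
{
	struct nvgpu_vm_area *vm_area;

	/* Return the VM area containing addr, or NULL if none does. */
	nvgpu_list_for_each_entry(vm_area, &vm->vm_area_list,
				  nvgpu_vm_area, vm_area_list) {
		if (addr >= vm_area->vaddr_start &&
		    addr < vm_area->vaddr_start + vm_area->size)
			return vm_area;
	}

	return NULL;
}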
@@ -1301,57 +1289,6 @@ int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 	return 0;
 }
 
-int validate_fixed_buffer(struct vm_gk20a *vm,
-			  struct buffer_attrs *bfr,
-			  u64 map_offset, u64 map_size,
-			  struct vm_reserved_va_node **pva_node)
-{
-	struct gk20a *g = vm->mm->g;
-	struct vm_reserved_va_node *va_node;
-	struct nvgpu_mapped_buf *buffer;
-	u64 map_end = map_offset + map_size;
-
-	/* can wrap around with insane map_size; zero is disallowed too */
-	if (map_end <= map_offset) {
-		nvgpu_warn(g, "fixed offset mapping with invalid map_size");
-		return -EINVAL;
-	}
-
-	if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
-		nvgpu_err(g, "map offset must be buffer page size aligned 0x%llx",
-			  map_offset);
-		return -EINVAL;
-	}
-
-	/* Find the space reservation, but it's ok to have none for
-	 * userspace-managed address spaces */
-	va_node = addr_to_reservation(vm, map_offset);
-	if (!va_node && !vm->userspace_managed) {
-		nvgpu_warn(g, "fixed offset mapping without space allocation");
-		return -EINVAL;
-	}
-
-	/* Mapped area should fit inside va, if there's one */
-	if (va_node && map_end > va_node->vaddr_start + va_node->size) {
-		nvgpu_warn(g, "fixed offset mapping size overflows va node");
-		return -EINVAL;
-	}
-
-	/* check that this mapping does not collide with existing
-	 * mappings by checking the buffer with the highest GPU VA
-	 * that is less than our buffer end */
-	buffer = __nvgpu_vm_find_mapped_buf_less_than(
-		vm, map_offset + map_size);
-	if (buffer && buffer->addr + buffer->size > map_offset) {
-		nvgpu_warn(g, "overlapping buffer map requested");
-		return -EINVAL;
-	}
-
-	*pva_node = va_node;
-
-	return 0;
-}
-
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			  u64 map_offset,
 			  struct sg_table *sgt,
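
validate_fixed_buffer() is likewise re-homed. Judging from the updated
call site in the next hunk, the replacement takes the page size index
directly instead of a struct buffer_attrs, so its prototype is
presumably:

int nvgpu_vm_area_validate_buffer(struct vm_gk20a *vm,
				  u64 map_offset, u64 map_size,
				  int pgsz_idx,
				  struct nvgpu_vm_area **pvm_area);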
@@ -1850,22 +1787,22 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
 	if (fixed_mapping) {
 		struct buffer_attrs bfr;
 		int err;
-		struct vm_reserved_va_node *va_node = NULL;
+		struct nvgpu_vm_area *vm_area = NULL;
 
 		memset(&bfr, 0, sizeof(bfr));
 
 		bfr.pgsz_idx = small_pgsz_index;
 
-		err = validate_fixed_buffer(
-			vm, &bfr, *compbits_win_gva,
-			mapped_buffer->ctag_map_win_size, &va_node);
+		err = nvgpu_vm_area_validate_buffer(
+			vm, *compbits_win_gva, mapped_buffer->ctag_map_win_size,
+			bfr.pgsz_idx, &vm_area);
 
 		if (err) {
 			nvgpu_mutex_release(&vm->update_gmmu_lock);
 			return err;
 		}
 
-		if (va_node) {
+		if (vm_area) {
 			/* this would create a dangling GPU VA
 			 * pointer if the space is freed
 			 * before before the buffer is
@@ -2564,8 +2501,8 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 			   mapped_buffer->pgsz_idx,
 			   mapped_buffer->va_allocated,
 			   gk20a_mem_flag_none,
-			   mapped_buffer->va_node ?
-			   mapped_buffer->va_node->sparse : false,
+			   mapped_buffer->vm_area ?
+			   mapped_buffer->vm_area->sparse : false,
 			   batch);
 
 	gk20a_dbg(gpu_dbg_map,
@@ -2712,13 +2649,13 @@ int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
 enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
 					      u64 base, u64 size)
 {
-	struct vm_reserved_va_node *node;
+	struct nvgpu_vm_area *vm_area;
 
-	node = addr_to_reservation(vm, base);
-	if (!node)
+	vm_area = nvgpu_vm_area_find(vm, base);
+	if (!vm_area)
 		return gmmu_page_size_small;
 
-	return node->pgsz_idx;
+	return vm_area->pgsz_idx;
 }
 
 /*
@@ -3012,7 +2949,7 @@ int nvgpu_init_vm(struct mm_gk20a *mm,
 
 	nvgpu_mutex_init(&vm->update_gmmu_lock);
 	kref_init(&vm->ref);
-	nvgpu_init_list_node(&vm->reserved_va_list);
+	nvgpu_init_list_node(&vm->vm_area_list);
 
 	/*
 	 * This is only necessary for channel address spaces. The best way to
@@ -3100,158 +3037,6 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 	return 0;
 }
 
-
-int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
-			 struct nvgpu_as_alloc_space_args *args)
-
-{
-	int err = -ENOMEM;
-	int pgsz_idx = gmmu_page_size_small;
-	struct nvgpu_allocator *vma;
-	struct vm_gk20a *vm = as_share->vm;
-	struct gk20a *g = vm->mm->g;
-	struct vm_reserved_va_node *va_node;
-	u64 vaddr_start = 0;
-	int page_sizes = gmmu_nr_page_sizes;
-
-	gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
-		     args->flags, args->page_size, args->pages,
-		     args->o_a.offset);
-
-	if (!vm->big_pages)
-		page_sizes--;
-
-	for (; pgsz_idx < page_sizes; pgsz_idx++) {
-		if (vm->gmmu_page_sizes[pgsz_idx] == args->page_size)
-			break;
-	}
-
-	if (pgsz_idx >= page_sizes) {
-		err = -EINVAL;
-		goto clean_up;
-	}
-
-	va_node = nvgpu_kzalloc(g, sizeof(*va_node));
-	if (!va_node) {
-		err = -ENOMEM;
-		goto clean_up;
-	}
-
-	vma = vm->vma[pgsz_idx];
-	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
-		vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset,
-						(u64)args->pages *
-						(u64)args->page_size,
-						args->page_size);
-	else
-		vaddr_start = nvgpu_alloc(vma,
-					  (u64)args->pages *
-					  (u64)args->page_size);
-
-	if (!vaddr_start) {
-		nvgpu_kfree(g, va_node);
-		goto clean_up;
-	}
-
-	va_node->vaddr_start = vaddr_start;
-	va_node->size = (u64)args->page_size * (u64)args->pages;
-	va_node->pgsz_idx = pgsz_idx;
-	nvgpu_init_list_node(&va_node->buffer_list_head);
-	nvgpu_init_list_node(&va_node->reserved_va_list);
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	/* mark that we need to use sparse mappings here */
-	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) {
-		u64 map_offset = g->ops.mm.gmmu_map(vm, vaddr_start,
-						    NULL,
-						    0,
-						    va_node->size,
-						    pgsz_idx,
-						    0,
-						    0,
-						    args->flags,
-						    gk20a_mem_flag_none,
-						    false,
-						    true,
-						    false,
-						    NULL,
-						    APERTURE_INVALID);
-		if (!map_offset) {
-			nvgpu_mutex_release(&vm->update_gmmu_lock);
-			nvgpu_free(vma, vaddr_start);
-			nvgpu_kfree(g, va_node);
-			goto clean_up;
-		}
-
-		va_node->sparse = true;
-	}
-	nvgpu_list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	args->o_a.offset = vaddr_start;
-	err = 0;
-
-clean_up:
-	return err;
-}
-
-int gk20a_vm_free_space(struct gk20a_as_share *as_share,
-			struct nvgpu_as_free_space_args *args)
-{
-	int err = -ENOMEM;
-	int pgsz_idx;
-	struct nvgpu_allocator *vma;
-	struct vm_gk20a *vm = as_share->vm;
-	struct vm_reserved_va_node *va_node;
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
-		     args->pages, args->offset);
-
-	/* determine pagesz idx */
-	pgsz_idx = __get_pte_size(vm, args->offset,
-				  args->page_size * args->pages);
-
-	vma = vm->vma[pgsz_idx];
-	nvgpu_free(vma, args->offset);
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	va_node = addr_to_reservation(vm, args->offset);
-	if (va_node) {
-		struct nvgpu_mapped_buf *buffer, *n;
-
-		/* Decrement the ref count on all buffers in this va_node. This
-		 * allows userspace to let the kernel free mappings that are
-		 * only used by this va_node. */
-		nvgpu_list_for_each_entry_safe(buffer, n,
-					       &va_node->buffer_list_head,
-					       nvgpu_mapped_buf, buffer_list) {
-			nvgpu_list_del(&buffer->buffer_list);
-			kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref);
-		}
-
-		nvgpu_list_del(&va_node->reserved_va_list);
-
-		/* if this was a sparse mapping, free the va */
-		if (va_node->sparse)
-			g->ops.mm.gmmu_unmap(vm,
-					     va_node->vaddr_start,
-					     va_node->size,
-					     va_node->pgsz_idx,
-					     true,
-					     gk20a_mem_flag_none,
-					     true,
-					     NULL);
-		nvgpu_kfree(g, va_node);
-	}
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-	err = 0;
-
-	return err;
-}
-
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 {
 	int err = 0;
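
With gk20a_vm_alloc_space() and gk20a_vm_free_space() removed from
this file, the new <nvgpu/vm_area.h> presumably collects the re-homed
entry points. In the sketch below, only nvgpu_vm_area_find() and
nvgpu_vm_area_validate_buffer() are confirmed by call sites in this
diff; the alloc/free names and signatures are assumptions:

/* Confirmed by call sites in this diff: */
struct nvgpu_vm_area *nvgpu_vm_area_find(struct vm_gk20a *vm, u64 addr);
int nvgpu_vm_area_validate_buffer(struct vm_gk20a *vm,
				  u64 map_offset, u64 map_size,
				  int pgsz_idx,
				  struct nvgpu_vm_area **pvm_area);

/* Assumed new homes for the removed ioctl backends: */
int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size,
			u64 *addr, u32 flags);
int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr);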
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 357962c7..6ddf842a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -140,22 +140,6 @@ struct priv_cmd_entry {
 	u32 size; /* in words */
 };
 
-struct vm_reserved_va_node {
-	struct nvgpu_list_node reserved_va_list;
-	struct nvgpu_list_node buffer_list_head;
-	u32 pgsz_idx;
-	u64 vaddr_start;
-	u64 size;
-	bool sparse;
-};
-
-static inline struct vm_reserved_va_node *
-vm_reserved_va_node_from_reserved_va_list(struct nvgpu_list_node *node)
-{
-	return (struct vm_reserved_va_node *)
-		((uintptr_t)node - offsetof(struct vm_reserved_va_node, reserved_va_list));
-};
-
 struct gk20a;
 struct channel_gk20a;
 
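
The offsetof() helper removed above is the driver's hand-rolled
container_of() for intrusive lists; its vm_area counterpart presumably
follows the same pattern under the new names (a sketch, not confirmed
by this diff):

static inline struct nvgpu_vm_area *
nvgpu_vm_area_from_vm_area_list(struct nvgpu_list_node *node)
{
	/* Recover the containing nvgpu_vm_area from its embedded link. */
	return (struct nvgpu_vm_area *)
		((uintptr_t)node - offsetof(struct nvgpu_vm_area, vm_area_list));
};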
@@ -442,10 +426,6 @@ struct nvgpu_as_free_space_args;
 int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
 			 u32 flags);
 int gk20a_vm_release_share(struct gk20a_as_share *as_share);
-int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
-			 struct nvgpu_as_alloc_space_args *args);
-int gk20a_vm_free_space(struct gk20a_as_share *as_share,
-			 struct nvgpu_as_free_space_args *args);
 int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 			  struct channel_gk20a *ch);
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch);
@@ -491,5 +471,6 @@ extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
 int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd,
 			     u64 *buffer_id, u64 *buffer_len);
+void gk20a_vm_unmap_locked_kref(struct kref *ref);
 
 #endif /* MM_GK20A_H */
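
The single addition to this header exports gk20a_vm_unmap_locked_kref(),
presumably so the relocated vm_area code can keep releasing buffers the
way the removed gk20a_vm_free_space() did:

	/* As in the removed gk20a_vm_free_space(): drop the mapping's
	 * reference; the kref release callback performs the unmap. */
	nvgpu_list_del(&buffer->buffer_list);
	kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref);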