diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 137 |
2 files changed, 120 insertions, 36 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index b259a0c3..a4ec5254 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c | |||
@@ -144,7 +144,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
144 | 144 | ||
145 | /* Circular Buffer */ | 145 | /* Circular Buffer */ |
146 | gpu_va = gk20a_vm_alloc_va(ch_vm, | 146 | gpu_va = gk20a_vm_alloc_va(ch_vm, |
147 | gr->global_ctx_buffer[CIRCULAR].mem.size, 0); | 147 | gr->global_ctx_buffer[CIRCULAR].mem.size, |
148 | gmmu_page_size_kernel); | ||
148 | 149 | ||
149 | if (!gpu_va) | 150 | if (!gpu_va) |
150 | goto clean_up; | 151 | goto clean_up; |
@@ -153,7 +154,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
153 | 154 | ||
154 | /* Attribute Buffer */ | 155 | /* Attribute Buffer */ |
155 | gpu_va = gk20a_vm_alloc_va(ch_vm, | 156 | gpu_va = gk20a_vm_alloc_va(ch_vm, |
156 | gr->global_ctx_buffer[ATTRIBUTE].mem.size, 0); | 157 | gr->global_ctx_buffer[ATTRIBUTE].mem.size, |
158 | gmmu_page_size_kernel); | ||
157 | 159 | ||
158 | if (!gpu_va) | 160 | if (!gpu_va) |
159 | goto clean_up; | 161 | goto clean_up; |
@@ -162,7 +164,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
162 | 164 | ||
163 | /* Page Pool */ | 165 | /* Page Pool */ |
164 | gpu_va = gk20a_vm_alloc_va(ch_vm, | 166 | gpu_va = gk20a_vm_alloc_va(ch_vm, |
165 | gr->global_ctx_buffer[PAGEPOOL].mem.size, 0); | 167 | gr->global_ctx_buffer[PAGEPOOL].mem.size, |
168 | gmmu_page_size_kernel); | ||
166 | if (!gpu_va) | 169 | if (!gpu_va) |
167 | goto clean_up; | 170 | goto clean_up; |
168 | g_bfr_va[PAGEPOOL_VA] = gpu_va; | 171 | g_bfr_va[PAGEPOOL_VA] = gpu_va; |
@@ -170,7 +173,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
170 | 173 | ||
171 | /* Priv register Access Map */ | 174 | /* Priv register Access Map */ |
172 | gpu_va = gk20a_vm_alloc_va(ch_vm, | 175 | gpu_va = gk20a_vm_alloc_va(ch_vm, |
173 | gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size, 0); | 176 | gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size, |
177 | gmmu_page_size_kernel); | ||
174 | if (!gpu_va) | 178 | if (!gpu_va) |
175 | goto clean_up; | 179 | goto clean_up; |
176 | g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; | 180 | g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; |
@@ -257,7 +261,9 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
257 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; | 261 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; |
258 | 262 | ||
259 | gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; | 263 | gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; |
260 | gr_ctx->mem.gpu_va = gk20a_vm_alloc_va(vm, gr_ctx->mem.size, 0); | 264 | gr_ctx->mem.gpu_va = gk20a_vm_alloc_va(vm, |
265 | gr_ctx->mem.size, | ||
266 | gmmu_page_size_kernel); | ||
261 | 267 | ||
262 | if (!gr_ctx->mem.gpu_va) { | 268 | if (!gr_ctx->mem.gpu_va) { |
263 | kfree(gr_ctx); | 269 | kfree(gr_ctx); |
@@ -351,7 +357,8 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | |||
351 | 357 | ||
352 | patch_ctx->mem.size = 128 * sizeof(u32); | 358 | patch_ctx->mem.size = 128 * sizeof(u32); |
353 | patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm, | 359 | patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm, |
354 | patch_ctx->mem.size, 0); | 360 | patch_ctx->mem.size, |
361 | gmmu_page_size_kernel); | ||
355 | if (!patch_ctx->mem.gpu_va) | 362 | if (!patch_ctx->mem.gpu_va) |
356 | return -ENOMEM; | 363 | return -ENOMEM; |
357 | 364 | ||
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index b5846043..c36b135c 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -99,7 +99,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
99 | map_offset = gk20a_vm_alloc_va(vm, size, | 99 | map_offset = gk20a_vm_alloc_va(vm, size, |
100 | pgsz_idx); | 100 | pgsz_idx); |
101 | if (!map_offset) { | 101 | if (!map_offset) { |
102 | gk20a_err(d, "failed to allocate va space"); | 102 | gk20a_err(d, "failed to allocate va space\n"); |
103 | err = -ENOMEM; | 103 | err = -ENOMEM; |
104 | goto fail; | 104 | goto fail; |
105 | } | 105 | } |
@@ -118,6 +118,20 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
118 | p->addr = addr; | 118 | p->addr = addr; |
119 | p->gpu_va = map_offset; | 119 | p->gpu_va = map_offset; |
120 | p->size = size; | 120 | p->size = size; |
121 | if (pgsz_idx == gmmu_page_size_kernel) { | ||
122 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
123 | |||
124 | if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small]) { | ||
125 | pgsz_idx = gmmu_page_size_small; | ||
126 | } else if (page_size == | ||
127 | vm->gmmu_page_sizes[gmmu_page_size_big]) { | ||
128 | pgsz_idx = gmmu_page_size_big; | ||
129 | } else { | ||
130 | gk20a_err(d, "invalid kernel page size %d\n", | ||
131 | page_size); | ||
132 | goto fail; | ||
133 | } | ||
134 | } | ||
121 | p->pgsz_idx = pgsz_idx; | 135 | p->pgsz_idx = pgsz_idx; |
122 | p->iova = mapping ? 1 : 0; | 136 | p->iova = mapping ? 1 : 0; |
123 | p->kind = kind_v; | 137 | p->kind = kind_v; |
@@ -127,7 +141,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
127 | p->ctag_offset = ctag_offset; | 141 | p->ctag_offset = ctag_offset; |
128 | p->clear_ctags = clear_ctags; | 142 | p->clear_ctags = clear_ctags; |
129 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 143 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
130 | if (err || msg.ret) | 144 | err = err ? err : msg.ret; |
145 | if (err) | ||
131 | goto fail; | 146 | goto fail; |
132 | 147 | ||
133 | /* TLB invalidate handled on server side */ | 148 | /* TLB invalidate handled on server side */ |
@@ -214,8 +229,11 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) | |||
214 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 229 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
215 | WARN_ON(err || msg.ret); | 230 | WARN_ON(err || msg.ret); |
216 | 231 | ||
217 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 232 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]); |
218 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | 233 | if (vm->vma[gmmu_page_size_small].init) |
234 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | ||
235 | if (vm->vma[gmmu_page_size_big].init) | ||
236 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | ||
219 | 237 | ||
220 | mutex_unlock(&vm->update_gmmu_lock); | 238 | mutex_unlock(&vm->update_gmmu_lock); |
221 | 239 | ||
@@ -258,14 +276,16 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
258 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; | 276 | struct tegra_vgpu_as_share_params *p = &msg.params.as_share; |
259 | struct mm_gk20a *mm = &g->mm; | 277 | struct mm_gk20a *mm = &g->mm; |
260 | struct vm_gk20a *vm; | 278 | struct vm_gk20a *vm; |
261 | u64 small_vma_size, large_vma_size; | 279 | u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit, |
280 | kernel_vma_start, kernel_vma_limit; | ||
262 | char name[32]; | 281 | char name[32]; |
263 | int err, i; | 282 | int err, i; |
264 | 283 | ||
265 | /* note: keep the page sizes sorted lowest to highest here */ | 284 | /* note: keep the page sizes sorted lowest to highest here */ |
266 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { | 285 | u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { |
267 | SZ_4K, | 286 | SZ_4K, |
268 | big_page_size ? big_page_size : platform->default_big_page_size | 287 | big_page_size ? big_page_size : platform->default_big_page_size, |
288 | SZ_4K | ||
269 | }; | 289 | }; |
270 | 290 | ||
271 | gk20a_dbg_fn(""); | 291 | gk20a_dbg_fn(""); |
@@ -288,8 +308,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
288 | vm->big_page_size = big_page_size; | 308 | vm->big_page_size = big_page_size; |
289 | 309 | ||
290 | vm->va_start = big_page_size << 10; /* create a one pde hole */ | 310 | vm->va_start = big_page_size << 10; /* create a one pde hole */ |
291 | vm->va_limit = mm->channel.user_size; /* note this means channel.size | 311 | vm->va_limit = mm->channel.user_size + mm->channel.kernel_size; |
292 | is really just the max */ | ||
293 | 312 | ||
294 | msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE; | 313 | msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE; |
295 | msg.handle = platform->virt_handle; | 314 | msg.handle = platform->virt_handle; |
@@ -303,34 +322,88 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
303 | 322 | ||
304 | vm->handle = p->handle; | 323 | vm->handle = p->handle; |
305 | 324 | ||
306 | /* First 16GB of the address space goes towards small pages. What ever | 325 | /* setup vma limits */ |
307 | * remains is allocated to large pages. */ | 326 | small_vma_start = vm->va_start; |
308 | small_vma_size = (u64)16 << 30; | 327 | |
309 | large_vma_size = vm->va_limit - small_vma_size; | 328 | if (vm->big_pages) { |
329 | /* First 16GB of the address space goes towards small | ||
330 | * pages. The kernel reserved pages are at the end. | ||
331 | * What ever remains is allocated to large pages. | ||
332 | */ | ||
333 | small_vma_limit = __nv_gmmu_va_small_page_limit(); | ||
334 | large_vma_start = small_vma_limit; | ||
335 | large_vma_limit = vm->va_limit - mm->channel.kernel_size; | ||
336 | } else { | ||
337 | small_vma_limit = vm->va_limit - mm->channel.kernel_size; | ||
338 | large_vma_start = 0; | ||
339 | large_vma_limit = 0; | ||
340 | } | ||
310 | 341 | ||
311 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | 342 | kernel_vma_start = vm->va_limit - mm->channel.kernel_size; |
312 | gmmu_page_sizes[gmmu_page_size_small]>>10); | 343 | kernel_vma_limit = vm->va_limit; |
313 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | 344 | |
314 | vm, name, | 345 | gk20a_dbg_info( |
315 | vm->va_start, | 346 | "small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n", |
316 | small_vma_size - vm->va_start, | 347 | small_vma_start, small_vma_limit, |
317 | SZ_4K, | 348 | large_vma_start, large_vma_limit, |
318 | GPU_BALLOC_MAX_ORDER, | 349 | kernel_vma_start, kernel_vma_limit); |
319 | GPU_BALLOC_GVA_SPACE); | 350 | |
320 | if (err) | 351 | /* check that starts do not exceed limits */ |
352 | WARN_ON(small_vma_start > small_vma_limit); | ||
353 | WARN_ON(large_vma_start > large_vma_limit); | ||
354 | /* kernel_vma must also be non-zero */ | ||
355 | WARN_ON(kernel_vma_start >= kernel_vma_limit); | ||
356 | |||
357 | if (small_vma_start > small_vma_limit || | ||
358 | large_vma_start > large_vma_limit || | ||
359 | kernel_vma_start >= kernel_vma_limit) { | ||
360 | err = -EINVAL; | ||
321 | goto clean_up_share; | 361 | goto clean_up_share; |
362 | } | ||
363 | |||
364 | if (small_vma_start < small_vma_limit) { | ||
365 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | ||
366 | gmmu_page_sizes[gmmu_page_size_small] >> 10); | ||
367 | |||
368 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small], | ||
369 | vm, name, | ||
370 | small_vma_start, | ||
371 | small_vma_limit - small_vma_start, | ||
372 | SZ_4K, | ||
373 | GPU_BALLOC_MAX_ORDER, | ||
374 | GPU_BALLOC_GVA_SPACE); | ||
375 | if (err) | ||
376 | goto clean_up_share; | ||
377 | } | ||
378 | |||
379 | if (large_vma_start < large_vma_limit) { | ||
380 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | ||
381 | gmmu_page_sizes[gmmu_page_size_big] >> 10); | ||
382 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | ||
383 | vm, name, | ||
384 | large_vma_start, | ||
385 | large_vma_limit - large_vma_start, | ||
386 | big_page_size, | ||
387 | GPU_BALLOC_MAX_ORDER, | ||
388 | GPU_BALLOC_GVA_SPACE); | ||
389 | if (err) | ||
390 | goto clean_up_small_allocator; | ||
391 | } | ||
322 | 392 | ||
323 | snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, | 393 | snprintf(name, sizeof(name), "gk20a_as_%dKB-sys", |
324 | gmmu_page_sizes[gmmu_page_size_big]>>10); | 394 | gmmu_page_sizes[gmmu_page_size_kernel] >> 10); |
325 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big], | 395 | /* |
396 | * kernel reserved VMA is at the end of the aperture | ||
397 | */ | ||
398 | err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel], | ||
326 | vm, name, | 399 | vm, name, |
327 | small_vma_size, | 400 | kernel_vma_start, |
328 | large_vma_size, | 401 | kernel_vma_limit - kernel_vma_start, |
329 | big_page_size, | 402 | SZ_4K, |
330 | GPU_BALLOC_MAX_ORDER, | 403 | GPU_BALLOC_MAX_ORDER, |
331 | GPU_BALLOC_GVA_SPACE); | 404 | GPU_BALLOC_GVA_SPACE); |
332 | if (err) | 405 | if (err) |
333 | goto clean_up_small_allocator; | 406 | goto clean_up_big_allocator; |
334 | 407 | ||
335 | vm->mapped_buffers = RB_ROOT; | 408 | vm->mapped_buffers = RB_ROOT; |
336 | 409 | ||
@@ -342,8 +415,12 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share, | |||
342 | 415 | ||
343 | return 0; | 416 | return 0; |
344 | 417 | ||
418 | clean_up_big_allocator: | ||
419 | if (large_vma_start < large_vma_limit) | ||
420 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]); | ||
345 | clean_up_small_allocator: | 421 | clean_up_small_allocator: |
346 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | 422 | if (small_vma_start < small_vma_limit) |
423 | gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]); | ||
347 | clean_up_share: | 424 | clean_up_share: |
348 | msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; | 425 | msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE; |
349 | msg.handle = platform->virt_handle; | 426 | msg.handle = platform->virt_handle; |