Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c')
-rw-r--r--	drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c	343
1 file changed, 165 insertions(+), 178 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
index e8790587..8f1c5d78 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
@@ -20,14 +20,18 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/error_notifier.h>
 #include <nvgpu/dma.h>
 
 #include "vgpu.h"
 #include "gr_vgpu.h"
 #include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/channel_gk20a.h"
+#include "gk20a/tsg_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
+#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
 
 void vgpu_gr_detect_sm_arch(struct gk20a *g)
 {
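
Note on the new includes: the graphics context now hangs off the TSG rather than the channel, so this file needs "gk20a/channel_gk20a.h" and "gk20a/tsg_gk20a.h" for the channel-to-TSG lookup, and hw_ctxsw_prog_gk20a.h for the pm_mode accessors used near the end of the diff. A minimal sketch of the lookup-and-guard pattern the hunks below repeat (tsg_gk20a_from_ch() appears in the diff; the surrounding function is illustrative only):

	/* Illustrative only: the shape every reworked per-channel entry
	 * point below now follows. */
	static int vgpu_gr_some_per_channel_op(struct channel_gk20a *c)
	{
		struct tsg_gk20a *tsg;

		tsg = tsg_gk20a_from_ch(c); /* NULL for a channel not in a TSG */
		if (!tsg)
			return -EINVAL;

		/* from here on, gr ctx state is reached via tsg->gr_ctx */
		return 0;
	}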
@@ -152,8 +156,9 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
 	struct vm_gk20a *ch_vm = c->vm;
-	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
-	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+	struct tsg_gk20a *tsg;
+	u64 *g_bfr_va;
+	u64 *g_bfr_size;
 	struct gr_gk20a *gr = &g->gr;
 	u64 gpu_va;
 	u32 i;
@@ -161,7 +166,12 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
-	/* FIXME: add VPR support */
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
+	g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
 
 	/* Circular Buffer */
 	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
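
vgpu_gr_map_global_ctx_buffers() keeps its channel argument (the RPC still uses the channel's virt_ctx handle), but the recorded VAs now live in the TSG-owned arrays. A sketch of the per-buffer bookkeeping in the unchanged body that follows, assuming the CIRCULAR/CIRCULAR_VA index names from gr_gk20a.h:

	/* Sketch, per buffer: reserve a guest VA and remember it in the
	 * TSG arrays; the vGPU server performs the actual mapping. */
	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
			gr->global_ctx_buffer[CIRCULAR].mem.size,
			gmmu_page_size_kernel);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[CIRCULAR_VA] = gpu_va;
	g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].mem.size;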
@@ -213,7 +223,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	if (err || msg.ret)
 		goto clean_up;
 
-	c->ch_ctx.global_ctx_buffer_mapped = true;
+	tsg->gr_ctx.global_ctx_buffer_mapped = true;
 	return 0;
 
 clean_up:
@@ -227,40 +237,33 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	return -ENOMEM;
 }
 
-static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
+static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg)
 {
-	struct vm_gk20a *ch_vm = c->vm;
-	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
-	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+	struct vm_gk20a *ch_vm = tsg->vm;
+	u64 *g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
+	u64 *g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
 	u32 i;
 
 	gk20a_dbg_fn("");
 
-	if (c->ch_ctx.global_ctx_buffer_mapped) {
-		struct tegra_vgpu_cmd_msg msg;
-		struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
-		int err;
+	if (tsg->gr_ctx.global_ctx_buffer_mapped) {
+		/* server will unmap on channel close */
 
-		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX;
-		msg.handle = vgpu_get_handle(c->g);
-		p->handle = c->virt_ctx;
-		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-		WARN_ON(err || msg.ret);
-	}
-
-	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
-		if (g_bfr_va[i]) {
-			__nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
-					gmmu_page_size_kernel);
-			g_bfr_va[i] = 0;
-			g_bfr_size[i] = 0;
+		for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
+			if (g_bfr_va[i]) {
+				__nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
+						gmmu_page_size_kernel);
+				g_bfr_va[i] = 0;
+				g_bfr_size[i] = 0;
+			}
 		}
+
+		tsg->gr_ctx.global_ctx_buffer_mapped = false;
 	}
-	c->ch_ctx.global_ctx_buffer_mapped = false;
 }
 
 int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
-		struct gr_ctx_desc **__gr_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		struct vm_gk20a *vm,
 		u32 class,
 		u32 flags)
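
The unmap path above drops its RPC entirely: the server tears down its mappings when the channel closes, so the guest only releases its VA reservations. The signature change on vgpu_gr_alloc_gr_ctx() reflects the underlying data-model move, roughly as below (an abridged sketch, not the real declarations; only the members visible in this diff are shown):

	/* Abridged sketch of the ownership move. */
	struct tsg_gk20a {
		struct vm_gk20a *vm;       /* taken with nvgpu_vm_get() below */
		struct nvgpu_gr_ctx gr_ctx; /* by value; was struct gr_ctx_desc *tsg_gr_ctx */
		/* ... */
	};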
@@ -268,7 +271,6 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
 	struct gr_gk20a *gr = &g->gr;
-	struct gr_ctx_desc *gr_ctx;
 	int err;
 
 	gk20a_dbg_fn("");
@@ -280,19 +282,14 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 	gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
 	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 
-	gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
-	if (!gr_ctx)
-		return -ENOMEM;
-
-	gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
 	gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm,
-			gr_ctx->mem.size,
+			gr->ctx_vars.buffer_total_size,
 			gmmu_page_size_kernel);
 
-	if (!gr_ctx->mem.gpu_va) {
-		nvgpu_kfree(g, gr_ctx);
+	if (!gr_ctx->mem.gpu_va)
 		return -ENOMEM;
-	}
+	gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
+	gr_ctx->mem.aperture = APERTURE_SYSMEM;
 
 	msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC;
 	msg.handle = vgpu_get_handle(g);
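
With the context embedded in the TSG, vgpu_gr_alloc_gr_ctx() no longer kzallocs a descriptor or returns one through __gr_ctx; the caller passes the storage in, and validity is tracked through gr_ctx->mem (APERTURE_SYSMEM on success, APERTURE_INVALID on the failure path in the next hunk). A caller-side sketch mirroring the later alloc_obj_ctx hunk:

	struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;

	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { /* replaces the NULL-pointer check */
		err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, class_num, flags);
		if (err)
			return err; /* mem stays invalid; nothing to free */
	}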
@@ -306,57 +303,19 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 		nvgpu_err(g, "fail to alloc gr_ctx");
 		__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
 				gmmu_page_size_kernel);
-		nvgpu_kfree(g, gr_ctx);
+		gr_ctx->mem.aperture = APERTURE_INVALID;
 	} else {
 		gr_ctx->virt_ctx = p->gr_ctx_handle;
-		*__gr_ctx = gr_ctx;
 	}
 
 	return err;
 }
 
-void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
-		struct gr_ctx_desc *gr_ctx)
-{
-	struct tegra_vgpu_cmd_msg msg;
-	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (!gr_ctx || !gr_ctx->mem.gpu_va)
-		return;
-
-
-	msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
-	msg.handle = vgpu_get_handle(g);
-	p->gr_ctx_handle = gr_ctx->virt_ctx;
-	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-	WARN_ON(err || msg.ret);
-
-	__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
-			gmmu_page_size_kernel);
-
-	nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
-
-	nvgpu_kfree(g, gr_ctx);
-}
-
-static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
-{
-	gk20a_dbg_fn("");
-
-	c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
-	c->ch_ctx.gr_ctx = NULL;
-}
-
 static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 		struct channel_gk20a *c)
 {
-	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
+	struct tsg_gk20a *tsg;
+	struct patch_desc *patch_ctx;
 	struct vm_gk20a *ch_vm = c->vm;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
@@ -364,6 +323,11 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	patch_ctx = &tsg->gr_ctx.patch_ctx;
 	patch_ctx->mem.size = 128 * sizeof(u32);
 	patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm,
 			patch_ctx->mem.size,
@@ -385,37 +349,25 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 	return err;
 }
 
-static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
+static void vgpu_gr_free_channel_patch_ctx(struct tsg_gk20a *tsg)
 {
-	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
-	struct vm_gk20a *ch_vm = c->vm;
+	struct patch_desc *patch_ctx = &tsg->gr_ctx.patch_ctx;
 
 	gk20a_dbg_fn("");
 
 	if (patch_ctx->mem.gpu_va) {
-		struct tegra_vgpu_cmd_msg msg;
-		struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
-		int err;
+		/* server will free on channel close */
 
-		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX;
-		msg.handle = vgpu_get_handle(c->g);
-		p->handle = c->virt_ctx;
-		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-		WARN_ON(err || msg.ret);
-
-		__nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+		__nvgpu_vm_free_va(tsg->vm, patch_ctx->mem.gpu_va,
 				gmmu_page_size_kernel);
 		patch_ctx->mem.gpu_va = 0;
 	}
 }
 
-static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
+static void vgpu_gr_free_channel_pm_ctx(struct tsg_gk20a *tsg)
 {
-	struct tegra_vgpu_cmd_msg msg;
-	struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct nvgpu_gr_ctx *ch_ctx = &tsg->gr_ctx;
 	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
-	int err;
 
 	gk20a_dbg_fn("");
 
@@ -423,44 +375,63 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	if (pm_ctx->mem.gpu_va == 0)
 		return;
 
-	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX;
-	msg.handle = vgpu_get_handle(c->g);
-	p->handle = c->virt_ctx;
-	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-	WARN_ON(err || msg.ret);
+	/* server will free on channel close */
 
-	__nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va,
+	__nvgpu_vm_free_va(tsg->vm, pm_ctx->mem.gpu_va,
 			gmmu_page_size_kernel);
 	pm_ctx->mem.gpu_va = 0;
 }
 
-void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg)
+void vgpu_gr_free_gr_ctx(struct gk20a *g,
+		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
 {
+	struct tsg_gk20a *tsg;
+
 	gk20a_dbg_fn("");
 
-	if (c->g->ops.fifo.free_channel_ctx_header)
-		c->g->ops.fifo.free_channel_ctx_header(c);
-	vgpu_gr_unmap_global_ctx_buffers(c);
-	vgpu_gr_free_channel_patch_ctx(c);
-	vgpu_gr_free_channel_pm_ctx(c);
-	if (!is_tsg)
-		vgpu_gr_free_channel_gr_ctx(c);
+	if (gr_ctx->mem.gpu_va) {
+		struct tegra_vgpu_cmd_msg msg;
+		struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+		int err;
 
-	/* zcull_ctx, pm_ctx */
+		msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
+		msg.handle = vgpu_get_handle(g);
+		p->gr_ctx_handle = gr_ctx->virt_ctx;
+		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+		WARN_ON(err || msg.ret);
 
-	memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
+		__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
+				gmmu_page_size_kernel);
+
+		tsg = &g->fifo.tsg[gr_ctx->tsgid];
+		vgpu_gr_unmap_global_ctx_buffers(tsg);
+		vgpu_gr_free_channel_patch_ctx(tsg);
+		vgpu_gr_free_channel_pm_ctx(tsg);
+
+		nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
+		nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
+		nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
+		nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
 
-	c->first_init = false;
+		memset(gr_ctx, 0, sizeof(*gr_ctx));
+	}
 }
 
 static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c)
 {
-	struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_channel_bind_gr_ctx_params *p =
 		&msg.params.ch_bind_gr_ctx;
 	int err;
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX;
 	msg.handle = vgpu_get_handle(c->g);
 	p->ch_handle = c->virt_ctx;
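
vgpu_gr_free_channel_ctx() disappears and vgpu_gr_free_gr_ctx() becomes the single teardown point: the RPC frees the server-side context, the guest VA is released, then the per-TSG unmap/patch/pm helpers and the preemption ctxsw buffers run, ending in a memset that re-arms the nvgpu_mem_is_valid() check. A hypothetical call-site sketch (the TSG release path lives outside this file):

	/* Hypothetical caller, e.g. on the last TSG reference drop. */
	if (tsg->vm) {
		g->ops.gr.free_gr_ctx(g, tsg->vm, &tsg->gr_ctx);
		nvgpu_vm_put(tsg->vm); /* pairs with nvgpu_vm_get() at alloc */
		tsg->vm = NULL;
	}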
@@ -474,7 +445,7 @@ static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c)
 
 static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg)
 {
-	struct gr_ctx_desc *gr_ctx = tsg->tsg_gr_ctx;
+	struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
 	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_tsg_bind_gr_ctx_params *p =
 		&msg.params.tsg_bind_gr_ctx;
@@ -495,7 +466,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 {
 	struct gk20a *g = c->g;
 	struct fifo_gk20a *f = &g->fifo;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct nvgpu_gr_ctx *gr_ctx = NULL;
 	struct tsg_gk20a *tsg = NULL;
 	int err = 0;
 
@@ -515,95 +486,87 @@
 	}
 	c->obj_class = class_num;
 
-	if (gk20a_is_channel_marked_as_tsg(c))
-		tsg = &f->tsg[c->tsgid];
+	if (!gk20a_is_channel_marked_as_tsg(c))
+		return -EINVAL;
 
-	if (!tsg) {
-		/* allocate gr ctx buffer */
-		if (!ch_ctx->gr_ctx) {
-			err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx,
-						c->vm,
-						class_num,
-						flags);
-			if (!err)
-				err = vgpu_gr_ch_bind_gr_ctx(c);
-			if (err) {
-				nvgpu_err(g, "fail to allocate gr ctx buffer");
-				goto out;
-			}
-		} else {
-			/*TBD: needs to be more subtle about which is
-			 * being allocated as some are allowed to be
-			 * allocated along same channel */
+	tsg = &f->tsg[c->tsgid];
+	gr_ctx = &tsg->gr_ctx;
+
+	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
+		tsg->vm = c->vm;
+		nvgpu_vm_get(tsg->vm);
+		err = g->ops.gr.alloc_gr_ctx(g, gr_ctx,
+					c->vm,
+					class_num,
+					flags);
+		if (!err)
+			err = vgpu_gr_tsg_bind_gr_ctx(tsg);
+		if (err) {
 			nvgpu_err(g,
-				"too many classes alloc'd on same channel");
-			err = -EINVAL;
+				"fail to allocate TSG gr ctx buffer, err=%d", err);
+			nvgpu_vm_put(tsg->vm);
+			tsg->vm = NULL;
 			goto out;
 		}
-	} else {
-		if (!tsg->tsg_gr_ctx) {
-			tsg->vm = c->vm;
-			nvgpu_vm_get(tsg->vm);
-			err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx,
-						c->vm,
-						class_num,
-						flags);
-			if (!err)
-				err = vgpu_gr_tsg_bind_gr_ctx(tsg);
-			if (err) {
-				nvgpu_err(g,
-					"fail to allocate TSG gr ctx buffer, err=%d", err);
-				nvgpu_vm_put(tsg->vm);
-				tsg->vm = NULL;
-				goto out;
-			}
-		}
 
-		ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
 		err = vgpu_gr_ch_bind_gr_ctx(c);
 		if (err) {
 			nvgpu_err(g, "fail to bind gr ctx buffer");
 			goto out;
 		}
-	}
 
-	/* commit gr ctx buffer */
-	err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
-	if (err) {
-		nvgpu_err(g, "fail to commit gr ctx buffer");
-		goto out;
-	}
+		/* commit gr ctx buffer */
+		err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
+		if (err) {
+			nvgpu_err(g, "fail to commit gr ctx buffer");
+			goto out;
+		}
 
-	/* allocate patch buffer */
-	if (ch_ctx->patch_ctx.mem.priv.pages == NULL) {
+		/* allocate patch buffer */
 		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
 		if (err) {
 			nvgpu_err(g, "fail to allocate patch buffer");
 			goto out;
 		}
-	}
 
-	/* map global buffer to channel gpu_va and commit */
-	if (!ch_ctx->global_ctx_buffer_mapped) {
+		/* map global buffer to channel gpu_va and commit */
 		err = vgpu_gr_map_global_ctx_buffers(g, c);
 		if (err) {
 			nvgpu_err(g, "fail to map global ctx buffer");
 			goto out;
 		}
-		vgpu_gr_commit_global_ctx_buffers(g, c, true);
-	}
 
-	/* load golden image */
-	if (!c->first_init) {
+		err = vgpu_gr_commit_global_ctx_buffers(g, c, true);
+		if (err) {
+			nvgpu_err(g, "fail to commit global ctx buffers");
+			goto out;
+		}
+
+		/* load golden image */
 		err = gr_gk20a_elpg_protected_call(g,
 				vgpu_gr_load_golden_ctx_image(g, c));
 		if (err) {
 			nvgpu_err(g, "fail to load golden ctx image");
 			goto out;
 		}
-		c->first_init = true;
+	} else {
+		err = vgpu_gr_ch_bind_gr_ctx(c);
+		if (err) {
+			nvgpu_err(g, "fail to bind gr ctx buffer");
+			goto out;
+		}
+
+		/* commit gr ctx buffer */
+		err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
+		if (err) {
+			nvgpu_err(g, "fail to commit gr ctx buffer");
+			goto out;
+		}
 	}
 
+	/* PM ctxt switch is off by default */
+	gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+
 	gk20a_dbg_fn("done");
 	return 0;
 out:
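
The rewritten vgpu_gr_alloc_obj_ctx() drops the bare-channel branch outright; condensed control flow of the hunk above (error handling elided):

	if (!gk20a_is_channel_marked_as_tsg(c))
		return -EINVAL; /* bare channels are no longer supported */

	tsg = &f->tsg[c->tsgid];
	gr_ctx = &tsg->gr_ctx;

	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
		/* first channel of the TSG: alloc ctx, bind TSG and channel,
		 * commit inst block, patch ctx, global buffers, golden image */
	} else {
		/* later channels: bind the channel and commit its inst block */
	}

	/* PM ctxt switch is off by default */
	gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();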
@@ -1055,15 +1018,30 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 		struct channel_gk20a *ch, bool enable)
 {
-	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
-	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *ch_ctx;
+	struct pm_ctx_desc *pm_ctx;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
 	int err;
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	ch_ctx = &tsg->gr_ctx;
+	pm_ctx = &ch_ctx->pm_ctx;
+
 	if (enable) {
+		/*
+		 * send command to enable HWPM only once - otherwise server
+		 * will return an error due to using the same GPU VA twice.
+		 */
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
+			return 0;
+
 		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
 
 		/* Allocate buffer if necessary */
@@ -1076,8 +1054,12 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 				return -ENOMEM;
 			pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
 		}
-	} else
+	} else {
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
+			return 0;
+
 		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+	}
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
 	msg.handle = vgpu_get_handle(g);
@@ -1086,8 +1068,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
+	err = err ? err : msg.ret;
+	if (!err)
+		pm_ctx->pm_mode = enable ?
+			ctxsw_prog_main_image_pm_mode_ctxsw_f() :
+			ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
 
-	return err ? err : msg.ret;
+	return err;
 }
 
 int vgpu_gr_clear_sm_error_state(struct gk20a *g,
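
The pm_mode field added to the TSG context makes vgpu_gr_update_hwpm_ctxsw_mode() idempotent: a repeated enable returns early instead of handing the server the same GPU VA twice, and the mode is cached only after both the transport and the server report success. The tail of the function, restated as a sketch:

	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
	WARN_ON(err || msg.ret);
	err = err ? err : msg.ret; /* fold server status into the return value */
	if (!err) /* record the acknowledged mode only on success */
		pm_ctx->pm_mode = enable ?
			ctxsw_prog_main_image_pm_mode_ctxsw_f() :
			ctxsw_prog_main_image_pm_mode_no_ctxsw_f();

	return err;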