Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c | 343
1 files changed, 165 insertions, 178 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
index e8790587..8f1c5d78 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
@@ -20,14 +20,18 @@
 
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/error_notifier.h>
 #include <nvgpu/dma.h>
 
 #include "vgpu.h"
 #include "gr_vgpu.h"
 #include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/channel_gk20a.h"
+#include "gk20a/tsg_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
+#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
 
 void vgpu_gr_detect_sm_arch(struct gk20a *g)
 {
@@ -152,8 +156,9 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
 	struct vm_gk20a *ch_vm = c->vm;
-	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
-	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+	struct tsg_gk20a *tsg;
+	u64 *g_bfr_va;
+	u64 *g_bfr_size;
 	struct gr_gk20a *gr = &g->gr;
 	u64 gpu_va;
 	u32 i;
@@ -161,7 +166,12 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
-	/* FIXME: add VPR support */
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
+	g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
 
 	/* Circular Buffer */
 	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
@@ -213,7 +223,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	if (err || msg.ret)
 		goto clean_up;
 
-	c->ch_ctx.global_ctx_buffer_mapped = true;
+	tsg->gr_ctx.global_ctx_buffer_mapped = true;
 	return 0;
 
 clean_up:
@@ -227,40 +237,33 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	return -ENOMEM;
 }
 
-static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
+static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg)
 {
-	struct vm_gk20a *ch_vm = c->vm;
-	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
-	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+	struct vm_gk20a *ch_vm = tsg->vm;
+	u64 *g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
+	u64 *g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
 	u32 i;
 
 	gk20a_dbg_fn("");
 
-	if (c->ch_ctx.global_ctx_buffer_mapped) {
-		struct tegra_vgpu_cmd_msg msg;
-		struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
-		int err;
+	if (tsg->gr_ctx.global_ctx_buffer_mapped) {
+		/* server will unmap on channel close */
 
-		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX;
-		msg.handle = vgpu_get_handle(c->g);
-		p->handle = c->virt_ctx;
-		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-		WARN_ON(err || msg.ret);
-	}
-
-	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
-		if (g_bfr_va[i]) {
-			__nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
-					gmmu_page_size_kernel);
-			g_bfr_va[i] = 0;
-			g_bfr_size[i] = 0;
+		for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
+			if (g_bfr_va[i]) {
+				__nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
+						gmmu_page_size_kernel);
+				g_bfr_va[i] = 0;
+				g_bfr_size[i] = 0;
+			}
 		}
+
+		tsg->gr_ctx.global_ctx_buffer_mapped = false;
 	}
-	c->ch_ctx.global_ctx_buffer_mapped = false;
 }
 
 int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
-			struct gr_ctx_desc **__gr_ctx,
+			struct nvgpu_gr_ctx *gr_ctx,
 			struct vm_gk20a *vm,
 			u32 class,
 			u32 flags)
@@ -268,7 +271,6 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
 	struct gr_gk20a *gr = &g->gr;
-	struct gr_ctx_desc *gr_ctx;
 	int err;
 
 	gk20a_dbg_fn("");
@@ -280,19 +282,14 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 	gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
 	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 
-	gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
-	if (!gr_ctx)
-		return -ENOMEM;
-
-	gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
 	gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm,
-						gr_ctx->mem.size,
+						gr->ctx_vars.buffer_total_size,
 						gmmu_page_size_kernel);
 
-	if (!gr_ctx->mem.gpu_va) {
-		nvgpu_kfree(g, gr_ctx);
+	if (!gr_ctx->mem.gpu_va)
 		return -ENOMEM;
-	}
+	gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
+	gr_ctx->mem.aperture = APERTURE_SYSMEM;
 
 	msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC;
 	msg.handle = vgpu_get_handle(g);
@@ -306,57 +303,19 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 		nvgpu_err(g, "fail to alloc gr_ctx");
 		__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
 				gmmu_page_size_kernel);
-		nvgpu_kfree(g, gr_ctx);
+		gr_ctx->mem.aperture = APERTURE_INVALID;
 	} else {
 		gr_ctx->virt_ctx = p->gr_ctx_handle;
-		*__gr_ctx = gr_ctx;
 	}
 
 	return err;
 }
 
-void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
-			struct gr_ctx_desc *gr_ctx)
-{
-	struct tegra_vgpu_cmd_msg msg;
-	struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (!gr_ctx || !gr_ctx->mem.gpu_va)
-		return;
-
-
-	msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
-	msg.handle = vgpu_get_handle(g);
-	p->gr_ctx_handle = gr_ctx->virt_ctx;
-	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-	WARN_ON(err || msg.ret);
-
-	__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
-			gmmu_page_size_kernel);
-
-	nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
-	nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
-
-	nvgpu_kfree(g, gr_ctx);
-}
-
-static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
-{
-	gk20a_dbg_fn("");
-
-	c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
-	c->ch_ctx.gr_ctx = NULL;
-}
-
 static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 					struct channel_gk20a *c)
 {
-	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
+	struct tsg_gk20a *tsg;
+	struct patch_desc *patch_ctx;
 	struct vm_gk20a *ch_vm = c->vm;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
@@ -364,6 +323,11 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	patch_ctx = &tsg->gr_ctx.patch_ctx;
 	patch_ctx->mem.size = 128 * sizeof(u32);
 	patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm,
 						patch_ctx->mem.size,
@@ -385,37 +349,25 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 	return err;
 }
 
-static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
+static void vgpu_gr_free_channel_patch_ctx(struct tsg_gk20a *tsg)
 {
-	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
-	struct vm_gk20a *ch_vm = c->vm;
+	struct patch_desc *patch_ctx = &tsg->gr_ctx.patch_ctx;
 
 	gk20a_dbg_fn("");
 
 	if (patch_ctx->mem.gpu_va) {
-		struct tegra_vgpu_cmd_msg msg;
-		struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
-		int err;
+		/* server will free on channel close */
 
-		msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX;
-		msg.handle = vgpu_get_handle(c->g);
-		p->handle = c->virt_ctx;
-		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-		WARN_ON(err || msg.ret);
-
-		__nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+		__nvgpu_vm_free_va(tsg->vm, patch_ctx->mem.gpu_va,
 				gmmu_page_size_kernel);
 		patch_ctx->mem.gpu_va = 0;
 	}
 }
 
-static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
+static void vgpu_gr_free_channel_pm_ctx(struct tsg_gk20a *tsg)
 {
-	struct tegra_vgpu_cmd_msg msg;
-	struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct nvgpu_gr_ctx *ch_ctx = &tsg->gr_ctx;
 	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
-	int err;
 
 	gk20a_dbg_fn("");
 
@@ -423,44 +375,63 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	if (pm_ctx->mem.gpu_va == 0)
 		return;
 
-	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX;
-	msg.handle = vgpu_get_handle(c->g);
-	p->handle = c->virt_ctx;
-	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-	WARN_ON(err || msg.ret);
+	/* server will free on channel close */
 
-	__nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va,
+	__nvgpu_vm_free_va(tsg->vm, pm_ctx->mem.gpu_va,
 			gmmu_page_size_kernel);
 	pm_ctx->mem.gpu_va = 0;
 }
 
-void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg)
+void vgpu_gr_free_gr_ctx(struct gk20a *g,
+		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
 {
+	struct tsg_gk20a *tsg;
+
 	gk20a_dbg_fn("");
 
-	if (c->g->ops.fifo.free_channel_ctx_header)
-		c->g->ops.fifo.free_channel_ctx_header(c);
-	vgpu_gr_unmap_global_ctx_buffers(c);
-	vgpu_gr_free_channel_patch_ctx(c);
-	vgpu_gr_free_channel_pm_ctx(c);
-	if (!is_tsg)
-		vgpu_gr_free_channel_gr_ctx(c);
+	if (gr_ctx->mem.gpu_va) {
+		struct tegra_vgpu_cmd_msg msg;
+		struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+		int err;
 
-	/* zcull_ctx, pm_ctx */
+		msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
+		msg.handle = vgpu_get_handle(g);
+		p->gr_ctx_handle = gr_ctx->virt_ctx;
+		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+		WARN_ON(err || msg.ret);
 
-	memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
+		__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
+				gmmu_page_size_kernel);
+
+		tsg = &g->fifo.tsg[gr_ctx->tsgid];
+		vgpu_gr_unmap_global_ctx_buffers(tsg);
+		vgpu_gr_free_channel_patch_ctx(tsg);
+		vgpu_gr_free_channel_pm_ctx(tsg);
+
+		nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
+		nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
+		nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
+		nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
 
-	c->first_init = false;
+		memset(gr_ctx, 0, sizeof(*gr_ctx));
+	}
 }
 
 static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c)
 {
-	struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 	struct tegra_vgpu_cmd_msg msg = {0};
 	struct tegra_vgpu_channel_bind_gr_ctx_params *p =
 		&msg.params.ch_bind_gr_ctx;
 	int err;
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX;
 	msg.handle = vgpu_get_handle(c->g);
 	p->ch_handle = c->virt_ctx;
@@ -495,7 +466,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 {
 	struct gk20a *g = c->g;
 	struct fifo_gk20a *f = &g->fifo;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct nvgpu_gr_ctx *gr_ctx = NULL;
 	struct tsg_gk20a *tsg = NULL;
 	int err = 0;
 
@@ -515,95 +486,87 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
 	}
 	c->obj_class = class_num;
 
-	if (gk20a_is_channel_marked_as_tsg(c))
-		tsg = &f->tsg[c->tsgid];
+	if (!gk20a_is_channel_marked_as_tsg(c))
+		return -EINVAL;
 
-	if (!tsg) {
-		/* allocate gr ctx buffer */
-		if (!ch_ctx->gr_ctx) {
-			err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx,
-						c->vm,
-						class_num,
-						flags);
-			if (!err)
-				err = vgpu_gr_ch_bind_gr_ctx(c);
-			if (err) {
-				nvgpu_err(g, "fail to allocate gr ctx buffer");
-				goto out;
-			}
-		} else {
-			/*TBD: needs to be more subtle about which is
-			 * being allocated as some are allowed to be
-			 * allocated along same channel */
+	tsg = &f->tsg[c->tsgid];
+	gr_ctx = &tsg->gr_ctx;
+
+	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
+		tsg->vm = c->vm;
+		nvgpu_vm_get(tsg->vm);
+		err = g->ops.gr.alloc_gr_ctx(g, gr_ctx,
+					c->vm,
+					class_num,
+					flags);
+		if (!err)
+			err = vgpu_gr_tsg_bind_gr_ctx(tsg);
+		if (err) {
 			nvgpu_err(g,
-				"too many classes alloc'd on same channel");
-			err = -EINVAL;
+				"fail to allocate TSG gr ctx buffer, err=%d", err);
+			nvgpu_vm_put(tsg->vm);
+			tsg->vm = NULL;
 			goto out;
 		}
-	} else {
-		if (!tsg->tsg_gr_ctx) {
-			tsg->vm = c->vm;
-			nvgpu_vm_get(tsg->vm);
-			err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx,
-						c->vm,
-						class_num,
-						flags);
-			if (!err)
-				err = vgpu_gr_tsg_bind_gr_ctx(tsg);
-			if (err) {
-				nvgpu_err(g,
-				"fail to allocate TSG gr ctx buffer, err=%d", err);
-				nvgpu_vm_put(tsg->vm);
-				tsg->vm = NULL;
-				goto out;
-			}
-		}
 
-		ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
 		err = vgpu_gr_ch_bind_gr_ctx(c);
 		if (err) {
 			nvgpu_err(g, "fail to bind gr ctx buffer");
 			goto out;
 		}
-	}
 
-	/* commit gr ctx buffer */
-	err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
-	if (err) {
-		nvgpu_err(g, "fail to commit gr ctx buffer");
-		goto out;
-	}
+		/* commit gr ctx buffer */
+		err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
+		if (err) {
+			nvgpu_err(g, "fail to commit gr ctx buffer");
+			goto out;
+		}
 
-	/* allocate patch buffer */
-	if (ch_ctx->patch_ctx.mem.priv.pages == NULL) {
+		/* allocate patch buffer */
 		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
 		if (err) {
 			nvgpu_err(g, "fail to allocate patch buffer");
 			goto out;
 		}
-	}
 
-	/* map global buffer to channel gpu_va and commit */
-	if (!ch_ctx->global_ctx_buffer_mapped) {
+		/* map global buffer to channel gpu_va and commit */
 		err = vgpu_gr_map_global_ctx_buffers(g, c);
 		if (err) {
 			nvgpu_err(g, "fail to map global ctx buffer");
 			goto out;
 		}
-		vgpu_gr_commit_global_ctx_buffers(g, c, true);
-	}
 
-	/* load golden image */
-	if (!c->first_init) {
+		err = vgpu_gr_commit_global_ctx_buffers(g, c, true);
+		if (err) {
+			nvgpu_err(g, "fail to commit global ctx buffers");
+			goto out;
+		}
+
+		/* load golden image */
 		err = gr_gk20a_elpg_protected_call(g,
 				vgpu_gr_load_golden_ctx_image(g, c));
 		if (err) {
 			nvgpu_err(g, "fail to load golden ctx image");
 			goto out;
 		}
-		c->first_init = true;
+	} else {
+		err = vgpu_gr_ch_bind_gr_ctx(c);
+		if (err) {
+			nvgpu_err(g, "fail to bind gr ctx buffer");
+			goto out;
+		}
+
+		/* commit gr ctx buffer */
+		err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
+		if (err) {
+			nvgpu_err(g, "fail to commit gr ctx buffer");
+			goto out;
+		}
 	}
 
+	/* PM ctxt switch is off by default */
+	gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+
 	gk20a_dbg_fn("done");
 	return 0;
 out:
@@ -1055,15 +1018,30 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 	struct channel_gk20a *ch, bool enable)
 {
-	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
-	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *ch_ctx;
+	struct pm_ctx_desc *pm_ctx;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
 	int err;
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	ch_ctx = &tsg->gr_ctx;
+	pm_ctx = &ch_ctx->pm_ctx;
+
 	if (enable) {
+		/*
+		 * send command to enable HWPM only once - otherwise server
+		 * will return an error due to using the same GPU VA twice.
+		 */
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
+			return 0;
+
 		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
 
 		/* Allocate buffer if necessary */
@@ -1076,8 +1054,12 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 				return -ENOMEM;
 			pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
 		}
-	} else
+	} else {
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
+			return 0;
+
 		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+	}
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
 	msg.handle = vgpu_get_handle(g);
@@ -1086,8 +1068,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
+	err = err ? err : msg.ret;
+	if (!err)
+		pm_ctx->pm_mode = enable ?
+			ctxsw_prog_main_image_pm_mode_ctxsw_f() :
+			ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
 
-	return err ? err : msg.ret;
+	return err;
 }
 
 int vgpu_gr_clear_sm_error_state(struct gk20a *g,