-rw-r--r--	drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c	178
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a.h	6
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	178
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.h	6
-rw-r--r--	drivers/gpu/nvgpu/gm20b/gr_gm20b.c	7
5 files changed, 214 insertions(+), 161 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 97125a99..5661b402 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -24,14 +24,7 @@
 #include <linux/delay.h>
 
 #include "gk20a.h"
-#include "gr_gk20a.h"
 #include "fence_gk20a.h"
-#include "regops_gk20a.h"
-
-#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_timer_gk20a.h>
-
 
 #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7) ? 0xffff : (a >> 32) ? \
 	(u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
@@ -342,47 +335,16 @@ static int nvgpu_gpu_ioctl_inval_icache(
 	struct gk20a *g,
 	struct nvgpu_gpu_inval_icache_args *args)
 {
-
-	int err = 0;
-	u32 cache_ctrl, regval;
 	struct channel_gk20a *ch;
-	struct nvgpu_dbg_gpu_reg_op ops;
+	int err;
 
 	ch = gk20a_get_channel_from_file(args->channel_fd);
 	if (!ch)
 		return -EINVAL;
 
-	ops.op = REGOP(READ_32);
-	ops.type = REGOP(TYPE_GR_CTX);
-	ops.status = REGOP(STATUS_SUCCESS);
-	ops.value_hi = 0;
-	ops.and_n_mask_lo = 0;
-	ops.and_n_mask_hi = 0;
-	ops.offset = gr_pri_gpc0_gcc_dbg_r();
-
 	/* Take the global lock, since we'll be doing global regops */
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
-
-	regval = ops.value_lo;
-
-	if (!err) {
-		ops.op = REGOP(WRITE_32);
-		ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
-		err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
-	}
-
-	if (err) {
-		gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
-		goto end;
-	}
-
-	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
-	cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
-	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
-
-end:
+	err = g->ops.gr.inval_icache(g, ch);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -428,20 +390,10 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
 
 static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g)
 {
-	int err = 0;
-	u32 dbgr_control0;
+	int err;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-	/* assert stop trigger. uniformity assumption: all SMs will have
-	 * the same state in dbg_control0. */
-	dbgr_control0 =
-		gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
-	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
-
-	/* broadcast write */
-	gk20a_writel(g,
-		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
-
+	err = g->ops.gr.trigger_suspend(g);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -451,41 +403,16 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 {
 	int err = 0;
 	struct warpstate *w_state;
-	struct gr_gk20a *gr = &g->gr;
-	u32 gpc, tpc, sm_count, sm_id, size;
-	u32 global_mask;
+	u32 sm_count, size;
 
 	sm_count = g->gr.gpc_count * g->gr.tpc_count;
 	size = sm_count * sizeof(struct warpstate);
 	w_state = kzalloc(size, GFP_KERNEL);
-
-	/* Wait for the SMs to reach full stop. This condition is:
-	 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
-	 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
-	 * masks.
-	 */
-	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
-		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
-		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+	if (!w_state)
+		return -ENOMEM;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-	/* Lock down all SMs */
-	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
-
-		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
-		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
-
-		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
-
-		if (err) {
-			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
-			goto end;
-		}
-	}
-
-	/* Read the warp status */
-	g->ops.gr.bpt_reg_info(g, w_state);
+	g->ops.gr.wait_for_pause(g, w_state);
 
 	/* Copy to user space - pointed by "args->pwarpstate" */
 	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
@@ -493,7 +420,6 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 		err = -EFAULT;
 	}
 
-end:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	kfree(w_state);
 	return err;
@@ -504,82 +430,29 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g)
 	int err = 0;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-	/* Clear the pause mask to tell the GPU we want to resume everyone */
-	gk20a_writel(g,
-		gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);
-
-	/* explicitly re-enable forwarding of SM interrupts upon any resume */
-	gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
-		gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
-
-	/* Now resume all sms, write a 0 to the stop trigger
-	 * then a 1 to the run trigger */
-	gk20a_resume_all_sms(g);
-
+	err = g->ops.gr.resume_from_pause(g);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
 
 static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g)
 {
-	int ret = 0;
-	u32 gpc_offset, tpc_offset, gpc, tpc;
-	struct gr_gk20a *gr = &g->gr;
-	u32 global_esr;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-
-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-
-		gpc_offset = gpc_stride * gpc;
-
-		/* check if any tpc has an exception */
-		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
-
-			tpc_offset = tpc_in_gpc_stride * tpc;
-
-			global_esr = gk20a_readl(g,
-					gr_gpc0_tpc0_sm_hww_global_esr_r() +
-					gpc_offset + tpc_offset);
-
-			/* clear the hwws, also causes tpc and gpc
-			 * exceptions to be cleared */
-			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
-		}
-	}
-
-	return ret;
+	return g->ops.gr.clear_sm_errors(g);
 }
 
 static int nvgpu_gpu_ioctl_has_any_exception(
 	struct gk20a *g,
 	struct nvgpu_gpu_tpc_exception_en_status_args *args)
 {
-	int err = 0;
-	struct gr_gk20a *gr = &g->gr;
-	u32 sm_id, tpc_exception_en = 0;
-	u32 offset, regval, tpc_offset, gpc_offset;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 tpc_exception_en;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
-
-		tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
-		gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
-		offset = tpc_offset + gpc_offset;
-
-		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
-				offset);
-		/* Each bit represents corresponding enablement state, bit 0 corrsponds to SM0 */
-		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
-	}
-
+	tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
 	args->tpc_exception_en_sm_mask = tpc_exception_en;
-	return err;
+
+	return 0;
 }
 
 static int gk20a_ctrl_get_num_vsms(struct gk20a *g,
@@ -648,8 +521,6 @@ static inline int get_timestamps_zipper(struct gk20a *g,
 {
 	int err = 0;
 	unsigned int i = 0;
-	u32 gpu_timestamp_hi_new = 0;
-	u32 gpu_timestamp_hi_old = 0;
 
 	if (gk20a_busy(g)) {
 		gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
@@ -657,25 +528,12 @@ static inline int get_timestamps_zipper(struct gk20a *g,
 		goto end;
 	}
 
-	/* get zipper reads of gpu and cpu counter values */
-	gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
 	for (i = 0; i < args->count; i++) {
-		u32 gpu_timestamp_lo = 0;
-		u32 gpu_timestamp_hi = 0;
+		err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp);
+		if (err)
+			return err;
 
-		gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
 		args->samples[i].cpu_timestamp = get_cpu_timestamp();
-		rmb(); /* maintain zipper read order */
-		gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
-
-		/* pick the appropriate gpu counter hi bits */
-		gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
-			gpu_timestamp_hi_old : gpu_timestamp_hi_new;
-
-		args->samples[i].gpu_timestamp =
-			((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
-
-		gpu_timestamp_hi_old = gpu_timestamp_hi_new;
 	}
 
 end:
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 4f50ae36..951c8267 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -346,6 +346,12 @@ struct gpu_ops {
 			struct mem_desc *mem, u64 gpu_va);
 		void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine);
 		void (*load_tpc_mask)(struct gk20a *g);
+		int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch);
+		int (*trigger_suspend)(struct gk20a *g);
+		int (*wait_for_pause)(struct gk20a *g, struct warpstate *w_state);
+		int (*resume_from_pause)(struct gk20a *g);
+		int (*clear_sm_errors)(struct gk20a *g);
+		u32 (*tpc_enabled_exceptions)(struct gk20a *g);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3df19a4e..172931d7 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -9291,6 +9291,178 @@ static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr,
 {
 }
 
+int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch)
+{
+	int err = 0;
+	u32 cache_ctrl, regval;
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ops.op = REGOP(READ_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = 0;
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_pri_gpc0_gcc_dbg_r();
+
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to read register");
+		return err;
+	}
+
+	regval = ops.value_lo;
+
+	ops.op = REGOP(WRITE_32);
+	ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to write register");
+		return err;
+	}
+
+	ops.op = REGOP(READ_32);
+	ops.offset = gr_pri_gpc0_tpc0_sm_cache_control_r();
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to read register");
+		return err;
+	}
+
+	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
+	cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
+
+	return 0;
+}
+
+int gr_gk20a_trigger_suspend(struct gk20a *g)
+{
+	int err = 0;
+	u32 dbgr_control0;
+
+	/* assert stop trigger. uniformity assumption: all SMs will have
+	 * the same state in dbg_control0. */
+	dbgr_control0 =
+		gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
+	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
+
+	/* broadcast write */
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
+
+	return err;
+}
+
+int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
+{
+	int err = 0;
+	struct gr_gk20a *gr = &g->gr;
+	u32 gpc, tpc, sm_id;
+	u32 global_mask;
+
+	/* Wait for the SMs to reach full stop. This condition is:
+	 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
+	 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
+	 * masks.
+	 */
+	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+
+	/* Lock down all SMs */
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
+
+		if (err) {
+			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
+			return err;
+		}
+	}
+
+	/* Read the warp status */
+	g->ops.gr.bpt_reg_info(g, w_state);
+
+	return 0;
+}
+
+int gr_gk20a_resume_from_pause(struct gk20a *g)
+{
+	int err = 0;
+
+	/* Clear the pause mask to tell the GPU we want to resume everyone */
+	gk20a_writel(g,
+		gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);
+
+	/* explicitly re-enable forwarding of SM interrupts upon any resume */
+	gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
+		gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
+
+	/* Now resume all sms, write a 0 to the stop trigger
+	 * then a 1 to the run trigger */
+	gk20a_resume_all_sms(g);
+
+	return err;
+}
+
+int gr_gk20a_clear_sm_errors(struct gk20a *g)
+{
+	int ret = 0;
+	u32 gpc_offset, tpc_offset, gpc, tpc;
+	struct gr_gk20a *gr = &g->gr;
+	u32 global_esr;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+
+	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
+
+		gpc_offset = gpc_stride * gpc;
+
+		/* check if any tpc has an exception */
+		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
+
+			tpc_offset = tpc_in_gpc_stride * tpc;
+
+			global_esr = gk20a_readl(g,
+					gr_gpc0_tpc0_sm_hww_global_esr_r() +
+					gpc_offset + tpc_offset);
+
+			/* clear the hwws, also causes tpc and gpc
+			 * exceptions to be cleared */
+			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
+		}
+	}
+
+	return ret;
+}
+
+u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 sm_id, tpc_exception_en = 0;
+	u32 offset, regval, tpc_offset, gpc_offset;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
+		gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
+		offset = tpc_offset + gpc_offset;
+
+		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
+				offset);
+		/* Each bit represents corresponding enablement state, bit 0 corrsponds to SM0 */
+		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
+	}
+
+	return tpc_exception_en;
+}
+
 void gk20a_init_gr_ops(struct gpu_ops *gops)
 {
 	gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -9376,4 +9548,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr;
 	gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
 	gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode;
+	gops->gr.inval_icache = gr_gk20a_inval_icache;
+	gops->gr.trigger_suspend = gr_gk20a_trigger_suspend;
+	gops->gr.wait_for_pause = gr_gk20a_wait_for_pause;
+	gops->gr.resume_from_pause = gr_gk20a_resume_from_pause;
+	gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors;
+	gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 2dd1eaf5..33721f08 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -704,6 +704,12 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
 		struct dbg_session_gk20a *dbg_s,
 		int *ctx_resident_ch_fd);
 void gk20a_gr_enable_gpc_exceptions(struct gk20a *g);
+int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch);
+int gr_gk20a_trigger_suspend(struct gk20a *g);
+int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state);
+int gr_gk20a_resume_from_pause(struct gk20a *g);
+int gr_gk20a_clear_sm_errors(struct gk20a *g);
+u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
 
 int gr_gk20a_commit_global_timeslice(struct gk20a *g,
 		struct channel_gk20a *c, bool patch);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 4f4b8d4a..a43fcdab 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1609,5 +1609,10 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr;
 	gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode;
 	gops->gr.load_tpc_mask = gr_gm20b_load_tpc_mask;
-
+	gops->gr.inval_icache = gr_gk20a_inval_icache;
+	gops->gr.trigger_suspend = gr_gk20a_trigger_suspend;
+	gops->gr.wait_for_pause = gr_gk20a_wait_for_pause;
+	gops->gr.resume_from_pause = gr_gk20a_resume_from_pause;
+	gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors;
+	gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions;
 }