diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2017-03-23 14:03:15 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-03-29 12:15:59 -0400 |
commit | 4022b989aa2e91fe77ed52df49d45838f6d8b9bb (patch) | |
tree | 4c8240ac83887c21db902a255306c67041c4525c | |
parent | f04031e5e8837abb2be3feb0ee30e1af54de7845 (diff) |
gpu: nvgpu: Remove direct HW access from ctrl_gk20a.c
ctrl_gk20a.c had some direct accesses to hardware. These violate the
HAL rules, because we don't have per-GPU ctrl, and thus the code
cannot be made GPU independent.
Move all GR accesses to new GR HALs and use existing bus HAL for
accessing timer. Remove #includes of all hardware headers.
JIRA NVGPU-28
Change-Id: I57e67519f62e9bd6c3e725e1bef6e366190f5834
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1327001
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 178 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 178 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 7 |
5 files changed, 214 insertions, 161 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 97125a99..5661b402 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | |||
@@ -24,14 +24,7 @@ | |||
24 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
25 | 25 | ||
26 | #include "gk20a.h" | 26 | #include "gk20a.h" |
27 | #include "gr_gk20a.h" | ||
28 | #include "fence_gk20a.h" | 27 | #include "fence_gk20a.h" |
29 | #include "regops_gk20a.h" | ||
30 | |||
31 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | ||
32 | #include <nvgpu/hw/gk20a/hw_fb_gk20a.h> | ||
33 | #include <nvgpu/hw/gk20a/hw_timer_gk20a.h> | ||
34 | |||
35 | 28 | ||
36 | #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7) ? 0xffff : (a >> 32) ? \ | 29 | #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7) ? 0xffff : (a >> 32) ? \ |
37 | (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) | 30 | (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ)) |
@@ -342,47 +335,16 @@ static int nvgpu_gpu_ioctl_inval_icache( | |||
342 | struct gk20a *g, | 335 | struct gk20a *g, |
343 | struct nvgpu_gpu_inval_icache_args *args) | 336 | struct nvgpu_gpu_inval_icache_args *args) |
344 | { | 337 | { |
345 | |||
346 | int err = 0; | ||
347 | u32 cache_ctrl, regval; | ||
348 | struct channel_gk20a *ch; | 338 | struct channel_gk20a *ch; |
349 | struct nvgpu_dbg_gpu_reg_op ops; | 339 | int err; |
350 | 340 | ||
351 | ch = gk20a_get_channel_from_file(args->channel_fd); | 341 | ch = gk20a_get_channel_from_file(args->channel_fd); |
352 | if (!ch) | 342 | if (!ch) |
353 | return -EINVAL; | 343 | return -EINVAL; |
354 | 344 | ||
355 | ops.op = REGOP(READ_32); | ||
356 | ops.type = REGOP(TYPE_GR_CTX); | ||
357 | ops.status = REGOP(STATUS_SUCCESS); | ||
358 | ops.value_hi = 0; | ||
359 | ops.and_n_mask_lo = 0; | ||
360 | ops.and_n_mask_hi = 0; | ||
361 | ops.offset = gr_pri_gpc0_gcc_dbg_r(); | ||
362 | |||
363 | /* Take the global lock, since we'll be doing global regops */ | 345 | /* Take the global lock, since we'll be doing global regops */ |
364 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 346 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
365 | 347 | err = g->ops.gr.inval_icache(g, ch); | |
366 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
367 | |||
368 | regval = ops.value_lo; | ||
369 | |||
370 | if (!err) { | ||
371 | ops.op = REGOP(WRITE_32); | ||
372 | ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1); | ||
373 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0); | ||
374 | } | ||
375 | |||
376 | if (err) { | ||
377 | gk20a_err(dev_from_gk20a(g), "Failed to access register\n"); | ||
378 | goto end; | ||
379 | } | ||
380 | |||
381 | cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r()); | ||
382 | cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1); | ||
383 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl); | ||
384 | |||
385 | end: | ||
386 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 348 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
387 | return err; | 349 | return err; |
388 | } | 350 | } |
@@ -428,20 +390,10 @@ static int nvgpu_gpu_ioctl_set_debug_mode( | |||
428 | 390 | ||
429 | static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) | 391 | static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g) |
430 | { | 392 | { |
431 | int err = 0; | 393 | int err; |
432 | u32 dbgr_control0; | ||
433 | 394 | ||
434 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 395 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
435 | /* assert stop trigger. uniformity assumption: all SMs will have | 396 | err = g->ops.gr.trigger_suspend(g); |
436 | * the same state in dbg_control0. */ | ||
437 | dbgr_control0 = | ||
438 | gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); | ||
439 | dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(); | ||
440 | |||
441 | /* broadcast write */ | ||
442 | gk20a_writel(g, | ||
443 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); | ||
444 | |||
445 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 397 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
446 | return err; | 398 | return err; |
447 | } | 399 | } |
@@ -451,41 +403,16 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, | |||
451 | { | 403 | { |
452 | int err = 0; | 404 | int err = 0; |
453 | struct warpstate *w_state; | 405 | struct warpstate *w_state; |
454 | struct gr_gk20a *gr = &g->gr; | 406 | u32 sm_count, size; |
455 | u32 gpc, tpc, sm_count, sm_id, size; | ||
456 | u32 global_mask; | ||
457 | 407 | ||
458 | sm_count = g->gr.gpc_count * g->gr.tpc_count; | 408 | sm_count = g->gr.gpc_count * g->gr.tpc_count; |
459 | size = sm_count * sizeof(struct warpstate); | 409 | size = sm_count * sizeof(struct warpstate); |
460 | w_state = kzalloc(size, GFP_KERNEL); | 410 | w_state = kzalloc(size, GFP_KERNEL); |
461 | 411 | if (!w_state) | |
462 | /* Wait for the SMs to reach full stop. This condition is: | 412 | return -ENOMEM; |
463 | * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE) | ||
464 | * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp | ||
465 | * masks. | ||
466 | */ | ||
467 | global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() | | ||
468 | gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() | | ||
469 | gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(); | ||
470 | 413 | ||
471 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 414 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
472 | 415 | g->ops.gr.wait_for_pause(g, w_state); | |
473 | /* Lock down all SMs */ | ||
474 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
475 | |||
476 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
477 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
478 | |||
479 | err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false); | ||
480 | |||
481 | if (err) { | ||
482 | gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n"); | ||
483 | goto end; | ||
484 | } | ||
485 | } | ||
486 | |||
487 | /* Read the warp status */ | ||
488 | g->ops.gr.bpt_reg_info(g, w_state); | ||
489 | 416 | ||
490 | /* Copy to user space - pointed by "args->pwarpstate" */ | 417 | /* Copy to user space - pointed by "args->pwarpstate" */ |
491 | if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) { | 418 | if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) { |
@@ -493,7 +420,6 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, | |||
493 | err = -EFAULT; | 420 | err = -EFAULT; |
494 | } | 421 | } |
495 | 422 | ||
496 | end: | ||
497 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 423 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
498 | kfree(w_state); | 424 | kfree(w_state); |
499 | return err; | 425 | return err; |
@@ -504,82 +430,29 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g) | |||
504 | int err = 0; | 430 | int err = 0; |
505 | 431 | ||
506 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 432 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
507 | 433 | err = g->ops.gr.resume_from_pause(g); | |
508 | /* Clear the pause mask to tell the GPU we want to resume everyone */ | ||
509 | gk20a_writel(g, | ||
510 | gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0); | ||
511 | |||
512 | /* explicitly re-enable forwarding of SM interrupts upon any resume */ | ||
513 | gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), | ||
514 | gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); | ||
515 | |||
516 | /* Now resume all sms, write a 0 to the stop trigger | ||
517 | * then a 1 to the run trigger */ | ||
518 | gk20a_resume_all_sms(g); | ||
519 | |||
520 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 434 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
521 | return err; | 435 | return err; |
522 | } | 436 | } |
523 | 437 | ||
524 | static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) | 438 | static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g) |
525 | { | 439 | { |
526 | int ret = 0; | 440 | return g->ops.gr.clear_sm_errors(g); |
527 | u32 gpc_offset, tpc_offset, gpc, tpc; | ||
528 | struct gr_gk20a *gr = &g->gr; | ||
529 | u32 global_esr; | ||
530 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
531 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
532 | |||
533 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | ||
534 | |||
535 | gpc_offset = gpc_stride * gpc; | ||
536 | |||
537 | /* check if any tpc has an exception */ | ||
538 | for (tpc = 0; tpc < gr->tpc_count; tpc++) { | ||
539 | |||
540 | tpc_offset = tpc_in_gpc_stride * tpc; | ||
541 | |||
542 | global_esr = gk20a_readl(g, | ||
543 | gr_gpc0_tpc0_sm_hww_global_esr_r() + | ||
544 | gpc_offset + tpc_offset); | ||
545 | |||
546 | /* clear the hwws, also causes tpc and gpc | ||
547 | * exceptions to be cleared */ | ||
548 | gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr); | ||
549 | } | ||
550 | } | ||
551 | |||
552 | return ret; | ||
553 | } | 441 | } |
554 | 442 | ||
555 | static int nvgpu_gpu_ioctl_has_any_exception( | 443 | static int nvgpu_gpu_ioctl_has_any_exception( |
556 | struct gk20a *g, | 444 | struct gk20a *g, |
557 | struct nvgpu_gpu_tpc_exception_en_status_args *args) | 445 | struct nvgpu_gpu_tpc_exception_en_status_args *args) |
558 | { | 446 | { |
559 | int err = 0; | 447 | u32 tpc_exception_en; |
560 | struct gr_gk20a *gr = &g->gr; | ||
561 | u32 sm_id, tpc_exception_en = 0; | ||
562 | u32 offset, regval, tpc_offset, gpc_offset; | ||
563 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
564 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
565 | 448 | ||
566 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 449 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
567 | 450 | tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g); | |
568 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
569 | |||
570 | tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index; | ||
571 | gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index; | ||
572 | offset = tpc_offset + gpc_offset; | ||
573 | |||
574 | regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + | ||
575 | offset); | ||
576 | /* Each bit represents corresponding enablement state, bit 0 corresponds to SM0 */ | ||
577 | tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id; | ||
578 | } | ||
579 | |||
580 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 451 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
452 | |||
581 | args->tpc_exception_en_sm_mask = tpc_exception_en; | 453 | args->tpc_exception_en_sm_mask = tpc_exception_en; |
582 | return err; | 454 | |
455 | return 0; | ||
583 | } | 456 | } |
584 | 457 | ||
585 | static int gk20a_ctrl_get_num_vsms(struct gk20a *g, | 458 | static int gk20a_ctrl_get_num_vsms(struct gk20a *g, |
@@ -648,8 +521,6 @@ static inline int get_timestamps_zipper(struct gk20a *g, | |||
648 | { | 521 | { |
649 | int err = 0; | 522 | int err = 0; |
650 | unsigned int i = 0; | 523 | unsigned int i = 0; |
651 | u32 gpu_timestamp_hi_new = 0; | ||
652 | u32 gpu_timestamp_hi_old = 0; | ||
653 | 524 | ||
654 | if (gk20a_busy(g)) { | 525 | if (gk20a_busy(g)) { |
655 | gk20a_err(dev_from_gk20a(g), "GPU not powered on\n"); | 526 | gk20a_err(dev_from_gk20a(g), "GPU not powered on\n"); |
@@ -657,25 +528,12 @@ static inline int get_timestamps_zipper(struct gk20a *g, | |||
657 | goto end; | 528 | goto end; |
658 | } | 529 | } |
659 | 530 | ||
660 | /* get zipper reads of gpu and cpu counter values */ | ||
661 | gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r()); | ||
662 | for (i = 0; i < args->count; i++) { | 531 | for (i = 0; i < args->count; i++) { |
663 | u32 gpu_timestamp_lo = 0; | 532 | err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp); |
664 | u32 gpu_timestamp_hi = 0; | 533 | if (err) |
534 | return err; | ||
665 | 535 | ||
666 | gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r()); | ||
667 | args->samples[i].cpu_timestamp = get_cpu_timestamp(); | 536 | args->samples[i].cpu_timestamp = get_cpu_timestamp(); |
668 | rmb(); /* maintain zipper read order */ | ||
669 | gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r()); | ||
670 | |||
671 | /* pick the appropriate gpu counter hi bits */ | ||
672 | gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ? | ||
673 | gpu_timestamp_hi_old : gpu_timestamp_hi_new; | ||
674 | |||
675 | args->samples[i].gpu_timestamp = | ||
676 | ((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo; | ||
677 | |||
678 | gpu_timestamp_hi_old = gpu_timestamp_hi_new; | ||
679 | } | 537 | } |
680 | 538 | ||
681 | end: | 539 | end: |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 4f50ae36..951c8267 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -346,6 +346,12 @@ struct gpu_ops { | |||
346 | struct mem_desc *mem, u64 gpu_va); | 346 | struct mem_desc *mem, u64 gpu_va); |
347 | void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); | 347 | void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); |
348 | void (*load_tpc_mask)(struct gk20a *g); | 348 | void (*load_tpc_mask)(struct gk20a *g); |
349 | int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch); | ||
350 | int (*trigger_suspend)(struct gk20a *g); | ||
351 | int (*wait_for_pause)(struct gk20a *g, struct warpstate *w_state); | ||
352 | int (*resume_from_pause)(struct gk20a *g); | ||
353 | int (*clear_sm_errors)(struct gk20a *g); | ||
354 | u32 (*tpc_enabled_exceptions)(struct gk20a *g); | ||
349 | } gr; | 355 | } gr; |
350 | struct { | 356 | struct { |
351 | void (*init_hw)(struct gk20a *g); | 357 | void (*init_hw)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3df19a4e..172931d7 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -9291,6 +9291,178 @@ static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr, | |||
9291 | { | 9291 | { |
9292 | } | 9292 | } |
9293 | 9293 | ||
9294 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch) | ||
9295 | { | ||
9296 | int err = 0; | ||
9297 | u32 cache_ctrl, regval; | ||
9298 | struct nvgpu_dbg_gpu_reg_op ops; | ||
9299 | |||
9300 | ops.op = REGOP(READ_32); | ||
9301 | ops.type = REGOP(TYPE_GR_CTX); | ||
9302 | ops.status = REGOP(STATUS_SUCCESS); | ||
9303 | ops.value_hi = 0; | ||
9304 | ops.and_n_mask_lo = 0; | ||
9305 | ops.and_n_mask_hi = 0; | ||
9306 | ops.offset = gr_pri_gpc0_gcc_dbg_r(); | ||
9307 | |||
9308 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
9309 | if (err) { | ||
9310 | gk20a_err(dev_from_gk20a(g), "Failed to read register"); | ||
9311 | return err; | ||
9312 | } | ||
9313 | |||
9314 | regval = ops.value_lo; | ||
9315 | |||
9316 | ops.op = REGOP(WRITE_32); | ||
9317 | ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1); | ||
9318 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0); | ||
9319 | if (err) { | ||
9320 | gk20a_err(dev_from_gk20a(g), "Failed to write register"); | ||
9321 | return err; | ||
9322 | } | ||
9323 | |||
9324 | ops.op = REGOP(READ_32); | ||
9325 | ops.offset = gr_pri_gpc0_tpc0_sm_cache_control_r(); | ||
9326 | err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1); | ||
9327 | if (err) { | ||
9328 | gk20a_err(dev_from_gk20a(g), "Failed to read register"); | ||
9329 | return err; | ||
9330 | } | ||
9331 | |||
9332 | cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r()); | ||
9333 | cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1); | ||
9334 | gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl); | ||
9335 | |||
9336 | return 0; | ||
9337 | } | ||
9338 | |||
9339 | int gr_gk20a_trigger_suspend(struct gk20a *g) | ||
9340 | { | ||
9341 | int err = 0; | ||
9342 | u32 dbgr_control0; | ||
9343 | |||
9344 | /* assert stop trigger. uniformity assumption: all SMs will have | ||
9345 | * the same state in dbg_control0. */ | ||
9346 | dbgr_control0 = | ||
9347 | gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r()); | ||
9348 | dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f(); | ||
9349 | |||
9350 | /* broadcast write */ | ||
9351 | gk20a_writel(g, | ||
9352 | gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0); | ||
9353 | |||
9354 | return err; | ||
9355 | } | ||
9356 | |||
9357 | int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state) | ||
9358 | { | ||
9359 | int err = 0; | ||
9360 | struct gr_gk20a *gr = &g->gr; | ||
9361 | u32 gpc, tpc, sm_id; | ||
9362 | u32 global_mask; | ||
9363 | |||
9364 | /* Wait for the SMs to reach full stop. This condition is: | ||
9365 | * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE) | ||
9366 | * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp | ||
9367 | * masks. | ||
9368 | */ | ||
9369 | global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() | | ||
9370 | gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() | | ||
9371 | gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f(); | ||
9372 | |||
9373 | /* Lock down all SMs */ | ||
9374 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9375 | |||
9376 | gpc = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
9377 | tpc = g->gr.sm_to_cluster[sm_id].tpc_index; | ||
9378 | |||
9379 | err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false); | ||
9380 | |||
9381 | if (err) { | ||
9382 | gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n"); | ||
9383 | return err; | ||
9384 | } | ||
9385 | } | ||
9386 | |||
9387 | /* Read the warp status */ | ||
9388 | g->ops.gr.bpt_reg_info(g, w_state); | ||
9389 | |||
9390 | return 0; | ||
9391 | } | ||
9392 | |||
9393 | int gr_gk20a_resume_from_pause(struct gk20a *g) | ||
9394 | { | ||
9395 | int err = 0; | ||
9396 | |||
9397 | /* Clear the pause mask to tell the GPU we want to resume everyone */ | ||
9398 | gk20a_writel(g, | ||
9399 | gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0); | ||
9400 | |||
9401 | /* explicitly re-enable forwarding of SM interrupts upon any resume */ | ||
9402 | gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(), | ||
9403 | gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f()); | ||
9404 | |||
9405 | /* Now resume all sms, write a 0 to the stop trigger | ||
9406 | * then a 1 to the run trigger */ | ||
9407 | gk20a_resume_all_sms(g); | ||
9408 | |||
9409 | return err; | ||
9410 | } | ||
9411 | |||
9412 | int gr_gk20a_clear_sm_errors(struct gk20a *g) | ||
9413 | { | ||
9414 | int ret = 0; | ||
9415 | u32 gpc_offset, tpc_offset, gpc, tpc; | ||
9416 | struct gr_gk20a *gr = &g->gr; | ||
9417 | u32 global_esr; | ||
9418 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
9419 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
9420 | |||
9421 | for (gpc = 0; gpc < gr->gpc_count; gpc++) { | ||
9422 | |||
9423 | gpc_offset = gpc_stride * gpc; | ||
9424 | |||
9425 | /* check if any tpc has an exception */ | ||
9426 | for (tpc = 0; tpc < gr->tpc_count; tpc++) { | ||
9427 | |||
9428 | tpc_offset = tpc_in_gpc_stride * tpc; | ||
9429 | |||
9430 | global_esr = gk20a_readl(g, | ||
9431 | gr_gpc0_tpc0_sm_hww_global_esr_r() + | ||
9432 | gpc_offset + tpc_offset); | ||
9433 | |||
9434 | /* clear the hwws, also causes tpc and gpc | ||
9435 | * exceptions to be cleared */ | ||
9436 | gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr); | ||
9437 | } | ||
9438 | } | ||
9439 | |||
9440 | return ret; | ||
9441 | } | ||
9442 | |||
9443 | u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) | ||
9444 | { | ||
9445 | struct gr_gk20a *gr = &g->gr; | ||
9446 | u32 sm_id, tpc_exception_en = 0; | ||
9447 | u32 offset, regval, tpc_offset, gpc_offset; | ||
9448 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
9449 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
9450 | |||
9451 | for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) { | ||
9452 | |||
9453 | tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index; | ||
9454 | gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index; | ||
9455 | offset = tpc_offset + gpc_offset; | ||
9456 | |||
9457 | regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + | ||
9458 | offset); | ||
9459 | /* Each bit represents corresponding enablement state, bit 0 corresponds to SM0 */ | ||
9460 | tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id; | ||
9461 | } | ||
9462 | |||
9463 | return tpc_exception_en; | ||
9464 | } | ||
9465 | |||
9294 | void gk20a_init_gr_ops(struct gpu_ops *gops) | 9466 | void gk20a_init_gr_ops(struct gpu_ops *gops) |
9295 | { | 9467 | { |
9296 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; | 9468 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; |
@@ -9376,4 +9548,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
9376 | gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; | 9548 | gops->gr.write_zcull_ptr = gr_gk20a_write_zcull_ptr; |
9377 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; | 9549 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; |
9378 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; | 9550 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; |
9551 | gops->gr.inval_icache = gr_gk20a_inval_icache; | ||
9552 | gops->gr.trigger_suspend = gr_gk20a_trigger_suspend; | ||
9553 | gops->gr.wait_for_pause = gr_gk20a_wait_for_pause; | ||
9554 | gops->gr.resume_from_pause = gr_gk20a_resume_from_pause; | ||
9555 | gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors; | ||
9556 | gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions; | ||
9379 | } | 9557 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 2dd1eaf5..33721f08 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -704,6 +704,12 @@ int gr_gk20a_resume_contexts(struct gk20a *g, | |||
704 | struct dbg_session_gk20a *dbg_s, | 704 | struct dbg_session_gk20a *dbg_s, |
705 | int *ctx_resident_ch_fd); | 705 | int *ctx_resident_ch_fd); |
706 | void gk20a_gr_enable_gpc_exceptions(struct gk20a *g); | 706 | void gk20a_gr_enable_gpc_exceptions(struct gk20a *g); |
707 | int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch); | ||
708 | int gr_gk20a_trigger_suspend(struct gk20a *g); | ||
709 | int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state); | ||
710 | int gr_gk20a_resume_from_pause(struct gk20a *g); | ||
711 | int gr_gk20a_clear_sm_errors(struct gk20a *g); | ||
712 | u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g); | ||
707 | 713 | ||
708 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, | 714 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, |
709 | struct channel_gk20a *c, bool patch); | 715 | struct channel_gk20a *c, bool patch); |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 4f4b8d4a..a43fcdab 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -1609,5 +1609,10 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1609 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; | 1609 | gops->gr.write_pm_ptr = gr_gk20a_write_pm_ptr; |
1610 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; | 1610 | gops->gr.init_elcg_mode = gr_gk20a_init_elcg_mode; |
1611 | gops->gr.load_tpc_mask = gr_gm20b_load_tpc_mask; | 1611 | gops->gr.load_tpc_mask = gr_gm20b_load_tpc_mask; |
1612 | 1612 | gops->gr.inval_icache = gr_gk20a_inval_icache; | |
1613 | gops->gr.trigger_suspend = gr_gk20a_trigger_suspend; | ||
1614 | gops->gr.wait_for_pause = gr_gk20a_wait_for_pause; | ||
1615 | gops->gr.resume_from_pause = gr_gk20a_resume_from_pause; | ||
1616 | gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors; | ||
1617 | gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions; | ||
1613 | } | 1618 | } |