author	Terje Bergstrom <tbergstrom@nvidia.com>	2017-03-23 14:03:15 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-03-29 12:15:59 -0400
commit	4022b989aa2e91fe77ed52df49d45838f6d8b9bb (patch)
tree	4c8240ac83887c21db902a255306c67041c4525c /drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
parent	f04031e5e8837abb2be3feb0ee30e1af54de7845 (diff)
gpu: nvgpu: Remove direct HW access from ctrl_gk20a.c
ctrl_gk20a.c had some direct accesses to hardware. These violate the
HAL rules, because we don't have per-GPU ctrl, and thus the code cannot
be made GPU independent.

Move all GR accesses to new GR HALs and use existing bus HAL for
accessing timer. Remove #includes of all hardware headers.

JIRA NVGPU-28

Change-Id: I57e67519f62e9bd6c3e725e1bef6e366190f5834
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1327001
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
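For context, the calls this patch introduces below (g->ops.gr.* and g->ops.bus.read_ptimer) resolve through the per-chip HAL tables that are populated outside this file. A minimal sketch of what that wiring might look like for gk20a follows; the gr_gk20a_* and gk20a_read_ptimer implementation names are assumptions based on the ops used in the diff, not taken from this page.

/* Illustrative sketch only: how a gk20a HAL init could hook up the ops
 * that ctrl_gk20a.c now calls. The implementation names on the right are
 * assumptions; the real assignments live in the GR/bus HAL patches that
 * accompany this change, not in this diff.
 */
static void gk20a_init_ctrl_related_hals(struct gpu_ops *gops)
{
	/* GR ops used by the debug/ioctl paths in ctrl_gk20a.c */
	gops->gr.inval_icache = gr_gk20a_inval_icache;
	gops->gr.trigger_suspend = gr_gk20a_trigger_suspend;
	gops->gr.wait_for_pause = gr_gk20a_wait_for_pause;
	gops->gr.resume_from_pause = gr_gk20a_resume_from_pause;
	gops->gr.clear_sm_errors = gr_gk20a_clear_sm_errors;
	gops->gr.tpc_enabled_exceptions = gr_gk20a_tpc_enabled_exceptions;

	/* Bus op used for PTIMER sampling in get_timestamps_zipper() */
	gops->bus.read_ptimer = gk20a_read_ptimer;
}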
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c	178
1 files changed, 18 insertions, 160 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 97125a99..5661b402 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -24,14 +24,7 @@
 #include <linux/delay.h>
 
 #include "gk20a.h"
-#include "gr_gk20a.h"
 #include "fence_gk20a.h"
-#include "regops_gk20a.h"
-
-#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_timer_gk20a.h>
-
 
 #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7) ? 0xffff : (a >> 32) ? \
 	(u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
@@ -342,47 +335,16 @@ static int nvgpu_gpu_ioctl_inval_icache(
 	struct gk20a *g,
 	struct nvgpu_gpu_inval_icache_args *args)
 {
-
-	int err = 0;
-	u32 cache_ctrl, regval;
 	struct channel_gk20a *ch;
-	struct nvgpu_dbg_gpu_reg_op ops;
+	int err;
 
 	ch = gk20a_get_channel_from_file(args->channel_fd);
 	if (!ch)
 		return -EINVAL;
 
-	ops.op = REGOP(READ_32);
-	ops.type = REGOP(TYPE_GR_CTX);
-	ops.status = REGOP(STATUS_SUCCESS);
-	ops.value_hi = 0;
-	ops.and_n_mask_lo = 0;
-	ops.and_n_mask_hi = 0;
-	ops.offset = gr_pri_gpc0_gcc_dbg_r();
-
 	/* Take the global lock, since we'll be doing global regops */
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
-
-	regval = ops.value_lo;
-
-	if (!err) {
-		ops.op = REGOP(WRITE_32);
-		ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
-		err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
-	}
-
-	if (err) {
-		gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
-		goto end;
-	}
-
-	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
-	cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
-	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
-
-end:
+	err = g->ops.gr.inval_icache(g, ch);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -428,20 +390,10 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
 
 static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g)
 {
-	int err = 0;
-	u32 dbgr_control0;
+	int err;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-	/* assert stop trigger. uniformity assumption: all SMs will have
-	 * the same state in dbg_control0. */
-	dbgr_control0 =
-		gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
-	dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
-
-	/* broadcast write */
-	gk20a_writel(g,
-		gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
-
+	err = g->ops.gr.trigger_suspend(g);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -451,41 +403,16 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 {
 	int err = 0;
 	struct warpstate *w_state;
-	struct gr_gk20a *gr = &g->gr;
-	u32 gpc, tpc, sm_count, sm_id, size;
-	u32 global_mask;
+	u32 sm_count, size;
 
 	sm_count = g->gr.gpc_count * g->gr.tpc_count;
 	size = sm_count * sizeof(struct warpstate);
 	w_state = kzalloc(size, GFP_KERNEL);
-
-	/* Wait for the SMs to reach full stop. This condition is:
-	 * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
-	 * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
-	 * masks.
-	 */
-	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
-		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
-		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+	if (!w_state)
+		return -ENOMEM;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-	/* Lock down all SMs */
-	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
-
-		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
-		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
-
-		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
-
-		if (err) {
-			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
-			goto end;
-		}
-	}
-
-	/* Read the warp status */
-	g->ops.gr.bpt_reg_info(g, w_state);
+	g->ops.gr.wait_for_pause(g, w_state);
 
 	/* Copy to user space - pointed by "args->pwarpstate" */
 	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
@@ -493,7 +420,6 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 		err = -EFAULT;
 	}
 
-end:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	kfree(w_state);
 	return err;
@@ -504,82 +430,29 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g)
 	int err = 0;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-	/* Clear the pause mask to tell the GPU we want to resume everyone */
-	gk20a_writel(g,
-		gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);
-
-	/* explicitly re-enable forwarding of SM interrupts upon any resume */
-	gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
-		gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
-
-	/* Now resume all sms, write a 0 to the stop trigger
-	 * then a 1 to the run trigger */
-	gk20a_resume_all_sms(g);
-
+	err = g->ops.gr.resume_from_pause(g);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
 
 static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g)
 {
-	int ret = 0;
-	u32 gpc_offset, tpc_offset, gpc, tpc;
-	struct gr_gk20a *gr = &g->gr;
-	u32 global_esr;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-
-	for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-
-		gpc_offset = gpc_stride * gpc;
-
-		/* check if any tpc has an exception */
-		for (tpc = 0; tpc < gr->tpc_count; tpc++) {
-
-			tpc_offset = tpc_in_gpc_stride * tpc;
-
-			global_esr = gk20a_readl(g,
-					gr_gpc0_tpc0_sm_hww_global_esr_r() +
-					gpc_offset + tpc_offset);
-
-			/* clear the hwws, also causes tpc and gpc
-			 * exceptions to be cleared */
-			gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
-		}
-	}
-
-	return ret;
+	return g->ops.gr.clear_sm_errors(g);
 }
 
 static int nvgpu_gpu_ioctl_has_any_exception(
 		struct gk20a *g,
 		struct nvgpu_gpu_tpc_exception_en_status_args *args)
 {
-	int err = 0;
-	struct gr_gk20a *gr = &g->gr;
-	u32 sm_id, tpc_exception_en = 0;
-	u32 offset, regval, tpc_offset, gpc_offset;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 tpc_exception_en;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
-
-		tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
-		gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
-		offset = tpc_offset + gpc_offset;
-
-		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
-								offset);
-		/* Each bit represents corresponding enablement state, bit 0 corrsponds to SM0 */
-		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
-	}
-
+	tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
+
 	args->tpc_exception_en_sm_mask = tpc_exception_en;
-	return err;
+
+	return 0;
 }
 
 static int gk20a_ctrl_get_num_vsms(struct gk20a *g,
@@ -648,8 +521,6 @@ static inline int get_timestamps_zipper(struct gk20a *g,
 {
 	int err = 0;
 	unsigned int i = 0;
-	u32 gpu_timestamp_hi_new = 0;
-	u32 gpu_timestamp_hi_old = 0;
 
 	if (gk20a_busy(g)) {
 		gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
@@ -657,25 +528,12 @@ static inline int get_timestamps_zipper(struct gk20a *g,
 		goto end;
 	}
 
-	/* get zipper reads of gpu and cpu counter values */
-	gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
 	for (i = 0; i < args->count; i++) {
-		u32 gpu_timestamp_lo = 0;
-		u32 gpu_timestamp_hi = 0;
+		err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp);
+		if (err)
+			return err;
 
-		gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
 		args->samples[i].cpu_timestamp = get_cpu_timestamp();
-		rmb(); /* maintain zipper read order */
-		gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
-
-		/* pick the appropriate gpu counter hi bits */
-		gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
-			gpu_timestamp_hi_old : gpu_timestamp_hi_new;
-
-		args->samples[i].gpu_timestamp =
-			((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
-
-		gpu_timestamp_hi_old = gpu_timestamp_hi_new;
 	}
 
 end: