author     Terje Bergstrom <tbergstrom@nvidia.com>              2017-03-23 14:03:15 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-03-29 12:15:59 -0400
commit     4022b989aa2e91fe77ed52df49d45838f6d8b9bb
tree       4c8240ac83887c21db902a255306c67041c4525c /drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
parent     f04031e5e8837abb2be3feb0ee30e1af54de7845
gpu: nvgpu: Remove direct HW access from ctrl_gk20a.c
ctrl_gk20a.c had several direct accesses to hardware registers. These
violate the HAL rules: there is no per-GPU variant of the ctrl code, so
direct register accesses prevent it from being GPU independent.

Move all GR accesses to new GR HALs, and use the existing bus HAL for
accessing the timer. Remove the #includes of all hardware headers.
JIRA NVGPU-28
Change-Id: I57e67519f62e9bd6c3e725e1bef6e366190f5834
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1327001
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
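For readers unfamiliar with nvgpu's HAL: struct gk20a carries a per-chip table of function pointers (g->ops), and common code is supposed to call through it rather than poke registers directly. Below is a trimmed sketch of the op-table shape this commit relies on. The signatures are inferred from the call sites in the diff, not copied from gk20a.h, whose real struct gpu_ops is far larger.

```c
/*
 * Sketch only: inferred from the g->ops.gr.* and g->ops.bus.* call
 * sites in this diff. The real struct gpu_ops in gk20a.h has many
 * more members, and the exact signatures there may differ.
 */
struct gpu_ops {
        struct {
                int (*inval_icache)(struct gk20a *g,
                                    struct channel_gk20a *ch);
                int (*trigger_suspend)(struct gk20a *g);
                int (*wait_for_pause)(struct gk20a *g,
                                      struct warpstate *w_state);
                int (*resume_from_pause)(struct gk20a *g);
                int (*clear_sm_errors)(struct gk20a *g);
                u32 (*tpc_enabled_exceptions)(struct gk20a *g);
        } gr;
        struct {
                int (*read_ptimer)(struct gk20a *g, u64 *value);
        } bus;
};
```

Each chip's init code fills these pointers in, so ctrl_gk20a.c stays register-free and works unchanged on any GPU that provides the ops.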
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 178 +-
1 file changed, 18 insertions(+), 160 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 97125a99..5661b402 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -24,14 +24,7 @@
 #include <linux/delay.h>
 
 #include "gk20a.h"
-#include "gr_gk20a.h"
 #include "fence_gk20a.h"
-#include "regops_gk20a.h"
-
-#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_timer_gk20a.h>
-
 
 #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7) ? 0xffff : (a >> 32) ? \
         (u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
@@ -342,47 +335,16 @@ static int nvgpu_gpu_ioctl_inval_icache(
         struct gk20a *g,
         struct nvgpu_gpu_inval_icache_args *args)
 {
-
-        int err = 0;
-        u32 cache_ctrl, regval;
         struct channel_gk20a *ch;
-        struct nvgpu_dbg_gpu_reg_op ops;
+        int err;
 
         ch = gk20a_get_channel_from_file(args->channel_fd);
         if (!ch)
                 return -EINVAL;
 
-        ops.op = REGOP(READ_32);
-        ops.type = REGOP(TYPE_GR_CTX);
-        ops.status = REGOP(STATUS_SUCCESS);
-        ops.value_hi = 0;
-        ops.and_n_mask_lo = 0;
-        ops.and_n_mask_hi = 0;
-        ops.offset = gr_pri_gpc0_gcc_dbg_r();
-
         /* Take the global lock, since we'll be doing global regops */
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
-
-        regval = ops.value_lo;
-
-        if (!err) {
-                ops.op = REGOP(WRITE_32);
-                ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
-                err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
-        }
-
-        if (err) {
-                gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
-                goto end;
-        }
-
-        cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
-        cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
-        gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
-
-end:
+        err = g->ops.gr.inval_icache(g, ch);
         nvgpu_mutex_release(&g->dbg_sessions_lock);
         return err;
 }
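The deleted register sequence does not disappear; it moves behind the new g->ops.gr.inval_icache hook. A sketch of the gk20a-side implementation, reconstructed from the lines removed above; the helper name and its placement in gr_gk20a.c are assumptions, since that file is outside this diff:

```c
/* Hypothetical gk20a implementation of the inval_icache HAL,
 * reconstructed from the removed ctrl_gk20a.c body. */
static int gr_gk20a_inval_icache(struct gk20a *g, struct channel_gk20a *ch)
{
        int err;
        u32 cache_ctrl, regval;
        struct nvgpu_dbg_gpu_reg_op ops;

        ops.op = REGOP(READ_32);
        ops.type = REGOP(TYPE_GR_CTX);
        ops.status = REGOP(STATUS_SUCCESS);
        ops.value_hi = 0;
        ops.and_n_mask_lo = 0;
        ops.and_n_mask_hi = 0;
        ops.offset = gr_pri_gpc0_gcc_dbg_r();

        /* Read-modify-write the GCC debug register through ctx regops. */
        err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
        regval = ops.value_lo;
        if (!err) {
                ops.op = REGOP(WRITE_32);
                ops.value_lo = set_field(regval,
                                gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
                err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
        }
        if (err) {
                gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
                return err;
        }

        /* Invalidate the SM instruction cache with a direct register
         * poke; legal here because this code is gk20a-specific. */
        cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
        cache_ctrl = set_field(cache_ctrl,
                        gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
        gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);

        return 0;
}
```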
@@ -428,20 +390,10 @@ static int nvgpu_gpu_ioctl_set_debug_mode(
 
 static int nvgpu_gpu_ioctl_trigger_suspend(struct gk20a *g)
 {
-        int err = 0;
-        u32 dbgr_control0;
+        int err;
 
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-        /* assert stop trigger. uniformity assumption: all SMs will have
-         * the same state in dbg_control0. */
-        dbgr_control0 =
-                gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
-        dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();
-
-        /* broadcast write */
-        gk20a_writel(g,
-                gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);
-
+        err = g->ops.gr.trigger_suspend(g);
         nvgpu_mutex_release(&g->dbg_sessions_lock);
         return err;
 }
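Likewise for trigger_suspend: the stop-trigger broadcast now lives behind g->ops.gr.trigger_suspend. Reconstructed from the removed lines (helper name assumed):

```c
/* Hypothetical gk20a implementation of the trigger_suspend HAL,
 * reconstructed from the removed ctrl_gk20a.c body. */
static int gr_gk20a_trigger_suspend(struct gk20a *g)
{
        u32 dbgr_control0;

        /* assert stop trigger. uniformity assumption: all SMs will have
         * the same state in dbg_control0. */
        dbgr_control0 = gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_control0_r());
        dbgr_control0 |= gr_gpcs_tpcs_sm_dbgr_control0_stop_trigger_enable_f();

        /* broadcast write to all TPCs in all GPCs */
        gk20a_writel(g, gr_gpcs_tpcs_sm_dbgr_control0_r(), dbgr_control0);

        return 0;
}
```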
@@ -451,41 +403,16 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
 {
         int err = 0;
         struct warpstate *w_state;
-        struct gr_gk20a *gr = &g->gr;
-        u32 gpc, tpc, sm_count, sm_id, size;
-        u32 global_mask;
+        u32 sm_count, size;
 
         sm_count = g->gr.gpc_count * g->gr.tpc_count;
         size = sm_count * sizeof(struct warpstate);
         w_state = kzalloc(size, GFP_KERNEL);
-
-        /* Wait for the SMs to reach full stop. This condition is:
-         * 1) All SMs with valid warps must be in the trap handler (SM_IN_TRAP_MODE)
-         * 2) All SMs in the trap handler must have equivalent VALID and PAUSED warp
-         * masks.
-         */
-        global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
-                gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
-                gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+        if (!w_state)
+                return -ENOMEM;
 
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        /* Lock down all SMs */
-        for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
-
-                gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
-                tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
-
-                err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
-
-                if (err) {
-                        gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
-                        goto end;
-                }
-        }
-
-        /* Read the warp status */
-        g->ops.gr.bpt_reg_info(g, w_state);
+        g->ops.gr.wait_for_pause(g, w_state);
 
         /* Copy to user space - pointed by "args->pwarpstate" */
         if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
@@ -493,7 +420,6 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g,
                 err = -EFAULT;
         }
 
-end:
         nvgpu_mutex_release(&g->dbg_sessions_lock);
         kfree(w_state);
         return err;
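Two things happen in the wait_for_pause hunks. First, the SM lock-down loop and warp-status read move behind g->ops.gr.wait_for_pause. Second, the rewrite fixes a latent bug: the old code never checked the kzalloc() result before using w_state, while the new code returns -ENOMEM. A sketch of the HAL implementation, reconstructed from the removed lines (helper name assumed):

```c
/* Hypothetical gk20a implementation behind g->ops.gr.wait_for_pause,
 * reconstructed from the removed ctrl_gk20a.c body. */
static int gr_gk20a_wait_for_pause(struct gk20a *g, struct warpstate *w_state)
{
        struct gr_gk20a *gr = &g->gr;
        u32 gpc, tpc, sm_id;
        u32 global_mask;
        int err;

        /* Wait for the SMs to reach full stop: every SM with valid
         * warps must be in the trap handler (SM_IN_TRAP_MODE) with
         * matching VALID and PAUSED warp masks. */
        global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
                gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
                gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();

        /* Lock down all SMs */
        for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
                gpc = gr->sm_to_cluster[sm_id].gpc_index;
                tpc = gr->sm_to_cluster[sm_id].tpc_index;
                err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask, false);
                if (err) {
                        gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
                        return err;
                }
        }

        /* Read the warp status into the caller-supplied array. */
        g->ops.gr.bpt_reg_info(g, w_state);

        return 0;
}
```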
@@ -504,82 +430,29 @@ static int nvgpu_gpu_ioctl_resume_from_pause(struct gk20a *g)
         int err = 0;
 
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        /* Clear the pause mask to tell the GPU we want to resume everyone */
-        gk20a_writel(g,
-                gr_gpcs_tpcs_sm_dbgr_bpt_pause_mask_r(), 0);
-
-        /* explicitly re-enable forwarding of SM interrupts upon any resume */
-        gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
-                gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
-
-        /* Now resume all sms, write a 0 to the stop trigger
-         * then a 1 to the run trigger */
-        gk20a_resume_all_sms(g);
-
+        err = g->ops.gr.resume_from_pause(g);
         nvgpu_mutex_release(&g->dbg_sessions_lock);
         return err;
 }
 
 static int nvgpu_gpu_ioctl_clear_sm_errors(struct gk20a *g)
 {
-        int ret = 0;
-        u32 gpc_offset, tpc_offset, gpc, tpc;
-        struct gr_gk20a *gr = &g->gr;
-        u32 global_esr;
-        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-
-        for (gpc = 0; gpc < gr->gpc_count; gpc++) {
-
-                gpc_offset = gpc_stride * gpc;
-
-                /* check if any tpc has an exception */
-                for (tpc = 0; tpc < gr->tpc_count; tpc++) {
-
-                        tpc_offset = tpc_in_gpc_stride * tpc;
-
-                        global_esr = gk20a_readl(g,
-                                        gr_gpc0_tpc0_sm_hww_global_esr_r() +
-                                        gpc_offset + tpc_offset);
-
-                        /* clear the hwws, also causes tpc and gpc
-                         * exceptions to be cleared */
-                        gk20a_gr_clear_sm_hww(g, gpc, tpc, global_esr);
-                }
-        }
-
-        return ret;
+        return g->ops.gr.clear_sm_errors(g);
 }
 
 static int nvgpu_gpu_ioctl_has_any_exception(
         struct gk20a *g,
         struct nvgpu_gpu_tpc_exception_en_status_args *args)
 {
-        int err = 0;
-        struct gr_gk20a *gr = &g->gr;
-        u32 sm_id, tpc_exception_en = 0;
-        u32 offset, regval, tpc_offset, gpc_offset;
-        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+        u32 tpc_exception_en;
 
         nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
-
-                tpc_offset = tpc_in_gpc_stride * g->gr.sm_to_cluster[sm_id].tpc_index;
-                gpc_offset = gpc_stride * g->gr.sm_to_cluster[sm_id].gpc_index;
-                offset = tpc_offset + gpc_offset;
-
-                regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
-                                                                offset);
-                /* Each bit represents corresponding enablement state, bit 0 corrsponds to SM0 */
-                tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
-        }
-
+        tpc_exception_en = g->ops.gr.tpc_enabled_exceptions(g);
         nvgpu_mutex_release(&g->dbg_sessions_lock);
+
         args->tpc_exception_en_sm_mask = tpc_exception_en;
-        return err;
+
+        return 0;
 }
 
 static int gk20a_ctrl_get_num_vsms(struct gk20a *g,
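The same pattern repeats for resume_from_pause, clear_sm_errors, and has_any_exception: each ioctl shrinks to locking plus a single HAL call. As one representative example, the per-SM exception-enable walk presumably becomes something like the following (name assumed, body reconstructed from the removed lines):

```c
/* Hypothetical gk20a implementation of tpc_enabled_exceptions,
 * reconstructed from the removed has_any_exception loop. */
static u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g)
{
        struct gr_gk20a *gr = &g->gr;
        u32 sm_id, tpc_exception_en = 0;
        u32 offset, regval;
        u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
        u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
                                        GPU_LIT_TPC_IN_GPC_STRIDE);

        for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
                offset = tpc_in_gpc_stride *
                                gr->sm_to_cluster[sm_id].tpc_index +
                         gpc_stride * gr->sm_to_cluster[sm_id].gpc_index;
                regval = gk20a_readl(g,
                        gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + offset);
                /* Each bit reports one SM's enable state; bit 0 is SM0. */
                tpc_exception_en |=
                        gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval)
                                << sm_id;
        }

        return tpc_exception_en;
}
```

Note that the litter values (GPU_LIT_GPC_STRIDE and friends) stay: they are already a chip-abstraction mechanism, so they belong in the per-chip implementation, not in common ctrl code.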
@@ -648,8 +521,6 @@ static inline int get_timestamps_zipper(struct gk20a *g,
 {
         int err = 0;
         unsigned int i = 0;
-        u32 gpu_timestamp_hi_new = 0;
-        u32 gpu_timestamp_hi_old = 0;
 
         if (gk20a_busy(g)) {
                 gk20a_err(dev_from_gk20a(g), "GPU not powered on\n");
@@ -657,25 +528,12 @@ static inline int get_timestamps_zipper(struct gk20a *g,
                 goto end;
         }
 
-        /* get zipper reads of gpu and cpu counter values */
-        gpu_timestamp_hi_old = gk20a_readl(g, timer_time_1_r());
         for (i = 0; i < args->count; i++) {
-                u32 gpu_timestamp_lo = 0;
-                u32 gpu_timestamp_hi = 0;
+                err = g->ops.bus.read_ptimer(g, &args->samples[i].gpu_timestamp);
+                if (err)
+                        return err;
 
-                gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
                 args->samples[i].cpu_timestamp = get_cpu_timestamp();
-                rmb(); /* maintain zipper read order */
-                gpu_timestamp_hi_new = gk20a_readl(g, timer_time_1_r());
-
-                /* pick the appropriate gpu counter hi bits */
-                gpu_timestamp_hi = (gpu_timestamp_lo & (1L << 31)) ?
-                        gpu_timestamp_hi_old : gpu_timestamp_hi_new;
-
-                args->samples[i].gpu_timestamp =
-                        ((u64)gpu_timestamp_hi << 32) | (u64)gpu_timestamp_lo;
-
-                gpu_timestamp_hi_old = gpu_timestamp_hi_new;
         }
 
 end:
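The timestamp path is the one case that reuses an existing HAL rather than adding a new one: the open-coded "zipper" read of timer_time_1_r()/timer_time_0_r() becomes g->ops.bus.read_ptimer. A sketch of the rollover-safe read the bus HAL has to provide, adapted from the removed lines; the function name is an assumption, and where the original hoisted the first high-word read out of the sampling loop, the per-call version below is equivalent for a single sample:

```c
/* Hypothetical bus-HAL ptimer read, shaped after the removed zipper
 * logic. Re-reading the high word around the low-word read detects a
 * 32-bit rollover: if the low word is in its upper half (bit 31 set),
 * it cannot have wrapped since the first high-word read, so the older
 * high value is the consistent one; otherwise use the newer one. */
static int gk20a_read_ptimer(struct gk20a *g, u64 *value)
{
        u32 hi_old, hi_new, lo;

        if (!value)
                return -EINVAL;

        hi_old = gk20a_readl(g, timer_time_1_r());
        lo = gk20a_readl(g, timer_time_0_r());
        hi_new = gk20a_readl(g, timer_time_1_r());

        *value = ((u64)((lo & (1UL << 31)) ? hi_old : hi_new) << 32) |
                 (u64)lo;
        return 0;
}
```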