diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-05-22 02:53:51 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:09:56 -0400 |
commit | 48f0b407f967d73a2301a215bb5c381be3876a20 (patch) | |
tree | 05c72e3a3681f3e84daf5f197661721d71d44289 | |
parent | d78dca61e0eb92e69e1fa5650c0e946a21a930d2 (diff) |
gpu: nvgpu: Add gm20b fecs/gpccs bootloader support
Add support for booting FECS and GPCCS via the faster bootloader method.
We leave this disabled until the bootloader binaries are checked in.
Change-Id: I39df5d116f7a33486407518c743638b01923970d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/413005
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 82 |
3 files changed, 90 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index e6630ebf..115cd7f4 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -27,6 +27,7 @@ struct fifo_gk20a; | |||
27 | struct channel_gk20a; | 27 | struct channel_gk20a; |
28 | struct gr_gk20a; | 28 | struct gr_gk20a; |
29 | struct sim_gk20a; | 29 | struct sim_gk20a; |
30 | struct gk20a_ctxsw_ucode_segments; | ||
30 | 31 | ||
31 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
32 | #include <linux/spinlock.h> | 33 | #include <linux/spinlock.h> |
@@ -121,6 +122,10 @@ struct gpu_ops { | |||
121 | void (*set_hww_esr_report_mask)(struct gk20a *g); | 122 | void (*set_hww_esr_report_mask)(struct gk20a *g); |
122 | int (*setup_alpha_beta_tables)(struct gk20a *g, | 123 | int (*setup_alpha_beta_tables)(struct gk20a *g, |
123 | struct gr_gk20a *gr); | 124 | struct gr_gk20a *gr); |
125 | int (*falcon_load_ucode)(struct gk20a *g, | ||
126 | u64 addr_base, | ||
127 | struct gk20a_ctxsw_ucode_segments *segments, | ||
128 | u32 reg_offset); | ||
124 | } gr; | 129 | } gr; |
125 | const char *name; | 130 | const char *name; |
126 | struct { | 131 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 46a84fd6..50ca0601 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -2141,10 +2141,10 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) | |||
2141 | 2141 | ||
2142 | gr_gk20a_load_falcon_bind_instblk(g); | 2142 | gr_gk20a_load_falcon_bind_instblk(g); |
2143 | 2143 | ||
2144 | gr_gk20a_load_ctxsw_ucode_segments(g, addr_base, | 2144 | g->ops.gr.falcon_load_ucode(g, addr_base, |
2145 | &g->ctxsw_ucode_info.fecs, 0); | 2145 | &g->ctxsw_ucode_info.fecs, 0); |
2146 | 2146 | ||
2147 | gr_gk20a_load_ctxsw_ucode_segments(g, addr_base, | 2147 | g->ops.gr.falcon_load_ucode(g, addr_base, |
2148 | &g->ctxsw_ucode_info.gpccs, | 2148 | &g->ctxsw_ucode_info.gpccs, |
2149 | gr_gpcs_gpccs_falcon_hwcfg_r() - | 2149 | gr_gpcs_gpccs_falcon_hwcfg_r() - |
2150 | gr_fecs_falcon_hwcfg_r()); | 2150 | gr_fecs_falcon_hwcfg_r()); |
@@ -6845,4 +6845,5 @@ void gk20a_init_gr(struct gpu_ops *gops) | |||
6845 | gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep; | 6845 | gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep; |
6846 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; | 6846 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; |
6847 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; | 6847 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; |
6848 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; | ||
6848 | } | 6849 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 54184766..c5de8f60 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -570,6 +570,87 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
570 | return 0; | 570 | return 0; |
571 | } | 571 | } |
572 | 572 | ||
573 | static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | ||
574 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) | ||
575 | { | ||
576 | u32 addr_code32; | ||
577 | u32 addr_data32; | ||
578 | u32 addr_load32; | ||
579 | u32 dst = 0; | ||
580 | u32 blocks; | ||
581 | u32 b; | ||
582 | |||
583 | addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); | ||
584 | addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); | ||
585 | addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); | ||
586 | |||
587 | gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), | ||
588 | gr_fecs_dmactl_require_ctx_f(0)); | ||
589 | |||
590 | /* | ||
591 | * Copy falcon bootloader header into dmem at offset 0. | ||
592 | * Configure dmem port 0 for auto-incrementing writes starting at dmem | ||
593 | * offset 0. | ||
594 | */ | ||
595 | gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0), | ||
596 | gr_fecs_dmemc_offs_f(0) | | ||
597 | gr_fecs_dmemc_blk_f(0) | | ||
598 | gr_fecs_dmemc_aincw_f(1)); | ||
599 | |||
600 | /* Write out the actual data */ | ||
601 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
602 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
603 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
604 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
605 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4); | ||
606 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); | ||
607 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
608 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size); | ||
609 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
610 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
611 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
612 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32); | ||
613 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size); | ||
614 | |||
615 | blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8; | ||
616 | |||
617 | /* | ||
618 | * Set the base FB address for the DMA transfer. Subtract off the 256 | ||
619 | * byte IMEM block offset such that the relative FB and IMEM offsets | ||
620 | * match, allowing the IMEM tags to be properly created. | ||
621 | */ | ||
622 | |||
623 | dst = segments->boot_imem_offset; | ||
624 | gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(), | ||
625 | (addr_load32 - (dst >> 8))); | ||
626 | |||
627 | for (b = 0; b < blocks; b++) { | ||
628 | /* Setup destination IMEM offset */ | ||
629 | gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(), | ||
630 | dst + (b << 8)); | ||
631 | |||
632 | /* Setup source offset (relative to BASE) */ | ||
633 | gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(), | ||
634 | dst + (b << 8)); | ||
635 | |||
636 | gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(), | ||
637 | gr_fecs_dmatrfcmd_imem_f(0x01) | | ||
638 | gr_fecs_dmatrfcmd_write_f(0x00) | | ||
639 | gr_fecs_dmatrfcmd_size_f(0x06) | | ||
640 | gr_fecs_dmatrfcmd_ctxdma_f(0)); | ||
641 | } | ||
642 | |||
643 | /* Specify the falcon boot vector */ | ||
644 | gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(), | ||
645 | gr_fecs_bootvec_vec_f(segments->boot_entry)); | ||
646 | |||
647 | /* Write to CPUCTL to start the falcon */ | ||
648 | gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), | ||
649 | gr_fecs_cpuctl_startcpu_f(0x01)); | ||
650 | |||
651 | return 0; | ||
652 | } | ||
653 | |||
573 | void gm20b_init_gr(struct gpu_ops *gops) | 654 | void gm20b_init_gr(struct gpu_ops *gops) |
574 | { | 655 | { |
575 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | 656 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; |
@@ -590,4 +671,5 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
590 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; | 671 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; |
591 | gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep; | 672 | gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep; |
592 | gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; | 673 | gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; |
674 | gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments; | ||
593 | } | 675 | } |