summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h5
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c5
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c82
3 files changed, 90 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index e6630ebf..115cd7f4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -27,6 +27,7 @@ struct fifo_gk20a;
27struct channel_gk20a; 27struct channel_gk20a;
28struct gr_gk20a; 28struct gr_gk20a;
29struct sim_gk20a; 29struct sim_gk20a;
30struct gk20a_ctxsw_ucode_segments;
30 31
31#include <linux/sched.h> 32#include <linux/sched.h>
32#include <linux/spinlock.h> 33#include <linux/spinlock.h>
@@ -121,6 +122,10 @@ struct gpu_ops {
121 void (*set_hww_esr_report_mask)(struct gk20a *g); 122 void (*set_hww_esr_report_mask)(struct gk20a *g);
122 int (*setup_alpha_beta_tables)(struct gk20a *g, 123 int (*setup_alpha_beta_tables)(struct gk20a *g,
123 struct gr_gk20a *gr); 124 struct gr_gk20a *gr);
125 int (*falcon_load_ucode)(struct gk20a *g,
126 u64 addr_base,
127 struct gk20a_ctxsw_ucode_segments *segments,
128 u32 reg_offset);
124 } gr; 129 } gr;
125 const char *name; 130 const char *name;
126 struct { 131 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 46a84fd6..50ca0601 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2141,10 +2141,10 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g)
2141 2141
2142 gr_gk20a_load_falcon_bind_instblk(g); 2142 gr_gk20a_load_falcon_bind_instblk(g);
2143 2143
2144 gr_gk20a_load_ctxsw_ucode_segments(g, addr_base, 2144 g->ops.gr.falcon_load_ucode(g, addr_base,
2145 &g->ctxsw_ucode_info.fecs, 0); 2145 &g->ctxsw_ucode_info.fecs, 0);
2146 2146
2147 gr_gk20a_load_ctxsw_ucode_segments(g, addr_base, 2147 g->ops.gr.falcon_load_ucode(g, addr_base,
2148 &g->ctxsw_ucode_info.gpccs, 2148 &g->ctxsw_ucode_info.gpccs,
2149 gr_gpcs_gpccs_falcon_hwcfg_r() - 2149 gr_gpcs_gpccs_falcon_hwcfg_r() -
2150 gr_fecs_falcon_hwcfg_r()); 2150 gr_fecs_falcon_hwcfg_r());
@@ -6845,4 +6845,5 @@ void gk20a_init_gr(struct gpu_ops *gops)
6845 gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep; 6845 gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep;
6846 gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; 6846 gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask;
6847 gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; 6847 gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables;
6848 gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments;
6848} 6849}
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 54184766..c5de8f60 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -570,6 +570,87 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
570 return 0; 570 return 0;
571} 571}
572 572
573static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
574 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
575{
576 u32 addr_code32;
577 u32 addr_data32;
578 u32 addr_load32;
579 u32 dst = 0;
580 u32 blocks;
581 u32 b;
582
583 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
584 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
585 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
586
587 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
588 gr_fecs_dmactl_require_ctx_f(0));
589
590 /*
591 * Copy falcon bootloader header into dmem at offset 0.
592 * Configure dmem port 0 for auto-incrementing writes starting at dmem
593 * offset 0.
594 */
595 gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
596 gr_fecs_dmemc_offs_f(0) |
597 gr_fecs_dmemc_blk_f(0) |
598 gr_fecs_dmemc_aincw_f(1));
599
600 /* Write out the actual data */
601 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
602 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
603 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
604 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
605 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
606 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32);
607 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
608 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size);
609 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
610 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
611 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
612 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32);
613 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size);
614
615 blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
616
617 /*
618 * Set the base FB address for the DMA transfer. Subtract off the 256
619 * byte IMEM block offset such that the relative FB and IMEM offsets
620 * match, allowing the IMEM tags to be properly created.
621 */
622
623 dst = segments->boot_imem_offset;
624 gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
625 (addr_load32 - (dst >> 8)));
626
627 for (b = 0; b < blocks; b++) {
628 /* Setup destination IMEM offset */
629 gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
630 dst + (b << 8));
631
632 /* Setup source offset (relative to BASE) */
633 gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
634 dst + (b << 8));
635
636 gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
637 gr_fecs_dmatrfcmd_imem_f(0x01) |
638 gr_fecs_dmatrfcmd_write_f(0x00) |
639 gr_fecs_dmatrfcmd_size_f(0x06) |
640 gr_fecs_dmatrfcmd_ctxdma_f(0));
641 }
642
643 /* Specify the falcon boot vector */
644 gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
645 gr_fecs_bootvec_vec_f(segments->boot_entry));
646
647 /* Write to CPUCTL to start the falcon */
648 gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
649 gr_fecs_cpuctl_startcpu_f(0x01));
650
651 return 0;
652}
653
573void gm20b_init_gr(struct gpu_ops *gops) 654void gm20b_init_gr(struct gpu_ops *gops)
574{ 655{
575 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 656 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -590,4 +671,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
590 gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; 671 gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
591 gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep; 672 gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep;
592 gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; 673 gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask;
674 gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments;
593} 675}