summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2014-10-14 04:48:40 -0400
committerDan Willemsen <dwillemsen@nvidia.com>2015-03-18 15:11:49 -0400
commit3f3844a11ccac7957fdb7139a1c9c2a767d315a5 (patch)
tree728a145ec168983d833dcb1de04d5dab84bb43e7
parenta870ff1d294126a3b46db4e0fdc14276035a2840 (diff)
gpu: nvgpu: select ucode boot init by signature
Compute a signature checksum for the ctxsw ucode boot section and use it to determine the format of the boot initialization data. This unifies gk20a and gm20b ucode segment loading considerably by splitting the bootloader loading logic into separate functions.

Note: Whenever the boot segment binary changes, its updated signature must be added here. Different bootloader versions must be supported side by side because of staging issues that cross repositories.

Bug 1519397

Change-Id: I96f9b905d3631dfdebf71ea3a652a0968615fd0a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/556679
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c105
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h22
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c70
3 files changed, 111 insertions, 86 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 84b79b42..30ea49a7 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1776,9 +1776,17 @@ static int gr_gk20a_copy_ctxsw_ucode_segments(
1776 u32 *bootimage, 1776 u32 *bootimage,
1777 u32 *code, u32 *data) 1777 u32 *code, u32 *data)
1778{ 1778{
1779 int i;
1780
1779 memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); 1781 memcpy(buf + segments->boot.offset, bootimage, segments->boot.size);
1780 memcpy(buf + segments->code.offset, code, segments->code.size); 1782 memcpy(buf + segments->code.offset, code, segments->code.size);
1781 memcpy(buf + segments->data.offset, data, segments->data.size); 1783 memcpy(buf + segments->data.offset, data, segments->data.size);
1784
1785 /* compute a "checksum" for the boot binary to detect its version */
1786 segments->boot_signature = 0;
1787 for (i = 0; i < segments->boot.size / sizeof(u32); i++)
1788 segments->boot_signature += bootimage[i];
1789
1782 return 0; 1790 return 0;
1783} 1791}
1784 1792
@@ -1968,22 +1976,14 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
1968 gk20a_err(dev_from_gk20a(g), "arbiter complete timeout"); 1976 gk20a_err(dev_from_gk20a(g), "arbiter complete timeout");
1969} 1977}
1970 1978
1971static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, 1979void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
1972 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) 1980 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
1973{ 1981{
1974 u32 addr_code32; 1982 u32 addr_code32;
1975 u32 addr_data32; 1983 u32 addr_data32;
1976 u32 addr_load32;
1977 u32 dst = 0;
1978 u32 blocks;
1979 u32 b;
1980 1984
1981 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); 1985 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
1982 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); 1986 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
1983 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
1984
1985 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
1986 gr_fecs_dmactl_require_ctx_f(0));
1987 1987
1988 /* 1988 /*
1989 * Copy falcon bootloader header into dmem at offset 0. 1989 * Copy falcon bootloader header into dmem at offset 0.
@@ -1996,17 +1996,73 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
1996 gr_fecs_dmemc_aincw_f(1)); 1996 gr_fecs_dmemc_aincw_f(1));
1997 1997
1998 /* Write out the actual data */ 1998 /* Write out the actual data */
1999 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); 1999 switch (segments->boot_signature) {
2000 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); 2000 case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED:
2001 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); 2001 case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED:
2002 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size); 2002 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2003 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); 2003 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2004 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32); 2004 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2005 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size); 2005 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2006 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); 2006 /* fallthrough */
2007 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); 2007 case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED:
2008 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); 2008 case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED:
2009 case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED:
2010 case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2:
2011 case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED:
2012 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2013 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2014 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2015 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2016 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
2017 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2018 addr_code32);
2019 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2020 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2021 segments->code.size);
2022 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2023 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2024 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2025 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2026 addr_data32);
2027 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2028 segments->data.size);
2029 break;
2030 case FALCON_UCODE_SIG_T12X_FECS_OLDER:
2031 case FALCON_UCODE_SIG_T12X_GPCCS_OLDER:
2032 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2033 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2034 addr_code32);
2035 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2036 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2037 segments->code.size);
2038 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2039 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2040 addr_data32);
2041 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2042 segments->data.size);
2043 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0),
2044 addr_code32);
2045 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2046 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
2047 break;
2048 default:
2049 gk20a_err(dev_from_gk20a(g),
2050 "unknown falcon ucode boot signature 0x%08x"
2051 " with reg_offset 0x%08x",
2052 segments->boot_signature, reg_offset);
2053 BUG();
2054 }
2055}
2056
2057void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
2058 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
2059{
2060 u32 addr_load32;
2061 u32 blocks;
2062 u32 b;
2063 u32 dst;
2009 2064
2065 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
2010 blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8; 2066 blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
2011 2067
2012 /* 2068 /*
@@ -2038,6 +2094,17 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
2038 /* Specify the falcon boot vector */ 2094 /* Specify the falcon boot vector */
2039 gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(), 2095 gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
2040 gr_fecs_bootvec_vec_f(segments->boot_entry)); 2096 gr_fecs_bootvec_vec_f(segments->boot_entry));
2097}
2098
2099int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
2100 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
2101{
2102 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
2103 gr_fecs_dmactl_require_ctx_f(0));
2104
2105 /* Copy falcon bootloader into dmem */
2106 gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset);
2107 gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset);
2041 2108
2042 /* Write to CPUCTL to start the falcon */ 2109 /* Write to CPUCTL to start the falcon */
2043 gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), 2110 gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(),
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index e09cfbfa..7db6bccf 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -295,11 +295,28 @@ struct gk20a_ctxsw_ucode_segment {
295struct gk20a_ctxsw_ucode_segments { 295struct gk20a_ctxsw_ucode_segments {
296 u32 boot_entry; 296 u32 boot_entry;
297 u32 boot_imem_offset; 297 u32 boot_imem_offset;
298 u32 boot_signature;
298 struct gk20a_ctxsw_ucode_segment boot; 299 struct gk20a_ctxsw_ucode_segment boot;
299 struct gk20a_ctxsw_ucode_segment code; 300 struct gk20a_ctxsw_ucode_segment code;
300 struct gk20a_ctxsw_ucode_segment data; 301 struct gk20a_ctxsw_ucode_segment data;
301}; 302};
302 303
304/* sums over the ucode files as sequences of u32, computed to the
305 * boot_signature field in the structure above */
306
307#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78
308#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344b
309#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09f
310
311#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5
312#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3
313#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877
314
315#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7d
316#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10
317
318#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161da
319
303struct gk20a_ctxsw_ucode_info { 320struct gk20a_ctxsw_ucode_info {
304 u64 *p_va; 321 u64 *p_va;
305 struct inst_desc inst_blk_desc; 322 struct inst_desc inst_blk_desc;
@@ -422,6 +439,11 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
422int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); 439int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
423int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); 440int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
424void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g); 441void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g);
442void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
443 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
444void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base,
445 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
446
425 447
426void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c); 448void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c);
427#endif /* GR_GK20A_H */ 449#endif /* GR_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 1256d4a6..660ffa88 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -595,76 +595,12 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
595static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, 595static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
596 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) 596 struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
597{ 597{
598 u32 addr_code32;
599 u32 addr_data32;
600 u32 addr_load32;
601 u32 dst = 0;
602 u32 blocks;
603 u32 b;
604
605 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
606 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);
607 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
608
609 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), 598 gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(),
610 gr_fecs_dmactl_require_ctx_f(0)); 599 gr_fecs_dmactl_require_ctx_f(0));
611 600
612 /* 601 /* Copy falcon bootloader into dmem */
613 * Copy falcon bootloader header into dmem at offset 0. 602 gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset);
614 * Configure dmem port 0 for auto-incrementing writes starting at dmem 603 gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset);
615 * offset 0.
616 */
617 gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0),
618 gr_fecs_dmemc_offs_f(0) |
619 gr_fecs_dmemc_blk_f(0) |
620 gr_fecs_dmemc_aincw_f(1));
621
622 /* Write out the actual data */
623 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
624 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
625 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
626 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
627 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4);
628 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32);
629 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
630 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size);
631 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
632 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
633 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);
634 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32);
635 gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size);
636
637 blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8;
638
639 /*
640 * Set the base FB address for the DMA transfer. Subtract off the 256
641 * byte IMEM block offset such that the relative FB and IMEM offsets
642 * match, allowing the IMEM tags to be properly created.
643 */
644
645 dst = segments->boot_imem_offset;
646 gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(),
647 (addr_load32 - (dst >> 8)));
648
649 for (b = 0; b < blocks; b++) {
650 /* Setup destination IMEM offset */
651 gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(),
652 dst + (b << 8));
653
654 /* Setup source offset (relative to BASE) */
655 gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(),
656 dst + (b << 8));
657
658 gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(),
659 gr_fecs_dmatrfcmd_imem_f(0x01) |
660 gr_fecs_dmatrfcmd_write_f(0x00) |
661 gr_fecs_dmatrfcmd_size_f(0x06) |
662 gr_fecs_dmatrfcmd_ctxdma_f(0));
663 }
664
665 /* Specify the falcon boot vector */
666 gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(),
667 gr_fecs_bootvec_vec_f(segments->boot_entry));
668 604
669 /* start the falcon immediately if PRIV security is disabled*/ 605 /* start the falcon immediately if PRIV security is disabled*/
670 if (!g->ops.privsecurity) { 606 if (!g->ops.privsecurity) {