diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 105 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 22 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 70 |
3 files changed, 111 insertions, 86 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 84b79b42..30ea49a7 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1776,9 +1776,17 @@ static int gr_gk20a_copy_ctxsw_ucode_segments( | |||
1776 | u32 *bootimage, | 1776 | u32 *bootimage, |
1777 | u32 *code, u32 *data) | 1777 | u32 *code, u32 *data) |
1778 | { | 1778 | { |
1779 | int i; | ||
1780 | |||
1779 | memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); | 1781 | memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); |
1780 | memcpy(buf + segments->code.offset, code, segments->code.size); | 1782 | memcpy(buf + segments->code.offset, code, segments->code.size); |
1781 | memcpy(buf + segments->data.offset, data, segments->data.size); | 1783 | memcpy(buf + segments->data.offset, data, segments->data.size); |
1784 | |||
1785 | /* compute a "checksum" for the boot binary to detect its version */ | ||
1786 | segments->boot_signature = 0; | ||
1787 | for (i = 0; i < segments->boot.size / sizeof(u32); i++) | ||
1788 | segments->boot_signature += bootimage[i]; | ||
1789 | |||
1782 | return 0; | 1790 | return 0; |
1783 | } | 1791 | } |
1784 | 1792 | ||
@@ -1968,22 +1976,14 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) | |||
1968 | gk20a_err(dev_from_gk20a(g), "arbiter complete timeout"); | 1976 | gk20a_err(dev_from_gk20a(g), "arbiter complete timeout"); |
1969 | } | 1977 | } |
1970 | 1978 | ||
1971 | static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | 1979 | void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base, |
1972 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) | 1980 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) |
1973 | { | 1981 | { |
1974 | u32 addr_code32; | 1982 | u32 addr_code32; |
1975 | u32 addr_data32; | 1983 | u32 addr_data32; |
1976 | u32 addr_load32; | ||
1977 | u32 dst = 0; | ||
1978 | u32 blocks; | ||
1979 | u32 b; | ||
1980 | 1984 | ||
1981 | addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); | 1985 | addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); |
1982 | addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); | 1986 | addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); |
1983 | addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); | ||
1984 | |||
1985 | gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), | ||
1986 | gr_fecs_dmactl_require_ctx_f(0)); | ||
1987 | 1987 | ||
1988 | /* | 1988 | /* |
1989 | * Copy falcon bootloader header into dmem at offset 0. | 1989 | * Copy falcon bootloader header into dmem at offset 0. |
@@ -1996,17 +1996,73 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | |||
1996 | gr_fecs_dmemc_aincw_f(1)); | 1996 | gr_fecs_dmemc_aincw_f(1)); |
1997 | 1997 | ||
1998 | /* Write out the actual data */ | 1998 | /* Write out the actual data */ |
1999 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | 1999 | switch (segments->boot_signature) { |
2000 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); | 2000 | case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED: |
2001 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | 2001 | case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED: |
2002 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size); | 2002 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); |
2003 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | 2003 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); |
2004 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32); | 2004 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); |
2005 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size); | 2005 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); |
2006 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); | 2006 | /* fallthrough */ |
2007 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | 2007 | case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED: |
2008 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | 2008 | case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED: |
2009 | case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED: | ||
2010 | case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2: | ||
2011 | case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED: | ||
2012 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2013 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2014 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2015 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2016 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4); | ||
2017 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2018 | addr_code32); | ||
2019 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2020 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2021 | segments->code.size); | ||
2022 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2023 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2024 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2025 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2026 | addr_data32); | ||
2027 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2028 | segments->data.size); | ||
2029 | break; | ||
2030 | case FALCON_UCODE_SIG_T12X_FECS_OLDER: | ||
2031 | case FALCON_UCODE_SIG_T12X_GPCCS_OLDER: | ||
2032 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2033 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2034 | addr_code32); | ||
2035 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2036 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2037 | segments->code.size); | ||
2038 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2039 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2040 | addr_data32); | ||
2041 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2042 | segments->data.size); | ||
2043 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), | ||
2044 | addr_code32); | ||
2045 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2046 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
2047 | break; | ||
2048 | default: | ||
2049 | gk20a_err(dev_from_gk20a(g), | ||
2050 | "unknown falcon ucode boot signature 0x%08x" | ||
2051 | " with reg_offset 0x%08x", | ||
2052 | segments->boot_signature, reg_offset); | ||
2053 | BUG(); | ||
2054 | } | ||
2055 | } | ||
2056 | |||
2057 | void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, | ||
2058 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) | ||
2059 | { | ||
2060 | u32 addr_load32; | ||
2061 | u32 blocks; | ||
2062 | u32 b; | ||
2063 | u32 dst; | ||
2009 | 2064 | ||
2065 | addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); | ||
2010 | blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8; | 2066 | blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8; |
2011 | 2067 | ||
2012 | /* | 2068 | /* |
@@ -2038,6 +2094,17 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | |||
2038 | /* Specify the falcon boot vector */ | 2094 | /* Specify the falcon boot vector */ |
2039 | gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(), | 2095 | gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(), |
2040 | gr_fecs_bootvec_vec_f(segments->boot_entry)); | 2096 | gr_fecs_bootvec_vec_f(segments->boot_entry)); |
2097 | } | ||
2098 | |||
2099 | int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | ||
2100 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) | ||
2101 | { | ||
2102 | gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), | ||
2103 | gr_fecs_dmactl_require_ctx_f(0)); | ||
2104 | |||
2105 | /* Load the bootloader header into dmem and the bootloader into imem */ | ||
2106 | gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset); | ||
2107 | gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset); | ||
2041 | 2108 | ||
2042 | /* Write to CPUCTL to start the falcon */ | 2109 | /* Write to CPUCTL to start the falcon */ |
2043 | gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), | 2110 | gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index e09cfbfa..7db6bccf 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -295,11 +295,28 @@ struct gk20a_ctxsw_ucode_segment { | |||
295 | struct gk20a_ctxsw_ucode_segments { | 295 | struct gk20a_ctxsw_ucode_segments { |
296 | u32 boot_entry; | 296 | u32 boot_entry; |
297 | u32 boot_imem_offset; | 297 | u32 boot_imem_offset; |
298 | u32 boot_signature; | ||
298 | struct gk20a_ctxsw_ucode_segment boot; | 299 | struct gk20a_ctxsw_ucode_segment boot; |
299 | struct gk20a_ctxsw_ucode_segment code; | 300 | struct gk20a_ctxsw_ucode_segment code; |
300 | struct gk20a_ctxsw_ucode_segment data; | 301 | struct gk20a_ctxsw_ucode_segment data; |
301 | }; | 302 | }; |
302 | 303 | ||
304 | /* sums over the boot images taken as sequences of u32, stored in the | ||
305 | * boot_signature field of the structure above */ | ||
306 | |||
307 | #define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78 | ||
308 | #define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344b | ||
309 | #define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09f | ||
310 | |||
311 | #define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5 | ||
312 | #define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3 | ||
313 | #define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877 | ||
314 | |||
315 | #define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7d | ||
316 | #define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10 | ||
317 | |||
318 | #define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161da | ||
319 | |||
303 | struct gk20a_ctxsw_ucode_info { | 320 | struct gk20a_ctxsw_ucode_info { |
304 | u64 *p_va; | 321 | u64 *p_va; |
305 | struct inst_desc inst_blk_desc; | 322 | struct inst_desc inst_blk_desc; |
@@ -422,6 +439,11 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); | |||
422 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); | 439 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); |
423 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); | 440 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); |
424 | void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g); | 441 | void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g); |
442 | void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base, | ||
443 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); | ||
444 | void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, | ||
445 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); | ||
446 | |||
425 | 447 | ||
426 | void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c); | 448 | void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c); |
427 | #endif /* GR_GK20A_H */ | 449 | #endif /* GR_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 1256d4a6..660ffa88 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -595,76 +595,12 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
595 | static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, | 595 | static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, |
596 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) | 596 | struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) |
597 | { | 597 | { |
598 | u32 addr_code32; | ||
599 | u32 addr_data32; | ||
600 | u32 addr_load32; | ||
601 | u32 dst = 0; | ||
602 | u32 blocks; | ||
603 | u32 b; | ||
604 | |||
605 | addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); | ||
606 | addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); | ||
607 | addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); | ||
608 | |||
609 | gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), | 598 | gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), |
610 | gr_fecs_dmactl_require_ctx_f(0)); | 599 | gr_fecs_dmactl_require_ctx_f(0)); |
611 | 600 | ||
612 | /* | 601 | /* Load the bootloader header into dmem and the bootloader into imem */ |
613 | * Copy falcon bootloader header into dmem at offset 0. | 602 | gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset); |
614 | * Configure dmem port 0 for auto-incrementing writes starting at dmem | 603 | gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset); |
615 | * offset 0. | ||
616 | */ | ||
617 | gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0), | ||
618 | gr_fecs_dmemc_offs_f(0) | | ||
619 | gr_fecs_dmemc_blk_f(0) | | ||
620 | gr_fecs_dmemc_aincw_f(1)); | ||
621 | |||
622 | /* Write out the actual data */ | ||
623 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
624 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
625 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
626 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
627 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4); | ||
628 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); | ||
629 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
630 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size); | ||
631 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
632 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
633 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); | ||
634 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32); | ||
635 | gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size); | ||
636 | |||
637 | blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8; | ||
638 | |||
639 | /* | ||
640 | * Set the base FB address for the DMA transfer. Subtract off the 256 | ||
641 | * byte IMEM block offset such that the relative FB and IMEM offsets | ||
642 | * match, allowing the IMEM tags to be properly created. | ||
643 | */ | ||
644 | |||
645 | dst = segments->boot_imem_offset; | ||
646 | gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(), | ||
647 | (addr_load32 - (dst >> 8))); | ||
648 | |||
649 | for (b = 0; b < blocks; b++) { | ||
650 | /* Setup destination IMEM offset */ | ||
651 | gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(), | ||
652 | dst + (b << 8)); | ||
653 | |||
654 | /* Setup source offset (relative to BASE) */ | ||
655 | gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(), | ||
656 | dst + (b << 8)); | ||
657 | |||
658 | gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(), | ||
659 | gr_fecs_dmatrfcmd_imem_f(0x01) | | ||
660 | gr_fecs_dmatrfcmd_write_f(0x00) | | ||
661 | gr_fecs_dmatrfcmd_size_f(0x06) | | ||
662 | gr_fecs_dmatrfcmd_ctxdma_f(0)); | ||
663 | } | ||
664 | |||
665 | /* Specify the falcon boot vector */ | ||
666 | gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(), | ||
667 | gr_fecs_bootvec_vec_f(segments->boot_entry)); | ||
668 | 604 | ||
669 | /* start the falcon immediately if PRIV security is disabled*/ | 605 | /* start the falcon immediately if PRIV security is disabled*/ |
670 | if (!g->ops.privsecurity) { | 606 | if (!g->ops.privsecurity) { |