From 3f3844a11ccac7957fdb7139a1c9c2a767d315a5 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Tue, 14 Oct 2014 11:48:40 +0300 Subject: gpu: nvgpu: select ucode boot init by signature Compute a signature checksum for ctxsw ucode boot section and determine the format of boot initialization data by it. This unifies gk20a and gk20b ucode segment loading a lot by separating the bootloader loading logic to separate functions. Note: Whenever the boot segment binary changes, its updated signature must be added here. Management of different bootloaders must be supported for repo-crossing staging issues. Bug 1519397 Change-Id: I96f9b905d3631dfdebf71ea3a652a0968615fd0a Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/556679 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 105 ++++++++++++++++++++++++++++++------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 22 ++++++++ 2 files changed, 108 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/nvgpu/gk20a') diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 84b79b42..30ea49a7 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -1776,9 +1776,17 @@ static int gr_gk20a_copy_ctxsw_ucode_segments( u32 *bootimage, u32 *code, u32 *data) { + int i; + memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); memcpy(buf + segments->code.offset, code, segments->code.size); memcpy(buf + segments->data.offset, data, segments->data.size); + + /* compute a "checksum" for the boot binary to detect its version */ + segments->boot_signature = 0; + for (i = 0; i < segments->boot.size / sizeof(u32); i++) + segments->boot_signature += bootimage[i]; + return 0; } @@ -1968,22 +1976,14 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) gk20a_err(dev_from_gk20a(g), "arbiter complete timeout"); } -static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, +void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) { u32 addr_code32; u32 addr_data32; - u32 addr_load32; - u32 dst = 0; - u32 blocks; - u32 b; addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); - addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); - - gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), - gr_fecs_dmactl_require_ctx_f(0)); /* * Copy falcon bootloader header into dmem at offset 0. @@ -1996,17 +1996,73 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, gr_fecs_dmemc_aincw_f(1)); /* Write out the actual data */ - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->code.size); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_data32); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), segments->data.size); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), addr_code32); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + switch (segments->boot_signature) { + case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED: + case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED: + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + /* fallthrough */ + case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED: + case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED: + case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED: + case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2: + case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED: + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_code32); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + segments->code.size); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_data32); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + segments->data.size); + break; + case FALCON_UCODE_SIG_T12X_FECS_OLDER: + case FALCON_UCODE_SIG_T12X_GPCCS_OLDER: + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_code32); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + segments->code.size); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_data32); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + segments->data.size); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_code32); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + break; + default: + gk20a_err(dev_from_gk20a(g), + "unknown falcon ucode boot signature 0x%08x" + " with reg_offset 0x%08x", + segments->boot_signature, reg_offset); + BUG(); + } +} + +void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, + struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) +{ + u32 addr_load32; + u32 blocks; + u32 b; + u32 dst; + addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); blocks = ((segments->boot.size + 0xFF) & ~0xFF) >> 8; /* @@ -2038,6 +2094,17 @@ static int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, /* Specify the falcon boot vector */ gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(), gr_fecs_bootvec_vec_f(segments->boot_entry)); +} + +int gr_gk20a_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, + struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) +{ + gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), + gr_fecs_dmactl_require_ctx_f(0)); + + /* Copy falcon bootloader into dmem */ + gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset); + gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset); /* Write to CPUCTL to start the falcon */ gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index e09cfbfa..7db6bccf 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -295,11 +295,28 @@ struct gk20a_ctxsw_ucode_segment { struct gk20a_ctxsw_ucode_segments { u32 boot_entry; u32 boot_imem_offset; + u32 boot_signature; struct gk20a_ctxsw_ucode_segment boot; struct gk20a_ctxsw_ucode_segment code; struct gk20a_ctxsw_ucode_segment data; }; +/* sums over the ucode files as sequences of u32, computed to the + * boot_signature field in the structure above */ + +#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78 +#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344b +#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09f + +#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5 +#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3 +#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877 + +#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7d +#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10 + +#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161da + struct gk20a_ctxsw_ucode_info { u64 *p_va; struct inst_desc inst_blk_desc; @@ -422,6 +439,11 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g); +void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base, + struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); +void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, + struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); + void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c); #endif /* GR_GK20A_H */ -- cgit v1.2.2