From 20408d5b32e5564b2fb410bc5b0bb0a198629437 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Mon, 23 Jun 2014 09:56:45 +0300 Subject: gpu: nvgpu: Boot FECS to secure mode Boot FECS to secure mode if ACR is enabled. Bug 200006956 Change-Id: Ifc107704a6456af837b7f6c513c04d152b2f4d3a Signed-off-by: Terje Bergstrom Reviewed-on: http://git-master/r/424251 --- drivers/gpu/nvgpu/gk20a/gk20a.c | 6 +++ drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 89 +++++++++++++++++++++++++++---------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 ++ drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 29 +++++------- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 3 ++ drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 55 ++++++++++++++++++++++- drivers/gpu/nvgpu/gm20b/acr_gm20b.h | 2 +- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 55 +++++++++++++++++++++++ 9 files changed, 201 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 5305f612..2310b81c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -897,6 +897,12 @@ static int gk20a_pm_finalize_poweron(struct device *dev) goto done; } + err = gk20a_enable_gr_hw(g); + if (err) { + gk20a_err(dev, "failed to enable gr"); + goto done; + } + err = g->ops.pmu.prepare_ucode(g); if (err) { gk20a_err(dev, "failed to init pmu ucode"); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 07826675..991891c5 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -128,6 +128,7 @@ struct gpu_ops { u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); + int (*load_ctxsw_ucode)(struct gk20a *g); } gr; const char *name; struct { diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4a6dd6c5..bd9476e4 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -1867,7 +1867,7 @@ static int gr_gk20a_copy_ctxsw_ucode_segments( return 0; } -static int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) +int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) { struct device *d = dev_from_gk20a(g); struct mm_gk20a *mm = &g->mm; @@ -1992,7 +1992,7 @@ static int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) return err; } -static void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) +void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) { struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; int retries = 20; @@ -2149,9 +2149,8 @@ static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) gr_fecs_falcon_hwcfg_r()); } -static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr) +int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) { - u32 ret; gk20a_dbg_fn(""); @@ -2171,11 +2170,20 @@ static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr) gr_gk20a_load_falcon_imem(g); gr_gk20a_start_falcon_ucode(g); } else { - if (!gr->skip_ucode_init) + if (!g->gr.skip_ucode_init) gr_gk20a_init_ctxsw_ucode(g); gr_gk20a_load_falcon_with_bootloader(g); - gr->skip_ucode_init = true; + g->gr.skip_ucode_init = true; } + gk20a_dbg_fn("done"); + return 0; +} + +static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g) +{ + u32 ret; + + gk20a_dbg_fn(""); ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, GR_IS_UCODE_OP_EQUAL, @@ -4449,9 +4457,36 @@ static int gr_gk20a_wait_mem_scrubbing(struct gk20a *g) return -ETIMEDOUT; } -static int gk20a_init_gr_reset_enable_hw(struct gk20a *g) +int gr_gk20a_init_ctxsw(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; + u32 err = 
0; + + err = g->ops.gr.load_ctxsw_ucode(g); + if (err) + goto out; + + err = gr_gk20a_wait_ctxsw_ready(g); + if (err) + goto out; + + /* this appears query for sw states but fecs actually init + ramchain, etc so this is hw init */ + err = gr_gk20a_init_ctx_state(g, gr); + if (err) + goto out; + +out: + if (err) + gk20a_err(dev_from_gk20a(g), "fail"); + else + gk20a_dbg_fn("done"); + + return 0; +} + +int gk20a_init_gr_reset_enable_hw(struct gk20a *g) +{ struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load; unsigned long end_jiffies = jiffies + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); @@ -4483,16 +4518,6 @@ static int gk20a_init_gr_reset_enable_hw(struct gk20a *g) if (err) goto out; - err = gr_gk20a_load_ctxsw_ucode(g, gr); - if (err) - goto out; - - /* this appears query for sw states but fecs actually init - ramchain, etc so this is hw init */ - err = gr_gk20a_init_ctx_state(g, gr); - if (err) - goto out; - out: if (err) gk20a_err(dev_from_gk20a(g), "fail"); @@ -4624,14 +4649,10 @@ int gk20a_init_gr_support(struct gk20a *g) gk20a_dbg_fn(""); - err = gk20a_init_gr_prepare(g); - if (err) - return err; - /* this is required before gr_gk20a_init_ctx_state */ mutex_init(&g->gr.fecs_mutex); - err = gk20a_init_gr_reset_enable_hw(g); + err = gr_gk20a_init_ctxsw(g); if (err) return err; @@ -4817,10 +4838,11 @@ static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) } } -int gk20a_gr_reset(struct gk20a *g) +int gk20a_enable_gr_hw(struct gk20a *g) { int err; - u32 size; + + gk20a_dbg_fn(""); err = gk20a_init_gr_prepare(g); if (err) @@ -4830,10 +4852,28 @@ int gk20a_gr_reset(struct gk20a *g) if (err) return err; + gk20a_dbg_fn("done"); + + return 0; +} + +int gk20a_gr_reset(struct gk20a *g) +{ + int err; + u32 size; + + err = gk20a_enable_gr_hw(g); + if (err) + return err; + err = gk20a_init_gr_setup_hw(g); if (err) return err; + err = gr_gk20a_init_ctxsw(g); + if (err) + return err; + size = 0; err = gr_gk20a_fecs_get_reglist_img_size(g, &size); if (err) { @@ -6934,4 +6974,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; + gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index cae69ba6..3376747b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -319,6 +319,7 @@ struct gpu_ops; void gk20a_init_gr(struct gk20a *g); void gk20a_init_gr_ops(struct gpu_ops *gops); int gk20a_init_gr_support(struct gk20a *g); +int gk20a_enable_gr_hw(struct gk20a *g); int gk20a_gr_reset(struct gk20a *g); void gk20a_gr_wait_initialized(struct gk20a *g); @@ -415,6 +416,9 @@ void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, u32 **sm_dsm_perf_regs, u32 *perf_register_stride); int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); +int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); +int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); +void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g); void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *c); #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index cfbdceae..de16e403 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -487,6 +487,17 @@ static void 
*get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq) int gk20a_init_pmu(struct pmu_gk20a *pmu) { struct gk20a *g = gk20a_from_pmu(pmu); + + mutex_init(&pmu->elpg_mutex); + mutex_init(&pmu->isr_mutex); + mutex_init(&pmu->pmu_copy_lock); + mutex_init(&pmu->pmu_seq_lock); + + pmu->perfmon_counter.index = 3; /* GR & CE2 */ + pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE; + + pmu->remove_support = gk20a_remove_pmu_support; + switch (pmu->desc->app_version) { case APP_VERSION_GM20B_1: case APP_VERSION_GM20B: @@ -1700,25 +1711,9 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g) pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; gk20a_free_sgtable(&sgt_seq_buf); - pmu->sw_ready = true; skip_init: - mutex_init(&pmu->elpg_mutex); - mutex_init(&pmu->isr_mutex); - mutex_init(&pmu->pmu_copy_lock); - mutex_init(&pmu->pmu_seq_lock); - - pmu->perfmon_counter.index = 3; /* GR & CE2 */ - pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE; - - pmu->remove_support = gk20a_remove_pmu_support; - err = gk20a_init_pmu(pmu); - if (err) { - gk20a_err(d, "failed to set function pointers\n"); - return err; - } - gk20a_dbg_fn("done"); return 0; @@ -1773,7 +1768,6 @@ int gk20a_init_pmu_setup_hw1(struct gk20a *g) int err; gk20a_dbg_fn(""); - pmu_reset(pmu); /* setup apertures - virtual */ @@ -3770,4 +3764,5 @@ err_out: debugfs_remove_recursive(platform->debugfs); return -ENOMEM; } + #endif diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 6f4e6c2e..40d41ee9 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -1103,4 +1103,7 @@ void pmu_dump_falcon_stats(struct pmu_gk20a *pmu); void gk20a_remove_pmu_support(struct pmu_gk20a *pmu); void pmu_setup_hw(struct work_struct *work); void pmu_seq_init(struct pmu_gk20a *pmu); + +int gk20a_init_pmu(struct pmu_gk20a *pmu); + #endif /*__PMU_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 2b7be4f7..c03629fc 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -55,8 +55,9 @@ static int acr_ucode_patch_sig(struct gk20a *g, /*Globals*/ static void __iomem *mc = IO_ADDRESS(TEGRA_MC_BASE); -get_ucode_details pmu_acr_supp_ucode_list[MAX_SUPPORTED_LSFM] = { +get_ucode_details pmu_acr_supp_ucode_list[] = { pmu_ucode_details, + fecs_ucode_details, }; /*Once is LS mode, cpuctl_alias is only accessible*/ @@ -116,6 +117,57 @@ int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img) return 0; } +int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img) +{ + int err = 0; + struct lsf_ucode_desc *lsf_desc; + + lsf_desc = kzalloc(sizeof(struct lsf_ucode_desc), GFP_KERNEL); + if (!lsf_desc) + return -ENOMEM; + lsf_desc->falcon_id = LSF_FALCON_ID_FECS; + + p_img->desc = kzalloc(sizeof(struct pmu_ucode_desc), GFP_KERNEL); + if (p_img->desc == NULL) { + kfree(lsf_desc); + return -ENOMEM; + } + + p_img->desc->bootloader_start_offset = + g->ctxsw_ucode_info.fecs.boot.offset; + p_img->desc->bootloader_size = + g->ctxsw_ucode_info.fecs.boot.size; + p_img->desc->bootloader_imem_offset = + g->ctxsw_ucode_info.fecs.boot_imem_offset; + p_img->desc->bootloader_entry_point = + g->ctxsw_ucode_info.fecs.boot_entry; + + p_img->desc->image_size = g->ctxsw_ucode_info.fecs.boot.size + + g->ctxsw_ucode_info.fecs.code.size + + g->ctxsw_ucode_info.fecs.data.size; + p_img->desc->app_size = 0; + p_img->desc->app_start_offset = 0; + p_img->desc->app_imem_offset = 0; + p_img->desc->app_imem_entry = 0; + 
p_img->desc->app_dmem_offset = 0; + p_img->desc->app_resident_code_offset = + g->ctxsw_ucode_info.fecs.code.offset; + p_img->desc->app_resident_code_size = + g->ctxsw_ucode_info.fecs.code.size; + p_img->desc->app_resident_data_offset = + g->ctxsw_ucode_info.fecs.data.offset; + p_img->desc->app_resident_data_size = + g->ctxsw_ucode_info.fecs.data.size; + p_img->data = g->ctxsw_ucode_info.surface_desc.cpuva; + p_img->data_size = p_img->desc->image_size; + + p_img->fw_ver = NULL; + p_img->header = NULL; + p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; + gm20b_dbg_pmu("fecs fw loaded 2\n"); + return 0; +} + int prepare_ucode_blob(struct gk20a *g) { struct device *d = dev_from_gk20a(g); @@ -132,6 +184,7 @@ int prepare_ucode_blob(struct gk20a *g) memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr)); gm20b_dbg_pmu("fetching GMMU regs\n"); gm20b_mm_mmu_vpr_info_fetch(g); + gr_gk20a_init_ctxsw_ucode(g); /* Discover all managed falcons*/ status = lsfm_discover_ucode_images(g, plsfm); diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h index e0dd50d0..84473c30 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h @@ -21,7 +21,7 @@ /*Defines*/ /*chip specific defines*/ -#define MAX_SUPPORTED_LSFM 1 /*PMU, FECS, GPCCS*/ +#define MAX_SUPPORTED_LSFM 2 /*PMU, FECS, GPCCS*/ #define LSF_UCODE_DATA_ALIGNMENT 4096 #define GM20B_PMU_UCODE_IMAGE "gpmu_ucode.bin" diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 2efb7228..ae7864df 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -655,6 +655,56 @@ static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, return 0; } +#ifdef CONFIG_TEGRA_ACR +static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) +{ + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + u64 addr_base = ucode_info->ucode_gpuva; + + gr_gk20a_load_falcon_bind_instblk(g); + + g->ops.gr.falcon_load_ucode(g, addr_base, + &g->ctxsw_ucode_info.gpccs, + gr_gpcs_gpccs_falcon_hwcfg_r() - + gr_fecs_falcon_hwcfg_r()); +} + +static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) +{ + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + u64 addr_base = ucode_info->ucode_gpuva; + int i; + + gk20a_dbg_fn(""); + + if (tegra_platform_is_linsim()) { + gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7), + gr_fecs_ctxsw_mailbox_value_f(0xc0de7777)); + gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7), + gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); + } + + gr_gk20a_load_falcon_bind_instblk(g); + g->ops.gr.falcon_load_ucode(g, addr_base, + &g->ctxsw_ucode_info.gpccs, + gr_gpcs_gpccs_falcon_hwcfg_r() - + gr_fecs_falcon_hwcfg_r()); + + gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); + gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1); + gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff); + + gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0)); + + gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1)); + gk20a_writel(g, gr_fecs_cpuctl_alias_r(), gr_fecs_cpuctl_startcpu_f(1)); + + gk20a_dbg_fn("done"); + + return 0; +} +#endif + void gm20b_init_gr(struct gpu_ops *gops) { gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; @@ -676,4 +726,9 @@ void gm20b_init_gr(struct gpu_ops *gops) gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep; gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments; +#ifdef 
CONFIG_TEGRA_ACR + gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; +#else + gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; +#endif } -- cgit v1.2.2
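
For orientation, the structural core of this change is that FECS/GPCCS ucode loading moves out of gk20a_init_gr_reset_enable_hw() and behind a new gr.load_ctxsw_ucode op, invoked from gr_gk20a_init_ctxsw(); gm20b installs a secure-boot-aware loader when CONFIG_TEGRA_ACR is set, so FECS is booted by the ACR/PMU in LS mode and only GPCCS is loaded with the CPU bootloader. Below is a minimal, self-contained sketch of that dispatch. It is illustrative only: the types are simplified stand-ins, the printf bodies and the *_sketch names are hypothetical, not code from the driver.

/*
 * Illustrative sketch only -- simplified stand-ins, not code from the driver.
 * It models the dispatch the patch introduces: common gr init calls the new
 * gr.load_ctxsw_ucode hook, and the gm20b variant is selected when ACR
 * (CONFIG_TEGRA_ACR) is enabled so FECS stays in the secure boot path.
 */
#include <stdio.h>

struct gk20a;                        /* opaque here; the real struct lives in gk20a.h */

struct gpu_ops {
	struct {
		int (*load_ctxsw_ucode)(struct gk20a *g);   /* hook added by this patch */
	} gr;
};

struct gk20a {
	struct gpu_ops ops;
};

/* Non-secure path: the CPU bootloader loads both FECS and GPCCS ucode. */
static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
{
	(void)g;
	printf("gk20a: load FECS + GPCCS with CPU bootloader\n");
	return 0;
}

/* Secure path: FECS has already been booted to LS mode by the ACR/PMU,
 * so only GPCCS is loaded here before both falcons are started. */
static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{
	(void)g;
	printf("gm20b: FECS booted by ACR, load GPCCS only\n");
	return 0;
}

/* Mirrors the #ifdef at the end of gm20b_init_gr() in the diff above. */
static void gm20b_init_gr_sketch(struct gpu_ops *gops)
{
#ifdef CONFIG_TEGRA_ACR
	gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode;
#else
	gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
#endif
}

/* Mirrors gr_gk20a_init_ctxsw(): the common code only invokes the hook. */
static int gr_init_ctxsw_sketch(struct gk20a *g)
{
	return g->ops.gr.load_ctxsw_ucode(g);
}

int main(void)
{
	struct gk20a g = { 0 };

	gm20b_init_gr_sketch(&g.ops);
	return gr_init_ctxsw_sketch(&g);
}

The design point the sketch highlights: gk20a_enable_gr_hw() carries no ucode policy at all; the chip-specific init_gr() decides whether FECS is handed to the ACR or loaded directly, and gk20a_gr_reset() simply replays enable -> setup_hw -> init_ctxsw.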