From b4c355d32c96137901b2943281d911e385d6f9a7 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar
Date: Fri, 27 May 2016 13:07:07 +0530
Subject: gpu: nvgpu: Add gm204/gm206 ACR BL support

Update ACR BL desc & support for ACR boot.

JIRA DNVGPU-10

Change-Id: Iced2e10695439b2e1b47835f5c3c8a5d274e4b1e
Signed-off-by: Mahantesh Kumbar
Reviewed-on: http://git-master/r/1155027
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gm206/acr_gm206.c | 385 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gm206/acr_gm206.h |  85 +++++++++
 2 files changed, 470 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gm206/acr_gm206.c
 create mode 100644 drivers/gpu/nvgpu/gm206/acr_gm206.h

(limited to 'drivers/gpu/nvgpu/gm206')

diff --git a/drivers/gpu/nvgpu/gm206/acr_gm206.c b/drivers/gpu/nvgpu/gm206/acr_gm206.c
new file mode 100644
index 00000000..8db03105
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm206/acr_gm206.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/delay.h>	/* for mdelay */
+#include <linux/firmware.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/pmu_gk20a.h"
+#include "gk20a/semaphore_gk20a.h"
+#include "hw_pwr_gm206.h"
+#include "acr.h"
+#include "acr_gm206.h"
+
+/*Defines*/
+#define gm206_dbg_pmu(fmt, arg...) \
+	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
+
+/* Both size and address of WPR need to be 128K-aligned */
+#define WPR_ALIGNMENT 0x20000
+#define DGPU_WPR 0x10000000 /* start from 256MB location at VIDMEM */
+#define DGPU_WPR_SIZE 0x100000
+
+static int gm206_pmu_populate_loader_cfg(struct gk20a *g,
+	void *lsfm, u32 *p_bl_gen_desc_size);
+static int gm206_flcn_populate_bl_dmem_desc(struct gk20a *g,
+	void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid);
+static int gm206_bootstrap_hs_flcn(struct gk20a *g);
+
+/* Report the fixed WPR carveout: base DGPU_WPR, size DGPU_WPR_SIZE. */
+void gm206_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
+{
+	inf->wpr_base = DGPU_WPR;
+	inf->size = DGPU_WPR_SIZE;
+}
+
+/*
+ * Split a 64-bit DMA address into the lo/hi words of a falc_u64.
+ *
+ * Both words are OR-ed into the destination rather than assigned, so any
+ * bits already set there are kept.
+ */
+static void flcn64_set_dma(struct falc_u64 *dma_addr, u64 value)
+{
+	dma_addr->lo |= u64_lo32(value);
+	dma_addr->hi |= u64_hi32(value);
+}
+
+/*
+ * Describe the WPR carveout as a one-entry scatter-gather table in @mem.
+ *
+ * @size is not used; the DMA address of the single entry is the WPR base
+ * reported by g->ops.pmu.get_wpr().
+ *
+ * Returns 0 on success, -ENOMEM when the sg_table struct cannot be
+ * allocated, or the error code from sg_alloc_table().
+ */
+int gm206_alloc_blob_space(struct gk20a *g,
+		size_t size, struct mem_desc *mem)
+{
+	int err = 0;
+	struct wpr_carveout_info wpr_inf;
+
+	g->ops.pmu.get_wpr(g, &wpr_inf);
+
+	mem->aperture = APERTURE_VIDMEM;
+	mem->sgt = kzalloc(sizeof(*mem->sgt), GFP_KERNEL);
+	if (!mem->sgt) {
+		gk20a_err(dev_from_gk20a(g), "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to allocate sg_table\n");
+		goto free_sgt;
+	}
+
+	sg_dma_address(mem->sgt->sgl) = wpr_inf.wpr_base;
+
+	return err;
+
+free_sgt:
+	gk20a_free_sgtable(&mem->sgt);
+	return err;
+}
+
+/* Run gm20b_init_secure_pmu(), then override the PMU ops set below. */
+void gm206_init_secure_pmu(struct gpu_ops *gops)
+{
+	gm20b_init_secure_pmu(gops);
+	gops->pmu.prepare_ucode = prepare_ucode_blob;
+	gops->pmu.pmu_setup_hw_and_bootstrap = gm206_bootstrap_hs_flcn;
+	gops->pmu.get_wpr = gm206_wpr_info;
+	gops->pmu.alloc_blob_space = gm206_alloc_blob_space;
+	gops->pmu.pmu_populate_loader_cfg = gm206_pmu_populate_loader_cfg;
+	gops->pmu.flcn_populate_bl_dmem_desc = gm206_flcn_populate_bl_dmem_desc;
+}
+
+/*
+ * Fill in the loader_config_v1 bootloader descriptor for the image @lsfm,
+ * which must point to a struct lsfm_managed_ucode_img_v1.
+ *
+ * Stores sizeof(struct loader_config_v1) in *@p_bl_gen_desc_size and returns
+ * 0, or returns -EINVAL when the image has no ucode descriptor.
+ */
+static int gm206_pmu_populate_loader_cfg(struct gk20a *g,
+	void *lsfm, u32 *p_bl_gen_desc_size)
+{
+	struct wpr_carveout_info wpr_inf;
+	struct pmu_gk20a *pmu = &g->pmu;
+	struct lsfm_managed_ucode_img_v1 *p_lsfm =
+		(struct lsfm_managed_ucode_img_v1 *)lsfm;
+	struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img);
+	struct loader_config_v1 *ldr_cfg =
+		&(p_lsfm->bl_gen_desc.loader_cfg_v1);
+	u64 addr_base;
+	struct pmu_ucode_desc *desc;
+	u64 addr_code, addr_data;
+	u32 addr_args;
+
+	if (p_img->desc == NULL) /*This means its a header based ucode,
+				  and so we do not fill BL gen desc structure*/
+		return -EINVAL;
+	desc = p_img->desc;
+	/*
+	 * Each segment address is lsb_header.ucode_off plus the WPR base
+	 * plus the segment offsets from the ucode descriptor, truncated to
+	 * its low 32 bits.
+	 */
+	addr_base = p_lsfm->lsb_header.ucode_off;
+	g->ops.pmu.get_wpr(g, &wpr_inf);
+	addr_base += wpr_inf.wpr_base;
+
+	gm206_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)addr_base);
+	/*From linux*/
+	addr_code = u64_lo32((addr_base +
+				desc->app_start_offset +
+				desc->app_resident_code_offset));
+	gm206_dbg_pmu("app start %d app res code off %d\n",
+		desc->app_start_offset, desc->app_resident_code_offset);
+	addr_data = u64_lo32((addr_base +
+				desc->app_start_offset +
+				desc->app_resident_data_offset));
+	gm206_dbg_pmu("app res data offset%d\n",
+		desc->app_resident_data_offset);
+	gm206_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset);
+
+	addr_args = ((pwr_falcon_hwcfg_dmem_size_v(
+			gk20a_readl(g, pwr_falcon_hwcfg_r())))
+			<< GK20A_PMU_DMEM_BLKSIZE2);
+
+	addr_args -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
+
+	gm206_dbg_pmu("addr_args %x\n", addr_args);
+
+	/* Populate the loader_config state*/
+	ldr_cfg->dma_idx = GK20A_PMU_DMAIDX_UCODE;
+	flcn64_set_dma(&ldr_cfg->code_dma_base, addr_code);
+	ldr_cfg->code_size_total = desc->app_size;
+	ldr_cfg->code_size_to_load = desc->app_resident_code_size;
+	ldr_cfg->code_entry_point = desc->app_imem_entry;
+	flcn64_set_dma(&ldr_cfg->data_dma_base, addr_data);
+	ldr_cfg->data_size = desc->app_resident_data_size;
+	flcn64_set_dma(&ldr_cfg->overlay_dma_base, addr_code);
+
+	/* Update the argc/argv members*/
+	ldr_cfg->argc = 1;
+	ldr_cfg->argv = addr_args;
+
+	*p_bl_gen_desc_size = sizeof(struct loader_config_v1);
+	g->acr.pmu_args = addr_args;
+	return 0;
+}
+
+/*
+ * Fill in the flcn_bl_dmem_desc_v1 bootloader descriptor for the image
+ * @lsfm, which must point to a struct lsfm_managed_ucode_img_v1. @falconid
+ * is not used here.
+ *
+ * Stores sizeof(struct flcn_bl_dmem_desc_v1) in *@p_bl_gen_desc_size and
+ * returns 0, or returns -EINVAL when the image has no ucode descriptor.
+ */
+static int gm206_flcn_populate_bl_dmem_desc(struct gk20a *g,
+	void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid)
+{
+	struct wpr_carveout_info wpr_inf;
+	struct lsfm_managed_ucode_img_v1 *p_lsfm =
+		(struct lsfm_managed_ucode_img_v1 *)lsfm;
+	struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img);
+	struct flcn_bl_dmem_desc_v1 *ldr_cfg =
+		&(p_lsfm->bl_gen_desc.bl_dmem_desc_v1);
+	u64 addr_base;
+	struct pmu_ucode_desc *desc;
+	u64 addr_code, addr_data;
+
+	if (p_img->desc == NULL) /*This means its a header based ucode,
+				  and so we do not fill BL gen desc structure*/
+		return -EINVAL;
+	desc = p_img->desc;
+
+	/*
+	 * Each segment address is lsb_header.ucode_off plus the WPR base
+	 * plus the segment offsets from the ucode descriptor, truncated to
+	 * its low 32 bits.
+	 */
+	addr_base = p_lsfm->lsb_header.ucode_off;
+	g->ops.pmu.get_wpr(g, &wpr_inf);
+	addr_base += wpr_inf.wpr_base;
+
+	gm206_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base,
+		p_lsfm->wpr_header.falcon_id);
+	addr_code = u64_lo32((addr_base +
+				desc->app_start_offset +
+				desc->app_resident_code_offset));
+	addr_data = u64_lo32((addr_base +
+				desc->app_start_offset +
+				desc->app_resident_data_offset));
+
+	gm206_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %xID\n",
+		(u32)addr_code, (u32)addr_data, desc->bootloader_start_offset,
+		p_lsfm->wpr_header.falcon_id);
+
+	/* Populate the LOADER_CONFIG state */
+	memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc_v1));
+	ldr_cfg->ctx_dma = GK20A_PMU_DMAIDX_UCODE;
+	flcn64_set_dma(&ldr_cfg->code_dma_base, addr_code);
+	ldr_cfg->non_sec_code_size = desc->app_resident_code_size;
+	flcn64_set_dma(&ldr_cfg->data_dma_base, addr_data);
+	ldr_cfg->data_size = desc->app_resident_data_size;
+	ldr_cfg->code_entry_point = desc->app_imem_entry;
+	*p_bl_gen_desc_size = sizeof(struct flcn_bl_dmem_desc_v1);
+	return 0;
+}
+
+/*
+ * Load the ACR binary and run it on the PMU through the generic bootloader.
+ *
+ * On the first call (acr->acr_fw not set) the firmware is requested, its
+ * signatures are patched, the ACR descriptor inside the image is filled in
+ * from the WPR carveout, the image is copied to a mapped buffer and the
+ * bootloader descriptor is built. On later calls only
+ * nonwpr_ucode_blob_size in the already loaded descriptor is cleared.
+ * Either way the image is then started with pmu_exec_gen_bl().
+ *
+ * Returns 0 on success or a non-zero error code; on failure the firmware is
+ * released and acr->acr_fw is reset to NULL.
+ */
+static int gm206_bootstrap_hs_flcn(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm = &mm->pmu.vm;
+	int i, err = 0;
+	u64 *acr_dmem;
+	u32 img_size_in_bytes = 0;
+	u32 status;
+	struct acr_desc *acr = &g->acr;
+	const struct firmware *acr_fw = acr->acr_fw;
+	struct flcn_bl_dmem_desc_v1 *bl_dmem_desc = &acr->bl_dmem_desc_v1;
+	u32 *acr_ucode_header_t210_load;
+	u32 *acr_ucode_data_t210_load;
+	struct wpr_carveout_info wpr_inf;
+
+	gm206_dbg_pmu("");
+
+	if (!acr_fw) {
+		/*First time init case*/
+		acr_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE);
+		if (!acr_fw) {
+			gk20a_err(dev_from_gk20a(g), "pmu ucode get fail");
+			return -ENOENT;
+		}
+		acr->acr_fw = acr_fw;
+		acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data;
+		acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data +
+				acr->hsbin_hdr->header_offset);
+		acr_ucode_data_t210_load = (u32 *)(acr_fw->data +
+				acr->hsbin_hdr->data_offset);
+		acr_ucode_header_t210_load = (u32 *)(acr_fw->data +
+				acr->fw_hdr->hdr_offset);
+		img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256);
+
+		/* Lets patch the signatures first.. */
+		if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load,
+					(u32 *)(acr_fw->data +
+						acr->fw_hdr->sig_prod_offset),
+					(u32 *)(acr_fw->data +
+						acr->fw_hdr->sig_dbg_offset),
+					(u32 *)(acr_fw->data +
+						acr->fw_hdr->patch_loc),
+					(u32 *)(acr_fw->data +
+						acr->fw_hdr->patch_sig)) < 0) {
+			gk20a_err(dev_from_gk20a(g), "patch signatures fail");
+			err = -1;
+			goto err_release_acr_fw;
+		}
+		err = gk20a_gmmu_alloc_map(vm, img_size_in_bytes,
+				&acr->acr_ucode);
+		if (err) {
+			err = -ENOMEM;
+			goto err_release_acr_fw;
+		}
+
+		g->ops.pmu.get_wpr(g, &wpr_inf);
+
+		acr_dmem = (u64 *)
+			&(((u8 *)acr_ucode_data_t210_load)[
+					acr_ucode_header_t210_load[2]]);
+		acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)(
+			acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]);
+		((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start =
+			wpr_inf.wpr_base;
+		((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size =
+			wpr_inf.size;
+		((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 1;
+		((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
+
+		((struct flcn_acr_desc *)acr_dmem)->wpr_region_id = 1;
+		((struct flcn_acr_desc *)acr_dmem)->regions.region_props[
+			0].region_id = 1;
+		((struct flcn_acr_desc *)acr_dmem)->regions.region_props[
+			0].start_addr = wpr_inf.wpr_base >> 8;
+		((struct flcn_acr_desc *)acr_dmem)->regions.region_props[
+			0].end_addr = (wpr_inf.wpr_base + wpr_inf.size) >> 8;
+
+		for (i = 0; i < (img_size_in_bytes/4); i++) {
+			((u32 *)acr->acr_ucode.cpu_va)[i] =
+				acr_ucode_data_t210_load[i];
+		}
+
+		/*
+		 * In order to execute this binary, we will be using
+		 * a bootloader which will load this image into PMU IMEM/DMEM.
+		 * Fill up the bootloader descriptor for PMU HAL to use..
+		 * TODO: Use standard descriptor which the generic bootloader is
+		 * checked in.
+		 */
+
+		bl_dmem_desc->signature[0] = 0;
+		bl_dmem_desc->signature[1] = 0;
+		bl_dmem_desc->signature[2] = 0;
+		bl_dmem_desc->signature[3] = 0;
+		bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT;
+		flcn64_set_dma(&bl_dmem_desc->code_dma_base,
+			acr->acr_ucode.gpu_va);
+		bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0];
+		bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1];
+		bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5];
+		bl_dmem_desc->sec_code_size = acr_ucode_header_t210_load[6];
+		bl_dmem_desc->code_entry_point = 0; /* Start at 0th offset */
+		flcn64_set_dma(&bl_dmem_desc->data_dma_base,
+			acr->acr_ucode.gpu_va +
+			(acr_ucode_header_t210_load[2]));
+		bl_dmem_desc->data_size = acr_ucode_header_t210_load[3];
+
+	} else
+		acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0;
+
+	status = pmu_exec_gen_bl(g, bl_dmem_desc, 1);
+	if (status != 0) {
+		err = status;
+		goto err_free_ucode_map;
+	}
+	return 0;
+err_free_ucode_map:
+	gk20a_gmmu_unmap_free(vm, &acr->acr_ucode);
+err_release_acr_fw:
+	release_firmware(acr_fw);
+	acr->acr_fw = NULL;
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/gm206/acr_gm206.h b/drivers/gpu/nvgpu/gm206/acr_gm206.h
new file mode 100644
index 00000000..86bc642a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm206/acr_gm206.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __ACR_GM206_H_
+#define __ACR_GM206_H_
+
+#include "gm20b/acr_gm20b.h"
+
+/* Bootloader descriptor built by gm206_pmu_populate_loader_cfg(). */
+struct loader_config_v1 {
+	u32 reserved;
+	u32 dma_idx;
+	struct falc_u64 code_dma_base;
+	u32 code_size_total;
+	u32 code_size_to_load;
+	u32 code_entry_point;
+	struct falc_u64 data_dma_base;
+	u32 data_size;
+	struct falc_u64 overlay_dma_base;
+	u32 argc;
+	u32 argv;
+};
+
+/*
+ * Bootloader descriptor built by gm206_flcn_populate_bl_dmem_desc() and
+ * gm206_bootstrap_hs_flcn().
+ */
+struct flcn_bl_dmem_desc_v1 {
+	u32 reserved[4]; /*Should be the first element..*/
+	u32 signature[4]; /*Should be the first element..*/
+	u32 ctx_dma;
+	struct falc_u64 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	struct falc_u64 data_dma_base;
+	u32 data_size;
+};
+
+/*!
+ * Union of all supported structures used by bootloaders.
+ */
+union flcn_bl_generic_desc_v1 {
+	struct flcn_bl_dmem_desc_v1 bl_dmem_desc_v1;
+	struct loader_config_v1 loader_cfg_v1;
+};
+
+/*!
+ * LSFM Managed Ucode Image
+ * next : Next image in the list, NULL if last.
+ * wpr_header : WPR header for this ucode image
+ * lsb_header : LSB header for this ucode image
+ * bl_gen_desc : Bootloader generic desc structure for this ucode image
+ * bl_gen_desc_size : Sizeof bootloader desc structure for this ucode image
+ * full_ucode_size : Surface size required for final ucode image
+ * ucode_img : Ucode image info
+ */
+struct lsfm_managed_ucode_img_v1 {
+	struct lsfm_managed_ucode_img_v1 *next;
+	struct lsf_wpr_header wpr_header;
+	struct lsf_lsb_header lsb_header;
+	union flcn_bl_generic_desc_v1 bl_gen_desc;
+	u32 bl_gen_desc_size;
+	u32 full_ucode_size;
+	struct flcn_ucode_img ucode_img;
+};
+
+void gm206_init_secure_pmu(struct gpu_ops *gops);
+int gm206_alloc_blob_space(struct gk20a *g,
+		size_t size, struct mem_desc *mem);
+void gm206_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf);
+
+#endif /*__ACR_GM206_H_*/
--
cgit v1.2.2