summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gm206
diff options
context:
space:
mode:
authorMahantesh Kumbar <mkumbar@nvidia.com>2016-05-27 03:37:07 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-06-05 18:34:50 -0400
commitb4c355d32c96137901b2943281d911e385d6f9a7 (patch)
tree6fd88be1f89fc063179d389fa2b154cc552ac9f2 /drivers/gpu/nvgpu/gm206
parent3b566957fec720d7315549ae0d5e98eacd7c247e (diff)
gpu: nvgpu: Add gm204/gm206 ACR BL support
Update ACR BL desc & support for ACR boot. JIRA DNVGPU-10 Change-Id: Iced2e10695439b2e1b47835f5c3c8a5d274e4b1e Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com> Reviewed-on: http://git-master/r/1155027 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm206')
-rw-r--r--drivers/gpu/nvgpu/gm206/acr_gm206.c348
-rw-r--r--drivers/gpu/nvgpu/gm206/acr_gm206.h80
2 files changed, 428 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm206/acr_gm206.c b/drivers/gpu/nvgpu/gm206/acr_gm206.c
new file mode 100644
index 00000000..8db03105
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm206/acr_gm206.c
@@ -0,0 +1,348 @@
1/*
2 * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#include <linux/delay.h> /* for mdelay */
15#include <linux/firmware.h>
16#include <linux/clk.h>
17#include <linux/module.h>
18#include <linux/debugfs.h>
19#include <linux/dma-mapping.h>
20#include <linux/io.h>
21
22#include "gk20a/gk20a.h"
23#include "gk20a/pmu_gk20a.h"
24#include "gk20a/semaphore_gk20a.h"
25#include "hw_pwr_gm206.h"
26#include "acr.h"
27#include "acr_gm206.h"
28
29/*Defines*/
30#define gm206_dbg_pmu(fmt, arg...) \
31 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
32
33/* Both size and address of WPR need to be 128K-aligned */
34#define WPR_ALIGNMENT 0x20000
35#define DGPU_WPR 0x10000000 /* start from 256MB location at VIDMEM */
36#define DGPU_WPR_SIZE 0x100000
37
38static int gm206_pmu_populate_loader_cfg(struct gk20a *g,
39 void *lsfm, u32 *p_bl_gen_desc_size);
40static int gm206_flcn_populate_bl_dmem_desc(struct gk20a *g,
41 void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid);
42static int gm206_bootstrap_hs_flcn(struct gk20a *g);
43
44void gm206_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf)
45{
46 inf->wpr_base = DGPU_WPR;
47 inf->size = DGPU_WPR_SIZE;
48}
49
50static void flcn64_set_dma(struct falc_u64 *dma_addr, u64 value)
51{
52 dma_addr->lo |= u64_lo32(value);
53 dma_addr->hi |= u64_lo32(value);
54}
55
/*
 * Reserve "blob space" for the LS ucode blob on dGPU.
 *
 * Unlike the iGPU path, no backing memory is allocated here: the blob
 * lives inside the VIDMEM WPR carveout, so this only builds a
 * single-entry sg_table whose DMA address points at the WPR base
 * reported by g->ops.pmu.get_wpr().
 *
 * Returns 0 on success, -ENOMEM if the sg_table cannot be allocated,
 * or the sg_alloc_table() error code.
 *
 * NOTE(review): the @size argument is unused and mem->size is never
 * set — confirm callers bound the blob against the WPR carveout size.
 */
int gm206_alloc_blob_space(struct gk20a *g,
	size_t size, struct mem_desc *mem)
{
	int err = 0;
	struct wpr_carveout_info wpr_inf;

	/* Fetch the fixed WPR carveout (base/size) for this chip. */
	g->ops.pmu.get_wpr(g, &wpr_inf);

	mem->aperture = APERTURE_VIDMEM;
	mem->sgt = kzalloc(sizeof(*mem->sgt), GFP_KERNEL);
	if (!mem->sgt) {
		gk20a_err(dev_from_gk20a(g), "failed to allocate memory\n");
		return -ENOMEM;
	}

	err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to allocate sg_table\n");
		goto free_sgt;
	}

	/* Single segment: its DMA address is simply the WPR base. */
	sg_dma_address(mem->sgt->sgl) = wpr_inf.wpr_base;

	return err;

free_sgt:
	gk20a_free_sgtable(&mem->sgt);
	return err;
}
85
86void gm206_init_secure_pmu(struct gpu_ops *gops)
87{
88 gm20b_init_secure_pmu(gops);
89 gops->pmu.prepare_ucode = prepare_ucode_blob;
90 gops->pmu.pmu_setup_hw_and_bootstrap = gm206_bootstrap_hs_flcn;
91 gops->pmu.get_wpr = gm206_wpr_info;
92 gops->pmu.alloc_blob_space = gm206_alloc_blob_space;
93 gops->pmu.pmu_populate_loader_cfg = gm206_pmu_populate_loader_cfg;
94 gops->pmu.flcn_populate_bl_dmem_desc = gm206_flcn_populate_bl_dmem_desc;
95}
96
/*
 * Fill the v1 loader_config descriptor for the LS PMU ucode image.
 *
 * @lsfm is a struct lsfm_managed_ucode_img_v1; its bl_gen_desc is
 * populated in place and *p_bl_gen_desc_size is set to the descriptor
 * size actually written.  Also records the cmdline-args DMEM offset in
 * g->acr.pmu_args.
 *
 * Returns -EINVAL for header-based ucode (no pmu_ucode_desc to source
 * the layout from), 0 on success.
 */
static int gm206_pmu_populate_loader_cfg(struct gk20a *g,
	void *lsfm, u32 *p_bl_gen_desc_size)
{
	struct wpr_carveout_info wpr_inf;
	struct pmu_gk20a *pmu = &g->pmu;
	struct lsfm_managed_ucode_img_v1 *p_lsfm =
		(struct lsfm_managed_ucode_img_v1 *)lsfm;
	struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img);
	struct loader_config_v1 *ldr_cfg =
		&(p_lsfm->bl_gen_desc.loader_cfg_v1);
	u64 addr_base;
	struct pmu_ucode_desc *desc;
	u64 addr_code, addr_data;
	u32 addr_args;

	if (p_img->desc == NULL) /* Header-based ucode carries no desc, so
				  * there is nothing to build the BL gen
				  * descriptor from. */
		return -EINVAL;
	desc = p_img->desc;
	/*
	 * Compute the FB addresses of the code and data segments of the
	 * PMU ucode image.  All offsets in the desc are relative to the
	 * image base, which itself is the LSB header's ucode offset
	 * within the WPR carveout.
	 */
	addr_base = p_lsfm->lsb_header.ucode_off;
	g->ops.pmu.get_wpr(g, &wpr_inf);
	addr_base += wpr_inf.wpr_base;

	gm206_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)addr_base);
	/*
	 * NOTE(review): u64_lo32() keeps only the low 32 bits of these
	 * addresses — confirm the WPR-relative ucode addresses fit in
	 * 32 bits on this chip.
	 */
	addr_code = u64_lo32((addr_base +
			desc->app_start_offset +
			desc->app_resident_code_offset));
	gm206_dbg_pmu("app start %d app res code off %d\n",
			desc->app_start_offset, desc->app_resident_code_offset);
	addr_data = u64_lo32((addr_base +
			desc->app_start_offset +
			desc->app_resident_data_offset));
	gm206_dbg_pmu("app res data offset%d\n",
			desc->app_resident_data_offset);
	gm206_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset);

	/*
	 * Place the PMU cmdline args at the very top of DMEM: total DMEM
	 * size (from HWCFG, in blocks of 2^GK20A_PMU_DMEM_BLKSIZE2 bytes)
	 * minus the args size.
	 */
	addr_args = ((pwr_falcon_hwcfg_dmem_size_v(
			gk20a_readl(g, pwr_falcon_hwcfg_r())))
			<< GK20A_PMU_DMEM_BLKSIZE2);

	addr_args -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);

	gm206_dbg_pmu("addr_args %x\n", addr_args);

	/* Populate the loader_config state. */
	ldr_cfg->dma_idx = GK20A_PMU_DMAIDX_UCODE;
	flcn64_set_dma(&ldr_cfg->code_dma_base, addr_code);
	ldr_cfg->code_size_total = desc->app_size;
	ldr_cfg->code_size_to_load = desc->app_resident_code_size;
	ldr_cfg->code_entry_point = desc->app_imem_entry;
	flcn64_set_dma(&ldr_cfg->data_dma_base, addr_data);
	ldr_cfg->data_size = desc->app_resident_data_size;
	flcn64_set_dma(&ldr_cfg->overlay_dma_base, addr_code);

	/* One argument: the DMEM offset where the args were placed. */
	ldr_cfg->argc = 1;
	ldr_cfg->argv = addr_args;

	*p_bl_gen_desc_size = sizeof(struct loader_config_v1);
	g->acr.pmu_args = addr_args;
	return 0;
}
168
/*
 * Fill the v1 bootloader DMEM descriptor for a non-PMU LS falcon's
 * ucode image (@falconid identifies the target falcon; it is only
 * used for context here since the layout comes from the image desc).
 *
 * @lsfm is a struct lsfm_managed_ucode_img_v1; its bl_gen_desc is
 * populated in place and *p_bl_gen_desc_size is set accordingly.
 *
 * Returns -EINVAL for header-based ucode (no pmu_ucode_desc), 0 on
 * success.
 */
static int gm206_flcn_populate_bl_dmem_desc(struct gk20a *g,
	void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid)
{
	struct wpr_carveout_info wpr_inf;
	struct lsfm_managed_ucode_img_v1 *p_lsfm =
		(struct lsfm_managed_ucode_img_v1 *)lsfm;
	struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img);
	struct flcn_bl_dmem_desc_v1 *ldr_cfg =
		&(p_lsfm->bl_gen_desc.bl_dmem_desc_v1);
	u64 addr_base;
	struct pmu_ucode_desc *desc;
	u64 addr_code, addr_data;

	if (p_img->desc == NULL) /* Header-based ucode carries no desc, so
				  * there is nothing to build the BL gen
				  * descriptor from. */
		return -EINVAL;
	desc = p_img->desc;

	/*
	 * Compute the FB addresses of the code and data segments of the
	 * ucode image.  All offsets in the desc are relative to the image
	 * base: the LSB header's ucode offset within the WPR carveout.
	 */
	addr_base = p_lsfm->lsb_header.ucode_off;
	g->ops.pmu.get_wpr(g, &wpr_inf);
	addr_base += wpr_inf.wpr_base;

	gm206_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base,
			p_lsfm->wpr_header.falcon_id);
	/*
	 * NOTE(review): u64_lo32() keeps only the low 32 bits — confirm
	 * WPR-relative addresses fit in 32 bits on this chip.
	 */
	addr_code = u64_lo32((addr_base +
			desc->app_start_offset +
			desc->app_resident_code_offset));
	addr_data = u64_lo32((addr_base +
			desc->app_start_offset +
			desc->app_resident_data_offset));

	gm206_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %xID\n",
			(u32)addr_code, (u32)addr_data, desc->bootloader_start_offset,
			p_lsfm->wpr_header.falcon_id);

	/* Populate the LOADER_CONFIG state (zero first: v1 adds fields). */
	memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc_v1));
	ldr_cfg->ctx_dma = GK20A_PMU_DMAIDX_UCODE;
	flcn64_set_dma(&ldr_cfg->code_dma_base, addr_code);
	ldr_cfg->non_sec_code_size = desc->app_resident_code_size;
	flcn64_set_dma(&ldr_cfg->data_dma_base, addr_data);
	ldr_cfg->data_size = desc->app_resident_data_size;
	ldr_cfg->code_entry_point = desc->app_imem_entry;
	*p_bl_gen_desc_size = sizeof(struct flcn_bl_dmem_desc_v1);
	return 0;
}
223
/*
 * Load the ACR HS binary into FB memory and bootstrap the PMU with the
 * generic HS bootloader.
 *
 * First call: requests the ACR firmware, patches its signatures,
 * allocates/maps a GPU buffer for the ucode, points the embedded
 * flcn_acr_desc at the WPR carveout, copies the image in and fills the
 * cached bootloader DMEM descriptor.  Subsequent calls reuse all of
 * that state and only re-run the bootloader (with
 * nonwpr_ucode_blob_size zeroed — presumably signalling ACR that the
 * blob is already in WPR; confirm against the ACR ucode contract).
 *
 * Returns 0 on success; on first-init failure the firmware and ucode
 * mapping are released so a later call can retry from scratch.
 *
 * NOTE(review): forward-declared static above but defined without
 * `static` here — linkage stays internal, but the decls should match.
 */
int gm206_bootstrap_hs_flcn(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = &mm->pmu.vm;
	int i, err = 0;
	u64 *acr_dmem;
	u32 img_size_in_bytes = 0;
	u32 status;
	struct acr_desc *acr = &g->acr;
	const struct firmware *acr_fw = acr->acr_fw;
	struct flcn_bl_dmem_desc_v1 *bl_dmem_desc = &acr->bl_dmem_desc_v1;
	u32 *acr_ucode_header_t210_load;
	u32 *acr_ucode_data_t210_load;
	struct wpr_carveout_info wpr_inf;

	gm206_dbg_pmu("");

	if (!acr_fw) {
		/* First-time init: fetch and stage the ACR HS binary. */
		acr_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE);
		if (!acr_fw) {
			gk20a_err(dev_from_gk20a(g), "pmu ucode get fail");
			return -ENOENT;
		}
		acr->acr_fw = acr_fw;
		/* Parse the HS bin container: header, data and ucode header. */
		acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data;
		acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data +
				acr->hsbin_hdr->header_offset);
		acr_ucode_data_t210_load = (u32 *)(acr_fw->data +
				acr->hsbin_hdr->data_offset);
		acr_ucode_header_t210_load = (u32 *)(acr_fw->data +
				acr->fw_hdr->hdr_offset);
		img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256);

		/* Patch the prod/dbg signatures into the ucode image. */
		if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load,
				(u32 *)(acr_fw->data +
						acr->fw_hdr->sig_prod_offset),
				(u32 *)(acr_fw->data +
						acr->fw_hdr->sig_dbg_offset),
				(u32 *)(acr_fw->data +
						acr->fw_hdr->patch_loc),
				(u32 *)(acr_fw->data +
						acr->fw_hdr->patch_sig)) < 0) {
			gk20a_err(dev_from_gk20a(g), "patch signatures fail");
			err = -1;
			goto err_release_acr_fw;
		}
		/* GPU-map a buffer in the PMU VM for the patched image. */
		err = gk20a_gmmu_alloc_map(vm, img_size_in_bytes,
				&acr->acr_ucode);
		if (err) {
			err = -ENOMEM;
			goto err_release_acr_fw;
		}

		g->ops.pmu.get_wpr(g, &wpr_inf);

		/*
		 * The ACR descriptor lives inside the ucode data at the
		 * offset given by header word [2]; keep a CPU pointer to
		 * the mapped copy for later updates (see else branch).
		 */
		acr_dmem = (u64 *)
			&(((u8 *)acr_ucode_data_t210_load)[
					acr_ucode_header_t210_load[2]]);
		acr->acr_dmem_desc = (struct flcn_acr_desc *)((u8 *)(
			acr->acr_ucode.cpu_va) + acr_ucode_header_t210_load[2]);
		/* On dGPU the "non-WPR" blob is the WPR carveout itself. */
		((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start =
			wpr_inf.wpr_base;
		((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size =
			wpr_inf.size;
		((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 1;
		((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;

		/* Describe WPR region 1 (addresses in 256-byte units). */
		((struct flcn_acr_desc *)acr_dmem)->wpr_region_id = 1;
		((struct flcn_acr_desc *)acr_dmem)->regions.region_props[
			0].region_id = 1;
		((struct flcn_acr_desc *)acr_dmem)->regions.region_props[
			0].start_addr = wpr_inf.wpr_base >> 8;
		((struct flcn_acr_desc *)acr_dmem)->regions.region_props[
			0].end_addr = (wpr_inf.wpr_base + wpr_inf.size) >> 8;

		/* Copy the patched image into the GPU-mapped buffer. */
		for (i = 0; i < (img_size_in_bytes/4); i++) {
			((u32 *)acr->acr_ucode.cpu_va)[i] =
					acr_ucode_data_t210_load[i];
		}

		/*
		 * In order to execute this binary, we will be using
		 * a bootloader which will load this image into PMU IMEM/DMEM.
		 * Fill up the bootloader descriptor for PMU HAL to use..
		 * TODO: Use standard descriptor which the generic bootloader is
		 * checked in.
		 */

		bl_dmem_desc->signature[0] = 0;
		bl_dmem_desc->signature[1] = 0;
		bl_dmem_desc->signature[2] = 0;
		bl_dmem_desc->signature[3] = 0;
		bl_dmem_desc->ctx_dma = GK20A_PMU_DMAIDX_VIRT;
		flcn64_set_dma(&bl_dmem_desc->code_dma_base,
				acr->acr_ucode.gpu_va);
		/* Header words: [0]=ns code off, [1]=ns code size,
		 * [5]=sec code off, [6]=sec code size, [2]=data off,
		 * [3]=data size. */
		bl_dmem_desc->non_sec_code_off = acr_ucode_header_t210_load[0];
		bl_dmem_desc->non_sec_code_size = acr_ucode_header_t210_load[1];
		bl_dmem_desc->sec_code_off = acr_ucode_header_t210_load[5];
		bl_dmem_desc->sec_code_size = acr_ucode_header_t210_load[6];
		bl_dmem_desc->code_entry_point = 0; /* Start at 0th offset */
		flcn64_set_dma(&bl_dmem_desc->data_dma_base,
				acr->acr_ucode.gpu_va +
				(acr_ucode_header_t210_load[2]));
		bl_dmem_desc->data_size = acr_ucode_header_t210_load[3];

	} else
		/* Re-bootstrap: ucode already in WPR, nothing to re-copy. */
		acr->acr_dmem_desc->nonwpr_ucode_blob_size = 0;

	status = pmu_exec_gen_bl(g, bl_dmem_desc, 1);
	if (status != 0) {
		err = status;
		goto err_free_ucode_map;
	}
	return 0;
err_free_ucode_map:
	gk20a_gmmu_unmap_free(vm, &acr->acr_ucode);
err_release_acr_fw:
	release_firmware(acr_fw);
	acr->acr_fw = NULL;
	return err;
}
diff --git a/drivers/gpu/nvgpu/gm206/acr_gm206.h b/drivers/gpu/nvgpu/gm206/acr_gm206.h
new file mode 100644
index 00000000..86bc642a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm206/acr_gm206.h
@@ -0,0 +1,80 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13
14#ifndef __ACR_GM206_H_
15#define __ACR_GM206_H_
16
17#include "gm20b/acr_gm20b.h"
18
/*
 * v1 loader-config descriptor handed to the PMU bootloader.  Field
 * layout is falcon ABI; do not reorder or resize members.  Filled by
 * gm206_pmu_populate_loader_cfg().
 */
struct loader_config_v1 {
	u32 reserved;
	u32 dma_idx;			/* DMA index (GK20A_PMU_DMAIDX_UCODE) */
	struct falc_u64 code_dma_base;	/* FB address of app code */
	u32 code_size_total;		/* total app size */
	u32 code_size_to_load;		/* resident code size to DMA in */
	u32 code_entry_point;		/* IMEM entry point */
	struct falc_u64 data_dma_base;	/* FB address of app data */
	u32 data_size;			/* resident data size */
	struct falc_u64 overlay_dma_base; /* set to code base by gm206 code */
	u32 argc;			/* arg count (1) */
	u32 argv;			/* DMEM offset of cmdline args */
};
32
/*
 * v1 bootloader DMEM descriptor consumed by the generic HS bootloader.
 * Field layout is falcon ABI; do not reorder or resize members.
 */
struct flcn_bl_dmem_desc_v1 {
	u32 reserved[4];	/* Should be the first element.. */
	u32 signature[4];	/* ucode signature (zeroed by gm206 bootstrap) */
	u32 ctx_dma;		/* DMA context/index for the loads below */
	struct falc_u64 code_dma_base;	/* FB address of the code blob */
	u32 non_sec_code_off;	/* non-secure code offset from code base */
	u32 non_sec_code_size;	/* non-secure code size */
	u32 sec_code_off;	/* secure code offset from code base */
	u32 sec_code_size;	/* secure code size */
	u32 code_entry_point;	/* IMEM entry point (0 = start of code) */
	struct falc_u64 data_dma_base;	/* FB address of the data blob */
	u32 data_size;		/* data size to DMA into DMEM */
};
46
/*!
 * Union of all supported v1 bootloader descriptor structures; each
 * LS-managed ucode image carries exactly one of these (see
 * lsfm_managed_ucode_img_v1.bl_gen_desc).
 */
union flcn_bl_generic_desc_v1 {
	struct flcn_bl_dmem_desc_v1 bl_dmem_desc_v1;
	struct loader_config_v1 loader_cfg_v1;
};
54
/*!
 * LSFM Managed Ucode Image
 * next : Next image in the list, NULL if last.
 * wpr_header : WPR header for this ucode image
 * lsb_header : LSB header for this ucode image
 * bl_gen_desc : Bootloader generic desc structure for this ucode image
 * bl_gen_desc_size : Size of bootloader desc structure for this ucode image
 * full_ucode_size : Surface size required for final ucode image
 * ucode_img : Ucode image info
 */
struct lsfm_managed_ucode_img_v1 {
	struct lsfm_managed_ucode_img_v1 *next;
	struct lsf_wpr_header wpr_header;
	struct lsf_lsb_header lsb_header;
	union flcn_bl_generic_desc_v1 bl_gen_desc;
	u32 bl_gen_desc_size;
	u32 full_ucode_size;
	struct flcn_ucode_img ucode_img;
};
74
75void gm206_init_secure_pmu(struct gpu_ops *gops);
76int gm206_alloc_blob_space(struct gk20a *g,
77 size_t size, struct mem_desc *mem);
78void gm206_wpr_info(struct gk20a *g, struct wpr_carveout_info *inf);
79
80#endif /*__ACR_GM206_H_*/