path: root/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
author	Supriya <ssharatkumar@nvidia.com>	2014-06-13 03:14:27 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:10:14 -0400
commit	b7793a493a1fa292a22d5ce84c43ee342b9824b2 (patch)
tree	963d128e317d319d2f53aff96420aec17b732bf6	/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
parent	c32ac10b0bba400c1e83540a20c5ca210fa48613 (diff)
nvgpu: Host side changes to support HS mode
GM20B changes in PMU boot sequence to support booting in HS mode and LS mode.

Bug 1509680

Change-Id: I2832eda0efe17dd5e3a8f11dd06e7d4da267be70
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Reviewed-on: http://git-master/r/423140
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Shridhar Rasal <srasal@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/acr_gm20b.c')
-rw-r--r--	drivers/gpu/nvgpu/gm20b/acr_gm20b.c	1325
1 file changed, 1325 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
new file mode 100644
index 00000000..df1bc429
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -0,0 +1,1325 @@
1/*
2 * GM20B ACR
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/delay.h> /* for mdelay */
17#include <linux/firmware.h>
18#include <linux/clk.h>
19#include <linux/module.h>
20#include <linux/debugfs.h>
21#include <linux/dma-mapping.h>
22#include <linux/io.h>
23#include "../../../../arch/arm/mach-tegra/iomap.h"
24
25#include "gk20a/gk20a.h"
26#include "gk20a/pmu_gk20a.h"
27#include "hw_pwr_gm20b.h"
28#include "mc_carveout_reg.h"
29
30/*Defines*/
31#define gm20b_dbg_pmu(fmt, arg...) \
32 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
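/* Poll budget for pmu_wait_for_halt() below: the wait loop delays ~1 us per
 * iteration, so 10000 amounts to roughly a 10 ms timeout. */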
33#define GPU_TIMEOUT_DEFAULT 10000
34
35typedef int (*get_ucode_details)(struct gk20a *g, struct flcn_ucode_img *udata);
36
37/*Externs*/
38
39/*Forwards*/
40static int lsfm_discover_ucode_images(struct gk20a *g,
41 struct ls_flcn_mgr *plsfm);
42static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
43 struct flcn_ucode_img *ucode_image, u32 falcon_id);
44static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img);
45static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img);
46static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm);
47static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
48 void *nonwpr_addr);
49static int acr_ucode_patch_sig(struct gk20a *g,
50 unsigned int *p_img,
51 unsigned int *p_prod_sig,
52 unsigned int *p_dbg_sig,
53 unsigned int *p_patch_loc,
54 unsigned int *p_patch_ind);
55
56/*Globals*/
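/*
 * mc maps the Tegra memory controller aperture; it is used below to read the
 * security carveout (WPR) base registers. pmu_acr_supp_ucode_list holds one
 * ucode-details callback per supported LS falcon; on GM20B only the PMU entry
 * is populated.
 */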
57static void __iomem *mc = IO_ADDRESS(TEGRA_MC_BASE);
58get_ucode_details pmu_acr_supp_ucode_list[MAX_SUPPORTED_LSFM] = {
59 pmu_ucode_details,
60};
61
62 /* Once in LS mode, only cpuctl_alias is accessible */
63void start_gm20b_pmu(struct gk20a *g)
64{
65 gk20a_writel(g, pwr_falcon_cpuctl_alias_r(),
66 pwr_falcon_cpuctl_startcpu_f(1));
67}
68
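/* Hook the secure-boot PMU paths into the HAL: SW setup builds the LS ucode
 * blob, HW setup loads and runs the HS ACR binary on the PMU falcon. */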
69void gm20b_init_secure_pmu(struct gpu_ops *gops)
70{
71 gops->pmu.pmu_setup_sw = gm20b_pmu_setup_sw;
72 gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn;
73}
74
75static void free_blob_res(struct gk20a *g)
76{
77 /*TODO */
78}
79
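/* Largely mirrors the gk20a PMU SW setup, with the addition of
 * prepare_ucode_blob() to stage the LS ucode images for the ACR. */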
80int gm20b_pmu_setup_sw(struct gk20a *g)
81{
82 /*from pmu_gk20a.c*/
83 struct pmu_gk20a *pmu = &g->pmu;
84 struct mm_gk20a *mm = &g->mm;
85 struct vm_gk20a *vm = &mm->pmu.vm;
86 struct device *d = dev_from_gk20a(g);
87 int i, err = 0;
88 u8 *ptr;
89 struct sg_table *sgt_seq_buf;
90 dma_addr_t iova;
91
92 gk20a_dbg_fn("");
93 /* Make any ACR structure settings here if ever needed */
94
95 if (pmu->sw_ready) {
96 for (i = 0; i < pmu->mutex_cnt; i++) {
97 pmu->mutex[i].id = i;
98 pmu->mutex[i].index = i;
99 }
100 pmu_seq_init(pmu);
101
102 mutex_init(&pmu->elpg_mutex);
103 mutex_init(&pmu->isr_mutex);
104 mutex_init(&pmu->pmu_copy_lock);
105 mutex_init(&pmu->pmu_seq_lock);
106 gk20a_dbg_fn("skip init");
107 goto skip_init;
108 }
109 gm20b_dbg_pmu("gk20a_init_pmu_setup_sw 2\n");
110
111 /* TBD: sysmon subtask */
112
113 if (IS_ENABLED(CONFIG_TEGRA_GK20A_PERFMON))
114 pmu->perfmon_sampling_enabled = true;
115
116 pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
117 pmu->mutex = kzalloc(pmu->mutex_cnt *
118 sizeof(struct pmu_mutex), GFP_KERNEL);
119 if (!pmu->mutex) {
120 err = -ENOMEM;
121 goto err;
122 }
123
124 for (i = 0; i < pmu->mutex_cnt; i++) {
125 pmu->mutex[i].id = i;
126 pmu->mutex[i].index = i;
127 }
128 gm20b_dbg_pmu("gk20a_init_pmu_setup_sw 3\n");
129
130 pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
131 sizeof(struct pmu_sequence), GFP_KERNEL);
132 if (!pmu->seq) {
133 err = -ENOMEM;
134 goto err_free_mutex;
135 }
136
137 pmu_seq_init(pmu);
138 mutex_init(&pmu->elpg_mutex);
139 mutex_init(&pmu->isr_mutex);
140 mutex_init(&pmu->pmu_copy_lock);
141 mutex_init(&pmu->pmu_seq_lock);
142
143 err = prepare_ucode_blob(g);
144 if (err)
145 goto err_free_seq;
146 INIT_WORK(&pmu->pg_init, pmu_setup_hw);
147 pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
148 &iova,
149 GFP_KERNEL);
150 if (!pmu->seq_buf.cpuva) {
151 gk20a_err(d, "failed to allocate memory\n");
152 err = -ENOMEM;
153 goto err_free_blob_res;
154 }
155
156 pmu->seq_buf.iova = iova;
157 err = gk20a_get_sgtable(d, &sgt_seq_buf,
158 pmu->seq_buf.cpuva,
159 pmu->seq_buf.iova,
160 GK20A_PMU_SEQ_BUF_SIZE);
161 if (err) {
162 gk20a_err(d, "failed to allocate sg table\n");
163 goto err_free_seq_buf;
164 }
165
166 pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
167 GK20A_PMU_SEQ_BUF_SIZE,
168 0, /* flags */
169 gk20a_mem_flag_none);
170 if (!pmu->seq_buf.pmu_va) {
171 gk20a_err(d, "failed to map pmu ucode memory!!");
err = -ENOMEM;
172 goto err_free_seq_buf_sgt;
173 }
174
175 ptr = (u8 *)pmu->seq_buf.cpuva;
176 if (!ptr) {
177 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
err = -ENOMEM;
178 goto err_unmap_seq_buf;
179 }
180
181 /* TBD: remove this if ZBC save/restore is handled by PMU
182 * send an empty ZBC sequence for now */
183 ptr[0] = 0x16; /* opcode EXIT */
184 ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
185 ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
186
187 pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
188
189 gk20a_dbg_fn("done");
190 gk20a_free_sgtable(&sgt_seq_buf);
191
192 pmu->sw_ready = true;
193
194skip_init:
195 pmu->perfmon_counter.index = 3; /* GR & CE2 */
196 pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
197
198 pmu->remove_support = gk20a_remove_pmu_support;
199 err = gk20a_init_pmu(pmu);
200 if (err) {
201 gk20a_err(d, "failed to set function pointers\n");
202 goto err_unmap_seq_buf;
203 }
204
205 gk20a_dbg_fn("done");
206 return 0;
207
208 err_unmap_seq_buf:
209 gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
210 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
211 err_free_seq_buf_sgt:
212 gk20a_free_sgtable(&sgt_seq_buf);
213 err_free_seq_buf:
214 dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
215 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
216 pmu->seq_buf.cpuva = NULL;
217 pmu->seq_buf.iova = 0;
218 err_free_blob_res:
219 free_blob_res(g);
220 err_free_seq:
221 kfree(pmu->seq);
222 err_free_mutex:
223 kfree(pmu->mutex);
224 err:
225 gk20a_dbg_fn("fail");
226 return err;
227}
228
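/* Fill in a flcn_ucode_img for the PMU LS ucode: load GM20B_PMU_UCODE_IMAGE,
 * point desc/data at it and attach an lsf_ucode_desc tagged with
 * LSF_FALCON_ID_PMU. */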
229int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
230{
231 const struct firmware *pmu_fw;
232 struct pmu_gk20a *pmu = &g->pmu;
233 struct lsf_ucode_desc *lsf_desc;
234 int err;
235 gm20b_dbg_pmu("requesting PMU ucode in GM20B\n");
236 pmu_fw = gk20a_request_firmware(g, GM20B_PMU_UCODE_IMAGE);
237 if (!pmu_fw) {
238 gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
239 gm20b_dbg_pmu("requesting PMU ucode in GM20B failed\n");
240 return -ENOENT;
241 }
242 gm20b_dbg_pmu("Loaded PMU ucode for blob preparation");
243
244 pmu->desc = (struct pmu_ucode_desc *)pmu_fw->data;
245 pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
246 pmu->desc->descriptor_size);
247 err = gk20a_init_pmu(pmu);
248 if (err) {
249 gm20b_dbg_pmu("failed to set function pointers\n");
250 return err;
251 }
252
253 lsf_desc = kzalloc(sizeof(struct lsf_ucode_desc), GFP_KERNEL);
254 if (!lsf_desc)
255 return -ENOMEM;
256 lsf_desc->falcon_id = LSF_FALCON_ID_PMU;
257
258 p_img->desc = pmu->desc;
259 p_img->data = pmu->ucode_image;
260 p_img->data_size = pmu->desc->image_size;
261 p_img->fw_ver = NULL;
262 p_img->header = NULL;
263 p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
264 gm20b_dbg_pmu("requesting PMU ucode in GM20B exit\n");
265 return 0;
266}
267
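/* Build the ucode blob in normal (non-WPR) memory: discover the managed LS
 * falcons, compute the WPR layout, allocate and map a buffer for it, and
 * flush the WPR/LSB headers plus ucode images into that buffer; the HS ACR
 * binary presumably copies this blob into the real WPR region later. */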
268int prepare_ucode_blob(struct gk20a *g)
269{
270 struct device *d = dev_from_gk20a(g);
271 dma_addr_t iova;
272 u32 status;
273 void *nonwpr_addr;
274 u64 nonwpr_pmu_va;
275 struct ls_flcn_mgr lsfm_l, *plsfm;
276 struct sg_table *sgt_nonwpr;
277 struct mm_gk20a *mm = &g->mm;
278 struct vm_gk20a *vm = &mm->pmu.vm;
279
280 plsfm = &lsfm_l;
281 memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr));
282 gm20b_dbg_pmu("fetching GMMU regs\n");
283 gm20b_mm_mmu_vpr_info_fetch(g);
284
285 /* Discover all managed falcons*/
286 status = lsfm_discover_ucode_images(g, plsfm);
287 gm20b_dbg_pmu(" Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt);
288 if (status != 0)
289 return status;
290
291 if (plsfm->managed_flcn_cnt) {
292 /* Generate WPR requirements*/
293 status = lsf_gen_wpr_requirements(g, plsfm);
294 if (status != 0)
295 return status;
296
297 /*Alloc memory to hold ucode blob contents*/
298 nonwpr_addr = dma_alloc_coherent(d, plsfm->wpr_size, &iova,
299 GFP_KERNEL);
300 if (nonwpr_addr == NULL)
301 return -ENOMEM;
302 status = gk20a_get_sgtable(d, &sgt_nonwpr,
303 nonwpr_addr,
304 iova,
305 plsfm->wpr_size);
306 if (status) {
307 gk20a_err(d, "failed allocate sg table for nonwpr\n");
308 status = -ENOMEM;
309 goto err_free_nonwpr_addr;
310 }
311
312 nonwpr_pmu_va = gk20a_gmmu_map(vm, &sgt_nonwpr,
313 plsfm->wpr_size,
314 0, /* flags */
315 gk20a_mem_flag_read_only);
316 if (!nonwpr_pmu_va) {
317 gk20a_err(d, "failed to map pmu ucode memory!!");
318 status = -ENOMEM;
319 goto err_free_nonwpr_sgt;
320 }
321 gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
322 plsfm->managed_flcn_cnt, plsfm->wpr_size);
323 lsfm_init_wpr_contents(g, plsfm, nonwpr_addr);
324 g->acr.ucode_blob_start = nonwpr_pmu_va;
325 g->acr.ucode_blob_size = plsfm->wpr_size;
326 gm20b_dbg_pmu("32 bit ucode_start %x, size %d\n",
327 (u32)nonwpr_pmu_va, plsfm->wpr_size);
328 gm20b_dbg_pmu("base reg carveout 2:%x\n",
329 readl(mc + MC_SECURITY_CARVEOUT2_BOM_0));
330 gm20b_dbg_pmu("base reg carveout 3:%x\n",
331 readl(mc + MC_SECURITY_CARVEOUT3_BOM_0));
332 } else {
333 gm20b_dbg_pmu("LSFM is managing no falcons.\n");
334 }
335 gm20b_dbg_pmu("prepare ucode blob return 0\n");
336 return 0;
337err_free_nonwpr_sgt:
338 gk20a_free_sgtable(&sgt_nonwpr);
339err_free_nonwpr_addr:
340 dma_free_coherent(d, plsfm->wpr_size,
341 nonwpr_addr, iova);
342 nonwpr_addr = NULL;
343 iova = 0;
344 gm20b_dbg_pmu("prepare ucode blob return %x\n", status);
345 return status;
346}
347
348u8 lsfm_falcon_disabled(struct gk20a *g, struct ls_flcn_mgr *plsfm,
349 u32 falcon_id)
350{
351 return (plsfm->disable_mask >> falcon_id) & 0x1;
352}
353
354/* Discover all managed falcon ucode images */
355static int lsfm_discover_ucode_images(struct gk20a *g,
356 struct ls_flcn_mgr *plsfm)
357{
358 struct pmu_gk20a *pmu = &g->pmu;
359 struct flcn_ucode_img ucode_img;
360 u32 falcon_id;
361 u32 i;
362 int status;
363
364 /* LSFM requires a secure PMU, discover it first.*/
365 /* Obtain the PMU ucode image and add it to the list if required*/
366 memset(&ucode_img, 0, sizeof(ucode_img));
367 status = pmu_ucode_details(g, &ucode_img);
368 if (status == 0) {
369 if (ucode_img.lsf_desc != NULL) {
370 /* The falcon ID is formed by grabbing the static base
371 * falcon ID from the image and adding the
372 * engine-designated falcon instance.*/
373 pmu->pmu_mode |= PMU_SECURE_MODE;
374 falcon_id = ucode_img.lsf_desc->falcon_id +
375 ucode_img.flcn_inst;
376
377 if (!lsfm_falcon_disabled(g, plsfm, falcon_id)) {
378 pmu->falcon_id = falcon_id;
379 if (lsfm_add_ucode_img(g, plsfm, &ucode_img,
380 pmu->falcon_id) == 0)
381 pmu->pmu_mode |= PMU_LSFM_MANAGED;
382
383 plsfm->managed_flcn_cnt++;
384 } else {
385 gm20b_dbg_pmu("id not managed %d\n",
386 ucode_img.lsf_desc->falcon_id);
387 }
388 }
389
390 /*Free any ucode image resources if not managing this falcon*/
391 if (!(pmu->pmu_mode & PMU_LSFM_MANAGED)) {
392 gm20b_dbg_pmu("pmu is not LSFM managed\n");
393 lsfm_free_ucode_img_res(&ucode_img);
394 }
395 }
396
397 /* Enumerate all constructed falcon objects,
398 as we need the ucode image info and total falcon count.*/
399
400 /*0th index is always PMU which is already handled in earlier
401 if condition*/
402 for (i = 1; i < MAX_SUPPORTED_LSFM; i++) {
403 memset(&ucode_img, 0, sizeof(ucode_img));
404 if (pmu_acr_supp_ucode_list[i](g, &ucode_img) == 0) {
405 if (ucode_img.lsf_desc != NULL) {
406 /* We have engine sigs, ensure that this falcon
407 is aware of the secure mode expectations
408 (ACR status)*/
409
410 /* falcon_id is formed by grabbing the static
411 base falcon ID from the image and adding the
412 engine-designated falcon instance. */
413 falcon_id = ucode_img.lsf_desc->falcon_id +
414 ucode_img.flcn_inst;
415
416 if (!lsfm_falcon_disabled(g, plsfm,
417 falcon_id)) {
418 /* Do not manage non-FB ucode*/
419 if (lsfm_add_ucode_img(g,
420 plsfm, &ucode_img, falcon_id)
421 == 0)
422 plsfm->managed_flcn_cnt++;
423 } else {
424 gm20b_dbg_pmu("not managed %d\n",
425 ucode_img.lsf_desc->falcon_id);
426 lsfm_free_nonpmu_ucode_img_res(
427 &ucode_img);
428 }
429 }
430 } else {
431 /* Consumed all available falcon objects */
432 gm20b_dbg_pmu("Done checking for ucodes %d\n", i);
433 break;
434 }
435 }
436 return 0;
437}
438
439
440int pmu_populate_loader_cfg(struct gk20a *g,
441 struct lsfm_managed_ucode_img *lsfm,
442 union flcn_bl_generic_desc *p_bl_gen_desc, u32 *p_bl_gen_desc_size)
443{
444 struct pmu_gk20a *pmu = &g->pmu;
445 struct flcn_ucode_img *p_img = &(lsfm->ucode_img);
446 struct loader_config *ldr_cfg =
447 (struct loader_config *)(&p_bl_gen_desc->loader_cfg);
448 struct gk20a_platform *platform = platform_get_drvdata(g->dev);
449 u64 addr_base;
450 struct pmu_ucode_desc *desc;
451 u64 addr_code, addr_data;
452 u32 addr_args;
453
454 if (p_img->desc == NULL) /*This means it is a header-based ucode,
455 and so we do not fill the BL gen desc structure*/
456 return -EINVAL;
457 desc = p_img->desc;
458 /*
459 Calculate physical and virtual addresses for various portions of
460 the PMU ucode image
461 Calculate the 32-bit addresses for the application code, application
462 data, and bootloader code. These values are all based on IM_BASE.
463 The 32-bit addresses will be the upper 32-bits of the virtual or
464 physical addresses of each respective segment.
465 */
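 /* The byte addresses are shifted right by 8 below, i.e. the falcon DMA
  * bases are expressed in 256-byte units. */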
466 addr_base = lsfm->lsb_header.ucode_off;
467 addr_base += readl(mc + MC_SECURITY_CARVEOUT3_BOM_0);
468 gm20b_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)addr_base);
469 /*From linux*/
470 addr_code = u64_lo32((addr_base +
471 desc->app_start_offset +
472 desc->app_resident_code_offset) >> 8);
473 gm20b_dbg_pmu("app start %d app res code off %d\n",
474 desc->app_start_offset, desc->app_resident_code_offset);
475 addr_data = u64_lo32((addr_base +
476 desc->app_start_offset +
477 desc->app_resident_data_offset) >> 8);
478 gm20b_dbg_pmu("app res data offset%d\n",
479 desc->app_resident_data_offset);
480 gm20b_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset);
481
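 /* hwcfg reports the DMEM size in blocks; shift it up to bytes and place the
  * PMU command line args at the very top of DMEM (total size minus the args
  * size). */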
482 addr_args = ((pwr_falcon_hwcfg_dmem_size_v(
483 gk20a_readl(g, pwr_falcon_hwcfg_r())))
484 << GK20A_PMU_DMEM_BLKSIZE2);
485 addr_args -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
486
487 gm20b_dbg_pmu("addr_args %x\n", addr_args);
488
489 /* Populate the loader_config state*/
490 ldr_cfg->dma_idx = 2;
491 ldr_cfg->code_dma_base = addr_code;
492 ldr_cfg->code_size_total = desc->app_size;
493 ldr_cfg->code_size_to_load = desc->app_resident_code_size;
494 ldr_cfg->code_entry_point = desc->app_imem_entry;
495 ldr_cfg->data_dma_base = addr_data;
496 ldr_cfg->data_size = desc->app_resident_data_size;
497 ldr_cfg->overlay_dma_base = addr_code;
498
499 /* Update the argc/argv members*/
500 ldr_cfg->argc = 1;
501 ldr_cfg->argv = addr_args;
502
503 /*Copying pmu cmdline args*/
504 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
505 clk_get_rate(platform->clk[1]));
506 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(pmu, 1);
507 pmu_copy_to_dmem(pmu, addr_args,
508 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
509 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
510 *p_bl_gen_desc_size = sizeof(p_bl_gen_desc->loader_cfg);
511 return 0;
512}
513
514int flcn_populate_bl_dmem_desc(struct gk20a *g,
515 struct lsfm_managed_ucode_img *lsfm,
516 union flcn_bl_generic_desc *p_bl_gen_desc, u32 *p_bl_gen_desc_size)
517{
518
519 struct flcn_ucode_img *p_img = &(lsfm->ucode_img);
520 struct flcn_bl_dmem_desc *ldr_cfg =
521 (struct flcn_bl_dmem_desc *)(&p_bl_gen_desc->loader_cfg);
522 u64 addr_base;
523 struct pmu_ucode_desc *desc;
524 u64 addr_code, addr_data;
525
526 if (p_img->desc == NULL) /*This means it is a header-based ucode,
527 and so we do not fill the BL gen desc structure*/
528 return -EINVAL;
529 desc = p_img->desc;
530
531 /*
532 Calculate physical and virtual addresses for various portions of
533 the PMU ucode image
534 Calculate the 32-bit addresses for the application code, application
535 data, and bootloader code. These values are all based on IM_BASE.
536 The 32-bit addresses will be the upper 32-bits of the virtual or
537 physical addresses of each respective segment.
538 */
539 addr_base = lsfm->lsb_header.ucode_off;
540 addr_base += readl(mc + MC_SECURITY_CARVEOUT3_BOM_0);
541 gm20b_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base,
542 lsfm->wpr_header.falcon_id);
543 addr_code = u64_lo32((addr_base +
544 desc->app_start_offset +
545 desc->app_resident_code_offset) >> 8);
546 addr_data = u64_lo32((addr_base +
547 desc->app_start_offset +
548 desc->app_resident_data_offset) >> 8);
549
550 gm20b_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %xID\n",
551 (u32)addr_code, (u32)addr_data, desc->bootloader_start_offset,
552 lsfm->wpr_header.falcon_id);
553
554 /* Populate the LOADER_CONFIG state */
555 memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc));
556 ldr_cfg->ctx_dma = 0;
557 ldr_cfg->code_dma_base = addr_code;
558 ldr_cfg->non_sec_code_size = desc->app_resident_code_size;
559 ldr_cfg->data_dma_base = addr_data;
560 ldr_cfg->data_size = desc->app_resident_data_size;
561 ldr_cfg->code_entry_point = desc->app_imem_entry;
562 *p_bl_gen_desc_size = sizeof(p_bl_gen_desc->bl_dmem_desc);
563 return 0;
564}
565
566/* Populate falcon boot loader generic desc.*/
567static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
568 struct lsfm_managed_ucode_img *pnode)
569{
570
571 struct pmu_gk20a *pmu = &g->pmu;
572 if (pnode->wpr_header.falcon_id != pmu->falcon_id) {
573 gm20b_dbg_pmu("non pmu. write flcn bl gen desc\n");
574 flcn_populate_bl_dmem_desc(g, pnode, &pnode->bl_gen_desc,
575 &pnode->bl_gen_desc_size);
576 return 0;
577 }
578
579 if (pmu->pmu_mode & PMU_LSFM_MANAGED) {
580 gm20b_dbg_pmu("pmu write flcn bl gen desc\n");
581 if (pnode->wpr_header.falcon_id == pmu->falcon_id)
582 return pmu_populate_loader_cfg(g, pnode,
583 &pnode->bl_gen_desc, &pnode->bl_gen_desc_size);
584 }
585
586 /* Failed to find the falcon requested. */
587 return -ENOENT;
588}
589
590/* Initialize WPR contents */
591static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
592 void *nonwpr_addr)
593{
594
595 int status = 0;
596 union flcn_bl_generic_desc *nonwpr_bl_gen_desc;
597 if (nonwpr_addr == NULL) {
598 status = -ENOMEM;
599 } else {
600 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
601 struct lsf_wpr_header *wpr_hdr;
602 struct lsf_lsb_header *lsb_hdr;
603 void *ucode_off;
604 u32 i;
605
606 /* The WPR array is at the base of the WPR */
607 wpr_hdr = (struct lsf_wpr_header *)nonwpr_addr;
608 pnode = plsfm->ucode_img_list;
609 i = 0;
610
611 /*
612 * Walk the managed falcons, flush WPR and LSB headers to FB.
613 * flush any bl args to the storage area relative to the
614 * ucode image (appended on the end as a DMEM area).
615 */
616 while (pnode) {
617 /* Flush WPR header to memory*/
618 memcpy(&wpr_hdr[i], &pnode->wpr_header,
619 sizeof(struct lsf_wpr_header));
620 gm20b_dbg_pmu("wpr header as in memory and pnode\n");
621 gm20b_dbg_pmu("falconid :%d %d\n",
622 pnode->wpr_header.falcon_id,
623 wpr_hdr[i].falcon_id);
624 gm20b_dbg_pmu("lsb_offset :%x %x\n",
625 pnode->wpr_header.lsb_offset,
626 wpr_hdr[i].lsb_offset);
627 gm20b_dbg_pmu("bootstrap_owner :%d %d\n",
628 pnode->wpr_header.bootstrap_owner,
629 wpr_hdr[i].bootstrap_owner);
630 gm20b_dbg_pmu("lazy_bootstrap :%d %d\n",
631 pnode->wpr_header.lazy_bootstrap,
632 wpr_hdr[i].lazy_bootstrap);
633 gm20b_dbg_pmu("status :%d %d\n",
634 pnode->wpr_header.status, wpr_hdr[i].status);
635
636 /*Flush LSB header to memory*/
637 lsb_hdr = (struct lsf_lsb_header *)((u8 *)nonwpr_addr +
638 pnode->wpr_header.lsb_offset);
639 memcpy(lsb_hdr, &pnode->lsb_header,
640 sizeof(struct lsf_lsb_header));
641 gm20b_dbg_pmu("lsb header as in memory and pnode\n");
642 gm20b_dbg_pmu("ucode_off :%x %x\n",
643 pnode->lsb_header.ucode_off,
644 lsb_hdr->ucode_off);
645 gm20b_dbg_pmu("ucode_size :%x %x\n",
646 pnode->lsb_header.ucode_size,
647 lsb_hdr->ucode_size);
648 gm20b_dbg_pmu("data_size :%x %x\n",
649 pnode->lsb_header.data_size,
650 lsb_hdr->data_size);
651 gm20b_dbg_pmu("bl_code_size :%x %x\n",
652 pnode->lsb_header.bl_code_size,
653 lsb_hdr->bl_code_size);
654 gm20b_dbg_pmu("bl_imem_off :%x %x\n",
655 pnode->lsb_header.bl_imem_off,
656 lsb_hdr->bl_imem_off);
657 gm20b_dbg_pmu("bl_data_off :%x %x\n",
658 pnode->lsb_header.bl_data_off,
659 lsb_hdr->bl_data_off);
660 gm20b_dbg_pmu("bl_data_size :%x %x\n",
661 pnode->lsb_header.bl_data_size,
662 lsb_hdr->bl_data_size);
663 gm20b_dbg_pmu("flags :%x %x\n",
664 pnode->lsb_header.flags, lsb_hdr->flags);
665
666 /*If this falcon has a boot loader and related args,
667 * flush them.*/
668 if (!pnode->ucode_img.header) {
669 nonwpr_bl_gen_desc =
670 (union flcn_bl_generic_desc *)
671 ((u8 *)nonwpr_addr +
672 pnode->lsb_header.bl_data_off);
673
674 /*Populate gen bl and flush to memory*/
675 lsfm_fill_flcn_bl_gen_desc(g, pnode);
676 memcpy(nonwpr_bl_gen_desc, &pnode->bl_gen_desc,
677 pnode->bl_gen_desc_size);
678 }
679 ucode_off = (void *)(pnode->lsb_header.ucode_off +
680 (u8 *)nonwpr_addr);
681 /*Copying of ucode*/
682 memcpy(ucode_off, pnode->ucode_img.data,
683 pnode->ucode_img.data_size);
684 pnode = pnode->next;
685 i++;
686 }
687
688 /* Tag the terminator WPR header with an invalid falcon ID. */
689 gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id,
690 1, LSF_FALCON_ID_INVALID);
691 }
692 return status;
693}
694
695/*!
696 * lsfm_parse_no_loader_ucode: parses UCODE header of falcon
697 *
698 * @param[in] p_ucodehdr : UCODE header
699 * @param[out] lsb_hdr : updates values in LSB header
700 *
701 * @return 0
702 */
703static int lsfm_parse_no_loader_ucode(u32 *p_ucodehdr,
704 struct lsf_lsb_header *lsb_hdr)
705{
706
707 u32 code_size = 0;
708 u32 data_size = 0;
709 u32 i = 0;
710 u32 total_apps = p_ucodehdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND];
711
712 /* Let's calculate the code size */
713 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
714 for (i = 0; i < total_apps; i++) {
715 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND
716 (total_apps, i)];
717 }
718 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(total_apps)];
719
720 /* Calculate data size*/
721 data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
722 for (i = 0; i < total_apps; i++) {
723 data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND
724 (total_apps, i)];
725 }
726
727 lsb_hdr->ucode_size = code_size;
728 lsb_hdr->data_size = data_size;
729 lsb_hdr->bl_code_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
730 lsb_hdr->bl_imem_off = 0;
731 lsb_hdr->bl_data_off = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND];
732 lsb_hdr->bl_data_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
733 return 0;
734}
735
736/*!
737 * @brief lsfm_fill_static_lsb_hdr_info
738 * Populate static LSB header information using the provided ucode image
739 */
740static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
741 u32 falcon_id, struct lsfm_managed_ucode_img *pnode)
742{
743
744 struct pmu_gk20a *pmu = &g->pmu;
745 u32 data = 0;
746
747 if (pnode->ucode_img.lsf_desc)
748 memcpy(&pnode->lsb_header.signature, pnode->ucode_img.lsf_desc,
749 sizeof(struct lsf_ucode_desc));
750 pnode->lsb_header.ucode_size = pnode->ucode_img.data_size;
751
752 /* The remainder of the LSB depends on the loader usage */
753 if (pnode->ucode_img.header) {
754 /* Does not use a loader */
755 pnode->lsb_header.data_size = 0;
756 pnode->lsb_header.bl_code_size = 0;
757 pnode->lsb_header.bl_data_off = 0;
758 pnode->lsb_header.bl_data_size = 0;
759
760 lsfm_parse_no_loader_ucode(pnode->ucode_img.header,
761 &(pnode->lsb_header));
762
763 /* Load the first 256 bytes of IMEM. */
764 /* Set LOAD_CODE_AT_0 and DMACTL_REQ_CTX.
765 True for all method based falcons */
766 data = NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE |
767 NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
768 pnode->lsb_header.flags = data;
769 } else {
770 /* Uses a loader, i.e. it has a desc */
771 pnode->lsb_header.data_size = 0;
772
773 /* The loader code size is already aligned (padded) such that
774 the code following it is aligned, but the size in the image
775 desc is not, bloat it up to be on a 256 byte alignment. */
776 pnode->lsb_header.bl_code_size = ALIGN(
777 pnode->ucode_img.desc->bootloader_size,
778 LSF_BL_CODE_SIZE_ALIGNMENT);
779 /* Though the BL is located at the 0th offset of the image, the VA
780 is different to make sure that it doesn't collide with the actual OS
781 VA range */
782 pnode->lsb_header.bl_imem_off =
783 pnode->ucode_img.desc->bootloader_imem_offset;
784
785 /* TODO: OBJFLCN should export properties using which the below
786 flags should be populated.*/
787 pnode->lsb_header.flags = 0;
788
789 if (falcon_id == pmu->falcon_id) {
790 data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
791 pnode->lsb_header.flags = data;
792 }
793 }
794}
795
796/* Adds a ucode image to the list of managed ucode images. */
797static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
798 struct flcn_ucode_img *ucode_image, u32 falcon_id)
799{
800
801 struct lsfm_managed_ucode_img *pnode;
802 pnode = kzalloc(sizeof(struct lsfm_managed_ucode_img), GFP_KERNEL);
803 if (pnode == NULL)
804 return -ENOMEM;
805
806 /* Keep a copy of the ucode image info locally */
807 memcpy(&pnode->ucode_img, ucode_image, sizeof(struct flcn_ucode_img));
808
809 /* Fill in static WPR header info*/
810 pnode->wpr_header.falcon_id = falcon_id;
811 pnode->wpr_header.bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
812 pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY;
813
814 /*TODO to check if PDB_PROP_FLCN_LAZY_BOOTSTRAP is to be supported by
815 Android */
816 /* Fill in static LSB header info elsewhere */
817 lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode);
818 pnode->next = plsfm->ucode_img_list;
819 plsfm->ucode_img_list = pnode;
820 return 0;
821}
822
823/* Free any ucode image structure resources*/
824static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img)
825{
826 if (p_img->lsf_desc != NULL) {
827 kfree(p_img->lsf_desc);
828 p_img->lsf_desc = NULL;
829 }
830}
831
832/* Free any ucode image structure resources*/
833static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img)
834{
835 if (p_img->lsf_desc != NULL) {
836 kfree(p_img->lsf_desc);
837 p_img->lsf_desc = NULL;
838 }
839 if (p_img->desc != NULL) {
840 kfree(p_img->desc);
841 p_img->desc = NULL;
842 }
843}
844
845
846/* Generate WPR requirements for ACR allocation request */
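/*
 * Rough layout produced here (offsets aligned as noted in the code): a WPR
 * header array at the base, then per managed falcon an LSB header
 * (LSF_LSB_HEADER_ALIGNMENT), the ucode image (LSF_UCODE_DATA_ALIGNMENT)
 * and, for loader-based falcons, a BL descriptor area
 * (LSF_BL_DATA_ALIGNMENT).
 */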
847static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm)
848{
849 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
850 u32 wpr_offset;
851
852 /* Calculate WPR size required */
853
854 /* Start with an array of WPR headers at the base of the WPR.
855 The expectation here is that the secure falcon will do a single DMA
856 read of this array and cache it internally so it's OK to pack these.
857 Also, we add 1 to the falcon count to indicate the end of the array.*/
858 wpr_offset = sizeof(struct lsf_wpr_header) *
859 (plsfm->managed_flcn_cnt+1);
860
861 /* Walk the managed falcons, accounting for the LSB structs
862 as well as the ucode images. */
863 while (pnode) {
864 /* Align, save off, and include an LSB header size */
865 wpr_offset = ALIGN(wpr_offset,
866 LSF_LSB_HEADER_ALIGNMENT);
867 pnode->wpr_header.lsb_offset = wpr_offset;
868 wpr_offset += sizeof(struct lsf_lsb_header);
869
870 /* Align, save off, and include the original (static)
871 ucode image size */
872 wpr_offset = ALIGN(wpr_offset,
873 LSF_UCODE_DATA_ALIGNMENT);
874 pnode->lsb_header.ucode_off = wpr_offset;
875 wpr_offset += pnode->ucode_img.data_size;
876
877 /* For falcons that use a boot loader (BL), we append a loader
878 desc structure on the end of the ucode image and consider this
879 the boot loader data. The host will then copy the loader desc
880 args to this space within the WPR region (before locking down)
881 and the HS bin will then copy them to DMEM 0 for the loader. */
882 if (!pnode->ucode_img.header) {
883 /* Track the size for LSB details filled in later.
884 Note that at this point we don't know what kind of
885 boot loader desc it is, so we just take the size of the
886 generic one, which is the largest it will ever be.
887 */
888 /* Align (size bloat) and save off generic
889 descriptor size*/
890 pnode->lsb_header.bl_data_size = ALIGN(
891 sizeof(pnode->bl_gen_desc),
892 LSF_BL_DATA_SIZE_ALIGNMENT);
893
894 /*Align, save off, and include the additional BL data*/
895 wpr_offset = ALIGN(wpr_offset,
896 LSF_BL_DATA_ALIGNMENT);
897 pnode->lsb_header.bl_data_off = wpr_offset;
898 wpr_offset += pnode->lsb_header.bl_data_size;
899 } else {
900 /* bl_data_off is already assigned in static
901 information. But that is from start of the image */
902 pnode->lsb_header.bl_data_off +=
903 (wpr_offset - pnode->ucode_img.data_size);
904 }
905
906 /* Finally, update ucode surface size to include updates */
907 pnode->full_ucode_size = wpr_offset -
908 pnode->lsb_header.ucode_off;
909 pnode = pnode->next;
910 }
911 plsfm->wpr_size = wpr_offset;
912 return 0;
913}
914
915/* Loads the ACR bin to FB mem and bootstraps the PMU with the bootloader code;
916 * start and size describe the ucode blob in the non-WPR region */
917int gm20b_bootstrap_hs_flcn(struct gk20a *g)
918{
919 struct mm_gk20a *mm = &g->mm;
920 struct vm_gk20a *vm = &mm->pmu.vm;
921 struct device *d = dev_from_gk20a(g);
922 int i, err = 0;
923 struct sg_table *sgt_pmu_ucode;
924 dma_addr_t iova;
925 u64 *pacr_ucode_cpuva = NULL, pacr_ucode_pmu_va, *acr_dmem;
926 u32 img_size_in_bytes;
927 struct flcn_bl_dmem_desc bl_dmem_desc;
928 u32 status, start, size;
929 const struct firmware *acr_fw;
930 struct acr_gm20b *acr = &g->acr;
931 u32 *acr_ucode_header_t210_load;
932 u32 *acr_ucode_data_t210_load;
933
934 start = g->acr.ucode_blob_start;
935 size = g->acr.ucode_blob_size;
936
937 gm20b_dbg_pmu("");
938
939 acr_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE);
940 if (!acr_fw) {
941 gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
942 return -ENOENT;
943 }
944 acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data;
945 acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data +
946 acr->hsbin_hdr->header_offset);
947 acr_ucode_data_t210_load = (u32 *)(acr_fw->data +
948 acr->hsbin_hdr->data_offset);
949 acr_ucode_header_t210_load = (u32 *)(acr_fw->data +
950 acr->fw_hdr->hdr_offset);
951 img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256);
952
953 /* Let's patch the signatures first. */
954 if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load,
955 (u32 *)(acr_fw->data + acr->fw_hdr->sig_prod_offset),
956 (u32 *)(acr_fw->data + acr->fw_hdr->sig_dbg_offset),
957 (u32 *)(acr_fw->data + acr->fw_hdr->patch_loc),
958 (u32 *)(acr_fw->data + acr->fw_hdr->patch_sig)) < 0)
959 return -1;
960 pacr_ucode_cpuva = dma_alloc_coherent(d, img_size_in_bytes, &iova,
961 GFP_KERNEL);
962 if (!pacr_ucode_cpuva)
963 return -ENOMEM;
964
965 err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
966 pacr_ucode_cpuva,
967 iova,
968 img_size_in_bytes);
969 if (err) {
970 gk20a_err(d, "failed to allocate sg table\n");
971 err = -ENOMEM;
972 goto err_free_acr_buf;
973 }
974 pacr_ucode_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
975 img_size_in_bytes,
976 0, /* flags */
977 gk20a_mem_flag_read_only);
978 if (!pacr_ucode_pmu_va) {
979 gk20a_err(d, "failed to map pmu ucode memory!!");
980 err = -ENOMEM;
981 goto err_free_ucode_sgt;
982 }
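 /* Patch the flcn_acr_desc embedded in the ACR image's DMEM: tell the HS
  * binary where the non-WPR ucode blob lives and describe the MC carveout
  * regions (2 and 3) it may treat as WPR. */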
983 acr_dmem = (u64 *)
984 &(((u8 *)acr_ucode_data_t210_load)[
985 acr_ucode_header_t210_load[2]]);
986 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start =
987 start;
988 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size =
989 size;
990 ((struct flcn_acr_desc *)acr_dmem)->wpr_region_id = 2;
991 ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2;
992 ((struct flcn_acr_desc *)acr_dmem)->regions.region_props[0].region_id
993 = 2;
994 ((struct flcn_acr_desc *)acr_dmem)->regions.region_props[1].region_id
995 = 3;
996 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
997
998 for (i = 0; i < (img_size_in_bytes/4); i++) {
999 gk20a_mem_wr32(pacr_ucode_cpuva, i,
1000 acr_ucode_data_t210_load[i]);
1001 }
1002 /*
1003 * In order to execute this binary, we will be using PMU HAL to run
1004 * a bootloader which will load this image into PMU IMEM/DMEM.
1005 * Fill up the bootloader descriptor for PMU HAL to use..
1006 * TODO: Use standard descriptor which the generic bootloader is
1007 * checked in.
1008 */
1009
1010 bl_dmem_desc.signature[0] = 0;
1011 bl_dmem_desc.signature[1] = 0;
1012 bl_dmem_desc.signature[2] = 0;
1013 bl_dmem_desc.signature[3] = 0;
1014 bl_dmem_desc.ctx_dma = GK20A_PMU_DMAIDX_UCODE;
1015 bl_dmem_desc.code_dma_base =
1016 (unsigned int)(((u64)pacr_ucode_pmu_va >> 8));
1017 bl_dmem_desc.non_sec_code_off = acr_ucode_header_t210_load[0];
1018 bl_dmem_desc.non_sec_code_size = acr_ucode_header_t210_load[1];
1019 bl_dmem_desc.sec_code_off = acr_ucode_header_t210_load[5];
1020 bl_dmem_desc.sec_code_size = acr_ucode_header_t210_load[6];
1021 bl_dmem_desc.code_entry_point = 0; /* Start at 0th offset */
1022 bl_dmem_desc.data_dma_base =
1023 bl_dmem_desc.code_dma_base +
1024 ((acr_ucode_header_t210_load[2]) >> 8);
1025 bl_dmem_desc.data_size = acr_ucode_header_t210_load[3];
1026 status = pmu_exec_gen_bl(g, &bl_dmem_desc, 1);
1027 if (status != 0) {
1028 err = status;
1029 goto err_free_ucode_map;
1030 }
1031 return 0;
1032err_free_ucode_map:
1033 gk20a_gmmu_unmap(vm, pacr_ucode_pmu_va,
1034 img_size_in_bytes, gk20a_mem_flag_none);
1035err_free_ucode_sgt:
1036 gk20a_free_sgtable(&sgt_pmu_ucode);
1037err_free_acr_buf:
1038 dma_free_coherent(d, img_size_in_bytes,
1039 pacr_ucode_cpuva, iova);
1040 return err;
1041}
1042
1043u8 pmu_is_debug_mode_en(struct gk20a *g)
1044{
1045 int ctl_stat = gk20a_readl(g, pwr_pmu_scpctl_stat_r());
1046 return 1;
1047/*TODO return (ctl_stat & pwr_pmu_scpctl_stat_debug_mode_m());*/
1048}
1049
1050/*
1051 * @brief Patch signatures into ucode image
1052 */
1053static int
1054acr_ucode_patch_sig(struct gk20a *g,
1055 unsigned int *p_img,
1056 unsigned int *p_prod_sig,
1057 unsigned int *p_dbg_sig,
1058 unsigned int *p_patch_loc,
1059 unsigned int *p_patch_ind)
1060{
1061 int i, *p_sig;
1062 gm20b_dbg_pmu("");
1063
1064 if (!pmu_is_debug_mode_en(g)) {
1065 p_sig = p_prod_sig;
1066 gm20b_dbg_pmu("PRODUCTION MODE\n");
1067 } else {
1068 p_sig = p_dbg_sig;
1069 gm20b_dbg_pmu("DEBUG MODE\n");
1070 }
1071
1072 /* Patching logic:*/
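 /* Each entry in p_patch_loc is a byte offset into the image; four 32-bit
  * words of the selected prod/dbg signature are copied in at that offset. */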
1073 for (i = 0; i < sizeof(*p_patch_loc)>>2; i++) {
1074 p_img[(p_patch_loc[i]>>2)] = p_sig[(p_patch_ind[i]<<2)];
1075 p_img[(p_patch_loc[i]>>2)+1] = p_sig[(p_patch_ind[i]<<2)+1];
1076 p_img[(p_patch_loc[i]>>2)+2] = p_sig[(p_patch_ind[i]<<2)+2];
1077 p_img[(p_patch_loc[i]>>2)+3] = p_sig[(p_patch_ind[i]<<2)+3];
1078 }
1079 return 0;
1080}
1081
1082static int bl_bootstrap(struct pmu_gk20a *pmu,
1083 struct flcn_bl_dmem_desc *pbl_desc, u32 bl_sz)
1084{
1085 struct gk20a *g = pmu->g;
1086 struct mm_gk20a *mm = &g->mm;
1087 struct pmu_ucode_desc *desc = pmu->desc;
1088 u32 imem_dst_blk = 0;
1089 u32 virt_addr = 0;
1090 u32 tag = 0;
1091 u32 index = 0;
1092 struct hsflcn_bl_desc *pmu_bl_gm10x_desc = g->acr.pmu_hsbl_desc;
1093 u32 *bl_ucode;
1094
1095 gk20a_dbg_fn("");
1096 gk20a_writel(g, pwr_falcon_itfen_r(),
1097 gk20a_readl(g, pwr_falcon_itfen_r()) |
1098 pwr_falcon_itfen_ctxen_enable_f());
1099 gk20a_writel(g, pwr_pmu_new_instblk_r(),
1100 pwr_pmu_new_instblk_ptr_f(
1101 mm->pmu.inst_block.cpu_pa >> 12) |
1102 pwr_pmu_new_instblk_valid_f(1) |
1103 pwr_pmu_new_instblk_target_sys_coh_f());
1104
1105 /* TBD: load all other surfaces */
1106 /*copy bootloader interface structure to dmem*/
1107 gk20a_writel(g, pwr_falcon_dmemc_r(0),
1108 pwr_falcon_dmemc_offs_f(0) |
1109 pwr_falcon_dmemc_blk_f(0) |
1110 pwr_falcon_dmemc_aincw_f(1));
1111 pmu_copy_to_dmem(pmu, 0, (u8 *)pbl_desc,
1112 sizeof(struct flcn_bl_dmem_desc), 0);
1113 /*TODO This had to be copied to bl_desc_dmem_load_off, but since
1114 * that is 0, it is OK for now*/
1115
1116 /* Now copy bootloader to TOP of IMEM */
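 /* The destination block is chosen so the BL ends at the top of IMEM; a new
  * IMEM tag is written every 256 bytes (64 words), and the boot vector is
  * later pointed at bl_start_tag so the falcon starts in the bootloader. */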
1117 imem_dst_blk = (pwr_falcon_hwcfg_imem_size_v(
1118 gk20a_readl(g, pwr_falcon_hwcfg_r()))) - bl_sz/256;
1119
1120 /* Set Auto-Increment on write */
1121 gk20a_writel(g, pwr_falcon_imemc_r(0),
1122 pwr_falcon_imemc_offs_f(0) |
1123 pwr_falcon_imemc_blk_f(imem_dst_blk) |
1124 pwr_falcon_imemc_aincw_f(1));
1125 virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8;
1126 tag = virt_addr >> 8; /* tag is always 256B aligned */
1127 bl_ucode = (u32 *)(pmu->ucode.cpuva);
1128 for (index = 0; index < bl_sz/4; index++) {
1129 if ((index % 64) == 0) {
1130 gk20a_writel(g, pwr_falcon_imemt_r(0),
1131 (tag & 0xffff) << 0);
1132 tag++;
1133 }
1134 gk20a_writel(g, pwr_falcon_imemd_r(0),
1135 bl_ucode[index] & 0xffffffff);
1136 }
1137
1138 gk20a_writel(g, pwr_falcon_imemt_r(0), (0 & 0xffff) << 0);
1139 gm20b_dbg_pmu("Before starting falcon with BL\n");
1140
1141 gk20a_writel(g, pwr_falcon_bootvec_r(),
1142 pwr_falcon_bootvec_vec_f(virt_addr));
1143
1144 gk20a_writel(g, pwr_falcon_cpuctl_r(),
1145 pwr_falcon_cpuctl_startcpu_f(1));
1146
1147 gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
1148
1149 return 0;
1150}
1151
1152int gm20b_init_pmu_setup_hw1(struct gk20a *g, struct flcn_bl_dmem_desc *desc,
1153 u32 bl_sz)
1154{
1155 struct pmu_gk20a *pmu = &g->pmu;
1156 int err;
1157
1158 gk20a_dbg_fn("");
1159 pmu_reset(pmu);
1160
1161 /* setup apertures - virtual */
1162 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1163 pwr_fbif_transcfg_mem_type_virtual_f());
1164 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1165 pwr_fbif_transcfg_mem_type_virtual_f());
1166 /* setup apertures - physical */
1167 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1168 pwr_fbif_transcfg_mem_type_physical_f() |
1169 pwr_fbif_transcfg_target_local_fb_f());
1170 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1171 pwr_fbif_transcfg_mem_type_physical_f() |
1172 pwr_fbif_transcfg_target_coherent_sysmem_f());
1173 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1174 pwr_fbif_transcfg_mem_type_physical_f() |
1175 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1176
1177 err = bl_bootstrap(pmu, desc, bl_sz);
1178 if (err)
1179 return err;
1180 return 0;
1181}
1182
1183/*
1184* Executes a generic bootloader and waits for the PMU to halt.
1185* This BL will be used for those binaries that are loaded
1186* and executed at times other than RM PMU Binary execution.
1187*
1188* @param[in] g gk20a pointer
1189* @param[in] desc Bootloader descriptor
1191* @param[in] b_wait_for_halt Wait for PMU to HALT
1192*/
1193int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1194{
1195 struct pmu_gk20a *pmu = &g->pmu;
1196 struct mm_gk20a *mm = &g->mm;
1197 struct vm_gk20a *vm = &mm->pmu.vm;
1198 struct device *d = dev_from_gk20a(g);
1199 int i, err = 0;
1200 struct sg_table *sgt_pmu_ucode;
1201 dma_addr_t iova;
1202 u32 bl_sz;
1203 void *bl_cpuva;
1204 u64 bl_pmu_va;
1205 const struct firmware *hsbl_fw;
1206 struct acr_gm20b *acr = &g->acr;
1207 struct hsflcn_bl_desc *pmu_bl_gm10x_desc;
1208 u32 *pmu_bl_gm10x = NULL;
1209 DEFINE_DMA_ATTRS(attrs);
1210 gm20b_dbg_pmu("");
1211
1212 hsbl_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_BL_UCODE_IMAGE);
1213 if (!hsbl_fw) {
1214 gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
1215 return -ENOENT;
1216 }
1217 acr->bl_bin_hdr = (struct bin_hdr *)hsbl_fw->data;
1218 acr->pmu_hsbl_desc = (struct hsflcn_bl_desc *)(hsbl_fw->data +
1219 acr->bl_bin_hdr->header_offset);
1220 pmu_bl_gm10x_desc = acr->pmu_hsbl_desc;
1221 pmu_bl_gm10x = (u32 *)(hsbl_fw->data + acr->bl_bin_hdr->data_offset);
1222 bl_sz = ALIGN(pmu_bl_gm10x_desc->bl_img_hdr.bl_code_size,
1223 256);
1224 gm20b_dbg_pmu("Executing Generic Bootloader\n");
1225
1226 /*TODO in code verify that enable PMU is done, scrubbing etc is done*/
1227 /*TODO in code verify that gmmu vm init is done*/
1228 /*
1229 * Disable interrupts to avoid kernel hitting breakpoint due
1230 * to PMU halt
1231 */
1232
1233 gk20a_writel(g, pwr_falcon_irqsclr_r(),
1234 gk20a_readl(g, pwr_falcon_irqsclr_r()) & (~(0x10)));
1235
1236 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1237 bl_cpuva = dma_alloc_attrs(d, bl_sz,
1238 &iova,
1239 GFP_KERNEL,
1240 &attrs);
1241 gm20b_dbg_pmu("bl size is %x\n", bl_sz);
1242 if (!bl_cpuva) {
1243 gk20a_err(d, "failed to allocate memory\n");
1244 err = -ENOMEM;
1245 goto err_done;
1246 }
1247
1248 err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1249 bl_cpuva,
1250 iova,
1251 bl_sz);
1252 if (err) {
1253 gk20a_err(d, "failed to allocate sg table\n");
1254 goto err_free_cpu_va;
1255 }
1256
1257 bl_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1258 bl_sz,
1259 0, /* flags */
1260 gk20a_mem_flag_read_only);
1261 if (!bl_pmu_va) {
1262 gk20a_err(d, "failed to map pmu ucode memory!!");
err = -ENOMEM;
1263 goto err_free_ucode_sgt;
1264 }
1265
1266 for (i = 0; i < (bl_sz) >> 2; i++)
1267 gk20a_mem_wr32(bl_cpuva, i, pmu_bl_gm10x[i]);
1268 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n");
1269 pmu->ucode.cpuva = bl_cpuva;
1270 pmu->ucode.pmu_va = bl_pmu_va;
1271 gm20b_init_pmu_setup_hw1(g, desc, bl_sz);
1272 /* Poll for HALT */
1273 if (b_wait_for_halt) {
1274 err = pmu_wait_for_halt(g, GPU_TIMEOUT_DEFAULT);
1275 if (err == 0)
1276 /* Clear the HALT interrupt */
1277 gk20a_writel(g, pwr_falcon_irqsclr_r(),
1278 gk20a_readl(g, pwr_falcon_irqsclr_r()) & (~(0x10)));
1279 else
1280 goto err_unmap_bl;
1281 }
1282 gm20b_dbg_pmu("after waiting for halt, err %x\n", err);
1283 gm20b_dbg_pmu("err reg :%x\n", readl(mc +
1284 MC_ERR_GENERALIZED_CARVEOUT_STATUS_0));
1285 gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g,
1286 pwr_falcon_mmu_phys_sec_r()));
1287 gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r()));
1288 start_gm20b_pmu(g);
1289 err = 0;
1290err_unmap_bl:
1291 gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1292 bl_sz, gk20a_mem_flag_none);
1293err_free_ucode_sgt:
1294 gk20a_free_sgtable(&sgt_pmu_ucode);
1295err_free_cpu_va:
1296 dma_free_attrs(d, bl_sz,
1297 bl_cpuva, iova, &attrs);
1298err_done:
1299 return err;
1300}
1301
1302/*!
1303* Wait for PMU to halt
1304* @param[in] g GPU object pointer
1305* @param[in] timeout Timeout in microseconds for the PMU to halt
1306* @return '0' if PMU halts
1307*/
1308int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout)
1309{
1310 u32 data = 0;
1311 udelay(10);
1312 data = gk20a_readl(g, pwr_falcon_cpuctl_r());
1313 gm20b_dbg_pmu("bef while cpuctl %xi, timeout %d\n", data, timeout);
1314 while (timeout != 0) {
1315 data = gk20a_readl(g, pwr_falcon_cpuctl_r());
1316 if (data & pwr_falcon_cpuctl_halt_intr_m())
1317 /* CPU is halted, break */
1318 break;
1319 timeout--;
1320 udelay(1);
1321 }
1322 if (timeout == 0)
1323 return -EBUSY;
1324 return 0;
1325}