author	Supriya <ssharatkumar@nvidia.com>	2014-06-13 03:14:27 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:10:14 -0400
commit	b7793a493a1fa292a22d5ce84c43ee342b9824b2 (patch)
tree	963d128e317d319d2f53aff96420aec17b732bf6
parent	c32ac10b0bba400c1e83540a20c5ca210fa48613 (diff)
nvgpu: Host side changes to support HS mode
GM20B changes in PMU boot sequence to support booting in HS mode
and LS mode

Bug 1509680

Change-Id: I2832eda0efe17dd5e3a8f11dd06e7d4da267be70
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Reviewed-on: http://git-master/r/423140
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Shridhar Rasal <srasal@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
-rw-r--r--	drivers/gpu/nvgpu/Kconfig	9
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a.h	9
-rw-r--r--	drivers/gpu/nvgpu/gk20a/hal_gk20a.c	2
-rw-r--r--	drivers/gpu/nvgpu/gk20a/pmu_gk20a.c	37
-rw-r--r--	drivers/gpu/nvgpu/gk20a/pmu_gk20a.h	17
-rw-r--r--	drivers/gpu/nvgpu/gm20b/Makefile	2
-rw-r--r--	drivers/gpu/nvgpu/gm20b/acr_gm20b.c	1325
-rw-r--r--	drivers/gpu/nvgpu/gm20b/acr_gm20b.h	377
-rw-r--r--	drivers/gpu/nvgpu/gm20b/hal_gm20b.c	2
-rw-r--r--	drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h	20
-rw-r--r--	drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h	8
-rw-r--r--	drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h	80
-rw-r--r--	drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h	22
-rw-r--r--	drivers/gpu/nvgpu/gm20b/mm_gm20b.c	63
-rw-r--r--	drivers/gpu/nvgpu/gm20b/mm_gm20b.h	2
-rw-r--r--	drivers/gpu/nvgpu/gm20b/pmu_gm20b.c	26
-rw-r--r--	drivers/gpu/nvgpu/gm20b/pmu_gm20b.h	19
17 files changed, 2002 insertions(+), 18 deletions(-)
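
At a high level, the patch moves PMU bring-up behind a per-chip ops table: gk20a keeps the existing non-secure boot path, while gm20b routes setup through the new ACR/HS bootstrap code in acr_gm20b.c. The sketch below is illustrative only and not part of the commit (the stub struct and the pmu_boot_sketch helper are made up for this summary); it shows the dispatch using the hook and function names that appear in the diff:

	/* Sketch only -- mirrors the new gpu_ops.pmu hooks added below. */
	struct gk20a;

	struct pmu_ops_sketch {
		int (*pmu_setup_sw)(struct gk20a *g);
		int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
	};

	/*
	 * gk20a (hal_gk20a.c, gk20a_init_pmu_ops): non-secure boot
	 *	pmu_setup_sw               = gk20a_init_pmu_setup_sw
	 *	pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1
	 * gm20b (acr_gm20b.c, gm20b_init_secure_pmu): HS boot via ACR
	 *	pmu_setup_sw               = gm20b_pmu_setup_sw
	 *	pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn
	 */
	static int pmu_boot_sketch(struct gk20a *g, const struct pmu_ops_sketch *ops)
	{
		int err;

		err = ops->pmu_setup_sw(g);	/* build SW state / LS ucode blob */
		if (err)
			return err;
		return ops->pmu_setup_hw_and_bootstrap(g);	/* start the falcon */
	}

In the pmu_gk20a.c hunk further down, gk20a_init_pmu_support() is changed to call only through these two hooks, so the same entry point boots either path.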
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig
index 238d87e2..315c4683 100644
--- a/drivers/gpu/nvgpu/Kconfig
+++ b/drivers/gpu/nvgpu/Kconfig
@@ -70,3 +70,12 @@ config TEGRA_GK20A
 	  Enable support for the GK20A graphics engine on Tegra
 	  by adding a Tegra platfrom interface to the GK20A driver.
 	  The Tegra platform interface requires TEGRA_GRHOST (host1x).
+
+config TEGRA_ACR
+	bool "Enable HS bin support on GM20B GPU on Tegra"
+	depends on GK20A_PMU
+	default n
+	help
+	  Enable Support for Loading High Secure binary, and using
+	  Write Protected Regions (WPR) for storing ucodes, and bootstrap
+	  PMU, FECS and GPCCS in Low Secure mode.
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 726994ff..da5cc917 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -28,6 +28,7 @@ struct channel_gk20a;
 struct gr_gk20a;
 struct sim_gk20a;
 struct gk20a_ctxsw_ucode_segments;
+struct acr_gm20b;
 
 #include <linux/sched.h>
 #include <linux/spinlock.h>
@@ -45,6 +46,7 @@ struct gk20a_ctxsw_ucode_segments;
 #include "priv_ring_gk20a.h"
 #include "therm_gk20a.h"
 #include "platform_gk20a.h"
+#include "gm20b/acr_gm20b.h"
 
 extern struct platform_device tegra_gk20a_device;
 
@@ -205,6 +207,8 @@ struct gpu_ops {
 			struct pmu_sequence *seq);
 		void *(*get_pmu_seq_out_a_ptr)(
 			struct pmu_sequence *seq);
+		void (*set_pmu_cmdline_args_secure_mode)(struct pmu_gk20a *pmu,
+			u32 val);
 	} pmu_ver;
 	struct {
 		int (*get_netlist_name)(int index, char *name);
@@ -214,6 +218,10 @@ struct gpu_ops {
 		int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
 			u32 num_pages, u32 pgsz_idx);
 	} mm;
+	struct {
+		int (*pmu_setup_sw)(struct gk20a *g);
+		int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
+	} pmu;
 };
 
 struct gk20a {
@@ -236,6 +244,7 @@ struct gk20a {
 	struct sim_gk20a sim;
 	struct mm_gk20a mm;
 	struct pmu_gk20a pmu;
+	struct acr_gm20b acr;
 	struct cooling_device_gk20a gk20a_cdev;
 
 	/* Save pmu fw here so that it lives cross suspend/resume.
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index 66bc47a9..ad0a3dc7 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -23,6 +23,7 @@
 #include "channel_gk20a.h"
 #include "gr_ctx_gk20a.h"
 #include "mm_gk20a.h"
+#include "pmu_gk20a.h"
 
 struct gpu_ops gk20a_ops = {
 	.clock_gating = {
@@ -48,6 +49,7 @@ int gk20a_init_hal(struct gpu_ops *gops)
 	gk20a_init_fifo(gops);
 	gk20a_init_gr_ctx(gops);
 	gk20a_init_mm(gops);
+	gk20a_init_pmu_ops(gops);
 	gops->name = "gk20a";
 
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index b784b9a6..9b1ecea1 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -38,10 +38,8 @@
 #define gk20a_dbg_pmu(fmt, arg...) \
 	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
 
-static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
 static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
 		u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
-static void pmu_setup_hw(struct work_struct *work);
 static void ap_callback_init_and_enable_ctrl(
 	struct gk20a *g, struct pmu_msg *msg,
 	void *param, u32 seq_desc, u32 status);
@@ -62,6 +60,10 @@ static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
 {
 	pmu->args_v1.cpu_freq_hz = freq;
 }
+static void set_pmu_cmdline_args_secure_mode_v1(struct pmu_gk20a *pmu, u32 val)
+{
+	pmu->args_v1.secure_mode = val;
+}
 
 static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
 {
@@ -482,10 +484,12 @@ static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
 	return (void *)(&seq->out_v0);
 }
 
-static int gk20a_init_pmu(struct pmu_gk20a *pmu)
+int gk20a_init_pmu(struct pmu_gk20a *pmu)
 {
 	struct gk20a *g = pmu->g;
 	switch (pmu->desc->app_version) {
+	case APP_VERSION_GM20B_1:
+	case APP_VERSION_GM20B:
 	case APP_VERSION_1:
 	case APP_VERSION_2:
 		g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
@@ -493,6 +497,8 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
 			pmu_cmdline_size_v1;
 		g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
 			set_pmu_cmdline_args_cpufreq_v1;
+		g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
+			set_pmu_cmdline_args_secure_mode_v1;
 		g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
 			get_pmu_cmdline_args_ptr_v1;
 		g->ops.pmu_ver.get_pmu_allocation_struct_size =
@@ -558,6 +564,8 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
 			pmu_cmdline_size_v0;
 		g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
 			set_pmu_cmdline_args_cpufreq_v0;
+		g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
+			NULL;
 		g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
 			get_pmu_cmdline_args_ptr_v0;
 		g->ops.pmu_ver.get_pmu_allocation_struct_size =
@@ -627,7 +635,7 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
 	return 0;
 }
 
-static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
+void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
 		u32 src, u8 *dst, u32 size, u8 port)
 {
 	struct gk20a *g = pmu->g;
@@ -673,7 +681,7 @@ static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
 	return;
 }
 
-static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
+void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
 		u32 dst, u8 *src, u32 size, u8 port)
 {
 	struct gk20a *g = pmu->g;
@@ -887,7 +895,7 @@ static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
 	return 0;
 }
 
-static int pmu_reset(struct pmu_gk20a *pmu)
+int pmu_reset(struct pmu_gk20a *pmu)
 {
 	int err;
 
@@ -999,7 +1007,7 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu)
 	return 0;
 }
 
-static void pmu_seq_init(struct pmu_gk20a *pmu)
+void pmu_seq_init(struct pmu_gk20a *pmu)
 {
 	u32 i;
 
@@ -1784,7 +1792,7 @@ static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
 static void pmu_setup_hw_load_zbc(struct gk20a *g);
 static void pmu_setup_hw_enable_elpg(struct gk20a *g);
 
-static void pmu_setup_hw(struct work_struct *work)
+void pmu_setup_hw(struct work_struct *work)
 {
 	struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
 	struct gk20a *g = pmu->g;
@@ -1967,6 +1975,12 @@ static void pmu_setup_hw_enable_elpg(struct gk20a *g)
 	}
 }
 
+void gk20a_init_pmu_ops(struct gpu_ops *gops)
+{
+	gops->pmu.pmu_setup_sw = gk20a_init_pmu_setup_sw;
+	gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1;
+}
+
 int gk20a_init_pmu_support(struct gk20a *g)
 {
 	struct pmu_gk20a *pmu = &g->pmu;
@@ -1984,11 +1998,10 @@ int gk20a_init_pmu_support(struct gk20a *g)
 		return err;
 
 	if (support_gk20a_pmu()) {
-		err = gk20a_init_pmu_setup_sw(g);
+		err = g->ops.pmu.pmu_setup_sw(g);
 		if (err)
 			return err;
-
-		err = gk20a_init_pmu_setup_hw1(g);
+		err = g->ops.pmu.pmu_setup_hw_and_bootstrap(g);
 		if (err)
 			return err;
 	}
@@ -2724,7 +2737,7 @@ static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
 	 */
 }
 
-static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
+void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
 {
 	struct gk20a *g = pmu->g;
 	int i;
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 2843d483..e9567e14 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -51,6 +51,8 @@
 /* Mapping between AP_CTRLs and Idle counters */
 #define PMU_AP_IDLE_MASK_GRAPHICS	(PMU_AP_IDLE_MASK_HIST_IDX_1)
 
+#define APP_VERSION_GM20B_1 18547257
+#define APP_VERSION_GM20B 17615280
 #define APP_VERSION_2 18542378
 #define APP_VERSION_1 17997577
 #define APP_VERSION_0 16856675
@@ -1058,6 +1060,8 @@ struct pmu_gk20a {
 	};
 	unsigned long perfmon_events_cnt;
 	bool perfmon_sampling_enabled;
+	u8 pmu_mode; /*Added for GM20b, and ACR*/
+	u32 falcon_id;
 };
 
 int gk20a_init_pmu_support(struct gk20a *g);
@@ -1086,5 +1090,16 @@ int gk20a_pmu_debugfs_init(struct platform_device *dev);
 void gk20a_pmu_reset_load_counters(struct gk20a *g);
 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
 		u32 *total_cycles);
-
+void gk20a_init_pmu_ops(struct gpu_ops *gops);
+
+void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
+		u32 dst, u8 *src, u32 size, u8 port);
+void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
+		u32 src, u8 *dst, u32 size, u8 port);
+int pmu_reset(struct pmu_gk20a *pmu);
+int gk20a_init_pmu(struct pmu_gk20a *pmu);
+void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
+void gk20a_remove_pmu_support(struct pmu_gk20a *pmu);
+void pmu_setup_hw(struct work_struct *work);
+void pmu_seq_init(struct pmu_gk20a *pmu);
 #endif /*__PMU_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/Makefile b/drivers/gpu/nvgpu/gm20b/Makefile
index 237ce6f9..20328e9e 100644
--- a/drivers/gpu/nvgpu/gm20b/Makefile
+++ b/drivers/gpu/nvgpu/gm20b/Makefile
@@ -11,4 +11,6 @@ obj-$(CONFIG_GK20A) = \
 	fifo_gm20b.o \
 	gr_ctx_gm20b.o \
 	gm20b_gating_reglist.o \
+	acr_gm20b.o \
+	pmu_gm20b.o \
 	mm_gm20b.o
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
new file mode 100644
index 00000000..df1bc429
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -0,0 +1,1325 @@
1/*
2 * GM20B ACR
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include <linux/delay.h> /* for mdelay */
17#include <linux/firmware.h>
18#include <linux/clk.h>
19#include <linux/module.h>
20#include <linux/debugfs.h>
21#include <linux/dma-mapping.h>
22#include <linux/io.h>
23#include "../../../../arch/arm/mach-tegra/iomap.h"
24
25#include "gk20a/gk20a.h"
26#include "gk20a/pmu_gk20a.h"
27#include "hw_pwr_gm20b.h"
28#include "mc_carveout_reg.h"
29
30/*Defines*/
31#define gm20b_dbg_pmu(fmt, arg...) \
32 gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
33#define GPU_TIMEOUT_DEFAULT 10000
34
35typedef int (*get_ucode_details)(struct gk20a *g, struct flcn_ucode_img *udata);
36
37/*Externs*/
38
39/*Forwards*/
40static int lsfm_discover_ucode_images(struct gk20a *g,
41 struct ls_flcn_mgr *plsfm);
42static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
43 struct flcn_ucode_img *ucode_image, u32 falcon_id);
44static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img);
45static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img);
46static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm);
47static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
48 void *nonwpr_addr);
49static int acr_ucode_patch_sig(struct gk20a *g,
50 unsigned int *p_img,
51 unsigned int *p_prod_sig,
52 unsigned int *p_dbg_sig,
53 unsigned int *p_patch_loc,
54 unsigned int *p_patch_ind);
55
56/*Globals*/
57static void __iomem *mc = IO_ADDRESS(TEGRA_MC_BASE);
58get_ucode_details pmu_acr_supp_ucode_list[MAX_SUPPORTED_LSFM] = {
59 pmu_ucode_details,
60};
61
62/*Once is LS mode, cpuctl_alias is only accessible*/
63void start_gm20b_pmu(struct gk20a *g)
64{
65 gk20a_writel(g, pwr_falcon_cpuctl_alias_r(),
66 pwr_falcon_cpuctl_startcpu_f(1));
67}
68
69void gm20b_init_secure_pmu(struct gpu_ops *gops)
70{
71 gops->pmu.pmu_setup_sw = gm20b_pmu_setup_sw;
72 gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn;
73}
74
75static void free_blob_res(struct gk20a *g)
76{
77 /*TODO */
78}
79
80int gm20b_pmu_setup_sw(struct gk20a *g)
81{
82 /*from pmu_gk20a.c*/
83 struct pmu_gk20a *pmu = &g->pmu;
84 struct mm_gk20a *mm = &g->mm;
85 struct vm_gk20a *vm = &mm->pmu.vm;
86 struct device *d = dev_from_gk20a(g);
87 int i, err = 0;
88 u8 *ptr;
89 struct sg_table *sgt_seq_buf;
90 dma_addr_t iova;
91
92 gk20a_dbg_fn("");
93 /* Make any ACR structure settings here if ever need be*/
94
95 if (pmu->sw_ready) {
96 for (i = 0; i < pmu->mutex_cnt; i++) {
97 pmu->mutex[i].id = i;
98 pmu->mutex[i].index = i;
99 }
100 pmu_seq_init(pmu);
101
102 mutex_init(&pmu->elpg_mutex);
103 mutex_init(&pmu->isr_mutex);
104 mutex_init(&pmu->pmu_copy_lock);
105 mutex_init(&pmu->pmu_seq_lock);
106 gk20a_dbg_fn("skip init");
107 goto skip_init;
108 }
109 gm20b_dbg_pmu("gk20a_init_pmu_setup_sw 2\n");
110
111 /* TBD: sysmon subtask */
112
113 if (IS_ENABLED(CONFIG_TEGRA_GK20A_PERFMON))
114 pmu->perfmon_sampling_enabled = true;
115
116 pmu->mutex_cnt = pwr_pmu_mutex__size_1_v();
117 pmu->mutex = kzalloc(pmu->mutex_cnt *
118 sizeof(struct pmu_mutex), GFP_KERNEL);
119 if (!pmu->mutex) {
120 err = -ENOMEM;
121 goto err;
122 }
123
124 for (i = 0; i < pmu->mutex_cnt; i++) {
125 pmu->mutex[i].id = i;
126 pmu->mutex[i].index = i;
127 }
128 gm20b_dbg_pmu("gk20a_init_pmu_setup_sw 3\n");
129
130 pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
131 sizeof(struct pmu_sequence), GFP_KERNEL);
132 if (!pmu->seq) {
133 err = -ENOMEM;
134 goto err_free_mutex;
135 }
136
137 pmu_seq_init(pmu);
138 mutex_init(&pmu->elpg_mutex);
139 mutex_init(&pmu->isr_mutex);
140 mutex_init(&pmu->pmu_copy_lock);
141 mutex_init(&pmu->pmu_seq_lock);
142
143 err = prepare_ucode_blob(g);
144 if (err)
145 goto err_free_seq;
146 INIT_WORK(&pmu->pg_init, pmu_setup_hw);
147 pmu->seq_buf.cpuva = dma_alloc_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
148 &iova,
149 GFP_KERNEL);
150 if (!pmu->seq_buf.cpuva) {
151 gk20a_err(d, "failed to allocate memory\n");
152 err = -ENOMEM;
153 goto err_free_blob_res;
154 }
155
156 pmu->seq_buf.iova = iova;
157 err = gk20a_get_sgtable(d, &sgt_seq_buf,
158 pmu->seq_buf.cpuva,
159 pmu->seq_buf.iova,
160 GK20A_PMU_SEQ_BUF_SIZE);
161 if (err) {
162 gk20a_err(d, "failed to allocate sg table\n");
163 goto err_free_seq_buf;
164 }
165
166 pmu->seq_buf.pmu_va = gk20a_gmmu_map(vm, &sgt_seq_buf,
167 GK20A_PMU_SEQ_BUF_SIZE,
168 0, /* flags */
169 gk20a_mem_flag_none);
170 if (!pmu->seq_buf.pmu_va) {
171 gk20a_err(d, "failed to map pmu ucode memory!!");
172 goto err_free_seq_buf_sgt;
173 }
174
175 ptr = (u8 *)pmu->seq_buf.cpuva;
176 if (!ptr) {
177 gk20a_err(d, "failed to map cpu ptr for zbc buffer");
178 goto err_unmap_seq_buf;
179 }
180
181 /* TBD: remove this if ZBC save/restore is handled by PMU
182 * end an empty ZBC sequence for now */
183 ptr[0] = 0x16; /* opcode EXIT */
184 ptr[1] = 0; ptr[2] = 1; ptr[3] = 0;
185 ptr[4] = 0; ptr[5] = 0; ptr[6] = 0; ptr[7] = 0;
186
187 pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
188
189 gk20a_dbg_fn("done");
190 gk20a_free_sgtable(&sgt_seq_buf);
191
192 pmu->sw_ready = true;
193
194skip_init:
195 pmu->perfmon_counter.index = 3; /* GR & CE2 */
196 pmu->perfmon_counter.group_id = PMU_DOMAIN_GROUP_PSTATE;
197
198 pmu->remove_support = gk20a_remove_pmu_support;
199 err = gk20a_init_pmu(pmu);
200 if (err) {
201 gk20a_err(d, "failed to set function pointers\n");
202 goto err_unmap_seq_buf;
203 }
204
205 gk20a_dbg_fn("done");
206 return 0;
207
208 err_unmap_seq_buf:
209 gk20a_gmmu_unmap(vm, pmu->seq_buf.pmu_va,
210 GK20A_PMU_SEQ_BUF_SIZE, gk20a_mem_flag_none);
211 err_free_seq_buf_sgt:
212 gk20a_free_sgtable(&sgt_seq_buf);
213 err_free_seq_buf:
214 dma_free_coherent(d, GK20A_PMU_SEQ_BUF_SIZE,
215 pmu->seq_buf.cpuva, pmu->seq_buf.iova);
216 pmu->seq_buf.cpuva = NULL;
217 pmu->seq_buf.iova = 0;
218 err_free_blob_res:
219 free_blob_res(g);
220 err_free_seq:
221 kfree(pmu->seq);
222 err_free_mutex:
223 kfree(pmu->mutex);
224 err:
225 gk20a_dbg_fn("fail");
226 return err;
227}
228
229int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
230{
231 const struct firmware *pmu_fw;
232 struct pmu_gk20a *pmu = &g->pmu;
233 struct lsf_ucode_desc *lsf_desc;
234 int err;
235 gm20b_dbg_pmu("requesting PMU ucode in GM20B\n");
236 pmu_fw = gk20a_request_firmware(g, GM20B_PMU_UCODE_IMAGE);
237 if (!pmu_fw) {
238 gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
239 gm20b_dbg_pmu("requesting PMU ucode in GM20B failed\n");
240 return -ENOENT;
241 }
242 gm20b_dbg_pmu("Loaded PMU ucode in for blob preparation");
243
244 pmu->desc = (struct pmu_ucode_desc *)pmu_fw->data;
245 pmu->ucode_image = (u32 *)((u8 *)pmu->desc +
246 pmu->desc->descriptor_size);
247 err = gk20a_init_pmu(pmu);
248 if (err) {
249 gm20b_dbg_pmu("failed to set function pointers\n");
250 return err;
251 }
252
253 lsf_desc = kzalloc(sizeof(struct lsf_ucode_desc), GFP_KERNEL);
254 if (!lsf_desc)
255 return -ENOMEM;
256 lsf_desc->falcon_id = LSF_FALCON_ID_PMU;
257
258 p_img->desc = pmu->desc;
259 p_img->data = pmu->ucode_image;
260 p_img->data_size = pmu->desc->image_size;
261 p_img->fw_ver = NULL;
262 p_img->header = NULL;
263 p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
264 gm20b_dbg_pmu("requesting PMU ucode in GM20B exit\n");
265 return 0;
266}
267
268int prepare_ucode_blob(struct gk20a *g)
269{
270 struct device *d = dev_from_gk20a(g);
271 dma_addr_t iova;
272 u32 status;
273 void *nonwpr_addr;
274 u64 nonwpr_pmu_va;
275 struct ls_flcn_mgr lsfm_l, *plsfm;
276 struct sg_table *sgt_nonwpr;
277 struct mm_gk20a *mm = &g->mm;
278 struct vm_gk20a *vm = &mm->pmu.vm;
279
280 plsfm = &lsfm_l;
281 memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr));
282 gm20b_dbg_pmu("fetching GMMU regs\n");
283 gm20b_mm_mmu_vpr_info_fetch(g);
284
285 /* Discover all managed falcons*/
286 status = lsfm_discover_ucode_images(g, plsfm);
287 gm20b_dbg_pmu(" Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt);
288 if (status != 0)
289 return status;
290
291 if (plsfm->managed_flcn_cnt) {
292 /* Generate WPR requirements*/
293 status = lsf_gen_wpr_requirements(g, plsfm);
294 if (status != 0)
295 return status;
296
297 /*Alloc memory to hold ucode blob contents*/
298 nonwpr_addr = dma_alloc_coherent(d, plsfm->wpr_size, &iova,
299 GFP_KERNEL);
300 if (nonwpr_addr == NULL)
301 return -ENOMEM;
302 status = gk20a_get_sgtable(d, &sgt_nonwpr,
303 nonwpr_addr,
304 iova,
305 plsfm->wpr_size);
306 if (status) {
307 gk20a_err(d, "failed allocate sg table for nonwpr\n");
308 status = -ENOMEM;
309 goto err_free_nonwpr_addr;
310 }
311
312 nonwpr_pmu_va = gk20a_gmmu_map(vm, &sgt_nonwpr,
313 plsfm->wpr_size,
314 0, /* flags */
315 gk20a_mem_flag_read_only);
316 if (!nonwpr_pmu_va) {
317 gk20a_err(d, "failed to map pmu ucode memory!!");
318 status = -ENOMEM;
319 goto err_free_nonwpr_sgt;
320 }
321 gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
322 plsfm->managed_flcn_cnt, plsfm->wpr_size);
323 lsfm_init_wpr_contents(g, plsfm, nonwpr_addr);
324 g->acr.ucode_blob_start = nonwpr_pmu_va;
325 g->acr.ucode_blob_size = plsfm->wpr_size;
326 gm20b_dbg_pmu("32 bit ucode_start %x, size %d\n",
327 (u32)nonwpr_pmu_va, plsfm->wpr_size);
328 gm20b_dbg_pmu("base reg carveout 2:%x\n",
329 readl(mc + MC_SECURITY_CARVEOUT2_BOM_0));
330 gm20b_dbg_pmu("base reg carveout 3:%x\n",
331 readl(mc + MC_SECURITY_CARVEOUT3_BOM_0));
332 } else {
333 gm20b_dbg_pmu("LSFM is managing no falcons.\n");
334 }
335 gm20b_dbg_pmu("prepare ucode blob return 0\n");
336 return 0;
337err_free_nonwpr_sgt:
338 gk20a_free_sgtable(&sgt_nonwpr);
339err_free_nonwpr_addr:
340 dma_free_coherent(d, plsfm->wpr_size,
341 nonwpr_addr, iova);
342 nonwpr_addr = NULL;
343 iova = 0;
344 gm20b_dbg_pmu("prepare ucode blob return %x\n", status);
345 return status;
346}
347
348u8 lsfm_falcon_disabled(struct gk20a *g, struct ls_flcn_mgr *plsfm,
349 u32 falcon_id)
350{
351 return (plsfm->disable_mask >> falcon_id) & 0x1;
352}
353
354/* Discover all managed falcon ucode images */
355static int lsfm_discover_ucode_images(struct gk20a *g,
356 struct ls_flcn_mgr *plsfm)
357{
358 struct pmu_gk20a *pmu = &g->pmu;
359 struct flcn_ucode_img ucode_img;
360 u32 falcon_id;
361 u32 i;
362 int status;
363
364 /* LSFM requires a secure PMU, discover it first.*/
365 /* Obtain the PMU ucode image and add it to the list if required*/
366 memset(&ucode_img, 0, sizeof(ucode_img));
367 status = pmu_ucode_details(g, &ucode_img);
368 if (status == 0) {
369 if (ucode_img.lsf_desc != NULL) {
370 /* The falonId is formed by grabbing the static base
371 * falonId from the image and adding the
372 * engine-designated falcon instance.*/
373 pmu->pmu_mode |= PMU_SECURE_MODE;
374 falcon_id = ucode_img.lsf_desc->falcon_id +
375 ucode_img.flcn_inst;
376
377 if (!lsfm_falcon_disabled(g, plsfm, falcon_id)) {
378 pmu->falcon_id = falcon_id;
379 if (lsfm_add_ucode_img(g, plsfm, &ucode_img,
380 pmu->falcon_id) == 0)
381 pmu->pmu_mode |= PMU_LSFM_MANAGED;
382
383 plsfm->managed_flcn_cnt++;
384 } else {
385 gm20b_dbg_pmu("id not managed %d\n",
386 ucode_img.lsf_desc->falcon_id);
387 }
388 }
389
390 /*Free any ucode image resources if not managing this falcon*/
391 if (!(pmu->pmu_mode & PMU_LSFM_MANAGED)) {
392 gm20b_dbg_pmu("pmu is not LSFM managed\n");
393 lsfm_free_ucode_img_res(&ucode_img);
394 }
395 }
396
397 /* Enumerate all constructed falcon objects,
398 as we need the ucode image info and total falcon count.*/
399
400 /*0th index is always PMU which is already handled in earlier
401 if condition*/
402 for (i = 1; i < MAX_SUPPORTED_LSFM; i++) {
403 memset(&ucode_img, 0, sizeof(ucode_img));
404 if (pmu_acr_supp_ucode_list[i](g, &ucode_img) == 0) {
405 if (ucode_img.lsf_desc != NULL) {
406 /* We have engine sigs, ensure that this falcon
407 is aware of the secure mode expectations
408 (ACR status)*/
409
410 /* falon_id is formed by grabbing the static
411 base falonId from the image and adding the
412 engine-designated falcon instance. */
413 falcon_id = ucode_img.lsf_desc->falcon_id +
414 ucode_img.flcn_inst;
415
416 if (!lsfm_falcon_disabled(g, plsfm,
417 falcon_id)) {
418 /* Do not manage non-FB ucode*/
419 if (lsfm_add_ucode_img(g,
420 plsfm, &ucode_img, falcon_id)
421 == 0)
422 plsfm->managed_flcn_cnt++;
423 } else {
424 gm20b_dbg_pmu("not managed %d\n",
425 ucode_img.lsf_desc->falcon_id);
426 lsfm_free_nonpmu_ucode_img_res(
427 &ucode_img);
428 }
429 }
430 } else {
431 /* Consumed all available falcon objects */
432 gm20b_dbg_pmu("Done checking for ucodes %d\n", i);
433 break;
434 }
435 }
436 return 0;
437}
438
439
440int pmu_populate_loader_cfg(struct gk20a *g,
441 struct lsfm_managed_ucode_img *lsfm,
442 union flcn_bl_generic_desc *p_bl_gen_desc, u32 *p_bl_gen_desc_size)
443{
444 struct pmu_gk20a *pmu = &g->pmu;
445 struct flcn_ucode_img *p_img = &(lsfm->ucode_img);
446 struct loader_config *ldr_cfg =
447 (struct loader_config *)(&p_bl_gen_desc->loader_cfg);
448 struct gk20a_platform *platform = platform_get_drvdata(g->dev);
449 u64 addr_base;
450 struct pmu_ucode_desc *desc;
451 u64 addr_code, addr_data;
452 u32 addr_args;
453
454 if (p_img->desc == NULL) /*This means its a header based ucode,
455 and so we do not fill BL gen desc structure*/
456 return -EINVAL;
457 desc = p_img->desc;
458 /*
459 Calculate physical and virtual addresses for various portions of
460 the PMU ucode image
461 Calculate the 32-bit addresses for the application code, application
462 data, and bootloader code. These values are all based on IM_BASE.
463 The 32-bit addresses will be the upper 32-bits of the virtual or
464 physical addresses of each respective segment.
465 */
466 addr_base = lsfm->lsb_header.ucode_off;
467 addr_base += readl(mc + MC_SECURITY_CARVEOUT3_BOM_0);
468 gm20b_dbg_pmu("pmu loader cfg u32 addrbase %x\n", (u32)addr_base);
469 /*From linux*/
470 addr_code = u64_lo32((addr_base +
471 desc->app_start_offset +
472 desc->app_resident_code_offset) >> 8);
473 gm20b_dbg_pmu("app start %d app res code off %d\n",
474 desc->app_start_offset, desc->app_resident_code_offset);
475 addr_data = u64_lo32((addr_base +
476 desc->app_start_offset +
477 desc->app_resident_data_offset) >> 8);
478 gm20b_dbg_pmu("app res data offset%d\n",
479 desc->app_resident_data_offset);
480 gm20b_dbg_pmu("bl start off %d\n", desc->bootloader_start_offset);
481
482 addr_args = ((pwr_falcon_hwcfg_dmem_size_v(
483 gk20a_readl(g, pwr_falcon_hwcfg_r())))
484 << GK20A_PMU_DMEM_BLKSIZE2);
485 addr_args -= g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu);
486
487 gm20b_dbg_pmu("addr_args %x\n", addr_args);
488
489 /* Populate the loader_config state*/
490 ldr_cfg->dma_idx = 2;
491 ldr_cfg->code_dma_base = addr_code;
492 ldr_cfg->code_size_total = desc->app_size;
493 ldr_cfg->code_size_to_load = desc->app_resident_code_size;
494 ldr_cfg->code_entry_point = desc->app_imem_entry;
495 ldr_cfg->data_dma_base = addr_data;
496 ldr_cfg->data_size = desc->app_resident_data_size;
497 ldr_cfg->overlay_dma_base = addr_code;
498
499 /* Update the argc/argv members*/
500 ldr_cfg->argc = 1;
501 ldr_cfg->argv = addr_args;
502
503 /*Copying pmu cmdline args*/
504 g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu,
505 clk_get_rate(platform->clk[1]));
506 g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(pmu, 1);
507 pmu_copy_to_dmem(pmu, addr_args,
508 (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)),
509 g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0);
510 *p_bl_gen_desc_size = sizeof(p_bl_gen_desc->loader_cfg);
511 return 0;
512}
513
514int flcn_populate_bl_dmem_desc(struct gk20a *g,
515 struct lsfm_managed_ucode_img *lsfm,
516 union flcn_bl_generic_desc *p_bl_gen_desc, u32 *p_bl_gen_desc_size)
517{
518
519 struct flcn_ucode_img *p_img = &(lsfm->ucode_img);
520 struct flcn_bl_dmem_desc *ldr_cfg =
521 (struct flcn_bl_dmem_desc *)(&p_bl_gen_desc->loader_cfg);
522 u64 addr_base;
523 struct pmu_ucode_desc *desc;
524 u64 addr_code, addr_data;
525
526 if (p_img->desc == NULL) /*This means its a header based ucode,
527 and so we do not fill BL gen desc structure*/
528 return -EINVAL;
529 desc = p_img->desc;
530
531 /*
532 Calculate physical and virtual addresses for various portions of
533 the PMU ucode image
534 Calculate the 32-bit addresses for the application code, application
535 data, and bootloader code. These values are all based on IM_BASE.
536 The 32-bit addresses will be the upper 32-bits of the virtual or
537 physical addresses of each respective segment.
538 */
539 addr_base = lsfm->lsb_header.ucode_off;
540 addr_base += readl(mc + MC_SECURITY_CARVEOUT3_BOM_0);
541 gm20b_dbg_pmu("gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base,
542 lsfm->wpr_header.falcon_id);
543 addr_code = u64_lo32((addr_base +
544 desc->app_start_offset +
545 desc->app_resident_code_offset) >> 8);
546 addr_data = u64_lo32((addr_base +
547 desc->app_start_offset +
548 desc->app_resident_data_offset) >> 8);
549
550 gm20b_dbg_pmu("gen cfg %x u32 addrcode %x & data %x load offset %xID\n",
551 (u32)addr_code, (u32)addr_data, desc->bootloader_start_offset,
552 lsfm->wpr_header.falcon_id);
553
554 /* Populate the LOADER_CONFIG state */
555 memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc));
556 ldr_cfg->ctx_dma = 0;
557 ldr_cfg->code_dma_base = addr_code;
558 ldr_cfg->non_sec_code_size = desc->app_resident_code_size;
559 ldr_cfg->data_dma_base = addr_data;
560 ldr_cfg->data_size = desc->app_resident_data_size;
561 ldr_cfg->code_entry_point = desc->app_imem_entry;
562 *p_bl_gen_desc_size = sizeof(p_bl_gen_desc->bl_dmem_desc);
563 return 0;
564}
565
566/* Populate falcon boot loader generic desc.*/
567static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
568 struct lsfm_managed_ucode_img *pnode)
569{
570
571 struct pmu_gk20a *pmu = &g->pmu;
572 if (pnode->wpr_header.falcon_id != pmu->falcon_id) {
573 gm20b_dbg_pmu("non pmu. write flcn bl gen desc\n");
574 flcn_populate_bl_dmem_desc(g, pnode, &pnode->bl_gen_desc,
575 &pnode->bl_gen_desc_size);
576 return 0;
577 }
578
579 if (pmu->pmu_mode & PMU_LSFM_MANAGED) {
580 gm20b_dbg_pmu("pmu write flcn bl gen desc\n");
581 if (pnode->wpr_header.falcon_id == pmu->falcon_id)
582 return pmu_populate_loader_cfg(g, pnode,
583 &pnode->bl_gen_desc, &pnode->bl_gen_desc_size);
584 }
585
586 /* Failed to find the falcon requested. */
587 return -ENOENT;
588}
589
590/* Initialize WPR contents */
591static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
592 void *nonwpr_addr)
593{
594
595 int status = 0;
596 union flcn_bl_generic_desc *nonwpr_bl_gen_desc;
597 if (nonwpr_addr == NULL) {
598 status = -ENOMEM;
599 } else {
600 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
601 struct lsf_wpr_header *wpr_hdr;
602 struct lsf_lsb_header *lsb_hdr;
603 void *ucode_off;
604 u32 i;
605
606 /* The WPR array is at the base of the WPR */
607 wpr_hdr = (struct lsf_wpr_header *)nonwpr_addr;
608 pnode = plsfm->ucode_img_list;
609 i = 0;
610
611 /*
612 * Walk the managed falcons, flush WPR and LSB headers to FB.
613 * flush any bl args to the storage area relative to the
614 * ucode image (appended on the end as a DMEM area).
615 */
616 while (pnode) {
617 /* Flush WPR header to memory*/
618 memcpy(&wpr_hdr[i], &pnode->wpr_header,
619 sizeof(struct lsf_wpr_header));
620 gm20b_dbg_pmu("wpr header as in memory and pnode\n");
621 gm20b_dbg_pmu("falconid :%d %d\n",
622 pnode->wpr_header.falcon_id,
623 wpr_hdr[i].falcon_id);
624 gm20b_dbg_pmu("lsb_offset :%x %x\n",
625 pnode->wpr_header.lsb_offset,
626 wpr_hdr[i].lsb_offset);
627 gm20b_dbg_pmu("bootstrap_owner :%d %d\n",
628 pnode->wpr_header.bootstrap_owner,
629 wpr_hdr[i].bootstrap_owner);
630 gm20b_dbg_pmu("lazy_bootstrap :%d %d\n",
631 pnode->wpr_header.lazy_bootstrap,
632 wpr_hdr[i].lazy_bootstrap);
633 gm20b_dbg_pmu("status :%d %d\n",
634 pnode->wpr_header.status, wpr_hdr[i].status);
635
636 /*Flush LSB header to memory*/
637 lsb_hdr = (struct lsf_lsb_header *)((u8 *)nonwpr_addr +
638 pnode->wpr_header.lsb_offset);
639 memcpy(lsb_hdr, &pnode->lsb_header,
640 sizeof(struct lsf_lsb_header));
641 gm20b_dbg_pmu("lsb header as in memory and pnode\n");
642 gm20b_dbg_pmu("ucode_off :%x %x\n",
643 pnode->lsb_header.ucode_off,
644 lsb_hdr->ucode_off);
645 gm20b_dbg_pmu("ucode_size :%x %x\n",
646 pnode->lsb_header.ucode_size,
647 lsb_hdr->ucode_size);
648 gm20b_dbg_pmu("data_size :%x %x\n",
649 pnode->lsb_header.data_size,
650 lsb_hdr->data_size);
651 gm20b_dbg_pmu("bl_code_size :%x %x\n",
652 pnode->lsb_header.bl_code_size,
653 lsb_hdr->bl_code_size);
654 gm20b_dbg_pmu("bl_imem_off :%x %x\n",
655 pnode->lsb_header.bl_imem_off,
656 lsb_hdr->bl_imem_off);
657 gm20b_dbg_pmu("bl_data_off :%x %x\n",
658 pnode->lsb_header.bl_data_off,
659 lsb_hdr->bl_data_off);
660 gm20b_dbg_pmu("bl_data_size :%x %x\n",
661 pnode->lsb_header.bl_data_size,
662 lsb_hdr->bl_data_size);
663 gm20b_dbg_pmu("flags :%x %x\n",
664 pnode->lsb_header.flags, lsb_hdr->flags);
665
666 /*If this falcon has a boot loader and related args,
667 * flush them.*/
668 if (!pnode->ucode_img.header) {
669 nonwpr_bl_gen_desc =
670 (union flcn_bl_generic_desc *)
671 ((u8 *)nonwpr_addr +
672 pnode->lsb_header.bl_data_off);
673
674 /*Populate gen bl and flush to memory*/
675 lsfm_fill_flcn_bl_gen_desc(g, pnode);
676 memcpy(nonwpr_bl_gen_desc, &pnode->bl_gen_desc,
677 pnode->bl_gen_desc_size);
678 }
679 ucode_off = (void *)(pnode->lsb_header.ucode_off +
680 (u8 *)nonwpr_addr);
681 /*Copying of ucode*/
682 memcpy(ucode_off, pnode->ucode_img.data,
683 pnode->ucode_img.data_size);
684 pnode = pnode->next;
685 i++;
686 }
687
688 /* Tag the terminator WPR header with an invalid falcon ID. */
689 gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id,
690 1, LSF_FALCON_ID_INVALID);
691 }
692 return status;
693}
694
695/*!
696 * lsfm_parse_no_loader_ucode: parses UCODE header of falcon
697 *
698 * @param[in] p_ucodehdr : UCODE header
699 * @param[out] lsb_hdr : updates values in LSB header
700 *
701 * @return 0
702 */
703static int lsfm_parse_no_loader_ucode(u32 *p_ucodehdr,
704 struct lsf_lsb_header *lsb_hdr)
705{
706
707 u32 code_size = 0;
708 u32 data_size = 0;
709 u32 i = 0;
710 u32 total_apps = p_ucodehdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND];
711
712 /* Lets calculate code size*/
713 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
714 for (i = 0; i < total_apps; i++) {
715 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND
716 (total_apps, i)];
717 }
718 code_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(total_apps)];
719
720 /* Calculate data size*/
721 data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
722 for (i = 0; i < total_apps; i++) {
723 data_size += p_ucodehdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND
724 (total_apps, i)];
725 }
726
727 lsb_hdr->ucode_size = code_size;
728 lsb_hdr->data_size = data_size;
729 lsb_hdr->bl_code_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND];
730 lsb_hdr->bl_imem_off = 0;
731 lsb_hdr->bl_data_off = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND];
732 lsb_hdr->bl_data_size = p_ucodehdr[FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND];
733 return 0;
734}
735
736/*!
737 * @brief lsfm_fill_static_lsb_hdr_info
738 * Populate static LSB header infomation using the provided ucode image
739 */
740static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
741 u32 falcon_id, struct lsfm_managed_ucode_img *pnode)
742{
743
744 struct pmu_gk20a *pmu = &g->pmu;
745 u32 data = 0;
746
747 if (pnode->ucode_img.lsf_desc)
748 memcpy(&pnode->lsb_header.signature, pnode->ucode_img.lsf_desc,
749 sizeof(struct lsf_ucode_desc));
750 pnode->lsb_header.ucode_size = pnode->ucode_img.data_size;
751
752 /* The remainder of the LSB depends on the loader usage */
753 if (pnode->ucode_img.header) {
754 /* Does not use a loader */
755 pnode->lsb_header.data_size = 0;
756 pnode->lsb_header.bl_code_size = 0;
757 pnode->lsb_header.bl_data_off = 0;
758 pnode->lsb_header.bl_data_size = 0;
759
760 lsfm_parse_no_loader_ucode(pnode->ucode_img.header,
761 &(pnode->lsb_header));
762
763 /* Load the first 256 bytes of IMEM. */
764 /* Set LOAD_CODE_AT_0 and DMACTL_REQ_CTX.
765 True for all method based falcons */
766 data = NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE |
767 NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
768 pnode->lsb_header.flags = data;
769 } else {
770 /* Uses a loader. that is has a desc */
771 pnode->lsb_header.data_size = 0;
772
773 /* The loader code size is already aligned (padded) such that
774 the code following it is aligned, but the size in the image
775 desc is not, bloat it up to be on a 256 byte alignment. */
776 pnode->lsb_header.bl_code_size = ALIGN(
777 pnode->ucode_img.desc->bootloader_size,
778 LSF_BL_CODE_SIZE_ALIGNMENT);
779 /* Though the BL is located at 0th offset of the image, the VA
780 is different to make sure that it doesnt collide the actual OS
781 VA range */
782 pnode->lsb_header.bl_imem_off =
783 pnode->ucode_img.desc->bootloader_imem_offset;
784
785 /* TODO: OBJFLCN should export properties using which the below
786 flags should be populated.*/
787 pnode->lsb_header.flags = 0;
788
789 if (falcon_id == pmu->falcon_id) {
790 data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
791 pnode->lsb_header.flags = data;
792 }
793 }
794}
795
796/* Adds a ucode image to the list of managed ucode images managed. */
797static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
798 struct flcn_ucode_img *ucode_image, u32 falcon_id)
799{
800
801 struct lsfm_managed_ucode_img *pnode;
802 pnode = kzalloc(sizeof(struct lsfm_managed_ucode_img), GFP_KERNEL);
803 if (pnode == NULL)
804 return -ENOMEM;
805
806 /* Keep a copy of the ucode image info locally */
807 memcpy(&pnode->ucode_img, ucode_image, sizeof(struct flcn_ucode_img));
808
809 /* Fill in static WPR header info*/
810 pnode->wpr_header.falcon_id = falcon_id;
811 pnode->wpr_header.bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
812 pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY;
813
814 /*TODO to check if PDB_PROP_FLCN_LAZY_BOOTSTRAP is to be supported by
815 Android */
816 /* Fill in static LSB header info elsewhere */
817 lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode);
818 pnode->next = plsfm->ucode_img_list;
819 plsfm->ucode_img_list = pnode;
820 return 0;
821}
822
823/* Free any ucode image structure resources*/
824static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img)
825{
826 if (p_img->lsf_desc != NULL) {
827 kfree(p_img->lsf_desc);
828 p_img->lsf_desc = NULL;
829 }
830}
831
832/* Free any ucode image structure resources*/
833static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img)
834{
835 if (p_img->lsf_desc != NULL) {
836 kfree(p_img->lsf_desc);
837 p_img->lsf_desc = NULL;
838 }
839 if (p_img->desc != NULL) {
840 kfree(p_img->desc);
841 p_img->desc = NULL;
842 }
843}
844
845
846/* Generate WPR requirements for ACR allocation request */
847static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm)
848{
849 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
850 u32 wpr_offset;
851
852 /* Calculate WPR size required */
853
854 /* Start with an array of WPR headers at the base of the WPR.
855 The expectation here is that the secure falcon will do a single DMA
856 read of this array and cache it internally so it's OK to pack these.
857 Also, we add 1 to the falcon count to indicate the end of the array.*/
858 wpr_offset = sizeof(struct lsf_wpr_header) *
859 (plsfm->managed_flcn_cnt+1);
860
861 /* Walk the managed falcons, accounting for the LSB structs
862 as well as the ucode images. */
863 while (pnode) {
864 /* Align, save off, and include an LSB header size */
865 wpr_offset = ALIGN(wpr_offset,
866 LSF_LSB_HEADER_ALIGNMENT);
867 pnode->wpr_header.lsb_offset = wpr_offset;
868 wpr_offset += sizeof(struct lsf_lsb_header);
869
870 /* Align, save off, and include the original (static)
871 ucode image size */
872 wpr_offset = ALIGN(wpr_offset,
873 LSF_UCODE_DATA_ALIGNMENT);
874 pnode->lsb_header.ucode_off = wpr_offset;
875 wpr_offset += pnode->ucode_img.data_size;
876
877 /* For falcons that use a boot loader (BL), we append a loader
878 desc structure on the end of the ucode image and consider this
879 the boot loader data. The host will then copy the loader desc
880 args to this space within the WPR region (before locking down)
881 and the HS bin will then copy them to DMEM 0 for the loader. */
882 if (!pnode->ucode_img.header) {
883 /* Track the size for LSB details filled in later
884 Note that at this point we don't know what kind of i
885 boot loader desc, so we just take the size of the
886 generic one, which is the largest it will will ever be.
887 */
888 /* Align (size bloat) and save off generic
889 descriptor size*/
890 pnode->lsb_header.bl_data_size = ALIGN(
891 sizeof(pnode->bl_gen_desc),
892 LSF_BL_DATA_SIZE_ALIGNMENT);
893
894 /*Align, save off, and include the additional BL data*/
895 wpr_offset = ALIGN(wpr_offset,
896 LSF_BL_DATA_ALIGNMENT);
897 pnode->lsb_header.bl_data_off = wpr_offset;
898 wpr_offset += pnode->lsb_header.bl_data_size;
899 } else {
900 /* bl_data_off is already assigned in static
901 information. But that is from start of the image */
902 pnode->lsb_header.bl_data_off +=
903 (wpr_offset - pnode->ucode_img.data_size);
904 }
905
906 /* Finally, update ucode surface size to include updates */
907 pnode->full_ucode_size = wpr_offset -
908 pnode->lsb_header.ucode_off;
909 pnode = pnode->next;
910 }
911 plsfm->wpr_size = wpr_offset;
912 return 0;
913}
914
915/*Loads ACR bin to FB mem and bootstraps PMU with bootloader code
916 * start and end are addresses of ucode blob in non-WPR region*/
917int gm20b_bootstrap_hs_flcn(struct gk20a *g)
918{
919 struct mm_gk20a *mm = &g->mm;
920 struct vm_gk20a *vm = &mm->pmu.vm;
921 struct device *d = dev_from_gk20a(g);
922 int i, err = 0;
923 struct sg_table *sgt_pmu_ucode;
924 dma_addr_t iova;
925 u64 *pacr_ucode_cpuva = NULL, pacr_ucode_pmu_va, *acr_dmem;
926 u32 img_size_in_bytes;
927 struct flcn_bl_dmem_desc bl_dmem_desc;
928 u32 status, start, size;
929 const struct firmware *acr_fw;
930 struct acr_gm20b *acr = &g->acr;
931 u32 *acr_ucode_header_t210_load;
932 u32 *acr_ucode_data_t210_load;
933
934 start = g->acr.ucode_blob_start;
935 size = g->acr.ucode_blob_size;
936
937 gm20b_dbg_pmu("");
938
939 acr_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_UCODE_IMAGE);
940 if (!acr_fw) {
941 gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
942 return -ENOENT;
943 }
944 acr->hsbin_hdr = (struct bin_hdr *)acr_fw->data;
945 acr->fw_hdr = (struct acr_fw_header *)(acr_fw->data +
946 acr->hsbin_hdr->header_offset);
947 acr_ucode_data_t210_load = (u32 *)(acr_fw->data +
948 acr->hsbin_hdr->data_offset);
949 acr_ucode_header_t210_load = (u32 *)(acr_fw->data +
950 acr->fw_hdr->hdr_offset);
951 img_size_in_bytes = ALIGN((acr->hsbin_hdr->data_size), 256);
952
953 /* Lets patch the signatures first.. */
954 if (acr_ucode_patch_sig(g, acr_ucode_data_t210_load,
955 (u32 *)(acr_fw->data + acr->fw_hdr->sig_prod_offset),
956 (u32 *)(acr_fw->data + acr->fw_hdr->sig_dbg_offset),
957 (u32 *)(acr_fw->data + acr->fw_hdr->patch_loc),
958 (u32 *)(acr_fw->data + acr->fw_hdr->patch_sig)) < 0)
959 return -1;
960 pacr_ucode_cpuva = dma_alloc_coherent(d, img_size_in_bytes, &iova,
961 GFP_KERNEL);
962 if (!pacr_ucode_cpuva)
963 return -ENOMEM;
964
965 err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
966 pacr_ucode_cpuva,
967 iova,
968 img_size_in_bytes);
969 if (err) {
970 gk20a_err(d, "failed to allocate sg table\n");
971 err = -ENOMEM;
972 goto err_free_acr_buf;
973 }
974 pacr_ucode_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
975 img_size_in_bytes,
976 0, /* flags */
977 gk20a_mem_flag_read_only);
978 if (!pacr_ucode_pmu_va) {
979 gk20a_err(d, "failed to map pmu ucode memory!!");
980 err = -ENOMEM;
981 goto err_free_ucode_sgt;
982 }
983 acr_dmem = (u64 *)
984 &(((u8 *)acr_ucode_data_t210_load)[
985 acr_ucode_header_t210_load[2]]);
986 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_start =
987 start;
988 ((struct flcn_acr_desc *)acr_dmem)->nonwpr_ucode_blob_size =
989 size;
990 ((struct flcn_acr_desc *)acr_dmem)->wpr_region_id = 2;
991 ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2;
992 ((struct flcn_acr_desc *)acr_dmem)->regions.region_props[0].region_id
993 = 2;
994 ((struct flcn_acr_desc *)acr_dmem)->regions.region_props[1].region_id
995 = 3;
996 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
997
998 for (i = 0; i < (img_size_in_bytes/4); i++) {
999 gk20a_mem_wr32(pacr_ucode_cpuva, i,
1000 acr_ucode_data_t210_load[i]);
1001 }
1002 /*
1003 * In order to execute this binary, we will be using PMU HAL to run
1004 * a bootloader which will load this image into PMU IMEM/DMEM.
1005 * Fill up the bootloader descriptor for PMU HAL to use..
1006 * TODO: Use standard descriptor which the generic bootloader is
1007 * checked in.
1008 */
1009
1010 bl_dmem_desc.signature[0] = 0;
1011 bl_dmem_desc.signature[1] = 0;
1012 bl_dmem_desc.signature[2] = 0;
1013 bl_dmem_desc.signature[3] = 0;
1014 bl_dmem_desc.ctx_dma = GK20A_PMU_DMAIDX_UCODE;
1015 bl_dmem_desc.code_dma_base =
1016 (unsigned int)(((u64)pacr_ucode_pmu_va >> 8));
1017 bl_dmem_desc.non_sec_code_off = acr_ucode_header_t210_load[0];
1018 bl_dmem_desc.non_sec_code_size = acr_ucode_header_t210_load[1];
1019 bl_dmem_desc.sec_code_off = acr_ucode_header_t210_load[5];
1020 bl_dmem_desc.sec_code_size = acr_ucode_header_t210_load[6];
1021 bl_dmem_desc.code_entry_point = 0; /* Start at 0th offset */
1022 bl_dmem_desc.data_dma_base =
1023 bl_dmem_desc.code_dma_base +
1024 ((acr_ucode_header_t210_load[2]) >> 8);
1025 bl_dmem_desc.data_size = acr_ucode_header_t210_load[3];
1026 status = pmu_exec_gen_bl(g, &bl_dmem_desc, 1);
1027 if (status != 0) {
1028 err = status;
1029 goto err_free_ucode_map;
1030 }
1031 return 0;
1032err_free_ucode_map:
1033 gk20a_gmmu_unmap(vm, pacr_ucode_pmu_va,
1034 img_size_in_bytes, gk20a_mem_flag_none);
1035err_free_ucode_sgt:
1036 gk20a_free_sgtable(&sgt_pmu_ucode);
1037err_free_acr_buf:
1038 dma_free_coherent(d, img_size_in_bytes,
1039 pacr_ucode_cpuva, iova);
1040 return err;
1041}
1042
1043u8 pmu_is_debug_mode_en(struct gk20a *g)
1044{
1045 int ctl_stat = gk20a_readl(g, pwr_pmu_scpctl_stat_r());
1046 return 1;
1047/*TODO return (ctl_stat & pwr_pmu_scpctl_stat_debug_mode_m());*/
1048}
1049
1050/*
1051 * @brief Patch signatures into ucode image
1052 */
1053static int
1054acr_ucode_patch_sig(struct gk20a *g,
1055 unsigned int *p_img,
1056 unsigned int *p_prod_sig,
1057 unsigned int *p_dbg_sig,
1058 unsigned int *p_patch_loc,
1059 unsigned int *p_patch_ind)
1060{
1061 int i, *p_sig;
1062 gm20b_dbg_pmu("");
1063
1064 if (!pmu_is_debug_mode_en(g)) {
1065 p_sig = p_prod_sig;
1066 gm20b_dbg_pmu("PRODUCTION MODE\n");
1067 } else {
1068 p_sig = p_dbg_sig;
1069 gm20b_dbg_pmu("DEBUG MODE\n");
1070 }
1071
1072 /* Patching logic:*/
1073 for (i = 0; i < sizeof(*p_patch_loc)>>2; i++) {
1074 p_img[(p_patch_loc[i]>>2)] = p_sig[(p_patch_ind[i]<<2)];
1075 p_img[(p_patch_loc[i]>>2)+1] = p_sig[(p_patch_ind[i]<<2)+1];
1076 p_img[(p_patch_loc[i]>>2)+2] = p_sig[(p_patch_ind[i]<<2)+2];
1077 p_img[(p_patch_loc[i]>>2)+3] = p_sig[(p_patch_ind[i]<<2)+3];
1078 }
1079 return 0;
1080}
1081
1082static int bl_bootstrap(struct pmu_gk20a *pmu,
1083 struct flcn_bl_dmem_desc *pbl_desc, u32 bl_sz)
1084{
1085 struct gk20a *g = pmu->g;
1086 struct mm_gk20a *mm = &g->mm;
1087 struct pmu_ucode_desc *desc = pmu->desc;
1088 u32 imem_dst_blk = 0;
1089 u32 virt_addr = 0;
1090 u32 tag = 0;
1091 u32 index = 0;
1092 struct hsflcn_bl_desc *pmu_bl_gm10x_desc = g->acr.pmu_hsbl_desc;
1093 u32 *bl_ucode;
1094
1095 gk20a_dbg_fn("");
1096 gk20a_writel(g, pwr_falcon_itfen_r(),
1097 gk20a_readl(g, pwr_falcon_itfen_r()) |
1098 pwr_falcon_itfen_ctxen_enable_f());
1099 gk20a_writel(g, pwr_pmu_new_instblk_r(),
1100 pwr_pmu_new_instblk_ptr_f(
1101 mm->pmu.inst_block.cpu_pa >> 12) |
1102 pwr_pmu_new_instblk_valid_f(1) |
1103 pwr_pmu_new_instblk_target_sys_coh_f());
1104
1105 /* TBD: load all other surfaces */
1106 /*copy bootloader interface structure to dmem*/
1107 gk20a_writel(g, pwr_falcon_dmemc_r(0),
1108 pwr_falcon_dmemc_offs_f(0) |
1109 pwr_falcon_dmemc_blk_f(0) |
1110 pwr_falcon_dmemc_aincw_f(1));
1111 pmu_copy_to_dmem(pmu, 0, (u8 *)pbl_desc,
1112 sizeof(struct flcn_bl_dmem_desc), 0);
1113 /*TODO This had to be copied to bl_desc_dmem_load_off, but since
1114 * this is 0, so ok for now*/
1115
1116 /* Now copy bootloader to TOP of IMEM */
1117 imem_dst_blk = (pwr_falcon_hwcfg_imem_size_v(
1118 gk20a_readl(g, pwr_falcon_hwcfg_r()))) - bl_sz/256;
1119
1120 /* Set Auto-Increment on write */
1121 gk20a_writel(g, pwr_falcon_imemc_r(0),
1122 pwr_falcon_imemc_offs_f(0) |
1123 pwr_falcon_imemc_blk_f(imem_dst_blk) |
1124 pwr_falcon_imemc_aincw_f(1));
1125 virt_addr = pmu_bl_gm10x_desc->bl_start_tag << 8;
1126 tag = virt_addr >> 8; /* tag is always 256B aligned */
1127 bl_ucode = (u32 *)(pmu->ucode.cpuva);
1128 for (index = 0; index < bl_sz/4; index++) {
1129 if ((index % 64) == 0) {
1130 gk20a_writel(g, pwr_falcon_imemt_r(0),
1131 (tag & 0xffff) << 0);
1132 tag++;
1133 }
1134 gk20a_writel(g, pwr_falcon_imemd_r(0),
1135 bl_ucode[index] & 0xffffffff);
1136 }
1137
1138 gk20a_writel(g, pwr_falcon_imemt_r(0), (0 & 0xffff) << 0);
1139 gm20b_dbg_pmu("Before starting falcon with BL\n");
1140
1141 gk20a_writel(g, pwr_falcon_bootvec_r(),
1142 pwr_falcon_bootvec_vec_f(virt_addr));
1143
1144 gk20a_writel(g, pwr_falcon_cpuctl_r(),
1145 pwr_falcon_cpuctl_startcpu_f(1));
1146
1147 gk20a_writel(g, pwr_falcon_os_r(), desc->app_version);
1148
1149 return 0;
1150}
1151
1152int gm20b_init_pmu_setup_hw1(struct gk20a *g, struct flcn_bl_dmem_desc *desc,
1153 u32 bl_sz)
1154{
1155 struct pmu_gk20a *pmu = &g->pmu;
1156 int err;
1157
1158 gk20a_dbg_fn("");
1159 pmu_reset(pmu);
1160
1161 /* setup apertures - virtual */
1162 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE),
1163 pwr_fbif_transcfg_mem_type_virtual_f());
1164 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_VIRT),
1165 pwr_fbif_transcfg_mem_type_virtual_f());
1166 /* setup apertures - physical */
1167 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_VID),
1168 pwr_fbif_transcfg_mem_type_physical_f() |
1169 pwr_fbif_transcfg_target_local_fb_f());
1170 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_COH),
1171 pwr_fbif_transcfg_mem_type_physical_f() |
1172 pwr_fbif_transcfg_target_coherent_sysmem_f());
1173 gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH),
1174 pwr_fbif_transcfg_mem_type_physical_f() |
1175 pwr_fbif_transcfg_target_noncoherent_sysmem_f());
1176
1177 err = bl_bootstrap(pmu, desc, bl_sz);
1178 if (err)
1179 return err;
1180 return 0;
1181}
1182
1183/*
1184* Executes a generic bootloader and wait for PMU to halt.
1185* This BL will be used for those binaries that are loaded
1186* and executed at times other than RM PMU Binary execution.
1187*
1188* @param[in] g gk20a pointer
1189* @param[in] desc Bootloader descriptor
1190* @param[in] dma_idx DMA Index
1191* @param[in] b_wait_for_halt Wait for PMU to HALT
1192*/
1193int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1194{
1195 struct pmu_gk20a *pmu = &g->pmu;
1196 struct mm_gk20a *mm = &g->mm;
1197 struct vm_gk20a *vm = &mm->pmu.vm;
1198 struct device *d = dev_from_gk20a(g);
1199 int i, err = 0;
1200 struct sg_table *sgt_pmu_ucode;
1201 dma_addr_t iova;
1202 u32 bl_sz;
1203 void *bl_cpuva;
1204 u64 bl_pmu_va;
1205 const struct firmware *hsbl_fw;
1206 struct acr_gm20b *acr = &g->acr;
1207 struct hsflcn_bl_desc *pmu_bl_gm10x_desc;
1208 u32 *pmu_bl_gm10x = NULL;
1209 DEFINE_DMA_ATTRS(attrs);
1210 gm20b_dbg_pmu("");
1211
1212 hsbl_fw = gk20a_request_firmware(g, GM20B_HSBIN_PMU_BL_UCODE_IMAGE);
1213 if (!hsbl_fw) {
1214 gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
1215 return -ENOENT;
1216 }
1217 acr->bl_bin_hdr = (struct bin_hdr *)hsbl_fw->data;
1218 acr->pmu_hsbl_desc = (struct hsflcn_bl_desc *)(hsbl_fw->data +
1219 acr->bl_bin_hdr->header_offset);
1220 pmu_bl_gm10x_desc = acr->pmu_hsbl_desc;
1221 pmu_bl_gm10x = (u32 *)(hsbl_fw->data + acr->bl_bin_hdr->data_offset);
1222 bl_sz = ALIGN(pmu_bl_gm10x_desc->bl_img_hdr.bl_code_size,
1223 256);
1224 gm20b_dbg_pmu("Executing Generic Bootloader\n");
1225
1226 /*TODO in code verify that enable PMU is done, scrubbing etc is done*/
1227 /*TODO in code verify that gmmu vm init is done*/
1228 /*
1229 * Disable interrupts to avoid kernel hitting breakpoint due
1230 * to PMU halt
1231 */
1232
1233 gk20a_writel(g, pwr_falcon_irqsclr_r(),
1234 gk20a_readl(g, pwr_falcon_irqsclr_r()) & (~(0x10)));
1235
1236 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1237 bl_cpuva = dma_alloc_attrs(d, bl_sz,
1238 &iova,
1239 GFP_KERNEL,
1240 &attrs);
1241 gm20b_dbg_pmu("bl size is %x\n", bl_sz);
1242 if (!bl_cpuva) {
1243 gk20a_err(d, "failed to allocate memory\n");
1244 err = -ENOMEM;
1245 goto err_done;
1246 }
1247
1248 err = gk20a_get_sgtable(d, &sgt_pmu_ucode,
1249 bl_cpuva,
1250 iova,
1251 bl_sz);
1252 if (err) {
1253 gk20a_err(d, "failed to allocate sg table\n");
1254 goto err_free_cpu_va;
1255 }
1256
1257 bl_pmu_va = gk20a_gmmu_map(vm, &sgt_pmu_ucode,
1258 bl_sz,
1259 0, /* flags */
1260 gk20a_mem_flag_read_only);
1261 if (!bl_pmu_va) {
1262 gk20a_err(d, "failed to map pmu ucode memory!!");
1263 goto err_free_ucode_sgt;
1264 }
1265
1266 for (i = 0; i < (bl_sz) >> 2; i++)
1267 gk20a_mem_wr32(bl_cpuva, i, pmu_bl_gm10x[i]);
1268 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n");
1269 pmu->ucode.cpuva = bl_cpuva;
1270 pmu->ucode.pmu_va = bl_pmu_va;
1271 gm20b_init_pmu_setup_hw1(g, desc, bl_sz);
1272 /* Poll for HALT */
1273 if (b_wait_for_halt) {
1274 err = pmu_wait_for_halt(g, GPU_TIMEOUT_DEFAULT);
1275 if (err == 0)
1276 /* Clear the HALT interrupt */
1277 gk20a_writel(g, pwr_falcon_irqsclr_r(),
1278 gk20a_readl(g, pwr_falcon_irqsclr_r()) & (~(0x10)));
1279 else
1280 goto err_unmap_bl;
1281 }
1282 gm20b_dbg_pmu("after waiting for halt, err %x\n", err);
1283 gm20b_dbg_pmu("err reg :%x\n", readl(mc +
1284 MC_ERR_GENERALIZED_CARVEOUT_STATUS_0));
1285 gm20b_dbg_pmu("phys sec reg %x\n", gk20a_readl(g,
1286 pwr_falcon_mmu_phys_sec_r()));
1287 gm20b_dbg_pmu("sctl reg %x\n", gk20a_readl(g, pwr_falcon_sctl_r()));
1288 start_gm20b_pmu(g);
1289 err = 0;
1290err_unmap_bl:
1291 gk20a_gmmu_unmap(vm, pmu->ucode.pmu_va,
1292 bl_sz, gk20a_mem_flag_none);
1293err_free_ucode_sgt:
1294 gk20a_free_sgtable(&sgt_pmu_ucode);
1295err_free_cpu_va:
1296 dma_free_attrs(d, bl_sz,
1297 bl_cpuva, iova, &attrs);
1298err_done:
1299 return err;
1300}
1301
1302/*!
1303* Wait for PMU to halt
1304* @param[in] g GPU object pointer
1305* @param[in] timeout_us Timeout in Us for PMU to halt
1306* @return '0' if PMU halts
1307*/
1308int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout)
1309{
1310 u32 data = 0;
1311 udelay(10);
1312 data = gk20a_readl(g, pwr_falcon_cpuctl_r());
1313 gm20b_dbg_pmu("bef while cpuctl %xi, timeout %d\n", data, timeout);
1314 while (timeout != 0) {
1315 data = gk20a_readl(g, pwr_falcon_cpuctl_r());
1316 if (data & pwr_falcon_cpuctl_halt_intr_m())
1317			/* CPU is halted, break */
1318 break;
1319 timeout--;
1320 udelay(1);
1321 }
1322 if (timeout == 0)
1323 return -EBUSY;
1324 return 0;
1325}
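For reference, a minimal caller sketch (not part of this change) of how the generic-bootloader path above can be driven once an HS bootloader DMEM descriptor has been filled in from the ACR firmware header; the helper name is illustrative only, and it relies only on pmu_exec_gen_bl() as defined above.

/*
 * Hypothetical caller sketch: run an HS ucode through the generic BL and
 * block until the PMU falcon halts (b_wait_for_halt = 1).
 */
static int example_run_hs_ucode(struct gk20a *g, struct flcn_bl_dmem_desc *bl_desc)
{
	int err;

	err = pmu_exec_gen_bl(g, bl_desc, 1);
	if (err)
		gk20a_err(dev_from_gk20a(g), "HS ucode execution failed: %d", err);
	return err;
}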
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.h b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
new file mode 100644
index 00000000..e0dd50d0
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.h
@@ -0,0 +1,377 @@
1/*
2 * GM20B ACR
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef __ACR_GM20B_H_
17#define __ACR_GM20B_H_
18#include "gk20a/gk20a.h"
19#include "mm_gm20b.h"
20
21/*Defines*/
22
23/*chip specific defines*/
24#define MAX_SUPPORTED_LSFM 1 /*PMU, FECS, GPCCS*/
25#define LSF_UCODE_DATA_ALIGNMENT 4096
26
27#define GM20B_PMU_UCODE_IMAGE "gpmu_ucode.bin"
28#define GM20B_HSBIN_PMU_UCODE_IMAGE "acr_ucode.bin"
29#define GM20B_HSBIN_PMU_BL_UCODE_IMAGE "pmu_bl.bin"
30
31#define LSFM_DISABLE_MASK_NONE (0x00000000) /*Disable no LS falcons*/
32#define LSFM_DISABLE_MASK_ALL (0xFFFFFFFF) /*Disable all LS falcons*/
33
34#define PMU_SECURE_MODE (0x1)
35#define PMU_LSFM_MANAGED (0x2)
36
37/*ACR load related*/
38/*!
39 * Supporting a maximum of 2 regions.
40 * This is needed to pre-allocate space in DMEM
41 */
42#define T210_FLCN_ACR_MAX_REGIONS (2)
43#define LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE (0x200)
44
45/*!
46 * Falcon Id Defines
47 * Defines a common Light Secure Falcon identifier.
48 */
49#define LSF_FALCON_ID_PMU (0)
50#define LSF_FALCON_ID_FECS (2)
51#define LSF_FALCON_ID_GPCCS (3)
52#define LSF_FALCON_ID_INVALID (0xFFFFFFFF)
53
54/*!
55 * Bootstrap Owner Defines
56 */
57#define LSF_BOOTSTRAP_OWNER_DEFAULT (LSF_FALCON_ID_PMU)
58
59/*!
60 * Image Status Defines
61 */
62#define LSF_IMAGE_STATUS_NONE (0)
63#define LSF_IMAGE_STATUS_COPY (1)
64#define LSF_IMAGE_STATUS_VALIDATION (2)
65#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (3)
66
67/*LSB header related defines*/
68#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0
69#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE 1
70#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0
71#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE 4
72
73/*!
74 * Light Secure WPR Content Alignments
75 */
76#define LSF_LSB_HEADER_ALIGNMENT 256
77#define LSF_BL_DATA_ALIGNMENT 256
78#define LSF_BL_DATA_SIZE_ALIGNMENT 256
79#define LSF_BL_CODE_SIZE_ALIGNMENT 256
80
81/*!
82 * Falcon UCODE header index.
83 */
84#define FLCN_NL_UCODE_HDR_OS_CODE_OFF_IND (0)
85#define FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND (1)
86#define FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND (2)
87#define FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND (3)
88#define FLCN_NL_UCODE_HDR_NUM_APPS_IND (4)
89/*!
90 * There are a total of N apps, each with code and data offsets defined in the UCODE header.
91 * These macros provide the CODE and DATA offset and size of the A-th application.
92 */
93#define FLCN_NL_UCODE_HDR_APP_CODE_START_IND (5)
94#define FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(N, A) \
95 (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (A*2))
96#define FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(N, A) \
97 (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (A*2) + 1)
98#define FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) \
99 (FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (N*2) - 1)
100
101#define FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) \
102 (FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) + 1)
103#define FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(N, A) \
104 (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (A*2))
105#define FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(N, A) \
106 (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (A*2) + 1)
107#define FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) \
108 (FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (N*2) - 1)
109
110#define FLCN_NL_UCODE_HDR_OS_OVL_OFF_IND(N) \
111 (FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 1)
112#define FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(N) \
113 (FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 2)
114
115/*Externs*/
116
117/*Structs*/
118
119/*!
120 * Light Secure Falcon Ucode Description Defines
121 * This structure is preliminary and may change as the ucode signing flow evolves.
122 */
123struct lsf_ucode_desc {
124 u8 prd_keys[2][16];
125 u8 dbg_keys[2][16];
126 u32 b_prd_present;
127 u32 b_dbg_present;
128 u32 falcon_id;
129};
130
131/*!
132 * Light Secure WPR Header
133 * Defines state allowing Light Secure Falcon bootstrapping.
134 *
135 * falcon_id - LS falcon ID
136 * lsb_offset - Offset into WPR region holding LSB header
137 * bootstrap_owner - Bootstrap OWNER (either PMU or SEC2)
138 * lazy_bootstrap - Skip bootstrapping by ACR
139 * status - Bootstrapping status
140 */
141struct lsf_wpr_header {
142 u32 falcon_id;
143 u32 lsb_offset;
144 u32 bootstrap_owner;
145 u32 lazy_bootstrap;
146 u32 status;
147};
148
149struct lsf_lsb_header {
150 struct lsf_ucode_desc signature;
151 u32 ucode_off;
152 u32 ucode_size;
153 u32 data_size;
154 u32 bl_code_size;
155 u32 bl_imem_off;
156 u32 bl_data_off;
157 u32 bl_data_size;
158 u32 flags;
159};
160
161/*!
162 * Structure used by the boot-loader to load the rest of the code. This has
163 * to be filled by host and copied into DMEM at offset provided in the
164 * hsflcn_bl_desc.bl_desc_dmem_load_off.
165 *
166 * signature - 16B signature for secure code. 0s if no secure code
167 * ctx_dma - CtxDma to be used by BL while loading code/data
168 * code_dma_base - 256B aligned Physical FB Address where code is located
169 * non_sec_code_off - Offset from code_dma_base where the non-secure code is
170 * located. The offset must be a multiple of 256 for performance.
171 * non_sec_code_size - The size of the non-secure code part.
172 * sec_code_off - Offset from code_dma_base where the secure code is
173 * located; must be a multiple of 256. sec_code_size - Its size.
174 * code_entry_point - Code entry point which will be invoked by BL after
175 * code is loaded.
176 * data_dma_base - 256B aligned Physical FB Address where data is located.
177 * data_size - Size of data block. Should be a multiple of 256B.
178 */
179struct flcn_bl_dmem_desc {
180 u32 signature[4]; /*Should be the first element..*/
181 u32 ctx_dma;
182 u32 code_dma_base;
183 u32 non_sec_code_off;
184 u32 non_sec_code_size;
185 u32 sec_code_off;
186 u32 sec_code_size;
187 u32 code_entry_point;
188 u32 data_dma_base;
189 u32 data_size;
190};
191
192/*!
193 * Legacy structure used by the current PMU/DPU bootloader.
194 */
195struct loader_config {
196 u32 dma_idx;
197	u32 code_dma_base; /*!< upper 32 bits of the 40-bit dma address */
198	u32 code_size_total;
199	u32 code_size_to_load;
200	u32 code_entry_point;
201	u32 data_dma_base; /*!< upper 32 bits of the 40-bit dma address */
202	u32 data_size; /*!< initialized data of the application */
203	u32 overlay_dma_base; /*!< upper 32 bits of the 40-bit dma address */
204 u32 argc;
205 u32 argv;
206};
207
208/*!
209 * Union of all supported structures used by bootloaders.
210 */
211union flcn_bl_generic_desc {
212 struct flcn_bl_dmem_desc bl_dmem_desc;
213 struct loader_config loader_cfg;
214};
215
216struct flcn_ucode_img {
217 u32 *header; /*only some falcons have header*/
218 u32 *data;
219 struct pmu_ucode_desc *desc; /*only some falcons have descriptor*/
220 u32 data_size;
221 void *fw_ver; /*NV2080_CTRL_GPU_GET_FIRMWARE_VERSION_PARAMS struct*/
222 u8 load_entire_os_data; /* load the whole osData section at boot time.*/
223 struct lsf_ucode_desc *lsf_desc; /* NULL if not a light secure falcon.*/
224	u8 free_res_allocs;/* True if there are resources to be freed by the client. */
225 u32 flcn_inst;
226};
227
228/*!
229 * LSFM Managed Ucode Image
230 * next : Next image in the list, NULL if last.
231 * wpr_header : WPR header for this ucode image
232 * lsb_header : LSB header for this ucode image
233 * bl_gen_desc : Bootloader generic desc structure for this ucode image
234 * bl_gen_desc_size : Size of the bootloader desc structure for this ucode image
235 * full_ucode_size : Surface size required for final ucode image
236 * ucode_img : Ucode image info
237 */
238struct lsfm_managed_ucode_img {
239 struct lsfm_managed_ucode_img *next;
240 struct lsf_wpr_header wpr_header;
241 struct lsf_lsb_header lsb_header;
242 union flcn_bl_generic_desc bl_gen_desc;
243 u32 bl_gen_desc_size;
244 u32 full_ucode_size;
245 struct flcn_ucode_img ucode_img;
246};
247
248struct ls_flcn_mgr {
249 u16 managed_flcn_cnt;
250 u32 wpr_size;
251 u32 disable_mask;
252 struct lsfm_managed_ucode_img *ucode_img_list;
253 void *wpr_client_req_state;/*PACR_CLIENT_REQUEST_STATE originally*/
254};
255
256/*ACR related structs*/
257/*!
258 * start_addr - Starting address of region
259 * end_addr - Ending address of region
260 * region_id - Region ID
261 * read_mask - Read Mask
262 * write_mask - Write Mask
263 * client_mask - Bit map of all clients currently using this region
264 */
265struct flcn_acr_region_prop {
266 u32 start_addr;
267 u32 end_addr;
268 u32 region_id;
269 u32 read_mask;
270 u32 write_mask;
271 u32 client_mask;
272};
273
274/*!
275 * no_regions - Number of regions used.
276 * region_props - Region properties
277 */
278struct flcn_acr_regions {
279 u32 no_regions;
280 struct flcn_acr_region_prop region_props[T210_FLCN_ACR_MAX_REGIONS];
281};
282
283/*!
284 * reserved_dmem - When the bootstrap owner has finished bootstrapping the
285 * other falcons and needs to switch into LS mode itself, it
286 * needs its own actual DMEM image copied into DMEM as part of
287 * LS setup. If the ACR descriptor sat at offset 0 it would be
288 * overwritten, corrupting that data, so 0x200 bytes are
289 * reserved to leave room for the loaded data. NOTE: this must
290 * always be the first member.
291 * signature - Signature of ACR ucode.
292 * wpr_region_id - Region ID holding the WPR header and its details
293 * wpr_offset - Offset from the WPR region holding the wpr header
294 * regions - Region descriptors
295 * nonwpr_ucode_blob_start - non-WPR start address where the kernel stores the ucode blob
296 * nonwpr_ucode_blob_size - size of the non-WPR ucode blob stored by the kernel
297 */
298struct flcn_acr_desc {
299 u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/4)];
300 /*Always 1st*/
301 u32 wpr_region_id;
302 u32 wpr_offset;
303 struct flcn_acr_regions regions;
304 u32 nonwpr_ucode_blob_start;
305 u32 nonwpr_ucode_blob_size;
306};
307
308/*!
309 * The header used by RM to figure out code and data sections of bootloader.
310 *
311 * bl_code_off - Offset of code section in the image
312 * bl_code_size - Size of code section in the image
313 * bl_data_off - Offset of data section in the image
314 * bl_data_size - Size of data section in the image
315 */
316struct hsflcn_bl_img_hdr {
317 u32 bl_code_off;
318 u32 bl_code_size;
319 u32 bl_data_off;
320 u32 bl_data_size;
321};
322
323/*!
324 * The descriptor used by RM to figure out the requirements of boot loader.
325 *
326 * bl_start_tag - Starting tag of bootloader
327 * bl_desc_dmem_load_off - DMEM offset where _def_rm_flcn_bl_dmem_desc
328 * is to be loaded
329 * bl_img_hdr - Description of the image
330 */
331struct hsflcn_bl_desc {
332 u32 bl_start_tag;
333 u32 bl_desc_dmem_load_off;
334 struct hsflcn_bl_img_hdr bl_img_hdr;
335};
336
337struct bin_hdr {
338 u32 bin_magic; /* 0x10de */
339 u32 bin_ver; /* versioning of bin format */
340 u32 bin_size; /* entire image size including this header */
341	u32 header_offset; /* Offset of executable binary metadata header,
342				starts at offset 0x100 */
343	u32 data_offset; /* Start of executable binary data, starts at
344				offset 0x200 */
345	u32 data_size; /* Size of executable binary */
346};
347
348struct acr_fw_header {
349 u32 sig_dbg_offset;
350 u32 sig_dbg_size;
351 u32 sig_prod_offset;
352 u32 sig_prod_size;
353 u32 patch_loc;
354 u32 patch_sig;
355 u32 hdr_offset; /*this header points to acr_ucode_header_t210_load*/
356 u32 hdr_size; /*size of above header*/
357};
358
359struct acr_gm20b {
360 u64 ucode_blob_start;
361 u32 ucode_blob_size;
362 struct bin_hdr *bl_bin_hdr;
363 struct hsflcn_bl_desc *pmu_hsbl_desc;
364 struct bin_hdr *hsbin_hdr;
365 struct acr_fw_header *fw_hdr;
366};
367
368void gm20b_init_secure_pmu(struct gpu_ops *gops);
369int prepare_ucode_blob(struct gk20a *g);
370int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
371int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
372int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
373int gm20b_bootstrap_hs_flcn(struct gk20a *g);
374int gm20b_pmu_setup_sw(struct gk20a *g);
375int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt);
376int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_us);
377#endif /*__ACR_GM20B_H_*/
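To make the FLCN_NL_UCODE_HDR_* indexing above concrete, here is a short hypothetical helper (not in the patch) that walks a non-light-secure falcon ucode header; 'hdr' is assumed to point at the u32 header words at the start of the ucode image.

/*
 * Illustrative only: dump the OS and per-app code/data layout of a
 * non-secure falcon ucode using the index macros from acr_gm20b.h.
 */
static void example_dump_ucode_hdr(const u32 *hdr)
{
	u32 nb_apps = hdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND];
	u32 i;

	pr_info("os code: off=0x%x size=0x%x\n",
		hdr[FLCN_NL_UCODE_HDR_OS_CODE_OFF_IND],
		hdr[FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND]);

	for (i = 0; i < nb_apps; i++) {
		pr_info("app %u code: off=0x%x size=0x%x\n", i,
			hdr[FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(nb_apps, i)],
			hdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(nb_apps, i)]);
		pr_info("app %u data: off=0x%x size=0x%x\n", i,
			hdr[FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(nb_apps, i)],
			hdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(nb_apps, i)]);
	}
}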
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 0fc5fe99..0d6b0447 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -25,6 +25,7 @@
25#include "fifo_gm20b.h" 25#include "fifo_gm20b.h"
26#include "gr_ctx_gm20b.h" 26#include "gr_ctx_gm20b.h"
27#include "mm_gm20b.h" 27#include "mm_gm20b.h"
28#include "pmu_gm20b.h"
28 29
29struct gpu_ops gm20b_ops = { 30struct gpu_ops gm20b_ops = {
30 .clock_gating = { 31 .clock_gating = {
@@ -51,6 +52,7 @@ int gm20b_init_hal(struct gpu_ops *gops)
51 gm20b_init_fifo(gops); 52 gm20b_init_fifo(gops);
52 gm20b_init_gr_ctx(gops); 53 gm20b_init_gr_ctx(gops);
53 gm20b_init_mm(gops); 54 gm20b_init_mm(gops);
55 gm20b_init_pmu_ops(gops);
54 gops->name = "gm20b"; 56 gops->name = "gm20b";
55 57
56 return 0; 58 return 0;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
index 39259516..bf0b1ffd 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
@@ -202,4 +202,24 @@ static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
202{ 202{
203 return 0x00000001; 203 return 0x00000001;
204} 204}
205static inline u32 fb_mmu_vpr_info_r(void)
206{
207 return 0x00100cd0;
208}
209static inline u32 fb_mmu_vpr_info_fetch_f(u32 v)
210{
211 return (v & 0x1) << 2;
212}
213static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
214{
215 return (r >> 2) & 0x1;
216}
217static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
218{
219 return 0x00000000;
220}
221static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
222{
223 return 0x00000001;
224}
205#endif 225#endif
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index a4ae1ec0..eb6cf4ad 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -322,6 +322,14 @@ static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
322{ 322{
323 return (v & 0x1) << 1; 323 return (v & 0x1) << 1;
324} 324}
325static inline u32 gr_fecs_cpuctl_alias_r(void)
326{
327 return 0x00409130;
328}
329static inline u32 gr_fecs_cpuctl_alias_startcpu_f(u32 v)
330{
331 return (v & 0x1) << 1;
332}
325static inline u32 gr_fecs_dmactl_r(void) 333static inline u32 gr_fecs_dmactl_r(void)
326{ 334{
327 return 0x0040910c; 335 return 0x0040910c;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h
index 3af9cda8..384a9ab5 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_pwr_gm20b.h
@@ -290,6 +290,86 @@ static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
290{ 290{
291 return (v & 0x1) << 1; 291 return (v & 0x1) << 1;
292} 292}
293static inline u32 pwr_falcon_cpuctl_halt_intr_f(u32 v)
294{
295 return (v & 0x1) << 4;
296}
297static inline u32 pwr_falcon_cpuctl_halt_intr_m(void)
298{
299 return 0x1 << 4;
300}
301static inline u32 pwr_falcon_cpuctl_halt_intr_v(u32 r)
302{
303 return (r >> 4) & 0x1;
304}
305static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_f(u32 v)
306{
307 return (v & 0x1) << 6;
308}
309static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_m(void)
310{
311 return 0x1 << 6;
312}
313static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_v(u32 r)
314{
315 return (r >> 6) & 0x1;
316}
317static inline u32 pwr_falcon_cpuctl_alias_r(void)
318{
319 return 0x0010a130;
320}
321static inline u32 pwr_falcon_cpuctl_alias_startcpu_f(u32 v)
322{
323 return (v & 0x1) << 1;
324}
325static inline u32 pwr_pmu_scpctl_stat_r(void)
326{
327 return 0x0010ac08;
328}
329static inline u32 pwr_pmu_scpctl_stat_debug_mode_f(u32 v)
330{
331 return (v & 0x1) << 20;
332}
333static inline u32 pwr_pmu_scpctl_stat_debug_mode_m(void)
334{
335 return 0x1 << 20;
336}
337static inline u32 pwr_pmu_scpctl_stat_debug_mode_v(u32 r)
338{
339 return (r >> 20) & 0x1;
340}
341static inline u32 pwr_falcon_imemc_r(u32 i)
342{
343 return 0x0010a180 + i*16;
344}
345static inline u32 pwr_falcon_imemc_offs_f(u32 v)
346{
347 return (v & 0x3f) << 2;
348}
349static inline u32 pwr_falcon_imemc_blk_f(u32 v)
350{
351 return (v & 0xff) << 8;
352}
353static inline u32 pwr_falcon_imemc_aincw_f(u32 v)
354{
355 return (v & 0x1) << 24;
356}
357static inline u32 pwr_falcon_imemd_r(u32 i)
358{
359 return 0x0010a184 + i*16;
360}
361static inline u32 pwr_falcon_imemt_r(u32 i)
362{
363 return 0x0010a188 + i*16;
364}
365static inline u32 pwr_falcon_sctl_r(void)
366{
367 return 0x0010a240;
368}
369static inline u32 pwr_falcon_mmu_phys_sec_r(void)
370{
371 return 0x00100ce4;
372}
293static inline u32 pwr_falcon_bootvec_r(void) 373static inline u32 pwr_falcon_bootvec_r(void)
294{ 374{
295 return 0x0010a104; 375 return 0x0010a104;
diff --git a/drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h b/drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h
new file mode 100644
index 00000000..a9273a62
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/mc_carveout_reg.h
@@ -0,0 +1,22 @@
1/*
2 * GM20B MC registers used by ACR
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef _MC_CARVEOUT_REG_H_
17#define _MC_CARVEOUT_REG_H_
18
19#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c
20#define MC_SECURITY_CARVEOUT3_BOM_0 0xcac
21#define MC_ERR_GENERALIZED_CARVEOUT_STATUS_0 0xc00
22#endif /*_MC_CARVEOUT_REG_H_*/
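A hedged debug sketch (not part of the change) of how these MC offsets are typically consumed; it assumes an ioremapped MC aperture base, like the 'mc' pointer used by the readl() call in pmu_exec_gen_bl() above, which this header itself does not provide.

/*
 * Illustrative helper: dump the WPR/VPR carveout base-of-memory registers
 * and the generalized carveout fault status. 'mc' is assumed to be a
 * void __iomem * MC base mapped elsewhere in the driver.
 */
static void example_dump_carveout_regs(void __iomem *mc)
{
	pr_info("carveout2 BOM: 0x%x\n",
		readl(mc + MC_SECURITY_CARVEOUT2_BOM_0));
	pr_info("carveout3 BOM: 0x%x\n",
		readl(mc + MC_SECURITY_CARVEOUT3_BOM_0));
	pr_info("carveout err status: 0x%x\n",
		readl(mc + MC_ERR_GENERALIZED_CARVEOUT_STATUS_0));
}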
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 67d61569..2c211a57 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -13,9 +13,11 @@
13 * more details. 13 * more details.
14 */ 14 */
15 15
16#include <linux/pm_runtime.h>
16#include "gk20a/gk20a.h" 17#include "gk20a/gk20a.h"
17#include "mm_gm20b.h" 18#include "mm_gm20b.h"
18#include "hw_gmmu_gm20b.h" 19#include "hw_gmmu_gm20b.h"
20#include "hw_fb_gm20b.h"
19 21
20static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; 22static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
21static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 }; 23static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
@@ -24,8 +26,8 @@ static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
24static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL }; 26static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
25 27
26static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, 28static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
27 enum gmmu_pgsz_gk20a pgsz_idx, 29 enum gmmu_pgsz_gk20a pgsz_idx,
28 u64 first_vaddr, u64 last_vaddr) 30 u64 first_vaddr, u64 last_vaddr)
29{ 31{
30 int err; 32 int err;
31 u32 pte_lo, pte_hi; 33 u32 pte_lo, pte_hi;
@@ -39,10 +41,10 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
39 gk20a_dbg_fn(""); 41 gk20a_dbg_fn("");
40 42
41 pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, 43 pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
42 &pde_lo, &pde_hi); 44 &pde_lo, &pde_hi);
43 45
44 gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", 46 gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
45 pgsz_idx, pde_lo, pde_hi); 47 pgsz_idx, pde_lo, pde_hi);
46 48
47 /* Expect ptes of the same pde */ 49 /* Expect ptes of the same pde */
48 BUG_ON(pde_lo != pde_hi); 50 BUG_ON(pde_lo != pde_hi);
@@ -185,7 +187,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
185 vaddr_pde_start = (u64)i << pde_shift; 187 vaddr_pde_start = (u64)i << pde_shift;
186 allocate_gmmu_ptes_sparse(vm, pgsz_idx, 188 allocate_gmmu_ptes_sparse(vm, pgsz_idx,
187 vaddr_pde_start, 189 vaddr_pde_start,
188 PDE_ADDR_END(vaddr_pde_start, pde_shift)); 190 PDE_ADDR_END(vaddr_pde_start,
191 pde_shift));
189 } else { 192 } else {
190 /* Check leading and trailing spaces which doesn't fit 193 /* Check leading and trailing spaces which doesn't fit
191 * into entire pde. */ 194 * into entire pde. */
@@ -212,6 +215,56 @@ fail:
212 return err; 215 return err;
213} 216}
214 217
218static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
219 const unsigned int msec)
220{
221 unsigned long timeout;
222
223 if (tegra_platform_is_silicon())
224 timeout = jiffies + msecs_to_jiffies(msec);
225 else
226 timeout = msecs_to_jiffies(msec);
227
228 while (1) {
229 u32 val;
230 val = gk20a_readl(g, fb_mmu_vpr_info_r());
231 if (fb_mmu_vpr_info_fetch_v(val) ==
232 fb_mmu_vpr_info_fetch_false_v())
233 break;
234 if (tegra_platform_is_silicon()) {
235 if (WARN_ON(time_after(jiffies, timeout)))
236 return -ETIME;
237 } else if (--timeout == 0)
238 return -ETIME;
239 }
240 return 0;
241}
242
243int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g)
244{
245 int ret = 0;
246
247 gk20a_busy_noresume(g->dev);
248#ifdef CONFIG_PM_RUNTIME
249 if (!pm_runtime_active(&g->dev->dev))
250 goto fail;
251#endif
252
253 if (gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
254 ret = -ETIME;
255 goto fail;
256 }
257
258 gk20a_writel(g, fb_mmu_vpr_info_r(),
259 fb_mmu_vpr_info_fetch_true_v());
260
261 ret = gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);
262
263fail:
264 gk20a_idle(g->dev);
265 return ret;
266}
267
215void gm20b_init_mm(struct gpu_ops *gops) 268void gm20b_init_mm(struct gpu_ops *gops)
216{ 269{
217 gops->mm.set_sparse = gm20b_vm_put_sparse; 270 gops->mm.set_sparse = gm20b_vm_put_sparse;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
index 0f94d2bf..6939fc1a 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h
@@ -19,6 +19,8 @@ struct gk20a;
19 19
20#define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1)) 20#define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1))
21#define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1)) 21#define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1))
22#define VPR_INFO_FETCH_WAIT (5)
22 23
23void gm20b_init_mm(struct gpu_ops *gops); 24void gm20b_init_mm(struct gpu_ops *gops);
25int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
24#endif 26#endif
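For clarity on the new VPR hook, a small usage sketch (assumed, not taken from this change) showing when a caller would invoke the fetch helper declared above.

/*
 * Illustrative only: after the memory controller's VPR carveout has been
 * reprogrammed, ask the GMMU to re-fetch the VPR info. The helper polls
 * fb_mmu_vpr_info_r() for up to VPR_INFO_FETCH_WAIT ms before and after
 * triggering the fetch.
 */
static int example_refresh_vpr_info(struct gk20a *g)
{
	int err = gm20b_mm_mmu_vpr_info_fetch(g);

	if (err)
		gk20a_err(dev_from_gk20a(g), "VPR info fetch timed out");
	return err;
}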
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
new file mode 100644
index 00000000..4b42b838
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
@@ -0,0 +1,26 @@
1/*
2 * GM20B PMU
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
 5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#include "gk20a/gk20a.h"
17#include "acr_gm20b.h"
18
19void gm20b_init_pmu_ops(struct gpu_ops *gops)
20{
21#ifdef CONFIG_TEGRA_ACR
22 gm20b_init_secure_pmu(gops);
23#else
24 gk20a_init_pmu_ops(gops);
25#endif
26}
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
new file mode 100644
index 00000000..d36d3803
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.h
@@ -0,0 +1,19 @@
1/*
2 * GM20B PMU
3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 */
15
16#ifndef __PMU_GM20B_H_
17#define __PMU_GM20B_H_
18void gm20b_init_pmu_ops(struct gpu_ops *gops);
19#endif /*__PMU_GM20B_H_*/
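Taken together, a speculative outline of the secure boot flow these entry points enable, using only functions declared in acr_gm20b.h; the actual wiring in this change goes through gpu_ops callbacks installed by gm20b_init_secure_pmu() rather than direct calls like these.

/*
 * Speculative flow sketch, not the literal driver call chain.
 */
static int example_secure_boot(struct gk20a *g)
{
	int err;

	/* Build the LS ucode blob (PMU/FECS/GPCCS) in non-WPR memory. */
	err = prepare_ucode_blob(g);
	if (err)
		return err;

	/* Run the HS ACR binary, which copies the blob into WPR, validates
	 * it, and bootstraps the LS falcons. */
	return gm20b_bootstrap_hs_flcn(g);
}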