summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
diff options
context:
space:
mode:
authorTerje Bergstrom <tbergstrom@nvidia.com>2014-05-21 06:52:00 -0400
committerDan Willemsen <dwillemsen@nvidia.com>2015-03-18 15:09:52 -0400
commitaf8c1dc3a834850512f1fba863077048a3e14f21 (patch)
tree9a03fb768d6adfe611243907732f1f38fcccf050 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parente6b3d1e87f7f2d91c97bf260d4609f17d62ff8bc (diff)
gpu: nvgpu: Use old ctxsw boot method on gm20b
Boot FECS/GPCCS with the old method on gm20b, because we don't yet have a bootloader for it. Change-Id: I09046960cd86b0402d3ea2cd8e4c92597766fa10; Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>; Reviewed-on: http://git-master/r/412604; Reviewed-by: Automatic_Commit_Validation_User; Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>; Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>; Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c151
1 files changed, 147 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 466f6eed..46a84fd6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -162,6 +162,123 @@ void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
162 } 162 }
163} 163}
164 164
165static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
166{
167 u32 i, ucode_u32_size;
168 const u32 *ucode_u32_data;
169 u32 checksum;
170
171 gk20a_dbg_fn("");
172
173 gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
174 gr_gpccs_dmemc_blk_f(0) |
175 gr_gpccs_dmemc_aincw_f(1)));
176
177 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
178 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
179
180 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
181 gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
182 checksum += ucode_u32_data[i];
183 }
184
185 gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
186 gr_fecs_dmemc_blk_f(0) |
187 gr_fecs_dmemc_aincw_f(1)));
188
189 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
190 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
191
192 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
193 gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
194 checksum += ucode_u32_data[i];
195 }
196 gk20a_dbg_fn("done");
197}
198
199static void gr_gk20a_load_falcon_imem(struct gk20a *g)
200{
201 u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
202 const u32 *ucode_u32_data;
203 u32 tag, i, pad_start, pad_end;
204 u32 checksum;
205
206 gk20a_dbg_fn("");
207
208 cfg = gk20a_readl(g, gr_fecs_cfg_r());
209 fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
210
211 cfg = gk20a_readl(g, gr_gpc0_cfg_r());
212 gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
213
214 /* Use the broadcast address to access all of the GPCCS units. */
215 gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
216 gr_gpccs_imemc_blk_f(0) |
217 gr_gpccs_imemc_aincw_f(1)));
218
219 /* Setup the tags for the instruction memory. */
220 tag = 0;
221 gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
222
223 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
224 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
225
226 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
227 if (i && ((i % (256/sizeof(u32))) == 0)) {
228 tag++;
229 gk20a_writel(g, gr_gpccs_imemt_r(0),
230 gr_gpccs_imemt_tag_f(tag));
231 }
232 gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
233 checksum += ucode_u32_data[i];
234 }
235
236 pad_start = i*4;
237 pad_end = pad_start+(256-pad_start%256)+256;
238 for (i = pad_start;
239 (i < gpccs_imem_size * 256) && (i < pad_end);
240 i += 4) {
241 if (i && ((i % 256) == 0)) {
242 tag++;
243 gk20a_writel(g, gr_gpccs_imemt_r(0),
244 gr_gpccs_imemt_tag_f(tag));
245 }
246 gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
247 }
248
249 gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
250 gr_fecs_imemc_blk_f(0) |
251 gr_fecs_imemc_aincw_f(1)));
252
253 /* Setup the tags for the instruction memory. */
254 tag = 0;
255 gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
256
257 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
258 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
259
260 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
261 if (i && ((i % (256/sizeof(u32))) == 0)) {
262 tag++;
263 gk20a_writel(g, gr_fecs_imemt_r(0),
264 gr_fecs_imemt_tag_f(tag));
265 }
266 gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
267 checksum += ucode_u32_data[i];
268 }
269
270 pad_start = i*4;
271 pad_end = pad_start+(256-pad_start%256)+256;
272 for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
273 if (i && ((i % 256) == 0)) {
274 tag++;
275 gk20a_writel(g, gr_fecs_imemt_r(0),
276 gr_fecs_imemt_tag_f(tag));
277 }
278 gk20a_writel(g, gr_fecs_imemd_r(0), 0);
279 }
280}
281
165static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, 282static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
166 u32 expect_delay) 283 u32 expect_delay)
167{ 284{
@@ -1646,6 +1763,22 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1646 return ret; 1763 return ret;
1647} 1764}
1648 1765
1766static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
1767{
1768 gk20a_dbg_fn("");
1769
1770 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
1771 gr_fecs_ctxsw_mailbox_clear_value_f(~0));
1772
1773 gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
1774 gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
1775
1776 gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
1777 gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
1778
1779 gk20a_dbg_fn("done");
1780}
1781
1649static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) 1782static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
1650{ 1783{
1651 struct mm_gk20a *mm = &g->mm; 1784 struct mm_gk20a *mm = &g->mm;
@@ -2030,10 +2163,20 @@ static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
2030 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); 2163 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
2031 } 2164 }
2032 2165
2033 if (!gr->skip_ucode_init) 2166 /*
2034 gr_gk20a_init_ctxsw_ucode(g); 2167 * In case bootloader is not supported, revert to the old way of
2035 gr_gk20a_load_falcon_with_bootloader(g); 2168 * loading gr ucode, without the faster bootstrap routine.
2036 gr->skip_ucode_init = true; 2169 */
2170 if (g->gpu_characteristics.arch == NVHOST_GPU_ARCH_GM200) {
2171 gr_gk20a_load_falcon_dmem(g);
2172 gr_gk20a_load_falcon_imem(g);
2173 gr_gk20a_start_falcon_ucode(g);
2174 } else {
2175 if (!gr->skip_ucode_init)
2176 gr_gk20a_init_ctxsw_ucode(g);
2177 gr_gk20a_load_falcon_with_bootloader(g);
2178 gr->skip_ucode_init = true;
2179 }
2037 2180
2038 ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, 2181 ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
2039 GR_IS_UCODE_OP_EQUAL, 2182 GR_IS_UCODE_OP_EQUAL,