diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 151 |
1 files changed, 147 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 466f6eed..46a84fd6 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -162,6 +162,123 @@ void gk20a_fecs_dump_falcon_stats(struct gk20a *g) | |||
162 | } | 162 | } |
163 | } | 163 | } |
164 | 164 | ||
165 | static void gr_gk20a_load_falcon_dmem(struct gk20a *g) | ||
166 | { | ||
167 | u32 i, ucode_u32_size; | ||
168 | const u32 *ucode_u32_data; | ||
169 | u32 checksum; | ||
170 | |||
171 | gk20a_dbg_fn(""); | ||
172 | |||
173 | gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) | | ||
174 | gr_gpccs_dmemc_blk_f(0) | | ||
175 | gr_gpccs_dmemc_aincw_f(1))); | ||
176 | |||
177 | ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count; | ||
178 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l; | ||
179 | |||
180 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
181 | gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]); | ||
182 | checksum += ucode_u32_data[i]; | ||
183 | } | ||
184 | |||
185 | gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) | | ||
186 | gr_fecs_dmemc_blk_f(0) | | ||
187 | gr_fecs_dmemc_aincw_f(1))); | ||
188 | |||
189 | ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count; | ||
190 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l; | ||
191 | |||
192 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
193 | gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]); | ||
194 | checksum += ucode_u32_data[i]; | ||
195 | } | ||
196 | gk20a_dbg_fn("done"); | ||
197 | } | ||
198 | |||
199 | static void gr_gk20a_load_falcon_imem(struct gk20a *g) | ||
200 | { | ||
201 | u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size; | ||
202 | const u32 *ucode_u32_data; | ||
203 | u32 tag, i, pad_start, pad_end; | ||
204 | u32 checksum; | ||
205 | |||
206 | gk20a_dbg_fn(""); | ||
207 | |||
208 | cfg = gk20a_readl(g, gr_fecs_cfg_r()); | ||
209 | fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg); | ||
210 | |||
211 | cfg = gk20a_readl(g, gr_gpc0_cfg_r()); | ||
212 | gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg); | ||
213 | |||
214 | /* Use the broadcast address to access all of the GPCCS units. */ | ||
215 | gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) | | ||
216 | gr_gpccs_imemc_blk_f(0) | | ||
217 | gr_gpccs_imemc_aincw_f(1))); | ||
218 | |||
219 | /* Setup the tags for the instruction memory. */ | ||
220 | tag = 0; | ||
221 | gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag)); | ||
222 | |||
223 | ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count; | ||
224 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l; | ||
225 | |||
226 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
227 | if (i && ((i % (256/sizeof(u32))) == 0)) { | ||
228 | tag++; | ||
229 | gk20a_writel(g, gr_gpccs_imemt_r(0), | ||
230 | gr_gpccs_imemt_tag_f(tag)); | ||
231 | } | ||
232 | gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]); | ||
233 | checksum += ucode_u32_data[i]; | ||
234 | } | ||
235 | |||
236 | pad_start = i*4; | ||
237 | pad_end = pad_start+(256-pad_start%256)+256; | ||
238 | for (i = pad_start; | ||
239 | (i < gpccs_imem_size * 256) && (i < pad_end); | ||
240 | i += 4) { | ||
241 | if (i && ((i % 256) == 0)) { | ||
242 | tag++; | ||
243 | gk20a_writel(g, gr_gpccs_imemt_r(0), | ||
244 | gr_gpccs_imemt_tag_f(tag)); | ||
245 | } | ||
246 | gk20a_writel(g, gr_gpccs_imemd_r(0), 0); | ||
247 | } | ||
248 | |||
249 | gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) | | ||
250 | gr_fecs_imemc_blk_f(0) | | ||
251 | gr_fecs_imemc_aincw_f(1))); | ||
252 | |||
253 | /* Setup the tags for the instruction memory. */ | ||
254 | tag = 0; | ||
255 | gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag)); | ||
256 | |||
257 | ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count; | ||
258 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l; | ||
259 | |||
260 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
261 | if (i && ((i % (256/sizeof(u32))) == 0)) { | ||
262 | tag++; | ||
263 | gk20a_writel(g, gr_fecs_imemt_r(0), | ||
264 | gr_fecs_imemt_tag_f(tag)); | ||
265 | } | ||
266 | gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]); | ||
267 | checksum += ucode_u32_data[i]; | ||
268 | } | ||
269 | |||
270 | pad_start = i*4; | ||
271 | pad_end = pad_start+(256-pad_start%256)+256; | ||
272 | for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) { | ||
273 | if (i && ((i % 256) == 0)) { | ||
274 | tag++; | ||
275 | gk20a_writel(g, gr_fecs_imemt_r(0), | ||
276 | gr_fecs_imemt_tag_f(tag)); | ||
277 | } | ||
278 | gk20a_writel(g, gr_fecs_imemd_r(0), 0); | ||
279 | } | ||
280 | } | ||
281 | |||
165 | static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, | 282 | static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, |
166 | u32 expect_delay) | 283 | u32 expect_delay) |
167 | { | 284 | { |
@@ -1646,6 +1763,22 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1646 | return ret; | 1763 | return ret; |
1647 | } | 1764 | } |
1648 | 1765 | ||
1766 | static void gr_gk20a_start_falcon_ucode(struct gk20a *g) | ||
1767 | { | ||
1768 | gk20a_dbg_fn(""); | ||
1769 | |||
1770 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), | ||
1771 | gr_fecs_ctxsw_mailbox_clear_value_f(~0)); | ||
1772 | |||
1773 | gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0)); | ||
1774 | gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0)); | ||
1775 | |||
1776 | gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1)); | ||
1777 | gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1)); | ||
1778 | |||
1779 | gk20a_dbg_fn("done"); | ||
1780 | } | ||
1781 | |||
1649 | static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) | 1782 | static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) |
1650 | { | 1783 | { |
1651 | struct mm_gk20a *mm = &g->mm; | 1784 | struct mm_gk20a *mm = &g->mm; |
@@ -2030,10 +2163,20 @@ static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr) | |||
2030 | gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); | 2163 | gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); |
2031 | } | 2164 | } |
2032 | 2165 | ||
2033 | if (!gr->skip_ucode_init) | 2166 | /* |
2034 | gr_gk20a_init_ctxsw_ucode(g); | 2167 | * In case bootloader is not supported, revert to the old way of |
2035 | gr_gk20a_load_falcon_with_bootloader(g); | 2168 | * loading gr ucode, without the faster bootstrap routine. |
2036 | gr->skip_ucode_init = true; | 2169 | */ |
2170 | if (g->gpu_characteristics.arch == NVHOST_GPU_ARCH_GM200) { | ||
2171 | gr_gk20a_load_falcon_dmem(g); | ||
2172 | gr_gk20a_load_falcon_imem(g); | ||
2173 | gr_gk20a_start_falcon_ucode(g); | ||
2174 | } else { | ||
2175 | if (!gr->skip_ucode_init) | ||
2176 | gr_gk20a_init_ctxsw_ucode(g); | ||
2177 | gr_gk20a_load_falcon_with_bootloader(g); | ||
2178 | gr->skip_ucode_init = true; | ||
2179 | } | ||
2037 | 2180 | ||
2038 | ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, | 2181 | ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, |
2039 | GR_IS_UCODE_OP_EQUAL, | 2182 | GR_IS_UCODE_OP_EQUAL, |