diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-05-08 02:09:49 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:09:49 -0400 |
commit | 3e5c123862c87e22311c21558178f287f85ecb5d (patch) | |
tree | e0bef0d98cf00237fce691e9f2b4cab2901a879b /drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |
parent | df1852752a3ba9365ae6dca2795af4d43267896d (diff) |
gpu: nvgpu: Always initialize system vm
PMU, FECS and GPCCS use the same address space. We used to initialize
the address space only if PMU was enabled. Always create the system
address space.
FECS and GPCCS used to have two methods for loading ucode: a slower
bit-bang method and a faster DMA method. The slower method is needed
when FECS and GPCCS do not have an address space; since they now always
have one, remove the slower method, as it is no longer needed.
Change-Id: I155619741ecc36aa6bf13a9c1ccb03c7c1330f0a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/406771
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 151 |
1 files changed, 4 insertions, 147 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index e6bdf52c..3dbf1435 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -162,123 +162,6 @@ void gk20a_fecs_dump_falcon_stats(struct gk20a *g) | |||
162 | } | 162 | } |
163 | } | 163 | } |
164 | 164 | ||
165 | static void gr_gk20a_load_falcon_dmem(struct gk20a *g) | ||
166 | { | ||
167 | u32 i, ucode_u32_size; | ||
168 | const u32 *ucode_u32_data; | ||
169 | u32 checksum; | ||
170 | |||
171 | gk20a_dbg_fn(""); | ||
172 | |||
173 | gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) | | ||
174 | gr_gpccs_dmemc_blk_f(0) | | ||
175 | gr_gpccs_dmemc_aincw_f(1))); | ||
176 | |||
177 | ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count; | ||
178 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l; | ||
179 | |||
180 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
181 | gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]); | ||
182 | checksum += ucode_u32_data[i]; | ||
183 | } | ||
184 | |||
185 | gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) | | ||
186 | gr_fecs_dmemc_blk_f(0) | | ||
187 | gr_fecs_dmemc_aincw_f(1))); | ||
188 | |||
189 | ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count; | ||
190 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l; | ||
191 | |||
192 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
193 | gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]); | ||
194 | checksum += ucode_u32_data[i]; | ||
195 | } | ||
196 | gk20a_dbg_fn("done"); | ||
197 | } | ||
198 | |||
199 | static void gr_gk20a_load_falcon_imem(struct gk20a *g) | ||
200 | { | ||
201 | u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size; | ||
202 | const u32 *ucode_u32_data; | ||
203 | u32 tag, i, pad_start, pad_end; | ||
204 | u32 checksum; | ||
205 | |||
206 | gk20a_dbg_fn(""); | ||
207 | |||
208 | cfg = gk20a_readl(g, gr_fecs_cfg_r()); | ||
209 | fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg); | ||
210 | |||
211 | cfg = gk20a_readl(g, gr_gpc0_cfg_r()); | ||
212 | gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg); | ||
213 | |||
214 | /* Use the broadcast address to access all of the GPCCS units. */ | ||
215 | gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) | | ||
216 | gr_gpccs_imemc_blk_f(0) | | ||
217 | gr_gpccs_imemc_aincw_f(1))); | ||
218 | |||
219 | /* Setup the tags for the instruction memory. */ | ||
220 | tag = 0; | ||
221 | gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag)); | ||
222 | |||
223 | ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count; | ||
224 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l; | ||
225 | |||
226 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
227 | if (i && ((i % (256/sizeof(u32))) == 0)) { | ||
228 | tag++; | ||
229 | gk20a_writel(g, gr_gpccs_imemt_r(0), | ||
230 | gr_gpccs_imemt_tag_f(tag)); | ||
231 | } | ||
232 | gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]); | ||
233 | checksum += ucode_u32_data[i]; | ||
234 | } | ||
235 | |||
236 | pad_start = i*4; | ||
237 | pad_end = pad_start+(256-pad_start%256)+256; | ||
238 | for (i = pad_start; | ||
239 | (i < gpccs_imem_size * 256) && (i < pad_end); | ||
240 | i += 4) { | ||
241 | if (i && ((i % 256) == 0)) { | ||
242 | tag++; | ||
243 | gk20a_writel(g, gr_gpccs_imemt_r(0), | ||
244 | gr_gpccs_imemt_tag_f(tag)); | ||
245 | } | ||
246 | gk20a_writel(g, gr_gpccs_imemd_r(0), 0); | ||
247 | } | ||
248 | |||
249 | gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) | | ||
250 | gr_fecs_imemc_blk_f(0) | | ||
251 | gr_fecs_imemc_aincw_f(1))); | ||
252 | |||
253 | /* Setup the tags for the instruction memory. */ | ||
254 | tag = 0; | ||
255 | gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag)); | ||
256 | |||
257 | ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count; | ||
258 | ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l; | ||
259 | |||
260 | for (i = 0, checksum = 0; i < ucode_u32_size; i++) { | ||
261 | if (i && ((i % (256/sizeof(u32))) == 0)) { | ||
262 | tag++; | ||
263 | gk20a_writel(g, gr_fecs_imemt_r(0), | ||
264 | gr_fecs_imemt_tag_f(tag)); | ||
265 | } | ||
266 | gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]); | ||
267 | checksum += ucode_u32_data[i]; | ||
268 | } | ||
269 | |||
270 | pad_start = i*4; | ||
271 | pad_end = pad_start+(256-pad_start%256)+256; | ||
272 | for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) { | ||
273 | if (i && ((i % 256) == 0)) { | ||
274 | tag++; | ||
275 | gk20a_writel(g, gr_fecs_imemt_r(0), | ||
276 | gr_fecs_imemt_tag_f(tag)); | ||
277 | } | ||
278 | gk20a_writel(g, gr_fecs_imemd_r(0), 0); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, | 165 | static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, |
283 | u32 expect_delay) | 166 | u32 expect_delay) |
284 | { | 167 | { |
@@ -1784,22 +1667,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1784 | return ret; | 1667 | return ret; |
1785 | } | 1668 | } |
1786 | 1669 | ||
1787 | static void gr_gk20a_start_falcon_ucode(struct gk20a *g) | ||
1788 | { | ||
1789 | gk20a_dbg_fn(""); | ||
1790 | |||
1791 | gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), | ||
1792 | gr_fecs_ctxsw_mailbox_clear_value_f(~0)); | ||
1793 | |||
1794 | gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0)); | ||
1795 | gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0)); | ||
1796 | |||
1797 | gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1)); | ||
1798 | gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1)); | ||
1799 | |||
1800 | gk20a_dbg_fn("done"); | ||
1801 | } | ||
1802 | |||
1803 | static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) | 1670 | static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) |
1804 | { | 1671 | { |
1805 | struct mm_gk20a *mm = &g->mm; | 1672 | struct mm_gk20a *mm = &g->mm; |
@@ -2184,20 +2051,10 @@ static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr) | |||
2184 | gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); | 2051 | gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); |
2185 | } | 2052 | } |
2186 | 2053 | ||
2187 | /* | 2054 | if (!gr->skip_ucode_init) |
2188 | * In case the gPMU falcon is not being used, revert to the old way of | 2055 | gr_gk20a_init_ctxsw_ucode(g); |
2189 | * loading gr ucode, without the faster bootstrap routine. | 2056 | gr_gk20a_load_falcon_with_bootloader(g); |
2190 | */ | 2057 | gr->skip_ucode_init = true; |
2191 | if (!support_gk20a_pmu()) { | ||
2192 | gr_gk20a_load_falcon_dmem(g); | ||
2193 | gr_gk20a_load_falcon_imem(g); | ||
2194 | gr_gk20a_start_falcon_ucode(g); | ||
2195 | } else { | ||
2196 | if (!gr->skip_ucode_init) | ||
2197 | gr_gk20a_init_ctxsw_ucode(g); | ||
2198 | gr_gk20a_load_falcon_with_bootloader(g); | ||
2199 | gr->skip_ucode_init = true; | ||
2200 | } | ||
2201 | 2058 | ||
2202 | ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, | 2059 | ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, |
2203 | GR_IS_UCODE_OP_EQUAL, | 2060 | GR_IS_UCODE_OP_EQUAL, |