summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com> 2014-05-08 02:09:49 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:09:49 -0400
commit: 3e5c123862c87e22311c21558178f287f85ecb5d (patch)
tree: e0bef0d98cf00237fce691e9f2b4cab2901a879b /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent: df1852752a3ba9365ae6dca2795af4d43267896d (diff)
gpu: nvgpu: Always initialize system vm
PMU, FECS and GPCCS use the same address space. We used to initialize the address space only if the PMU was enabled. Always create the system address space. FECS and GPCCS used to have a slower bit-bang method and a faster DMA method for loading ucode. The slower method is needed when FECS and GPCCS do not have an address space. Remove the slower method, as it is no longer needed. Change-Id: I155619741ecc36aa6bf13a9c1ccb03c7c1330f0a Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/406771
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c151
1 files changed, 4 insertions, 147 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index e6bdf52c..3dbf1435 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -162,123 +162,6 @@ void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
162 } 162 }
163} 163}
164 164
165static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
166{
167 u32 i, ucode_u32_size;
168 const u32 *ucode_u32_data;
169 u32 checksum;
170
171 gk20a_dbg_fn("");
172
173 gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
174 gr_gpccs_dmemc_blk_f(0) |
175 gr_gpccs_dmemc_aincw_f(1)));
176
177 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
178 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
179
180 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
181 gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
182 checksum += ucode_u32_data[i];
183 }
184
185 gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
186 gr_fecs_dmemc_blk_f(0) |
187 gr_fecs_dmemc_aincw_f(1)));
188
189 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
190 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
191
192 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
193 gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
194 checksum += ucode_u32_data[i];
195 }
196 gk20a_dbg_fn("done");
197}
198
199static void gr_gk20a_load_falcon_imem(struct gk20a *g)
200{
201 u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
202 const u32 *ucode_u32_data;
203 u32 tag, i, pad_start, pad_end;
204 u32 checksum;
205
206 gk20a_dbg_fn("");
207
208 cfg = gk20a_readl(g, gr_fecs_cfg_r());
209 fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
210
211 cfg = gk20a_readl(g, gr_gpc0_cfg_r());
212 gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
213
214 /* Use the broadcast address to access all of the GPCCS units. */
215 gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
216 gr_gpccs_imemc_blk_f(0) |
217 gr_gpccs_imemc_aincw_f(1)));
218
219 /* Setup the tags for the instruction memory. */
220 tag = 0;
221 gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
222
223 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
224 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
225
226 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
227 if (i && ((i % (256/sizeof(u32))) == 0)) {
228 tag++;
229 gk20a_writel(g, gr_gpccs_imemt_r(0),
230 gr_gpccs_imemt_tag_f(tag));
231 }
232 gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
233 checksum += ucode_u32_data[i];
234 }
235
236 pad_start = i*4;
237 pad_end = pad_start+(256-pad_start%256)+256;
238 for (i = pad_start;
239 (i < gpccs_imem_size * 256) && (i < pad_end);
240 i += 4) {
241 if (i && ((i % 256) == 0)) {
242 tag++;
243 gk20a_writel(g, gr_gpccs_imemt_r(0),
244 gr_gpccs_imemt_tag_f(tag));
245 }
246 gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
247 }
248
249 gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
250 gr_fecs_imemc_blk_f(0) |
251 gr_fecs_imemc_aincw_f(1)));
252
253 /* Setup the tags for the instruction memory. */
254 tag = 0;
255 gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
256
257 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
258 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
259
260 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
261 if (i && ((i % (256/sizeof(u32))) == 0)) {
262 tag++;
263 gk20a_writel(g, gr_fecs_imemt_r(0),
264 gr_fecs_imemt_tag_f(tag));
265 }
266 gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
267 checksum += ucode_u32_data[i];
268 }
269
270 pad_start = i*4;
271 pad_end = pad_start+(256-pad_start%256)+256;
272 for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
273 if (i && ((i % 256) == 0)) {
274 tag++;
275 gk20a_writel(g, gr_fecs_imemt_r(0),
276 gr_fecs_imemt_tag_f(tag));
277 }
278 gk20a_writel(g, gr_fecs_imemd_r(0), 0);
279 }
280}
281
282static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, 165static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
283 u32 expect_delay) 166 u32 expect_delay)
284{ 167{
@@ -1784,22 +1667,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1784 return ret; 1667 return ret;
1785} 1668}
1786 1669
1787static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
1788{
1789 gk20a_dbg_fn("");
1790
1791 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
1792 gr_fecs_ctxsw_mailbox_clear_value_f(~0));
1793
1794 gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
1795 gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
1796
1797 gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
1798 gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
1799
1800 gk20a_dbg_fn("done");
1801}
1802
1803static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) 1670static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
1804{ 1671{
1805 struct mm_gk20a *mm = &g->mm; 1672 struct mm_gk20a *mm = &g->mm;
@@ -2184,20 +2051,10 @@ static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
2184 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); 2051 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
2185 } 2052 }
2186 2053
2187 /* 2054 if (!gr->skip_ucode_init)
2188 * In case the gPMU falcon is not being used, revert to the old way of 2055 gr_gk20a_init_ctxsw_ucode(g);
2189 * loading gr ucode, without the faster bootstrap routine. 2056 gr_gk20a_load_falcon_with_bootloader(g);
2190 */ 2057 gr->skip_ucode_init = true;
2191 if (!support_gk20a_pmu()) {
2192 gr_gk20a_load_falcon_dmem(g);
2193 gr_gk20a_load_falcon_imem(g);
2194 gr_gk20a_start_falcon_ucode(g);
2195 } else {
2196 if (!gr->skip_ucode_init)
2197 gr_gk20a_init_ctxsw_ucode(g);
2198 gr_gk20a_load_falcon_with_bootloader(g);
2199 gr->skip_ucode_init = true;
2200 }
2201 2058
2202 ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, 2059 ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
2203 GR_IS_UCODE_OP_EQUAL, 2060 GR_IS_UCODE_OP_EQUAL,