summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com> 2014-05-08 02:09:49 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:09:49 -0400
commit: 3e5c123862c87e22311c21558178f287f85ecb5d (patch)
tree: e0bef0d98cf00237fce691e9f2b4cab2901a879b
parent: df1852752a3ba9365ae6dca2795af4d43267896d (diff)
gpu: nvgpu: Always initialize system vm
PMU, FECS and GPCCS use the same address space. We used to initialize the address space only if the PMU was enabled. Create the system address space always.

FECS and GPCCS used to have a slower bit-bang method and a faster DMA method for loading ucode. The slower method is needed when FECS and GPCCS do not have an address space. Remove the slower method, as it is no longer needed.

Change-Id: I155619741ecc36aa6bf13a9c1ccb03c7c1330f0a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/406771
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 151
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mm_gk20a.c 8
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mm_gk20a.h 2
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c 2
4 files changed, 9 insertions, 154 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index e6bdf52c..3dbf1435 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -162,123 +162,6 @@ void gk20a_fecs_dump_falcon_stats(struct gk20a *g)
162 } 162 }
163} 163}
164 164
165static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
166{
167 u32 i, ucode_u32_size;
168 const u32 *ucode_u32_data;
169 u32 checksum;
170
171 gk20a_dbg_fn("");
172
173 gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
174 gr_gpccs_dmemc_blk_f(0) |
175 gr_gpccs_dmemc_aincw_f(1)));
176
177 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
178 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
179
180 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
181 gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
182 checksum += ucode_u32_data[i];
183 }
184
185 gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
186 gr_fecs_dmemc_blk_f(0) |
187 gr_fecs_dmemc_aincw_f(1)));
188
189 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
190 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
191
192 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
193 gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
194 checksum += ucode_u32_data[i];
195 }
196 gk20a_dbg_fn("done");
197}
198
199static void gr_gk20a_load_falcon_imem(struct gk20a *g)
200{
201 u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
202 const u32 *ucode_u32_data;
203 u32 tag, i, pad_start, pad_end;
204 u32 checksum;
205
206 gk20a_dbg_fn("");
207
208 cfg = gk20a_readl(g, gr_fecs_cfg_r());
209 fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
210
211 cfg = gk20a_readl(g, gr_gpc0_cfg_r());
212 gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
213
214 /* Use the broadcast address to access all of the GPCCS units. */
215 gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
216 gr_gpccs_imemc_blk_f(0) |
217 gr_gpccs_imemc_aincw_f(1)));
218
219 /* Setup the tags for the instruction memory. */
220 tag = 0;
221 gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
222
223 ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
224 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
225
226 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
227 if (i && ((i % (256/sizeof(u32))) == 0)) {
228 tag++;
229 gk20a_writel(g, gr_gpccs_imemt_r(0),
230 gr_gpccs_imemt_tag_f(tag));
231 }
232 gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
233 checksum += ucode_u32_data[i];
234 }
235
236 pad_start = i*4;
237 pad_end = pad_start+(256-pad_start%256)+256;
238 for (i = pad_start;
239 (i < gpccs_imem_size * 256) && (i < pad_end);
240 i += 4) {
241 if (i && ((i % 256) == 0)) {
242 tag++;
243 gk20a_writel(g, gr_gpccs_imemt_r(0),
244 gr_gpccs_imemt_tag_f(tag));
245 }
246 gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
247 }
248
249 gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
250 gr_fecs_imemc_blk_f(0) |
251 gr_fecs_imemc_aincw_f(1)));
252
253 /* Setup the tags for the instruction memory. */
254 tag = 0;
255 gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
256
257 ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
258 ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
259
260 for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
261 if (i && ((i % (256/sizeof(u32))) == 0)) {
262 tag++;
263 gk20a_writel(g, gr_fecs_imemt_r(0),
264 gr_fecs_imemt_tag_f(tag));
265 }
266 gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
267 checksum += ucode_u32_data[i];
268 }
269
270 pad_start = i*4;
271 pad_end = pad_start+(256-pad_start%256)+256;
272 for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
273 if (i && ((i % 256) == 0)) {
274 tag++;
275 gk20a_writel(g, gr_fecs_imemt_r(0),
276 gr_fecs_imemt_tag_f(tag));
277 }
278 gk20a_writel(g, gr_fecs_imemd_r(0), 0);
279 }
280}
281
282static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, 165static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
283 u32 expect_delay) 166 u32 expect_delay)
284{ 167{
@@ -1784,22 +1667,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1784 return ret; 1667 return ret;
1785} 1668}
1786 1669
1787static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
1788{
1789 gk20a_dbg_fn("");
1790
1791 gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
1792 gr_fecs_ctxsw_mailbox_clear_value_f(~0));
1793
1794 gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
1795 gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
1796
1797 gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
1798 gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
1799
1800 gk20a_dbg_fn("done");
1801}
1802
1803static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g) 1670static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
1804{ 1671{
1805 struct mm_gk20a *mm = &g->mm; 1672 struct mm_gk20a *mm = &g->mm;
@@ -2184,20 +2051,10 @@ static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
2184 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); 2051 gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
2185 } 2052 }
2186 2053
2187 /* 2054 if (!gr->skip_ucode_init)
2188 * In case the gPMU falcon is not being used, revert to the old way of 2055 gr_gk20a_init_ctxsw_ucode(g);
2189 * loading gr ucode, without the faster bootstrap routine. 2056 gr_gk20a_load_falcon_with_bootloader(g);
2190 */ 2057 gr->skip_ucode_init = true;
2191 if (!support_gk20a_pmu()) {
2192 gr_gk20a_load_falcon_dmem(g);
2193 gr_gk20a_load_falcon_imem(g);
2194 gr_gk20a_start_falcon_ucode(g);
2195 } else {
2196 if (!gr->skip_ucode_init)
2197 gr_gk20a_init_ctxsw_ucode(g);
2198 gr_gk20a_load_falcon_with_bootloader(g);
2199 gr->skip_ucode_init = true;
2200 }
2201 2058
2202 ret = gr_gk20a_ctx_wait_ucode(g, 0, 0, 2059 ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
2203 GR_IS_UCODE_OP_EQUAL, 2060 GR_IS_UCODE_OP_EQUAL,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index be22e4a0..234b43c2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -113,7 +113,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
113 int rw_flag); 113 int rw_flag);
114static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); 114static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
115static void gk20a_vm_remove_support(struct vm_gk20a *vm); 115static void gk20a_vm_remove_support(struct vm_gk20a *vm);
116 116static int gk20a_init_system_vm(struct mm_gk20a *mm);
117static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
117 118
118/* note: keep the page sizes sorted lowest to highest here */ 119/* note: keep the page sizes sorted lowest to highest here */
119static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K }; 120static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
@@ -341,6 +342,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
341 342
342 343
343 gk20a_init_bar1_vm(mm); 344 gk20a_init_bar1_vm(mm);
345 gk20a_init_system_vm(mm);
344 346
345 mm->remove_support = gk20a_remove_mm_support; 347 mm->remove_support = gk20a_remove_mm_support;
346 mm->sw_ready = true; 348 mm->sw_ready = true;
@@ -2486,7 +2488,7 @@ int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
2486 return 0; 2488 return 0;
2487} 2489}
2488 2490
2489int gk20a_init_bar1_vm(struct mm_gk20a *mm) 2491static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2490{ 2492{
2491 int err; 2493 int err;
2492 phys_addr_t inst_pa; 2494 phys_addr_t inst_pa;
@@ -2630,7 +2632,7 @@ clean_up:
2630} 2632}
2631 2633
2632/* pmu vm, share channel_vm interfaces */ 2634/* pmu vm, share channel_vm interfaces */
2633int gk20a_init_pmu_vm(struct mm_gk20a *mm) 2635static int gk20a_init_system_vm(struct mm_gk20a *mm)
2634{ 2636{
2635 int err; 2637 int err;
2636 phys_addr_t inst_pa; 2638 phys_addr_t inst_pa;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 4dfc2b7d..c759718e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -285,8 +285,6 @@ struct channel_gk20a;
285 285
286int gk20a_init_mm_support(struct gk20a *g); 286int gk20a_init_mm_support(struct gk20a *g);
287int gk20a_init_mm_setup_sw(struct gk20a *g); 287int gk20a_init_mm_setup_sw(struct gk20a *g);
288int gk20a_init_bar1_vm(struct mm_gk20a *mm);
289int gk20a_init_pmu_vm(struct mm_gk20a *mm);
290 288
291int gk20a_mm_fb_flush(struct gk20a *g); 289int gk20a_mm_fb_flush(struct gk20a *g);
292void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); 290void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index c12496bb..ac01302e 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -1588,8 +1588,6 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g)
1588 INIT_DELAYED_WORK(&pmu->elpg_enable, pmu_elpg_enable_allow); 1588 INIT_DELAYED_WORK(&pmu->elpg_enable, pmu_elpg_enable_allow);
1589 INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue); 1589 INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue);
1590 1590
1591 gk20a_init_pmu_vm(mm);
1592
1593 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); 1591 dma_set_attr(DMA_ATTR_READ_ONLY, &attrs);
1594 pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, 1592 pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX,
1595 &iova, 1593 &iova,