diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 73 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/debug_gk20a.c | 59 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 27 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 57 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 359 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 144 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 31 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 206 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 26 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 2 |
14 files changed, 493 insertions, 510 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 990972e4..065e8ab1 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -129,28 +129,25 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c) | |||
129 | { | 129 | { |
130 | u32 addr_lo; | 130 | u32 addr_lo; |
131 | u32 addr_hi; | 131 | u32 addr_hi; |
132 | void *inst_ptr; | ||
133 | struct gk20a *g = c->g; | 132 | struct gk20a *g = c->g; |
134 | 133 | ||
135 | gk20a_dbg_fn(""); | 134 | gk20a_dbg_fn(""); |
136 | 135 | ||
137 | inst_ptr = c->inst_block.cpu_va; | ||
138 | if (!inst_ptr) | ||
139 | return -ENOMEM; | ||
140 | |||
141 | addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); | 136 | addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); |
142 | addr_hi = u64_hi32(c->userd_iova); | 137 | addr_hi = u64_hi32(c->userd_iova); |
143 | 138 | ||
144 | gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", | 139 | gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", |
145 | c->hw_chid, (u64)c->userd_iova); | 140 | c->hw_chid, (u64)c->userd_iova); |
146 | 141 | ||
147 | gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(), | 142 | gk20a_mem_wr32(g, &c->inst_block, |
143 | ram_in_ramfc_w() + ram_fc_userd_w(), | ||
148 | (g->mm.vidmem_is_vidmem ? | 144 | (g->mm.vidmem_is_vidmem ? |
149 | pbdma_userd_target_sys_mem_ncoh_f() : | 145 | pbdma_userd_target_sys_mem_ncoh_f() : |
150 | pbdma_userd_target_vid_mem_f()) | | 146 | pbdma_userd_target_vid_mem_f()) | |
151 | pbdma_userd_addr_f(addr_lo)); | 147 | pbdma_userd_addr_f(addr_lo)); |
152 | 148 | ||
153 | gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(), | 149 | gk20a_mem_wr32(g, &c->inst_block, |
150 | ram_in_ramfc_w() + ram_fc_userd_hi_w(), | ||
154 | pbdma_userd_hi_addr_f(addr_hi)); | 151 | pbdma_userd_hi_addr_f(addr_hi)); |
155 | 152 | ||
156 | return 0; | 153 | return 0; |
@@ -186,13 +183,8 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | |||
186 | 183 | ||
187 | static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) | 184 | static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) |
188 | { | 185 | { |
189 | void *inst_ptr; | ||
190 | int shift = 0, value = 0; | 186 | int shift = 0, value = 0; |
191 | 187 | ||
192 | inst_ptr = c->inst_block.cpu_va; | ||
193 | if (!inst_ptr) | ||
194 | return -ENOMEM; | ||
195 | |||
196 | gk20a_channel_get_timescale_from_timeslice(c->g, | 188 | gk20a_channel_get_timescale_from_timeslice(c->g, |
197 | c->timeslice_us, &value, &shift); | 189 | c->timeslice_us, &value, &shift); |
198 | 190 | ||
@@ -203,7 +195,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) | |||
203 | WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); | 195 | WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); |
204 | 196 | ||
205 | /* set new timeslice */ | 197 | /* set new timeslice */ |
206 | gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), | 198 | gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(), |
207 | value | (shift << 12) | | 199 | value | (shift << 12) | |
208 | fifo_runlist_timeslice_enable_true_f()); | 200 | fifo_runlist_timeslice_enable_true_f()); |
209 | 201 | ||
@@ -255,33 +247,30 @@ u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c) | |||
255 | int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | 247 | int channel_gk20a_setup_ramfc(struct channel_gk20a *c, |
256 | u64 gpfifo_base, u32 gpfifo_entries, u32 flags) | 248 | u64 gpfifo_base, u32 gpfifo_entries, u32 flags) |
257 | { | 249 | { |
258 | void *inst_ptr; | 250 | struct gk20a *g = c->g; |
251 | struct mem_desc *mem = &c->inst_block; | ||
259 | 252 | ||
260 | gk20a_dbg_fn(""); | 253 | gk20a_dbg_fn(""); |
261 | 254 | ||
262 | inst_ptr = c->inst_block.cpu_va; | 255 | gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v()); |
263 | if (!inst_ptr) | ||
264 | return -ENOMEM; | ||
265 | |||
266 | memset(inst_ptr, 0, ram_fc_size_val_v()); | ||
267 | 256 | ||
268 | gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), | 257 | gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(), |
269 | pbdma_gp_base_offset_f( | 258 | pbdma_gp_base_offset_f( |
270 | u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); | 259 | u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); |
271 | 260 | ||
272 | gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), | 261 | gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(), |
273 | pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | | 262 | pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | |
274 | pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); | 263 | pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); |
275 | 264 | ||
276 | gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), | 265 | gk20a_mem_wr32(g, mem, ram_fc_signature_w(), |
277 | c->g->ops.fifo.get_pbdma_signature(c->g)); | 266 | c->g->ops.fifo.get_pbdma_signature(c->g)); |
278 | 267 | ||
279 | gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), | 268 | gk20a_mem_wr32(g, mem, ram_fc_formats_w(), |
280 | pbdma_formats_gp_fermi0_f() | | 269 | pbdma_formats_gp_fermi0_f() | |
281 | pbdma_formats_pb_fermi1_f() | | 270 | pbdma_formats_pb_fermi1_f() | |
282 | pbdma_formats_mp_fermi0_f()); | 271 | pbdma_formats_mp_fermi0_f()); |
283 | 272 | ||
284 | gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), | 273 | gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(), |
285 | pbdma_pb_header_priv_user_f() | | 274 | pbdma_pb_header_priv_user_f() | |
286 | pbdma_pb_header_method_zero_f() | | 275 | pbdma_pb_header_method_zero_f() | |
287 | pbdma_pb_header_subchannel_zero_f() | | 276 | pbdma_pb_header_subchannel_zero_f() | |
@@ -289,47 +278,49 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | |||
289 | pbdma_pb_header_first_true_f() | | 278 | pbdma_pb_header_first_true_f() | |
290 | pbdma_pb_header_type_inc_f()); | 279 | pbdma_pb_header_type_inc_f()); |
291 | 280 | ||
292 | gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), | 281 | gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(), |
293 | pbdma_subdevice_id_f(1) | | 282 | pbdma_subdevice_id_f(1) | |
294 | pbdma_subdevice_status_active_f() | | 283 | pbdma_subdevice_status_active_f() | |
295 | pbdma_subdevice_channel_dma_enable_f()); | 284 | pbdma_subdevice_channel_dma_enable_f()); |
296 | 285 | ||
297 | gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f()); | 286 | gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f()); |
298 | 287 | ||
299 | gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), | 288 | gk20a_mem_wr32(g, mem, ram_fc_acquire_w(), |
300 | channel_gk20a_pbdma_acquire_val(c)); | 289 | channel_gk20a_pbdma_acquire_val(c)); |
301 | 290 | ||
302 | gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), | 291 | gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(), |
303 | fifo_runlist_timeslice_timeout_128_f() | | 292 | fifo_runlist_timeslice_timeout_128_f() | |
304 | fifo_runlist_timeslice_timescale_3_f() | | 293 | fifo_runlist_timeslice_timescale_3_f() | |
305 | fifo_runlist_timeslice_enable_true_f()); | 294 | fifo_runlist_timeslice_enable_true_f()); |
306 | 295 | ||
307 | gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(), | 296 | gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(), |
308 | fifo_pb_timeslice_timeout_16_f() | | 297 | fifo_pb_timeslice_timeout_16_f() | |
309 | fifo_pb_timeslice_timescale_0_f() | | 298 | fifo_pb_timeslice_timescale_0_f() | |
310 | fifo_pb_timeslice_enable_true_f()); | 299 | fifo_pb_timeslice_enable_true_f()); |
311 | 300 | ||
312 | gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); | 301 | gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); |
313 | 302 | ||
314 | return channel_gk20a_commit_userd(c); | 303 | return channel_gk20a_commit_userd(c); |
315 | } | 304 | } |
316 | 305 | ||
317 | static int channel_gk20a_setup_userd(struct channel_gk20a *c) | 306 | static int channel_gk20a_setup_userd(struct channel_gk20a *c) |
318 | { | 307 | { |
319 | BUG_ON(!c->userd_cpu_va); | 308 | struct gk20a *g = c->g; |
309 | struct mem_desc *mem = &g->fifo.userd; | ||
310 | u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32); | ||
320 | 311 | ||
321 | gk20a_dbg_fn(""); | 312 | gk20a_dbg_fn(""); |
322 | 313 | ||
323 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); | 314 | gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0); |
324 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); | 315 | gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0); |
325 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); | 316 | gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0); |
326 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); | 317 | gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0); |
327 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); | 318 | gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0); |
328 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0); | 319 | gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0); |
329 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); | 320 | gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0); |
330 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); | 321 | gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0); |
331 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); | 322 | gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0); |
332 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); | 323 | gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0); |
333 | 324 | ||
334 | return 0; | 325 | return 0; |
335 | } | 326 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 8840a3ae..b1355f92 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -130,7 +130,6 @@ struct channel_gk20a { | |||
130 | struct mem_desc inst_block; | 130 | struct mem_desc inst_block; |
131 | struct mem_desc_sub ramfc; | 131 | struct mem_desc_sub ramfc; |
132 | 132 | ||
133 | void *userd_cpu_va; | ||
134 | u64 userd_iova; | 133 | u64 userd_iova; |
135 | u64 userd_gpu_va; | 134 | u64 userd_gpu_va; |
136 | 135 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index c2285c8a..a3fa2ea5 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c | |||
@@ -36,7 +36,7 @@ unsigned int gk20a_debug_trace_cmdbuf; | |||
36 | struct ch_state { | 36 | struct ch_state { |
37 | int pid; | 37 | int pid; |
38 | int refs; | 38 | int refs; |
39 | u8 inst_block[0]; | 39 | u32 inst_block[0]; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | static const char * const ccsr_chan_status_str[] = { | 42 | static const char * const ccsr_chan_status_str[] = { |
@@ -108,15 +108,15 @@ static void gk20a_debug_show_channel(struct gk20a *g, | |||
108 | u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid)); | 108 | u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid)); |
109 | u32 status = ccsr_channel_status_v(channel); | 109 | u32 status = ccsr_channel_status_v(channel); |
110 | u32 syncpointa, syncpointb; | 110 | u32 syncpointa, syncpointb; |
111 | void *inst_ptr; | 111 | u32 *inst_mem; |
112 | 112 | ||
113 | if (!ch_state) | 113 | if (!ch_state) |
114 | return; | 114 | return; |
115 | 115 | ||
116 | inst_ptr = &ch_state->inst_block[0]; | 116 | inst_mem = &ch_state->inst_block[0]; |
117 | 117 | ||
118 | syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w()); | 118 | syncpointa = inst_mem[ram_fc_syncpointa_w()]; |
119 | syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w()); | 119 | syncpointb = inst_mem[ram_fc_syncpointb_w()]; |
120 | 120 | ||
121 | gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, | 121 | gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, |
122 | dev_name(g->dev), | 122 | dev_name(g->dev), |
@@ -129,23 +129,22 @@ static void gk20a_debug_show_channel(struct gk20a *g, | |||
129 | gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx " | 129 | gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx " |
130 | "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n" | 130 | "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n" |
131 | "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n", | 131 | "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n", |
132 | (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) + | 132 | (u64)inst_mem[ram_fc_pb_top_level_get_w()] + |
133 | ((u64)gk20a_mem_rd32(inst_ptr, | 133 | ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL), |
134 | ram_fc_pb_top_level_get_hi_w()) << 32ULL), | 134 | (u64)inst_mem[ram_fc_pb_put_w()] + |
135 | (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) + | 135 | ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL), |
136 | ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL), | 136 | (u64)inst_mem[ram_fc_pb_get_w()] + |
137 | (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) + | 137 | ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL), |
138 | ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL), | 138 | (u64)inst_mem[ram_fc_pb_fetch_w()] + |
139 | (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) + | 139 | ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL), |
140 | ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL), | 140 | inst_mem[ram_fc_pb_header_w()], |
141 | gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()), | 141 | inst_mem[ram_fc_pb_count_w()], |
142 | gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()), | ||
143 | syncpointa, | 142 | syncpointa, |
144 | syncpointb, | 143 | syncpointb, |
145 | gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()), | 144 | inst_mem[ram_fc_semaphorea_w()], |
146 | gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()), | 145 | inst_mem[ram_fc_semaphoreb_w()], |
147 | gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()), | 146 | inst_mem[ram_fc_semaphorec_w()], |
148 | gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w())); | 147 | inst_mem[ram_fc_semaphored_w()]); |
149 | 148 | ||
150 | #ifdef CONFIG_TEGRA_GK20A | 149 | #ifdef CONFIG_TEGRA_GK20A |
151 | if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v()) | 150 | if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v()) |
@@ -246,17 +245,15 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) | |||
246 | 245 | ||
247 | for (chid = 0; chid < f->num_channels; chid++) { | 246 | for (chid = 0; chid < f->num_channels; chid++) { |
248 | struct channel_gk20a *ch = &f->channel[chid]; | 247 | struct channel_gk20a *ch = &f->channel[chid]; |
249 | if (ch_state[chid]) { | 248 | if (!ch_state[chid]) |
250 | if (ch->inst_block.cpu_va) { | 249 | continue; |
251 | ch_state[chid]->pid = ch->pid; | 250 | |
252 | ch_state[chid]->refs = | 251 | ch_state[chid]->pid = ch->pid; |
253 | atomic_read(&ch->ref_count); | 252 | ch_state[chid]->refs = atomic_read(&ch->ref_count); |
254 | memcpy(&ch_state[chid]->inst_block[0], | 253 | gk20a_mem_rd_n(g, &ch->inst_block, 0, |
255 | ch->inst_block.cpu_va, | 254 | &ch_state[chid]->inst_block[0], |
256 | ram_in_alloc_size_v()); | 255 | ram_in_alloc_size_v()); |
257 | } | 256 | gk20a_channel_put(ch); |
258 | gk20a_channel_put(ch); | ||
259 | } | ||
260 | } | 257 | } |
261 | for (chid = 0; chid < f->num_channels; chid++) { | 258 | for (chid = 0; chid < f->num_channels; chid++) { |
262 | if (ch_state[chid]) { | 259 | if (ch_state[chid]) { |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index f9cddc41..edddcdc1 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -619,7 +619,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
619 | phys_addr_t pa; | 619 | phys_addr_t pa; |
620 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 620 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; |
621 | struct gk20a_fecs_trace *trace = g->fecs_trace; | 621 | struct gk20a_fecs_trace *trace = g->fecs_trace; |
622 | void *ctx_ptr; | 622 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; |
623 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); | 623 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); |
624 | 624 | ||
625 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | 625 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, |
@@ -634,10 +634,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
634 | if (!pa) | 634 | if (!pa) |
635 | return -ENOMEM; | 635 | return -ENOMEM; |
636 | 636 | ||
637 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 637 | if (gk20a_mem_begin(g, mem)) |
638 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0, | ||
639 | pgprot_writecombine(PAGE_KERNEL)); | ||
640 | if (!ctx_ptr) | ||
641 | return -ENOMEM; | 638 | return -ENOMEM; |
642 | 639 | ||
643 | lo = u64_lo32(pa); | 640 | lo = u64_lo32(pa); |
@@ -646,18 +643,18 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
646 | gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, | 643 | gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, |
647 | lo, GK20A_FECS_TRACE_NUM_RECORDS); | 644 | lo, GK20A_FECS_TRACE_NUM_RECORDS); |
648 | 645 | ||
649 | gk20a_mem_wr32(ctx_ptr | 646 | gk20a_mem_wr(g, mem, |
650 | + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), | 647 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), |
651 | 0, lo); | 648 | lo); |
652 | gk20a_mem_wr32(ctx_ptr | 649 | gk20a_mem_wr(g, mem, |
653 | + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), | 650 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), |
654 | 0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); | 651 | ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); |
655 | gk20a_mem_wr32(ctx_ptr | 652 | gk20a_mem_wr(g, mem, |
656 | + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), | 653 | ctxsw_prog_main_image_context_timestamp_buffer_control_o(), |
657 | 0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( | 654 | ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( |
658 | GK20A_FECS_TRACE_NUM_RECORDS)); | 655 | GK20A_FECS_TRACE_NUM_RECORDS)); |
659 | 656 | ||
660 | vunmap(ctx_ptr); | 657 | gk20a_mem_end(g, mem); |
661 | gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid); | 658 | gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid); |
662 | 659 | ||
663 | return 0; | 660 | return 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index dc3debf2..71400331 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | |||
@@ -520,8 +520,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) | |||
520 | mutex_init(&f->free_chs_mutex); | 520 | mutex_init(&f->free_chs_mutex); |
521 | 521 | ||
522 | for (chid = 0; chid < f->num_channels; chid++) { | 522 | for (chid = 0; chid < f->num_channels; chid++) { |
523 | f->channel[chid].userd_cpu_va = | ||
524 | f->userd.cpu_va + chid * f->userd_entry_size; | ||
525 | f->channel[chid].userd_iova = | 523 | f->channel[chid].userd_iova = |
526 | g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) | 524 | g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) |
527 | + chid * f->userd_entry_size; | 525 | + chid * f->userd_entry_size; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index f228cce4..2f85bf96 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -201,7 +201,7 @@ struct gpu_ops { | |||
201 | struct gr_ctx_desc *gr_ctx); | 201 | struct gr_ctx_desc *gr_ctx); |
202 | void (*update_ctxsw_preemption_mode)(struct gk20a *g, | 202 | void (*update_ctxsw_preemption_mode)(struct gk20a *g, |
203 | struct channel_ctx_gk20a *ch_ctx, | 203 | struct channel_ctx_gk20a *ch_ctx, |
204 | void *ctx_ptr); | 204 | struct mem_desc *mem); |
205 | int (*update_smpc_ctxsw_mode)(struct gk20a *g, | 205 | int (*update_smpc_ctxsw_mode)(struct gk20a *g, |
206 | struct channel_gk20a *c, | 206 | struct channel_gk20a *c, |
207 | bool enable); | 207 | bool enable); |
@@ -221,7 +221,8 @@ struct gpu_ops { | |||
221 | int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies, | 221 | int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies, |
222 | u32 expect_delay); | 222 | u32 expect_delay); |
223 | void (*init_cyclestats)(struct gk20a *g); | 223 | void (*init_cyclestats)(struct gk20a *g); |
224 | void (*enable_cde_in_fecs)(void *ctx_ptr); | 224 | void (*enable_cde_in_fecs)(struct gk20a *g, |
225 | struct mem_desc *mem); | ||
225 | int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch, | 226 | int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch, |
226 | u64 sms, bool enable); | 227 | u64 sms, bool enable); |
227 | void (*bpt_reg_info)(struct gk20a *g, | 228 | void (*bpt_reg_info)(struct gk20a *g, |
@@ -484,7 +485,7 @@ struct gpu_ops { | |||
484 | void (*cbc_clean)(struct gk20a *g); | 485 | void (*cbc_clean)(struct gk20a *g); |
485 | void (*tlb_invalidate)(struct vm_gk20a *vm); | 486 | void (*tlb_invalidate)(struct vm_gk20a *vm); |
486 | void (*set_big_page_size)(struct gk20a *g, | 487 | void (*set_big_page_size)(struct gk20a *g, |
487 | void *inst_ptr, int size); | 488 | struct mem_desc *mem, int size); |
488 | u32 (*get_big_page_sizes)(void); | 489 | u32 (*get_big_page_sizes)(void); |
489 | u32 (*get_physical_addr_bits)(struct gk20a *g); | 490 | u32 (*get_physical_addr_bits)(struct gk20a *g); |
490 | int (*init_mm_setup_hw)(struct gk20a *g); | 491 | int (*init_mm_setup_hw)(struct gk20a *g); |
@@ -493,7 +494,8 @@ struct gpu_ops { | |||
493 | void (*remove_bar2_vm)(struct gk20a *g); | 494 | void (*remove_bar2_vm)(struct gk20a *g); |
494 | const struct gk20a_mmu_level * | 495 | const struct gk20a_mmu_level * |
495 | (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); | 496 | (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); |
496 | void (*init_pdb)(struct gk20a *g, void *inst_ptr, u64 pdb_addr); | 497 | void (*init_pdb)(struct gk20a *g, struct mem_desc *mem, |
498 | u64 pdb_addr); | ||
497 | u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl, | 499 | u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl, |
498 | u32 flags); | 500 | u32 flags); |
499 | int (*bar1_bind)(struct gk20a *g, u64 bar1_iova); | 501 | int (*bar1_bind)(struct gk20a *g, u64 bar1_iova); |
@@ -859,53 +861,6 @@ do { \ | |||
859 | #define gk20a_dbg_info(fmt, arg...) \ | 861 | #define gk20a_dbg_info(fmt, arg...) \ |
860 | gk20a_dbg(gpu_dbg_info, fmt, ##arg) | 862 | gk20a_dbg(gpu_dbg_info, fmt, ##arg) |
861 | 863 | ||
862 | /* mem access with dbg_mem logging */ | ||
863 | static inline u8 gk20a_mem_rd08(void *ptr, int b) | ||
864 | { | ||
865 | u8 _b = ((const u8 *)ptr)[b]; | ||
866 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
867 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, _b); | ||
868 | #endif | ||
869 | return _b; | ||
870 | } | ||
871 | static inline u16 gk20a_mem_rd16(void *ptr, int s) | ||
872 | { | ||
873 | u16 _s = ((const u16 *)ptr)[s]; | ||
874 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
875 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, _s); | ||
876 | #endif | ||
877 | return _s; | ||
878 | } | ||
879 | static inline u32 gk20a_mem_rd32(void *ptr, int w) | ||
880 | { | ||
881 | u32 _w = ((const u32 *)ptr)[w]; | ||
882 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
883 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + sizeof(u32)*w, _w); | ||
884 | #endif | ||
885 | return _w; | ||
886 | } | ||
887 | static inline void gk20a_mem_wr08(void *ptr, int b, u8 data) | ||
888 | { | ||
889 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
890 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, data); | ||
891 | #endif | ||
892 | ((u8 *)ptr)[b] = data; | ||
893 | } | ||
894 | static inline void gk20a_mem_wr16(void *ptr, int s, u16 data) | ||
895 | { | ||
896 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
897 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, data); | ||
898 | #endif | ||
899 | ((u16 *)ptr)[s] = data; | ||
900 | } | ||
901 | static inline void gk20a_mem_wr32(void *ptr, int w, u32 data) | ||
902 | { | ||
903 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
904 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u32)*w, data); | ||
905 | #endif | ||
906 | ((u32 *)ptr)[w] = data; | ||
907 | } | ||
908 | |||
909 | void gk20a_init_clk_ops(struct gpu_ops *gops); | 864 | void gk20a_init_clk_ops(struct gpu_ops *gops); |
910 | 865 | ||
911 | /* register accessors */ | 866 | /* register accessors */ |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4e7c36ee..e7e6662a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -97,22 +97,18 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, | |||
97 | u32 *ctx_id) | 97 | u32 *ctx_id) |
98 | { | 98 | { |
99 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 99 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
100 | void *ctx_ptr = NULL; | ||
101 | 100 | ||
102 | /* Channel gr_ctx buffer is gpu cacheable. | 101 | /* Channel gr_ctx buffer is gpu cacheable. |
103 | Flush and invalidate before cpu update. */ | 102 | Flush and invalidate before cpu update. */ |
104 | g->ops.mm.l2_flush(g, true); | 103 | g->ops.mm.l2_flush(g, true); |
105 | 104 | ||
106 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 105 | if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) |
107 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
108 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
109 | if (!ctx_ptr) | ||
110 | return -ENOMEM; | 106 | return -ENOMEM; |
111 | 107 | ||
112 | *ctx_id = gk20a_mem_rd32(ctx_ptr + | 108 | *ctx_id = gk20a_mem_rd(g, &ch_ctx->gr_ctx->mem, |
113 | ctxsw_prog_main_image_context_id_o(), 0); | 109 | ctxsw_prog_main_image_context_id_o()); |
114 | 110 | ||
115 | vunmap(ctx_ptr); | 111 | gk20a_mem_end(g, &ch_ctx->gr_ctx->mem); |
116 | 112 | ||
117 | return 0; | 113 | return 0; |
118 | } | 114 | } |
@@ -619,22 +615,17 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
619 | { | 615 | { |
620 | u32 addr_lo; | 616 | u32 addr_lo; |
621 | u32 addr_hi; | 617 | u32 addr_hi; |
622 | void *inst_ptr = NULL; | ||
623 | 618 | ||
624 | gk20a_dbg_fn(""); | 619 | gk20a_dbg_fn(""); |
625 | 620 | ||
626 | inst_ptr = c->inst_block.cpu_va; | ||
627 | if (!inst_ptr) | ||
628 | return -ENOMEM; | ||
629 | |||
630 | addr_lo = u64_lo32(gpu_va) >> 12; | 621 | addr_lo = u64_lo32(gpu_va) >> 12; |
631 | addr_hi = u64_hi32(gpu_va); | 622 | addr_hi = u64_hi32(gpu_va); |
632 | 623 | ||
633 | gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(), | 624 | gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(), |
634 | ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | | 625 | ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | |
635 | ram_in_gr_wfi_ptr_lo_f(addr_lo)); | 626 | ram_in_gr_wfi_ptr_lo_f(addr_lo)); |
636 | 627 | ||
637 | gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), | 628 | gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(), |
638 | ram_in_gr_wfi_ptr_hi_f(addr_hi)); | 629 | ram_in_gr_wfi_ptr_hi_f(addr_hi)); |
639 | 630 | ||
640 | return 0; | 631 | return 0; |
@@ -658,11 +649,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | |||
658 | return -EBUSY; | 649 | return -EBUSY; |
659 | } | 650 | } |
660 | 651 | ||
661 | ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages, | 652 | if (gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem)) |
662 | PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT, | ||
663 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
664 | |||
665 | if (!ch_ctx->patch_ctx.mem.cpu_va) | ||
666 | return -ENOMEM; | 653 | return -ENOMEM; |
667 | 654 | ||
668 | return 0; | 655 | return 0; |
@@ -677,8 +664,7 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g, | |||
677 | return -EINVAL; | 664 | return -EINVAL; |
678 | } | 665 | } |
679 | 666 | ||
680 | vunmap(ch_ctx->patch_ctx.mem.cpu_va); | 667 | gk20a_mem_end(g, &ch_ctx->patch_ctx.mem); |
681 | ch_ctx->patch_ctx.mem.cpu_va = NULL; | ||
682 | return 0; | 668 | return 0; |
683 | } | 669 | } |
684 | 670 | ||
@@ -687,7 +673,6 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, | |||
687 | u32 addr, u32 data, bool patch) | 673 | u32 addr, u32 data, bool patch) |
688 | { | 674 | { |
689 | u32 patch_slot = 0; | 675 | u32 patch_slot = 0; |
690 | void *patch_ptr = NULL; | ||
691 | bool mapped_here = false; | 676 | bool mapped_here = false; |
692 | 677 | ||
693 | BUG_ON(patch != 0 && ch_ctx == NULL); | 678 | BUG_ON(patch != 0 && ch_ctx == NULL); |
@@ -708,11 +693,10 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, | |||
708 | } else | 693 | } else |
709 | mapped_here = false; | 694 | mapped_here = false; |
710 | 695 | ||
711 | patch_ptr = ch_ctx->patch_ctx.mem.cpu_va; | ||
712 | patch_slot = ch_ctx->patch_ctx.data_count * 2; | 696 | patch_slot = ch_ctx->patch_ctx.data_count * 2; |
713 | 697 | ||
714 | gk20a_mem_wr32(patch_ptr, patch_slot++, addr); | 698 | gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, addr); |
715 | gk20a_mem_wr32(patch_ptr, patch_slot++, data); | 699 | gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, data); |
716 | 700 | ||
717 | ch_ctx->patch_ctx.data_count++; | 701 | ch_ctx->patch_ctx.data_count++; |
718 | 702 | ||
@@ -760,16 +744,13 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | |||
760 | static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | 744 | static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) |
761 | { | 745 | { |
762 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 746 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
747 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; | ||
763 | u32 va_lo, va_hi, va; | 748 | u32 va_lo, va_hi, va; |
764 | int ret = 0; | 749 | int ret = 0; |
765 | void *ctx_ptr = NULL; | ||
766 | 750 | ||
767 | gk20a_dbg_fn(""); | 751 | gk20a_dbg_fn(""); |
768 | 752 | ||
769 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 753 | if (gk20a_mem_begin(g, mem)) |
770 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
771 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
772 | if (!ctx_ptr) | ||
773 | return -ENOMEM; | 754 | return -ENOMEM; |
774 | 755 | ||
775 | if (ch_ctx->zcull_ctx.gpu_va == 0 && | 756 | if (ch_ctx->zcull_ctx.gpu_va == 0 && |
@@ -792,15 +773,17 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | |||
792 | goto clean_up; | 773 | goto clean_up; |
793 | } | 774 | } |
794 | 775 | ||
795 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, | 776 | gk20a_mem_wr(g, mem, |
777 | ctxsw_prog_main_image_zcull_o(), | ||
796 | ch_ctx->zcull_ctx.ctx_sw_mode); | 778 | ch_ctx->zcull_ctx.ctx_sw_mode); |
797 | 779 | ||
798 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va); | 780 | gk20a_mem_wr(g, mem, |
781 | ctxsw_prog_main_image_zcull_ptr_o(), va); | ||
799 | 782 | ||
800 | c->g->ops.fifo.enable_channel(c); | 783 | c->g->ops.fifo.enable_channel(c); |
801 | 784 | ||
802 | clean_up: | 785 | clean_up: |
803 | vunmap(ctx_ptr); | 786 | gk20a_mem_end(g, mem); |
804 | 787 | ||
805 | return ret; | 788 | return ret; |
806 | } | 789 | } |
@@ -1500,8 +1483,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1500 | u32 ctx_header_words; | 1483 | u32 ctx_header_words; |
1501 | u32 i; | 1484 | u32 i; |
1502 | u32 data; | 1485 | u32 data; |
1503 | void *ctx_ptr = NULL; | 1486 | struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; |
1504 | void *gold_ptr = NULL; | 1487 | struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem; |
1505 | u32 err = 0; | 1488 | u32 err = 0; |
1506 | 1489 | ||
1507 | gk20a_dbg_fn(""); | 1490 | gk20a_dbg_fn(""); |
@@ -1527,16 +1510,10 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1527 | if (err) | 1510 | if (err) |
1528 | goto clean_up; | 1511 | goto clean_up; |
1529 | 1512 | ||
1530 | gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].mem.pages, | 1513 | if (gk20a_mem_begin(g, gold_mem)) |
1531 | PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].mem.size) >> | ||
1532 | PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1533 | if (!gold_ptr) | ||
1534 | goto clean_up; | 1514 | goto clean_up; |
1535 | 1515 | ||
1536 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 1516 | if (gk20a_mem_begin(g, gr_mem)) |
1537 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1538 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1539 | if (!ctx_ptr) | ||
1540 | goto clean_up; | 1517 | goto clean_up; |
1541 | 1518 | ||
1542 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); | 1519 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); |
@@ -1545,14 +1522,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1545 | g->ops.mm.l2_flush(g, true); | 1522 | g->ops.mm.l2_flush(g, true); |
1546 | 1523 | ||
1547 | for (i = 0; i < ctx_header_words; i++) { | 1524 | for (i = 0; i < ctx_header_words; i++) { |
1548 | data = gk20a_mem_rd32(ctx_ptr, i); | 1525 | data = gk20a_mem_rd32(g, gr_mem, i); |
1549 | gk20a_mem_wr32(gold_ptr, i, data); | 1526 | gk20a_mem_wr32(g, gold_mem, i, data); |
1550 | } | 1527 | } |
1551 | 1528 | ||
1552 | gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0, | 1529 | gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(), |
1553 | ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); | 1530 | ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); |
1554 | 1531 | ||
1555 | gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0); | 1532 | gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0); |
1556 | 1533 | ||
1557 | gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); | 1534 | gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); |
1558 | 1535 | ||
@@ -1568,12 +1545,12 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1568 | goto clean_up; | 1545 | goto clean_up; |
1569 | } | 1546 | } |
1570 | 1547 | ||
1571 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) | 1548 | gk20a_mem_rd_n(g, gold_mem, 0, |
1572 | gr->ctx_vars.local_golden_image[i] = | 1549 | gr->ctx_vars.local_golden_image, |
1573 | gk20a_mem_rd32(gold_ptr, i); | 1550 | gr->ctx_vars.golden_image_size); |
1574 | } | 1551 | } |
1575 | 1552 | ||
1576 | gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); | 1553 | gr_gk20a_commit_inst(c, gr_mem->gpu_va); |
1577 | 1554 | ||
1578 | gr->ctx_vars.golden_image_initialized = true; | 1555 | gr->ctx_vars.golden_image_initialized = true; |
1579 | 1556 | ||
@@ -1586,10 +1563,8 @@ clean_up: | |||
1586 | else | 1563 | else |
1587 | gk20a_dbg_fn("done"); | 1564 | gk20a_dbg_fn("done"); |
1588 | 1565 | ||
1589 | if (gold_ptr) | 1566 | gk20a_mem_end(g, gold_mem); |
1590 | vunmap(gold_ptr); | 1567 | gk20a_mem_end(g, gr_mem); |
1591 | if (ctx_ptr) | ||
1592 | vunmap(ctx_ptr); | ||
1593 | 1568 | ||
1594 | mutex_unlock(&gr->ctx_mutex); | 1569 | mutex_unlock(&gr->ctx_mutex); |
1595 | return err; | 1570 | return err; |
@@ -1600,7 +1575,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1600 | bool enable_smpc_ctxsw) | 1575 | bool enable_smpc_ctxsw) |
1601 | { | 1576 | { |
1602 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1577 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
1603 | void *ctx_ptr = NULL; | 1578 | struct mem_desc *mem; |
1604 | u32 data; | 1579 | u32 data; |
1605 | int ret; | 1580 | int ret; |
1606 | 1581 | ||
@@ -1611,46 +1586,39 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1611 | return -EFAULT; | 1586 | return -EFAULT; |
1612 | } | 1587 | } |
1613 | 1588 | ||
1589 | mem = &ch_ctx->gr_ctx->mem; | ||
1590 | |||
1614 | c->g->ops.fifo.disable_channel(c); | 1591 | c->g->ops.fifo.disable_channel(c); |
1615 | ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); | 1592 | ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); |
1616 | if (ret) { | 1593 | if (ret) { |
1617 | c->g->ops.fifo.enable_channel(c); | 1594 | gk20a_err(dev_from_gk20a(g), "failed to preempt channel"); |
1618 | gk20a_err(dev_from_gk20a(g), | 1595 | goto out; |
1619 | "failed to preempt channel\n"); | ||
1620 | return ret; | ||
1621 | } | 1596 | } |
1622 | 1597 | ||
1623 | /* Channel gr_ctx buffer is gpu cacheable. | 1598 | /* Channel gr_ctx buffer is gpu cacheable. |
1624 | Flush and invalidate before cpu update. */ | 1599 | Flush and invalidate before cpu update. */ |
1625 | g->ops.mm.l2_flush(g, true); | 1600 | g->ops.mm.l2_flush(g, true); |
1626 | 1601 | ||
1627 | if (!ch_ctx->gr_ctx) { | 1602 | if (gk20a_mem_begin(g, mem)) { |
1628 | gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); | 1603 | ret = -ENOMEM; |
1629 | return -EFAULT; | 1604 | goto out; |
1630 | } | ||
1631 | |||
1632 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | ||
1633 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1634 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1635 | if (!ctx_ptr) { | ||
1636 | c->g->ops.fifo.enable_channel(c); | ||
1637 | return -ENOMEM; | ||
1638 | } | 1605 | } |
1639 | 1606 | ||
1640 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | 1607 | data = gk20a_mem_rd(g, mem, |
1608 | ctxsw_prog_main_image_pm_o()); | ||
1641 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); | 1609 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); |
1642 | data |= enable_smpc_ctxsw ? | 1610 | data |= enable_smpc_ctxsw ? |
1643 | ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : | 1611 | ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : |
1644 | ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); | 1612 | ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); |
1645 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, | 1613 | gk20a_mem_wr(g, mem, |
1646 | data); | 1614 | ctxsw_prog_main_image_pm_o(), |
1615 | data); | ||
1647 | 1616 | ||
1648 | vunmap(ctx_ptr); | 1617 | gk20a_mem_end(g, mem); |
1649 | 1618 | ||
1650 | /* enable channel */ | 1619 | out: |
1651 | c->g->ops.fifo.enable_channel(c); | 1620 | c->g->ops.fifo.enable_channel(c); |
1652 | 1621 | return ret; | |
1653 | return 0; | ||
1654 | } | 1622 | } |
1655 | 1623 | ||
1656 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | 1624 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, |
@@ -1659,8 +1627,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1659 | { | 1627 | { |
1660 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1628 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
1661 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 1629 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; |
1662 | void *ctx_ptr = NULL; | 1630 | struct mem_desc *gr_mem; |
1663 | void *pm_ctx_ptr; | ||
1664 | u32 data, virt_addr; | 1631 | u32 data, virt_addr; |
1665 | int ret; | 1632 | int ret; |
1666 | 1633 | ||
@@ -1671,6 +1638,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1671 | return -EFAULT; | 1638 | return -EFAULT; |
1672 | } | 1639 | } |
1673 | 1640 | ||
1641 | gr_mem = &ch_ctx->gr_ctx->mem; | ||
1642 | |||
1674 | if (enable_hwpm_ctxsw) { | 1643 | if (enable_hwpm_ctxsw) { |
1675 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | 1644 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) |
1676 | return 0; | 1645 | return 0; |
@@ -1721,29 +1690,22 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1721 | } | 1690 | } |
1722 | 1691 | ||
1723 | /* Now clear the buffer */ | 1692 | /* Now clear the buffer */ |
1724 | pm_ctx_ptr = vmap(pm_ctx->mem.pages, | 1693 | if (gk20a_mem_begin(g, &pm_ctx->mem)) { |
1725 | PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT, | ||
1726 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1727 | |||
1728 | if (!pm_ctx_ptr) { | ||
1729 | ret = -ENOMEM; | 1694 | ret = -ENOMEM; |
1730 | goto cleanup_pm_buf; | 1695 | goto cleanup_pm_buf; |
1731 | } | 1696 | } |
1732 | 1697 | ||
1733 | memset(pm_ctx_ptr, 0, pm_ctx->mem.size); | 1698 | gk20a_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size); |
1734 | 1699 | ||
1735 | vunmap(pm_ctx_ptr); | 1700 | gk20a_mem_end(g, &pm_ctx->mem); |
1736 | } | 1701 | } |
1737 | 1702 | ||
1738 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 1703 | if (gk20a_mem_begin(g, gr_mem)) { |
1739 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1740 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1741 | if (!ctx_ptr) { | ||
1742 | ret = -ENOMEM; | 1704 | ret = -ENOMEM; |
1743 | goto cleanup_pm_buf; | 1705 | goto cleanup_pm_buf; |
1744 | } | 1706 | } |
1745 | 1707 | ||
1746 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | 1708 | data = gk20a_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); |
1747 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | 1709 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); |
1748 | 1710 | ||
1749 | if (enable_hwpm_ctxsw) { | 1711 | if (enable_hwpm_ctxsw) { |
@@ -1760,10 +1722,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1760 | 1722 | ||
1761 | data |= pm_ctx->pm_mode; | 1723 | data |= pm_ctx->pm_mode; |
1762 | 1724 | ||
1763 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); | 1725 | gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data); |
1764 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); | 1726 | gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr); |
1765 | 1727 | ||
1766 | vunmap(ctx_ptr); | 1728 | gk20a_mem_end(g, gr_mem); |
1767 | 1729 | ||
1768 | /* enable channel */ | 1730 | /* enable channel */ |
1769 | c->g->ops.fifo.enable_channel(c); | 1731 | c->g->ops.fifo.enable_channel(c); |
@@ -1788,9 +1750,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1788 | u32 virt_addr_lo; | 1750 | u32 virt_addr_lo; |
1789 | u32 virt_addr_hi; | 1751 | u32 virt_addr_hi; |
1790 | u32 virt_addr = 0; | 1752 | u32 virt_addr = 0; |
1791 | u32 i, v, data; | 1753 | u32 v, data; |
1792 | int ret = 0; | 1754 | int ret = 0; |
1793 | void *ctx_ptr = NULL; | 1755 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; |
1794 | 1756 | ||
1795 | gk20a_dbg_fn(""); | 1757 | gk20a_dbg_fn(""); |
1796 | 1758 | ||
@@ -1801,20 +1763,18 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1801 | Flush and invalidate before cpu update. */ | 1763 | Flush and invalidate before cpu update. */ |
1802 | g->ops.mm.l2_flush(g, true); | 1764 | g->ops.mm.l2_flush(g, true); |
1803 | 1765 | ||
1804 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 1766 | if (gk20a_mem_begin(g, mem)) |
1805 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1806 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1807 | if (!ctx_ptr) | ||
1808 | return -ENOMEM; | 1767 | return -ENOMEM; |
1809 | 1768 | ||
1810 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) | 1769 | gk20a_mem_wr_n(g, mem, 0, |
1811 | gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); | 1770 | gr->ctx_vars.local_golden_image, |
1771 | gr->ctx_vars.golden_image_size); | ||
1812 | 1772 | ||
1813 | if (g->ops.gr.enable_cde_in_fecs && c->cde) | 1773 | if (g->ops.gr.enable_cde_in_fecs && c->cde) |
1814 | g->ops.gr.enable_cde_in_fecs(ctx_ptr); | 1774 | g->ops.gr.enable_cde_in_fecs(g, mem); |
1815 | 1775 | ||
1816 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); | 1776 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0); |
1817 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); | 1777 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0); |
1818 | 1778 | ||
1819 | /* set priv access map */ | 1779 | /* set priv access map */ |
1820 | virt_addr_lo = | 1780 | virt_addr_lo = |
@@ -1827,29 +1787,29 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1827 | else | 1787 | else |
1828 | data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); | 1788 | data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); |
1829 | 1789 | ||
1830 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0, | 1790 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), |
1831 | data); | 1791 | data); |
1832 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0, | 1792 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_lo_o(), |
1833 | virt_addr_lo); | 1793 | virt_addr_lo); |
1834 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0, | 1794 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_hi_o(), |
1835 | virt_addr_hi); | 1795 | virt_addr_hi); |
1836 | /* disable verif features */ | 1796 | /* disable verif features */ |
1837 | v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0); | 1797 | v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); |
1838 | v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); | 1798 | v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); |
1839 | v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); | 1799 | v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); |
1840 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); | 1800 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); |
1841 | 1801 | ||
1842 | if (g->ops.gr.update_ctxsw_preemption_mode) | 1802 | if (g->ops.gr.update_ctxsw_preemption_mode) |
1843 | g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr); | 1803 | g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); |
1844 | 1804 | ||
1845 | virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); | 1805 | virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); |
1846 | virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); | 1806 | virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); |
1847 | 1807 | ||
1848 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0, | 1808 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), |
1849 | ch_ctx->patch_ctx.data_count); | 1809 | ch_ctx->patch_ctx.data_count); |
1850 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0, | 1810 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(), |
1851 | virt_addr_lo); | 1811 | virt_addr_lo); |
1852 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, | 1812 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(), |
1853 | virt_addr_hi); | 1813 | virt_addr_hi); |
1854 | 1814 | ||
1855 | /* Update main header region of the context buffer with the info needed | 1815 | /* Update main header region of the context buffer with the info needed |
@@ -1860,7 +1820,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1860 | if (ch_ctx->pm_ctx.mem.gpu_va == 0) { | 1820 | if (ch_ctx->pm_ctx.mem.gpu_va == 0) { |
1861 | gk20a_err(dev_from_gk20a(g), | 1821 | gk20a_err(dev_from_gk20a(g), |
1862 | "context switched pm with no pm buffer!"); | 1822 | "context switched pm with no pm buffer!"); |
1863 | vunmap(ctx_ptr); | 1823 | gk20a_mem_end(g, mem); |
1864 | return -EFAULT; | 1824 | return -EFAULT; |
1865 | } | 1825 | } |
1866 | 1826 | ||
@@ -1871,14 +1831,14 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1871 | } else | 1831 | } else |
1872 | virt_addr = 0; | 1832 | virt_addr = 0; |
1873 | 1833 | ||
1874 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | 1834 | data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); |
1875 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | 1835 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); |
1876 | data |= ch_ctx->pm_ctx.pm_mode; | 1836 | data |= ch_ctx->pm_ctx.pm_mode; |
1877 | 1837 | ||
1878 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); | 1838 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data); |
1879 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); | 1839 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr); |
1880 | 1840 | ||
1881 | vunmap(ctx_ptr); | 1841 | gk20a_mem_end(g, mem); |
1882 | 1842 | ||
1883 | if (tegra_platform_is_linsim()) { | 1843 | if (tegra_platform_is_linsim()) { |
1884 | u32 inst_base_ptr = | 1844 | u32 inst_base_ptr = |
@@ -1978,16 +1938,20 @@ static void gr_gk20a_init_ctxsw_ucode_segments( | |||
1978 | } | 1938 | } |
1979 | 1939 | ||
1980 | static int gr_gk20a_copy_ctxsw_ucode_segments( | 1940 | static int gr_gk20a_copy_ctxsw_ucode_segments( |
1981 | u8 *buf, | 1941 | struct gk20a *g, |
1942 | struct mem_desc *dst, | ||
1982 | struct gk20a_ctxsw_ucode_segments *segments, | 1943 | struct gk20a_ctxsw_ucode_segments *segments, |
1983 | u32 *bootimage, | 1944 | u32 *bootimage, |
1984 | u32 *code, u32 *data) | 1945 | u32 *code, u32 *data) |
1985 | { | 1946 | { |
1986 | int i; | 1947 | int i; |
1987 | 1948 | ||
1988 | memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); | 1949 | gk20a_mem_wr_n(g, dst, segments->boot.offset, bootimage, |
1989 | memcpy(buf + segments->code.offset, code, segments->code.size); | 1950 | segments->boot.size); |
1990 | memcpy(buf + segments->data.offset, data, segments->data.size); | 1951 | gk20a_mem_wr_n(g, dst, segments->code.offset, code, |
1952 | segments->code.size); | ||
1953 | gk20a_mem_wr_n(g, dst, segments->data.offset, data, | ||
1954 | segments->data.size); | ||
1991 | 1955 | ||
1992 | /* compute a "checksum" for the boot binary to detect its version */ | 1956 | /* compute a "checksum" for the boot binary to detect its version */ |
1993 | segments->boot_signature = 0; | 1957 | segments->boot_signature = 0; |
@@ -2009,7 +1973,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2009 | u32 *fecs_boot_image; | 1973 | u32 *fecs_boot_image; |
2010 | u32 *gpccs_boot_image; | 1974 | u32 *gpccs_boot_image; |
2011 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | 1975 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; |
2012 | u8 *buf; | ||
2013 | u32 ucode_size; | 1976 | u32 ucode_size; |
2014 | int err = 0; | 1977 | int err = 0; |
2015 | 1978 | ||
@@ -2049,14 +2012,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2049 | if (err) | 2012 | if (err) |
2050 | goto clean_up; | 2013 | goto clean_up; |
2051 | 2014 | ||
2052 | buf = (u8 *)ucode_info->surface_desc.cpu_va; | 2015 | gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, |
2053 | if (!buf) { | 2016 | &ucode_info->fecs, |
2054 | gk20a_err(d, "failed to map surface desc buffer"); | ||
2055 | err = -ENOMEM; | ||
2056 | goto clean_up; | ||
2057 | } | ||
2058 | |||
2059 | gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs, | ||
2060 | fecs_boot_image, | 2017 | fecs_boot_image, |
2061 | g->gr.ctx_vars.ucode.fecs.inst.l, | 2018 | g->gr.ctx_vars.ucode.fecs.inst.l, |
2062 | g->gr.ctx_vars.ucode.fecs.data.l); | 2019 | g->gr.ctx_vars.ucode.fecs.data.l); |
@@ -2064,7 +2021,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2064 | release_firmware(fecs_fw); | 2021 | release_firmware(fecs_fw); |
2065 | fecs_fw = NULL; | 2022 | fecs_fw = NULL; |
2066 | 2023 | ||
2067 | gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs, | 2024 | gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, |
2025 | &ucode_info->gpccs, | ||
2068 | gpccs_boot_image, | 2026 | gpccs_boot_image, |
2069 | g->gr.ctx_vars.ucode.gpccs.inst.l, | 2027 | g->gr.ctx_vars.ucode.gpccs.inst.l, |
2070 | g->gr.ctx_vars.ucode.gpccs.data.l); | 2028 | g->gr.ctx_vars.ucode.gpccs.data.l); |
@@ -4690,41 +4648,38 @@ out: | |||
4690 | static int gr_gk20a_init_access_map(struct gk20a *g) | 4648 | static int gr_gk20a_init_access_map(struct gk20a *g) |
4691 | { | 4649 | { |
4692 | struct gr_gk20a *gr = &g->gr; | 4650 | struct gr_gk20a *gr = &g->gr; |
4693 | void *data; | 4651 | struct mem_desc *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; |
4694 | int err = 0; | ||
4695 | u32 w, nr_pages = | 4652 | u32 w, nr_pages = |
4696 | DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, | 4653 | DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, |
4697 | PAGE_SIZE); | 4654 | PAGE_SIZE); |
4698 | u32 *whitelist = NULL; | 4655 | u32 *whitelist = NULL; |
4699 | int num_entries = 0; | 4656 | int num_entries = 0; |
4700 | 4657 | ||
4701 | data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.pages, | 4658 | if (gk20a_mem_begin(g, mem)) { |
4702 | PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size) >> | ||
4703 | PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL)); | ||
4704 | if (!data) { | ||
4705 | gk20a_err(dev_from_gk20a(g), | 4659 | gk20a_err(dev_from_gk20a(g), |
4706 | "failed to map priv access map memory"); | 4660 | "failed to map priv access map memory"); |
4707 | err = -ENOMEM; | 4661 | return -ENOMEM; |
4708 | goto clean_up; | ||
4709 | } | 4662 | } |
4710 | 4663 | ||
4711 | memset(data, 0x0, PAGE_SIZE * nr_pages); | 4664 | gk20a_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages); |
4712 | 4665 | ||
4713 | g->ops.gr.get_access_map(g, &whitelist, &num_entries); | 4666 | g->ops.gr.get_access_map(g, &whitelist, &num_entries); |
4714 | 4667 | ||
4715 | for (w = 0; w < num_entries; w++) { | 4668 | for (w = 0; w < num_entries; w++) { |
4716 | u32 map_bit, map_byte, map_shift; | 4669 | u32 map_bit, map_byte, map_shift, x; |
4717 | map_bit = whitelist[w] >> 2; | 4670 | map_bit = whitelist[w] >> 2; |
4718 | map_byte = map_bit >> 3; | 4671 | map_byte = map_bit >> 3; |
4719 | map_shift = map_bit & 0x7; /* i.e. 0-7 */ | 4672 | map_shift = map_bit & 0x7; /* i.e. 0-7 */ |
4720 | gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", | 4673 | gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", |
4721 | whitelist[w], map_byte, map_shift); | 4674 | whitelist[w], map_byte, map_shift); |
4722 | ((u8 *)data)[map_byte] |= 1 << map_shift; | 4675 | x = gk20a_mem_rd32(g, mem, map_byte / sizeof(u32)); |
4676 | x |= 1 << ( | ||
4677 | (map_byte % sizeof(u32) * BITS_PER_BYTE) | ||
4678 | + map_shift); | ||
4679 | gk20a_mem_wr32(g, mem, map_byte / sizeof(u32), x); | ||
4723 | } | 4680 | } |
4724 | 4681 | ||
4725 | clean_up: | 4682 | gk20a_mem_end(g, mem); |
4726 | if (data) | ||
4727 | vunmap(data); | ||
4728 | return 0; | 4683 | return 0; |
4729 | } | 4684 | } |
4730 | 4685 | ||
@@ -6659,7 +6614,7 @@ static void gr_gk20a_init_sm_dsm_reg_info(void) | |||
6659 | static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | 6614 | static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, |
6660 | struct channel_ctx_gk20a *ch_ctx, | 6615 | struct channel_ctx_gk20a *ch_ctx, |
6661 | u32 addr, u32 data, | 6616 | u32 addr, u32 data, |
6662 | u8 *context) | 6617 | struct mem_desc *mem) |
6663 | { | 6618 | { |
6664 | u32 num_gpc = g->gr.gpc_count; | 6619 | u32 num_gpc = g->gr.gpc_count; |
6665 | u32 num_tpc; | 6620 | u32 num_tpc; |
@@ -6688,8 +6643,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6688 | /* reset the patch count from previous | 6643 | /* reset the patch count from previous |
6689 | runs,if ucode has already processed | 6644 | runs,if ucode has already processed |
6690 | it */ | 6645 | it */ |
6691 | tmp = gk20a_mem_rd32(context + | 6646 | tmp = gk20a_mem_rd(g, mem, |
6692 | ctxsw_prog_main_image_patch_count_o(), 0); | 6647 | ctxsw_prog_main_image_patch_count_o()); |
6693 | 6648 | ||
6694 | if (!tmp) | 6649 | if (!tmp) |
6695 | ch_ctx->patch_ctx.data_count = 0; | 6650 | ch_ctx->patch_ctx.data_count = 0; |
@@ -6700,15 +6655,15 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6700 | vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); | 6655 | vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); |
6701 | vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); | 6656 | vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); |
6702 | 6657 | ||
6703 | gk20a_mem_wr32(context + | 6658 | gk20a_mem_wr(g, mem, |
6704 | ctxsw_prog_main_image_patch_count_o(), | 6659 | ctxsw_prog_main_image_patch_count_o(), |
6705 | 0, ch_ctx->patch_ctx.data_count); | 6660 | ch_ctx->patch_ctx.data_count); |
6706 | gk20a_mem_wr32(context + | 6661 | gk20a_mem_wr(g, mem, |
6707 | ctxsw_prog_main_image_patch_adr_lo_o(), | 6662 | ctxsw_prog_main_image_patch_adr_lo_o(), |
6708 | 0, vaddr_lo); | 6663 | vaddr_lo); |
6709 | gk20a_mem_wr32(context + | 6664 | gk20a_mem_wr(g, mem, |
6710 | ctxsw_prog_main_image_patch_adr_hi_o(), | 6665 | ctxsw_prog_main_image_patch_adr_hi_o(), |
6711 | 0, vaddr_hi); | 6666 | vaddr_hi); |
6712 | 6667 | ||
6713 | /* we're not caching these on cpu side, | 6668 | /* we're not caching these on cpu side, |
6714 | but later watch for it */ | 6669 | but later watch for it */ |
@@ -6760,17 +6715,15 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) | |||
6760 | 6715 | ||
6761 | #define ILLEGAL_ID (~0) | 6716 | #define ILLEGAL_ID (~0) |
6762 | 6717 | ||
6763 | static inline bool check_main_image_header_magic(void *context) | 6718 | static inline bool check_main_image_header_magic(u8 *context) |
6764 | { | 6719 | { |
6765 | u32 magic = gk20a_mem_rd32(context + | 6720 | u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o()); |
6766 | ctxsw_prog_main_image_magic_value_o(), 0); | ||
6767 | gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); | 6721 | gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); |
6768 | return magic == ctxsw_prog_main_image_magic_value_v_value_v(); | 6722 | return magic == ctxsw_prog_main_image_magic_value_v_value_v(); |
6769 | } | 6723 | } |
6770 | static inline bool check_local_header_magic(void *context) | 6724 | static inline bool check_local_header_magic(u8 *context) |
6771 | { | 6725 | { |
6772 | u32 magic = gk20a_mem_rd32(context + | 6726 | u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o()); |
6773 | ctxsw_prog_local_magic_value_o(), 0); | ||
6774 | gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); | 6727 | gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); |
6775 | return magic == ctxsw_prog_local_magic_value_v_value_v(); | 6728 | return magic == ctxsw_prog_local_magic_value_v_value_v(); |
6776 | 6729 | ||
@@ -6814,7 +6767,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6814 | u32 num_gpcs, num_tpcs; | 6767 | u32 num_gpcs, num_tpcs; |
6815 | u32 chk_addr; | 6768 | u32 chk_addr; |
6816 | u32 ext_priv_offset, ext_priv_size; | 6769 | u32 ext_priv_offset, ext_priv_size; |
6817 | void *context; | 6770 | u8 *context; |
6818 | u32 offset_to_segment, offset_to_segment_end; | 6771 | u32 offset_to_segment, offset_to_segment_end; |
6819 | u32 sm_dsm_perf_reg_id = ILLEGAL_ID; | 6772 | u32 sm_dsm_perf_reg_id = ILLEGAL_ID; |
6820 | u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; | 6773 | u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; |
@@ -6856,14 +6809,14 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6856 | /* note below is in words/num_registers */ | 6809 | /* note below is in words/num_registers */ |
6857 | marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; | 6810 | marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; |
6858 | 6811 | ||
6859 | context = context_buffer; | 6812 | context = (u8 *)context_buffer; |
6860 | /* sanity check main header */ | 6813 | /* sanity check main header */ |
6861 | if (!check_main_image_header_magic(context)) { | 6814 | if (!check_main_image_header_magic(context)) { |
6862 | gk20a_err(dev_from_gk20a(g), | 6815 | gk20a_err(dev_from_gk20a(g), |
6863 | "Invalid main header: magic value"); | 6816 | "Invalid main header: magic value"); |
6864 | return -EINVAL; | 6817 | return -EINVAL; |
6865 | } | 6818 | } |
6866 | num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); | 6819 | num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); |
6867 | if (gpc_num >= num_gpcs) { | 6820 | if (gpc_num >= num_gpcs) { |
6868 | gk20a_err(dev_from_gk20a(g), | 6821 | gk20a_err(dev_from_gk20a(g), |
6869 | "GPC 0x%08x is greater than total count 0x%08x!\n", | 6822 | "GPC 0x%08x is greater than total count 0x%08x!\n", |
@@ -6871,7 +6824,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6871 | return -EINVAL; | 6824 | return -EINVAL; |
6872 | } | 6825 | } |
6873 | 6826 | ||
6874 | data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0); | 6827 | data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o()); |
6875 | ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); | 6828 | ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); |
6876 | if (0 == ext_priv_size) { | 6829 | if (0 == ext_priv_size) { |
6877 | gk20a_dbg_info(" No extended memory in context buffer"); | 6830 | gk20a_dbg_info(" No extended memory in context buffer"); |
@@ -7149,7 +7102,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
7149 | } | 7102 | } |
7150 | 7103 | ||
7151 | static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | 7104 | static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, |
7152 | void *context, | 7105 | u8 *context, |
7153 | u32 *num_ppcs, u32 *ppc_mask, | 7106 | u32 *num_ppcs, u32 *ppc_mask, |
7154 | u32 *reg_ppc_count) | 7107 | u32 *reg_ppc_count) |
7155 | { | 7108 | { |
@@ -7165,7 +7118,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
7165 | (num_pes_per_gpc > 1))) | 7118 | (num_pes_per_gpc > 1))) |
7166 | return -EINVAL; | 7119 | return -EINVAL; |
7167 | 7120 | ||
7168 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); | 7121 | data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o()); |
7169 | 7122 | ||
7170 | *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); | 7123 | *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); |
7171 | *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); | 7124 | *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); |
@@ -7177,7 +7130,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
7177 | 7130 | ||
7178 | /* | 7131 | /* |
7179 | * This function will return the 32 bit offset for a priv register if it is | 7132 | * This function will return the 32 bit offset for a priv register if it is |
7180 | * present in the context buffer. | 7133 | * present in the context buffer. The context buffer is in CPU memory. |
7181 | */ | 7134 | */ |
7182 | static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | 7135 | static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, |
7183 | u32 addr, | 7136 | u32 addr, |
@@ -7196,7 +7149,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7196 | u32 offset; | 7149 | u32 offset; |
7197 | u32 sys_priv_offset, gpc_priv_offset; | 7150 | u32 sys_priv_offset, gpc_priv_offset; |
7198 | u32 ppc_mask, reg_list_ppc_count; | 7151 | u32 ppc_mask, reg_list_ppc_count; |
7199 | void *context; | 7152 | u8 *context; |
7200 | u32 offset_to_segment; | 7153 | u32 offset_to_segment; |
7201 | 7154 | ||
7202 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | 7155 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); |
@@ -7207,13 +7160,13 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7207 | if (err) | 7160 | if (err) |
7208 | return err; | 7161 | return err; |
7209 | 7162 | ||
7210 | context = context_buffer; | 7163 | context = (u8 *)context_buffer; |
7211 | if (!check_main_image_header_magic(context)) { | 7164 | if (!check_main_image_header_magic(context)) { |
7212 | gk20a_err(dev_from_gk20a(g), | 7165 | gk20a_err(dev_from_gk20a(g), |
7213 | "Invalid main header: magic value"); | 7166 | "Invalid main header: magic value"); |
7214 | return -EINVAL; | 7167 | return -EINVAL; |
7215 | } | 7168 | } |
7216 | num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); | 7169 | num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); |
7217 | 7170 | ||
7218 | /* Parse the FECS local header. */ | 7171 | /* Parse the FECS local header. */ |
7219 | context += ctxsw_prog_ucode_header_size_in_bytes(); | 7172 | context += ctxsw_prog_ucode_header_size_in_bytes(); |
@@ -7222,7 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7222 | "Invalid FECS local header: magic value\n"); | 7175 | "Invalid FECS local header: magic value\n"); |
7223 | return -EINVAL; | 7176 | return -EINVAL; |
7224 | } | 7177 | } |
7225 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); | 7178 | data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); |
7226 | sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); | 7179 | sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); |
7227 | 7180 | ||
7228 | /* If found in Ext buffer, ok. | 7181 | /* If found in Ext buffer, ok. |
@@ -7268,7 +7221,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7268 | return -EINVAL; | 7221 | return -EINVAL; |
7269 | 7222 | ||
7270 | } | 7223 | } |
7271 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); | 7224 | data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); |
7272 | gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); | 7225 | gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); |
7273 | 7226 | ||
7274 | err = gr_gk20a_determine_ppc_configuration(g, context, | 7227 | err = gr_gk20a_determine_ppc_configuration(g, context, |
@@ -7277,7 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7277 | if (err) | 7230 | if (err) |
7278 | return err; | 7231 | return err; |
7279 | 7232 | ||
7280 | num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0); | 7233 | num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o()); |
7281 | 7234 | ||
7282 | if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { | 7235 | if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { |
7283 | gk20a_err(dev_from_gk20a(g), | 7236 | gk20a_err(dev_from_gk20a(g), |
@@ -7689,9 +7642,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7689 | { | 7642 | { |
7690 | struct gk20a *g = ch->g; | 7643 | struct gk20a *g = ch->g; |
7691 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 7644 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; |
7692 | void *ctx_ptr = NULL; | 7645 | bool gr_ctx_ready = false; |
7693 | void *pm_ctx_ptr = NULL; | 7646 | bool pm_ctx_ready = false; |
7694 | void *base_ptr = NULL; | 7647 | struct mem_desc *current_mem = NULL; |
7695 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; | 7648 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; |
7696 | u32 i, j, offset, v; | 7649 | u32 i, j, offset, v; |
7697 | struct gr_gk20a *gr = &g->gr; | 7650 | struct gr_gk20a *gr = &g->gr; |
@@ -7821,20 +7774,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7821 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), | 7774 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), |
7822 | ctx_ops[i].quad); | 7775 | ctx_ops[i].quad); |
7823 | if (!err) { | 7776 | if (!err) { |
7824 | if (!ctx_ptr) { | 7777 | if (!gr_ctx_ready) { |
7825 | /* would have been a variant of | 7778 | /* would have been a variant of |
7826 | * gr_gk20a_apply_instmem_overrides, | 7779 | * gr_gk20a_apply_instmem_overrides, |
7827 | * recoded in-place instead. | 7780 | * recoded in-place instead. |
7828 | */ | 7781 | */ |
7829 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 7782 | if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) { |
7830 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
7831 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
7832 | if (!ctx_ptr) { | ||
7833 | err = -ENOMEM; | 7783 | err = -ENOMEM; |
7834 | goto cleanup; | 7784 | goto cleanup; |
7835 | } | 7785 | } |
7786 | gr_ctx_ready = true; | ||
7836 | } | 7787 | } |
7837 | base_ptr = ctx_ptr; | 7788 | current_mem = &ch_ctx->gr_ctx->mem; |
7838 | } else { | 7789 | } else { |
7839 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | 7790 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, |
7840 | ctx_ops[i].offset, | 7791 | ctx_ops[i].offset, |
@@ -7849,7 +7800,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7849 | NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; | 7800 | NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; |
7850 | continue; | 7801 | continue; |
7851 | } | 7802 | } |
7852 | if (!pm_ctx_ptr) { | 7803 | if (!pm_ctx_ready) { |
7853 | /* Make sure ctx buffer was initialized */ | 7804 | /* Make sure ctx buffer was initialized */ |
7854 | if (!ch_ctx->pm_ctx.mem.pages) { | 7805 | if (!ch_ctx->pm_ctx.mem.pages) { |
7855 | gk20a_err(dev_from_gk20a(g), | 7806 | gk20a_err(dev_from_gk20a(g), |
@@ -7857,15 +7808,13 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7857 | err = -EINVAL; | 7808 | err = -EINVAL; |
7858 | goto cleanup; | 7809 | goto cleanup; |
7859 | } | 7810 | } |
7860 | pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages, | 7811 | if (gk20a_mem_begin(g, &ch_ctx->pm_ctx.mem)) { |
7861 | PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT, | ||
7862 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
7863 | if (!pm_ctx_ptr) { | ||
7864 | err = -ENOMEM; | 7812 | err = -ENOMEM; |
7865 | goto cleanup; | 7813 | goto cleanup; |
7866 | } | 7814 | } |
7815 | pm_ctx_ready = true; | ||
7867 | } | 7816 | } |
7868 | base_ptr = pm_ctx_ptr; | 7817 | current_mem = &ch_ctx->pm_ctx.mem; |
7869 | } | 7818 | } |
7870 | 7819 | ||
7871 | /* if this is a quad access, setup for special access*/ | 7820 | /* if this is a quad access, setup for special access*/ |
@@ -7878,24 +7827,24 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7878 | /* sanity check gr ctxt offsets, | 7827 | /* sanity check gr ctxt offsets, |
7879 | * don't write outside, worst case | 7828 | * don't write outside, worst case |
7880 | */ | 7829 | */ |
7881 | if ((base_ptr == ctx_ptr) && | 7830 | if ((current_mem == &ch_ctx->gr_ctx->mem) && |
7882 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) | 7831 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) |
7883 | continue; | 7832 | continue; |
7884 | if (pass == 0) { /* write pass */ | 7833 | if (pass == 0) { /* write pass */ |
7885 | v = gk20a_mem_rd32(base_ptr + offsets[j], 0); | 7834 | v = gk20a_mem_rd(g, current_mem, offsets[j]); |
7886 | v &= ~ctx_ops[i].and_n_mask_lo; | 7835 | v &= ~ctx_ops[i].and_n_mask_lo; |
7887 | v |= ctx_ops[i].value_lo; | 7836 | v |= ctx_ops[i].value_lo; |
7888 | gk20a_mem_wr32(base_ptr + offsets[j], 0, v); | 7837 | gk20a_mem_wr(g, current_mem, offsets[j], v); |
7889 | 7838 | ||
7890 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7839 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7891 | "context wr: offset=0x%x v=0x%x", | 7840 | "context wr: offset=0x%x v=0x%x", |
7892 | offsets[j], v); | 7841 | offsets[j], v); |
7893 | 7842 | ||
7894 | if (ctx_ops[i].op == REGOP(WRITE_64)) { | 7843 | if (ctx_ops[i].op == REGOP(WRITE_64)) { |
7895 | v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0); | 7844 | v = gk20a_mem_rd(g, current_mem, offsets[j] + 4); |
7896 | v &= ~ctx_ops[i].and_n_mask_hi; | 7845 | v &= ~ctx_ops[i].and_n_mask_hi; |
7897 | v |= ctx_ops[i].value_hi; | 7846 | v |= ctx_ops[i].value_hi; |
7898 | gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v); | 7847 | gk20a_mem_wr(g, current_mem, offsets[j] + 4, v); |
7899 | 7848 | ||
7900 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7849 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7901 | "context wr: offset=0x%x v=0x%x", | 7850 | "context wr: offset=0x%x v=0x%x", |
@@ -7905,18 +7854,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7905 | /* check to see if we need to add a special WAR | 7854 | /* check to see if we need to add a special WAR |
7906 | for some of the SMPC perf regs */ | 7855 | for some of the SMPC perf regs */ |
7907 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], | 7856 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], |
7908 | v, base_ptr); | 7857 | v, current_mem); |
7909 | 7858 | ||
7910 | } else { /* read pass */ | 7859 | } else { /* read pass */ |
7911 | ctx_ops[i].value_lo = | 7860 | ctx_ops[i].value_lo = |
7912 | gk20a_mem_rd32(base_ptr + offsets[0], 0); | 7861 | gk20a_mem_rd(g, current_mem, offsets[0]); |
7913 | 7862 | ||
7914 | gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", | 7863 | gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", |
7915 | offsets[0], ctx_ops[i].value_lo); | 7864 | offsets[0], ctx_ops[i].value_lo); |
7916 | 7865 | ||
7917 | if (ctx_ops[i].op == REGOP(READ_64)) { | 7866 | if (ctx_ops[i].op == REGOP(READ_64)) { |
7918 | ctx_ops[i].value_hi = | 7867 | ctx_ops[i].value_hi = |
7919 | gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0); | 7868 | gk20a_mem_rd(g, current_mem, offsets[0] + 4); |
7920 | 7869 | ||
7921 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7870 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7922 | "context rd: offset=0x%x v=0x%x", | 7871 | "context rd: offset=0x%x v=0x%x", |
@@ -7943,12 +7892,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7943 | 7892 | ||
7944 | if (ch_ctx->patch_ctx.mem.cpu_va) | 7893 | if (ch_ctx->patch_ctx.mem.cpu_va) |
7945 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | 7894 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); |
7946 | 7895 | if (gr_ctx_ready) | |
7947 | if (ctx_ptr) | 7896 | gk20a_mem_end(g, &ch_ctx->gr_ctx->mem); |
7948 | vunmap(ctx_ptr); | 7897 | if (pm_ctx_ready) |
7949 | 7898 | gk20a_mem_end(g, &ch_ctx->pm_ctx.mem); | |
7950 | if (pm_ctx_ptr) | ||
7951 | vunmap(pm_ctx_ptr); | ||
7952 | 7899 | ||
7953 | if (restart_gr_ctxsw) { | 7900 | if (restart_gr_ctxsw) { |
7954 | int tmp_err = gr_gk20a_enable_ctxsw(g); | 7901 | int tmp_err = gr_gk20a_enable_ctxsw(g); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 6f6734b4..13382416 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -44,6 +44,112 @@ | |||
44 | #include "kind_gk20a.h" | 44 | #include "kind_gk20a.h" |
45 | #include "semaphore_gk20a.h" | 45 | #include "semaphore_gk20a.h" |
46 | 46 | ||
47 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | ||
48 | { | ||
49 | void *cpu_va; | ||
50 | |||
51 | if (WARN_ON(mem->cpu_va)) { | ||
52 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); | ||
53 | return -EBUSY; | ||
54 | } | ||
55 | |||
56 | cpu_va = vmap(mem->pages, | ||
57 | PAGE_ALIGN(mem->size) >> PAGE_SHIFT, | ||
58 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
59 | |||
60 | if (WARN_ON(!cpu_va)) | ||
61 | return -ENOMEM; | ||
62 | |||
63 | mem->cpu_va = cpu_va; | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | ||
68 | { | ||
69 | vunmap(mem->cpu_va); | ||
70 | mem->cpu_va = NULL; | ||
71 | } | ||
72 | |||
73 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | ||
74 | { | ||
75 | u32 *ptr = mem->cpu_va; | ||
76 | u32 data; | ||
77 | |||
78 | WARN_ON(!ptr); | ||
79 | data = ptr[w]; | ||
80 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
81 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | ||
82 | #endif | ||
83 | return data; | ||
84 | } | ||
85 | |||
86 | u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset) | ||
87 | { | ||
88 | WARN_ON(offset & 3); | ||
89 | return gk20a_mem_rd32(g, mem, offset / sizeof(u32)); | ||
90 | } | ||
91 | |||
92 | void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, | ||
93 | u32 offset, void *dest, u32 size) | ||
94 | { | ||
95 | u32 i; | ||
96 | u32 *dest_u32 = dest; | ||
97 | |||
98 | WARN_ON(offset & 3); | ||
99 | WARN_ON(size & 3); | ||
100 | offset /= sizeof(u32); | ||
101 | size /= sizeof(u32); | ||
102 | |||
103 | for (i = 0; i < size; i++) | ||
104 | dest_u32[i] = gk20a_mem_rd32(g, mem, offset + i); | ||
105 | } | ||
106 | |||
107 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | ||
108 | { | ||
109 | u32 *ptr = mem->cpu_va; | ||
110 | |||
111 | WARN_ON(!ptr); | ||
112 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
113 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | ||
114 | #endif | ||
115 | ptr[w] = data; | ||
116 | } | ||
117 | |||
118 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) | ||
119 | { | ||
120 | WARN_ON(offset & 3); | ||
121 | gk20a_mem_wr32(g, mem, offset / sizeof(u32), data); | ||
122 | } | ||
123 | |||
124 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
125 | void *src, u32 size) | ||
126 | { | ||
127 | u32 i; | ||
128 | u32 *src_u32 = src; | ||
129 | |||
130 | WARN_ON(offset & 3); | ||
131 | WARN_ON(size & 3); | ||
132 | offset /= sizeof(u32); | ||
133 | size /= sizeof(u32); | ||
134 | |||
135 | for (i = 0; i < size; i++) | ||
136 | gk20a_mem_wr32(g, mem, offset + i, src_u32[i]); | ||
137 | } | ||
138 | |||
139 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
140 | u32 value, u32 size) | ||
141 | { | ||
142 | u32 i; | ||
143 | |||
144 | WARN_ON(offset & 3); | ||
145 | WARN_ON(size & 3); | ||
146 | offset /= sizeof(u32); | ||
147 | size /= sizeof(u32); | ||
148 | |||
149 | for (i = 0; i < size; i++) | ||
150 | gk20a_mem_wr32(g, mem, offset + i, value); | ||
151 | } | ||
152 | |||
47 | /* | 153 | /* |
48 | * GPU mapping life cycle | 154 | * GPU mapping life cycle |
49 | * ====================== | 155 | * ====================== |
@@ -780,9 +886,14 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm, | |||
780 | *pde_lo, *pde_hi); | 886 | *pde_lo, *pde_hi); |
781 | } | 887 | } |
782 | 888 | ||
783 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i) | 889 | static u32 pde_from_index(u32 i) |
890 | { | ||
891 | return i * gmmu_pde__size_v() / sizeof(u32); | ||
892 | } | ||
893 | |||
894 | static u32 pte_from_index(u32 i) | ||
784 | { | 895 | { |
785 | return (u32 *) (((u8 *)vm->pdb.mem.cpu_va) + i*gmmu_pde__size_v()); | 896 | return i * gmmu_pte__size_v() / sizeof(u32); |
786 | } | 897 | } |
787 | 898 | ||
788 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, | 899 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, |
@@ -2323,7 +2434,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2323 | u64 pte_addr_small = 0, pte_addr_big = 0; | 2434 | u64 pte_addr_small = 0, pte_addr_big = 0; |
2324 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; | 2435 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; |
2325 | u32 pde_v[2] = {0, 0}; | 2436 | u32 pde_v[2] = {0, 0}; |
2326 | u32 *pde; | 2437 | u32 pde; |
2327 | 2438 | ||
2328 | gk20a_dbg_fn(""); | 2439 | gk20a_dbg_fn(""); |
2329 | 2440 | ||
@@ -2348,10 +2459,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, | |||
2348 | (big_valid ? (gmmu_pde_vol_big_true_f()) : | 2459 | (big_valid ? (gmmu_pde_vol_big_true_f()) : |
2349 | gmmu_pde_vol_big_false_f()); | 2460 | gmmu_pde_vol_big_false_f()); |
2350 | 2461 | ||
2351 | pde = pde_from_index(vm, i); | 2462 | pde = pde_from_index(i); |
2352 | 2463 | ||
2353 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 2464 | gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]); |
2354 | gk20a_mem_wr32(pde, 1, pde_v[1]); | 2465 | gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]); |
2355 | 2466 | ||
2356 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", | 2467 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", |
2357 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); | 2468 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); |
@@ -2432,8 +2543,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, | |||
2432 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | 2543 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); |
2433 | } | 2544 | } |
2434 | 2545 | ||
2435 | gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]); | 2546 | gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 0, pte_w[0]); |
2436 | gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]); | 2547 | gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 1, pte_w[1]); |
2437 | 2548 | ||
2438 | if (*iova) { | 2549 | if (*iova) { |
2439 | *iova += page_size; | 2550 | *iova += page_size; |
@@ -3489,19 +3600,19 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm) | |||
3489 | false, false, "cde"); | 3600 | false, false, "cde"); |
3490 | } | 3601 | } |
3491 | 3602 | ||
3492 | void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | 3603 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr) |
3493 | { | 3604 | { |
3494 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | 3605 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); |
3495 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | 3606 | u32 pdb_addr_hi = u64_hi32(pdb_addr); |
3496 | 3607 | ||
3497 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | 3608 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), |
3498 | (g->mm.vidmem_is_vidmem ? | 3609 | (g->mm.vidmem_is_vidmem ? |
3499 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : | 3610 | ram_in_page_dir_base_target_sys_mem_ncoh_f() : |
3500 | ram_in_page_dir_base_target_vid_mem_f()) | | 3611 | ram_in_page_dir_base_target_vid_mem_f()) | |
3501 | ram_in_page_dir_base_vol_true_f() | | 3612 | ram_in_page_dir_base_vol_true_f() | |
3502 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | 3613 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); |
3503 | 3614 | ||
3504 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | 3615 | gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), |
3505 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | 3616 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); |
3506 | } | 3617 | } |
3507 | 3618 | ||
@@ -3510,23 +3621,22 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, | |||
3510 | { | 3621 | { |
3511 | struct gk20a *g = gk20a_from_vm(vm); | 3622 | struct gk20a *g = gk20a_from_vm(vm); |
3512 | u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); | 3623 | u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); |
3513 | void *inst_ptr = inst_block->cpu_va; | ||
3514 | 3624 | ||
3515 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", | 3625 | gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", |
3516 | gk20a_mm_inst_block_addr(g, inst_block), inst_ptr); | 3626 | gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); |
3517 | 3627 | ||
3518 | gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); | 3628 | gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); |
3519 | 3629 | ||
3520 | g->ops.mm.init_pdb(g, inst_ptr, pde_addr); | 3630 | g->ops.mm.init_pdb(g, inst_block, pde_addr); |
3521 | 3631 | ||
3522 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), | 3632 | gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), |
3523 | u64_lo32(vm->va_limit - 1) & ~0xfff); | 3633 | u64_lo32(vm->va_limit - 1) & ~0xfff); |
3524 | 3634 | ||
3525 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), | 3635 | gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(), |
3526 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1))); | 3636 | ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1))); |
3527 | 3637 | ||
3528 | if (big_page_size && g->ops.mm.set_big_page_size) | 3638 | if (big_page_size && g->ops.mm.set_big_page_size) |
3529 | g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); | 3639 | g->ops.mm.set_big_page_size(g, inst_block, big_page_size); |
3530 | } | 3640 | } |
3531 | 3641 | ||
3532 | int gk20a_mm_fb_flush(struct gk20a *g) | 3642 | int gk20a_mm_fb_flush(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7fa0b7fb..e9ac8f18 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -419,6 +419,34 @@ static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, | |||
419 | return gmmu_page_size_small; | 419 | return gmmu_page_size_small; |
420 | } | 420 | } |
421 | 421 | ||
422 | /* | ||
423 | * Buffer accessors - wrap between begin() and end() if there is no permanent | ||
424 | * kernel mapping for this buffer. | ||
425 | */ | ||
426 | |||
427 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem); | ||
428 | /* nop for null mem, like with free() or vunmap() */ | ||
429 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem); | ||
430 | |||
431 | /* word-indexed offset */ | ||
432 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w); | ||
433 | /* byte offset (32b-aligned) */ | ||
434 | u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset); | ||
435 | /* memcpy to cpu, offset and size in bytes (32b-aligned) */ | ||
436 | void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
437 | void *dest, u32 size); | ||
438 | |||
439 | /* word-indexed offset */ | ||
440 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data); | ||
441 | /* byte offset (32b-aligned) */ | ||
442 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data); | ||
443 | /* memcpy from cpu, offset and size in bytes (32b-aligned) */ | ||
444 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
445 | void *src, u32 size); | ||
446 | /* size and offset in bytes (32b-aligned), filled with u32s */ | ||
447 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | ||
448 | u32 value, u32 size); | ||
449 | |||
422 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ | 450 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ |
423 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ | 451 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ |
424 | bus_bar1_block_ptr_s()) | 452 | bus_bar1_block_ptr_s()) |
@@ -673,7 +701,6 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm, | |||
673 | u64 addr_lo, u64 addr_hi, | 701 | u64 addr_lo, u64 addr_hi, |
674 | u32 *pde_lo, u32 *pde_hi); | 702 | u32 *pde_lo, u32 *pde_hi); |
675 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); | 703 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); |
676 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i); | ||
677 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, | 704 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, |
678 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); | 705 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); |
679 | void free_gmmu_pages(struct vm_gk20a *vm, | 706 | void free_gmmu_pages(struct vm_gk20a *vm, |
@@ -685,7 +712,7 @@ struct gpu_ops; | |||
685 | void gk20a_init_mm(struct gpu_ops *gops); | 712 | void gk20a_init_mm(struct gpu_ops *gops); |
686 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, | 713 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, |
687 | u32 big_page_size); | 714 | u32 big_page_size); |
688 | void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr); | 715 | void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr); |
689 | 716 | ||
690 | void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); | 717 | void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); |
691 | 718 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 56ad0c2a..54b2eef4 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -2421,11 +2421,10 @@ static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) | |||
2421 | static int gk20a_prepare_ucode(struct gk20a *g) | 2421 | static int gk20a_prepare_ucode(struct gk20a *g) |
2422 | { | 2422 | { |
2423 | struct pmu_gk20a *pmu = &g->pmu; | 2423 | struct pmu_gk20a *pmu = &g->pmu; |
2424 | int i, err = 0; | 2424 | int err = 0; |
2425 | struct device *d = dev_from_gk20a(g); | 2425 | struct device *d = dev_from_gk20a(g); |
2426 | struct mm_gk20a *mm = &g->mm; | 2426 | struct mm_gk20a *mm = &g->mm; |
2427 | struct vm_gk20a *vm = &mm->pmu.vm; | 2427 | struct vm_gk20a *vm = &mm->pmu.vm; |
2428 | void *ucode_ptr; | ||
2429 | 2428 | ||
2430 | if (g->pmu_fw) { | 2429 | if (g->pmu_fw) { |
2431 | gk20a_init_pmu(pmu); | 2430 | gk20a_init_pmu(pmu); |
@@ -2449,11 +2448,8 @@ static int gk20a_prepare_ucode(struct gk20a *g) | |||
2449 | if (err) | 2448 | if (err) |
2450 | goto err_release_fw; | 2449 | goto err_release_fw; |
2451 | 2450 | ||
2452 | ucode_ptr = pmu->ucode.cpu_va; | 2451 | gk20a_mem_wr_n(g, &pmu->ucode, 0, pmu->ucode_image, |
2453 | 2452 | pmu->desc->app_start_offset + pmu->desc->app_size); | |
2454 | for (i = 0; i < (pmu->desc->app_start_offset + | ||
2455 | pmu->desc->app_size) >> 2; i++) | ||
2456 | gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); | ||
2457 | 2453 | ||
2458 | gk20a_init_pmu(pmu); | 2454 | gk20a_init_pmu(pmu); |
2459 | 2455 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 0e6e715d..3ac2cec8 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c | |||
@@ -43,8 +43,8 @@ static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, | |||
43 | static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img); | 43 | static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img); |
44 | static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img); | 44 | static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img); |
45 | static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm); | 45 | static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm); |
46 | static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, | 46 | static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, |
47 | void *nonwpr_addr); | 47 | struct mem_desc *nonwpr); |
48 | static int acr_ucode_patch_sig(struct gk20a *g, | 48 | static int acr_ucode_patch_sig(struct gk20a *g, |
49 | unsigned int *p_img, | 49 | unsigned int *p_img, |
50 | unsigned int *p_prod_sig, | 50 | unsigned int *p_prod_sig, |
@@ -355,7 +355,7 @@ int prepare_ucode_blob(struct gk20a *g) | |||
355 | 355 | ||
356 | gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", | 356 | gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", |
357 | plsfm->managed_flcn_cnt, plsfm->wpr_size); | 357 | plsfm->managed_flcn_cnt, plsfm->wpr_size); |
358 | lsfm_init_wpr_contents(g, plsfm, g->acr.ucode_blob.cpu_va); | 358 | lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob); |
359 | } else { | 359 | } else { |
360 | gm20b_dbg_pmu("LSFM is managing no falcons.\n"); | 360 | gm20b_dbg_pmu("LSFM is managing no falcons.\n"); |
361 | } | 361 | } |
@@ -613,120 +613,91 @@ static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g, | |||
613 | } | 613 | } |
614 | 614 | ||
615 | /* Initialize WPR contents */ | 615 | /* Initialize WPR contents */ |
616 | static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, | 616 | static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, |
617 | void *nonwpr_addr) | 617 | struct mem_desc *ucode) |
618 | { | 618 | { |
619 | struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; | ||
620 | u32 i; | ||
619 | 621 | ||
620 | int status = 0; | 622 | /* The WPR array is at the base of the WPR */ |
621 | union flcn_bl_generic_desc *nonwpr_bl_gen_desc; | 623 | pnode = plsfm->ucode_img_list; |
622 | if (nonwpr_addr == NULL) { | 624 | i = 0; |
623 | status = -ENOMEM; | ||
624 | } else { | ||
625 | struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; | ||
626 | struct lsf_wpr_header *wpr_hdr; | ||
627 | struct lsf_lsb_header *lsb_hdr; | ||
628 | void *ucode_off; | ||
629 | u32 i; | ||
630 | |||
631 | /* The WPR array is at the base of the WPR */ | ||
632 | wpr_hdr = (struct lsf_wpr_header *)nonwpr_addr; | ||
633 | pnode = plsfm->ucode_img_list; | ||
634 | i = 0; | ||
635 | 625 | ||
636 | /* | 626 | /* |
637 | * Walk the managed falcons, flush WPR and LSB headers to FB. | 627 | * Walk the managed falcons, flush WPR and LSB headers to FB. |
638 | * flush any bl args to the storage area relative to the | 628 | * flush any bl args to the storage area relative to the |
639 | * ucode image (appended on the end as a DMEM area). | 629 | * ucode image (appended on the end as a DMEM area). |
640 | */ | 630 | */ |
641 | while (pnode) { | 631 | while (pnode) { |
642 | /* Flush WPR header to memory*/ | 632 | /* Flush WPR header to memory*/ |
643 | memcpy(&wpr_hdr[i], &pnode->wpr_header, | 633 | gk20a_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header), |
644 | sizeof(struct lsf_wpr_header)); | 634 | &pnode->wpr_header, sizeof(pnode->wpr_header)); |
645 | gm20b_dbg_pmu("wpr header as in memory and pnode\n"); | 635 | |
646 | gm20b_dbg_pmu("falconid :%d %d\n", | 636 | gm20b_dbg_pmu("wpr header"); |
647 | pnode->wpr_header.falcon_id, | 637 | gm20b_dbg_pmu("falconid :%d", |
648 | wpr_hdr[i].falcon_id); | 638 | pnode->wpr_header.falcon_id); |
649 | gm20b_dbg_pmu("lsb_offset :%x %x\n", | 639 | gm20b_dbg_pmu("lsb_offset :%x", |
650 | pnode->wpr_header.lsb_offset, | 640 | pnode->wpr_header.lsb_offset); |
651 | wpr_hdr[i].lsb_offset); | 641 | gm20b_dbg_pmu("bootstrap_owner :%d", |
652 | gm20b_dbg_pmu("bootstrap_owner :%d %d\n", | 642 | pnode->wpr_header.bootstrap_owner); |
653 | pnode->wpr_header.bootstrap_owner, | 643 | gm20b_dbg_pmu("lazy_bootstrap :%d", |
654 | wpr_hdr[i].bootstrap_owner); | 644 | pnode->wpr_header.lazy_bootstrap); |
655 | gm20b_dbg_pmu("lazy_bootstrap :%d %d\n", | 645 | gm20b_dbg_pmu("status :%d", |
656 | pnode->wpr_header.lazy_bootstrap, | 646 | pnode->wpr_header.status); |
657 | wpr_hdr[i].lazy_bootstrap); | 647 | |
658 | gm20b_dbg_pmu("status :%d %d\n", | 648 | /*Flush LSB header to memory*/ |
659 | pnode->wpr_header.status, wpr_hdr[i].status); | 649 | gk20a_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset, |
660 | 650 | &pnode->lsb_header, sizeof(pnode->lsb_header)); | |
661 | /*Flush LSB header to memory*/ | 651 | |
662 | lsb_hdr = (struct lsf_lsb_header *)((u8 *)nonwpr_addr + | 652 | gm20b_dbg_pmu("lsb header"); |
663 | pnode->wpr_header.lsb_offset); | 653 | gm20b_dbg_pmu("ucode_off :%x", |
664 | memcpy(lsb_hdr, &pnode->lsb_header, | 654 | pnode->lsb_header.ucode_off); |
665 | sizeof(struct lsf_lsb_header)); | 655 | gm20b_dbg_pmu("ucode_size :%x", |
666 | gm20b_dbg_pmu("lsb header as in memory and pnode\n"); | 656 | pnode->lsb_header.ucode_size); |
667 | gm20b_dbg_pmu("ucode_off :%x %x\n", | 657 | gm20b_dbg_pmu("data_size :%x", |
668 | pnode->lsb_header.ucode_off, | 658 | pnode->lsb_header.data_size); |
669 | lsb_hdr->ucode_off); | 659 | gm20b_dbg_pmu("bl_code_size :%x", |
670 | gm20b_dbg_pmu("ucode_size :%x %x\n", | 660 | pnode->lsb_header.bl_code_size); |
671 | pnode->lsb_header.ucode_size, | 661 | gm20b_dbg_pmu("bl_imem_off :%x", |
672 | lsb_hdr->ucode_size); | 662 | pnode->lsb_header.bl_imem_off); |
673 | gm20b_dbg_pmu("data_size :%x %x\n", | 663 | gm20b_dbg_pmu("bl_data_off :%x", |
674 | pnode->lsb_header.data_size, | 664 | pnode->lsb_header.bl_data_off); |
675 | lsb_hdr->data_size); | 665 | gm20b_dbg_pmu("bl_data_size :%x", |
676 | gm20b_dbg_pmu("bl_code_size :%x %x\n", | 666 | pnode->lsb_header.bl_data_size); |
677 | pnode->lsb_header.bl_code_size, | 667 | gm20b_dbg_pmu("app_code_off :%x", |
678 | lsb_hdr->bl_code_size); | 668 | pnode->lsb_header.app_code_off); |
679 | gm20b_dbg_pmu("bl_imem_off :%x %x\n", | 669 | gm20b_dbg_pmu("app_code_size :%x", |
680 | pnode->lsb_header.bl_imem_off, | 670 | pnode->lsb_header.app_code_size); |
681 | lsb_hdr->bl_imem_off); | 671 | gm20b_dbg_pmu("app_data_off :%x", |
682 | gm20b_dbg_pmu("bl_data_off :%x %x\n", | 672 | pnode->lsb_header.app_data_off); |
683 | pnode->lsb_header.bl_data_off, | 673 | gm20b_dbg_pmu("app_data_size :%x", |
684 | lsb_hdr->bl_data_off); | 674 | pnode->lsb_header.app_data_size); |
685 | gm20b_dbg_pmu("bl_data_size :%x %x\n", | 675 | gm20b_dbg_pmu("flags :%x", |
686 | pnode->lsb_header.bl_data_size, | 676 | pnode->lsb_header.flags); |
687 | lsb_hdr->bl_data_size); | 677 | |
688 | gm20b_dbg_pmu("app_code_off :%x %x\n", | 678 | /*If this falcon has a boot loader and related args, |
689 | pnode->lsb_header.app_code_off, | 679 | * flush them.*/ |
690 | lsb_hdr->app_code_off); | 680 | if (!pnode->ucode_img.header) { |
691 | gm20b_dbg_pmu("app_code_size :%x %x\n", | 681 | /*Populate gen bl and flush to memory*/ |
692 | pnode->lsb_header.app_code_size, | 682 | lsfm_fill_flcn_bl_gen_desc(g, pnode); |
693 | lsb_hdr->app_code_size); | 683 | gk20a_mem_wr_n(g, ucode, |
694 | gm20b_dbg_pmu("app_data_off :%x %x\n", | 684 | pnode->lsb_header.bl_data_off, |
695 | pnode->lsb_header.app_data_off, | 685 | &pnode->bl_gen_desc, |
696 | lsb_hdr->app_data_off); | ||
697 | gm20b_dbg_pmu("app_data_size :%x %x\n", | ||
698 | pnode->lsb_header.app_data_size, | ||
699 | lsb_hdr->app_data_size); | ||
700 | gm20b_dbg_pmu("flags :%x %x\n", | ||
701 | pnode->lsb_header.flags, lsb_hdr->flags); | ||
702 | |||
703 | /*If this falcon has a boot loader and related args, | ||
704 | * flush them.*/ | ||
705 | if (!pnode->ucode_img.header) { | ||
706 | nonwpr_bl_gen_desc = | ||
707 | (union flcn_bl_generic_desc *) | ||
708 | ((u8 *)nonwpr_addr + | ||
709 | pnode->lsb_header.bl_data_off); | ||
710 | |||
711 | /*Populate gen bl and flush to memory*/ | ||
712 | lsfm_fill_flcn_bl_gen_desc(g, pnode); | ||
713 | memcpy(nonwpr_bl_gen_desc, &pnode->bl_gen_desc, | ||
714 | pnode->bl_gen_desc_size); | 686 | pnode->bl_gen_desc_size); |
715 | } | ||
716 | ucode_off = (void *)(pnode->lsb_header.ucode_off + | ||
717 | (u8 *)nonwpr_addr); | ||
718 | /*Copying of ucode*/ | ||
719 | memcpy(ucode_off, pnode->ucode_img.data, | ||
720 | pnode->ucode_img.data_size); | ||
721 | pnode = pnode->next; | ||
722 | i++; | ||
723 | } | 687 | } |
724 | 688 | /*Copying of ucode*/ | |
725 | /* Tag the terminator WPR header with an invalid falcon ID. */ | 689 | gk20a_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off, |
726 | gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id, | 690 | pnode->ucode_img.data, |
727 | 0, LSF_FALCON_ID_INVALID); | 691 | pnode->ucode_img.data_size); |
692 | pnode = pnode->next; | ||
693 | i++; | ||
728 | } | 694 | } |
729 | return status; | 695 | |
696 | /* Tag the terminator WPR header with an invalid falcon ID. */ | ||
697 | gk20a_mem_wr32(g, ucode, | ||
698 | plsfm->managed_flcn_cnt * sizeof(struct lsf_wpr_header) + | ||
699 | offsetof(struct lsf_wpr_header, falcon_id), | ||
700 | LSF_FALCON_ID_INVALID); | ||
730 | } | 701 | } |
731 | 702 | ||
732 | /*! | 703 | /*! |
@@ -1000,7 +971,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
1000 | { | 971 | { |
1001 | struct mm_gk20a *mm = &g->mm; | 972 | struct mm_gk20a *mm = &g->mm; |
1002 | struct vm_gk20a *vm = &mm->pmu.vm; | 973 | struct vm_gk20a *vm = &mm->pmu.vm; |
1003 | int i, err = 0; | 974 | int err = 0; |
1004 | u64 *acr_dmem; | 975 | u64 *acr_dmem; |
1005 | u32 img_size_in_bytes = 0; | 976 | u32 img_size_in_bytes = 0; |
1006 | u32 status, size; | 977 | u32 status, size; |
@@ -1066,10 +1037,8 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) | |||
1066 | ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2; | 1037 | ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2; |
1067 | ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; | 1038 | ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; |
1068 | 1039 | ||
1069 | for (i = 0; i < (img_size_in_bytes/4); i++) { | 1040 | gk20a_mem_wr_n(g, &acr->acr_ucode, 0, |
1070 | gk20a_mem_wr32(acr->acr_ucode.cpu_va, i, | 1041 | acr_ucode_data_t210_load, img_size_in_bytes); |
1071 | acr_ucode_data_t210_load[i]); | ||
1072 | } | ||
1073 | /* | 1042 | /* |
1074 | * In order to execute this binary, we will be using | 1043 | * In order to execute this binary, we will be using |
1075 | * a bootloader which will load this image into PMU IMEM/DMEM. | 1044 | * a bootloader which will load this image into PMU IMEM/DMEM. |
@@ -1323,7 +1292,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | |||
1323 | struct mm_gk20a *mm = &g->mm; | 1292 | struct mm_gk20a *mm = &g->mm; |
1324 | struct vm_gk20a *vm = &mm->pmu.vm; | 1293 | struct vm_gk20a *vm = &mm->pmu.vm; |
1325 | struct device *d = dev_from_gk20a(g); | 1294 | struct device *d = dev_from_gk20a(g); |
1326 | int i, err = 0; | 1295 | int err = 0; |
1327 | u32 bl_sz; | 1296 | u32 bl_sz; |
1328 | struct acr_gm20b *acr = &g->acr; | 1297 | struct acr_gm20b *acr = &g->acr; |
1329 | const struct firmware *hsbl_fw = acr->hsbl_fw; | 1298 | const struct firmware *hsbl_fw = acr->hsbl_fw; |
@@ -1369,8 +1338,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) | |||
1369 | goto err_free_ucode; | 1338 | goto err_free_ucode; |
1370 | } | 1339 | } |
1371 | 1340 | ||
1372 | for (i = 0; i < (bl_sz) >> 2; i++) | 1341 | gk20a_mem_wr_n(g, &acr->hsbl_ucode, 0, pmu_bl_gm10x, bl_sz); |
1373 | gk20a_mem_wr32(acr->hsbl_ucode.cpu_va, i, pmu_bl_gm10x[i]); | ||
1374 | gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); | 1342 | gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); |
1375 | } | 1343 | } |
1376 | /* | 1344 | /* |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index b9a1e685..2197bae5 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -849,7 +849,7 @@ static int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | |||
849 | 849 | ||
850 | static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, | 850 | static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, |
851 | struct channel_ctx_gk20a *ch_ctx, | 851 | struct channel_ctx_gk20a *ch_ctx, |
852 | void *ctx_ptr) | 852 | struct mem_desc *mem) |
853 | { | 853 | { |
854 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | 854 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; |
855 | u32 cta_preempt_option = | 855 | u32 cta_preempt_option = |
@@ -859,7 +859,8 @@ static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
859 | 859 | ||
860 | if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) { | 860 | if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) { |
861 | gk20a_dbg_info("CTA: %x", cta_preempt_option); | 861 | gk20a_dbg_info("CTA: %x", cta_preempt_option); |
862 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_preemption_options_o(), 0, | 862 | gk20a_mem_wr(g, mem, |
863 | ctxsw_prog_main_image_preemption_options_o(), | ||
863 | cta_preempt_option); | 864 | cta_preempt_option); |
864 | } | 865 | } |
865 | 866 | ||
@@ -1005,7 +1006,7 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, | |||
1005 | bool enable) | 1006 | bool enable) |
1006 | { | 1007 | { |
1007 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1008 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
1008 | void *ctx_ptr = NULL; | 1009 | struct mem_desc *mem; |
1009 | u32 v; | 1010 | u32 v; |
1010 | 1011 | ||
1011 | gk20a_dbg_fn(""); | 1012 | gk20a_dbg_fn(""); |
@@ -1013,18 +1014,17 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, | |||
1013 | if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) | 1014 | if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) |
1014 | return -EINVAL; | 1015 | return -EINVAL; |
1015 | 1016 | ||
1016 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 1017 | mem = &ch_ctx->gr_ctx->mem; |
1017 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | 1018 | |
1018 | 0, pgprot_writecombine(PAGE_KERNEL)); | 1019 | if (gk20a_mem_begin(c->g, mem)) |
1019 | if (!ctx_ptr) | ||
1020 | return -ENOMEM; | 1020 | return -ENOMEM; |
1021 | 1021 | ||
1022 | v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | 1022 | v = gk20a_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o()); |
1023 | v &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); | 1023 | v &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); |
1024 | v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable); | 1024 | v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable); |
1025 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v); | 1025 | gk20a_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v); |
1026 | 1026 | ||
1027 | vunmap(ctx_ptr); | 1027 | gk20a_mem_end(c->g, mem); |
1028 | 1028 | ||
1029 | gk20a_dbg_fn("done"); | 1029 | gk20a_dbg_fn("done"); |
1030 | 1030 | ||
@@ -1089,13 +1089,13 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g) | |||
1089 | #endif | 1089 | #endif |
1090 | } | 1090 | } |
1091 | 1091 | ||
1092 | static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) | 1092 | static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct mem_desc *mem) |
1093 | { | 1093 | { |
1094 | u32 cde_v; | 1094 | u32 cde_v; |
1095 | 1095 | ||
1096 | cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0); | 1096 | cde_v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o()); |
1097 | cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); | 1097 | cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); |
1098 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); | 1098 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v); |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) | 1101 | static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) |
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index ac73b5c8..726d73ed 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -106,14 +106,14 @@ static void gm20b_mm_mmu_set_debug_mode(struct gk20a *g, bool enable) | |||
106 | } | 106 | } |
107 | 107 | ||
108 | static void gm20b_mm_set_big_page_size(struct gk20a *g, | 108 | static void gm20b_mm_set_big_page_size(struct gk20a *g, |
109 | void *inst_ptr, int size) | 109 | struct mem_desc *mem, int size) |
110 | { | 110 | { |
111 | u32 val; | 111 | u32 val; |
112 | 112 | ||
113 | gk20a_dbg_fn(""); | 113 | gk20a_dbg_fn(""); |
114 | 114 | ||
115 | gk20a_dbg_info("big page size %d\n", size); | 115 | gk20a_dbg_info("big page size %d\n", size); |
116 | val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w()); | 116 | val = gk20a_mem_rd32(g, mem, ram_in_big_page_size_w()); |
117 | val &= ~ram_in_big_page_size_m(); | 117 | val &= ~ram_in_big_page_size_m(); |
118 | 118 | ||
119 | if (size == SZ_64K) | 119 | if (size == SZ_64K) |
@@ -121,7 +121,7 @@ static void gm20b_mm_set_big_page_size(struct gk20a *g, | |||
121 | else | 121 | else |
122 | val |= ram_in_big_page_size_128kb_f(); | 122 | val |= ram_in_big_page_size_128kb_f(); |
123 | 123 | ||
124 | gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val); | 124 | gk20a_mem_wr32(g, mem, ram_in_big_page_size_w(), val); |
125 | gk20a_dbg_fn("done"); | 125 | gk20a_dbg_fn("done"); |
126 | } | 126 | } |
127 | 127 | ||
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 66b5e410..d1cba979 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -285,8 +285,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
285 | mutex_init(&f->free_chs_mutex); | 285 | mutex_init(&f->free_chs_mutex); |
286 | 286 | ||
287 | for (chid = 0; chid < f->num_channels; chid++) { | 287 | for (chid = 0; chid < f->num_channels; chid++) { |
288 | f->channel[chid].userd_cpu_va = | ||
289 | f->userd.cpu_va + chid * f->userd_entry_size; | ||
290 | f->channel[chid].userd_iova = | 288 | f->channel[chid].userd_iova = |
291 | g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) | 289 | g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) |
292 | + chid * f->userd_entry_size; | 290 | + chid * f->userd_entry_size; |