-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c     |  73
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h     |   1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.c       |  59
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c  |  27
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c        |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h             |  57
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c          | 359
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c          | 144
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h          |  31
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c         |  10
-rw-r--r--  drivers/gpu/nvgpu/gm20b/acr_gm20b.c         | 206
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c          |  26
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c          |   6
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c          |   2
14 files changed, 493 insertions(+), 510 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 990972e4..065e8ab1 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -129,28 +129,25 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
129{ 129{
130 u32 addr_lo; 130 u32 addr_lo;
131 u32 addr_hi; 131 u32 addr_hi;
132 void *inst_ptr;
133 struct gk20a *g = c->g; 132 struct gk20a *g = c->g;
134 133
135 gk20a_dbg_fn(""); 134 gk20a_dbg_fn("");
136 135
137 inst_ptr = c->inst_block.cpu_va;
138 if (!inst_ptr)
139 return -ENOMEM;
140
141 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); 136 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
142 addr_hi = u64_hi32(c->userd_iova); 137 addr_hi = u64_hi32(c->userd_iova);
143 138
144 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", 139 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
145 c->hw_chid, (u64)c->userd_iova); 140 c->hw_chid, (u64)c->userd_iova);
146 141
147 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(), 142 gk20a_mem_wr32(g, &c->inst_block,
143 ram_in_ramfc_w() + ram_fc_userd_w(),
148 (g->mm.vidmem_is_vidmem ? 144 (g->mm.vidmem_is_vidmem ?
149 pbdma_userd_target_sys_mem_ncoh_f() : 145 pbdma_userd_target_sys_mem_ncoh_f() :
150 pbdma_userd_target_vid_mem_f()) | 146 pbdma_userd_target_vid_mem_f()) |
151 pbdma_userd_addr_f(addr_lo)); 147 pbdma_userd_addr_f(addr_lo));
152 148
153 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(), 149 gk20a_mem_wr32(g, &c->inst_block,
150 ram_in_ramfc_w() + ram_fc_userd_hi_w(),
154 pbdma_userd_hi_addr_f(addr_hi)); 151 pbdma_userd_hi_addr_f(addr_hi));
155 152
156 return 0; 153 return 0;
@@ -186,13 +183,8 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
186 183
187static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) 184static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
188{ 185{
189 void *inst_ptr;
190 int shift = 0, value = 0; 186 int shift = 0, value = 0;
191 187
192 inst_ptr = c->inst_block.cpu_va;
193 if (!inst_ptr)
194 return -ENOMEM;
195
196 gk20a_channel_get_timescale_from_timeslice(c->g, 188 gk20a_channel_get_timescale_from_timeslice(c->g,
197 c->timeslice_us, &value, &shift); 189 c->timeslice_us, &value, &shift);
198 190
@@ -203,7 +195,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
203 WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); 195 WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
204 196
205 /* set new timeslice */ 197 /* set new timeslice */
206 gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), 198 gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(),
207 value | (shift << 12) | 199 value | (shift << 12) |
208 fifo_runlist_timeslice_enable_true_f()); 200 fifo_runlist_timeslice_enable_true_f());
209 201
@@ -255,33 +247,30 @@ u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c)
255int channel_gk20a_setup_ramfc(struct channel_gk20a *c, 247int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
256 u64 gpfifo_base, u32 gpfifo_entries, u32 flags) 248 u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
257{ 249{
258 void *inst_ptr; 250 struct gk20a *g = c->g;
251 struct mem_desc *mem = &c->inst_block;
259 252
260 gk20a_dbg_fn(""); 253 gk20a_dbg_fn("");
261 254
262 inst_ptr = c->inst_block.cpu_va; 255 gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
263 if (!inst_ptr)
264 return -ENOMEM;
265
266 memset(inst_ptr, 0, ram_fc_size_val_v());
267 256
268 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), 257 gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
269 pbdma_gp_base_offset_f( 258 pbdma_gp_base_offset_f(
270 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); 259 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
271 260
272 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), 261 gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
273 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | 262 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
274 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); 263 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
275 264
276 gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), 265 gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
277 c->g->ops.fifo.get_pbdma_signature(c->g)); 266 c->g->ops.fifo.get_pbdma_signature(c->g));
278 267
279 gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), 268 gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
280 pbdma_formats_gp_fermi0_f() | 269 pbdma_formats_gp_fermi0_f() |
281 pbdma_formats_pb_fermi1_f() | 270 pbdma_formats_pb_fermi1_f() |
282 pbdma_formats_mp_fermi0_f()); 271 pbdma_formats_mp_fermi0_f());
283 272
284 gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), 273 gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
285 pbdma_pb_header_priv_user_f() | 274 pbdma_pb_header_priv_user_f() |
286 pbdma_pb_header_method_zero_f() | 275 pbdma_pb_header_method_zero_f() |
287 pbdma_pb_header_subchannel_zero_f() | 276 pbdma_pb_header_subchannel_zero_f() |
@@ -289,47 +278,49 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
289 pbdma_pb_header_first_true_f() | 278 pbdma_pb_header_first_true_f() |
290 pbdma_pb_header_type_inc_f()); 279 pbdma_pb_header_type_inc_f());
291 280
292 gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), 281 gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
293 pbdma_subdevice_id_f(1) | 282 pbdma_subdevice_id_f(1) |
294 pbdma_subdevice_status_active_f() | 283 pbdma_subdevice_status_active_f() |
295 pbdma_subdevice_channel_dma_enable_f()); 284 pbdma_subdevice_channel_dma_enable_f());
296 285
297 gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f()); 286 gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
298 287
299 gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), 288 gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
300 channel_gk20a_pbdma_acquire_val(c)); 289 channel_gk20a_pbdma_acquire_val(c));
301 290
302 gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), 291 gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
303 fifo_runlist_timeslice_timeout_128_f() | 292 fifo_runlist_timeslice_timeout_128_f() |
304 fifo_runlist_timeslice_timescale_3_f() | 293 fifo_runlist_timeslice_timescale_3_f() |
305 fifo_runlist_timeslice_enable_true_f()); 294 fifo_runlist_timeslice_enable_true_f());
306 295
307 gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(), 296 gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
308 fifo_pb_timeslice_timeout_16_f() | 297 fifo_pb_timeslice_timeout_16_f() |
309 fifo_pb_timeslice_timescale_0_f() | 298 fifo_pb_timeslice_timescale_0_f() |
310 fifo_pb_timeslice_enable_true_f()); 299 fifo_pb_timeslice_enable_true_f());
311 300
312 gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); 301 gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
313 302
314 return channel_gk20a_commit_userd(c); 303 return channel_gk20a_commit_userd(c);
315} 304}
316 305
317static int channel_gk20a_setup_userd(struct channel_gk20a *c) 306static int channel_gk20a_setup_userd(struct channel_gk20a *c)
318{ 307{
319 BUG_ON(!c->userd_cpu_va); 308 struct gk20a *g = c->g;
309 struct mem_desc *mem = &g->fifo.userd;
310 u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32);
320 311
321 gk20a_dbg_fn(""); 312 gk20a_dbg_fn("");
322 313
323 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); 314 gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
324 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); 315 gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
325 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); 316 gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
326 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); 317 gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
327 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); 318 gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
328 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0); 319 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
329 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); 320 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
330 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); 321 gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
331 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); 322 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
332 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); 323 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
333 324
334 return 0; 325 return 0;
335} 326}
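Note: the channel_gk20a.c hunks above switch the ramfc/userd writes from the raw-pointer form gk20a_mem_wr32(inst_ptr, w, data) to accessors that take the device and a struct mem_desc, which is why the inst_block.cpu_va NULL checks disappear. gk20a_mem_wr32() addresses the buffer by 32-bit word index, so per-channel USERD access folds the channel's byte offset into the word index. A minimal sketch, using only names visible in this patch (the helper itself is hypothetical, not driver code):

    /* Hypothetical helper, assuming the accessor semantics used above:
     * gk20a_mem_wr32(g, mem, word, val) writes one 32-bit word into the
     * buffer described by mem. */
    static void userd_clear_word(struct gk20a *g, struct channel_gk20a *c,
                                 u32 word)
    {
            /* per-channel byte offset into the shared USERD buffer,
             * converted to a word index for gk20a_mem_wr32() */
            u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32);

            gk20a_mem_wr32(g, &g->fifo.userd, offset + word, 0);
    }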
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 8840a3ae..b1355f92 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -130,7 +130,6 @@ struct channel_gk20a {
130 struct mem_desc inst_block; 130 struct mem_desc inst_block;
131 struct mem_desc_sub ramfc; 131 struct mem_desc_sub ramfc;
132 132
133 void *userd_cpu_va;
134 u64 userd_iova; 133 u64 userd_iova;
135 u64 userd_gpu_va; 134 u64 userd_gpu_va;
136 135
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index c2285c8a..a3fa2ea5 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -36,7 +36,7 @@ unsigned int gk20a_debug_trace_cmdbuf;
36struct ch_state { 36struct ch_state {
37 int pid; 37 int pid;
38 int refs; 38 int refs;
39 u8 inst_block[0]; 39 u32 inst_block[0];
40}; 40};
41 41
42static const char * const ccsr_chan_status_str[] = { 42static const char * const ccsr_chan_status_str[] = {
@@ -108,15 +108,15 @@ static void gk20a_debug_show_channel(struct gk20a *g,
108 u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid)); 108 u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid));
109 u32 status = ccsr_channel_status_v(channel); 109 u32 status = ccsr_channel_status_v(channel);
110 u32 syncpointa, syncpointb; 110 u32 syncpointa, syncpointb;
111 void *inst_ptr; 111 u32 *inst_mem;
112 112
113 if (!ch_state) 113 if (!ch_state)
114 return; 114 return;
115 115
116 inst_ptr = &ch_state->inst_block[0]; 116 inst_mem = &ch_state->inst_block[0];
117 117
118 syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w()); 118 syncpointa = inst_mem[ram_fc_syncpointa_w()];
119 syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w()); 119 syncpointb = inst_mem[ram_fc_syncpointb_w()];
120 120
121 gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, 121 gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid,
122 dev_name(g->dev), 122 dev_name(g->dev),
@@ -129,23 +129,22 @@ static void gk20a_debug_show_channel(struct gk20a *g,
129 gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx " 129 gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx "
130 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n" 130 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
131 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n", 131 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
132 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) + 132 (u64)inst_mem[ram_fc_pb_top_level_get_w()] +
133 ((u64)gk20a_mem_rd32(inst_ptr, 133 ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL),
134 ram_fc_pb_top_level_get_hi_w()) << 32ULL), 134 (u64)inst_mem[ram_fc_pb_put_w()] +
135 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) + 135 ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL),
136 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL), 136 (u64)inst_mem[ram_fc_pb_get_w()] +
137 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) + 137 ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL),
138 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL), 138 (u64)inst_mem[ram_fc_pb_fetch_w()] +
139 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) + 139 ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL),
140 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL), 140 inst_mem[ram_fc_pb_header_w()],
141 gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()), 141 inst_mem[ram_fc_pb_count_w()],
142 gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()),
143 syncpointa, 142 syncpointa,
144 syncpointb, 143 syncpointb,
145 gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()), 144 inst_mem[ram_fc_semaphorea_w()],
146 gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()), 145 inst_mem[ram_fc_semaphoreb_w()],
147 gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()), 146 inst_mem[ram_fc_semaphorec_w()],
148 gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w())); 147 inst_mem[ram_fc_semaphored_w()]);
149 148
150#ifdef CONFIG_TEGRA_GK20A 149#ifdef CONFIG_TEGRA_GK20A
151 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v()) 150 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
@@ -246,17 +245,15 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
246 245
247 for (chid = 0; chid < f->num_channels; chid++) { 246 for (chid = 0; chid < f->num_channels; chid++) {
248 struct channel_gk20a *ch = &f->channel[chid]; 247 struct channel_gk20a *ch = &f->channel[chid];
249 if (ch_state[chid]) { 248 if (!ch_state[chid])
250 if (ch->inst_block.cpu_va) { 249 continue;
251 ch_state[chid]->pid = ch->pid; 250
252 ch_state[chid]->refs = 251 ch_state[chid]->pid = ch->pid;
253 atomic_read(&ch->ref_count); 252 ch_state[chid]->refs = atomic_read(&ch->ref_count);
254 memcpy(&ch_state[chid]->inst_block[0], 253 gk20a_mem_rd_n(g, &ch->inst_block, 0,
255 ch->inst_block.cpu_va, 254 &ch_state[chid]->inst_block[0],
256 ram_in_alloc_size_v()); 255 ram_in_alloc_size_v());
257 } 256 gk20a_channel_put(ch);
258 gk20a_channel_put(ch);
259 }
260 } 257 }
261 for (chid = 0; chid < f->num_channels; chid++) { 258 for (chid = 0; chid < f->num_channels; chid++) {
262 if (ch_state[chid]) { 259 if (ch_state[chid]) {
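Note: in debug_gk20a.c the per-channel snapshot becomes a u32 array filled by gk20a_mem_rd_n(), so the dump indexes the local copy directly with the ram_fc_*_w() word offsets instead of calling the removed pointer-based gk20a_mem_rd32(). For instance, a 64-bit PB pointer is reassembled from two snapshot words (sketch reusing the names from the hunk above):

    /* illustrative only: inst_mem is the local u32 copy of the RAMFC image */
    u64 pb_get = (u64)inst_mem[ram_fc_pb_get_w()] +
                 ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32);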
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index f9cddc41..edddcdc1 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -619,7 +619,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
619 phys_addr_t pa; 619 phys_addr_t pa;
620 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 620 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
621 struct gk20a_fecs_trace *trace = g->fecs_trace; 621 struct gk20a_fecs_trace *trace = g->fecs_trace;
622 void *ctx_ptr; 622 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
623 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); 623 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);
624 624
625 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, 625 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
@@ -634,10 +634,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
634 if (!pa) 634 if (!pa)
635 return -ENOMEM; 635 return -ENOMEM;
636 636
637 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 637 if (gk20a_mem_begin(g, mem))
638 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0,
639 pgprot_writecombine(PAGE_KERNEL));
640 if (!ctx_ptr)
641 return -ENOMEM; 638 return -ENOMEM;
642 639
643 lo = u64_lo32(pa); 640 lo = u64_lo32(pa);
@@ -646,18 +643,18 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
646 gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, 643 gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
647 lo, GK20A_FECS_TRACE_NUM_RECORDS); 644 lo, GK20A_FECS_TRACE_NUM_RECORDS);
648 645
649 gk20a_mem_wr32(ctx_ptr 646 gk20a_mem_wr(g, mem,
650 + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 647 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
651 0, lo); 648 lo);
652 gk20a_mem_wr32(ctx_ptr 649 gk20a_mem_wr(g, mem,
653 + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 650 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
654 0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); 651 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi));
655 gk20a_mem_wr32(ctx_ptr 652 gk20a_mem_wr(g, mem,
656 + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 653 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
657 0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( 654 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
658 GK20A_FECS_TRACE_NUM_RECORDS)); 655 GK20A_FECS_TRACE_NUM_RECORDS));
659 656
660 vunmap(ctx_ptr); 657 gk20a_mem_end(g, mem);
661 gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid); 658 gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid);
662 659
663 return 0; 660 return 0;
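Note: fecs_trace_gk20a.c shows the other half of the new API. Instead of vmap()/vunmap() on mem->pages, buffers are bracketed with gk20a_mem_begin()/gk20a_mem_end(), and gk20a_mem_wr() takes a byte offset (the *_o() register constants) where gk20a_mem_wr32() takes a word index. A minimal sketch of the bracket, assuming those semantics (the helper is hypothetical):

    static int write_ctx_field(struct gk20a *g, struct mem_desc *mem,
                               u32 byte_off, u32 val)
    {
            if (gk20a_mem_begin(g, mem))    /* map/prepare the buffer */
                    return -ENOMEM;

            gk20a_mem_wr(g, mem, byte_off, val);

            gk20a_mem_end(g, mem);          /* drop the temporary mapping */
            return 0;
    }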
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index dc3debf2..71400331 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -520,8 +520,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
520 mutex_init(&f->free_chs_mutex); 520 mutex_init(&f->free_chs_mutex);
521 521
522 for (chid = 0; chid < f->num_channels; chid++) { 522 for (chid = 0; chid < f->num_channels; chid++) {
523 f->channel[chid].userd_cpu_va =
524 f->userd.cpu_va + chid * f->userd_entry_size;
525 f->channel[chid].userd_iova = 523 f->channel[chid].userd_iova =
526 g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) 524 g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0)
527 + chid * f->userd_entry_size; 525 + chid * f->userd_entry_size;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index f228cce4..2f85bf96 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -201,7 +201,7 @@ struct gpu_ops {
201 struct gr_ctx_desc *gr_ctx); 201 struct gr_ctx_desc *gr_ctx);
202 void (*update_ctxsw_preemption_mode)(struct gk20a *g, 202 void (*update_ctxsw_preemption_mode)(struct gk20a *g,
203 struct channel_ctx_gk20a *ch_ctx, 203 struct channel_ctx_gk20a *ch_ctx,
204 void *ctx_ptr); 204 struct mem_desc *mem);
205 int (*update_smpc_ctxsw_mode)(struct gk20a *g, 205 int (*update_smpc_ctxsw_mode)(struct gk20a *g,
206 struct channel_gk20a *c, 206 struct channel_gk20a *c,
207 bool enable); 207 bool enable);
@@ -221,7 +221,8 @@ struct gpu_ops {
221 int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies, 221 int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies,
222 u32 expect_delay); 222 u32 expect_delay);
223 void (*init_cyclestats)(struct gk20a *g); 223 void (*init_cyclestats)(struct gk20a *g);
224 void (*enable_cde_in_fecs)(void *ctx_ptr); 224 void (*enable_cde_in_fecs)(struct gk20a *g,
225 struct mem_desc *mem);
225 int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch, 226 int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch,
226 u64 sms, bool enable); 227 u64 sms, bool enable);
227 void (*bpt_reg_info)(struct gk20a *g, 228 void (*bpt_reg_info)(struct gk20a *g,
@@ -484,7 +485,7 @@ struct gpu_ops {
484 void (*cbc_clean)(struct gk20a *g); 485 void (*cbc_clean)(struct gk20a *g);
485 void (*tlb_invalidate)(struct vm_gk20a *vm); 486 void (*tlb_invalidate)(struct vm_gk20a *vm);
486 void (*set_big_page_size)(struct gk20a *g, 487 void (*set_big_page_size)(struct gk20a *g,
487 void *inst_ptr, int size); 488 struct mem_desc *mem, int size);
488 u32 (*get_big_page_sizes)(void); 489 u32 (*get_big_page_sizes)(void);
489 u32 (*get_physical_addr_bits)(struct gk20a *g); 490 u32 (*get_physical_addr_bits)(struct gk20a *g);
490 int (*init_mm_setup_hw)(struct gk20a *g); 491 int (*init_mm_setup_hw)(struct gk20a *g);
@@ -493,7 +494,8 @@ struct gpu_ops {
493 void (*remove_bar2_vm)(struct gk20a *g); 494 void (*remove_bar2_vm)(struct gk20a *g);
494 const struct gk20a_mmu_level * 495 const struct gk20a_mmu_level *
495 (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); 496 (*get_mmu_levels)(struct gk20a *g, u32 big_page_size);
496 void (*init_pdb)(struct gk20a *g, void *inst_ptr, u64 pdb_addr); 497 void (*init_pdb)(struct gk20a *g, struct mem_desc *mem,
498 u64 pdb_addr);
497 u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl, 499 u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl,
498 u32 flags); 500 u32 flags);
499 int (*bar1_bind)(struct gk20a *g, u64 bar1_iova); 501 int (*bar1_bind)(struct gk20a *g, u64 bar1_iova);
@@ -859,53 +861,6 @@ do { \
859#define gk20a_dbg_info(fmt, arg...) \ 861#define gk20a_dbg_info(fmt, arg...) \
860 gk20a_dbg(gpu_dbg_info, fmt, ##arg) 862 gk20a_dbg(gpu_dbg_info, fmt, ##arg)
861 863
862/* mem access with dbg_mem logging */
863static inline u8 gk20a_mem_rd08(void *ptr, int b)
864{
865 u8 _b = ((const u8 *)ptr)[b];
866#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
867 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, _b);
868#endif
869 return _b;
870}
871static inline u16 gk20a_mem_rd16(void *ptr, int s)
872{
873 u16 _s = ((const u16 *)ptr)[s];
874#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
875 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, _s);
876#endif
877 return _s;
878}
879static inline u32 gk20a_mem_rd32(void *ptr, int w)
880{
881 u32 _w = ((const u32 *)ptr)[w];
882#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
883 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + sizeof(u32)*w, _w);
884#endif
885 return _w;
886}
887static inline void gk20a_mem_wr08(void *ptr, int b, u8 data)
888{
889#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
890 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, data);
891#endif
892 ((u8 *)ptr)[b] = data;
893}
894static inline void gk20a_mem_wr16(void *ptr, int s, u16 data)
895{
896#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
897 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, data);
898#endif
899 ((u16 *)ptr)[s] = data;
900}
901static inline void gk20a_mem_wr32(void *ptr, int w, u32 data)
902{
903#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
904 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u32)*w, data);
905#endif
906 ((u32 *)ptr)[w] = data;
907}
908
909void gk20a_init_clk_ops(struct gpu_ops *gops); 864void gk20a_init_clk_ops(struct gpu_ops *gops);
910 865
911/* register accessors */ 866/* register accessors */
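Note: the raw-pointer inline accessors removed above are replaced by mem_desc-based ones that are not visible in this hunk (presumably declared in mm_gk20a.h, which the diffstat also touches). Inferred purely from the call sites in this patch, their shapes would be roughly:

    /* inferred from call sites in this patch; not copied from mm_gk20a.h,
     * return types of the _n helpers are a guess */
    int  gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem);
    void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem);
    u32  gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w);      /* word index  */
    u32  gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset);   /* byte offset */
    void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data);
    void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);
    void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
                        void *dest, u32 size);
    void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
                        void *src, u32 size);
    void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
                      u32 value, u32 size);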
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4e7c36ee..e7e6662a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -97,22 +97,18 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
97 u32 *ctx_id) 97 u32 *ctx_id)
98{ 98{
99 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 99 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
100 void *ctx_ptr = NULL;
101 100
102 /* Channel gr_ctx buffer is gpu cacheable. 101 /* Channel gr_ctx buffer is gpu cacheable.
103 Flush and invalidate before cpu update. */ 102 Flush and invalidate before cpu update. */
104 g->ops.mm.l2_flush(g, true); 103 g->ops.mm.l2_flush(g, true);
105 104
106 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 105 if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem))
107 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
108 0, pgprot_writecombine(PAGE_KERNEL));
109 if (!ctx_ptr)
110 return -ENOMEM; 106 return -ENOMEM;
111 107
112 *ctx_id = gk20a_mem_rd32(ctx_ptr + 108 *ctx_id = gk20a_mem_rd(g, &ch_ctx->gr_ctx->mem,
113 ctxsw_prog_main_image_context_id_o(), 0); 109 ctxsw_prog_main_image_context_id_o());
114 110
115 vunmap(ctx_ptr); 111 gk20a_mem_end(g, &ch_ctx->gr_ctx->mem);
116 112
117 return 0; 113 return 0;
118} 114}
@@ -619,22 +615,17 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
619{ 615{
620 u32 addr_lo; 616 u32 addr_lo;
621 u32 addr_hi; 617 u32 addr_hi;
622 void *inst_ptr = NULL;
623 618
624 gk20a_dbg_fn(""); 619 gk20a_dbg_fn("");
625 620
626 inst_ptr = c->inst_block.cpu_va;
627 if (!inst_ptr)
628 return -ENOMEM;
629
630 addr_lo = u64_lo32(gpu_va) >> 12; 621 addr_lo = u64_lo32(gpu_va) >> 12;
631 addr_hi = u64_hi32(gpu_va); 622 addr_hi = u64_hi32(gpu_va);
632 623
633 gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(), 624 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(),
634 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | 625 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
635 ram_in_gr_wfi_ptr_lo_f(addr_lo)); 626 ram_in_gr_wfi_ptr_lo_f(addr_lo));
636 627
637 gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), 628 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(),
638 ram_in_gr_wfi_ptr_hi_f(addr_hi)); 629 ram_in_gr_wfi_ptr_hi_f(addr_hi));
639 630
640 return 0; 631 return 0;
@@ -658,11 +649,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
658 return -EBUSY; 649 return -EBUSY;
659 } 650 }
660 651
661 ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages, 652 if (gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem))
662 PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT,
663 0, pgprot_writecombine(PAGE_KERNEL));
664
665 if (!ch_ctx->patch_ctx.mem.cpu_va)
666 return -ENOMEM; 653 return -ENOMEM;
667 654
668 return 0; 655 return 0;
@@ -677,8 +664,7 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
677 return -EINVAL; 664 return -EINVAL;
678 } 665 }
679 666
680 vunmap(ch_ctx->patch_ctx.mem.cpu_va); 667 gk20a_mem_end(g, &ch_ctx->patch_ctx.mem);
681 ch_ctx->patch_ctx.mem.cpu_va = NULL;
682 return 0; 668 return 0;
683} 669}
684 670
@@ -687,7 +673,6 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
687 u32 addr, u32 data, bool patch) 673 u32 addr, u32 data, bool patch)
688{ 674{
689 u32 patch_slot = 0; 675 u32 patch_slot = 0;
690 void *patch_ptr = NULL;
691 bool mapped_here = false; 676 bool mapped_here = false;
692 677
693 BUG_ON(patch != 0 && ch_ctx == NULL); 678 BUG_ON(patch != 0 && ch_ctx == NULL);
@@ -708,11 +693,10 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
708 } else 693 } else
709 mapped_here = false; 694 mapped_here = false;
710 695
711 patch_ptr = ch_ctx->patch_ctx.mem.cpu_va;
712 patch_slot = ch_ctx->patch_ctx.data_count * 2; 696 patch_slot = ch_ctx->patch_ctx.data_count * 2;
713 697
714 gk20a_mem_wr32(patch_ptr, patch_slot++, addr); 698 gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, addr);
715 gk20a_mem_wr32(patch_ptr, patch_slot++, data); 699 gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, data);
716 700
717 ch_ctx->patch_ctx.data_count++; 701 ch_ctx->patch_ctx.data_count++;
718 702
@@ -760,16 +744,13 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
760static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) 744static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
761{ 745{
762 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 746 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
747 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
763 u32 va_lo, va_hi, va; 748 u32 va_lo, va_hi, va;
764 int ret = 0; 749 int ret = 0;
765 void *ctx_ptr = NULL;
766 750
767 gk20a_dbg_fn(""); 751 gk20a_dbg_fn("");
768 752
769 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 753 if (gk20a_mem_begin(g, mem))
770 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
771 0, pgprot_writecombine(PAGE_KERNEL));
772 if (!ctx_ptr)
773 return -ENOMEM; 754 return -ENOMEM;
774 755
775 if (ch_ctx->zcull_ctx.gpu_va == 0 && 756 if (ch_ctx->zcull_ctx.gpu_va == 0 &&
@@ -792,15 +773,17 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
792 goto clean_up; 773 goto clean_up;
793 } 774 }
794 775
795 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, 776 gk20a_mem_wr(g, mem,
777 ctxsw_prog_main_image_zcull_o(),
796 ch_ctx->zcull_ctx.ctx_sw_mode); 778 ch_ctx->zcull_ctx.ctx_sw_mode);
797 779
798 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va); 780 gk20a_mem_wr(g, mem,
781 ctxsw_prog_main_image_zcull_ptr_o(), va);
799 782
800 c->g->ops.fifo.enable_channel(c); 783 c->g->ops.fifo.enable_channel(c);
801 784
802clean_up: 785clean_up:
803 vunmap(ctx_ptr); 786 gk20a_mem_end(g, mem);
804 787
805 return ret; 788 return ret;
806} 789}
@@ -1500,8 +1483,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1500 u32 ctx_header_words; 1483 u32 ctx_header_words;
1501 u32 i; 1484 u32 i;
1502 u32 data; 1485 u32 data;
1503 void *ctx_ptr = NULL; 1486 struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
1504 void *gold_ptr = NULL; 1487 struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem;
1505 u32 err = 0; 1488 u32 err = 0;
1506 1489
1507 gk20a_dbg_fn(""); 1490 gk20a_dbg_fn("");
@@ -1527,16 +1510,10 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1527 if (err) 1510 if (err)
1528 goto clean_up; 1511 goto clean_up;
1529 1512
1530 gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].mem.pages, 1513 if (gk20a_mem_begin(g, gold_mem))
1531 PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].mem.size) >>
1532 PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL));
1533 if (!gold_ptr)
1534 goto clean_up; 1514 goto clean_up;
1535 1515
1536 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1516 if (gk20a_mem_begin(g, gr_mem))
1537 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1538 0, pgprot_writecombine(PAGE_KERNEL));
1539 if (!ctx_ptr)
1540 goto clean_up; 1517 goto clean_up;
1541 1518
1542 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); 1519 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
@@ -1545,14 +1522,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1545 g->ops.mm.l2_flush(g, true); 1522 g->ops.mm.l2_flush(g, true);
1546 1523
1547 for (i = 0; i < ctx_header_words; i++) { 1524 for (i = 0; i < ctx_header_words; i++) {
1548 data = gk20a_mem_rd32(ctx_ptr, i); 1525 data = gk20a_mem_rd32(g, gr_mem, i);
1549 gk20a_mem_wr32(gold_ptr, i, data); 1526 gk20a_mem_wr32(g, gold_mem, i, data);
1550 } 1527 }
1551 1528
1552 gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0, 1529 gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
1553 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); 1530 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1554 1531
1555 gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0); 1532 gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0);
1556 1533
1557 gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); 1534 gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1558 1535
@@ -1568,12 +1545,12 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1568 goto clean_up; 1545 goto clean_up;
1569 } 1546 }
1570 1547
1571 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) 1548 gk20a_mem_rd_n(g, gold_mem, 0,
1572 gr->ctx_vars.local_golden_image[i] = 1549 gr->ctx_vars.local_golden_image,
1573 gk20a_mem_rd32(gold_ptr, i); 1550 gr->ctx_vars.golden_image_size);
1574 } 1551 }
1575 1552
1576 gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); 1553 gr_gk20a_commit_inst(c, gr_mem->gpu_va);
1577 1554
1578 gr->ctx_vars.golden_image_initialized = true; 1555 gr->ctx_vars.golden_image_initialized = true;
1579 1556
@@ -1586,10 +1563,8 @@ clean_up:
1586 else 1563 else
1587 gk20a_dbg_fn("done"); 1564 gk20a_dbg_fn("done");
1588 1565
1589 if (gold_ptr) 1566 gk20a_mem_end(g, gold_mem);
1590 vunmap(gold_ptr); 1567 gk20a_mem_end(g, gr_mem);
1591 if (ctx_ptr)
1592 vunmap(ctx_ptr);
1593 1568
1594 mutex_unlock(&gr->ctx_mutex); 1569 mutex_unlock(&gr->ctx_mutex);
1595 return err; 1570 return err;
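Note: the golden-image copy collapses from a per-word loop into one bulk gk20a_mem_rd_n() call. The _n accessors take byte offsets and byte sizes, which is why the old "/ 4" loop bound disappears:

    /* before (per-word, via the vmap()ed gold_ptr):
     *      for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
     *              gr->ctx_vars.local_golden_image[i] =
     *                      gk20a_mem_rd32(gold_ptr, i);
     * after (bulk copy; offset and size are in bytes): */
    gk20a_mem_rd_n(g, gold_mem, 0,
                   gr->ctx_vars.local_golden_image,
                   gr->ctx_vars.golden_image_size);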
@@ -1600,7 +1575,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1600 bool enable_smpc_ctxsw) 1575 bool enable_smpc_ctxsw)
1601{ 1576{
1602 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1577 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1603 void *ctx_ptr = NULL; 1578 struct mem_desc *mem;
1604 u32 data; 1579 u32 data;
1605 int ret; 1580 int ret;
1606 1581
@@ -1611,46 +1586,39 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1611 return -EFAULT; 1586 return -EFAULT;
1612 } 1587 }
1613 1588
1589 mem = &ch_ctx->gr_ctx->mem;
1590
1614 c->g->ops.fifo.disable_channel(c); 1591 c->g->ops.fifo.disable_channel(c);
1615 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); 1592 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
1616 if (ret) { 1593 if (ret) {
1617 c->g->ops.fifo.enable_channel(c); 1594 gk20a_err(dev_from_gk20a(g), "failed to preempt channel");
1618 gk20a_err(dev_from_gk20a(g), 1595 goto out;
1619 "failed to preempt channel\n");
1620 return ret;
1621 } 1596 }
1622 1597
1623 /* Channel gr_ctx buffer is gpu cacheable. 1598 /* Channel gr_ctx buffer is gpu cacheable.
1624 Flush and invalidate before cpu update. */ 1599 Flush and invalidate before cpu update. */
1625 g->ops.mm.l2_flush(g, true); 1600 g->ops.mm.l2_flush(g, true);
1626 1601
1627 if (!ch_ctx->gr_ctx) { 1602 if (gk20a_mem_begin(g, mem)) {
1628 gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); 1603 ret = -ENOMEM;
1629 return -EFAULT; 1604 goto out;
1630 }
1631
1632 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
1633 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1634 0, pgprot_writecombine(PAGE_KERNEL));
1635 if (!ctx_ptr) {
1636 c->g->ops.fifo.enable_channel(c);
1637 return -ENOMEM;
1638 } 1605 }
1639 1606
1640 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1607 data = gk20a_mem_rd(g, mem,
1608 ctxsw_prog_main_image_pm_o());
1641 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); 1609 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
1642 data |= enable_smpc_ctxsw ? 1610 data |= enable_smpc_ctxsw ?
1643 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : 1611 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
1644 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); 1612 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
1645 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, 1613 gk20a_mem_wr(g, mem,
1646 data); 1614 ctxsw_prog_main_image_pm_o(),
1615 data);
1647 1616
1648 vunmap(ctx_ptr); 1617 gk20a_mem_end(g, mem);
1649 1618
1650 /* enable channel */ 1619out:
1651 c->g->ops.fifo.enable_channel(c); 1620 c->g->ops.fifo.enable_channel(c);
1652 1621 return ret;
1653 return 0;
1654} 1622}
1655 1623
1656int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, 1624int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
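Note: besides the accessor conversion, gr_gk20a_update_smpc_ctxsw_mode() is restructured so every exit path re-enables the channel through a single out: label instead of duplicating the enable/return in each error branch; the resulting shape is:

    c->g->ops.fifo.disable_channel(c);
    ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
    if (ret)
            goto out;               /* channel still gets re-enabled */

    /* ... flush, gk20a_mem_begin/rd/wr/end on the gr ctx image ... */

    out:
            c->g->ops.fifo.enable_channel(c);
            return ret;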
@@ -1659,8 +1627,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1659{ 1627{
1660 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1628 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1661 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; 1629 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
1662 void *ctx_ptr = NULL; 1630 struct mem_desc *gr_mem;
1663 void *pm_ctx_ptr;
1664 u32 data, virt_addr; 1631 u32 data, virt_addr;
1665 int ret; 1632 int ret;
1666 1633
@@ -1671,6 +1638,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1671 return -EFAULT; 1638 return -EFAULT;
1672 } 1639 }
1673 1640
1641 gr_mem = &ch_ctx->gr_ctx->mem;
1642
1674 if (enable_hwpm_ctxsw) { 1643 if (enable_hwpm_ctxsw) {
1675 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) 1644 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
1676 return 0; 1645 return 0;
@@ -1721,29 +1690,22 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1721 } 1690 }
1722 1691
1723 /* Now clear the buffer */ 1692 /* Now clear the buffer */
1724 pm_ctx_ptr = vmap(pm_ctx->mem.pages, 1693 if (gk20a_mem_begin(g, &pm_ctx->mem)) {
1725 PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT,
1726 0, pgprot_writecombine(PAGE_KERNEL));
1727
1728 if (!pm_ctx_ptr) {
1729 ret = -ENOMEM; 1694 ret = -ENOMEM;
1730 goto cleanup_pm_buf; 1695 goto cleanup_pm_buf;
1731 } 1696 }
1732 1697
1733 memset(pm_ctx_ptr, 0, pm_ctx->mem.size); 1698 gk20a_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size);
1734 1699
1735 vunmap(pm_ctx_ptr); 1700 gk20a_mem_end(g, &pm_ctx->mem);
1736 } 1701 }
1737 1702
1738 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1703 if (gk20a_mem_begin(g, gr_mem)) {
1739 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1740 0, pgprot_writecombine(PAGE_KERNEL));
1741 if (!ctx_ptr) {
1742 ret = -ENOMEM; 1704 ret = -ENOMEM;
1743 goto cleanup_pm_buf; 1705 goto cleanup_pm_buf;
1744 } 1706 }
1745 1707
1746 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1708 data = gk20a_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
1747 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1709 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1748 1710
1749 if (enable_hwpm_ctxsw) { 1711 if (enable_hwpm_ctxsw) {
@@ -1760,10 +1722,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1760 1722
1761 data |= pm_ctx->pm_mode; 1723 data |= pm_ctx->pm_mode;
1762 1724
1763 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); 1725 gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
1764 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); 1726 gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
1765 1727
1766 vunmap(ctx_ptr); 1728 gk20a_mem_end(g, gr_mem);
1767 1729
1768 /* enable channel */ 1730 /* enable channel */
1769 c->g->ops.fifo.enable_channel(c); 1731 c->g->ops.fifo.enable_channel(c);
@@ -1788,9 +1750,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1788 u32 virt_addr_lo; 1750 u32 virt_addr_lo;
1789 u32 virt_addr_hi; 1751 u32 virt_addr_hi;
1790 u32 virt_addr = 0; 1752 u32 virt_addr = 0;
1791 u32 i, v, data; 1753 u32 v, data;
1792 int ret = 0; 1754 int ret = 0;
1793 void *ctx_ptr = NULL; 1755 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
1794 1756
1795 gk20a_dbg_fn(""); 1757 gk20a_dbg_fn("");
1796 1758
@@ -1801,20 +1763,18 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1801 Flush and invalidate before cpu update. */ 1763 Flush and invalidate before cpu update. */
1802 g->ops.mm.l2_flush(g, true); 1764 g->ops.mm.l2_flush(g, true);
1803 1765
1804 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1766 if (gk20a_mem_begin(g, mem))
1805 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1806 0, pgprot_writecombine(PAGE_KERNEL));
1807 if (!ctx_ptr)
1808 return -ENOMEM; 1767 return -ENOMEM;
1809 1768
1810 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) 1769 gk20a_mem_wr_n(g, mem, 0,
1811 gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); 1770 gr->ctx_vars.local_golden_image,
1771 gr->ctx_vars.golden_image_size);
1812 1772
1813 if (g->ops.gr.enable_cde_in_fecs && c->cde) 1773 if (g->ops.gr.enable_cde_in_fecs && c->cde)
1814 g->ops.gr.enable_cde_in_fecs(ctx_ptr); 1774 g->ops.gr.enable_cde_in_fecs(g, mem);
1815 1775
1816 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); 1776 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0);
1817 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); 1777 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0);
1818 1778
1819 /* set priv access map */ 1779 /* set priv access map */
1820 virt_addr_lo = 1780 virt_addr_lo =
@@ -1827,29 +1787,29 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1827 else 1787 else
1828 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); 1788 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
1829 1789
1830 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0, 1790 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
1831 data); 1791 data);
1832 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0, 1792 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
1833 virt_addr_lo); 1793 virt_addr_lo);
1834 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0, 1794 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
1835 virt_addr_hi); 1795 virt_addr_hi);
1836 /* disable verif features */ 1796 /* disable verif features */
1837 v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0); 1797 v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
1838 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); 1798 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
1839 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); 1799 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
1840 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); 1800 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
1841 1801
1842 if (g->ops.gr.update_ctxsw_preemption_mode) 1802 if (g->ops.gr.update_ctxsw_preemption_mode)
1843 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr); 1803 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem);
1844 1804
1845 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 1805 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
1846 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 1806 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
1847 1807
1848 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0, 1808 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
1849 ch_ctx->patch_ctx.data_count); 1809 ch_ctx->patch_ctx.data_count);
1850 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0, 1810 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(),
1851 virt_addr_lo); 1811 virt_addr_lo);
1852 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, 1812 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(),
1853 virt_addr_hi); 1813 virt_addr_hi);
1854 1814
1855 /* Update main header region of the context buffer with the info needed 1815 /* Update main header region of the context buffer with the info needed
@@ -1860,7 +1820,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1860 if (ch_ctx->pm_ctx.mem.gpu_va == 0) { 1820 if (ch_ctx->pm_ctx.mem.gpu_va == 0) {
1861 gk20a_err(dev_from_gk20a(g), 1821 gk20a_err(dev_from_gk20a(g),
1862 "context switched pm with no pm buffer!"); 1822 "context switched pm with no pm buffer!");
1863 vunmap(ctx_ptr); 1823 gk20a_mem_end(g, mem);
1864 return -EFAULT; 1824 return -EFAULT;
1865 } 1825 }
1866 1826
@@ -1871,14 +1831,14 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1871 } else 1831 } else
1872 virt_addr = 0; 1832 virt_addr = 0;
1873 1833
1874 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1834 data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
1875 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1835 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1876 data |= ch_ctx->pm_ctx.pm_mode; 1836 data |= ch_ctx->pm_ctx.pm_mode;
1877 1837
1878 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); 1838 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
1879 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); 1839 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
1880 1840
1881 vunmap(ctx_ptr); 1841 gk20a_mem_end(g, mem);
1882 1842
1883 if (tegra_platform_is_linsim()) { 1843 if (tegra_platform_is_linsim()) {
1884 u32 inst_base_ptr = 1844 u32 inst_base_ptr =
@@ -1978,16 +1938,20 @@ static void gr_gk20a_init_ctxsw_ucode_segments(
1978} 1938}
1979 1939
1980static int gr_gk20a_copy_ctxsw_ucode_segments( 1940static int gr_gk20a_copy_ctxsw_ucode_segments(
1981 u8 *buf, 1941 struct gk20a *g,
1942 struct mem_desc *dst,
1982 struct gk20a_ctxsw_ucode_segments *segments, 1943 struct gk20a_ctxsw_ucode_segments *segments,
1983 u32 *bootimage, 1944 u32 *bootimage,
1984 u32 *code, u32 *data) 1945 u32 *code, u32 *data)
1985{ 1946{
1986 int i; 1947 int i;
1987 1948
1988 memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); 1949 gk20a_mem_wr_n(g, dst, segments->boot.offset, bootimage,
1989 memcpy(buf + segments->code.offset, code, segments->code.size); 1950 segments->boot.size);
1990 memcpy(buf + segments->data.offset, data, segments->data.size); 1951 gk20a_mem_wr_n(g, dst, segments->code.offset, code,
1952 segments->code.size);
1953 gk20a_mem_wr_n(g, dst, segments->data.offset, data,
1954 segments->data.size);
1991 1955
1992 /* compute a "checksum" for the boot binary to detect its version */ 1956 /* compute a "checksum" for the boot binary to detect its version */
1993 segments->boot_signature = 0; 1957 segments->boot_signature = 0;
@@ -2009,7 +1973,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2009 u32 *fecs_boot_image; 1973 u32 *fecs_boot_image;
2010 u32 *gpccs_boot_image; 1974 u32 *gpccs_boot_image;
2011 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; 1975 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2012 u8 *buf;
2013 u32 ucode_size; 1976 u32 ucode_size;
2014 int err = 0; 1977 int err = 0;
2015 1978
@@ -2049,14 +2012,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2049 if (err) 2012 if (err)
2050 goto clean_up; 2013 goto clean_up;
2051 2014
2052 buf = (u8 *)ucode_info->surface_desc.cpu_va; 2015 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2053 if (!buf) { 2016 &ucode_info->fecs,
2054 gk20a_err(d, "failed to map surface desc buffer");
2055 err = -ENOMEM;
2056 goto clean_up;
2057 }
2058
2059 gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs,
2060 fecs_boot_image, 2017 fecs_boot_image,
2061 g->gr.ctx_vars.ucode.fecs.inst.l, 2018 g->gr.ctx_vars.ucode.fecs.inst.l,
2062 g->gr.ctx_vars.ucode.fecs.data.l); 2019 g->gr.ctx_vars.ucode.fecs.data.l);
@@ -2064,7 +2021,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2064 release_firmware(fecs_fw); 2021 release_firmware(fecs_fw);
2065 fecs_fw = NULL; 2022 fecs_fw = NULL;
2066 2023
2067 gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs, 2024 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2025 &ucode_info->gpccs,
2068 gpccs_boot_image, 2026 gpccs_boot_image,
2069 g->gr.ctx_vars.ucode.gpccs.inst.l, 2027 g->gr.ctx_vars.ucode.gpccs.inst.l,
2070 g->gr.ctx_vars.ucode.gpccs.data.l); 2028 g->gr.ctx_vars.ucode.gpccs.data.l);
@@ -4690,41 +4648,38 @@ out:
4690static int gr_gk20a_init_access_map(struct gk20a *g) 4648static int gr_gk20a_init_access_map(struct gk20a *g)
4691{ 4649{
4692 struct gr_gk20a *gr = &g->gr; 4650 struct gr_gk20a *gr = &g->gr;
4693 void *data; 4651 struct mem_desc *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
4694 int err = 0;
4695 u32 w, nr_pages = 4652 u32 w, nr_pages =
4696 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, 4653 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
4697 PAGE_SIZE); 4654 PAGE_SIZE);
4698 u32 *whitelist = NULL; 4655 u32 *whitelist = NULL;
4699 int num_entries = 0; 4656 int num_entries = 0;
4700 4657
4701 data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.pages, 4658 if (gk20a_mem_begin(g, mem)) {
4702 PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size) >>
4703 PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL));
4704 if (!data) {
4705 gk20a_err(dev_from_gk20a(g), 4659 gk20a_err(dev_from_gk20a(g),
4706 "failed to map priv access map memory"); 4660 "failed to map priv access map memory");
4707 err = -ENOMEM; 4661 return -ENOMEM;
4708 goto clean_up;
4709 } 4662 }
4710 4663
4711 memset(data, 0x0, PAGE_SIZE * nr_pages); 4664 gk20a_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);
4712 4665
4713 g->ops.gr.get_access_map(g, &whitelist, &num_entries); 4666 g->ops.gr.get_access_map(g, &whitelist, &num_entries);
4714 4667
4715 for (w = 0; w < num_entries; w++) { 4668 for (w = 0; w < num_entries; w++) {
4716 u32 map_bit, map_byte, map_shift; 4669 u32 map_bit, map_byte, map_shift, x;
4717 map_bit = whitelist[w] >> 2; 4670 map_bit = whitelist[w] >> 2;
4718 map_byte = map_bit >> 3; 4671 map_byte = map_bit >> 3;
4719 map_shift = map_bit & 0x7; /* i.e. 0-7 */ 4672 map_shift = map_bit & 0x7; /* i.e. 0-7 */
4720 gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", 4673 gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d",
4721 whitelist[w], map_byte, map_shift); 4674 whitelist[w], map_byte, map_shift);
4722 ((u8 *)data)[map_byte] |= 1 << map_shift; 4675 x = gk20a_mem_rd32(g, mem, map_byte / sizeof(u32));
4676 x |= 1 << (
4677 (map_byte % sizeof(u32) * BITS_PER_BYTE)
4678 + map_shift);
4679 gk20a_mem_wr32(g, mem, map_byte / sizeof(u32), x);
4723 } 4680 }
4724 4681
4725clean_up: 4682 gk20a_mem_end(g, mem);
4726 if (data)
4727 vunmap(data);
4728 return 0; 4683 return 0;
4729} 4684}
4730 4685
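Note: gr_gk20a_init_access_map() used to set whitelist bits byte-wise through the vmap()ed map; with word-granular accessors it becomes a read-modify-write of the containing u32. A worked example with an illustrative whitelist address (the byte-to-word equivalence assumes the little-endian layout of these Tegra SoCs):

    /* whitelist[w] = 0x418c (illustrative value)
     *   map_bit   = 0x418c >> 2 = 0x1063
     *   map_byte  = 0x1063 >> 3 = 0x20c
     *   map_shift = 0x1063 & 7  = 3
     * old: ((u8 *)data)[0x20c] |= 1 << 3;
     * new: word 0x20c / 4 = 0x83, bit (0x20c % 4) * 8 + 3 = 3 */
    x = gk20a_mem_rd32(g, mem, map_byte / sizeof(u32));
    x |= 1 << ((map_byte % sizeof(u32)) * BITS_PER_BYTE + map_shift);
    gk20a_mem_wr32(g, mem, map_byte / sizeof(u32), x);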
@@ -6659,7 +6614,7 @@ static void gr_gk20a_init_sm_dsm_reg_info(void)
6659static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, 6614static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6660 struct channel_ctx_gk20a *ch_ctx, 6615 struct channel_ctx_gk20a *ch_ctx,
6661 u32 addr, u32 data, 6616 u32 addr, u32 data,
6662 u8 *context) 6617 struct mem_desc *mem)
6663{ 6618{
6664 u32 num_gpc = g->gr.gpc_count; 6619 u32 num_gpc = g->gr.gpc_count;
6665 u32 num_tpc; 6620 u32 num_tpc;
@@ -6688,8 +6643,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6688 /* reset the patch count from previous 6643 /* reset the patch count from previous
6689 runs,if ucode has already processed 6644 runs,if ucode has already processed
6690 it */ 6645 it */
6691 tmp = gk20a_mem_rd32(context + 6646 tmp = gk20a_mem_rd(g, mem,
6692 ctxsw_prog_main_image_patch_count_o(), 0); 6647 ctxsw_prog_main_image_patch_count_o());
6693 6648
6694 if (!tmp) 6649 if (!tmp)
6695 ch_ctx->patch_ctx.data_count = 0; 6650 ch_ctx->patch_ctx.data_count = 0;
@@ -6700,15 +6655,15 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6700 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 6655 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
6701 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 6656 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
6702 6657
6703 gk20a_mem_wr32(context + 6658 gk20a_mem_wr(g, mem,
6704 ctxsw_prog_main_image_patch_count_o(), 6659 ctxsw_prog_main_image_patch_count_o(),
6705 0, ch_ctx->patch_ctx.data_count); 6660 ch_ctx->patch_ctx.data_count);
6706 gk20a_mem_wr32(context + 6661 gk20a_mem_wr(g, mem,
6707 ctxsw_prog_main_image_patch_adr_lo_o(), 6662 ctxsw_prog_main_image_patch_adr_lo_o(),
6708 0, vaddr_lo); 6663 vaddr_lo);
6709 gk20a_mem_wr32(context + 6664 gk20a_mem_wr(g, mem,
6710 ctxsw_prog_main_image_patch_adr_hi_o(), 6665 ctxsw_prog_main_image_patch_adr_hi_o(),
6711 0, vaddr_hi); 6666 vaddr_hi);
6712 6667
6713 /* we're not caching these on cpu side, 6668 /* we're not caching these on cpu side,
6714 but later watch for it */ 6669 but later watch for it */
@@ -6760,17 +6715,15 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
6760 6715
6761#define ILLEGAL_ID (~0) 6716#define ILLEGAL_ID (~0)
6762 6717
6763static inline bool check_main_image_header_magic(void *context) 6718static inline bool check_main_image_header_magic(u8 *context)
6764{ 6719{
6765 u32 magic = gk20a_mem_rd32(context + 6720 u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());
6766 ctxsw_prog_main_image_magic_value_o(), 0);
6767 gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); 6721 gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic);
6768 return magic == ctxsw_prog_main_image_magic_value_v_value_v(); 6722 return magic == ctxsw_prog_main_image_magic_value_v_value_v();
6769} 6723}
6770static inline bool check_local_header_magic(void *context) 6724static inline bool check_local_header_magic(u8 *context)
6771{ 6725{
6772 u32 magic = gk20a_mem_rd32(context + 6726 u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());
6773 ctxsw_prog_local_magic_value_o(), 0);
6774 gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); 6727 gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic);
6775 return magic == ctxsw_prog_local_magic_value_v_value_v(); 6728 return magic == ctxsw_prog_local_magic_value_v_value_v();
6776 6729
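Note: check_main_image_header_magic()/check_local_header_magic() and the parsing code below now take u8 * and read with plain casts rather than the removed gk20a_mem_rd32(); the context_buffer they walk is an ordinary CPU-side copy (the comment added to gr_gk20a_find_priv_offset_in_buffer makes this explicit), so no mem_desc bracket is needed. A hypothetical wrapper, if one wanted to avoid repeating the cast (not part of the patch):

    static inline u32 ctx_rd32(const u8 *context, u32 byte_off)
    {
            /* direct load from the CPU-resident copy of the context image */
            return *(const u32 *)(context + byte_off);
    }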
@@ -6814,7 +6767,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6814 u32 num_gpcs, num_tpcs; 6767 u32 num_gpcs, num_tpcs;
6815 u32 chk_addr; 6768 u32 chk_addr;
6816 u32 ext_priv_offset, ext_priv_size; 6769 u32 ext_priv_offset, ext_priv_size;
6817 void *context; 6770 u8 *context;
6818 u32 offset_to_segment, offset_to_segment_end; 6771 u32 offset_to_segment, offset_to_segment_end;
6819 u32 sm_dsm_perf_reg_id = ILLEGAL_ID; 6772 u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
6820 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; 6773 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
@@ -6856,14 +6809,14 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6856 /* note below is in words/num_registers */ 6809 /* note below is in words/num_registers */
6857 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; 6810 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
6858 6811
6859 context = context_buffer; 6812 context = (u8 *)context_buffer;
6860 /* sanity check main header */ 6813 /* sanity check main header */
6861 if (!check_main_image_header_magic(context)) { 6814 if (!check_main_image_header_magic(context)) {
6862 gk20a_err(dev_from_gk20a(g), 6815 gk20a_err(dev_from_gk20a(g),
6863 "Invalid main header: magic value"); 6816 "Invalid main header: magic value");
6864 return -EINVAL; 6817 return -EINVAL;
6865 } 6818 }
6866 num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); 6819 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
6867 if (gpc_num >= num_gpcs) { 6820 if (gpc_num >= num_gpcs) {
6868 gk20a_err(dev_from_gk20a(g), 6821 gk20a_err(dev_from_gk20a(g),
6869 "GPC 0x%08x is greater than total count 0x%08x!\n", 6822 "GPC 0x%08x is greater than total count 0x%08x!\n",
@@ -6871,7 +6824,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6871 return -EINVAL; 6824 return -EINVAL;
6872 } 6825 }
6873 6826
6874 data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0); 6827 data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());
6875 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); 6828 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
6876 if (0 == ext_priv_size) { 6829 if (0 == ext_priv_size) {
6877 gk20a_dbg_info(" No extended memory in context buffer"); 6830 gk20a_dbg_info(" No extended memory in context buffer");
@@ -7149,7 +7102,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
7149} 7102}
7150 7103
7151static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, 7104static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7152 void *context, 7105 u8 *context,
7153 u32 *num_ppcs, u32 *ppc_mask, 7106 u32 *num_ppcs, u32 *ppc_mask,
7154 u32 *reg_ppc_count) 7107 u32 *reg_ppc_count)
7155{ 7108{
@@ -7165,7 +7118,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7165 (num_pes_per_gpc > 1))) 7118 (num_pes_per_gpc > 1)))
7166 return -EINVAL; 7119 return -EINVAL;
7167 7120
7168 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); 7121 data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
7169 7122
7170 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); 7123 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
7171 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); 7124 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
@@ -7177,7 +7130,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7177 7130
7178/* 7131/*
7179 * This function will return the 32 bit offset for a priv register if it is 7132 * This function will return the 32 bit offset for a priv register if it is
7180 * present in the context buffer. 7133 * present in the context buffer. The context buffer is in CPU memory.
7181 */ 7134 */
7182static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, 7135static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7183 u32 addr, 7136 u32 addr,
@@ -7196,7 +7149,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7196 u32 offset; 7149 u32 offset;
7197 u32 sys_priv_offset, gpc_priv_offset; 7150 u32 sys_priv_offset, gpc_priv_offset;
7198 u32 ppc_mask, reg_list_ppc_count; 7151 u32 ppc_mask, reg_list_ppc_count;
7199 void *context; 7152 u8 *context;
7200 u32 offset_to_segment; 7153 u32 offset_to_segment;
7201 7154
7202 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); 7155 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
@@ -7207,13 +7160,13 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7207 if (err) 7160 if (err)
7208 return err; 7161 return err;
7209 7162
7210 context = context_buffer; 7163 context = (u8 *)context_buffer;
7211 if (!check_main_image_header_magic(context)) { 7164 if (!check_main_image_header_magic(context)) {
7212 gk20a_err(dev_from_gk20a(g), 7165 gk20a_err(dev_from_gk20a(g),
7213 "Invalid main header: magic value"); 7166 "Invalid main header: magic value");
7214 return -EINVAL; 7167 return -EINVAL;
7215 } 7168 }
7216 num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); 7169 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
7217 7170
7218 /* Parse the FECS local header. */ 7171 /* Parse the FECS local header. */
7219 context += ctxsw_prog_ucode_header_size_in_bytes(); 7172 context += ctxsw_prog_ucode_header_size_in_bytes();
@@ -7222,7 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7222 "Invalid FECS local header: magic value\n"); 7175 "Invalid FECS local header: magic value\n");
7223 return -EINVAL; 7176 return -EINVAL;
7224 } 7177 }
7225 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); 7178 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7226 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); 7179 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7227 7180
7228 /* If found in Ext buffer, ok. 7181 /* If found in Ext buffer, ok.
@@ -7268,7 +7221,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7268 return -EINVAL; 7221 return -EINVAL;
7269 7222
7270 } 7223 }
7271 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); 7224 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7272 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); 7225 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7273 7226
7274 err = gr_gk20a_determine_ppc_configuration(g, context, 7227 err = gr_gk20a_determine_ppc_configuration(g, context,
@@ -7277,7 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7277 if (err) 7230 if (err)
7278 return err; 7231 return err;
7279 7232
7280 num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0); 7233 num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
7281 7234
7282 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { 7235 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
7283 gk20a_err(dev_from_gk20a(g), 7236 gk20a_err(dev_from_gk20a(g),
@@ -7689,9 +7642,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7689{ 7642{
7690 struct gk20a *g = ch->g; 7643 struct gk20a *g = ch->g;
7691 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 7644 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
7692 void *ctx_ptr = NULL; 7645 bool gr_ctx_ready = false;
7693 void *pm_ctx_ptr = NULL; 7646 bool pm_ctx_ready = false;
7694 void *base_ptr = NULL; 7647 struct mem_desc *current_mem = NULL;
7695 bool ch_is_curr_ctx, restart_gr_ctxsw = false; 7648 bool ch_is_curr_ctx, restart_gr_ctxsw = false;
7696 u32 i, j, offset, v; 7649 u32 i, j, offset, v;
7697 struct gr_gk20a *gr = &g->gr; 7650 struct gr_gk20a *gr = &g->gr;
@@ -7821,20 +7774,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7821 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), 7774 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
7822 ctx_ops[i].quad); 7775 ctx_ops[i].quad);
7823 if (!err) { 7776 if (!err) {
7824 if (!ctx_ptr) { 7777 if (!gr_ctx_ready) {
7825 /* would have been a variant of 7778 /* would have been a variant of
7826 * gr_gk20a_apply_instmem_overrides, 7779 * gr_gk20a_apply_instmem_overrides,
7827 * recoded in-place instead. 7780 * recoded in-place instead.
7828 */ 7781 */
7829 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 7782 if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) {
7830 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
7831 0, pgprot_writecombine(PAGE_KERNEL));
7832 if (!ctx_ptr) {
7833 err = -ENOMEM; 7783 err = -ENOMEM;
7834 goto cleanup; 7784 goto cleanup;
7835 } 7785 }
7786 gr_ctx_ready = true;
7836 } 7787 }
7837 base_ptr = ctx_ptr; 7788 current_mem = &ch_ctx->gr_ctx->mem;
7838 } else { 7789 } else {
7839 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 7790 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
7840 ctx_ops[i].offset, 7791 ctx_ops[i].offset,
@@ -7849,7 +7800,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7849 NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; 7800 NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
7850 continue; 7801 continue;
7851 } 7802 }
7852 if (!pm_ctx_ptr) { 7803 if (!pm_ctx_ready) {
7853 /* Make sure ctx buffer was initialized */ 7804 /* Make sure ctx buffer was initialized */
7854 if (!ch_ctx->pm_ctx.mem.pages) { 7805 if (!ch_ctx->pm_ctx.mem.pages) {
7855 gk20a_err(dev_from_gk20a(g), 7806 gk20a_err(dev_from_gk20a(g),
@@ -7857,15 +7808,13 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7857 err = -EINVAL; 7808 err = -EINVAL;
7858 goto cleanup; 7809 goto cleanup;
7859 } 7810 }
7860 pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages, 7811 if (gk20a_mem_begin(g, &ch_ctx->pm_ctx.mem)) {
7861 PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT,
7862 0, pgprot_writecombine(PAGE_KERNEL));
7863 if (!pm_ctx_ptr) {
7864 err = -ENOMEM; 7812 err = -ENOMEM;
7865 goto cleanup; 7813 goto cleanup;
7866 } 7814 }
7815 pm_ctx_ready = true;
7867 } 7816 }
7868 base_ptr = pm_ctx_ptr; 7817 current_mem = &ch_ctx->pm_ctx.mem;
7869 } 7818 }
7870 7819
7871 /* if this is a quad access, setup for special access*/ 7820 /* if this is a quad access, setup for special access*/
@@ -7878,24 +7827,24 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7878 /* sanity check gr ctxt offsets, 7827 /* sanity check gr ctxt offsets,
7879 * don't write outside, worst case 7828 * don't write outside, worst case
7880 */ 7829 */
7881 if ((base_ptr == ctx_ptr) && 7830 if ((current_mem == &ch_ctx->gr_ctx->mem) &&
7882 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) 7831 (offsets[j] >= g->gr.ctx_vars.golden_image_size))
7883 continue; 7832 continue;
7884 if (pass == 0) { /* write pass */ 7833 if (pass == 0) { /* write pass */
7885 v = gk20a_mem_rd32(base_ptr + offsets[j], 0); 7834 v = gk20a_mem_rd(g, current_mem, offsets[j]);
7886 v &= ~ctx_ops[i].and_n_mask_lo; 7835 v &= ~ctx_ops[i].and_n_mask_lo;
7887 v |= ctx_ops[i].value_lo; 7836 v |= ctx_ops[i].value_lo;
7888 gk20a_mem_wr32(base_ptr + offsets[j], 0, v); 7837 gk20a_mem_wr(g, current_mem, offsets[j], v);
7889 7838
7890 gk20a_dbg(gpu_dbg_gpu_dbg, 7839 gk20a_dbg(gpu_dbg_gpu_dbg,
7891 "context wr: offset=0x%x v=0x%x", 7840 "context wr: offset=0x%x v=0x%x",
7892 offsets[j], v); 7841 offsets[j], v);
7893 7842
7894 if (ctx_ops[i].op == REGOP(WRITE_64)) { 7843 if (ctx_ops[i].op == REGOP(WRITE_64)) {
7895 v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0); 7844 v = gk20a_mem_rd(g, current_mem, offsets[j] + 4);
7896 v &= ~ctx_ops[i].and_n_mask_hi; 7845 v &= ~ctx_ops[i].and_n_mask_hi;
7897 v |= ctx_ops[i].value_hi; 7846 v |= ctx_ops[i].value_hi;
7898 gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v); 7847 gk20a_mem_wr(g, current_mem, offsets[j] + 4, v);
7899 7848
7900 gk20a_dbg(gpu_dbg_gpu_dbg, 7849 gk20a_dbg(gpu_dbg_gpu_dbg,
7901 "context wr: offset=0x%x v=0x%x", 7850 "context wr: offset=0x%x v=0x%x",
@@ -7905,18 +7854,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7905 /* check to see if we need to add a special WAR 7854 /* check to see if we need to add a special WAR
7906 for some of the SMPC perf regs */ 7855 for some of the SMPC perf regs */
7907 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], 7856 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
7908 v, base_ptr); 7857 v, current_mem);
7909 7858
7910 } else { /* read pass */ 7859 } else { /* read pass */
7911 ctx_ops[i].value_lo = 7860 ctx_ops[i].value_lo =
7912 gk20a_mem_rd32(base_ptr + offsets[0], 0); 7861 gk20a_mem_rd(g, current_mem, offsets[0]);
7913 7862
7914 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", 7863 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
7915 offsets[0], ctx_ops[i].value_lo); 7864 offsets[0], ctx_ops[i].value_lo);
7916 7865
7917 if (ctx_ops[i].op == REGOP(READ_64)) { 7866 if (ctx_ops[i].op == REGOP(READ_64)) {
7918 ctx_ops[i].value_hi = 7867 ctx_ops[i].value_hi =
7919 gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0); 7868 gk20a_mem_rd(g, current_mem, offsets[0] + 4);
7920 7869
7921 gk20a_dbg(gpu_dbg_gpu_dbg, 7870 gk20a_dbg(gpu_dbg_gpu_dbg,
7922 "context rd: offset=0x%x v=0x%x", 7871 "context rd: offset=0x%x v=0x%x",
@@ -7943,12 +7892,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7943 7892
7944 if (ch_ctx->patch_ctx.mem.cpu_va) 7893 if (ch_ctx->patch_ctx.mem.cpu_va)
7945 gr_gk20a_ctx_patch_write_end(g, ch_ctx); 7894 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
7946 7895 if (gr_ctx_ready)
7947 if (ctx_ptr) 7896 gk20a_mem_end(g, &ch_ctx->gr_ctx->mem);
7948 vunmap(ctx_ptr); 7897 if (pm_ctx_ready)
7949 7898 gk20a_mem_end(g, &ch_ctx->pm_ctx.mem);
7950 if (pm_ctx_ptr)
7951 vunmap(pm_ctx_ptr);
7952 7899
7953 if (restart_gr_ctxsw) { 7900 if (restart_gr_ctxsw) {
7954 int tmp_err = gr_gk20a_enable_ctxsw(g); 7901 int tmp_err = gr_gk20a_enable_ctxsw(g);
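The exec_ctx_ops rework above amounts to bracketing each context-buffer access with gk20a_mem_begin()/gk20a_mem_end() instead of keeping a vmap()'d pointer for the whole call, and tracking which buffer is currently mapped via gr_ctx_ready/pm_ctx_ready and current_mem. A minimal sketch of the resulting read-modify-write pattern, using only the accessors introduced by this patch (the helper name patch_one_word is illustrative, not part of the change):

    static int patch_one_word(struct gk20a *g, struct mem_desc *mem,
                              u32 byte_offset, u32 clear_mask, u32 set_bits)
    {
            u32 v;
            int err;

            err = gk20a_mem_begin(g, mem);  /* vmap()s mem->pages on demand */
            if (err)
                    return err;

            v = gk20a_mem_rd(g, mem, byte_offset);   /* byte offset, 32-bit aligned */
            v = (v & ~clear_mask) | set_bits;
            gk20a_mem_wr(g, mem, byte_offset, v);

            gk20a_mem_end(g, mem);                   /* vunmap() and clear cpu_va */
            return 0;
    }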
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 6f6734b4..13382416 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -44,6 +44,112 @@
44#include "kind_gk20a.h" 44#include "kind_gk20a.h"
45#include "semaphore_gk20a.h" 45#include "semaphore_gk20a.h"
46 46
47int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
48{
49 void *cpu_va;
50
51 if (WARN_ON(mem->cpu_va)) {
52 gk20a_warn(dev_from_gk20a(g), "nested %s", __func__);
53 return -EBUSY;
54 }
55
56 cpu_va = vmap(mem->pages,
57 PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
58 0, pgprot_writecombine(PAGE_KERNEL));
59
60 if (WARN_ON(!cpu_va))
61 return -ENOMEM;
62
63 mem->cpu_va = cpu_va;
64 return 0;
65}
66
67void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
68{
69 vunmap(mem->cpu_va);
70 mem->cpu_va = NULL;
71}
72
73u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
74{
75 u32 *ptr = mem->cpu_va;
76 u32 data;
77
78 WARN_ON(!ptr);
79 data = ptr[w];
80#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
81 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
82#endif
83 return data;
84}
85
86u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
87{
88 WARN_ON(offset & 3);
89 return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
90}
91
92void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
93 u32 offset, void *dest, u32 size)
94{
95 u32 i;
96 u32 *dest_u32 = dest;
97
98 WARN_ON(offset & 3);
99 WARN_ON(size & 3);
100 offset /= sizeof(u32);
101 size /= sizeof(u32);
102
103 for (i = 0; i < size; i++)
104 dest_u32[i] = gk20a_mem_rd32(g, mem, offset + i);
105}
106
107void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
108{
109 u32 *ptr = mem->cpu_va;
110
111 WARN_ON(!ptr);
112#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
113 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
114#endif
115 ptr[w] = data;
116}
117
118void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data)
119{
120 WARN_ON(offset & 3);
121 gk20a_mem_wr32(g, mem, offset / sizeof(u32), data);
122}
123
124void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
125 void *src, u32 size)
126{
127 u32 i;
128 u32 *src_u32 = src;
129
130 WARN_ON(offset & 3);
131 WARN_ON(size & 3);
132 offset /= sizeof(u32);
133 size /= sizeof(u32);
134
135 for (i = 0; i < size; i++)
136 gk20a_mem_wr32(g, mem, offset + i, src_u32[i]);
137}
138
139void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
140 u32 value, u32 size)
141{
142 u32 i;
143
144 WARN_ON(offset & 3);
145 WARN_ON(size & 3);
146 offset /= sizeof(u32);
147 size /= sizeof(u32);
148
149 for (i = 0; i < size; i++)
150 gk20a_mem_wr32(g, mem, offset + i, value);
151}
152
47/* 153/*
48 * GPU mapping life cycle 154 * GPU mapping life cycle
49 * ====================== 155 * ======================
@@ -780,9 +886,14 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
780 *pde_lo, *pde_hi); 886 *pde_lo, *pde_hi);
781} 887}
782 888
783u32 *pde_from_index(struct vm_gk20a *vm, u32 i) 889static u32 pde_from_index(u32 i)
890{
891 return i * gmmu_pde__size_v() / sizeof(u32);
892}
893
894static u32 pte_from_index(u32 i)
784{ 895{
785 return (u32 *) (((u8 *)vm->pdb.mem.cpu_va) + i*gmmu_pde__size_v()); 896 return i * gmmu_pte__size_v() / sizeof(u32);
786} 897}
787 898
788u32 pte_index_from_vaddr(struct vm_gk20a *vm, 899u32 pte_index_from_vaddr(struct vm_gk20a *vm,
@@ -2323,7 +2434,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
2323 u64 pte_addr_small = 0, pte_addr_big = 0; 2434 u64 pte_addr_small = 0, pte_addr_big = 0;
2324 struct gk20a_mm_entry *entry = vm->pdb.entries + i; 2435 struct gk20a_mm_entry *entry = vm->pdb.entries + i;
2325 u32 pde_v[2] = {0, 0}; 2436 u32 pde_v[2] = {0, 0};
2326 u32 *pde; 2437 u32 pde;
2327 2438
2328 gk20a_dbg_fn(""); 2439 gk20a_dbg_fn("");
2329 2440
@@ -2348,10 +2459,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
2348 (big_valid ? (gmmu_pde_vol_big_true_f()) : 2459 (big_valid ? (gmmu_pde_vol_big_true_f()) :
2349 gmmu_pde_vol_big_false_f()); 2460 gmmu_pde_vol_big_false_f());
2350 2461
2351 pde = pde_from_index(vm, i); 2462 pde = pde_from_index(i);
2352 2463
2353 gk20a_mem_wr32(pde, 0, pde_v[0]); 2464 gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]);
2354 gk20a_mem_wr32(pde, 1, pde_v[1]); 2465 gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]);
2355 2466
2356 gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", 2467 gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
2357 i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); 2468 i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -2432,8 +2543,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
2432 gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); 2543 gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
2433 } 2544 }
2434 2545
2435 gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]); 2546 gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 0, pte_w[0]);
2436 gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]); 2547 gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 1, pte_w[1]);
2437 2548
2438 if (*iova) { 2549 if (*iova) {
2439 *iova += page_size; 2550 *iova += page_size;
@@ -3489,19 +3600,19 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
3489 false, false, "cde"); 3600 false, false, "cde");
3490} 3601}
3491 3602
3492void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) 3603void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr)
3493{ 3604{
3494 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); 3605 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
3495 u32 pdb_addr_hi = u64_hi32(pdb_addr); 3606 u32 pdb_addr_hi = u64_hi32(pdb_addr);
3496 3607
3497 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), 3608 gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
3498 (g->mm.vidmem_is_vidmem ? 3609 (g->mm.vidmem_is_vidmem ?
3499 ram_in_page_dir_base_target_sys_mem_ncoh_f() : 3610 ram_in_page_dir_base_target_sys_mem_ncoh_f() :
3500 ram_in_page_dir_base_target_vid_mem_f()) | 3611 ram_in_page_dir_base_target_vid_mem_f()) |
3501 ram_in_page_dir_base_vol_true_f() | 3612 ram_in_page_dir_base_vol_true_f() |
3502 ram_in_page_dir_base_lo_f(pdb_addr_lo)); 3613 ram_in_page_dir_base_lo_f(pdb_addr_lo));
3503 3614
3504 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), 3615 gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
3505 ram_in_page_dir_base_hi_f(pdb_addr_hi)); 3616 ram_in_page_dir_base_hi_f(pdb_addr_hi));
3506} 3617}
3507 3618
@@ -3510,23 +3621,22 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
3510{ 3621{
3511 struct gk20a *g = gk20a_from_vm(vm); 3622 struct gk20a *g = gk20a_from_vm(vm);
3512 u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); 3623 u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
3513 void *inst_ptr = inst_block->cpu_va;
3514 3624
3515 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", 3625 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
3516 gk20a_mm_inst_block_addr(g, inst_block), inst_ptr); 3626 gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
3517 3627
3518 gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); 3628 gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr);
3519 3629
3520 g->ops.mm.init_pdb(g, inst_ptr, pde_addr); 3630 g->ops.mm.init_pdb(g, inst_block, pde_addr);
3521 3631
3522 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), 3632 gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
3523 u64_lo32(vm->va_limit - 1) & ~0xfff); 3633 u64_lo32(vm->va_limit - 1) & ~0xfff);
3524 3634
3525 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), 3635 gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(),
3526 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1))); 3636 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1)));
3527 3637
3528 if (big_page_size && g->ops.mm.set_big_page_size) 3638 if (big_page_size && g->ops.mm.set_big_page_size)
3529 g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); 3639 g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
3530} 3640}
3531 3641
3532int gk20a_mm_fb_flush(struct gk20a *g) 3642int gk20a_mm_fb_flush(struct gk20a *g)
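The new pde_from_index()/pte_from_index() helpers above return word indices rather than CPU pointers, so their result can be fed straight to gk20a_mem_wr32() on vm->pdb.mem or pte->mem. A worked example of the arithmetic, assuming (as the replaced i*8 addressing suggests) that a PDE is two 32-bit words:

    /* Assuming gmmu_pde__size_v() == 8: PDE i occupies words 2*i and 2*i+1. */
    u32 pde = pde_from_index(5);                         /* 5 * 8 / sizeof(u32) == 10 */

    gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]);  /* word 10, byte offset 40 */
    gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]);  /* word 11, byte offset 44 */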
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7fa0b7fb..e9ac8f18 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -419,6 +419,34 @@ static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
419 return gmmu_page_size_small; 419 return gmmu_page_size_small;
420} 420}
421 421
422/*
423 * Buffer accessors - wrap between begin() and end() if there is no permanent
424 * kernel mapping for this buffer.
425 */
426
427int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem);
428/* nop for null mem, like with free() or vunmap() */
429void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem);
430
431/* word-indexed offset */
432u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w);
433/* byte offset (32b-aligned) */
434u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset);
435/* memcpy to cpu, offset and size in bytes (32b-aligned) */
436void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
437 void *dest, u32 size);
438
439/* word-indexed offset */
440void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data);
441/* byte offset (32b-aligned) */
442void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);
443/* memcpy from cpu, offset and size in bytes (32b-aligned) */
444void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
445 void *src, u32 size);
446/* size and offset in bytes (32b-aligned), filled with u32s */
447void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
448 u32 value, u32 size);
449
422#if 0 /*related to addr bits above, concern below TBD on which is accurate */ 450#if 0 /*related to addr bits above, concern below TBD on which is accurate */
423#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ 451#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
424 bus_bar1_block_ptr_s()) 452 bus_bar1_block_ptr_s())
@@ -673,7 +701,6 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
673 u64 addr_lo, u64 addr_hi, 701 u64 addr_lo, u64 addr_hi,
674 u32 *pde_lo, u32 *pde_hi); 702 u32 *pde_lo, u32 *pde_hi);
675int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); 703int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm);
676u32 *pde_from_index(struct vm_gk20a *vm, u32 i);
677u32 pte_index_from_vaddr(struct vm_gk20a *vm, 704u32 pte_index_from_vaddr(struct vm_gk20a *vm,
678 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); 705 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx);
679void free_gmmu_pages(struct vm_gk20a *vm, 706void free_gmmu_pages(struct vm_gk20a *vm,
@@ -685,7 +712,7 @@ struct gpu_ops;
685void gk20a_init_mm(struct gpu_ops *gops); 712void gk20a_init_mm(struct gpu_ops *gops);
686const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, 713const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
687 u32 big_page_size); 714 u32 big_page_size);
688void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr); 715void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr);
689 716
690void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); 717void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
691 718
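The declarations above establish two addressing conventions: the *32 variants take a word index, while the others take byte offsets and sizes that must be multiples of four (the implementations WARN_ON anything misaligned). A short usage sketch, assuming mem is large enough for the offsets used and src/len are a caller-provided staging buffer:

    static int mem_accessor_demo(struct gk20a *g, struct mem_desc *mem,
                                 void *src, u32 len)
    {
            int err = gk20a_mem_begin(g, mem);   /* temporary CPU mapping */

            if (err)
                    return err;

            /* byte offset 0x10 and word index 4 name the same 32-bit cell */
            gk20a_mem_wr(g, mem, 0x10, 0xcafe0000);
            WARN_ON(gk20a_mem_rd32(g, mem, 0x10 / 4) != 0xcafe0000);

            gk20a_memset(g, mem, 0x20, 0, 0x40);       /* 0x40 bytes of zeroes at 0x20 */
            gk20a_mem_wr_n(g, mem, 0x100, src, len);   /* bulk copy, len multiple of 4 */

            gk20a_mem_end(g, mem);
            return 0;
    }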
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 56ad0c2a..54b2eef4 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -2421,11 +2421,10 @@ static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
2421static int gk20a_prepare_ucode(struct gk20a *g) 2421static int gk20a_prepare_ucode(struct gk20a *g)
2422{ 2422{
2423 struct pmu_gk20a *pmu = &g->pmu; 2423 struct pmu_gk20a *pmu = &g->pmu;
2424 int i, err = 0; 2424 int err = 0;
2425 struct device *d = dev_from_gk20a(g); 2425 struct device *d = dev_from_gk20a(g);
2426 struct mm_gk20a *mm = &g->mm; 2426 struct mm_gk20a *mm = &g->mm;
2427 struct vm_gk20a *vm = &mm->pmu.vm; 2427 struct vm_gk20a *vm = &mm->pmu.vm;
2428 void *ucode_ptr;
2429 2428
2430 if (g->pmu_fw) { 2429 if (g->pmu_fw) {
2431 gk20a_init_pmu(pmu); 2430 gk20a_init_pmu(pmu);
@@ -2449,11 +2448,8 @@ static int gk20a_prepare_ucode(struct gk20a *g)
2449 if (err) 2448 if (err)
2450 goto err_release_fw; 2449 goto err_release_fw;
2451 2450
2452 ucode_ptr = pmu->ucode.cpu_va; 2451 gk20a_mem_wr_n(g, &pmu->ucode, 0, pmu->ucode_image,
2453 2452 pmu->desc->app_start_offset + pmu->desc->app_size);
2454 for (i = 0; i < (pmu->desc->app_start_offset +
2455 pmu->desc->app_size) >> 2; i++)
2456 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
2457 2453
2458 gk20a_init_pmu(pmu); 2454 gk20a_init_pmu(pmu);
2459 2455
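The ucode upload above collapses a word-by-word copy loop into one bulk write. Expressed against the new accessors for comparison, the two forms below are equivalent whenever the byte count is a multiple of four (gk20a_mem_wr_n() performs the copy as 32-bit writes and WARN_ONs otherwise); nbytes stands for app_start_offset + app_size as in the hunk:

    u32 nbytes = pmu->desc->app_start_offset + pmu->desc->app_size;
    u32 i;

    /* word-at-a-time, word-indexed */
    for (i = 0; i < nbytes >> 2; i++)
            gk20a_mem_wr32(g, &pmu->ucode, i, pmu->ucode_image[i]);

    /* single call: byte offset 0, nbytes bytes from the CPU-side image */
    gk20a_mem_wr_n(g, &pmu->ucode, 0, pmu->ucode_image, nbytes);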
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 0e6e715d..3ac2cec8 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -43,8 +43,8 @@ static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm,
43static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img); 43static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img);
44static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img); 44static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img);
45static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm); 45static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm);
46static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, 46static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
47 void *nonwpr_addr); 47 struct mem_desc *nonwpr);
48static int acr_ucode_patch_sig(struct gk20a *g, 48static int acr_ucode_patch_sig(struct gk20a *g,
49 unsigned int *p_img, 49 unsigned int *p_img,
50 unsigned int *p_prod_sig, 50 unsigned int *p_prod_sig,
@@ -355,7 +355,7 @@ int prepare_ucode_blob(struct gk20a *g)
355 355
356 gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", 356 gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
357 plsfm->managed_flcn_cnt, plsfm->wpr_size); 357 plsfm->managed_flcn_cnt, plsfm->wpr_size);
358 lsfm_init_wpr_contents(g, plsfm, g->acr.ucode_blob.cpu_va); 358 lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob);
359 } else { 359 } else {
360 gm20b_dbg_pmu("LSFM is managing no falcons.\n"); 360 gm20b_dbg_pmu("LSFM is managing no falcons.\n");
361 } 361 }
@@ -613,120 +613,91 @@ static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
613} 613}
614 614
615/* Initialize WPR contents */ 615/* Initialize WPR contents */
616static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, 616static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
617 void *nonwpr_addr) 617 struct mem_desc *ucode)
618{ 618{
619 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
620 u32 i;
619 621
620 int status = 0; 622 /* The WPR array is at the base of the WPR */
621 union flcn_bl_generic_desc *nonwpr_bl_gen_desc; 623 pnode = plsfm->ucode_img_list;
622 if (nonwpr_addr == NULL) { 624 i = 0;
623 status = -ENOMEM;
624 } else {
625 struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list;
626 struct lsf_wpr_header *wpr_hdr;
627 struct lsf_lsb_header *lsb_hdr;
628 void *ucode_off;
629 u32 i;
630
631 /* The WPR array is at the base of the WPR */
632 wpr_hdr = (struct lsf_wpr_header *)nonwpr_addr;
633 pnode = plsfm->ucode_img_list;
634 i = 0;
635 625
636 /* 626 /*
637 * Walk the managed falcons, flush WPR and LSB headers to FB. 627 * Walk the managed falcons, flush WPR and LSB headers to FB.
638 * flush any bl args to the storage area relative to the 628 * flush any bl args to the storage area relative to the
639 * ucode image (appended on the end as a DMEM area). 629 * ucode image (appended on the end as a DMEM area).
640 */ 630 */
641 while (pnode) { 631 while (pnode) {
642 /* Flush WPR header to memory*/ 632 /* Flush WPR header to memory*/
643 memcpy(&wpr_hdr[i], &pnode->wpr_header, 633 gk20a_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header),
644 sizeof(struct lsf_wpr_header)); 634 &pnode->wpr_header, sizeof(pnode->wpr_header));
645 gm20b_dbg_pmu("wpr header as in memory and pnode\n"); 635
646 gm20b_dbg_pmu("falconid :%d %d\n", 636 gm20b_dbg_pmu("wpr header");
647 pnode->wpr_header.falcon_id, 637 gm20b_dbg_pmu("falconid :%d",
648 wpr_hdr[i].falcon_id); 638 pnode->wpr_header.falcon_id);
649 gm20b_dbg_pmu("lsb_offset :%x %x\n", 639 gm20b_dbg_pmu("lsb_offset :%x",
650 pnode->wpr_header.lsb_offset, 640 pnode->wpr_header.lsb_offset);
651 wpr_hdr[i].lsb_offset); 641 gm20b_dbg_pmu("bootstrap_owner :%d",
652 gm20b_dbg_pmu("bootstrap_owner :%d %d\n", 642 pnode->wpr_header.bootstrap_owner);
653 pnode->wpr_header.bootstrap_owner, 643 gm20b_dbg_pmu("lazy_bootstrap :%d",
654 wpr_hdr[i].bootstrap_owner); 644 pnode->wpr_header.lazy_bootstrap);
655 gm20b_dbg_pmu("lazy_bootstrap :%d %d\n", 645 gm20b_dbg_pmu("status :%d",
656 pnode->wpr_header.lazy_bootstrap, 646 pnode->wpr_header.status);
657 wpr_hdr[i].lazy_bootstrap); 647
658 gm20b_dbg_pmu("status :%d %d\n", 648 /*Flush LSB header to memory*/
659 pnode->wpr_header.status, wpr_hdr[i].status); 649 gk20a_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset,
660 650 &pnode->lsb_header, sizeof(pnode->lsb_header));
661 /*Flush LSB header to memory*/ 651
662 lsb_hdr = (struct lsf_lsb_header *)((u8 *)nonwpr_addr + 652 gm20b_dbg_pmu("lsb header");
663 pnode->wpr_header.lsb_offset); 653 gm20b_dbg_pmu("ucode_off :%x",
664 memcpy(lsb_hdr, &pnode->lsb_header, 654 pnode->lsb_header.ucode_off);
665 sizeof(struct lsf_lsb_header)); 655 gm20b_dbg_pmu("ucode_size :%x",
666 gm20b_dbg_pmu("lsb header as in memory and pnode\n"); 656 pnode->lsb_header.ucode_size);
667 gm20b_dbg_pmu("ucode_off :%x %x\n", 657 gm20b_dbg_pmu("data_size :%x",
668 pnode->lsb_header.ucode_off, 658 pnode->lsb_header.data_size);
669 lsb_hdr->ucode_off); 659 gm20b_dbg_pmu("bl_code_size :%x",
670 gm20b_dbg_pmu("ucode_size :%x %x\n", 660 pnode->lsb_header.bl_code_size);
671 pnode->lsb_header.ucode_size, 661 gm20b_dbg_pmu("bl_imem_off :%x",
672 lsb_hdr->ucode_size); 662 pnode->lsb_header.bl_imem_off);
673 gm20b_dbg_pmu("data_size :%x %x\n", 663 gm20b_dbg_pmu("bl_data_off :%x",
674 pnode->lsb_header.data_size, 664 pnode->lsb_header.bl_data_off);
675 lsb_hdr->data_size); 665 gm20b_dbg_pmu("bl_data_size :%x",
676 gm20b_dbg_pmu("bl_code_size :%x %x\n", 666 pnode->lsb_header.bl_data_size);
677 pnode->lsb_header.bl_code_size, 667 gm20b_dbg_pmu("app_code_off :%x",
678 lsb_hdr->bl_code_size); 668 pnode->lsb_header.app_code_off);
679 gm20b_dbg_pmu("bl_imem_off :%x %x\n", 669 gm20b_dbg_pmu("app_code_size :%x",
680 pnode->lsb_header.bl_imem_off, 670 pnode->lsb_header.app_code_size);
681 lsb_hdr->bl_imem_off); 671 gm20b_dbg_pmu("app_data_off :%x",
682 gm20b_dbg_pmu("bl_data_off :%x %x\n", 672 pnode->lsb_header.app_data_off);
683 pnode->lsb_header.bl_data_off, 673 gm20b_dbg_pmu("app_data_size :%x",
684 lsb_hdr->bl_data_off); 674 pnode->lsb_header.app_data_size);
685 gm20b_dbg_pmu("bl_data_size :%x %x\n", 675 gm20b_dbg_pmu("flags :%x",
686 pnode->lsb_header.bl_data_size, 676 pnode->lsb_header.flags);
687 lsb_hdr->bl_data_size); 677
688 gm20b_dbg_pmu("app_code_off :%x %x\n", 678 /*If this falcon has a boot loader and related args,
689 pnode->lsb_header.app_code_off, 679 * flush them.*/
690 lsb_hdr->app_code_off); 680 if (!pnode->ucode_img.header) {
691 gm20b_dbg_pmu("app_code_size :%x %x\n", 681 /*Populate gen bl and flush to memory*/
692 pnode->lsb_header.app_code_size, 682 lsfm_fill_flcn_bl_gen_desc(g, pnode);
693 lsb_hdr->app_code_size); 683 gk20a_mem_wr_n(g, ucode,
694 gm20b_dbg_pmu("app_data_off :%x %x\n", 684 pnode->lsb_header.bl_data_off,
695 pnode->lsb_header.app_data_off, 685 &pnode->bl_gen_desc,
696 lsb_hdr->app_data_off);
697 gm20b_dbg_pmu("app_data_size :%x %x\n",
698 pnode->lsb_header.app_data_size,
699 lsb_hdr->app_data_size);
700 gm20b_dbg_pmu("flags :%x %x\n",
701 pnode->lsb_header.flags, lsb_hdr->flags);
702
703 /*If this falcon has a boot loader and related args,
704 * flush them.*/
705 if (!pnode->ucode_img.header) {
706 nonwpr_bl_gen_desc =
707 (union flcn_bl_generic_desc *)
708 ((u8 *)nonwpr_addr +
709 pnode->lsb_header.bl_data_off);
710
711 /*Populate gen bl and flush to memory*/
712 lsfm_fill_flcn_bl_gen_desc(g, pnode);
713 memcpy(nonwpr_bl_gen_desc, &pnode->bl_gen_desc,
714 pnode->bl_gen_desc_size); 686 pnode->bl_gen_desc_size);
715 }
716 ucode_off = (void *)(pnode->lsb_header.ucode_off +
717 (u8 *)nonwpr_addr);
718 /*Copying of ucode*/
719 memcpy(ucode_off, pnode->ucode_img.data,
720 pnode->ucode_img.data_size);
721 pnode = pnode->next;
722 i++;
723 } 687 }
724 688 /*Copying of ucode*/
725 /* Tag the terminator WPR header with an invalid falcon ID. */ 689 gk20a_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off,
726 gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id, 690 pnode->ucode_img.data,
727 0, LSF_FALCON_ID_INVALID); 691 pnode->ucode_img.data_size);
692 pnode = pnode->next;
693 i++;
728 } 694 }
729 return status; 695
696 /* Tag the terminator WPR header with an invalid falcon ID. */
697 gk20a_mem_wr32(g, ucode,
698 plsfm->managed_flcn_cnt * sizeof(struct lsf_wpr_header) +
699 offsetof(struct lsf_wpr_header, falcon_id),
700 LSF_FALCON_ID_INVALID);
730} 701}
731 702
732/*! 703/*!
@@ -1000,7 +971,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
1000{ 971{
1001 struct mm_gk20a *mm = &g->mm; 972 struct mm_gk20a *mm = &g->mm;
1002 struct vm_gk20a *vm = &mm->pmu.vm; 973 struct vm_gk20a *vm = &mm->pmu.vm;
1003 int i, err = 0; 974 int err = 0;
1004 u64 *acr_dmem; 975 u64 *acr_dmem;
1005 u32 img_size_in_bytes = 0; 976 u32 img_size_in_bytes = 0;
1006 u32 status, size; 977 u32 status, size;
@@ -1066,10 +1037,8 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g)
1066 ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2; 1037 ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2;
1067 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; 1038 ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0;
1068 1039
1069 for (i = 0; i < (img_size_in_bytes/4); i++) { 1040 gk20a_mem_wr_n(g, &acr->acr_ucode, 0,
1070 gk20a_mem_wr32(acr->acr_ucode.cpu_va, i, 1041 acr_ucode_data_t210_load, img_size_in_bytes);
1071 acr_ucode_data_t210_load[i]);
1072 }
1073 /* 1042 /*
1074 * In order to execute this binary, we will be using 1043 * In order to execute this binary, we will be using
1075 * a bootloader which will load this image into PMU IMEM/DMEM. 1044 * a bootloader which will load this image into PMU IMEM/DMEM.
@@ -1323,7 +1292,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1323 struct mm_gk20a *mm = &g->mm; 1292 struct mm_gk20a *mm = &g->mm;
1324 struct vm_gk20a *vm = &mm->pmu.vm; 1293 struct vm_gk20a *vm = &mm->pmu.vm;
1325 struct device *d = dev_from_gk20a(g); 1294 struct device *d = dev_from_gk20a(g);
1326 int i, err = 0; 1295 int err = 0;
1327 u32 bl_sz; 1296 u32 bl_sz;
1328 struct acr_gm20b *acr = &g->acr; 1297 struct acr_gm20b *acr = &g->acr;
1329 const struct firmware *hsbl_fw = acr->hsbl_fw; 1298 const struct firmware *hsbl_fw = acr->hsbl_fw;
@@ -1369,8 +1338,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
1369 goto err_free_ucode; 1338 goto err_free_ucode;
1370 } 1339 }
1371 1340
1372 for (i = 0; i < (bl_sz) >> 2; i++) 1341 gk20a_mem_wr_n(g, &acr->hsbl_ucode, 0, pmu_bl_gm10x, bl_sz);
1373 gk20a_mem_wr32(acr->hsbl_ucode.cpu_va, i, pmu_bl_gm10x[i]);
1374 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); 1342 gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n");
1375 } 1343 }
1376 /* 1344 /*
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index b9a1e685..2197bae5 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -849,7 +849,7 @@ static int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
849 849
850static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, 850static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
851 struct channel_ctx_gk20a *ch_ctx, 851 struct channel_ctx_gk20a *ch_ctx,
852 void *ctx_ptr) 852 struct mem_desc *mem)
853{ 853{
854 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; 854 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
855 u32 cta_preempt_option = 855 u32 cta_preempt_option =
@@ -859,7 +859,8 @@ static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
859 859
860 if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) { 860 if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
861 gk20a_dbg_info("CTA: %x", cta_preempt_option); 861 gk20a_dbg_info("CTA: %x", cta_preempt_option);
862 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_preemption_options_o(), 0, 862 gk20a_mem_wr(g, mem,
863 ctxsw_prog_main_image_preemption_options_o(),
863 cta_preempt_option); 864 cta_preempt_option);
864 } 865 }
865 866
@@ -1005,7 +1006,7 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
1005 bool enable) 1006 bool enable)
1006{ 1007{
1007 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1008 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1008 void *ctx_ptr = NULL; 1009 struct mem_desc *mem;
1009 u32 v; 1010 u32 v;
1010 1011
1011 gk20a_dbg_fn(""); 1012 gk20a_dbg_fn("");
@@ -1013,18 +1014,17 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
1013 if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) 1014 if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr)
1014 return -EINVAL; 1015 return -EINVAL;
1015 1016
1016 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1017 mem = &ch_ctx->gr_ctx->mem;
1017 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 1018
1018 0, pgprot_writecombine(PAGE_KERNEL)); 1019 if (gk20a_mem_begin(c->g, mem))
1019 if (!ctx_ptr)
1020 return -ENOMEM; 1020 return -ENOMEM;
1021 1021
1022 v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1022 v = gk20a_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
1023 v &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); 1023 v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
1024 v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable); 1024 v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
1025 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v); 1025 gk20a_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);
1026 1026
1027 vunmap(ctx_ptr); 1027 gk20a_mem_end(c->g, mem);
1028 1028
1029 gk20a_dbg_fn("done"); 1029 gk20a_dbg_fn("done");
1030 1030
@@ -1089,13 +1089,13 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g)
1089#endif 1089#endif
1090} 1090}
1091 1091
1092static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) 1092static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct mem_desc *mem)
1093{ 1093{
1094 u32 cde_v; 1094 u32 cde_v;
1095 1095
1096 cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0); 1096 cde_v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o());
1097 cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); 1097 cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f();
1098 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); 1098 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v);
1099} 1099}
1100 1100
1101static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) 1101static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index ac73b5c8..726d73ed 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -106,14 +106,14 @@ static void gm20b_mm_mmu_set_debug_mode(struct gk20a *g, bool enable)
106} 106}
107 107
108static void gm20b_mm_set_big_page_size(struct gk20a *g, 108static void gm20b_mm_set_big_page_size(struct gk20a *g,
109 void *inst_ptr, int size) 109 struct mem_desc *mem, int size)
110{ 110{
111 u32 val; 111 u32 val;
112 112
113 gk20a_dbg_fn(""); 113 gk20a_dbg_fn("");
114 114
115 gk20a_dbg_info("big page size %d\n", size); 115 gk20a_dbg_info("big page size %d\n", size);
116 val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w()); 116 val = gk20a_mem_rd32(g, mem, ram_in_big_page_size_w());
117 val &= ~ram_in_big_page_size_m(); 117 val &= ~ram_in_big_page_size_m();
118 118
119 if (size == SZ_64K) 119 if (size == SZ_64K)
@@ -121,7 +121,7 @@ static void gm20b_mm_set_big_page_size(struct gk20a *g,
121 else 121 else
122 val |= ram_in_big_page_size_128kb_f(); 122 val |= ram_in_big_page_size_128kb_f();
123 123
124 gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val); 124 gk20a_mem_wr32(g, mem, ram_in_big_page_size_w(), val);
125 gk20a_dbg_fn("done"); 125 gk20a_dbg_fn("done");
126} 126}
127 127
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 66b5e410..d1cba979 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -285,8 +285,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
285 mutex_init(&f->free_chs_mutex); 285 mutex_init(&f->free_chs_mutex);
286 286
287 for (chid = 0; chid < f->num_channels; chid++) { 287 for (chid = 0; chid < f->num_channels; chid++) {
288 f->channel[chid].userd_cpu_va =
289 f->userd.cpu_va + chid * f->userd_entry_size;
290 f->channel[chid].userd_iova = 288 f->channel[chid].userd_iova =
291 g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) 289 g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0)
292 + chid * f->userd_entry_size; 290 + chid * f->userd_entry_size;