path: root/drivers/gpu/nvgpu/gk20a
author    Konsta Holtta <kholtta@nvidia.com>    2016-05-12 02:32:05 -0400
committer Ken Adams <kadams@nvidia.com>         2016-05-13 10:11:33 -0400
commit    6eebc87d99f9f04b2b68e0bc0142c161ab3e669d (patch)
tree      08e437890869d76072f291ea66f709f05ea07c8a /drivers/gpu/nvgpu/gk20a
parent    14ef0dacc94077bc3dae4c942ff8c279cc4c92ba (diff)
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor functions. This allows the functions to select the memory access method based on the buffer aperture instead of using the cpu pointer directly (like until now). The selection and aperture support will be in another patch; this patch only refactors these accessors, but keeps the underlying functionality as-is.

gk20a_mem_{rd,wr}32() work as previously; add also gk20a_mem_{rd,wr}() for byte-indexed accesses, gk20a_mem_{rd,wr}_n() for memcpy()-like functionality, and gk20a_memset() for filling buffers with a constant. The 8 and 16 bit accessor functions are removed.

vmap()/vunmap() pairs are abstracted to gk20a_mem_{begin,end}() to support other types of mappings or conditions where mapping the buffer is unnecessary or different.

Several function arguments that would access these buffers are also changed to take a mem_desc instead of a plain cpu pointer. Some relevant occasions are changed to use the accessor functions instead of cpu pointers without them (e.g., memcpying to and from), but the majority of direct accesses will be adjusted later, when the buffers are moved to support vidmem.

JIRA DNVGPU-23

Change-Id: I3dd22e14290c4ab742d42e2dd327ebeb5cd3f25a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121143
Reviewed-by: Ken Adams <kadams@nvidia.com>
Tested-by: Ken Adams <kadams@nvidia.com>
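The resulting calling pattern is visible from the call sites changed below. The following is a hedged sketch, not part of the patch: the helper, its offsets and values are hypothetical, and the accessor signatures are inferred from the call sites in this diff. A caller that previously vmap()'d a buffer and dereferenced a CPU pointer now brackets its accesses with gk20a_mem_begin()/gk20a_mem_end() and passes g plus the mem_desc to every read and write:

/*
 * Hedged sketch only -- not part of this patch. The helper and its offsets
 * are hypothetical; the accessor calls mirror the signatures used at the
 * call sites below.
 */
static int example_clear_and_stamp(struct gk20a *g, struct mem_desc *mem)
{
	u32 v;

	/* Replaces the old vmap() of mem->pages; per the commit message this
	 * may do something different (or nothing) for buffers that do not
	 * need a new mapping. */
	if (gk20a_mem_begin(g, mem))
		return -ENOMEM;

	/* Zero the first 256 bytes (replaces memset() on the CPU pointer). */
	gk20a_memset(g, mem, 0, 0, 256);

	/* Word-indexed 32-bit write, as in channel_gk20a_setup_ramfc(). */
	gk20a_mem_wr32(g, mem, 4, 0xdeadbeef);

	/* Byte-offset read-modify-write, as used on the gr_ctx image. */
	v = gk20a_mem_rd(g, mem, 0x20);
	gk20a_mem_wr(g, mem, 0x20, v | 0x1);

	/* Replaces vunmap(). */
	gk20a_mem_end(g, mem);

	return 0;
}

In the patch itself, permanently mapped buffers such as inst_block are written through gk20a_mem_wr32() without an explicit begin/end pair, while the former vmap()/vunmap() users (gr_ctx, patch_ctx, pm_ctx, the priv access map, the ctxsw timestamp buffer) are converted to the begin/end bracket.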
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c      73
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h       1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/debug_gk20a.c        59
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c   27
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c          2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h              57
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c          359
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c          144
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h           31
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.c          10
10 files changed, 390 insertions, 373 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 990972e4..065e8ab1 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -129,28 +129,25 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
129{ 129{
130 u32 addr_lo; 130 u32 addr_lo;
131 u32 addr_hi; 131 u32 addr_hi;
132 void *inst_ptr;
133 struct gk20a *g = c->g; 132 struct gk20a *g = c->g;
134 133
135 gk20a_dbg_fn(""); 134 gk20a_dbg_fn("");
136 135
137 inst_ptr = c->inst_block.cpu_va;
138 if (!inst_ptr)
139 return -ENOMEM;
140
141 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); 136 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
142 addr_hi = u64_hi32(c->userd_iova); 137 addr_hi = u64_hi32(c->userd_iova);
143 138
144 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", 139 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
145 c->hw_chid, (u64)c->userd_iova); 140 c->hw_chid, (u64)c->userd_iova);
146 141
147 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(), 142 gk20a_mem_wr32(g, &c->inst_block,
143 ram_in_ramfc_w() + ram_fc_userd_w(),
148 (g->mm.vidmem_is_vidmem ? 144 (g->mm.vidmem_is_vidmem ?
149 pbdma_userd_target_sys_mem_ncoh_f() : 145 pbdma_userd_target_sys_mem_ncoh_f() :
150 pbdma_userd_target_vid_mem_f()) | 146 pbdma_userd_target_vid_mem_f()) |
151 pbdma_userd_addr_f(addr_lo)); 147 pbdma_userd_addr_f(addr_lo));
152 148
153 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(), 149 gk20a_mem_wr32(g, &c->inst_block,
150 ram_in_ramfc_w() + ram_fc_userd_hi_w(),
154 pbdma_userd_hi_addr_f(addr_hi)); 151 pbdma_userd_hi_addr_f(addr_hi));
155 152
156 return 0; 153 return 0;
@@ -186,13 +183,8 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
186 183
187static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) 184static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
188{ 185{
189 void *inst_ptr;
190 int shift = 0, value = 0; 186 int shift = 0, value = 0;
191 187
192 inst_ptr = c->inst_block.cpu_va;
193 if (!inst_ptr)
194 return -ENOMEM;
195
196 gk20a_channel_get_timescale_from_timeslice(c->g, 188 gk20a_channel_get_timescale_from_timeslice(c->g,
197 c->timeslice_us, &value, &shift); 189 c->timeslice_us, &value, &shift);
198 190
@@ -203,7 +195,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
203 WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); 195 WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
204 196
205 /* set new timeslice */ 197 /* set new timeslice */
206 gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), 198 gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(),
207 value | (shift << 12) | 199 value | (shift << 12) |
208 fifo_runlist_timeslice_enable_true_f()); 200 fifo_runlist_timeslice_enable_true_f());
209 201
@@ -255,33 +247,30 @@ u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c)
255int channel_gk20a_setup_ramfc(struct channel_gk20a *c, 247int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
256 u64 gpfifo_base, u32 gpfifo_entries, u32 flags) 248 u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
257{ 249{
258 void *inst_ptr; 250 struct gk20a *g = c->g;
251 struct mem_desc *mem = &c->inst_block;
259 252
260 gk20a_dbg_fn(""); 253 gk20a_dbg_fn("");
261 254
262 inst_ptr = c->inst_block.cpu_va; 255 gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
263 if (!inst_ptr)
264 return -ENOMEM;
265
266 memset(inst_ptr, 0, ram_fc_size_val_v());
267 256
268 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), 257 gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
269 pbdma_gp_base_offset_f( 258 pbdma_gp_base_offset_f(
270 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); 259 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
271 260
272 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), 261 gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
273 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | 262 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
274 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); 263 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
275 264
276 gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), 265 gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
277 c->g->ops.fifo.get_pbdma_signature(c->g)); 266 c->g->ops.fifo.get_pbdma_signature(c->g));
278 267
279 gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), 268 gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
280 pbdma_formats_gp_fermi0_f() | 269 pbdma_formats_gp_fermi0_f() |
281 pbdma_formats_pb_fermi1_f() | 270 pbdma_formats_pb_fermi1_f() |
282 pbdma_formats_mp_fermi0_f()); 271 pbdma_formats_mp_fermi0_f());
283 272
284 gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), 273 gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
285 pbdma_pb_header_priv_user_f() | 274 pbdma_pb_header_priv_user_f() |
286 pbdma_pb_header_method_zero_f() | 275 pbdma_pb_header_method_zero_f() |
287 pbdma_pb_header_subchannel_zero_f() | 276 pbdma_pb_header_subchannel_zero_f() |
@@ -289,47 +278,49 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
289 pbdma_pb_header_first_true_f() | 278 pbdma_pb_header_first_true_f() |
290 pbdma_pb_header_type_inc_f()); 279 pbdma_pb_header_type_inc_f());
291 280
292 gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), 281 gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
293 pbdma_subdevice_id_f(1) | 282 pbdma_subdevice_id_f(1) |
294 pbdma_subdevice_status_active_f() | 283 pbdma_subdevice_status_active_f() |
295 pbdma_subdevice_channel_dma_enable_f()); 284 pbdma_subdevice_channel_dma_enable_f());
296 285
297 gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f()); 286 gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
298 287
299 gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), 288 gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
300 channel_gk20a_pbdma_acquire_val(c)); 289 channel_gk20a_pbdma_acquire_val(c));
301 290
302 gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), 291 gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
303 fifo_runlist_timeslice_timeout_128_f() | 292 fifo_runlist_timeslice_timeout_128_f() |
304 fifo_runlist_timeslice_timescale_3_f() | 293 fifo_runlist_timeslice_timescale_3_f() |
305 fifo_runlist_timeslice_enable_true_f()); 294 fifo_runlist_timeslice_enable_true_f());
306 295
307 gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(), 296 gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
308 fifo_pb_timeslice_timeout_16_f() | 297 fifo_pb_timeslice_timeout_16_f() |
309 fifo_pb_timeslice_timescale_0_f() | 298 fifo_pb_timeslice_timescale_0_f() |
310 fifo_pb_timeslice_enable_true_f()); 299 fifo_pb_timeslice_enable_true_f());
311 300
312 gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); 301 gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
313 302
314 return channel_gk20a_commit_userd(c); 303 return channel_gk20a_commit_userd(c);
315} 304}
316 305
317static int channel_gk20a_setup_userd(struct channel_gk20a *c) 306static int channel_gk20a_setup_userd(struct channel_gk20a *c)
318{ 307{
319 BUG_ON(!c->userd_cpu_va); 308 struct gk20a *g = c->g;
309 struct mem_desc *mem = &g->fifo.userd;
310 u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32);
320 311
321 gk20a_dbg_fn(""); 312 gk20a_dbg_fn("");
322 313
323 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); 314 gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
324 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); 315 gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
325 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); 316 gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
326 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); 317 gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
327 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); 318 gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
328 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0); 319 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
329 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); 320 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
330 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); 321 gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
331 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); 322 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
332 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); 323 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
333 324
334 return 0; 325 return 0;
335} 326}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 8840a3ae..b1355f92 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -130,7 +130,6 @@ struct channel_gk20a {
130 struct mem_desc inst_block; 130 struct mem_desc inst_block;
131 struct mem_desc_sub ramfc; 131 struct mem_desc_sub ramfc;
132 132
133 void *userd_cpu_va;
134 u64 userd_iova; 133 u64 userd_iova;
135 u64 userd_gpu_va; 134 u64 userd_gpu_va;
136 135
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index c2285c8a..a3fa2ea5 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -36,7 +36,7 @@ unsigned int gk20a_debug_trace_cmdbuf;
36struct ch_state { 36struct ch_state {
37 int pid; 37 int pid;
38 int refs; 38 int refs;
39 u8 inst_block[0]; 39 u32 inst_block[0];
40}; 40};
41 41
42static const char * const ccsr_chan_status_str[] = { 42static const char * const ccsr_chan_status_str[] = {
@@ -108,15 +108,15 @@ static void gk20a_debug_show_channel(struct gk20a *g,
108 u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid)); 108 u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid));
109 u32 status = ccsr_channel_status_v(channel); 109 u32 status = ccsr_channel_status_v(channel);
110 u32 syncpointa, syncpointb; 110 u32 syncpointa, syncpointb;
111 void *inst_ptr; 111 u32 *inst_mem;
112 112
113 if (!ch_state) 113 if (!ch_state)
114 return; 114 return;
115 115
116 inst_ptr = &ch_state->inst_block[0]; 116 inst_mem = &ch_state->inst_block[0];
117 117
118 syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w()); 118 syncpointa = inst_mem[ram_fc_syncpointa_w()];
119 syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w()); 119 syncpointb = inst_mem[ram_fc_syncpointb_w()];
120 120
121 gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, 121 gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid,
122 dev_name(g->dev), 122 dev_name(g->dev),
@@ -129,23 +129,22 @@ static void gk20a_debug_show_channel(struct gk20a *g,
129 gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx " 129 gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx "
130 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n" 130 "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
131 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n", 131 "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
132 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) + 132 (u64)inst_mem[ram_fc_pb_top_level_get_w()] +
133 ((u64)gk20a_mem_rd32(inst_ptr, 133 ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL),
134 ram_fc_pb_top_level_get_hi_w()) << 32ULL), 134 (u64)inst_mem[ram_fc_pb_put_w()] +
135 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) + 135 ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL),
136 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL), 136 (u64)inst_mem[ram_fc_pb_get_w()] +
137 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) + 137 ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL),
138 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL), 138 (u64)inst_mem[ram_fc_pb_fetch_w()] +
139 (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) + 139 ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL),
140 ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL), 140 inst_mem[ram_fc_pb_header_w()],
141 gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()), 141 inst_mem[ram_fc_pb_count_w()],
142 gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()),
143 syncpointa, 142 syncpointa,
144 syncpointb, 143 syncpointb,
145 gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()), 144 inst_mem[ram_fc_semaphorea_w()],
146 gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()), 145 inst_mem[ram_fc_semaphoreb_w()],
147 gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()), 146 inst_mem[ram_fc_semaphorec_w()],
148 gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w())); 147 inst_mem[ram_fc_semaphored_w()]);
149 148
150#ifdef CONFIG_TEGRA_GK20A 149#ifdef CONFIG_TEGRA_GK20A
151 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v()) 150 if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
@@ -246,17 +245,15 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
246 245
247 for (chid = 0; chid < f->num_channels; chid++) { 246 for (chid = 0; chid < f->num_channels; chid++) {
248 struct channel_gk20a *ch = &f->channel[chid]; 247 struct channel_gk20a *ch = &f->channel[chid];
249 if (ch_state[chid]) { 248 if (!ch_state[chid])
250 if (ch->inst_block.cpu_va) { 249 continue;
251 ch_state[chid]->pid = ch->pid; 250
252 ch_state[chid]->refs = 251 ch_state[chid]->pid = ch->pid;
253 atomic_read(&ch->ref_count); 252 ch_state[chid]->refs = atomic_read(&ch->ref_count);
254 memcpy(&ch_state[chid]->inst_block[0], 253 gk20a_mem_rd_n(g, &ch->inst_block, 0,
255 ch->inst_block.cpu_va, 254 &ch_state[chid]->inst_block[0],
256 ram_in_alloc_size_v()); 255 ram_in_alloc_size_v());
257 } 256 gk20a_channel_put(ch);
258 gk20a_channel_put(ch);
259 }
260 } 257 }
261 for (chid = 0; chid < f->num_channels; chid++) { 258 for (chid = 0; chid < f->num_channels; chid++) {
262 if (ch_state[chid]) { 259 if (ch_state[chid]) {
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index f9cddc41..edddcdc1 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -619,7 +619,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
619 phys_addr_t pa; 619 phys_addr_t pa;
620 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 620 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
621 struct gk20a_fecs_trace *trace = g->fecs_trace; 621 struct gk20a_fecs_trace *trace = g->fecs_trace;
622 void *ctx_ptr; 622 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
623 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); 623 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch);
624 624
625 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, 625 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
@@ -634,10 +634,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
634 if (!pa) 634 if (!pa)
635 return -ENOMEM; 635 return -ENOMEM;
636 636
637 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 637 if (gk20a_mem_begin(g, mem))
638 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0,
639 pgprot_writecombine(PAGE_KERNEL));
640 if (!ctx_ptr)
641 return -ENOMEM; 638 return -ENOMEM;
642 639
643 lo = u64_lo32(pa); 640 lo = u64_lo32(pa);
@@ -646,18 +643,18 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g,
646 gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, 643 gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
647 lo, GK20A_FECS_TRACE_NUM_RECORDS); 644 lo, GK20A_FECS_TRACE_NUM_RECORDS);
648 645
649 gk20a_mem_wr32(ctx_ptr 646 gk20a_mem_wr(g, mem,
650 + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 647 ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
651 0, lo); 648 lo);
652 gk20a_mem_wr32(ctx_ptr 649 gk20a_mem_wr(g, mem,
653 + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 650 ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
654 0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); 651 ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi));
655 gk20a_mem_wr32(ctx_ptr 652 gk20a_mem_wr(g, mem,
656 + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 653 ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
657 0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( 654 ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
658 GK20A_FECS_TRACE_NUM_RECORDS)); 655 GK20A_FECS_TRACE_NUM_RECORDS));
659 656
660 vunmap(ctx_ptr); 657 gk20a_mem_end(g, mem);
661 gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid); 658 gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid);
662 659
663 return 0; 660 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index dc3debf2..71400331 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -520,8 +520,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
520 mutex_init(&f->free_chs_mutex); 520 mutex_init(&f->free_chs_mutex);
521 521
522 for (chid = 0; chid < f->num_channels; chid++) { 522 for (chid = 0; chid < f->num_channels; chid++) {
523 f->channel[chid].userd_cpu_va =
524 f->userd.cpu_va + chid * f->userd_entry_size;
525 f->channel[chid].userd_iova = 523 f->channel[chid].userd_iova =
526 g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) 524 g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0)
527 + chid * f->userd_entry_size; 525 + chid * f->userd_entry_size;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index f228cce4..2f85bf96 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -201,7 +201,7 @@ struct gpu_ops {
201 struct gr_ctx_desc *gr_ctx); 201 struct gr_ctx_desc *gr_ctx);
202 void (*update_ctxsw_preemption_mode)(struct gk20a *g, 202 void (*update_ctxsw_preemption_mode)(struct gk20a *g,
203 struct channel_ctx_gk20a *ch_ctx, 203 struct channel_ctx_gk20a *ch_ctx,
204 void *ctx_ptr); 204 struct mem_desc *mem);
205 int (*update_smpc_ctxsw_mode)(struct gk20a *g, 205 int (*update_smpc_ctxsw_mode)(struct gk20a *g,
206 struct channel_gk20a *c, 206 struct channel_gk20a *c,
207 bool enable); 207 bool enable);
@@ -221,7 +221,8 @@ struct gpu_ops {
221 int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies, 221 int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies,
222 u32 expect_delay); 222 u32 expect_delay);
223 void (*init_cyclestats)(struct gk20a *g); 223 void (*init_cyclestats)(struct gk20a *g);
224 void (*enable_cde_in_fecs)(void *ctx_ptr); 224 void (*enable_cde_in_fecs)(struct gk20a *g,
225 struct mem_desc *mem);
225 int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch, 226 int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch,
226 u64 sms, bool enable); 227 u64 sms, bool enable);
227 void (*bpt_reg_info)(struct gk20a *g, 228 void (*bpt_reg_info)(struct gk20a *g,
@@ -484,7 +485,7 @@ struct gpu_ops {
484 void (*cbc_clean)(struct gk20a *g); 485 void (*cbc_clean)(struct gk20a *g);
485 void (*tlb_invalidate)(struct vm_gk20a *vm); 486 void (*tlb_invalidate)(struct vm_gk20a *vm);
486 void (*set_big_page_size)(struct gk20a *g, 487 void (*set_big_page_size)(struct gk20a *g,
487 void *inst_ptr, int size); 488 struct mem_desc *mem, int size);
488 u32 (*get_big_page_sizes)(void); 489 u32 (*get_big_page_sizes)(void);
489 u32 (*get_physical_addr_bits)(struct gk20a *g); 490 u32 (*get_physical_addr_bits)(struct gk20a *g);
490 int (*init_mm_setup_hw)(struct gk20a *g); 491 int (*init_mm_setup_hw)(struct gk20a *g);
@@ -493,7 +494,8 @@ struct gpu_ops {
493 void (*remove_bar2_vm)(struct gk20a *g); 494 void (*remove_bar2_vm)(struct gk20a *g);
494 const struct gk20a_mmu_level * 495 const struct gk20a_mmu_level *
495 (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); 496 (*get_mmu_levels)(struct gk20a *g, u32 big_page_size);
496 void (*init_pdb)(struct gk20a *g, void *inst_ptr, u64 pdb_addr); 497 void (*init_pdb)(struct gk20a *g, struct mem_desc *mem,
498 u64 pdb_addr);
497 u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl, 499 u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl,
498 u32 flags); 500 u32 flags);
499 int (*bar1_bind)(struct gk20a *g, u64 bar1_iova); 501 int (*bar1_bind)(struct gk20a *g, u64 bar1_iova);
@@ -859,53 +861,6 @@ do { \
859#define gk20a_dbg_info(fmt, arg...) \ 861#define gk20a_dbg_info(fmt, arg...) \
860 gk20a_dbg(gpu_dbg_info, fmt, ##arg) 862 gk20a_dbg(gpu_dbg_info, fmt, ##arg)
861 863
862/* mem access with dbg_mem logging */
863static inline u8 gk20a_mem_rd08(void *ptr, int b)
864{
865 u8 _b = ((const u8 *)ptr)[b];
866#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
867 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, _b);
868#endif
869 return _b;
870}
871static inline u16 gk20a_mem_rd16(void *ptr, int s)
872{
873 u16 _s = ((const u16 *)ptr)[s];
874#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
875 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, _s);
876#endif
877 return _s;
878}
879static inline u32 gk20a_mem_rd32(void *ptr, int w)
880{
881 u32 _w = ((const u32 *)ptr)[w];
882#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
883 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + sizeof(u32)*w, _w);
884#endif
885 return _w;
886}
887static inline void gk20a_mem_wr08(void *ptr, int b, u8 data)
888{
889#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
890 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, data);
891#endif
892 ((u8 *)ptr)[b] = data;
893}
894static inline void gk20a_mem_wr16(void *ptr, int s, u16 data)
895{
896#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
897 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, data);
898#endif
899 ((u16 *)ptr)[s] = data;
900}
901static inline void gk20a_mem_wr32(void *ptr, int w, u32 data)
902{
903#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
904 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u32)*w, data);
905#endif
906 ((u32 *)ptr)[w] = data;
907}
908
909void gk20a_init_clk_ops(struct gpu_ops *gops); 864void gk20a_init_clk_ops(struct gpu_ops *gops);
910 865
911/* register accessors */ 866/* register accessors */
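For reference, the shapes of the accessors that replace the inline helpers removed above can be inferred from the call sites in this patch; the authoritative declarations live in mm_gk20a.h, which is part of this change but not reproduced on this page, so the following is an inferred sketch rather than a quote of the header:

/* Inferred from usage in this diff -- see mm_gk20a.h in the same change for
 * the real declarations. Every accessor takes the gk20a device and a
 * mem_desc so the implementation can later pick the access method from the
 * buffer's aperture. */
int  gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem);   /* replaces vmap()   */
void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem);     /* replaces vunmap() */

u32  gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w);            /* word index  */
void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data);
u32  gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset);         /* byte offset */
void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);

void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
		    void *dest, u32 size);                     /* memcpy() out of the buffer */
void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
		    void *src, u32 size);                      /* memcpy() into the buffer   */
void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
		  u32 value, u32 size);                        /* memset() the buffer        */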
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4e7c36ee..e7e6662a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -97,22 +97,18 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
97 u32 *ctx_id) 97 u32 *ctx_id)
98{ 98{
99 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 99 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
100 void *ctx_ptr = NULL;
101 100
102 /* Channel gr_ctx buffer is gpu cacheable. 101 /* Channel gr_ctx buffer is gpu cacheable.
103 Flush and invalidate before cpu update. */ 102 Flush and invalidate before cpu update. */
104 g->ops.mm.l2_flush(g, true); 103 g->ops.mm.l2_flush(g, true);
105 104
106 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 105 if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem))
107 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
108 0, pgprot_writecombine(PAGE_KERNEL));
109 if (!ctx_ptr)
110 return -ENOMEM; 106 return -ENOMEM;
111 107
112 *ctx_id = gk20a_mem_rd32(ctx_ptr + 108 *ctx_id = gk20a_mem_rd(g, &ch_ctx->gr_ctx->mem,
113 ctxsw_prog_main_image_context_id_o(), 0); 109 ctxsw_prog_main_image_context_id_o());
114 110
115 vunmap(ctx_ptr); 111 gk20a_mem_end(g, &ch_ctx->gr_ctx->mem);
116 112
117 return 0; 113 return 0;
118} 114}
@@ -619,22 +615,17 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
619{ 615{
620 u32 addr_lo; 616 u32 addr_lo;
621 u32 addr_hi; 617 u32 addr_hi;
622 void *inst_ptr = NULL;
623 618
624 gk20a_dbg_fn(""); 619 gk20a_dbg_fn("");
625 620
626 inst_ptr = c->inst_block.cpu_va;
627 if (!inst_ptr)
628 return -ENOMEM;
629
630 addr_lo = u64_lo32(gpu_va) >> 12; 621 addr_lo = u64_lo32(gpu_va) >> 12;
631 addr_hi = u64_hi32(gpu_va); 622 addr_hi = u64_hi32(gpu_va);
632 623
633 gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(), 624 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(),
634 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | 625 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
635 ram_in_gr_wfi_ptr_lo_f(addr_lo)); 626 ram_in_gr_wfi_ptr_lo_f(addr_lo));
636 627
637 gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), 628 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(),
638 ram_in_gr_wfi_ptr_hi_f(addr_hi)); 629 ram_in_gr_wfi_ptr_hi_f(addr_hi));
639 630
640 return 0; 631 return 0;
@@ -658,11 +649,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
658 return -EBUSY; 649 return -EBUSY;
659 } 650 }
660 651
661 ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages, 652 if (gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem))
662 PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT,
663 0, pgprot_writecombine(PAGE_KERNEL));
664
665 if (!ch_ctx->patch_ctx.mem.cpu_va)
666 return -ENOMEM; 653 return -ENOMEM;
667 654
668 return 0; 655 return 0;
@@ -677,8 +664,7 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
677 return -EINVAL; 664 return -EINVAL;
678 } 665 }
679 666
680 vunmap(ch_ctx->patch_ctx.mem.cpu_va); 667 gk20a_mem_end(g, &ch_ctx->patch_ctx.mem);
681 ch_ctx->patch_ctx.mem.cpu_va = NULL;
682 return 0; 668 return 0;
683} 669}
684 670
@@ -687,7 +673,6 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
687 u32 addr, u32 data, bool patch) 673 u32 addr, u32 data, bool patch)
688{ 674{
689 u32 patch_slot = 0; 675 u32 patch_slot = 0;
690 void *patch_ptr = NULL;
691 bool mapped_here = false; 676 bool mapped_here = false;
692 677
693 BUG_ON(patch != 0 && ch_ctx == NULL); 678 BUG_ON(patch != 0 && ch_ctx == NULL);
@@ -708,11 +693,10 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
708 } else 693 } else
709 mapped_here = false; 694 mapped_here = false;
710 695
711 patch_ptr = ch_ctx->patch_ctx.mem.cpu_va;
712 patch_slot = ch_ctx->patch_ctx.data_count * 2; 696 patch_slot = ch_ctx->patch_ctx.data_count * 2;
713 697
714 gk20a_mem_wr32(patch_ptr, patch_slot++, addr); 698 gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, addr);
715 gk20a_mem_wr32(patch_ptr, patch_slot++, data); 699 gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, data);
716 700
717 ch_ctx->patch_ctx.data_count++; 701 ch_ctx->patch_ctx.data_count++;
718 702
@@ -760,16 +744,13 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
760static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) 744static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
761{ 745{
762 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 746 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
747 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
763 u32 va_lo, va_hi, va; 748 u32 va_lo, va_hi, va;
764 int ret = 0; 749 int ret = 0;
765 void *ctx_ptr = NULL;
766 750
767 gk20a_dbg_fn(""); 751 gk20a_dbg_fn("");
768 752
769 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 753 if (gk20a_mem_begin(g, mem))
770 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
771 0, pgprot_writecombine(PAGE_KERNEL));
772 if (!ctx_ptr)
773 return -ENOMEM; 754 return -ENOMEM;
774 755
775 if (ch_ctx->zcull_ctx.gpu_va == 0 && 756 if (ch_ctx->zcull_ctx.gpu_va == 0 &&
@@ -792,15 +773,17 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
792 goto clean_up; 773 goto clean_up;
793 } 774 }
794 775
795 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, 776 gk20a_mem_wr(g, mem,
777 ctxsw_prog_main_image_zcull_o(),
796 ch_ctx->zcull_ctx.ctx_sw_mode); 778 ch_ctx->zcull_ctx.ctx_sw_mode);
797 779
798 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va); 780 gk20a_mem_wr(g, mem,
781 ctxsw_prog_main_image_zcull_ptr_o(), va);
799 782
800 c->g->ops.fifo.enable_channel(c); 783 c->g->ops.fifo.enable_channel(c);
801 784
802clean_up: 785clean_up:
803 vunmap(ctx_ptr); 786 gk20a_mem_end(g, mem);
804 787
805 return ret; 788 return ret;
806} 789}
@@ -1500,8 +1483,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1500 u32 ctx_header_words; 1483 u32 ctx_header_words;
1501 u32 i; 1484 u32 i;
1502 u32 data; 1485 u32 data;
1503 void *ctx_ptr = NULL; 1486 struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
1504 void *gold_ptr = NULL; 1487 struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem;
1505 u32 err = 0; 1488 u32 err = 0;
1506 1489
1507 gk20a_dbg_fn(""); 1490 gk20a_dbg_fn("");
@@ -1527,16 +1510,10 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1527 if (err) 1510 if (err)
1528 goto clean_up; 1511 goto clean_up;
1529 1512
1530 gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].mem.pages, 1513 if (gk20a_mem_begin(g, gold_mem))
1531 PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].mem.size) >>
1532 PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL));
1533 if (!gold_ptr)
1534 goto clean_up; 1514 goto clean_up;
1535 1515
1536 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1516 if (gk20a_mem_begin(g, gr_mem))
1537 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1538 0, pgprot_writecombine(PAGE_KERNEL));
1539 if (!ctx_ptr)
1540 goto clean_up; 1517 goto clean_up;
1541 1518
1542 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); 1519 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
@@ -1545,14 +1522,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1545 g->ops.mm.l2_flush(g, true); 1522 g->ops.mm.l2_flush(g, true);
1546 1523
1547 for (i = 0; i < ctx_header_words; i++) { 1524 for (i = 0; i < ctx_header_words; i++) {
1548 data = gk20a_mem_rd32(ctx_ptr, i); 1525 data = gk20a_mem_rd32(g, gr_mem, i);
1549 gk20a_mem_wr32(gold_ptr, i, data); 1526 gk20a_mem_wr32(g, gold_mem, i, data);
1550 } 1527 }
1551 1528
1552 gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0, 1529 gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
1553 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); 1530 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1554 1531
1555 gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0); 1532 gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0);
1556 1533
1557 gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); 1534 gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1558 1535
@@ -1568,12 +1545,12 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1568 goto clean_up; 1545 goto clean_up;
1569 } 1546 }
1570 1547
1571 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) 1548 gk20a_mem_rd_n(g, gold_mem, 0,
1572 gr->ctx_vars.local_golden_image[i] = 1549 gr->ctx_vars.local_golden_image,
1573 gk20a_mem_rd32(gold_ptr, i); 1550 gr->ctx_vars.golden_image_size);
1574 } 1551 }
1575 1552
1576 gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); 1553 gr_gk20a_commit_inst(c, gr_mem->gpu_va);
1577 1554
1578 gr->ctx_vars.golden_image_initialized = true; 1555 gr->ctx_vars.golden_image_initialized = true;
1579 1556
@@ -1586,10 +1563,8 @@ clean_up:
1586 else 1563 else
1587 gk20a_dbg_fn("done"); 1564 gk20a_dbg_fn("done");
1588 1565
1589 if (gold_ptr) 1566 gk20a_mem_end(g, gold_mem);
1590 vunmap(gold_ptr); 1567 gk20a_mem_end(g, gr_mem);
1591 if (ctx_ptr)
1592 vunmap(ctx_ptr);
1593 1568
1594 mutex_unlock(&gr->ctx_mutex); 1569 mutex_unlock(&gr->ctx_mutex);
1595 return err; 1570 return err;
@@ -1600,7 +1575,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1600 bool enable_smpc_ctxsw) 1575 bool enable_smpc_ctxsw)
1601{ 1576{
1602 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1577 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1603 void *ctx_ptr = NULL; 1578 struct mem_desc *mem;
1604 u32 data; 1579 u32 data;
1605 int ret; 1580 int ret;
1606 1581
@@ -1611,46 +1586,39 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1611 return -EFAULT; 1586 return -EFAULT;
1612 } 1587 }
1613 1588
1589 mem = &ch_ctx->gr_ctx->mem;
1590
1614 c->g->ops.fifo.disable_channel(c); 1591 c->g->ops.fifo.disable_channel(c);
1615 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); 1592 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
1616 if (ret) { 1593 if (ret) {
1617 c->g->ops.fifo.enable_channel(c); 1594 gk20a_err(dev_from_gk20a(g), "failed to preempt channel");
1618 gk20a_err(dev_from_gk20a(g), 1595 goto out;
1619 "failed to preempt channel\n");
1620 return ret;
1621 } 1596 }
1622 1597
1623 /* Channel gr_ctx buffer is gpu cacheable. 1598 /* Channel gr_ctx buffer is gpu cacheable.
1624 Flush and invalidate before cpu update. */ 1599 Flush and invalidate before cpu update. */
1625 g->ops.mm.l2_flush(g, true); 1600 g->ops.mm.l2_flush(g, true);
1626 1601
1627 if (!ch_ctx->gr_ctx) { 1602 if (gk20a_mem_begin(g, mem)) {
1628 gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); 1603 ret = -ENOMEM;
1629 return -EFAULT; 1604 goto out;
1630 }
1631
1632 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
1633 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1634 0, pgprot_writecombine(PAGE_KERNEL));
1635 if (!ctx_ptr) {
1636 c->g->ops.fifo.enable_channel(c);
1637 return -ENOMEM;
1638 } 1605 }
1639 1606
1640 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1607 data = gk20a_mem_rd(g, mem,
1608 ctxsw_prog_main_image_pm_o());
1641 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); 1609 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
1642 data |= enable_smpc_ctxsw ? 1610 data |= enable_smpc_ctxsw ?
1643 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : 1611 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
1644 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); 1612 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
1645 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, 1613 gk20a_mem_wr(g, mem,
1646 data); 1614 ctxsw_prog_main_image_pm_o(),
1615 data);
1647 1616
1648 vunmap(ctx_ptr); 1617 gk20a_mem_end(g, mem);
1649 1618
1650 /* enable channel */ 1619out:
1651 c->g->ops.fifo.enable_channel(c); 1620 c->g->ops.fifo.enable_channel(c);
1652 1621 return ret;
1653 return 0;
1654} 1622}
1655 1623
1656int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, 1624int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
@@ -1659,8 +1627,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1659{ 1627{
1660 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1628 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1661 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; 1629 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
1662 void *ctx_ptr = NULL; 1630 struct mem_desc *gr_mem;
1663 void *pm_ctx_ptr;
1664 u32 data, virt_addr; 1631 u32 data, virt_addr;
1665 int ret; 1632 int ret;
1666 1633
@@ -1671,6 +1638,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1671 return -EFAULT; 1638 return -EFAULT;
1672 } 1639 }
1673 1640
1641 gr_mem = &ch_ctx->gr_ctx->mem;
1642
1674 if (enable_hwpm_ctxsw) { 1643 if (enable_hwpm_ctxsw) {
1675 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) 1644 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
1676 return 0; 1645 return 0;
@@ -1721,29 +1690,22 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1721 } 1690 }
1722 1691
1723 /* Now clear the buffer */ 1692 /* Now clear the buffer */
1724 pm_ctx_ptr = vmap(pm_ctx->mem.pages, 1693 if (gk20a_mem_begin(g, &pm_ctx->mem)) {
1725 PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT,
1726 0, pgprot_writecombine(PAGE_KERNEL));
1727
1728 if (!pm_ctx_ptr) {
1729 ret = -ENOMEM; 1694 ret = -ENOMEM;
1730 goto cleanup_pm_buf; 1695 goto cleanup_pm_buf;
1731 } 1696 }
1732 1697
1733 memset(pm_ctx_ptr, 0, pm_ctx->mem.size); 1698 gk20a_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size);
1734 1699
1735 vunmap(pm_ctx_ptr); 1700 gk20a_mem_end(g, &pm_ctx->mem);
1736 } 1701 }
1737 1702
1738 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1703 if (gk20a_mem_begin(g, gr_mem)) {
1739 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1740 0, pgprot_writecombine(PAGE_KERNEL));
1741 if (!ctx_ptr) {
1742 ret = -ENOMEM; 1704 ret = -ENOMEM;
1743 goto cleanup_pm_buf; 1705 goto cleanup_pm_buf;
1744 } 1706 }
1745 1707
1746 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1708 data = gk20a_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
1747 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1709 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1748 1710
1749 if (enable_hwpm_ctxsw) { 1711 if (enable_hwpm_ctxsw) {
@@ -1760,10 +1722,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1760 1722
1761 data |= pm_ctx->pm_mode; 1723 data |= pm_ctx->pm_mode;
1762 1724
1763 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); 1725 gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
1764 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); 1726 gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
1765 1727
1766 vunmap(ctx_ptr); 1728 gk20a_mem_end(g, gr_mem);
1767 1729
1768 /* enable channel */ 1730 /* enable channel */
1769 c->g->ops.fifo.enable_channel(c); 1731 c->g->ops.fifo.enable_channel(c);
@@ -1788,9 +1750,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1788 u32 virt_addr_lo; 1750 u32 virt_addr_lo;
1789 u32 virt_addr_hi; 1751 u32 virt_addr_hi;
1790 u32 virt_addr = 0; 1752 u32 virt_addr = 0;
1791 u32 i, v, data; 1753 u32 v, data;
1792 int ret = 0; 1754 int ret = 0;
1793 void *ctx_ptr = NULL; 1755 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
1794 1756
1795 gk20a_dbg_fn(""); 1757 gk20a_dbg_fn("");
1796 1758
@@ -1801,20 +1763,18 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1801 Flush and invalidate before cpu update. */ 1763 Flush and invalidate before cpu update. */
1802 g->ops.mm.l2_flush(g, true); 1764 g->ops.mm.l2_flush(g, true);
1803 1765
1804 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1766 if (gk20a_mem_begin(g, mem))
1805 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1806 0, pgprot_writecombine(PAGE_KERNEL));
1807 if (!ctx_ptr)
1808 return -ENOMEM; 1767 return -ENOMEM;
1809 1768
1810 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) 1769 gk20a_mem_wr_n(g, mem, 0,
1811 gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); 1770 gr->ctx_vars.local_golden_image,
1771 gr->ctx_vars.golden_image_size);
1812 1772
1813 if (g->ops.gr.enable_cde_in_fecs && c->cde) 1773 if (g->ops.gr.enable_cde_in_fecs && c->cde)
1814 g->ops.gr.enable_cde_in_fecs(ctx_ptr); 1774 g->ops.gr.enable_cde_in_fecs(g, mem);
1815 1775
1816 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); 1776 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0);
1817 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); 1777 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0);
1818 1778
1819 /* set priv access map */ 1779 /* set priv access map */
1820 virt_addr_lo = 1780 virt_addr_lo =
@@ -1827,29 +1787,29 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1827 else 1787 else
1828 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); 1788 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
1829 1789
1830 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0, 1790 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
1831 data); 1791 data);
1832 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0, 1792 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
1833 virt_addr_lo); 1793 virt_addr_lo);
1834 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0, 1794 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
1835 virt_addr_hi); 1795 virt_addr_hi);
1836 /* disable verif features */ 1796 /* disable verif features */
1837 v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0); 1797 v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
1838 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); 1798 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
1839 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); 1799 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
1840 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); 1800 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
1841 1801
1842 if (g->ops.gr.update_ctxsw_preemption_mode) 1802 if (g->ops.gr.update_ctxsw_preemption_mode)
1843 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr); 1803 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem);
1844 1804
1845 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 1805 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
1846 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 1806 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
1847 1807
1848 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0, 1808 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
1849 ch_ctx->patch_ctx.data_count); 1809 ch_ctx->patch_ctx.data_count);
1850 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0, 1810 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(),
1851 virt_addr_lo); 1811 virt_addr_lo);
1852 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, 1812 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(),
1853 virt_addr_hi); 1813 virt_addr_hi);
1854 1814
1855 /* Update main header region of the context buffer with the info needed 1815 /* Update main header region of the context buffer with the info needed
@@ -1860,7 +1820,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1860 if (ch_ctx->pm_ctx.mem.gpu_va == 0) { 1820 if (ch_ctx->pm_ctx.mem.gpu_va == 0) {
1861 gk20a_err(dev_from_gk20a(g), 1821 gk20a_err(dev_from_gk20a(g),
1862 "context switched pm with no pm buffer!"); 1822 "context switched pm with no pm buffer!");
1863 vunmap(ctx_ptr); 1823 gk20a_mem_end(g, mem);
1864 return -EFAULT; 1824 return -EFAULT;
1865 } 1825 }
1866 1826
@@ -1871,14 +1831,14 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1871 } else 1831 } else
1872 virt_addr = 0; 1832 virt_addr = 0;
1873 1833
1874 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1834 data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
1875 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1835 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1876 data |= ch_ctx->pm_ctx.pm_mode; 1836 data |= ch_ctx->pm_ctx.pm_mode;
1877 1837
1878 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); 1838 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
1879 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); 1839 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
1880 1840
1881 vunmap(ctx_ptr); 1841 gk20a_mem_end(g, mem);
1882 1842
1883 if (tegra_platform_is_linsim()) { 1843 if (tegra_platform_is_linsim()) {
1884 u32 inst_base_ptr = 1844 u32 inst_base_ptr =
@@ -1978,16 +1938,20 @@ static void gr_gk20a_init_ctxsw_ucode_segments(
1978} 1938}
1979 1939
1980static int gr_gk20a_copy_ctxsw_ucode_segments( 1940static int gr_gk20a_copy_ctxsw_ucode_segments(
1981 u8 *buf, 1941 struct gk20a *g,
1942 struct mem_desc *dst,
1982 struct gk20a_ctxsw_ucode_segments *segments, 1943 struct gk20a_ctxsw_ucode_segments *segments,
1983 u32 *bootimage, 1944 u32 *bootimage,
1984 u32 *code, u32 *data) 1945 u32 *code, u32 *data)
1985{ 1946{
1986 int i; 1947 int i;
1987 1948
1988 memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); 1949 gk20a_mem_wr_n(g, dst, segments->boot.offset, bootimage,
1989 memcpy(buf + segments->code.offset, code, segments->code.size); 1950 segments->boot.size);
1990 memcpy(buf + segments->data.offset, data, segments->data.size); 1951 gk20a_mem_wr_n(g, dst, segments->code.offset, code,
1952 segments->code.size);
1953 gk20a_mem_wr_n(g, dst, segments->data.offset, data,
1954 segments->data.size);
1991 1955
1992 /* compute a "checksum" for the boot binary to detect its version */ 1956 /* compute a "checksum" for the boot binary to detect its version */
1993 segments->boot_signature = 0; 1957 segments->boot_signature = 0;
@@ -2009,7 +1973,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2009 u32 *fecs_boot_image; 1973 u32 *fecs_boot_image;
2010 u32 *gpccs_boot_image; 1974 u32 *gpccs_boot_image;
2011 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; 1975 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2012 u8 *buf;
2013 u32 ucode_size; 1976 u32 ucode_size;
2014 int err = 0; 1977 int err = 0;
2015 1978
@@ -2049,14 +2012,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2049 if (err) 2012 if (err)
2050 goto clean_up; 2013 goto clean_up;
2051 2014
2052 buf = (u8 *)ucode_info->surface_desc.cpu_va; 2015 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2053 if (!buf) { 2016 &ucode_info->fecs,
2054 gk20a_err(d, "failed to map surface desc buffer");
2055 err = -ENOMEM;
2056 goto clean_up;
2057 }
2058
2059 gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs,
2060 fecs_boot_image, 2017 fecs_boot_image,
2061 g->gr.ctx_vars.ucode.fecs.inst.l, 2018 g->gr.ctx_vars.ucode.fecs.inst.l,
2062 g->gr.ctx_vars.ucode.fecs.data.l); 2019 g->gr.ctx_vars.ucode.fecs.data.l);
@@ -2064,7 +2021,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2064 release_firmware(fecs_fw); 2021 release_firmware(fecs_fw);
2065 fecs_fw = NULL; 2022 fecs_fw = NULL;
2066 2023
2067 gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs, 2024 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2025 &ucode_info->gpccs,
2068 gpccs_boot_image, 2026 gpccs_boot_image,
2069 g->gr.ctx_vars.ucode.gpccs.inst.l, 2027 g->gr.ctx_vars.ucode.gpccs.inst.l,
2070 g->gr.ctx_vars.ucode.gpccs.data.l); 2028 g->gr.ctx_vars.ucode.gpccs.data.l);
@@ -4690,41 +4648,38 @@ out:
4690static int gr_gk20a_init_access_map(struct gk20a *g) 4648static int gr_gk20a_init_access_map(struct gk20a *g)
4691{ 4649{
4692 struct gr_gk20a *gr = &g->gr; 4650 struct gr_gk20a *gr = &g->gr;
4693 void *data; 4651 struct mem_desc *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
4694 int err = 0;
4695 u32 w, nr_pages = 4652 u32 w, nr_pages =
4696 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, 4653 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
4697 PAGE_SIZE); 4654 PAGE_SIZE);
4698 u32 *whitelist = NULL; 4655 u32 *whitelist = NULL;
4699 int num_entries = 0; 4656 int num_entries = 0;
4700 4657
4701 data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.pages, 4658 if (gk20a_mem_begin(g, mem)) {
4702 PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size) >>
4703 PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL));
4704 if (!data) {
4705 gk20a_err(dev_from_gk20a(g), 4659 gk20a_err(dev_from_gk20a(g),
4706 "failed to map priv access map memory"); 4660 "failed to map priv access map memory");
4707 err = -ENOMEM; 4661 return -ENOMEM;
4708 goto clean_up;
4709 } 4662 }
4710 4663
4711 memset(data, 0x0, PAGE_SIZE * nr_pages); 4664 gk20a_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);
4712 4665
4713 g->ops.gr.get_access_map(g, &whitelist, &num_entries); 4666 g->ops.gr.get_access_map(g, &whitelist, &num_entries);
4714 4667
4715 for (w = 0; w < num_entries; w++) { 4668 for (w = 0; w < num_entries; w++) {
4716 u32 map_bit, map_byte, map_shift; 4669 u32 map_bit, map_byte, map_shift, x;
4717 map_bit = whitelist[w] >> 2; 4670 map_bit = whitelist[w] >> 2;
4718 map_byte = map_bit >> 3; 4671 map_byte = map_bit >> 3;
4719 map_shift = map_bit & 0x7; /* i.e. 0-7 */ 4672 map_shift = map_bit & 0x7; /* i.e. 0-7 */
4720 gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", 4673 gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d",
4721 whitelist[w], map_byte, map_shift); 4674 whitelist[w], map_byte, map_shift);
4722 ((u8 *)data)[map_byte] |= 1 << map_shift; 4675 x = gk20a_mem_rd32(g, mem, map_byte / sizeof(u32));
4676 x |= 1 << (
4677 (map_byte % sizeof(u32) * BITS_PER_BYTE)
4678 + map_shift);
4679 gk20a_mem_wr32(g, mem, map_byte / sizeof(u32), x);
4723 } 4680 }
4724 4681
4725clean_up: 4682 gk20a_mem_end(g, mem);
4726 if (data)
4727 vunmap(data);
4728 return 0; 4683 return 0;
4729} 4684}
4730 4685
@@ -6659,7 +6614,7 @@ static void gr_gk20a_init_sm_dsm_reg_info(void)
6659static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, 6614static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6660 struct channel_ctx_gk20a *ch_ctx, 6615 struct channel_ctx_gk20a *ch_ctx,
6661 u32 addr, u32 data, 6616 u32 addr, u32 data,
6662 u8 *context) 6617 struct mem_desc *mem)
6663{ 6618{
6664 u32 num_gpc = g->gr.gpc_count; 6619 u32 num_gpc = g->gr.gpc_count;
6665 u32 num_tpc; 6620 u32 num_tpc;
@@ -6688,8 +6643,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6688 /* reset the patch count from previous 6643 /* reset the patch count from previous
6689 runs,if ucode has already processed 6644 runs,if ucode has already processed
6690 it */ 6645 it */
6691 tmp = gk20a_mem_rd32(context + 6646 tmp = gk20a_mem_rd(g, mem,
6692 ctxsw_prog_main_image_patch_count_o(), 0); 6647 ctxsw_prog_main_image_patch_count_o());
6693 6648
6694 if (!tmp) 6649 if (!tmp)
6695 ch_ctx->patch_ctx.data_count = 0; 6650 ch_ctx->patch_ctx.data_count = 0;
@@ -6700,15 +6655,15 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6700 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 6655 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
6701 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 6656 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
6702 6657
6703 gk20a_mem_wr32(context + 6658 gk20a_mem_wr(g, mem,
6704 ctxsw_prog_main_image_patch_count_o(), 6659 ctxsw_prog_main_image_patch_count_o(),
6705 0, ch_ctx->patch_ctx.data_count); 6660 ch_ctx->patch_ctx.data_count);
6706 gk20a_mem_wr32(context + 6661 gk20a_mem_wr(g, mem,
6707 ctxsw_prog_main_image_patch_adr_lo_o(), 6662 ctxsw_prog_main_image_patch_adr_lo_o(),
6708 0, vaddr_lo); 6663 vaddr_lo);
6709 gk20a_mem_wr32(context + 6664 gk20a_mem_wr(g, mem,
6710 ctxsw_prog_main_image_patch_adr_hi_o(), 6665 ctxsw_prog_main_image_patch_adr_hi_o(),
6711 0, vaddr_hi); 6666 vaddr_hi);
6712 6667
6713 /* we're not caching these on cpu side, 6668 /* we're not caching these on cpu side,
6714 but later watch for it */ 6669 but later watch for it */
@@ -6760,17 +6715,15 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
6760 6715
6761#define ILLEGAL_ID (~0) 6716#define ILLEGAL_ID (~0)
6762 6717
6763static inline bool check_main_image_header_magic(void *context) 6718static inline bool check_main_image_header_magic(u8 *context)
6764{ 6719{
6765 u32 magic = gk20a_mem_rd32(context + 6720 u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());
6766 ctxsw_prog_main_image_magic_value_o(), 0);
6767 gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); 6721 gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic);
6768 return magic == ctxsw_prog_main_image_magic_value_v_value_v(); 6722 return magic == ctxsw_prog_main_image_magic_value_v_value_v();
6769} 6723}
6770static inline bool check_local_header_magic(void *context) 6724static inline bool check_local_header_magic(u8 *context)
6771{ 6725{
6772 u32 magic = gk20a_mem_rd32(context + 6726 u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());
6773 ctxsw_prog_local_magic_value_o(), 0);
6774 gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); 6727 gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic);
6775 return magic == ctxsw_prog_local_magic_value_v_value_v(); 6728 return magic == ctxsw_prog_local_magic_value_v_value_v();
6776 6729
@@ -6814,7 +6767,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6814 u32 num_gpcs, num_tpcs; 6767 u32 num_gpcs, num_tpcs;
6815 u32 chk_addr; 6768 u32 chk_addr;
6816 u32 ext_priv_offset, ext_priv_size; 6769 u32 ext_priv_offset, ext_priv_size;
6817 void *context; 6770 u8 *context;
6818 u32 offset_to_segment, offset_to_segment_end; 6771 u32 offset_to_segment, offset_to_segment_end;
6819 u32 sm_dsm_perf_reg_id = ILLEGAL_ID; 6772 u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
6820 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; 6773 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
@@ -6856,14 +6809,14 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6856 /* note below is in words/num_registers */ 6809 /* note below is in words/num_registers */
6857 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; 6810 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
6858 6811
6859 context = context_buffer; 6812 context = (u8 *)context_buffer;
6860 /* sanity check main header */ 6813 /* sanity check main header */
6861 if (!check_main_image_header_magic(context)) { 6814 if (!check_main_image_header_magic(context)) {
6862 gk20a_err(dev_from_gk20a(g), 6815 gk20a_err(dev_from_gk20a(g),
6863 "Invalid main header: magic value"); 6816 "Invalid main header: magic value");
6864 return -EINVAL; 6817 return -EINVAL;
6865 } 6818 }
6866 num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); 6819 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
6867 if (gpc_num >= num_gpcs) { 6820 if (gpc_num >= num_gpcs) {
6868 gk20a_err(dev_from_gk20a(g), 6821 gk20a_err(dev_from_gk20a(g),
6869 "GPC 0x%08x is greater than total count 0x%08x!\n", 6822 "GPC 0x%08x is greater than total count 0x%08x!\n",
@@ -6871,7 +6824,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6871 return -EINVAL; 6824 return -EINVAL;
6872 } 6825 }
6873 6826
6874 data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0); 6827 data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());
6875 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); 6828 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
6876 if (0 == ext_priv_size) { 6829 if (0 == ext_priv_size) {
6877 gk20a_dbg_info(" No extended memory in context buffer"); 6830 gk20a_dbg_info(" No extended memory in context buffer");
@@ -7149,7 +7102,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
7149} 7102}
7150 7103
7151static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, 7104static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7152 void *context, 7105 u8 *context,
7153 u32 *num_ppcs, u32 *ppc_mask, 7106 u32 *num_ppcs, u32 *ppc_mask,
7154 u32 *reg_ppc_count) 7107 u32 *reg_ppc_count)
7155{ 7108{
@@ -7165,7 +7118,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7165 (num_pes_per_gpc > 1))) 7118 (num_pes_per_gpc > 1)))
7166 return -EINVAL; 7119 return -EINVAL;
7167 7120
7168 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); 7121 data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
7169 7122
7170 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); 7123 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
7171 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); 7124 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
@@ -7177,7 +7130,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7177 7130
7178/* 7131/*
7179 * This function will return the 32 bit offset for a priv register if it is 7132 * This function will return the 32 bit offset for a priv register if it is
7180 * present in the context buffer. 7133 * present in the context buffer. The context buffer is in CPU memory.
7181 */ 7134 */
7182static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, 7135static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7183 u32 addr, 7136 u32 addr,
@@ -7196,7 +7149,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7196 u32 offset; 7149 u32 offset;
7197 u32 sys_priv_offset, gpc_priv_offset; 7150 u32 sys_priv_offset, gpc_priv_offset;
7198 u32 ppc_mask, reg_list_ppc_count; 7151 u32 ppc_mask, reg_list_ppc_count;
7199 void *context; 7152 u8 *context;
7200 u32 offset_to_segment; 7153 u32 offset_to_segment;
7201 7154
7202 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); 7155 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
@@ -7207,13 +7160,13 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7207 if (err) 7160 if (err)
7208 return err; 7161 return err;
7209 7162
7210 context = context_buffer; 7163 context = (u8 *)context_buffer;
7211 if (!check_main_image_header_magic(context)) { 7164 if (!check_main_image_header_magic(context)) {
7212 gk20a_err(dev_from_gk20a(g), 7165 gk20a_err(dev_from_gk20a(g),
7213 "Invalid main header: magic value"); 7166 "Invalid main header: magic value");
7214 return -EINVAL; 7167 return -EINVAL;
7215 } 7168 }
7216 num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); 7169 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
7217 7170
7218 /* Parse the FECS local header. */ 7171 /* Parse the FECS local header. */
7219 context += ctxsw_prog_ucode_header_size_in_bytes(); 7172 context += ctxsw_prog_ucode_header_size_in_bytes();
@@ -7222,7 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7222 "Invalid FECS local header: magic value\n"); 7175 "Invalid FECS local header: magic value\n");
7223 return -EINVAL; 7176 return -EINVAL;
7224 } 7177 }
7225 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); 7178 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7226 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); 7179 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7227 7180
7228 /* If found in Ext buffer, ok. 7181 /* If found in Ext buffer, ok.
@@ -7268,7 +7221,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7268 return -EINVAL; 7221 return -EINVAL;
7269 7222
7270 } 7223 }
7271 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); 7224 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7272 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); 7225 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7273 7226
7274 err = gr_gk20a_determine_ppc_configuration(g, context, 7227 err = gr_gk20a_determine_ppc_configuration(g, context,
@@ -7277,7 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7277 if (err) 7230 if (err)
7278 return err; 7231 return err;
7279 7232
7280 num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0); 7233 num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
7281 7234
7282 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { 7235 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
7283 gk20a_err(dev_from_gk20a(g), 7236 gk20a_err(dev_from_gk20a(g),
@@ -7689,9 +7642,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7689{ 7642{
7690 struct gk20a *g = ch->g; 7643 struct gk20a *g = ch->g;
7691 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 7644 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
7692 void *ctx_ptr = NULL; 7645 bool gr_ctx_ready = false;
7693 void *pm_ctx_ptr = NULL; 7646 bool pm_ctx_ready = false;
7694 void *base_ptr = NULL; 7647 struct mem_desc *current_mem = NULL;
7695 bool ch_is_curr_ctx, restart_gr_ctxsw = false; 7648 bool ch_is_curr_ctx, restart_gr_ctxsw = false;
7696 u32 i, j, offset, v; 7649 u32 i, j, offset, v;
7697 struct gr_gk20a *gr = &g->gr; 7650 struct gr_gk20a *gr = &g->gr;
@@ -7821,20 +7774,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7821 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), 7774 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
7822 ctx_ops[i].quad); 7775 ctx_ops[i].quad);
7823 if (!err) { 7776 if (!err) {
7824 if (!ctx_ptr) { 7777 if (!gr_ctx_ready) {
7825 /* would have been a variant of 7778 /* would have been a variant of
7826 * gr_gk20a_apply_instmem_overrides, 7779 * gr_gk20a_apply_instmem_overrides,
7827 * recoded in-place instead. 7780 * recoded in-place instead.
7828 */ 7781 */
7829 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 7782 if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) {
7830 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
7831 0, pgprot_writecombine(PAGE_KERNEL));
7832 if (!ctx_ptr) {
7833 err = -ENOMEM; 7783 err = -ENOMEM;
7834 goto cleanup; 7784 goto cleanup;
7835 } 7785 }
7786 gr_ctx_ready = true;
7836 } 7787 }
7837 base_ptr = ctx_ptr; 7788 current_mem = &ch_ctx->gr_ctx->mem;
7838 } else { 7789 } else {
7839 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 7790 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
7840 ctx_ops[i].offset, 7791 ctx_ops[i].offset,
@@ -7849,7 +7800,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7849 NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; 7800 NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
7850 continue; 7801 continue;
7851 } 7802 }
7852 if (!pm_ctx_ptr) { 7803 if (!pm_ctx_ready) {
7853 /* Make sure ctx buffer was initialized */ 7804 /* Make sure ctx buffer was initialized */
7854 if (!ch_ctx->pm_ctx.mem.pages) { 7805 if (!ch_ctx->pm_ctx.mem.pages) {
7855 gk20a_err(dev_from_gk20a(g), 7806 gk20a_err(dev_from_gk20a(g),
@@ -7857,15 +7808,13 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7857 err = -EINVAL; 7808 err = -EINVAL;
7858 goto cleanup; 7809 goto cleanup;
7859 } 7810 }
7860 pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages, 7811 if (gk20a_mem_begin(g, &ch_ctx->pm_ctx.mem)) {
7861 PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT,
7862 0, pgprot_writecombine(PAGE_KERNEL));
7863 if (!pm_ctx_ptr) {
7864 err = -ENOMEM; 7812 err = -ENOMEM;
7865 goto cleanup; 7813 goto cleanup;
7866 } 7814 }
7815 pm_ctx_ready = true;
7867 } 7816 }
7868 base_ptr = pm_ctx_ptr; 7817 current_mem = &ch_ctx->pm_ctx.mem;
7869 } 7818 }
7870 7819
7871 /* if this is a quad access, setup for special access*/ 7820 /* if this is a quad access, setup for special access*/
@@ -7878,24 +7827,24 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7878 /* sanity check gr ctxt offsets, 7827 /* sanity check gr ctxt offsets,
7879 * don't write outside, worst case 7828 * don't write outside, worst case
7880 */ 7829 */
7881 if ((base_ptr == ctx_ptr) && 7830 if ((current_mem == &ch_ctx->gr_ctx->mem) &&
7882 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) 7831 (offsets[j] >= g->gr.ctx_vars.golden_image_size))
7883 continue; 7832 continue;
7884 if (pass == 0) { /* write pass */ 7833 if (pass == 0) { /* write pass */
7885 v = gk20a_mem_rd32(base_ptr + offsets[j], 0); 7834 v = gk20a_mem_rd(g, current_mem, offsets[j]);
7886 v &= ~ctx_ops[i].and_n_mask_lo; 7835 v &= ~ctx_ops[i].and_n_mask_lo;
7887 v |= ctx_ops[i].value_lo; 7836 v |= ctx_ops[i].value_lo;
7888 gk20a_mem_wr32(base_ptr + offsets[j], 0, v); 7837 gk20a_mem_wr(g, current_mem, offsets[j], v);
7889 7838
7890 gk20a_dbg(gpu_dbg_gpu_dbg, 7839 gk20a_dbg(gpu_dbg_gpu_dbg,
7891 "context wr: offset=0x%x v=0x%x", 7840 "context wr: offset=0x%x v=0x%x",
7892 offsets[j], v); 7841 offsets[j], v);
7893 7842
7894 if (ctx_ops[i].op == REGOP(WRITE_64)) { 7843 if (ctx_ops[i].op == REGOP(WRITE_64)) {
7895 v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0); 7844 v = gk20a_mem_rd(g, current_mem, offsets[j] + 4);
7896 v &= ~ctx_ops[i].and_n_mask_hi; 7845 v &= ~ctx_ops[i].and_n_mask_hi;
7897 v |= ctx_ops[i].value_hi; 7846 v |= ctx_ops[i].value_hi;
7898 gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v); 7847 gk20a_mem_wr(g, current_mem, offsets[j] + 4, v);
7899 7848
7900 gk20a_dbg(gpu_dbg_gpu_dbg, 7849 gk20a_dbg(gpu_dbg_gpu_dbg,
7901 "context wr: offset=0x%x v=0x%x", 7850 "context wr: offset=0x%x v=0x%x",
@@ -7905,18 +7854,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7905 /* check to see if we need to add a special WAR 7854 /* check to see if we need to add a special WAR
7906 for some of the SMPC perf regs */ 7855 for some of the SMPC perf regs */
7907 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], 7856 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
7908 v, base_ptr); 7857 v, current_mem);
7909 7858
7910 } else { /* read pass */ 7859 } else { /* read pass */
7911 ctx_ops[i].value_lo = 7860 ctx_ops[i].value_lo =
7912 gk20a_mem_rd32(base_ptr + offsets[0], 0); 7861 gk20a_mem_rd(g, current_mem, offsets[0]);
7913 7862
7914 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", 7863 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
7915 offsets[0], ctx_ops[i].value_lo); 7864 offsets[0], ctx_ops[i].value_lo);
7916 7865
7917 if (ctx_ops[i].op == REGOP(READ_64)) { 7866 if (ctx_ops[i].op == REGOP(READ_64)) {
7918 ctx_ops[i].value_hi = 7867 ctx_ops[i].value_hi =
7919 gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0); 7868 gk20a_mem_rd(g, current_mem, offsets[0] + 4);
7920 7869
7921 gk20a_dbg(gpu_dbg_gpu_dbg, 7870 gk20a_dbg(gpu_dbg_gpu_dbg,
7922 "context rd: offset=0x%x v=0x%x", 7871 "context rd: offset=0x%x v=0x%x",
@@ -7943,12 +7892,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7943 7892
7944 if (ch_ctx->patch_ctx.mem.cpu_va) 7893 if (ch_ctx->patch_ctx.mem.cpu_va)
7945 gr_gk20a_ctx_patch_write_end(g, ch_ctx); 7894 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
7946 7895 if (gr_ctx_ready)
7947 if (ctx_ptr) 7896 gk20a_mem_end(g, &ch_ctx->gr_ctx->mem);
7948 vunmap(ctx_ptr); 7897 if (pm_ctx_ready)
7949 7898 gk20a_mem_end(g, &ch_ctx->pm_ctx.mem);
7950 if (pm_ctx_ptr)
7951 vunmap(pm_ctx_ptr);
7952 7899
7953 if (restart_gr_ctxsw) { 7900 if (restart_gr_ctxsw) {
7954 int tmp_err = gr_gk20a_enable_ctxsw(g); 7901 int tmp_err = gr_gk20a_enable_ctxsw(g);
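The hunks above replace the open-coded vmap()/vunmap() of the gr and pm context buffers with gk20a_mem_begin()/gk20a_mem_end() and track the active buffer as a mem_desc rather than a raw CPU pointer. A minimal sketch of the resulting read-modify-write pattern, not taken verbatim from the patch (the helper name and parameters are hypothetical):

/* Sketch: patch one 32-bit word of a context buffer via the new API. */
static int example_rmw_ctx_word(struct gk20a *g, struct mem_desc *mem,
				u32 byte_offset, u32 clear_mask, u32 set_bits)
{
	u32 v;

	if (gk20a_mem_begin(g, mem))		/* map only if/while needed */
		return -ENOMEM;

	v = gk20a_mem_rd(g, mem, byte_offset);	/* byte offset, 32-bit aligned */
	v = (v & ~clear_mask) | set_bits;
	gk20a_mem_wr(g, mem, byte_offset, v);

	gk20a_mem_end(g, mem);			/* drop the temporary mapping */
	return 0;
}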
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 6f6734b4..13382416 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -44,6 +44,112 @@
44#include "kind_gk20a.h" 44#include "kind_gk20a.h"
45#include "semaphore_gk20a.h" 45#include "semaphore_gk20a.h"
46 46
47int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem)
48{
49 void *cpu_va;
50
51 if (WARN_ON(mem->cpu_va)) {
52 gk20a_warn(dev_from_gk20a(g), "nested %s", __func__);
53 return -EBUSY;
54 }
55
56 cpu_va = vmap(mem->pages,
57 PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
58 0, pgprot_writecombine(PAGE_KERNEL));
59
60 if (WARN_ON(!cpu_va))
61 return -ENOMEM;
62
63 mem->cpu_va = cpu_va;
64 return 0;
65}
66
67void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
68{
69 vunmap(mem->cpu_va);
70 mem->cpu_va = NULL;
71}
72
73u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
74{
75 u32 *ptr = mem->cpu_va;
76 u32 data;
77
78 WARN_ON(!ptr);
79 data = ptr[w];
80#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
81 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
82#endif
83 return data;
84}
85
86u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
87{
88 WARN_ON(offset & 3);
89 return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
90}
91
92void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
93 u32 offset, void *dest, u32 size)
94{
95 u32 i;
96 u32 *dest_u32 = dest;
97
98 WARN_ON(offset & 3);
99 WARN_ON(size & 3);
100 offset /= sizeof(u32);
101 size /= sizeof(u32);
102
103 for (i = 0; i < size; i++)
104 dest_u32[i] = gk20a_mem_rd32(g, mem, offset + i);
105}
106
107void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
108{
109 u32 *ptr = mem->cpu_va;
110
111 WARN_ON(!ptr);
112#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
113 gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
114#endif
115 ptr[w] = data;
116}
117
118void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data)
119{
120 WARN_ON(offset & 3);
121 gk20a_mem_wr32(g, mem, offset / sizeof(u32), data);
122}
123
124void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
125 void *src, u32 size)
126{
127 u32 i;
128 u32 *src_u32 = src;
129
130 WARN_ON(offset & 3);
131 WARN_ON(size & 3);
132 offset /= sizeof(u32);
133 size /= sizeof(u32);
134
135 for (i = 0; i < size; i++)
136 gk20a_mem_wr32(g, mem, offset + i, src_u32[i]);
137}
138
139void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
140 u32 value, u32 size)
141{
142 u32 i;
143
144 WARN_ON(offset & 3);
145 WARN_ON(size & 3);
146 offset /= sizeof(u32);
147 size /= sizeof(u32);
148
149 for (i = 0; i < size; i++)
150 gk20a_mem_wr32(g, mem, offset + i, value);
151}
152
47/* 153/*
48 * GPU mapping life cycle 154 * GPU mapping life cycle
49 * ====================== 155 * ======================
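The bulk helpers added above (gk20a_mem_rd_n(), gk20a_mem_wr_n(), gk20a_memset()) simply iterate gk20a_mem_rd32()/gk20a_mem_wr32() over 32-bit words, so offsets and sizes must be multiples of 4. A hedged usage sketch with hypothetical names and sizes:

/* Sketch: stage a CPU-side blob into a mem_desc-backed buffer and zero
 * the tail. blob_size and total_size are assumed 32-bit aligned. */
static int example_stage_blob(struct gk20a *g, struct mem_desc *mem,
			      void *blob, u32 blob_size, u32 total_size)
{
	int err = gk20a_mem_begin(g, mem);

	if (err)
		return err;

	gk20a_mem_wr_n(g, mem, 0, blob, blob_size);
	gk20a_memset(g, mem, blob_size, 0, total_size - blob_size);

	gk20a_mem_end(g, mem);
	return 0;
}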
@@ -780,9 +886,14 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
780 *pde_lo, *pde_hi); 886 *pde_lo, *pde_hi);
781} 887}
782 888
783u32 *pde_from_index(struct vm_gk20a *vm, u32 i) 889static u32 pde_from_index(u32 i)
890{
891 return i * gmmu_pde__size_v() / sizeof(u32);
892}
893
894static u32 pte_from_index(u32 i)
784{ 895{
785 return (u32 *) (((u8 *)vm->pdb.mem.cpu_va) + i*gmmu_pde__size_v()); 896 return i * gmmu_pte__size_v() / sizeof(u32);
786} 897}
787 898
788u32 pte_index_from_vaddr(struct vm_gk20a *vm, 899u32 pte_index_from_vaddr(struct vm_gk20a *vm,
@@ -2323,7 +2434,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
2323 u64 pte_addr_small = 0, pte_addr_big = 0; 2434 u64 pte_addr_small = 0, pte_addr_big = 0;
2324 struct gk20a_mm_entry *entry = vm->pdb.entries + i; 2435 struct gk20a_mm_entry *entry = vm->pdb.entries + i;
2325 u32 pde_v[2] = {0, 0}; 2436 u32 pde_v[2] = {0, 0};
2326 u32 *pde; 2437 u32 pde;
2327 2438
2328 gk20a_dbg_fn(""); 2439 gk20a_dbg_fn("");
2329 2440
@@ -2348,10 +2459,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
2348 (big_valid ? (gmmu_pde_vol_big_true_f()) : 2459 (big_valid ? (gmmu_pde_vol_big_true_f()) :
2349 gmmu_pde_vol_big_false_f()); 2460 gmmu_pde_vol_big_false_f());
2350 2461
2351 pde = pde_from_index(vm, i); 2462 pde = pde_from_index(i);
2352 2463
2353 gk20a_mem_wr32(pde, 0, pde_v[0]); 2464 gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]);
2354 gk20a_mem_wr32(pde, 1, pde_v[1]); 2465 gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]);
2355 2466
2356 gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", 2467 gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
2357 i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); 2468 i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -2432,8 +2543,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
2432 gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); 2543 gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
2433 } 2544 }
2434 2545
2435 gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]); 2546 gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 0, pte_w[0]);
2436 gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]); 2547 gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 1, pte_w[1]);
2437 2548
2438 if (*iova) { 2549 if (*iova) {
2439 *iova += page_size; 2550 *iova += page_size;
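pde_from_index() and pte_from_index() now return word offsets into vm->pdb.mem and pte->mem instead of CPU pointers, so the two halves of an entry are written as consecutive words. A worked example for index 3, with the 8-byte entries the paired writes above imply:

/* pde_from_index(3) = 3 * gmmu_pde__size_v() / sizeof(u32) = 3 * 8 / 4 = 6,
 * so PDE 3 occupies words 6 and 7 of the page directory buffer: */
gk20a_mem_wr32(g, &vm->pdb.mem, pde_from_index(3) + 0, pde_v[0]);  /* word 6 */
gk20a_mem_wr32(g, &vm->pdb.mem, pde_from_index(3) + 1, pde_v[1]);  /* word 7 */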
@@ -3489,19 +3600,19 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
3489 false, false, "cde"); 3600 false, false, "cde");
3490} 3601}
3491 3602
3492void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) 3603void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr)
3493{ 3604{
3494 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); 3605 u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
3495 u32 pdb_addr_hi = u64_hi32(pdb_addr); 3606 u32 pdb_addr_hi = u64_hi32(pdb_addr);
3496 3607
3497 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), 3608 gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
3498 (g->mm.vidmem_is_vidmem ? 3609 (g->mm.vidmem_is_vidmem ?
3499 ram_in_page_dir_base_target_sys_mem_ncoh_f() : 3610 ram_in_page_dir_base_target_sys_mem_ncoh_f() :
3500 ram_in_page_dir_base_target_vid_mem_f()) | 3611 ram_in_page_dir_base_target_vid_mem_f()) |
3501 ram_in_page_dir_base_vol_true_f() | 3612 ram_in_page_dir_base_vol_true_f() |
3502 ram_in_page_dir_base_lo_f(pdb_addr_lo)); 3613 ram_in_page_dir_base_lo_f(pdb_addr_lo));
3503 3614
3504 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), 3615 gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
3505 ram_in_page_dir_base_hi_f(pdb_addr_hi)); 3616 ram_in_page_dir_base_hi_f(pdb_addr_hi));
3506} 3617}
3507 3618
@@ -3510,23 +3621,22 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
3510{ 3621{
3511 struct gk20a *g = gk20a_from_vm(vm); 3622 struct gk20a *g = gk20a_from_vm(vm);
3512 u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); 3623 u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0);
3513 void *inst_ptr = inst_block->cpu_va;
3514 3624
3515 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", 3625 gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
3516 gk20a_mm_inst_block_addr(g, inst_block), inst_ptr); 3626 gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
3517 3627
3518 gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); 3628 gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr);
3519 3629
3520 g->ops.mm.init_pdb(g, inst_ptr, pde_addr); 3630 g->ops.mm.init_pdb(g, inst_block, pde_addr);
3521 3631
3522 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), 3632 gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(),
3523 u64_lo32(vm->va_limit - 1) & ~0xfff); 3633 u64_lo32(vm->va_limit - 1) & ~0xfff);
3524 3634
3525 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), 3635 gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(),
3526 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1))); 3636 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1)));
3527 3637
3528 if (big_page_size && g->ops.mm.set_big_page_size) 3638 if (big_page_size && g->ops.mm.set_big_page_size)
3529 g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); 3639 g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
3530} 3640}
3531 3641
3532int gk20a_mm_fb_flush(struct gk20a *g) 3642int gk20a_mm_fb_flush(struct gk20a *g)
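gk20a_mm_init_pdb() and gk20a_init_inst_block() now take the instance block's mem_desc directly; the ram_in_*_w() helpers already return word indices, so they pass straight through to the word-indexed gk20a_mem_wr32(). A hedged read-back sketch (the helper name is hypothetical, and it assumes the instance block keeps its permanent cpu_va mapping, so no begin()/end() pair is needed):

static u32 example_inst_block_adr_limit_lo(struct gk20a *g,
					   struct mem_desc *inst_block)
{
	/* word-indexed read of the limit programmed above */
	return gk20a_mem_rd32(g, inst_block, ram_in_adr_limit_lo_w());
}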
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7fa0b7fb..e9ac8f18 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -419,6 +419,34 @@ static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
419 return gmmu_page_size_small; 419 return gmmu_page_size_small;
420} 420}
421 421
422/*
423 * Buffer accessors - wrap between begin() and end() if there is no permanent
424 * kernel mapping for this buffer.
425 */
426
427int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem);
428/* nop for null mem, like with free() or vunmap() */
429void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem);
430
431/* word-indexed offset */
432u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w);
433/* byte offset (32b-aligned) */
434u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset);
435/* memcpy to cpu, offset and size in bytes (32b-aligned) */
436void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
437 void *dest, u32 size);
438
439/* word-indexed offset */
440void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data);
441/* byte offset (32b-aligned) */
442void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);
443/* memcpy from cpu, offset and size in bytes (32b-aligned) */
444void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
445 void *src, u32 size);
446/* size and offset in bytes (32b-aligned), filled with u32s */
447void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
448 u32 value, u32 size);
449
422#if 0 /*related to addr bits above, concern below TBD on which is accurate */ 450#if 0 /*related to addr bits above, concern below TBD on which is accurate */
423#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ 451#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
424 bus_bar1_block_ptr_s()) 452 bus_bar1_block_ptr_s())
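Per the comment above the accessor declarations, the begin()/end() pair is only required when the buffer has no permanent kernel mapping. The relation between the two indexing conventions is worth spelling out as a sketch (assuming a mapped mem):

/* A byte offset of 0x20 addresses word 8, so these reads are equivalent;
 * gk20a_mem_rd() additionally WARNs if the byte offset is not 32-bit
 * aligned, while gk20a_mem_rd32() takes a word index directly. */
WARN_ON(gk20a_mem_rd(g, mem, 0x20) != gk20a_mem_rd32(g, mem, 8));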
@@ -673,7 +701,6 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
673 u64 addr_lo, u64 addr_hi, 701 u64 addr_lo, u64 addr_hi,
674 u32 *pde_lo, u32 *pde_hi); 702 u32 *pde_lo, u32 *pde_hi);
675int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); 703int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm);
676u32 *pde_from_index(struct vm_gk20a *vm, u32 i);
677u32 pte_index_from_vaddr(struct vm_gk20a *vm, 704u32 pte_index_from_vaddr(struct vm_gk20a *vm,
678 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); 705 u64 addr, enum gmmu_pgsz_gk20a pgsz_idx);
679void free_gmmu_pages(struct vm_gk20a *vm, 706void free_gmmu_pages(struct vm_gk20a *vm,
@@ -685,7 +712,7 @@ struct gpu_ops;
685void gk20a_init_mm(struct gpu_ops *gops); 712void gk20a_init_mm(struct gpu_ops *gops);
686const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, 713const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
687 u32 big_page_size); 714 u32 big_page_size);
688void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr); 715void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr);
689 716
690void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); 717void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
691 718
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 56ad0c2a..54b2eef4 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -2421,11 +2421,10 @@ static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g)
2421static int gk20a_prepare_ucode(struct gk20a *g) 2421static int gk20a_prepare_ucode(struct gk20a *g)
2422{ 2422{
2423 struct pmu_gk20a *pmu = &g->pmu; 2423 struct pmu_gk20a *pmu = &g->pmu;
2424 int i, err = 0; 2424 int err = 0;
2425 struct device *d = dev_from_gk20a(g); 2425 struct device *d = dev_from_gk20a(g);
2426 struct mm_gk20a *mm = &g->mm; 2426 struct mm_gk20a *mm = &g->mm;
2427 struct vm_gk20a *vm = &mm->pmu.vm; 2427 struct vm_gk20a *vm = &mm->pmu.vm;
2428 void *ucode_ptr;
2429 2428
2430 if (g->pmu_fw) { 2429 if (g->pmu_fw) {
2431 gk20a_init_pmu(pmu); 2430 gk20a_init_pmu(pmu);
@@ -2449,11 +2448,8 @@ static int gk20a_prepare_ucode(struct gk20a *g)
2449 if (err) 2448 if (err)
2450 goto err_release_fw; 2449 goto err_release_fw;
2451 2450
2452 ucode_ptr = pmu->ucode.cpu_va; 2451 gk20a_mem_wr_n(g, &pmu->ucode, 0, pmu->ucode_image,
2453 2452 pmu->desc->app_start_offset + pmu->desc->app_size);
2454 for (i = 0; i < (pmu->desc->app_start_offset +
2455 pmu->desc->app_size) >> 2; i++)
2456 gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]);
2457 2453
2458 gk20a_init_pmu(pmu); 2454 gk20a_init_pmu(pmu);
2459 2455
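The gk20a_mem_wr_n() call above copies the same range as the removed per-word loop; expanded, it is roughly equivalent to the following sketch (assuming pmu->ucode keeps its permanent mapping, as before):

/* Per-word form of the single gk20a_mem_wr_n() call above. */
u32 n_words = (pmu->desc->app_start_offset + pmu->desc->app_size) >> 2;
u32 i;

for (i = 0; i < n_words; i++)
	gk20a_mem_wr32(g, &pmu->ucode, i, pmu->ucode_image[i]);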