summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
author: Konsta Holtta <kholtta@nvidia.com>	2016-05-12 02:32:05 -0400
committer: Ken Adams <kadams@nvidia.com>	2016-05-13 10:11:33 -0400
commit: 6eebc87d99f9f04b2b68e0bc0142c161ab3e669d (patch)
tree: 08e437890869d76072f291ea66f709f05ea07c8a /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent: 14ef0dacc94077bc3dae4c942ff8c279cc4c92ba (diff)
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor functions. This allows the functions to select the memory access method based on the buffer aperture instead of using the cpu pointer directly (like until now). The selection and aperture support will be in another patch; this patch only refactors these accessors, but keeps the underlying functionality as-is. gk20a_mem_{rd,wr}32() work as previously; add also gk20a_mem_{rd,wr}() for byte-indexed accesses, gk20a_mem_{rd,wr}_n() for memcpy()-like functionality, and gk20a_memset() for filling buffers with a constant. The 8 and 16 bit accessor functions are removed. vmap()/vunmap() pairs are abstracted to gk20a_mem_{begin,end}() to support other types of mappings or conditions where mapping the buffer is unnecessary or different. Several function arguments that would access these buffers are also changed to take a mem_desc instead of a plain cpu pointer. Some relevant occasions are changed to use the accessor functions instead of cpu pointers without them (e.g., memcpying to and from), but the majority of direct accesses will be adjusted later, when the buffers are moved to support vidmem. JIRA DNVGPU-23 Change-Id: I3dd22e14290c4ab742d42e2dd327ebeb5cd3f25a Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/1121143 Reviewed-by: Ken Adams <kadams@nvidia.com> Tested-by: Ken Adams <kadams@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	73
1 file changed, 32 insertions, 41 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 990972e4..065e8ab1 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -129,28 +129,25 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
129{ 129{
130 u32 addr_lo; 130 u32 addr_lo;
131 u32 addr_hi; 131 u32 addr_hi;
132 void *inst_ptr;
133 struct gk20a *g = c->g; 132 struct gk20a *g = c->g;
134 133
135 gk20a_dbg_fn(""); 134 gk20a_dbg_fn("");
136 135
137 inst_ptr = c->inst_block.cpu_va;
138 if (!inst_ptr)
139 return -ENOMEM;
140
141 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); 136 addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
142 addr_hi = u64_hi32(c->userd_iova); 137 addr_hi = u64_hi32(c->userd_iova);
143 138
144 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", 139 gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
145 c->hw_chid, (u64)c->userd_iova); 140 c->hw_chid, (u64)c->userd_iova);
146 141
147 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(), 142 gk20a_mem_wr32(g, &c->inst_block,
143 ram_in_ramfc_w() + ram_fc_userd_w(),
148 (g->mm.vidmem_is_vidmem ? 144 (g->mm.vidmem_is_vidmem ?
149 pbdma_userd_target_sys_mem_ncoh_f() : 145 pbdma_userd_target_sys_mem_ncoh_f() :
150 pbdma_userd_target_vid_mem_f()) | 146 pbdma_userd_target_vid_mem_f()) |
151 pbdma_userd_addr_f(addr_lo)); 147 pbdma_userd_addr_f(addr_lo));
152 148
153 gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(), 149 gk20a_mem_wr32(g, &c->inst_block,
150 ram_in_ramfc_w() + ram_fc_userd_hi_w(),
154 pbdma_userd_hi_addr_f(addr_hi)); 151 pbdma_userd_hi_addr_f(addr_hi));
155 152
156 return 0; 153 return 0;
@@ -186,13 +183,8 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
186 183
187static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) 184static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
188{ 185{
189 void *inst_ptr;
190 int shift = 0, value = 0; 186 int shift = 0, value = 0;
191 187
192 inst_ptr = c->inst_block.cpu_va;
193 if (!inst_ptr)
194 return -ENOMEM;
195
196 gk20a_channel_get_timescale_from_timeslice(c->g, 188 gk20a_channel_get_timescale_from_timeslice(c->g,
197 c->timeslice_us, &value, &shift); 189 c->timeslice_us, &value, &shift);
198 190
@@ -203,7 +195,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
203 WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); 195 WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
204 196
205 /* set new timeslice */ 197 /* set new timeslice */
206 gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), 198 gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(),
207 value | (shift << 12) | 199 value | (shift << 12) |
208 fifo_runlist_timeslice_enable_true_f()); 200 fifo_runlist_timeslice_enable_true_f());
209 201
@@ -255,33 +247,30 @@ u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c)
255int channel_gk20a_setup_ramfc(struct channel_gk20a *c, 247int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
256 u64 gpfifo_base, u32 gpfifo_entries, u32 flags) 248 u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
257{ 249{
258 void *inst_ptr; 250 struct gk20a *g = c->g;
251 struct mem_desc *mem = &c->inst_block;
259 252
260 gk20a_dbg_fn(""); 253 gk20a_dbg_fn("");
261 254
262 inst_ptr = c->inst_block.cpu_va; 255 gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
263 if (!inst_ptr)
264 return -ENOMEM;
265
266 memset(inst_ptr, 0, ram_fc_size_val_v());
267 256
268 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), 257 gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
269 pbdma_gp_base_offset_f( 258 pbdma_gp_base_offset_f(
270 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); 259 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
271 260
272 gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), 261 gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
273 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | 262 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
274 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); 263 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
275 264
276 gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), 265 gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
277 c->g->ops.fifo.get_pbdma_signature(c->g)); 266 c->g->ops.fifo.get_pbdma_signature(c->g));
278 267
279 gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), 268 gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
280 pbdma_formats_gp_fermi0_f() | 269 pbdma_formats_gp_fermi0_f() |
281 pbdma_formats_pb_fermi1_f() | 270 pbdma_formats_pb_fermi1_f() |
282 pbdma_formats_mp_fermi0_f()); 271 pbdma_formats_mp_fermi0_f());
283 272
284 gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), 273 gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
285 pbdma_pb_header_priv_user_f() | 274 pbdma_pb_header_priv_user_f() |
286 pbdma_pb_header_method_zero_f() | 275 pbdma_pb_header_method_zero_f() |
287 pbdma_pb_header_subchannel_zero_f() | 276 pbdma_pb_header_subchannel_zero_f() |
@@ -289,47 +278,49 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
289 pbdma_pb_header_first_true_f() | 278 pbdma_pb_header_first_true_f() |
290 pbdma_pb_header_type_inc_f()); 279 pbdma_pb_header_type_inc_f());
291 280
292 gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), 281 gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
293 pbdma_subdevice_id_f(1) | 282 pbdma_subdevice_id_f(1) |
294 pbdma_subdevice_status_active_f() | 283 pbdma_subdevice_status_active_f() |
295 pbdma_subdevice_channel_dma_enable_f()); 284 pbdma_subdevice_channel_dma_enable_f());
296 285
297 gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f()); 286 gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
298 287
299 gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), 288 gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
300 channel_gk20a_pbdma_acquire_val(c)); 289 channel_gk20a_pbdma_acquire_val(c));
301 290
302 gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), 291 gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
303 fifo_runlist_timeslice_timeout_128_f() | 292 fifo_runlist_timeslice_timeout_128_f() |
304 fifo_runlist_timeslice_timescale_3_f() | 293 fifo_runlist_timeslice_timescale_3_f() |
305 fifo_runlist_timeslice_enable_true_f()); 294 fifo_runlist_timeslice_enable_true_f());
306 295
307 gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(), 296 gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
308 fifo_pb_timeslice_timeout_16_f() | 297 fifo_pb_timeslice_timeout_16_f() |
309 fifo_pb_timeslice_timescale_0_f() | 298 fifo_pb_timeslice_timescale_0_f() |
310 fifo_pb_timeslice_enable_true_f()); 299 fifo_pb_timeslice_enable_true_f());
311 300
312 gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); 301 gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
313 302
314 return channel_gk20a_commit_userd(c); 303 return channel_gk20a_commit_userd(c);
315} 304}
316 305
317static int channel_gk20a_setup_userd(struct channel_gk20a *c) 306static int channel_gk20a_setup_userd(struct channel_gk20a *c)
318{ 307{
319 BUG_ON(!c->userd_cpu_va); 308 struct gk20a *g = c->g;
309 struct mem_desc *mem = &g->fifo.userd;
310 u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32);
320 311
321 gk20a_dbg_fn(""); 312 gk20a_dbg_fn("");
322 313
323 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); 314 gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
324 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); 315 gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
325 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); 316 gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
326 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); 317 gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
327 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); 318 gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
328 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0); 319 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
329 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); 320 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
330 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); 321 gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
331 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); 322 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
332 gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); 323 gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
333 324
334 return 0; 325 return 0;
335} 326}