diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-05-12 02:32:05 -0400 |
---|---|---|
committer | Ken Adams <kadams@nvidia.com> | 2016-05-13 10:11:33 -0400 |
commit | 6eebc87d99f9f04b2b68e0bc0142c161ab3e669d (patch) | |
tree | 08e437890869d76072f291ea66f709f05ea07c8a /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | 14ef0dacc94077bc3dae4c942ff8c279cc4c92ba (diff) |
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor
functions. This allows the functions to select the memory access method
based on the buffer aperture instead of using the cpu pointer directly
(as was done previously). The selection and aperture support will be in another
patch; this patch only refactors these accessors, but keeps the
underlying functionality as-is.
gk20a_mem_{rd,wr}32() work as previously; add also gk20a_mem_{rd,wr}()
for byte-indexed accesses, gk20a_mem_{rd,wr}_n() for memcpy()-like
functionality, and gk20a_memset() for filling buffers with a constant.
The 8 and 16 bit accessor functions are removed.
vmap()/vunmap() pairs are abstracted to gk20a_mem_{begin,end}() to
support other types of mappings or conditions where mapping the buffer
is unnecessary or different.
Several function arguments that would access these buffers are also
changed to take a mem_desc instead of a plain cpu pointer. Some relevant
occasions are changed to use the accessor functions instead of cpu
pointers without them (e.g., memcpying to and from), but the majority of
direct accesses will be adjusted later, when the buffers are moved to
support vidmem.
JIRA DNVGPU-23
Change-Id: I3dd22e14290c4ab742d42e2dd327ebeb5cd3f25a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121143
Reviewed-by: Ken Adams <kadams@nvidia.com>
Tested-by: Ken Adams <kadams@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 73 |
1 file changed, 32 insertions, 41 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 990972e4..065e8ab1 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -129,28 +129,25 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c) | |||
129 | { | 129 | { |
130 | u32 addr_lo; | 130 | u32 addr_lo; |
131 | u32 addr_hi; | 131 | u32 addr_hi; |
132 | void *inst_ptr; | ||
133 | struct gk20a *g = c->g; | 132 | struct gk20a *g = c->g; |
134 | 133 | ||
135 | gk20a_dbg_fn(""); | 134 | gk20a_dbg_fn(""); |
136 | 135 | ||
137 | inst_ptr = c->inst_block.cpu_va; | ||
138 | if (!inst_ptr) | ||
139 | return -ENOMEM; | ||
140 | |||
141 | addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); | 136 | addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); |
142 | addr_hi = u64_hi32(c->userd_iova); | 137 | addr_hi = u64_hi32(c->userd_iova); |
143 | 138 | ||
144 | gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", | 139 | gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", |
145 | c->hw_chid, (u64)c->userd_iova); | 140 | c->hw_chid, (u64)c->userd_iova); |
146 | 141 | ||
147 | gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(), | 142 | gk20a_mem_wr32(g, &c->inst_block, |
143 | ram_in_ramfc_w() + ram_fc_userd_w(), | ||
148 | (g->mm.vidmem_is_vidmem ? | 144 | (g->mm.vidmem_is_vidmem ? |
149 | pbdma_userd_target_sys_mem_ncoh_f() : | 145 | pbdma_userd_target_sys_mem_ncoh_f() : |
150 | pbdma_userd_target_vid_mem_f()) | | 146 | pbdma_userd_target_vid_mem_f()) | |
151 | pbdma_userd_addr_f(addr_lo)); | 147 | pbdma_userd_addr_f(addr_lo)); |
152 | 148 | ||
153 | gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(), | 149 | gk20a_mem_wr32(g, &c->inst_block, |
150 | ram_in_ramfc_w() + ram_fc_userd_hi_w(), | ||
154 | pbdma_userd_hi_addr_f(addr_hi)); | 151 | pbdma_userd_hi_addr_f(addr_hi)); |
155 | 152 | ||
156 | return 0; | 153 | return 0; |
@@ -186,13 +183,8 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | |||
186 | 183 | ||
187 | static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) | 184 | static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) |
188 | { | 185 | { |
189 | void *inst_ptr; | ||
190 | int shift = 0, value = 0; | 186 | int shift = 0, value = 0; |
191 | 187 | ||
192 | inst_ptr = c->inst_block.cpu_va; | ||
193 | if (!inst_ptr) | ||
194 | return -ENOMEM; | ||
195 | |||
196 | gk20a_channel_get_timescale_from_timeslice(c->g, | 188 | gk20a_channel_get_timescale_from_timeslice(c->g, |
197 | c->timeslice_us, &value, &shift); | 189 | c->timeslice_us, &value, &shift); |
198 | 190 | ||
@@ -203,7 +195,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) | |||
203 | WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); | 195 | WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); |
204 | 196 | ||
205 | /* set new timeslice */ | 197 | /* set new timeslice */ |
206 | gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), | 198 | gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(), |
207 | value | (shift << 12) | | 199 | value | (shift << 12) | |
208 | fifo_runlist_timeslice_enable_true_f()); | 200 | fifo_runlist_timeslice_enable_true_f()); |
209 | 201 | ||
@@ -255,33 +247,30 @@ u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c) | |||
255 | int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | 247 | int channel_gk20a_setup_ramfc(struct channel_gk20a *c, |
256 | u64 gpfifo_base, u32 gpfifo_entries, u32 flags) | 248 | u64 gpfifo_base, u32 gpfifo_entries, u32 flags) |
257 | { | 249 | { |
258 | void *inst_ptr; | 250 | struct gk20a *g = c->g; |
251 | struct mem_desc *mem = &c->inst_block; | ||
259 | 252 | ||
260 | gk20a_dbg_fn(""); | 253 | gk20a_dbg_fn(""); |
261 | 254 | ||
262 | inst_ptr = c->inst_block.cpu_va; | 255 | gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v()); |
263 | if (!inst_ptr) | ||
264 | return -ENOMEM; | ||
265 | |||
266 | memset(inst_ptr, 0, ram_fc_size_val_v()); | ||
267 | 256 | ||
268 | gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), | 257 | gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(), |
269 | pbdma_gp_base_offset_f( | 258 | pbdma_gp_base_offset_f( |
270 | u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); | 259 | u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); |
271 | 260 | ||
272 | gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), | 261 | gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(), |
273 | pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | | 262 | pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | |
274 | pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); | 263 | pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); |
275 | 264 | ||
276 | gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), | 265 | gk20a_mem_wr32(g, mem, ram_fc_signature_w(), |
277 | c->g->ops.fifo.get_pbdma_signature(c->g)); | 266 | c->g->ops.fifo.get_pbdma_signature(c->g)); |
278 | 267 | ||
279 | gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), | 268 | gk20a_mem_wr32(g, mem, ram_fc_formats_w(), |
280 | pbdma_formats_gp_fermi0_f() | | 269 | pbdma_formats_gp_fermi0_f() | |
281 | pbdma_formats_pb_fermi1_f() | | 270 | pbdma_formats_pb_fermi1_f() | |
282 | pbdma_formats_mp_fermi0_f()); | 271 | pbdma_formats_mp_fermi0_f()); |
283 | 272 | ||
284 | gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), | 273 | gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(), |
285 | pbdma_pb_header_priv_user_f() | | 274 | pbdma_pb_header_priv_user_f() | |
286 | pbdma_pb_header_method_zero_f() | | 275 | pbdma_pb_header_method_zero_f() | |
287 | pbdma_pb_header_subchannel_zero_f() | | 276 | pbdma_pb_header_subchannel_zero_f() | |
@@ -289,47 +278,49 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | |||
289 | pbdma_pb_header_first_true_f() | | 278 | pbdma_pb_header_first_true_f() | |
290 | pbdma_pb_header_type_inc_f()); | 279 | pbdma_pb_header_type_inc_f()); |
291 | 280 | ||
292 | gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), | 281 | gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(), |
293 | pbdma_subdevice_id_f(1) | | 282 | pbdma_subdevice_id_f(1) | |
294 | pbdma_subdevice_status_active_f() | | 283 | pbdma_subdevice_status_active_f() | |
295 | pbdma_subdevice_channel_dma_enable_f()); | 284 | pbdma_subdevice_channel_dma_enable_f()); |
296 | 285 | ||
297 | gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f()); | 286 | gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f()); |
298 | 287 | ||
299 | gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), | 288 | gk20a_mem_wr32(g, mem, ram_fc_acquire_w(), |
300 | channel_gk20a_pbdma_acquire_val(c)); | 289 | channel_gk20a_pbdma_acquire_val(c)); |
301 | 290 | ||
302 | gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), | 291 | gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(), |
303 | fifo_runlist_timeslice_timeout_128_f() | | 292 | fifo_runlist_timeslice_timeout_128_f() | |
304 | fifo_runlist_timeslice_timescale_3_f() | | 293 | fifo_runlist_timeslice_timescale_3_f() | |
305 | fifo_runlist_timeslice_enable_true_f()); | 294 | fifo_runlist_timeslice_enable_true_f()); |
306 | 295 | ||
307 | gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(), | 296 | gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(), |
308 | fifo_pb_timeslice_timeout_16_f() | | 297 | fifo_pb_timeslice_timeout_16_f() | |
309 | fifo_pb_timeslice_timescale_0_f() | | 298 | fifo_pb_timeslice_timescale_0_f() | |
310 | fifo_pb_timeslice_enable_true_f()); | 299 | fifo_pb_timeslice_enable_true_f()); |
311 | 300 | ||
312 | gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); | 301 | gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); |
313 | 302 | ||
314 | return channel_gk20a_commit_userd(c); | 303 | return channel_gk20a_commit_userd(c); |
315 | } | 304 | } |
316 | 305 | ||
317 | static int channel_gk20a_setup_userd(struct channel_gk20a *c) | 306 | static int channel_gk20a_setup_userd(struct channel_gk20a *c) |
318 | { | 307 | { |
319 | BUG_ON(!c->userd_cpu_va); | 308 | struct gk20a *g = c->g; |
309 | struct mem_desc *mem = &g->fifo.userd; | ||
310 | u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32); | ||
320 | 311 | ||
321 | gk20a_dbg_fn(""); | 312 | gk20a_dbg_fn(""); |
322 | 313 | ||
323 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); | 314 | gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0); |
324 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); | 315 | gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0); |
325 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); | 316 | gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0); |
326 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); | 317 | gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0); |
327 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); | 318 | gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0); |
328 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0); | 319 | gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0); |
329 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); | 320 | gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0); |
330 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); | 321 | gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0); |
331 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); | 322 | gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0); |
332 | gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); | 323 | gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0); |
333 | 324 | ||
334 | return 0; | 325 | return 0; |
335 | } | 326 | } |