diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-02-26 17:37:43 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:59:26 -0400 |
commit | 7290a6cbd5d03145d6f1ca4c3eacba40f6d4f93c (patch) | |
tree | de452c09f5eef76af273041dc64997fdc351dbd6 /drivers/gpu/nvgpu/vgpu | |
parent | bb51cf9ec6482b50f3020179965ef82f58d91a0a (diff) |
gpu: nvgpu: Implement common allocator and mem_desc
Introduce mem_desc, which holds all information needed for a buffer.
Implement helper functions for allocation and freeing that use this
data type.
Change-Id: I82c88595d058d4fb8c5c5fbf19d13269e48e422f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/712699
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu')
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 70 |
1 file changed, 16 insertions, 54 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 45d956a2..25e6e4c7 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | |||
@@ -196,19 +196,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
196 | 196 | ||
197 | runlist_size = sizeof(u16) * f->num_channels; | 197 | runlist_size = sizeof(u16) * f->num_channels; |
198 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 198 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { |
199 | dma_addr_t iova; | 199 | int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]); |
200 | 200 | if (err) { | |
201 | runlist->mem[i].cpuva = | ||
202 | dma_alloc_coherent(d, | ||
203 | runlist_size, | ||
204 | &iova, | ||
205 | GFP_KERNEL); | ||
206 | if (!runlist->mem[i].cpuva) { | ||
207 | dev_err(d, "memory allocation failed\n"); | 201 | dev_err(d, "memory allocation failed\n"); |
208 | goto clean_up_runlist; | 202 | goto clean_up_runlist; |
209 | } | 203 | } |
210 | runlist->mem[i].iova = iova; | ||
211 | runlist->mem[i].size = runlist_size; | ||
212 | } | 204 | } |
213 | mutex_init(&runlist->mutex); | 205 | mutex_init(&runlist->mutex); |
214 | 206 | ||
@@ -220,15 +212,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) | |||
220 | return 0; | 212 | return 0; |
221 | 213 | ||
222 | clean_up_runlist: | 214 | clean_up_runlist: |
223 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { | 215 | for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) |
224 | if (runlist->mem[i].cpuva) | 216 | gk20a_gmmu_free(g, &runlist->mem[i]); |
225 | dma_free_coherent(d, | ||
226 | runlist->mem[i].size, | ||
227 | runlist->mem[i].cpuva, | ||
228 | runlist->mem[i].iova); | ||
229 | runlist->mem[i].cpuva = NULL; | ||
230 | runlist->mem[i].iova = 0; | ||
231 | } | ||
232 | 217 | ||
233 | kfree(runlist->active_channels); | 218 | kfree(runlist->active_channels); |
234 | runlist->active_channels = NULL; | 219 | runlist->active_channels = NULL; |
@@ -248,7 +233,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
248 | struct fifo_gk20a *f = &g->fifo; | 233 | struct fifo_gk20a *f = &g->fifo; |
249 | struct device *d = dev_from_gk20a(g); | 234 | struct device *d = dev_from_gk20a(g); |
250 | int chid, err = 0; | 235 | int chid, err = 0; |
251 | dma_addr_t iova; | ||
252 | 236 | ||
253 | gk20a_dbg_fn(""); | 237 | gk20a_dbg_fn(""); |
254 | 238 | ||
@@ -268,28 +252,16 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
268 | f->max_engines = ENGINE_INVAL_GK20A; | 252 | f->max_engines = ENGINE_INVAL_GK20A; |
269 | 253 | ||
270 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); | 254 | f->userd_entry_size = 1 << ram_userd_base_shift_v(); |
271 | f->userd_total_size = f->userd_entry_size * f->num_channels; | ||
272 | 255 | ||
273 | f->userd.cpuva = dma_alloc_coherent(d, | 256 | err = gk20a_gmmu_alloc(g, f->userd_entry_size * f->num_channels, |
274 | f->userd_total_size, | 257 | &f->userd); |
275 | &iova, | ||
276 | GFP_KERNEL); | ||
277 | if (!f->userd.cpuva) { | ||
278 | dev_err(d, "memory allocation failed\n"); | ||
279 | goto clean_up; | ||
280 | } | ||
281 | |||
282 | f->userd.iova = iova; | ||
283 | err = gk20a_get_sgtable(d, &f->userd.sgt, | ||
284 | f->userd.cpuva, f->userd.iova, | ||
285 | f->userd_total_size); | ||
286 | if (err) { | 258 | if (err) { |
287 | dev_err(d, "failed to create sg table\n"); | 259 | dev_err(d, "memory allocation failed\n"); |
288 | goto clean_up; | 260 | goto clean_up; |
289 | } | 261 | } |
290 | 262 | ||
291 | /* bar1 va */ | 263 | /* bar1 va */ |
292 | f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size); | 264 | f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd.size); |
293 | if (!f->userd.gpu_va) { | 265 | if (!f->userd.gpu_va) { |
294 | dev_err(d, "gmmu mapping failed\n"); | 266 | dev_err(d, "gmmu mapping failed\n"); |
295 | goto clean_up; | 267 | goto clean_up; |
@@ -297,8 +269,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
297 | 269 | ||
298 | gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); | 270 | gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va); |
299 | 271 | ||
300 | f->userd.size = f->userd_total_size; | ||
301 | |||
302 | f->channel = kzalloc(f->num_channels * sizeof(*f->channel), | 272 | f->channel = kzalloc(f->num_channels * sizeof(*f->channel), |
303 | GFP_KERNEL); | 273 | GFP_KERNEL); |
304 | f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), | 274 | f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info), |
@@ -315,9 +285,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
315 | 285 | ||
316 | for (chid = 0; chid < f->num_channels; chid++) { | 286 | for (chid = 0; chid < f->num_channels; chid++) { |
317 | f->channel[chid].userd_cpu_va = | 287 | f->channel[chid].userd_cpu_va = |
318 | f->userd.cpuva + chid * f->userd_entry_size; | 288 | f->userd.cpu_va + chid * f->userd_entry_size; |
319 | f->channel[chid].userd_iova = | 289 | f->channel[chid].userd_iova = |
320 | gk20a_mm_smmu_vaddr_translate(g, f->userd.iova) | 290 | gk20a_mm_iova_addr(g, f->userd.sgt->sgl) |
321 | + chid * f->userd_entry_size; | 291 | + chid * f->userd_entry_size; |
322 | f->channel[chid].userd_gpu_va = | 292 | f->channel[chid].userd_gpu_va = |
323 | f->userd.gpu_va + chid * f->userd_entry_size; | 293 | f->userd.gpu_va + chid * f->userd_entry_size; |
@@ -337,17 +307,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) | |||
337 | clean_up: | 307 | clean_up: |
338 | gk20a_dbg_fn("fail"); | 308 | gk20a_dbg_fn("fail"); |
339 | /* FIXME: unmap from bar1 */ | 309 | /* FIXME: unmap from bar1 */ |
340 | if (f->userd.sgt) | 310 | gk20a_gmmu_free(g, &f->userd); |
341 | gk20a_free_sgtable(&f->userd.sgt); | 311 | |
342 | if (f->userd.cpuva) | 312 | memset(&f->userd, 0, sizeof(f->userd)); |
343 | dma_free_coherent(d, | ||
344 | f->userd_total_size, | ||
345 | f->userd.cpuva, | ||
346 | f->userd.iova); | ||
347 | f->userd.cpuva = NULL; | ||
348 | f->userd.iova = 0; | ||
349 | |||
350 | memset(&f->userd, 0, sizeof(struct userd_desc)); | ||
351 | 313 | ||
352 | kfree(f->channel); | 314 | kfree(f->channel); |
353 | f->channel = NULL; | 315 | f->channel = NULL; |
@@ -368,7 +330,7 @@ static int vgpu_init_fifo_setup_hw(struct gk20a *g) | |||
368 | u32 v, v1 = 0x33, v2 = 0x55; | 330 | u32 v, v1 = 0x33, v2 = 0x55; |
369 | 331 | ||
370 | u32 bar1_vaddr = f->userd.gpu_va; | 332 | u32 bar1_vaddr = f->userd.gpu_va; |
371 | volatile u32 *cpu_vaddr = f->userd.cpuva; | 333 | volatile u32 *cpu_vaddr = f->userd.cpu_va; |
372 | 334 | ||
373 | gk20a_dbg_info("test bar1 @ vaddr 0x%x", | 335 | gk20a_dbg_info("test bar1 @ vaddr 0x%x", |
374 | bar1_vaddr); | 336 | bar1_vaddr); |
@@ -505,7 +467,7 @@ static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
505 | add /* resume to add all channels back */) { | 467 | add /* resume to add all channels back */) { |
506 | u32 chid; | 468 | u32 chid; |
507 | 469 | ||
508 | runlist_entry = runlist->mem[0].cpuva; | 470 | runlist_entry = runlist->mem[0].cpu_va; |
509 | for_each_set_bit(chid, | 471 | for_each_set_bit(chid, |
510 | runlist->active_channels, f->num_channels) { | 472 | runlist->active_channels, f->num_channels) { |
511 | gk20a_dbg_info("add channel %d to runlist", chid); | 473 | gk20a_dbg_info("add channel %d to runlist", chid); |
@@ -517,7 +479,7 @@ static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id, | |||
517 | count = 0; | 479 | count = 0; |
518 | 480 | ||
519 | return vgpu_submit_runlist(platform->virt_handle, runlist_id, | 481 | return vgpu_submit_runlist(platform->virt_handle, runlist_id, |
520 | runlist->mem[0].cpuva, count); | 482 | runlist->mem[0].cpu_va, count); |
521 | } | 483 | } |
522 | 484 | ||
523 | /* add/remove a channel from runlist | 485 | /* add/remove a channel from runlist |