author     Terje Bergstrom <tbergstrom@nvidia.com>    2015-02-26 17:37:43 -0500
committer  Dan Willemsen <dwillemsen@nvidia.com>      2015-04-04 21:59:26 -0400
commit     7290a6cbd5d03145d6f1ca4c3eacba40f6d4f93c (patch)
tree       de452c09f5eef76af273041dc64997fdc351dbd6 /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
parent     bb51cf9ec6482b50f3020179965ef82f58d91a0a (diff)
gpu: nvgpu: Implement common allocator and mem_desc
Introduce mem_desc, which holds all information needed for a buffer.
Implement helper functions for allocation and freeing that use this
data type.
Change-Id: I82c88595d058d4fb8c5c5fbf19d13269e48e422f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/712699
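
To make the patch easier to follow, here is a minimal sketch of the `mem_desc` type this commit introduces, reconstructed only from the fields the diff below actually touches (`cpu_va`, `gpu_va`, `sgt`, plus a size the unmap helper presumably needs). The real definition lives in the driver's mm headers and may carry more state:

```c
#include <linux/scatterlist.h>
#include <linux/types.h>

/* Hypothetical reconstruction -- field set inferred from call sites below. */
struct mem_desc {
	void *cpu_va;		/* kernel CPU mapping of the buffer */
	u64 gpu_va;		/* GMMU virtual address, 0 while unmapped */
	struct sg_table *sgt;	/* scatterlist describing the backing pages */
	size_t size;		/* allocation size in bytes */
};
```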
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c  125
1 file changed, 22 insertions(+), 103 deletions(-)
```diff
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index cf1242ab..dee58d0a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -159,7 +159,6 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g)
 static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 {
 	struct gk20a *g = f->g;
-	struct device *d = dev_from_gk20a(g);
 	struct fifo_engine_info_gk20a *engine_info;
 	struct fifo_runlist_info_gk20a *runlist;
 	u32 runlist_id;
@@ -175,36 +174,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 		}
 		kfree(f->channel);
 	}
-	if (f->userd.gpu_va)
-		gk20a_gmmu_unmap(&g->mm.bar1.vm,
-				f->userd.gpu_va,
-				f->userd.size,
-				gk20a_mem_flag_none);
-
-	if (f->userd.sgt)
-		gk20a_free_sgtable(&f->userd.sgt);
-
-	if (f->userd.cpuva)
-		dma_free_coherent(d,
-				f->userd_total_size,
-				f->userd.cpuva,
-				f->userd.iova);
-	f->userd.cpuva = NULL;
-	f->userd.iova = 0;
+	gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
 
 	engine_info = f->engine_info + ENGINE_GR_GK20A;
 	runlist_id = engine_info->runlist_id;
 	runlist = &f->runlist_info[runlist_id];
 
-	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		if (runlist->mem[i].cpuva)
-			dma_free_coherent(d,
-				runlist->mem[i].size,
-				runlist->mem[i].cpuva,
-				runlist->mem[i].iova);
-		runlist->mem[i].cpuva = NULL;
-		runlist->mem[i].iova = 0;
-	}
+	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
+		gk20a_gmmu_free(g, &runlist->mem[i]);
 
 	kfree(runlist->active_channels);
 	kfree(runlist->active_tsgs);
```
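
The two new teardown helpers fold the open-coded cleanup above into one call each. A plausible sketch of their shape, assuming they simply centralize the logic this hunk deletes; the real bodies live elsewhere in the driver (likely mm_gk20a.c), and the `vm->mm->g` back-pointer is an assumption, while `dev_from_gk20a()`, `gk20a_gmmu_unmap()`, and `gk20a_free_sgtable()` are existing symbols visible in the removed code:

```c
/* Sketch only: inferred from the open-coded teardown this commit removes. */
static void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem)
{
	struct device *d = dev_from_gk20a(g);

	if (mem->cpu_va)
		dma_free_coherent(d, mem->size, mem->cpu_va,
				  sg_dma_address(mem->sgt->sgl));
	mem->cpu_va = NULL;

	if (mem->sgt)
		gk20a_free_sgtable(&mem->sgt);
}

static void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem)
{
	if (mem->gpu_va)
		gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size,
				 gk20a_mem_flag_none);
	mem->gpu_va = 0;

	gk20a_gmmu_free(vm->mm->g, mem);
}
```

If the helpers are no-ops on an already-cleared descriptor, as sketched, that would explain why the callers above drop all of the old `if (...)` guards.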
```diff
@@ -327,19 +304,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 
 	runlist_size = ram_rl_entry_size_v() * f->num_channels;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		dma_addr_t iova;
-
-		runlist->mem[i].cpuva =
-			dma_alloc_coherent(d,
-					runlist_size,
-					&iova,
-					GFP_KERNEL);
-		if (!runlist->mem[i].cpuva) {
+		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
+		if (err) {
 			dev_err(d, "memory allocation failed\n");
 			goto clean_up_runlist;
 		}
-		runlist->mem[i].iova = iova;
-		runlist->mem[i].size = runlist_size;
 	}
 	mutex_init(&runlist->mutex);
 
```
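
Judging by what it replaces, `gk20a_gmmu_alloc()` bundles the coherent DMA allocation and the sg_table setup, and records the result in the `mem_desc`. A sketch under that assumption; the `gk20a_get_sgtable()` signature is taken from removed lines later in this patch, and nothing here is the committed implementation:

```c
/* Sketch only: composition inferred from the dma_alloc_coherent() +
 * gk20a_get_sgtable() sequence this commit removes. */
static int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem)
{
	struct device *d = dev_from_gk20a(g);
	dma_addr_t iova;
	int err;

	mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL);
	if (!mem->cpu_va)
		return -ENOMEM;

	err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size);
	if (err) {
		dma_free_coherent(d, size, mem->cpu_va, iova);
		mem->cpu_va = NULL;
		return err;
	}

	mem->size = size;
	return 0;
}
```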
```diff
@@ -351,15 +320,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	return 0;
 
 clean_up_runlist:
-	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		if (runlist->mem[i].cpuva)
-			dma_free_coherent(d,
-					runlist->mem[i].size,
-					runlist->mem[i].cpuva,
-					runlist->mem[i].iova);
-		runlist->mem[i].cpuva = NULL;
-		runlist->mem[i].iova = 0;
-	}
+	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
+		gk20a_gmmu_free(g, &runlist->mem[i]);
 
 	kfree(runlist->active_channels);
 	runlist->active_channels = NULL;
@@ -502,7 +464,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 	struct fifo_gk20a *f = &g->fifo;
 	struct device *d = dev_from_gk20a(g);
 	int chid, i, err = 0;
-	dma_addr_t iova;
 
 	gk20a_dbg_fn("");
 
@@ -521,43 +482,17 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 	f->max_engines = ENGINE_INVAL_GK20A;
 
 	f->userd_entry_size = 1 << ram_userd_base_shift_v();
-	f->userd_total_size = f->userd_entry_size * f->num_channels;
 
-	f->userd.cpuva = dma_alloc_coherent(d,
-					f->userd_total_size,
-					&iova,
-					GFP_KERNEL);
-	if (!f->userd.cpuva) {
-		dev_err(d, "memory allocation failed\n");
-		err = -ENOMEM;
-		goto clean_up;
-	}
-
-	f->userd.iova = iova;
-	err = gk20a_get_sgtable(d, &f->userd.sgt,
-				f->userd.cpuva, f->userd.iova,
-				f->userd_total_size);
+	err = gk20a_gmmu_alloc_map(&g->mm.bar1.vm,
+				f->userd_entry_size * f->num_channels,
+				&f->userd);
 	if (err) {
-		dev_err(d, "failed to create sg table\n");
-		goto clean_up;
-	}
-
-	/* bar1 va */
-	f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm,
-					&f->userd.sgt,
-					f->userd_total_size,
-					0, /* flags */
-					gk20a_mem_flag_none);
-	if (!f->userd.gpu_va) {
-		dev_err(d, "gmmu mapping failed\n");
-		err = -ENOMEM;
+		dev_err(d, "memory allocation failed\n");
 		goto clean_up;
 	}
 
 	gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
 
-	f->userd.size = f->userd_total_size;
-
 	f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
 			GFP_KERNEL);
 	f->tsg = kzalloc(f->num_channels * sizeof(*f->tsg),
```
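
`gk20a_gmmu_alloc_map()` evidently composes the allocation with the BAR1 GMMU mapping, collapsing the two error paths above into one. A sketch under that assumption; the `gk20a_gmmu_map()` signature comes from the removed lines, everything else is inference:

```c
/* Sketch only: alloc-then-map composition inferred from the code removed
 * in the hunk above. */
static int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
				struct mem_desc *mem)
{
	int err = gk20a_gmmu_alloc(vm->mm->g, size, mem);

	if (err)
		return err;

	mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size,
				     0, /* flags */
				     gk20a_mem_flag_none);
	if (!mem->gpu_va) {
		gk20a_gmmu_free(vm->mm->g, mem);
		return -ENOMEM;
	}

	return 0;
}
```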
```diff
@@ -582,9 +517,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		f->channel[chid].userd_cpu_va =
-			f->userd.cpuva + chid * f->userd_entry_size;
+			f->userd.cpu_va + chid * f->userd_entry_size;
 		f->channel[chid].userd_iova =
-			gk20a_mm_smmu_vaddr_translate(g, f->userd.iova)
+			gk20a_mm_iova_addr(g, f->userd.sgt->sgl)
 			+ chid * f->userd_entry_size;
 		f->channel[chid].userd_gpu_va =
 			f->userd.gpu_va + chid * f->userd_entry_size;
@@ -607,22 +542,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
 clean_up:
 	gk20a_dbg_fn("fail");
-	if (f->userd.gpu_va)
-		gk20a_gmmu_unmap(&g->mm.bar1.vm,
-				f->userd.gpu_va,
-				f->userd.size,
-				gk20a_mem_flag_none);
-	if (f->userd.sgt)
-		gk20a_free_sgtable(&f->userd.sgt);
-	if (f->userd.cpuva)
-		dma_free_coherent(d,
-				f->userd_total_size,
-				f->userd.cpuva,
-				f->userd.iova);
-	f->userd.cpuva = NULL;
-	f->userd.iova = 0;
-
-	memset(&f->userd, 0, sizeof(struct userd_desc));
+	gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
 
 	kfree(f->channel);
 	f->channel = NULL;
@@ -657,7 +577,7 @@ static int gk20a_init_fifo_setup_hw(struct gk20a *g)
 	u32 v, v1 = 0x33, v2 = 0x55;
 
 	u32 bar1_vaddr = f->userd.gpu_va;
-	volatile u32 *cpu_vaddr = f->userd.cpuva;
+	volatile u32 *cpu_vaddr = f->userd.cpu_va;
 
 	gk20a_dbg_info("test bar1 @ vaddr 0x%x",
 		   bar1_vaddr);
@@ -725,8 +645,8 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
 		return NULL;
 	for (ci = 0; ci < f->num_channels; ci++) {
 		struct channel_gk20a *c = f->channel+ci;
-		if (c->inst_block.cpuva &&
-		    (inst_ptr == c->inst_block.cpu_pa))
+		if (c->inst_block.cpu_va &&
+		    (inst_ptr == sg_phys(c->inst_block.sgt->sgl)))
 			return f->channel+ci;
 	}
 	return NULL;
@@ -1082,10 +1002,10 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 				gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
 				gk20a_channel_abort(ch);
 			} else if (f.inst_ptr ==
-					g->mm.bar1.inst_block.cpu_pa) {
+					sg_phys(g->mm.bar1.inst_block.sgt->sgl)) {
 				gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
 			} else if (f.inst_ptr ==
-					g->mm.pmu.inst_block.cpu_pa) {
+					sg_phys(g->mm.pmu.inst_block.sgt->sgl)) {
 				gk20a_err(dev_from_gk20a(g), "mmu fault from pmu");
 			} else
 				gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");
```
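
A recurring pattern in the two hunks above: `mem_desc` no longer caches a CPU physical address (`cpu_pa`) or an `iova`, so addresses are recomputed on demand from the first scatterlist entry. `sg_phys()` is the standard `<linux/scatterlist.h>` helper, and the substitution is sound here because these buffers are single-segment coherent DMA allocations. Purely for illustration, with a hypothetical helper name:

```c
#include <linux/scatterlist.h>

/* Illustration of the replacement pattern, not driver code: for a
 * contiguous allocation, the old cached field is recoverable from sgt. */
static inline u64 mem_desc_cpu_pa(struct mem_desc *mem)
{
	return (u64)sg_phys(mem->sgt->sgl);	/* was mem->cpu_pa */
}
```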
```diff
@@ -1893,7 +1813,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 			    bool wait_for_finish)
 {
 	u32 ret = 0;
-	struct device *d = dev_from_gk20a(g);
 	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 *runlist_entry_base = NULL;
@@ -1935,15 +1854,15 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	new_buf = !runlist->cur_buffer;
 
 	gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx",
-		runlist_id, runlist->mem[new_buf].iova);
+		runlist_id, (u64)gk20a_mem_phys(&runlist->mem[new_buf]));
 
-	runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova);
+	runlist_pa = gk20a_mem_phys(&runlist->mem[new_buf]);
 	if (!runlist_pa) {
 		ret = -EINVAL;
 		goto clean_up;
 	}
 
-	runlist_entry_base = runlist->mem[new_buf].cpuva;
+	runlist_entry_base = runlist->mem[new_buf].cpu_va;
 	if (!runlist_entry_base) {
 		ret = -ENOMEM;
 		goto clean_up;
```
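
`gk20a_mem_phys()` in the final hunk, which replaces the per-call `gk20a_get_phys_from_iova()` translation, is presumably just a thin wrapper over that same scatterlist lookup. A one-line sketch, not the committed definition:

```c
/* Sketch only: plausible definition inferred from the call sites above. */
static inline phys_addr_t gk20a_mem_phys(struct mem_desc *mem)
{
	return sg_phys(mem->sgt->sgl);
}
```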