author     Terje Bergstrom <tbergstrom@nvidia.com>   2015-02-26 17:37:43 -0500
committer  Dan Willemsen <dwillemsen@nvidia.com>     2015-04-04 21:59:26 -0400
commit     7290a6cbd5d03145d6f1ca4c3eacba40f6d4f93c
tree       de452c09f5eef76af273041dc64997fdc351dbd6  /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
parent     bb51cf9ec6482b50f3020179965ef82f58d91a0a
gpu: nvgpu: Implement common allocator and mem_desc
Introduce mem_desc, which holds all information needed for a buffer. Implement helper functions for allocation and freeing that use this data type.

Change-Id: I82c88595d058d4fb8c5c5fbf19d13269e48e422f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/712699
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fifo_gk20a.c   125
1 file changed, 22 insertions(+), 103 deletions(-)
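The hunks below replace open-coded dma_alloc_coherent()/gk20a_get_sgtable()/gk20a_gmmu_map() sequences with the new mem_desc helpers named in the commit message. As a rough sketch only, inferred from the call sites visible in this diff rather than from the real mm_gk20a.h (which this series also changes, and whose exact fields and prototypes may differ), the type and helpers look roughly like this:

/*
 * Illustrative sketch, not the actual nvgpu definitions: the shape is
 * inferred from the call sites in this diff (f->userd.cpu_va, ->sgt->sgl,
 * ->gpu_va, gk20a_gmmu_alloc(g, size, &mem), gk20a_mem_phys(&mem), ...).
 */
struct mem_desc {
        void *cpu_va;           /* kernel CPU mapping of the buffer */
        struct sg_table *sgt;   /* backing pages; used for iova/phys lookups */
        size_t size;            /* allocation size in bytes */
        u64 gpu_va;             /* GMMU virtual address when mapped, else 0 */
};

/* Allocate DMA memory and fill in cpu_va/sgt/size; returns 0 on success. */
int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem);

/* Release whatever gk20a_gmmu_alloc() set up; safe on a zeroed mem_desc,
 * which is why the teardown paths below can drop their NULL checks. */
void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem);

/* As above, but also map the buffer into a VM (BAR1 here) and set gpu_va. */
int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size,
                         struct mem_desc *mem);
void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem);

/* Physical address of the buffer, taken from the first sg entry. */
u64 gk20a_mem_phys(struct mem_desc *mem);

With that shape, a caller allocates, checks the error code, and frees unconditionally on the unwind path; the bookkeeping that init_runlist() and gk20a_init_fifo_setup_sw() used to do by hand (storing iova, size and cpuva, then zeroing them on free) moves into the helpers.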
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index cf1242ab..dee58d0a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -159,7 +159,6 @@ u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g)
 static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 {
         struct gk20a *g = f->g;
-        struct device *d = dev_from_gk20a(g);
         struct fifo_engine_info_gk20a *engine_info;
         struct fifo_runlist_info_gk20a *runlist;
         u32 runlist_id;
@@ -175,36 +174,14 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
                 }
                 kfree(f->channel);
         }
-        if (f->userd.gpu_va)
-                gk20a_gmmu_unmap(&g->mm.bar1.vm,
-                        f->userd.gpu_va,
-                        f->userd.size,
-                        gk20a_mem_flag_none);
-
-        if (f->userd.sgt)
-                gk20a_free_sgtable(&f->userd.sgt);
-
-        if (f->userd.cpuva)
-                dma_free_coherent(d,
-                                f->userd_total_size,
-                                f->userd.cpuva,
-                                f->userd.iova);
-        f->userd.cpuva = NULL;
-        f->userd.iova = 0;
+        gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
 
         engine_info = f->engine_info + ENGINE_GR_GK20A;
         runlist_id = engine_info->runlist_id;
         runlist = &f->runlist_info[runlist_id];
 
-        for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-                if (runlist->mem[i].cpuva)
-                        dma_free_coherent(d,
-                                runlist->mem[i].size,
-                                runlist->mem[i].cpuva,
-                                runlist->mem[i].iova);
-                runlist->mem[i].cpuva = NULL;
-                runlist->mem[i].iova = 0;
-        }
+        for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
+                gk20a_gmmu_free(g, &runlist->mem[i]);
 
         kfree(runlist->active_channels);
         kfree(runlist->active_tsgs);
@@ -327,19 +304,11 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 
         runlist_size = ram_rl_entry_size_v() * f->num_channels;
         for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-                dma_addr_t iova;
-
-                runlist->mem[i].cpuva =
-                        dma_alloc_coherent(d,
-                                        runlist_size,
-                                        &iova,
-                                        GFP_KERNEL);
-                if (!runlist->mem[i].cpuva) {
+                int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
+                if (err) {
                         dev_err(d, "memory allocation failed\n");
                         goto clean_up_runlist;
                 }
-                runlist->mem[i].iova = iova;
-                runlist->mem[i].size = runlist_size;
         }
         mutex_init(&runlist->mutex);
 
@@ -351,15 +320,8 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
         return 0;
 
 clean_up_runlist:
-        for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-                if (runlist->mem[i].cpuva)
-                        dma_free_coherent(d,
-                                runlist->mem[i].size,
-                                runlist->mem[i].cpuva,
-                                runlist->mem[i].iova);
-                runlist->mem[i].cpuva = NULL;
-                runlist->mem[i].iova = 0;
-        }
+        for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
+                gk20a_gmmu_free(g, &runlist->mem[i]);
 
         kfree(runlist->active_channels);
         runlist->active_channels = NULL;
@@ -502,7 +464,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
         struct fifo_gk20a *f = &g->fifo;
         struct device *d = dev_from_gk20a(g);
         int chid, i, err = 0;
-        dma_addr_t iova;
 
         gk20a_dbg_fn("");
 
@@ -521,43 +482,17 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
         f->max_engines = ENGINE_INVAL_GK20A;
 
         f->userd_entry_size = 1 << ram_userd_base_shift_v();
-        f->userd_total_size = f->userd_entry_size * f->num_channels;
 
-        f->userd.cpuva = dma_alloc_coherent(d,
-                                        f->userd_total_size,
-                                        &iova,
-                                        GFP_KERNEL);
-        if (!f->userd.cpuva) {
-                dev_err(d, "memory allocation failed\n");
-                err = -ENOMEM;
-                goto clean_up;
-        }
-
-        f->userd.iova = iova;
-        err = gk20a_get_sgtable(d, &f->userd.sgt,
-                                f->userd.cpuva, f->userd.iova,
-                                f->userd_total_size);
+        err = gk20a_gmmu_alloc_map(&g->mm.bar1.vm,
+                                   f->userd_entry_size * f->num_channels,
+                                   &f->userd);
         if (err) {
-                dev_err(d, "failed to create sg table\n");
-                goto clean_up;
-        }
-
-        /* bar1 va */
-        f->userd.gpu_va = gk20a_gmmu_map(&g->mm.bar1.vm,
-                                        &f->userd.sgt,
-                                        f->userd_total_size,
-                                        0, /* flags */
-                                        gk20a_mem_flag_none);
-        if (!f->userd.gpu_va) {
-                dev_err(d, "gmmu mapping failed\n");
-                err = -ENOMEM;
+                dev_err(d, "memory allocation failed\n");
                 goto clean_up;
         }
 
         gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
 
-        f->userd.size = f->userd_total_size;
-
         f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
                         GFP_KERNEL);
         f->tsg = kzalloc(f->num_channels * sizeof(*f->tsg),
@@ -582,9 +517,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
         for (chid = 0; chid < f->num_channels; chid++) {
                 f->channel[chid].userd_cpu_va =
-                        f->userd.cpuva + chid * f->userd_entry_size;
+                        f->userd.cpu_va + chid * f->userd_entry_size;
                 f->channel[chid].userd_iova =
-                        gk20a_mm_smmu_vaddr_translate(g, f->userd.iova)
+                        gk20a_mm_iova_addr(g, f->userd.sgt->sgl)
                         + chid * f->userd_entry_size;
                 f->channel[chid].userd_gpu_va =
                         f->userd.gpu_va + chid * f->userd_entry_size;
@@ -607,22 +542,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 
 clean_up:
         gk20a_dbg_fn("fail");
-        if (f->userd.gpu_va)
-                gk20a_gmmu_unmap(&g->mm.bar1.vm,
-                                f->userd.gpu_va,
-                                f->userd.size,
-                                gk20a_mem_flag_none);
-        if (f->userd.sgt)
-                gk20a_free_sgtable(&f->userd.sgt);
-        if (f->userd.cpuva)
-                dma_free_coherent(d,
-                        f->userd_total_size,
-                        f->userd.cpuva,
-                        f->userd.iova);
-        f->userd.cpuva = NULL;
-        f->userd.iova = 0;
-
-        memset(&f->userd, 0, sizeof(struct userd_desc));
+        gk20a_gmmu_unmap_free(&g->mm.bar1.vm, &f->userd);
 
         kfree(f->channel);
         f->channel = NULL;
@@ -657,7 +577,7 @@ static int gk20a_init_fifo_setup_hw(struct gk20a *g)
         u32 v, v1 = 0x33, v2 = 0x55;
 
         u32 bar1_vaddr = f->userd.gpu_va;
-        volatile u32 *cpu_vaddr = f->userd.cpuva;
+        volatile u32 *cpu_vaddr = f->userd.cpu_va;
 
         gk20a_dbg_info("test bar1 @ vaddr 0x%x",
                    bar1_vaddr);
@@ -725,8 +645,8 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
                 return NULL;
         for (ci = 0; ci < f->num_channels; ci++) {
                 struct channel_gk20a *c = f->channel+ci;
-                if (c->inst_block.cpuva &&
-                        (inst_ptr == c->inst_block.cpu_pa))
+                if (c->inst_block.cpu_va &&
+                        (inst_ptr == sg_phys(c->inst_block.sgt->sgl)))
                         return f->channel+ci;
         }
         return NULL;
@@ -1082,10 +1002,10 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g)
                         gk20a_fifo_set_ctx_mmu_error_ch(g, ch);
                         gk20a_channel_abort(ch);
                 } else if (f.inst_ptr ==
-                                g->mm.bar1.inst_block.cpu_pa) {
+                                sg_phys(g->mm.bar1.inst_block.sgt->sgl)) {
                         gk20a_err(dev_from_gk20a(g), "mmu fault from bar1");
                 } else if (f.inst_ptr ==
-                                g->mm.pmu.inst_block.cpu_pa) {
+                                sg_phys(g->mm.pmu.inst_block.sgt->sgl)) {
                         gk20a_err(dev_from_gk20a(g), "mmu fault from pmu");
                 } else
                         gk20a_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");
@@ -1893,7 +1813,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
                                     bool wait_for_finish)
 {
         u32 ret = 0;
-        struct device *d = dev_from_gk20a(g);
         struct fifo_gk20a *f = &g->fifo;
         struct fifo_runlist_info_gk20a *runlist = NULL;
         u32 *runlist_entry_base = NULL;
@@ -1935,15 +1854,15 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
         new_buf = !runlist->cur_buffer;
 
         gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx",
-                runlist_id, runlist->mem[new_buf].iova);
+                runlist_id, (u64)gk20a_mem_phys(&runlist->mem[new_buf]));
 
-        runlist_pa = gk20a_get_phys_from_iova(d, runlist->mem[new_buf].iova);
+        runlist_pa = gk20a_mem_phys(&runlist->mem[new_buf]);
         if (!runlist_pa) {
                 ret = -EINVAL;
                 goto clean_up;
         }
 
-        runlist_entry_base = runlist->mem[new_buf].cpuva;
+        runlist_entry_base = runlist->mem[new_buf].cpu_va;
         if (!runlist_entry_base) {
                 ret = -ENOMEM;
                 goto clean_up;