author	Terje Bergstrom <tbergstrom@nvidia.com>	2015-03-21 11:10:59 -0400
committer	Alexander Van Brunt <avanbrunt@nvidia.com>	2015-05-05 16:57:34 -0400
commit	2204f2a524af40d8e5fa0f3d764b61fefa397989 (patch)
tree	25b2cb694526c4cb8bb484288db5953642cd842d /drivers/gpu/nvgpu/gk20a
parent	5486503343ca9db24d7ce4acd4f7cf22e1f515f2 (diff)
gpu: nvgpu: Use common allocator for patch
Reduce the amount of duplicate code around memory allocation by using
common helpers, and a common data structure for storing the results of
allocations.

Bug 1605769

Change-Id: Idf51831e8be9cabe1ab9122b18317137fde6339f
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/721030
Reviewed-on: http://git-master/r/737530
Reviewed-by: Alexander Van Brunt <avanbrunt@nvidia.com>
Tested-by: Alexander Van Brunt <avanbrunt@nvidia.com>
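In short, the hand-rolled dma_alloc_attrs/sg_table/gk20a_gmmu_map sequence for the
patch buffer is replaced by the common gk20a_gmmu_alloc_map_attr and
gk20a_gmmu_free_attr helpers operating on a single struct mem_desc. Below is a
minimal sketch of the resulting pattern, paraphrased from the hunks that follow;
the helper names, the DMA attribute, the 128-word size and the patch_desc layout
come from this diff, while the wrapper function names are illustrative only.

/* Sketch only: allocate, map and later free the 128-word patch buffer
 * through the common helpers, keeping all bookkeeping in patch_ctx->mem. */
static int patch_buffer_alloc(struct vm_gk20a *vm, struct patch_desc *patch_ctx)
{
	/* One call allocates the pages, builds the sg_table and maps them
	 * into the channel VM; sgt, size, gpu_va etc. land in patch_ctx->mem. */
	return gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_NO_KERNEL_MAPPING,
					 128 * sizeof(u32), &patch_ctx->mem);
}

static void patch_buffer_free(struct gk20a *g, struct vm_gk20a *vm,
			      struct patch_desc *patch_ctx)
{
	if (patch_ctx->mem.gpu_va)
		gk20a_gmmu_unmap(vm, patch_ctx->mem.gpu_va,
				 patch_ctx->mem.size, gk20a_mem_flag_none);

	/* Releases the backing pages and clears the mem_desc fields. */
	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &patch_ctx->mem);
	patch_ctx->data_count = 0;
}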
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	95
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	1
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	6
3 files changed, 25 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 49b70767..293ccd97 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -597,16 +597,16 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx)
 {
 	/* being defensive still... */
-	if (WARN_ON(ch_ctx->patch_ctx.cpu_va)) {
+	if (WARN_ON(ch_ctx->patch_ctx.mem.cpu_va)) {
 		gk20a_err(dev_from_gk20a(g), "nested ctx patch begin?");
 		return -EBUSY;
 	}
 
-	ch_ctx->patch_ctx.cpu_va = vmap(ch_ctx->patch_ctx.pages,
-			PAGE_ALIGN(ch_ctx->patch_ctx.size) >> PAGE_SHIFT,
+	ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages,
+			PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT,
 			0, pgprot_writecombine(PAGE_KERNEL));
 
-	if (!ch_ctx->patch_ctx.cpu_va)
+	if (!ch_ctx->patch_ctx.mem.cpu_va)
 		return -ENOMEM;
 
 	return 0;
@@ -616,13 +616,13 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx)
 {
 	/* being defensive still... */
-	if (!ch_ctx->patch_ctx.cpu_va) {
+	if (!ch_ctx->patch_ctx.mem.cpu_va) {
 		gk20a_err(dev_from_gk20a(g), "dangling ctx patch end?");
 		return -EINVAL;
 	}
 
-	vunmap(ch_ctx->patch_ctx.cpu_va);
-	ch_ctx->patch_ctx.cpu_va = NULL;
+	vunmap(ch_ctx->patch_ctx.mem.cpu_va);
+	ch_ctx->patch_ctx.mem.cpu_va = NULL;
 	return 0;
 }
 
@@ -642,7 +642,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 	/* we added an optimization prolog, epilog
 	 * to get rid of unnecessary maps and l2 invals.
 	 * but be defensive still... */
-	if (!ch_ctx->patch_ctx.cpu_va) {
+	if (!ch_ctx->patch_ctx.mem.cpu_va) {
 		int err;
 		gk20a_err(dev_from_gk20a(g),
 			  "per-write ctx patch begin?");
@@ -654,7 +654,7 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 	} else
 		mapped_here = false;
 
-	patch_ptr = ch_ctx->patch_ctx.cpu_va;
+	patch_ptr = ch_ctx->patch_ctx.mem.cpu_va;
 	patch_slot = ch_ctx->patch_ctx.data_count * 2;
 
 	gk20a_mem_wr32(patch_ptr, patch_slot++, addr);
@@ -1622,8 +1622,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
 
-	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
-	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
+	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
+	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0,
 		 ch_ctx->patch_ctx.data_count);
@@ -2568,82 +2568,33 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
 		struct channel_gk20a *c)
 {
 	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
-	struct device *d = dev_from_gk20a(g);
 	struct vm_gk20a *ch_vm = c->vm;
-	DEFINE_DMA_ATTRS(attrs);
-	struct sg_table *sgt;
 	int err = 0;
-	dma_addr_t iova;
 
 	gk20a_dbg_fn("");
 
-	patch_ctx->size = 128 * sizeof(u32);
-	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-	patch_ctx->pages = dma_alloc_attrs(d, patch_ctx->size,
-				&iova, GFP_KERNEL,
-				&attrs);
-	if (!patch_ctx->pages)
-		return -ENOMEM;
-
-	patch_ctx->iova = iova;
-	err = gk20a_get_sgtable_from_pages(d, &sgt, patch_ctx->pages,
-			patch_ctx->iova, patch_ctx->size);
+	err = gk20a_gmmu_alloc_map_attr(ch_vm, DMA_ATTR_NO_KERNEL_MAPPING,
+			128 * sizeof(u32), &patch_ctx->mem);
 	if (err)
-		goto err_free;
-
-	patch_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, patch_ctx->size,
-					0, gk20a_mem_flag_none);
-	if (!patch_ctx->gpu_va)
-		goto err_free_sgtable;
-
-	gk20a_free_sgtable(&sgt);
+		return err;
 
 	gk20a_dbg_fn("done");
 	return 0;
-
- err_free_sgtable:
-	gk20a_free_sgtable(&sgt);
- err_free:
-	dma_free_attrs(d, patch_ctx->size,
-		patch_ctx->pages, patch_ctx->iova, &attrs);
-	patch_ctx->pages = NULL;
-	patch_ctx->iova = 0;
-	gk20a_err(dev_from_gk20a(g), "fail");
-	return err;
-}
-
-static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
-{
-	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
-	struct vm_gk20a *ch_vm = c->vm;
-
-	gk20a_dbg_fn("");
-
-	if (patch_ctx->gpu_va)
-		gk20a_gmmu_unmap(ch_vm, patch_ctx->gpu_va,
-			patch_ctx->size, gk20a_mem_flag_none);
-	patch_ctx->gpu_va = 0;
-	patch_ctx->data_count = 0;
 }
 
 static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
 {
 	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
 	struct gk20a *g = c->g;
-	struct device *d = dev_from_gk20a(g);
-	DEFINE_DMA_ATTRS(attrs);
 
 	gk20a_dbg_fn("");
 
-	gr_gk20a_unmap_channel_patch_ctx(c);
+	if (patch_ctx->mem.gpu_va)
+		gk20a_gmmu_unmap(c->vm, patch_ctx->mem.gpu_va,
+			patch_ctx->mem.size, gk20a_mem_flag_none);
 
-	if (patch_ctx->pages) {
-		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-		dma_free_attrs(d, patch_ctx->size,
-			patch_ctx->pages, patch_ctx->iova, &attrs);
-		patch_ctx->pages = NULL;
-		patch_ctx->iova = 0;
-	}
+	gk20a_gmmu_free_attr(g, DMA_ATTR_NO_KERNEL_MAPPING, &patch_ctx->mem);
+	patch_ctx->data_count = 0;
 }
 
 void gk20a_free_channel_ctx(struct channel_gk20a *c)
@@ -2757,7 +2708,7 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 	}
 
 	/* allocate patch buffer */
-	if (ch_ctx->patch_ctx.pages == NULL) {
+	if (ch_ctx->patch_ctx.mem.sgt == NULL) {
 		err = gr_gk20a_alloc_channel_patch_ctx(g, c);
 		if (err) {
 			gk20a_err(dev_from_gk20a(g),
@@ -2883,7 +2834,7 @@ int gk20a_free_obj_ctx(struct channel_gk20a *c,
 		gk20a_disable_channel(c,
 			!c->has_timedout,
 			timeout);
-		gr_gk20a_unmap_channel_patch_ctx(c);
+		gr_gk20a_free_channel_patch_ctx(c);
 	}
 
 	return 0;
@@ -6081,8 +6032,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 					 addr, data, true);
 
-		vaddr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
-		vaddr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
+		vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
+		vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
 		gk20a_mem_wr32(context +
 			       ctxsw_prog_main_image_patch_count_o(),
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index fa84337b..bde8afae 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1616,6 +1616,7 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr,
 		}
 
 		mem->cpu_va = NULL;
+		mem->pages = NULL;
 	}
 
 	if (mem->sgt)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 91659537..df9ebec3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -68,11 +68,7 @@ struct gpfifo_desc {
 };
 
 struct patch_desc {
-	struct page **pages;
-	u64 iova;
-	size_t size;
-	void *cpu_va;
-	u64 gpu_va;
+	struct mem_desc mem;
 	u32 data_count;
 };
 