author     Deepak Nibade <dnibade@nvidia.com>	2014-06-18 09:02:03 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:10:17 -0400
commit     ee66559a0b3b82b3dc9be684261ddd0954731ff5 (patch)
tree       34156c1d4f3393a5a5fe945185b8548ae2427a07 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent     b6466fbe07d28fcc1a2ea93715a1f88b48dd8550 (diff)
gpu: nvgpu: add TSG support for engine context
All channels in a TSG need to share the same engine context, i.e. the pointer in the RAMFC of every channel in a TSG must point to the same NV_RAMIN_GR_WFI_TARGET.

To get this, add a gr_ctx pointer inside the TSG struct so that the TSG can maintain its own unique gr_ctx.

Also, change the gr_ctx member of a channel to a pointer so that, if the channel is part of a TSG, it points to the TSG's gr_ctx; otherwise it points to the channel's own gr_ctx.

In gk20a_alloc_obj_ctx(), allocate gr_ctx as below:
1) If the channel is not part of any TSG
   - allocate its own gr_ctx buffer if it is not already allocated
2) If the channel is part of a TSG
   - check whether the TSG has already allocated a gr_ctx
   - if yes, the channel's gr_ctx points to the TSG's
   - if not, the channel is the first to be bound to this TSG; allocate a new gr_ctx on the TSG first, then make the channel's gr_ctx point to it

gr_ctx is released as below:
1) If the channel is not part of a TSG, it is released when the channel is closed
2) Otherwise, it is released when the TSG itself is closed

Bug 1470692

Change-Id: Id347217d5b462e0e972cd3d79d17795b37034a50
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/417065
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
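For orientation before reading the diff, the stand-alone C sketch below condenses the allocation decision described above. It is illustrative only: the struct fields and the alloc_gr_ctx() helper are simplified placeholders, not the real nvgpu types or functions (the actual code uses gr_ctx_desc, __gr_gk20a_alloc_gr_ctx() and friends, shown in the hunks further down).

	#include <stdbool.h>
	#include <stdlib.h>

	/* Simplified placeholder types -- not the actual nvgpu structures. */
	struct gr_ctx_desc { int dummy; };

	struct tsg {
		struct gr_ctx_desc *tsg_gr_ctx;	/* shared by all channels in the TSG */
	};

	struct channel {
		bool in_tsg;
		struct tsg *tsg;
		struct gr_ctx_desc *gr_ctx;	/* now a pointer; may alias the TSG's context */
	};

	/* Placeholder for the real context-buffer allocation. */
	static struct gr_ctx_desc *alloc_gr_ctx(void)
	{
		return calloc(1, sizeof(struct gr_ctx_desc));
	}

	/* Decision flow described in the commit message. */
	static int bind_gr_ctx(struct channel *c)
	{
		if (!c->in_tsg) {
			/* Standalone channel: allocate its own gr_ctx once. */
			if (!c->gr_ctx)
				c->gr_ctx = alloc_gr_ctx();
			return c->gr_ctx ? 0 : -1;
		}

		/* First channel bound to the TSG allocates the shared gr_ctx... */
		if (!c->tsg->tsg_gr_ctx)
			c->tsg->tsg_gr_ctx = alloc_gr_ctx();
		if (!c->tsg->tsg_gr_ctx)
			return -1;

		/* ...and every channel in the TSG points at that same context. */
		c->gr_ctx = c->tsg->tsg_gr_ctx;
		return 0;
	}

Freeing mirrors this: a standalone channel releases its gr_ctx when the channel is closed, while a TSG channel leaves it in place and the shared context is released when the TSG itself is closed.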
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	155
1 file changed, 113 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 0e178e9e..4a6dd6c5 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -801,8 +801,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
 
 	gk20a_dbg_fn("");
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
 			0, pgprot_dmacoherent(PAGE_KERNEL));
 	if (!ctx_ptr)
 		return -ENOMEM;
@@ -1562,8 +1562,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	if (!gold_ptr)
 		goto clean_up;
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
 			0, pgprot_dmacoherent(PAGE_KERNEL));
 	if (!ctx_ptr)
 		goto clean_up;
@@ -1602,7 +1602,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 			gk20a_mem_rd32(gold_ptr, i);
 	}
 
-	gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
+	gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
 
 	gr->ctx_vars.golden_image_initialized = true;
 
@@ -1636,8 +1636,8 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	gk20a_mm_l2_flush(g, true);
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
 			0, pgprot_dmacoherent(PAGE_KERNEL));
 	if (!ctx_ptr)
 		return -ENOMEM;
@@ -1676,8 +1676,8 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	gk20a_mm_l2_flush(g, true);
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
 			0, pgprot_dmacoherent(PAGE_KERNEL));
 	if (!ctx_ptr)
 		return -ENOMEM;
@@ -2521,12 +2521,11 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
 	c->ch_ctx.global_ctx_buffer_mapped = false;
 }
 
-static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
-			struct channel_gk20a *c)
+static int __gr_gk20a_alloc_gr_ctx(struct gk20a *g,
+			struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm)
 {
+	struct gr_ctx_desc *gr_ctx = NULL;
 	struct gr_gk20a *gr = &g->gr;
-	struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
-	struct vm_gk20a *ch_vm = c->vm;
 	struct device *d = dev_from_gk20a(g);
 	struct sg_table *sgt;
 	DEFINE_DMA_ATTRS(attrs);
@@ -2542,12 +2541,18 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
 	gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
 	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 
+	gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL);
+	if (!gr_ctx)
+		return -ENOMEM;
+
 	gr_ctx->size = gr->ctx_vars.buffer_total_size;
 	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
 	gr_ctx->pages = dma_alloc_attrs(d, gr_ctx->size,
 				&iova, GFP_KERNEL, &attrs);
-	if (!gr_ctx->pages)
-		return -ENOMEM;
+	if (!gr_ctx->pages) {
+		err = -ENOMEM;
+		goto err_free_ctx;
+	}
 
 	gr_ctx->iova = iova;
 	err = gk20a_get_sgtable_from_pages(d, &sgt, gr_ctx->pages,
@@ -2555,7 +2560,7 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
 	if (err)
 		goto err_free;
 
-	gr_ctx->gpu_va = gk20a_gmmu_map(ch_vm, &sgt, gr_ctx->size,
+	gr_ctx->gpu_va = gk20a_gmmu_map(vm, &sgt, gr_ctx->size,
 				NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
 				gk20a_mem_flag_none);
 	if (!gr_ctx->gpu_va)
@@ -2563,6 +2568,8 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
 
 	gk20a_free_sgtable(&sgt);
 
+	*__gr_ctx = gr_ctx;
+
 	return 0;
 
 err_free_sgt:
@@ -2572,30 +2579,74 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
 			gr_ctx->pages, gr_ctx->iova, &attrs);
 	gr_ctx->pages = NULL;
 	gr_ctx->iova = 0;
+err_free_ctx:
+	kfree(gr_ctx);
+	gr_ctx = NULL;
 
 	return err;
 }
 
-static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
+static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
+			struct tsg_gk20a *tsg)
+{
+	struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx;
+	int err;
+
+	if (!tsg->vm) {
+		gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n");
+		return -ENOMEM;
+	}
+
+	err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
+			struct channel_gk20a *c)
+{
+	struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx;
+	int err = __gr_gk20a_alloc_gr_ctx(g, gr_ctx, c->vm);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void __gr_gk20a_free_gr_ctx(struct gk20a *g,
+		struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx)
 {
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct vm_gk20a *ch_vm = c->vm;
-	struct gk20a *g = c->g;
 	struct device *d = dev_from_gk20a(g);
 	DEFINE_DMA_ATTRS(attrs);
 
 	gk20a_dbg_fn("");
 
-	if (!ch_ctx->gr_ctx.gpu_va)
+	if (!gr_ctx || !gr_ctx->gpu_va)
 		return;
 
-	gk20a_gmmu_unmap(ch_vm, ch_ctx->gr_ctx.gpu_va,
-			ch_ctx->gr_ctx.size, gk20a_mem_flag_none);
+	gk20a_gmmu_unmap(vm, gr_ctx->gpu_va,
+			gr_ctx->size, gk20a_mem_flag_none);
 	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
-	dma_free_attrs(d, ch_ctx->gr_ctx.size,
-			ch_ctx->gr_ctx.pages, ch_ctx->gr_ctx.iova, &attrs);
-	ch_ctx->gr_ctx.pages = NULL;
-	ch_ctx->gr_ctx.iova = 0;
+	dma_free_attrs(d, gr_ctx->size,
+			gr_ctx->pages, gr_ctx->iova, &attrs);
+	gr_ctx->pages = NULL;
+	gr_ctx->iova = 0;
+}
+
+void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
+{
+	if (!tsg->vm) {
+		gk20a_err(dev_from_gk20a(tsg->g), "No address space bound\n");
+		return;
+	}
+	__gr_gk20a_free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx);
+}
+
+static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
+{
+	__gr_gk20a_free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
 }
 
 static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
@@ -2684,7 +2735,8 @@ void gk20a_free_channel_ctx(struct channel_gk20a *c)
 {
 	gr_gk20a_unmap_global_ctx_buffers(c);
 	gr_gk20a_free_channel_patch_ctx(c);
-	gr_gk20a_free_channel_gr_ctx(c);
+	if (!gk20a_is_channel_marked_as_tsg(c))
+		gr_gk20a_free_channel_gr_ctx(c);
 
 	/* zcull_ctx, pm_ctx */
 
@@ -2717,7 +2769,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		struct nvhost_alloc_obj_ctx_args *args)
 {
 	struct gk20a *g = c->g;
+	struct fifo_gk20a *f = &g->fifo;
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct tsg_gk20a *tsg = NULL;
 	int err = 0;
 
 	gk20a_dbg_fn("");
@@ -2736,27 +2790,44 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		err = -EINVAL;
 		goto out;
 	}
+	c->obj_class = args->class_num;
+
+	if (gk20a_is_channel_marked_as_tsg(c))
+		tsg = &f->tsg[c->tsgid];
 
 	/* allocate gr ctx buffer */
-	if (ch_ctx->gr_ctx.pages == NULL) {
-		err = gr_gk20a_alloc_channel_gr_ctx(g, c);
-		if (err) {
+	if (!tsg) {
+		if (!ch_ctx->gr_ctx) {
+			err = gr_gk20a_alloc_channel_gr_ctx(g, c);
+			if (err) {
+				gk20a_err(dev_from_gk20a(g),
+					"fail to allocate gr ctx buffer");
+				goto out;
+			}
+		} else {
+			/*TBD: needs to be more subtle about which is
+			 * being allocated as some are allowed to be
+			 * allocated along same channel */
 			gk20a_err(dev_from_gk20a(g),
-				"fail to allocate gr ctx buffer");
+				"too many classes alloc'd on same channel");
+			err = -EINVAL;
 			goto out;
 		}
-		c->obj_class = args->class_num;
 	} else {
-		/*TBD: needs to be more subtle about which is being allocated
-		 * as some are allowed to be allocated along same channel */
-		gk20a_err(dev_from_gk20a(g),
-			"too many classes alloc'd on same channel");
-		err = -EINVAL;
-		goto out;
+		if (!tsg->tsg_gr_ctx) {
+			tsg->vm = c->vm;
+			err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg);
+			if (err) {
+				gk20a_err(dev_from_gk20a(g),
+					"fail to allocate TSG gr ctx buffer");
+				goto out;
+			}
+		}
+		ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
 	}
 
 	/* commit gr ctx buffer */
-	err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
+	err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"fail to commit gr ctx buffer");
@@ -6657,8 +6728,8 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 
 	/* would have been a variant of gr_gk20a_apply_instmem_overrides */
 	/* recoded in-place instead.*/
-	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+	ctx_ptr = vmap(ch_ctx->gr_ctx->pages,
+			PAGE_ALIGN(ch_ctx->gr_ctx->size) >> PAGE_SHIFT,
 			0, pgprot_dmacoherent(PAGE_KERNEL));
 	if (!ctx_ptr) {
 		err = -ENOMEM;