path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
author	Alex Waterman <alexw@nvidia.com>	2017-07-21 16:11:56 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-07-27 14:14:59 -0400
commit	ea180847e146450b53de5c41c3f43b19391b4c5d (patch)
tree	ef4782d69605a6632bbc6b19212263d6cb9abf97 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent	1b5035132b8f28cb05a21eca4d2f7a4771e59914 (diff)
gpu: nvgpu: Fix gr ctx unmap logic
The GR context buffers were not being properly unmapped. The awkward VPR vs non-VPR context setup requires some extra checks when determining which nvgpu_mem is associated with what GPU VA (which are tracked separately in a different sized array).

Change-Id: I4c7be1c5b7835aea4309a142df5b0bdfaae91e4c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1524689
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
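For orientation, the sketch below (not part of the patch) shows the bookkeeping this change relies on: alongside the existing per-channel VA and size arrays, a new index array records which entry of gr->global_ctx_buffer[] backs each VA slot, so teardown can resolve the correct nvgpu_mem even when a VPR variant was mapped. The enum orderings and the struct name channel_ctx_sketch are assumptions for illustration; only the field and constant names mirror the driver.

#include <stdint.h>
#include <stdbool.h>

/*
 * Illustrative sketch only -- not the actual nvgpu headers. The VA-slot
 * enum and the buffer enum are different sizes: each VA slot can be
 * backed by either the normal or the VPR variant of a buffer, so a
 * per-slot index is needed to find the right backing buffer at unmap
 * time. Enum values are assumed here for illustration.
 */
enum global_ctx_buffer {		/* indexes gr->global_ctx_buffer[] */
	CIRCULAR, PAGEPOOL, ATTRIBUTE,
	CIRCULAR_VPR, PAGEPOOL_VPR, ATTRIBUTE_VPR,
	GOLDEN_CTX, PRIV_ACCESS_MAP,
	NR_GLOBAL_CTX_BUF
};

enum global_ctx_buffer_va {		/* per-channel VA slots */
	CIRCULAR_VA, PAGEPOOL_VA, ATTRIBUTE_VA,
	GOLDEN_CTX_VA, PRIV_ACCESS_MAP_VA,
	NR_GLOBAL_CTX_BUF_VA
};

struct channel_ctx_sketch {
	uint64_t global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];	/* GPU VA per slot */
	uint64_t global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];	/* mapped size */
	/* New with this patch: which buffer backs each VA slot. */
	int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
	bool global_ctx_buffer_mapped;
};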
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	74
1 file changed, 45 insertions, 29 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cc57b09f..d7c80b91 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2581,24 +2581,60 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
 	return -ENOMEM;
 }
 
+static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
+{
+	struct vm_gk20a *ch_vm = c->vm;
+	struct gr_gk20a *gr = &c->g->gr;
+	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
+	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+	int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index;
+	u32 i;
+
+	gk20a_dbg_fn("");
+
+	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
+		if (g_bfr_index[i]) {
+			struct nvgpu_mem *mem;
+
+			/*
+			 * Translate from VA index to buffer index to determine
+			 * the correct struct nvgpu_mem to use. Handles the VPR
+			 * vs non-VPR difference in context images.
+			 */
+			mem = &gr->global_ctx_buffer[g_bfr_index[i]].mem;
+
+			nvgpu_gmmu_unmap(ch_vm, mem, g_bfr_va[i]);
+		}
+	}
+
+	memset(g_bfr_va, 0, sizeof(c->ch_ctx.global_ctx_buffer_va));
+	memset(g_bfr_size, 0, sizeof(c->ch_ctx.global_ctx_buffer_size));
+	memset(g_bfr_index, 0, sizeof(c->ch_ctx.global_ctx_buffer_index));
+
+	c->ch_ctx.global_ctx_buffer_mapped = false;
+}
+
 static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 					   struct channel_gk20a *c)
 {
 	struct vm_gk20a *ch_vm = c->vm;
 	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
 	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+	int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index;
 	struct gr_gk20a *gr = &g->gr;
 	struct nvgpu_mem *mem;
 	u64 gpu_va;
-	u32 i;
+
 	gk20a_dbg_fn("");
 
 	/* Circular Buffer */
 	if (!c->vpr ||
 	    (gr->global_ctx_buffer[CIRCULAR_VPR].mem.priv.sgt == NULL)) {
 		mem = &gr->global_ctx_buffer[CIRCULAR].mem;
+		g_bfr_index[CIRCULAR_VA] = CIRCULAR;
 	} else {
 		mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem;
+		g_bfr_index[CIRCULAR_VA] = CIRCULAR_VPR;
 	}
 
 	gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
@@ -2613,8 +2649,10 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	if (!c->vpr ||
 	    (gr->global_ctx_buffer[ATTRIBUTE_VPR].mem.priv.sgt == NULL)) {
 		mem = &gr->global_ctx_buffer[ATTRIBUTE].mem;
+		g_bfr_index[ATTRIBUTE_VA] = ATTRIBUTE;
 	} else {
 		mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem;
+		g_bfr_index[ATTRIBUTE_VA] = ATTRIBUTE_VPR;
 	}
 
 	gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
@@ -2629,8 +2667,10 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	if (!c->vpr ||
 	    (gr->global_ctx_buffer[PAGEPOOL_VPR].mem.priv.sgt == NULL)) {
 		mem = &gr->global_ctx_buffer[PAGEPOOL].mem;
+		g_bfr_index[PAGEPOOL_VA] = PAGEPOOL;
 	} else {
 		mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem;
+		g_bfr_index[PAGEPOOL_VA] = PAGEPOOL_VPR;
 	}
 
 	gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size,
@@ -2649,6 +2689,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 		goto clean_up;
 	g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
 	g_bfr_size[GOLDEN_CTX_VA] = mem->size;
+	g_bfr_index[GOLDEN_CTX_VA] = GOLDEN_CTX;
 
 	/* Priv register Access Map */
 	mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
@@ -2658,40 +2699,15 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 		goto clean_up;
 	g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
 	g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size;
+	g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
 
 	c->ch_ctx.global_ctx_buffer_mapped = true;
 	return 0;
 
 clean_up:
-	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
-		if (g_bfr_va[i]) {
-			nvgpu_gmmu_unmap(ch_vm, &gr->global_ctx_buffer[i].mem,
-					 g_bfr_va[i]);
-			g_bfr_va[i] = 0;
-		}
-	}
-	return -ENOMEM;
-}
-
-static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
-{
-	struct vm_gk20a *ch_vm = c->vm;
-	struct gr_gk20a *gr = &c->g->gr;
-	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
-	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
-	u32 i;
-
-	gk20a_dbg_fn("");
+	gr_gk20a_unmap_global_ctx_buffers(c);
 
-	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
-		if (g_bfr_va[i]) {
-			nvgpu_gmmu_unmap(ch_vm, &gr->global_ctx_buffer[i].mem,
-					 g_bfr_va[i]);
-			g_bfr_va[i] = 0;
-			g_bfr_size[i] = 0;
-		}
-	}
-	c->ch_ctx.global_ctx_buffer_mapped = false;
+	return -ENOMEM;
 }
 
 int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
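To make the fixed lookup concrete, here is a small standalone demo reusing the assumed enum ordering from the sketch near the commit message; it is illustrative only, not driver code. It compares the gr->global_ctx_buffer[] entry the old cleanup loop would have indexed (the raw VA-slot number) against the entry the fixed code resolves through g_bfr_index[]. Under these assumptions, for a non-VPR channel the golden-context and priv-access-map slots are where the two disagree, which is why the old path could unmap the wrong nvgpu_mem.

#include <stdio.h>

/* Illustrative enums (assumed ordering; see the earlier sketch). */
enum { CIRCULAR, PAGEPOOL, ATTRIBUTE,
       CIRCULAR_VPR, PAGEPOOL_VPR, ATTRIBUTE_VPR,
       GOLDEN_CTX, PRIV_ACCESS_MAP, NR_GLOBAL_CTX_BUF };
enum { CIRCULAR_VA, PAGEPOOL_VA, ATTRIBUTE_VA,
       GOLDEN_CTX_VA, PRIV_ACCESS_MAP_VA, NR_GLOBAL_CTX_BUF_VA };

int main(void)
{
	/* Buffers a non-VPR channel would map into each VA slot. */
	int g_bfr_index[NR_GLOBAL_CTX_BUF_VA] = {
		[CIRCULAR_VA]        = CIRCULAR,
		[PAGEPOOL_VA]        = PAGEPOOL,
		[ATTRIBUTE_VA]       = ATTRIBUTE,
		[GOLDEN_CTX_VA]      = GOLDEN_CTX,
		[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP,
	};
	int i;

	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++)
		printf("VA slot %d: old unmap indexed buffer %d, fixed unmap uses buffer %d\n",
		       i, i /* old code reused the slot index */, g_bfr_index[i]);
	return 0;
}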