diff options
author | Alex Waterman <alexw@nvidia.com> | 2017-07-21 16:11:56 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-07-27 14:14:59 -0400 |
commit | ea180847e146450b53de5c41c3f43b19391b4c5d (patch) | |
tree | ef4782d69605a6632bbc6b19212263d6cb9abf97 /drivers/gpu/nvgpu/gk20a | |
parent | 1b5035132b8f28cb05a21eca4d2f7a4771e59914 (diff) |
gpu: nvgpu: Fix gr ctx unmap logic
The GR context buffers were not being properly unmapped. The awkward
VPR vs non-VPR context setup requires some extra checks when determining
which nvgpu_mem is associated with what GPU VA (which are tracked
separately in a different sized array).
Change-Id: I4c7be1c5b7835aea4309a142df5b0bdfaae91e4c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1524689
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 74 |
2 files changed, 46 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 46a9b0a7..a9ccd93f 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -57,6 +57,7 @@ struct channel_ctx_gk20a { | |||
57 | struct pm_ctx_desc pm_ctx; | 57 | struct pm_ctx_desc pm_ctx; |
58 | u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; | 58 | u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; |
59 | u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; | 59 | u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; |
60 | int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA]; | ||
60 | bool global_ctx_buffer_mapped; | 61 | bool global_ctx_buffer_mapped; |
61 | struct ctx_header_desc ctx_header; | 62 | struct ctx_header_desc ctx_header; |
62 | }; | 63 | }; |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index cc57b09f..d7c80b91 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -2581,24 +2581,60 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) | |||
2581 | return -ENOMEM; | 2581 | return -ENOMEM; |
2582 | } | 2582 | } |
2583 | 2583 | ||
2584 | static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) | ||
2585 | { | ||
2586 | struct vm_gk20a *ch_vm = c->vm; | ||
2587 | struct gr_gk20a *gr = &c->g->gr; | ||
2588 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | ||
2589 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | ||
2590 | int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index; | ||
2591 | u32 i; | ||
2592 | |||
2593 | gk20a_dbg_fn(""); | ||
2594 | |||
2595 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | ||
2596 | if (g_bfr_index[i]) { | ||
2597 | struct nvgpu_mem *mem; | ||
2598 | |||
2599 | /* | ||
2600 | * Translate from VA index to buffer index to determine | ||
2601 | * the correct struct nvgpu_mem to use. Handles the VPR | ||
2602 | * vs non-VPR difference in context images. | ||
2603 | */ | ||
2604 | mem = &gr->global_ctx_buffer[g_bfr_index[i]].mem; | ||
2605 | |||
2606 | nvgpu_gmmu_unmap(ch_vm, mem, g_bfr_va[i]); | ||
2607 | } | ||
2608 | } | ||
2609 | |||
2610 | memset(g_bfr_va, 0, sizeof(c->ch_ctx.global_ctx_buffer_va)); | ||
2611 | memset(g_bfr_size, 0, sizeof(c->ch_ctx.global_ctx_buffer_size)); | ||
2612 | memset(g_bfr_index, 0, sizeof(c->ch_ctx.global_ctx_buffer_index)); | ||
2613 | |||
2614 | c->ch_ctx.global_ctx_buffer_mapped = false; | ||
2615 | } | ||
2616 | |||
2584 | static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | 2617 | static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, |
2585 | struct channel_gk20a *c) | 2618 | struct channel_gk20a *c) |
2586 | { | 2619 | { |
2587 | struct vm_gk20a *ch_vm = c->vm; | 2620 | struct vm_gk20a *ch_vm = c->vm; |
2588 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 2621 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; |
2589 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | 2622 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; |
2623 | int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index; | ||
2590 | struct gr_gk20a *gr = &g->gr; | 2624 | struct gr_gk20a *gr = &g->gr; |
2591 | struct nvgpu_mem *mem; | 2625 | struct nvgpu_mem *mem; |
2592 | u64 gpu_va; | 2626 | u64 gpu_va; |
2593 | u32 i; | 2627 | |
2594 | gk20a_dbg_fn(""); | 2628 | gk20a_dbg_fn(""); |
2595 | 2629 | ||
2596 | /* Circular Buffer */ | 2630 | /* Circular Buffer */ |
2597 | if (!c->vpr || | 2631 | if (!c->vpr || |
2598 | (gr->global_ctx_buffer[CIRCULAR_VPR].mem.priv.sgt == NULL)) { | 2632 | (gr->global_ctx_buffer[CIRCULAR_VPR].mem.priv.sgt == NULL)) { |
2599 | mem = &gr->global_ctx_buffer[CIRCULAR].mem; | 2633 | mem = &gr->global_ctx_buffer[CIRCULAR].mem; |
2634 | g_bfr_index[CIRCULAR_VA] = CIRCULAR; | ||
2600 | } else { | 2635 | } else { |
2601 | mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem; | 2636 | mem = &gr->global_ctx_buffer[CIRCULAR_VPR].mem; |
2637 | g_bfr_index[CIRCULAR_VA] = CIRCULAR_VPR; | ||
2602 | } | 2638 | } |
2603 | 2639 | ||
2604 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, | 2640 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, |
@@ -2613,8 +2649,10 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2613 | if (!c->vpr || | 2649 | if (!c->vpr || |
2614 | (gr->global_ctx_buffer[ATTRIBUTE_VPR].mem.priv.sgt == NULL)) { | 2650 | (gr->global_ctx_buffer[ATTRIBUTE_VPR].mem.priv.sgt == NULL)) { |
2615 | mem = &gr->global_ctx_buffer[ATTRIBUTE].mem; | 2651 | mem = &gr->global_ctx_buffer[ATTRIBUTE].mem; |
2652 | g_bfr_index[ATTRIBUTE_VA] = ATTRIBUTE; | ||
2616 | } else { | 2653 | } else { |
2617 | mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem; | 2654 | mem = &gr->global_ctx_buffer[ATTRIBUTE_VPR].mem; |
2655 | g_bfr_index[ATTRIBUTE_VA] = ATTRIBUTE_VPR; | ||
2618 | } | 2656 | } |
2619 | 2657 | ||
2620 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, | 2658 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, |
@@ -2629,8 +2667,10 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2629 | if (!c->vpr || | 2667 | if (!c->vpr || |
2630 | (gr->global_ctx_buffer[PAGEPOOL_VPR].mem.priv.sgt == NULL)) { | 2668 | (gr->global_ctx_buffer[PAGEPOOL_VPR].mem.priv.sgt == NULL)) { |
2631 | mem = &gr->global_ctx_buffer[PAGEPOOL].mem; | 2669 | mem = &gr->global_ctx_buffer[PAGEPOOL].mem; |
2670 | g_bfr_index[PAGEPOOL_VA] = PAGEPOOL; | ||
2632 | } else { | 2671 | } else { |
2633 | mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem; | 2672 | mem = &gr->global_ctx_buffer[PAGEPOOL_VPR].mem; |
2673 | g_bfr_index[PAGEPOOL_VA] = PAGEPOOL_VPR; | ||
2634 | } | 2674 | } |
2635 | 2675 | ||
2636 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, | 2676 | gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, |
@@ -2649,6 +2689,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2649 | goto clean_up; | 2689 | goto clean_up; |
2650 | g_bfr_va[GOLDEN_CTX_VA] = gpu_va; | 2690 | g_bfr_va[GOLDEN_CTX_VA] = gpu_va; |
2651 | g_bfr_size[GOLDEN_CTX_VA] = mem->size; | 2691 | g_bfr_size[GOLDEN_CTX_VA] = mem->size; |
2692 | g_bfr_index[GOLDEN_CTX_VA] = GOLDEN_CTX; | ||
2652 | 2693 | ||
2653 | /* Priv register Access Map */ | 2694 | /* Priv register Access Map */ |
2654 | mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; | 2695 | mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; |
@@ -2658,40 +2699,15 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2658 | goto clean_up; | 2699 | goto clean_up; |
2659 | g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; | 2700 | g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va; |
2660 | g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; | 2701 | g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; |
2702 | g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; | ||
2661 | 2703 | ||
2662 | c->ch_ctx.global_ctx_buffer_mapped = true; | 2704 | c->ch_ctx.global_ctx_buffer_mapped = true; |
2663 | return 0; | 2705 | return 0; |
2664 | 2706 | ||
2665 | clean_up: | 2707 | clean_up: |
2666 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | 2708 | gr_gk20a_unmap_global_ctx_buffers(c); |
2667 | if (g_bfr_va[i]) { | ||
2668 | nvgpu_gmmu_unmap(ch_vm, &gr->global_ctx_buffer[i].mem, | ||
2669 | g_bfr_va[i]); | ||
2670 | g_bfr_va[i] = 0; | ||
2671 | } | ||
2672 | } | ||
2673 | return -ENOMEM; | ||
2674 | } | ||
2675 | |||
2676 | static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) | ||
2677 | { | ||
2678 | struct vm_gk20a *ch_vm = c->vm; | ||
2679 | struct gr_gk20a *gr = &c->g->gr; | ||
2680 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | ||
2681 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | ||
2682 | u32 i; | ||
2683 | |||
2684 | gk20a_dbg_fn(""); | ||
2685 | 2709 | ||
2686 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | 2710 | return -ENOMEM; |
2687 | if (g_bfr_va[i]) { | ||
2688 | nvgpu_gmmu_unmap(ch_vm, &gr->global_ctx_buffer[i].mem, | ||
2689 | g_bfr_va[i]); | ||
2690 | g_bfr_va[i] = 0; | ||
2691 | g_bfr_size[i] = 0; | ||
2692 | } | ||
2693 | } | ||
2694 | c->ch_ctx.global_ctx_buffer_mapped = false; | ||
2695 | } | 2711 | } |
2696 | 2712 | ||
2697 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | 2713 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, |