author     Alex Waterman <alexw@nvidia.com>                      2017-03-08 20:08:32 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2017-03-29 14:50:21 -0400
commit     bc92e2fb972e039ee33c1f1477204a4d145a8b96
tree       a43df80fe921f3e4b50c70bf67aef30a0b5dd5bb /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent     4022b989aa2e91fe77ed52df49d45838f6d8b9bb
gpu: nvgpu: Use new kmem API functions (gk20a core)
Use the new kmem API functions in core gk20a code. Also add a struct
gk20a pointer to several functions to ensure that the kmem APIs can be
used.
Bug 1799159
Bug 1823380
Change-Id: I41276509c4f0b68e80b989aa55cf94d8dbbdf156
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1318322
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 185
 1 file changed, 91 insertions(+), 94 deletions(-)
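
Every hunk below applies the same mechanical substitution: a raw kmalloc-family call gains the struct gk20a pointer and loses its GFP flags, which the nvgpu kmem layer now supplies on its own. A minimal before/after sketch of that pattern follows; the <nvgpu/kmem.h> include path and the helper names alloc_gpc_table_old/_new are illustrative assumptions, while the nvgpu_kzalloc()/nvgpu_kfree() call shapes are taken from the hunks themselves.

#include <linux/slab.h>   /* kzalloc()/kfree() used by the old code */
#include <nvgpu/kmem.h>   /* assumed location of the nvgpu kmem API */

/* Before: raw slab API, no gk20a context needed at the call site. */
static u32 *alloc_gpc_table_old(struct gr_gk20a *gr)
{
	return kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
}

/* After: the struct gk20a pointer is threaded through and GFP_KERNEL is
 * dropped; the kmem layer owns the allocation flags and whatever
 * bookkeeping it does per device.
 */
static u32 *alloc_gpc_table_new(struct gk20a *g, struct gr_gk20a *gr)
{
	return nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
}

Freeing follows the same shape, kfree(p) becoming nvgpu_kfree(g, p), which is why several functions in this file now take or derive a struct gk20a pointer purely so they can release memory.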
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 172931d7..0e3bcdbe 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -22,7 +22,6 @@
 #include <linux/scatterlist.h>
 #include <linux/debugfs.h>
 #include <uapi/linux/nvgpu.h>
-#include <linux/vmalloc.h>
 #include <linux/dma-mapping.h>
 #include <linux/firmware.h>
 #include <linux/nvhost.h>
@@ -1250,8 +1249,8 @@ static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
-	map_alpha = kzalloc(3 * gr_pd_alpha_ratio_table__size_1_v() *
-			    sizeof(u32), GFP_KERNEL);
+	map_alpha = nvgpu_kzalloc(g, 3 * gr_pd_alpha_ratio_table__size_1_v() *
+				  sizeof(u32));
 	if (!map_alpha)
 		return -ENOMEM;
 	map_beta = map_alpha + gr_pd_alpha_ratio_table__size_1_v();
@@ -1321,7 +1320,7 @@ static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
 		}
 	}
 
-	kfree(map_alpha);
+	nvgpu_kfree(g, map_alpha);
 	return 0;
 }
 
@@ -1744,14 +1743,14 @@ restore_fe_go_idle:
 	if (err)
 		goto clean_up;
 
-	kfree(gr->sm_error_states);
+	nvgpu_kfree(g, gr->sm_error_states);
 
 	/* we need to allocate this after g->ops.gr.init_fs_state() since
 	 * we initialize gr->no_of_sm in this function
 	 */
-	gr->sm_error_states = kzalloc(
+	gr->sm_error_states = nvgpu_kzalloc(g,
 		sizeof(struct nvgpu_dbg_gpu_sm_error_state_record)
-		* gr->no_of_sm, GFP_KERNEL);
+		* gr->no_of_sm);
 	if (!gr->sm_error_states) {
 		err = -ENOMEM;
 		goto restore_fe_go_idle;
@@ -1794,7 +1793,7 @@ restore_fe_go_idle:
 	if (gr->ctx_vars.local_golden_image == NULL) {
 
 		gr->ctx_vars.local_golden_image =
-			vzalloc(gr->ctx_vars.golden_image_size);
+			nvgpu_vzalloc(g, gr->ctx_vars.golden_image_size);
 
 		if (gr->ctx_vars.local_golden_image == NULL) {
 			err = -ENOMEM;
@@ -2949,7 +2948,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 	gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
 	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 
-	gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL);
+	gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
 	if (!gr_ctx)
 		return -ENOMEM;
 
@@ -2975,7 +2974,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 err_free_mem:
 	gk20a_gmmu_free(g, &gr_ctx->mem);
 err_free_ctx:
-	kfree(gr_ctx);
+	nvgpu_kfree(g, gr_ctx);
 	gr_ctx = NULL;
 
 	return err;
@@ -3023,7 +3022,7 @@ void gr_gk20a_free_gr_ctx(struct gk20a *g,
 	gk20a_gmmu_unmap(vm, gr_ctx->mem.gpu_va,
 			 gr_ctx->mem.size, gk20a_mem_flag_none);
 	gk20a_gmmu_free(g, &gr_ctx->mem);
-	kfree(gr_ctx);
+	nvgpu_kfree(g, gr_ctx);
 }
 
 void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
@@ -3370,18 +3369,18 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 
 	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
 
-	kfree(gr->sm_error_states);
-	kfree(gr->gpc_tpc_count);
-	kfree(gr->gpc_zcb_count);
-	kfree(gr->gpc_ppc_count);
-	kfree(gr->pes_tpc_count[0]);
-	kfree(gr->pes_tpc_count[1]);
-	kfree(gr->pes_tpc_mask[0]);
-	kfree(gr->pes_tpc_mask[1]);
-	kfree(gr->sm_to_cluster);
-	kfree(gr->gpc_skip_mask);
-	kfree(gr->map_tiles);
-	kfree(gr->fbp_rop_l2_en_mask);
+	nvgpu_kfree(g, gr->sm_error_states);
+	nvgpu_kfree(g, gr->gpc_tpc_count);
+	nvgpu_kfree(g, gr->gpc_zcb_count);
+	nvgpu_kfree(g, gr->gpc_ppc_count);
+	nvgpu_kfree(g, gr->pes_tpc_count[0]);
+	nvgpu_kfree(g, gr->pes_tpc_count[1]);
+	nvgpu_kfree(g, gr->pes_tpc_mask[0]);
+	nvgpu_kfree(g, gr->pes_tpc_mask[1]);
+	nvgpu_kfree(g, gr->sm_to_cluster);
+	nvgpu_kfree(g, gr->gpc_skip_mask);
+	nvgpu_kfree(g, gr->map_tiles);
+	nvgpu_kfree(g, gr->fbp_rop_l2_en_mask);
 	gr->gpc_tpc_count = NULL;
 	gr->gpc_zcb_count = NULL;
 	gr->gpc_ppc_count = NULL;
@@ -3394,31 +3393,31 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 	gr->fbp_rop_l2_en_mask = NULL;
 
 	gr->ctx_vars.valid = false;
-	kfree(gr->ctx_vars.ucode.fecs.inst.l);
-	kfree(gr->ctx_vars.ucode.fecs.data.l);
-	kfree(gr->ctx_vars.ucode.gpccs.inst.l);
-	kfree(gr->ctx_vars.ucode.gpccs.data.l);
-	kfree(gr->ctx_vars.sw_bundle_init.l);
-	kfree(gr->ctx_vars.sw_veid_bundle_init.l);
-	kfree(gr->ctx_vars.sw_method_init.l);
-	kfree(gr->ctx_vars.sw_ctx_load.l);
-	kfree(gr->ctx_vars.sw_non_ctx_load.l);
-	kfree(gr->ctx_vars.ctxsw_regs.sys.l);
-	kfree(gr->ctx_vars.ctxsw_regs.gpc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.tpc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.ppc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l);
-	kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.pm_ppc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.perf_sys.l);
-	kfree(gr->ctx_vars.ctxsw_regs.fbp.l);
-	kfree(gr->ctx_vars.ctxsw_regs.perf_gpc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.fbp_router.l);
-	kfree(gr->ctx_vars.ctxsw_regs.gpc_router.l);
-	kfree(gr->ctx_vars.ctxsw_regs.pm_ltc.l);
-	kfree(gr->ctx_vars.ctxsw_regs.pm_fbpa.l);
+	nvgpu_kfree(g, gr->ctx_vars.ucode.fecs.inst.l);
+	nvgpu_kfree(g, gr->ctx_vars.ucode.fecs.data.l);
+	nvgpu_kfree(g, gr->ctx_vars.ucode.gpccs.inst.l);
+	nvgpu_kfree(g, gr->ctx_vars.ucode.gpccs.data.l);
+	nvgpu_kfree(g, gr->ctx_vars.sw_bundle_init.l);
+	nvgpu_kfree(g, gr->ctx_vars.sw_veid_bundle_init.l);
+	nvgpu_kfree(g, gr->ctx_vars.sw_method_init.l);
+	nvgpu_kfree(g, gr->ctx_vars.sw_ctx_load.l);
+	nvgpu_kfree(g, gr->ctx_vars.sw_non_ctx_load.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.sys.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.gpc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.tpc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.ppc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_sys.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_gpc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_tpc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_ppc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.perf_sys.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.fbp.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.perf_gpc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.fbp_router.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.gpc_router.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_ltc.l);
+	nvgpu_kfree(g, gr->ctx_vars.ctxsw_regs.pm_fbpa.l);
 
 	vfree(gr->ctx_vars.local_golden_image);
 	gr->ctx_vars.local_golden_image = NULL;
@@ -3464,7 +3463,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
 
 	gr->fbp_rop_l2_en_mask =
-		kzalloc(gr->max_fbps_count * sizeof(u32), GFP_KERNEL);
+		nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32));
 	if (!gr->fbp_rop_l2_en_mask)
 		goto clean_up;
 
@@ -3491,14 +3490,14 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 			goto clean_up;
 	}
 
-	gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
-	gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+	gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
+	gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
+	gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
+	gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
 
 	gr->gpc_skip_mask =
-		kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
-			GFP_KERNEL);
+		nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() *
+			      4 * sizeof(u32));
 
 	if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count ||
 	    !gr->gpc_ppc_count || !gr->gpc_skip_mask)
@@ -3526,11 +3525,11 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 		for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
 			if (!gr->pes_tpc_count[pes_index]) {
 				gr->pes_tpc_count[pes_index] =
-					kzalloc(gr->gpc_count * sizeof(u32),
-						GFP_KERNEL);
+					nvgpu_kzalloc(g, gr->gpc_count *
+						      sizeof(u32));
 				gr->pes_tpc_mask[pes_index] =
-					kzalloc(gr->gpc_count * sizeof(u32),
-						GFP_KERNEL);
+					nvgpu_kzalloc(g, gr->gpc_count *
+						      sizeof(u32));
 				if (!gr->pes_tpc_count[pes_index] ||
 				    !gr->pes_tpc_mask[pes_index])
 					goto clean_up;
@@ -3585,8 +3584,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 		gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
 	}
 
-	gr->sm_to_cluster = kzalloc(gr->gpc_count * gr->tpc_count *
-			sizeof(struct sm_info), GFP_KERNEL);
+	gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count * gr->tpc_count *
+			sizeof(struct sm_info));
 	gr->no_of_sm = 0;
 
 	gk20a_dbg_info("fbps: %d", gr->num_fbps);
@@ -3696,14 +3695,13 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
 	int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
 	int map_tile_count = num_gpcs * num_tpc_per_gpc;
 
-	init_frac = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
-	init_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
-	run_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
+	init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
+	init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
+	run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
 	sorted_num_tpcs =
-		kzalloc(num_gpcs * num_tpc_per_gpc * sizeof(s32),
-			GFP_KERNEL);
+		nvgpu_kzalloc(g, num_gpcs * num_tpc_per_gpc * sizeof(s32));
 	sorted_to_unsorted_gpc_map =
-		kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
+		nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
 
 	if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
 	      sorted_to_unsorted_gpc_map)) {
@@ -3764,15 +3762,14 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
 		}
 
 		if (delete_map) {
-			kfree(gr->map_tiles);
+			nvgpu_kfree(g, gr->map_tiles);
 			gr->map_tiles = NULL;
 			gr->map_tile_count = 0;
 		}
 	}
 
 	if (gr->map_tiles == NULL) {
-		gr->map_tiles = kzalloc(map_tile_count * sizeof(u8),
-					GFP_KERNEL);
+		gr->map_tiles = nvgpu_kzalloc(g, num_gpcs * sizeof(u8));
 		if (gr->map_tiles == NULL) {
 			ret = -ENOMEM;
 			goto clean_up;
@@ -3838,11 +3835,11 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
 	}
 
 clean_up:
-	kfree(init_frac);
-	kfree(init_err);
-	kfree(run_err);
-	kfree(sorted_num_tpcs);
-	kfree(sorted_to_unsorted_gpc_map);
+	nvgpu_kfree(g, init_frac);
+	nvgpu_kfree(g, init_err);
+	nvgpu_kfree(g, run_err);
+	nvgpu_kfree(g, sorted_num_tpcs);
+	nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
 
 	if (ret)
 		gk20a_err(dev_from_gk20a(g), "fail");
@@ -4588,20 +4585,20 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
 		/* Total 8 fields per map reg i.e. tile_0 to tile_7*/
 		zcull_alloc_num += (zcull_alloc_num % 8);
 	}
-	zcull_map_tiles = kzalloc(zcull_alloc_num *
-			sizeof(u32), GFP_KERNEL);
+	zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
+
 	if (!zcull_map_tiles) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to allocate zcull map titles");
 		return -ENOMEM;
 	}
-	zcull_bank_counters = kzalloc(zcull_alloc_num *
-			sizeof(u32), GFP_KERNEL);
+
+	zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
 
 	if (!zcull_bank_counters) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to allocate zcull bank counters");
-		kfree(zcull_map_tiles);
+		nvgpu_kfree(g, zcull_map_tiles);
 		return -ENOMEM;
 	}
 
@@ -4616,8 +4613,8 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
 	g->ops.gr.program_zcull_mapping(g, zcull_alloc_num,
 			zcull_map_tiles);
 
-	kfree(zcull_map_tiles);
-	kfree(zcull_bank_counters);
+	nvgpu_kfree(g, zcull_map_tiles);
+	nvgpu_kfree(g, zcull_bank_counters);
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
 		gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
@@ -4891,14 +4888,14 @@ restore_fe_go_idle:
 	if (err)
 		goto out;
 
-	kfree(gr->sm_error_states);
+	nvgpu_kfree(g, gr->sm_error_states);
 
 	/* we need to allocate this after g->ops.gr.init_fs_state() since
 	 * we initialize gr->no_of_sm in this function
 	 */
-	gr->sm_error_states = kzalloc(
-		sizeof(struct nvgpu_dbg_gpu_sm_error_state_record)
-		* gr->no_of_sm, GFP_KERNEL);
+	gr->sm_error_states = nvgpu_kzalloc(g,
+		sizeof(struct nvgpu_dbg_gpu_sm_error_state_record) *
+		gr->no_of_sm);
 	if (!gr->sm_error_states) {
 		err = -ENOMEM;
 		goto restore_fe_go_idle;
@@ -6945,7 +6942,7 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 	if (!g->gr.ctx_vars.golden_image_initialized)
 		return -ENODEV;
 
-	priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL);
+	priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
 	if (!priv_registers) {
 		gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets);
 		err = PTR_ERR(priv_registers);
@@ -6991,7 +6988,7 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 	*num_offsets = num_registers;
 cleanup:
 	if (!IS_ERR_OR_NULL(priv_registers))
-		kfree(priv_registers);
+		nvgpu_kfree(g, priv_registers);
 
 	return err;
 }
@@ -7019,7 +7016,7 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
 	if (!g->gr.ctx_vars.golden_image_initialized)
 		return -ENODEV;
 
-	priv_registers = kzalloc(sizeof(u32) * potential_offsets, GFP_KERNEL);
+	priv_registers = nvgpu_kzalloc(g, sizeof(u32) * potential_offsets);
 	if (ZERO_OR_NULL_PTR(priv_registers)) {
 		gk20a_dbg_fn("failed alloc for potential_offsets=%d", potential_offsets);
 		return -ENOMEM;
@@ -7060,7 +7057,7 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g,
 
 	*num_offsets = num_registers;
 cleanup:
-	kfree(priv_registers);
+	nvgpu_kfree(g, priv_registers);
 
 	return err;
 }
@@ -8352,7 +8349,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	}
 
 	/* they're the same size, so just use one alloc for both */
-	offsets = kzalloc(2 * sizeof(u32) * max_offsets, GFP_KERNEL);
+	offsets = nvgpu_kzalloc(g, 2 * sizeof(u32) * max_offsets);
 	if (!offsets) {
 		err = -ENOMEM;
 		goto cleanup;
@@ -8502,7 +8499,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 
 cleanup:
 	if (offsets)
-		kfree(offsets);
+		nvgpu_kfree(g, offsets);
 
 	if (ch_ctx->patch_ctx.mem.cpu_va)
 		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
@@ -9025,7 +9022,7 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
-	ops = kcalloc(g->gr.no_of_sm, sizeof(*ops), GFP_KERNEL);
+	ops = nvgpu_kcalloc(g, g->gr.no_of_sm, sizeof(*ops));
 	if (!ops)
 		return -ENOMEM;
 	for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
@@ -9068,7 +9065,7 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 	err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0);
 	if (err)
 		gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
-	kfree(ops);
+	nvgpu_kfree(g, ops);
 	return err;
 }
 