author     Seema Khowala <seemaj@nvidia.com>                    2018-02-28 15:25:13 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-03-06 01:22:36 -0500
commit     33d1e22230726a2176e72567a95bfceb6062a017 (patch)
tree       05e5b1387df602500638382c123d22af7433d92b
parent     c2d01257d79e52a6f4e0ae4335df1b125284accf (diff)
gpu: nvgpu: clean up memory leaks in gr init
This is to resolve a memory leak seen after modifying the tpc_fs_mask
sysfs node, which sets gr.sw_ready to false and forces gr to
re-initialize:

  echo 1 > /sys/devices/gpu.0/force_idle
  echo 5 > /sys/devices/gpu.0/tpc_fs_mask
  echo 0 > /sys/devices/gpu.0/force_idle

Bug 200393029

Change-Id: I76299f53fc87823071c672ec682c3eb51f72f513
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1666018
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
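The diff below applies one pattern throughout gr init: treat a non-NULL pointer
(or a valid nvgpu_mem) as "already allocated by a previous init" and just clear
it, instead of unconditionally allocating again. The following is a minimal
standalone sketch of that pattern, not part of the patch; the names (fake_gr,
init_gpc_tpc_count) are made up, and plain calloc()/memset() stand in for the
driver's nvgpu_kzalloc().

/*
 * Illustrative sketch only: allocate a zeroed buffer on the first init,
 * and on later inits reuse the existing buffer and simply clear it, so
 * repeated gr re-initialization does not leak the previous allocation.
 */
#include <stdlib.h>
#include <string.h>

struct fake_gr {
        unsigned int gpc_count;
        unsigned int *gpc_tpc_count;    /* allocated once, reused afterwards */
};

static int init_gpc_tpc_count(struct fake_gr *gr)
{
        if (gr->gpc_tpc_count == NULL) {
                /* first init: allocate zeroed storage */
                gr->gpc_tpc_count = calloc(gr->gpc_count,
                                           sizeof(*gr->gpc_tpc_count));
                if (gr->gpc_tpc_count == NULL)
                        return -1;
        } else {
                /* re-init: keep the old buffer, just clear it */
                memset(gr->gpc_tpc_count, 0,
                       gr->gpc_count * sizeof(*gr->gpc_tpc_count));
        }
        return 0;
}

int main(void)
{
        struct fake_gr gr = { .gpc_count = 4, .gpc_tpc_count = NULL };

        /* first call allocates; the second only clears, so nothing leaks */
        if (init_gpc_tpc_count(&gr) || init_gpc_tpc_count(&gr))
                return 1;
        free(gr.gpc_tpc_count);
        return 0;
}

For the DMA-backed buffers (compbit_store.mem, mmu_wr_mem, mmu_rd_mem) the
patch uses nvgpu_mem_is_valid() as the "already allocated" check instead of a
NULL test, since those are nvgpu_mem structs rather than plain pointers.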
-rw-r--r--  drivers/gpu/nvgpu/common/ltc.c      6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  79
2 files changed, 66 insertions, 19 deletions
diff --git a/drivers/gpu/nvgpu/common/ltc.c b/drivers/gpu/nvgpu/common/ltc.c
index 006e2ed2..024c2e49 100644
--- a/drivers/gpu/nvgpu/common/ltc.c
+++ b/drivers/gpu/nvgpu/common/ltc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -22,6 +22,7 @@
 
 #include <nvgpu/ltc.h>
 #include <nvgpu/dma.h>
+#include <nvgpu/nvgpu_mem.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
@@ -54,6 +55,9 @@ int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size)
         struct gr_gk20a *gr = &g->gr;
         unsigned long flags = 0;
 
+        if (nvgpu_mem_is_valid(&gr->compbit_store.mem))
+                return 0;
+
         if (!nvgpu_iommuable(g))
                 flags = NVGPU_DMA_FORCE_CONTIGUOUS;
 
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 7160ab6f..09e8aa0f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3124,10 +3124,15 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 
         gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
 
-        gr->fbp_rop_l2_en_mask =
-                nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32));
-        if (!gr->fbp_rop_l2_en_mask)
-                goto clean_up;
+        if (gr->fbp_rop_l2_en_mask == NULL) {
+                gr->fbp_rop_l2_en_mask =
+                        nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32));
+                if (!gr->fbp_rop_l2_en_mask)
+                        goto clean_up;
+        } else {
+                memset(gr->fbp_rop_l2_en_mask, 0, gr->max_fbps_count *
+                                sizeof(u32));
+        }
 
         tmp = gk20a_readl(g, top_tpc_per_gpc_r());
         gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
@@ -3152,13 +3157,40 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
                 goto clean_up;
         }
 
-        gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
-        gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
-        gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
-        gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count * sizeof(u32));
+        if (gr->gpc_tpc_count == NULL)
+                gr->gpc_tpc_count = nvgpu_kzalloc(g, gr->gpc_count *
+                                        sizeof(u32));
+        else
+                memset(gr->gpc_tpc_count, 0, gr->gpc_count *
+                                        sizeof(u32));
+
+        if (gr->gpc_tpc_mask == NULL)
+                gr->gpc_tpc_mask = nvgpu_kzalloc(g, gr->gpc_count *
+                                        sizeof(u32));
+        else
+                memset(gr->gpc_tpc_mask, 0, gr->gpc_count *
+                                        sizeof(u32));
+
+        if (gr->gpc_zcb_count == NULL)
+                gr->gpc_zcb_count = nvgpu_kzalloc(g, gr->gpc_count *
+                                        sizeof(u32));
+        else
+                memset(gr->gpc_zcb_count, 0, gr->gpc_count *
+                                        sizeof(u32));
+
+        if (gr->gpc_ppc_count == NULL)
+                gr->gpc_ppc_count = nvgpu_kzalloc(g, gr->gpc_count *
+                                        sizeof(u32));
+        else
+                memset(gr->gpc_ppc_count, 0, gr->gpc_count *
+                                        sizeof(u32));
 
-        gr->gpc_skip_mask =
-                nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() *
+        if (gr->gpc_skip_mask == NULL)
+                gr->gpc_skip_mask =
+                        nvgpu_kzalloc(g, gr_pd_dist_skip_table__size_1_v() *
+                                  4 * sizeof(u32));
+        else
+                memset(gr->gpc_skip_mask, 0, gr_pd_dist_skip_table__size_1_v() *
                           4 * sizeof(u32));
 
         if (!gr->gpc_tpc_count || !gr->gpc_tpc_mask || !gr->gpc_zcb_count ||
@@ -3246,8 +3278,15 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
                 gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
         }
 
-        gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count * gr->tpc_count *
-                                sm_per_tpc * sizeof(struct sm_info));
+        /* allocate for max tpc per gpc */
+        if (gr->sm_to_cluster == NULL)
+                gr->sm_to_cluster = nvgpu_kzalloc(g, gr->gpc_count *
+                                        gr->max_tpc_per_gpc_count *
+                                        sm_per_tpc * sizeof(struct sm_info));
+        else
+                memset(gr->sm_to_cluster, 0, gr->gpc_count *
+                                        gr->max_tpc_per_gpc_count *
+                                        sm_per_tpc * sizeof(struct sm_info));
         gr->no_of_sm = 0;
 
         gk20a_dbg_info("fbps: %d", gr->num_fbps);
@@ -3316,13 +3355,17 @@ static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
 {
         int err;
 
-        err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_wr_mem);
-        if (err)
-                goto err;
+        if (!nvgpu_mem_is_valid(&gr->mmu_wr_mem)) {
+                err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_wr_mem);
+                if (err)
+                        goto err;
+        }
 
-        err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_rd_mem);
-        if (err)
-                goto err_free_wr_mem;
+        if (!nvgpu_mem_is_valid(&gr->mmu_rd_mem)) {
+                err = nvgpu_dma_alloc_sys(g, 0x1000, &gr->mmu_rd_mem);
+                if (err)
+                        goto err_free_wr_mem;
+        }
         return 0;
 
 err_free_wr_mem: