From f329e674f477f0120f9a92a9e7b4945a1ddaefbb Mon Sep 17 00:00:00 2001
From: Sami Kiminki
Date: Fri, 28 Oct 2016 23:05:23 +0300
Subject: gpu: nvgpu: gk20a: Fix FBP/L2 masks, add GET_FBP_L2_MASKS

Fix FBP and ROP_L2 enable masks for Maxwell+. Deprecate rop_l2_en_mask
in GPU characteristics by adding _DEPRECATED postfix. The array is too
small to hold ROP_L2 enable masks for desktop GPUs.

Add NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS to expose the ROP_L2 masks for
userspace.

Bug 200136909
Bug 200241845

Change-Id: I5ad5a5c09f3962ebb631b8d6e7a2f9df02f75ac7
Signed-off-by: Sami Kiminki
Reviewed-on: http://git-master/r/1245294
(cherry picked from commit 0823b33e59defec341ea7919dae4e5f73a36d256)
Reviewed-on: http://git-master/r/1249883
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 28 ++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c   |  7 +++++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h   |  1 +
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c   | 27 ++++++++++++++++++++-------
 4 files changed, 56 insertions(+), 7 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 2fdf719a..493cbe80 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -246,6 +246,30 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 	return err;
 }
 
+/* Copy per-FBP ROP_L2 enable masks to userspace; report the full size. */
+static int gk20a_ctrl_get_fbp_l2_masks(
+	struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args)
+{
+	struct gr_gk20a *gr = &g->gr;
+	const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count;
+
+	if (args->mask_buf_size > 0) {
+		size_t write_size = fbp_l2_mask_size;
+
+		if (write_size > args->mask_buf_size)
+			write_size = args->mask_buf_size;
+
+		/* copy_to_user() returns bytes left uncopied, not an errno */
+		if (copy_to_user((void __user *)(uintptr_t)args->mask_buf_addr,
+				 gr->fbp_rop_l2_en_mask, write_size))
+			return -EFAULT;
+	}
+
+	/* Always report the size of the complete mask array. */
+	args->mask_buf_size = fbp_l2_mask_size;
+	return 0;
+}
+
 static int
nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, struct nvgpu_gpu_l2_fb_args *args) { @@ -936,6 +960,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg err = gk20a_ctrl_get_tpc_masks(g, (struct nvgpu_gpu_get_tpc_masks_args *)buf); break; + case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS: + err = gk20a_ctrl_get_fbp_l2_masks(g, + (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf); + break; case NVGPU_GPU_IOCTL_OPEN_CHANNEL: /* this arg type here, but ..gpu_open_channel_args in nvgpu.h * for consistency - they are the same */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 41ef5424..4dbdb777 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3257,6 +3257,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) kfree(gr->sm_to_cluster); kfree(gr->gpc_skip_mask); kfree(gr->map_tiles); + kfree(gr->fbp_rop_l2_en_mask); gr->gpc_tpc_count = NULL; gr->gpc_zcb_count = NULL; gr->gpc_ppc_count = NULL; @@ -3266,6 +3267,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) gr->pes_tpc_mask[1] = NULL; gr->gpc_skip_mask = NULL; gr->map_tiles = NULL; + gr->fbp_rop_l2_en_mask = NULL; gr->ctx_vars.valid = false; kfree(gr->ctx_vars.ucode.fecs.inst.l); @@ -3336,6 +3338,11 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); + gr->fbp_rop_l2_en_mask = + kzalloc(gr->max_fbps_count * sizeof(u32), GFP_KERNEL); + if (!gr->fbp_rop_l2_en_mask) + goto clean_up; + tmp = gk20a_readl(g, top_tpc_per_gpc_r()); gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 1b7bc252..d03f945c 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -351,6 +351,7 @@ struct gr_gk20a { struct gr_t18x t18x; #endif u32 fbp_en_mask; + u32 *fbp_rop_l2_en_mask; u32 no_of_sm; struct sm_info *sm_to_cluster; 
struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 23144275..37b9737b 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1028,8 +1028,15 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
 static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
 {
 	u32 fbp_en_mask, opt_fbio;
-	opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r());
-	fbp_en_mask = fuse_status_opt_fbio_data_v(opt_fbio);
+	u32 tmp, max_fbps_count;
+
+	tmp = gk20a_readl(g, top_num_fbps_r());
+	max_fbps_count = top_num_fbps_value_v(tmp);
+
+	opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r());
+	/* unsigned 1U: a signed 1 << 31 would be undefined behavior */
+	fbp_en_mask = ((1U << max_fbps_count) - 1) ^
+		fuse_status_opt_fbio_data_v(opt_fbio);
 	return fbp_en_mask;
 }
 
@@ -1051,16 +1058,22 @@ static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
 
 static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
 {
-	struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
-	u32 i, tmp, max_fbps_count;
+	struct gr_gk20a *gr = &g->gr;
+	u32 i, tmp, max_fbps_count, max_ltc_per_fbp;
+	u32 rop_l2_all_en;
+
 	tmp = gk20a_readl(g, top_num_fbps_r());
 	max_fbps_count = top_num_fbps_value_v(tmp);
+	max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp(g);
+	rop_l2_all_en = (1U << max_ltc_per_fbp) - 1;
 
 	/* mask of Rop_L2 for each FBP */
-	for (i = 0; i < max_fbps_count; i++)
-		gpu->rop_l2_en_mask[i] = fuse_status_opt_rop_l2_fbp_r(i);
+	for (i = 0; i < max_fbps_count; i++) {
+		tmp = gk20a_readl(g, fuse_status_opt_rop_l2_fbp_r(i));
+		gr->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp;
+	}
 
-	return gpu->rop_l2_en_mask;
+	return gr->fbp_rop_l2_en_mask;
 }
 
 static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
--
cgit v1.2.2