From 2155dfeaba1714bb00cb86af090aa056aec3acfd Mon Sep 17 00:00:00 2001 From: sujeet baranwal Date: Fri, 6 Mar 2015 11:55:36 -0800 Subject: gpu: nvgpu: Gpu characteristics enhancement New members are added in nvgpu_gpu_characteristics to export more information required especially by CUDA tools. Change-Id: I907f3bcbd272405a13f47ef6236bc2cff01c6c80 Signed-off-by: Sujeet Baranwal Reviewed-on: http://git-master/r/679202 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 32 ++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/gk20a.c | 10 ++++++- drivers/gpu/nvgpu/gk20a/gk20a.h | 5 ++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 43 ++++++++++++++++++++++++++- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h | 26 ++++++++++++++++- drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 52 +++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h | 22 +++++++++++++- drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h | 18 +++++++++++- include/uapi/linux/nvgpu.h | 33 ++++++++++++++++++--- 10 files changed, 233 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c index 5df420ff..7c4ec108 100644 --- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c @@ -496,6 +496,28 @@ static int nvgpu_gpu_ioctl_has_any_exception( return err; } +static int gk20a_ctrl_get_num_vsms(struct gk20a *g, + struct nvgpu_gpu_num_vsms *args) +{ + struct gr_gk20a *gr = &g->gr; + args->num_vsms = gr->no_of_sm; + return 0; +} + +static int gk20a_ctrl_vsm_mapping(struct gk20a *g, + struct nvgpu_gpu_vsms_mapping *args) +{ + int err = 0; + struct gr_gk20a *gr = &g->gr; + size_t write_size = gr->no_of_sm * sizeof(struct sm_info); + + err = copy_to_user((void __user *)(uintptr_t) + args->vsms_map_buf_addr, + gr->sm_to_cluster, write_size); + + return err; +} + long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct platform_device 
*dev = filp->private_data; @@ -705,6 +727,16 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg (struct nvgpu_gpu_tpc_exception_en_status_args *)buf); break; + case NVGPU_GPU_IOCTL_NUM_VSMS: + err = gk20a_ctrl_get_num_vsms(g, + (struct nvgpu_gpu_num_vsms *)buf); + break; + case NVGPU_GPU_IOCTL_VSMS_MAPPING: + err = gk20a_ctrl_vsm_mapping(g, + (struct nvgpu_gpu_vsms_mapping *)buf); + break; + + default: dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); err = -ENOTTY; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 3389aca5..84fc3e93 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -1927,6 +1927,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) gpu->on_board_video_memory_size = 0; /* integrated GPU */ gpu->num_gpc = g->gr.gpc_count; + gpu->max_gpc_count = g->gr.gpc_count; + gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ @@ -1962,9 +1964,15 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; - gpu->gpu_va_bit_count = 40; + memcpy(gpu->chipname, g->ops.name, strlen(g->ops.name)); + gpu->max_fbps_count = g->ops.gr.get_max_fbps_count(g); + gpu->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); + gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); + gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); + g->ops.gr.get_rop_l2_en_mask(g); + gpu->reserved = 0; return 0; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index bcea5655..695e3f69 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -167,6 +167,11 @@ struct gpu_ops { struct gk20a_debug_output *o); int (*update_pc_sampling)(struct channel_gk20a *ch, bool enable); + u32 
(*get_max_fbps_count)(struct gk20a *g); + u32 (*get_fbp_en_mask)(struct gk20a *g); + u32 (*get_max_ltc_per_fbp)(struct gk20a *g); + u32 (*get_max_lts_per_ltc)(struct gk20a *g); + u32* (*get_rop_l2_en_mask)(struct gk20a *g); } gr; const char *name; struct { diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 7e8d4e13..e9b209c4 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3066,6 +3066,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) tmp = gk20a_readl(g, top_num_fbps_r()); gr->max_fbps_count = top_num_fbps_value_v(tmp); + gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); + tmp = gk20a_readl(g, top_tpc_per_gpc_r()); gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp); @@ -7313,6 +7315,41 @@ static u32 gr_gk20a_pagepool_default_size(struct gk20a *g) return gr_scc_pagepool_total_pages_hwmax_value_v(); } +static u32 gr_gk20a_get_max_fbps_count(struct gk20a *g) +{ + u32 max_fbps_count, tmp; + tmp = gk20a_readl(g, top_num_fbps_r()); + max_fbps_count = top_num_fbps_value_v(tmp); + return max_fbps_count; +} + + +static u32 gr_gk20a_get_fbp_en_mask(struct gk20a *g) +{ + u32 fbp_en_mask, opt_fbio; + opt_fbio = gk20a_readl(g, top_fs_status_fbp_r()); + fbp_en_mask = top_fs_status_fbp_cluster_v(opt_fbio); + return fbp_en_mask; +} + +static u32 gr_gk20a_get_max_ltc_per_fbp(struct gk20a *g) +{ + return 1; +} + +static u32 gr_gk20a_get_max_lts_per_ltc(struct gk20a *g) +{ + return 1; +} + +u32 *gr_gk20a_rop_l2_en_mask(struct gk20a *g) +{ + /* gk20a doesnt have rop_l2_en_mask */ + return NULL; +} + + + static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { @@ -7470,5 +7507,9 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; gops->gr.dump_gr_regs = gr_gk20a_dump_gr_status_regs; + gops->gr.get_max_fbps_count = gr_gk20a_get_max_fbps_count; + 
gops->gr.get_fbp_en_mask = gr_gk20a_get_fbp_en_mask; + gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; + gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; + gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; } - diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 6cabe526..5dfaac5f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -295,6 +295,7 @@ struct gr_gk20a { #ifdef CONFIG_ARCH_TEGRA_18x_SOC struct gr_t18x t18x; #endif + u32 fbp_en_mask; u32 no_of_sm; struct sm_info *sm_to_cluster; }; diff --git a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h index f3ca7498..35d9d347 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -138,4 +138,28 @@ static inline u32 top_device_info_entry_enum_v(void) { return 0x00000002; } +static inline u32 top_fs_status_fbp_r(void) +{ + return 0x00022548; +} +static inline u32 top_fs_status_fbp_cluster_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 top_fs_status_fbp_cluster_enable_v(void) +{ + return 0x00000000; +} +static inline u32 top_fs_status_fbp_cluster_enable_f(void) +{ + return 0x0; +} +static inline u32 top_fs_status_fbp_cluster_disable_v(void) +{ + return 0x00000001; +} +static inline u32 top_fs_status_fbp_cluster_disable_f(void) +{ + return 0x1; +} #endif diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 3d99e94d..c199964f 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -26,6 +26,7 @@ #include "hw_gr_gm20b.h" #include "hw_fifo_gm20b.h" 
#include "hw_fb_gm20b.h" +#include "hw_top_gm20b.h" #include "hw_proj_gm20b.h" #include "hw_ctxsw_prog_gm20b.h" #include "hw_fuse_gm20b.h" @@ -975,6 +976,52 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, return 0; } +static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) +{ + u32 fbp_en_mask, opt_fbio; + opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r()); + fbp_en_mask = fuse_status_opt_fbio_data_v(opt_fbio); + return fbp_en_mask; +} + +static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g) +{ + u32 ltc_per_fbp, reg; + reg = gk20a_readl(g, top_ltc_per_fbp_r()); + ltc_per_fbp = top_ltc_per_fbp_value_v(reg); + return ltc_per_fbp; +} + +static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g) +{ + u32 lts_per_ltc, reg; + reg = gk20a_readl(g, top_slices_per_ltc_r()); + lts_per_ltc = top_slices_per_ltc_value_v(reg); + return lts_per_ltc; +} + +u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) +{ + struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; + u32 i, tmp, max_fbps_count; + tmp = gk20a_readl(g, top_num_fbps_r()); + max_fbps_count = top_num_fbps_value_v(tmp); + + /* mask of Rop_L2 for each FBP */ + for (i = 0; i < max_fbps_count; i++) + gpu->rop_l2_en_mask[i] = fuse_status_opt_rop_l2_fbp_r(i); + + return gpu->rop_l2_en_mask; +} + +static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) +{ + u32 tmp, max_fbps_count; + tmp = gk20a_readl(g, top_num_fbps_r()); + max_fbps_count = top_num_fbps_value_v(tmp); + return max_fbps_count; +} + void gm20b_init_gr(struct gpu_ops *gops) { gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; @@ -1020,4 +1067,9 @@ void gm20b_init_gr(struct gpu_ops *gops) gr_gm20b_update_ctxsw_preemption_mode; gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs; gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling; + gops->gr.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask; + gops->gr.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp; + gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; + 
gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; + gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; } diff --git a/drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h index 67a82fa2..729d6541 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -98,4 +98,24 @@ static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_no_f(void) { return 0x0; } +static inline u32 fuse_status_opt_fbio_r(void) +{ + return 0x00021c14; +} +static inline u32 fuse_status_opt_fbio_data_f(u32 v) +{ + return (v & 0xffff) << 0; +} +static inline u32 fuse_status_opt_fbio_data_m(void) +{ + return 0xffff << 0; +} +static inline u32 fuse_status_opt_fbio_data_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +static inline u32 fuse_status_opt_rop_l2_fbp_r(u32 i) +{ + return 0x00021d70 + i*4; +} #endif diff --git a/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h index 42a82a12..c0ad007d 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -74,6 +74,22 @@ static inline u32 top_num_fbps_value_v(u32 r) { return (r >> 0) & 0x1f; } +static inline u32 top_ltc_per_fbp_r(void) +{ + return 0x00022450; +} +static inline u32 top_ltc_per_fbp_value_v(u32 r) +{ + return (r >> 0) & 0x1f; +} +static inline u32 top_slices_per_ltc_r(void) +{ + return 0x0002245c; +} +static inline u32 top_slices_per_ltc_value_v(u32 r) +{ + return (r >> 0) & 0x1f; +} static inline u32 top_num_ltcs_r(void) { return 0x00022454; diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 3a7c7831..ab84b699 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -111,7 +111,6 @@ struct nvgpu_gpu_characteristics { __u32 arch; __u32 impl; __u32 rev; - __u32 num_gpc; __u64 L2_cache_size; /* bytes */ @@ -153,9 +152,22 @@ struct nvgpu_gpu_characteristics { __s16 as_ioctl_nr_last; __u8 gpu_va_bit_count; - __u8 reserved; + __u32 max_fbps_count; + __u32 fbp_en_mask; + __u32 max_ltc_per_fbp; + __u32 max_lts_per_ltc; + __u32 max_tex_per_tpc; + __u32 max_gpc_count; + /* mask of Rop_L2 for each FBP */ + __u32 rop_l2_en_mask[2]; + + + __u8 chipname[8]; + + + /* Notes: - This struct can be safely appended with new fields. 
However, always keep the structure size multiple of 8 and make sure that the binary @@ -282,6 +294,15 @@ struct nvgpu_gpu_tpc_exception_en_status_args { __u64 tpc_exception_en_sm_mask; }; +struct nvgpu_gpu_num_vsms { + __u32 num_vsms; + __u32 reserved; +}; + +struct nvgpu_gpu_vsms_mapping { + __u64 vsms_map_buf_addr; +}; + #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ @@ -316,9 +337,13 @@ struct nvgpu_gpu_tpc_exception_en_status_args { _IOWR(NVGPU_GPU_IOCTL_MAGIC, 16, struct nvgpu_gpu_wait_pause_args) #define NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS \ _IOWR(NVGPU_GPU_IOCTL_MAGIC, 17, struct nvgpu_gpu_tpc_exception_en_status_args) +#define NVGPU_GPU_IOCTL_NUM_VSMS \ + _IOWR(NVGPU_GPU_IOCTL_MAGIC, 18, struct nvgpu_gpu_num_vsms) +#define NVGPU_GPU_IOCTL_VSMS_MAPPING \ + _IOWR(NVGPU_GPU_IOCTL_MAGIC, 19, struct nvgpu_gpu_vsms_mapping) #define NVGPU_GPU_IOCTL_LAST \ - _IOC_NR(NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS) + _IOC_NR(NVGPU_GPU_IOCTL_VSMS_MAPPING) #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_gpu_prepare_compressible_read_args) @@ -913,7 +938,7 @@ struct nvgpu_as_get_va_regions_args { #define NVGPU_AS_IOCTL_GET_VA_REGIONS \ _IOWR(NVGPU_AS_IOCTL_MAGIC, 8, struct nvgpu_as_get_va_regions_args) -#define NVGPU_AS_IOCTL_LAST \ +#define NVGPU_AS_IOCTL_LAST \ _IOC_NR(NVGPU_AS_IOCTL_GET_VA_REGIONS) #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_as_map_buffer_ex_args) -- cgit v1.2.2