summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu
diff options
context:
space:
mode:
authorsujeet baranwal <sbaranwal@nvidia.com>2015-03-06 14:55:36 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-04-04 21:58:05 -0400
commit2155dfeaba1714bb00cb86af090aa056aec3acfd (patch)
tree545b791cbf1271750f8728e1e5ec30d107a4ef7b /drivers/gpu/nvgpu
parent895675e1d5790e2361b22edb50d702f7dd9a8edd (diff)
gpu: nvgpu: GPU characteristics enhancement
New members are added to nvgpu_gpu_characteristics to export more information, required especially by CUDA tools. Change-Id: I907f3bcbd272405a13f47ef6236bc2cff01c6c80 Signed-off-by: Sujeet Baranwal <sbaranwal@nvidia.com> Reviewed-on: http://git-master/r/679202 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c32
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c10
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h5
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c43
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h26
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c52
-rw-r--r--drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h22
-rw-r--r--drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h18
9 files changed, 204 insertions, 5 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 5df420ff..7c4ec108 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -496,6 +496,28 @@ static int nvgpu_gpu_ioctl_has_any_exception(
496 return err; 496 return err;
497} 497}
498 498
499static int gk20a_ctrl_get_num_vsms(struct gk20a *g,
500 struct nvgpu_gpu_num_vsms *args)
501{
502 struct gr_gk20a *gr = &g->gr;
503 args->num_vsms = gr->no_of_sm;
504 return 0;
505}
506
507static int gk20a_ctrl_vsm_mapping(struct gk20a *g,
508 struct nvgpu_gpu_vsms_mapping *args)
509{
510 int err = 0;
511 struct gr_gk20a *gr = &g->gr;
512 size_t write_size = gr->no_of_sm * sizeof(struct sm_info);
513
514 err = copy_to_user((void __user *)(uintptr_t)
515 args->vsms_map_buf_addr,
516 gr->sm_to_cluster, write_size);
517
518 return err;
519}
520
499long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 521long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
500{ 522{
501 struct platform_device *dev = filp->private_data; 523 struct platform_device *dev = filp->private_data;
@@ -705,6 +727,16 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
705 (struct nvgpu_gpu_tpc_exception_en_status_args *)buf); 727 (struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
706 break; 728 break;
707 729
730 case NVGPU_GPU_IOCTL_NUM_VSMS:
731 err = gk20a_ctrl_get_num_vsms(g,
732 (struct nvgpu_gpu_num_vsms *)buf);
733 break;
734 case NVGPU_GPU_IOCTL_VSMS_MAPPING:
735 err = gk20a_ctrl_vsm_mapping(g,
736 (struct nvgpu_gpu_vsms_mapping *)buf);
737 break;
738
739
708 default: 740 default:
709 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); 741 dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
710 err = -ENOTTY; 742 err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 3389aca5..84fc3e93 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1927,6 +1927,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
1927 gpu->on_board_video_memory_size = 0; /* integrated GPU */ 1927 gpu->on_board_video_memory_size = 0; /* integrated GPU */
1928 1928
1929 gpu->num_gpc = g->gr.gpc_count; 1929 gpu->num_gpc = g->gr.gpc_count;
1930 gpu->max_gpc_count = g->gr.gpc_count;
1931
1930 gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count; 1932 gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;
1931 1933
1932 gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ 1934 gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
@@ -1962,9 +1964,15 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
1962 gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST; 1964 gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
1963 gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST; 1965 gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
1964 gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST; 1966 gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
1965
1966 gpu->gpu_va_bit_count = 40; 1967 gpu->gpu_va_bit_count = 40;
1967 1968
1969 memcpy(gpu->chipname, g->ops.name, strlen(g->ops.name));
1970 gpu->max_fbps_count = g->ops.gr.get_max_fbps_count(g);
1971 gpu->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
1972 gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
1973 gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
1974 g->ops.gr.get_rop_l2_en_mask(g);
1975
1968 gpu->reserved = 0; 1976 gpu->reserved = 0;
1969 1977
1970 return 0; 1978 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index bcea5655..695e3f69 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -167,6 +167,11 @@ struct gpu_ops {
167 struct gk20a_debug_output *o); 167 struct gk20a_debug_output *o);
168 int (*update_pc_sampling)(struct channel_gk20a *ch, 168 int (*update_pc_sampling)(struct channel_gk20a *ch,
169 bool enable); 169 bool enable);
170 u32 (*get_max_fbps_count)(struct gk20a *g);
171 u32 (*get_fbp_en_mask)(struct gk20a *g);
172 u32 (*get_max_ltc_per_fbp)(struct gk20a *g);
173 u32 (*get_max_lts_per_ltc)(struct gk20a *g);
174 u32* (*get_rop_l2_en_mask)(struct gk20a *g);
170 } gr; 175 } gr;
171 const char *name; 176 const char *name;
172 struct { 177 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 7e8d4e13..e9b209c4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3066,6 +3066,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3066 tmp = gk20a_readl(g, top_num_fbps_r()); 3066 tmp = gk20a_readl(g, top_num_fbps_r());
3067 gr->max_fbps_count = top_num_fbps_value_v(tmp); 3067 gr->max_fbps_count = top_num_fbps_value_v(tmp);
3068 3068
3069 gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
3070
3069 tmp = gk20a_readl(g, top_tpc_per_gpc_r()); 3071 tmp = gk20a_readl(g, top_tpc_per_gpc_r());
3070 gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp); 3072 gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
3071 3073
@@ -7313,6 +7315,41 @@ static u32 gr_gk20a_pagepool_default_size(struct gk20a *g)
7313 return gr_scc_pagepool_total_pages_hwmax_value_v(); 7315 return gr_scc_pagepool_total_pages_hwmax_value_v();
7314} 7316}
7315 7317
7318static u32 gr_gk20a_get_max_fbps_count(struct gk20a *g)
7319{
7320 u32 max_fbps_count, tmp;
7321 tmp = gk20a_readl(g, top_num_fbps_r());
7322 max_fbps_count = top_num_fbps_value_v(tmp);
7323 return max_fbps_count;
7324}
7325
7326
7327static u32 gr_gk20a_get_fbp_en_mask(struct gk20a *g)
7328{
7329 u32 fbp_en_mask, opt_fbio;
7330 opt_fbio = gk20a_readl(g, top_fs_status_fbp_r());
7331 fbp_en_mask = top_fs_status_fbp_cluster_v(opt_fbio);
7332 return fbp_en_mask;
7333}
7334
7335static u32 gr_gk20a_get_max_ltc_per_fbp(struct gk20a *g)
7336{
7337 return 1;
7338}
7339
7340static u32 gr_gk20a_get_max_lts_per_ltc(struct gk20a *g)
7341{
7342 return 1;
7343}
7344
7345u32 *gr_gk20a_rop_l2_en_mask(struct gk20a *g)
7346{
7347 /* gk20a doesnt have rop_l2_en_mask */
7348 return NULL;
7349}
7350
7351
7352
7316static int gr_gk20a_dump_gr_status_regs(struct gk20a *g, 7353static int gr_gk20a_dump_gr_status_regs(struct gk20a *g,
7317 struct gk20a_debug_output *o) 7354 struct gk20a_debug_output *o)
7318{ 7355{
@@ -7470,5 +7507,9 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
7470 gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; 7507 gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx;
7471 gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx; 7508 gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx;
7472 gops->gr.dump_gr_regs = gr_gk20a_dump_gr_status_regs; 7509 gops->gr.dump_gr_regs = gr_gk20a_dump_gr_status_regs;
7510 gops->gr.get_max_fbps_count = gr_gk20a_get_max_fbps_count;
7511 gops->gr.get_fbp_en_mask = gr_gk20a_get_fbp_en_mask;
7512 gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp;
7513 gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc;
7514 gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask;
7473} 7515}
7474
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 6cabe526..5dfaac5f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -295,6 +295,7 @@ struct gr_gk20a {
295#ifdef CONFIG_ARCH_TEGRA_18x_SOC 295#ifdef CONFIG_ARCH_TEGRA_18x_SOC
296 struct gr_t18x t18x; 296 struct gr_t18x t18x;
297#endif 297#endif
298 u32 fbp_en_mask;
298 u32 no_of_sm; 299 u32 no_of_sm;
299 struct sm_info *sm_to_cluster; 300 struct sm_info *sm_to_cluster;
300}; 301};
diff --git a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h
index f3ca7498..35d9d347 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_top_gk20a.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -138,4 +138,28 @@ static inline u32 top_device_info_entry_enum_v(void)
138{ 138{
139 return 0x00000002; 139 return 0x00000002;
140} 140}
141static inline u32 top_fs_status_fbp_r(void)
142{
143 return 0x00022548;
144}
145static inline u32 top_fs_status_fbp_cluster_v(u32 r)
146{
147 return (r >> 0) & 0xffff;
148}
149static inline u32 top_fs_status_fbp_cluster_enable_v(void)
150{
151 return 0x00000000;
152}
153static inline u32 top_fs_status_fbp_cluster_enable_f(void)
154{
155 return 0x0;
156}
157static inline u32 top_fs_status_fbp_cluster_disable_v(void)
158{
159 return 0x00000001;
160}
161static inline u32 top_fs_status_fbp_cluster_disable_f(void)
162{
163 return 0x1;
164}
141#endif 165#endif
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 3d99e94d..c199964f 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -26,6 +26,7 @@
26#include "hw_gr_gm20b.h" 26#include "hw_gr_gm20b.h"
27#include "hw_fifo_gm20b.h" 27#include "hw_fifo_gm20b.h"
28#include "hw_fb_gm20b.h" 28#include "hw_fb_gm20b.h"
29#include "hw_top_gm20b.h"
29#include "hw_proj_gm20b.h" 30#include "hw_proj_gm20b.h"
30#include "hw_ctxsw_prog_gm20b.h" 31#include "hw_ctxsw_prog_gm20b.h"
31#include "hw_fuse_gm20b.h" 32#include "hw_fuse_gm20b.h"
@@ -975,6 +976,52 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
975 return 0; 976 return 0;
976} 977}
977 978
979static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
980{
981 u32 fbp_en_mask, opt_fbio;
982 opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r());
983 fbp_en_mask = fuse_status_opt_fbio_data_v(opt_fbio);
984 return fbp_en_mask;
985}
986
987static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
988{
989 u32 ltc_per_fbp, reg;
990 reg = gk20a_readl(g, top_ltc_per_fbp_r());
991 ltc_per_fbp = top_ltc_per_fbp_value_v(reg);
992 return ltc_per_fbp;
993}
994
995static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
996{
997 u32 lts_per_ltc, reg;
998 reg = gk20a_readl(g, top_slices_per_ltc_r());
999 lts_per_ltc = top_slices_per_ltc_value_v(reg);
1000 return lts_per_ltc;
1001}
1002
1003u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
1004{
1005 struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
1006 u32 i, tmp, max_fbps_count;
1007 tmp = gk20a_readl(g, top_num_fbps_r());
1008 max_fbps_count = top_num_fbps_value_v(tmp);
1009
1010 /* mask of Rop_L2 for each FBP */
1011 for (i = 0; i < max_fbps_count; i++)
1012 gpu->rop_l2_en_mask[i] = fuse_status_opt_rop_l2_fbp_r(i);
1013
1014 return gpu->rop_l2_en_mask;
1015}
1016
1017static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
1018{
1019 u32 tmp, max_fbps_count;
1020 tmp = gk20a_readl(g, top_num_fbps_r());
1021 max_fbps_count = top_num_fbps_value_v(tmp);
1022 return max_fbps_count;
1023}
1024
978void gm20b_init_gr(struct gpu_ops *gops) 1025void gm20b_init_gr(struct gpu_ops *gops)
979{ 1026{
980 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 1027 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1020,4 +1067,9 @@ void gm20b_init_gr(struct gpu_ops *gops)
1020 gr_gm20b_update_ctxsw_preemption_mode; 1067 gr_gm20b_update_ctxsw_preemption_mode;
1021 gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs; 1068 gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
1022 gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling; 1069 gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
1070 gops->gr.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask;
1071 gops->gr.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp;
1072 gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc;
1073 gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask;
1074 gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count;
1023} 1075}
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h
index 67a82fa2..729d6541 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_fuse_gm20b.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -98,4 +98,24 @@ static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_no_f(void)
98{ 98{
99 return 0x0; 99 return 0x0;
100} 100}
101static inline u32 fuse_status_opt_fbio_r(void)
102{
103 return 0x00021c14;
104}
105static inline u32 fuse_status_opt_fbio_data_f(u32 v)
106{
107 return (v & 0xffff) << 0;
108}
109static inline u32 fuse_status_opt_fbio_data_m(void)
110{
111 return 0xffff << 0;
112}
113static inline u32 fuse_status_opt_fbio_data_v(u32 r)
114{
115 return (r >> 0) & 0xffff;
116}
117static inline u32 fuse_status_opt_rop_l2_fbp_r(u32 i)
118{
119 return 0x00021d70 + i*4;
120}
101#endif 121#endif
diff --git a/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
index 42a82a12..c0ad007d 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_top_gm20b.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -74,6 +74,22 @@ static inline u32 top_num_fbps_value_v(u32 r)
74{ 74{
75 return (r >> 0) & 0x1f; 75 return (r >> 0) & 0x1f;
76} 76}
77static inline u32 top_ltc_per_fbp_r(void)
78{
79 return 0x00022450;
80}
81static inline u32 top_ltc_per_fbp_value_v(u32 r)
82{
83 return (r >> 0) & 0x1f;
84}
85static inline u32 top_slices_per_ltc_r(void)
86{
87 return 0x0002245c;
88}
89static inline u32 top_slices_per_ltc_value_v(u32 r)
90{
91 return (r >> 0) & 0x1f;
92}
77static inline u32 top_num_ltcs_r(void) 93static inline u32 top_num_ltcs_r(void)
78{ 94{
79 return 0x00022454; 95 return 0x00022454;