summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sami Kiminki <skiminki@nvidia.com> 2016-10-28 16:05:23 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2016-11-11 05:21:04 -0500
commit: f329e674f477f0120f9a92a9e7b4945a1ddaefbb (patch)
tree: 55107ad03a029e438e93c1793afd5e940ab7206b
parent: cc4208a27831faf95409b491aa29b8a161bf630a (diff)
gpu: nvgpu: gk20a: Fix FBP/L2 masks, add GET_FBP_L2_MASKS
Fix FBP and ROP_L2 enable masks for Maxwell+.

Deprecate rop_l2_en_mask in GPU characteristics by adding the _DEPRECATED suffix. The array is too small to hold ROP_L2 enable masks for desktop GPUs.

Add NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS to expose the ROP_L2 masks for userspace.

Bug 200136909
Bug 200241845

Change-Id: I5ad5a5c09f3962ebb631b8d6e7a2f9df02f75ac7
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1245294
(cherry picked from commit 0823b33e59defec341ea7919dae4e5f73a36d256)
Reviewed-on: http://git-master/r/1249883
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c 28
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 7
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.h 1
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.c 27
-rw-r--r-- include/uapi/linux/nvgpu.h 22
5 files changed, 76 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 2fdf719a..493cbe80 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -246,6 +246,30 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
246 return err; 246 return err;
247} 247}
248 248
249static int gk20a_ctrl_get_fbp_l2_masks(
250 struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args)
251{
252 struct gr_gk20a *gr = &g->gr;
253 int err = 0;
254 const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count;
255
256 if (args->mask_buf_size > 0) {
257 size_t write_size = fbp_l2_mask_size;
258
259 if (write_size > args->mask_buf_size)
260 write_size = args->mask_buf_size;
261
262 err = copy_to_user((void __user *)(uintptr_t)
263 args->mask_buf_addr,
264 gr->fbp_rop_l2_en_mask, write_size);
265 }
266
267 if (err == 0)
268 args->mask_buf_size = fbp_l2_mask_size;
269
270 return err;
271}
272
249static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, 273static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
250 struct nvgpu_gpu_l2_fb_args *args) 274 struct nvgpu_gpu_l2_fb_args *args)
251{ 275{
@@ -936,6 +960,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
936 err = gk20a_ctrl_get_tpc_masks(g, 960 err = gk20a_ctrl_get_tpc_masks(g,
937 (struct nvgpu_gpu_get_tpc_masks_args *)buf); 961 (struct nvgpu_gpu_get_tpc_masks_args *)buf);
938 break; 962 break;
963 case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS:
964 err = gk20a_ctrl_get_fbp_l2_masks(g,
965 (struct nvgpu_gpu_get_fbp_l2_masks_args *)buf);
966 break;
939 case NVGPU_GPU_IOCTL_OPEN_CHANNEL: 967 case NVGPU_GPU_IOCTL_OPEN_CHANNEL:
940 /* this arg type here, but ..gpu_open_channel_args in nvgpu.h 968 /* this arg type here, but ..gpu_open_channel_args in nvgpu.h
941 * for consistency - they are the same */ 969 * for consistency - they are the same */
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 41ef5424..4dbdb777 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3257,6 +3257,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3257 kfree(gr->sm_to_cluster); 3257 kfree(gr->sm_to_cluster);
3258 kfree(gr->gpc_skip_mask); 3258 kfree(gr->gpc_skip_mask);
3259 kfree(gr->map_tiles); 3259 kfree(gr->map_tiles);
3260 kfree(gr->fbp_rop_l2_en_mask);
3260 gr->gpc_tpc_count = NULL; 3261 gr->gpc_tpc_count = NULL;
3261 gr->gpc_zcb_count = NULL; 3262 gr->gpc_zcb_count = NULL;
3262 gr->gpc_ppc_count = NULL; 3263 gr->gpc_ppc_count = NULL;
@@ -3266,6 +3267,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3266 gr->pes_tpc_mask[1] = NULL; 3267 gr->pes_tpc_mask[1] = NULL;
3267 gr->gpc_skip_mask = NULL; 3268 gr->gpc_skip_mask = NULL;
3268 gr->map_tiles = NULL; 3269 gr->map_tiles = NULL;
3270 gr->fbp_rop_l2_en_mask = NULL;
3269 3271
3270 gr->ctx_vars.valid = false; 3272 gr->ctx_vars.valid = false;
3271 kfree(gr->ctx_vars.ucode.fecs.inst.l); 3273 kfree(gr->ctx_vars.ucode.fecs.inst.l);
@@ -3336,6 +3338,11 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
3336 3338
3337 gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); 3339 gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
3338 3340
3341 gr->fbp_rop_l2_en_mask =
3342 kzalloc(gr->max_fbps_count * sizeof(u32), GFP_KERNEL);
3343 if (!gr->fbp_rop_l2_en_mask)
3344 goto clean_up;
3345
3339 tmp = gk20a_readl(g, top_tpc_per_gpc_r()); 3346 tmp = gk20a_readl(g, top_tpc_per_gpc_r());
3340 gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp); 3347 gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
3341 3348
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 1b7bc252..d03f945c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -351,6 +351,7 @@ struct gr_gk20a {
351 struct gr_t18x t18x; 351 struct gr_t18x t18x;
352#endif 352#endif
353 u32 fbp_en_mask; 353 u32 fbp_en_mask;
354 u32 *fbp_rop_l2_en_mask;
354 u32 no_of_sm; 355 u32 no_of_sm;
355 struct sm_info *sm_to_cluster; 356 struct sm_info *sm_to_cluster;
356 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states; 357 struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 23144275..37b9737b 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1028,8 +1028,15 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
1028static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g) 1028static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
1029{ 1029{
1030 u32 fbp_en_mask, opt_fbio; 1030 u32 fbp_en_mask, opt_fbio;
1031 opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r()); 1031 u32 tmp, max_fbps_count;
1032 fbp_en_mask = fuse_status_opt_fbio_data_v(opt_fbio); 1032
1033 tmp = gk20a_readl(g, top_num_fbps_r());
1034 max_fbps_count = top_num_fbps_value_v(tmp);
1035
1036 opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r());
1037 fbp_en_mask =
1038 ((1 << max_fbps_count) - 1) ^
1039 fuse_status_opt_fbio_data_v(opt_fbio);
1033 return fbp_en_mask; 1040 return fbp_en_mask;
1034} 1041}
1035 1042
@@ -1051,16 +1058,22 @@ static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
1051 1058
1052static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) 1059static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
1053{ 1060{
1054 struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics; 1061 struct gr_gk20a *gr = &g->gr;
1055 u32 i, tmp, max_fbps_count; 1062 u32 i, tmp, max_fbps_count, max_ltc_per_fbp;
1063 u32 rop_l2_all_en;
1064
1056 tmp = gk20a_readl(g, top_num_fbps_r()); 1065 tmp = gk20a_readl(g, top_num_fbps_r());
1057 max_fbps_count = top_num_fbps_value_v(tmp); 1066 max_fbps_count = top_num_fbps_value_v(tmp);
1067 max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp(g);
1068 rop_l2_all_en = (1 << max_ltc_per_fbp) - 1;
1058 1069
1059 /* mask of Rop_L2 for each FBP */ 1070 /* mask of Rop_L2 for each FBP */
1060 for (i = 0; i < max_fbps_count; i++) 1071 for (i = 0; i < max_fbps_count; i++) {
1061 gpu->rop_l2_en_mask[i] = fuse_status_opt_rop_l2_fbp_r(i); 1072 tmp = gk20a_readl(g, fuse_status_opt_rop_l2_fbp_r(i));
1073 gr->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp;
1074 }
1062 1075
1063 return gpu->rop_l2_en_mask; 1076 return gr->fbp_rop_l2_en_mask;
1064} 1077}
1065 1078
1066static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g) 1079static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index d12ba772..a8ba2189 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -171,7 +171,7 @@ struct nvgpu_gpu_characteristics {
171 __u32 max_tex_per_tpc; 171 __u32 max_tex_per_tpc;
172 __u32 max_gpc_count; 172 __u32 max_gpc_count;
173 /* mask of Rop_L2 for each FBP */ 173 /* mask of Rop_L2 for each FBP */
174 __u32 rop_l2_en_mask[2]; 174 __u32 rop_l2_en_mask_DEPRECATED[2];
175 175
176 176
177 __u8 chipname[8]; 177 __u8 chipname[8];
@@ -522,6 +522,22 @@ struct nvgpu_gpu_get_memory_state_args {
522 __u64 reserved[4]; 522 __u64 reserved[4];
523}; 523};
524 524
525struct nvgpu_gpu_get_fbp_l2_masks_args {
526 /* [in] L2 mask buffer size reserved by userspace. Should be
527 at least sizeof(__u32) * fls(fbp_en_mask) to receive LTC
528 mask for each FBP.
529 [out] full kernel buffer size
530 */
531 __u32 mask_buf_size;
532 __u32 reserved;
533
534 /* [in] pointer to L2 mask buffer. It will receive one
535 32-bit L2 mask per FBP or 0 if FBP is not enabled or
536 not present. This parameter is ignored if
537 mask_buf_size is 0. */
538 __u64 mask_buf_addr;
539};
540
525#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ 541#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
526 _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) 542 _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
527#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ 543#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -583,8 +599,10 @@ struct nvgpu_gpu_get_memory_state_args {
583#define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \ 599#define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \
584 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \ 600 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \
585 struct nvgpu_gpu_get_memory_state_args) 601 struct nvgpu_gpu_get_memory_state_args)
602#define NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS \
603 _IOWR(NVGPU_GPU_IOCTL_MAGIC, 38, struct nvgpu_gpu_get_fbp_l2_masks_args)
586#define NVGPU_GPU_IOCTL_LAST \ 604#define NVGPU_GPU_IOCTL_LAST \
587 _IOC_NR(NVGPU_GPU_IOCTL_GET_MEMORY_STATE) 605 _IOC_NR(NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS)
588#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ 606#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
589 sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) 607 sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
590 608