summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
author: Deepak Nibade <dnibade@nvidia.com> 2018-03-16 08:24:55 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-03-21 09:04:35 -0400
commit: 66751bc05d7a1efca3668d59a2820e3e92985f91 (patch)
tree: ab85f317d77c8c76d6a7430039d19d406b9eb8f5 /drivers/gpu/nvgpu/gk20a
parent: c5ca711f1efbd30fa760df139f3b63aa471d28a9 (diff)
gpu: nvgpu: gv100: fix num_fbpas while adding ctxsw buffer entries
For LIST_nv_pm_fbpa_ctx_regs, we currently call add_ctxsw_buffer_map_entries_subunits() to add registers corresponding to all the FBPAs. But while configuring the total number of registers, we do not consider floorswept FBPAs, and that causes misalignment in subsequent lists for GV100. Fix this by reading the disabled/floorswept FBPAs from the fuse and considering only those FBPAs which are active for GV100. Add a new HAL (*add_ctxsw_reg_pm_fbpa) to support this setting, and define a common HAL gr_gk20a_add_ctxsw_reg_pm_fbpa() for all chips except GV100. Define a GV100-specific gr_gv100_add_ctxsw_reg_pm_fbpa() with the above-mentioned implementation to consider floorsweeping. Bug 1998067 Change-Id: Id560551bb0b8142791c117b6d27864566c90b489 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1676654 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 6
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 13
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.h 6
3 files changed, 24 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c1824b07..4ab1cd1b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -450,6 +450,12 @@ struct gpu_ops {
450 struct gr_gk20a_isr_data *isr_data); 450 struct gr_gk20a_isr_data *isr_data);
451 int (*handle_semaphore_pending)(struct gk20a *g, 451 int (*handle_semaphore_pending)(struct gk20a *g,
452 struct gr_gk20a_isr_data *isr_data); 452 struct gr_gk20a_isr_data *isr_data);
453 int (*add_ctxsw_reg_pm_fbpa)(struct gk20a *g,
454 struct ctxsw_buf_offset_map_entry *map,
455 struct aiv_list_gk20a *regs,
456 u32 *count, u32 *offset,
457 u32 max_cnt, u32 base,
458 u32 num_fbpas, u32 stride, u32 mask);
453 } gr; 459 } gr;
454 struct { 460 struct {
455 void (*init_hw)(struct gk20a *g); 461 void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 94bfd224..bec33293 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -7495,6 +7495,17 @@ static int add_ctxsw_buffer_map_entries_subunits(
7495 return 0; 7495 return 0;
7496} 7496}
7497 7497
7498int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
7499 struct ctxsw_buf_offset_map_entry *map,
7500 struct aiv_list_gk20a *regs,
7501 u32 *count, u32 *offset,
7502 u32 max_cnt, u32 base,
7503 u32 num_fbpas, u32 stride, u32 mask)
7504{
7505 return add_ctxsw_buffer_map_entries_subunits(map, regs, count, offset,
7506 max_cnt, base, num_fbpas, stride, mask);
7507}
7508
7498static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, 7509static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
7499 struct ctxsw_buf_offset_map_entry *map, 7510 struct ctxsw_buf_offset_map_entry *map,
7500 u32 *count, u32 *offset, u32 max_cnt) 7511 u32 *count, u32 *offset, u32 max_cnt)
@@ -7683,7 +7694,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
7683 goto cleanup; 7694 goto cleanup;
7684 7695
7685 /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */ 7696 /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
7686 if (add_ctxsw_buffer_map_entries_subunits(map, 7697 if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map,
7687 &g->gr.ctx_vars.ctxsw_regs.pm_fbpa, 7698 &g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
7688 &count, &offset, 7699 &count, &offset,
7689 hwpm_ctxsw_reg_count_max, 0, 7700 hwpm_ctxsw_reg_count_max, 0,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 7f89037e..5ac363e1 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -811,4 +811,10 @@ u32 gk20a_init_sw_bundle(struct gk20a *g);
811int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type); 811int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
812int gk20a_gr_handle_semaphore_pending(struct gk20a *g, 812int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
813 struct gr_gk20a_isr_data *isr_data); 813 struct gr_gk20a_isr_data *isr_data);
814int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
815 struct ctxsw_buf_offset_map_entry *map,
816 struct aiv_list_gk20a *regs,
817 u32 *count, u32 *offset,
818 u32 max_cnt, u32 base,
819 u32 num_fbpas, u32 stride, u32 mask);
814#endif /*__GR_GK20A_H__*/ 820#endif /*__GR_GK20A_H__*/