gpu: nvgpu: gv100: fix num_fbpas while adding ctxsw buffer entries

For LIST_nv_pm_fbpa_ctx_regs, we currently call add_ctxsw_buffer_map_entries_subunits() to add registers corresponding to all the FBPAs.

But while configuring the total number of registers, we do not consider floorswept FBPAs, and that causes misalignment in subsequent lists for GV100.

Fix this by reading the disabled/floorswept FBPAs from the fuse and considering only those FBPAs which are active for GV100.

Add a new HAL (*add_ctxsw_reg_pm_fbpa) to support this setting, and define a common HAL gr_gk20a_add_ctxsw_reg_pm_fbpa() for all chips except GV100.

Define a GV100-specific gr_gv100_add_ctxsw_reg_pm_fbpa() with the above-mentioned implementation to account for floorsweeping.

Bug 1998067

Change-Id: Id560551bb0b8142791c117b6d27864566c90b489
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1676654
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Deepak Nibade <dnibade@nvidia.com> 2018-03-16 08:24:55 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-03-21 09:04:35 -0400
commit: 66751bc05d7a1efca3668d59a2820e3e92985f91 (patch)
tree: ab85f317d77c8c76d6a7430039d19d406b9eb8f5 /drivers/gpu
parent: c5ca711f1efbd30fa760df139f3b63aa471d28a9 (diff)
12 files changed, 81 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c1824b07..4ab1cd1b 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -450,6 +450,12 @@ struct gpu_ops {
                                        struct gr_gk20a_isr_data *isr_data);
                int (*handle_semaphore_pending)(struct gk20a *g,
                                        struct gr_gk20a_isr_data *isr_data);
+                int (*add_ctxsw_reg_pm_fbpa)(struct gk20a *g,
+                                struct ctxsw_buf_offset_map_entry *map,
+                                struct aiv_list_gk20a *regs,
+                                u32 *count, u32 *offset,
+                                u32 max_cnt, u32 base,
+                                u32 num_fbpas, u32 stride, u32 mask);
        } gr;
        struct {
                void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 94bfd224..bec33293 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -7495,6 +7495,17 @@ static int add_ctxsw_buffer_map_entries_subunits(
        return 0;
 }
+/*
+ * Common add_ctxsw_reg_pm_fbpa HAL implementation.
+ *
+ * Passes every argument except g straight through to
+ * add_ctxsw_buffer_map_entries_subunits() and returns that call's result
+ * unchanged. g is accepted only to satisfy the HAL signature.
+ */
+int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
+                                struct ctxsw_buf_offset_map_entry *map,
+                                struct aiv_list_gk20a *regs,
+                                u32 *count, u32 *offset,
+                                u32 max_cnt, u32 base,
+                                u32 num_fbpas, u32 stride, u32 mask)
+{
+        int err;
+
+        err = add_ctxsw_buffer_map_entries_subunits(map, regs,
+                                                    count, offset,
+                                                    max_cnt, base,
+                                                    num_fbpas, stride, mask);
+
+        return err;
+}
 static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
                                        struct ctxsw_buf_offset_map_entry *map,
                                        u32 *count, u32 *offset, u32 max_cnt)
@@ -7683,7 +7694,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
                goto cleanup;
        /* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
-        if (add_ctxsw_buffer_map_entries_subunits(map,
+        if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map,
                                        &g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
                                        &count, &offset,
                                        hwpm_ctxsw_reg_count_max, 0,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 7f89037e..5ac363e1 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -811,4 +811,10 @@ u32 gk20a_init_sw_bundle(struct gk20a *g);
 int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
 int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
                                struct gr_gk20a_isr_data *isr_data);
+int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
+                                struct ctxsw_buf_offset_map_entry *map,
+                                struct aiv_list_gk20a *regs,
+                                u32 *count, u32 *offset,
+                                u32 max_cnt, u32 base,
+                                u32 num_fbpas, u32 stride, u32 mask);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index dd0a50bc..9de5a4ca 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -314,6 +314,7 @@ static const struct gpu_ops gm20b_ops = {
                .set_error_notifier = gk20a_gr_set_error_notifier,
                .handle_notify_pending = gk20a_gr_handle_notify_pending,
                .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
+                .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
        },
        .fb = {
                .reset = fb_gk20a_reset,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index af6cf9ed..bab32a92 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -376,6 +376,7 @@ static const struct gpu_ops gp106_ops = {
                .set_error_notifier = gk20a_gr_set_error_notifier,
                .handle_notify_pending = gk20a_gr_handle_notify_pending,
                .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
+                .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
        },
        .fb = {
                .reset = gp106_fb_reset,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index cce1ed4f..843b6ee7 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -344,6 +344,7 @@ static const struct gpu_ops gp10b_ops = {
                .set_error_notifier = gk20a_gr_set_error_notifier,
                .handle_notify_pending = gk20a_gr_handle_notify_pending,
                .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
+                .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
        },
        .fb = {
                .reset = fb_gk20a_reset,
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c
index bf143dbb..3aed7a19 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -34,6 +34,8 @@
 #include <nvgpu/hw/gv100/hw_gr_gv100.h>
 #include <nvgpu/hw/gv100/hw_proj_gv100.h>
+#include <nvgpu/hw/gv100/hw_fuse_gv100.h>
 /*
 *  Estimate performance if the given logical TPC in the given logical GPC were
@@ -329,3 +331,45 @@ u32 gr_gv100_get_patch_slots(struct gk20a *g)
        return size;
 }
+/*
+ * GV100 implementation of the add_ctxsw_reg_pm_fbpa HAL.
+ *
+ * Appends map entries for the registers in regs, one entry per
+ * (register, active FBPA) pair, starting at map[*count] with buffer offset
+ * *offset. An FBPA is treated as active when its bit is clear in the value
+ * read from fuse_status_opt_fbio_r(). Entries are emitted register-major:
+ * for each register, every active FBPA in ascending id order. Each entry
+ * advances the buffer offset by 4.
+ *
+ * Each entry's address is
+ *   base + (regs->l[idx].addr & mask) + (fbpa_id * stride).
+ *
+ * Returns 0 on success after updating *count and *offset. Returns -EINVAL,
+ * without touching map, *count or *offset, when
+ * *count + regs->count * num_fbpas exceeds max_cnt. This bound uses
+ * num_fbpas, not the number of active FBPAs, so it is an upper bound on
+ * the entries actually written.
+ */
+int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
+                                struct ctxsw_buf_offset_map_entry *map,
+                                struct aiv_list_gk20a *regs,
+                                u32 *count, u32 *offset,
+                                u32 max_cnt, u32 base,
+                                u32 num_fbpas, u32 stride, u32 mask)
+{
+        u32 fbpa_id;
+        u32 idx;
+        u32 cnt = *count;
+        u32 off = *offset;
+        u32 active_fbpa_mask;
+
+        if ((cnt + (regs->count * num_fbpas)) > max_cnt)
+                return -EINVAL;
+
+        /*
+         * Read active fbpa mask from fuse
+         * Note that 0:enable and 1:disable in value read from fuse so we've to
+         * flip the bits.
+         */
+        active_fbpa_mask = ~nvgpu_readl(g, fuse_status_opt_fbio_r());
+
+        /*
+         * Also set unused bits to zero.
+         * The mask is built from an unsigned literal, and the shift is
+         * skipped when num_fbpas covers the whole 32-bit word: shifting a
+         * signed 1 into the sign bit, or shifting by >= the type width, is
+         * undefined behaviour in C.
+         */
+        if (num_fbpas < 32U)
+                active_fbpa_mask &= (1U << num_fbpas) - 1U;
+
+        for (idx = 0; idx < regs->count; idx++) {
+                for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) {
+                        if (active_fbpa_mask & BIT(fbpa_id)) {
+                                map[cnt].addr = base +
+                                                (regs->l[idx].addr & mask) +
+                                                (fbpa_id * stride);
+                                map[cnt++].offset = off;
+                                off += 4;
+                        }
+                }
+        }
+
+        *count = cnt;
+        *offset = off;
+        return 0;
+}
diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h
index 690bba57..e1174686 100644
--- a/drivers/gpu/nvgpu/gv100/gr_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -33,4 +33,10 @@ void gr_gv100_program_sm_id_numbering(struct gk20a *g,
                                        u32 gpc, u32 tpc, u32 smid);
 int gr_gv100_load_smid_config(struct gk20a *g);
 u32 gr_gv100_get_patch_slots(struct gk20a *g);
+int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
+                                struct ctxsw_buf_offset_map_entry *map,
+                                struct aiv_list_gk20a *regs,
+                                u32 *count, u32 *offset,
+                                u32 max_cnt, u32 base,
+                                u32 num_fbpas, u32 stride, u32 mask);
 #endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 3dbd1e01..08fc7c34 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -429,6 +429,7 @@ static const struct gpu_ops gv100_ops = {
                .set_error_notifier = gk20a_gr_set_error_notifier,
                .handle_notify_pending = gk20a_gr_handle_notify_pending,
                .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
+                .add_ctxsw_reg_pm_fbpa = gr_gv100_add_ctxsw_reg_pm_fbpa,
        },
        .fb = {
                .reset = gv100_fb_reset,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 1fe09827..1edf67d6 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -396,6 +396,7 @@ static const struct gpu_ops gv11b_ops = {
                .set_error_notifier = gk20a_gr_set_error_notifier,
                .handle_notify_pending = gk20a_gr_handle_notify_pending,
                .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
+                .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
        },
        .fb = {
                .reset = gv11b_fb_reset,
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 18eee404..5750fd78 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -220,6 +220,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
                        gr_gp10b_init_gfxp_wfi_timeout_count,
                .get_max_gfxp_wfi_timeout_count =
                        gr_gp10b_get_max_gfxp_wfi_timeout_count,
+                .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
        },
        .fb = {
                .reset = fb_gk20a_reset,
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
index 465b57a5..9588f4bd 100644
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -256,6 +256,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
                        gr_gv11b_init_gfxp_wfi_timeout_count,
                .get_max_gfxp_wfi_timeout_count =
                        gr_gv11b_get_max_gfxp_wfi_timeout_count,
+                .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
        },
        .fb = {
                .reset = gv11b_fb_reset,
author	Deepak Nibade <dnibade@nvidia.com>	2018-03-16 08:24:55 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-03-21 09:04:35 -0400
commit	66751bc05d7a1efca3668d59a2820e3e92985f91 (patch)
tree	ab85f317d77c8c76d6a7430039d19d406b9eb8f5 /drivers/gpu
parent	c5ca711f1efbd30fa760df139f3b63aa471d28a9 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index c1824b07..4ab1cd1b 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -450,6 +450,12 @@ struct gpu_ops {
450	struct gr_gk20a_isr_data *isr_data);	450	struct gr_gk20a_isr_data *isr_data);
451	int (handle_semaphore_pending)(struct gk20a g,	451	int (handle_semaphore_pending)(struct gk20a g,
452	struct gr_gk20a_isr_data *isr_data);	452	struct gr_gk20a_isr_data *isr_data);
		453	int (add_ctxsw_reg_pm_fbpa)(struct gk20a g,
		454	struct ctxsw_buf_offset_map_entry *map,
		455	struct aiv_list_gk20a *regs,
		456	u32 count, u32 offset,
		457	u32 max_cnt, u32 base,
		458	u32 num_fbpas, u32 stride, u32 mask);
453	} gr;	459	} gr;
454	struct {	460	struct {
455	void (init_hw)(struct gk20a g);	461	void (init_hw)(struct gk20a g);


diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 94bfd224..bec33293 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -7495,6 +7495,17 @@ static int add_ctxsw_buffer_map_entries_subunits(
7495	return 0;	7495	return 0;
7496	}	7496	}
7497		7497
		7498	int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
		7499	struct ctxsw_buf_offset_map_entry *map,
		7500	struct aiv_list_gk20a *regs,
		7501	u32 count, u32 offset,
		7502	u32 max_cnt, u32 base,
		7503	u32 num_fbpas, u32 stride, u32 mask)
		7504	{
		7505	return add_ctxsw_buffer_map_entries_subunits(map, regs, count, offset,
		7506	max_cnt, base, num_fbpas, stride, mask);
		7507	}
		7508
7498	static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,	7509	static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
7499	struct ctxsw_buf_offset_map_entry *map,	7510	struct ctxsw_buf_offset_map_entry *map,
7500	u32 count, u32 offset, u32 max_cnt)	7511	u32 count, u32 offset, u32 max_cnt)
@@ -7683,7 +7694,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
7683	goto cleanup;	7694	goto cleanup;
7684		7695
7685	/* Add entries from _LIST_nv_pm_fbpa_ctx_regs */	7696	/* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
7686	if (add_ctxsw_buffer_map_entries_subunits(map,	7697	if (g->ops.gr.add_ctxsw_reg_pm_fbpa(g, map,
7687	&g->gr.ctx_vars.ctxsw_regs.pm_fbpa,	7698	&g->gr.ctx_vars.ctxsw_regs.pm_fbpa,
7688	&count, &offset,	7699	&count, &offset,
7689	hwpm_ctxsw_reg_count_max, 0,	7700	hwpm_ctxsw_reg_count_max, 0,


diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 7f89037e..5ac363e1 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -811,4 +811,10 @@ u32 gk20a_init_sw_bundle(struct gk20a *g);
811	int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);	811	int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type);
812	int gk20a_gr_handle_semaphore_pending(struct gk20a *g,	812	int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
813	struct gr_gk20a_isr_data *isr_data);	813	struct gr_gk20a_isr_data *isr_data);
		814	int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
		815	struct ctxsw_buf_offset_map_entry *map,
		816	struct aiv_list_gk20a *regs,
		817	u32 count, u32 offset,
		818	u32 max_cnt, u32 base,
		819	u32 num_fbpas, u32 stride, u32 mask);
814	#endif /__GR_GK20A_H__/	820	#endif /__GR_GK20A_H__/


diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index dd0a50bc..9de5a4ca 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -314,6 +314,7 @@ static const struct gpu_ops gm20b_ops = {
314	.set_error_notifier = gk20a_gr_set_error_notifier,	314	.set_error_notifier = gk20a_gr_set_error_notifier,
315	.handle_notify_pending = gk20a_gr_handle_notify_pending,	315	.handle_notify_pending = gk20a_gr_handle_notify_pending,
316	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,	316	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
		317	.add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
317	},	318	},
318	.fb = {	319	.fb = {
319	.reset = fb_gk20a_reset,	320	.reset = fb_gk20a_reset,


diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index af6cf9ed..bab32a92 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -376,6 +376,7 @@ static const struct gpu_ops gp106_ops = {
376	.set_error_notifier = gk20a_gr_set_error_notifier,	376	.set_error_notifier = gk20a_gr_set_error_notifier,
377	.handle_notify_pending = gk20a_gr_handle_notify_pending,	377	.handle_notify_pending = gk20a_gr_handle_notify_pending,
378	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,	378	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
		379	.add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
379	},	380	},
380	.fb = {	381	.fb = {
381	.reset = gp106_fb_reset,	382	.reset = gp106_fb_reset,


diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index cce1ed4f..843b6ee7 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -344,6 +344,7 @@ static const struct gpu_ops gp10b_ops = {
344	.set_error_notifier = gk20a_gr_set_error_notifier,	344	.set_error_notifier = gk20a_gr_set_error_notifier,
345	.handle_notify_pending = gk20a_gr_handle_notify_pending,	345	.handle_notify_pending = gk20a_gr_handle_notify_pending,
346	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,	346	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
		347	.add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
347	},	348	},
348	.fb = {	349	.fb = {
349	.reset = fb_gk20a_reset,	350	.reset = fb_gk20a_reset,


diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index bf143dbb..3aed7a19 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c
@@ -34,6 +34,8 @@
34		34
35	#include <nvgpu/hw/gv100/hw_gr_gv100.h>	35	#include <nvgpu/hw/gv100/hw_gr_gv100.h>
36	#include <nvgpu/hw/gv100/hw_proj_gv100.h>	36	#include <nvgpu/hw/gv100/hw_proj_gv100.h>
		37	#include <nvgpu/hw/gv100/hw_fuse_gv100.h>
		38
37		39
38	/*	40	/*
39	* Estimate performance if the given logical TPC in the given logical GPC were	41	* Estimate performance if the given logical TPC in the given logical GPC were
@@ -329,3 +331,45 @@ u32 gr_gv100_get_patch_slots(struct gk20a *g)
329		331
330	return size;	332	return size;
331	}	333	}
		334
		335	int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
		336	struct ctxsw_buf_offset_map_entry *map,
		337	struct aiv_list_gk20a *regs,
		338	u32 count, u32 offset,
		339	u32 max_cnt, u32 base,
		340	u32 num_fbpas, u32 stride, u32 mask)
		341	{
		342	u32 fbpa_id;
		343	u32 idx;
		344	u32 cnt = *count;
		345	u32 off = *offset;
		346	u32 active_fbpa_mask;
		347
		348	if ((cnt + (regs->count * num_fbpas)) > max_cnt)
		349	return -EINVAL;
		350
		351	/*
		352	* Read active fbpa mask from fuse
		353	* Note that 0:enable and 1:disable in value read from fuse so we've to
		354	* flip the bits.
		355	* Also set unused bits to zero
		356	*/
		357	active_fbpa_mask = nvgpu_readl(g, fuse_status_opt_fbio_r());
		358	active_fbpa_mask = ~active_fbpa_mask;
		359	active_fbpa_mask = active_fbpa_mask & ((1 << num_fbpas) - 1);
		360
		361	for (idx = 0; idx < regs->count; idx++) {
		362	for (fbpa_id = 0; fbpa_id < num_fbpas; fbpa_id++) {
		363	if (active_fbpa_mask & BIT(fbpa_id)) {
		364	map[cnt].addr = base +
		365	(regs->l[idx].addr & mask) +
		366	(fbpa_id * stride);
		367	map[cnt++].offset = off;
		368	off += 4;
		369	}
		370	}
		371	}
		372	*count = cnt;
		373	*offset = off;
		374	return 0;
		375	}


diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index 690bba57..e1174686 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h
@@ -33,4 +33,10 @@ void gr_gv100_program_sm_id_numbering(struct gk20a *g,
33	u32 gpc, u32 tpc, u32 smid);	33	u32 gpc, u32 tpc, u32 smid);
34	int gr_gv100_load_smid_config(struct gk20a *g);	34	int gr_gv100_load_smid_config(struct gk20a *g);
35	u32 gr_gv100_get_patch_slots(struct gk20a *g);	35	u32 gr_gv100_get_patch_slots(struct gk20a *g);
		36	int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g,
		37	struct ctxsw_buf_offset_map_entry *map,
		38	struct aiv_list_gk20a *regs,
		39	u32 count, u32 offset,
		40	u32 max_cnt, u32 base,
		41	u32 num_fbpas, u32 stride, u32 mask);
36	#endif	42	#endif


diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 3dbd1e01..08fc7c34 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -429,6 +429,7 @@ static const struct gpu_ops gv100_ops = {
429	.set_error_notifier = gk20a_gr_set_error_notifier,	429	.set_error_notifier = gk20a_gr_set_error_notifier,
430	.handle_notify_pending = gk20a_gr_handle_notify_pending,	430	.handle_notify_pending = gk20a_gr_handle_notify_pending,
431	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,	431	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
		432	.add_ctxsw_reg_pm_fbpa = gr_gv100_add_ctxsw_reg_pm_fbpa,
432	},	433	},
433	.fb = {	434	.fb = {
434	.reset = gv100_fb_reset,	435	.reset = gv100_fb_reset,


diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 1fe09827..1edf67d6 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -396,6 +396,7 @@ static const struct gpu_ops gv11b_ops = {
396	.set_error_notifier = gk20a_gr_set_error_notifier,	396	.set_error_notifier = gk20a_gr_set_error_notifier,
397	.handle_notify_pending = gk20a_gr_handle_notify_pending,	397	.handle_notify_pending = gk20a_gr_handle_notify_pending,
398	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,	398	.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
		399	.add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
399	},	400	},
400	.fb = {	401	.fb = {
401	.reset = gv11b_fb_reset,	402	.reset = gv11b_fb_reset,


diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 18eee404..5750fd78 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -220,6 +220,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
220	gr_gp10b_init_gfxp_wfi_timeout_count,	220	gr_gp10b_init_gfxp_wfi_timeout_count,
221	.get_max_gfxp_wfi_timeout_count =	221	.get_max_gfxp_wfi_timeout_count =
222	gr_gp10b_get_max_gfxp_wfi_timeout_count,	222	gr_gp10b_get_max_gfxp_wfi_timeout_count,
		223	.add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
223	},	224	},
224	.fb = {	225	.fb = {
225	.reset = fb_gk20a_reset,	226	.reset = fb_gk20a_reset,


diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 465b57a5..9588f4bd 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -256,6 +256,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
256	gr_gv11b_init_gfxp_wfi_timeout_count,	256	gr_gv11b_init_gfxp_wfi_timeout_count,
257	.get_max_gfxp_wfi_timeout_count =	257	.get_max_gfxp_wfi_timeout_count =
258	gr_gv11b_get_max_gfxp_wfi_timeout_count,	258	gr_gv11b_get_max_gfxp_wfi_timeout_count,
		259	.add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
259	},	260	},
260	.fb = {	261	.fb = {
261	.reset = gv11b_fb_reset,	262	.reset = gv11b_fb_reset,