From 77b806fe7e68e853676f7c4bad14349aba1affa5 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Fri, 16 Mar 2018 17:55:18 +0530 Subject: gpu: nvgpu: gv100: fix PMA list alignment in ctxsw buffer GV100 ucode is changed so that it expects LIST_nv_perf_pma_ctx_reg list in ctxsw buffer to be 256 byte aligned but same change is not applied to other chip ucodes ADD new HAL (*add_ctxsw_reg_perf_pma) to configure PMA register list and define a common HAL gr_gk20a_add_ctxsw_reg_perf_pma() for all other chips except GV100 Define a separate HAL for GV100 gr_gv100_add_ctxsw_reg_perf_pma() and fix the required alignment in this function Bug 1998067 Change-Id: Ie172fe90e2cdbac2509f2ece953cd8552e66fc56 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1676655 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gk20a.h | 4 ++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 11 ++++++++++- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 ++++ drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 + drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 + drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 + drivers/gpu/nvgpu/gv100/gr_gv100.c | 10 ++++++++++ drivers/gpu/nvgpu/gv100/gr_gv100.h | 4 ++++ drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 1 + drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 1 + drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 1 + 12 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 4ab1cd1b..f64a2b96 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -456,6 +456,10 @@ struct gpu_ops { u32 *count, u32 *offset, u32 max_cnt, u32 base, u32 num_fbpas, u32 stride, u32 mask); + int (*add_ctxsw_reg_perf_pma)(struct ctxsw_buf_offset_map_entry *map, + struct aiv_list_gk20a *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask); } gr; struct { void (*init_hw)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index bec33293..9d85a5ef 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -7568,6 +7568,15 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, return 0; } +int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, + struct aiv_list_gk20a *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask) +{ + return add_ctxsw_buffer_map_entries(map, regs, + count, offset, max_cnt, base, mask); +} + /* * PM CTXSW BUFFER LAYOUT : *|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE @@ -7671,7 +7680,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) goto cleanup; /* Add entries from _LIST_nv_perf_pma_ctx_reg*/ - if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma, + if (g->ops.gr.add_ctxsw_reg_perf_pma(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma, &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) goto cleanup; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 5ac363e1..54833028 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -817,4 +817,8 @@ int gr_gk20a_add_ctxsw_reg_pm_fbpa(struct gk20a *g, u32 *count, u32 *offset, u32 max_cnt, u32 base, u32 num_fbpas, u32 stride, u32 mask); +int gr_gk20a_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, + struct aiv_list_gk20a *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask); #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 9de5a4ca..3c1970eb 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -315,6 +315,7 @@ static const struct gpu_ops gm20b_ops = { .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, + .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index bab32a92..84e72e98 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -377,6 +377,7 @@ static const struct gpu_ops gp106_ops = { .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, + .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, }, .fb = { .reset = gp106_fb_reset, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 843b6ee7..825d11e5 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -345,6 +345,7 @@ static const struct gpu_ops gp10b_ops = { .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, + .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 3aed7a19..c6273733 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -373,3 +373,13 @@ int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g, *offset = off; return 0; } + +int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, + struct aiv_list_gk20a *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask) +{ + *offset = ALIGN(*offset, 256); + return gr_gk20a_add_ctxsw_reg_perf_pma(map, regs, + count, offset, max_cnt, base, mask); +} diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index e1174686..7b107db2 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h @@ -39,4 +39,8 @@ int gr_gv100_add_ctxsw_reg_pm_fbpa(struct gk20a *g, u32 *count, u32 *offset, u32 max_cnt, u32 base, u32 num_fbpas, u32 stride, u32 mask); +int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, + struct aiv_list_gk20a *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask); #endif diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 08fc7c34..cfac8e0e 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -430,6 +430,7 @@ static const struct gpu_ops gv100_ops = { .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .add_ctxsw_reg_pm_fbpa = gr_gv100_add_ctxsw_reg_pm_fbpa, + .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma, }, .fb = { .reset = gv100_fb_reset, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 1edf67d6..94aa2dc4 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -397,6 +397,7 @@ static const struct gpu_ops gv11b_ops = { .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, + .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, }, .fb = { .reset = gv11b_fb_reset, diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 5750fd78..7156a29d 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -221,6 +221,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .get_max_gfxp_wfi_timeout_count = gr_gp10b_get_max_gfxp_wfi_timeout_count, .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, + .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 9588f4bd..2c81607d 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -257,6 +257,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .get_max_gfxp_wfi_timeout_count = gr_gv11b_get_max_gfxp_wfi_timeout_count, .add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa, + .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, }, .fb = { .reset = gv11b_fb_reset, -- cgit v1.2.2