From 84db72a21ce4076a66acbb936b5a6dabf39c7ee2 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Tue, 26 Jun 2018 03:44:55 -0700 Subject: gpu: nvgpu: add HAL to get offset in gpccs segment In gr_gk20a_find_priv_offset_in_buffer() we currently calculate the offset of a register in the gpccs segment based on the register address type. Separate out the sequence that finds the offset in the gpccs segment and move it to a new API, gr_gk20a_get_offset_in_gpccs_segment(). Introduce a new HAL, gops.gr.get_offset_in_gpccs_segment(), and set the above API to this HAL. Call the HAL from gr_gk20a_find_priv_offset_in_buffer() instead of calling the API directly. Jira NVGPUT-118 Change-Id: I0df798456cf63e3c3a43131f3c4ca7990b89ede0 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1761669 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gk20a.h | 3 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 115 +++++++++++++++++--------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 3 + drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 2 + drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 + drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 + drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 2 + drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 2 + drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 2 + 10 files changed, 95 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 263d0632..ee1f3304 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -492,6 +492,9 @@ struct gpu_ops { int (*commit_global_ctx_buffers)(struct gk20a *g, struct channel_gk20a *c, bool patch); u32 (*get_nonpes_aware_tpc)(struct gk20a *g, u32 gpc, u32 tpc); + int (*get_offset_in_gpccs_segment)(struct gk20a *g, + int addr_type, u32 num_tpcs, u32 num_ppcs, + u32 reg_list_ppc_count, u32 *__offset_in_segment); } gr; struct { void (*init_hw)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c 
b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index d787a693..074a74c0 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -7136,6 +7136,69 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, return 0; } +int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g, + int addr_type, + u32 num_tpcs, + u32 num_ppcs, + u32 reg_list_ppc_count, + u32 *__offset_in_segment) +{ + u32 offset_in_segment = 0; + struct gr_gk20a *gr = &g->gr; + + if (addr_type == CTXSW_ADDR_TYPE_TPC) { + /* + * reg = gr->ctx_vars.ctxsw_regs.tpc.l; + * offset_in_segment = 0; + */ + } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) || + (addr_type == CTXSW_ADDR_TYPE_ETPC)) { + offset_in_segment = + ((gr->ctx_vars.ctxsw_regs.tpc.count * + num_tpcs) << 2); + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg, + "egpc etpc offset_in_segment 0x%#08x", + offset_in_segment); + } else if (addr_type == CTXSW_ADDR_TYPE_PPC) { + /* + * The ucode stores TPC data before PPC data. + * Advance offset past TPC data to PPC data. + */ + offset_in_segment = + (((gr->ctx_vars.ctxsw_regs.tpc.count + + gr->ctx_vars.ctxsw_regs.etpc.count) * + num_tpcs) << 2); + } else if (addr_type == CTXSW_ADDR_TYPE_GPC) { + /* + * The ucode stores TPC/PPC data before GPC data. + * Advance offset past TPC/PPC data to GPC data. 
+ * + * Note 1 PES_PER_GPC case + */ + u32 num_pes_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_PES_PER_GPC); + if (num_pes_per_gpc > 1) { + offset_in_segment = + ((((gr->ctx_vars.ctxsw_regs.tpc.count + + gr->ctx_vars.ctxsw_regs.etpc.count) * + num_tpcs) << 2) + + ((reg_list_ppc_count * num_ppcs) << 2)); + } else { + offset_in_segment = + (((gr->ctx_vars.ctxsw_regs.tpc.count + + gr->ctx_vars.ctxsw_regs.etpc.count) * + num_tpcs) << 2); + } + } else { + nvgpu_log_fn(g, "Unknown address type."); + return -EINVAL; + } + + *__offset_in_segment = offset_in_segment; + return 0; +} + /* * This function will return the 32 bit offset for a priv register if it is * present in the context buffer. The context buffer is in CPU memory. @@ -7147,7 +7210,6 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, u32 context_buffer_size, u32 *priv_offset) { - struct gr_gk20a *gr = &g->gr; u32 i, data32; int err; int addr_type; /*enum ctxsw_addr_type */ @@ -7158,7 +7220,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, u32 sys_priv_offset, gpc_priv_offset; u32 ppc_mask, reg_list_ppc_count; u8 *context; - u32 offset_to_segment; + u32 offset_to_segment, offset_in_segment = 0; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); @@ -7266,45 +7328,18 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, offset_to_segment = gpc_priv_offset * ctxsw_prog_ucode_header_size_in_bytes(); - if (addr_type == CTXSW_ADDR_TYPE_TPC) { - /*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/ - } else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) || - (addr_type == CTXSW_ADDR_TYPE_ETPC)) { - nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg, - "egpc etpc offset_to_segment 0x%#08x", - offset_to_segment); - offset_to_segment += - ((gr->ctx_vars.ctxsw_regs.tpc.count * - num_tpcs) << 2); - } else if (addr_type == CTXSW_ADDR_TYPE_PPC) { - /* The ucode stores TPC data before PPC data. - * Advance offset past TPC data to PPC data. 
*/ - offset_to_segment += - (((gr->ctx_vars.ctxsw_regs.tpc.count + - gr->ctx_vars.ctxsw_regs.etpc.count) * - num_tpcs) << 2); - } else if (addr_type == CTXSW_ADDR_TYPE_GPC) { - /* The ucode stores TPC/PPC data before GPC data. - * Advance offset past TPC/PPC data to GPC data. */ - /* note 1 PES_PER_GPC case */ - u32 num_pes_per_gpc = nvgpu_get_litter_value(g, - GPU_LIT_NUM_PES_PER_GPC); - if (num_pes_per_gpc > 1) { - offset_to_segment += - ((((gr->ctx_vars.ctxsw_regs.tpc.count + - gr->ctx_vars.ctxsw_regs.etpc.count) * - num_tpcs) << 2) + - ((reg_list_ppc_count * num_ppcs) << 2)); - } else { - offset_to_segment += - (((gr->ctx_vars.ctxsw_regs.tpc.count + - gr->ctx_vars.ctxsw_regs.etpc.count) * - num_tpcs) << 2); - } - } else { - nvgpu_log_fn(g, "Unknown address type."); + err = g->ops.gr.get_offset_in_gpccs_segment(g, + addr_type, + num_tpcs, num_ppcs, reg_list_ppc_count, + &offset_in_segment); + if (err) return -EINVAL; - } + + offset_to_segment += offset_in_segment; + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, + "offset_to_segment 0x%#08x", + offset_to_segment); + err = gr_gk20a_process_context_buffer_priv_segment(g, addr_type, addr, i, num_tpcs, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 6cf3d69b..a77136a6 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -849,6 +849,9 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, u32 num_fbpas, u32 *priv_addr_table, u32 *t); +int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g, + int addr_type, u32 num_tpcs, u32 num_ppcs, + u32 reg_list_ppc_count, u32 *__offset_in_segment); void gk20a_gr_destroy_ctx_buffer(struct gk20a *g, struct gr_ctx_buffer_desc *desc); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 6bc13a7f..798b5f06 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ 
-330,6 +330,8 @@ static const struct gpu_ops gm20b_ops = { .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, .map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, + .get_offset_in_gpccs_segment = + gr_gk20a_get_offset_in_gpccs_segment, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 8af70cf5..a22350ce 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -401,6 +401,8 @@ static const struct gpu_ops gp106_ops = { .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, .map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, + .get_offset_in_gpccs_segment = + gr_gk20a_get_offset_in_gpccs_segment, }, .fb = { .reset = gp106_fb_reset, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 2430be79..a1c32a5f 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -364,6 +364,8 @@ static const struct gpu_ops gp10b_ops = { .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, .map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, + .get_offset_in_gpccs_segment = + gr_gk20a_get_offset_in_gpccs_segment, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 102e6a04..9d059b72 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -451,6 +451,8 @@ static const struct gpu_ops gv100_ops = { .map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, .get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc, + .get_offset_in_gpccs_segment = + gr_gk20a_get_offset_in_gpccs_segment, }, 
.fb = { .reset = gv100_fb_reset, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 87775a3f..2b58e1c4 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -418,6 +418,8 @@ static const struct gpu_ops gv11b_ops = { .map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, .get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc, + .get_offset_in_gpccs_segment = + gr_gk20a_get_offset_in_gpccs_segment, }, .fb = { .reset = gv11b_fb_reset, diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 5c210519..349548cd 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -235,6 +235,8 @@ static const struct gpu_ops vgpu_gp10b_ops = { .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, .map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, + .get_offset_in_gpccs_segment = + gr_gk20a_get_offset_in_gpccs_segment, }, .fb = { .reset = fb_gk20a_reset, diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 2b65c992..d4a113f8 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -272,6 +272,8 @@ static const struct gpu_ops vgpu_gv11b_ops = { .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, .map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, + .get_offset_in_gpccs_segment = + gr_gk20a_get_offset_in_gpccs_segment, }, .fb = { .reset = gv11b_fb_reset, -- cgit v1.2.2