From f3d89a2997800a185c2b645593fffe342dc332df Mon Sep 17 00:00:00 2001 From: neha Date: Mon, 11 Apr 2016 17:42:39 +0530 Subject: gpu: nvgpu: Full chip support for ctxsw nvgpu changes needed to handle the newly added ctxsw lists Fix regops support for ppc registers Squashed from: Change-Id: I08e6dec3bb2f7aa51de912c9d1c84a350ce07f72 Signed-off-by: neha Reviewed-on: http://git-master/r/1151010 (cherry picked from commit fd03ad9f09e66f78db88fb7ece448e26e0515821) and: Change-Id: I75a7f810ee0b613c22ac2cef2d936563d8067f97 Signed-off-by: Peter Daifuku Reviewed-on: http://git-master/r/1158888 (cherry picked from commit f00a7fcc57fb937b800e46760087ff6f7637520c) Bug 200180000 Bug 1771830 Reviewed-on: http://git-master/r/1164397 (cherry picked from commit 7028f051e4f37edeff90a9923f022cec6c645a8f) Signed-off-by: Peter Daifuku Change-Id: I796ddf93ef37170843a4a6b44190cd6780d25852 Reviewed-on: http://git-master/r/1183588 Reviewed-by: Vladislav Buzov GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/gk20a/gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c | 33 ++++++++++ drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h | 5 ++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 113 +++++++++++++++++++++++++++++++- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 4 ++ drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h | 29 ++++++++ drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 8 +++ drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h | 4 ++ drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 8 +++ drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h | 4 ++ 10 files changed, 206 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 03a698dc..41eb7eb7 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -100,6 +100,7 @@ enum nvgpu_litter_value { GPU_LIT_TPC_IN_GPC_SHARED_BASE, GPU_LIT_PPC_IN_GPC_BASE, GPU_LIT_PPC_IN_GPC_STRIDE, + GPU_LIT_PPC_IN_GPC_SHARED_BASE, GPU_LIT_ROP_BASE, GPU_LIT_ROP_STRIDE, GPU_LIT_ROP_SHARED_BASE, diff --git 
a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c index 5029db8d..81ac341f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.c @@ -336,6 +336,34 @@ static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr) if (err) goto clean_up; break; + case NETLIST_REGIONID_NVPERF_SYS_ROUTER: + gk20a_dbg_info("NETLIST_REGIONID_NVPERF_SYS_ROUTER"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_NVPERF_PMA: + gk20a_dbg_info("NETLIST_REGIONID_NVPERF_PMA"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.perf_pma); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_PMROP: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMROP"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.pm_rop); + if (err) + goto clean_up; + break; + case NETLIST_REGIONID_CTXREG_PMUCGPC: + gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMUCGPC"); + err = gr_gk20a_alloc_load_netlist_aiv( + src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc); + if (err) + goto clean_up; + break; default: gk20a_dbg_info("unrecognized region %d skipped", i); break; @@ -381,6 +409,10 @@ clean_up: kfree(g->gr.ctx_vars.ctxsw_regs.gpc_router.l); kfree(g->gr.ctx_vars.ctxsw_regs.pm_ltc.l); kfree(g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l); + kfree(g->gr.ctx_vars.ctxsw_regs.perf_sys_router.l); + kfree(g->gr.ctx_vars.ctxsw_regs.perf_pma.l); + kfree(g->gr.ctx_vars.ctxsw_regs.pm_rop.l); + kfree(g->gr.ctx_vars.ctxsw_regs.pm_ucgpc.l); release_firmware(netlist_fw); err = -ENOENT; } diff --git a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h index b0e4c4c2..2b384993 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_ctx_gk20a.h @@ -93,6 +93,11 @@ union __max_name { #define
NETLIST_REGIONID_NVPERF_GPC_ROUTER 25 #define NETLIST_REGIONID_CTXREG_PMLTC 26 #define NETLIST_REGIONID_CTXREG_PMFBPA 27 +#define NETLIST_REGIONID_SWVEIDBUNDLEINIT 28 +#define NETLIST_REGIONID_NVPERF_SYS_ROUTER 29 +#define NETLIST_REGIONID_NVPERF_PMA 30 +#define NETLIST_REGIONID_CTXREG_PMROP 31 +#define NETLIST_REGIONID_CTXREG_PMUCGPC 32 struct netlist_region { u32 region_id; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c5b2ba5c..62d3b231 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -64,6 +64,8 @@ #define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200 #define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000 #define NV_PERF_PMMGPCROUTER_STRIDE 0x0200 +#define NV_PCFG_BASE 0x00088000 +#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020 #define FE_PWR_MODE_TIMEOUT_MAX 2000 #define FE_PWR_MODE_TIMEOUT_DEFAULT 10 #define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000 @@ -6446,6 +6448,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, } else *gpc_num = pri_get_gpc_num(g, addr); + if (pri_is_ppc_addr(g, gpc_addr)) { + *addr_type = CTXSW_ADDR_TYPE_PPC; + if (pri_is_ppc_addr_shared(g, gpc_addr)) { + *broadcast_flags |= PRI_BROADCAST_FLAGS_PPC; + return 0; + } + } if (g->ops.gr.is_tpc_addr(g, gpc_addr)) { *addr_type = CTXSW_ADDR_TYPE_TPC; if (pri_is_tpc_addr_shared(g, gpc_addr)) { @@ -7490,6 +7499,72 @@ static int map_cmp(const void *a, const void *b) return 0; } +static int add_ctxsw_buffer_map_entries_pmsys(struct ctxsw_buf_offset_map_entry *map, + struct aiv_list_gk20a *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask) +{ + u32 idx; + u32 cnt = *count; + u32 off = *offset; + + if ((cnt + regs->count) > max_cnt) + return -EINVAL; + + for (idx = 0; idx < regs->count; idx++) { + if ((base + (regs->l[idx].addr & mask)) < 0xFFF) + map[cnt].addr = base + (regs->l[idx].addr & mask) + + NV_PCFG_BASE; + else + map[cnt].addr = base + (regs->l[idx].addr & mask); + map[cnt++].offset = off; 
+ off += 4; + } + *count = cnt; + *offset = off; + return 0; +} + +static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g, + struct ctxsw_buf_offset_map_entry *map, + struct aiv_list_gk20a *regs, + u32 *count, u32 *offset, + u32 max_cnt, u32 base, u32 mask) +{ + u32 idx; + u32 cnt = *count; + u32 off = *offset; + + if ((cnt + regs->count) > max_cnt) + return -EINVAL; + + /* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1 + * To handle the case of PPC registers getting added into GPC, the below + * code specifically checks for any PPC offsets and adds them using + * proper mask + */ + for (idx = 0; idx < regs->count; idx++) { + /* Check if the address is PPC address */ + if (pri_is_ppc_addr_shared(g, regs->l[idx].addr & mask)) { + u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, + GPU_LIT_PPC_IN_GPC_BASE); + u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_PPC_IN_GPC_STRIDE); + /* Use PPC mask instead of the GPC mask provided */ + u32 ppcmask = ppc_in_gpc_stride - 1; + + map[cnt].addr = base + ppc_in_gpc_base + + (regs->l[idx].addr & ppcmask); + } else + map[cnt].addr = base + (regs->l[idx].addr & mask); + map[cnt++].offset = off; + off += 4; + } + *count = cnt; + *offset = off; + return 0; +} + static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map, struct aiv_list_gk20a *regs, u32 *count, u32 *offset, @@ -7577,12 +7652,18 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, return -EINVAL; base = gpc_base + (gpc_stride * gpc_num); - if (add_ctxsw_buffer_map_entries(map, + if (add_ctxsw_buffer_map_entries_pmgpc(g, map, &g->gr.ctx_vars.ctxsw_regs.pm_gpc, count, offset, max_cnt, base, (gpc_stride - 1))) return -EINVAL; + base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num; + if (add_ctxsw_buffer_map_entries(map, + &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc, + count, offset, max_cnt, base, ~0)) + return -EINVAL; + base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num); if (add_ctxsw_buffer_map_entries(map, 
&g->gr.ctx_vars.ctxsw_regs.perf_gpc, @@ -7609,6 +7690,12 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, *| | *| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words *|---------------------------------------------| + *| | + *| LIST_compressed_nv_perf_ctx_reg_sysrouter|Space allocated: numRegs words + *|---------------------------------------------| + *| | + *| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words + *|---------------------------------------------| *| PADDING for 256 byte alignment | *|---------------------------------------------|<----256 byte aligned *| LIST_compressed_nv_perf_fbp_ctx_regs | @@ -7620,6 +7707,8 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, *| LIST_compressed_pm_fbpa_ctx_regs | *| |Space allocated: numRegs * n words (for n FB units) *|---------------------------------------------| + *| LIST_compressed_pm_rop_ctx_regs | + *|---------------------------------------------| *| LIST_compressed_pm_ltc_ctx_regs | *| LTC0 LTS0 | *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units) @@ -7641,7 +7730,8 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, *| LIST_pm_ctx_reg_PPC REG1 TPCn | *| * numPpcs REGn TPC0 | *| LIST_pm_ctx_reg_GPC REGn TPC1 | - *| LIST_nv_perf_ctx_reg_GPC REGn TPCn | + *| List_pm_ctx_reg_uc_GPC REGn TPCn | + *| LIST_nv_perf_ctx_reg_GPC | *| ---- |-- *| GPC1 . | *| . 
|<---- @@ -7679,7 +7769,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) return -ENOMEM; /* Add entries from _LIST_pm_ctx_reg_SYS */ - if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys, + if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys, &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) goto cleanup; @@ -7688,6 +7778,16 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) goto cleanup; + /* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/ + if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router, + &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) + goto cleanup; + + /* Add entries from _LIST_nv_perf_pma_ctx_reg*/ + if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma, + &count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0)) + goto cleanup; + offset = ALIGN(offset, 256); /* Add entries from _LIST_nv_perf_fbp_ctx_regs */ @@ -7714,6 +7814,13 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) num_fbpas, fbpa_stride, ~0)) goto cleanup; + /* Add entries from _LIST_nv_pm_rop_ctx_regs */ + if (add_ctxsw_buffer_map_entries(map, + &g->gr.ctx_vars.ctxsw_regs.pm_rop, + &count, &offset, + hwpm_ctxsw_reg_count_max, 0, ~0)) + goto cleanup; + /* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */ if (add_ctxsw_buffer_map_entries_subunits(map, &g->gr.ctx_vars.ctxsw_regs.pm_ltc, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index aa83f85a..e1335b89 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -248,6 +248,10 @@ struct gr_gk20a { struct aiv_list_gk20a gpc_router; struct aiv_list_gk20a pm_ltc; struct aiv_list_gk20a pm_fbpa; + struct aiv_list_gk20a perf_sys_router; + struct aiv_list_gk20a perf_pma; + struct aiv_list_gk20a pm_rop; + struct aiv_list_gk20a pm_ucgpc; } ctxsw_regs; 
int regs_base_index; bool valid; diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h index a7656d38..88521555 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h @@ -69,6 +69,35 @@ static inline u32 pri_get_gpc_num(struct gk20a *g, u32 addr) } return 0; } + +/* + * PPC pri addressing + */ +static inline bool pri_is_ppc_addr_shared(struct gk20a *g, u32 addr) +{ + u32 ppc_in_gpc_shared_base = nvgpu_get_litter_value(g, + GPU_LIT_PPC_IN_GPC_SHARED_BASE); + u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_PPC_IN_GPC_STRIDE); + + return ((addr >= ppc_in_gpc_shared_base) && + (addr < (ppc_in_gpc_shared_base + ppc_in_gpc_stride))); +} + +static inline bool pri_is_ppc_addr(struct gk20a *g, u32 addr) +{ + u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, + GPU_LIT_PPC_IN_GPC_BASE); + u32 num_pes_per_gpc = nvgpu_get_litter_value(g, + GPU_LIT_NUM_PES_PER_GPC); + u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, + GPU_LIT_PPC_IN_GPC_STRIDE); + + return ((addr >= ppc_in_gpc_base) && + (addr < ppc_in_gpc_base + num_pes_per_gpc * ppc_in_gpc_stride)) + || pri_is_ppc_addr_shared(g, addr); +} + /* * TPC pri addressing */ diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index 99c55992..e0479573 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -87,9 +87,15 @@ static int gk20a_get_litter_value(struct gk20a *g, case GPU_LIT_TPC_IN_GPC_SHARED_BASE: ret = proj_tpc_in_gpc_shared_base_v(); break; + case GPU_LIT_PPC_IN_GPC_BASE: + ret = proj_ppc_in_gpc_base_v(); + break; case GPU_LIT_PPC_IN_GPC_STRIDE: ret = proj_ppc_in_gpc_stride_v(); break; + case GPU_LIT_PPC_IN_GPC_SHARED_BASE: + ret = proj_ppc_in_gpc_shared_base_v(); + break; case GPU_LIT_ROP_BASE: ret = proj_rop_base_v(); break; @@ -118,6 +124,8 @@ static int gk20a_get_litter_value(struct gk20a *g, ret = proj_fbpa_stride_v(); break; default: + 
gk20a_err(dev_from_gk20a(g), "Missing definition %d", value); + BUG(); break; } diff --git a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h index 7dce9f91..047dc7d5 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_proj_gk20a.h @@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void) { return 0x00003000; } +static inline u32 proj_ppc_in_gpc_shared_base_v(void) +{ + return 0x00003e00; +} static inline u32 proj_ppc_in_gpc_stride_v(void) { return 0x00000200; diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 4f2fffc8..57cafd38 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -128,9 +128,15 @@ static int gm20b_get_litter_value(struct gk20a *g, case GPU_LIT_TPC_IN_GPC_SHARED_BASE: ret = proj_tpc_in_gpc_shared_base_v(); break; + case GPU_LIT_PPC_IN_GPC_BASE: + ret = proj_ppc_in_gpc_base_v(); + break; case GPU_LIT_PPC_IN_GPC_STRIDE: ret = proj_ppc_in_gpc_stride_v(); break; + case GPU_LIT_PPC_IN_GPC_SHARED_BASE: + ret = proj_ppc_in_gpc_shared_base_v(); + break; case GPU_LIT_ROP_BASE: ret = proj_rop_base_v(); break; @@ -159,6 +165,8 @@ static int gm20b_get_litter_value(struct gk20a *g, ret = proj_fbpa_stride_v(); break; default: + gk20a_err(dev_from_gk20a(g), "Missing definition %d", value); + BUG(); break; } diff --git a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h index d4d412e1..026c7848 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_proj_gm20b.h @@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void) { return 0x00003000; } +static inline u32 proj_ppc_in_gpc_shared_base_v(void) +{ + return 0x00003e00; +} static inline u32 proj_ppc_in_gpc_stride_v(void) { return 0x00000200; -- cgit v1.2.2