From 19aa748be53787da6abe435ea7043a7827d0fde0 Mon Sep 17 00:00:00 2001
From: Deepak Nibade
Date: Fri, 6 Apr 2018 18:34:01 +0530
Subject: gpu: nvgpu: add support to get unicast addresses on volta

We have new broadcast registers on Volta, and we need to generate
correct unicast addresses for them so that we can write those
registers to the context image.

Add new GR HAL create_priv_addr_table() to do this conversion.
Set gr_gk20a_create_priv_addr_table() for older chips.
Set gr_gv11b_create_priv_addr_table() for Volta.

gr_gv11b_create_priv_addr_table() uses the broadcast flags to
generate the appropriate list of unicast registers for each
broadcast register.

Bug 200398811
Jira NVGPU-556

Change-Id: Id53a9e56106d200fe560ffc93394cc0e976f455f
Signed-off-by: Deepak Nibade
Reviewed-on: https://git-master.nvidia.com/r/1690027
Reviewed-by: svc-mobile-coverity
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c     | 208 +++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gv11b/gr_gv11b.h     |   4 +
 drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h |   7 ++
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c    |   1 +
 4 files changed, 220 insertions(+)
(limited to 'drivers/gpu/nvgpu/gv11b')

diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 61649d06..67603739 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -57,6 +57,7 @@
 #include
 #include
 #include
+#include
 
 #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
 
@@ -4511,3 +4512,210 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
 	*addr_type = CTXSW_ADDR_TYPE_SYS;
 	return 0;
 }
+
+static u32 gr_gv11b_pri_pmmgpc_addr(u32 gpc_num, u32 domain_idx, u32 offset)
+{
+	return perf_pmmgpc_base_v() +
+		(gpc_num * (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1)) +
+		(domain_idx * perf_pmmgpc_perdomain_offset_v()) +
+		offset;
+}
+
+static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g,
+	u32 offset, u32 *priv_addr_table, u32 *t,
+	u32 domain_start, u32 num_domains)
+{
+	u32 domain_idx = 0;
+	u32 fbp_num = 0;
+	u32 base = 0;
+
+	for (fbp_num = 0; fbp_num < g->gr.num_fbps; fbp_num++) {
+		base = perf_pmmfbp_base_v() +
+			(fbp_num *
+			(perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1));
+
+		for (domain_idx = domain_start;
+		     domain_idx < (domain_start + num_domains);
+		     domain_idx++) {
+			priv_addr_table[(*t)++] = base +
+				(domain_idx * perf_pmmgpc_perdomain_offset_v()) +
+				offset;
+		}
+	}
+}
+
+
+int gr_gv11b_create_priv_addr_table(struct gk20a *g,
+	u32 addr,
+	u32 *priv_addr_table,
+	u32 *num_registers)
+{
+	int addr_type; /* enum ctxsw_addr_type */
+	u32 gpc_num, tpc_num, ppc_num, be_num;
+	u32 priv_addr, gpc_addr;
+	u32 broadcast_flags;
+	u32 t;
+	int err;
+	int fbpa_num;
+
+	t = 0;
+	*num_registers = 0;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
+
+	err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
+			&gpc_num, &tpc_num, &ppc_num, &be_num,
+			&broadcast_flags);
+	gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
+	if (err)
+		return err;
+
+	if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
+	    (addr_type == CTXSW_ADDR_TYPE_BE)) {
+		/*
+		 * The BE broadcast registers are included in the compressed PRI
+		 * table. Convert a BE unicast address to a broadcast address
+		 * so that we can look up the offset
+		 */
+		if ((addr_type == CTXSW_ADDR_TYPE_BE) &&
+		    !(broadcast_flags & PRI_BROADCAST_FLAGS_BE))
+			priv_addr_table[t++] = pri_be_shared_addr(g, addr);
+		else
+			priv_addr_table[t++] = addr;
+
+		*num_registers = t;
+		return 0;
+	}
+
+	/*
+	 * The GPC/TPC unicast registers are included in the compressed PRI
+	 * tables. Convert a GPC/TPC broadcast address to unicast addresses so
+	 * that we can look up the offsets
+	 */
+	if (broadcast_flags & PRI_BROADCAST_FLAGS_GPC) {
+		for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
+
+			if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
+				for (tpc_num = 0;
+				     tpc_num < g->gr.gpc_tpc_count[gpc_num];
+				     tpc_num++)
+					priv_addr_table[t++] =
+						pri_tpc_addr(g,
+						pri_tpccs_addr_mask(addr),
+						gpc_num, tpc_num);
+
+			else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC) {
+				err = gr_gk20a_split_ppc_broadcast_addr(g,
+					addr, gpc_num, priv_addr_table, &t);
+				if (err)
+					return err;
+			} else {
+				priv_addr = pri_gpc_addr(g,
+						pri_gpccs_addr_mask(addr),
+						gpc_num);
+
+				gpc_addr = pri_gpccs_addr_mask(priv_addr);
+				tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr);
+				if (tpc_num >= g->gr.gpc_tpc_count[gpc_num])
+					continue;
+
+				priv_addr_table[t++] = priv_addr;
+			}
+		}
+	} else if (broadcast_flags & PRI_BROADCAST_FLAGS_PMMGPC) {
+		u32 pmm_domain_start = 0;
+		u32 domain_idx = 0;
+		u32 num_domains = 0;
+		u32 offset = 0;
+
+		if (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCA) {
+			pmm_domain_start = NV_PERF_PMMGPCTPCA_DOMAIN_START;
+			num_domains = NV_PERF_PMMGPC_NUM_DOMAINS;
+			offset = PRI_PMMGS_OFFSET_MASK(addr);
+		} else if (broadcast_flags &
+				PRI_BROADCAST_FLAGS_PMM_GPCGS_GPCTPCB) {
+			pmm_domain_start = NV_PERF_PMMGPCTPCA_DOMAIN_START +
+					NV_PERF_PMMGPC_NUM_DOMAINS;
+			num_domains = NV_PERF_PMMGPC_NUM_DOMAINS;
+			offset = PRI_PMMGS_OFFSET_MASK(addr);
+		} else if (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_GPCS) {
+			pmm_domain_start = (addr -
+				(NV_PERF_PMMGPC_GPCS + PRI_PMMS_ADDR_MASK(addr)))/
+				perf_pmmgpc_perdomain_offset_v();
+			num_domains = 1;
+			offset = PRI_PMMS_ADDR_MASK(addr);
+		} else {
+			return -EINVAL;
+		}
+
+		for (gpc_num = 0; gpc_num < g->gr.gpc_count; gpc_num++) {
+			for (domain_idx = pmm_domain_start;
+			     domain_idx < (pmm_domain_start + num_domains);
+			     domain_idx++) {
+				priv_addr_table[t++] =
+					gr_gv11b_pri_pmmgpc_addr(gpc_num,
+						domain_idx, offset);
+			}
+		}
+	} else if (((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
+			(addr_type == CTXSW_ADDR_TYPE_ETPC)) &&
+			g->ops.gr.egpc_etpc_priv_addr_table) {
+		gk20a_dbg(gpu_dbg_gpu_dbg, "addr_type : EGPC/ETPC");
+		g->ops.gr.egpc_etpc_priv_addr_table(g, addr, gpc_num,
+				broadcast_flags, priv_addr_table, &t);
+	} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
+		g->ops.gr.split_lts_broadcast_addr(g, addr,
+				priv_addr_table, &t);
+	} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
+		g->ops.gr.split_ltc_broadcast_addr(g, addr,
+				priv_addr_table, &t);
+	} else if (broadcast_flags & PRI_BROADCAST_FLAGS_FBPA) {
+		for (fbpa_num = 0;
+		     fbpa_num < nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
+		     fbpa_num++)
+			priv_addr_table[t++] = pri_fbpa_addr(g,
+					pri_fbpa_addr_mask(g, addr), fbpa_num);
+	} else if ((addr_type == CTXSW_ADDR_TYPE_LTCS) &&
+		   (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_LTC)) {
+		gr_gv11b_split_pmm_fbp_broadcast_address(g,
+			PRI_PMMGS_OFFSET_MASK(addr),
+			priv_addr_table, &t,
+			NV_PERF_PMMFBP_LTC_DOMAIN_START,
+			NV_PERF_PMMFBP_LTC_NUM_DOMAINS);
+	} else if ((addr_type == CTXSW_ADDR_TYPE_ROP) &&
+		   (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPGS_ROP)) {
+		gr_gv11b_split_pmm_fbp_broadcast_address(g,
+			PRI_PMMGS_OFFSET_MASK(addr),
+			priv_addr_table, &t,
+			NV_PERF_PMMFBP_ROP_DOMAIN_START,
+			NV_PERF_PMMFBP_ROP_NUM_DOMAINS);
+	} else if ((addr_type == CTXSW_ADDR_TYPE_FBP) &&
+		   (broadcast_flags & PRI_BROADCAST_FLAGS_PMM_FBPS)) {
+		u32 domain_start;
+
+		domain_start = (addr -
+			(NV_PERF_PMMFBP_FBPS + PRI_PMMS_ADDR_MASK(addr)))/
+			perf_pmmgpc_perdomain_offset_v();
+		gr_gv11b_split_pmm_fbp_broadcast_address(g,
+			PRI_PMMS_ADDR_MASK(addr),
+			priv_addr_table, &t,
+			domain_start, 1);
+	} else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) {
+		if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
+			for (tpc_num = 0;
+			     tpc_num < g->gr.gpc_tpc_count[gpc_num];
+			     tpc_num++)
+				priv_addr_table[t++] =
+					pri_tpc_addr(g,
+						pri_tpccs_addr_mask(addr),
+						gpc_num, tpc_num);
+		else if (broadcast_flags & PRI_BROADCAST_FLAGS_PPC)
+			err = gr_gk20a_split_ppc_broadcast_addr(g,
+					addr, gpc_num, priv_addr_table, &t);
+		else
+			priv_addr_table[t++] = addr;
+	}
+
+	*num_registers = t;
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index 7d286535..3c581326 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -238,4 +238,8 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr,
 	int *addr_type,
 	u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
 	u32 *broadcast_flags);
+int gr_gv11b_create_priv_addr_table(struct gk20a *g,
+	u32 addr,
+	u32 *priv_addr_table,
+	u32 *num_registers);
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h
index c71f4c9c..78658bf8 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_pri_gv11b.h
@@ -37,6 +37,13 @@
 #define NV_PERF_PMMGPC_GPCS	0x00278000
 #define NV_PERF_PMMFBP_FBPS	0x0027C000
 
+#define NV_PERF_PMMGPCTPCA_DOMAIN_START	2
+#define NV_PERF_PMMFBP_LTC_DOMAIN_START	2
+#define NV_PERF_PMMFBP_ROP_DOMAIN_START	6
+#define NV_PERF_PMMGPC_NUM_DOMAINS	7
+#define NV_PERF_PMMFBP_LTC_NUM_DOMAINS	4
+#define NV_PERF_PMMFBP_ROP_NUM_DOMAINS	2
+
 #define PRI_PMMGS_ADDR_WIDTH	9
 #define PRI_PMMS_ADDR_WIDTH	14
 
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index c33844dc..e39df1db 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -404,6 +404,7 @@ static const struct gpu_ops gv11b_ops = {
 		.add_ctxsw_reg_pm_fbpa = gr_gk20a_add_ctxsw_reg_pm_fbpa,
 		.add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma,
 		.decode_priv_addr = gr_gv11b_decode_priv_addr,
+		.create_priv_addr_table = gr_gv11b_create_priv_addr_table,
 	},
 	.fb = {
 		.reset = gv11b_fb_reset,
--
cgit v1.2.2
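
Illustration (not part of the patch): the standalone sketch below mimics the address
math that gr_gv11b_create_priv_addr_table() applies for the PRI_BROADCAST_FLAGS_PMMGPC
case, where one broadcast register offset is expanded into one unicast address per GPC
chiplet per perfmon domain. Every name prefixed with EXAMPLE_ is a made-up placeholder
standing in for the real hw_perf accessors (perf_pmmgpc_base_v(), the chiplet stride
derived from perf_pmmsys_extent_v()/perf_pmmsys_base_v(), and
perf_pmmgpc_perdomain_offset_v()); the GPC count, domain range and offset in main() are
arbitrary example values, not gv11b hardware values.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder constants; the driver reads the real values from hw_perf accessors. */
#define EXAMPLE_PMMGPC_BASE      0x00180000u /* stands in for perf_pmmgpc_base_v() */
#define EXAMPLE_CHIPLET_STRIDE   0x00001000u /* stands in for extent - base + 1 */
#define EXAMPLE_PERDOMAIN_STRIDE 0x00000200u /* stands in for perdomain_offset_v() */

/* Same shape as gr_gv11b_pri_pmmgpc_addr(): per-GPC chiplet base, plus the
 * per-domain offset, plus the register offset within the domain. */
static uint32_t example_pmmgpc_addr(uint32_t gpc_num, uint32_t domain_idx,
				    uint32_t offset)
{
	return EXAMPLE_PMMGPC_BASE +
		(gpc_num * EXAMPLE_CHIPLET_STRIDE) +
		(domain_idx * EXAMPLE_PERDOMAIN_STRIDE) +
		offset;
}

int main(void)
{
	/* Pretend configuration: 2 GPCs, domains 2..8, register offset 0x40. */
	uint32_t gpc_count = 2;
	uint32_t domain_start = 2;
	uint32_t num_domains = 7;
	uint32_t offset = 0x40;
	uint32_t table[64];
	uint32_t t = 0;
	uint32_t gpc, d, i;

	/* Expand the single broadcast offset into per-GPC, per-domain unicast
	 * addresses, exactly as the priv_addr_table is filled in the patch. */
	for (gpc = 0; gpc < gpc_count; gpc++)
		for (d = domain_start; d < domain_start + num_domains; d++)
			table[t++] = example_pmmgpc_addr(gpc, d, offset);

	for (i = 0; i < t; i++)
		printf("unicast[%" PRIu32 "] = 0x%08" PRIx32 "\n", i, table[i]);

	return 0;
}

The FBP path in the patch follows the same base + chiplet-stride + domain-stride +
offset pattern: gr_gv11b_split_pmm_fbp_broadcast_address() simply starts from
perf_pmmfbp_base_v() and loops over FBPs instead of GPCs.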