From 6839341bf8ffafa115cfc0427bba694ee1d131f3 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Wed, 6 Apr 2016 13:10:32 -0700
Subject: gpu: nvgpu: Add litter values HAL

Move per-chip constants to be returned by a chip specific
function. Implement get_litter_value() for each chip.

Change-Id: I2a2730fce14010924d2507f6fa15cc2ea0795113
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/1121383
---
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c  | 76 ++++++++++++++++++++++---------------
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 74 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 21 +++++-----
 3 files changed, 131 insertions(+), 40 deletions(-)

(limited to 'drivers/gpu/nvgpu/gm20b')
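Note: the common-code half of this change -- the enum of litter identifiers
and the accessor that dispatches into the per-chip HAL -- lives outside
drivers/gpu/nvgpu/gm20b, so it is not part of this path-limited diff. The
following is only a minimal sketch of what the gm20b hunks below assume;
the enumerator list is the subset used here, and the exact definitions are
in the common gk20a headers:

    /* Symbolic names for per-chip "litter" constants (subset). */
    enum nvgpu_litter_value {
            GPU_LIT_NUM_GPCS,
            GPU_LIT_NUM_PES_PER_GPC,
            GPU_LIT_NUM_ZCULL_BANKS,
            GPU_LIT_NUM_TPC_PER_GPC,
            GPU_LIT_NUM_FBPS,
            GPU_LIT_GPC_BASE,
            GPU_LIT_GPC_STRIDE,
            GPU_LIT_GPC_SHARED_BASE,
            GPU_LIT_TPC_IN_GPC_BASE,
            GPU_LIT_TPC_IN_GPC_STRIDE,
            GPU_LIT_TPC_IN_GPC_SHARED_BASE,
            GPU_LIT_PPC_IN_GPC_STRIDE,
            GPU_LIT_ROP_BASE,
            GPU_LIT_ROP_STRIDE,
            GPU_LIT_ROP_SHARED_BASE,
            GPU_LIT_HOST_NUM_PBDMA,
            GPU_LIT_LTC_STRIDE,
            GPU_LIT_LTS_STRIDE,
            GPU_LIT_NUM_FBPAS,
            GPU_LIT_FBPA_STRIDE,
    };

    /* Dispatch through the chip's HAL; each chip registers its own
     * get_litter_value() at init time (gm20b does so in gm20b_init_hal()
     * below). */
    static inline int nvgpu_get_litter_value(struct gk20a *g,
                                             enum nvgpu_litter_value value)
    {
            return g->ops.get_litter_value(g, value);
    }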
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 050c2bee..b49f2301 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -27,7 +27,6 @@
 #include "hw_fifo_gm20b.h"
 #include "hw_fb_gm20b.h"
 #include "hw_top_gm20b.h"
-#include "hw_proj_gm20b.h"
 #include "hw_ctxsw_prog_gm20b.h"
 #include "hw_fuse_gm20b.h"
 #include "pmu_gm20b.h"
@@ -178,6 +177,8 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 	u32 gpc_index, ppc_index;
 	u32 temp;
 	u32 cbm_cfg_size1, cbm_cfg_size2;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
@@ -198,7 +199,7 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 		gr->tpc_count * gr->attrib_cb_size;
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		temp = proj_gpc_stride_v() * gpc_index;
+		temp = gpc_stride * gpc_index;
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 		     ppc_index++) {
 			cbm_cfg_size1 = gr->attrib_cb_default_size *
@@ -208,12 +209,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size1, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				attrib_offset_in_chunk, patch);
 
 			attrib_offset_in_chunk += gr->attrib_cb_size *
@@ -221,12 +222,12 @@ static int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				cbm_cfg_size2, patch);
 
 			gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
-				proj_ppc_in_gpc_stride_v() * ppc_index,
+				ppc_in_gpc_stride * ppc_index,
 				alpha_offset_in_chunk, patch);
 
 			alpha_offset_in_chunk += gr->alpha_cb_size *
@@ -297,6 +298,8 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 	u32 gpc_index, ppc_index, stride, val;
 	u32 pd_ab_max_output;
 	u32 alpha_cb_size = data * 4;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 	/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
@@ -319,14 +322,14 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 			gr_pd_ab_dist_cfg1_max_batches_init_f());
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		stride = proj_gpc_stride_v() * gpc_index;
+		stride = gpc_stride * gpc_index;
 
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index);
+				ppc_in_gpc_stride * ppc_index);
 
 			val = set_field(val, gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
 					gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
@@ -334,7 +337,7 @@ static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				ppc_in_gpc_stride * ppc_index, val);
 		}
 	}
 }
@@ -344,6 +347,8 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_index, ppc_index, stride, val;
 	u32 cb_size = data * 4;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
@@ -356,14 +361,14 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		stride = proj_gpc_stride_v() * gpc_index;
+		stride = gpc_stride * gpc_index;
 
 		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
 			ppc_index++) {
 
 			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index);
+				ppc_in_gpc_stride * ppc_index);
 
 			val = set_field(val,
 				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
@@ -372,7 +377,7 @@ static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
 
 			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
 				stride +
-				proj_ppc_in_gpc_stride_v() * ppc_index, val);
+				ppc_in_gpc_stride * ppc_index, val);
 
 			val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
 						ppc_index + gpc_index));
@@ -527,14 +532,16 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 	u32 tpc_per_gpc = 0;
 	u32 tpc_sm_id = 0, gpc_tpc_id = 0;
 	u32 pes_tpc_mask = 0, pes_index;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	gk20a_dbg_fn("");
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		gpc_offset = proj_gpc_stride_v() * gpc_index;
+		gpc_offset = gpc_stride * gpc_index;
 		for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index];
 							tpc_index++) {
-			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;
+			tpc_offset = tpc_in_gpc_stride * tpc_index;
 
 			gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() +
 				     gpc_offset + tpc_offset,
@@ -640,32 +647,37 @@ static int gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
 	return 0;
 }
 
-static bool gr_gm20b_is_tpc_addr_shared(u32 addr)
+static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr)
 {
-	return (addr >= proj_tpc_in_gpc_shared_base_v()) &&
-		(addr < (proj_tpc_in_gpc_shared_base_v() +
-			 proj_tpc_in_gpc_stride_v()));
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 tpc_in_gpc_shared_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_SHARED_BASE);
+	return (addr >= tpc_in_gpc_shared_base) &&
+		(addr < (tpc_in_gpc_shared_base +
+			 tpc_in_gpc_stride));
 }
 
-static bool gr_gm20b_is_tpc_addr(u32 addr)
+static bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr)
 {
-	return ((addr >= proj_tpc_in_gpc_base_v()) &&
-		(addr < proj_tpc_in_gpc_base_v() +
-		 (proj_scal_litter_num_tpc_per_gpc_v() *
-		  proj_tpc_in_gpc_stride_v())))
-		|| gr_gm20b_is_tpc_addr_shared(addr);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+	return ((addr >= tpc_in_gpc_base) &&
+		(addr < tpc_in_gpc_base +
+		 (num_tpc_per_gpc * tpc_in_gpc_stride)))
+		|| gr_gm20b_is_tpc_addr_shared(g, addr);
 }
 
-static u32 gr_gm20b_get_tpc_num(u32 addr)
+static u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr)
 {
 	u32 i, start;
-	u32 num_tpcs = proj_scal_litter_num_tpc_per_gpc_v();
+	u32 num_tpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
+	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	for (i = 0; i < num_tpcs; i++) {
-		start = proj_tpc_in_gpc_base_v() +
-			(i * proj_tpc_in_gpc_stride_v());
+		start = tpc_in_gpc_base + (i * tpc_in_gpc_stride);
 		if ((addr >= start) &&
-			(addr < (start + proj_tpc_in_gpc_stride_v())))
+			(addr < (start + tpc_in_gpc_stride)))
 			return i;
 	}
 	return 0;
@@ -1066,6 +1078,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
 	u32 gpc, tpc, sm_id;
 	u32 tpc_offset, gpc_offset, reg_offset;
 	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 
 	/* for maxwell & kepler */
 	u32 numSmPerTpc = 1;
@@ -1075,8 +1089,8 @@ static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state)
 		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
 		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
 
-		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
-		gpc_offset = proj_gpc_stride_v() * gpc;
+		tpc_offset = tpc_in_gpc_stride * tpc;
+		gpc_offset = gpc_stride * gpc;
 		reg_offset = tpc_offset + gpc_offset;
 
 		/* 64 bit read */
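The pattern in the gr hunks above is mechanical: each compile-time
proj_*_v() constant becomes a local u32 fetched once through
nvgpu_get_litter_value(), and the pri-address arithmetic itself is
unchanged -- a unicast register is still the broadcast (gpc0/tpc0) offset
plus per-unit strides. As a hedged illustration only (this helper is
hypothetical, not part of the patch):

    /* Hypothetical example: unicast address of a gr_gpc0_tpc0_* register
     * for a given gpc/tpc pair, using litter values in place of the old
     * proj_gpc_stride_v()/proj_tpc_in_gpc_stride_v() constants. This is
     * the same arithmetic gr_gm20b_ctx_state_floorsweep() performs. */
    static u32 example_gpc_tpc_addr(struct gk20a *g, u32 gpc0_tpc0_reg,
                                    u32 gpc, u32 tpc)
    {
            u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
            u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
                                            GPU_LIT_TPC_IN_GPC_STRIDE);

            return gpc0_tpc0_reg + gpc * gpc_stride +
                    tpc * tpc_in_gpc_stride;
    }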
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 559fee61..df25be5e 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -34,6 +34,7 @@
 #include "debug_gm20b.h"
 #include "cde_gm20b.h"
 #include "therm_gm20b.h"
+#include "hw_proj_gm20b.h"
 
 #define FUSE_OPT_PRIV_SEC_DIS_0 0x264
 #define PRIV_SECURITY_DISABLE 0x01
@@ -89,6 +90,78 @@ static struct gpu_ops gm20b_ops = {
 		},
 };
 
+static int gm20b_get_litter_value(struct gk20a *g,
+		enum nvgpu_litter_value value)
+{
+	int ret = EINVAL;
+	switch (value) {
+	case GPU_LIT_NUM_GPCS:
+		ret = proj_scal_litter_num_gpcs_v();
+		break;
+	case GPU_LIT_NUM_PES_PER_GPC:
+		ret = proj_scal_litter_num_pes_per_gpc_v();
+		break;
+	case GPU_LIT_NUM_ZCULL_BANKS:
+		ret = proj_scal_litter_num_zcull_banks_v();
+		break;
+	case GPU_LIT_NUM_TPC_PER_GPC:
+		ret = proj_scal_litter_num_tpc_per_gpc_v();
+		break;
+	case GPU_LIT_NUM_FBPS:
+		ret = proj_scal_litter_num_fbps_v();
+		break;
+	case GPU_LIT_GPC_BASE:
+		ret = proj_gpc_base_v();
+		break;
+	case GPU_LIT_GPC_STRIDE:
+		ret = proj_gpc_stride_v();
+		break;
+	case GPU_LIT_GPC_SHARED_BASE:
+		ret = proj_gpc_shared_base_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_BASE:
+		ret = proj_tpc_in_gpc_base_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_STRIDE:
+		ret = proj_tpc_in_gpc_stride_v();
+		break;
+	case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
+		ret = proj_tpc_in_gpc_shared_base_v();
+		break;
+	case GPU_LIT_PPC_IN_GPC_STRIDE:
+		ret = proj_ppc_in_gpc_stride_v();
+		break;
+	case GPU_LIT_ROP_BASE:
+		ret = proj_rop_base_v();
+		break;
+	case GPU_LIT_ROP_STRIDE:
+		ret = proj_rop_stride_v();
+		break;
+	case GPU_LIT_ROP_SHARED_BASE:
+		ret = proj_rop_shared_base_v();
+		break;
+	case GPU_LIT_HOST_NUM_PBDMA:
+		ret = proj_host_num_pbdma_v();
+		break;
+	case GPU_LIT_LTC_STRIDE:
+		ret = proj_ltc_stride_v();
+		break;
+	case GPU_LIT_LTS_STRIDE:
+		ret = proj_lts_stride_v();
+		break;
+	case GPU_LIT_NUM_FBPAS:
+		ret = proj_scal_litter_num_fbpas_v();
+		break;
+	case GPU_LIT_FBPA_STRIDE:
+		ret = proj_fbpa_stride_v();
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 int gm20b_init_hal(struct gk20a *g)
 {
 	struct gpu_ops *gops = &g->ops;
@@ -140,6 +213,7 @@ int gm20b_init_hal(struct gk20a *g)
 	gm20b_init_therm_ops(gops);
 	gops->name = "gm20b";
 	gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
+	gops->get_litter_value = gm20b_get_litter_value;
 
 	c->twod_class = FERMI_TWOD_A;
 	c->threed_class = MAXWELL_B;
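With gops->get_litter_value wired up in gm20b_init_hal(), chip-agnostic
code can query these constants instead of including hw_proj_gm20b.h --
which is why that header now appears only in hal_gm20b.c. A hedged usage
sketch (the surrounding function is hypothetical, not from this patch):

    /* Hypothetical caller: iterate PBDMAs without chip-specific headers. */
    static void example_for_each_pbdma(struct gk20a *g)
    {
            u32 i;
            u32 num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);

            for (i = 0; i < num_pbdma; i++) {
                    /* per-pbdma setup would go here */
            }
    }

Note that gm20b_get_litter_value() falls back to EINVAL for identifiers it
does not handle; every identifier used by the gm20b code in this patch is
covered by the switch, so the fallback is never hit here.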
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index e4e27764..4fc9d51b 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -20,7 +20,6 @@
 #include "hw_mc_gm20b.h"
 #include "hw_ltc_gm20b.h"
 #include "hw_top_gm20b.h"
-#include "hw_proj_gm20b.h"
 #include "hw_pri_ringmaster_gm20b.h"
 
 #include "gk20a/ltc_common.c"
@@ -109,6 +108,8 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 	s32 retry = 200;
 	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
 				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
 
 	gk20a_dbg_fn("");
 
@@ -139,8 +140,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 
 		for (slice = 0; slice < slices_per_ltc; slice++) {
 			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
-				ltc * proj_ltc_stride_v() +
-				slice * proj_lts_stride_v();
+				ltc * ltc_stride + slice * lts_stride;
 
 			retry = 200;
 			do {
@@ -198,6 +198,8 @@ void gm20b_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr, ltc_intr;
 	int ltc, slice;
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
 
 	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
 	gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x",
@@ -207,13 +209,13 @@ void gm20b_ltc_isr(struct gk20a *g)
 			continue;
 		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
 			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
-					   proj_ltc_stride_v() * ltc +
-					   proj_lts_stride_v() * slice);
+					   ltc_stride * ltc +
+					   lts_stride * slice);
 			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
 				  ltc, slice, ltc_intr);
 			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
-					   proj_ltc_stride_v() * ltc +
-					   proj_lts_stride_v() * slice,
+					   ltc_stride * ltc +
+					   lts_stride * slice,
 				     ltc_intr);
 		}
 	}
@@ -287,6 +289,7 @@ void gm20b_flush_ltc(struct gk20a *g)
 {
 	unsigned long timeout;
 	int ltc;
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
 
 #define __timeout_init()				\
 	do {						\
@@ -317,7 +320,7 @@ void gm20b_flush_ltc(struct gk20a *g)
 		__timeout_init();
 		do {
 			int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
-				     ltc * proj_ltc_stride_v();
+				     ltc * ltc_stride;
 			op_pending = gk20a_readl(g, cmgmt1);
 			__timeout_check();
 		} while (op_pending &
@@ -338,7 +341,7 @@ void gm20b_flush_ltc(struct gk20a *g)
 		__timeout_init();
 		do {
 			int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
-				     ltc * proj_ltc_stride_v();
+				     ltc * ltc_stride;
 			op_pending = gk20a_readl(g, cmgmt0);
 			__timeout_check();
 		} while (op_pending &
--
cgit v1.2.2
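For a concrete feel of the ltc changes: the per-slice interrupt register
address in gm20b_ltc_isr() is ltc_ltc0_lts0_intr_r() + ltc_stride * ltc +
lts_stride * slice. Assuming, purely for illustration, ltc_stride = 0x2000
and lts_stride = 0x400 (the real gm20b numbers come from hw_proj_gm20b.h
via gm20b_get_litter_value()), ltc 1 / slice 2 is read at base + 0x2000 +
2 * 0x400 = base + 0x2800 -- the same address the old proj_*_v() calls
produced, now computed from values the chip's HAL reports at runtime.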