From c3b5b48c0fbecfb874d0fa4aa52286849bb36a5b Mon Sep 17 00:00:00 2001 From: Richard Zhao Date: Wed, 20 Jun 2018 17:27:02 -0700 Subject: gpu: nvgpu: move slices_per_ltc & cacheline_size init to floorsweeping They were initialized in .init_comptags, but we may also need them without comptags. Jira NVGPUT-63 Change-Id: Ie818c3ecf890fc84323b9662a32d666a6d2b3936 Signed-off-by: Richard Zhao Reviewed-on: https://git-master.nvidia.com/r/1756373 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 16 ++++++++-------- drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 16 +++++----------- drivers/gpu/nvgpu/gv11b/ltc_gv11b.c | 6 ++++++ drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | 5 +++-- 4 files changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 66cd49e7..a8cbca13 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c @@ -52,10 +52,6 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); u32 comptags_per_cacheline = ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); - u32 cacheline_size = - 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); - u32 slices_per_ltc = - ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param); u32 compbit_backing_size; @@ -71,7 +67,7 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) compbit_backing_size = DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * - cacheline_size * slices_per_ltc * g->ltc_count; + gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; /* aligned to 2KB * ltc_count */ compbit_backing_size += @@ -82,7 +78,7 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) max_comptag_lines = (compbit_backing_size * comptags_per_cacheline) / - (cacheline_size * slices_per_ltc * g->ltc_count); + (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); 
if (max_comptag_lines > hw_max_comptag_lines) max_comptag_lines = hw_max_comptag_lines; @@ -102,8 +98,6 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) gr->max_comptag_lines = max_comptag_lines; gr->comptags_per_cacheline = comptags_per_cacheline; - gr->slices_per_ltc = slices_per_ltc; - gr->cacheline_size = cacheline_size; return 0; } @@ -203,6 +197,7 @@ out: void gm20b_ltc_init_fs_state(struct gk20a *g) { + struct gr_gk20a *gr = &g->gr; u32 reg; nvgpu_log_info(g, "initialize gm20b l2"); @@ -211,6 +206,11 @@ void gm20b_ltc_init_fs_state(struct gk20a *g) g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); + reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); + gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; + gr->cacheline_size = + 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); + gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), g->ltc_count); gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c index f74ca8f3..b0938f75 100644 --- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c @@ -70,10 +70,6 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); u32 comptags_per_cacheline = ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); - u32 cacheline_size = - 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); - u32 slices_per_ltc = - ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param); u32 cbc_param2 = gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); u32 gobs_per_comptagline_per_slice = @@ -89,7 +85,7 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) return 0; /* Already initialized */ - if (gr->cacheline_size) + if (gr->max_comptag_lines) return 0; if (max_comptag_lines > hw_max_comptag_lines) @@ -97,10 +93,10 @@ int 
gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) compbit_backing_size = roundup(max_comptag_lines * gobs_per_comptagline_per_slice, - cacheline_size); - compbit_backing_size = - roundup(compbit_backing_size * slices_per_ltc * g->ltc_count, - g->ops.fb.compressible_page_size(g)); + gr->cacheline_size); + compbit_backing_size = roundup( + compbit_backing_size * gr->slices_per_ltc * g->ltc_count, + g->ops.fb.compressible_page_size(g)); /* aligned to 2KB * ltc_count */ compbit_backing_size += @@ -126,8 +122,6 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) gr->max_comptag_lines = max_comptag_lines; gr->comptags_per_cacheline = comptags_per_cacheline; - gr->slices_per_ltc = slices_per_ltc; - gr->cacheline_size = cacheline_size; gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; return 0; diff --git a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c index 96844ebd..b64faaa6 100644 --- a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c @@ -53,6 +53,7 @@ void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g, void gv11b_ltc_init_fs_state(struct gk20a *g) { + struct gr_gk20a *gr = &g->gr; u32 ltc_intr; u32 reg; @@ -62,6 +63,11 @@ void gv11b_ltc_init_fs_state(struct gk20a *g) g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); nvgpu_log_info(g, "%u ltcs out of %u", g->ltc_count, g->max_ltc_count); + reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); + gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; + gr->cacheline_size = + 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); + /* Disable LTC interrupts */ reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c index f68c8454..873ddd24 100644 --- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c @@ -44,9 +44,7 @@ int vgpu_ltc_init_comptags(struct 
gk20a *g, struct gr_gk20a *gr) nvgpu_log_fn(g, " "); - gr->cacheline_size = priv->constants.cacheline_size; gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; - gr->slices_per_ltc = priv->constants.slices_per_ltc; max_comptag_lines = priv->constants.comptag_lines; if (max_comptag_lines < 2) @@ -64,8 +62,11 @@ int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) void vgpu_ltc_init_fs_state(struct gk20a *g) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct gr_gk20a *gr = &g->gr; nvgpu_log_fn(g, " "); g->ltc_count = priv->constants.ltc_count; + gr->cacheline_size = priv->constants.cacheline_size; + gr->slices_per_ltc = priv->constants.slices_per_ltc; } -- cgit v1.2.2