summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorRichard Zhao <rizhao@nvidia.com>2018-06-20 20:27:02 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-06-21 16:58:07 -0400
commitc3b5b48c0fbecfb874d0fa4aa52286849bb36a5b (patch)
tree453ae0733cbdf1611a39d04ff50d533fbad31603
parente4e2c1882865163ad53eeaf96acf83802ffbec71 (diff)
gpu: nvgpu: move slices_per_ltc & cacheline_size init to floorsweeping
It was initialized at .init_comptags, but we may also need them without
comptags.

Jira NVGPUT-63

Change-Id: Ie818c3ecf890fc84323b9662a32d666a6d2b3936
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1756373
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gm20b/ltc_gm20b.c16
-rw-r--r--drivers/gpu/nvgpu/gp10b/ltc_gp10b.c16
-rw-r--r--drivers/gpu/nvgpu/gv11b/ltc_gv11b.c6
-rw-r--r--drivers/gpu/nvgpu/vgpu/ltc_vgpu.c5
4 files changed, 22 insertions, 21 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 66cd49e7..a8cbca13 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -52,10 +52,6 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
52 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); 52 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
53 u32 comptags_per_cacheline = 53 u32 comptags_per_cacheline =
54 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); 54 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
55 u32 cacheline_size =
56 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
57 u32 slices_per_ltc =
58 ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);
59 55
60 u32 compbit_backing_size; 56 u32 compbit_backing_size;
61 57
@@ -71,7 +67,7 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
71 67
72 compbit_backing_size = 68 compbit_backing_size =
73 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * 69 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
74 cacheline_size * slices_per_ltc * g->ltc_count; 70 gr->cacheline_size * gr->slices_per_ltc * g->ltc_count;
75 71
76 /* aligned to 2KB * ltc_count */ 72 /* aligned to 2KB * ltc_count */
77 compbit_backing_size += 73 compbit_backing_size +=
@@ -82,7 +78,7 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
82 78
83 max_comptag_lines = 79 max_comptag_lines =
84 (compbit_backing_size * comptags_per_cacheline) / 80 (compbit_backing_size * comptags_per_cacheline) /
85 (cacheline_size * slices_per_ltc * g->ltc_count); 81 (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count);
86 82
87 if (max_comptag_lines > hw_max_comptag_lines) 83 if (max_comptag_lines > hw_max_comptag_lines)
88 max_comptag_lines = hw_max_comptag_lines; 84 max_comptag_lines = hw_max_comptag_lines;
@@ -102,8 +98,6 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
102 98
103 gr->max_comptag_lines = max_comptag_lines; 99 gr->max_comptag_lines = max_comptag_lines;
104 gr->comptags_per_cacheline = comptags_per_cacheline; 100 gr->comptags_per_cacheline = comptags_per_cacheline;
105 gr->slices_per_ltc = slices_per_ltc;
106 gr->cacheline_size = cacheline_size;
107 101
108 return 0; 102 return 0;
109} 103}
@@ -203,6 +197,7 @@ out:
203 197
204void gm20b_ltc_init_fs_state(struct gk20a *g) 198void gm20b_ltc_init_fs_state(struct gk20a *g)
205{ 199{
200 struct gr_gk20a *gr = &g->gr;
206 u32 reg; 201 u32 reg;
207 202
208 nvgpu_log_info(g, "initialize gm20b l2"); 203 nvgpu_log_info(g, "initialize gm20b l2");
@@ -211,6 +206,11 @@ void gm20b_ltc_init_fs_state(struct gk20a *g)
211 g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); 206 g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r());
212 nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); 207 nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count);
213 208
209 reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
210	gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);
211 gr->cacheline_size =
212 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg);
213
214 gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), 214 gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(),
215 g->ltc_count); 215 g->ltc_count);
216 gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), 216 gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(),
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index f74ca8f3..b0938f75 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -70,10 +70,6 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
70 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); 70 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
71 u32 comptags_per_cacheline = 71 u32 comptags_per_cacheline =
72 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); 72 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
73 u32 cacheline_size =
74 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
75 u32 slices_per_ltc =
76 ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);
77 u32 cbc_param2 = 73 u32 cbc_param2 =
78 gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); 74 gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r());
79 u32 gobs_per_comptagline_per_slice = 75 u32 gobs_per_comptagline_per_slice =
@@ -89,7 +85,7 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
89 return 0; 85 return 0;
90 86
91 /* Already initialized */ 87 /* Already initialized */
92 if (gr->cacheline_size) 88 if (gr->max_comptag_lines)
93 return 0; 89 return 0;
94 90
95 if (max_comptag_lines > hw_max_comptag_lines) 91 if (max_comptag_lines > hw_max_comptag_lines)
@@ -97,10 +93,10 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
97 93
98 compbit_backing_size = 94 compbit_backing_size =
99 roundup(max_comptag_lines * gobs_per_comptagline_per_slice, 95 roundup(max_comptag_lines * gobs_per_comptagline_per_slice,
100 cacheline_size); 96 gr->cacheline_size);
101 compbit_backing_size = 97 compbit_backing_size = roundup(
102 roundup(compbit_backing_size * slices_per_ltc * g->ltc_count, 98 compbit_backing_size * gr->slices_per_ltc * g->ltc_count,
103 g->ops.fb.compressible_page_size(g)); 99 g->ops.fb.compressible_page_size(g));
104 100
105 /* aligned to 2KB * ltc_count */ 101 /* aligned to 2KB * ltc_count */
106 compbit_backing_size += 102 compbit_backing_size +=
@@ -126,8 +122,6 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
126 122
127 gr->max_comptag_lines = max_comptag_lines; 123 gr->max_comptag_lines = max_comptag_lines;
128 gr->comptags_per_cacheline = comptags_per_cacheline; 124 gr->comptags_per_cacheline = comptags_per_cacheline;
129 gr->slices_per_ltc = slices_per_ltc;
130 gr->cacheline_size = cacheline_size;
131 gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; 125 gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice;
132 126
133 return 0; 127 return 0;
diff --git a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
index 96844ebd..b64faaa6 100644
--- a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c
@@ -53,6 +53,7 @@ void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g,
53 53
54void gv11b_ltc_init_fs_state(struct gk20a *g) 54void gv11b_ltc_init_fs_state(struct gk20a *g)
55{ 55{
56 struct gr_gk20a *gr = &g->gr;
56 u32 ltc_intr; 57 u32 ltc_intr;
57 u32 reg; 58 u32 reg;
58 59
@@ -62,6 +63,11 @@ void gv11b_ltc_init_fs_state(struct gk20a *g)
62 g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); 63 g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r());
63 nvgpu_log_info(g, "%u ltcs out of %u", g->ltc_count, g->max_ltc_count); 64 nvgpu_log_info(g, "%u ltcs out of %u", g->ltc_count, g->max_ltc_count);
64 65
66 reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
67	gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);
68 gr->cacheline_size =
69 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg);
70
65 /* Disable LTC interrupts */ 71 /* Disable LTC interrupts */
66 reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); 72 reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
67 reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); 73 reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m();
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
index f68c8454..873ddd24 100644
--- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
@@ -44,9 +44,7 @@ int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
44 44
45 nvgpu_log_fn(g, " "); 45 nvgpu_log_fn(g, " ");
46 46
47 gr->cacheline_size = priv->constants.cacheline_size;
48 gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; 47 gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline;
49 gr->slices_per_ltc = priv->constants.slices_per_ltc;
50 max_comptag_lines = priv->constants.comptag_lines; 48 max_comptag_lines = priv->constants.comptag_lines;
51 49
52 if (max_comptag_lines < 2) 50 if (max_comptag_lines < 2)
@@ -64,8 +62,11 @@ int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
64void vgpu_ltc_init_fs_state(struct gk20a *g) 62void vgpu_ltc_init_fs_state(struct gk20a *g)
65{ 63{
66 struct vgpu_priv_data *priv = vgpu_get_priv_data(g); 64 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
65 struct gr_gk20a *gr = &g->gr;
67 66
68 nvgpu_log_fn(g, " "); 67 nvgpu_log_fn(g, " ");
69 68
70 g->ltc_count = priv->constants.ltc_count; 69 g->ltc_count = priv->constants.ltc_count;
70 gr->cacheline_size = priv->constants.cacheline_size;
71 gr->slices_per_ltc = priv->constants.slices_per_ltc;
71} 72}