diff options
author | Richard Zhao <rizhao@nvidia.com> | 2018-06-20 20:27:02 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-06-21 16:58:07 -0400 |
commit | c3b5b48c0fbecfb874d0fa4aa52286849bb36a5b (patch) | |
tree | 453ae0733cbdf1611a39d04ff50d533fbad31603 /drivers | |
parent | e4e2c1882865163ad53eeaf96acf83802ffbec71 (diff) |
gpu: nvgpu: move slices_per_ltc & cacheline_size init to floorsweeping
They were initialized in .init_comptags, but we may also need them without
comptags.
Jira NVGPUT-63
Change-Id: Ie818c3ecf890fc84323b9662a32d666a6d2b3936
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1756373
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 16 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 16 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/ltc_gv11b.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | 5 |
4 files changed, 22 insertions, 21 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 66cd49e7..a8cbca13 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -52,10 +52,6 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
52 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | 52 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); |
53 | u32 comptags_per_cacheline = | 53 | u32 comptags_per_cacheline = |
54 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); | 54 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); |
55 | u32 cacheline_size = | ||
56 | 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); | ||
57 | u32 slices_per_ltc = | ||
58 | ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param); | ||
59 | 55 | ||
60 | u32 compbit_backing_size; | 56 | u32 compbit_backing_size; |
61 | 57 | ||
@@ -71,7 +67,7 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
71 | 67 | ||
72 | compbit_backing_size = | 68 | compbit_backing_size = |
73 | DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * | 69 | DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * |
74 | cacheline_size * slices_per_ltc * g->ltc_count; | 70 | gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; |
75 | 71 | ||
76 | /* aligned to 2KB * ltc_count */ | 72 | /* aligned to 2KB * ltc_count */ |
77 | compbit_backing_size += | 73 | compbit_backing_size += |
@@ -82,7 +78,7 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
82 | 78 | ||
83 | max_comptag_lines = | 79 | max_comptag_lines = |
84 | (compbit_backing_size * comptags_per_cacheline) / | 80 | (compbit_backing_size * comptags_per_cacheline) / |
85 | (cacheline_size * slices_per_ltc * g->ltc_count); | 81 | (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); |
86 | 82 | ||
87 | if (max_comptag_lines > hw_max_comptag_lines) | 83 | if (max_comptag_lines > hw_max_comptag_lines) |
88 | max_comptag_lines = hw_max_comptag_lines; | 84 | max_comptag_lines = hw_max_comptag_lines; |
@@ -102,8 +98,6 @@ int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
102 | 98 | ||
103 | gr->max_comptag_lines = max_comptag_lines; | 99 | gr->max_comptag_lines = max_comptag_lines; |
104 | gr->comptags_per_cacheline = comptags_per_cacheline; | 100 | gr->comptags_per_cacheline = comptags_per_cacheline; |
105 | gr->slices_per_ltc = slices_per_ltc; | ||
106 | gr->cacheline_size = cacheline_size; | ||
107 | 101 | ||
108 | return 0; | 102 | return 0; |
109 | } | 103 | } |
@@ -203,6 +197,7 @@ out: | |||
203 | 197 | ||
204 | void gm20b_ltc_init_fs_state(struct gk20a *g) | 198 | void gm20b_ltc_init_fs_state(struct gk20a *g) |
205 | { | 199 | { |
200 | struct gr_gk20a *gr = &g->gr; | ||
206 | u32 reg; | 201 | u32 reg; |
207 | 202 | ||
208 | nvgpu_log_info(g, "initialize gm20b l2"); | 203 | nvgpu_log_info(g, "initialize gm20b l2"); |
@@ -211,6 +206,11 @@ void gm20b_ltc_init_fs_state(struct gk20a *g) | |||
211 | g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); | 206 | g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); |
212 | nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); | 207 | nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); |
213 | 208 | ||
209 | reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
210 | gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; | ||
211 | gr->cacheline_size = | ||
212 | 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); | ||
213 | |||
214 | gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), | 214 | gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), |
215 | g->ltc_count); | 215 | g->ltc_count); |
216 | gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), | 216 | gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), |
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c index f74ca8f3..b0938f75 100644 --- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | |||
@@ -70,10 +70,6 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
70 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | 70 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); |
71 | u32 comptags_per_cacheline = | 71 | u32 comptags_per_cacheline = |
72 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); | 72 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); |
73 | u32 cacheline_size = | ||
74 | 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); | ||
75 | u32 slices_per_ltc = | ||
76 | ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param); | ||
77 | u32 cbc_param2 = | 73 | u32 cbc_param2 = |
78 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); | 74 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); |
79 | u32 gobs_per_comptagline_per_slice = | 75 | u32 gobs_per_comptagline_per_slice = |
@@ -89,7 +85,7 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
89 | return 0; | 85 | return 0; |
90 | 86 | ||
91 | /* Already initialized */ | 87 | /* Already initialized */ |
92 | if (gr->cacheline_size) | 88 | if (gr->max_comptag_lines) |
93 | return 0; | 89 | return 0; |
94 | 90 | ||
95 | if (max_comptag_lines > hw_max_comptag_lines) | 91 | if (max_comptag_lines > hw_max_comptag_lines) |
@@ -97,10 +93,10 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
97 | 93 | ||
98 | compbit_backing_size = | 94 | compbit_backing_size = |
99 | roundup(max_comptag_lines * gobs_per_comptagline_per_slice, | 95 | roundup(max_comptag_lines * gobs_per_comptagline_per_slice, |
100 | cacheline_size); | 96 | gr->cacheline_size); |
101 | compbit_backing_size = | 97 | compbit_backing_size = roundup( |
102 | roundup(compbit_backing_size * slices_per_ltc * g->ltc_count, | 98 | compbit_backing_size * gr->slices_per_ltc * g->ltc_count, |
103 | g->ops.fb.compressible_page_size(g)); | 99 | g->ops.fb.compressible_page_size(g)); |
104 | 100 | ||
105 | /* aligned to 2KB * ltc_count */ | 101 | /* aligned to 2KB * ltc_count */ |
106 | compbit_backing_size += | 102 | compbit_backing_size += |
@@ -126,8 +122,6 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
126 | 122 | ||
127 | gr->max_comptag_lines = max_comptag_lines; | 123 | gr->max_comptag_lines = max_comptag_lines; |
128 | gr->comptags_per_cacheline = comptags_per_cacheline; | 124 | gr->comptags_per_cacheline = comptags_per_cacheline; |
129 | gr->slices_per_ltc = slices_per_ltc; | ||
130 | gr->cacheline_size = cacheline_size; | ||
131 | gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; | 125 | gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; |
132 | 126 | ||
133 | return 0; | 127 | return 0; |
diff --git a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c index 96844ebd..b64faaa6 100644 --- a/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/ltc_gv11b.c | |||
@@ -53,6 +53,7 @@ void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g, | |||
53 | 53 | ||
54 | void gv11b_ltc_init_fs_state(struct gk20a *g) | 54 | void gv11b_ltc_init_fs_state(struct gk20a *g) |
55 | { | 55 | { |
56 | struct gr_gk20a *gr = &g->gr; | ||
56 | u32 ltc_intr; | 57 | u32 ltc_intr; |
57 | u32 reg; | 58 | u32 reg; |
58 | 59 | ||
@@ -62,6 +63,11 @@ void gv11b_ltc_init_fs_state(struct gk20a *g) | |||
62 | g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); | 63 | g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); |
63 | nvgpu_log_info(g, "%u ltcs out of %u", g->ltc_count, g->max_ltc_count); | 64 | nvgpu_log_info(g, "%u ltcs out of %u", g->ltc_count, g->max_ltc_count); |
64 | 65 | ||
66 | reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
67 | gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; | ||
68 | gr->cacheline_size = | ||
69 | 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); | ||
70 | |||
65 | /* Disable LTC interrupts */ | 71 | /* Disable LTC interrupts */ |
66 | reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); | 72 | reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); |
67 | reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); | 73 | reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); |
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c index f68c8454..873ddd24 100644 --- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c | |||
@@ -44,9 +44,7 @@ int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
44 | 44 | ||
45 | nvgpu_log_fn(g, " "); | 45 | nvgpu_log_fn(g, " "); |
46 | 46 | ||
47 | gr->cacheline_size = priv->constants.cacheline_size; | ||
48 | gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; | 47 | gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; |
49 | gr->slices_per_ltc = priv->constants.slices_per_ltc; | ||
50 | max_comptag_lines = priv->constants.comptag_lines; | 48 | max_comptag_lines = priv->constants.comptag_lines; |
51 | 49 | ||
52 | if (max_comptag_lines < 2) | 50 | if (max_comptag_lines < 2) |
@@ -64,8 +62,11 @@ int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
64 | void vgpu_ltc_init_fs_state(struct gk20a *g) | 62 | void vgpu_ltc_init_fs_state(struct gk20a *g) |
65 | { | 63 | { |
66 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | 64 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); |
65 | struct gr_gk20a *gr = &g->gr; | ||
67 | 66 | ||
68 | nvgpu_log_fn(g, " "); | 67 | nvgpu_log_fn(g, " "); |
69 | 68 | ||
70 | g->ltc_count = priv->constants.ltc_count; | 69 | g->ltc_count = priv->constants.ltc_count; |
70 | gr->cacheline_size = priv->constants.cacheline_size; | ||
71 | gr->slices_per_ltc = priv->constants.slices_per_ltc; | ||
71 | } | 72 | } |