diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2018-08-13 15:58:18 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-16 13:14:40 -0400 |
commit | 974d541623929fa2622d27d5d338a5b63596794b (patch) | |
tree | f47a540bf07efd7f6cda68f49d3675c2462d731a /drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | |
parent | 1e7f229e5d92078f772d4f81893b23504cd847a8 (diff) |
gpu: nvgpu: Move ltc HAL to common
Move implementation of ltc HAL to common/ltc.
JIRA NVGPU-956
Change-Id: Id78d74e8612d7dacfb8d322d491abecd798e42b5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1798461
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/ltc_gp10b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 320 |
1 file changed, 0 insertions, 320 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c deleted file mode 100644 index 79ebe86d..00000000 --- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c +++ /dev/null | |||
@@ -1,320 +0,0 @@ | |||
1 | /* | ||
2 | * GP10B L2 | ||
3 | * | ||
4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <trace/events/gk20a.h> | ||
26 | |||
27 | #include <nvgpu/ltc.h> | ||
28 | #include <nvgpu/log.h> | ||
29 | #include <nvgpu/enabled.h> | ||
30 | #include <nvgpu/io.h> | ||
31 | |||
32 | #include <nvgpu/hw/gp10b/hw_mc_gp10b.h> | ||
33 | #include <nvgpu/hw/gp10b/hw_ltc_gp10b.h> | ||
34 | |||
35 | #include "gk20a/gk20a.h" | ||
36 | #include "gm20b/ltc_gm20b.h" | ||
37 | |||
38 | #include "ltc_gp10b.h" | ||
39 | |||
40 | int gp10b_determine_L2_size_bytes(struct gk20a *g) | ||
41 | { | ||
42 | u32 tmp; | ||
43 | int ret; | ||
44 | |||
45 | nvgpu_log_fn(g, " "); | ||
46 | |||
47 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_info_1_r()); | ||
48 | |||
49 | ret = g->ltc_count * | ||
50 | ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v(tmp)*1024 * | ||
51 | ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v(tmp); | ||
52 | |||
53 | nvgpu_log(g, gpu_dbg_info, "L2 size: %d\n", ret); | ||
54 | |||
55 | nvgpu_log_fn(g, "done"); | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | |||
/*
 * Set up the compression-bit (comptag) backing store for gp10b.
 *
 * Sizes the store from the maximum compressible memory (gr->max_comptag_mem,
 * in MB), clamps the line count to the hardware maximum, allocates the CBC
 * backing buffer and the comptag-line allocator, and records the resulting
 * geometry in @gr.
 *
 * Returns 0 on success or when nothing needs doing (zero request, or already
 * initialized); otherwise a negative errno from the allocation helpers.
 */
int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
{
	/* max memory size (MB) to cover */
	u32 max_size = gr->max_comptag_mem;
	/* one tag line covers 64KB, so MB -> lines is a shift by 4 */
	u32 max_comptag_lines = max_size << 4U;

	/* hardware upper bound on comptag lines, from the clear-range field */
	u32 hw_max_comptag_lines =
		ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();

	/* read the CBC geometry the hardware reports */
	u32 cbc_param =
		gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
	u32 comptags_per_cacheline =
		ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
	u32 cbc_param2 =
		gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r());
	u32 gobs_per_comptagline_per_slice =
		ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(cbc_param2);

	u32 compbit_backing_size;

	int err;

	nvgpu_log_fn(g, " ");

	if (max_comptag_lines == 0U)
		return 0;

	/* Already initialized */
	if (gr->max_comptag_lines)
		return 0;

	if (max_comptag_lines > hw_max_comptag_lines)
		max_comptag_lines = hw_max_comptag_lines;

	/* per-slice backing bytes, rounded up to a whole cacheline */
	compbit_backing_size =
		roundup(max_comptag_lines * gobs_per_comptagline_per_slice,
			gr->cacheline_size);
	/* scale to all slices/LTCs and round to the compressible page size */
	compbit_backing_size = roundup(
		compbit_backing_size * gr->slices_per_ltc * g->ltc_count,
		g->ops.fb.compressible_page_size(g));

	/* aligned to 2KB * ltc_count */
	compbit_backing_size +=
		g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();

	/* must be a multiple of 64KB */
	compbit_backing_size = roundup(compbit_backing_size, 64*1024);

	nvgpu_log_info(g, "compbit backing store size : %d",
		compbit_backing_size);
	nvgpu_log_info(g, "max comptag lines : %d",
		max_comptag_lines);
	nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d",
		gobs_per_comptagline_per_slice);

	/* allocate the backing buffer itself */
	err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size);
	if (err)
		return err;

	/* allocator that hands out comptag line ranges to buffers */
	err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines);
	if (err)
		return err;

	/* publish the geometry; max_comptag_lines != 0 marks us initialized */
	gr->max_comptag_lines = max_comptag_lines;
	gr->comptags_per_cacheline = comptags_per_cacheline;
	gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice;

	return 0;
}
130 | |||
/*
 * Issue a compression-bit-cache (CBC) maintenance operation.
 *
 * @op:  clear, clean, or invalidate (enum gk20a_cbc_op).
 * @min: first comptag line (inclusive) — used only for the clear op.
 * @max: last comptag line (inclusive) — used only for the clear op.
 *
 * Clears are issued in chunks of at most 16384 lines; clean/invalidate are
 * full-cache operations and run in a single iteration. Each iteration takes
 * the L2 op lock, kicks the op on all LTCs/slices via the broadcast ctrl1
 * register, then polls every slice's ctrl1 until the op bit drops or a 2 s
 * retry timeout expires. The lock is released between chunks so
 * higher-priority L2 work can interleave.
 *
 * NOTE(review): the loop acquires the lock at the top of each iteration and
 * the `break`/`goto out` paths release it once at `out:` — the release at
 * L238 only runs when another chunk follows. Statement order is load-bearing.
 *
 * Returns 0 on success, -EINVAL for an unknown op, -EBUSY on poll timeout.
 */
int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
		       u32 min, u32 max)
{
	struct gr_gk20a *gr = &g->gr;
	struct nvgpu_timeout timeout;
	int err = 0;
	u32 ltc, slice, ctrl1, val, hw_op = 0U;
	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
	/* hardware limit on lines cleared per ctrl2/ctrl3 programming */
	const u32 max_lines = 16384U;

	nvgpu_log_fn(g, " ");

	trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max);

	/* no compbit backing store -> nothing to maintain */
	if (gr->compbit_store.mem.size == 0U)
		return 0;

	while (1) {
		/* inclusive upper bound for this chunk (clear op only) */
		const u32 iter_max = min(min + max_lines - 1, max);
		bool full_cache_op = true;

		nvgpu_mutex_acquire(&g->mm.l2_op_lock);

		nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max);

		if (op == gk20a_cbc_op_clear) {
			/* program the [min, iter_max] line range to clear */
			nvgpu_writel_check(
				g, ltc_ltcs_ltss_cbc_ctrl2_r(),
				ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
					min));

			nvgpu_writel_check(
				g, ltc_ltcs_ltss_cbc_ctrl3_r(),
				ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
					iter_max));

			hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
			full_cache_op = false;
		} else if (op == gk20a_cbc_op_clean) {
			/* this is full-cache op */
			hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
		} else if (op == gk20a_cbc_op_invalidate) {
			/* this is full-cache op */
			hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
		} else {
			nvgpu_err(g, "Unknown op: %u", (unsigned)op);
			err = -EINVAL;
			goto out;
		}
		/* kick the op on all LTCs/slices via the broadcast register */
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
			     gk20a_readl(g,
					 ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

		/* poll each slice until its op-active bit clears */
		for (ltc = 0; ltc < g->ltc_count; ltc++) {
			for (slice = 0; slice < slices_per_ltc; slice++) {

				ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
					ltc * ltc_stride + slice * lts_stride;

				nvgpu_timeout_init(g, &timeout, 2000,
						   NVGPU_TIMER_RETRY_TIMER);
				do {
					val = gk20a_readl(g, ctrl1);
					if (!(val & hw_op))
						break;
					nvgpu_udelay(5);
				} while (!nvgpu_timeout_expired(&timeout));

				if (nvgpu_timeout_peek_expired(&timeout)) {
					nvgpu_err(g, "comp tag clear timeout");
					err = -EBUSY;
					goto out;
				}
			}
		}

		/* are we done? */
		if (full_cache_op || iter_max == max)
			break;

		/* note: iter_max is inclusive upper bound */
		min = iter_max + 1;

		/* give a chance for higher-priority threads to progress */
		nvgpu_mutex_release(&g->mm.l2_op_lock);
	}
out:
	trace_gk20a_ltc_cbc_ctrl_done(g->name);
	nvgpu_mutex_release(&g->mm.l2_op_lock);
	return err;
}
225 | |||
/*
 * LTC interrupt service routine for gp10b.
 *
 * Reads the MC-level LTC interrupt summary, then for each LTC with a pending
 * bit walks its slices: logs and accounts ECC SEC (single-bit, corrected) and
 * DED (double-bit, uncorrectable) errors, clears the hardware ECC report
 * counters, and finally acknowledges the per-slice interrupt by writing the
 * raw status back to the intr register.
 *
 * NOTE(review): after a SEC error the L2 is flushed (l2_flush with
 * invalidate=true) — presumably to scrub the corrected line back to memory;
 * no flush is done for DED since the data is already lost.
 */
void gp10b_ltc_isr(struct gk20a *g)
{
	u32 mc_intr, ltc_intr;
	unsigned int ltc, slice;
	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);

	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
	nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		/* one summary bit per LTC; skip idle ones */
		if ((mc_intr & 1U << ltc) == 0)
			continue;
		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
			/* register offset of this ltc/slice instance */
			u32 offset = ltc_stride * ltc + lts_stride * slice;
			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + offset);

			/* Detect and handle ECC errors */
			if (ltc_intr &
				ltc_ltcs_ltss_intr_ecc_sec_error_pending_f()) {
				u32 ecc_stats_reg_val;

				nvgpu_err(g,
					"Single bit error detected in GPU L2!");

				/* accumulate the HW SEC count, then zero it */
				ecc_stats_reg_val =
					gk20a_readl(g,
						ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
				g->ecc.ltc.ecc_sec_count[ltc][slice].counter +=
					ltc_ltc0_lts0_dstg_ecc_report_sec_count_v(ecc_stats_reg_val);
				ecc_stats_reg_val &=
					~(ltc_ltc0_lts0_dstg_ecc_report_sec_count_m());
				nvgpu_writel_check(g,
					ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
					ecc_stats_reg_val);
				g->ops.mm.l2_flush(g, true);
			}
			if (ltc_intr &
				ltc_ltcs_ltss_intr_ecc_ded_error_pending_f()) {
				u32 ecc_stats_reg_val;

				nvgpu_err(g,
					"Double bit error detected in GPU L2!");

				/* accumulate the HW DED count, then zero it */
				ecc_stats_reg_val =
					gk20a_readl(g,
						ltc_ltc0_lts0_dstg_ecc_report_r() + offset);
				g->ecc.ltc.ecc_ded_count[ltc][slice].counter +=
					ltc_ltc0_lts0_dstg_ecc_report_ded_count_v(ecc_stats_reg_val);
				ecc_stats_reg_val &=
					~(ltc_ltc0_lts0_dstg_ecc_report_ded_count_m());
				nvgpu_writel_check(g,
					ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
					ecc_stats_reg_val);
			}

			/* log raw status and ack by writing it back */
			nvgpu_err(g, "ltc%d, slice %d: %08x",
				  ltc, slice, ltc_intr);
			nvgpu_writel_check(g, ltc_ltc0_lts0_intr_r() +
					   ltc_stride * ltc + lts_stride * slice,
					   ltc_intr);
		}
	}
}
289 | |||
290 | void gp10b_ltc_init_fs_state(struct gk20a *g) | ||
291 | { | ||
292 | u32 ltc_intr; | ||
293 | |||
294 | gm20b_ltc_init_fs_state(g); | ||
295 | |||
296 | gk20a_writel(g, ltc_ltca_g_axi_pctrl_r(), | ||
297 | ltc_ltca_g_axi_pctrl_user_sid_f(g->ltc_streamid)); | ||
298 | |||
299 | /* Enable ECC interrupts */ | ||
300 | ltc_intr = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); | ||
301 | ltc_intr |= ltc_ltcs_ltss_intr_en_ecc_sec_error_enabled_f() | | ||
302 | ltc_ltcs_ltss_intr_en_ecc_ded_error_enabled_f(); | ||
303 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), | ||
304 | ltc_intr); | ||
305 | } | ||
306 | |||
307 | void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled) | ||
308 | { | ||
309 | u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); | ||
310 | u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); | ||
311 | |||
312 | if (enabled) | ||
313 | /* bypass disabled (normal caching ops)*/ | ||
314 | reg &= ~reg_f; | ||
315 | else | ||
316 | /* bypass enabled (no caching) */ | ||
317 | reg |= reg_f; | ||
318 | |||
319 | nvgpu_writel_check(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); | ||
320 | } | ||