diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2018-08-13 15:58:18 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-08-16 13:14:40 -0400 |
commit | 974d541623929fa2622d27d5d338a5b63596794b (patch) | |
tree | f47a540bf07efd7f6cda68f49d3675c2462d731a /drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |
parent | 1e7f229e5d92078f772d4f81893b23504cd847a8 (diff) |
gpu: nvgpu: Move ltc HAL to common
Move implementation of ltc HAL to common/ltc.
JIRA NVGPU-956
Change-Id: Id78d74e8612d7dacfb8d322d491abecd798e42b5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1798461
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/ltc_gm20b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 489 |
1 file changed, 0 insertions, 489 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c deleted file mode 100644 index 65945fad..00000000 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ /dev/null | |||
@@ -1,489 +0,0 @@ | |||
1 | /* | ||
2 | * GM20B L2 | ||
3 | * | ||
4 | * Copyright (c) 2014-2018 NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <trace/events/gk20a.h> | ||
26 | |||
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/enabled.h> | ||
29 | #include <nvgpu/bug.h> | ||
30 | #include <nvgpu/ltc.h> | ||
31 | #include <nvgpu/io.h> | ||
32 | #include <nvgpu/utils.h> | ||
33 | |||
34 | #include <nvgpu/hw/gm20b/hw_mc_gm20b.h> | ||
35 | #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h> | ||
36 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | ||
37 | #include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h> | ||
38 | |||
39 | #include "gk20a/gk20a.h" | ||
40 | |||
41 | #include "ltc_gm20b.h" | ||
42 | |||
43 | int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | ||
44 | { | ||
45 | /* max memory size (MB) to cover */ | ||
46 | u32 max_size = gr->max_comptag_mem; | ||
47 | /* one tag line covers 128KB */ | ||
48 | u32 max_comptag_lines = max_size << 3U; | ||
49 | |||
50 | u32 hw_max_comptag_lines = | ||
51 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); | ||
52 | |||
53 | u32 cbc_param = | ||
54 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
55 | u32 comptags_per_cacheline = | ||
56 | ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); | ||
57 | |||
58 | u32 compbit_backing_size; | ||
59 | |||
60 | int err; | ||
61 | |||
62 | nvgpu_log_fn(g, " "); | ||
63 | |||
64 | if (max_comptag_lines == 0U) | ||
65 | return 0; | ||
66 | |||
67 | if (max_comptag_lines > hw_max_comptag_lines) | ||
68 | max_comptag_lines = hw_max_comptag_lines; | ||
69 | |||
70 | compbit_backing_size = | ||
71 | DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * | ||
72 | gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; | ||
73 | |||
74 | /* aligned to 2KB * ltc_count */ | ||
75 | compbit_backing_size += | ||
76 | g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
77 | |||
78 | /* must be a multiple of 64KB */ | ||
79 | compbit_backing_size = roundup(compbit_backing_size, 64*1024); | ||
80 | |||
81 | max_comptag_lines = | ||
82 | (compbit_backing_size * comptags_per_cacheline) / | ||
83 | (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); | ||
84 | |||
85 | if (max_comptag_lines > hw_max_comptag_lines) | ||
86 | max_comptag_lines = hw_max_comptag_lines; | ||
87 | |||
88 | nvgpu_log_info(g, "compbit backing store size : %d", | ||
89 | compbit_backing_size); | ||
90 | nvgpu_log_info(g, "max comptag lines : %d", | ||
91 | max_comptag_lines); | ||
92 | |||
93 | err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size); | ||
94 | if (err) | ||
95 | return err; | ||
96 | |||
97 | err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); | ||
98 | if (err) | ||
99 | return err; | ||
100 | |||
101 | gr->max_comptag_lines = max_comptag_lines; | ||
102 | gr->comptags_per_cacheline = comptags_per_cacheline; | ||
103 | |||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | ||
108 | u32 min, u32 max) | ||
109 | { | ||
110 | struct gr_gk20a *gr = &g->gr; | ||
111 | struct nvgpu_timeout timeout; | ||
112 | int err = 0; | ||
113 | u32 ltc, slice, ctrl1, val, hw_op = 0U; | ||
114 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( | ||
115 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | ||
116 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
117 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
118 | const u32 max_lines = 16384U; | ||
119 | |||
120 | nvgpu_log_fn(g, " "); | ||
121 | |||
122 | trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); | ||
123 | |||
124 | if (gr->compbit_store.mem.size == 0) | ||
125 | return 0; | ||
126 | |||
127 | while (1) { | ||
128 | const u32 iter_max = min(min + max_lines - 1, max); | ||
129 | bool full_cache_op = true; | ||
130 | |||
131 | nvgpu_mutex_acquire(&g->mm.l2_op_lock); | ||
132 | |||
133 | nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); | ||
134 | |||
135 | if (op == gk20a_cbc_op_clear) { | ||
136 | gk20a_writel( | ||
137 | g, ltc_ltcs_ltss_cbc_ctrl2_r(), | ||
138 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( | ||
139 | min)); | ||
140 | gk20a_writel( | ||
141 | g, ltc_ltcs_ltss_cbc_ctrl3_r(), | ||
142 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( | ||
143 | iter_max)); | ||
144 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); | ||
145 | full_cache_op = false; | ||
146 | } else if (op == gk20a_cbc_op_clean) { | ||
147 | /* this is full-cache op */ | ||
148 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); | ||
149 | } else if (op == gk20a_cbc_op_invalidate) { | ||
150 | /* this is full-cache op */ | ||
151 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); | ||
152 | } else { | ||
153 | nvgpu_err(g, "Unknown op: %u", (unsigned)op); | ||
154 | err = -EINVAL; | ||
155 | goto out; | ||
156 | } | ||
157 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), | ||
158 | gk20a_readl(g, | ||
159 | ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); | ||
160 | |||
161 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
162 | for (slice = 0; slice < slices_per_ltc; slice++) { | ||
163 | |||
164 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | ||
165 | ltc * ltc_stride + slice * lts_stride; | ||
166 | |||
167 | nvgpu_timeout_init(g, &timeout, 2000, | ||
168 | NVGPU_TIMER_RETRY_TIMER); | ||
169 | do { | ||
170 | val = gk20a_readl(g, ctrl1); | ||
171 | if (!(val & hw_op)) | ||
172 | break; | ||
173 | nvgpu_udelay(5); | ||
174 | } while (!nvgpu_timeout_expired(&timeout)); | ||
175 | |||
176 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
177 | nvgpu_err(g, "comp tag clear timeout"); | ||
178 | err = -EBUSY; | ||
179 | goto out; | ||
180 | } | ||
181 | } | ||
182 | } | ||
183 | |||
184 | /* are we done? */ | ||
185 | if (full_cache_op || iter_max == max) | ||
186 | break; | ||
187 | |||
188 | /* note: iter_max is inclusive upper bound */ | ||
189 | min = iter_max + 1; | ||
190 | |||
191 | /* give a chance for higher-priority threads to progress */ | ||
192 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
193 | } | ||
194 | out: | ||
195 | trace_gk20a_ltc_cbc_ctrl_done(g->name); | ||
196 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
197 | return err; | ||
198 | } | ||
199 | |||
200 | void gm20b_ltc_init_fs_state(struct gk20a *g) | ||
201 | { | ||
202 | struct gr_gk20a *gr = &g->gr; | ||
203 | u32 reg; | ||
204 | |||
205 | nvgpu_log_info(g, "initialize gm20b l2"); | ||
206 | |||
207 | g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r()); | ||
208 | g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); | ||
209 | nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); | ||
210 | |||
211 | reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); | ||
212 | gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; | ||
213 | gr->cacheline_size = | ||
214 | 512U << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); | ||
215 | |||
216 | gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), | ||
217 | g->ltc_count); | ||
218 | gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), | ||
219 | g->ltc_count); | ||
220 | |||
221 | gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(), | ||
222 | gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) | | ||
223 | ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m()); | ||
224 | |||
225 | /* Disable LTC interrupts */ | ||
226 | reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r()); | ||
227 | reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m(); | ||
228 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m(); | ||
229 | reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m(); | ||
230 | gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg); | ||
231 | } | ||
232 | |||
233 | void gm20b_ltc_isr(struct gk20a *g) | ||
234 | { | ||
235 | u32 mc_intr, ltc_intr; | ||
236 | unsigned int ltc, slice; | ||
237 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
238 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
239 | |||
240 | mc_intr = gk20a_readl(g, mc_intr_ltc_r()); | ||
241 | nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr); | ||
242 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
243 | if ((mc_intr & 1U << ltc) == 0) | ||
244 | continue; | ||
245 | for (slice = 0; slice < g->gr.slices_per_ltc; slice++) { | ||
246 | ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() + | ||
247 | ltc_stride * ltc + | ||
248 | lts_stride * slice); | ||
249 | nvgpu_err(g, "ltc%d, slice %d: %08x", | ||
250 | ltc, slice, ltc_intr); | ||
251 | gk20a_writel(g, ltc_ltc0_lts0_intr_r() + | ||
252 | ltc_stride * ltc + | ||
253 | lts_stride * slice, | ||
254 | ltc_intr); | ||
255 | } | ||
256 | } | ||
257 | } | ||
258 | |||
259 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) | ||
260 | { | ||
261 | u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); | ||
262 | if (val == 2U) { | ||
263 | return base * 2; | ||
264 | } else if (val != 1) { | ||
265 | nvgpu_err(g, "Invalid number of active ltcs: %08x", val); | ||
266 | } | ||
267 | |||
268 | return base; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Performs a full flush of the L2 cache. | ||
273 | */ | ||
274 | void gm20b_flush_ltc(struct gk20a *g) | ||
275 | { | ||
276 | struct nvgpu_timeout timeout; | ||
277 | unsigned int ltc; | ||
278 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
279 | |||
280 | /* Clean... */ | ||
281 | nvgpu_writel_check(g, ltc_ltcs_ltss_tstg_cmgmt1_r(), | ||
282 | ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() | | ||
283 | ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() | | ||
284 | ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() | | ||
285 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() | | ||
286 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() | | ||
287 | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f()); | ||
288 | |||
289 | /* Wait on each LTC individually. */ | ||
290 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
291 | u32 op_pending; | ||
292 | |||
293 | /* | ||
294 | * Use 5ms - this should be sufficient time to flush the cache. | ||
295 | * On tegra, rough EMC BW available for old tegra chips (newer | ||
296 | * chips are strictly faster) can be estimated as follows: | ||
297 | * | ||
298 | * Lowest reasonable EMC clock speed will be around 102MHz on | ||
299 | * t124 for display enabled boards and generally fixed to max | ||
300 | * for non-display boards (since they are generally plugged in). | ||
301 | * | ||
302 | * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that | ||
303 | * BW the GPU will likely get about half (display and overhead/ | ||
304 | * utilization inefficiency eating the rest) so 650MB/s at | ||
305 | * worst. Assuming at most 1MB of GPU L2 cache (less for most | ||
306 | * chips) worst case is we take 1MB/650MB/s = 1.5ms. | ||
307 | * | ||
308 | * So 5ms timeout here should be more than sufficient. | ||
309 | */ | ||
310 | nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); | ||
311 | |||
312 | do { | ||
313 | int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + | ||
314 | ltc * ltc_stride; | ||
315 | op_pending = gk20a_readl(g, cmgmt1); | ||
316 | } while ((op_pending & | ||
317 | ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) && | ||
318 | !nvgpu_timeout_expired_msg(&timeout, | ||
319 | "L2 flush timeout!")); | ||
320 | } | ||
321 | |||
322 | /* And invalidate. */ | ||
323 | nvgpu_writel_check(g, ltc_ltcs_ltss_tstg_cmgmt0_r(), | ||
324 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() | | ||
325 | ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() | | ||
326 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() | | ||
327 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() | | ||
328 | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f()); | ||
329 | |||
330 | /* Wait on each LTC individually. */ | ||
331 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | ||
332 | u32 op_pending; | ||
333 | |||
334 | /* Again, 5ms. */ | ||
335 | nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); | ||
336 | |||
337 | do { | ||
338 | int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + | ||
339 | ltc * ltc_stride; | ||
340 | op_pending = gk20a_readl(g, cmgmt0); | ||
341 | } while ((op_pending & | ||
342 | ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) && | ||
343 | !nvgpu_timeout_expired_msg(&timeout, | ||
344 | "L2 flush timeout!")); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | int gm20b_determine_L2_size_bytes(struct gk20a *g) | ||
349 | { | ||
350 | u32 lts_per_ltc; | ||
351 | u32 ways; | ||
352 | u32 sets; | ||
353 | u32 bytes_per_line; | ||
354 | u32 active_ltcs; | ||
355 | u32 cache_size; | ||
356 | |||
357 | u32 tmp; | ||
358 | u32 active_sets_value; | ||
359 | |||
360 | tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r()); | ||
361 | ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp)); | ||
362 | |||
363 | active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp); | ||
364 | if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) { | ||
365 | sets = 64U; | ||
366 | } else if (active_sets_value == | ||
367 | ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) { | ||
368 | sets = 32U; | ||
369 | } else if (active_sets_value == | ||
370 | ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) { | ||
371 | sets = 16U; | ||
372 | } else { | ||
373 | nvgpu_err(g, "Unknown constant %u for active sets", | ||
374 | (unsigned)active_sets_value); | ||
375 | sets = 0U; | ||
376 | } | ||
377 | |||
378 | active_ltcs = g->gr.num_fbps; | ||
379 | |||
380 | /* chip-specific values */ | ||
381 | lts_per_ltc = 2U; | ||
382 | bytes_per_line = 128U; | ||
383 | cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line; | ||
384 | |||
385 | return cache_size; | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * Sets the ZBC color for the passed index. | ||
390 | */ | ||
391 | void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, | ||
392 | struct zbc_entry *color_val, | ||
393 | u32 index) | ||
394 | { | ||
395 | u32 i; | ||
396 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
397 | |||
398 | nvgpu_writel_check(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
399 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
400 | |||
401 | for (i = 0; | ||
402 | i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) { | ||
403 | nvgpu_writel_check(g, | ||
404 | ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), | ||
405 | color_val->color_l2[i]); | ||
406 | } | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * Sets the ZBC depth for the passed index. | ||
411 | */ | ||
412 | void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
413 | struct zbc_entry *depth_val, | ||
414 | u32 index) | ||
415 | { | ||
416 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
417 | |||
418 | nvgpu_writel_check(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
419 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
420 | |||
421 | nvgpu_writel_check(g, | ||
422 | ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), | ||
423 | depth_val->depth); | ||
424 | } | ||
425 | |||
426 | void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) | ||
427 | { | ||
428 | u32 max_size = gr->max_comptag_mem; | ||
429 | u32 max_comptag_lines = max_size << 3U; | ||
430 | |||
431 | u32 compbit_base_post_divide; | ||
432 | u64 compbit_base_post_multiply64; | ||
433 | u64 compbit_store_iova; | ||
434 | u64 compbit_base_post_divide64; | ||
435 | |||
436 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
437 | compbit_store_iova = nvgpu_mem_get_phys_addr(g, | ||
438 | &gr->compbit_store.mem); | ||
439 | else | ||
440 | compbit_store_iova = nvgpu_mem_get_addr(g, | ||
441 | &gr->compbit_store.mem); | ||
442 | |||
443 | compbit_base_post_divide64 = compbit_store_iova >> | ||
444 | ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
445 | |||
446 | do_div(compbit_base_post_divide64, g->ltc_count); | ||
447 | compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); | ||
448 | |||
449 | compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * | ||
450 | g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
451 | |||
452 | if (compbit_base_post_multiply64 < compbit_store_iova) | ||
453 | compbit_base_post_divide++; | ||
454 | |||
455 | /* Bug 1477079 indicates sw adjustment on the posted divided base. */ | ||
456 | if (g->ops.ltc.cbc_fix_config) | ||
457 | compbit_base_post_divide = | ||
458 | g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); | ||
459 | |||
460 | gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), | ||
461 | compbit_base_post_divide); | ||
462 | |||
463 | nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, | ||
464 | "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", | ||
465 | (u32)(compbit_store_iova >> 32), | ||
466 | (u32)(compbit_store_iova & 0xffffffff), | ||
467 | compbit_base_post_divide); | ||
468 | |||
469 | gr->compbit_store.base_hw = compbit_base_post_divide; | ||
470 | |||
471 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, | ||
472 | 0, max_comptag_lines - 1); | ||
473 | |||
474 | } | ||
475 | |||
476 | void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled) | ||
477 | { | ||
478 | u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); | ||
479 | u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); | ||
480 | |||
481 | if (enabled) | ||
482 | /* bypass disabled (normal caching ops)*/ | ||
483 | reg &= ~reg_f; | ||
484 | else | ||
485 | /* bypass enabled (no caching) */ | ||
486 | reg |= reg_f; | ||
487 | |||
488 | gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); | ||
489 | } | ||