Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/ltc_gm20b.c')
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.c  487
1 file changed, 487 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
new file mode 100644
index 00000000..6ec9aec5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -0,0 +1,487 @@
/*
 * GM20B L2
 *
 * Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <trace/events/gk20a.h>

#include <nvgpu/timers.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/ltc.h>

#include <nvgpu/hw/gm20b/hw_mc_gm20b.h>
#include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
#include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h>

#include "gk20a/gk20a.h"

#include "ltc_gm20b.h"

int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
{
        /* max memory size (MB) to cover */
        u32 max_size = gr->max_comptag_mem;
        /* one tag line covers 128KB */
        u32 max_comptag_lines = max_size << 3;
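        /*
         * Worked example (illustrative numbers, not from this change): at
         * one comptag line per 128 KB there are 8 lines per MB, so covering
         * 2048 MB needs 2048 << 3 = 16384 comptag lines.
         */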

        u32 hw_max_comptag_lines =
                ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();

        u32 cbc_param =
                gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
        u32 comptags_per_cacheline =
                ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
        u32 cacheline_size =
                512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
        u32 slices_per_ltc =
                ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);

        u32 compbit_backing_size;

        int err;

        gk20a_dbg_fn("");

        if (max_comptag_lines == 0)
                return 0;

        if (max_comptag_lines > hw_max_comptag_lines)
                max_comptag_lines = hw_max_comptag_lines;

        compbit_backing_size =
                DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
                cacheline_size * slices_per_ltc * g->ltc_count;

        /* aligned to 2KB * ltc_count */
        compbit_backing_size +=
                g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();

        /* must be a multiple of 64KB */
        compbit_backing_size = roundup(compbit_backing_size, 64*1024);

        max_comptag_lines =
                (compbit_backing_size * comptags_per_cacheline) /
                (cacheline_size * slices_per_ltc * g->ltc_count);

        if (max_comptag_lines > hw_max_comptag_lines)
                max_comptag_lines = hw_max_comptag_lines;
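        /*
         * Sizing sketch with illustrative values (4 comptags per cacheline,
         * 512 B cachelines, 2 slices per LTC, 2 LTCs, 16384 lines):
         * DIV_ROUND_UP(16384, 4) * 512 * 2 * 2 = 8 MB of backing store,
         * plus the 2 KB * ltc_count alignment pad, rounded up to 64 KB.
         */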

        gk20a_dbg_info("compbit backing store size : %d",
                compbit_backing_size);
        gk20a_dbg_info("max comptag lines : %d",
                max_comptag_lines);

        err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size);
        if (err)
                return err;

        err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines);
        if (err)
                return err;

        gr->comptags_per_cacheline = comptags_per_cacheline;
        gr->slices_per_ltc = slices_per_ltc;
        gr->cacheline_size = cacheline_size;

        return 0;
}

int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
                       u32 min, u32 max)
{
        struct gr_gk20a *gr = &g->gr;
        struct nvgpu_timeout timeout;
        int err = 0;
        u32 ltc, slice, ctrl1, val, hw_op = 0;
        u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
                                gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
        u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
        u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
        const u32 max_lines = 16384;

        gk20a_dbg_fn("");

        trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max);

        if (gr->compbit_store.mem.size == 0)
                return 0;

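        /*
         * Work through the range in chunks of at most max_lines (16384)
         * comptag lines, releasing l2_op_lock between chunks so other
         * threads waiting on the lock can make progress.
         */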
        while (1) {
                const u32 iter_max = min(min + max_lines - 1, max);
                bool full_cache_op = true;

                nvgpu_mutex_acquire(&g->mm.l2_op_lock);

                gk20a_dbg_info("clearing CBC lines %u..%u", min, iter_max);

                if (op == gk20a_cbc_op_clear) {
                        gk20a_writel(
                                g, ltc_ltcs_ltss_cbc_ctrl2_r(),
                                ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
                                        min));
                        gk20a_writel(
                                g, ltc_ltcs_ltss_cbc_ctrl3_r(),
                                ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
                                        iter_max));
                        hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
                        full_cache_op = false;
                } else if (op == gk20a_cbc_op_clean) {
                        /* this is a full-cache op */
                        hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
                } else if (op == gk20a_cbc_op_invalidate) {
                        /* this is a full-cache op */
                        hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
                } else {
                        nvgpu_err(g, "Unknown op: %u", (unsigned)op);
                        err = -EINVAL;
                        goto out;
                }
                gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
                             gk20a_readl(g,
                                     ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

                for (ltc = 0; ltc < g->ltc_count; ltc++) {
                        for (slice = 0; slice < slices_per_ltc; slice++) {

                                ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
                                        ltc * ltc_stride + slice * lts_stride;

                                nvgpu_timeout_init(g, &timeout, 2000,
                                                   NVGPU_TIMER_RETRY_TIMER);
                                do {
                                        val = gk20a_readl(g, ctrl1);
                                        if (!(val & hw_op))
                                                break;
                                        nvgpu_udelay(5);
                                } while (!nvgpu_timeout_expired(&timeout));

                                if (nvgpu_timeout_peek_expired(&timeout)) {
                                        nvgpu_err(g, "comp tag clear timeout");
                                        err = -EBUSY;
                                        goto out;
                                }
                        }
                }

                /* are we done? */
                if (full_cache_op || iter_max == max)
                        break;

                /* note: iter_max is an inclusive upper bound */
                min = iter_max + 1;

                /* give a chance for higher-priority threads to progress */
                nvgpu_mutex_release(&g->mm.l2_op_lock);
        }
out:
        trace_gk20a_ltc_cbc_ctrl_done(g->name);
        nvgpu_mutex_release(&g->mm.l2_op_lock);
        return err;
}

void gm20b_ltc_init_fs_state(struct gk20a *g)
{
        u32 reg;

        gk20a_dbg_info("initialize gm20b l2");

        g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r());
        g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r());
        gk20a_dbg_info("%d ltcs out of %d", g->ltc_count, g->max_ltc_count);

        gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(),
                     g->ltc_count);
        gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(),
                     g->ltc_count);

        gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(),
                     gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) |
                     ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m());

        /* Disable LTC interrupts */
        reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
        reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m();
        reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m();
        reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m();
        gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg);
}

void gm20b_ltc_isr(struct gk20a *g)
{
        u32 mc_intr, ltc_intr;
        unsigned int ltc, slice;
        u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
        u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);

        mc_intr = gk20a_readl(g, mc_intr_ltc_r());
        nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr);
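        /* mc_intr_ltc reports one pending bit per LTC; skip idle LTCs. */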
        for (ltc = 0; ltc < g->ltc_count; ltc++) {
                if ((mc_intr & (1U << ltc)) == 0)
                        continue;
                for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
                        ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
                                               ltc_stride * ltc +
                                               lts_stride * slice);
                        nvgpu_err(g, "ltc%d, slice %d: %08x",
                                  ltc, slice, ltc_intr);
                        gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
                                     ltc_stride * ltc +
                                     lts_stride * slice,
                                     ltc_intr);
                }
        }
}

u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
{
        u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
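        /*
         * SW adjustment for the post-divided CBC base (see the Bug 1477079
         * note in gm20b_ltc_init_cbc): with two active LTCs the divided
         * base must be doubled.
         */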
        if (val == 2) {
                return base * 2;
        } else if (val != 1) {
                nvgpu_err(g, "Invalid number of active ltcs: %08x", val);
        }

        return base;
}

/*
 * Performs a full flush of the L2 cache.
 */
void gm20b_flush_ltc(struct gk20a *g)
{
        struct nvgpu_timeout timeout;
        unsigned int ltc;
        u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);

        /* Clean... */
        gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
                ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
                ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() |
                ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() |
                ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() |
                ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() |
                ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f());

        /* Wait on each LTC individually. */
        for (ltc = 0; ltc < g->ltc_count; ltc++) {
                u32 op_pending;

                /*
                 * Use 5ms - this should be sufficient time to flush the cache.
                 * On tegra, rough EMC BW available for old tegra chips (newer
                 * chips are strictly faster) can be estimated as follows:
                 *
                 * Lowest reasonable EMC clock speed will be around 102MHz on
                 * t124 for display enabled boards and generally fixed to max
                 * for non-display boards (since they are generally plugged in).
                 *
                 * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that
                 * BW the GPU will likely get about half (display and overhead/
                 * utilization inefficiency eating the rest) so 650MB/s at
                 * worst. Assuming at most 1MB of GPU L2 cache (less for most
                 * chips) worst case is we take 1MB/650MB/s = 1.5ms.
                 *
                 * So a 5ms timeout here should be more than sufficient.
                 */
                nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);

                do {
                        u32 cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
                                     ltc * ltc_stride;
                        op_pending = gk20a_readl(g, cmgmt1);
                } while ((op_pending &
                          ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) &&
                         !nvgpu_timeout_expired_msg(&timeout,
                                                    "L2 flush timeout!"));
        }

        /* And invalidate. */
        gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt0_r(),
                ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() |
                ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() |
                ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() |
                ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() |
                ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f());

        /* Wait on each LTC individually. */
        for (ltc = 0; ltc < g->ltc_count; ltc++) {
                u32 op_pending;

                /* Again, 5ms. */
                nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);

                do {
                        u32 cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
                                     ltc * ltc_stride;
                        op_pending = gk20a_readl(g, cmgmt0);
                } while ((op_pending &
                          ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) &&
                         !nvgpu_timeout_expired_msg(&timeout,
                                                    "L2 flush timeout!"));
        }
}

int gm20b_determine_L2_size_bytes(struct gk20a *g)
{
        u32 lts_per_ltc;
        u32 ways;
        u32 sets;
        u32 bytes_per_line;
        u32 active_ltcs;
        u32 cache_size;

        u32 tmp;
        u32 active_sets_value;

        tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
        ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));

        active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
        if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
                sets = 64;
        } else if (active_sets_value ==
                   ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
                sets = 32;
        } else if (active_sets_value ==
                   ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
                sets = 16;
        } else {
                nvgpu_err(g, "Unknown constant %u for active sets",
                          (unsigned)active_sets_value);
                sets = 0;
        }

        active_ltcs = g->gr.num_fbps;

        /* chip-specific values */
        lts_per_ltc = 2;
        bytes_per_line = 128;
        cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;
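        /*
         * Illustrative check (assumed values, not read from this change):
         * 1 active LTC * 2 LTS * 16 ways * 32 sets * 128 B lines
         * = 128 KB of L2.
         */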

        return cache_size;
}

/*
 * Sets the ZBC color for the passed index.
 */
void gm20b_ltc_set_zbc_color_entry(struct gk20a *g,
                                   struct zbc_entry *color_val,
                                   u32 index)
{
        u32 i;
        u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;

        gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
                     ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));

        for (i = 0;
             i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) {
                gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i),
                             color_val->color_l2[i]);
        }
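        /* Read the index register back, presumably to post the writes
         * before returning (the depth variant below does the same). */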
        gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r());
}

/*
 * Sets the ZBC depth for the passed index.
 */
void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g,
                                   struct zbc_entry *depth_val,
                                   u32 index)
{
        u32 real_index = index + GK20A_STARTOF_ZBC_TABLE;

        gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
                     ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index));

        gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(),
                     depth_val->depth);

        gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r());
}

void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
{
        u32 max_size = gr->max_comptag_mem;
        u32 max_comptag_lines = max_size << 3;

        u32 compbit_base_post_divide;
        u64 compbit_base_post_multiply64;
        u64 compbit_store_iova;
        u64 compbit_base_post_divide64;

        if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
                compbit_store_iova = nvgpu_mem_get_phys_addr(g,
                                &gr->compbit_store.mem);
        else
                compbit_store_iova = nvgpu_mem_get_addr(g,
                                &gr->compbit_store.mem);

        compbit_base_post_divide64 = compbit_store_iova >>
                ltc_ltcs_ltss_cbc_base_alignment_shift_v();

        do_div(compbit_base_post_divide64, g->ltc_count);
        compbit_base_post_divide = u64_lo32(compbit_base_post_divide64);

        compbit_base_post_multiply64 = ((u64)compbit_base_post_divide *
                g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v();

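        /*
         * The shift and do_div above round down; if multiplying the divided
         * base back up no longer covers the store's IOVA, bump the base by
         * one so the per-LTC base is effectively rounded up.
         */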
        if (compbit_base_post_multiply64 < compbit_store_iova)
                compbit_base_post_divide++;

        /* Bug 1477079 indicates sw adjustment on the post-divided base. */
        if (g->ops.ltc.cbc_fix_config)
                compbit_base_post_divide =
                        g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide);

        gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
                     compbit_base_post_divide);

        gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
                  "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n",
                  (u32)(compbit_store_iova >> 32),
                  (u32)(compbit_store_iova & 0xffffffff),
                  compbit_base_post_divide);

        gr->compbit_store.base_hw = compbit_base_post_divide;

        g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate,
                            0, max_comptag_lines - 1);
}

void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled)
{
        u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
        u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());

        if (enabled)
                /* bypass disabled (normal caching ops) */
                reg &= ~reg_f;
        else
                /* bypass enabled (no caching) */
                reg |= reg_f;

        gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
}