diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ltc_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 135 |
1 files changed, 134 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 8867202f..1d517c27 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -21,14 +21,36 @@ | |||
21 | #include <nvgpu/log.h> | 21 | #include <nvgpu/log.h> |
22 | #include <nvgpu/bug.h> | 22 | #include <nvgpu/bug.h> |
23 | #include <nvgpu/enabled.h> | 23 | #include <nvgpu/enabled.h> |
24 | #include <nvgpu/dma.h> | ||
24 | 25 | ||
25 | #include "gk20a.h" | 26 | #include "gk20a.h" |
27 | #include "gr_gk20a.h" | ||
26 | #include "ltc_gk20a.h" | 28 | #include "ltc_gk20a.h" |
27 | 29 | ||
28 | #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h> | 30 | #include <nvgpu/hw/gk20a/hw_ltc_gk20a.h> |
29 | 31 | ||
30 | #include "ltc_common.c" | ||
31 | 32 | ||
33 | /* Code that does not depend on HW registers: */ | ||
34 | |||
35 | int gk20a_ltc_alloc_phys_cbc(struct gk20a *g, size_t compbit_backing_size) | ||
36 | { | ||
37 | struct gr_gk20a *gr = &g->gr; | ||
38 | |||
39 | return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_FORCE_CONTIGUOUS, | ||
40 | compbit_backing_size, | ||
41 | &gr->compbit_store.mem); | ||
42 | } | ||
43 | |||
44 | int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, size_t compbit_backing_size) | ||
45 | { | ||
46 | struct gr_gk20a *gr = &g->gr; | ||
47 | |||
48 | return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING, | ||
49 | compbit_backing_size, | ||
50 | &gr->compbit_store.mem); | ||
51 | } | ||
52 | |||
53 | /* Code that depends on HW registers: */ | ||
32 | static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | 54 | static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) |
33 | { | 55 | { |
34 | /* max memory size (MB) to cover */ | 56 | /* max memory size (MB) to cover */ |
@@ -231,6 +253,117 @@ static int gk20a_determine_L2_size_bytes(struct gk20a *g) | |||
231 | return cache_size; | 253 | return cache_size; |
232 | } | 254 | } |
233 | 255 | ||
256 | /* | ||
257 | * Sets the ZBC color for the passed index. | ||
258 | */ | ||
259 | static void gk20a_ltc_set_zbc_color_entry(struct gk20a *g, | ||
260 | struct zbc_entry *color_val, | ||
261 | u32 index) | ||
262 | { | ||
263 | u32 i; | ||
264 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
265 | |||
266 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
267 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
268 | |||
269 | for (i = 0; | ||
270 | i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) { | ||
271 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), | ||
272 | color_val->color_l2[i]); | ||
273 | } | ||
274 | gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); | ||
275 | } | ||
276 | |||
277 | /* | ||
278 | * Sets the ZBC depth for the passed index. | ||
279 | */ | ||
280 | static void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g, | ||
281 | struct zbc_entry *depth_val, | ||
282 | u32 index) | ||
283 | { | ||
284 | u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; | ||
285 | |||
286 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), | ||
287 | ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); | ||
288 | |||
289 | gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), | ||
290 | depth_val->depth); | ||
291 | |||
292 | gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); | ||
293 | } | ||
294 | |||
295 | static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) | ||
296 | { | ||
297 | u32 max_size = gr->max_comptag_mem; | ||
298 | u32 max_comptag_lines = max_size << 3; | ||
299 | |||
300 | u32 compbit_base_post_divide; | ||
301 | u64 compbit_base_post_multiply64; | ||
302 | u64 compbit_store_iova; | ||
303 | u64 compbit_base_post_divide64; | ||
304 | |||
305 | if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) | ||
306 | compbit_store_iova = gk20a_mem_phys(&gr->compbit_store.mem); | ||
307 | else | ||
308 | compbit_store_iova = g->ops.mm.get_iova_addr(g, | ||
309 | gr->compbit_store.mem.priv.sgt->sgl, 0); | ||
310 | |||
311 | compbit_base_post_divide64 = compbit_store_iova >> | ||
312 | ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
313 | |||
314 | do_div(compbit_base_post_divide64, g->ltc_count); | ||
315 | compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); | ||
316 | |||
317 | compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * | ||
318 | g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); | ||
319 | |||
320 | if (compbit_base_post_multiply64 < compbit_store_iova) | ||
321 | compbit_base_post_divide++; | ||
322 | |||
323 | /* Bug 1477079 indicates sw adjustment on the posted divided base. */ | ||
324 | if (g->ops.ltc.cbc_fix_config) | ||
325 | compbit_base_post_divide = | ||
326 | g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); | ||
327 | |||
328 | gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), | ||
329 | compbit_base_post_divide); | ||
330 | |||
331 | gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, | ||
332 | "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", | ||
333 | (u32)(compbit_store_iova >> 32), | ||
334 | (u32)(compbit_store_iova & 0xffffffff), | ||
335 | compbit_base_post_divide); | ||
336 | |||
337 | gr->compbit_store.base_hw = compbit_base_post_divide; | ||
338 | |||
339 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, | ||
340 | 0, max_comptag_lines - 1); | ||
341 | |||
342 | } | ||
343 | |||
#ifdef CONFIG_DEBUG_FS
/*
 * Apply g->mm.ltc_enabled_debug to the hardware when it differs from
 * g->mm.ltc_enabled.
 *
 * Under g->debugfs_lock, the L2 bypass-mode bit in the tstg_set_mgmt_2
 * register is cleared when ltc_enabled_debug is set and set when it is
 * clear; g->mm.ltc_enabled is then updated to match. Nothing is touched
 * when the two flags already agree.
 */
static void gk20a_ltc_sync_debugfs(struct gk20a *g)
{
	u32 bypass_bit;
	u32 mgmt2;

	bypass_bit = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();

	nvgpu_spinlock_acquire(&g->debugfs_lock);

	if (g->mm.ltc_enabled == g->mm.ltc_enabled_debug) {
		goto unlock;
	}

	mgmt2 = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());

	if (!g->mm.ltc_enabled_debug) {
		/* bypass enabled (no caching) */
		mgmt2 |= bypass_bit;
	} else {
		/* bypass disabled (normal caching ops) */
		mgmt2 &= ~bypass_bit;
	}

	gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), mgmt2);
	g->mm.ltc_enabled = g->mm.ltc_enabled_debug;

unlock:
	nvgpu_spinlock_release(&g->debugfs_lock);
}
#endif
366 | |||
234 | void gk20a_init_ltc(struct gpu_ops *gops) | 367 | void gk20a_init_ltc(struct gpu_ops *gops) |
235 | { | 368 | { |
236 | gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; | 369 | gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; |