From fb0a23ea168fd0947d9bc1064f91f15ef8a0c057 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Fri, 19 Jan 2018 15:16:44 -0800
Subject: gpu: nvgpu: Implement gp10b variant of cbc_ctrl

Pascal has support for more comptags than Maxwell, but we were using
gm20b definitions for cbc_ctrl on all chips. Specifically field
clear_upper_bound is one bit wider in Pascal.

Implement gp10b version of cbc_ctrl and take that into use in Pascal
and Volta.

Bug 200381317

Change-Id: I7d3cb9e92498e08f8704f156e2afb34404ce587e
Signed-off-by: Terje Bergstrom
Reviewed-on: https://git-master.nvidia.com/r/1642574
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svc-mobile-coverity
Reviewed-by: Seema Khowala
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gp106/hal_gp106.c |  2 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c |  2 +-
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.c | 95 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/ltc_gp10b.h |  4 +-
 drivers/gpu/nvgpu/gv100/hal_gv100.c |  2 +-
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c |  2 +-
 6 files changed, 102 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 68562955..7a57672e 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -236,7 +236,7 @@ static const struct gpu_ops gp106_ops = {
 		.init_cbc = NULL,
 		.init_fs_state = gm20b_ltc_init_fs_state,
 		.init_comptags = gp10b_ltc_init_comptags,
-		.cbc_ctrl = gm20b_ltc_cbc_ctrl,
+		.cbc_ctrl = gp10b_ltc_cbc_ctrl,
 		.isr = gp10b_ltc_isr,
 		.cbc_fix_config = NULL,
 		.flush = gm20b_flush_ltc,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index a5b42565..eb3d1f9d 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -199,7 +199,7 @@ static const struct gpu_ops gp10b_ops = {
 		.init_cbc = gm20b_ltc_init_cbc,
 		.init_fs_state = gp10b_ltc_init_fs_state,
 		.init_comptags = gp10b_ltc_init_comptags,
-		.cbc_ctrl = gm20b_ltc_cbc_ctrl,
+		.cbc_ctrl = gp10b_ltc_cbc_ctrl,
 		.isr = gp10b_ltc_isr,
 		.cbc_fix_config = gm20b_ltc_cbc_fix_config,
 		.flush = gm20b_flush_ltc,
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index d191d778..7735d1ae 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -22,6 +22,8 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include
+
 #include
 #include
 
@@ -132,6 +134,99 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	return 0;
 }
 
+int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
+		       u32 min, u32 max)
+{
+	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_timeout timeout;
+	int err = 0;
+	u32 ltc, slice, ctrl1, val, hw_op = 0;
+	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
+				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+	const u32 max_lines = 16384;
+
+	nvgpu_log_fn(g, " ");
+
+	trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max);
+
+	if (gr->compbit_store.mem.size == 0)
+		return 0;
+
+	while (1) {
+		const u32 iter_max = min(min + max_lines - 1, max);
+		bool full_cache_op = true;
+
+		nvgpu_mutex_acquire(&g->mm.l2_op_lock);
+
+		nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max);
+
+		if (op == gk20a_cbc_op_clear) {
+			gk20a_writel(
+				g, ltc_ltcs_ltss_cbc_ctrl2_r(),
+				ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
+					min));
+			gk20a_writel(
+				g, ltc_ltcs_ltss_cbc_ctrl3_r(),
+				ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
+					iter_max));
+			hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
+			full_cache_op = false;
+		} else if (op == gk20a_cbc_op_clean) {
+			/* this is full-cache op */
+			hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
+		} else if (op == gk20a_cbc_op_invalidate) {
+			/* this is full-cache op */
+			hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
+		} else {
+			nvgpu_err(g, "Unknown op: %u", (unsigned)op);
+			err = -EINVAL;
+			goto out;
+		}
+		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
+			     gk20a_readl(g,
+					 ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
+
+		for (ltc = 0; ltc < g->ltc_count; ltc++) {
+			for (slice = 0; slice < slices_per_ltc; slice++) {
+
+				ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
+					ltc * ltc_stride + slice * lts_stride;
+
+				nvgpu_timeout_init(g, &timeout, 2000,
+						   NVGPU_TIMER_RETRY_TIMER);
+				do {
+					val = gk20a_readl(g, ctrl1);
+					if (!(val & hw_op))
+						break;
+					nvgpu_udelay(5);
+				} while (!nvgpu_timeout_expired(&timeout));
+
+				if (nvgpu_timeout_peek_expired(&timeout)) {
+					nvgpu_err(g, "comp tag clear timeout");
+					err = -EBUSY;
+					goto out;
+				}
+			}
+		}
+
+		/* are we done? */
+		if (full_cache_op || iter_max == max)
+			break;
+
+		/* note: iter_max is inclusive upper bound */
+		min = iter_max + 1;
+
+		/* give a chance for higher-priority threads to progress */
+		nvgpu_mutex_release(&g->mm.l2_op_lock);
+	}
+out:
+	trace_gk20a_ltc_cbc_ctrl_done(g->name);
+	nvgpu_mutex_release(&g->mm.l2_op_lock);
+	return err;
+}
+
 void gp10b_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr, ltc_intr;
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h
index 825204cb..c1a2bf64 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -29,5 +29,7 @@ void gp10b_ltc_isr(struct gk20a *g);
 int gp10b_determine_L2_size_bytes(struct gk20a *g);
 int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr);
 void gp10b_ltc_init_fs_state(struct gk20a *g);
+int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
+		       u32 min, u32 max);
 void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled);
 #endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 6103b923..f8302621 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -267,7 +267,7 @@ static const struct gpu_ops gv100_ops = {
 		.init_cbc = NULL,
 		.init_fs_state = gv11b_ltc_init_fs_state,
 		.init_comptags = gp10b_ltc_init_comptags,
-		.cbc_ctrl = gm20b_ltc_cbc_ctrl,
+		.cbc_ctrl = gp10b_ltc_cbc_ctrl,
 		.isr = gv11b_ltc_isr,
 		.cbc_fix_config = NULL,
 		.flush = gm20b_flush_ltc,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 93f819e4..e4e9323b 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -234,7 +234,7 @@ static const struct gpu_ops gv11b_ops = {
 		.init_cbc = NULL,
 		.init_fs_state = gv11b_ltc_init_fs_state,
 		.init_comptags = gp10b_ltc_init_comptags,
-		.cbc_ctrl = gm20b_ltc_cbc_ctrl,
+		.cbc_ctrl = gp10b_ltc_cbc_ctrl,
 		.isr = gv11b_ltc_isr,
 		.cbc_fix_config = gv11b_ltc_cbc_fix_config,
 		.flush = gm20b_flush_ltc,
-- 
cgit v1.2.2