From 1c9aaa1eafcf91fbc29404b449f2bec072c804a5 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Fri, 25 Apr 2014 15:00:54 +0300
Subject: gpu: nvgpu: Implement ELPG flush for gm20b

ELPG flush is initiated from a common broadcast register, but must be
waited on via per-L2 registers. Split gk20a and gm20b versions of the
flush.

Change-Id: I75c2d65e8da311b50d35bee70308b60464ec2d4d
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/401545
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/gpu/nvgpu/gk20a/ltc_common.c   | 34 -------------------------
 drivers/gpu/nvgpu/gk20a/ltc_gk20a.c    | 34 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | 20 +++++++++++++++++--
 drivers/gpu/nvgpu/gm20b/ltc_gm20b.c    | 46 +++++++++++++++++++++++++++++++++-
 4 files changed, 97 insertions(+), 37 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
index ac46a9a0..72477983 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_common.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
 			   0, max_comptag_lines - 1);
 
 }
-
-/* Flushes the compression bit cache as well as "data".
- * Note: the name here is a bit of a misnomer. ELPG uses this
- * internally... but ELPG doesn't have to be on to do it manually.
- */
-static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
-{
-	u32 data;
-	s32 retry = 100;
-
-	gk20a_dbg_fn("");
-
-	/* Make sure all previous writes are committed to the L2. There's no
-	   guarantee that writes are to DRAM. This will be a sysmembar internal
-	   to the L2. */
-	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
-		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
-	do {
-		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
-
-		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
-		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
-			gk20a_dbg_info("g_elpg_flush 0x%x", data);
-			retry--;
-			usleep_range(20, 40);
-		} else
-			break;
-	} while (retry >= 0 || !tegra_platform_is_silicon());
-
-	if (retry < 0)
-		gk20a_warn(dev_from_gk20a(g),
-			    "g_elpg_flush too many retries");
-
-}
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index c1ba2aee..9f5317fc 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g)
 	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+/* Flushes the compression bit cache as well as "data".
+ * Note: the name here is a bit of a misnomer. ELPG uses this
+ * internally... but ELPG doesn't have to be on to do it manually.
+ */
+static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
+{
+	u32 data;
+	s32 retry = 100;
+
+	gk20a_dbg_fn("");
+
+	/* Make sure all previous writes are committed to the L2. There's no
+	   guarantee that writes are to DRAM. This will be a sysmembar internal
+	   to the L2. */
+	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+	do {
+		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
+
+		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
+		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
+			gk20a_dbg_info("g_elpg_flush 0x%x", data);
+			retry--;
+			usleep_range(20, 40);
+		} else
+			break;
+	} while (retry >= 0 || !tegra_platform_is_silicon());
+
+	if (retry < 0)
+		gk20a_warn(dev_from_gk20a(g),
+			    "g_elpg_flush too many retries");
+
+}
+
 void gk20a_init_ltc(struct gpu_ops *gops)
 {
 	gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
index 28c58f50..9840805d 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
 {
-    return 0x1;
+	return 0x1;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
 {
-    return 0x2;
+	return 0x2;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
 {
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
 {
 	return 0x1;
 }
+static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
+{
+	return 0x00142214;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x1;
+}
 static inline u32 ltc_ltc0_ltss_intr_r(void)
 {
 	return 0x0014020c;
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 5da21c64..43c90970 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g)
 	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
+{
+	u32 data;
+	bool done[g->ltc_count];
+	s32 retry = 100;
+	int i;
+	int num_done = 0;
+	u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
+
+	gk20a_dbg_fn("");
+
+	for (i = 0; i < g->ltc_count; i++)
+		done[i] = 0;
+
+	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+	do {
+		for (i = 0; i < g->ltc_count; i++) {
+			if (done[i])
+				continue;
+
+			data = gk20a_readl(g,
+					ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);
+
+			if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
+				gk20a_dbg_info("g_elpg_flush 0x%x", data);
+			} else {
+				done[i] = 1;
+				num_done++;
+			}
+		}
+
+		if (num_done < g->ltc_count) {
+			retry--;
+			usleep_range(20, 40);
+		} else
+			break;
+	} while (retry >= 0 || !tegra_platform_is_silicon());
+
+	if (retry < 0)
+		gk20a_warn(dev_from_gk20a(g),
+			    "g_elpg_flush too many retries");
+}
+
 void gm20b_init_ltc(struct gpu_ops *gops)
 {
 	/* Gk20a reused ops. */
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops)
 	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
 	gops->ltc.init_comptags = gm20b_ltc_init_comptags;
 	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
-	gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;
+	gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
 	gops->ltc.isr = gm20b_ltc_isr;
 }
--
cgit v1.2.2