From 8e53d790902b8a40098a5851584ae7ba58b357b6 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 19 Dec 2016 15:23:01 -0800 Subject: gpu: nvgpu: Use timer API in gm20b code Use the timer API instead of Linux specific APIs for handling timeouts. Also, lower the L2 timeout from 1 second (absurdly long) to 5ms. Bug 1799159 Change-Id: I27dbc35b12e9bc22ff2207bb87543f76203e20f1 Signed-off-by: Alex Waterman Reviewed-on: http://git-master/r/1273825 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gm20b/acr_gm20b.c | 72 +++++++++++++++++++---------------- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 11 ++++-- drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 73 ++++++++++++++++++++---------------- drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 31 +++++++-------- drivers/gpu/nvgpu/gm20b/pmu_gm20b.c | 13 +++++-- 5 files changed, 111 insertions(+), 89 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index f4311ee9..e47bc773 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -18,10 +18,13 @@ #include #include #include -#include "nvgpu_common.h" #include +#include + +#include "nvgpu_common.h" + #include "gk20a/gk20a.h" #include "gk20a/pmu_gk20a.h" #include "gk20a/semaphore_gk20a.h" @@ -1476,64 +1479,69 @@ err_done: /*! * Wait for PMU to halt * @param[in] g GPU object pointer -* @param[in] timeout Timeout in msec for PMU to halt +* @param[in] timeout_ms Timeout in msec for PMU to halt * @return '0' if PMU halts */ -static int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout) +static int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms) { u32 data = 0; - int completion = -EBUSY; - unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + int ret = -EBUSY; + struct nvgpu_timeout timeout; + + nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER); - while (time_before(jiffies, end_jiffies) || - !tegra_platform_is_silicon()) { + do { data = gk20a_readl(g, pwr_falcon_cpuctl_r()); if (data & pwr_falcon_cpuctl_halt_intr_m()) { - /*CPU is halted break*/ - completion = 0; + /* CPU is halted break */ + ret = 0; break; } udelay(1); - } - if (completion) + } while (!nvgpu_timeout_expired(&timeout)); + + if (ret) { gk20a_err(dev_from_gk20a(g), "ACR boot timed out"); - else { - g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r()); - gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities); - data = gk20a_readl(g, pwr_falcon_mailbox0_r()); - if (data) { - gk20a_err(dev_from_gk20a(g), - "ACR boot failed, err %x", data); - completion = -EAGAIN; - } + return ret; + } + + g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r()); + gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities); + data = gk20a_readl(g, pwr_falcon_mailbox0_r()); + if (data) { + gk20a_err(dev_from_gk20a(g), + "ACR boot failed, err %x", data); + ret = -EAGAIN; } - return completion; + + return ret; } /*! * Wait for PMU halt interrupt status to be cleared * @param[in] g GPU object pointer -* @param[in] timeout_us Timeout in msec for halt to clear +* @param[in] timeout_ms Timeout in msec for halt to clear * @return '0' if PMU halt irq status is clear */ -static int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout) +static int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout_ms) { u32 data = 0; - unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + struct nvgpu_timeout timeout; + + nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER); - while (time_before(jiffies, end_jiffies) || - !tegra_platform_is_silicon()) { + do { gk20a_writel(g, pwr_falcon_irqsclr_r(), gk20a_readl(g, pwr_falcon_irqsclr_r()) | (0x10)); data = gk20a_readl(g, (pwr_falcon_irqstat_r())); + if ((data & pwr_falcon_irqstat_halt_true_f()) != pwr_falcon_irqstat_halt_true_f()) /*halt irq is clear*/ - break; - timeout--; + return 0; + udelay(1); - } - if (timeout == 0) - return -EBUSY; - return 0; + } while (!nvgpu_timeout_expired(&timeout)); + + return -ETIMEDOUT; } diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 6be6be04..bd94a54b 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -20,6 +20,8 @@ #include "fifo_gm20b.h" +#include + #include #include #include @@ -69,11 +71,10 @@ static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id) static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g, unsigned long engine_ids) { - unsigned long end_jiffies = jiffies + - msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); unsigned long delay = GR_IDLE_CHECK_DEFAULT; unsigned long engine_id; int ret = -EBUSY; + struct nvgpu_timeout timeout; /* trigger faults for all bad engines */ for_each_set_bit(engine_id, &engine_ids, 32) { @@ -89,6 +90,9 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g, } } + nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), + NVGPU_TIMER_CPU_TIMER); + /* Wait for MMU fault to trigger */ do { if (gk20a_readl(g, fifo_intr_0_r()) & @@ -99,8 +103,7 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g, usleep_range(delay, delay * 2); delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); - } while (time_before(jiffies, end_jiffies) || - !tegra_platform_is_silicon()); + } while (!nvgpu_timeout_expired(&timeout)); if (ret) gk20a_err(dev_from_gk20a(g), "mmu fault timeout"); diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 5b97b388..3324d3df 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c @@ -1,7 +1,7 @@ /* * GM20B L2 * - * Copyright (c) 2014-2016 NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -14,11 +14,12 @@ */ #include -#include #include #include "gk20a/gk20a.h" +#include + #include #include #include @@ -103,10 +104,10 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, u32 min, u32 max) { - int err = 0; struct gr_gk20a *gr = &g->gr; + struct nvgpu_timeout timeout; + int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0; - s32 retry = 200; u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); @@ -143,18 +144,16 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + ltc * ltc_stride + slice * lts_stride; - retry = 200; + nvgpu_timeout_init(g, &timeout, 200, + NVGPU_TIMER_RETRY_TIMER); do { val = gk20a_readl(g, ctrl1); if (!(val & hw_op)) break; - retry--; udelay(5); + } while (!nvgpu_timeout_expired(&timeout)); - } while (retry >= 0 || - !tegra_platform_is_silicon()); - - if (retry < 0 && tegra_platform_is_silicon()) { + if (nvgpu_timeout_peek_expired(&timeout)) { gk20a_err(dev_from_gk20a(g), "comp tag clear timeout\n"); err = -EBUSY; @@ -288,23 +287,10 @@ u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) */ void gm20b_flush_ltc(struct gk20a *g) { - unsigned long timeout; + struct nvgpu_timeout timeout; unsigned int ltc; u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); -#define __timeout_init() \ - do { \ - timeout = jiffies + HZ; \ - } while (0) -#define __timeout_check() \ - do { \ - if (tegra_platform_is_silicon() && \ - time_after(jiffies, timeout)) { \ - gk20a_err(dev_from_gk20a(g), "L2 flush timeout!"); \ - break; \ - } \ - } while (0) - /* Clean... */ gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(), ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() | @@ -318,14 +304,33 @@ void gm20b_flush_ltc(struct gk20a *g) for (ltc = 0; ltc < g->ltc_count; ltc++) { u32 op_pending; - __timeout_init(); + /* + * Use 5ms - this should be sufficient time to flush the cache. + * On tegra, rough EMC BW available for old tegra chips (newer + * chips are strictly faster) can be estimated as follows: + * + * Lowest reasonable EMC clock speed will be around 102MHz on + * t124 for display enabled boards and generally fixed to max + * for non-display boards (since they are generally plugged in). + * + * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that + * BW the GPU will likely get about half (display and overhead/ + * utilization inefficiency eating the rest) so 650MB/s at + * worst. Assuming at most 1MB of GPU L2 cache (less for most + * chips) worst case is we take 1MB/650MB/s = 1.5ms. + * + * So 5ms timeout here should be more than sufficient. + */ + nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); + do { int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + ltc * ltc_stride; op_pending = gk20a_readl(g, cmgmt1); - __timeout_check(); - } while (op_pending & - ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()); + } while ((op_pending & + ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) && + !nvgpu_timeout_expired_msg(&timeout, + "L2 flush timeout!")); } /* And invalidate. */ @@ -339,14 +344,18 @@ void gm20b_flush_ltc(struct gk20a *g) /* Wait on each LTC individually. */ for (ltc = 0; ltc < g->ltc_count; ltc++) { u32 op_pending; - __timeout_init(); + + /* Again, 5ms. */ + nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); + do { int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + ltc * ltc_stride; op_pending = gk20a_readl(g, cmgmt0); - __timeout_check(); - } while (op_pending & - ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()); + } while ((op_pending & + ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) && + !nvgpu_timeout_expired_msg(&timeout, + "L2 flush timeout!")); } } diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index ca8fbaee..8f5d1e10 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -1,7 +1,7 @@ /* * GM20B MMU * - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -20,6 +20,8 @@ #include "mm_gm20b.h" +#include + #include #include #include @@ -27,28 +29,23 @@ #include static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, - const unsigned int msec) + unsigned int msec) { - unsigned long timeout; + struct nvgpu_timeout timeout; - if (tegra_platform_is_silicon()) - timeout = jiffies + msecs_to_jiffies(msec); - else - timeout = msecs_to_jiffies(msec); + nvgpu_timeout_init(g, &timeout, msec, NVGPU_TIMER_CPU_TIMER); - while (1) { + do { u32 val; + val = gk20a_readl(g, fb_mmu_vpr_info_r()); if (fb_mmu_vpr_info_fetch_v(val) == - fb_mmu_vpr_info_fetch_false_v()) - break; - if (tegra_platform_is_silicon()) { - if (WARN_ON(time_after(jiffies, timeout))) - return -ETIME; - } else if (--timeout == 0) - return -ETIME; - } - return 0; + fb_mmu_vpr_info_fetch_false_v()) + return 0; + + } while (!nvgpu_timeout_expired(&timeout)); + + return -ETIMEDOUT; } int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c index 2e568e83..4b87b877 100644 --- a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c @@ -22,6 +22,8 @@ #include "acr_gm20b.h" #include "pmu_gm20b.h" +#include + #include #include #include @@ -173,21 +175,24 @@ void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg, gk20a_dbg_fn("done"); } -static int pmu_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val) +static int pmu_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout_ms, + u32 val) { - unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); unsigned long delay = GR_FECS_POLL_INTERVAL; u32 reg; + struct nvgpu_timeout timeout; gk20a_dbg_fn(""); reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)); + + nvgpu_timeout_init(g, &timeout, (int)timeout_ms, NVGPU_TIMER_CPU_TIMER); + do { reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0)); if (reg == val) return 0; udelay(delay); - } while (time_before(jiffies, end_jiffies) || - !tegra_platform_is_silicon()); + } while (!nvgpu_timeout_expired(&timeout)); return -ETIMEDOUT; } -- cgit v1.2.2