From 8e53d790902b8a40098a5851584ae7ba58b357b6 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Mon, 19 Dec 2016 15:23:01 -0800
Subject: gpu: nvgpu: Use timer API in gm20b code

Use the timer API instead of Linux-specific APIs for handling
timeouts.

Also, lower the L2 timeout from 1 second (absurdly long) to 5ms.

Bug 1799159

Change-Id: I27dbc35b12e9bc22ff2207bb87543f76203e20f1
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1273825
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gm20b/acr_gm20b.c  | 72 +++++++++++++++++++----------------
 drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 11 ++++--
 drivers/gpu/nvgpu/gm20b/ltc_gm20b.c  | 73 ++++++++++++++++++++----------------
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c   | 31 +++++++--------
 drivers/gpu/nvgpu/gm20b/pmu_gm20b.c  | 13 +++++--
 5 files changed, 111 insertions(+), 89 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index f4311ee9..e47bc773 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -18,10 +18,13 @@
 #include <linux/debugfs.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
-#include "nvgpu_common.h"
 
 #include <linux/platform/tegra/mc.h>
 
+#include <nvgpu/timers.h>
+
+#include "nvgpu_common.h"
+
 #include "gk20a/gk20a.h"
 #include "gk20a/pmu_gk20a.h"
 #include "gk20a/semaphore_gk20a.h"
@@ -1476,64 +1479,69 @@ err_done:
 /*!
 *	Wait for PMU to halt
 *	@param[in]	g		GPU object pointer
-*	@param[in]	timeout		Timeout in msec for PMU to halt
+*	@param[in]	timeout_ms	Timeout in msec for PMU to halt
 *	@return '0' if PMU halts
 */
-static int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout)
+static int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms)
 {
 	u32 data = 0;
-	int completion = -EBUSY;
-	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
+	int ret = -EBUSY;
+	struct nvgpu_timeout timeout;
+
+	nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
 
-	while (time_before(jiffies, end_jiffies) ||
-			!tegra_platform_is_silicon()) {
+	do {
 		data = gk20a_readl(g, pwr_falcon_cpuctl_r());
 		if (data & pwr_falcon_cpuctl_halt_intr_m()) {
-			/*CPU is halted break*/
-			completion = 0;
+			/* CPU has halted; stop polling */
+			ret = 0;
 			break;
 		}
 		udelay(1);
-	}
-	if (completion)
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	if (ret) {
 		gk20a_err(dev_from_gk20a(g), "ACR boot timed out");
-	else {
-		g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r());
-		gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities);
-		data = gk20a_readl(g, pwr_falcon_mailbox0_r());
-		if (data) {
-			gk20a_err(dev_from_gk20a(g),
-				"ACR boot failed, err %x", data);
-			completion = -EAGAIN;
-		}
+		return ret;
+	}
+
+	g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r());
+	gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities);
+	data = gk20a_readl(g, pwr_falcon_mailbox0_r());
+	if (data) {
+		gk20a_err(dev_from_gk20a(g),
+			  "ACR boot failed, err %x", data);
+		ret = -EAGAIN;
 	}
-	return completion;
+
+	return ret;
 }
 
 /*!
 *	Wait for PMU halt interrupt status to be cleared
 *	@param[in]	g		GPU object pointer
-*	@param[in]	timeout_us	Timeout in msec for halt to clear
+*	@param[in]	timeout_ms	Timeout in msec for halt to clear
 *	@return '0' if PMU halt irq status is clear
 */
-static int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout)
+static int clear_halt_interrupt_status(struct gk20a *g, unsigned int timeout_ms)
 {
 	u32 data = 0;
-	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
+	struct nvgpu_timeout timeout;
+
+	nvgpu_timeout_init(g, &timeout, timeout_ms, NVGPU_TIMER_CPU_TIMER);
 
-	while (time_before(jiffies, end_jiffies) ||
-			!tegra_platform_is_silicon()) {
+	do {
 		gk20a_writel(g, pwr_falcon_irqsclr_r(),
 			     gk20a_readl(g, pwr_falcon_irqsclr_r()) | (0x10));
 		data = gk20a_readl(g, (pwr_falcon_irqstat_r()));
+
 		if ((data & pwr_falcon_irqstat_halt_true_f()) !=
 			pwr_falcon_irqstat_halt_true_f())
 			/*halt irq is clear*/
-			break;
-		timeout--;
+			return 0;
+
 		udelay(1);
-	}
-	if (timeout == 0)
-		return -EBUSY;
-	return 0;
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	return -ETIMEDOUT;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index 6be6be04..bd94a54b 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -20,6 +20,8 @@
 
 #include "fifo_gm20b.h"
 
+#include <nvgpu/timers.h>
+
 #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
@@ -69,11 +71,10 @@ static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
 static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
 		unsigned long engine_ids)
 {
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
 	unsigned long delay = GR_IDLE_CHECK_DEFAULT;
 	unsigned long engine_id;
 	int ret = -EBUSY;
+	struct nvgpu_timeout timeout;
 
 	/* trigger faults for all bad engines */
 	for_each_set_bit(engine_id, &engine_ids, 32) {
@@ -89,6 +90,9 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
 		}
 	}
 
+	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
+			   NVGPU_TIMER_CPU_TIMER);
+
 	/* Wait for MMU fault to trigger */
 	do {
 		if (gk20a_readl(g, fifo_intr_0_r()) &
@@ -99,8 +103,7 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
 
 		usleep_range(delay, delay * 2);
 		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-	} while (time_before(jiffies, end_jiffies) ||
-			!tegra_platform_is_silicon());
+	} while (!nvgpu_timeout_expired(&timeout));
 
 	if (ret)
 		gk20a_err(dev_from_gk20a(g), "mmu fault timeout");
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 5b97b388..3324d3df 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B L2
  *
- * Copyright (c) 2014-2016 NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017 NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -14,11 +14,12 @@
  */
 
 #include <linux/types.h>
-#include <linux/jiffies.h>
 #include <trace/events/gk20a.h>
 
 #include "gk20a/gk20a.h"
 
+#include <nvgpu/timers.h>
+
 #include <nvgpu/hw/gm20b/hw_mc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_top_gm20b.h>
@@ -103,10 +104,10 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 		       u32 min, u32 max)
 {
-	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_timeout timeout;
+	int err = 0;
 	u32 ltc, slice, ctrl1, val, hw_op = 0;
-	s32 retry = 200;
 	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
 				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
 	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
@@ -143,18 +144,16 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
 				ltc * ltc_stride + slice * lts_stride;
 
-			retry = 200;
+			nvgpu_timeout_init(g, &timeout, 200,
+					   NVGPU_TIMER_RETRY_TIMER);
 			do {
 				val = gk20a_readl(g, ctrl1);
 				if (!(val & hw_op))
 					break;
-				retry--;
 				udelay(5);
+			} while (!nvgpu_timeout_expired(&timeout));
 
-			} while (retry >= 0 ||
-					!tegra_platform_is_silicon());
-
-			if (retry < 0 && tegra_platform_is_silicon()) {
+			if (nvgpu_timeout_peek_expired(&timeout)) {
 				gk20a_err(dev_from_gk20a(g),
 					   "comp tag clear timeout\n");
 				err = -EBUSY;
@@ -288,23 +287,10 @@ u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
  */
 void gm20b_flush_ltc(struct gk20a *g)
 {
-	unsigned long timeout;
+	struct nvgpu_timeout timeout;
 	unsigned int ltc;
 	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
 
-#define __timeout_init()				\
-	do {						\
-		timeout = jiffies + HZ;			\
-	} while (0)
-#define __timeout_check()						\
-	do {								\
-		if (tegra_platform_is_silicon() &&			\
-		    time_after(jiffies, timeout)) {			\
-			gk20a_err(dev_from_gk20a(g), "L2 flush timeout!"); \
-			break;						\
-		}							\
-	} while (0)
-
 	/* Clean... */
 	gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
 		ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
@@ -318,14 +304,33 @@ void gm20b_flush_ltc(struct gk20a *g)
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		u32 op_pending;
 
-		__timeout_init();
+		/*
+		 * Use 5ms - this should be sufficient time to flush the cache.
+		 * On tegra, rough EMC BW available for old tegra chips (newer
+		 * chips are strictly faster) can be estimated as follows:
+		 *
+		 * Lowest reasonable EMC clock speed will be around 102MHz on
+		 * t124 for display enabled boards and generally fixed to max
+		 * for non-display boards (since they are generally plugged in).
+		 *
+		 * Thus, the available BW is 64b * 2 * 102MHz = 1.6GB/s. Of that
+		 * BW the GPU will likely get about half (display and overhead/
+		 * utilization inefficiency eating the rest) so 800MB/s at
+		 * worst. Assuming at most 1MB of GPU L2 cache (less for most
+		 * chips) worst case is we take 1MB / 800MB/s = 1.25ms.
+		 *
+		 * So 5ms timeout here should be more than sufficient.
+		 */
+		nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
+
 		do {
 			int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
 				     ltc * ltc_stride;
 			op_pending = gk20a_readl(g, cmgmt1);
-			__timeout_check();
-		} while (op_pending &
-			 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f());
+		} while ((op_pending &
+			  ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) &&
+			 !nvgpu_timeout_expired_msg(&timeout,
+						    "L2 flush timeout!"));
 	}
 
 	/* And invalidate. */
@@ -339,14 +344,18 @@ void gm20b_flush_ltc(struct gk20a *g)
 	/* Wait on each LTC individually. */
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		u32 op_pending;
-		__timeout_init();
+
+		/* Again, 5ms. */
+		nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
+
 		do {
 			int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
 				     ltc * ltc_stride;
 			op_pending = gk20a_readl(g, cmgmt0);
-			__timeout_check();
-		} while (op_pending &
-			 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f());
+		} while ((op_pending &
+			  ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) &&
+			 !nvgpu_timeout_expired_msg(&timeout,
+						    "L2 flush timeout!"));
 	}
 }
 
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index ca8fbaee..8f5d1e10 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B MMU
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -20,6 +20,8 @@
 
 #include "mm_gm20b.h"
 
+#include <nvgpu/timers.h>
+
 #include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_fb_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
@@ -27,28 +29,23 @@
 #include <nvgpu/hw/gm20b/hw_bus_gm20b.h>
 
 static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
-		const unsigned int msec)
+					    unsigned int msec)
 {
-	unsigned long timeout;
+	struct nvgpu_timeout timeout;
 
-	if (tegra_platform_is_silicon())
-		timeout = jiffies + msecs_to_jiffies(msec);
-	else
-		timeout = msecs_to_jiffies(msec);
+	nvgpu_timeout_init(g, &timeout, msec, NVGPU_TIMER_CPU_TIMER);
 
-	while (1) {
+	do {
 		u32 val;
+
 		val = gk20a_readl(g, fb_mmu_vpr_info_r());
 		if (fb_mmu_vpr_info_fetch_v(val) ==
-				fb_mmu_vpr_info_fetch_false_v())
-			break;
-		if (tegra_platform_is_silicon()) {
-			if (WARN_ON(time_after(jiffies, timeout)))
-				return -ETIME;
-		} else if (--timeout == 0)
-			return -ETIME;
-	}
-	return 0;
+		    fb_mmu_vpr_info_fetch_false_v())
+			return 0;
+
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	return -ETIMEDOUT;
 }
 
 int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
index 2e568e83..4b87b877 100644
--- a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c
@@ -22,6 +22,8 @@
 #include "acr_gm20b.h"
 #include "pmu_gm20b.h"
 
+#include <nvgpu/timers.h>
+
 #include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_pwr_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_fuse_gm20b.h>
@@ -173,21 +175,24 @@ void pmu_handle_fecs_boot_acr_msg(struct gk20a *g, struct pmu_msg *msg,
 	gk20a_dbg_fn("done");
 }
 
-static int pmu_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val)
+static int pmu_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout_ms,
+					u32 val)
 {
-	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
 	unsigned long delay = GR_FECS_POLL_INTERVAL;
 	u32 reg;
+	struct nvgpu_timeout timeout;
 
 	gk20a_dbg_fn("");
 	reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
+
+	nvgpu_timeout_init(g, &timeout, (int)timeout_ms, NVGPU_TIMER_CPU_TIMER);
+
 	do {
 		reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
 		if (reg == val)
 			return 0;
 		udelay(delay);
-	} while (time_before(jiffies, end_jiffies) ||
-			!tegra_platform_is_silicon());
+	} while (!nvgpu_timeout_expired(&timeout));
 
 	return -ETIMEDOUT;
 }
-- 
cgit v1.2.2