gpu: nvgpu: Use timer API in gm20b code

Use the timer API instead of Linux-specific APIs for handling timeouts. Also, lower the L2 timeout from 1 second (absurdly long) to 5ms.

Bug 1799159

Change-Id: I27dbc35b12e9bc22ff2207bb87543f76203e20f1
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1273825
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Alex Waterman <alexw@nvidia.com> 2016-12-19 18:23:01 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-01-18 19:46:38 -0500
commit: 8e53d790902b8a40098a5851584ae7ba58b357b6 (patch)
tree: 48fd2c6b26ac3137dd2dfe5255cc04f24bcc8834 /drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
parent: 6e2237ef622113b8fa1149aa48988a99fa30594f (diff)
1 files changed, 41 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 5b97b388..3324d3df 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -1,7 +1,7 @@
 /*
 * GM20B L2
 *
- * Copyright (c) 2014-2016 NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017 NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -14,11 +14,12 @@
 */
 #include <linux/types.h>
-#include <linux/jiffies.h>
 #include <trace/events/gk20a.h>
 #include "gk20a/gk20a.h"
+#include <nvgpu/timers.h>
 #include <nvgpu/hw/gm20b/hw_mc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_top_gm20b.h>
@@ -103,10 +104,10 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
                       u32 min, u32 max)
 {
-        int err = 0;
        struct gr_gk20a *gr = &g->gr;
+        struct nvgpu_timeout timeout;
+        int err = 0;
        u32 ltc, slice, ctrl1, val, hw_op = 0;
-        s32 retry = 200;
        u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
                                gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
        u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
@@ -143,18 +144,16 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
                        ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
                                ltc * ltc_stride + slice * lts_stride;
-                        retry = 200;
+                        nvgpu_timeout_init(g, &timeout, 200,
+                                           NVGPU_TIMER_RETRY_TIMER);
                        do {
                                val = gk20a_readl(g, ctrl1);
                                if (!(val & hw_op))
                                        break;
-                                retry--;
                                udelay(5);
+                        } while (!nvgpu_timeout_expired(&timeout));
-                        } while (retry >= 0 ||
+                        if (nvgpu_timeout_peek_expired(&timeout)) {
-                                        !tegra_platform_is_silicon());
-                        if (retry < 0 && tegra_platform_is_silicon()) {
                                gk20a_err(dev_from_gk20a(g),
                                           "comp tag clear timeout\n");
                                err = -EBUSY;
@@ -288,23 +287,10 @@ u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
 */
 void gm20b_flush_ltc(struct gk20a *g)
 {
-        unsigned long timeout;
+        struct nvgpu_timeout timeout;
        unsigned int ltc;
        u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
-#define __timeout_init()                                \
-        do {                                            \
-                timeout = jiffies + HZ;                 \
-        } while (0)
-#define __timeout_check()                                               \
-        do {                                                            \
-                if (tegra_platform_is_silicon() &&                      \
-                    time_after(jiffies, timeout)) {                     \
-                        gk20a_err(dev_from_gk20a(g), "L2 flush timeout!"); \
-                        break;                                          \
-                }                                                       \
-        } while (0)
        /* Clean... */
        gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
                ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
@@ -318,14 +304,33 @@ void gm20b_flush_ltc(struct gk20a *g)
        for (ltc = 0; ltc < g->ltc_count; ltc++) {
                u32 op_pending;
-                __timeout_init();
+                /*
+                 * Use 5ms - this should be sufficient time to flush the cache.
+                 * On tegra, rough EMC BW available for old tegra chips (newer
+                 * chips are strictly faster) can be estimated as follows:
+                 *
+                 * Lowest reasonable EMC clock speed will be around 102MHz on
+                 * t124 for display enabled boards and generally fixed to max
+                 * for non-display boards (since they are generally plugged in).
+                 *
+                 * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that
+                 * BW the GPU will likely get about half (display and overhead/
+                 * utilization inefficiency eating the rest) so 650MB/s at
+                 * worst. Assuming at most 1MB of GPU L2 cache (less for most
+                 * chips) worst case is we take 1MB/650MB/s = 1.5ms.
+                 *
+                 * So 5ms timeout here should be more than sufficient.
+                 */
+                nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
                do {
                        int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
                                     ltc * ltc_stride;
                        op_pending = gk20a_readl(g, cmgmt1);
-                        __timeout_check();
+                } while ((op_pending &
-                } while (op_pending &
+                          ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) &&
-                         ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f());
+                         !nvgpu_timeout_expired_msg(&timeout,
+                                                    "L2 flush timeout!"));
        }
        /* And invalidate. */
@@ -339,14 +344,18 @@ void gm20b_flush_ltc(struct gk20a *g)
        /* Wait on each LTC individually. */
        for (ltc = 0; ltc < g->ltc_count; ltc++) {
                u32 op_pending;
-                __timeout_init();
+                /* Again, 5ms. */
+                nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
                do {
                        int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
                                     ltc * ltc_stride;
                        op_pending = gk20a_readl(g, cmgmt0);
-                        __timeout_check();
+                } while ((op_pending &
-                } while (op_pending &
+                          ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) &&
-                         ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f());
+                         !nvgpu_timeout_expired_msg(&timeout,
+                                                    "L2 flush timeout!"));
        }
 }
author	Alex Waterman <alexw@nvidia.com>	2016-12-19 18:23:01 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-01-18 19:46:38 -0500
commit	8e53d790902b8a40098a5851584ae7ba58b357b6 (patch)
tree	48fd2c6b26ac3137dd2dfe5255cc04f24bcc8834 /drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
parent	6e2237ef622113b8fa1149aa48988a99fa30594f (diff)

diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 5b97b388..3324d3df 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -1,7 +1,7 @@
1	/*	1	/*
2	* GM20B L2	2	* GM20B L2
3	*	3	*
4	* Copyright (c) 2014-2016 NVIDIA CORPORATION. All rights reserved.	4	* Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved.
5	*	5	*
6	* This program is free software; you can redistribute it and/or modify it	6	* This program is free software; you can redistribute it and/or modify it
7	* under the terms and conditions of the GNU General Public License,	7	* under the terms and conditions of the GNU General Public License,
@@ -14,11 +14,12 @@
14	*/	14	*/
15		15
16	#include <linux/types.h>	16	#include <linux/types.h>
17	#include <linux/jiffies.h>
18	#include <trace/events/gk20a.h>	17	#include <trace/events/gk20a.h>
19		18
20	#include "gk20a/gk20a.h"	19	#include "gk20a/gk20a.h"
21		20
		21	#include <nvgpu/timers.h>
		22
22	#include <nvgpu/hw/gm20b/hw_mc_gm20b.h>	23	#include <nvgpu/hw/gm20b/hw_mc_gm20b.h>
23	#include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>	24	#include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
24	#include <nvgpu/hw/gm20b/hw_top_gm20b.h>	25	#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
@@ -103,10 +104,10 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
103	int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,	104	int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
104	u32 min, u32 max)	105	u32 min, u32 max)
105	{	106	{
106	int err = 0;
107	struct gr_gk20a *gr = &g->gr;	107	struct gr_gk20a *gr = &g->gr;
		108	struct nvgpu_timeout timeout;
		109	int err = 0;
108	u32 ltc, slice, ctrl1, val, hw_op = 0;	110	u32 ltc, slice, ctrl1, val, hw_op = 0;
109	s32 retry = 200;
110	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(	111	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
111	gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));	112	gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
112	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);	113	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
@@ -143,18 +144,16 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
143	ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +	144	ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
144	ltc * ltc_stride + slice * lts_stride;	145	ltc * ltc_stride + slice * lts_stride;
145		146
146	retry = 200;	147	nvgpu_timeout_init(g, &timeout, 200,
		148	NVGPU_TIMER_RETRY_TIMER);
147	do {	149	do {
148	val = gk20a_readl(g, ctrl1);	150	val = gk20a_readl(g, ctrl1);
149	if (!(val & hw_op))	151	if (!(val & hw_op))
150	break;	152	break;
151	retry--;
152	udelay(5);	153	udelay(5);
		154	} while (!nvgpu_timeout_expired(&timeout));
153		155
154	} while (retry >= 0 ||	156	if (nvgpu_timeout_peek_expired(&timeout)) {
155	!tegra_platform_is_silicon());
156
157	if (retry < 0 && tegra_platform_is_silicon()) {
158	gk20a_err(dev_from_gk20a(g),	157	gk20a_err(dev_from_gk20a(g),
159	"comp tag clear timeout\n");	158	"comp tag clear timeout\n");
160	err = -EBUSY;	159	err = -EBUSY;
@@ -288,23 +287,10 @@ u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
288	*/	287	*/
289	void gm20b_flush_ltc(struct gk20a *g)	288	void gm20b_flush_ltc(struct gk20a *g)
290	{	289	{
291	unsigned long timeout;	290	struct nvgpu_timeout timeout;
292	unsigned int ltc;	291	unsigned int ltc;
293	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);	292	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
294		293
295	#define __timeout_init() \
296	do { \
297	timeout = jiffies + HZ; \
298	} while (0)
299	#define __timeout_check() \
300	do { \
301	if (tegra_platform_is_silicon() && \
302	time_after(jiffies, timeout)) { \
303	gk20a_err(dev_from_gk20a(g), "L2 flush timeout!"); \
304	break; \
305	} \
306	} while (0)
307
308	/* Clean... */	294	/* Clean... */
309	gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),	295	gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
310	ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |	296	ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
@@ -318,14 +304,33 @@ void gm20b_flush_ltc(struct gk20a *g)
318	for (ltc = 0; ltc < g->ltc_count; ltc++) {	304	for (ltc = 0; ltc < g->ltc_count; ltc++) {
319	u32 op_pending;	305	u32 op_pending;
320		306
321	__timeout_init();	307	/*
		308	* Use 5ms - this should be sufficient time to flush the cache.
		309	* On tegra, rough EMC BW available for old tegra chips (newer
		310	* chips are strictly faster) can be estimated as follows:
		311	*
		312	* Lowest reasonable EMC clock speed will be around 102MHz on
		313	* t124 for display enabled boards and generally fixed to max
		314	* for non-display boards (since they are generally plugged in).
		315	*
		316	* Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that
		317	* BW the GPU will likely get about half (display and overhead/
		318	* utilization inefficiency eating the rest) so 650MB/s at
		319	* worst. Assuming at most 1MB of GPU L2 cache (less for most
		320	* chips) worst case is we take 1MB/650MB/s = 1.5ms.
		321	*
		322	* So 5ms timeout here should be more than sufficient.
		323	*/
		324	nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
		325
322	do {	326	do {
323	int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +	327	int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
324	ltc * ltc_stride;	328	ltc * ltc_stride;
325	op_pending = gk20a_readl(g, cmgmt1);	329	op_pending = gk20a_readl(g, cmgmt1);
326	__timeout_check();	330	} while ((op_pending &
327	} while (op_pending &	331	ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) &&
328	ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f());	332	!nvgpu_timeout_expired_msg(&timeout,
		333	"L2 flush timeout!"));
329	}	334	}
330		335
331	/* And invalidate. */	336	/* And invalidate. */
@@ -339,14 +344,18 @@ void gm20b_flush_ltc(struct gk20a *g)
339	/* Wait on each LTC individually. */	344	/* Wait on each LTC individually. */
340	for (ltc = 0; ltc < g->ltc_count; ltc++) {	345	for (ltc = 0; ltc < g->ltc_count; ltc++) {
341	u32 op_pending;	346	u32 op_pending;
342	__timeout_init();	347
		348	/* Again, 5ms. */
		349	nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
		350
343	do {	351	do {
344	int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +	352	int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
345	ltc * ltc_stride;	353	ltc * ltc_stride;
346	op_pending = gk20a_readl(g, cmgmt0);	354	op_pending = gk20a_readl(g, cmgmt0);
347	__timeout_check();	355	} while ((op_pending &
348	} while (op_pending &	356	ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) &&
349	ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f());	357	!nvgpu_timeout_expired_msg(&timeout,
		358	"L2 flush timeout!"));
350	}	359	}
351	}	360	}
352		361