path: root/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
author	Alex Waterman <alexw@nvidia.com>	2016-12-19 18:23:01 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-01-18 19:46:38 -0500
commit	8e53d790902b8a40098a5851584ae7ba58b357b6 (patch)
tree	48fd2c6b26ac3137dd2dfe5255cc04f24bcc8834 /drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
parent	6e2237ef622113b8fa1149aa48988a99fa30594f (diff)
gpu: nvgpu: Use timer API in gm20b code
Use the timer API instead of Linux specific APIs for handling
timeouts. Also, lower the L2 timeout from 1 second (absurdly
long) to 5ms.

Bug 1799159

Change-Id: I27dbc35b12e9bc22ff2207bb87543f76203e20f1
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1273825
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
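For readers unfamiliar with the nvgpu timeout helpers, the polling pattern this patch adopts looks roughly like the sketch below. It is illustrative only, not code from this patch: poll_until_idle, reg and busy_bit are made-up names, while struct nvgpu_timeout, nvgpu_timeout_init(), nvgpu_timeout_expired(), NVGPU_TIMER_RETRY_TIMER, gk20a_readl() and udelay() are used exactly as they appear in the diff below.

#include <nvgpu/timers.h>

#include "gk20a/gk20a.h"

/*
 * Illustrative sketch: poll a status register until a busy bit clears
 * or the retry budget runs out.  The bookkeeping that the old code did
 * by hand with a local retry counter (plus tegra_platform_is_silicon()
 * special-casing) is handled by the timer API.
 */
static int poll_until_idle(struct gk20a *g, u32 reg, u32 busy_bit)
{
	struct nvgpu_timeout timeout;
	u32 val;

	/* 200 retries with a 5us delay, matching the CBC loop in this patch. */
	nvgpu_timeout_init(g, &timeout, 200, NVGPU_TIMER_RETRY_TIMER);

	do {
		val = gk20a_readl(g, reg);
		if (!(val & busy_bit))
			return 0;
		udelay(5);
	} while (!nvgpu_timeout_expired(&timeout));

	return -EBUSY;	/* still busy after the retry budget was exhausted */
}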
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/ltc_gm20b.c')
-rw-r--r--	drivers/gpu/nvgpu/gm20b/ltc_gm20b.c	73
1 file changed, 41 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 5b97b388..3324d3df 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B L2
  *
- * Copyright (c) 2014-2016 NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -14,11 +14,12 @@
  */
 
 #include <linux/types.h>
-#include <linux/jiffies.h>
 #include <trace/events/gk20a.h>
 
 #include "gk20a/gk20a.h"
 
+#include <nvgpu/timers.h>
+
 #include <nvgpu/hw/gm20b/hw_mc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_top_gm20b.h>
@@ -103,10 +104,10 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 		       u32 min, u32 max)
 {
-	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_timeout timeout;
+	int err = 0;
 	u32 ltc, slice, ctrl1, val, hw_op = 0;
-	s32 retry = 200;
 	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
 			gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
 	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
@@ -143,18 +144,16 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
 				ltc * ltc_stride + slice * lts_stride;
 
-			retry = 200;
+			nvgpu_timeout_init(g, &timeout, 200,
+					   NVGPU_TIMER_RETRY_TIMER);
 			do {
 				val = gk20a_readl(g, ctrl1);
 				if (!(val & hw_op))
 					break;
-				retry--;
 				udelay(5);
+			} while (!nvgpu_timeout_expired(&timeout));
 
-			} while (retry >= 0 ||
-				 !tegra_platform_is_silicon());
-
-			if (retry < 0 && tegra_platform_is_silicon()) {
+			if (nvgpu_timeout_peek_expired(&timeout)) {
 				gk20a_err(dev_from_gk20a(g),
 					  "comp tag clear timeout\n");
 				err = -EBUSY;
@@ -288,23 +287,10 @@ u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
  */
 void gm20b_flush_ltc(struct gk20a *g)
 {
-	unsigned long timeout;
+	struct nvgpu_timeout timeout;
 	unsigned int ltc;
 	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
 
-#define __timeout_init() \
-	do { \
-		timeout = jiffies + HZ; \
-	} while (0)
-#define __timeout_check() \
-	do { \
-		if (tegra_platform_is_silicon() && \
-		    time_after(jiffies, timeout)) { \
-			gk20a_err(dev_from_gk20a(g), "L2 flush timeout!"); \
-			break; \
-		} \
-	} while (0)
-
 	/* Clean... */
 	gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
 		     ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
@@ -318,14 +304,33 @@ void gm20b_flush_ltc(struct gk20a *g)
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		u32 op_pending;
 
-		__timeout_init();
+		/*
+		 * Use 5ms - this should be sufficient time to flush the cache.
+		 * On tegra, rough EMC BW available for old tegra chips (newer
+		 * chips are strictly faster) can be estimated as follows:
+		 *
+		 * Lowest reasonable EMC clock speed will be around 102MHz on
+		 * t124 for display enabled boards and generally fixed to max
+		 * for non-display boards (since they are generally plugged in).
+		 *
+		 * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that
+		 * BW the GPU will likely get about half (display and overhead/
+		 * utilization inefficiency eating the rest) so 650MB/s at
+		 * worst. Assuming at most 1MB of GPU L2 cache (less for most
+		 * chips) worst case is we take 1MB/650MB/s = 1.5ms.
+		 *
+		 * So 5ms timeout here should be more than sufficient.
+		 */
+		nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
+
 		do {
 			int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() +
 				ltc * ltc_stride;
 			op_pending = gk20a_readl(g, cmgmt1);
-			__timeout_check();
-		} while (op_pending &
-			 ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f());
+		} while ((op_pending &
+			  ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) &&
+			 !nvgpu_timeout_expired_msg(&timeout,
+						    "L2 flush timeout!"));
 	}
 
 	/* And invalidate. */
@@ -339,14 +344,18 @@ void gm20b_flush_ltc(struct gk20a *g)
 	/* Wait on each LTC individually. */
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		u32 op_pending;
-		__timeout_init();
+
+		/* Again, 5ms. */
+		nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER);
+
 		do {
 			int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() +
 				ltc * ltc_stride;
 			op_pending = gk20a_readl(g, cmgmt0);
-			__timeout_check();
-		} while (op_pending &
-			 ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f());
+		} while ((op_pending &
+			  ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) &&
+			 !nvgpu_timeout_expired_msg(&timeout,
+						    "L2 flush timeout!"));
 	}
 }
 
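The 5ms figure justified in the comment added by this patch comes from a worst-case estimate of L2 size divided by usable memory bandwidth. Below is a standalone back-of-the-envelope check of that arithmetic; the 650MB/s usable bandwidth and 1MB L2 size are the commit's own worst-case assumptions, not measured values.

#include <stdio.h>

/*
 * Sanity-check the commit's worst-case L2 flush estimate:
 * time = L2 size / usable bandwidth.
 */
int main(void)
{
	const double usable_bw_mb_per_s = 650.0; /* ~half of low-end EMC BW */
	const double l2_size_mb = 1.0;           /* assumed worst-case GM20B L2 */
	const double worst_case_ms =
		l2_size_mb / usable_bw_mb_per_s * 1000.0;

	/* Prints roughly 1.54ms, comfortably inside the 5ms timeout. */
	printf("worst-case L2 flush: %.2f ms\n", worst_case_ms);
	return 0;
}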