diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-12-19 18:23:01 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-01-18 19:46:38 -0500 |
commit | 8e53d790902b8a40098a5851584ae7ba58b357b6 (patch) | |
tree | 48fd2c6b26ac3137dd2dfe5255cc04f24bcc8834 /drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |
parent | 6e2237ef622113b8fa1149aa48988a99fa30594f (diff) |
gpu: nvgpu: Use timer API in gm20b code
Use the timer API instead of Linux-specific APIs for handling
timeouts.
Also, lower the L2 flush timeout from 1 second (absurdly long) to 5 ms.
Bug 1799159
Change-Id: I27dbc35b12e9bc22ff2207bb87543f76203e20f1
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1273825
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/ltc_gm20b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 73 |
1 files changed, 41 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 5b97b388..3324d3df 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GM20B L2 | 2 | * GM20B L2 |
3 | * | 3 | * |
4 | * Copyright (c) 2014-2016 NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2017 NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -14,11 +14,12 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/types.h> | 16 | #include <linux/types.h> |
17 | #include <linux/jiffies.h> | ||
18 | #include <trace/events/gk20a.h> | 17 | #include <trace/events/gk20a.h> |
19 | 18 | ||
20 | #include "gk20a/gk20a.h" | 19 | #include "gk20a/gk20a.h" |
21 | 20 | ||
21 | #include <nvgpu/timers.h> | ||
22 | |||
22 | #include <nvgpu/hw/gm20b/hw_mc_gm20b.h> | 23 | #include <nvgpu/hw/gm20b/hw_mc_gm20b.h> |
23 | #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h> | 24 | #include <nvgpu/hw/gm20b/hw_ltc_gm20b.h> |
24 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> | 25 | #include <nvgpu/hw/gm20b/hw_top_gm20b.h> |
@@ -103,10 +104,10 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | |||
103 | int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | 104 | int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, |
104 | u32 min, u32 max) | 105 | u32 min, u32 max) |
105 | { | 106 | { |
106 | int err = 0; | ||
107 | struct gr_gk20a *gr = &g->gr; | 107 | struct gr_gk20a *gr = &g->gr; |
108 | struct nvgpu_timeout timeout; | ||
109 | int err = 0; | ||
108 | u32 ltc, slice, ctrl1, val, hw_op = 0; | 110 | u32 ltc, slice, ctrl1, val, hw_op = 0; |
109 | s32 retry = 200; | ||
110 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( | 111 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( |
111 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | 112 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); |
112 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | 113 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); |
@@ -143,18 +144,16 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
143 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | 144 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + |
144 | ltc * ltc_stride + slice * lts_stride; | 145 | ltc * ltc_stride + slice * lts_stride; |
145 | 146 | ||
146 | retry = 200; | 147 | nvgpu_timeout_init(g, &timeout, 200, |
148 | NVGPU_TIMER_RETRY_TIMER); | ||
147 | do { | 149 | do { |
148 | val = gk20a_readl(g, ctrl1); | 150 | val = gk20a_readl(g, ctrl1); |
149 | if (!(val & hw_op)) | 151 | if (!(val & hw_op)) |
150 | break; | 152 | break; |
151 | retry--; | ||
152 | udelay(5); | 153 | udelay(5); |
154 | } while (!nvgpu_timeout_expired(&timeout)); | ||
153 | 155 | ||
154 | } while (retry >= 0 || | 156 | if (nvgpu_timeout_peek_expired(&timeout)) { |
155 | !tegra_platform_is_silicon()); | ||
156 | |||
157 | if (retry < 0 && tegra_platform_is_silicon()) { | ||
158 | gk20a_err(dev_from_gk20a(g), | 157 | gk20a_err(dev_from_gk20a(g), |
159 | "comp tag clear timeout\n"); | 158 | "comp tag clear timeout\n"); |
160 | err = -EBUSY; | 159 | err = -EBUSY; |
@@ -288,23 +287,10 @@ u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) | |||
288 | */ | 287 | */ |
289 | void gm20b_flush_ltc(struct gk20a *g) | 288 | void gm20b_flush_ltc(struct gk20a *g) |
290 | { | 289 | { |
291 | unsigned long timeout; | 290 | struct nvgpu_timeout timeout; |
292 | unsigned int ltc; | 291 | unsigned int ltc; |
293 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | 292 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); |
294 | 293 | ||
295 | #define __timeout_init() \ | ||
296 | do { \ | ||
297 | timeout = jiffies + HZ; \ | ||
298 | } while (0) | ||
299 | #define __timeout_check() \ | ||
300 | do { \ | ||
301 | if (tegra_platform_is_silicon() && \ | ||
302 | time_after(jiffies, timeout)) { \ | ||
303 | gk20a_err(dev_from_gk20a(g), "L2 flush timeout!"); \ | ||
304 | break; \ | ||
305 | } \ | ||
306 | } while (0) | ||
307 | |||
308 | /* Clean... */ | 294 | /* Clean... */ |
309 | gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(), | 295 | gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(), |
310 | ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() | | 296 | ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() | |
@@ -318,14 +304,33 @@ void gm20b_flush_ltc(struct gk20a *g) | |||
318 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | 304 | for (ltc = 0; ltc < g->ltc_count; ltc++) { |
319 | u32 op_pending; | 305 | u32 op_pending; |
320 | 306 | ||
321 | __timeout_init(); | 307 | /* |
308 | * Use 5ms - this should be sufficient time to flush the cache. | ||
309 | * On tegra, rough EMC BW available for old tegra chips (newer | ||
310 | * chips are strictly faster) can be estimated as follows: | ||
311 | * | ||
312 | * Lowest reasonable EMC clock speed will be around 102MHz on | ||
313 | * t124 for display enabled boards and generally fixed to max | ||
314 | * for non-display boards (since they are generally plugged in). | ||
315 | * | ||
316 | * Thus, the available BW is 64b * 2 * 102MHz = 1.3GB/s. Of that | ||
317 | * BW the GPU will likely get about half (display and overhead/ | ||
318 | * utilization inefficiency eating the rest) so 650MB/s at | ||
319 | * worst. Assuming at most 1MB of GPU L2 cache (less for most | ||
320 | * chips) worst case is we take 1MB/650MB/s = 1.5ms. | ||
321 | * | ||
322 | * So 5ms timeout here should be more than sufficient. | ||
323 | */ | ||
324 | nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); | ||
325 | |||
322 | do { | 326 | do { |
323 | int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + | 327 | int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + |
324 | ltc * ltc_stride; | 328 | ltc * ltc_stride; |
325 | op_pending = gk20a_readl(g, cmgmt1); | 329 | op_pending = gk20a_readl(g, cmgmt1); |
326 | __timeout_check(); | 330 | } while ((op_pending & |
327 | } while (op_pending & | 331 | ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()) && |
328 | ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()); | 332 | !nvgpu_timeout_expired_msg(&timeout, |
333 | "L2 flush timeout!")); | ||
329 | } | 334 | } |
330 | 335 | ||
331 | /* And invalidate. */ | 336 | /* And invalidate. */ |
@@ -339,14 +344,18 @@ void gm20b_flush_ltc(struct gk20a *g) | |||
339 | /* Wait on each LTC individually. */ | 344 | /* Wait on each LTC individually. */ |
340 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | 345 | for (ltc = 0; ltc < g->ltc_count; ltc++) { |
341 | u32 op_pending; | 346 | u32 op_pending; |
342 | __timeout_init(); | 347 | |
348 | /* Again, 5ms. */ | ||
349 | nvgpu_timeout_init(g, &timeout, 5, NVGPU_TIMER_CPU_TIMER); | ||
350 | |||
343 | do { | 351 | do { |
344 | int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + | 352 | int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + |
345 | ltc * ltc_stride; | 353 | ltc * ltc_stride; |
346 | op_pending = gk20a_readl(g, cmgmt0); | 354 | op_pending = gk20a_readl(g, cmgmt0); |
347 | __timeout_check(); | 355 | } while ((op_pending & |
348 | } while (op_pending & | 356 | ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()) && |
349 | ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()); | 357 | !nvgpu_timeout_expired_msg(&timeout, |
358 | "L2 flush timeout!")); | ||
350 | } | 359 | } |
351 | } | 360 | } |
352 | 361 | ||