summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
diff options
context:
space:
mode:
authorSeshendra Gadagottu <sgadagottu@nvidia.com>2015-02-23 16:42:03 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-04-04 21:57:26 -0400
commitced17a2d31a49b771b0cc9715f15330a408f8e69 (patch)
tree839b5a7f3c3b32aa3b5177cf2b21e19afc06b0fe /drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
parent5f6cc1289e4282ac034bd97a67a86e05b82915d0 (diff)
gpu: nvgpu: Use busy looping for flush operations
Use busy looping for the L2 tag flush and ELPG flush operations. This makes the total flush time more accurate and reduces the overall time compared with usleep. Also add trace points to measure the performance of these operations, and correct the timeout error check for non-silicon platforms. Bug 200081799 Change-Id: I63410bb7528db9258501633996fbdee5fdec1c74 Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: http://git-master/r/710472 (cherry picked from commit 18684cf9d5d6870a1a1fd5711c4fc2d733caad20) Reviewed-on: http://git-master/r/710986 GVS: Gerrit_Virtual_Submit Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/ltc_gm20b.c')
-rw-r--r--drivers/gpu/nvgpu/gm20b/ltc_gm20b.c32
1 files changed, 18 insertions, 14 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 0a0efe41..522cd1dc 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GM20B L2 2 * GM20B L2
3 * 3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2015 NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -15,6 +15,7 @@
15 15
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/jiffies.h> 17#include <linux/jiffies.h>
18#include <trace/events/gk20a.h>
18 19
19#include "hw_mc_gm20b.h" 20#include "hw_mc_gm20b.h"
20#include "hw_ltc_gm20b.h" 21#include "hw_ltc_gm20b.h"
@@ -26,6 +27,7 @@
26#include "gk20a/gk20a.h" 27#include "gk20a/gk20a.h"
27#include "gk20a/gk20a_allocator.h" 28#include "gk20a/gk20a_allocator.h"
28 29
30
29static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) 31static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
30{ 32{
31 /* max memory size (MB) to cover */ 33 /* max memory size (MB) to cover */
@@ -107,14 +109,14 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
107 int err = 0; 109 int err = 0;
108 struct gr_gk20a *gr = &g->gr; 110 struct gr_gk20a *gr = &g->gr;
109 u32 ltc, slice, ctrl1, val, hw_op = 0; 111 u32 ltc, slice, ctrl1, val, hw_op = 0;
110 unsigned long end_jiffies = jiffies + 112 s32 retry = 200;
111 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
112 u32 delay = GR_IDLE_CHECK_DEFAULT;
113 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( 113 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
114 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); 114 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
115 115
116 gk20a_dbg_fn(""); 116 gk20a_dbg_fn("");
117 117
118 trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
119
118 if (gr->compbit_store.size == 0) 120 if (gr->compbit_store.size == 0)
119 return 0; 121 return 0;
120 122
@@ -139,25 +141,22 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
139 for (ltc = 0; ltc < g->ltc_count; ltc++) { 141 for (ltc = 0; ltc < g->ltc_count; ltc++) {
140 for (slice = 0; slice < slices_per_ltc; slice++) { 142 for (slice = 0; slice < slices_per_ltc; slice++) {
141 143
142 delay = GR_IDLE_CHECK_DEFAULT;
143
144 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + 144 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
145 ltc * proj_ltc_stride_v() + 145 ltc * proj_ltc_stride_v() +
146 slice * proj_lts_stride_v(); 146 slice * proj_lts_stride_v();
147 147
148 retry = 200;
148 do { 149 do {
149 val = gk20a_readl(g, ctrl1); 150 val = gk20a_readl(g, ctrl1);
150 if (!(val & hw_op)) 151 if (!(val & hw_op))
151 break; 152 break;
153 retry--;
154 udelay(5);
152 155
153 usleep_range(delay, delay * 2); 156 } while (retry >= 0 ||
154 delay = min_t(u32, delay << 1,
155 GR_IDLE_CHECK_MAX);
156
157 } while (time_before(jiffies, end_jiffies) |
158 !tegra_platform_is_silicon()); 157 !tegra_platform_is_silicon());
159 158
160 if (!time_before(jiffies, end_jiffies)) { 159 if (retry < 0 && tegra_platform_is_silicon()) {
161 gk20a_err(dev_from_gk20a(g), 160 gk20a_err(dev_from_gk20a(g),
162 "comp tag clear timeout\n"); 161 "comp tag clear timeout\n");
163 err = -EBUSY; 162 err = -EBUSY;
@@ -166,6 +165,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
166 } 165 }
167 } 166 }
168out: 167out:
168 trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
169 mutex_unlock(&g->mm.l2_op_lock); 169 mutex_unlock(&g->mm.l2_op_lock);
170 return 0; 170 return 0;
171} 171}
@@ -232,6 +232,8 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
232 232
233 gk20a_dbg_fn(""); 233 gk20a_dbg_fn("");
234 234
235 trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
236
235 for (i = 0; i < g->ltc_count; i++) 237 for (i = 0; i < g->ltc_count; i++)
236 done[i] = 0; 238 done[i] = 0;
237 239
@@ -255,14 +257,16 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
255 257
256 if (num_done < g->ltc_count) { 258 if (num_done < g->ltc_count) {
257 retry--; 259 retry--;
258 usleep_range(20, 40); 260 udelay(5);
259 } else 261 } else
260 break; 262 break;
261 } while (retry >= 0 || !tegra_platform_is_silicon()); 263 } while (retry >= 0 || !tegra_platform_is_silicon());
262 264
263 if (retry < 0) 265 if (retry < 0 && tegra_platform_is_silicon())
264 gk20a_warn(dev_from_gk20a(g), 266 gk20a_warn(dev_from_gk20a(g),
265 "g_elpg_flush too many retries"); 267 "g_elpg_flush too many retries");
268
269 trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
266} 270}
267 271
268u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) 272u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)