summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
diff options
context:
space:
mode:
author Seshendra Gadagottu <sgadagottu@nvidia.com> 2015-02-23 16:42:03 -0500
committer Dan Willemsen <dwillemsen@nvidia.com> 2015-04-04 21:57:26 -0400
commit ced17a2d31a49b771b0cc9715f15330a408f8e69 (patch)
tree 839b5a7f3c3b32aa3b5177cf2b21e19afc06b0fe /drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
parent 5f6cc1289e4282ac034bd97a67a86e05b82915d0 (diff)
gpu: nvgpu: Use busy looping for flush operations
Use busy looping for l2 tag flush and elpg flush operations. This makes the total flush time more accurate and reduces the overall time compared with usleep. Also add trace points to measure performance for these operations, and correct the timeout error check for non-silicon platforms. Bug 200081799 Change-Id: I63410bb7528db9258501633996fbdee5fdec1c74 Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: http://git-master/r/710472 (cherry picked from commit 18684cf9d5d6870a1a1fd5711c4fc2d733caad20) Reviewed-on: http://git-master/r/710986 GVS: Gerrit_Virtual_Submit Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ltc_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/ltc_gk20a.c 28
1 files changed, 16 insertions, 12 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index 4359f282..1a780212 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Graphics 4 * GK20A Graphics
5 * 5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <trace/events/gk20a.h>
22 23
23#include "hw_ltc_gk20a.h" 24#include "hw_ltc_gk20a.h"
24#include "hw_proj_gk20a.h" 25#include "hw_proj_gk20a.h"
@@ -107,15 +108,15 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
107 int err = 0; 108 int err = 0;
108 struct gr_gk20a *gr = &g->gr; 109 struct gr_gk20a *gr = &g->gr;
109 u32 fbp, slice, ctrl1, val, hw_op = 0; 110 u32 fbp, slice, ctrl1, val, hw_op = 0;
110 unsigned long end_jiffies = jiffies + 111 u32 retry = 200;
111 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
112 u32 delay = GR_IDLE_CHECK_DEFAULT;
113 u32 slices_per_fbp = 112 u32 slices_per_fbp =
114 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( 113 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
115 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); 114 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
116 115
117 gk20a_dbg_fn(""); 116 gk20a_dbg_fn("");
118 117
118 trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
119
119 if (gr->compbit_store.size == 0) 120 if (gr->compbit_store.size == 0)
120 return 0; 121 return 0;
121 122
@@ -141,25 +142,23 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
141 for (fbp = 0; fbp < gr->num_fbps; fbp++) { 142 for (fbp = 0; fbp < gr->num_fbps; fbp++) {
142 for (slice = 0; slice < slices_per_fbp; slice++) { 143 for (slice = 0; slice < slices_per_fbp; slice++) {
143 144
144 delay = GR_IDLE_CHECK_DEFAULT;
145 145
146 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + 146 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
147 fbp * proj_ltc_stride_v() + 147 fbp * proj_ltc_stride_v() +
148 slice * proj_lts_stride_v(); 148 slice * proj_lts_stride_v();
149 149
150 retry = 200;
150 do { 151 do {
151 val = gk20a_readl(g, ctrl1); 152 val = gk20a_readl(g, ctrl1);
152 if (!(val & hw_op)) 153 if (!(val & hw_op))
153 break; 154 break;
155 retry--;
156 udelay(5);
154 157
155 usleep_range(delay, delay * 2); 158 } while (retry >= 0 ||
156 delay = min_t(u32, delay << 1,
157 GR_IDLE_CHECK_MAX);
158
159 } while (time_before(jiffies, end_jiffies) ||
160 !tegra_platform_is_silicon()); 159 !tegra_platform_is_silicon());
161 160
162 if (!time_before(jiffies, end_jiffies)) { 161 if (retry < 0 && tegra_platform_is_silicon()) {
163 gk20a_err(dev_from_gk20a(g), 162 gk20a_err(dev_from_gk20a(g),
164 "comp tag clear timeout\n"); 163 "comp tag clear timeout\n");
165 err = -EBUSY; 164 err = -EBUSY;
@@ -168,6 +167,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
168 } 167 }
169 } 168 }
170out: 169out:
170 trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
171 mutex_unlock(&g->mm.l2_op_lock); 171 mutex_unlock(&g->mm.l2_op_lock);
172 return 0; 172 return 0;
173} 173}
@@ -200,6 +200,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
200 200
201 gk20a_dbg_fn(""); 201 gk20a_dbg_fn("");
202 202
203 trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
204
203 /* Make sure all previous writes are committed to the L2. There's no 205 /* Make sure all previous writes are committed to the L2. There's no
204 guarantee that writes are to DRAM. This will be a sysmembar internal 206 guarantee that writes are to DRAM. This will be a sysmembar internal
205 to the L2. */ 207 to the L2. */
@@ -212,7 +214,7 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
212 ltc_ltc0_ltss_g_elpg_flush_pending_v()) { 214 ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
213 gk20a_dbg_info("g_elpg_flush 0x%x", data); 215 gk20a_dbg_info("g_elpg_flush 0x%x", data);
214 retry--; 216 retry--;
215 usleep_range(20, 40); 217 udelay(5);
216 } else 218 } else
217 break; 219 break;
218 } while (retry >= 0 || !tegra_platform_is_silicon()); 220 } while (retry >= 0 || !tegra_platform_is_silicon());
@@ -221,6 +223,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
221 gk20a_warn(dev_from_gk20a(g), 223 gk20a_warn(dev_from_gk20a(g),
222 "g_elpg_flush too many retries"); 224 "g_elpg_flush too many retries");
223 225
226 trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
227
224} 228}
225 229
226static int gk20a_determine_L2_size_bytes(struct gk20a *g) 230static int gk20a_determine_L2_size_bytes(struct gk20a *g)