summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSeshendra Gadagottu <sgadagottu@nvidia.com>2015-02-23 16:42:03 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-04-04 21:57:26 -0400
commitced17a2d31a49b771b0cc9715f15330a408f8e69 (patch)
tree839b5a7f3c3b32aa3b5177cf2b21e19afc06b0fe
parent5f6cc1289e4282ac034bd97a67a86e05b82915d0 (diff)
gpu: nvgpu: Use busy looping for flush operations
Use busy looping for L2 tag flush and ELPG flush operations. This makes the total flush time more accurate and reduces the overall time compared with usleep. Also add trace points to measure the performance of these operations, and correct the timeout error check for non-silicon platforms. Bug 200081799 Change-Id: I63410bb7528db9258501633996fbdee5fdec1c74 Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: http://git-master/r/710472 (cherry picked from commit 18684cf9d5d6870a1a1fd5711c4fc2d733caad20) Reviewed-on: http://git-master/r/710986 GVS: Gerrit_Virtual_Submit Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/ltc_gk20a.c28
-rw-r--r--drivers/gpu/nvgpu/gm20b/ltc_gm20b.c32
-rw-r--r--include/trace/events/gk20a.h47
3 files changed, 81 insertions, 26 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index 4359f282..1a780212 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * GK20A Graphics 4 * GK20A Graphics
5 * 5 *
6 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. 6 * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <trace/events/gk20a.h>
22 23
23#include "hw_ltc_gk20a.h" 24#include "hw_ltc_gk20a.h"
24#include "hw_proj_gk20a.h" 25#include "hw_proj_gk20a.h"
@@ -107,15 +108,15 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
107 int err = 0; 108 int err = 0;
108 struct gr_gk20a *gr = &g->gr; 109 struct gr_gk20a *gr = &g->gr;
109 u32 fbp, slice, ctrl1, val, hw_op = 0; 110 u32 fbp, slice, ctrl1, val, hw_op = 0;
110 unsigned long end_jiffies = jiffies + 111 u32 retry = 200;
111 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
112 u32 delay = GR_IDLE_CHECK_DEFAULT;
113 u32 slices_per_fbp = 112 u32 slices_per_fbp =
114 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( 113 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
115 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); 114 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
116 115
117 gk20a_dbg_fn(""); 116 gk20a_dbg_fn("");
118 117
118 trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
119
119 if (gr->compbit_store.size == 0) 120 if (gr->compbit_store.size == 0)
120 return 0; 121 return 0;
121 122
@@ -141,25 +142,23 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
141 for (fbp = 0; fbp < gr->num_fbps; fbp++) { 142 for (fbp = 0; fbp < gr->num_fbps; fbp++) {
142 for (slice = 0; slice < slices_per_fbp; slice++) { 143 for (slice = 0; slice < slices_per_fbp; slice++) {
143 144
144 delay = GR_IDLE_CHECK_DEFAULT;
145 145
146 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + 146 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
147 fbp * proj_ltc_stride_v() + 147 fbp * proj_ltc_stride_v() +
148 slice * proj_lts_stride_v(); 148 slice * proj_lts_stride_v();
149 149
150 retry = 200;
150 do { 151 do {
151 val = gk20a_readl(g, ctrl1); 152 val = gk20a_readl(g, ctrl1);
152 if (!(val & hw_op)) 153 if (!(val & hw_op))
153 break; 154 break;
155 retry--;
156 udelay(5);
154 157
155 usleep_range(delay, delay * 2); 158 } while (retry >= 0 ||
156 delay = min_t(u32, delay << 1,
157 GR_IDLE_CHECK_MAX);
158
159 } while (time_before(jiffies, end_jiffies) ||
160 !tegra_platform_is_silicon()); 159 !tegra_platform_is_silicon());
161 160
162 if (!time_before(jiffies, end_jiffies)) { 161 if (retry < 0 && tegra_platform_is_silicon()) {
163 gk20a_err(dev_from_gk20a(g), 162 gk20a_err(dev_from_gk20a(g),
164 "comp tag clear timeout\n"); 163 "comp tag clear timeout\n");
165 err = -EBUSY; 164 err = -EBUSY;
@@ -168,6 +167,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
168 } 167 }
169 } 168 }
170out: 169out:
170 trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
171 mutex_unlock(&g->mm.l2_op_lock); 171 mutex_unlock(&g->mm.l2_op_lock);
172 return 0; 172 return 0;
173} 173}
@@ -200,6 +200,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
200 200
201 gk20a_dbg_fn(""); 201 gk20a_dbg_fn("");
202 202
203 trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
204
203 /* Make sure all previous writes are committed to the L2. There's no 205 /* Make sure all previous writes are committed to the L2. There's no
204 guarantee that writes are to DRAM. This will be a sysmembar internal 206 guarantee that writes are to DRAM. This will be a sysmembar internal
205 to the L2. */ 207 to the L2. */
@@ -212,7 +214,7 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
212 ltc_ltc0_ltss_g_elpg_flush_pending_v()) { 214 ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
213 gk20a_dbg_info("g_elpg_flush 0x%x", data); 215 gk20a_dbg_info("g_elpg_flush 0x%x", data);
214 retry--; 216 retry--;
215 usleep_range(20, 40); 217 udelay(5);
216 } else 218 } else
217 break; 219 break;
218 } while (retry >= 0 || !tegra_platform_is_silicon()); 220 } while (retry >= 0 || !tegra_platform_is_silicon());
@@ -221,6 +223,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
221 gk20a_warn(dev_from_gk20a(g), 223 gk20a_warn(dev_from_gk20a(g),
222 "g_elpg_flush too many retries"); 224 "g_elpg_flush too many retries");
223 225
226 trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
227
224} 228}
225 229
226static int gk20a_determine_L2_size_bytes(struct gk20a *g) 230static int gk20a_determine_L2_size_bytes(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 0a0efe41..522cd1dc 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GM20B L2 2 * GM20B L2
3 * 3 *
4 * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2015 NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -15,6 +15,7 @@
15 15
16#include <linux/types.h> 16#include <linux/types.h>
17#include <linux/jiffies.h> 17#include <linux/jiffies.h>
18#include <trace/events/gk20a.h>
18 19
19#include "hw_mc_gm20b.h" 20#include "hw_mc_gm20b.h"
20#include "hw_ltc_gm20b.h" 21#include "hw_ltc_gm20b.h"
@@ -26,6 +27,7 @@
26#include "gk20a/gk20a.h" 27#include "gk20a/gk20a.h"
27#include "gk20a/gk20a_allocator.h" 28#include "gk20a/gk20a_allocator.h"
28 29
30
29static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) 31static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
30{ 32{
31 /* max memory size (MB) to cover */ 33 /* max memory size (MB) to cover */
@@ -107,14 +109,14 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
107 int err = 0; 109 int err = 0;
108 struct gr_gk20a *gr = &g->gr; 110 struct gr_gk20a *gr = &g->gr;
109 u32 ltc, slice, ctrl1, val, hw_op = 0; 111 u32 ltc, slice, ctrl1, val, hw_op = 0;
110 unsigned long end_jiffies = jiffies + 112 s32 retry = 200;
111 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
112 u32 delay = GR_IDLE_CHECK_DEFAULT;
113 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( 113 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
114 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); 114 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
115 115
116 gk20a_dbg_fn(""); 116 gk20a_dbg_fn("");
117 117
118 trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
119
118 if (gr->compbit_store.size == 0) 120 if (gr->compbit_store.size == 0)
119 return 0; 121 return 0;
120 122
@@ -139,25 +141,22 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
139 for (ltc = 0; ltc < g->ltc_count; ltc++) { 141 for (ltc = 0; ltc < g->ltc_count; ltc++) {
140 for (slice = 0; slice < slices_per_ltc; slice++) { 142 for (slice = 0; slice < slices_per_ltc; slice++) {
141 143
142 delay = GR_IDLE_CHECK_DEFAULT;
143
144 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + 144 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
145 ltc * proj_ltc_stride_v() + 145 ltc * proj_ltc_stride_v() +
146 slice * proj_lts_stride_v(); 146 slice * proj_lts_stride_v();
147 147
148 retry = 200;
148 do { 149 do {
149 val = gk20a_readl(g, ctrl1); 150 val = gk20a_readl(g, ctrl1);
150 if (!(val & hw_op)) 151 if (!(val & hw_op))
151 break; 152 break;
153 retry--;
154 udelay(5);
152 155
153 usleep_range(delay, delay * 2); 156 } while (retry >= 0 ||
154 delay = min_t(u32, delay << 1,
155 GR_IDLE_CHECK_MAX);
156
157 } while (time_before(jiffies, end_jiffies) |
158 !tegra_platform_is_silicon()); 157 !tegra_platform_is_silicon());
159 158
160 if (!time_before(jiffies, end_jiffies)) { 159 if (retry < 0 && tegra_platform_is_silicon()) {
161 gk20a_err(dev_from_gk20a(g), 160 gk20a_err(dev_from_gk20a(g),
162 "comp tag clear timeout\n"); 161 "comp tag clear timeout\n");
163 err = -EBUSY; 162 err = -EBUSY;
@@ -166,6 +165,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
166 } 165 }
167 } 166 }
168out: 167out:
168 trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
169 mutex_unlock(&g->mm.l2_op_lock); 169 mutex_unlock(&g->mm.l2_op_lock);
170 return 0; 170 return 0;
171} 171}
@@ -232,6 +232,8 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
232 232
233 gk20a_dbg_fn(""); 233 gk20a_dbg_fn("");
234 234
235 trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
236
235 for (i = 0; i < g->ltc_count; i++) 237 for (i = 0; i < g->ltc_count; i++)
236 done[i] = 0; 238 done[i] = 0;
237 239
@@ -255,14 +257,16 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
255 257
256 if (num_done < g->ltc_count) { 258 if (num_done < g->ltc_count) {
257 retry--; 259 retry--;
258 usleep_range(20, 40); 260 udelay(5);
259 } else 261 } else
260 break; 262 break;
261 } while (retry >= 0 || !tegra_platform_is_silicon()); 263 } while (retry >= 0 || !tegra_platform_is_silicon());
262 264
263 if (retry < 0) 265 if (retry < 0 && tegra_platform_is_silicon())
264 gk20a_warn(dev_from_gk20a(g), 266 gk20a_warn(dev_from_gk20a(g),
265 "g_elpg_flush too many retries"); 267 "g_elpg_flush too many retries");
268
269 trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
266} 270}
267 271
268u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) 272u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index 096b0559..ad738f43 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -130,6 +130,16 @@ DEFINE_EVENT(gk20a, gr_gk20a_handle_sw_method,
130 TP_ARGS(name) 130 TP_ARGS(name)
131); 131);
132 132
133DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked,
134 TP_PROTO(const char *name),
135 TP_ARGS(name)
136);
137
138DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked_done,
139 TP_PROTO(const char *name),
140 TP_ARGS(name)
141);
142
133TRACE_EVENT(gk20a_channel_update, 143TRACE_EVENT(gk20a_channel_update,
134 TP_PROTO(const void *channel), 144 TP_PROTO(const void *channel),
135 TP_ARGS(channel), 145 TP_ARGS(channel),
@@ -368,6 +378,43 @@ TRACE_EVENT(gk20a_mmu_fault,
368 __entry->engine, __entry->client, __entry->fault_type) 378 __entry->engine, __entry->client, __entry->fault_type)
369); 379);
370 380
381TRACE_EVENT(gk20a_ltc_cbc_ctrl_start,
382 TP_PROTO(const char *name, u32 cbc_ctrl, u32 min_value,
383 u32 max_value),
384 TP_ARGS(name, cbc_ctrl, min_value, max_value),
385
386 TP_STRUCT__entry(
387 __field(const char *, name)
388 __field(u32, cbc_ctrl)
389 __field(u32, min_value)
390 __field(u32, max_value)
391 ),
392
393 TP_fast_assign(
394 __entry->name = name;
395 __entry->cbc_ctrl = cbc_ctrl;
396 __entry->min_value = min_value;
397 __entry->max_value = max_value;
398 ),
399
400 TP_printk("name=%s, cbc_ctrl=%d, min_value=%u, max_value=%u",
401 __entry->name, __entry->cbc_ctrl, __entry->min_value,
402 __entry->max_value)
403);
404
405TRACE_EVENT(gk20a_ltc_cbc_ctrl_done,
406 TP_PROTO(const char *name),
407 TP_ARGS(name),
408 TP_STRUCT__entry(
409 __field(const char *, name)
410 ),
411 TP_fast_assign(
412 __entry->name = name;
413 ),
414 TP_printk("name=%s ", __entry->name)
415
416);
417
371DECLARE_EVENT_CLASS(gk20a_cde, 418DECLARE_EVENT_CLASS(gk20a_cde,
372 TP_PROTO(const void *ctx), 419 TP_PROTO(const void *ctx),
373 TP_ARGS(ctx), 420 TP_ARGS(ctx),