diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 28 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 32 | ||||
-rw-r--r-- | include/trace/events/gk20a.h | 47 |
3 files changed, 81 insertions, 26 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 4359f282..1a780212 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A Graphics | 4 | * GK20A Graphics |
5 | * | 5 | * |
6 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -19,6 +19,7 @@ | |||
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/kernel.h> | 21 | #include <linux/kernel.h> |
22 | #include <trace/events/gk20a.h> | ||
22 | 23 | ||
23 | #include "hw_ltc_gk20a.h" | 24 | #include "hw_ltc_gk20a.h" |
24 | #include "hw_proj_gk20a.h" | 25 | #include "hw_proj_gk20a.h" |
@@ -107,15 +108,15 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
107 | int err = 0; | 108 | int err = 0; |
108 | struct gr_gk20a *gr = &g->gr; | 109 | struct gr_gk20a *gr = &g->gr; |
109 | u32 fbp, slice, ctrl1, val, hw_op = 0; | 110 | u32 fbp, slice, ctrl1, val, hw_op = 0; |
110 | unsigned long end_jiffies = jiffies + | 111 | u32 retry = 200; |
111 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
112 | u32 delay = GR_IDLE_CHECK_DEFAULT; | ||
113 | u32 slices_per_fbp = | 112 | u32 slices_per_fbp = |
114 | ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( | 113 | ltc_ltcs_ltss_cbc_param_slices_per_fbp_v( |
115 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | 114 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); |
116 | 115 | ||
117 | gk20a_dbg_fn(""); | 116 | gk20a_dbg_fn(""); |
118 | 117 | ||
118 | trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max); | ||
119 | |||
119 | if (gr->compbit_store.size == 0) | 120 | if (gr->compbit_store.size == 0) |
120 | return 0; | 121 | return 0; |
121 | 122 | ||
@@ -141,25 +142,23 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
141 | for (fbp = 0; fbp < gr->num_fbps; fbp++) { | 142 | for (fbp = 0; fbp < gr->num_fbps; fbp++) { |
142 | for (slice = 0; slice < slices_per_fbp; slice++) { | 143 | for (slice = 0; slice < slices_per_fbp; slice++) { |
143 | 144 | ||
144 | delay = GR_IDLE_CHECK_DEFAULT; | ||
145 | 145 | ||
146 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | 146 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + |
147 | fbp * proj_ltc_stride_v() + | 147 | fbp * proj_ltc_stride_v() + |
148 | slice * proj_lts_stride_v(); | 148 | slice * proj_lts_stride_v(); |
149 | 149 | ||
150 | retry = 200; | ||
150 | do { | 151 | do { |
151 | val = gk20a_readl(g, ctrl1); | 152 | val = gk20a_readl(g, ctrl1); |
152 | if (!(val & hw_op)) | 153 | if (!(val & hw_op)) |
153 | break; | 154 | break; |
155 | retry--; | ||
156 | udelay(5); | ||
154 | 157 | ||
155 | usleep_range(delay, delay * 2); | 158 | } while (retry >= 0 || |
156 | delay = min_t(u32, delay << 1, | ||
157 | GR_IDLE_CHECK_MAX); | ||
158 | |||
159 | } while (time_before(jiffies, end_jiffies) || | ||
160 | !tegra_platform_is_silicon()); | 159 | !tegra_platform_is_silicon()); |
161 | 160 | ||
162 | if (!time_before(jiffies, end_jiffies)) { | 161 | if (retry < 0 && tegra_platform_is_silicon()) { |
163 | gk20a_err(dev_from_gk20a(g), | 162 | gk20a_err(dev_from_gk20a(g), |
164 | "comp tag clear timeout\n"); | 163 | "comp tag clear timeout\n"); |
165 | err = -EBUSY; | 164 | err = -EBUSY; |
@@ -168,6 +167,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
168 | } | 167 | } |
169 | } | 168 | } |
170 | out: | 169 | out: |
170 | trace_gk20a_ltc_cbc_ctrl_done(g->dev->name); | ||
171 | mutex_unlock(&g->mm.l2_op_lock); | 171 | mutex_unlock(&g->mm.l2_op_lock); |
172 | return 0; | 172 | return 0; |
173 | } | 173 | } |
@@ -200,6 +200,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g) | |||
200 | 200 | ||
201 | gk20a_dbg_fn(""); | 201 | gk20a_dbg_fn(""); |
202 | 202 | ||
203 | trace_gk20a_mm_g_elpg_flush_locked(g->dev->name); | ||
204 | |||
203 | /* Make sure all previous writes are committed to the L2. There's no | 205 | /* Make sure all previous writes are committed to the L2. There's no |
204 | guarantee that writes are to DRAM. This will be a sysmembar internal | 206 | guarantee that writes are to DRAM. This will be a sysmembar internal |
205 | to the L2. */ | 207 | to the L2. */ |
@@ -212,7 +214,7 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g) | |||
212 | ltc_ltc0_ltss_g_elpg_flush_pending_v()) { | 214 | ltc_ltc0_ltss_g_elpg_flush_pending_v()) { |
213 | gk20a_dbg_info("g_elpg_flush 0x%x", data); | 215 | gk20a_dbg_info("g_elpg_flush 0x%x", data); |
214 | retry--; | 216 | retry--; |
215 | usleep_range(20, 40); | 217 | udelay(5); |
216 | } else | 218 | } else |
217 | break; | 219 | break; |
218 | } while (retry >= 0 || !tegra_platform_is_silicon()); | 220 | } while (retry >= 0 || !tegra_platform_is_silicon()); |
@@ -221,6 +223,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g) | |||
221 | gk20a_warn(dev_from_gk20a(g), | 223 | gk20a_warn(dev_from_gk20a(g), |
222 | "g_elpg_flush too many retries"); | 224 | "g_elpg_flush too many retries"); |
223 | 225 | ||
226 | trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name); | ||
227 | |||
224 | } | 228 | } |
225 | 229 | ||
226 | static int gk20a_determine_L2_size_bytes(struct gk20a *g) | 230 | static int gk20a_determine_L2_size_bytes(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 0a0efe41..522cd1dc 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GM20B L2 | 2 | * GM20B L2 |
3 | * | 3 | * |
4 | * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2015 NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include <linux/types.h> | 16 | #include <linux/types.h> |
17 | #include <linux/jiffies.h> | 17 | #include <linux/jiffies.h> |
18 | #include <trace/events/gk20a.h> | ||
18 | 19 | ||
19 | #include "hw_mc_gm20b.h" | 20 | #include "hw_mc_gm20b.h" |
20 | #include "hw_ltc_gm20b.h" | 21 | #include "hw_ltc_gm20b.h" |
@@ -26,6 +27,7 @@ | |||
26 | #include "gk20a/gk20a.h" | 27 | #include "gk20a/gk20a.h" |
27 | #include "gk20a/gk20a_allocator.h" | 28 | #include "gk20a/gk20a_allocator.h" |
28 | 29 | ||
30 | |||
29 | static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) | 31 | static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) |
30 | { | 32 | { |
31 | /* max memory size (MB) to cover */ | 33 | /* max memory size (MB) to cover */ |
@@ -107,14 +109,14 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
107 | int err = 0; | 109 | int err = 0; |
108 | struct gr_gk20a *gr = &g->gr; | 110 | struct gr_gk20a *gr = &g->gr; |
109 | u32 ltc, slice, ctrl1, val, hw_op = 0; | 111 | u32 ltc, slice, ctrl1, val, hw_op = 0; |
110 | unsigned long end_jiffies = jiffies + | 112 | s32 retry = 200; |
111 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); | ||
112 | u32 delay = GR_IDLE_CHECK_DEFAULT; | ||
113 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( | 113 | u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( |
114 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | 114 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); |
115 | 115 | ||
116 | gk20a_dbg_fn(""); | 116 | gk20a_dbg_fn(""); |
117 | 117 | ||
118 | trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max); | ||
119 | |||
118 | if (gr->compbit_store.size == 0) | 120 | if (gr->compbit_store.size == 0) |
119 | return 0; | 121 | return 0; |
120 | 122 | ||
@@ -139,25 +141,22 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
139 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | 141 | for (ltc = 0; ltc < g->ltc_count; ltc++) { |
140 | for (slice = 0; slice < slices_per_ltc; slice++) { | 142 | for (slice = 0; slice < slices_per_ltc; slice++) { |
141 | 143 | ||
142 | delay = GR_IDLE_CHECK_DEFAULT; | ||
143 | |||
144 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | 144 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + |
145 | ltc * proj_ltc_stride_v() + | 145 | ltc * proj_ltc_stride_v() + |
146 | slice * proj_lts_stride_v(); | 146 | slice * proj_lts_stride_v(); |
147 | 147 | ||
148 | retry = 200; | ||
148 | do { | 149 | do { |
149 | val = gk20a_readl(g, ctrl1); | 150 | val = gk20a_readl(g, ctrl1); |
150 | if (!(val & hw_op)) | 151 | if (!(val & hw_op)) |
151 | break; | 152 | break; |
153 | retry--; | ||
154 | udelay(5); | ||
152 | 155 | ||
153 | usleep_range(delay, delay * 2); | 156 | } while (retry >= 0 || |
154 | delay = min_t(u32, delay << 1, | ||
155 | GR_IDLE_CHECK_MAX); | ||
156 | |||
157 | } while (time_before(jiffies, end_jiffies) | | ||
158 | !tegra_platform_is_silicon()); | 157 | !tegra_platform_is_silicon()); |
159 | 158 | ||
160 | if (!time_before(jiffies, end_jiffies)) { | 159 | if (retry < 0 && tegra_platform_is_silicon()) { |
161 | gk20a_err(dev_from_gk20a(g), | 160 | gk20a_err(dev_from_gk20a(g), |
162 | "comp tag clear timeout\n"); | 161 | "comp tag clear timeout\n"); |
163 | err = -EBUSY; | 162 | err = -EBUSY; |
@@ -166,6 +165,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
166 | } | 165 | } |
167 | } | 166 | } |
168 | out: | 167 | out: |
168 | trace_gk20a_ltc_cbc_ctrl_done(g->dev->name); | ||
169 | mutex_unlock(&g->mm.l2_op_lock); | 169 | mutex_unlock(&g->mm.l2_op_lock); |
170 | return 0; | 170 | return 0; |
171 | } | 171 | } |
@@ -232,6 +232,8 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g) | |||
232 | 232 | ||
233 | gk20a_dbg_fn(""); | 233 | gk20a_dbg_fn(""); |
234 | 234 | ||
235 | trace_gk20a_mm_g_elpg_flush_locked(g->dev->name); | ||
236 | |||
235 | for (i = 0; i < g->ltc_count; i++) | 237 | for (i = 0; i < g->ltc_count; i++) |
236 | done[i] = 0; | 238 | done[i] = 0; |
237 | 239 | ||
@@ -255,14 +257,16 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g) | |||
255 | 257 | ||
256 | if (num_done < g->ltc_count) { | 258 | if (num_done < g->ltc_count) { |
257 | retry--; | 259 | retry--; |
258 | usleep_range(20, 40); | 260 | udelay(5); |
259 | } else | 261 | } else |
260 | break; | 262 | break; |
261 | } while (retry >= 0 || !tegra_platform_is_silicon()); | 263 | } while (retry >= 0 || !tegra_platform_is_silicon()); |
262 | 264 | ||
263 | if (retry < 0) | 265 | if (retry < 0 && tegra_platform_is_silicon()) |
264 | gk20a_warn(dev_from_gk20a(g), | 266 | gk20a_warn(dev_from_gk20a(g), |
265 | "g_elpg_flush too many retries"); | 267 | "g_elpg_flush too many retries"); |
268 | |||
269 | trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name); | ||
266 | } | 270 | } |
267 | 271 | ||
268 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) | 272 | u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) |
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h index 096b0559..ad738f43 100644 --- a/include/trace/events/gk20a.h +++ b/include/trace/events/gk20a.h | |||
@@ -130,6 +130,16 @@ DEFINE_EVENT(gk20a, gr_gk20a_handle_sw_method, | |||
130 | TP_ARGS(name) | 130 | TP_ARGS(name) |
131 | ); | 131 | ); |
132 | 132 | ||
/*
 * Tracepoint gk20a_mm_g_elpg_flush_locked: an instance of the "gk20a"
 * event class that takes a single name string.
 *
 * Within this patch it is emitted right after gk20a_dbg_fn("") at the top
 * of gk20a_mm_g_elpg_flush_locked() and gm20b_ltc_g_elpg_flush_locked(),
 * with g->dev->name as the argument.
 */
133 | DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked, | |
134 | TP_PROTO(const char *name), | |
135 | TP_ARGS(name) | |
136 | ); | |
137 | |||
/*
 * Tracepoint gk20a_mm_g_elpg_flush_locked_done: an instance of the "gk20a"
 * event class that takes a single name string.
 *
 * Within this patch it is emitted as the last statement of
 * gk20a_mm_g_elpg_flush_locked() and gm20b_ltc_g_elpg_flush_locked(),
 * after the "too many retries" warning check, with g->dev->name as the
 * argument.
 */
138 | DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked_done, | |
139 | TP_PROTO(const char *name), | |
140 | TP_ARGS(name) | |
141 | ); | |
142 | |||
133 | TRACE_EVENT(gk20a_channel_update, | 143 | TRACE_EVENT(gk20a_channel_update, |
134 | TP_PROTO(const void *channel), | 144 | TP_PROTO(const void *channel), |
135 | TP_ARGS(channel), | 145 | TP_ARGS(channel), |
@@ -368,6 +378,43 @@ TRACE_EVENT(gk20a_mmu_fault, | |||
368 | __entry->engine, __entry->client, __entry->fault_type) | 378 | __entry->engine, __entry->client, __entry->fault_type) |
369 | ); | 379 | ); |
370 | 380 | ||
/*
 * Tracepoint gk20a_ltc_cbc_ctrl_start: records the arguments of a CBC
 * control request.
 *
 *   name      - string pointer supplied by the caller (g->dev->name at the
 *               call sites in this patch)
 *   cbc_ctrl  - operation selector (the call sites pass the `op` argument)
 *   min_value - the call sites pass `min`
 *   max_value - the call sites pass `max`
 *
 * All three numeric fields are u32, so all three are printed with %u.
 *
 * NOTE(review): only the pointer is recorded for `name`
 * (__entry->name = name) and it is later formatted with %s. Presumably the
 * string has to outlive the trace record; consider __string() /
 * __assign_str() / __get_str() - confirm against the tracing docs and the
 * "gk20a" event class used elsewhere in this header.
 */
381 | TRACE_EVENT(gk20a_ltc_cbc_ctrl_start, | |
382 | TP_PROTO(const char *name, u32 cbc_ctrl, u32 min_value, | |
383 | u32 max_value), | |
384 | TP_ARGS(name, cbc_ctrl, min_value, max_value), | |
385 | |
386 | TP_STRUCT__entry( | |
387 | __field(const char *, name) | |
388 | __field(u32, cbc_ctrl) | |
389 | __field(u32, min_value) | |
390 | __field(u32, max_value) | |
391 | ), | |
392 | |
393 | TP_fast_assign( | |
394 | __entry->name = name; | |
395 | __entry->cbc_ctrl = cbc_ctrl; | |
396 | __entry->min_value = min_value; | |
397 | __entry->max_value = max_value; | |
398 | ), | |
399 | |
400 | TP_printk("name=%s, cbc_ctrl=%u, min_value=%u, max_value=%u", | |
401 | __entry->name, __entry->cbc_ctrl, __entry->min_value, | |
402 | __entry->max_value) | |
403 | ); | |
404 | |||
/*
 * Tracepoint gk20a_ltc_cbc_ctrl_done: records a single name string pointer
 * and prints it as "name=%s ".
 *
 * Within this patch it is emitted at the `out:` label of
 * gk20a_ltc_cbc_ctrl() and gm20b_ltc_cbc_ctrl(), immediately before
 * mutex_unlock(&g->mm.l2_op_lock).
 *
 * NOTE(review): both callers emit gk20a_ltc_cbc_ctrl_start before the
 * `compbit_store.size == 0` early return, so a start event can appear
 * without a matching done event.
 */
405 | TRACE_EVENT(gk20a_ltc_cbc_ctrl_done, | |
406 | TP_PROTO(const char *name), | |
407 | TP_ARGS(name), | |
408 | TP_STRUCT__entry( | |
409 | __field(const char *, name) | |
410 | ), | |
411 | TP_fast_assign( | |
412 | __entry->name = name; | |
413 | ), | |
414 | TP_printk("name=%s ", __entry->name) | |
415 | |
416 | ); | |
417 | |||
371 | DECLARE_EVENT_CLASS(gk20a_cde, | 418 | DECLARE_EVENT_CLASS(gk20a_cde, |
372 | TP_PROTO(const void *ctx), | 419 | TP_PROTO(const void *ctx), |
373 | TP_ARGS(ctx), | 420 | TP_ARGS(ctx), |