Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/debug_fifo.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_fifo.c  369
1 file changed, 369 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
new file mode 100644
index 00000000..6a28b1a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -0,0 +1,369 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_fifo.h"
#include "gk20a/platform_gk20a.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include <nvgpu/sort.h>

void __gk20a_fifo_profile_free(struct kref *ref);

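/*
 * The "sched" debugfs file walks the channel table with a seq_file
 * iterator: start()/next() return a struct channel_gk20a pointer as the
 * cursor, and show() prints one row for every channel that is currently
 * active on the GR engine's runlist.
 */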
static void *gk20a_fifo_sched_debugfs_seq_start(
                struct seq_file *s, loff_t *pos)
{
        struct gk20a *g = s->private;
        struct fifo_gk20a *f = &g->fifo;

        if (*pos >= f->num_channels)
                return NULL;

        return &f->channel[*pos];
}

static void *gk20a_fifo_sched_debugfs_seq_next(
                struct seq_file *s, void *v, loff_t *pos)
{
        struct gk20a *g = s->private;
        struct fifo_gk20a *f = &g->fifo;

        ++(*pos);
        if (*pos >= f->num_channels)
                return NULL;

        return &f->channel[*pos];
}

static void gk20a_fifo_sched_debugfs_seq_stop(
                struct seq_file *s, void *v)
{
}

static int gk20a_fifo_sched_debugfs_seq_show(
                struct seq_file *s, void *v)
{
        struct gk20a *g = s->private;
        struct fifo_gk20a *f = &g->fifo;
        struct channel_gk20a *ch = v;
        struct tsg_gk20a *tsg = NULL;

        struct fifo_engine_info_gk20a *engine_info;
        struct fifo_runlist_info_gk20a *runlist;
        u32 runlist_id;
        int ret = SEQ_SKIP;
        u32 engine_id;

        engine_id = gk20a_fifo_get_gr_engine_id(g);
        engine_info = (f->engine_info + engine_id);
        runlist_id = engine_info->runlist_id;
        runlist = &f->runlist_info[runlist_id];

        if (ch == f->channel) {
                seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
                seq_puts(s, " (usecs) (msecs)\n");
                ret = 0;
        }

        if (!test_bit(ch->hw_chid, runlist->active_channels))
                return ret;

        if (gk20a_channel_get(ch)) {
                if (gk20a_is_channel_marked_as_tsg(ch))
                        tsg = &f->tsg[ch->tsgid];

                seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
                        ch->hw_chid,
                        ch->tsgid,
                        ch->tgid,
                        tsg ? tsg->timeslice_us : ch->timeslice_us,
                        ch->timeout_ms_max,
                        tsg ? tsg->interleave_level : ch->interleave_level,
                        ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
                        ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
                gk20a_channel_put(ch);
        }
        return 0;
}

static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
        .start = gk20a_fifo_sched_debugfs_seq_start,
        .next = gk20a_fifo_sched_debugfs_seq_next,
        .stop = gk20a_fifo_sched_debugfs_seq_stop,
        .show = gk20a_fifo_sched_debugfs_seq_show
};

static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
                struct file *file)
{
        int err;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
        if (err)
                return err;

        gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);

        ((struct seq_file *)file->private_data)->private = inode->i_private;
        return 0;
};

/*
 * The file operations structure contains our open function along with a
 * set of the canned seq_ ops.
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
        .owner = THIS_MODULE,
        .open = gk20a_fifo_sched_debugfs_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release
};

static int gk20a_fifo_profile_enable(void *data, u64 val)
{
        struct gk20a *g = (struct gk20a *) data;
        struct fifo_gk20a *f = &g->fifo;

        nvgpu_mutex_acquire(&f->profile.lock);
        if (val == 0) {
                if (f->profile.enabled) {
                        f->profile.enabled = false;
                        kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
                }
        } else {
                if (!f->profile.enabled) {
                        /* Do not kref_init here, as it can race if we
                         * enable/disable/enable while a kickoff is happening
                         */
                        if (!kref_get_unless_zero(&f->profile.ref)) {
                                f->profile.data = vzalloc(
                                        FIFO_PROFILING_ENTRIES *
                                        sizeof(struct fifo_profile_gk20a));
                                f->profile.sorted = vzalloc(
                                        FIFO_PROFILING_ENTRIES *
                                        sizeof(u64));
                                if (!(f->profile.data && f->profile.sorted)) {
                                        nvgpu_vfree(g, f->profile.data);
                                        nvgpu_vfree(g, f->profile.sorted);
                                        nvgpu_mutex_release(&f->profile.lock);
                                        return -ENOMEM;
                                }
                                kref_init(&f->profile.ref);
                        }
                        atomic_set(&f->profile.get, 0);
                        f->profile.enabled = true;
                }
        }
        nvgpu_mutex_release(&f->profile.lock);

        return 0;
}

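/*
 * "enable" is a write-only debugfs control: writing a non-zero value
 * allocates the profiling buffers and starts collection, writing 0 stops
 * collection and drops the reference keeping the buffers alive, e.g.
 *
 *      echo 1 > <gpu debugfs root>/fifo/profile/enable
 */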
DEFINE_SIMPLE_ATTRIBUTE(
        gk20a_fifo_profile_enable_debugfs_fops,
        NULL,
        gk20a_fifo_profile_enable,
        "%llu\n"
);

static int __profile_cmp(const void *a, const void *b)
{
        return *((unsigned long long *) a) - *((unsigned long long *) b);
}

/*
 * This uses about 800 bytes of stack, but the function using it is not
 * part of a call stack where much memory is being used, so it is fine.
 */
#define PERCENTILE_WIDTH        5
#define PERCENTILE_RANGES       (100/PERCENTILE_WIDTH)

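/*
 * __gk20a_fifo_create_stats() collects the deltas between two PROFILE_*
 * timestamps for every valid ring entry, sorts them, and fills one bucket
 * per PERCENTILE_WIDTH percent. Bucket i holds the (5 * (i + 1))-th
 * percentile, taken from sorted[(5 * (i + 1) * nelem) / 100 - 1]; e.g.
 * with nelem = 200 valid samples, bucket 0 is sorted[9] (5th percentile)
 * and bucket 19 is sorted[199] (the maximum).
 */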
static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
                u64 *percentiles, u32 index_end, u32 index_start)
{
        unsigned int nelem = 0;
        unsigned int index;
        struct fifo_profile_gk20a *profile;

        for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
                profile = &g->fifo.profile.data[index];

                if (profile->timestamp[index_end] >
                                profile->timestamp[index_start]) {
                        /* This is a valid element */
                        g->fifo.profile.sorted[nelem] =
                                profile->timestamp[index_end] -
                                profile->timestamp[index_start];
                        nelem++;
                }
        }

        /* sort it */
        sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
                __profile_cmp, NULL);

        /* build ranges */
        for (index = 0; index < PERCENTILE_RANGES; index++)
                percentiles[index] =
                        g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
                                nelem)/100 - 1];
        return nelem;
}

static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
{
        struct gk20a *g = s->private;
        unsigned int get, nelem, index;
        /*
         * ~800 bytes on the stack, but the function is static and only
         * called from the debugfs handler.
         */
        u64 percentiles_ioctl[PERCENTILE_RANGES];
        u64 percentiles_kickoff[PERCENTILE_RANGES];
        u64 percentiles_jobtracking[PERCENTILE_RANGES];
        u64 percentiles_append[PERCENTILE_RANGES];
        u64 percentiles_userd[PERCENTILE_RANGES];

        if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
                seq_printf(s, "Profiling disabled\n");
                return 0;
        }

        get = atomic_read(&g->fifo.profile.get);

        __gk20a_fifo_create_stats(g, percentiles_ioctl,
                PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
        __gk20a_fifo_create_stats(g, percentiles_kickoff,
                PROFILE_END, PROFILE_ENTRY);
        __gk20a_fifo_create_stats(g, percentiles_jobtracking,
                PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
        __gk20a_fifo_create_stats(g, percentiles_append,
                PROFILE_APPEND, PROFILE_JOB_TRACKING);
        nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
                PROFILE_END, PROFILE_APPEND);

        seq_printf(s, "Number of kickoffs: %d\n", nelem);
        seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");

        for (index = 0; index < PERCENTILE_RANGES; index++)
                seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
                        PERCENTILE_WIDTH * (index+1),
                        percentiles_ioctl[index],
                        percentiles_kickoff[index],
                        percentiles_append[index],
                        percentiles_jobtracking[index],
                        percentiles_userd[index]);

        kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);

        return 0;
}

static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
{
        return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
}

static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
        .open = gk20a_fifo_profile_stats_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

void gk20a_fifo_debugfs_init(struct gk20a *g)
{
        struct gk20a_platform *platform = dev_get_drvdata(g->dev);
        struct dentry *gpu_root = platform->debugfs;
        struct dentry *fifo_root;
        struct dentry *profile_root;

        fifo_root = debugfs_create_dir("fifo", gpu_root);
        if (IS_ERR_OR_NULL(fifo_root))
                return;

        gk20a_dbg(gpu_dbg_info, "g=%p", g);

        debugfs_create_file("sched", 0600, fifo_root, g,
                &gk20a_fifo_sched_debugfs_fops);

        profile_root = debugfs_create_dir("profile", fifo_root);
        if (IS_ERR_OR_NULL(profile_root))
                return;

        nvgpu_mutex_init(&g->fifo.profile.lock);
        g->fifo.profile.enabled = false;
        atomic_set(&g->fifo.profile.get, 0);
        atomic_set(&g->fifo.profile.ref.refcount, 0);

        debugfs_create_file("enable", 0600, profile_root, g,
                &gk20a_fifo_profile_enable_debugfs_fops);

        debugfs_create_file("stats", 0600, profile_root, g,
                &gk20a_fifo_profile_stats_debugfs_fops);
}

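/*
 * The profiling buffers are reference counted: the enabled state holds one
 * reference and every in-flight gk20a_fifo_profile_acquire() holds another,
 * so disabling profiling while a kickoff still owns an entry only defers
 * the vfree until the last reference is dropped here.
 */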
void __gk20a_fifo_profile_free(struct kref *ref)
{
        struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
                        profile.ref);
        nvgpu_vfree(f->g, f->profile.data);
        nvgpu_vfree(f->g, f->profile.sorted);
}

/* Get the next element in the ring buffer of profile entries
 * and grab a reference to the structure
 */
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
{
        struct fifo_gk20a *f = &g->fifo;
        struct fifo_profile_gk20a *profile;
        unsigned int index;

        /* If kref is zero, profiling is not enabled */
        if (!kref_get_unless_zero(&f->profile.ref))
                return NULL;
        index = atomic_inc_return(&f->profile.get);
        profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];

        return profile;
}

/* Free the reference to the structure. This allows deferred cleanups */
void gk20a_fifo_profile_release(struct gk20a *g,
                struct fifo_profile_gk20a *profile)
{
        kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
}

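/*
 * Illustrative use of the acquire/release pair above from a submission
 * path (hypothetical caller; the timestamp source is up to the caller):
 *
 *      profile = gk20a_fifo_profile_acquire(g);
 *      if (profile)
 *              profile->timestamp[PROFILE_IOCTL_ENTRY] = <ns timestamp>;
 *      ... kickoff work, filling the remaining PROFILE_* slots ...
 *      if (profile)
 *              gk20a_fifo_profile_release(g, profile);
 *
 * Entries are reused once the ring of FIFO_PROFILING_ENTRIES wraps, so a
 * slot may be overwritten by a later kickoff before "stats" reads it.
 */
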
void gk20a_fifo_debugfs_deinit(struct gk20a *g)
{
        struct fifo_gk20a *f = &g->fifo;

        nvgpu_mutex_acquire(&f->profile.lock);
        if (f->profile.enabled) {
                f->profile.enabled = false;
                kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
        }
        nvgpu_mutex_release(&f->profile.lock);
}