Diffstat (limited to 'include/os/linux/debug_fifo.c')
-rw-r--r--	include/os/linux/debug_fifo.c	376
1 file changed, 0 insertions(+), 376 deletions(-)
diff --git a/include/os/linux/debug_fifo.c b/include/os/linux/debug_fifo.c
deleted file mode 100644
index 98da8bc..0000000
--- a/include/os/linux/debug_fifo.c
+++ /dev/null
@@ -1,376 +0,0 @@
/*
 * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_fifo.h"
#include "os_linux.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include <nvgpu/sort.h>
#include <nvgpu/timers.h>
#include <nvgpu/channel.h>

void __gk20a_fifo_profile_free(struct nvgpu_ref *ref);

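/*
 * seq_file iterator over the channel table: start() and next() return
 * &f->channel[*pos] until *pos reaches f->num_channels, and stop() has
 * nothing to release.
 */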
static void *gk20a_fifo_sched_debugfs_seq_start(
		struct seq_file *s, loff_t *pos)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;

	if (*pos >= f->num_channels)
		return NULL;

	return &f->channel[*pos];
}

static void *gk20a_fifo_sched_debugfs_seq_next(
		struct seq_file *s, void *v, loff_t *pos)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;

	++(*pos);
	if (*pos >= f->num_channels)
		return NULL;

	return &f->channel[*pos];
}

static void gk20a_fifo_sched_debugfs_seq_stop(
		struct seq_file *s, void *v)
{
}

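/*
 * show() emits the column header only for the first table entry
 * (ch == f->channel) and produces no row for channels that are not set
 * in the runlist's active_channels bitmap.
 */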
static int gk20a_fifo_sched_debugfs_seq_show(
		struct seq_file *s, void *v)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = v;
	struct tsg_gk20a *tsg = NULL;

	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	int ret = SEQ_SKIP;
	u32 engine_id;

	engine_id = gk20a_fifo_get_gr_engine_id(g);
	engine_info = (f->engine_info + engine_id);
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	if (ch == f->channel) {
		seq_puts(s, "chid     tsgid    pid      timeslice timeout  interleave graphics_preempt compute_preempt\n");
		seq_puts(s, "                           (usecs)   (msecs)\n");
		ret = 0;
	}

	if (!test_bit(ch->chid, runlist->active_channels))
		return ret;

	if (gk20a_channel_get(ch)) {
		tsg = tsg_gk20a_from_ch(ch);

		if (tsg)
			seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
				ch->chid,
				ch->tsgid,
				ch->tgid,
				tsg->timeslice_us,
				ch->timeout_ms_max,
				tsg->interleave_level,
				tsg->gr_ctx.graphics_preempt_mode,
				tsg->gr_ctx.compute_preempt_mode);
		gk20a_channel_put(ch);
	}
	return 0;
}

static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
	.start = gk20a_fifo_sched_debugfs_seq_start,
	.next = gk20a_fifo_sched_debugfs_seq_next,
	.stop = gk20a_fifo_sched_debugfs_seq_stop,
	.show = gk20a_fifo_sched_debugfs_seq_show
};

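/*
 * seq_open() allocates the seq_file and stores it in file->private_data;
 * the gk20a pointer stashed in inode->i_private by debugfs_create_file()
 * is then propagated to seq_file->private for the iterator callbacks.
 */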
static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
	struct file *file)
{
	struct gk20a *g = inode->i_private;
	int err;

	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
	if (err)
		return err;

	nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private);

	((struct seq_file *)file->private_data)->private = inode->i_private;
	return 0;
}

/*
 * The file operations structure contains our open function along with a
 * set of the canned seq_* ops.
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_fifo_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};

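/*
 * Enabling profiling takes a reference on f->profile.ref and allocates
 * the sample and scratch buffers on the 0 -> 1 transition; disabling
 * drops that reference, and the buffers are actually freed in
 * __gk20a_fifo_profile_free() once in-flight users release theirs.
 */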
static int gk20a_fifo_profile_enable(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *) data;
	struct fifo_gk20a *f = &g->fifo;

	nvgpu_mutex_acquire(&f->profile.lock);
	if (val == 0) {
		if (f->profile.enabled) {
			f->profile.enabled = false;
			nvgpu_ref_put(&f->profile.ref,
				__gk20a_fifo_profile_free);
		}
	} else {
		if (!f->profile.enabled) {
			/* no unconditional kref init, as that would race if
			 * we enable/disable/enable while a kickoff is in
			 * flight
			 */
			if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
				f->profile.data = nvgpu_vzalloc(g,
					FIFO_PROFILING_ENTRIES *
					sizeof(struct fifo_profile_gk20a));
				f->profile.sorted = nvgpu_vzalloc(g,
					FIFO_PROFILING_ENTRIES *
					sizeof(u64));
				if (!(f->profile.data && f->profile.sorted)) {
					nvgpu_vfree(g, f->profile.data);
					nvgpu_vfree(g, f->profile.sorted);
					nvgpu_mutex_release(&f->profile.lock);
					return -ENOMEM;
				}
				nvgpu_ref_init(&f->profile.ref);
			}
			atomic_set(&f->profile.get.atomic_var, 0);
			f->profile.enabled = true;
		}
	}
	nvgpu_mutex_release(&f->profile.lock);

	return 0;
}

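/*
 * No read callback, so the "enable" node is write-only: writing a
 * non-zero value (parsed with "%llu") enables profiling, zero disables.
 */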
DEFINE_SIMPLE_ATTRIBUTE(
	gk20a_fifo_profile_enable_debugfs_fops,
	NULL,
	gk20a_fifo_profile_enable,
	"%llu\n"
);

static int __profile_cmp(const void *a, const void *b)
{
	u64 ta = *((const u64 *) a), tb = *((const u64 *) b);

	/* compare explicitly: truncating the u64 difference to int
	 * could misorder large deltas */
	return (ta > tb) - (ta < tb);
}

/*
 * The percentile arrays below use about 800 bytes of stack, but the
 * function using them is not part of a call stack where much memory is
 * in use, so this is fine.
 */
#define PERCENTILE_WIDTH	5
#define PERCENTILE_RANGES	(100/PERCENTILE_WIDTH)

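/*
 * Worked example: with PERCENTILE_WIDTH = 5 there are 20 buckets, and
 * bucket index i reads sorted[(5 * (i + 1) * nelem) / 100 - 1]; for
 * nelem = 200 samples, bucket 0 is sorted[9] (the 5th percentile) and
 * bucket 19 is sorted[199] (the maximum).
 */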
static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
		u64 *percentiles, u32 index_end, u32 index_start)
{
	unsigned int nelem = 0;
	unsigned int index;
	struct fifo_profile_gk20a *profile;

	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
		profile = &g->fifo.profile.data[index];

		if (profile->timestamp[index_end] >
				profile->timestamp[index_start]) {
			/* This is a valid element */
			g->fifo.profile.sorted[nelem] =
				profile->timestamp[index_end] -
				profile->timestamp[index_start];
			nelem++;
		}
	}

	/* sort it */
	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
		__profile_cmp, NULL);

	/* build ranges */
	for (index = 0; index < PERCENTILE_RANGES; index++) {
		percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
				nelem) / 100 - 1];
	}
	return nelem;
}

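/*
 * Each __gk20a_fifo_create_stats() call below builds percentiles for the
 * delta between two snapshot points of the same kickoff: ioctl entry to
 * ioctl exit, entry to end, ioctl entry to job tracking, job tracking to
 * append, and append to end.
 */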
static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	unsigned int get, nelem, index;
	/*
	 * About 800 bytes of stack, but this function is static and only
	 * called from the debugfs handler, where stack usage is low.
	 */
	u64 percentiles_ioctl[PERCENTILE_RANGES];
	u64 percentiles_kickoff[PERCENTILE_RANGES];
	u64 percentiles_jobtracking[PERCENTILE_RANGES];
	u64 percentiles_append[PERCENTILE_RANGES];
	u64 percentiles_userd[PERCENTILE_RANGES];

	if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
		seq_printf(s, "Profiling disabled\n");
		return 0;
	}

	get = atomic_read(&g->fifo.profile.get.atomic_var);

	__gk20a_fifo_create_stats(g, percentiles_ioctl,
		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_kickoff,
		PROFILE_END, PROFILE_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_append,
		PROFILE_APPEND, PROFILE_JOB_TRACKING);
	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
		PROFILE_END, PROFILE_APPEND);

	seq_printf(s, "Number of kickoffs: %d\n", nelem);
	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");

	for (index = 0; index < PERCENTILE_RANGES; index++)
		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
			PERCENTILE_WIDTH * (index + 1),
			percentiles_ioctl[index],
			percentiles_kickoff[index],
			percentiles_append[index],
			percentiles_jobtracking[index],
			percentiles_userd[index]);

	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);

	return 0;
}

static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
}

static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
	.open = gk20a_fifo_profile_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void gk20a_fifo_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct dentry *gpu_root = l->debugfs;
	struct dentry *fifo_root;
	struct dentry *profile_root;

	fifo_root = debugfs_create_dir("fifo", gpu_root);
	if (IS_ERR_OR_NULL(fifo_root))
		return;

	nvgpu_log(g, gpu_dbg_info, "g=%p", g);

	debugfs_create_file("sched", 0600, fifo_root, g,
		&gk20a_fifo_sched_debugfs_fops);

	profile_root = debugfs_create_dir("profile", fifo_root);
	if (IS_ERR_OR_NULL(profile_root))
		return;

	nvgpu_mutex_init(&g->fifo.profile.lock);
	g->fifo.profile.enabled = false;
	atomic_set(&g->fifo.profile.get.atomic_var, 0);
	atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);

	debugfs_create_file("enable", 0600, profile_root, g,
		&gk20a_fifo_profile_enable_debugfs_fops);

	debugfs_create_file("stats", 0600, profile_root, g,
		&gk20a_fifo_profile_stats_debugfs_fops);
}

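/*
 * Resulting layout under the per-GPU debugfs root (the mount point and
 * root name below are an assumption, not taken from this file):
 *
 *   <debugfs>/gpu.0/fifo/sched            per-channel scheduling info
 *   <debugfs>/gpu.0/fifo/profile/enable   write 1/0 to start/stop profiling
 *   <debugfs>/gpu.0/fifo/profile/stats    percentile table of kickoff times
 *
 * e.g.: echo 1 > /sys/kernel/debug/gpu.0/fifo/profile/enable
 */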
void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx)
{
	if (profile)
		profile->timestamp[idx] = nvgpu_current_time_ns();
}

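/*
 * Release callback for the profiling refcount: runs when the last
 * reference is dropped and frees both buffers allocated in
 * gk20a_fifo_profile_enable().
 */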
void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
{
	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
		profile.ref);
	nvgpu_vfree(f->g, f->profile.data);
	nvgpu_vfree(f->g, f->profile.sorted);
}

/* Get the next element in the ring buffer of profile entries
 * and grab a reference to the structure
 */
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_profile_gk20a *profile;
	unsigned int index;

	/* If kref is zero, profiling is not enabled */
	if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
		return NULL;
	index = atomic_inc_return(&f->profile.get.atomic_var);
	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];

	return profile;
}

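/*
 * Hypothetical use in a submit path (the real call sites live elsewhere
 * in the driver; the PROFILE_* indices are presumably defined in
 * debug_fifo.h):
 *
 *	struct fifo_profile_gk20a *p = gk20a_fifo_profile_acquire(g);
 *
 *	gk20a_fifo_profile_snapshot(p, PROFILE_IOCTL_ENTRY);
 *	...
 *	gk20a_fifo_profile_snapshot(p, PROFILE_END);
 *	gk20a_fifo_profile_release(g, p);
 */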
/* Free the reference to the structure. This allows deferred cleanups */
void gk20a_fifo_profile_release(struct gk20a *g,
	struct fifo_profile_gk20a *profile)
{
	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
}

void gk20a_fifo_debugfs_deinit(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;

	nvgpu_mutex_acquire(&f->profile.lock);
	if (f->profile.enabled) {
		f->profile.enabled = false;
		nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free);
	}
	nvgpu_mutex_release(&f->profile.lock);
}