diff options
Diffstat (limited to 'include/os/linux/debug_fifo.c')
| -rw-r--r-- | include/os/linux/debug_fifo.c | 376 |
1 file changed, 376 insertions(+), 0 deletions(-)
diff --git a/include/os/linux/debug_fifo.c b/include/os/linux/debug_fifo.c new file mode 100644 index 0000000..98da8bc --- /dev/null +++ b/include/os/linux/debug_fifo.c | |||
| @@ -0,0 +1,376 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include "debug_fifo.h" | ||
| 16 | #include "os_linux.h" | ||
| 17 | |||
| 18 | #include <linux/debugfs.h> | ||
| 19 | #include <linux/seq_file.h> | ||
| 20 | |||
| 21 | #include <nvgpu/sort.h> | ||
| 22 | #include <nvgpu/timers.h> | ||
| 23 | #include <nvgpu/channel.h> | ||
| 24 | |||
| 25 | void __gk20a_fifo_profile_free(struct nvgpu_ref *ref); | ||
| 26 | |||
| 27 | static void *gk20a_fifo_sched_debugfs_seq_start( | ||
| 28 | struct seq_file *s, loff_t *pos) | ||
| 29 | { | ||
| 30 | struct gk20a *g = s->private; | ||
| 31 | struct fifo_gk20a *f = &g->fifo; | ||
| 32 | |||
| 33 | if (*pos >= f->num_channels) | ||
| 34 | return NULL; | ||
| 35 | |||
| 36 | return &f->channel[*pos]; | ||
| 37 | } | ||
| 38 | |||
| 39 | static void *gk20a_fifo_sched_debugfs_seq_next( | ||
| 40 | struct seq_file *s, void *v, loff_t *pos) | ||
| 41 | { | ||
| 42 | struct gk20a *g = s->private; | ||
| 43 | struct fifo_gk20a *f = &g->fifo; | ||
| 44 | |||
| 45 | ++(*pos); | ||
| 46 | if (*pos >= f->num_channels) | ||
| 47 | return NULL; | ||
| 48 | |||
| 49 | return &f->channel[*pos]; | ||
| 50 | } | ||
| 51 | |||
/* seq_file stop callback: nothing to undo -- start/next in this iterator
 * acquire no locks and take no references.
 */
static void gk20a_fifo_sched_debugfs_seq_stop(
		struct seq_file *s, void *v)
{
	/* intentionally empty */
}
| 56 | |||
/*
 * seq_file show callback: print one row of scheduling parameters per
 * active channel that belongs to a TSG, preceded by a column header when
 * called for the first entry of the channel table.
 */
static int gk20a_fifo_sched_debugfs_seq_show(
		struct seq_file *s, void *v)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = v;
	struct tsg_gk20a *tsg = NULL;

	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	int ret = SEQ_SKIP;
	u32 engine_id;

	/* The active-channel bitmap is taken from the GR engine's runlist. */
	engine_id = gk20a_fifo_get_gr_engine_id(g);
	engine_info = (f->engine_info + engine_id);
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	/* First element of the table: emit the header exactly once and
	 * switch from SEQ_SKIP to 0 so the header line is kept even if
	 * this particular channel turns out to be inactive.
	 */
	if (ch == f->channel) {
		seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
		seq_puts(s, " (usecs) (msecs)\n");
		ret = 0;
	}

	/* Inactive channels produce no row. */
	if (!test_bit(ch->chid, runlist->active_channels))
		return ret;

	/* Hold a channel reference while dereferencing its fields; a dead
	 * channel (get fails) simply prints nothing.
	 */
	if (gk20a_channel_get(ch)) {
		tsg = tsg_gk20a_from_ch(ch);

		if (tsg)
			seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
				ch->chid,
				ch->tsgid,
				ch->tgid,
				tsg->timeslice_us,
				ch->timeout_ms_max,
				tsg->interleave_level,
				tsg->gr_ctx.graphics_preempt_mode,
				tsg->gr_ctx.compute_preempt_mode);
		gk20a_channel_put(ch);
	}
	return 0;
}
| 102 | |||
/* seq_file iterator over the FIFO channel table for the "sched" file. */
static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
	.start = gk20a_fifo_sched_debugfs_seq_start,
	.next = gk20a_fifo_sched_debugfs_seq_next,
	.stop = gk20a_fifo_sched_debugfs_seq_stop,
	.show = gk20a_fifo_sched_debugfs_seq_show
};
| 109 | |||
| 110 | static int gk20a_fifo_sched_debugfs_open(struct inode *inode, | ||
| 111 | struct file *file) | ||
| 112 | { | ||
| 113 | struct gk20a *g = inode->i_private; | ||
| 114 | int err; | ||
| 115 | |||
| 116 | err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); | ||
| 117 | if (err) | ||
| 118 | return err; | ||
| 119 | |||
| 120 | nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private); | ||
| 121 | |||
| 122 | ((struct seq_file *)file->private_data)->private = inode->i_private; | ||
| 123 | return 0; | ||
| 124 | }; | ||
| 125 | |||
/*
 * The file operations structure contains our open function along with
 * set of the canned seq_ ops.
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_fifo_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};
| 137 | |||
/*
 * Write handler for the "enable" debugfs attribute: val == 0 disables
 * kickoff profiling, any non-zero value enables it.  Serialized against
 * concurrent enable/disable via f->profile.lock.
 *
 * Returns 0 on success, -ENOMEM if the profiling buffers cannot be
 * allocated.
 */
static int gk20a_fifo_profile_enable(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *) data;
	struct fifo_gk20a *f = &g->fifo;

	nvgpu_mutex_acquire(&f->profile.lock);
	if (val == 0) {
		if (f->profile.enabled) {
			f->profile.enabled = false;
			/* Drop the enable-time reference; buffers are freed
			 * by __gk20a_fifo_profile_free once the last
			 * in-flight user also releases its reference.
			 */
			nvgpu_ref_put(&f->profile.ref,
				__gk20a_fifo_profile_free);
		}
	} else {
		if (!f->profile.enabled) {
			/* not kref init as it can have a running condition if
			 * we enable/disable/enable while kickoff is happening
			 */
			if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
				f->profile.data = nvgpu_vzalloc(g,
					FIFO_PROFILING_ENTRIES *
					sizeof(struct fifo_profile_gk20a));
				f->profile.sorted = nvgpu_vzalloc(g,
					FIFO_PROFILING_ENTRIES *
					sizeof(u64));
				if (!(f->profile.data && f->profile.sorted)) {
					/* NOTE(review): assumes nvgpu_vfree()
					 * is a no-op on NULL so a partial
					 * allocation can be undone -- confirm.
					 */
					nvgpu_vfree(g, f->profile.data);
					nvgpu_vfree(g, f->profile.sorted);
					nvgpu_mutex_release(&f->profile.lock);
					return -ENOMEM;
				}
				nvgpu_ref_init(&f->profile.ref);
			}
			/* Restart the ring at slot 0 on every enable. */
			atomic_set(&f->profile.get.atomic_var, 0);
			f->profile.enabled = true;
		}
	}
	nvgpu_mutex_release(&f->profile.lock);

	return 0;
}
| 179 | |||
/*
 * fops for the write-only "enable" attribute: NULL get callback (reads
 * are not supported), gk20a_fifo_profile_enable handles writes.
 */
DEFINE_SIMPLE_ATTRIBUTE(
	gk20a_fifo_profile_enable_debugfs_fops,
	NULL,
	gk20a_fifo_profile_enable,
	"%llu\n"
);
| 186 | |||
/*
 * qsort/sort comparator for u64 timestamp deltas, ascending.
 *
 * Fix: the original returned the raw difference truncated to int, which
 * produces the wrong sign whenever the (unsigned 64-bit) difference does
 * not fit in 32 bits -- e.g. comparing (1ULL << 33) against 1 yielded a
 * negative result.  Compare explicitly instead of subtracting.
 */
static int __profile_cmp(const void *a, const void *b)
{
	unsigned long long va = *((const unsigned long long *) a);
	unsigned long long vb = *((const unsigned long long *) b);

	return (va > vb) - (va < vb);
}
| 191 | |||
/*
 * This uses about 800b in the stack, but the function using it is not part
 * of a callstack where much memory is being used, so it is fine
 */
#define PERCENTILE_WIDTH 5
#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)

/*
 * Gather timestamp[index_end] - timestamp[index_start] deltas for every
 * valid entry of the profiling ring into f->profile.sorted, sort them
 * ascending, and fill percentiles[] with one value per
 * PERCENTILE_WIDTH-percent bucket.
 *
 * Returns the number of valid samples.  When fewer samples than buckets
 * exist, every percentile is reported as 0 (this also avoids a negative
 * index in the bucket computation below).
 */
static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
		u64 *percentiles, u32 index_end, u32 index_start)
{
	unsigned int nelem = 0;
	unsigned int index;
	struct fifo_profile_gk20a *profile;

	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
		profile = &g->fifo.profile.data[index];

		/* end > start means both timestamps were recorded */
		if (profile->timestamp[index_end] >
				profile->timestamp[index_start]) {
			/* This is a valid element */
			g->fifo.profile.sorted[nelem] =
				profile->timestamp[index_end] -
				profile->timestamp[index_start];
			nelem++;
		}
	}

	/* sort it */
	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
		__profile_cmp, NULL);

	/* build ranges */
	for (index = 0; index < PERCENTILE_RANGES; index++) {
		percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
				nelem)/100 - 1];
	}
	return nelem;
}
| 231 | |||
| 232 | static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) | ||
| 233 | { | ||
| 234 | struct gk20a *g = s->private; | ||
| 235 | unsigned int get, nelem, index; | ||
| 236 | /* | ||
| 237 | * 800B in the stack, but function is declared statically and only | ||
| 238 | * called from debugfs handler | ||
| 239 | */ | ||
| 240 | u64 percentiles_ioctl[PERCENTILE_RANGES]; | ||
| 241 | u64 percentiles_kickoff[PERCENTILE_RANGES]; | ||
| 242 | u64 percentiles_jobtracking[PERCENTILE_RANGES]; | ||
| 243 | u64 percentiles_append[PERCENTILE_RANGES]; | ||
| 244 | u64 percentiles_userd[PERCENTILE_RANGES]; | ||
| 245 | |||
| 246 | if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) { | ||
| 247 | seq_printf(s, "Profiling disabled\n"); | ||
| 248 | return 0; | ||
| 249 | } | ||
| 250 | |||
| 251 | get = atomic_read(&g->fifo.profile.get.atomic_var); | ||
| 252 | |||
| 253 | __gk20a_fifo_create_stats(g, percentiles_ioctl, | ||
| 254 | PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); | ||
| 255 | __gk20a_fifo_create_stats(g, percentiles_kickoff, | ||
| 256 | PROFILE_END, PROFILE_ENTRY); | ||
| 257 | __gk20a_fifo_create_stats(g, percentiles_jobtracking, | ||
| 258 | PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY); | ||
| 259 | __gk20a_fifo_create_stats(g, percentiles_append, | ||
| 260 | PROFILE_APPEND, PROFILE_JOB_TRACKING); | ||
| 261 | nelem = __gk20a_fifo_create_stats(g, percentiles_userd, | ||
| 262 | PROFILE_END, PROFILE_APPEND); | ||
| 263 | |||
| 264 | seq_printf(s, "Number of kickoffs: %d\n", nelem); | ||
| 265 | seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n"); | ||
| 266 | |||
| 267 | for (index = 0; index < PERCENTILE_RANGES; index++) | ||
| 268 | seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n", | ||
| 269 | PERCENTILE_WIDTH * (index+1), | ||
| 270 | percentiles_ioctl[index], | ||
| 271 | percentiles_kickoff[index], | ||
| 272 | percentiles_append[index], | ||
| 273 | percentiles_jobtracking[index], | ||
| 274 | percentiles_userd[index]); | ||
| 275 | |||
| 276 | nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
| 277 | |||
| 278 | return 0; | ||
| 279 | } | ||
| 280 | |||
| 281 | static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file) | ||
| 282 | { | ||
| 283 | return single_open(file, gk20a_fifo_profile_stats, inode->i_private); | ||
| 284 | } | ||
| 285 | |||
/* single_open()-based fops for the read-only "stats" file. */
static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
	.open = gk20a_fifo_profile_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
| 292 | |||
| 293 | |||
/*
 * Create the FIFO debugfs hierarchy under the per-GPU root:
 *   fifo/sched           - per-channel scheduling parameters
 *   fifo/profile/enable  - toggle kickoff profiling
 *   fifo/profile/stats   - percentile latency statistics
 *
 * Directory-creation failures abort silently; file-creation return values
 * are intentionally not checked.
 */
void gk20a_fifo_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct dentry *gpu_root = l->debugfs;
	struct dentry *fifo_root;
	struct dentry *profile_root;

	fifo_root = debugfs_create_dir("fifo", gpu_root);
	if (IS_ERR_OR_NULL(fifo_root))
		return;

	nvgpu_log(g, gpu_dbg_info, "g=%p", g);

	debugfs_create_file("sched", 0600, fifo_root, g,
		&gk20a_fifo_sched_debugfs_fops);

	profile_root = debugfs_create_dir("profile", fifo_root);
	if (IS_ERR_OR_NULL(profile_root))
		return;

	nvgpu_mutex_init(&g->fifo.profile.lock);
	g->fifo.profile.enabled = false;
	/* Ring position and refcount start at zero: profiling disabled. */
	atomic_set(&g->fifo.profile.get.atomic_var, 0);
	atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);

	debugfs_create_file("enable", 0600, profile_root, g,
		&gk20a_fifo_profile_enable_debugfs_fops);

	debugfs_create_file("stats", 0600, profile_root, g,
		&gk20a_fifo_profile_stats_debugfs_fops);

}
| 326 | |||
| 327 | void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx) | ||
| 328 | { | ||
| 329 | if (profile) | ||
| 330 | profile->timestamp[idx] = nvgpu_current_time_ns(); | ||
| 331 | } | ||
| 332 | |||
/*
 * Release callback for the profiling refcount: frees both profiling
 * buffers once the last reference (the enable bit or an in-flight user)
 * is dropped.
 */
void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
{
	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
			profile.ref);
	nvgpu_vfree(f->g, f->profile.data);
	nvgpu_vfree(f->g, f->profile.sorted);
}
| 340 | |||
/* Get the next element in the ring buffer of profile entries
 * and grab a reference to the structure
 *
 * Returns NULL when profiling is disabled; otherwise the caller must
 * balance this with gk20a_fifo_profile_release().
 */
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_profile_gk20a *profile;
	unsigned int index;

	/* If kref is zero, profiling is not enabled */
	if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
		return NULL;
	/* NOTE(review): once the counter wraps FIFO_PROFILING_ENTRIES,
	 * old slots are reused and may be overwritten mid-read; callers
	 * appear to tolerate stale entries -- confirm.
	 */
	index = atomic_inc_return(&f->profile.get.atomic_var);
	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];

	return profile;
}
| 358 | |||
| 359 | /* Free the reference to the structure. This allows deferred cleanups */ | ||
| 360 | void gk20a_fifo_profile_release(struct gk20a *g, | ||
| 361 | struct fifo_profile_gk20a *profile) | ||
| 362 | { | ||
| 363 | nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free); | ||
| 364 | } | ||
| 365 | |||
| 366 | void gk20a_fifo_debugfs_deinit(struct gk20a *g) | ||
| 367 | { | ||
| 368 | struct fifo_gk20a *f = &g->fifo; | ||
| 369 | |||
| 370 | nvgpu_mutex_acquire(&f->profile.lock); | ||
| 371 | if (f->profile.enabled) { | ||
| 372 | f->profile.enabled = false; | ||
| 373 | nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free); | ||
| 374 | } | ||
| 375 | nvgpu_mutex_release(&f->profile.lock); | ||
| 376 | } | ||
