From 93678f571c323861458992de16d552ea7313e831 Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Tue, 15 Mar 2016 19:06:43 -0700 Subject: gpu: nvgpu: Add trace and debugfs for sched params JIRA EVLR-244 JIRA EVLR-318 Change-Id: Ie95f42212dadcf2d0c1737eeb28812afb03b712f Signed-off-by: Thomas Fleury Reviewed-on: http://git-master/r/1120603 GVS: Gerrit_Virtual_Submit Reviewed-by: Deepak Nibade Reviewed-by: Ken Adams --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 38 ++++++--- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 132 ++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 23 ++++++ drivers/gpu/nvgpu/gk20a/gk20a.c | 1 + drivers/gpu/nvgpu/gk20a/gk20a.h | 26 +++++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 17 ++++ drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 3 - drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 11 ++- drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 1 + include/trace/events/gk20a.h | 66 +++++++++++++++- 11 files changed, 297 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 9e023e16..697861e2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -184,8 +184,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, return 0; } -static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, - u32 timeslice_period) +static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) { void *inst_ptr; int shift = 0, value = 0; @@ -194,8 +193,8 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, if (!inst_ptr) return -ENOMEM; - gk20a_channel_get_timescale_from_timeslice(c->g, timeslice_period, - &value, &shift); + gk20a_channel_get_timescale_from_timeslice(c->g, + c->timeslice_us, &value, &shift); /* disable channel */ c->g->ops.fifo.disable_channel(c); @@ -1162,6 +1161,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) ch->obj_class = 0; 
ch->clean_up.scheduled = false; ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW; + ch->timeslice_us = g->timeslice_low_priority_us; /* The channel is *not* runnable at this point. It still needs to have * an address space bound and allocate a gpfifo and grctx. */ @@ -1207,6 +1207,8 @@ static int __gk20a_channel_open(struct gk20a *g, struct file *filp) return -ENOMEM; } + trace_gk20a_channel_sched_defaults(GK20A_TP_ARGS_SCHED(ch)); + filp->private_data = ch; return 0; } @@ -2699,8 +2701,6 @@ static int gk20a_channel_event_id_ctrl(struct channel_gk20a *ch, int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) { - u32 timeslice_timeout; - if (gk20a_is_channel_marked_as_tsg(ch)) { gk20a_err(dev_from_gk20a(ch->g), "invalid operation for TSG!\n"); @@ -2710,21 +2710,20 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority) /* set priority of graphics channel */ switch (priority) { case NVGPU_PRIORITY_LOW: - timeslice_timeout = ch->g->timeslice_low_priority_us; + ch->timeslice_us = ch->g->timeslice_low_priority_us; break; case NVGPU_PRIORITY_MEDIUM: - timeslice_timeout = ch->g->timeslice_medium_priority_us; + ch->timeslice_us = ch->g->timeslice_medium_priority_us; break; case NVGPU_PRIORITY_HIGH: - timeslice_timeout = ch->g->timeslice_high_priority_us; + ch->timeslice_us = ch->g->timeslice_high_priority_us; break; default: pr_err("Unsupported priority"); return -EINVAL; } - return channel_gk20a_set_schedule_params(ch, - timeslice_timeout); + return channel_gk20a_set_schedule_params(ch); } int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice) @@ -2739,7 +2738,9 @@ int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice) timeslice > NVGPU_CHANNEL_MAX_TIMESLICE_US) return -EINVAL; - return channel_gk20a_set_schedule_params(ch, timeslice); + ch->timeslice_us = timeslice; + + return channel_gk20a_set_schedule_params(ch); } static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, @@ -3059,6 
+3060,8 @@ long gk20a_channel_ioctl(struct file *filp, gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d", timeout, ch->hw_chid); ch->timeout_ms_max = timeout; + + trace_gk20a_channel_set_timeout(GK20A_TP_ARGS_SCHED(ch)); break; } case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX: @@ -3072,6 +3075,8 @@ long gk20a_channel_ioctl(struct file *filp, timeout, ch->hw_chid); ch->timeout_ms_max = timeout; ch->timeout_debug_dump = timeout_debug_dump; + + trace_gk20a_channel_set_timeout(GK20A_TP_ARGS_SCHED(ch)); break; } case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT: @@ -3088,7 +3093,10 @@ long gk20a_channel_ioctl(struct file *filp, } err = ch->g->ops.fifo.channel_set_priority(ch, ((struct nvgpu_set_priority_args *)buf)->priority); + gk20a_idle(dev); + + trace_gk20a_channel_set_priority(GK20A_TP_ARGS_SCHED(ch)); break; case NVGPU_IOCTL_CHANNEL_ENABLE: err = gk20a_busy(dev); @@ -3172,7 +3180,10 @@ long gk20a_channel_ioctl(struct file *filp, } err = gk20a_channel_set_runlist_interleave(ch, ((struct nvgpu_runlist_interleave_args *)buf)->level); + gk20a_idle(dev); + + trace_gk20a_channel_set_runlist_interleave(GK20A_TP_ARGS_SCHED(ch)); break; case NVGPU_IOCTL_CHANNEL_SET_TIMESLICE: err = gk20a_busy(dev); @@ -3184,7 +3195,10 @@ long gk20a_channel_ioctl(struct file *filp, } err = ch->g->ops.fifo.channel_set_timeslice(ch, ((struct nvgpu_timeslice_args *)buf)->timeslice_us); + gk20a_idle(dev); + + trace_gk20a_channel_set_timeslice(GK20A_TP_ARGS_SCHED(ch)); break; default: dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 1e9341ae..8840a3ae 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -176,6 +176,7 @@ struct channel_gk20a { bool has_timedout; u32 timeout_ms_max; bool timeout_debug_dump; + u32 timeslice_us; struct dma_buf *error_notifier_ref; struct nvgpu_notification *error_notifier; diff --git 
a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 66dfa48a..fa8ab5b1 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -2550,6 +2550,138 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g, return g->fifo.channel + hw_chid; } +#ifdef CONFIG_DEBUG_FS +static void *gk20a_fifo_sched_debugfs_seq_start( + struct seq_file *s, loff_t *pos) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + + if (*pos >= f->num_channels) + return NULL; + + return &f->channel[*pos]; +} + +static void *gk20a_fifo_sched_debugfs_seq_next( + struct seq_file *s, void *v, loff_t *pos) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + + ++(*pos); + if (*pos >= f->num_channels) + return NULL; + + return &f->channel[*pos]; +} + +static void gk20a_fifo_sched_debugfs_seq_stop( + struct seq_file *s, void *v) +{ +} + +static int gk20a_fifo_sched_debugfs_seq_show( + struct seq_file *s, void *v) +{ + struct gk20a *g = s->private; + struct fifo_gk20a *f = &g->fifo; + struct channel_gk20a *ch = v; + struct tsg_gk20a *tsg = NULL; + + struct fifo_engine_info_gk20a *engine_info; + struct fifo_runlist_info_gk20a *runlist; + u32 runlist_id; + int ret = SEQ_SKIP; + + engine_info = f->engine_info + ENGINE_GR_GK20A; + runlist_id = engine_info->runlist_id; + runlist = &f->runlist_info[runlist_id]; + + if (ch == f->channel) { + seq_puts(s, "chid tsgid pid timeslice timeout interleave preempt\n"); + seq_puts(s, " (usecs) (msecs)\n"); + ret = 0; + } + + if (!test_bit(ch->hw_chid, runlist->active_channels)) + return ret; + + if (gk20a_channel_get(ch)) { + if (gk20a_is_channel_marked_as_tsg(ch)) + tsg = &f->tsg[ch->tsgid]; + + seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d\n", + ch->hw_chid, + ch->tsgid, + ch->pid, + tsg ? tsg->timeslice_us : ch->timeslice_us, + ch->timeout_ms_max, + ch->interleave_level, + ch->ch_ctx.gr_ctx ? 
+ ch->ch_ctx.gr_ctx->preempt_mode : -1); + gk20a_channel_put(ch); + } + return 0; +} + +static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = { + .start = gk20a_fifo_sched_debugfs_seq_start, + .next = gk20a_fifo_sched_debugfs_seq_next, + .stop = gk20a_fifo_sched_debugfs_seq_stop, + .show = gk20a_fifo_sched_debugfs_seq_show +}; + +static int gk20a_fifo_sched_debugfs_open(struct inode *inode, + struct file *file) +{ + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops); + if (err) + return err; + + gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private); + + ((struct seq_file *)file->private_data)->private = inode->i_private; + return 0; +} + +/* + * The file operations structure contains our open function along with + * a set of the canned seq_ ops. + */ +static const struct file_operations gk20a_fifo_sched_debugfs_fops = { + .owner = THIS_MODULE, + .open = gk20a_fifo_sched_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +void gk20a_fifo_debugfs_init(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + struct gk20a *g = get_gk20a(dev); + + struct dentry *gpu_root = platform->debugfs; + struct dentry *fifo_root; + + fifo_root = debugfs_create_dir("fifo", gpu_root); + if (IS_ERR_OR_NULL(fifo_root)) + return; + + gk20a_dbg(gpu_dbg_info, "g=%p", g); + + debugfs_create_file("sched", 0600, fifo_root, g, + &gk20a_fifo_sched_debugfs_fops); + +} +#endif /* CONFIG_DEBUG_FS */ + void gk20a_init_fifo(struct gpu_ops *gops) { gk20a_init_channel(gops); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 0979bf2b..c95ba71c 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -140,6 +140,23 @@ struct fifo_gk20a { struct mutex deferred_reset_mutex; }; +static inline const char *gk20a_fifo_interleave_level_name(u32 interleave_level) +{ + switch (interleave_level) 
{ + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: + return "LOW"; + + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: + return "MEDIUM"; + + case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH: + return "HIGH"; + + default: + return "?"; + } +} + int gk20a_init_fifo_support(struct gk20a *g); void gk20a_fifo_isr(struct gk20a *g); @@ -200,4 +217,10 @@ int gk20a_fifo_set_runlist_interleave(struct gk20a *g, bool is_tsg, u32 runlist_id, u32 new_level); + +void gk20a_fifo_debugfs_init(struct device *dev); + +const char *gk20a_fifo_interleave_level_name(u32 interleave_level); + + #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index dd27ef64..fd226ee5 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -1608,6 +1608,7 @@ static int gk20a_probe(struct platform_device *dev) gk20a_cde_debugfs_init(&dev->dev); gk20a_alloc_debugfs_init(dev); gk20a_mm_debugfs_init(&dev->dev); + gk20a_fifo_debugfs_init(&dev->dev); #endif gk20a_init_gr(gk20a); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 15f838d9..0b15783b 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -1074,4 +1074,30 @@ int gk20a_pm_init(struct device *dev); int gk20a_pm_finalize_poweron(struct device *dev); void gk20a_remove_support(struct device *dev); +static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch) +{ + struct tsg_gk20a *tsg = NULL; + + if (gk20a_is_channel_marked_as_tsg(ch)) { + struct gk20a *g = ch->g; + struct fifo_gk20a *f = &g->fifo; + + tsg = &f->tsg[ch->tsgid]; + } + + return tsg; +} + +/* Args for the channel sched_params trace events; evaluates ch repeatedly. */ +#define GK20A_TP_ARGS_SCHED(ch) \ + (ch)->hw_chid, \ + (ch)->tsgid, \ + (ch)->pid, \ + gk20a_is_channel_marked_as_tsg(ch) ? \ + tsg_gk20a_from_ch(ch)->timeslice_us : (ch)->timeslice_us, \ + (ch)->timeout_ms_max, \ + gk20a_fifo_interleave_level_name((ch)->interleave_level), \ + gr_gk20a_preempt_mode_name((ch)->ch_ctx.gr_ctx ? 
\ + (ch)->ch_ctx.gr_ctx->preempt_mode : 0) + #endif /* GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 681a0437..e1e6e262 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -626,4 +626,21 @@ int gr_gk20a_resume_contexts(struct gk20a *g, struct dbg_session_gk20a *dbg_s, int *ctx_resident_ch_fd); +#define NVGPU_GR_PREEMPTION_MODE_WFI 0 +#define NVGPU_GR_PREEMPTION_MODE_CTA 2 + +static inline const char *gr_gk20a_preempt_mode_name(u32 preempt_mode) +{ + switch (preempt_mode) { + case NVGPU_GR_PREEMPTION_MODE_WFI: + return "WFI"; + + case NVGPU_GR_PREEMPTION_MODE_CTA: + return "CTA"; + + default: + return "?"; + } +} + #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 3bc568d6..29f32d8f 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -103,9 +103,6 @@ struct gr_ctx_desc { #endif }; -#define NVGPU_GR_PREEMPTION_MODE_WFI 0 -#define NVGPU_GR_PREEMPTION_MODE_CTA 2 - struct compbit_store_desc { struct mem_desc mem; diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index 40c871ce..5f4cf931 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c @@ -165,24 +165,22 @@ int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid) static int gk20a_tsg_set_priority(struct gk20a *g, struct tsg_gk20a *tsg, u32 priority) { - int timeslice_period; - switch (priority) { case NVGPU_PRIORITY_LOW: - timeslice_period = g->timeslice_low_priority_us; + tsg->timeslice_us = g->timeslice_low_priority_us; break; case NVGPU_PRIORITY_MEDIUM: - timeslice_period = g->timeslice_medium_priority_us; + tsg->timeslice_us = g->timeslice_medium_priority_us; break; case NVGPU_PRIORITY_HIGH: - timeslice_period = g->timeslice_high_priority_us; + tsg->timeslice_us = g->timeslice_high_priority_us; break; default: pr_err("Unsupported priority"); 
return -EINVAL; } - gk20a_channel_get_timescale_from_timeslice(g, timeslice_period, + gk20a_channel_get_timescale_from_timeslice(g, tsg->timeslice_us, &tsg->timeslice_timeout, &tsg->timeslice_scale); g->ops.fifo.update_runlist(g, 0, ~0, true, true); @@ -407,6 +405,7 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp) tsg->tsg_gr_ctx = NULL; tsg->vm = NULL; tsg->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW; + tsg->timeslice_us = 0; filp->private_data = tsg; diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 9a23c973..9a8bfada 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h @@ -43,6 +43,7 @@ struct tsg_gk20a { int num_active_channels; struct mutex ch_list_lock; + int timeslice_us; int timeslice_timeout; int timeslice_scale; diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h index c382099a..fc5cb872 100644 --- a/include/trace/events/gk20a.h +++ b/include/trace/events/gk20a.h @@ -1,7 +1,7 @@ /* * gk20a event logging to ftrace. * - * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -190,6 +190,70 @@ DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put_nofree, TP_ARGS(channel, caller) ); +DECLARE_EVENT_CLASS(gk20a_channel_sched_params, + TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice, + u32 timeout, const char *interleave, const char *preempt_mode), + TP_ARGS(chid, tsgid, pid, timeslice, timeout, + interleave, preempt_mode), + TP_STRUCT__entry( + __field(int, chid) + __field(int, tsgid) + __field(pid_t, pid) + __field(u32, timeslice) + __field(u32, timeout) + __field(const char *, interleave) /* no need to copy */ + __field(const char *, preempt_mode) /* no need to copy */ + ), + TP_fast_assign( + __entry->chid = chid; + __entry->tsgid = tsgid; + __entry->pid = pid; + __entry->timeslice = timeslice; + __entry->timeout = timeout; + __entry->interleave = interleave; + __entry->preempt_mode = preempt_mode; + ), + TP_printk("chid=%d tsgid=%d pid=%d timeslice=%u timeout=%u interleave=%s preempt=%s", + __entry->chid, __entry->tsgid, __entry->pid, + __entry->timeslice, __entry->timeout, + __entry->interleave, __entry->preempt_mode) +); + +DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_sched_defaults, + TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice, + u32 timeout, const char *interleave, const char *preempt_mode), + TP_ARGS(chid, tsgid, pid, timeslice, timeout, + interleave, preempt_mode) +); + +DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_priority, + TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice, + u32 timeout, const char *interleave, const char *preempt_mode), + TP_ARGS(chid, tsgid, pid, timeslice, timeout, + interleave, preempt_mode) +); + +DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_runlist_interleave, + TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice, + u32 timeout, const char *interleave, const char *preempt_mode), + TP_ARGS(chid, 
tsgid, pid, timeslice, timeout, + interleave, preempt_mode) +); + +DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_timeslice, + TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice, + u32 timeout, const char *interleave, const char *preempt_mode), + TP_ARGS(chid, tsgid, pid, timeslice, timeout, + interleave, preempt_mode) +); + +DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_timeout, + TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice, + u32 timeout, const char *interleave, const char *preempt_mode), + TP_ARGS(chid, tsgid, pid, timeslice, timeout, + interleave, preempt_mode) +); + TRACE_EVENT(gk20a_push_cmdbuf, TP_PROTO(const char *name, u32 mem_id, u32 words, u32 offset, void *cmdbuf), -- cgit v1.2.2