/* * drivers/video/tegra/host/t20/debug_gk20a.c * * Copyright (C) 2011-2016 NVIDIA Corporation. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and * may be copied, distributed, and modified under those terms. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * */ #ifdef CONFIG_TEGRA_GK20A #include #endif #include #include #include #include "gk20a.h" #include "debug_gk20a.h" #include "semaphore_gk20a.h" #include "hw_ram_gk20a.h" #include "hw_fifo_gk20a.h" #include "hw_ccsr_gk20a.h" #include "hw_pbdma_gk20a.h" unsigned int gk20a_debug_trace_cmdbuf; struct ch_state { int pid; int refs; u32 inst_block[0]; }; static const char * const ccsr_chan_status_str[] = { "idle", "pending", "pending_ctx_reload", "pending_acquire", "pending_acq_ctx_reload", "on_pbdma", "on_pbdma_and_eng", "on_eng", "on_eng_pending_acquire", "on_eng_pending", "on_pbdma_ctx_reload", "on_pbdma_and_eng_ctx_reload", "on_eng_ctx_reload", "on_eng_pending_ctx_reload", "on_eng_pending_acq_ctx_reload", }; static const char * const chan_status_str[] = { "invalid", "valid", "chsw_load", "chsw_save", "chsw_switch", }; static const char * const ctx_status_str[] = { "invalid", "valid", NULL, NULL, NULL, "ctxsw_load", "ctxsw_save", "ctxsw_switch", }; static inline void gk20a_debug_write_printk(void *ctx, const char *str, size_t len) { pr_info("%s", str); } static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, size_t len) { seq_write((struct seq_file *)ctx, str, len); } void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...) { va_list args; int len; va_start(args, fmt); len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); va_end(args); o->fn(o->ctx, o->buf, len); } static void gk20a_debug_show_channel(struct gk20a *g, struct gk20a_debug_output *o, u32 hw_chid, struct ch_state *ch_state) { u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid)); u32 status = ccsr_channel_status_v(channel); u32 syncpointa, syncpointb; u32 *inst_mem; struct channel_gk20a *c = g->fifo.channel + hw_chid; struct gk20a_semaphore_int *hw_sema = NULL; if (c->hw_sema) hw_sema = c->hw_sema; if (!ch_state) return; inst_mem = &ch_state->inst_block[0]; syncpointa = inst_mem[ram_fc_syncpointa_w()]; syncpointb = inst_mem[ram_fc_syncpointb_w()]; gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, dev_name(g->dev), ch_state->pid, ch_state->refs); gk20a_debug_output(o, "%s in use %s %s\n", ccsr_channel_enable_v(channel) ? "" : "not", ccsr_chan_status_str[status], ccsr_channel_busy_v(channel) ? "busy" : "not busy"); gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx " "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n" "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n", (u64)inst_mem[ram_fc_pb_top_level_get_w()] + ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL), (u64)inst_mem[ram_fc_pb_put_w()] + ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL), (u64)inst_mem[ram_fc_pb_get_w()] + ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL), (u64)inst_mem[ram_fc_pb_fetch_w()] + ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL), inst_mem[ram_fc_pb_header_w()], inst_mem[ram_fc_pb_count_w()], syncpointa, syncpointb, inst_mem[ram_fc_semaphorea_w()], inst_mem[ram_fc_semaphoreb_w()], inst_mem[ram_fc_semaphorec_w()], inst_mem[ram_fc_semaphored_w()]); if (hw_sema) gk20a_debug_output(o, "SEMA STATE: value: 0x%08x " "next_val: 0x%08x addr: 0x%010llx\n", readl(hw_sema->value), atomic_read(&hw_sema->next_value), gk20a_hw_sema_addr(hw_sema)); #ifdef CONFIG_TEGRA_GK20A if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v()) && (pbdma_syncpointb_wait_switch_v(syncpointb) == pbdma_syncpointb_wait_switch_en_v())) gk20a_debug_output(o, "%s on syncpt %u (%s) val %u\n", (status == 3 || status == 8) ? "Waiting" : "Waited", pbdma_syncpointb_syncpt_index_v(syncpointb), nvhost_syncpt_get_name(g->host1x_dev, pbdma_syncpointb_syncpt_index_v(syncpointb)), pbdma_syncpointa_payload_v(syncpointa)); #endif gk20a_debug_output(o, "\n"); } void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) { struct fifo_gk20a *f = &g->fifo; u32 chid; int i; struct ch_state **ch_state; for (i = 0; i < fifo_pbdma_status__size_1_v(); i++) { u32 status = gk20a_readl(g, fifo_pbdma_status_r(i)); u32 chan_status = fifo_pbdma_status_chan_status_v(status); gk20a_debug_output(o, "%s pbdma %d: ", dev_name(g->dev), i); gk20a_debug_output(o, "id: %d (%s), next_id: %d (%s) status: %s\n", fifo_pbdma_status_id_v(status), fifo_pbdma_status_id_type_v(status) ? "tsg" : "channel", fifo_pbdma_status_next_id_v(status), fifo_pbdma_status_next_id_type_v(status) ? "tsg" : "channel", chan_status_str[chan_status]); gk20a_debug_output(o, "PUT: %016llx GET: %016llx " "FETCH: %08x HEADER: %08x\n", (u64)gk20a_readl(g, pbdma_put_r(i)) + ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL), (u64)gk20a_readl(g, pbdma_get_r(i)) + ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL), gk20a_readl(g, pbdma_gp_fetch_r(i)), gk20a_readl(g, pbdma_pb_header_r(i))); } gk20a_debug_output(o, "\n"); for (i = 0; i < fifo_engine_status__size_1_v(); i++) { u32 status = gk20a_readl(g, fifo_engine_status_r(i)); u32 ctx_status = fifo_engine_status_ctx_status_v(status); gk20a_debug_output(o, "%s eng %d: ", dev_name(g->dev), i); gk20a_debug_output(o, "id: %d (%s), next_id: %d (%s), ctx: %s ", fifo_engine_status_id_v(status), fifo_engine_status_id_type_v(status) ? "tsg" : "channel", fifo_engine_status_next_id_v(status), fifo_engine_status_next_id_type_v(status) ? "tsg" : "channel", ctx_status_str[ctx_status]); if (fifo_engine_status_faulted_v(status)) gk20a_debug_output(o, "faulted "); if (fifo_engine_status_engine_v(status)) gk20a_debug_output(o, "busy "); gk20a_debug_output(o, "\n"); } gk20a_debug_output(o, "\n"); ch_state = kzalloc(sizeof(*ch_state) * f->num_channels, GFP_KERNEL); if (!ch_state) { gk20a_debug_output(o, "cannot alloc memory for channels\n"); return; } for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *ch = &f->channel[chid]; if (gk20a_channel_get(ch)) { ch_state[chid] = kmalloc(sizeof(struct ch_state) + ram_in_alloc_size_v(), GFP_KERNEL); /* ref taken stays to below loop with * successful allocs */ if (!ch_state[chid]) gk20a_channel_put(ch); } } for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *ch = &f->channel[chid]; if (!ch_state[chid]) continue; ch_state[chid]->pid = ch->pid; ch_state[chid]->refs = atomic_read(&ch->ref_count); gk20a_mem_rd_n(g, &ch->inst_block, 0, &ch_state[chid]->inst_block[0], ram_in_alloc_size_v()); gk20a_channel_put(ch); } for (chid = 0; chid < f->num_channels; chid++) { if (ch_state[chid]) { gk20a_debug_show_channel(g, o, chid, ch_state[chid]); kfree(ch_state[chid]); } } kfree(ch_state); } static int gk20a_gr_dump_regs(struct device *dev, struct gk20a_debug_output *o) { struct gk20a_platform *platform = gk20a_get_platform(dev); struct gk20a *g = platform->g; if (g->ops.gr.dump_gr_regs) gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o)); return 0; } int gk20a_gr_debug_dump(struct device *dev) { struct gk20a_debug_output o = { .fn = gk20a_debug_write_printk }; gk20a_gr_dump_regs(dev, &o); return 0; } static int gk20a_gr_debug_show(struct seq_file *s, void *unused) { struct device *dev = s->private; struct gk20a_debug_output o = { .fn = gk20a_debug_write_to_seqfile, .ctx = s, }; int err; err = gk20a_busy(dev); if (err) { gk20a_err(dev, "failed to power on gpu: %d", err); return -EINVAL; } gk20a_gr_dump_regs(dev, &o); gk20a_idle(dev); return 0; } void gk20a_debug_dump(struct device *dev) { struct gk20a_platform *platform = gk20a_get_platform(dev); struct gk20a *g = platform->g; struct gk20a_debug_output o = { .fn = gk20a_debug_write_printk }; if (platform->dump_platform_dependencies) platform->dump_platform_dependencies(dev); /* HAL only initialized after 1st power-on */ if (g->ops.debug.show_dump) g->ops.debug.show_dump(g, &o); } static int gk20a_debug_show(struct seq_file *s, void *unused) { struct device *dev = s->private; struct gk20a_debug_output o = { .fn = gk20a_debug_write_to_seqfile, .ctx = s, }; struct gk20a *g; int err; g = gk20a_get_platform(dev)->g; err = gk20a_busy(g->dev); if (err) { gk20a_err(g->dev, "failed to power on gpu: %d", err); return -EFAULT; } /* HAL only initialized after 1st power-on */ if (g->ops.debug.show_dump) g->ops.debug.show_dump(g, &o); gk20a_idle(g->dev); return 0; } static int gk20a_gr_debug_open(struct inode *inode, struct file *file) { return single_open(file, gk20a_gr_debug_show, inode->i_private); } static int gk20a_debug_open(struct inode *inode, struct file *file) { return single_open(file, gk20a_debug_show, inode->i_private); } static const struct file_operations gk20a_gr_debug_fops = { .open = gk20a_gr_debug_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static const struct file_operations gk20a_debug_fops = { .open = gk20a_debug_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; void gk20a_init_debug_ops(struct gpu_ops *gops) { gops->debug.show_dump = gk20a_debug_show_dump; } void gk20a_debug_init(struct device *dev, const char *debugfs_symlink) { struct gk20a_platform *platform = dev_get_drvdata(dev); #ifdef CONFIG_DEBUG_FS struct gk20a *g = platform->g; #endif platform->debugfs = debugfs_create_dir(dev_name(dev), NULL); if (!platform->debugfs) return; if (debugfs_symlink) platform->debugfs_alias = debugfs_create_symlink(debugfs_symlink, NULL, dev_name(dev)); debugfs_create_file("status", S_IRUGO, platform->debugfs, dev, &gk20a_debug_fops); debugfs_create_file("gr_status", S_IRUGO, platform->debugfs, dev, &gk20a_gr_debug_fops); debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, platform->debugfs, &gk20a_debug_trace_cmdbuf); debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR, platform->debugfs, &platform->ch_wdt_timeout_ms); #if defined(GK20A_DEBUG) debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR, platform->debugfs, &gk20a_dbg_mask); debugfs_create_u32("dbg_ftrace", S_IRUGO|S_IWUSR, platform->debugfs, &gk20a_dbg_ftrace); #endif #ifdef CONFIG_DEBUG_FS spin_lock_init(&g->debugfs_lock); g->mm.ltc_enabled = true; g->mm.ltc_enabled_debug = true; g->debugfs_ltc_enabled = debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR, platform->debugfs, &g->mm.ltc_enabled_debug); g->debugfs_gr_idle_timeout_default = debugfs_create_u32("gr_idle_timeout_default_us", S_IRUGO|S_IWUSR, platform->debugfs, &g->gr_idle_timeout_default); g->debugfs_timeouts_enabled = debugfs_create_bool("timeouts_enabled", S_IRUGO|S_IWUSR, platform->debugfs, &g->timeouts_enabled); g->debugfs_bypass_smmu = debugfs_create_bool("bypass_smmu", S_IRUGO|S_IWUSR, platform->debugfs, &g->mm.bypass_smmu); g->debugfs_disable_bigpage = debugfs_create_bool("disable_bigpage", S_IRUGO|S_IWUSR, platform->debugfs, &g->mm.disable_bigpage); g->debugfs_timeslice_low_priority_us = debugfs_create_u32("timeslice_low_priority_us", S_IRUGO|S_IWUSR, platform->debugfs, &g->timeslice_low_priority_us); g->debugfs_timeslice_medium_priority_us = debugfs_create_u32("timeslice_medium_priority_us", S_IRUGO|S_IWUSR, platform->debugfs, &g->timeslice_medium_priority_us); g->debugfs_timeslice_high_priority_us = debugfs_create_u32("timeslice_high_priority_us", S_IRUGO|S_IWUSR, platform->debugfs, &g->timeslice_high_priority_us); g->debugfs_runlist_interleave = debugfs_create_bool("runlist_interleave", S_IRUGO|S_IWUSR, platform->debugfs, &g->runlist_interleave); gr_gk20a_debugfs_init(g); gk20a_pmu_debugfs_init(g->dev); gk20a_railgating_debugfs_init(g->dev); gk20a_cde_debugfs_init(g->dev); gk20a_ce_debugfs_init(g->dev); gk20a_alloc_debugfs_init(g->dev); gk20a_mm_debugfs_init(g->dev); gk20a_fifo_debugfs_init(g->dev); gk20a_sched_debugfs_init(g->dev); #endif }