summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/common
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug.c376
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_allocator.c80
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_allocator.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_cde.c51
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_cde.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_ce.c30
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_ce.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_fifo.c369
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_fifo.h22
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_gr.c31
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_gr.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_kmem.c315
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_kmem.h23
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_mm.c26
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_mm.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_pmu.c479
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_pmu.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_sched.c79
-rw-r--r--drivers/gpu/nvgpu/common/linux/debug_sched.h21
-rw-r--r--drivers/gpu/nvgpu/common/linux/driver_common.c3
-rw-r--r--drivers/gpu/nvgpu/common/linux/ioctl_channel.c2
-rw-r--r--drivers/gpu/nvgpu/common/linux/kmem.c323
-rw-r--r--drivers/gpu/nvgpu/common/linux/kmem_priv.h8
-rw-r--r--drivers/gpu/nvgpu/common/linux/module.c6
-rw-r--r--drivers/gpu/nvgpu/common/mm/bitmap_allocator.c2
-rw-r--r--drivers/gpu/nvgpu/common/mm/buddy_allocator.c4
-rw-r--r--drivers/gpu/nvgpu/common/mm/lockless_allocator.c4
-rw-r--r--drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c70
-rw-r--r--drivers/gpu/nvgpu/common/mm/page_allocator.c2
29 files changed, 2064 insertions, 388 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c
new file mode 100644
index 00000000..2962a467
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug.c
@@ -0,0 +1,376 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "debug_ce.h"
17#include "debug_fifo.h"
18#include "debug_gr.h"
19#include "debug_mm.h"
20#include "debug_allocator.h"
21#include "debug_kmem.h"
22#include "debug_pmu.h"
23#include "debug_sched.h"
24
25#include "gk20a/gk20a.h"
26#include "gk20a/platform_gk20a.h"
27
28#include <linux/debugfs.h>
29#include <linux/seq_file.h>
30
31#include <nvgpu/debug.h>
32
33unsigned int gk20a_debug_trace_cmdbuf;
34
35static inline void gk20a_debug_write_printk(void *ctx, const char *str,
36 size_t len)
37{
38 pr_info("%s", str);
39}
40
41static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
42 size_t len)
43{
44 seq_write((struct seq_file *)ctx, str, len);
45}
46
47void gk20a_debug_output(struct gk20a_debug_output *o,
48 const char *fmt, ...)
49{
50 va_list args;
51 int len;
52
53 va_start(args, fmt);
54 len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
55 va_end(args);
56 o->fn(o->ctx, o->buf, len);
57}
58
/*
 * Dump GR (graphics) registers via the chip HAL, wrapped in an
 * ELPG-protected call so engine power-gating cannot race the register
 * reads.  Silently a no-op when the HAL hook is not populated.
 */
static int gk20a_gr_dump_regs(struct gk20a *g,
		struct gk20a_debug_output *o)
{
	if (g->ops.gr.dump_gr_regs)
		gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));

	return 0;
}
67
68int gk20a_gr_debug_dump(struct gk20a *g)
69{
70 struct gk20a_debug_output o = {
71 .fn = gk20a_debug_write_printk
72 };
73
74 gk20a_gr_dump_regs(g, &o);
75
76 return 0;
77}
78
79static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
80{
81 struct device *dev = s->private;
82 struct gk20a *g = gk20a_get_platform(dev)->g;
83 struct gk20a_debug_output o = {
84 .fn = gk20a_debug_write_to_seqfile,
85 .ctx = s,
86 };
87 int err;
88
89 err = gk20a_busy(g);
90 if (err) {
91 nvgpu_err(g, "failed to power on gpu: %d", err);
92 return -EINVAL;
93 }
94
95 gk20a_gr_dump_regs(g, &o);
96
97 gk20a_idle(g);
98
99 return 0;
100}
101
102void gk20a_debug_dump(struct gk20a *g)
103{
104 struct gk20a_platform *platform = gk20a_get_platform(g->dev);
105 struct gk20a_debug_output o = {
106 .fn = gk20a_debug_write_printk
107 };
108
109 if (platform->dump_platform_dependencies)
110 platform->dump_platform_dependencies(g->dev);
111
112 /* HAL only initialized after 1st power-on */
113 if (g->ops.debug.show_dump)
114 g->ops.debug.show_dump(g, &o);
115}
116
117static int gk20a_debug_show(struct seq_file *s, void *unused)
118{
119 struct device *dev = s->private;
120 struct gk20a_debug_output o = {
121 .fn = gk20a_debug_write_to_seqfile,
122 .ctx = s,
123 };
124 struct gk20a *g;
125 int err;
126
127 g = gk20a_get_platform(dev)->g;
128
129 err = gk20a_busy(g);
130 if (err) {
131 nvgpu_err(g, "failed to power on gpu: %d", err);
132 return -EFAULT;
133 }
134
135 /* HAL only initialized after 1st power-on */
136 if (g->ops.debug.show_dump)
137 g->ops.debug.show_dump(g, &o);
138
139 gk20a_idle(g);
140 return 0;
141}
142
/* debugfs open: bind gk20a_gr_debug_show to this seq_file. */
static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_gr_debug_show, inode->i_private);
}

/* debugfs open: bind gk20a_debug_show to this seq_file. */
static int gk20a_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_debug_show, inode->i_private);
}

/* File operations for the "gr_status" debugfs node. */
static const struct file_operations gk20a_gr_debug_fops = {
	.open = gk20a_gr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/* File operations for the "status" debugfs node. */
static const struct file_operations gk20a_debug_fops = {
	.open = gk20a_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
166
/*
 * Default HAL implementation of debug.show_dump: dump PBDMA and engine
 * status registers, then the RAMFC state of every channel.
 */
void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
{
	g->ops.fifo.dump_pbdma_status(g, o);
	g->ops.fifo.dump_eng_status(g, o);

	gk20a_debug_dump_all_channel_status_ramfc(g, o);
}

/* Install the default debug HAL hooks for gk20a-class chips. */
void gk20a_init_debug_ops(struct gpu_ops *gops)
{
	gops->debug.show_dump = gk20a_debug_show_dump;
}
179
180static int railgate_residency_show(struct seq_file *s, void *data)
181{
182 struct gk20a *g = s->private;
183 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
184 unsigned long time_since_last_state_transition_ms;
185 unsigned long total_rail_gate_time_ms;
186 unsigned long total_rail_ungate_time_ms;
187
188 if (platform->is_railgated(g->dev)) {
189 time_since_last_state_transition_ms =
190 jiffies_to_msecs(jiffies -
191 g->pstats.last_rail_gate_complete);
192 total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
193 total_rail_gate_time_ms =
194 g->pstats.total_rail_gate_time_ms +
195 time_since_last_state_transition_ms;
196 } else {
197 time_since_last_state_transition_ms =
198 jiffies_to_msecs(jiffies -
199 g->pstats.last_rail_ungate_complete);
200 total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
201 total_rail_ungate_time_ms =
202 g->pstats.total_rail_ungate_time_ms +
203 time_since_last_state_transition_ms;
204 }
205
206 seq_printf(s, "Time with Rails Gated: %lu ms\n"
207 "Time with Rails UnGated: %lu ms\n"
208 "Total railgating cycles: %lu\n",
209 total_rail_gate_time_ms,
210 total_rail_ungate_time_ms,
211 g->pstats.railgating_cycle_count - 1);
212 return 0;
213
214}
215
/* debugfs open: bind railgate_residency_show to this seq_file. */
static int railgate_residency_open(struct inode *inode, struct file *file)
{
	return single_open(file, railgate_residency_show, inode->i_private);
}

/* File operations for the "railgate_residency" debugfs node. */
static const struct file_operations railgate_residency_fops = {
	.open = railgate_residency_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
227
228static int gk20a_railgating_debugfs_init(struct gk20a *g)
229{
230 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
231 struct dentry *d;
232
233 if (!g->can_railgate)
234 return 0;
235
236 d = debugfs_create_file(
237 "railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
238 &railgate_residency_fops);
239 if (!d)
240 return -ENOMEM;
241
242 return 0;
243}
244
/*
 * Create the per-GPU debugfs directory (named after the device) and
 * populate it with the driver-wide knobs, then call each subsystem's
 * debugfs init.  @debugfs_symlink, when non-NULL, is the name of a
 * top-level symlink aliasing the directory.  Returns silently if the
 * directory cannot be created; individual node failures are ignored.
 */
void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
{
	struct device *dev = g->dev;
	struct gk20a_platform *platform = dev_get_drvdata(dev);

	platform->debugfs = debugfs_create_dir(dev_name(dev), NULL);
	if (!platform->debugfs)
		return;

	if (debugfs_symlink)
		platform->debugfs_alias =
			debugfs_create_symlink(debugfs_symlink,
					NULL, dev_name(dev));

	debugfs_create_file("status", S_IRUGO, platform->debugfs,
		dev, &gk20a_debug_fops);
	debugfs_create_file("gr_status", S_IRUGO, platform->debugfs,
		dev, &gk20a_gr_debug_fops);
	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
		platform->debugfs, &gk20a_debug_trace_cmdbuf);

	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
		platform->debugfs, &g->ch_wdt_timeout_ms);

	debugfs_create_u32("disable_syncpoints", S_IRUGO|S_IWUSR,
		platform->debugfs, &g->disable_syncpoints);

	/* Legacy debugging API. */
	debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR,
		platform->debugfs, &nvgpu_dbg_mask);

	/* New debug logging API. */
	debugfs_create_u32("log_mask", S_IRUGO|S_IWUSR,
		platform->debugfs, &g->log_mask);
	debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
		platform->debugfs, &g->log_trace);

	nvgpu_spinlock_init(&g->debugfs_lock);

	/* Defaults for the knobs exposed below. */
	g->mm.ltc_enabled = true;
	g->mm.ltc_enabled_debug = true;

	g->debugfs_ltc_enabled =
		debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
				 platform->debugfs,
				 &g->mm.ltc_enabled_debug);

	g->debugfs_gr_idle_timeout_default =
		debugfs_create_u32("gr_idle_timeout_default_us",
				S_IRUGO|S_IWUSR, platform->debugfs,
				&g->gr_idle_timeout_default);
	g->debugfs_timeouts_enabled =
		debugfs_create_bool("timeouts_enabled",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeouts_enabled);

	g->debugfs_bypass_smmu =
		debugfs_create_bool("bypass_smmu",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->mm.bypass_smmu);
	g->debugfs_disable_bigpage =
		debugfs_create_bool("disable_bigpage",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->mm.disable_bigpage);

	g->debugfs_timeslice_low_priority_us =
		debugfs_create_u32("timeslice_low_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_low_priority_us);
	g->debugfs_timeslice_medium_priority_us =
		debugfs_create_u32("timeslice_medium_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_medium_priority_us);
	g->debugfs_timeslice_high_priority_us =
		debugfs_create_u32("timeslice_high_priority_us",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->timeslice_high_priority_us);
	g->debugfs_runlist_interleave =
		debugfs_create_bool("runlist_interleave",
				S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->runlist_interleave);
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
	/* t18x-only preemption debug knobs. */
	g->gr.t18x.ctx_vars.debugfs_force_preemption_gfxp =
		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->gr.t18x.ctx_vars.force_preemption_gfxp);

	g->gr.t18x.ctx_vars.debugfs_force_preemption_cilp =
		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
				platform->debugfs,
				&g->gr.t18x.ctx_vars.force_preemption_cilp);

	g->gr.t18x.ctx_vars.debugfs_dump_ctxsw_stats =
		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
				S_IRUGO|S_IWUSR, platform->debugfs,
				&g->gr.t18x.
				ctx_vars.dump_ctxsw_stats_on_channel_close);
#endif

	/* Per-subsystem debugfs trees under the same directory. */
	gr_gk20a_debugfs_init(g);
	gk20a_pmu_debugfs_init(g);
	gk20a_railgating_debugfs_init(g);
	gk20a_cde_debugfs_init(g);
	gk20a_ce_debugfs_init(g);
	nvgpu_alloc_debugfs_init(g);
	gk20a_mm_debugfs_init(g);
	gk20a_fifo_debugfs_init(g);
	gk20a_sched_debugfs_init(g);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	nvgpu_kmem_debugfs_init(g);
#endif
}
364
365void gk20a_debug_deinit(struct gk20a *g)
366{
367 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
368
369 if (!platform->debugfs)
370 return;
371
372 gk20a_fifo_debugfs_deinit(g);
373
374 debugfs_remove_recursive(platform->debugfs);
375 debugfs_remove_recursive(platform->debugfs_alias);
376}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
new file mode 100644
index 00000000..3d4a2bb2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
@@ -0,0 +1,80 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_allocator.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/allocator.h>
22
23u32 nvgpu_alloc_tracing_on;
24
/*
 * Dispatch to the allocator backend's stats printer.  @lock tells the
 * backend whether it must take its own lock while walking internal
 * state.
 */
void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
			 struct seq_file *s, int lock)
{
	__a->ops->print_stats(__a, s, lock);
}
30
/* seq_file show handler: print one allocator's stats (with locking). */
static int __alloc_show(struct seq_file *s, void *unused)
{
	struct nvgpu_allocator *a = s->private;

	nvgpu_alloc_print_stats(a, s, 1);

	return 0;
}

/* debugfs open: bind __alloc_show to this seq_file. */
static int __alloc_open(struct inode *inode, struct file *file)
{
	return single_open(file, __alloc_show, inode->i_private);
}

/* File operations for each per-allocator debugfs node. */
static const struct file_operations __alloc_fops = {
	.open = __alloc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
51
52void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
53{
54 if (!g->debugfs_allocators)
55 return;
56
57 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
58 g->debugfs_allocators,
59 a, &__alloc_fops);
60}
61
62void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
63{
64 if (!IS_ERR_OR_NULL(a->debugfs_entry))
65 debugfs_remove(a->debugfs_entry);
66}
67
68void nvgpu_alloc_debugfs_init(struct gk20a *g)
69{
70 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
71
72 g->debugfs_allocators = debugfs_create_dir("allocators", platform->debugfs);
73 if (IS_ERR_OR_NULL(g->debugfs_allocators)) {
74 g->debugfs_allocators = NULL;
75 return;
76 }
77
78 debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
79 &nvgpu_alloc_tracing_on);
80}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
new file mode 100644
index 00000000..1b21cfc5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
/* Linux-only debugfs hooks for the nvgpu allocators. */
#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
#define __NVGPU_DEBUG_ALLOCATOR_H__

struct gk20a;
void nvgpu_alloc_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
new file mode 100644
index 00000000..eb7c33e2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_cde.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
20
/*
 * debugfs write handler for "reload_cde_firmware": any write (the
 * payload itself is ignored) triggers a CDE firmware reload.  Always
 * reports the full count as consumed.
 */
static ssize_t gk20a_cde_reload_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct gk20a *g = file->private_data;

	gk20a_cde_reload(g);
	return count;
}

/* Write-only node; simple_open stashes i_private in private_data. */
static const struct file_operations gk20a_cde_reload_fops = {
	.open = simple_open,
	.write = gk20a_cde_reload_write,
};
33
34void gk20a_cde_debugfs_init(struct gk20a *g)
35{
36 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
37
38 if (!platform->has_cde)
39 return;
40
41 debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
42 platform->debugfs, &g->cde_app.shader_parameter);
43 debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
44 platform->debugfs, &g->cde_app.ctx_count);
45 debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
46 platform->debugfs, &g->cde_app.ctx_usecount);
47 debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
48 platform->debugfs, &g->cde_app.ctx_count_top);
49 debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs,
50 g, &gk20a_cde_reload_fops);
51}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h
new file mode 100644
index 00000000..4895edd6
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
/* Linux-only debugfs hooks for the CDE (color decompression engine). */
#ifndef __NVGPU_DEBUG_CDE_H__
#define __NVGPU_DEBUG_CDE_H__

struct gk20a;
void gk20a_cde_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_CDE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c
new file mode 100644
index 00000000..9c50870e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.c
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_ce.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
20void gk20a_ce_debugfs_init(struct gk20a *g)
21{
22 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
23
24 debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
25 platform->debugfs, &g->ce_app.ctx_count);
26 debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
27 platform->debugfs, &g->ce_app.app_state);
28 debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
29 platform->debugfs, &g->ce_app.next_ctx_id);
30}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h
new file mode 100644
index 00000000..2a8750c4
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
/* Linux-only debugfs hooks for the CE (copy engine) app. */
#ifndef __NVGPU_DEBUG_CE_H__
#define __NVGPU_DEBUG_CE_H__

struct gk20a;
void gk20a_ce_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_CE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
new file mode 100644
index 00000000..6a28b1a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -0,0 +1,369 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_fifo.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
21#include <nvgpu/sort.h>
22
23void __gk20a_fifo_profile_free(struct kref *ref);
24
25static void *gk20a_fifo_sched_debugfs_seq_start(
26 struct seq_file *s, loff_t *pos)
27{
28 struct gk20a *g = s->private;
29 struct fifo_gk20a *f = &g->fifo;
30
31 if (*pos >= f->num_channels)
32 return NULL;
33
34 return &f->channel[*pos];
35}
36
37static void *gk20a_fifo_sched_debugfs_seq_next(
38 struct seq_file *s, void *v, loff_t *pos)
39{
40 struct gk20a *g = s->private;
41 struct fifo_gk20a *f = &g->fifo;
42
43 ++(*pos);
44 if (*pos >= f->num_channels)
45 return NULL;
46
47 return &f->channel[*pos];
48}
49
50static void gk20a_fifo_sched_debugfs_seq_stop(
51 struct seq_file *s, void *v)
52{
53}
54
/*
 * Emit one row of the "sched" debugfs table for channel @v, printing
 * the column header first when @v is the first channel in the array.
 * Channels not set in the GR runlist's active bitmap are skipped
 * (SEQ_SKIP), as are channels whose reference cannot be taken.
 */
static int gk20a_fifo_sched_debugfs_seq_show(
		struct seq_file *s, void *v)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = v;
	struct tsg_gk20a *tsg = NULL;

	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	int ret = SEQ_SKIP;
	u32 engine_id;

	/* Only the GR engine's runlist is inspected. */
	engine_id = gk20a_fifo_get_gr_engine_id(g);
	engine_info = (f->engine_info + engine_id);
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	if (ch == f->channel) {
		seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
		seq_puts(s, " (usecs) (msecs)\n");
		ret = 0;
	}

	if (!test_bit(ch->hw_chid, runlist->active_channels))
		return ret;

	if (gk20a_channel_get(ch)) {
		/* TSG-bound channels report the TSG's scheduling
		 * parameters; standalone channels report their own.
		 */
		if (gk20a_is_channel_marked_as_tsg(ch))
			tsg = &f->tsg[ch->tsgid];

		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
				ch->hw_chid,
				ch->tsgid,
				ch->tgid,
				tsg ? tsg->timeslice_us : ch->timeslice_us,
				ch->timeout_ms_max,
				tsg ? tsg->interleave_level : ch->interleave_level,
				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
		gk20a_channel_put(ch);
	}
	/* NOTE(review): returns 0 even when the channel ref could not
	 * be taken; returning 'ret' (SEQ_SKIP) looks intended — confirm.
	 */
	return 0;
}
100
/* Iterator callbacks for the "sched" seq_file. */
static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
	.start = gk20a_fifo_sched_debugfs_seq_start,
	.next = gk20a_fifo_sched_debugfs_seq_next,
	.stop = gk20a_fifo_sched_debugfs_seq_stop,
	.show = gk20a_fifo_sched_debugfs_seq_show
};

/*
 * debugfs open for "sched": admin-only (exposes per-process channel
 * scheduling info).  Stashes i_private (the gk20a) as the seq_file's
 * private pointer for the iterator callbacks.
 */
static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
	struct file *file)
{
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
	if (err)
		return err;

	gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);

	((struct seq_file *)file->private_data)->private = inode->i_private;
	return 0;
};

/*
 * The file operations structure contains our open function along with
 * set of the canned seq_ ops.
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_fifo_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};
137
/*
 * debugfs "enable" handler for FIFO kickoff profiling.  A non-zero
 * write allocates the sample buffers (if not already live) and turns
 * profiling on; zero turns it off and drops the enable reference so
 * the buffers are freed once no in-flight acquirer still holds one.
 * Returns -ENOMEM if the buffers cannot be allocated.
 */
static int gk20a_fifo_profile_enable(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *) data;
	struct fifo_gk20a *f = &g->fifo;


	nvgpu_mutex_acquire(&f->profile.lock);
	if (val == 0) {
		if (f->profile.enabled) {
			f->profile.enabled = false;
			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
		}
	} else {
		if (!f->profile.enabled) {
			/* not kref init as it can have a running condition if
			 * we enable/disable/enable while kickoff is happening
			 */
			if (!kref_get_unless_zero(&f->profile.ref)) {
				f->profile.data = vzalloc(
						FIFO_PROFILING_ENTRIES *
						sizeof(struct fifo_profile_gk20a));
				f->profile.sorted = vzalloc(
						FIFO_PROFILING_ENTRIES *
						sizeof(u64));
				if (!(f->profile.data && f->profile.sorted)) {
					/* Free whichever allocation succeeded
					 * (assumes nvgpu_vfree tolerates NULL,
					 * as vfree does — the original relies
					 * on this too).
					 */
					nvgpu_vfree(g, f->profile.data);
					nvgpu_vfree(g, f->profile.sorted);
					nvgpu_mutex_release(&f->profile.lock);
					return -ENOMEM;
				}
				kref_init(&f->profile.ref);
			}
			atomic_set(&f->profile.get, 0);
			f->profile.enabled = true;
		}
	}
	nvgpu_mutex_release(&f->profile.lock);

	return 0;
}

/* Write-only attribute (no read callback), parsed as "%llu". */
DEFINE_SIMPLE_ATTRIBUTE(
	gk20a_fifo_profile_enable_debugfs_fops,
	NULL,
	gk20a_fifo_profile_enable,
	"%llu\n"
);
185
/*
 * qsort-style comparator for 64-bit timestamp deltas.
 *
 * The original returned (int)(a - b); truncating a 64-bit unsigned
 * difference to int yields the wrong sign — or zero — whenever the
 * difference does not fit in 32 bits, so sort() could misorder the
 * samples and corrupt every percentile derived from them.  Compare
 * explicitly instead.
 */
static int __profile_cmp(const void *a, const void *b)
{
	unsigned long long x = *(const unsigned long long *) a;
	unsigned long long y = *(const unsigned long long *) b;

	if (x < y)
		return -1;
	if (x > y)
		return 1;
	return 0;
}
190
191/*
192 * This uses about 800b in the stack, but the function using it is not part
193 * of a callstack where much memory is being used, so it is fine
194 */
195#define PERCENTILE_WIDTH 5
196#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
197
198static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
199 u64 *percentiles, u32 index_end, u32 index_start)
200{
201 unsigned int nelem = 0;
202 unsigned int index;
203 struct fifo_profile_gk20a *profile;
204
205 for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
206 profile = &g->fifo.profile.data[index];
207
208 if (profile->timestamp[index_end] >
209 profile->timestamp[index_start]) {
210 /* This is a valid element */
211 g->fifo.profile.sorted[nelem] =
212 profile->timestamp[index_end] -
213 profile->timestamp[index_start];
214 nelem++;
215 }
216 }
217
218 /* sort it */
219 sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
220 __profile_cmp, NULL);
221
222 /* build ranges */
223 for (index = 0; index < PERCENTILE_RANGES; index++)
224 percentiles[index] =
225 g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
226 nelem)/100 - 1];
227 return nelem;
228}
229
/*
 * debugfs "stats" show handler: compute and print percentile tables
 * for each phase of the kickoff path.  Takes a profile reference for
 * the duration so the buffers cannot be freed mid-read; if the
 * reference cannot be taken, profiling is off and we say so.
 */
static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	unsigned int get, nelem, index;
	/*
	 * 800B in the stack, but function is declared statically and only
	 * called from debugfs handler
	 */
	u64 percentiles_ioctl[PERCENTILE_RANGES];
	u64 percentiles_kickoff[PERCENTILE_RANGES];
	u64 percentiles_jobtracking[PERCENTILE_RANGES];
	u64 percentiles_append[PERCENTILE_RANGES];
	u64 percentiles_userd[PERCENTILE_RANGES];

	if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
		seq_printf(s, "Profiling disabled\n");
		return 0;
	}

	get = atomic_read(&g->fifo.profile.get);

	/* One stats pass per instrumented interval of the kickoff. */
	__gk20a_fifo_create_stats(g, percentiles_ioctl,
		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_kickoff,
		PROFILE_END, PROFILE_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_append,
		PROFILE_APPEND, PROFILE_JOB_TRACKING);
	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
		PROFILE_END, PROFILE_APPEND);

	seq_printf(s, "Number of kickoffs: %d\n", nelem);
	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");

	for (index = 0; index < PERCENTILE_RANGES; index++)
		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
			PERCENTILE_WIDTH * (index+1),
			percentiles_ioctl[index],
			percentiles_kickoff[index],
			percentiles_append[index],
			percentiles_jobtracking[index],
			percentiles_userd[index]);

	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);

	return 0;
}

/* debugfs open: bind gk20a_fifo_profile_stats to this seq_file. */
static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
}

/* File operations for the profile "stats" debugfs node. */
static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
	.open = gk20a_fifo_profile_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
290
291
/*
 * Create the fifo/ debugfs subtree: the "sched" channel table and the
 * fifo/profile/ controls.  Profiling state starts disabled with a
 * zero kref so kref_get_unless_zero() fails until "enable" is written.
 */
void gk20a_fifo_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);

	struct dentry *gpu_root = platform->debugfs;
	struct dentry *fifo_root;
	struct dentry *profile_root;

	fifo_root = debugfs_create_dir("fifo", gpu_root);
	if (IS_ERR_OR_NULL(fifo_root))
		return;

	gk20a_dbg(gpu_dbg_info, "g=%p", g);

	debugfs_create_file("sched", 0600, fifo_root, g,
		&gk20a_fifo_sched_debugfs_fops);

	profile_root = debugfs_create_dir("profile", fifo_root);
	if (IS_ERR_OR_NULL(profile_root))
		return;

	nvgpu_mutex_init(&g->fifo.profile.lock);
	g->fifo.profile.enabled = false;
	atomic_set(&g->fifo.profile.get, 0);
	/* Deliberately pokes the kref internals: refcount 0 marks
	 * "profiling disabled" for kref_get_unless_zero() users.
	 */
	atomic_set(&g->fifo.profile.ref.refcount, 0);

	debugfs_create_file("enable", 0600, profile_root, g,
		&gk20a_fifo_profile_enable_debugfs_fops);

	debugfs_create_file("stats", 0600, profile_root, g,
		&gk20a_fifo_profile_stats_debugfs_fops);

}
325
/*
 * kref release callback for the profiling buffers: frees the raw
 * sample ring and the sorting scratch array.  Runs when the last
 * reference (enable flag or in-flight acquire) is dropped.
 */
void __gk20a_fifo_profile_free(struct kref *ref)
{
	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
						profile.ref);
	nvgpu_vfree(f->g, f->profile.data);
	nvgpu_vfree(f->g, f->profile.sorted);
}
333
/* Get the next element in the ring buffer of profile entries
 * and grab a reference to the structure
 */
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_profile_gk20a *profile;
	unsigned int index;

	/* If kref is zero, profiling is not enabled */
	if (!kref_get_unless_zero(&f->profile.ref))
		return NULL;
	/* The counter only ever grows; the ring slot is its value
	 * modulo the ring size, so concurrent acquirers get distinct
	 * slots (until the atomic wraps).
	 */
	index = atomic_inc_return(&f->profile.get);
	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];

	return profile;
}

/* Free the reference to the structure. This allows deferred cleanups */
void gk20a_fifo_profile_release(struct gk20a *g,
	struct fifo_profile_gk20a *profile)
{
	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
}
358
359void gk20a_fifo_debugfs_deinit(struct gk20a *g)
360{
361 struct fifo_gk20a *f = &g->fifo;
362
363 nvgpu_mutex_acquire(&f->profile.lock);
364 if (f->profile.enabled) {
365 f->profile.enabled = false;
366 kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
367 }
368 nvgpu_mutex_release(&f->profile.lock);
369}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
new file mode 100644
index 00000000..46ac853e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
@@ -0,0 +1,22 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_FIFO_H__
16#define __NVGPU_DEBUG_FIFO_H__
17
18struct gk20a;
19void gk20a_fifo_debugfs_init(struct gk20a *g);
20void gk20a_fifo_debugfs_deinit(struct gk20a *g);
21
22#endif /* __NVGPU_DEBUG_FIFO_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c
new file mode 100644
index 00000000..56b8612e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.c
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_gr.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
20int gr_gk20a_debugfs_init(struct gk20a *g)
21{
22 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
23
24 g->debugfs_gr_default_attrib_cb_size =
25 debugfs_create_u32("gr_default_attrib_cb_size",
26 S_IRUGO|S_IWUSR, platform->debugfs,
27 &g->gr.attrib_cb_default_size);
28
29 return 0;
30}
31
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.h b/drivers/gpu/nvgpu/common/linux/debug_gr.h
new file mode 100644
index 00000000..4b46acbb
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_GR_H__
16#define __NVGPU_DEBUG_GR_H__
17
18struct gk20a;
19int gr_gk20a_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_GR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
new file mode 100644
index 00000000..2ee542a8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
@@ -0,0 +1,315 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_kmem.h"
16#include "kmem_priv.h"
17#include "gk20a/platform_gk20a.h"
18
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21
22#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
23/**
24 * to_human_readable_bytes - Determine suffix for passed size.
25 *
26 * @bytes - Number of bytes to generate a suffix for.
27 * @hr_bytes [out] - The human readable number of bytes.
28 * @hr_suffix [out] - The suffix for the HR number of bytes.
29 *
30 * Computes a human readable decomposition of the passed number of bytes. The
31 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
32 * number of bytes is then passed back in @hr_bytes. This returns the following
33 * ranges:
34 *
35 * 0 - 1023 B
36 * 1 - 1023 KB
37 * 1 - 1023 MB
38 * 1 - 1023 GB
39 * 1 - 1023 TB
40 * 1 - ... PB
41 */
42static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
43 const char **hr_suffix)
44{
45 static const char *suffixes[] =
46 { "B", "KB", "MB", "GB", "TB", "PB" };
47
48 u64 suffix_ind = 0;
49
50 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
51 bytes >>= 10;
52 suffix_ind++;
53 }
54
55 /*
56 * Handle case where bytes > 1023PB.
57 */
58 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
59 suffix_ind : ARRAY_SIZE(suffixes) - 1;
60
61 *hr_bytes = bytes;
62 *hr_suffix = suffixes[suffix_ind];
63}
64
65/**
66 * print_hr_bytes - Print human readable bytes
67 *
68 * @s - A seq_file to print to. May be NULL.
69 * @msg - A message to print before the bytes.
70 * @bytes - Number of bytes.
71 *
72 * Print @msg followed by the human readable decomposition of the passed number
73 * of bytes.
74 *
75 * If @s is NULL then this prints will be made to the kernel log.
76 */
77static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
78{
79 u64 hr_bytes;
80 const char *hr_suffix;
81
82 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
83 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
84}
85
86/**
87 * print_histogram - Build a histogram of the memory usage.
88 *
89 * @tracker The tracking to pull data from.
90 * @s A seq_file to dump info into.
91 */
92static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
93 struct seq_file *s)
94{
95 int i;
96 u64 pot_min, pot_max;
97 u64 nr_buckets;
98 unsigned int *buckets;
99 unsigned int total_allocs;
100 struct nvgpu_rbtree_node *node;
101 static const char histogram_line[] =
102 "++++++++++++++++++++++++++++++++++++++++";
103
104 /*
105 * pot_min is essentially a round down to the nearest power of 2. This
106 * is the start of the histogram. pot_max is just a round up to the
107 * nearest power of two. Each histogram bucket is one power of two so
108 * the histogram buckets are exponential.
109 */
110 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
111 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
112
113 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
114
115 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
116 if (!buckets) {
117 __pstat(s, "OOM: could not allocate bucket storage!?\n");
118 return;
119 }
120
121 /*
122 * Iterate across all of the allocs and determine what bucket they
123 * should go in. Round the size down to the nearest power of two to
124 * find the right bucket.
125 */
126 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
127 while (node) {
128 int b;
129 u64 bucket_min;
130 struct nvgpu_mem_alloc *alloc =
131 nvgpu_mem_alloc_from_rbtree_node(node);
132
133 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
134 if (bucket_min < tracker->min_alloc)
135 bucket_min = tracker->min_alloc;
136
137 b = __ffs(bucket_min) - __ffs(pot_min);
138
139 /*
140 * Handle the one case were there's an alloc exactly as big as
141 * the maximum bucket size of the largest bucket. Most of the
142 * buckets have an inclusive minimum and exclusive maximum. But
143 * the largest bucket needs to have an _inclusive_ maximum as
144 * well.
145 */
146 if (b == (int)nr_buckets)
147 b--;
148
149 buckets[b]++;
150
151 nvgpu_rbtree_enum_next(&node, node);
152 }
153
154 total_allocs = 0;
155 for (i = 0; i < (int)nr_buckets; i++)
156 total_allocs += buckets[i];
157
158 __pstat(s, "Alloc histogram:\n");
159
160 /*
161 * Actually compute the histogram lines.
162 */
163 for (i = 0; i < (int)nr_buckets; i++) {
164 char this_line[sizeof(histogram_line) + 1];
165 u64 line_length;
166 u64 hr_bytes;
167 const char *hr_suffix;
168
169 memset(this_line, 0, sizeof(this_line));
170
171 /*
172 * Compute the normalized line length. Cant use floating point
173 * so we will just multiply everything by 1000 and use fixed
174 * point.
175 */
176 line_length = (1000 * buckets[i]) / total_allocs;
177 line_length *= sizeof(histogram_line);
178 line_length /= 1000;
179
180 memset(this_line, '+', line_length);
181
182 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
183 &hr_bytes, &hr_suffix);
184 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
185 hr_bytes, hr_bytes << 1,
186 hr_suffix, buckets[i], this_line);
187 }
188}
189
/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker The tracking to pull data from.
 * @s A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-null then seq_printf() will be
 * used with @s. Otherwise the stats are pr_info()ed (via __pstat()).
 * Takes and releases the tracker lock around the whole dump so counters
 * and the histogram come from one consistent snapshot.
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
			struct seq_file *s)
{
	nvgpu_lock_tracker(tracker);

	__pstat(s, "Mem tracker: %s\n\n", tracker->name);

	__pstat(s, "Basic Stats:\n");
	__pstat(s, "  Number of allocs        %lld\n",
		tracker->nr_allocs);
	__pstat(s, "  Number of frees         %lld\n",
		tracker->nr_frees);
	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
	print_hr_bytes(s, "  Bytes allocated (real)  ",
		       tracker->bytes_alloced_real);
	print_hr_bytes(s, "  Bytes freed (real)      ",
		       tracker->bytes_freed_real);
	__pstat(s, "\n");

	print_histogram(tracker, s);

	nvgpu_unlock_tracker(tracker);
}
225
226static int __kmem_tracking_show(struct seq_file *s, void *unused)
227{
228 struct nvgpu_mem_alloc_tracker *tracker = s->private;
229
230 nvgpu_kmem_print_stats(tracker, s);
231
232 return 0;
233}
234
/* Bind __kmem_tracking_show to the seq_file; inode->i_private carries the
 * tracker pointer passed at file creation time. */
static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_tracking_show, inode->i_private);
}
239
/* Read-only seq_file operations for the per-tracker stats nodes. */
static const struct file_operations __kmem_tracking_fops = {
	.open = __kmem_tracking_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
246
247static int __kmem_traces_dump_tracker(struct gk20a *g,
248 struct nvgpu_mem_alloc_tracker *tracker,
249 struct seq_file *s)
250{
251 struct nvgpu_rbtree_node *node;
252
253 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
254 while (node) {
255 struct nvgpu_mem_alloc *alloc =
256 nvgpu_mem_alloc_from_rbtree_node(node);
257
258 kmem_print_mem_alloc(g, alloc, s);
259
260 nvgpu_rbtree_enum_next(&node, node);
261 }
262
263 return 0;
264}
265
266static int __kmem_traces_show(struct seq_file *s, void *unused)
267{
268 struct gk20a *g = s->private;
269
270 nvgpu_lock_tracker(g->vmallocs);
271 seq_puts(s, "Oustanding vmallocs:\n");
272 __kmem_traces_dump_tracker(g, g->vmallocs, s);
273 seq_puts(s, "\n");
274 nvgpu_unlock_tracker(g->vmallocs);
275
276 nvgpu_lock_tracker(g->kmallocs);
277 seq_puts(s, "Oustanding kmallocs:\n");
278 __kmem_traces_dump_tracker(g, g->kmallocs, s);
279 nvgpu_unlock_tracker(g->kmallocs);
280
281 return 0;
282}
283
/* Bind __kmem_traces_show to the seq_file; inode->i_private is the gk20a
 * device pointer. */
static int __kmem_traces_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_traces_show, inode->i_private);
}
288
/* Read-only seq_file operations for the "traces" node. */
static const struct file_operations __kmem_traces_fops = {
	.open = __kmem_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
295
296void nvgpu_kmem_debugfs_init(struct gk20a *g)
297{
298 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
299 struct dentry *node;
300
301 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
302 if (IS_ERR_OR_NULL(g->debugfs_kmem))
303 return;
304
305 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
306 g->debugfs_kmem,
307 g->vmallocs, &__kmem_tracking_fops);
308 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
309 g->debugfs_kmem,
310 g->kmallocs, &__kmem_tracking_fops);
311 node = debugfs_create_file("traces", S_IRUGO,
312 g->debugfs_kmem,
313 g, &__kmem_traces_fops);
314}
315#endif
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
new file mode 100644
index 00000000..44322b53
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_KMEM_H__
16#define __NVGPU_DEBUG_KMEM_H__
17
18struct gk20a;
19#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
20void nvgpu_kmem_debugfs_init(struct gk20a *g);
21#endif
22
23#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.c b/drivers/gpu/nvgpu/common/linux/debug_mm.c
new file mode 100644
index 00000000..1e260f89
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.c
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_mm.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19
/* Create the MM debugfs node: a read/write bool forcing PRAMIN access
 * for vidmem (g->mm.force_pramin). */
void gk20a_mm_debugfs_init(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(g->dev);

	debugfs_create_bool("force_pramin", 0664, platform->debugfs,
			    &g->mm.force_pramin);
}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.h b/drivers/gpu/nvgpu/common/linux/debug_mm.h
new file mode 100644
index 00000000..bf7bc985
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_MM_H__
16#define __NVGPU_DEBUG_MM_H__
17
18struct gk20a;
19void gk20a_mm_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_MM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
new file mode 100644
index 00000000..f19f5139
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
@@ -0,0 +1,479 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_pmu.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20#include <linux/uaccess.h>
21
22static int lpwr_debug_show(struct seq_file *s, void *data)
23{
24 struct gk20a *g = s->private;
25
26 if (g->ops.pmu.pmu_pg_engines_feature_list &&
27 g->ops.pmu.pmu_pg_engines_feature_list(g,
28 PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
29 PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
30 seq_printf(s, "PSTATE: %u\n"
31 "RPPG Enabled: %u\n"
32 "RPPG ref count: %u\n"
33 "RPPG state: %u\n"
34 "MSCG Enabled: %u\n"
35 "MSCG pstate state: %u\n"
36 "MSCG transition state: %u\n",
37 g->ops.clk_arb.get_current_pstate(g),
38 g->elpg_enabled, g->pmu.elpg_refcnt,
39 g->pmu.elpg_stat, g->mscg_enabled,
40 g->pmu.mscg_stat, g->pmu.mscg_transition_state);
41
42 } else
43 seq_printf(s, "ELPG Enabled: %u\n"
44 "ELPG ref count: %u\n"
45 "ELPG state: %u\n",
46 g->elpg_enabled, g->pmu.elpg_refcnt,
47 g->pmu.elpg_stat);
48
49 return 0;
50
51}
52
/* Bind lpwr_debug_show to the seq_file; i_private is the gk20a pointer. */
static int lpwr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, lpwr_debug_show, inode->i_private);
}
57
/* Read-only seq_file operations for the "lpwr_debug" node. */
static const struct file_operations lpwr_debug_fops = {
	.open = lpwr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
64
65static int mscg_stat_show(struct seq_file *s, void *data)
66{
67 struct gk20a *g = s->private;
68 u64 total_ingating, total_ungating, residency, divisor, dividend;
69 struct pmu_pg_stats_data pg_stat_data = { 0 };
70 int err;
71
72 /* Don't unnecessarily power on the device */
73 if (g->power_on) {
74 err = gk20a_busy(g);
75 if (err)
76 return err;
77
78 gk20a_pmu_get_pg_stats(g,
79 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
80 gk20a_idle(g);
81 }
82 total_ingating = g->pg_ingating_time_us +
83 (u64)pg_stat_data.ingating_time;
84 total_ungating = g->pg_ungating_time_us +
85 (u64)pg_stat_data.ungating_time;
86
87 divisor = total_ingating + total_ungating;
88
89 /* We compute the residency on a scale of 1000 */
90 dividend = total_ingating * 1000;
91
92 if (divisor)
93 residency = div64_u64(dividend, divisor);
94 else
95 residency = 0;
96
97 seq_printf(s,
98 "Time in MSCG: %llu us\n"
99 "Time out of MSCG: %llu us\n"
100 "MSCG residency ratio: %llu\n"
101 "MSCG Entry Count: %u\n"
102 "MSCG Avg Entry latency %u\n"
103 "MSCG Avg Exit latency %u\n",
104 total_ingating, total_ungating,
105 residency, pg_stat_data.gating_cnt,
106 pg_stat_data.avg_entry_latency_us,
107 pg_stat_data.avg_exit_latency_us);
108 return 0;
109
110}
111
/* Bind mscg_stat_show to the seq_file; i_private is the gk20a pointer. */
static int mscg_stat_open(struct inode *inode, struct file *file)
{
	return single_open(file, mscg_stat_show, inode->i_private);
}
116
/* Read-only seq_file operations for the "mscg_residency" node. */
static const struct file_operations mscg_stat_fops = {
	.open = mscg_stat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
123
124static int mscg_transitions_show(struct seq_file *s, void *data)
125{
126 struct gk20a *g = s->private;
127 struct pmu_pg_stats_data pg_stat_data = { 0 };
128 u32 total_gating_cnt;
129 int err;
130
131 if (g->power_on) {
132 err = gk20a_busy(g);
133 if (err)
134 return err;
135
136 gk20a_pmu_get_pg_stats(g,
137 PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
138 gk20a_idle(g);
139 }
140 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
141
142 seq_printf(s, "%u\n", total_gating_cnt);
143 return 0;
144
145}
146
/* Bind mscg_transitions_show to the seq_file; i_private is the gk20a
 * pointer. */
static int mscg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, mscg_transitions_show, inode->i_private);
}
151
/* Read-only seq_file operations for the "mscg_transitions" node. */
static const struct file_operations mscg_transitions_fops = {
	.open = mscg_transitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
158
/*
 * Dump ELPG residency statistics for the graphics engine. Mirrors
 * mscg_stat_show() but queries PMU_PG_ELPG_ENGINE_ID_GRAPHICS.
 * Live PMU counters are fetched only when the GPU is already powered.
 */
static int elpg_stat_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	u64 total_ingating, total_ungating, residency, divisor, dividend;
	int err;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		gk20a_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_ingating = g->pg_ingating_time_us +
			(u64)pg_stat_data.ingating_time;
	total_ungating = g->pg_ungating_time_us +
			(u64)pg_stat_data.ungating_time;
	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	/* Guarded: divisor is zero until any gating has been recorded. */
	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s,
		"Time in ELPG: %llu us\n"
		"Time out of ELPG: %llu us\n"
		"ELPG residency ratio: %llu\n"
		"ELPG Entry Count: %u\n"
		"ELPG Avg Entry latency %u us\n"
		"ELPG Avg Exit latency %u us\n",
		total_ingating, total_ungating,
		residency, pg_stat_data.gating_cnt,
		pg_stat_data.avg_entry_latency_us,
		pg_stat_data.avg_exit_latency_us);
	return 0;
}
204
/* Bind elpg_stat_show to the seq_file; i_private is the gk20a pointer. */
static int elpg_stat_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_stat_show, inode->i_private);
}
209
/* Read-only seq_file operations for the "elpg_residency" node. */
static const struct file_operations elpg_stat_fops = {
	.open = elpg_stat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
216
217static int elpg_transitions_show(struct seq_file *s, void *data)
218{
219 struct gk20a *g = s->private;
220 struct pmu_pg_stats_data pg_stat_data = { 0 };
221 u32 total_gating_cnt;
222 int err;
223
224 if (g->power_on) {
225 err = gk20a_busy(g);
226 if (err)
227 return err;
228
229 gk20a_pmu_get_pg_stats(g,
230 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
231 gk20a_idle(g);
232 }
233 total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
234
235 seq_printf(s, "%u\n", total_gating_cnt);
236 return 0;
237
238}
239
/* Bind elpg_transitions_show to the seq_file; i_private is the gk20a
 * pointer. */
static int elpg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_transitions_show, inode->i_private);
}
244
/* Read-only seq_file operations for the "elpg_transitions" node. */
static const struct file_operations elpg_transitions_fops = {
	.open = elpg_transitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
251
/*
 * Dump the PMU falcon trace buffer. The buffer is copied into system
 * memory, then scanned in 0x40-byte records; each record's message text
 * (starting at byte offset 20) is printed with embedded hex placeholders
 * substituted from the record's u32 words.
 */
static int falc_trace_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_gk20a *pmu = &g->pmu;
	u32 i = 0, j = 0, k, l, m;
	char part_str[40];
	void *tracebuffer;
	char *trace;
	u32 *trace1;

	/* allocate system memory to copy pmu trace buffer */
	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
	if (tracebuffer == NULL)
		return -ENOMEM;

	/* read pmu traces into system memory buffer */
	nvgpu_mem_rd_n(g, &pmu->trace_buf,
		       0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);

	trace = (char *)tracebuffer;
	trace1 = (u32 *)tracebuffer;

	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
		/*
		 * Stop at the first fully-zero window.
		 * NOTE(review): this scans 0x40 *words* (256 bytes) per
		 * 0x40-*byte* record, which overlaps following records and
		 * reads past the end of the buffer on the final records —
		 * confirm intended bounds.
		 */
		for (j = 0; j < 0x40; j++)
			if (trace1[(i / 4) + j])
				break;
		if (j == 0x40)
			break;
		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
		l = 0;
		m = 0;
		/* Substitute each "0x" placeholder in the message text with
		 * the next trailing u32 from the record. k is the offset of
		 * the placeholder within the remaining text. */
		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
			if (k >= 40)
				break;
			strncpy(part_str, (trace+i+20+m), k);
			part_str[k] = 0;
			seq_printf(s, "%s0x%x", part_str,
					trace1[(i / 4) + 1 + l]);
			l++;
			m += k + 2;
		}
		seq_printf(s, "%s", (trace+i+20+m));
	}

	nvgpu_kfree(g, tracebuffer);
	return 0;
}
299
/* Bind falc_trace_show to the seq_file; i_private is the gk20a pointer. */
static int falc_trace_open(struct inode *inode, struct file *file)
{
	return single_open(file, falc_trace_show, inode->i_private);
}
304
/* Read-only seq_file operations for the "falc_trace" node. */
static const struct file_operations falc_trace_fops = {
	.open = falc_trace_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
311
312static int perfmon_events_enable_show(struct seq_file *s, void *data)
313{
314 struct gk20a *g = s->private;
315
316 seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
317 return 0;
318
319}
320
/* Bind perfmon_events_enable_show to the seq_file; i_private is the gk20a
 * pointer. */
static int perfmon_events_enable_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_enable_show, inode->i_private);
}
325
/*
 * Write handler toggling PMU perfmon sampling. Accepts a decimal 0/1
 * from userspace. When the GPU is powered on, sampling is started or
 * stopped immediately on the PMU; otherwise only the flag is recorded
 * and takes effect on the next power-up.
 */
static ssize_t perfmon_events_enable_write(struct file *file,
		const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct seq_file *s = file->private_data;
	struct gk20a *g = s->private;
	unsigned long val = 0;
	char buf[40];
	int buf_size;
	int err;

	/* Bounded copy; buf stays NUL-terminated via the memset. */
	memset(buf, 0, sizeof(buf));
	buf_size = min(count, (sizeof(buf)-1));

	if (copy_from_user(buf, userbuf, buf_size))
		return -EFAULT;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* Don't turn on gk20a unnecessarily */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		/* Only transition when the state actually changes. */
		if (val && !g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = true;
			nvgpu_pmu_perfmon_start_sampling(&(g->pmu));
		} else if (!val && g->pmu.perfmon_sampling_enabled) {
			g->pmu.perfmon_sampling_enabled = false;
			nvgpu_pmu_perfmon_stop_sampling(&(g->pmu));
		}
		gk20a_idle(g);
	} else {
		g->pmu.perfmon_sampling_enabled = val ? true : false;
	}

	return count;
}
365
/* seq_file operations for "perfmon_events_enable"; supports read and
 * write (see perfmon_events_enable_write). */
static const struct file_operations perfmon_events_enable_fops = {
	.open = perfmon_events_enable_open,
	.read = seq_read,
	.write = perfmon_events_enable_write,
	.llseek = seq_lseek,
	.release = single_release,
};
373
/* Print the running count of PMU perfmon events. */
static int perfmon_events_count_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
	return 0;
}
382
/* Bind perfmon_events_count_show to the seq_file; i_private is the gk20a
 * pointer. */
static int perfmon_events_count_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_count_show, inode->i_private);
}
387
/* Read-only seq_file operations for the "perfmon_events_count" node. */
static const struct file_operations perfmon_events_count_fops = {
	.open = perfmon_events_count_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
394
395static int security_show(struct seq_file *s, void *data)
396{
397 struct gk20a *g = s->private;
398
399 seq_printf(s, "%d\n", g->pmu.pmu_mode);
400 return 0;
401
402}
403
/* Bind security_show to the seq_file; i_private is the gk20a pointer. */
static int security_open(struct inode *inode, struct file *file)
{
	return single_open(file, security_show, inode->i_private);
}
408
/* Read-only seq_file operations for the "pmu_security" node. */
static const struct file_operations security_fops = {
	.open = security_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
415
416int gk20a_pmu_debugfs_init(struct gk20a *g)
417{
418 struct dentry *d;
419 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
420
421 d = debugfs_create_file(
422 "lpwr_debug", S_IRUGO|S_IWUSR, platform->debugfs, g,
423 &lpwr_debug_fops);
424 if (!d)
425 goto err_out;
426
427 d = debugfs_create_file(
428 "mscg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
429 &mscg_stat_fops);
430 if (!d)
431 goto err_out;
432
433 d = debugfs_create_file(
434 "mscg_transitions", S_IRUGO, platform->debugfs, g,
435 &mscg_transitions_fops);
436 if (!d)
437 goto err_out;
438
439 d = debugfs_create_file(
440 "elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
441 &elpg_stat_fops);
442 if (!d)
443 goto err_out;
444
445 d = debugfs_create_file(
446 "elpg_transitions", S_IRUGO, platform->debugfs, g,
447 &elpg_transitions_fops);
448 if (!d)
449 goto err_out;
450
451 d = debugfs_create_file(
452 "falc_trace", S_IRUGO, platform->debugfs, g,
453 &falc_trace_fops);
454 if (!d)
455 goto err_out;
456
457 d = debugfs_create_file(
458 "perfmon_events_enable", S_IRUGO, platform->debugfs, g,
459 &perfmon_events_enable_fops);
460 if (!d)
461 goto err_out;
462
463 d = debugfs_create_file(
464 "perfmon_events_count", S_IRUGO, platform->debugfs, g,
465 &perfmon_events_count_fops);
466 if (!d)
467 goto err_out;
468
469 d = debugfs_create_file(
470 "pmu_security", S_IRUGO, platform->debugfs, g,
471 &security_fops);
472 if (!d)
473 goto err_out;
474 return 0;
475err_out:
476 pr_err("%s: Failed to make debugfs node\n", __func__);
477 debugfs_remove_recursive(platform->debugfs);
478 return -ENOMEM;
479}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
new file mode 100644
index 00000000..c4e3243d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#ifndef __NVGPU_DEBUG_PMU_H__
16#define __NVGPU_DEBUG_PMU_H__
17
18struct gk20a;
19int gk20a_pmu_debugfs_init(struct gk20a *g);
20
21#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c
new file mode 100644
index 00000000..40b93149
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.c
@@ -0,0 +1,79 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
15#include "debug_sched.h"
16#include "gk20a/platform_gk20a.h"
17
18#include <linux/debugfs.h>
19#include <linux/seq_file.h>
20
/*
 * Dump scheduler control state: the control-locked flag, whether the
 * busy lock is currently held (probed non-blockingly via tryacquire),
 * and the active/recent TSG bitmaps under the status lock. Powers the
 * GPU on for the duration of the dump.
 */
static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
	bool sched_busy = true;

	/* Number of u64 words in each TSG bitmap. */
	int n = sched->bitmap_size / sizeof(u64);
	int i;
	int err;

	err = gk20a_busy(g);
	if (err)
		return err;

	/* If the busy lock can be taken, the scheduler is not busy. */
	if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
		sched_busy = false;
		nvgpu_mutex_release(&sched->busy_lock);
	}

	seq_printf(s, "control_locked=%d\n", sched->control_locked);
	seq_printf(s, "busy=%d\n", sched_busy);
	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);

	nvgpu_mutex_acquire(&sched->status_lock);

	seq_puts(s, "active_tsg_bitmap\n");
	for (i = 0; i < n; i++)
		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);

	seq_puts(s, "recent_tsg_bitmap\n");
	for (i = 0; i < n; i++)
		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);

	nvgpu_mutex_release(&sched->status_lock);

	gk20a_idle(g);

	return 0;
}
60
/* Bind gk20a_sched_debugfs_show to the seq_file; i_private is the gk20a
 * pointer. */
static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
}
65
/* Read-only seq_file operations for the "sched_ctrl" node. */
static const struct file_operations gk20a_sched_debugfs_fops = {
	.open = gk20a_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
72
73void gk20a_sched_debugfs_init(struct gk20a *g)
74{
75 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
76
77 debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs,
78 g, &gk20a_sched_debugfs_fops);
79}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h
new file mode 100644
index 00000000..34a8f55f
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 */
14
 15#ifndef __NVGPU_DEBUG_SCHED_H__
 16#define __NVGPU_DEBUG_SCHED_H__
 17
 18struct gk20a;
/* Register the scheduler-control debugfs node ("sched_ctrl") for @g. */
 19void gk20a_sched_debugfs_init(struct gk20a *g);
 20
 21#endif /* __NVGPU_DEBUG_SCHED_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index 80e7698b..f85016d4 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -21,6 +21,7 @@
21#include <nvgpu/soc.h> 21#include <nvgpu/soc.h>
22#include <nvgpu/bug.h> 22#include <nvgpu/bug.h>
23#include <nvgpu/enabled.h> 23#include <nvgpu/enabled.h>
24#include <nvgpu/debug.h>
24 25
25#include "gk20a/gk20a_scale.h" 26#include "gk20a/gk20a_scale.h"
26#include "gk20a/gk20a.h" 27#include "gk20a/gk20a.h"
@@ -182,7 +183,7 @@ int nvgpu_probe(struct gk20a *g,
182 nvgpu_init_mm_vars(g); 183 nvgpu_init_mm_vars(g);
183 184
184 gk20a_create_sysfs(g->dev); 185 gk20a_create_sysfs(g->dev);
185 gk20a_debug_init(g->dev, debugfs_symlink); 186 gk20a_debug_init(g, debugfs_symlink);
186 187
187 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); 188 g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
188 if (!g->dbg_regops_tmp_buf) { 189 if (!g->dbg_regops_tmp_buf) {
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 2502ff30..d81328f0 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -26,9 +26,9 @@
26#include <nvgpu/kmem.h> 26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h> 27#include <nvgpu/log.h>
28#include <nvgpu/list.h> 28#include <nvgpu/list.h>
29#include <nvgpu/debug.h>
29 30
30#include "gk20a/gk20a.h" 31#include "gk20a/gk20a.h"
31#include "gk20a/debug_gk20a.h"
32#include "gk20a/ctxsw_trace_gk20a.h" 32#include "gk20a/ctxsw_trace_gk20a.h"
33#include "gk20a/dbg_gpu_gk20a.h" 33#include "gk20a/dbg_gpu_gk20a.h"
34#include "gk20a/fence_gk20a.h" 34#include "gk20a/fence_gk20a.h"
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index d058eba5..41aaa729 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -134,19 +134,19 @@ void __nvgpu_vfree(struct gk20a *g, void *addr)
134 134
135#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE 135#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
136 136
137static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker) 137void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
138{ 138{
139 nvgpu_mutex_acquire(&tracker->lock); 139 nvgpu_mutex_acquire(&tracker->lock);
140} 140}
141 141
142static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker) 142void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
143{ 143{
144 nvgpu_mutex_release(&tracker->lock); 144 nvgpu_mutex_release(&tracker->lock);
145} 145}
146 146
147static void kmem_print_mem_alloc(struct gk20a *g, 147void kmem_print_mem_alloc(struct gk20a *g,
148 struct nvgpu_mem_alloc *alloc, 148 struct nvgpu_mem_alloc *alloc,
149 struct seq_file *s) 149 struct seq_file *s)
150{ 150{
151#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES 151#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
152 int i; 152 int i;
@@ -231,7 +231,7 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
231 alloc->stack_length = stack_trace.nr_entries; 231 alloc->stack_length = stack_trace.nr_entries;
232#endif 232#endif
233 233
234 lock_tracker(tracker); 234 nvgpu_lock_tracker(tracker);
235 tracker->bytes_alloced += size; 235 tracker->bytes_alloced += size;
236 tracker->bytes_alloced_real += real_size; 236 tracker->bytes_alloced_real += real_size;
237 tracker->nr_allocs++; 237 tracker->nr_allocs++;
@@ -246,10 +246,10 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
246 if (ret) { 246 if (ret) {
247 WARN(1, "Duplicate alloc??? 0x%llx\n", addr); 247 WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
248 kfree(alloc); 248 kfree(alloc);
249 unlock_tracker(tracker); 249 nvgpu_unlock_tracker(tracker);
250 return ret; 250 return ret;
251 } 251 }
252 unlock_tracker(tracker); 252 nvgpu_unlock_tracker(tracker);
253 253
254 return 0; 254 return 0;
255} 255}
@@ -259,17 +259,17 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
259{ 259{
260 struct nvgpu_mem_alloc *alloc; 260 struct nvgpu_mem_alloc *alloc;
261 261
262 lock_tracker(tracker); 262 nvgpu_lock_tracker(tracker);
263 alloc = nvgpu_rem_alloc(tracker, addr); 263 alloc = nvgpu_rem_alloc(tracker, addr);
264 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) { 264 if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
265 unlock_tracker(tracker); 265 nvgpu_unlock_tracker(tracker);
266 return -EINVAL; 266 return -EINVAL;
267 } 267 }
268 268
269 tracker->nr_frees++; 269 tracker->nr_frees++;
270 tracker->bytes_freed += alloc->size; 270 tracker->bytes_freed += alloc->size;
271 tracker->bytes_freed_real += alloc->real_size; 271 tracker->bytes_freed_real += alloc->real_size;
272 unlock_tracker(tracker); 272 nvgpu_unlock_tracker(tracker);
273 273
274 return 0; 274 return 0;
275} 275}
@@ -407,307 +407,6 @@ void __nvgpu_track_kfree(struct gk20a *g, void *addr)
407 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr); 407 __nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
408} 408}
409 409
410/**
411 * to_human_readable_bytes - Determine suffix for passed size.
412 *
413 * @bytes - Number of bytes to generate a suffix for.
414 * @hr_bytes [out] - The human readable number of bytes.
415 * @hr_suffix [out] - The suffix for the HR number of bytes.
416 *
417 * Computes a human readable decomposition of the passed number of bytes. The
418 * suffix for the bytes is passed back through the @hr_suffix pointer. The right
419 * number of bytes is then passed back in @hr_bytes. This returns the following
420 * ranges:
421 *
422 * 0 - 1023 B
423 * 1 - 1023 KB
424 * 1 - 1023 MB
425 * 1 - 1023 GB
426 * 1 - 1023 TB
427 * 1 - ... PB
428 */
429static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
430 const char **hr_suffix)
431{
432 static const char *suffixes[] =
433 { "B", "KB", "MB", "GB", "TB", "PB" };
434
435 u64 suffix_ind = 0;
436
437 while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
438 bytes >>= 10;
439 suffix_ind++;
440 }
441
442 /*
443 * Handle case where bytes > 1023PB.
444 */
445 suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
446 suffix_ind : ARRAY_SIZE(suffixes) - 1;
447
448 *hr_bytes = bytes;
449 *hr_suffix = suffixes[suffix_ind];
450}
451
452/**
453 * print_hr_bytes - Print human readable bytes
454 *
455 * @s - A seq_file to print to. May be NULL.
456 * @msg - A message to print before the bytes.
457 * @bytes - Number of bytes.
458 *
459 * Print @msg followed by the human readable decomposition of the passed number
460 * of bytes.
461 *
462 * If @s is NULL then this prints will be made to the kernel log.
463 */
464static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
465{
466 u64 hr_bytes;
467 const char *hr_suffix;
468
469 __to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
470 __pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
471}
472
473/**
474 * print_histogram - Build a histogram of the memory usage.
475 *
476 * @tracker The tracking to pull data from.
477 * @s A seq_file to dump info into.
478 */
479static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
480 struct seq_file *s)
481{
482 int i;
483 u64 pot_min, pot_max;
484 u64 nr_buckets;
485 unsigned int *buckets;
486 unsigned int total_allocs;
487 struct nvgpu_rbtree_node *node;
488 static const char histogram_line[] =
489 "++++++++++++++++++++++++++++++++++++++++";
490
491 /*
492 * pot_min is essentially a round down to the nearest power of 2. This
493 * is the start of the histogram. pot_max is just a round up to the
494 * nearest power of two. Each histogram bucket is one power of two so
495 * the histogram buckets are exponential.
496 */
497 pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
498 pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
499
500 nr_buckets = __ffs(pot_max) - __ffs(pot_min);
501
502 buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
503 if (!buckets) {
504 __pstat(s, "OOM: could not allocate bucket storage!?\n");
505 return;
506 }
507
508 /*
509 * Iterate across all of the allocs and determine what bucket they
510 * should go in. Round the size down to the nearest power of two to
511 * find the right bucket.
512 */
513 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
514 while (node) {
515 int b;
516 u64 bucket_min;
517 struct nvgpu_mem_alloc *alloc =
518 nvgpu_mem_alloc_from_rbtree_node(node);
519
520 bucket_min = (u64)rounddown_pow_of_two(alloc->size);
521 if (bucket_min < tracker->min_alloc)
522 bucket_min = tracker->min_alloc;
523
524 b = __ffs(bucket_min) - __ffs(pot_min);
525
526 /*
527 * Handle the one case were there's an alloc exactly as big as
528 * the maximum bucket size of the largest bucket. Most of the
529 * buckets have an inclusive minimum and exclusive maximum. But
530 * the largest bucket needs to have an _inclusive_ maximum as
531 * well.
532 */
533 if (b == (int)nr_buckets)
534 b--;
535
536 buckets[b]++;
537
538 nvgpu_rbtree_enum_next(&node, node);
539 }
540
541 total_allocs = 0;
542 for (i = 0; i < (int)nr_buckets; i++)
543 total_allocs += buckets[i];
544
545 __pstat(s, "Alloc histogram:\n");
546
547 /*
548 * Actually compute the histogram lines.
549 */
550 for (i = 0; i < (int)nr_buckets; i++) {
551 char this_line[sizeof(histogram_line) + 1];
552 u64 line_length;
553 u64 hr_bytes;
554 const char *hr_suffix;
555
556 memset(this_line, 0, sizeof(this_line));
557
558 /*
559 * Compute the normalized line length. Cant use floating point
560 * so we will just multiply everything by 1000 and use fixed
561 * point.
562 */
563 line_length = (1000 * buckets[i]) / total_allocs;
564 line_length *= sizeof(histogram_line);
565 line_length /= 1000;
566
567 memset(this_line, '+', line_length);
568
569 __to_human_readable_bytes(1 << (__ffs(pot_min) + i),
570 &hr_bytes, &hr_suffix);
571 __pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
572 hr_bytes, hr_bytes << 1,
573 hr_suffix, buckets[i], this_line);
574 }
575}
576
577#ifdef CONFIG_DEBUG_FS
578/**
579 * nvgpu_kmem_print_stats - Print kmem tracking stats.
580 *
581 * @tracker The tracking to pull data from.
582 * @s A seq_file to dump info into.
583 *
584 * Print stats from a tracker. If @s is non-null then seq_printf() will be
585 * used with @s. Otherwise the stats are pr_info()ed.
586 */
587void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
588 struct seq_file *s)
589{
590 lock_tracker(tracker);
591
592 __pstat(s, "Mem tracker: %s\n\n", tracker->name);
593
594 __pstat(s, "Basic Stats:\n");
595 __pstat(s, " Number of allocs %lld\n",
596 tracker->nr_allocs);
597 __pstat(s, " Number of frees %lld\n",
598 tracker->nr_frees);
599 print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
600 print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
601 print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
602 print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
603 print_hr_bytes(s, " Bytes allocated (real) ",
604 tracker->bytes_alloced_real);
605 print_hr_bytes(s, " Bytes freed (real) ",
606 tracker->bytes_freed_real);
607 __pstat(s, "\n");
608
609 print_histogram(tracker, s);
610
611 unlock_tracker(tracker);
612}
613
614static int __kmem_tracking_show(struct seq_file *s, void *unused)
615{
616 struct nvgpu_mem_alloc_tracker *tracker = s->private;
617
618 nvgpu_kmem_print_stats(tracker, s);
619
620 return 0;
621}
622
623static int __kmem_tracking_open(struct inode *inode, struct file *file)
624{
625 return single_open(file, __kmem_tracking_show, inode->i_private);
626}
627
628static const struct file_operations __kmem_tracking_fops = {
629 .open = __kmem_tracking_open,
630 .read = seq_read,
631 .llseek = seq_lseek,
632 .release = single_release,
633};
634
635static int __kmem_traces_dump_tracker(struct gk20a *g,
636 struct nvgpu_mem_alloc_tracker *tracker,
637 struct seq_file *s)
638{
639 struct nvgpu_rbtree_node *node;
640
641 nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
642 while (node) {
643 struct nvgpu_mem_alloc *alloc =
644 nvgpu_mem_alloc_from_rbtree_node(node);
645
646 kmem_print_mem_alloc(g, alloc, s);
647
648 nvgpu_rbtree_enum_next(&node, node);
649 }
650
651 return 0;
652}
653
654static int __kmem_traces_show(struct seq_file *s, void *unused)
655{
656 struct gk20a *g = s->private;
657
658 lock_tracker(g->vmallocs);
659 seq_puts(s, "Oustanding vmallocs:\n");
660 __kmem_traces_dump_tracker(g, g->vmallocs, s);
661 seq_puts(s, "\n");
662 unlock_tracker(g->vmallocs);
663
664 lock_tracker(g->kmallocs);
665 seq_puts(s, "Oustanding kmallocs:\n");
666 __kmem_traces_dump_tracker(g, g->kmallocs, s);
667 unlock_tracker(g->kmallocs);
668
669 return 0;
670}
671
672static int __kmem_traces_open(struct inode *inode, struct file *file)
673{
674 return single_open(file, __kmem_traces_show, inode->i_private);
675}
676
677static const struct file_operations __kmem_traces_fops = {
678 .open = __kmem_traces_open,
679 .read = seq_read,
680 .llseek = seq_lseek,
681 .release = single_release,
682};
683
684void nvgpu_kmem_debugfs_init(struct device *dev)
685{
686 struct gk20a_platform *plat = dev_get_drvdata(dev);
687 struct gk20a *g = get_gk20a(dev);
688 struct dentry *gpu_root = plat->debugfs;
689 struct dentry *node;
690
691 g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
692 if (IS_ERR_OR_NULL(g->debugfs_kmem))
693 return;
694
695 node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
696 g->debugfs_kmem,
697 g->vmallocs, &__kmem_tracking_fops);
698 node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
699 g->debugfs_kmem,
700 g->kmallocs, &__kmem_tracking_fops);
701 node = debugfs_create_file("traces", S_IRUGO,
702 g->debugfs_kmem,
703 g, &__kmem_traces_fops);
704}
705#else
706void nvgpu_kmem_debugfs_init(struct device *dev)
707{
708}
709#endif
710
711static int __do_check_for_outstanding_allocs( 410static int __do_check_for_outstanding_allocs(
712 struct gk20a *g, 411 struct gk20a *g,
713 struct nvgpu_mem_alloc_tracker *tracker, 412 struct nvgpu_mem_alloc_tracker *tracker,
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
index d3abb378..a41762af 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
@@ -20,6 +20,8 @@
20#include <nvgpu/rbtree.h> 20#include <nvgpu/rbtree.h>
21#include <nvgpu/lock.h> 21#include <nvgpu/lock.h>
22 22
23struct seq_file;
24
23#define __pstat(s, fmt, msg...) \ 25#define __pstat(s, fmt, msg...) \
24 do { \ 26 do { \
25 if (s) \ 27 if (s) \
@@ -92,6 +94,12 @@ struct nvgpu_mem_alloc_tracker {
92 unsigned long max_alloc; 94 unsigned long max_alloc;
93}; 95};
94 96
97void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
98void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
99
100void kmem_print_mem_alloc(struct gk20a *g,
101 struct nvgpu_mem_alloc *alloc,
102 struct seq_file *s);
95#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */ 103#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
96 104
97#endif /* __KMEM_PRIV_H__ */ 105#endif /* __KMEM_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index d5fc40de..4f7fc3fa 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -29,6 +29,7 @@
29#include <nvgpu/nvgpu_common.h> 29#include <nvgpu/nvgpu_common.h>
30#include <nvgpu/soc.h> 30#include <nvgpu/soc.h>
31#include <nvgpu/enabled.h> 31#include <nvgpu/enabled.h>
32#include <nvgpu/debug.h>
32 33
33#include "gk20a/gk20a.h" 34#include "gk20a/gk20a.h"
34#include "gk20a/platform_gk20a.h" 35#include "gk20a/platform_gk20a.h"
@@ -970,10 +971,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
970 971
971 gk20a_user_deinit(dev, &nvgpu_class); 972 gk20a_user_deinit(dev, &nvgpu_class);
972 973
973#ifdef CONFIG_DEBUG_FS 974 gk20a_debug_deinit(g);
974 debugfs_remove_recursive(platform->debugfs);
975 debugfs_remove_recursive(platform->debugfs_alias);
976#endif
977 975
978 gk20a_remove_sysfs(dev); 976 gk20a_remove_sysfs(dev);
979 977
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 40ee199a..eae0475a 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -411,7 +411,9 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
411 wmb(); 411 wmb();
412 a->inited = true; 412 a->inited = true;
413 413
414#ifdef CONFIG_DEBUG_FS
414 nvgpu_init_alloc_debug(g, __a); 415 nvgpu_init_alloc_debug(g, __a);
416#endif
415 alloc_dbg(__a, "New allocator: type bitmap\n"); 417 alloc_dbg(__a, "New allocator: type bitmap\n");
416 alloc_dbg(__a, " base 0x%llx\n", a->base); 418 alloc_dbg(__a, " base 0x%llx\n", a->base);
417 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs); 419 alloc_dbg(__a, " bit_offs 0x%llx\n", a->bit_offs);
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 34bc51df..0ef94c10 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -251,7 +251,9 @@ static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
251 251
252 alloc_lock(__a); 252 alloc_lock(__a);
253 253
254#ifdef CONFIG_DEBUG_FS
254 nvgpu_fini_alloc_debug(__a); 255 nvgpu_fini_alloc_debug(__a);
256#endif
255 257
256 /* 258 /*
257 * Free the fixed allocs first. 259 * Free the fixed allocs first.
@@ -1290,7 +1292,9 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
1290 wmb(); 1292 wmb();
1291 a->initialized = 1; 1293 a->initialized = 1;
1292 1294
1295#ifdef CONFIG_DEBUG_FS
1293 nvgpu_init_alloc_debug(g, __a); 1296 nvgpu_init_alloc_debug(g, __a);
1297#endif
1294 alloc_dbg(__a, "New allocator: type buddy\n"); 1298 alloc_dbg(__a, "New allocator: type buddy\n");
1295 alloc_dbg(__a, " base 0x%llx\n", a->base); 1299 alloc_dbg(__a, " base 0x%llx\n", a->base);
1296 alloc_dbg(__a, " size 0x%llx\n", a->length); 1300 alloc_dbg(__a, " size 0x%llx\n", a->length);
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
index 234ae4a3..944b4b0f 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -99,7 +99,9 @@ static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a)
99{ 99{
100 struct nvgpu_lockless_allocator *pa = a->priv; 100 struct nvgpu_lockless_allocator *pa = a->priv;
101 101
102#ifdef CONFIG_DEBUG_FS
102 nvgpu_fini_alloc_debug(a); 103 nvgpu_fini_alloc_debug(a);
104#endif
103 105
104 nvgpu_vfree(a->g, pa->next); 106 nvgpu_vfree(a->g, pa->next);
105 nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa); 107 nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa);
@@ -191,7 +193,9 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
191 wmb(); 193 wmb();
192 a->inited = true; 194 a->inited = true;
193 195
196#ifdef CONFIG_DEBUG_FS
194 nvgpu_init_alloc_debug(g, __a); 197 nvgpu_init_alloc_debug(g, __a);
198#endif
195 alloc_dbg(__a, "New allocator: type lockless\n"); 199 alloc_dbg(__a, "New allocator: type lockless\n");
196 alloc_dbg(__a, " base 0x%llx\n", a->base); 200 alloc_dbg(__a, " base 0x%llx\n", a->base);
197 alloc_dbg(__a, " nodes %d\n", a->nr_nodes); 201 alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index 211b353b..1646d2b1 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -20,11 +20,6 @@
20 20
21#include "gk20a/gk20a.h" 21#include "gk20a/gk20a.h"
22#include "gk20a/mm_gk20a.h" 22#include "gk20a/mm_gk20a.h"
23#ifdef CONFIG_DEBUG_FS
24#include "gk20a/platform_gk20a.h"
25#endif
26
27u32 nvgpu_alloc_tracing_on;
28 23
29u64 nvgpu_alloc_length(struct nvgpu_allocator *a) 24u64 nvgpu_alloc_length(struct nvgpu_allocator *a)
30{ 25{
@@ -151,68 +146,3 @@ int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
151 146
152 return 0; 147 return 0;
153} 148}
154
155#ifdef CONFIG_DEBUG_FS
156void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
157 struct seq_file *s, int lock)
158{
159 __a->ops->print_stats(__a, s, lock);
160}
161
162static int __alloc_show(struct seq_file *s, void *unused)
163{
164 struct nvgpu_allocator *a = s->private;
165
166 nvgpu_alloc_print_stats(a, s, 1);
167
168 return 0;
169}
170
171static int __alloc_open(struct inode *inode, struct file *file)
172{
173 return single_open(file, __alloc_show, inode->i_private);
174}
175
176static const struct file_operations __alloc_fops = {
177 .open = __alloc_open,
178 .read = seq_read,
179 .llseek = seq_lseek,
180 .release = single_release,
181};
182#endif
183
184void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
185{
186#ifdef CONFIG_DEBUG_FS
187 if (!g->debugfs_allocators)
188 return;
189
190 a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
191 g->debugfs_allocators,
192 a, &__alloc_fops);
193#endif
194}
195
196void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
197{
198#ifdef CONFIG_DEBUG_FS
199 if (!IS_ERR_OR_NULL(a->debugfs_entry))
200 debugfs_remove(a->debugfs_entry);
201#endif
202}
203
204#ifdef CONFIG_DEBUG_FS
205void nvgpu_alloc_debugfs_init(struct device *dev)
206{
207 struct gk20a_platform *platform = dev_get_drvdata(dev);
208 struct dentry *gpu_root = platform->debugfs;
209 struct gk20a *g = get_gk20a(dev);
210
211 g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root);
212 if (IS_ERR_OR_NULL(g->debugfs_allocators))
213 return;
214
215 debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
216 &nvgpu_alloc_tracing_on);
217}
218#endif
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 14b5da3c..3f4f3706 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -916,7 +916,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
916 if (err) 916 if (err)
917 goto fail; 917 goto fail;
918 918
919#ifdef CONFIG_DEBUG_FS
919 nvgpu_init_alloc_debug(g, __a); 920 nvgpu_init_alloc_debug(g, __a);
921#endif
920 palloc_dbg(a, "New allocator: type page\n"); 922 palloc_dbg(a, "New allocator: type page\n");
921 palloc_dbg(a, " base 0x%llx\n", a->base); 923 palloc_dbg(a, " base 0x%llx\n", a->base);
922 palloc_dbg(a, " size 0x%llx\n", a->length); 924 palloc_dbg(a, " size 0x%llx\n", a->length);