From 6090a8a7ee347f92d806f104d3a0082208f5df64 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Wed, 24 May 2017 17:37:04 +0530
Subject: gpu: nvgpu: move debugfs code to linux module

Since all debugfs code is Linux specific, remove
it from common code and move it to the Linux module.

Debugfs code is now divided into the following
module-specific files:

common/linux/debug.c
common/linux/debug_cde.c
common/linux/debug_ce.c
common/linux/debug_fifo.c
common/linux/debug_gr.c
common/linux/debug_mm.c
common/linux/debug_allocator.c
common/linux/debug_kmem.c
common/linux/debug_pmu.c
common/linux/debug_sched.c

Add corresponding header files for the above modules too,
and compile all of the above files only if CONFIG_DEBUG_FS is set.

Some more details of the changes made

- Move and rename gk20a/debug_gk20a.c to common/linux/debug.c
- Move and rename gk20a/debug_gk20a.h to include/nvgpu/debug.h

- Remove gm20b/debug_gm20b.c and gm20b/debug_gm20b.h and call
  gk20a_init_debug_ops() directly from gm20b_init_hal()

- Update all debug APIs to receive struct gk20a as parameter
  instead of receiving struct device pointer
- Update API gk20a_dmabuf_get_state() to receive struct gk20a
  pointer instead of struct device

- Include <nvgpu/debug.h> explicitly in all files where debug
  operations are used
- Remove "gk20a/platform_gk20a.h" include from HAL files
  which no longer need this include

- Add new API gk20a_debug_deinit() to deinitialize debugfs
  and call it from gk20a_remove()
- Move API gk20a_debug_dump_all_channel_status_ramfc() to
  gk20a/fifo_gk20a.c

Jira NVGPU-62

Change-Id: I076975d3d7f669bdbe9212fa33d98529377feeb6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1488902
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu                 |  14 +-
 drivers/gpu/nvgpu/common/linux/debug.c           | 376 +++++++++++++++++
 drivers/gpu/nvgpu/common/linux/debug_allocator.c |  80 ++++
 drivers/gpu/nvgpu/common/linux/debug_allocator.h |  21 +
 drivers/gpu/nvgpu/common/linux/debug_cde.c       |  51 +++
 drivers/gpu/nvgpu/common/linux/debug_cde.h       |  21 +
 drivers/gpu/nvgpu/common/linux/debug_ce.c        |  30 ++
 drivers/gpu/nvgpu/common/linux/debug_ce.h        |  21 +
 drivers/gpu/nvgpu/common/linux/debug_fifo.c      | 369 +++++++++++++++++
 drivers/gpu/nvgpu/common/linux/debug_fifo.h      |  22 +
 drivers/gpu/nvgpu/common/linux/debug_gr.c        |  31 ++
 drivers/gpu/nvgpu/common/linux/debug_gr.h        |  21 +
 drivers/gpu/nvgpu/common/linux/debug_kmem.c      | 315 +++++++++++++++
 drivers/gpu/nvgpu/common/linux/debug_kmem.h      |  23 ++
 drivers/gpu/nvgpu/common/linux/debug_mm.c        |  26 ++
 drivers/gpu/nvgpu/common/linux/debug_mm.h        |  21 +
 drivers/gpu/nvgpu/common/linux/debug_pmu.c       | 479 ++++++++++++++++++++++
 drivers/gpu/nvgpu/common/linux/debug_pmu.h       |  21 +
 drivers/gpu/nvgpu/common/linux/debug_sched.c     |  79 ++++
 drivers/gpu/nvgpu/common/linux/debug_sched.h     |  21 +
 drivers/gpu/nvgpu/common/linux/driver_common.c   |   3 +-
 drivers/gpu/nvgpu/common/linux/ioctl_channel.c   |   2 +-
 drivers/gpu/nvgpu/common/linux/kmem.c            | 323 +--------------
 drivers/gpu/nvgpu/common/linux/kmem_priv.h       |   8 +
 drivers/gpu/nvgpu/common/linux/module.c          |   6 +-
 drivers/gpu/nvgpu/common/mm/bitmap_allocator.c   |   2 +
 drivers/gpu/nvgpu/common/mm/buddy_allocator.c    |   4 +
 drivers/gpu/nvgpu/common/mm/lockless_allocator.c |   4 +
 drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c    |  70 ----
 drivers/gpu/nvgpu/common/mm/page_allocator.c     |   2 +
 drivers/gpu/nvgpu/gk20a/cde_gk20a.c              |  45 +--
 drivers/gpu/nvgpu/gk20a/cde_gk20a.h              |   1 -
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c              |  24 --
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.h              |   6 -
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c          |  10 +-
 drivers/gpu/nvgpu/gk20a/debug_gk20a.c            | 425 --------------------
 drivers/gpu/nvgpu/gk20a/debug_gk20a.h            |  41 --
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c             | 414 +++----------------
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h             |   7 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h                  |   6 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c               |  22 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h               |   1 -
 drivers/gpu/nvgpu/gk20a/hal_gk20a.c              |   1 +
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c               |  16 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h               |   3 +-
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.c              | 487 +----------------------
 drivers/gpu/nvgpu/gk20a/pmu_gk20a.h              |   7 +-
 drivers/gpu/nvgpu/gk20a/sched_gk20a.c            |  67 ----
 drivers/gpu/nvgpu/gk20a/sched_gk20a.h            |   1 -
 drivers/gpu/nvgpu/gm20b/debug_gm20b.c            |  21 -
 drivers/gpu/nvgpu/gm20b/debug_gm20b.h            |  24 --
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c               |   1 +
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c              |   4 +-
 drivers/gpu/nvgpu/gp106/hal_gp106.c              |   1 +
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c               |   1 +
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c              |   1 +
 drivers/gpu/nvgpu/include/nvgpu/allocator.h      |   7 +-
 drivers/gpu/nvgpu/include/nvgpu/debug.h          |  55 +++
 drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h     |   6 -
 drivers/gpu/nvgpu/vgpu/vgpu.c                    |   4 +-
 60 files changed, 2229 insertions(+), 1946 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_allocator.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_allocator.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_cde.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_cde.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_ce.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_ce.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_fifo.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_fifo.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_gr.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_gr.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_kmem.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_kmem.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_mm.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_mm.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_pmu.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_pmu.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_sched.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_sched.h
 delete mode 100644 drivers/gpu/nvgpu/gk20a/debug_gk20a.c
 delete mode 100644 drivers/gpu/nvgpu/gk20a/debug_gk20a.h
 delete mode 100644 drivers/gpu/nvgpu/gm20b/debug_gm20b.c
 delete mode 100644 drivers/gpu/nvgpu/gm20b/debug_gm20b.h
 create mode 100644 drivers/gpu/nvgpu/include/nvgpu/debug.h

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index e7ea3c5d..4b6a8e87 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -66,7 +66,6 @@ nvgpu-y := \
 	gk20a/fifo_gk20a.o \
 	gk20a/channel_gk20a.o \
 	gk20a/channel_sync_gk20a.o \
-	gk20a/debug_gk20a.o \
 	gk20a/dbg_gpu_gk20a.o \
 	gk20a/regops_gk20a.o \
 	gk20a/gr_gk20a.o \
@@ -107,7 +106,6 @@ nvgpu-y := \
 	gm20b/mm_gm20b.o \
 	gm20b/regops_gm20b.o \
 	gm20b/mc_gm20b.o \
-	gm20b/debug_gm20b.o \
 	gm20b/cde_gm20b.o \
 	gm20b/therm_gm20b.o \
 	gm206/bios_gm206.o \
@@ -117,6 +115,18 @@ nvgpu-y := \
 	boardobj/boardobjgrp_e255.o \
 	boardobj/boardobjgrp_e32.o
 
+nvgpu-$(CONFIG_DEBUG_FS) += \
+	common/linux/debug.o \
+	common/linux/debug_gr.o \
+	common/linux/debug_fifo.o \
+	common/linux/debug_cde.o \
+	common/linux/debug_ce.o \
+	common/linux/debug_pmu.o \
+	common/linux/debug_sched.o \
+	common/linux/debug_mm.o \
+	common/linux/debug_allocator.o \
+	common/linux/debug_kmem.o
+
 nvgpu-$(CONFIG_TEGRA_GK20A) += tegra/linux/platform_gk20a_tegra.o
 nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
 nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o
diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c
new file mode 100644
index 00000000..2962a467
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_cde.h"
+#include "debug_ce.h"
+#include "debug_fifo.h"
+#include "debug_gr.h"
+#include "debug_mm.h"
+#include "debug_allocator.h"
+#include "debug_kmem.h"
+#include "debug_pmu.h"
+#include "debug_sched.h"
+
+#include "gk20a/gk20a.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <nvgpu/debug.h>
+
+unsigned int gk20a_debug_trace_cmdbuf;
+
+static inline void gk20a_debug_write_printk(void *ctx, const char *str,
+					    size_t len)
+{
+	pr_info("%s", str);
+}
+
+static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
+						size_t len)
+{
+	seq_write((struct seq_file *)ctx, str, len);
+}
+
+/* Print into o->buf, emit via o->fn; vscnprintf() bounds len if truncated. */
+void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vscnprintf(o->buf, sizeof(o->buf), fmt, args);
+	va_end(args);
+	o->fn(o->ctx, o->buf, len);
+}
+
+static int gk20a_gr_dump_regs(struct gk20a *g,
+		struct gk20a_debug_output *o)
+{
+	if (g->ops.gr.dump_gr_regs)
+		gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));
+
+	return 0;
+}
+
+int gk20a_gr_debug_dump(struct gk20a *g)
+{
+	struct gk20a_debug_output o = {
+		.fn = gk20a_debug_write_printk
+	};
+
+	gk20a_gr_dump_regs(g, &o);
+
+	return 0;
+}
+
+/* debugfs show handler: dump GR registers into @s between busy/idle. */
+static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
+{
+	struct device *dev = s->private;
+	struct gk20a *g = gk20a_get_platform(dev)->g;
+	struct gk20a_debug_output o = {
+		.fn = gk20a_debug_write_to_seqfile,
+		.ctx = s,
+	};
+	int err;
+
+	err = gk20a_busy(g);
+	if (err) {
+		nvgpu_err(g, "failed to power on gpu: %d", err);
+		return err;
+	}
+
+	gk20a_gr_dump_regs(g, &o);
+	gk20a_idle(g);
+
+	return 0;
+}
+
+void gk20a_debug_dump(struct gk20a *g)
+{
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+	struct gk20a_debug_output o = {
+		.fn = gk20a_debug_write_printk
+	};
+
+	if (platform->dump_platform_dependencies)
+		platform->dump_platform_dependencies(g->dev);
+
+	/* HAL only initialized after 1st power-on */
+	if (g->ops.debug.show_dump)
+		g->ops.debug.show_dump(g, &o);
+}
+
+/* debugfs show handler: run HAL show_dump between gk20a_busy()/gk20a_idle(). */
+static int gk20a_debug_show(struct seq_file *s, void *unused)
+{
+	struct device *dev = s->private;
+	struct gk20a *g = gk20a_get_platform(dev)->g;
+	struct gk20a_debug_output o = {
+		.fn = gk20a_debug_write_to_seqfile,
+		.ctx = s,
+	};
+	int err;
+
+	err = gk20a_busy(g);
+	if (err) {
+		nvgpu_err(g, "failed to power on gpu: %d", err);
+		return err;
+	}
+
+	/* HAL only initialized after 1st power-on */
+	if (g->ops.debug.show_dump)
+		g->ops.debug.show_dump(g, &o);
+
+	gk20a_idle(g);
+
+	return 0;
+}
+
+static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_gr_debug_show, inode->i_private);
+}
+
+static int gk20a_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_debug_show, inode->i_private);
+}
+
+static const struct file_operations gk20a_gr_debug_fops = {
+	.open		= gk20a_gr_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations gk20a_debug_fops = {
+	.open		= gk20a_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
+{
+	g->ops.fifo.dump_pbdma_status(g, o);
+	g->ops.fifo.dump_eng_status(g, o);
+
+	gk20a_debug_dump_all_channel_status_ramfc(g, o);
+}
+
+void gk20a_init_debug_ops(struct gpu_ops *gops)
+{
+	gops->debug.show_dump = gk20a_debug_show_dump;
+}
+
+static int railgate_residency_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	unsigned long time_since_last_state_transition_ms;
+	unsigned long total_rail_gate_time_ms;
+	unsigned long total_rail_ungate_time_ms;
+
+	if (platform->is_railgated(g->dev)) {
+		time_since_last_state_transition_ms =
+				jiffies_to_msecs(jiffies -
+				g->pstats.last_rail_gate_complete);
+		total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
+		total_rail_gate_time_ms =
+					g->pstats.total_rail_gate_time_ms +
+					time_since_last_state_transition_ms;
+	} else {
+		time_since_last_state_transition_ms =
+				jiffies_to_msecs(jiffies -
+				g->pstats.last_rail_ungate_complete);
+		total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
+		total_rail_ungate_time_ms =
+					g->pstats.total_rail_ungate_time_ms +
+					time_since_last_state_transition_ms;
+	}
+
+	seq_printf(s, "Time with Rails Gated: %lu ms\n"
+			"Time with Rails UnGated: %lu ms\n"
+			"Total railgating cycles: %lu\n",
+			total_rail_gate_time_ms,
+			total_rail_ungate_time_ms,
+			g->pstats.railgating_cycle_count - 1);
+	return 0;
+
+}
+
+static int railgate_residency_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, railgate_residency_show, inode->i_private);
+}
+
+static const struct file_operations railgate_residency_fops = {
+	.open		= railgate_residency_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* Add "railgate_residency" when the GPU can railgate; -ENOMEM on failure. */
+static int gk20a_railgating_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *d;
+
+	if (!g->can_railgate)
+		return 0;
+
+	/* Read-only: railgate_residency_fops provides no .write handler. */
+	d = debugfs_create_file("railgate_residency", S_IRUGO,
+				platform->debugfs, g,
+				&railgate_residency_fops);
+
+	return d ? 0 : -ENOMEM;
+}
+
+void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
+{
+	struct device *dev = g->dev;
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+
+	platform->debugfs = debugfs_create_dir(dev_name(dev), NULL);
+	if (!platform->debugfs)
+		return;
+
+	if (debugfs_symlink)
+		platform->debugfs_alias =
+			debugfs_create_symlink(debugfs_symlink,
+					NULL, dev_name(dev));
+
+	debugfs_create_file("status", S_IRUGO, platform->debugfs,
+		dev, &gk20a_debug_fops);
+	debugfs_create_file("gr_status", S_IRUGO, platform->debugfs,
+		dev, &gk20a_gr_debug_fops);
+	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
+		platform->debugfs, &gk20a_debug_trace_cmdbuf);
+
+	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
+		platform->debugfs, &g->ch_wdt_timeout_ms);
+
+	debugfs_create_u32("disable_syncpoints", S_IRUGO|S_IWUSR,
+		platform->debugfs, &g->disable_syncpoints);
+
+	/* Legacy debugging API. */
+	debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR,
+		platform->debugfs, &nvgpu_dbg_mask);
+
+	/* New debug logging API. */
+	debugfs_create_u32("log_mask", S_IRUGO|S_IWUSR,
+		platform->debugfs, &g->log_mask);
+	debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
+		platform->debugfs, &g->log_trace);
+
+	nvgpu_spinlock_init(&g->debugfs_lock);
+
+	g->mm.ltc_enabled = true;
+	g->mm.ltc_enabled_debug = true;
+
+	g->debugfs_ltc_enabled =
+			debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
+				 platform->debugfs,
+				 &g->mm.ltc_enabled_debug);
+
+	g->debugfs_gr_idle_timeout_default =
+			debugfs_create_u32("gr_idle_timeout_default_us",
+					S_IRUGO|S_IWUSR, platform->debugfs,
+					 &g->gr_idle_timeout_default);
+	g->debugfs_timeouts_enabled =
+			debugfs_create_bool("timeouts_enabled",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->timeouts_enabled);
+
+	g->debugfs_bypass_smmu =
+			debugfs_create_bool("bypass_smmu",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->mm.bypass_smmu);
+	g->debugfs_disable_bigpage =
+			debugfs_create_bool("disable_bigpage",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->mm.disable_bigpage);
+
+	g->debugfs_timeslice_low_priority_us =
+			debugfs_create_u32("timeslice_low_priority_us",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->timeslice_low_priority_us);
+	g->debugfs_timeslice_medium_priority_us =
+			debugfs_create_u32("timeslice_medium_priority_us",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->timeslice_medium_priority_us);
+	g->debugfs_timeslice_high_priority_us =
+			debugfs_create_u32("timeslice_high_priority_us",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->timeslice_high_priority_us);
+	g->debugfs_runlist_interleave =
+			debugfs_create_bool("runlist_interleave",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->runlist_interleave);
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+	g->gr.t18x.ctx_vars.debugfs_force_preemption_gfxp =
+		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
+		platform->debugfs,
+		&g->gr.t18x.ctx_vars.force_preemption_gfxp);
+
+	g->gr.t18x.ctx_vars.debugfs_force_preemption_cilp =
+		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
+		platform->debugfs,
+		&g->gr.t18x.ctx_vars.force_preemption_cilp);
+
+	g->gr.t18x.ctx_vars.debugfs_dump_ctxsw_stats =
+		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
+			S_IRUGO|S_IWUSR, platform->debugfs,
+			&g->gr.t18x.
+				ctx_vars.dump_ctxsw_stats_on_channel_close);
+#endif
+
+	gr_gk20a_debugfs_init(g);
+	gk20a_pmu_debugfs_init(g);
+	gk20a_railgating_debugfs_init(g);
+	gk20a_cde_debugfs_init(g);
+	gk20a_ce_debugfs_init(g);
+	nvgpu_alloc_debugfs_init(g);
+	gk20a_mm_debugfs_init(g);
+	gk20a_fifo_debugfs_init(g);
+	gk20a_sched_debugfs_init(g);
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+	nvgpu_kmem_debugfs_init(g);
+#endif
+}
+
+void gk20a_debug_deinit(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	if (!platform->debugfs)
+		return;
+
+	gk20a_fifo_debugfs_deinit(g);
+
+	debugfs_remove_recursive(platform->debugfs);
+	debugfs_remove_recursive(platform->debugfs_alias);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
new file mode 100644
index 00000000..3d4a2bb2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_allocator.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <nvgpu/allocator.h>
+
+u32 nvgpu_alloc_tracing_on;
+
+void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
+			     struct seq_file *s, int lock)
+{
+	__a->ops->print_stats(__a, s, lock);
+}
+
+static int __alloc_show(struct seq_file *s, void *unused)
+{
+	struct nvgpu_allocator *a = s->private;
+
+	nvgpu_alloc_print_stats(a, s, 1);
+
+	return 0;
+}
+
+static int __alloc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __alloc_show, inode->i_private);
+}
+
+static const struct file_operations __alloc_fops = {
+	.open = __alloc_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
+{
+	if (!g->debugfs_allocators)
+		return;
+
+	a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
+					       g->debugfs_allocators,
+					       a, &__alloc_fops);
+}
+
+void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
+{
+	/* debugfs_remove() already ignores NULL and ERR_PTR dentries. */
+	debugfs_remove(a->debugfs_entry);
+}
+
+void nvgpu_alloc_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	g->debugfs_allocators = debugfs_create_dir("allocators", platform->debugfs);
+	if (IS_ERR_OR_NULL(g->debugfs_allocators)) {
+		g->debugfs_allocators = NULL;
+		return;
+	}
+
+	debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
+			   &nvgpu_alloc_tracing_on);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
new file mode 100644
index 00000000..1b21cfc5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
+#define __NVGPU_DEBUG_ALLOCATOR_H__
+
+struct gk20a;
+void nvgpu_alloc_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
new file mode 100644
index 00000000..eb7c33e2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_cde.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+
+
+static ssize_t gk20a_cde_reload_write(struct file *file,
+	const char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct gk20a *g = file->private_data;
+	gk20a_cde_reload(g);
+	return count;
+}
+
+static const struct file_operations gk20a_cde_reload_fops = {
+	.open		= simple_open,
+	.write		= gk20a_cde_reload_write,
+};
+
+void gk20a_cde_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	if (!platform->has_cde)
+		return;
+
+	debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->cde_app.shader_parameter);
+	debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->cde_app.ctx_count);
+	debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->cde_app.ctx_usecount);
+	debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->cde_app.ctx_count_top);
+	debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs,
+			    g, &gk20a_cde_reload_fops);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h
new file mode 100644
index 00000000..4895edd6
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_CDE_H__
+#define __NVGPU_DEBUG_CDE_H__
+
+struct gk20a;
+void gk20a_cde_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_CDE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c
new file mode 100644
index 00000000..9c50870e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_ce.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+
+void gk20a_ce_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->ce_app.ctx_count);
+	debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->ce_app.app_state);
+	debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->ce_app.next_ctx_id);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h
new file mode 100644
index 00000000..2a8750c4
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_CE_H__
+#define __NVGPU_DEBUG_CE_H__
+
+struct gk20a;
+void gk20a_ce_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_CE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
new file mode 100644
index 00000000..6a28b1a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_fifo.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <nvgpu/sort.h>
+
+void __gk20a_fifo_profile_free(struct kref *ref);
+
+static void *gk20a_fifo_sched_debugfs_seq_start(
+		struct seq_file *s, loff_t *pos)
+{
+	struct gk20a *g = s->private;
+	struct fifo_gk20a *f = &g->fifo;
+
+	if (*pos >= f->num_channels)
+		return NULL;
+
+	return &f->channel[*pos];
+}
+
+static void *gk20a_fifo_sched_debugfs_seq_next(
+		struct seq_file *s, void *v, loff_t *pos)
+{
+	struct gk20a *g = s->private;
+	struct fifo_gk20a *f = &g->fifo;
+
+	++(*pos);
+	if (*pos >= f->num_channels)
+		return NULL;
+
+	return &f->channel[*pos];
+}
+
+static void gk20a_fifo_sched_debugfs_seq_stop(
+		struct seq_file *s, void *v)
+{
+}
+
+static int gk20a_fifo_sched_debugfs_seq_show(
+		struct seq_file *s, void *v)
+{
+	struct gk20a *g = s->private;
+	struct fifo_gk20a *f = &g->fifo;
+	struct channel_gk20a *ch = v;
+	struct tsg_gk20a *tsg = NULL;
+
+	struct fifo_engine_info_gk20a *engine_info;
+	struct fifo_runlist_info_gk20a *runlist;
+	u32 runlist_id;
+	int ret = SEQ_SKIP;
+	u32 engine_id;
+
+	engine_id = gk20a_fifo_get_gr_engine_id(g);
+	engine_info = (f->engine_info + engine_id);
+	runlist_id = engine_info->runlist_id;
+	runlist = &f->runlist_info[runlist_id];
+
+	if (ch == f->channel) {
+		seq_puts(s, "chid     tsgid    pid      timeslice  timeout  interleave graphics_preempt compute_preempt\n");
+		seq_puts(s, "                            (usecs)   (msecs)\n");
+		ret = 0;
+	}
+
+	if (!test_bit(ch->hw_chid, runlist->active_channels))
+		return ret;
+
+	if (gk20a_channel_get(ch)) {
+		if (gk20a_is_channel_marked_as_tsg(ch))
+			tsg = &f->tsg[ch->tsgid];
+
+		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
+				ch->hw_chid,
+				ch->tsgid,
+				ch->tgid,
+				tsg ? tsg->timeslice_us : ch->timeslice_us,
+				ch->timeout_ms_max,
+				tsg ? tsg->interleave_level : ch->interleave_level,
+				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
+				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
+		gk20a_channel_put(ch);
+	}
+	return 0;
+}
+
+static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
+	.start = gk20a_fifo_sched_debugfs_seq_start,
+	.next = gk20a_fifo_sched_debugfs_seq_next,
+	.stop = gk20a_fifo_sched_debugfs_seq_stop,
+	.show = gk20a_fifo_sched_debugfs_seq_show
+};
+
+/* Open the sched node (CAP_SYS_ADMIN only) and hand i_private to seq_file. */
+static int gk20a_fifo_sched_debugfs_open(struct inode *inode, struct file *file)
+{
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
+	if (err)
+		return err;
+
+	gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);
+
+	((struct seq_file *)file->private_data)->private = inode->i_private;
+	return 0;
+}
+
+/*
+ * The file operations structure contains our open function along with
+ * set of the canned seq_ ops.
+ */
+static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = gk20a_fifo_sched_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release
+};
+
+/*
+ * debugfs "enable" setter: 0 disables profiling, non-zero enables it.
+ */
+static int gk20a_fifo_profile_enable(void *data, u64 val)
+{
+	struct gk20a *g = (struct gk20a *) data;
+	struct fifo_gk20a *f = &g->fifo;
+
+	nvgpu_mutex_acquire(&f->profile.lock);
+	if (val == 0) {
+		if (f->profile.enabled) {
+			f->profile.enabled = false;
+			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
+		}
+	} else if (!f->profile.enabled) {
+		/* not kref init as it can have a race condition if
+		 * we enable/disable/enable while kickoff is happening
+		 */
+		if (!kref_get_unless_zero(&f->profile.ref)) {
+			/* nvgpu_vzalloc() so that it pairs with nvgpu_vfree() */
+			f->profile.data = nvgpu_vzalloc(g,
+				FIFO_PROFILING_ENTRIES *
+				sizeof(struct fifo_profile_gk20a));
+			f->profile.sorted = nvgpu_vzalloc(g,
+				FIFO_PROFILING_ENTRIES * sizeof(u64));
+			if (!(f->profile.data && f->profile.sorted)) {
+				nvgpu_vfree(g, f->profile.data);
+				nvgpu_vfree(g, f->profile.sorted);
+				nvgpu_mutex_release(&f->profile.lock);
+				return -ENOMEM;
+			}
+			kref_init(&f->profile.ref);
+		}
+		atomic_set(&f->profile.get, 0);
+		f->profile.enabled = true;
+	}
+	nvgpu_mutex_release(&f->profile.lock);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(
+	gk20a_fifo_profile_enable_debugfs_fops,
+	NULL,
+	gk20a_fifo_profile_enable,
+	"%llu\n"
+);
+
+/* sort() comparator for u64 samples; returning a - b would truncate to int */
+static int __profile_cmp(const void *a, const void *b)
+{
+	u64 x = *(const u64 *)a, y = *(const u64 *)b;
+
+	return (x > y) - (x < y);
+}
+
+/* 5% steps, 20 ranges; the reader keeps five such u64 arrays on its stack */
+#define PERCENTILE_WIDTH	5
+#define PERCENTILE_RANGES	(100/PERCENTILE_WIDTH)
+
+/*
+ * For every profile entry whose timestamp[index_end] is later than its
+ * timestamp[index_start], record the difference; sort the differences and
+ * store one value per percentile range in @percentiles. A range that no
+ * sample reaches yet is reported as 0. Returns the number of differences.
+ */
+static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
+		u64 *percentiles, u32 index_end, u32 index_start)
+{
+	u64 *sorted = g->fifo.profile.sorted;
+	unsigned int nelem = 0;
+	unsigned int index, rank;
+	struct fifo_profile_gk20a *profile;
+
+	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
+		profile = &g->fifo.profile.data[index];
+		if (profile->timestamp[index_end] >
+				profile->timestamp[index_start])
+			sorted[nelem++] = profile->timestamp[index_end] -
+					  profile->timestamp[index_start];
+	}
+
+	sort(sorted, nelem, sizeof(*sorted), __profile_cmp, NULL);
+
+	/* build ranges; rank 0 must not be turned into sorted[-1] */
+	for (index = 0; index < PERCENTILE_RANGES; index++) {
+		rank = (PERCENTILE_WIDTH * (index + 1) * nelem) / 100;
+		percentiles[index] = rank ? sorted[rank - 1] : 0;
+	}
+	return nelem;
+}
+
+/*
+ * seq_file show for the profile "stats" node: takes a reference on the
+ * profiling buffers (or reports "Profiling disabled"), prints one row per
+ * percentile range for each measured stage, then drops the reference.
+ */
+static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	unsigned int nelem, index;
+	/* 800B on the stack; fine, only called from the debugfs handler */
+	u64 percentiles_ioctl[PERCENTILE_RANGES];
+	u64 percentiles_kickoff[PERCENTILE_RANGES];
+	u64 percentiles_jobtracking[PERCENTILE_RANGES];
+	u64 percentiles_append[PERCENTILE_RANGES];
+	u64 percentiles_userd[PERCENTILE_RANGES];
+
+	if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
+		seq_puts(s, "Profiling disabled\n");
+		return 0;
+	}
+
+	__gk20a_fifo_create_stats(g, percentiles_ioctl,
+		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
+	__gk20a_fifo_create_stats(g, percentiles_kickoff,
+		PROFILE_END, PROFILE_ENTRY);
+	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
+		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
+	__gk20a_fifo_create_stats(g, percentiles_append,
+		PROFILE_APPEND, PROFILE_JOB_TRACKING);
+	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
+		PROFILE_END, PROFILE_APPEND);
+
+	seq_printf(s, "Number of kickoffs: %u\n", nelem);
+	seq_puts(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
+
+	for (index = 0; index < PERCENTILE_RANGES; index++)
+		seq_printf(s, "[%2upc]\t%8llu\t%8llu\t%8llu\t%8llu\t%8llu\n",
+			PERCENTILE_WIDTH * (index+1),
+			percentiles_ioctl[index],
+			percentiles_kickoff[index],
+			percentiles_append[index],
+			percentiles_jobtracking[index],
+			percentiles_userd[index]);
+
+	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+
+	return 0;
+}
+
+static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
+}
+
+static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
+	.open		= gk20a_fifo_profile_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+/* Create fifo/sched and fifo/profile/{enable,stats}; profiling starts off. */
+void gk20a_fifo_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *gpu_root = platform->debugfs;
+	struct dentry *fifo_root;
+	struct dentry *profile_root;
+
+	/* init first: deinit takes profile.lock even if we bail out below */
+	nvgpu_mutex_init(&g->fifo.profile.lock);
+	g->fifo.profile.enabled = false;
+	atomic_set(&g->fifo.profile.get, 0);
+	atomic_set(&g->fifo.profile.ref.refcount, 0);
+
+	fifo_root = debugfs_create_dir("fifo", gpu_root);
+	if (IS_ERR_OR_NULL(fifo_root))
+		return;
+
+	gk20a_dbg(gpu_dbg_info, "g=%p", g);
+
+	debugfs_create_file("sched", 0600, fifo_root, g,
+		&gk20a_fifo_sched_debugfs_fops);
+
+	profile_root = debugfs_create_dir("profile", fifo_root);
+	if (IS_ERR_OR_NULL(profile_root))
+		return;
+
+	debugfs_create_file("enable", 0600, profile_root, g,
+		&gk20a_fifo_profile_enable_debugfs_fops);
+
+	debugfs_create_file("stats", 0600, profile_root, g,
+		&gk20a_fifo_profile_stats_debugfs_fops);
+}
+
+void __gk20a_fifo_profile_free(struct kref *ref)
+{
+	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
+						profile.ref);
+	nvgpu_vfree(f->g, f->profile.data);
+	nvgpu_vfree(f->g, f->profile.sorted);
+}
+
+/* Get the next element in the ring buffer of profile entries
+ * and grab a reference to the structure
+ */
+struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	struct fifo_profile_gk20a *profile;
+	unsigned int index;
+
+	/* If kref is zero, profiling is not enabled */
+	if (!kref_get_unless_zero(&f->profile.ref))
+		return NULL;
+	index = atomic_inc_return(&f->profile.get);
+	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
+
+	return profile;
+}
+
+/* Free the reference to the structure. This allows deferred cleanups */
+void gk20a_fifo_profile_release(struct gk20a *g,
+					struct fifo_profile_gk20a *profile)
+{
+	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+}
+
+void gk20a_fifo_debugfs_deinit(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+
+	nvgpu_mutex_acquire(&f->profile.lock);
+	if (f->profile.enabled) {
+		f->profile.enabled = false;
+		kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
+	}
+	nvgpu_mutex_release(&f->profile.lock);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
new file mode 100644
index 00000000..46ac853e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_FIFO_H__
+#define __NVGPU_DEBUG_FIFO_H__
+
+struct gk20a;
+void gk20a_fifo_debugfs_init(struct gk20a *g);
+void gk20a_fifo_debugfs_deinit(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_FIFO_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c
new file mode 100644
index 00000000..56b8612e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_gr.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+
+int gr_gk20a_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	g->debugfs_gr_default_attrib_cb_size =
+		debugfs_create_u32("gr_default_attrib_cb_size",
+				   S_IRUGO|S_IWUSR, platform->debugfs,
+				   &g->gr.attrib_cb_default_size);
+
+	return 0;
+}
+
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.h b/drivers/gpu/nvgpu/common/linux/debug_gr.h
new file mode 100644
index 00000000..4b46acbb
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_GR_H__
+#define __NVGPU_DEBUG_GR_H__
+
+struct gk20a;
+int gr_gk20a_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_GR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
new file mode 100644
index 00000000..2ee542a8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_kmem.h"
+#include "kmem_priv.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+/**
+ * to_human_readable_bytes - Determine  suffix for passed size.
+ *
+ * @bytes - Number of bytes to generate a suffix for.
+ * @hr_bytes [out] - The human readable number of bytes.
+ * @hr_suffix [out] - The suffix for the HR number of bytes.
+ *
+ * Computes a human readable decomposition of the passed number of bytes. The
+ * suffix for the bytes is passed back through the @hr_suffix pointer. The right
+ * number of bytes is then passed back in @hr_bytes. This returns the following
+ * ranges:
+ *
+ *   0 - 1023 B
+ *   1 - 1023 KB
+ *   1 - 1023 MB
+ *   1 - 1023 GB
+ *   1 - 1023 TB
+ *   1 - ...  PB
+ */
+static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
+				      const char **hr_suffix)
+{
+	static const char *suffixes[] =
+		{ "B", "KB", "MB", "GB", "TB", "PB" };
+
+	u64 suffix_ind = 0;
+
+	/*
+	 * Stop scaling at the last suffix: anything above 1023 PB stays in
+	 * PB (e.g. 2048 PB). Letting the index run past the table and
+	 * clamping it afterwards would shift @bytes once more than the
+	 * chosen suffix says.
+	 */
+	while (suffix_ind < ARRAY_SIZE(suffixes) - 1 && bytes >= 1024) {
+		bytes >>= 10;
+		suffix_ind++;
+	}
+
+	*hr_bytes = bytes;
+	*hr_suffix = suffixes[suffix_ind];
+}
+
+/**
+ * print_hr_bytes - Print human readable bytes
+ *
+ * @s - A seq_file to print to. May be NULL.
+ * @msg - A message to print before the bytes.
+ * @bytes - Number of bytes.
+ *
+ * Print @msg followed by the human readable decomposition of the passed number
+ * of bytes.
+ *
+ * If @s is NULL then the output is sent to the kernel log instead.
+ */
+static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
+{
+	u64 hr_bytes;
+	const char *hr_suffix;
+
+	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
+	__pstat(s, "%s%llu %s\n", msg, hr_bytes, hr_suffix);
+}
+
+/**
+ * print_histogram - Build a histogram of the memory usage.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
+ */
+static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	int i;
+	u64 pot_min, pot_max;
+	u64 nr_buckets;
+	unsigned int *buckets;
+	unsigned int total_allocs;
+	struct nvgpu_rbtree_node *node;
+	static const char histogram_line[] =
+		"++++++++++++++++++++++++++++++++++++++++";
+
+	/*
+	 * The histogram starts at min_alloc rounded down to a power of two
+	 * and ends at max_alloc rounded up to one. Each bucket spans one
+	 * power of two, so the buckets are exponential.
+	 */
+	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
+	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
+
+	/* Keep one bucket even when pot_min == pot_max (difference of 0). */
+	nr_buckets = __ffs(pot_max) - __ffs(pot_min);
+	if (!nr_buckets)
+		nr_buckets = 1;
+
+	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
+	if (!buckets) {
+		__pstat(s, "OOM: could not allocate bucket storage!?\n");
+		return;
+	}
+
+	/* Bucket each alloc by its size rounded down to a power of two. */
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		int b;
+		u64 bucket_min;
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		/* Clamp to pot_min: __ffs() below needs a power of two. */
+		bucket_min = (u64)rounddown_pow_of_two(alloc->size);
+		if (bucket_min < pot_min)
+			bucket_min = pot_min;
+
+		b = __ffs(bucket_min) - __ffs(pot_min);
+
+		/*
+		 * The largest bucket's maximum is inclusive, unlike the others'.
+		 */
+		if (b >= (int)nr_buckets)
+			b = (int)nr_buckets - 1;
+
+		buckets[b]++;
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	total_allocs = 0;
+	for (i = 0; i < (int)nr_buckets; i++)
+		total_allocs += buckets[i];
+
+	__pstat(s, "Alloc histogram:\n");
+
+	/* Empty tracker tree: skip the lines rather than divide by zero. */
+	if (!total_allocs)
+		goto out;
+
+	/* Actually compute the histogram lines. */
+	for (i = 0; i < (int)nr_buckets; i++) {
+		char this_line[sizeof(histogram_line) + 1];
+		u64 line_length;
+		u64 hr_bytes;
+		const char *hr_suffix;
+
+		memset(this_line, 0, sizeof(this_line));
+
+		/*
+		 * Compute the normalized line length. Can't use floating point
+		 * so we will just multiply everything by 1000 and use fixed
+		 * point.
+		 */
+		line_length = (1000 * buckets[i]) / total_allocs;
+		line_length *= sizeof(histogram_line);
+		line_length /= 1000;
+
+		memset(this_line, '+', line_length);
+
+		__to_human_readable_bytes(1ULL << (__ffs(pot_min) + i),
+					  &hr_bytes, &hr_suffix);
+		__pstat(s, "  [%-4lld %-4lld] %-2s %5u | %s\n",
+			hr_bytes, hr_bytes << 1,
+			hr_suffix, buckets[i], this_line);
+	}
+
+out:
+	kfree(buckets);
+}
+
+/**
+ * nvgpu_kmem_print_stats - Print kmem tracking stats.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
+ *
+ * Print stats from a tracker. If @s is non-null then seq_printf() will be
+ * used with @s. Otherwise the stats are pr_info()ed.
+ */
+void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	nvgpu_lock_tracker(tracker);
+
+	__pstat(s, "Mem tracker: %s\n\n", tracker->name);
+
+	__pstat(s, "Basic Stats:\n");
+	__pstat(s,        "  Number of allocs        %lld\n",
+		tracker->nr_allocs);
+	__pstat(s,        "  Number of frees         %lld\n",
+		tracker->nr_frees);
+	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
+	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
+	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
+	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
+	print_hr_bytes(s, "  Bytes allocated (real)  ",
+		       tracker->bytes_alloced_real);
+	print_hr_bytes(s, "  Bytes freed (real)      ",
+		       tracker->bytes_freed_real);
+	__pstat(s, "\n");
+
+	print_histogram(tracker, s);
+
+	nvgpu_unlock_tracker(tracker);
+}
+
+static int __kmem_tracking_show(struct seq_file *s, void *unused)
+{
+	struct nvgpu_mem_alloc_tracker *tracker = s->private;
+
+	nvgpu_kmem_print_stats(tracker, s);
+
+	return 0;
+}
+
+static int __kmem_tracking_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __kmem_tracking_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_tracking_fops = {
+	.open = __kmem_tracking_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int __kmem_traces_dump_tracker(struct gk20a *g,
+				      struct nvgpu_mem_alloc_tracker *tracker,
+				      struct seq_file *s)
+{
+	struct nvgpu_rbtree_node *node;
+
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		kmem_print_mem_alloc(g, alloc, s);
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	return 0;
+}
+
+static int __kmem_traces_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+
+	nvgpu_lock_tracker(g->vmallocs);
+	seq_puts(s, "Oustanding vmallocs:\n");
+	__kmem_traces_dump_tracker(g, g->vmallocs, s);
+	seq_puts(s, "\n");
+	nvgpu_unlock_tracker(g->vmallocs);
+
+	nvgpu_lock_tracker(g->kmallocs);
+	seq_puts(s, "Oustanding kmallocs:\n");
+	__kmem_traces_dump_tracker(g, g->kmallocs, s);
+	nvgpu_unlock_tracker(g->kmallocs);
+
+	return 0;
+}
+
+static int __kmem_traces_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __kmem_traces_show, inode->i_private);
+}
+
+static const struct file_operations __kmem_traces_fops = {
+	.open = __kmem_traces_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/* Create kmem_tracking/ with one stats node per tracker plus "traces". */
+void nvgpu_kmem_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
+	if (IS_ERR_OR_NULL(g->debugfs_kmem))
+		return;
+
+	debugfs_create_file(g->vmallocs->name, S_IRUGO,
+			    g->debugfs_kmem,
+			    g->vmallocs, &__kmem_tracking_fops);
+	debugfs_create_file(g->kmallocs->name, S_IRUGO,
+			    g->debugfs_kmem,
+			    g->kmallocs, &__kmem_tracking_fops);
+	debugfs_create_file("traces", S_IRUGO,
+			    g->debugfs_kmem,
+			    g, &__kmem_traces_fops);
+}
+#endif
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
new file mode 100644
index 00000000..44322b53
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_KMEM_H__
+#define __NVGPU_DEBUG_KMEM_H__
+
+struct gk20a;
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+void nvgpu_kmem_debugfs_init(struct gk20a *g);
+#endif
+
+#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.c b/drivers/gpu/nvgpu/common/linux/debug_mm.c
new file mode 100644
index 00000000..1e260f89
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_mm.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+
+void gk20a_mm_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	debugfs_create_bool("force_pramin", 0664, platform->debugfs,
+			   &g->mm.force_pramin);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.h b/drivers/gpu/nvgpu/common/linux/debug_mm.h
new file mode 100644
index 00000000..bf7bc985
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_MM_H__
+#define __NVGPU_DEBUG_MM_H__
+
+struct gk20a;
+void gk20a_mm_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_MM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
new file mode 100644
index 00000000..f19f5139
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
@@ -0,0 +1,479 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_pmu.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+static int lpwr_debug_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+
+	if (g->ops.pmu.pmu_pg_engines_feature_list &&
+		g->ops.pmu.pmu_pg_engines_feature_list(g,
+		PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
+		PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
+		seq_printf(s, "PSTATE: %u\n"
+			"RPPG Enabled: %u\n"
+			"RPPG ref count: %u\n"
+			"RPPG state: %u\n"
+			"MSCG Enabled: %u\n"
+			"MSCG pstate state: %u\n"
+			"MSCG transition state: %u\n",
+			g->ops.clk_arb.get_current_pstate(g),
+			g->elpg_enabled, g->pmu.elpg_refcnt,
+			g->pmu.elpg_stat, g->mscg_enabled,
+			g->pmu.mscg_stat, g->pmu.mscg_transition_state);
+
+	} else
+		seq_printf(s, "ELPG Enabled: %u\n"
+			"ELPG ref count: %u\n"
+			"ELPG state: %u\n",
+			g->elpg_enabled, g->pmu.elpg_refcnt,
+			g->pmu.elpg_stat);
+
+	return 0;
+
+}
+
+static int lpwr_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, lpwr_debug_show, inode->i_private);
+}
+
+static const struct file_operations lpwr_debug_fops = {
+	.open		= lpwr_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int mscg_stat_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	u64 total_ingating, total_ungating, residency, divisor, dividend;
+	struct pmu_pg_stats_data pg_stat_data = { 0 };
+	int err;
+
+	/* Don't unnecessarily power on the device */
+	if (g->power_on) {
+		err = gk20a_busy(g);
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
+		gk20a_idle(g);
+	}
+	total_ingating = g->pg_ingating_time_us +
+			(u64)pg_stat_data.ingating_time;
+	total_ungating = g->pg_ungating_time_us +
+			(u64)pg_stat_data.ungating_time;
+
+	divisor = total_ingating + total_ungating;
+
+	/* We compute the residency on a scale of 1000 */
+	dividend = total_ingating * 1000;
+
+	if (divisor)
+		residency = div64_u64(dividend, divisor);
+	else
+		residency = 0;
+
+	seq_printf(s,
+			"Time in MSCG: %llu us\n"
+			"Time out of MSCG: %llu us\n"
+			"MSCG residency ratio: %llu\n"
+			"MSCG Entry Count: %u\n"
+			"MSCG Avg Entry latency %u\n"
+			"MSCG Avg Exit latency %u\n",
+			total_ingating, total_ungating,
+			residency, pg_stat_data.gating_cnt,
+			pg_stat_data.avg_entry_latency_us,
+			pg_stat_data.avg_exit_latency_us);
+	return 0;
+
+}
+
+static int mscg_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mscg_stat_show, inode->i_private);
+}
+
+static const struct file_operations mscg_stat_fops = {
+	.open		= mscg_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int mscg_transitions_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data pg_stat_data = { 0 };
+	u32 total_gating_cnt;
+	int err;
+
+	if (g->power_on) {
+		err = gk20a_busy(g);
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
+		gk20a_idle(g);
+	}
+	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
+
+	seq_printf(s, "%u\n", total_gating_cnt);
+	return 0;
+
+}
+
+static int mscg_transitions_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mscg_transitions_show, inode->i_private);
+}
+
+static const struct file_operations mscg_transitions_fops = {
+	.open		= mscg_transitions_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int elpg_stat_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data pg_stat_data = { 0 };
+	u64 total_ingating, total_ungating, residency, divisor, dividend;
+	int err;
+
+	/* Don't unnecessarily power on the device */
+	if (g->power_on) {
+		err = gk20a_busy(g);
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
+		gk20a_idle(g);
+	}
+	total_ingating = g->pg_ingating_time_us +
+			(u64)pg_stat_data.ingating_time;
+	total_ungating = g->pg_ungating_time_us +
+			(u64)pg_stat_data.ungating_time;
+	divisor = total_ingating + total_ungating;
+
+	/* We compute the residency on a scale of 1000 */
+	dividend = total_ingating * 1000;
+
+	if (divisor)
+		residency = div64_u64(dividend, divisor);
+	else
+		residency = 0;
+
+	seq_printf(s,
+			"Time in ELPG: %llu us\n"
+			"Time out of ELPG: %llu us\n"
+			"ELPG residency ratio: %llu\n"
+			"ELPG Entry Count: %u\n"
+			"ELPG Avg Entry latency %u us\n"
+			"ELPG Avg Exit latency %u us\n",
+			total_ingating, total_ungating,
+			residency, pg_stat_data.gating_cnt,
+			pg_stat_data.avg_entry_latency_us,
+			pg_stat_data.avg_exit_latency_us);
+	return 0;
+
+}
+
+static int elpg_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, elpg_stat_show, inode->i_private);
+}
+
+static const struct file_operations elpg_stat_fops = {
+	.open		= elpg_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* debugfs "elpg_transitions": print the accumulated ELPG gating count. */
+static int elpg_transitions_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data stats = { 0 };
+	u32 total;
+	int ret;
+
+	/* Sample the PMU only when the GPU is already powered */
+	if (g->power_on) {
+		ret = gk20a_busy(g);
+		if (ret)
+			return ret;
+		gk20a_pmu_get_pg_stats(g, PMU_PG_ELPG_ENGINE_ID_GRAPHICS,
+				       &stats);
+		gk20a_idle(g);
+	}
+	total = g->pg_gating_cnt + stats.gating_cnt;
+
+	seq_printf(s, "%u\n", total);
+	return 0;
+}
+
+static int elpg_transitions_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, elpg_transitions_show, inode->i_private);
+}
+
+static const struct file_operations elpg_transitions_fops = {
+	.open		= elpg_transitions_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Print the PMU falcon trace buffer to the seq_file, one 0x40-byte record
+ * per entry. Returns 0, or -ENOMEM if the staging copy cannot be allocated.
+ */
+static int falc_trace_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_gk20a *pmu = &g->pmu;
+	u32 i = 0, j = 0, k, l, m;
+	char part_str[40];
+	char *trace;
+	u32 *trace1;
+
+	/* zeroed copy; the spare byte keeps the last string NUL-terminated */
+	trace = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE + 1);
+	if (trace == NULL)
+		return -ENOMEM;
+	trace1 = (u32 *)trace;
+	nvgpu_mem_rd_n(g, &pmu->trace_buf, 0, trace, GK20A_PMU_TRACE_BUFSIZE);
+
+	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
+		/* look ahead up to 0x40 words, clamped to the end of the copy */
+		k = min_t(u32, 0x40, (GK20A_PMU_TRACE_BUFSIZE - i) / 4);
+		for (j = 0; j < k; j++)
+			if (trace1[(i / 4) + j])
+				break;
+		if (j == k)
+			break;
+		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
+		l = 0;
+		m = 0;
+		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
+			if (k >= 40)
+				break;
+			strncpy(part_str, (trace+i+20+m), k);
+			part_str[k] = 0;
+			seq_printf(s, "%s0x%x", part_str,
+					trace1[(i / 4) + 1 + l]);
+			l++;
+			m += k + 2;
+		}
+		seq_printf(s, "%s", (trace+i+20+m));
+	}
+
+	nvgpu_kfree(g, trace);
+	return 0;
+}
+
+static int falc_trace_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, falc_trace_show, inode->i_private);
+}
+
+static const struct file_operations falc_trace_fops = {
+	.open		= falc_trace_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* Print 1 if perfmon sampling is enabled in the PMU state, else 0. */
+static int perfmon_events_enable_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+
+	seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
+	return 0;
+}
+
+static int perfmon_events_enable_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, perfmon_events_enable_show, inode->i_private);
+}
+
+/*
+ * Parse a decimal value from user space; non-zero enables perfmon sampling
+ * and zero disables it. With the GPU powered off only the flag is stored.
+ */
+static ssize_t perfmon_events_enable_write(struct file *file,
+	const char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct gk20a *g = s->private;
+	char buf[40] = { 0 };
+	size_t len = min(count, sizeof(buf) - 1);
+	unsigned long val = 0;
+	int err;
+
+	if (copy_from_user(buf, userbuf, len))
+		return -EFAULT;
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	/* Don't turn on gk20a unnecessarily: just record the request */
+	if (!g->power_on) {
+		g->pmu.perfmon_sampling_enabled = val ? true : false;
+		return count;
+	}
+
+	err = gk20a_busy(g);
+	if (err)
+		return err;
+
+	if (val && !g->pmu.perfmon_sampling_enabled) {
+		g->pmu.perfmon_sampling_enabled = true;
+		nvgpu_pmu_perfmon_start_sampling(&(g->pmu));
+	} else if (!val && g->pmu.perfmon_sampling_enabled) {
+		g->pmu.perfmon_sampling_enabled = false;
+		nvgpu_pmu_perfmon_stop_sampling(&(g->pmu));
+	}
+	gk20a_idle(g);
+	return count;
+}
+
+static const struct file_operations perfmon_events_enable_fops = {
+	.open		= perfmon_events_enable_open,
+	.read		= seq_read,
+	.write		= perfmon_events_enable_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* Print the perfmon event counter kept in the PMU state. */
+static int perfmon_events_count_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+
+	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
+	return 0;
+}
+
+static int perfmon_events_count_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, perfmon_events_count_show, inode->i_private);
+}
+
+static const struct file_operations perfmon_events_count_fops = {
+	.open		= perfmon_events_count_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* Print the PMU mode value (g->pmu.pmu_mode) as a decimal integer. */
+static int security_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+
+	seq_printf(s, "%d\n", g->pmu.pmu_mode);
+	return 0;
+}
+
+static int security_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, security_show, inode->i_private);
+}
+
+static const struct file_operations security_fops = {
+	.open		= security_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Create the PMU debugfs nodes under the GPU's debugfs root.
+ *
+ * Returns 0 on success or -ENOMEM if any node cannot be created. Nodes made
+ * before a failure are left in place: the root directory is shared with the
+ * other debug modules, so it must not be torn down from here.
+ */
+int gk20a_pmu_debugfs_init(struct gk20a *g)
+{
+	struct dentry *d;
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	d = debugfs_create_file("lpwr_debug", S_IRUGO|S_IWUSR,
+			platform->debugfs, g, &lpwr_debug_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file("mscg_residency", S_IRUGO|S_IWUSR,
+			platform->debugfs, g, &mscg_stat_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file("mscg_transitions", S_IRUGO,
+			platform->debugfs, g, &mscg_transitions_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file("elpg_residency", S_IRUGO|S_IWUSR,
+			platform->debugfs, g, &elpg_stat_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file("elpg_transitions", S_IRUGO,
+			platform->debugfs, g, &elpg_transitions_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file("falc_trace", S_IRUGO,
+			platform->debugfs, g, &falc_trace_fops);
+	if (!d)
+		goto err_out;
+
+	/* writable: perfmon_events_enable_fops provides a .write handler */
+	d = debugfs_create_file("perfmon_events_enable", S_IRUGO|S_IWUSR,
+			platform->debugfs, g, &perfmon_events_enable_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file("perfmon_events_count", S_IRUGO,
+			platform->debugfs, g, &perfmon_events_count_fops);
+	if (!d)
+		goto err_out;
+
+	d = debugfs_create_file("pmu_security", S_IRUGO,
+			platform->debugfs, g, &security_fops);
+	if (!d)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	pr_err("%s: Failed to make debugfs node\n", __func__);
+	return -ENOMEM;
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
new file mode 100644
index 00000000..c4e3243d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_PMU_H__
+#define __NVGPU_DEBUG_PMU_H__
+
+struct gk20a;
+int gk20a_pmu_debugfs_init(struct gk20a *g);	/* 0, or -ENOMEM on failure */
+
+#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c
new file mode 100644
index 00000000..40b93149
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_sched.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/*
+ * Dump the scheduler control state: the control_locked flag, whether
+ * busy_lock is currently held, the bitmap size and both TSG bitmaps.
+ */
+static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int nr_words = sched->bitmap_size / sizeof(u64);
+	bool sched_busy;
+	int ret, w;
+
+	ret = gk20a_busy(g);
+	if (ret)
+		return ret;
+
+	/* busy_lock is only probed: taking it means nobody else holds it */
+	sched_busy = !nvgpu_mutex_tryacquire(&sched->busy_lock);
+	if (!sched_busy)
+		nvgpu_mutex_release(&sched->busy_lock);
+
+	seq_printf(s, "control_locked=%d\n", sched->control_locked);
+	seq_printf(s, "busy=%d\n", sched_busy);
+	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
+
+	/* status_lock is held while both bitmaps are printed */
+	nvgpu_mutex_acquire(&sched->status_lock);
+	seq_puts(s, "active_tsg_bitmap\n");
+	for (w = 0; w < nr_words; w++)
+		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[w]);
+	seq_puts(s, "recent_tsg_bitmap\n");
+	for (w = 0; w < nr_words; w++)
+		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[w]);
+	nvgpu_mutex_release(&sched->status_lock);
+
+	gk20a_idle(g);
+	return 0;
+}
+
+static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations gk20a_sched_debugfs_fops = {
+	.open		= gk20a_sched_debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void gk20a_sched_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *plat = dev_get_drvdata(g->dev);
+
+	/* Read-only "sched_ctrl" node; the returned dentry is not checked */
+	debugfs_create_file("sched_ctrl", S_IRUGO, plat->debugfs, g,
+			    &gk20a_sched_debugfs_fops);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h
new file mode 100644
index 00000000..34a8f55f
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_SCHED_H__
+#define __NVGPU_DEBUG_SCHED_H__
+
+struct gk20a;
+void gk20a_sched_debugfs_init(struct gk20a *g);	/* adds "sched_ctrl" node */
+
+#endif /* __NVGPU_DEBUG_SCHED_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index 80e7698b..f85016d4 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -21,6 +21,7 @@
 #include <nvgpu/soc.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a/gk20a_scale.h"
 #include "gk20a/gk20a.h"
@@ -182,7 +183,7 @@ int nvgpu_probe(struct gk20a *g,
 	nvgpu_init_mm_vars(g);
 
 	gk20a_create_sysfs(g->dev);
-	gk20a_debug_init(g->dev, debugfs_symlink);
+	gk20a_debug_init(g, debugfs_symlink);
 
 	g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
 	if (!g->dbg_regops_tmp_buf) {
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 2502ff30..d81328f0 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -26,9 +26,9 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/log.h>
 #include <nvgpu/list.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a/gk20a.h"
-#include "gk20a/debug_gk20a.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 #include "gk20a/fence_gk20a.h"
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index d058eba5..41aaa729 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -134,19 +134,19 @@ void __nvgpu_vfree(struct gk20a *g, void *addr)
 
 #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
 
-static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
+void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
 {
 	nvgpu_mutex_acquire(&tracker->lock);
 }
 
-static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
+void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
 {
 	nvgpu_mutex_release(&tracker->lock);
 }
 
-static void kmem_print_mem_alloc(struct gk20a *g,
-				 struct nvgpu_mem_alloc *alloc,
-				 struct seq_file *s)
+void kmem_print_mem_alloc(struct gk20a *g,
+			 struct nvgpu_mem_alloc *alloc,
+			 struct seq_file *s)
 {
 #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
 	int i;
@@ -231,7 +231,7 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
 	alloc->stack_length = stack_trace.nr_entries;
 #endif
 
-	lock_tracker(tracker);
+	nvgpu_lock_tracker(tracker);
 	tracker->bytes_alloced += size;
 	tracker->bytes_alloced_real += real_size;
 	tracker->nr_allocs++;
@@ -246,10 +246,10 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
 	if (ret) {
 		WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
 		kfree(alloc);
-		unlock_tracker(tracker);
+		nvgpu_unlock_tracker(tracker);
 		return ret;
 	}
-	unlock_tracker(tracker);
+	nvgpu_unlock_tracker(tracker);
 
 	return 0;
 }
@@ -259,17 +259,17 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
 {
 	struct nvgpu_mem_alloc *alloc;
 
-	lock_tracker(tracker);
+	nvgpu_lock_tracker(tracker);
 	alloc = nvgpu_rem_alloc(tracker, addr);
 	if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
-		unlock_tracker(tracker);
+		nvgpu_unlock_tracker(tracker);
 		return -EINVAL;
 	}
 
 	tracker->nr_frees++;
 	tracker->bytes_freed += alloc->size;
 	tracker->bytes_freed_real += alloc->real_size;
-	unlock_tracker(tracker);
+	nvgpu_unlock_tracker(tracker);
 
 	return 0;
 }
@@ -407,307 +407,6 @@ void __nvgpu_track_kfree(struct gk20a *g, void *addr)
 	__nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
 }
 
-/**
- * to_human_readable_bytes - Determine  suffix for passed size.
- *
- * @bytes - Number of bytes to generate a suffix for.
- * @hr_bytes [out] - The human readable number of bytes.
- * @hr_suffix [out] - The suffix for the HR number of bytes.
- *
- * Computes a human readable decomposition of the passed number of bytes. The
- * suffix for the bytes is passed back through the @hr_suffix pointer. The right
- * number of bytes is then passed back in @hr_bytes. This returns the following
- * ranges:
- *
- *   0 - 1023 B
- *   1 - 1023 KB
- *   1 - 1023 MB
- *   1 - 1023 GB
- *   1 - 1023 TB
- *   1 - ...  PB
- */
-static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
-				      const char **hr_suffix)
-{
-	static const char *suffixes[] =
-		{ "B", "KB", "MB", "GB", "TB", "PB" };
-
-	u64 suffix_ind = 0;
-
-	while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
-		bytes >>= 10;
-		suffix_ind++;
-	}
-
-	/*
-	 * Handle case where bytes > 1023PB.
-	 */
-	suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
-		suffix_ind : ARRAY_SIZE(suffixes) - 1;
-
-	*hr_bytes = bytes;
-	*hr_suffix = suffixes[suffix_ind];
-}
-
-/**
- * print_hr_bytes - Print human readable bytes
- *
- * @s - A seq_file to print to. May be NULL.
- * @msg - A message to print before the bytes.
- * @bytes - Number of bytes.
- *
- * Print @msg followed by the human readable decomposition of the passed number
- * of bytes.
- *
- * If @s is NULL then this prints will be made to the kernel log.
- */
-static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
-{
-	u64 hr_bytes;
-	const char *hr_suffix;
-
-	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
-	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
-}
-
-/**
- * print_histogram - Build a histogram of the memory usage.
- *
- * @tracker The tracking to pull data from.
- * @s       A seq_file to dump info into.
- */
-static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
-			    struct seq_file *s)
-{
-	int i;
-	u64 pot_min, pot_max;
-	u64 nr_buckets;
-	unsigned int *buckets;
-	unsigned int total_allocs;
-	struct nvgpu_rbtree_node *node;
-	static const char histogram_line[] =
-		"++++++++++++++++++++++++++++++++++++++++";
-
-	/*
-	 * pot_min is essentially a round down to the nearest power of 2. This
-	 * is the start of the histogram. pot_max is just a round up to the
-	 * nearest power of two. Each histogram bucket is one power of two so
-	 * the histogram buckets are exponential.
-	 */
-	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
-	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
-
-	nr_buckets = __ffs(pot_max) - __ffs(pot_min);
-
-	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
-	if (!buckets) {
-		__pstat(s, "OOM: could not allocate bucket storage!?\n");
-		return;
-	}
-
-	/*
-	 * Iterate across all of the allocs and determine what bucket they
-	 * should go in. Round the size down to the nearest power of two to
-	 * find the right bucket.
-	 */
-	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
-	while (node) {
-		int b;
-		u64 bucket_min;
-		struct nvgpu_mem_alloc *alloc =
-			nvgpu_mem_alloc_from_rbtree_node(node);
-
-		bucket_min = (u64)rounddown_pow_of_two(alloc->size);
-		if (bucket_min < tracker->min_alloc)
-			bucket_min = tracker->min_alloc;
-
-		b = __ffs(bucket_min) - __ffs(pot_min);
-
-		/*
-		 * Handle the one case were there's an alloc exactly as big as
-		 * the maximum bucket size of the largest bucket. Most of the
-		 * buckets have an inclusive minimum and exclusive maximum. But
-		 * the largest bucket needs to have an _inclusive_ maximum as
-		 * well.
-		 */
-		if (b == (int)nr_buckets)
-			b--;
-
-		buckets[b]++;
-
-		nvgpu_rbtree_enum_next(&node, node);
-	}
-
-	total_allocs = 0;
-	for (i = 0; i < (int)nr_buckets; i++)
-		total_allocs += buckets[i];
-
-	__pstat(s, "Alloc histogram:\n");
-
-	/*
-	 * Actually compute the histogram lines.
-	 */
-	for (i = 0; i < (int)nr_buckets; i++) {
-		char this_line[sizeof(histogram_line) + 1];
-		u64 line_length;
-		u64 hr_bytes;
-		const char *hr_suffix;
-
-		memset(this_line, 0, sizeof(this_line));
-
-		/*
-		 * Compute the normalized line length. Cant use floating point
-		 * so we will just multiply everything by 1000 and use fixed
-		 * point.
-		 */
-		line_length = (1000 * buckets[i]) / total_allocs;
-		line_length *= sizeof(histogram_line);
-		line_length /= 1000;
-
-		memset(this_line, '+', line_length);
-
-		__to_human_readable_bytes(1 << (__ffs(pot_min) + i),
-					  &hr_bytes, &hr_suffix);
-		__pstat(s, "  [%-4lld %-4lld] %-2s %5u | %s\n",
-			hr_bytes, hr_bytes << 1,
-			hr_suffix, buckets[i], this_line);
-	}
-}
-
-#ifdef CONFIG_DEBUG_FS
-/**
- * nvgpu_kmem_print_stats - Print kmem tracking stats.
- *
- * @tracker The tracking to pull data from.
- * @s       A seq_file to dump info into.
- *
- * Print stats from a tracker. If @s is non-null then seq_printf() will be
- * used with @s. Otherwise the stats are pr_info()ed.
- */
-void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
-			    struct seq_file *s)
-{
-	lock_tracker(tracker);
-
-	__pstat(s, "Mem tracker: %s\n\n", tracker->name);
-
-	__pstat(s, "Basic Stats:\n");
-	__pstat(s,        "  Number of allocs        %lld\n",
-		tracker->nr_allocs);
-	__pstat(s,        "  Number of frees         %lld\n",
-		tracker->nr_frees);
-	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
-	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
-	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
-	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
-	print_hr_bytes(s, "  Bytes allocated (real)  ",
-		       tracker->bytes_alloced_real);
-	print_hr_bytes(s, "  Bytes freed (real)      ",
-		       tracker->bytes_freed_real);
-	__pstat(s, "\n");
-
-	print_histogram(tracker, s);
-
-	unlock_tracker(tracker);
-}
-
-static int __kmem_tracking_show(struct seq_file *s, void *unused)
-{
-	struct nvgpu_mem_alloc_tracker *tracker = s->private;
-
-	nvgpu_kmem_print_stats(tracker, s);
-
-	return 0;
-}
-
-static int __kmem_tracking_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, __kmem_tracking_show, inode->i_private);
-}
-
-static const struct file_operations __kmem_tracking_fops = {
-	.open = __kmem_tracking_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int __kmem_traces_dump_tracker(struct gk20a *g,
-				      struct nvgpu_mem_alloc_tracker *tracker,
-				      struct seq_file *s)
-{
-	struct nvgpu_rbtree_node *node;
-
-	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
-	while (node) {
-		struct nvgpu_mem_alloc *alloc =
-			nvgpu_mem_alloc_from_rbtree_node(node);
-
-		kmem_print_mem_alloc(g, alloc, s);
-
-		nvgpu_rbtree_enum_next(&node, node);
-	}
-
-	return 0;
-}
-
-static int __kmem_traces_show(struct seq_file *s, void *unused)
-{
-	struct gk20a *g = s->private;
-
-	lock_tracker(g->vmallocs);
-	seq_puts(s, "Oustanding vmallocs:\n");
-	__kmem_traces_dump_tracker(g, g->vmallocs, s);
-	seq_puts(s, "\n");
-	unlock_tracker(g->vmallocs);
-
-	lock_tracker(g->kmallocs);
-	seq_puts(s, "Oustanding kmallocs:\n");
-	__kmem_traces_dump_tracker(g, g->kmallocs, s);
-	unlock_tracker(g->kmallocs);
-
-	return 0;
-}
-
-static int __kmem_traces_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, __kmem_traces_show, inode->i_private);
-}
-
-static const struct file_operations __kmem_traces_fops = {
-	.open = __kmem_traces_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-void nvgpu_kmem_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *plat = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-	struct dentry *gpu_root = plat->debugfs;
-	struct dentry *node;
-
-	g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
-	if (IS_ERR_OR_NULL(g->debugfs_kmem))
-		return;
-
-	node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
-				   g->debugfs_kmem,
-				   g->vmallocs, &__kmem_tracking_fops);
-	node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
-				   g->debugfs_kmem,
-				   g->kmallocs, &__kmem_tracking_fops);
-	node = debugfs_create_file("traces", S_IRUGO,
-				   g->debugfs_kmem,
-				   g, &__kmem_traces_fops);
-}
-#else
-void nvgpu_kmem_debugfs_init(struct device *dev)
-{
-}
-#endif
-
 static int __do_check_for_outstanding_allocs(
 	struct gk20a *g,
 	struct nvgpu_mem_alloc_tracker *tracker,
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
index d3abb378..a41762af 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
@@ -20,6 +20,8 @@
 #include <nvgpu/rbtree.h>
 #include <nvgpu/lock.h>
 
+struct seq_file;
+
 #define __pstat(s, fmt, msg...)				\
 	do {						\
 		if (s)					\
@@ -92,6 +94,12 @@ struct nvgpu_mem_alloc_tracker {
 	unsigned long max_alloc;
 };
 
+void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
+void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
+
+void kmem_print_mem_alloc(struct gk20a *g,
+			 struct nvgpu_mem_alloc *alloc,
+			 struct seq_file *s);
 #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
 
 #endif /* __KMEM_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index d5fc40de..4f7fc3fa 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -29,6 +29,7 @@
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -970,10 +971,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 
 	gk20a_user_deinit(dev, &nvgpu_class);
 
-#ifdef CONFIG_DEBUG_FS
-	debugfs_remove_recursive(platform->debugfs);
-	debugfs_remove_recursive(platform->debugfs_alias);
-#endif
+	gk20a_debug_deinit(g);
 
 	gk20a_remove_sysfs(dev);
 
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 40ee199a..eae0475a 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -411,7 +411,9 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	wmb();
 	a->inited = true;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_init_alloc_debug(g, __a);
+#endif
 	alloc_dbg(__a, "New allocator: type      bitmap\n");
 	alloc_dbg(__a, "               base      0x%llx\n", a->base);
 	alloc_dbg(__a, "               bit_offs  0x%llx\n", a->bit_offs);
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 34bc51df..0ef94c10 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -251,7 +251,9 @@ static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
 
 	alloc_lock(__a);
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_fini_alloc_debug(__a);
+#endif
 
 	/*
 	 * Free the fixed allocs first.
@@ -1290,7 +1292,9 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	wmb();
 	a->initialized = 1;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_init_alloc_debug(g, __a);
+#endif
 	alloc_dbg(__a, "New allocator: type      buddy\n");
 	alloc_dbg(__a, "               base      0x%llx\n", a->base);
 	alloc_dbg(__a, "               size      0x%llx\n", a->length);
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
index 234ae4a3..944b4b0f 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -99,7 +99,9 @@ static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a)
 {
 	struct nvgpu_lockless_allocator *pa = a->priv;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_fini_alloc_debug(a);
+#endif
 
 	nvgpu_vfree(a->g, pa->next);
 	nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa);
@@ -191,7 +193,9 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	wmb();
 	a->inited = true;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_init_alloc_debug(g, __a);
+#endif
 	alloc_dbg(__a, "New allocator: type          lockless\n");
 	alloc_dbg(__a, "               base          0x%llx\n", a->base);
 	alloc_dbg(__a, "               nodes         %d\n", a->nr_nodes);
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index 211b353b..1646d2b1 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -20,11 +20,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
-#ifdef CONFIG_DEBUG_FS
-#include "gk20a/platform_gk20a.h"
-#endif
-
-u32 nvgpu_alloc_tracing_on;
 
 u64 nvgpu_alloc_length(struct nvgpu_allocator *a)
 {
@@ -151,68 +146,3 @@ int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
 
 	return 0;
 }
-
-#ifdef CONFIG_DEBUG_FS
-void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
-			     struct seq_file *s, int lock)
-{
-	__a->ops->print_stats(__a, s, lock);
-}
-
-static int __alloc_show(struct seq_file *s, void *unused)
-{
-	struct nvgpu_allocator *a = s->private;
-
-	nvgpu_alloc_print_stats(a, s, 1);
-
-	return 0;
-}
-
-static int __alloc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, __alloc_show, inode->i_private);
-}
-
-static const struct file_operations __alloc_fops = {
-	.open = __alloc_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-#endif
-
-void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
-{
-#ifdef CONFIG_DEBUG_FS
-	if (!g->debugfs_allocators)
-		return;
-
-	a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
-					       g->debugfs_allocators,
-					       a, &__alloc_fops);
-#endif
-}
-
-void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
-{
-#ifdef CONFIG_DEBUG_FS
-	if (!IS_ERR_OR_NULL(a->debugfs_entry))
-		debugfs_remove(a->debugfs_entry);
-#endif
-}
-
-#ifdef CONFIG_DEBUG_FS
-void nvgpu_alloc_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct dentry *gpu_root = platform->debugfs;
-	struct gk20a *g = get_gk20a(dev);
-
-	g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root);
-	if (IS_ERR_OR_NULL(g->debugfs_allocators))
-		return;
-
-	debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
-			   &nvgpu_alloc_tracing_on);
-}
-#endif
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 14b5da3c..3f4f3706 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -916,7 +916,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	if (err)
 		goto fail;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_init_alloc_debug(g, __a);
+#endif
 	palloc_dbg(a, "New allocator: type      page\n");
 	palloc_dbg(a, "               base      0x%llx\n", a->base);
 	palloc_dbg(a, "               size      0x%llx\n", a->length);
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index a0160274..084f1793 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -18,9 +18,6 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/fs.h>
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#endif
 #include <linux/dma-buf.h>
 
 #include <trace/events/gk20a.h>
@@ -40,8 +37,6 @@
 #include "cde_gk20a.h"
 #include "fence_gk20a.h"
 #include "gr_gk20a.h"
-#include "debug_gk20a.h"
-#include "platform_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -1585,8 +1580,7 @@ int gk20a_prepare_compressible_read(
 	if (IS_ERR(dmabuf))
 		return -EINVAL;
 
-	err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g),
-				     offset, &state);
+	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		dma_buf_put(dmabuf);
 		return err;
@@ -1650,7 +1644,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 		return -EINVAL;
 	}
 
-	err = gk20a_dmabuf_get_state(dmabuf, dev_from_gk20a(g), offset, &state);
+	err = gk20a_dmabuf_get_state(dmabuf, g, offset, &state);
 	if (err) {
 		nvgpu_err(g, "could not get state from dmabuf");
 		dma_buf_put(dmabuf);
@@ -1671,38 +1665,3 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 	dma_buf_put(dmabuf);
 	return 0;
 }
-
-#ifdef CONFIG_DEBUG_FS
-static ssize_t gk20a_cde_reload_write(struct file *file,
-	const char __user *userbuf, size_t count, loff_t *ppos)
-{
-	struct gk20a *g = file->private_data;
-	gk20a_cde_reload(g);
-	return count;
-}
-
-static const struct file_operations gk20a_cde_reload_fops = {
-	.open		= simple_open,
-	.write		= gk20a_cde_reload_write,
-};
-
-void gk20a_cde_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-
-	if (!platform->has_cde)
-		return;
-
-	debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
-			   platform->debugfs, &g->cde_app.shader_parameter);
-	debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
-			   platform->debugfs, &g->cde_app.ctx_count);
-	debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
-			   platform->debugfs, &g->cde_app.ctx_usecount);
-	debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
-			   platform->debugfs, &g->cde_app.ctx_count_top);
-	debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs,
-			    g, &gk20a_cde_reload_fops);
-}
-#endif
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
index ffd55b4d..4f400bf3 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
@@ -295,7 +295,6 @@ int gk20a_cde_convert(struct gk20a *g,
 		struct nvgpu_fence *fence,
 		u32 __flags, struct gk20a_cde_param *params,
 		int num_params, struct gk20a_fence **fence_out);
-void gk20a_cde_debugfs_init(struct device *dev);
 
 int gk20a_prepare_compressible_read(
 		struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 1ed90b14..c905bedb 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -13,15 +13,10 @@
  * more details.
  */
 
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#endif
-
 #include <nvgpu/kmem.h>
 #include <nvgpu/dma.h>
 
 #include "gk20a.h"
-#include "debug_gk20a.h"
 
 #include <nvgpu/log.h>
 
@@ -33,10 +28,6 @@
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
 
-#ifdef CONFIG_DEBUG_FS
-#include "platform_gk20a.h"
-#endif
-
 static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr)
 {
 	gk20a_dbg(gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n");
@@ -728,18 +719,3 @@ void gk20a_ce_delete_context_priv(struct gk20a *g,
 	return;
 }
 EXPORT_SYMBOL(gk20a_ce_delete_context);
-
-#ifdef CONFIG_DEBUG_FS
-void gk20a_ce_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-
-	debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
-			   platform->debugfs, &g->ce_app.ctx_count);
-	debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
-			   platform->debugfs, &g->ce_app.app_state);
-	debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
-			   platform->debugfs, &g->ce_app.next_ctx_id);
-}
-#endif
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index dfd19019..f972e175 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -156,10 +156,4 @@ void gk20a_ce_delete_context_priv(struct gk20a *g,
 void gk20a_ce_delete_context(struct gk20a *g,
 		u32 ce_ctx_id);
 
-
-#ifdef CONFIG_DEBUG_FS
-/* CE app debugfs api */
-void gk20a_ce_debugfs_init(struct device *dev);
-#endif
-
 #endif /*__CE2_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 571570d8..13abed95 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -30,9 +30,9 @@
 #include <nvgpu/circ_buf.h>
 #include <nvgpu/cond.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a.h"
-#include "debug_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
 #include "dbg_gpu_gk20a.h"
 #include "fence_gk20a.h"
@@ -1403,6 +1403,7 @@ static u32 get_gp_free_count(struct channel_gk20a *c)
 	return gp_free_count(c);
 }
 
+#ifdef CONFIG_DEBUG_FS
 static void trace_write_pushbuffer(struct channel_gk20a *c,
 				   struct nvgpu_gpfifo *g)
 {
@@ -1439,6 +1440,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 		dma_buf_vunmap(dmabuf, mem);
 	}
 }
+#endif
 
 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 					 struct nvgpu_gpfifo *g,
@@ -1446,6 +1448,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 					 int offset,
 					 int count)
 {
+#ifdef CONFIG_DEBUG_FS
 	u32 size;
 	int i;
 	struct nvgpu_gpfifo *gp;
@@ -1478,6 +1481,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 
 	if (gpfifo_allocated)
 		nvgpu_big_free(c->g, g);
+#endif
 }
 
 static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
@@ -1629,8 +1633,8 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 	nvgpu_err(g, "Job on channel %d timed out",
 		  ch->hw_chid);
 
-	gk20a_debug_dump(g->dev);
-	gk20a_gr_debug_dump(g->dev);
+	gk20a_debug_dump(g);
+	gk20a_gr_debug_dump(g);
 
 	g->ops.fifo.force_reset_ch(ch,
 		NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
deleted file mode 100644
index ac435046..00000000
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (C) 2011-2017 NVIDIA Corporation.  All rights reserved.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#endif
-#include <linux/seq_file.h>
-#include <linux/io.h>
-#include <linux/fs.h>
-
-#include <nvgpu/log.h>
-#include <nvgpu/kmem.h>
-#include <nvgpu/semaphore.h>
-#include <nvgpu/log.h>
-
-#include "gk20a.h"
-#include "gk20a/platform_gk20a.h"
-#include "debug_gk20a.h"
-
-#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
-
-unsigned int gk20a_debug_trace_cmdbuf;
-
-static inline void gk20a_debug_write_printk(void *ctx, const char *str,
-					    size_t len)
-{
-	pr_info("%s", str);
-}
-
-static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
-						size_t len)
-{
-	seq_write((struct seq_file *)ctx, str, len);
-}
-
-void gk20a_debug_output(struct gk20a_debug_output *o,
-					const char *fmt, ...)
-{
-	va_list args;
-	int len;
-
-	va_start(args, fmt);
-	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
-	va_end(args);
-	o->fn(o->ctx, o->buf, len);
-}
-
-static void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
-		 struct gk20a_debug_output *o)
-{
-	struct fifo_gk20a *f = &g->fifo;
-	u32 chid;
-	struct ch_state **ch_state;
-
-	ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
-	if (!ch_state) {
-		gk20a_debug_output(o, "cannot alloc memory for channels\n");
-		return;
-	}
-
-	for (chid = 0; chid < f->num_channels; chid++) {
-		struct channel_gk20a *ch = &f->channel[chid];
-		if (gk20a_channel_get(ch)) {
-			ch_state[chid] =
-				nvgpu_kmalloc(g, sizeof(struct ch_state) +
-					ram_in_alloc_size_v());
-			/* ref taken stays to below loop with
-			 * successful allocs */
-			if (!ch_state[chid])
-				gk20a_channel_put(ch);
-		}
-	}
-
-	for (chid = 0; chid < f->num_channels; chid++) {
-		struct channel_gk20a *ch = &f->channel[chid];
-		if (!ch_state[chid])
-			continue;
-
-		ch_state[chid]->pid = ch->pid;
-		ch_state[chid]->refs = atomic_read(&ch->ref_count);
-		nvgpu_mem_rd_n(g, &ch->inst_block, 0,
-				&ch_state[chid]->inst_block[0],
-				ram_in_alloc_size_v());
-		gk20a_channel_put(ch);
-	}
-	for (chid = 0; chid < f->num_channels; chid++) {
-		if (ch_state[chid]) {
-			g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
-						 ch_state[chid]);
-			nvgpu_kfree(g, ch_state[chid]);
-		}
-	}
-	nvgpu_kfree(g, ch_state);
-}
-
-void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
-{
-	g->ops.fifo.dump_pbdma_status(g, o);
-	g->ops.fifo.dump_eng_status(g, o);
-
-	gk20a_debug_dump_all_channel_status_ramfc(g, o);
-}
-
-static int gk20a_gr_dump_regs(struct device *dev,
-		struct gk20a_debug_output *o)
-{
-	struct gk20a_platform *platform = gk20a_get_platform(dev);
-	struct gk20a *g = platform->g;
-
-	if (g->ops.gr.dump_gr_regs)
-		gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));
-
-	return 0;
-}
-
-int gk20a_gr_debug_dump(struct device *dev)
-{
-	struct gk20a_debug_output o = {
-		.fn = gk20a_debug_write_printk
-	};
-
-	gk20a_gr_dump_regs(dev, &o);
-
-	return 0;
-}
-
-static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
-{
-	struct device *dev = s->private;
-	struct gk20a *g = gk20a_get_platform(dev)->g;
-	struct gk20a_debug_output o = {
-		.fn = gk20a_debug_write_to_seqfile,
-		.ctx = s,
-	};
-	int err;
-
-	err = gk20a_busy(g);
-	if (err) {
-		nvgpu_err(g, "failed to power on gpu: %d", err);
-		return -EINVAL;
-	}
-
-	gk20a_gr_dump_regs(dev, &o);
-
-	gk20a_idle(g);
-
-	return 0;
-}
-
-void gk20a_debug_dump(struct device *dev)
-{
-	struct gk20a_platform *platform = gk20a_get_platform(dev);
-	struct gk20a *g = platform->g;
-	struct gk20a_debug_output o = {
-		.fn = gk20a_debug_write_printk
-	};
-
-	if (platform->dump_platform_dependencies)
-		platform->dump_platform_dependencies(dev);
-
-	/* HAL only initialized after 1st power-on */
-	if (g->ops.debug.show_dump)
-		g->ops.debug.show_dump(g, &o);
-}
-
-static int gk20a_debug_show(struct seq_file *s, void *unused)
-{
-	struct device *dev = s->private;
-	struct gk20a_debug_output o = {
-		.fn = gk20a_debug_write_to_seqfile,
-		.ctx = s,
-	};
-	struct gk20a *g;
-	int err;
-
-	g = gk20a_get_platform(dev)->g;
-
-	err = gk20a_busy(g);
-	if (err) {
-		nvgpu_err(g, "failed to power on gpu: %d", err);
-		return -EFAULT;
-	}
-
-	/* HAL only initialized after 1st power-on */
-	if (g->ops.debug.show_dump)
-		g->ops.debug.show_dump(g, &o);
-
-	gk20a_idle(g);
-	return 0;
-}
-
-static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, gk20a_gr_debug_show, inode->i_private);
-}
-
-static int gk20a_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, gk20a_debug_show, inode->i_private);
-}
-
-static const struct file_operations gk20a_gr_debug_fops = {
-	.open		= gk20a_gr_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static const struct file_operations gk20a_debug_fops = {
-	.open		= gk20a_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-void gk20a_init_debug_ops(struct gpu_ops *gops)
-{
-	gops->debug.show_dump = gk20a_debug_show_dump;
-}
-
-#ifdef CONFIG_DEBUG_FS
-static int railgate_residency_show(struct seq_file *s, void *data)
-{
-	struct device *dev = s->private;
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-	unsigned long time_since_last_state_transition_ms;
-	unsigned long total_rail_gate_time_ms;
-	unsigned long total_rail_ungate_time_ms;
-
-	if (platform->is_railgated(dev)) {
-		time_since_last_state_transition_ms =
-				jiffies_to_msecs(jiffies -
-				g->pstats.last_rail_gate_complete);
-		total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
-		total_rail_gate_time_ms =
-					g->pstats.total_rail_gate_time_ms +
-					time_since_last_state_transition_ms;
-	} else {
-		time_since_last_state_transition_ms =
-				jiffies_to_msecs(jiffies -
-				g->pstats.last_rail_ungate_complete);
-		total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
-		total_rail_ungate_time_ms =
-					g->pstats.total_rail_ungate_time_ms +
-					time_since_last_state_transition_ms;
-	}
-
-	seq_printf(s, "Time with Rails Gated: %lu ms\n"
-			"Time with Rails UnGated: %lu ms\n"
-			"Total railgating cycles: %lu\n",
-			total_rail_gate_time_ms,
-			total_rail_ungate_time_ms,
-			g->pstats.railgating_cycle_count - 1);
-	return 0;
-
-}
-
-static int railgate_residency_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, railgate_residency_show, inode->i_private);
-}
-
-static const struct file_operations railgate_residency_fops = {
-	.open		= railgate_residency_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-int gk20a_railgating_debugfs_init(struct device *dev)
-{
-	struct dentry *d;
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-
-	if (!g->can_railgate)
-		return 0;
-
-	d = debugfs_create_file(
-		"railgate_residency", S_IRUGO|S_IWUSR, platform->debugfs, dev,
-						&railgate_residency_fops);
-	if (!d)
-		return -ENOMEM;
-
-	return 0;
-}
-#endif
-
-void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
-{
-#ifdef CONFIG_DEBUG_FS
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = platform->g;
-
-	platform->debugfs = debugfs_create_dir(dev_name(dev), NULL);
-	if (!platform->debugfs)
-		return;
-
-	if (debugfs_symlink)
-		platform->debugfs_alias =
-			debugfs_create_symlink(debugfs_symlink,
-					NULL, dev_name(dev));
-
-	debugfs_create_file("status", S_IRUGO, platform->debugfs,
-		dev, &gk20a_debug_fops);
-	debugfs_create_file("gr_status", S_IRUGO, platform->debugfs,
-		dev, &gk20a_gr_debug_fops);
-	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
-		platform->debugfs, &gk20a_debug_trace_cmdbuf);
-
-	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
-		platform->debugfs, &g->ch_wdt_timeout_ms);
-
-	debugfs_create_u32("disable_syncpoints", S_IRUGO|S_IWUSR,
-		platform->debugfs, &g->disable_syncpoints);
-
-	/* Legacy debugging API. */
-	debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR,
-		platform->debugfs, &nvgpu_dbg_mask);
-
-	/* New debug logging API. */
-	debugfs_create_u32("log_mask", S_IRUGO|S_IWUSR,
-		platform->debugfs, &g->log_mask);
-	debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
-		platform->debugfs, &g->log_trace);
-
-	nvgpu_spinlock_init(&g->debugfs_lock);
-
-	g->mm.ltc_enabled = true;
-	g->mm.ltc_enabled_debug = true;
-
-	g->debugfs_ltc_enabled =
-			debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
-				 platform->debugfs,
-				 &g->mm.ltc_enabled_debug);
-
-	g->debugfs_gr_idle_timeout_default =
-			debugfs_create_u32("gr_idle_timeout_default_us",
-					S_IRUGO|S_IWUSR, platform->debugfs,
-					 &g->gr_idle_timeout_default);
-	g->debugfs_timeouts_enabled =
-			debugfs_create_bool("timeouts_enabled",
-					S_IRUGO|S_IWUSR,
-					platform->debugfs,
-					&g->timeouts_enabled);
-
-	g->debugfs_bypass_smmu =
-			debugfs_create_bool("bypass_smmu",
-					S_IRUGO|S_IWUSR,
-					platform->debugfs,
-					&g->mm.bypass_smmu);
-	g->debugfs_disable_bigpage =
-			debugfs_create_bool("disable_bigpage",
-					S_IRUGO|S_IWUSR,
-					platform->debugfs,
-					&g->mm.disable_bigpage);
-
-	g->debugfs_timeslice_low_priority_us =
-			debugfs_create_u32("timeslice_low_priority_us",
-					S_IRUGO|S_IWUSR,
-					platform->debugfs,
-					&g->timeslice_low_priority_us);
-	g->debugfs_timeslice_medium_priority_us =
-			debugfs_create_u32("timeslice_medium_priority_us",
-					S_IRUGO|S_IWUSR,
-					platform->debugfs,
-					&g->timeslice_medium_priority_us);
-	g->debugfs_timeslice_high_priority_us =
-			debugfs_create_u32("timeslice_high_priority_us",
-					S_IRUGO|S_IWUSR,
-					platform->debugfs,
-					&g->timeslice_high_priority_us);
-	g->debugfs_runlist_interleave =
-			debugfs_create_bool("runlist_interleave",
-					S_IRUGO|S_IWUSR,
-					platform->debugfs,
-					&g->runlist_interleave);
-#ifdef CONFIG_ARCH_TEGRA_18x_SOC
-	g->gr.t18x.ctx_vars.debugfs_force_preemption_gfxp =
-		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
-		platform->debugfs,
-		&g->gr.t18x.ctx_vars.force_preemption_gfxp);
-
-	g->gr.t18x.ctx_vars.debugfs_force_preemption_cilp =
-		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
-		platform->debugfs,
-		&g->gr.t18x.ctx_vars.force_preemption_cilp);
-
-	g->gr.t18x.ctx_vars.debugfs_dump_ctxsw_stats =
-		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
-			S_IRUGO|S_IWUSR, platform->debugfs,
-			&g->gr.t18x.
-				ctx_vars.dump_ctxsw_stats_on_channel_close);
-#endif
-
-	gr_gk20a_debugfs_init(g);
-	gk20a_pmu_debugfs_init(g->dev);
-	gk20a_railgating_debugfs_init(g->dev);
-	gk20a_cde_debugfs_init(g->dev);
-	gk20a_ce_debugfs_init(g->dev);
-	nvgpu_alloc_debugfs_init(g->dev);
-	gk20a_mm_debugfs_init(g->dev);
-	gk20a_fifo_debugfs_init(g->dev);
-	gk20a_sched_debugfs_init(g->dev);
-#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
-	nvgpu_kmem_debugfs_init(g->dev);
-#endif
-#endif
-
-}
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h b/drivers/gpu/nvgpu/gk20a/debug_gk20a.h
deleted file mode 100644
index 213922b3..00000000
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * GK20A Debug functionality
- *
- * Copyright (C) 2011-2017 NVIDIA CORPORATION.  All rights reserved.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef _DEBUG_GK20A_H_
-#define _DEBUG_GK20A_H_
-
-struct platform_device;
-struct gk20a;
-struct gpu_ops;
-
-extern unsigned int gk20a_debug_trace_cmdbuf;
-
-struct gk20a_debug_output {
-	void (*fn)(void *ctx, const char *str, size_t len);
-	void *ctx;
-	char buf[256];
-};
-
-void gk20a_debug_output(struct gk20a_debug_output *o,
-					const char *fmt, ...);
-
-void gk20a_debug_dump(struct device *pdev);
-void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o);
-int gk20a_gr_debug_dump(struct device *pdev);
-void gk20a_debug_init(struct device *dev, const char *debugfs_symlink);
-void gk20a_init_debug_ops(struct gpu_ops *gops);
-void gk20a_debug_dump_device(void *dev);
-#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index ac3a3d57..46560a56 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -29,12 +29,11 @@
 #include <nvgpu/log.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/atomic.h>
-#include <nvgpu/sort.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/log2.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a.h"
-#include "debug_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
 #include "mm_gk20a.h"
 
@@ -46,10 +45,6 @@
 #include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
 
-#ifdef CONFIG_DEBUG_FS
-#include "platform_gk20a.h"
-#endif
-
 #define FECS_METHOD_WFI_RESTORE 0x80000
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -57,10 +52,6 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    bool wait_for_finish);
 static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
 
-#ifdef CONFIG_DEBUG_FS
-static void __gk20a_fifo_profile_free(struct kref *ref);
-#endif
-
 u32 gk20a_fifo_get_engine_ids(struct gk20a *g,
 		u32 engine_id[], u32 engine_id_sz,
 		u32 engine_enum)
@@ -562,14 +553,6 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 	f->engine_info = NULL;
 	nvgpu_kfree(g, f->active_engines_list);
 	f->active_engines_list = NULL;
-#ifdef CONFIG_DEBUG_FS
-	nvgpu_mutex_acquire(&f->profile.lock);
-	if (f->profile.enabled) {
-		f->profile.enabled = false;
-		kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
-	}
-	nvgpu_mutex_release(&f->profile.lock);
-#endif
 }
 
 /* reads info from hardware and fills in pbmda exception info record */
@@ -1543,7 +1526,7 @@ static bool gk20a_fifo_handle_mmu_fault(
 	} else {
 		fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
 		fake_fault = false;
-		gk20a_debug_dump(g->dev);
+		gk20a_debug_dump(g);
 	}
 
 
@@ -1833,7 +1816,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
 			gk20a_channel_abort(ch, false);
 
 			if (gk20a_fifo_error_ch(g, ch))
-				gk20a_debug_dump(g->dev);
+				gk20a_debug_dump(g);
 
 			gk20a_channel_put(ch);
 		}
@@ -1860,7 +1843,7 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose)
 		struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid];
 
 		if (gk20a_fifo_error_tsg(g, tsg))
-			gk20a_debug_dump(g->dev);
+			gk20a_debug_dump(g);
 
 		gk20a_fifo_abort_tsg(g, tsgid, false);
 	}
@@ -1957,7 +1940,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
 	unsigned int id_type;
 
 	if (verbose)
-		gk20a_debug_dump(g->dev);
+		gk20a_debug_dump(g);
 
 	if (g->ops.ltc.flush)
 		g->ops.ltc.flush(g);
@@ -3441,345 +3424,6 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		return NULL;
 }
 
-#ifdef CONFIG_DEBUG_FS
-
-/* Get the next element in the ring buffer of profile entries
- * and grab a reference to the structure
- */
-struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
-{
-	struct fifo_gk20a *f = &g->fifo;
-	struct fifo_profile_gk20a *profile;
-	unsigned int index;
-
-	/* If kref is zero, profiling is not enabled */
-	if (!kref_get_unless_zero(&f->profile.ref))
-		return NULL;
-	index = atomic_inc_return(&f->profile.get);
-	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
-
-	return profile;
-}
-
-/* Free the reference to the structure. This allows deferred cleanups */
-void gk20a_fifo_profile_release(struct gk20a *g,
-					struct fifo_profile_gk20a *profile)
-{
-	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
-}
-
-static void *gk20a_fifo_sched_debugfs_seq_start(
-		struct seq_file *s, loff_t *pos)
-{
-	struct gk20a *g = s->private;
-	struct fifo_gk20a *f = &g->fifo;
-
-	if (*pos >= f->num_channels)
-		return NULL;
-
-	return &f->channel[*pos];
-}
-
-static void *gk20a_fifo_sched_debugfs_seq_next(
-		struct seq_file *s, void *v, loff_t *pos)
-{
-	struct gk20a *g = s->private;
-	struct fifo_gk20a *f = &g->fifo;
-
-	++(*pos);
-	if (*pos >= f->num_channels)
-		return NULL;
-
-	return &f->channel[*pos];
-}
-
-static void gk20a_fifo_sched_debugfs_seq_stop(
-		struct seq_file *s, void *v)
-{
-}
-
-static int gk20a_fifo_sched_debugfs_seq_show(
-		struct seq_file *s, void *v)
-{
-	struct gk20a *g = s->private;
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = v;
-	struct tsg_gk20a *tsg = NULL;
-
-	struct fifo_engine_info_gk20a *engine_info;
-	struct fifo_runlist_info_gk20a *runlist;
-	u32 runlist_id;
-	int ret = SEQ_SKIP;
-	u32 engine_id;
-
-	engine_id = gk20a_fifo_get_gr_engine_id(g);
-	engine_info = (f->engine_info + engine_id);
-	runlist_id = engine_info->runlist_id;
-	runlist = &f->runlist_info[runlist_id];
-
-	if (ch == f->channel) {
-		seq_puts(s, "chid     tsgid    pid      timeslice  timeout  interleave graphics_preempt compute_preempt\n");
-		seq_puts(s, "                            (usecs)   (msecs)\n");
-		ret = 0;
-	}
-
-	if (!test_bit(ch->hw_chid, runlist->active_channels))
-		return ret;
-
-	if (gk20a_channel_get(ch)) {
-		if (gk20a_is_channel_marked_as_tsg(ch))
-			tsg = &f->tsg[ch->tsgid];
-
-		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
-				ch->hw_chid,
-				ch->tsgid,
-				ch->tgid,
-				tsg ? tsg->timeslice_us : ch->timeslice_us,
-				ch->timeout_ms_max,
-				tsg ? tsg->interleave_level : ch->interleave_level,
-				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
-				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
-		gk20a_channel_put(ch);
-	}
-	return 0;
-}
-
-static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
-	.start = gk20a_fifo_sched_debugfs_seq_start,
-	.next = gk20a_fifo_sched_debugfs_seq_next,
-	.stop = gk20a_fifo_sched_debugfs_seq_stop,
-	.show = gk20a_fifo_sched_debugfs_seq_show
-};
-
-static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
-	struct file *file)
-{
-	int err;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
-	if (err)
-		return err;
-
-	gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);
-
-	((struct seq_file *)file->private_data)->private = inode->i_private;
-	return 0;
-};
-
-/*
- * The file operations structure contains our open function along with
- * set of the canned seq_ ops.
- */
-static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
-	.owner = THIS_MODULE,
-	.open = gk20a_fifo_sched_debugfs_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release
-};
-
-static void __gk20a_fifo_profile_free(struct kref *ref)
-{
-	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
-						profile.ref);
-	nvgpu_vfree(f->g, f->profile.data);
-	nvgpu_vfree(f->g, f->profile.sorted);
-}
-
-static int gk20a_fifo_profile_enable(void *data, u64 val)
-{
-	struct gk20a *g = (struct gk20a *) data;
-	struct fifo_gk20a *f = &g->fifo;
-
-
-	nvgpu_mutex_acquire(&f->profile.lock);
-	if (val == 0) {
-		if (f->profile.enabled) {
-			f->profile.enabled = false;
-			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
-		}
-	} else {
-		if (!f->profile.enabled) {
-			/* not kref init as it can have a running condition if
-			 * we enable/disable/enable while kickoff is happening
-			 */
-			if (!kref_get_unless_zero(&f->profile.ref)) {
-				f->profile.data = vzalloc(
-							FIFO_PROFILING_ENTRIES *
-					sizeof(struct fifo_profile_gk20a));
-				f->profile.sorted  = vzalloc(
-							FIFO_PROFILING_ENTRIES *
-							sizeof(u64));
-				if (!(f->profile.data && f->profile.sorted)) {
-					nvgpu_vfree(g, f->profile.data);
-					nvgpu_vfree(g, f->profile.sorted);
-					nvgpu_mutex_release(&f->profile.lock);
-					return -ENOMEM;
-				}
-				kref_init(&f->profile.ref);
-			}
-			atomic_set(&f->profile.get, 0);
-			f->profile.enabled = true;
-		}
-	}
-	nvgpu_mutex_release(&f->profile.lock);
-
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(
-	gk20a_fifo_profile_enable_debugfs_fops,
-	NULL,
-	gk20a_fifo_profile_enable,
-	"%llu\n"
-);
-
-static int __profile_cmp(const void *a, const void *b)
-{
-	return *((unsigned long long *) a) - *((unsigned long long *) b);
-}
-
-/*
- * This uses about 800b in the stack, but the function using it is not part
- * of a callstack where much memory is being used, so it is fine
- */
-#define PERCENTILE_WIDTH	5
-#define PERCENTILE_RANGES	(100/PERCENTILE_WIDTH)
-
-static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
-		u64 *percentiles, u32 index_end, u32 index_start)
-{
-	unsigned int nelem = 0;
-	unsigned int index;
-	struct fifo_profile_gk20a *profile;
-
-	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
-		profile = &g->fifo.profile.data[index];
-
-		if (profile->timestamp[index_end] >
-				profile->timestamp[index_start]) {
-			/* This is a valid element */
-			g->fifo.profile.sorted[nelem] =
-						profile->timestamp[index_end] -
-						profile->timestamp[index_start];
-			nelem++;
-		}
-	}
-
-	/* sort it */
-	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
-		__profile_cmp, NULL);
-
-	/* build ranges */
-	for (index = 0; index < PERCENTILE_RANGES; index++)
-		percentiles[index] =
-			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
-						nelem)/100 - 1];
-	return nelem;
-}
-
-static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
-{
-	struct gk20a *g = s->private;
-	unsigned int get, nelem, index;
-	/*
-	 * 800B in the stack, but function is declared statically and only
-	 * called from debugfs handler
-	 */
-	u64 percentiles_ioctl[PERCENTILE_RANGES];
-	u64 percentiles_kickoff[PERCENTILE_RANGES];
-	u64 percentiles_jobtracking[PERCENTILE_RANGES];
-	u64 percentiles_append[PERCENTILE_RANGES];
-	u64 percentiles_userd[PERCENTILE_RANGES];
-
-	if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
-		seq_printf(s, "Profiling disabled\n");
-		return 0;
-	}
-
-	get = atomic_read(&g->fifo.profile.get);
-
-	__gk20a_fifo_create_stats(g, percentiles_ioctl,
-		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_kickoff,
-		PROFILE_END, PROFILE_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
-		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
-	__gk20a_fifo_create_stats(g, percentiles_append,
-		PROFILE_APPEND, PROFILE_JOB_TRACKING);
-	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
-		PROFILE_END, PROFILE_APPEND);
-
-	seq_printf(s, "Number of kickoffs: %d\n", nelem);
-	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
-
-	for (index = 0; index < PERCENTILE_RANGES; index++)
-		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
-			PERCENTILE_WIDTH * (index+1),
-			percentiles_ioctl[index],
-			percentiles_kickoff[index],
-			percentiles_append[index],
-			percentiles_jobtracking[index],
-			percentiles_userd[index]);
-
-	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
-
-	return 0;
-}
-
-static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
-}
-
-static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
-	.open		= gk20a_fifo_profile_stats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-
-void gk20a_fifo_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-
-	struct dentry *gpu_root = platform->debugfs;
-	struct dentry *fifo_root;
-	struct dentry *profile_root;
-
-
-	fifo_root = debugfs_create_dir("fifo", gpu_root);
-	if (IS_ERR_OR_NULL(fifo_root))
-		return;
-
-	gk20a_dbg(gpu_dbg_info, "g=%p", g);
-
-	debugfs_create_file("sched", 0600, fifo_root, g,
-		&gk20a_fifo_sched_debugfs_fops);
-
-	profile_root = debugfs_create_dir("profile", fifo_root);
-	if (IS_ERR_OR_NULL(profile_root))
-		return;
-
-	nvgpu_mutex_init(&g->fifo.profile.lock);
-	g->fifo.profile.enabled = false;
-	atomic_set(&g->fifo.profile.get, 0);
-	atomic_set(&g->fifo.profile.ref.refcount, 0);
-
-	debugfs_create_file("enable", 0600, profile_root, g,
-		&gk20a_fifo_profile_enable_debugfs_fops);
-
-	debugfs_create_file("stats", 0600, profile_root, g,
-		&gk20a_fifo_profile_stats_debugfs_fops);
-
-}
-#endif /* CONFIG_DEBUG_FS */
-
 static const char * const ccsr_chan_status_str[] = {
 	"idle",
 	"pending",
@@ -3901,6 +3545,54 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
 	gk20a_debug_output(o, "\n");
 }
 
+void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
+		 struct gk20a_debug_output *o)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
+	struct ch_state **ch_state;	/* indexed by chid; NULL = skipped */
+
+	ch_state = nvgpu_kzalloc(g, sizeof(*ch_state) * f->num_channels);
+	if (!ch_state) {
+		gk20a_debug_output(o, "cannot alloc memory for channels\n");
+		return;
+	}
+	/* Pass 1: try to ref each channel; allocate a snapshot if held. */
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+		if (gk20a_channel_get(ch)) {
+			ch_state[chid] =
+				nvgpu_kmalloc(g, sizeof(struct ch_state) +
+					ram_in_alloc_size_v());
+			/* On a successful alloc the channel ref is kept and
+			 * dropped in the next loop; otherwise drop it here. */
+			if (!ch_state[chid])
+				gk20a_channel_put(ch);
+		}
+	}
+	/* Pass 2: copy pid, refs and instance block; drop the ref. */
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+		if (!ch_state[chid])
+			continue;
+
+		ch_state[chid]->pid = ch->pid;
+		ch_state[chid]->refs = atomic_read(&ch->ref_count);
+		nvgpu_mem_rd_n(g, &ch->inst_block, 0,
+				&ch_state[chid]->inst_block[0],
+				ram_in_alloc_size_v());
+		gk20a_channel_put(ch);
+	}
+	for (chid = 0; chid < f->num_channels; chid++) {
+		if (ch_state[chid]) {	/* Pass 3: dump, then free */
+			g->ops.fifo.dump_channel_status_ramfc(g, o, chid,
+						 ch_state[chid]);
+			nvgpu_kfree(g, ch_state[chid]);
+		}
+	}
+	nvgpu_kfree(g, ch_state);
+}
+
 void gk20a_dump_pbdma_status(struct gk20a *g,
 				 struct gk20a_debug_output *o)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 6c8868a2..228e5130 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -23,10 +23,11 @@
 
 #include "channel_gk20a.h"
 #include "tsg_gk20a.h"
-#include "debug_gk20a.h"
 
 #include <nvgpu/kref.h>
 
+struct gk20a_debug_output;
+
 #define MAX_RUNLIST_BUFFERS		2
 
 #define FIFO_INVAL_ENGINE_ID		((u32)~0)
@@ -287,8 +288,6 @@ int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
 int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
 
 
-void gk20a_fifo_debugfs_init(struct device *dev);
-
 const char *gk20a_fifo_interleave_level_name(u32 interleave_level);
 
 int gk20a_fifo_engine_enum_from_type(struct gk20a *g, u32 engine_type,
@@ -341,6 +340,8 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
 				     struct gk20a_debug_output *o,
 				     u32 hw_chid,
 				     struct ch_state *ch_state);
+void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
+		 struct gk20a_debug_output *o);
 void gk20a_dump_pbdma_status(struct gk20a *g,
 				 struct gk20a_debug_output *o);
 void gk20a_dump_eng_status(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 689fafb1..899c1d6a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -30,6 +30,7 @@ struct acr_desc;
 struct nvgpu_mem_alloc_tracker;
 struct dbg_profiler_object_data;
 struct ecc_gk20a;
+struct gk20a_debug_output;
 
 #include <linux/sched.h>
 #include <nvgpu/lock.h>
@@ -61,7 +62,6 @@ struct ecc_gk20a;
 #include "therm_gk20a.h"
 #include "gm20b/acr_gm20b.h"
 #include "cde_gk20a.h"
-#include "debug_gk20a.h"
 #include "sched_gk20a.h"
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 #include "clk/clk.h"
@@ -1544,10 +1544,6 @@ void nvgpu_wait_for_deferred_interrupts(struct gk20a *g);
 struct gk20a * __must_check gk20a_get(struct gk20a *g);
 void gk20a_put(struct gk20a *g);
 
-#ifdef CONFIG_DEBUG_FS
-int gk20a_railgating_debugfs_init(struct device *dev);
-#endif
-
 static inline bool gk20a_platform_has_syncpoints(struct gk20a *g)
 {
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2188618c..982cfac8 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -30,6 +30,7 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/firmware.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a.h"
 #include "kind_gk20a.h"
@@ -37,13 +38,8 @@
 #include "gr_pri_gk20a.h"
 #include "regops_gk20a.h"
 #include "dbg_gpu_gk20a.h"
-#include "debug_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
 
-#ifdef CONFIG_DEBUG_FS
-#include "platform_gk20a.h"
-#endif
-
 #include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
@@ -514,7 +510,7 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
 		nvgpu_err(g,
 			   "timeout waiting on ucode response");
 		gk20a_fecs_dump_falcon_stats(g);
-		gk20a_gr_debug_dump(g->dev);
+		gk20a_gr_debug_dump(g);
 		return -1;
 	} else if (check == WAIT_UCODE_ERROR) {
 		nvgpu_err(g,
@@ -9032,20 +9028,6 @@ static int gr_gk20a_dump_gr_status_regs(struct gk20a *g,
 	return 0;
 }
 
-#ifdef CONFIG_DEBUG_FS
-int gr_gk20a_debugfs_init(struct gk20a *g)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
-
-	g->debugfs_gr_default_attrib_cb_size =
-		debugfs_create_u32("gr_default_attrib_cb_size",
-				   S_IRUGO|S_IWUSR, platform->debugfs,
-				   &g->gr.attrib_cb_default_size);
-
-	return 0;
-}
-#endif
-
 static void gr_gk20a_init_cyclestats(struct gk20a *g)
 {
 #if defined(CONFIG_GK20A_CYCLE_STATS)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 79aeb42f..deb8ea9c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -653,7 +653,6 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 void gr_gk20a_free_gr_ctx(struct gk20a *g,
 			  struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx);
 int gr_gk20a_halt_pipe(struct gk20a *g);
-int gr_gk20a_debugfs_init(struct gk20a *g);
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 int gr_gk20a_css_attach(struct channel_gk20a *ch,   /* in - main hw structure */
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index 8a3beb39..b19398a6 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -37,6 +37,7 @@
 #include "pramin_gk20a.h"
 #include "priv_ring_gk20a.h"
 
+#include <nvgpu/debug.h>
 #include <nvgpu/log.h>
 #include <nvgpu/bug.h>
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 53d22a7d..08e2e9cc 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -2563,13 +2563,13 @@ priv_exist_or_err:
 	return 0;
 }
 
-int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev,
+int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 			   u64 offset, struct gk20a_buffer_state **state)
 {
 	int err = 0;
 	struct gk20a_dmabuf_priv *priv;
 	struct gk20a_buffer_state *s;
-	struct gk20a *g = get_gk20a(dev);
+	struct device *dev = g->dev;
 
 	if (WARN_ON(offset >= (u64)dmabuf->size))
 		return -EINVAL;
@@ -3123,18 +3123,6 @@ static bool gk20a_mm_is_bar1_supported(struct gk20a *g)
 	return true;
 }
 
-#ifdef CONFIG_DEBUG_FS
-void gk20a_mm_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct dentry *gpu_root = platform->debugfs;
-	struct gk20a *g = gk20a_get_platform(dev)->g;
-
-	debugfs_create_bool("force_pramin", 0664, gpu_root,
-			   &g->mm.force_pramin);
-}
-#endif
-
 void gk20a_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 79b55371..5d90cbf6 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -146,7 +146,6 @@ struct channel_gk20a;
 int gk20a_init_mm_support(struct gk20a *g);
 int gk20a_init_mm_setup_sw(struct gk20a *g);
 int gk20a_init_mm_setup_hw(struct gk20a *g);
-void gk20a_mm_debugfs_init(struct device *dev);
 void gk20a_init_mm_ce_context(struct gk20a *g);
 
 int gk20a_mm_fb_flush(struct gk20a *g);
@@ -437,7 +436,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);
 
 int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
 
-int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct device *dev,
+int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
 			   u64 offset, struct gk20a_buffer_state **state);
 
 int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry);
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index a9e03943..552d5d73 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -35,12 +35,6 @@
 #include "nvgpu_gpuid_t19x.h"
 #endif
 
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include "platform_gk20a.h"
-#endif
-
 #define GK20A_PMU_UCODE_IMAGE	"gpmu_ucode.bin"
 
 #define PMU_MEM_SCRUBBING_TIMEOUT_MAX 1000
@@ -49,7 +43,7 @@
 #define gk20a_dbg_pmu(fmt, arg...) \
 	gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
 
-static int gk20a_pmu_get_pg_stats(struct gk20a *g,
+int gk20a_pmu_get_pg_stats(struct gk20a *g,
 		u32 pg_engine_id,
 		struct pmu_pg_stats_data *pg_stat_data);
 static void ap_callback_init_and_enable_ctrl(
@@ -281,7 +275,7 @@ static void set_pmu_cmdline_args_falctracesize_v1(
 	pmu->args_v1.falc_trace_size = size;
 }
 
-static bool find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos)
+bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos)
 {
 	u32 i = 0, j = strlen(strings);
 	for (; i < j; i++) {
@@ -326,7 +320,7 @@ static void printtrace(struct pmu_gk20a *pmu)
 		count = scnprintf(buf, 0x40, "Index %x: ", trace1[(i / 4)]);
 		l = 0;
 		m = 0;
-		while (find_hex_in_string((trace+i+20+m), g, &k)) {
+		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
 			if (k >= 40)
 				break;
 			strncpy(part_str, (trace+i+20+m), k);
@@ -4141,7 +4135,7 @@ void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
 		nvgpu_err(g, "ZBC save timeout");
 }
 
-static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
+int nvgpu_pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
 {
 	struct gk20a *g = gk20a_from_pmu(pmu);
 	struct pmu_v *pv = &g->ops.pmu_ver;
@@ -4185,7 +4179,7 @@ static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu)
 	return 0;
 }
 
-static int pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
+int nvgpu_pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu)
 {
 	struct gk20a *g = gk20a_from_pmu(pmu);
 	struct pmu_cmd cmd;
@@ -4231,7 +4225,7 @@ static int pmu_handle_perfmon_event(struct pmu_gk20a *pmu,
 
 	/* restart sampling */
 	if (pmu->perfmon_sampling_enabled)
-		return pmu_perfmon_start_sampling(pmu);
+		return nvgpu_pmu_perfmon_start_sampling(pmu);
 	return 0;
 }
 
@@ -5173,9 +5167,9 @@ int gk20a_pmu_perfmon_enable(struct gk20a *g, bool enable)
 	gk20a_dbg_fn("");
 
 	if (enable)
-		err = pmu_perfmon_start_sampling(pmu);
+		err = nvgpu_pmu_perfmon_start_sampling(pmu);
 	else
-		err = pmu_perfmon_stop_sampling(pmu);
+		err = nvgpu_pmu_perfmon_stop_sampling(pmu);
 
 	return err;
 }
@@ -5293,7 +5287,7 @@ void gk20a_pmu_elpg_statistics(struct gk20a *g, u32 pg_engine_id,
 	pg_stat_data->avg_exit_latency_us = stats.pg_avg_exit_time_us;
 }
 
-static int gk20a_pmu_get_pg_stats(struct gk20a *g,
+int gk20a_pmu_get_pg_stats(struct gk20a *g,
 		u32 pg_engine_id,
 		struct pmu_pg_stats_data *pg_stat_data)
 {
@@ -5463,466 +5457,3 @@ int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id)
 	status = gk20a_pmu_ap_send_command(g, &ap_cmd, true);
 	return status;
 }
-
-#ifdef CONFIG_DEBUG_FS
-static int lpwr_debug_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-
-	if (g->ops.pmu.pmu_pg_engines_feature_list &&
-		g->ops.pmu.pmu_pg_engines_feature_list(g,
-		PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
-		PMU_PG_FEATURE_GR_POWER_GATING_ENABLED) {
-		seq_printf(s, "PSTATE: %u\n"
-			"RPPG Enabled: %u\n"
-			"RPPG ref count: %u\n"
-			"RPPG state: %u\n"
-			"MSCG Enabled: %u\n"
-			"MSCG pstate state: %u\n"
-			"MSCG transition state: %u\n",
-			g->ops.clk_arb.get_current_pstate(g),
-			g->elpg_enabled, g->pmu.elpg_refcnt,
-			g->pmu.elpg_stat, g->mscg_enabled,
-			g->pmu.mscg_stat, g->pmu.mscg_transition_state);
-
-	} else
-		seq_printf(s, "ELPG Enabled: %u\n"
-			"ELPG ref count: %u\n"
-			"ELPG state: %u\n",
-			g->elpg_enabled, g->pmu.elpg_refcnt,
-			g->pmu.elpg_stat);
-
-	return 0;
-
-}
-
-static int lpwr_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, lpwr_debug_show, inode->i_private);
-}
-
-static const struct file_operations lpwr_debug_fops = {
-	.open		= lpwr_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int mscg_stat_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-	u64 total_ingating, total_ungating, residency, divisor, dividend;
-	struct pmu_pg_stats_data pg_stat_data = { 0 };
-	int err;
-
-	/* Don't unnecessarily power on the device */
-	if (g->power_on) {
-		err = gk20a_busy(g);
-		if (err)
-			return err;
-
-		gk20a_pmu_get_pg_stats(g,
-			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
-		gk20a_idle(g);
-	}
-	total_ingating = g->pg_ingating_time_us +
-			(u64)pg_stat_data.ingating_time;
-	total_ungating = g->pg_ungating_time_us +
-			(u64)pg_stat_data.ungating_time;
-
-	divisor = total_ingating + total_ungating;
-
-	/* We compute the residency on a scale of 1000 */
-	dividend = total_ingating * 1000;
-
-	if (divisor)
-		residency = div64_u64(dividend, divisor);
-	else
-		residency = 0;
-
-	seq_printf(s,
-			"Time in MSCG: %llu us\n"
-			"Time out of MSCG: %llu us\n"
-			"MSCG residency ratio: %llu\n"
-			"MSCG Entry Count: %u\n"
-			"MSCG Avg Entry latency %u\n"
-			"MSCG Avg Exit latency %u\n",
-			total_ingating, total_ungating,
-			residency, pg_stat_data.gating_cnt,
-			pg_stat_data.avg_entry_latency_us,
-			pg_stat_data.avg_exit_latency_us);
-	return 0;
-
-}
-
-static int mscg_stat_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mscg_stat_show, inode->i_private);
-}
-
-static const struct file_operations mscg_stat_fops = {
-	.open		= mscg_stat_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int mscg_transitions_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-	struct pmu_pg_stats_data pg_stat_data = { 0 };
-	u32 total_gating_cnt;
-	int err;
-
-	if (g->power_on) {
-		err = gk20a_busy(g);
-		if (err)
-			return err;
-
-		gk20a_pmu_get_pg_stats(g,
-			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
-		gk20a_idle(g);
-	}
-	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
-
-	seq_printf(s, "%u\n", total_gating_cnt);
-	return 0;
-
-}
-
-static int mscg_transitions_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mscg_transitions_show, inode->i_private);
-}
-
-static const struct file_operations mscg_transitions_fops = {
-	.open		= mscg_transitions_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int elpg_stat_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-	struct pmu_pg_stats_data pg_stat_data = { 0 };
-	u64 total_ingating, total_ungating, residency, divisor, dividend;
-	int err;
-
-	/* Don't unnecessarily power on the device */
-	if (g->power_on) {
-		err = gk20a_busy(g);
-		if (err)
-			return err;
-
-		gk20a_pmu_get_pg_stats(g,
-			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
-		gk20a_idle(g);
-	}
-	total_ingating = g->pg_ingating_time_us +
-			(u64)pg_stat_data.ingating_time;
-	total_ungating = g->pg_ungating_time_us +
-			(u64)pg_stat_data.ungating_time;
-	divisor = total_ingating + total_ungating;
-
-	/* We compute the residency on a scale of 1000 */
-	dividend = total_ingating * 1000;
-
-	if (divisor)
-		residency = div64_u64(dividend, divisor);
-	else
-		residency = 0;
-
-	seq_printf(s,
-			"Time in ELPG: %llu us\n"
-			"Time out of ELPG: %llu us\n"
-			"ELPG residency ratio: %llu\n"
-			"ELPG Entry Count: %u\n"
-			"ELPG Avg Entry latency %u us\n"
-			"ELPG Avg Exit latency %u us\n",
-			total_ingating, total_ungating,
-			residency, pg_stat_data.gating_cnt,
-			pg_stat_data.avg_entry_latency_us,
-			pg_stat_data.avg_exit_latency_us);
-	return 0;
-
-}
-
-static int elpg_stat_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, elpg_stat_show, inode->i_private);
-}
-
-static const struct file_operations elpg_stat_fops = {
-	.open		= elpg_stat_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int elpg_transitions_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-	struct pmu_pg_stats_data pg_stat_data = { 0 };
-	u32 total_gating_cnt;
-	int err;
-
-	if (g->power_on) {
-		err = gk20a_busy(g);
-		if (err)
-			return err;
-
-		gk20a_pmu_get_pg_stats(g,
-			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
-		gk20a_idle(g);
-	}
-	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
-
-	seq_printf(s, "%u\n", total_gating_cnt);
-	return 0;
-
-}
-
-static int elpg_transitions_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, elpg_transitions_show, inode->i_private);
-}
-
-static const struct file_operations elpg_transitions_fops = {
-	.open		= elpg_transitions_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int falc_trace_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-	struct pmu_gk20a *pmu = &g->pmu;
-	u32 i = 0, j = 0, k, l, m;
-	char part_str[40];
-	void *tracebuffer;
-	char *trace;
-	u32 *trace1;
-
-	/* allocate system memory to copy pmu trace buffer */
-	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
-	if (tracebuffer == NULL)
-		return -ENOMEM;
-
-	/* read pmu traces into system memory buffer */
-	nvgpu_mem_rd_n(g, &pmu->trace_buf,
-		       0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);
-
-	trace = (char *)tracebuffer;
-	trace1 = (u32 *)tracebuffer;
-
-	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
-		for (j = 0; j < 0x40; j++)
-			if (trace1[(i / 4) + j])
-				break;
-		if (j == 0x40)
-			break;
-		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
-		l = 0;
-		m = 0;
-		while (find_hex_in_string((trace+i+20+m), g, &k)) {
-			if (k >= 40)
-				break;
-			strncpy(part_str, (trace+i+20+m), k);
-			part_str[k] = 0;
-			seq_printf(s, "%s0x%x", part_str,
-					trace1[(i / 4) + 1 + l]);
-			l++;
-			m += k + 2;
-		}
-		seq_printf(s, "%s", (trace+i+20+m));
-	}
-
-	nvgpu_kfree(g, tracebuffer);
-	return 0;
-}
-
-static int falc_trace_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, falc_trace_show, inode->i_private);
-}
-
-static const struct file_operations falc_trace_fops = {
-	.open		= falc_trace_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int perfmon_events_enable_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-
-	seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
-	return 0;
-
-}
-
-static int perfmon_events_enable_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, perfmon_events_enable_show, inode->i_private);
-}
-
-static ssize_t perfmon_events_enable_write(struct file *file,
-	const char __user *userbuf, size_t count, loff_t *ppos)
-{
-	struct seq_file *s = file->private_data;
-	struct gk20a *g = s->private;
-	unsigned long val = 0;
-	char buf[40];
-	int buf_size;
-	int err;
-
-	memset(buf, 0, sizeof(buf));
-	buf_size = min(count, (sizeof(buf)-1));
-
-	if (copy_from_user(buf, userbuf, buf_size))
-		return -EFAULT;
-
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	/* Don't turn on gk20a unnecessarily */
-	if (g->power_on) {
-		err = gk20a_busy(g);
-		if (err)
-			return err;
-
-		if (val && !g->pmu.perfmon_sampling_enabled) {
-			g->pmu.perfmon_sampling_enabled = true;
-			pmu_perfmon_start_sampling(&(g->pmu));
-		} else if (!val && g->pmu.perfmon_sampling_enabled) {
-			g->pmu.perfmon_sampling_enabled = false;
-			pmu_perfmon_stop_sampling(&(g->pmu));
-		}
-		gk20a_idle(g);
-	} else {
-		g->pmu.perfmon_sampling_enabled = val ? true : false;
-	}
-
-	return count;
-}
-
-static const struct file_operations perfmon_events_enable_fops = {
-	.open		= perfmon_events_enable_open,
-	.read		= seq_read,
-	.write		= perfmon_events_enable_write,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int perfmon_events_count_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-
-	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
-	return 0;
-
-}
-
-static int perfmon_events_count_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, perfmon_events_count_show, inode->i_private);
-}
-
-static const struct file_operations perfmon_events_count_fops = {
-	.open		= perfmon_events_count_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int security_show(struct seq_file *s, void *data)
-{
-	struct gk20a *g = s->private;
-
-	seq_printf(s, "%d\n", g->pmu.pmu_mode);
-	return 0;
-
-}
-
-static int security_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, security_show, inode->i_private);
-}
-
-static const struct file_operations security_fops = {
-	.open		= security_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-int gk20a_pmu_debugfs_init(struct device *dev)
-{
-	struct dentry *d;
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-
-	d = debugfs_create_file(
-		"lpwr_debug", S_IRUGO|S_IWUSR, platform->debugfs, g,
-						&lpwr_debug_fops);
-	if (!d)
-		goto err_out;
-
-	d = debugfs_create_file(
-		"mscg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
-						&mscg_stat_fops);
-	if (!d)
-		goto err_out;
-
-	d = debugfs_create_file(
-		"mscg_transitions", S_IRUGO, platform->debugfs, g,
-						&mscg_transitions_fops);
-	if (!d)
-		goto err_out;
-
-	d = debugfs_create_file(
-		"elpg_residency", S_IRUGO|S_IWUSR, platform->debugfs, g,
-						&elpg_stat_fops);
-	if (!d)
-		goto err_out;
-
-	d = debugfs_create_file(
-		"elpg_transitions", S_IRUGO, platform->debugfs, g,
-						&elpg_transitions_fops);
-	if (!d)
-		goto err_out;
-
-	d = debugfs_create_file(
-		"falc_trace", S_IRUGO, platform->debugfs, g,
-						&falc_trace_fops);
-	if (!d)
-		goto err_out;
-
-	d = debugfs_create_file(
-		"perfmon_events_enable", S_IRUGO, platform->debugfs, g,
-						&perfmon_events_enable_fops);
-	if (!d)
-		goto err_out;
-
-	d = debugfs_create_file(
-		"perfmon_events_count", S_IRUGO, platform->debugfs, g,
-						&perfmon_events_count_fops);
-	if (!d)
-		goto err_out;
-
-	d = debugfs_create_file(
-		"pmu_security", S_IRUGO, platform->debugfs, g,
-						&security_fops);
-	if (!d)
-		goto err_out;
-	return 0;
-err_out:
-	pr_err("%s: Failed to make debugfs node\n", __func__);
-	debugfs_remove_recursive(platform->debugfs);
-	return -ENOMEM;
-}
-
-#endif
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index e7a8b7c2..cefb6577 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -426,7 +426,6 @@ int pmu_mutex_release(struct pmu_gk20a *pmu, u32 id, u32 *token);
 int gk20a_pmu_destroy(struct gk20a *g);
 int gk20a_pmu_load_norm(struct gk20a *g, u32 *load);
 int gk20a_pmu_load_update(struct gk20a *g);
-int gk20a_pmu_debugfs_init(struct device *dev);
 void gk20a_pmu_reset_load_counters(struct gk20a *g);
 void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
 		u32 *total_cycles);
@@ -468,5 +467,11 @@ int gk20a_pmu_vidmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 size);
 int gk20a_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 size);
+int gk20a_pmu_get_pg_stats(struct gk20a *g,
+		u32 pg_engine_id, struct pmu_pg_stats_data *pg_stat_data);
+bool nvgpu_find_hex_in_string(char *strings, struct gk20a *g, u32 *hex_pos);
+
+int nvgpu_pmu_perfmon_start_sampling(struct pmu_gk20a *pmu);
+int nvgpu_pmu_perfmon_stop_sampling(struct pmu_gk20a *pmu);
 
 #endif /*__PMU_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
index b7edf3f0..3f3119af 100644
--- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
@@ -13,10 +13,6 @@
 
 #include <asm/barrier.h>
 #include <linux/wait.h>
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#include "platform_gk20a.h"
-#endif
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <uapi/linux/nvgpu.h>
@@ -523,69 +519,6 @@ int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-#ifdef CONFIG_DEBUG_FS
-static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
-{
-	struct device *dev = s->private;
-	struct gk20a *g = gk20a_get_platform(dev)->g;
-	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
-	bool sched_busy = true;
-
-	int n = sched->bitmap_size / sizeof(u64);
-	int i;
-	int err;
-
-	err = gk20a_busy(g);
-	if (err)
-		return err;
-
-	if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
-		sched_busy = false;
-		nvgpu_mutex_release(&sched->busy_lock);
-	}
-
-	seq_printf(s, "control_locked=%d\n", sched->control_locked);
-	seq_printf(s, "busy=%d\n", sched_busy);
-	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
-
-	nvgpu_mutex_acquire(&sched->status_lock);
-
-	seq_puts(s, "active_tsg_bitmap\n");
-	for (i = 0; i < n; i++)
-		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
-
-	seq_puts(s, "recent_tsg_bitmap\n");
-	for (i = 0; i < n; i++)
-		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
-
-	nvgpu_mutex_release(&sched->status_lock);
-
-	gk20a_idle(g);
-
-	return 0;
-}
-
-static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations gk20a_sched_debugfs_fops = {
-	.open		= gk20a_sched_debugfs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-void gk20a_sched_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-
-	debugfs_create_file("sched_ctrl", S_IRUGO, platform->debugfs,
-			dev, &gk20a_sched_debugfs_fops);
-}
-#endif /* CONFIG_DEBUG_FS */
-
 void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
 {
 	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
index 4f6d1510..776f689d 100644
--- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h
@@ -48,7 +48,6 @@ void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
 void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
 int gk20a_sched_ctrl_init(struct gk20a *);
 
-void gk20a_sched_debugfs_init(struct device *dev);
 void gk20a_sched_ctrl_cleanup(struct gk20a *g);
 
 #endif /* __SCHED_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gm20b/debug_gm20b.c b/drivers/gpu/nvgpu/gm20b/debug_gm20b.c
deleted file mode 100644
index b266200c..00000000
--- a/drivers/gpu/nvgpu/gm20b/debug_gm20b.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2015 NVIDIA Corporation.  All rights reserved.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include "gk20a/gk20a.h"
-#include "debug_gm20b.h"
-
-void gm20b_init_debug_ops(struct gpu_ops *gops)
-{
-	gops->debug.show_dump = gk20a_debug_show_dump;
-}
diff --git a/drivers/gpu/nvgpu/gm20b/debug_gm20b.h b/drivers/gpu/nvgpu/gm20b/debug_gm20b.h
deleted file mode 100644
index c3c5fed6..00000000
--- a/drivers/gpu/nvgpu/gm20b/debug_gm20b.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * GM20B Debug functionality
- *
- * Copyright (C) 2015 NVIDIA CORPORATION.  All rights reserved.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef _DEBUG_GM20B_H_
-#define _DEBUG_GM20B_H_
-
-struct gpu_ops;
-
-void gm20b_init_debug_ops(struct gpu_ops *gops);
-
-#endif
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 82c587f9..c6e451e1 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -20,6 +20,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/log.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index f5328f03..831fd5da 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -33,11 +33,11 @@
 #include "clk_gm20b.h"
 #include "mc_gm20b.h"
 #include "regops_gm20b.h"
-#include "debug_gm20b.h"
 #include "cde_gm20b.h"
 #include "therm_gm20b.h"
 #include "hal_gm20b.h"
 
+#include <nvgpu/debug.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
 
@@ -234,7 +234,7 @@ int gm20b_init_hal(struct gk20a *g)
 	gm20b_init_pmu_ops(gops);
 	gm20b_init_clk_ops(gops);
 	gm20b_init_regops(gops);
-	gm20b_init_debug_ops(gops);
+	gk20a_init_debug_ops(gops);
 	gk20a_init_dbg_session_ops(gops);
 	gm20b_init_cde_ops(gops);
 	gm20b_init_therm_ops(gops);
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index f28ff45f..d923e5e9 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -53,6 +53,7 @@
 
 #include "hal_gp106.h"
 
+#include <nvgpu/debug.h>
 #include <nvgpu/bug.h>
 
 #include <nvgpu/hw/gp106/hw_proj_gp106.h>
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 98a8be2f..9a30ad7c 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -23,6 +23,7 @@
 #include <nvgpu/gmmu.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/bug.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index e2a931be..a1906a08 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -44,6 +44,7 @@
 #include "gp10b.h"
 #include "hal_gp10b.h"
 
+#include <nvgpu/debug.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index 3579b0fb..567c4422 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -256,11 +256,13 @@ static inline struct gk20a *nvgpu_alloc_to_gpu(struct nvgpu_allocator *a)
 	return a->g;
 }
 
+#ifdef CONFIG_DEBUG_FS
 /*
  * Common functionality for the internals of the allocators.
  */
 void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a);
 void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a);
+#endif
 
 int  __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
 			       const char *name, void *priv, bool dbg,
@@ -281,11 +283,6 @@ static inline void nvgpu_alloc_disable_dbg(struct nvgpu_allocator *a)
  */
 extern u32 nvgpu_alloc_tracing_on;
 
-#ifdef CONFIG_DEBUG_FS
-struct device;
-void nvgpu_alloc_debugfs_init(struct device *dev);
-#endif
-
 #define nvgpu_alloc_trace_func()			\
 	do {						\
 		if (nvgpu_alloc_tracing_on)		\
diff --git a/drivers/gpu/nvgpu/include/nvgpu/debug.h b/drivers/gpu/nvgpu/include/nvgpu/debug.h
new file mode 100644
index 00000000..70a03978
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/debug.h
@@ -0,0 +1,55 @@
+/*
+ * GK20A Debug functionality
+ *
+ * Copyright (C) 2011-2017 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_H__
+#define __NVGPU_DEBUG_H__
+
+struct gk20a;
+struct gpu_ops;
+
+struct gk20a_debug_output {
+	void (*fn)(void *ctx, const char *str, size_t len);
+	void *ctx;
+	char buf[256];
+};
+
+#ifdef CONFIG_DEBUG_FS
+extern unsigned int gk20a_debug_trace_cmdbuf;
+
+void gk20a_debug_output(struct gk20a_debug_output *o,
+					const char *fmt, ...);
+
+void gk20a_debug_dump(struct gk20a *g);
+void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o);
+int gk20a_gr_debug_dump(struct gk20a *g);
+void gk20a_init_debug_ops(struct gpu_ops *gops);
+
+void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink);
+void gk20a_debug_deinit(struct gk20a *g);
+#else
+static inline void gk20a_debug_output(struct gk20a_debug_output *o,
+					const char *fmt, ...) {}
+
+static inline void gk20a_debug_dump(struct gk20a *g) {}
+static inline void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) {}
+static inline int gk20a_gr_debug_dump(struct gk20a *g) { return 0;}
+static inline void gk20a_init_debug_ops(struct gpu_ops *gops) {}
+
+static inline void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) {}
+static inline void gk20a_debug_deinit(struct gk20a *g) {}
+#endif
+
+#endif /* __NVGPU_DEBUG_H__ */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h
index dc198a04..611854f2 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/kmem.h
@@ -31,12 +31,6 @@ void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
 			    unsigned long ip);
 void  __nvgpu_track_vfree(struct gk20a *g, void *addr);
 void  __nvgpu_track_kfree(struct gk20a *g, void *addr);
-
-void nvgpu_kmem_debugfs_init(struct device *dev);
-#else
-static inline void nvgpu_kmem_debugfs_init(struct device *dev)
-{
-}
 #endif
 
 /**
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 02cc5b47..cdd0d378 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -22,10 +22,10 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/debug.h>
 
 #include "vgpu/vgpu.h"
 #include "vgpu/fecs_trace_vgpu.h"
-#include "gk20a/debug_gk20a.h"
 #include "gk20a/hal_gk20a.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
 #include "gk20a/tsg_gk20a.h"
@@ -667,7 +667,7 @@ int vgpu_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
-	gk20a_debug_init(dev, "gpu.0");
+	gk20a_debug_init(gk20a, "gpu.0");
 
 	/* Set DMA parameters to allow larger sgt lists */
 	dev->dma_parms = &gk20a->dma_parms;
-- 
cgit v1.2.2