From 6090a8a7ee347f92d806f104d3a0082208f5df64 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Wed, 24 May 2017 17:37:04 +0530
Subject: gpu: nvgpu: move debugfs code to linux module

Since all debugfs code is Linux-specific, remove
it from common code and move it to the Linux module.

Debugfs code is now divided into the following
module-specific files:

common/linux/debug.c
common/linux/debug_cde.c
common/linux/debug_ce.c
common/linux/debug_fifo.c
common/linux/debug_gr.c
common/linux/debug_mm.c
common/linux/debug_allocator.c
common/linux/debug_kmem.c
common/linux/debug_pmu.c
common/linux/debug_sched.c

Add corresponding header files for the above modules too,
and compile all of the above files only if CONFIG_DEBUG_FS is set.

Some more details of the changes made:

- Move and rename gk20a/debug_gk20a.c to common/linux/debug.c
- Move and rename gk20a/debug_gk20a.h to include/nvgpu/debug.h

- Remove gm20b/debug_gm20b.c and gm20b/debug_gm20b.h and call
  gk20a_init_debug_ops() directly from gm20b_init_hal()

- Update all debug APIs to receive struct gk20a as parameter
  instead of receiving struct device pointer
- Update API gk20a_dmabuf_get_state() to receive struct gk20a
  pointer instead of struct device

- Include <nvgpu/debug.h> explicitly in all files where debug
  operations are used
- Remove "gk20a/platform_gk20a.h" include from HAL files
  which no longer need this include

- Add new API gk20a_debug_deinit() to deinitialize debugfs
  and call it from gk20a_remove()
- Move API gk20a_debug_dump_all_channel_status_ramfc() to
  gk20a/fifo_gk20a.c

Jira NVGPU-62

Change-Id: I076975d3d7f669bdbe9212fa33d98529377feeb6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1488902
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
---
 drivers/gpu/nvgpu/common/linux/debug.c           | 376 ++++++++++++++++++
 drivers/gpu/nvgpu/common/linux/debug_allocator.c |  80 ++++
 drivers/gpu/nvgpu/common/linux/debug_allocator.h |  21 +
 drivers/gpu/nvgpu/common/linux/debug_cde.c       |  51 +++
 drivers/gpu/nvgpu/common/linux/debug_cde.h       |  21 +
 drivers/gpu/nvgpu/common/linux/debug_ce.c        |  30 ++
 drivers/gpu/nvgpu/common/linux/debug_ce.h        |  21 +
 drivers/gpu/nvgpu/common/linux/debug_fifo.c      | 369 +++++++++++++++++
 drivers/gpu/nvgpu/common/linux/debug_fifo.h      |  22 ++
 drivers/gpu/nvgpu/common/linux/debug_gr.c        |  31 ++
 drivers/gpu/nvgpu/common/linux/debug_gr.h        |  21 +
 drivers/gpu/nvgpu/common/linux/debug_kmem.c      | 315 +++++++++++++++
 drivers/gpu/nvgpu/common/linux/debug_kmem.h      |  23 ++
 drivers/gpu/nvgpu/common/linux/debug_mm.c        |  26 ++
 drivers/gpu/nvgpu/common/linux/debug_mm.h        |  21 +
 drivers/gpu/nvgpu/common/linux/debug_pmu.c       | 479 +++++++++++++++++++++++
 drivers/gpu/nvgpu/common/linux/debug_pmu.h       |  21 +
 drivers/gpu/nvgpu/common/linux/debug_sched.c     |  79 ++++
 drivers/gpu/nvgpu/common/linux/debug_sched.h     |  21 +
 drivers/gpu/nvgpu/common/linux/driver_common.c   |   3 +-
 drivers/gpu/nvgpu/common/linux/ioctl_channel.c   |   2 +-
 drivers/gpu/nvgpu/common/linux/kmem.c            | 323 +--------------
 drivers/gpu/nvgpu/common/linux/kmem_priv.h       |   8 +
 drivers/gpu/nvgpu/common/linux/module.c          |   6 +-
 drivers/gpu/nvgpu/common/mm/bitmap_allocator.c   |   2 +
 drivers/gpu/nvgpu/common/mm/buddy_allocator.c    |   4 +
 drivers/gpu/nvgpu/common/mm/lockless_allocator.c |   4 +
 drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c    |  70 ----
 drivers/gpu/nvgpu/common/mm/page_allocator.c     |   2 +
 29 files changed, 2064 insertions(+), 388 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_allocator.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_allocator.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_cde.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_cde.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_ce.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_ce.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_fifo.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_fifo.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_gr.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_gr.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_kmem.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_kmem.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_mm.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_mm.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_pmu.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_pmu.h
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_sched.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/debug_sched.h

(limited to 'drivers/gpu/nvgpu/common')

diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c
new file mode 100644
index 00000000..2962a467
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_cde.h"
+#include "debug_ce.h"
+#include "debug_fifo.h"
+#include "debug_gr.h"
+#include "debug_mm.h"
+#include "debug_allocator.h"
+#include "debug_kmem.h"
+#include "debug_pmu.h"
+#include "debug_sched.h"
+
+#include "gk20a/gk20a.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <nvgpu/debug.h>
+
+unsigned int gk20a_debug_trace_cmdbuf;
+
+static inline void gk20a_debug_write_printk(void *ctx, const char *str,
+					    size_t len)
+{
+	pr_info("%s", str);	/* kernel-log sink; ctx and len are not used */
+}
+
+static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
+						size_t len)
+{
+	seq_write((struct seq_file *)ctx, str, len); /* ctx is the seq_file */
+}
+
+/* Format into o->buf; vscnprintf() keeps len within the buffer for o->fn. */
+void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vscnprintf(o->buf, sizeof(o->buf), fmt, args);
+	va_end(args);
+	o->fn(o->ctx, o->buf, len);
+}
+
+static int gk20a_gr_dump_regs(struct gk20a *g, struct gk20a_debug_output *o)
+{
+	/* The HAL hook is optional; without it there is nothing to dump. */
+	if (!g->ops.gr.dump_gr_regs)
+		return 0;
+	gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));
+	return 0;
+}
+
+/* Dump the GR registers via gk20a_debug_write_printk(). Always returns 0. */
+int gk20a_gr_debug_dump(struct gk20a *g)
+{
+	struct gk20a_debug_output printk_out = {
+		.fn = gk20a_debug_write_printk,
+	};
+
+	gk20a_gr_dump_regs(g, &printk_out);
+	return 0;
+}
+
+/* seq_file show handler: dump GR registers between gk20a_busy()/idle(). */
+static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
+{
+	struct device *dev = s->private;
+	struct gk20a *g = gk20a_get_platform(dev)->g;
+	struct gk20a_debug_output o = {
+		.fn = gk20a_debug_write_to_seqfile,
+		.ctx = s,
+	};
+	int err;
+
+	err = gk20a_busy(g);
+	if (err) {
+		nvgpu_err(g, "failed to power on gpu: %d", err);
+		return err;	/* propagate the real cause, not a fixed errno */
+	}
+
+	gk20a_gr_dump_regs(g, &o);
+	gk20a_idle(g);
+
+	return 0;
+}
+
+/* Dump platform dependencies (if hooked) and the HAL debug state to printk. */
+void gk20a_debug_dump(struct gk20a *g)
+{
+	struct gk20a_debug_output out = { .fn = gk20a_debug_write_printk };
+	struct gk20a_platform *pdata = gk20a_get_platform(g->dev);
+
+	/* The platform hook is optional. */
+	if (pdata->dump_platform_dependencies)
+		pdata->dump_platform_dependencies(g->dev);
+
+	/* HAL only initialized after 1st power-on */
+	if (g->ops.debug.show_dump)
+		g->ops.debug.show_dump(g, &out);
+}
+
+/* seq_file show handler: run the HAL show_dump between busy()/idle(). */
+static int gk20a_debug_show(struct seq_file *s, void *unused)
+{
+	struct device *dev = s->private;
+	struct gk20a *g = gk20a_get_platform(dev)->g;
+	struct gk20a_debug_output o = {
+		.fn = gk20a_debug_write_to_seqfile,
+		.ctx = s,
+	};
+	int err;
+
+	err = gk20a_busy(g);
+	if (err) {
+		nvgpu_err(g, "failed to power on gpu: %d", err);
+		/* -EFAULT means "bad address"; report the actual failure */
+		return err;
+	}
+
+	/* HAL only initialized after 1st power-on */
+	if (g->ops.debug.show_dump)
+		g->ops.debug.show_dump(g, &o);
+
+	gk20a_idle(g);
+	return 0;
+}
+
+static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_gr_debug_show, inode->i_private);
+}
+
+static int gk20a_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_debug_show, inode->i_private);
+}
+
+static const struct file_operations gk20a_gr_debug_fops = {
+	.open		= gk20a_gr_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations gk20a_debug_fops = {
+	.open		= gk20a_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
+{
+	g->ops.fifo.dump_pbdma_status(g, o);
+	g->ops.fifo.dump_eng_status(g, o);
+	/* The two fifo hooks above are invoked without a NULL check. */
+	gk20a_debug_dump_all_channel_status_ramfc(g, o);
+}
+
+void gk20a_init_debug_ops(struct gpu_ops *gops)
+{
+	gops->debug.show_dump = gk20a_debug_show_dump;
+}
+
+/*
+ * seq_file show handler: print the accumulated gated and ungated times and
+ * the railgating cycle count. The time since the last completed transition
+ * is added to the total of whichever state the rails are in right now.
+ */
+static int railgate_residency_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	unsigned long since_transition_ms;
+	unsigned long gated_ms;
+	unsigned long ungated_ms;
+
+	if (platform->is_railgated(g->dev)) {
+		since_transition_ms = jiffies_to_msecs(jiffies -
+				g->pstats.last_rail_gate_complete);
+		ungated_ms = g->pstats.total_rail_ungate_time_ms;
+		gated_ms = g->pstats.total_rail_gate_time_ms +
+				since_transition_ms;
+	} else {
+		since_transition_ms = jiffies_to_msecs(jiffies -
+				g->pstats.last_rail_ungate_complete);
+		gated_ms = g->pstats.total_rail_gate_time_ms;
+		ungated_ms = g->pstats.total_rail_ungate_time_ms +
+				since_transition_ms;
+	}
+
+	seq_printf(s, "Time with Rails Gated: %lu ms\n"
+			"Time with Rails UnGated: %lu ms\n"
+			"Total railgating cycles: %lu\n",
+			gated_ms, ungated_ms,
+			g->pstats.railgating_cycle_count - 1);
+
+	return 0;
+}
+
+static int railgate_residency_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, railgate_residency_show, inode->i_private);
+}
+
+static const struct file_operations railgate_residency_fops = {
+	.open		= railgate_residency_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Create "railgate_residency" if the GPU can railgate; -ENOMEM on failure.
+ */
+static int gk20a_railgating_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *entry;
+
+	if (!g->can_railgate)
+		return 0;
+
+	entry = debugfs_create_file("railgate_residency", S_IRUGO|S_IWUSR,
+				    platform->debugfs, g,
+				    &railgate_residency_fops);
+	return entry ? 0 : -ENOMEM;
+}
+
+void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
+{
+	struct device *dev = g->dev;
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+
+	platform->debugfs = debugfs_create_dir(dev_name(dev), NULL);
+	if (!platform->debugfs)
+		return;
+	/* Optional alias: a symlink to the per-device directory. */
+	if (debugfs_symlink)
+		platform->debugfs_alias =
+			debugfs_create_symlink(debugfs_symlink,
+					NULL, dev_name(dev));
+	/* State dumps; both files get the struct device as private data. */
+	debugfs_create_file("status", S_IRUGO, platform->debugfs,
+		dev, &gk20a_debug_fops);
+	debugfs_create_file("gr_status", S_IRUGO, platform->debugfs,
+		dev, &gk20a_gr_debug_fops);
+	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
+		platform->debugfs, &gk20a_debug_trace_cmdbuf);
+
+	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
+		platform->debugfs, &g->ch_wdt_timeout_ms);
+
+	debugfs_create_u32("disable_syncpoints", S_IRUGO|S_IWUSR,
+		platform->debugfs, &g->disable_syncpoints);
+
+	/* Legacy debugging API. */
+	debugfs_create_u32("dbg_mask", S_IRUGO|S_IWUSR,
+		platform->debugfs, &nvgpu_dbg_mask);
+
+	/* New debug logging API. */
+	debugfs_create_u32("log_mask", S_IRUGO|S_IWUSR,
+		platform->debugfs, &g->log_mask);
+	debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
+		platform->debugfs, &g->log_trace);
+
+	nvgpu_spinlock_init(&g->debugfs_lock);
+
+	g->mm.ltc_enabled = true;
+	g->mm.ltc_enabled_debug = true;
+
+	g->debugfs_ltc_enabled =
+			debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
+				 platform->debugfs,
+				 &g->mm.ltc_enabled_debug);
+
+	g->debugfs_gr_idle_timeout_default =
+			debugfs_create_u32("gr_idle_timeout_default_us",
+					S_IRUGO|S_IWUSR, platform->debugfs,
+					 &g->gr_idle_timeout_default);
+	g->debugfs_timeouts_enabled =
+			debugfs_create_bool("timeouts_enabled",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->timeouts_enabled);
+
+	g->debugfs_bypass_smmu =
+			debugfs_create_bool("bypass_smmu",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->mm.bypass_smmu);
+	g->debugfs_disable_bigpage =
+			debugfs_create_bool("disable_bigpage",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->mm.disable_bigpage);
+
+	g->debugfs_timeslice_low_priority_us =
+			debugfs_create_u32("timeslice_low_priority_us",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->timeslice_low_priority_us);
+	g->debugfs_timeslice_medium_priority_us =
+			debugfs_create_u32("timeslice_medium_priority_us",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->timeslice_medium_priority_us);
+	g->debugfs_timeslice_high_priority_us =
+			debugfs_create_u32("timeslice_high_priority_us",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->timeslice_high_priority_us);
+	g->debugfs_runlist_interleave =
+			debugfs_create_bool("runlist_interleave",
+					S_IRUGO|S_IWUSR,
+					platform->debugfs,
+					&g->runlist_interleave);
+#ifdef CONFIG_ARCH_TEGRA_18x_SOC
+	g->gr.t18x.ctx_vars.debugfs_force_preemption_gfxp =
+		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
+		platform->debugfs,
+		&g->gr.t18x.ctx_vars.force_preemption_gfxp);
+
+	g->gr.t18x.ctx_vars.debugfs_force_preemption_cilp =
+		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
+		platform->debugfs,
+		&g->gr.t18x.ctx_vars.force_preemption_cilp);
+
+	g->gr.t18x.ctx_vars.debugfs_dump_ctxsw_stats =
+		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
+			S_IRUGO|S_IWUSR, platform->debugfs,
+			&g->gr.t18x.
+				ctx_vars.dump_ctxsw_stats_on_channel_close);
+#endif
+	/* Entries owned by the individual units. */
+	gr_gk20a_debugfs_init(g);
+	gk20a_pmu_debugfs_init(g);
+	gk20a_railgating_debugfs_init(g);
+	gk20a_cde_debugfs_init(g);
+	gk20a_ce_debugfs_init(g);
+	nvgpu_alloc_debugfs_init(g);
+	gk20a_mm_debugfs_init(g);
+	gk20a_fifo_debugfs_init(g);
+	gk20a_sched_debugfs_init(g);
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+	nvgpu_kmem_debugfs_init(g);
+#endif
+}
+
+/* Undo gk20a_debug_init(): stop fifo profiling, then drop the debugfs tree. */
+void gk20a_debug_deinit(struct gk20a *g)
+{
+	struct gk20a_platform *pdata = dev_get_drvdata(g->dev);
+
+	if (!pdata->debugfs)
+		return;
+
+	gk20a_fifo_debugfs_deinit(g);
+	debugfs_remove_recursive(pdata->debugfs);
+	debugfs_remove_recursive(pdata->debugfs_alias);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
new file mode 100644
index 00000000..3d4a2bb2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_allocator.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <nvgpu/allocator.h>
+
+u32 nvgpu_alloc_tracing_on;
+
+void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
+			     struct seq_file *s, int lock)
+{
+	__a->ops->print_stats(__a, s, lock);	/* per-allocator ops hook */
+}
+
+/* seq_file show handler: print stats for the allocator in s->private. */
+static int __alloc_show(struct seq_file *s, void *unused)
+{
+	struct nvgpu_allocator *allocator = s->private;
+
+	nvgpu_alloc_print_stats(allocator, s, 1);
+	return 0;
+}
+
+static int __alloc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __alloc_show, inode->i_private);
+}
+
+static const struct file_operations __alloc_fops = {
+	.open = __alloc_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/* Expose allocator @a as a read-only file under g->debugfs_allocators. */
+void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
+{
+	struct dentry *dir = g->debugfs_allocators;
+
+	if (dir)
+		a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO, dir,
+						       a, &__alloc_fops);
+}
+
+void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
+{
+	if (!IS_ERR_OR_NULL(a->debugfs_entry))
+		debugfs_remove(a->debugfs_entry);
+}
+
+/* Create the "allocators" directory and its global "tracing" switch. */
+void nvgpu_alloc_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *dir;
+
+	dir = debugfs_create_dir("allocators", platform->debugfs);
+	g->debugfs_allocators = IS_ERR_OR_NULL(dir) ? NULL : dir;
+	if (!g->debugfs_allocators)
+		return;
+
+	debugfs_create_u32("tracing", 0664, dir, &nvgpu_alloc_tracing_on);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.h b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
new file mode 100644
index 00000000..1b21cfc5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
+#define __NVGPU_DEBUG_ALLOCATOR_H__
+
+struct gk20a;
+void nvgpu_alloc_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
new file mode 100644
index 00000000..eb7c33e2
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_cde.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+
+
+static ssize_t gk20a_cde_reload_write(struct file *file,
+	const char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct gk20a *g = file->private_data;
+	gk20a_cde_reload(g);	/* any write triggers a reload; data is unused */
+	return count;
+}
+
+static const struct file_operations gk20a_cde_reload_fops = {
+	.open		= simple_open,
+	.write		= gk20a_cde_reload_write,
+};
+
+void gk20a_cde_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	if (!platform->has_cde)
+		return;
+	/* cde_app counters, plus a write-only firmware reload trigger. */
+	debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->cde_app.shader_parameter);
+	debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->cde_app.ctx_count);
+	debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->cde_app.ctx_usecount);
+	debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->cde_app.ctx_count_top);
+	debugfs_create_file("reload_cde_firmware", S_IWUSR, platform->debugfs,
+			    g, &gk20a_cde_reload_fops);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.h b/drivers/gpu/nvgpu/common/linux/debug_cde.h
new file mode 100644
index 00000000..4895edd6
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_CDE_H__
+#define __NVGPU_DEBUG_CDE_H__
+
+struct gk20a;
+void gk20a_cde_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_CDE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.c b/drivers/gpu/nvgpu/common/linux/debug_ce.c
new file mode 100644
index 00000000..9c50870e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_ce.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+
+void gk20a_ce_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	/* ce_app bookkeeping fields, exported with owner write access. */
+	debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->ce_app.ctx_count);
+	debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->ce_app.app_state);
+	debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
+			   platform->debugfs, &g->ce_app.next_ctx_id);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_ce.h b/drivers/gpu/nvgpu/common/linux/debug_ce.h
new file mode 100644
index 00000000..2a8750c4
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_ce.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_CE_H__
+#define __NVGPU_DEBUG_CE_H__
+
+struct gk20a;
+void gk20a_ce_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_CE_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
new file mode 100644
index 00000000..6a28b1a5
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_fifo.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <nvgpu/sort.h>
+
+void __gk20a_fifo_profile_free(struct kref *ref);
+
+/*
+ * seq_file start: the channel at *pos, or NULL once *pos is past the end.
+ */
+static void *gk20a_fifo_sched_debugfs_seq_start(
+		struct seq_file *s, loff_t *pos)
+{
+	struct gk20a *g = s->private;
+	struct fifo_gk20a *f = &g->fifo;
+
+	return *pos < f->num_channels ? &f->channel[*pos] : NULL;
+}
+
+/*
+ * seq_file next: advance *pos and return that channel, or NULL at the end.
+ */
+static void *gk20a_fifo_sched_debugfs_seq_next(
+		struct seq_file *s, void *v, loff_t *pos)
+{
+	struct gk20a *g = s->private;
+	struct fifo_gk20a *f = &g->fifo;
+
+	++(*pos);
+	return *pos < f->num_channels ? &f->channel[*pos] : NULL;
+}
+
+/* seq_file stop: start/next take no locks or references, so nothing to do. */
+static void gk20a_fifo_sched_debugfs_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int gk20a_fifo_sched_debugfs_seq_show(
+		struct seq_file *s, void *v)
+{
+	struct gk20a *g = s->private;
+	struct fifo_gk20a *f = &g->fifo;
+	struct channel_gk20a *ch = v;
+	struct tsg_gk20a *tsg = NULL;
+	/* Runlist of the GR engine, used to filter for active channels. */
+	struct fifo_engine_info_gk20a *engine_info;
+	struct fifo_runlist_info_gk20a *runlist;
+	u32 runlist_id;
+	int ret = SEQ_SKIP;
+	u32 engine_id;
+
+	engine_id = gk20a_fifo_get_gr_engine_id(g);
+	engine_info = (f->engine_info + engine_id);
+	runlist_id = engine_info->runlist_id;
+	runlist = &f->runlist_info[runlist_id];
+
+	if (ch == f->channel) {
+		seq_puts(s, "chid     tsgid    pid      timeslice  timeout  interleave graphics_preempt compute_preempt\n");
+		seq_puts(s, "                            (usecs)   (msecs)\n");
+		ret = 0;
+	}
+	/* Inactive channels return SEQ_SKIP, or 0 if the header was printed. */
+	if (!test_bit(ch->hw_chid, runlist->active_channels))
+		return ret;
+	/* Print under a channel reference; nothing is printed if get fails. */
+	if (gk20a_channel_get(ch)) {
+		if (gk20a_is_channel_marked_as_tsg(ch))
+			tsg = &f->tsg[ch->tsgid];
+
+		seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
+				ch->hw_chid,
+				ch->tsgid,
+				ch->tgid,
+				tsg ? tsg->timeslice_us : ch->timeslice_us,
+				ch->timeout_ms_max,
+				tsg ? tsg->interleave_level : ch->interleave_level,
+				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX,
+				ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX);
+		gk20a_channel_put(ch);
+	}
+	return 0;
+}
+
+static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
+	.start = gk20a_fifo_sched_debugfs_seq_start,
+	.next = gk20a_fifo_sched_debugfs_seq_next,
+	.stop = gk20a_fifo_sched_debugfs_seq_stop,
+	.show = gk20a_fifo_sched_debugfs_seq_show
+};
+
+/* debugfs open: CAP_SYS_ADMIN only; hands inode->i_private to the seq_file. */
+static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
+	struct file *file)
+{
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
+	if (err)
+		return err;
+
+	gk20a_dbg(gpu_dbg_info, "i_private=%p", inode->i_private);
+	((struct seq_file *)file->private_data)->private = inode->i_private;
+	return 0;
+}
+
+/*
+ * The file operations structure contains our open function along with
+ * set of the canned seq_ ops.
+ */
+static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = gk20a_fifo_sched_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release
+};
+
+static int gk20a_fifo_profile_enable(void *data, u64 val)
+{
+	struct gk20a *g = (struct gk20a *) data;
+	struct fifo_gk20a *f = &g->fifo;
+
+	/* val == 0 disables profiling; any other value enables it. */
+	nvgpu_mutex_acquire(&f->profile.lock);
+	if (val == 0) {
+		if (f->profile.enabled) {
+			f->profile.enabled = false;
+			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
+		}
+	} else {
+		if (!f->profile.enabled) {
+			/* not kref init as it can have a race condition if
+			 * we enable/disable/enable while kickoff is happening
+			 */
+			if (!kref_get_unless_zero(&f->profile.ref)) {
+				f->profile.data = vzalloc(
+							FIFO_PROFILING_ENTRIES *
+					sizeof(struct fifo_profile_gk20a));
+				f->profile.sorted  = vzalloc(
+							FIFO_PROFILING_ENTRIES *
+							sizeof(u64));
+				if (!(f->profile.data && f->profile.sorted)) {
+					nvgpu_vfree(g, f->profile.data);
+					nvgpu_vfree(g, f->profile.sorted);
+					nvgpu_mutex_release(&f->profile.lock);
+					return -ENOMEM;
+				}
+				kref_init(&f->profile.ref);
+			}
+			atomic_set(&f->profile.get, 0);
+			f->profile.enabled = true;
+		}
+	}
+	nvgpu_mutex_release(&f->profile.lock);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(gk20a_fifo_profile_enable_debugfs_fops, NULL,
+			gk20a_fifo_profile_enable, "%llu\n");
+
+/* sort() comparator; an int-truncated u64 difference can flip the order. */
+static int __profile_cmp(const void *a, const void *b)
+{
+	unsigned long long x = *(const unsigned long long *)a;
+	unsigned long long y = *(const unsigned long long *)b;
+
+	return (x > y) - (x < y);
+}
+
+/*
+ * The percentile tables take about 800 bytes of stack in the stats handler,
+ * which is not part of a call stack that uses much memory, so that is fine.
+ */
+#define PERCENTILE_WIDTH	5
+#define PERCENTILE_RANGES	(100/PERCENTILE_WIDTH)
+
+/*
+ * Sort the valid (timestamp[index_end] - timestamp[index_start]) deltas and
+ * sample them every PERCENTILE_WIDTH percent. Returns the sample count.
+ */
+static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
+		u64 *percentiles, u32 index_end, u32 index_start)
+{
+	unsigned int nelem = 0;
+	unsigned int index;
+	struct fifo_profile_gk20a *profile;
+
+	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
+		profile = &g->fifo.profile.data[index];
+		if (profile->timestamp[index_end] >
+				profile->timestamp[index_start]) {
+			g->fifo.profile.sorted[nelem++] =
+						profile->timestamp[index_end] -
+						profile->timestamp[index_start];
+		}
+	}
+
+	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
+		__profile_cmp, NULL);
+
+	/* below PERCENTILE_RANGES samples the "- 1" index would underflow */
+	for (index = 0; index < PERCENTILE_RANGES; index++)
+		percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
+			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
+						nelem)/100 - 1];
+	return nelem;
+}
+
+/* seq_file show handler: print percentiles of each profiled kickoff phase. */
+static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	unsigned int nelem, index;
+	/*
+	 * 800B in the stack, but function is declared statically and only
+	 * called from debugfs handler
+	 */
+	u64 percentiles_ioctl[PERCENTILE_RANGES];
+	u64 percentiles_kickoff[PERCENTILE_RANGES];
+	u64 percentiles_jobtracking[PERCENTILE_RANGES];
+	u64 percentiles_append[PERCENTILE_RANGES];
+	u64 percentiles_userd[PERCENTILE_RANGES];
+
+	if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
+		seq_puts(s, "Profiling disabled\n");
+		return 0;
+	}
+
+	__gk20a_fifo_create_stats(g, percentiles_ioctl,
+		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
+	__gk20a_fifo_create_stats(g, percentiles_kickoff,
+		PROFILE_END, PROFILE_ENTRY);
+	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
+		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
+	__gk20a_fifo_create_stats(g, percentiles_append,
+		PROFILE_APPEND, PROFILE_JOB_TRACKING);
+	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
+		PROFILE_END, PROFILE_APPEND);
+
+	/* nelem/index are unsigned and the percentiles u64: use %u / %llu */
+	seq_printf(s, "Number of kickoffs: %u\n", nelem);
+	seq_puts(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
+
+	for (index = 0; index < PERCENTILE_RANGES; index++)
+		seq_printf(s, "[%2upc]\t%8llu\t%8llu\t%8llu\t%8llu\t%8llu\n",
+			PERCENTILE_WIDTH * (index+1),
+			percentiles_ioctl[index],
+			percentiles_kickoff[index],
+			percentiles_append[index],
+			percentiles_jobtracking[index],
+			percentiles_userd[index]);
+
+	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+
+	return 0;
+}
+
+static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
+}
+
+static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
+	.open		= gk20a_fifo_profile_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+
+/*
+ * Create the fifo debugfs files. Profile state is set up first because
+ * gk20a_fifo_debugfs_deinit() locks profile.lock even if a mkdir fails.
+ */
+void gk20a_fifo_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *gpu_root = platform->debugfs;
+	struct dentry *fifo_root;
+	struct dentry *profile_root;
+
+	nvgpu_mutex_init(&g->fifo.profile.lock);
+	g->fifo.profile.enabled = false;
+	atomic_set(&g->fifo.profile.get, 0);
+	atomic_set(&g->fifo.profile.ref.refcount, 0);
+
+	fifo_root = debugfs_create_dir("fifo", gpu_root);
+	if (IS_ERR_OR_NULL(fifo_root))
+		return;
+
+	gk20a_dbg(gpu_dbg_info, "g=%p", g);
+	debugfs_create_file("sched", 0600, fifo_root, g,
+		&gk20a_fifo_sched_debugfs_fops);
+
+	profile_root = debugfs_create_dir("profile", fifo_root);
+	if (IS_ERR_OR_NULL(profile_root))
+		return;
+
+	debugfs_create_file("enable", 0600, profile_root, g,
+		&gk20a_fifo_profile_enable_debugfs_fops);
+	debugfs_create_file("stats", 0600, profile_root, g,
+		&gk20a_fifo_profile_stats_debugfs_fops);
+}
+
+void __gk20a_fifo_profile_free(struct kref *ref)
+{
+	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
+						profile.ref);
+	nvgpu_vfree(f->g, f->profile.data);	/* profile entries */
+	nvgpu_vfree(f->g, f->profile.sorted);	/* scratch used for sorting */
+}
+
+/*
+ * Take a reference on the profiling state and return the next slot of the
+ * entry ring buffer, or NULL when no reference can be taken.
+ */
+struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	unsigned int slot;
+
+	/* If kref is zero, profiling is not enabled */
+	if (!kref_get_unless_zero(&f->profile.ref))
+		return NULL;
+
+	/* the counter runs free; the modulo wraps it into the ring */
+	slot = atomic_inc_return(&f->profile.get);
+	return &f->profile.data[slot % FIFO_PROFILING_ENTRIES];
+}
+
+/* Drop a profile reference; the final put runs the deferred buffer free. */
+void gk20a_fifo_profile_release(struct gk20a *g,
+					struct fifo_profile_gk20a *profile)
+{
+	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+}
+
+/* Turn profiling off on teardown, under profile.lock. */
+void gk20a_fifo_debugfs_deinit(struct gk20a *g)
+{
+	nvgpu_mutex_acquire(&g->fifo.profile.lock);
+	if (g->fifo.profile.enabled) {
+		g->fifo.profile.enabled = false;
+		/* drop the reference held while profiling was enabled */
+		kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+	}
+	nvgpu_mutex_release(&g->fifo.profile.lock);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.h b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
new file mode 100644
index 00000000..46ac853e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_FIFO_H__
+#define __NVGPU_DEBUG_FIFO_H__
+
+struct gk20a;
+void gk20a_fifo_debugfs_init(struct gk20a *g);
+void gk20a_fifo_debugfs_deinit(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_FIFO_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.c b/drivers/gpu/nvgpu/common/linux/debug_gr.c
new file mode 100644
index 00000000..56b8612e
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_gr.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+
+/*
+ * Expose gr.attrib_cb_default_size as the "gr_default_attrib_cb_size" u32
+ * node in the platform debugfs directory and remember the resulting dentry
+ * in @g. Always returns 0.
+ */
+int gr_gk20a_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *root = platform->debugfs;
+
+	g->debugfs_gr_default_attrib_cb_size = debugfs_create_u32(
+			"gr_default_attrib_cb_size", S_IRUGO|S_IWUSR,
+			root, &g->gr.attrib_cb_default_size);
+
+	return 0;
+}
+
diff --git a/drivers/gpu/nvgpu/common/linux/debug_gr.h b/drivers/gpu/nvgpu/common/linux/debug_gr.h
new file mode 100644
index 00000000..4b46acbb
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_gr.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_GR_H__
+#define __NVGPU_DEBUG_GR_H__
+
+struct gk20a;
+int gr_gk20a_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_GR_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.c b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
new file mode 100644
index 00000000..2ee542a8
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_kmem.h"
+#include "kmem_priv.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+/**
+ * __to_human_readable_bytes - Determine suffix for passed size.
+ *
+ * @bytes - Number of bytes to generate a suffix for.
+ * @hr_bytes [out] - The human readable number of bytes.
+ * @hr_suffix [out] - The suffix for the HR number of bytes.
+ *
+ * Computes a human readable decomposition of the passed number of bytes. The
+ * suffix for the bytes is passed back through the @hr_suffix pointer. The right
+ * number of bytes is then passed back in @hr_bytes. This returns the following
+ * ranges:
+ *
+ *   0 - 1023 B
+ *   1 - 1023 KB
+ *   1 - 1023 MB
+ *   1 - 1023 GB
+ *   1 - 1023 TB
+ *   1 - ...  PB
+ */
+static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
+				      const char **hr_suffix)
+{
+	static const char *suffixes[] =
+		{ "B", "KB", "MB", "GB", "TB", "PB" };
+
+	u64 suffix_ind = 0;
+
+	/*
+	 * Stop scaling once the last suffix is reached. Shifting one more
+	 * time and then clamping the index would report sizes above 1023PB
+	 * a factor of 1024 too small.
+	 */
+	while (suffix_ind < ARRAY_SIZE(suffixes) - 1 && bytes >= 1024) {
+		bytes >>= 10;
+		suffix_ind++;
+	}
+
+	*hr_bytes = bytes;
+	*hr_suffix = suffixes[suffix_ind];
+}
+
+/**
+ * print_hr_bytes - Print human readable bytes
+ *
+ * @s - A seq_file to print to. May be NULL.
+ * @msg - A message to print before the bytes.
+ * @bytes - Number of bytes.
+ *
+ * Print @msg followed by the human readable decomposition of the passed number
+ * of bytes.
+ *
+ * If @s is NULL then the print will be made to the kernel log.
+ */
+static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
+{
+	u64 hr_bytes;
+	const char *hr_suffix;
+
+	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
+
+	/* hr_bytes is unsigned, so print it with an unsigned conversion. */
+	__pstat(s, "%s%llu %s\n", msg, hr_bytes, hr_suffix);
+}
+
+/**
+ * print_histogram - Build a histogram of the memory usage.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
+ *
+ * Every outstanding allocation in @tracker is sorted into a power-of-two
+ * sized bucket and one line is printed per bucket. The temporary bucket
+ * array is freed before returning.
+ */
+static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	int i;
+	u64 pot_min, pot_max;
+	u64 nr_buckets;
+	unsigned long first_bit;
+	unsigned int *buckets;
+	unsigned int total_allocs;
+	struct nvgpu_rbtree_node *node;
+	static const char histogram_line[] =
+		"++++++++++++++++++++++++++++++++++++++++";
+
+	/*
+	 * The power-of-two rounding helpers are undefined for 0 and an
+	 * inverted min/max would make the bucket count wrap around, so emit
+	 * an empty histogram instead of doing math on such stats.
+	 */
+	if (!tracker->min_alloc || tracker->max_alloc < tracker->min_alloc) {
+		__pstat(s, "Alloc histogram:\n");
+		return;
+	}
+
+	/*
+	 * pot_min is essentially a round down to the nearest power of 2. This
+	 * is the start of the histogram. pot_max is just a round up to the
+	 * nearest power of two. Each histogram bucket is one power of two so
+	 * the histogram buckets are exponential.
+	 */
+	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
+	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
+	first_bit = __ffs(pot_min);
+
+	/*
+	 * If min and max are the same exact power of two the difference is
+	 * zero, but there is still one (inclusive) bucket to report.
+	 */
+	nr_buckets = __ffs(pot_max) - first_bit;
+	if (nr_buckets == 0)
+		nr_buckets = 1;
+
+	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
+	if (!buckets) {
+		__pstat(s, "OOM: could not allocate bucket storage!?\n");
+		return;
+	}
+
+	/*
+	 * Iterate across all of the allocs and determine what bucket they
+	 * should go in. Round the size down to the nearest power of two to
+	 * find the right bucket.
+	 */
+	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
+	while (node) {
+		u64 b;
+		u64 bucket_min;
+		struct nvgpu_mem_alloc *alloc =
+			nvgpu_mem_alloc_from_rbtree_node(node);
+
+		/*
+		 * Clamp against pot_min, which is a power of two. Clamping
+		 * against the raw min_alloc would hand a non power of two to
+		 * __ffs() and could produce an index below the first bucket.
+		 */
+		bucket_min = alloc->size ?
+			(u64)rounddown_pow_of_two(alloc->size) : pot_min;
+		if (bucket_min < pot_min)
+			bucket_min = pot_min;
+
+		b = __ffs(bucket_min) - first_bit;
+
+		/*
+		 * Handle the case where there's an alloc exactly as big as
+		 * the maximum bucket size of the largest bucket. Most of the
+		 * buckets have an inclusive minimum and exclusive maximum. But
+		 * the largest bucket needs to have an _inclusive_ maximum as
+		 * well. Anything beyond that is clamped too so the index can
+		 * never leave the array.
+		 */
+		if (b >= nr_buckets)
+			b = nr_buckets - 1;
+
+		buckets[b]++;
+
+		nvgpu_rbtree_enum_next(&node, node);
+	}
+
+	total_allocs = 0;
+	for (i = 0; i < (int)nr_buckets; i++)
+		total_allocs += buckets[i];
+
+	__pstat(s, "Alloc histogram:\n");
+
+	/*
+	 * Actually compute the histogram lines.
+	 */
+	for (i = 0; i < (int)nr_buckets; i++) {
+		char this_line[sizeof(histogram_line) + 1];
+		u64 line_length = 0;
+		u64 hr_bytes;
+		const char *hr_suffix;
+
+		memset(this_line, 0, sizeof(this_line));
+
+		/*
+		 * Compute the normalized line length. Cant use floating point
+		 * so we will just multiply everything by 1000 and use fixed
+		 * point. With no outstanding allocs the bar stays empty
+		 * rather than dividing by zero.
+		 */
+		if (total_allocs) {
+			line_length = (1000 * buckets[i]) / total_allocs;
+			line_length *= sizeof(histogram_line);
+			line_length /= 1000;
+		}
+
+		memset(this_line, '+', line_length);
+
+		/* 64-bit shift: bucket bounds may not fit in an int. */
+		__to_human_readable_bytes(1ULL << (first_bit + i),
+					  &hr_bytes, &hr_suffix);
+		__pstat(s, "  [%-4llu %-4llu] %-2s %5u | %s\n",
+			hr_bytes, hr_bytes << 1,
+			hr_suffix, buckets[i], this_line);
+	}
+
+	kfree(buckets);
+}
+
+/**
+ * nvgpu_kmem_print_stats - Print kmem tracking stats.
+ *
+ * @tracker The tracking to pull data from.
+ * @s       A seq_file to dump info into.
+ *
+ * Dumps the counters of @tracker followed by its allocation histogram. The
+ * tracker lock is held for the whole dump. If @s is non-null then
+ * seq_printf() will be used with @s. Otherwise the stats are pr_info()ed.
+ */
+void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
+			    struct seq_file *s)
+{
+	nvgpu_lock_tracker(tracker);
+
+	/* Header and raw event counters. */
+	__pstat(s, "Mem tracker: %s\n\n", tracker->name);
+	__pstat(s, "Basic Stats:\n");
+	__pstat(s, "  Number of allocs        %lld\n", tracker->nr_allocs);
+	__pstat(s, "  Number of frees         %lld\n", tracker->nr_frees);
+
+	/* Size statistics, each scaled to a human readable unit. */
+	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
+	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
+	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
+	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
+	print_hr_bytes(s, "  Bytes allocated (real)  ",
+		       tracker->bytes_alloced_real);
+	print_hr_bytes(s, "  Bytes freed (real)      ",
+		       tracker->bytes_freed_real);
+	__pstat(s, "\n");
+
+	print_histogram(tracker, s);
+
+	nvgpu_unlock_tracker(tracker);
+}
+
+/*
+ * seq_file show callback: dump the stats of the tracker that was attached
+ * to the file as its private data. Always returns 0.
+ */
+static int __kmem_tracking_show(struct seq_file *s, void *unused)
+{
+	nvgpu_kmem_print_stats(s->private, s);
+
+	return 0;
+}
+
+/* open() handler: bind __kmem_tracking_show() to the inode's private data. */
+static int __kmem_tracking_open(struct inode *inode, struct file *file)
+{
+	void *tracker = inode->i_private;
+
+	return single_open(file, __kmem_tracking_show, tracker);
+}
+
+/* Read-only seq_file operations shared by the per-tracker stats nodes. */
+static const struct file_operations __kmem_tracking_fops = {
+	.open = __kmem_tracking_open,
+	.release = single_release,
+	.read = seq_read,
+	.llseek = seq_lseek,
+};
+
+/*
+ * Walk every allocation recorded in @tracker and print it through
+ * kmem_print_mem_alloc(). No locking is done here. Always returns 0.
+ */
+static int __kmem_traces_dump_tracker(struct gk20a *g,
+				      struct nvgpu_mem_alloc_tracker *tracker,
+				      struct seq_file *s)
+{
+	struct nvgpu_rbtree_node *cur;
+
+	nvgpu_rbtree_enum_start(0, &cur, tracker->allocs);
+
+	while (cur != NULL) {
+		kmem_print_mem_alloc(g,
+				     nvgpu_mem_alloc_from_rbtree_node(cur), s);
+		nvgpu_rbtree_enum_next(&cur, cur);
+	}
+
+	return 0;
+}
+
+/*
+ * seq_file show callback for the "traces" node: list the allocations of the
+ * vmalloc tracker and then of the kmalloc tracker, each under its own
+ * tracker lock. Always returns 0.
+ */
+static int __kmem_traces_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	struct nvgpu_mem_alloc_tracker *vmallocs = g->vmallocs;
+	struct nvgpu_mem_alloc_tracker *kmallocs;
+
+	nvgpu_lock_tracker(vmallocs);
+	seq_puts(s, "Oustanding vmallocs:\n");
+	__kmem_traces_dump_tracker(g, vmallocs, s);
+	seq_puts(s, "\n");
+	nvgpu_unlock_tracker(vmallocs);
+
+	kmallocs = g->kmallocs;
+
+	nvgpu_lock_tracker(kmallocs);
+	seq_puts(s, "Oustanding kmallocs:\n");
+	__kmem_traces_dump_tracker(g, kmallocs, s);
+	nvgpu_unlock_tracker(kmallocs);
+
+	return 0;
+}
+
+/* open() handler: bind __kmem_traces_show() to the inode's private data. */
+static int __kmem_traces_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, __kmem_traces_show, priv);
+}
+
+/* Read-only seq_file operations for the "traces" node. */
+static const struct file_operations __kmem_traces_fops = {
+	.open = __kmem_traces_open,
+	.release = single_release,
+	.read = seq_read,
+	.llseek = seq_lseek,
+};
+
+/*
+ * Create the "kmem_tracking" directory under the platform debugfs root and
+ * populate it with one stats node per tracker (named after the tracker) plus
+ * a "traces" node. Nothing is created if the directory cannot be made.
+ */
+void nvgpu_kmem_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+
+	g->debugfs_kmem = debugfs_create_dir("kmem_tracking", platform->debugfs);
+	if (IS_ERR_OR_NULL(g->debugfs_kmem))
+		return;
+
+	/*
+	 * The individual dentries are not needed afterwards, so they are
+	 * not kept.
+	 */
+	debugfs_create_file(g->vmallocs->name, S_IRUGO,
+			    g->debugfs_kmem,
+			    g->vmallocs, &__kmem_tracking_fops);
+	debugfs_create_file(g->kmallocs->name, S_IRUGO,
+			    g->debugfs_kmem,
+			    g->kmallocs, &__kmem_tracking_fops);
+	debugfs_create_file("traces", S_IRUGO,
+			    g->debugfs_kmem,
+			    g, &__kmem_traces_fops);
+}
+#endif
diff --git a/drivers/gpu/nvgpu/common/linux/debug_kmem.h b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
new file mode 100644
index 00000000..44322b53
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_kmem.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_KMEM_H__
+#define __NVGPU_DEBUG_KMEM_H__
+
+struct gk20a;
+#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
+void nvgpu_kmem_debugfs_init(struct gk20a *g);
+#endif
+
+#endif /* __NVGPU_DEBUG_KMEM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.c b/drivers/gpu/nvgpu/common/linux/debug_mm.c
new file mode 100644
index 00000000..1e260f89
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_mm.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+
+/*
+ * Expose mm.force_pramin as the "force_pramin" bool node in the platform
+ * debugfs directory.
+ */
+void gk20a_mm_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *root = platform->debugfs;
+
+	debugfs_create_bool("force_pramin", 0664, root, &g->mm.force_pramin);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_mm.h b/drivers/gpu/nvgpu/common/linux/debug_mm.h
new file mode 100644
index 00000000..bf7bc985
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_mm.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_MM_H__
+#define __NVGPU_DEBUG_MM_H__
+
+struct gk20a;
+void gk20a_mm_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_MM_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
new file mode 100644
index 00000000..f19f5139
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c
@@ -0,0 +1,479 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_pmu.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+/*
+ * seq_file show callback for "lpwr_debug". Prints the ELPG summary when the
+ * PMU has no feature-list hook or the graphics engine reports
+ * PMU_PG_FEATURE_GR_POWER_GATING_ENABLED; otherwise prints the
+ * PSTATE/RPPG/MSCG summary. Always returns 0.
+ */
+static int lpwr_debug_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	bool elpg_view;
+
+	elpg_view = !g->ops.pmu.pmu_pg_engines_feature_list ||
+		g->ops.pmu.pmu_pg_engines_feature_list(g,
+		PMU_PG_ELPG_ENGINE_ID_GRAPHICS) ==
+		PMU_PG_FEATURE_GR_POWER_GATING_ENABLED;
+
+	if (elpg_view) {
+		seq_printf(s, "ELPG Enabled: %u\n"
+			"ELPG ref count: %u\n"
+			"ELPG state: %u\n",
+			g->elpg_enabled, g->pmu.elpg_refcnt,
+			g->pmu.elpg_stat);
+		return 0;
+	}
+
+	seq_printf(s, "PSTATE: %u\n"
+		"RPPG Enabled: %u\n"
+		"RPPG ref count: %u\n"
+		"RPPG state: %u\n"
+		"MSCG Enabled: %u\n"
+		"MSCG pstate state: %u\n"
+		"MSCG transition state: %u\n",
+		g->ops.clk_arb.get_current_pstate(g),
+		g->elpg_enabled, g->pmu.elpg_refcnt,
+		g->pmu.elpg_stat, g->mscg_enabled,
+		g->pmu.mscg_stat, g->pmu.mscg_transition_state);
+
+	return 0;
+}
+
+/* open() handler: bind lpwr_debug_show() to the inode's private data. */
+static int lpwr_debug_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, lpwr_debug_show, priv);
+}
+
+/* seq_file operations for the "lpwr_debug" node. */
+static const struct file_operations lpwr_debug_fops = {
+	.open		= lpwr_debug_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * seq_file show callback for "mscg_residency". Adds the PMU statistics of
+ * the MS engine (fetched only while the GPU is powered) to the totals kept
+ * in @g and prints times, residency on a scale of 1000, entry count and
+ * average latencies. Returns 0, or the gk20a_busy() error.
+ */
+static int mscg_stat_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data stats = { 0 };
+	u64 in_us, out_us, total_us, ratio;
+
+	/* Don't unnecessarily power on the device */
+	if (g->power_on) {
+		int err = gk20a_busy(g);
+
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g, PMU_PG_ELPG_ENGINE_ID_MS, &stats);
+		gk20a_idle(g);
+	}
+
+	in_us = g->pg_ingating_time_us + (u64)stats.ingating_time;
+	out_us = g->pg_ungating_time_us + (u64)stats.ungating_time;
+	total_us = in_us + out_us;
+
+	/* Residency is scaled by 1000; report 0 when no time was recorded. */
+	ratio = total_us ? div64_u64(in_us * 1000, total_us) : 0;
+
+	seq_printf(s,
+			"Time in MSCG: %llu us\n"
+			"Time out of MSCG: %llu us\n"
+			"MSCG residency ratio: %llu\n"
+			"MSCG Entry Count: %u\n"
+			"MSCG Avg Entry latency %u\n"
+			"MSCG Avg Exit latency %u\n",
+			in_us, out_us,
+			ratio, stats.gating_cnt,
+			stats.avg_entry_latency_us,
+			stats.avg_exit_latency_us);
+
+	return 0;
+}
+
+/* open() handler: bind mscg_stat_show() to the inode's private data. */
+static int mscg_stat_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, mscg_stat_show, priv);
+}
+
+/* seq_file operations for the "mscg_residency" node. */
+static const struct file_operations mscg_stat_fops = {
+	.open		= mscg_stat_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * seq_file show callback for "mscg_transitions": prints the gating count
+ * kept in @g plus the MS engine count reported by the PMU (queried only
+ * while the GPU is powered). Returns 0, or the gk20a_busy() error.
+ */
+static int mscg_transitions_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data stats = { 0 };
+	u32 transitions;
+
+	if (g->power_on) {
+		int err = gk20a_busy(g);
+
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g, PMU_PG_ELPG_ENGINE_ID_MS, &stats);
+		gk20a_idle(g);
+	}
+
+	transitions = g->pg_gating_cnt + stats.gating_cnt;
+	seq_printf(s, "%u\n", transitions);
+
+	return 0;
+}
+
+/* open() handler: bind mscg_transitions_show() to the inode's private data. */
+static int mscg_transitions_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, mscg_transitions_show, priv);
+}
+
+/* seq_file operations for the "mscg_transitions" node. */
+static const struct file_operations mscg_transitions_fops = {
+	.open		= mscg_transitions_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * seq_file show callback for "elpg_residency". Adds the PMU statistics of
+ * the graphics engine (fetched only while the GPU is powered) to the totals
+ * kept in @g and prints times, residency on a scale of 1000, entry count and
+ * average latencies. Returns 0, or the gk20a_busy() error.
+ */
+static int elpg_stat_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data stats = { 0 };
+	u64 in_us, out_us, total_us, ratio;
+
+	/* Don't unnecessarily power on the device */
+	if (g->power_on) {
+		int err = gk20a_busy(g);
+
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &stats);
+		gk20a_idle(g);
+	}
+
+	in_us = g->pg_ingating_time_us + (u64)stats.ingating_time;
+	out_us = g->pg_ungating_time_us + (u64)stats.ungating_time;
+	total_us = in_us + out_us;
+
+	/* Residency is scaled by 1000; report 0 when no time was recorded. */
+	ratio = total_us ? div64_u64(in_us * 1000, total_us) : 0;
+
+	seq_printf(s,
+			"Time in ELPG: %llu us\n"
+			"Time out of ELPG: %llu us\n"
+			"ELPG residency ratio: %llu\n"
+			"ELPG Entry Count: %u\n"
+			"ELPG Avg Entry latency %u us\n"
+			"ELPG Avg Exit latency %u us\n",
+			in_us, out_us,
+			ratio, stats.gating_cnt,
+			stats.avg_entry_latency_us,
+			stats.avg_exit_latency_us);
+
+	return 0;
+}
+
+/* open() handler: bind elpg_stat_show() to the inode's private data. */
+static int elpg_stat_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, elpg_stat_show, priv);
+}
+
+/* seq_file operations for the "elpg_residency" node. */
+static const struct file_operations elpg_stat_fops = {
+	.open		= elpg_stat_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * seq_file show callback for "elpg_transitions": prints the gating count
+ * kept in @g plus the graphics engine count reported by the PMU (queried
+ * only while the GPU is powered). Returns 0, or the gk20a_busy() error.
+ */
+static int elpg_transitions_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_pg_stats_data stats = { 0 };
+	u32 transitions;
+
+	if (g->power_on) {
+		int err = gk20a_busy(g);
+
+		if (err)
+			return err;
+
+		gk20a_pmu_get_pg_stats(g,
+			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &stats);
+		gk20a_idle(g);
+	}
+
+	transitions = g->pg_gating_cnt + stats.gating_cnt;
+	seq_printf(s, "%u\n", transitions);
+
+	return 0;
+}
+
+/* open() handler: bind elpg_transitions_show() to the inode's private data. */
+static int elpg_transitions_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, elpg_transitions_show, priv);
+}
+
+/* seq_file operations for the "elpg_transitions" node. */
+static const struct file_operations elpg_transitions_fops = {
+	.open		= elpg_transitions_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * seq_file show callback for "falc_trace". Copies the PMU trace buffer into
+ * system memory and walks it in 0x40-byte steps: the first word of a record
+ * is printed as its index, and the text starting at byte 20 is printed with
+ * every position reported by nvgpu_find_hex_in_string() replaced by the next
+ * argument word of the record. The walk stops at the first all-zero window.
+ *
+ * Returns 0, or -ENOMEM if the copy buffer cannot be allocated.
+ */
+static int falc_trace_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_gk20a *pmu = &g->pmu;
+	const u32 nr_words = GK20A_PMU_TRACE_BUFSIZE / sizeof(u32);
+	u32 i = 0, j = 0, k, l, m;
+	u32 scan;
+	char part_str[40];
+	void *tracebuffer;
+	char *trace;
+	u32 *trace1;
+
+	/*
+	 * allocate system memory to copy pmu trace buffer; the extra zeroed
+	 * byte guarantees a NUL terminator for the string handling below even
+	 * if the last record's text is not terminated
+	 */
+	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE + 1);
+	if (tracebuffer == NULL)
+		return -ENOMEM;
+
+	/* read pmu traces into system memory buffer */
+	nvgpu_mem_rd_n(g, &pmu->trace_buf,
+		       0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);
+
+	trace = (char *)tracebuffer;
+	trace1 = (u32 *)tracebuffer;
+
+	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
+		/*
+		 * Look ahead up to 0x40 words for any non-zero data, but
+		 * never beyond the end of the copied buffer.
+		 */
+		scan = nr_words - (i / 4);
+		if (scan > 0x40)
+			scan = 0x40;
+
+		for (j = 0; j < scan; j++)
+			if (trace1[(i / 4) + j])
+				break;
+		if (j == scan)
+			break;
+		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
+		l = 0;
+		m = 0;
+		/*
+		 * NOTE(review): presumably nvgpu_find_hex_in_string() returns
+		 * the offset of the next hex marker in @k - confirm against
+		 * its definition.
+		 */
+		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
+			if (k >= 40)
+				break;
+			/* do not fetch argument words past the buffer */
+			if ((i / 4) + 1 + l >= nr_words)
+				break;
+			strncpy(part_str, (trace+i+20+m), k);
+			part_str[k] = 0;
+			seq_printf(s, "%s0x%x", part_str,
+					trace1[(i / 4) + 1 + l]);
+			l++;
+			m += k + 2;
+		}
+		seq_printf(s, "%s", (trace+i+20+m));
+	}
+
+	nvgpu_kfree(g, tracebuffer);
+	return 0;
+}
+
+/* open() handler: bind falc_trace_show() to the inode's private data. */
+static int falc_trace_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, falc_trace_show, priv);
+}
+
+/* seq_file operations for the "falc_trace" node. */
+static const struct file_operations falc_trace_fops = {
+	.open		= falc_trace_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * seq_file show callback for "perfmon_events_enable": prints 1 when perfmon
+ * sampling is enabled and 0 otherwise. Always returns 0.
+ */
+static int perfmon_events_enable_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	int enabled = g->pmu.perfmon_sampling_enabled ? 1 : 0;
+
+	seq_printf(s, "%u\n", enabled);
+
+	return 0;
+}
+
+/* open() handler: bind perfmon_events_enable_show() to the inode's data. */
+static int perfmon_events_enable_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, perfmon_events_enable_show, priv);
+}
+
+/*
+ * write() handler for "perfmon_events_enable". Parses a decimal number from
+ * at most 39 bytes of user input; non-zero requests sampling on, zero
+ * requests it off. While the GPU is powered the PMU is told to start or stop
+ * sampling when the state actually changes; otherwise only the flag is
+ * recorded.
+ *
+ * Returns @count on success, -EFAULT if the user buffer cannot be copied,
+ * -EINVAL if it is not a number, or the gk20a_busy() error.
+ */
+static ssize_t perfmon_events_enable_write(struct file *file,
+	const char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct seq_file *s = file->private_data;
+	struct gk20a *g = s->private;
+	struct pmu_gk20a *pmu = &g->pmu;
+	char buf[40] = { 0 };
+	unsigned long val = 0;
+	int buf_size = min(count, (sizeof(buf)-1));
+	bool enable;
+	int err;
+
+	if (copy_from_user(buf, userbuf, buf_size))
+		return -EFAULT;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	enable = val ? true : false;
+
+	/* Don't turn on gk20a unnecessarily */
+	if (!g->power_on) {
+		pmu->perfmon_sampling_enabled = enable;
+		return count;
+	}
+
+	err = gk20a_busy(g);
+	if (err)
+		return err;
+
+	if (enable && !pmu->perfmon_sampling_enabled) {
+		pmu->perfmon_sampling_enabled = true;
+		nvgpu_pmu_perfmon_start_sampling(pmu);
+	} else if (!enable && pmu->perfmon_sampling_enabled) {
+		pmu->perfmon_sampling_enabled = false;
+		nvgpu_pmu_perfmon_stop_sampling(pmu);
+	}
+
+	gk20a_idle(g);
+
+	return count;
+}
+
+/*
+ * File operations for the "perfmon_events_enable" node: seq_file based reads
+ * plus a write handler that toggles sampling.
+ */
+static const struct file_operations perfmon_events_enable_fops = {
+	.open		= perfmon_events_enable_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.write		= perfmon_events_enable_write,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * seq_file show callback for "perfmon_events_count": prints the PMU's
+ * perfmon event counter. Always returns 0.
+ */
+static int perfmon_events_count_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_gk20a *pmu = &g->pmu;
+
+	seq_printf(s, "%lu\n", pmu->perfmon_events_cnt);
+
+	return 0;
+}
+
+/* open() handler: bind perfmon_events_count_show() to the inode's data. */
+static int perfmon_events_count_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, perfmon_events_count_show, priv);
+}
+
+/* seq_file operations for the "perfmon_events_count" node. */
+static const struct file_operations perfmon_events_count_fops = {
+	.open		= perfmon_events_count_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * seq_file show callback for "pmu_security": prints the PMU mode value.
+ * Always returns 0.
+ */
+static int security_show(struct seq_file *s, void *data)
+{
+	struct gk20a *g = s->private;
+	struct pmu_gk20a *pmu = &g->pmu;
+
+	seq_printf(s, "%d\n", pmu->pmu_mode);
+
+	return 0;
+}
+
+/* open() handler: bind security_show() to the inode's private data. */
+static int security_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, security_show, priv);
+}
+
+/* seq_file operations for the "pmu_security" node. */
+static const struct file_operations security_fops = {
+	.open		= security_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * Create the PMU debugfs nodes in the platform debugfs directory, each with
+ * @g as private data.
+ *
+ * Returns 0 on success. If any node cannot be created an error is logged,
+ * the platform debugfs directory is removed recursively and -ENOMEM is
+ * returned.
+ */
+int gk20a_pmu_debugfs_init(struct gk20a *g)
+{
+	/* One entry per node; created in this order. */
+	static const struct {
+		const char *name;
+		umode_t mode;
+		const struct file_operations *fops;
+	} nodes[] = {
+		{ "lpwr_debug", S_IRUGO|S_IWUSR, &lpwr_debug_fops },
+		{ "mscg_residency", S_IRUGO|S_IWUSR, &mscg_stat_fops },
+		{ "mscg_transitions", S_IRUGO, &mscg_transitions_fops },
+		{ "elpg_residency", S_IRUGO|S_IWUSR, &elpg_stat_fops },
+		{ "elpg_transitions", S_IRUGO, &elpg_transitions_fops },
+		{ "falc_trace", S_IRUGO, &falc_trace_fops },
+		/* has a write handler, so the owner needs write permission */
+		{ "perfmon_events_enable", S_IRUGO|S_IWUSR,
+					&perfmon_events_enable_fops },
+		{ "perfmon_events_count", S_IRUGO,
+					&perfmon_events_count_fops },
+		{ "pmu_security", S_IRUGO, &security_fops },
+	};
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *d;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(nodes); i++) {
+		d = debugfs_create_file(nodes[i].name, nodes[i].mode,
+					platform->debugfs, g, nodes[i].fops);
+		/*
+		 * debugfs can report failure as NULL or as an ERR_PTR
+		 * depending on the kernel, so check for both.
+		 */
+		if (IS_ERR_OR_NULL(d))
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	pr_err("%s: Failed to make debugfs node\n", __func__);
+	/*
+	 * NOTE(review): this removes the whole platform debugfs directory
+	 * while platform->debugfs keeps pointing at it - confirm no later
+	 * user dereferences or removes it again.
+	 */
+	debugfs_remove_recursive(platform->debugfs);
+	return -ENOMEM;
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.h b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
new file mode 100644
index 00000000..c4e3243d
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_PMU_H__
+#define __NVGPU_DEBUG_PMU_H__
+
+struct gk20a;
+int gk20a_pmu_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_PMU_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.c b/drivers/gpu/nvgpu/common/linux/debug_sched.c
new file mode 100644
index 00000000..40b93149
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "debug_sched.h"
+#include "gk20a/platform_gk20a.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/*
+ * seq_file show callback for "sched_ctrl". Prints the control_locked flag,
+ * whether the busy lock is currently held by someone else, the bitmap size,
+ * and the active and recent TSG bitmaps as 64-bit words (the bitmaps are
+ * read under the status lock). Returns 0, or the gk20a_busy() error.
+ */
+static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
+{
+	struct gk20a *g = s->private;
+	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
+	int nr_words = sched->bitmap_size / sizeof(u64);
+	bool sched_busy;
+	int word;
+	int err;
+
+	err = gk20a_busy(g);
+	if (err)
+		return err;
+
+	/* Busy means the lock could not be taken; give it back if it was. */
+	sched_busy = !nvgpu_mutex_tryacquire(&sched->busy_lock);
+	if (!sched_busy)
+		nvgpu_mutex_release(&sched->busy_lock);
+
+	seq_printf(s, "control_locked=%d\n", sched->control_locked);
+	seq_printf(s, "busy=%d\n", sched_busy);
+	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
+
+	nvgpu_mutex_acquire(&sched->status_lock);
+
+	seq_puts(s, "active_tsg_bitmap\n");
+	for (word = 0; word < nr_words; word++)
+		seq_printf(s, "\t0x%016llx\n",
+			   sched->active_tsg_bitmap[word]);
+
+	seq_puts(s, "recent_tsg_bitmap\n");
+	for (word = 0; word < nr_words; word++)
+		seq_printf(s, "\t0x%016llx\n",
+			   sched->recent_tsg_bitmap[word]);
+
+	nvgpu_mutex_release(&sched->status_lock);
+
+	gk20a_idle(g);
+
+	return 0;
+}
+
+/* open() handler: bind gk20a_sched_debugfs_show() to the inode's data. */
+static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, gk20a_sched_debugfs_show, priv);
+}
+
+/* seq_file operations for the "sched_ctrl" node. */
+static const struct file_operations gk20a_sched_debugfs_fops = {
+	.open		= gk20a_sched_debugfs_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * Create the read-only "sched_ctrl" node in the platform debugfs directory,
+ * with @g as its private data.
+ */
+void gk20a_sched_debugfs_init(struct gk20a *g)
+{
+	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
+	struct dentry *root = platform->debugfs;
+
+	debugfs_create_file("sched_ctrl", S_IRUGO, root, g,
+			    &gk20a_sched_debugfs_fops);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/debug_sched.h b/drivers/gpu/nvgpu/common/linux/debug_sched.h
new file mode 100644
index 00000000..34a8f55f
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/debug_sched.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __NVGPU_DEBUG_SCHED_H__
+#define __NVGPU_DEBUG_SCHED_H__
+
+struct gk20a;
+void gk20a_sched_debugfs_init(struct gk20a *g);
+
+#endif /* __NVGPU_DEBUG_SCHED_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index 80e7698b..f85016d4 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -21,6 +21,7 @@
 #include <nvgpu/soc.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a/gk20a_scale.h"
 #include "gk20a/gk20a.h"
@@ -182,7 +183,7 @@ int nvgpu_probe(struct gk20a *g,
 	nvgpu_init_mm_vars(g);
 
 	gk20a_create_sysfs(g->dev);
-	gk20a_debug_init(g->dev, debugfs_symlink);
+	gk20a_debug_init(g, debugfs_symlink);
 
 	g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
 	if (!g->dbg_regops_tmp_buf) {
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 2502ff30..d81328f0 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -26,9 +26,9 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/log.h>
 #include <nvgpu/list.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a/gk20a.h"
-#include "gk20a/debug_gk20a.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 #include "gk20a/fence_gk20a.h"
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
index d058eba5..41aaa729 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem.c
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -134,19 +134,19 @@ void __nvgpu_vfree(struct gk20a *g, void *addr)
 
 #ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
 
-static void lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
+void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
 {
 	nvgpu_mutex_acquire(&tracker->lock);
 }
 
-static void unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
+void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
 {
 	nvgpu_mutex_release(&tracker->lock);
 }
 
-static void kmem_print_mem_alloc(struct gk20a *g,
-				 struct nvgpu_mem_alloc *alloc,
-				 struct seq_file *s)
+void kmem_print_mem_alloc(struct gk20a *g,
+			 struct nvgpu_mem_alloc *alloc,
+			 struct seq_file *s)
 {
 #ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
 	int i;
@@ -231,7 +231,7 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
 	alloc->stack_length = stack_trace.nr_entries;
 #endif
 
-	lock_tracker(tracker);
+	nvgpu_lock_tracker(tracker);
 	tracker->bytes_alloced += size;
 	tracker->bytes_alloced_real += real_size;
 	tracker->nr_allocs++;
@@ -246,10 +246,10 @@ static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
 	if (ret) {
 		WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
 		kfree(alloc);
-		unlock_tracker(tracker);
+		nvgpu_unlock_tracker(tracker);
 		return ret;
 	}
-	unlock_tracker(tracker);
+	nvgpu_unlock_tracker(tracker);
 
 	return 0;
 }
@@ -259,17 +259,17 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
 {
 	struct nvgpu_mem_alloc *alloc;
 
-	lock_tracker(tracker);
+	nvgpu_lock_tracker(tracker);
 	alloc = nvgpu_rem_alloc(tracker, addr);
 	if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
-		unlock_tracker(tracker);
+		nvgpu_unlock_tracker(tracker);
 		return -EINVAL;
 	}
 
 	tracker->nr_frees++;
 	tracker->bytes_freed += alloc->size;
 	tracker->bytes_freed_real += alloc->real_size;
-	unlock_tracker(tracker);
+	nvgpu_unlock_tracker(tracker);
 
 	return 0;
 }
@@ -407,307 +407,6 @@ void __nvgpu_track_kfree(struct gk20a *g, void *addr)
 	__nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
 }
 
-/**
- * to_human_readable_bytes - Determine  suffix for passed size.
- *
- * @bytes - Number of bytes to generate a suffix for.
- * @hr_bytes [out] - The human readable number of bytes.
- * @hr_suffix [out] - The suffix for the HR number of bytes.
- *
- * Computes a human readable decomposition of the passed number of bytes. The
- * suffix for the bytes is passed back through the @hr_suffix pointer. The right
- * number of bytes is then passed back in @hr_bytes. This returns the following
- * ranges:
- *
- *   0 - 1023 B
- *   1 - 1023 KB
- *   1 - 1023 MB
- *   1 - 1023 GB
- *   1 - 1023 TB
- *   1 - ...  PB
- */
-static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
-				      const char **hr_suffix)
-{
-	static const char *suffixes[] =
-		{ "B", "KB", "MB", "GB", "TB", "PB" };
-
-	u64 suffix_ind = 0;
-
-	while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
-		bytes >>= 10;
-		suffix_ind++;
-	}
-
-	/*
-	 * Handle case where bytes > 1023PB.
-	 */
-	suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
-		suffix_ind : ARRAY_SIZE(suffixes) - 1;
-
-	*hr_bytes = bytes;
-	*hr_suffix = suffixes[suffix_ind];
-}
-
-/**
- * print_hr_bytes - Print human readable bytes
- *
- * @s - A seq_file to print to. May be NULL.
- * @msg - A message to print before the bytes.
- * @bytes - Number of bytes.
- *
- * Print @msg followed by the human readable decomposition of the passed number
- * of bytes.
- *
- * If @s is NULL then this prints will be made to the kernel log.
- */
-static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
-{
-	u64 hr_bytes;
-	const char *hr_suffix;
-
-	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
-	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
-}
-
-/**
- * print_histogram - Build a histogram of the memory usage.
- *
- * @tracker The tracking to pull data from.
- * @s       A seq_file to dump info into.
- */
-static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
-			    struct seq_file *s)
-{
-	int i;
-	u64 pot_min, pot_max;
-	u64 nr_buckets;
-	unsigned int *buckets;
-	unsigned int total_allocs;
-	struct nvgpu_rbtree_node *node;
-	static const char histogram_line[] =
-		"++++++++++++++++++++++++++++++++++++++++";
-
-	/*
-	 * pot_min is essentially a round down to the nearest power of 2. This
-	 * is the start of the histogram. pot_max is just a round up to the
-	 * nearest power of two. Each histogram bucket is one power of two so
-	 * the histogram buckets are exponential.
-	 */
-	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
-	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
-
-	nr_buckets = __ffs(pot_max) - __ffs(pot_min);
-
-	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
-	if (!buckets) {
-		__pstat(s, "OOM: could not allocate bucket storage!?\n");
-		return;
-	}
-
-	/*
-	 * Iterate across all of the allocs and determine what bucket they
-	 * should go in. Round the size down to the nearest power of two to
-	 * find the right bucket.
-	 */
-	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
-	while (node) {
-		int b;
-		u64 bucket_min;
-		struct nvgpu_mem_alloc *alloc =
-			nvgpu_mem_alloc_from_rbtree_node(node);
-
-		bucket_min = (u64)rounddown_pow_of_two(alloc->size);
-		if (bucket_min < tracker->min_alloc)
-			bucket_min = tracker->min_alloc;
-
-		b = __ffs(bucket_min) - __ffs(pot_min);
-
-		/*
-		 * Handle the one case were there's an alloc exactly as big as
-		 * the maximum bucket size of the largest bucket. Most of the
-		 * buckets have an inclusive minimum and exclusive maximum. But
-		 * the largest bucket needs to have an _inclusive_ maximum as
-		 * well.
-		 */
-		if (b == (int)nr_buckets)
-			b--;
-
-		buckets[b]++;
-
-		nvgpu_rbtree_enum_next(&node, node);
-	}
-
-	total_allocs = 0;
-	for (i = 0; i < (int)nr_buckets; i++)
-		total_allocs += buckets[i];
-
-	__pstat(s, "Alloc histogram:\n");
-
-	/*
-	 * Actually compute the histogram lines.
-	 */
-	for (i = 0; i < (int)nr_buckets; i++) {
-		char this_line[sizeof(histogram_line) + 1];
-		u64 line_length;
-		u64 hr_bytes;
-		const char *hr_suffix;
-
-		memset(this_line, 0, sizeof(this_line));
-
-		/*
-		 * Compute the normalized line length. Cant use floating point
-		 * so we will just multiply everything by 1000 and use fixed
-		 * point.
-		 */
-		line_length = (1000 * buckets[i]) / total_allocs;
-		line_length *= sizeof(histogram_line);
-		line_length /= 1000;
-
-		memset(this_line, '+', line_length);
-
-		__to_human_readable_bytes(1 << (__ffs(pot_min) + i),
-					  &hr_bytes, &hr_suffix);
-		__pstat(s, "  [%-4lld %-4lld] %-2s %5u | %s\n",
-			hr_bytes, hr_bytes << 1,
-			hr_suffix, buckets[i], this_line);
-	}
-}
-
-#ifdef CONFIG_DEBUG_FS
-/**
- * nvgpu_kmem_print_stats - Print kmem tracking stats.
- *
- * @tracker The tracking to pull data from.
- * @s       A seq_file to dump info into.
- *
- * Print stats from a tracker. If @s is non-null then seq_printf() will be
- * used with @s. Otherwise the stats are pr_info()ed.
- */
-void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
-			    struct seq_file *s)
-{
-	lock_tracker(tracker);
-
-	__pstat(s, "Mem tracker: %s\n\n", tracker->name);
-
-	__pstat(s, "Basic Stats:\n");
-	__pstat(s,        "  Number of allocs        %lld\n",
-		tracker->nr_allocs);
-	__pstat(s,        "  Number of frees         %lld\n",
-		tracker->nr_frees);
-	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
-	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
-	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
-	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
-	print_hr_bytes(s, "  Bytes allocated (real)  ",
-		       tracker->bytes_alloced_real);
-	print_hr_bytes(s, "  Bytes freed (real)      ",
-		       tracker->bytes_freed_real);
-	__pstat(s, "\n");
-
-	print_histogram(tracker, s);
-
-	unlock_tracker(tracker);
-}
-
-static int __kmem_tracking_show(struct seq_file *s, void *unused)
-{
-	struct nvgpu_mem_alloc_tracker *tracker = s->private;
-
-	nvgpu_kmem_print_stats(tracker, s);
-
-	return 0;
-}
-
-static int __kmem_tracking_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, __kmem_tracking_show, inode->i_private);
-}
-
-static const struct file_operations __kmem_tracking_fops = {
-	.open = __kmem_tracking_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int __kmem_traces_dump_tracker(struct gk20a *g,
-				      struct nvgpu_mem_alloc_tracker *tracker,
-				      struct seq_file *s)
-{
-	struct nvgpu_rbtree_node *node;
-
-	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
-	while (node) {
-		struct nvgpu_mem_alloc *alloc =
-			nvgpu_mem_alloc_from_rbtree_node(node);
-
-		kmem_print_mem_alloc(g, alloc, s);
-
-		nvgpu_rbtree_enum_next(&node, node);
-	}
-
-	return 0;
-}
-
-static int __kmem_traces_show(struct seq_file *s, void *unused)
-{
-	struct gk20a *g = s->private;
-
-	lock_tracker(g->vmallocs);
-	seq_puts(s, "Oustanding vmallocs:\n");
-	__kmem_traces_dump_tracker(g, g->vmallocs, s);
-	seq_puts(s, "\n");
-	unlock_tracker(g->vmallocs);
-
-	lock_tracker(g->kmallocs);
-	seq_puts(s, "Oustanding kmallocs:\n");
-	__kmem_traces_dump_tracker(g, g->kmallocs, s);
-	unlock_tracker(g->kmallocs);
-
-	return 0;
-}
-
-static int __kmem_traces_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, __kmem_traces_show, inode->i_private);
-}
-
-static const struct file_operations __kmem_traces_fops = {
-	.open = __kmem_traces_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-void nvgpu_kmem_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *plat = dev_get_drvdata(dev);
-	struct gk20a *g = get_gk20a(dev);
-	struct dentry *gpu_root = plat->debugfs;
-	struct dentry *node;
-
-	g->debugfs_kmem = debugfs_create_dir("kmem_tracking", gpu_root);
-	if (IS_ERR_OR_NULL(g->debugfs_kmem))
-		return;
-
-	node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
-				   g->debugfs_kmem,
-				   g->vmallocs, &__kmem_tracking_fops);
-	node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
-				   g->debugfs_kmem,
-				   g->kmallocs, &__kmem_tracking_fops);
-	node = debugfs_create_file("traces", S_IRUGO,
-				   g->debugfs_kmem,
-				   g, &__kmem_traces_fops);
-}
-#else
-void nvgpu_kmem_debugfs_init(struct device *dev)
-{
-}
-#endif
-
 static int __do_check_for_outstanding_allocs(
 	struct gk20a *g,
 	struct nvgpu_mem_alloc_tracker *tracker,
diff --git a/drivers/gpu/nvgpu/common/linux/kmem_priv.h b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
index d3abb378..a41762af 100644
--- a/drivers/gpu/nvgpu/common/linux/kmem_priv.h
+++ b/drivers/gpu/nvgpu/common/linux/kmem_priv.h
@@ -20,6 +20,8 @@
 #include <nvgpu/rbtree.h>
 #include <nvgpu/lock.h>
 
+struct seq_file;
+
 #define __pstat(s, fmt, msg...)				\
 	do {						\
 		if (s)					\
@@ -92,6 +94,12 @@ struct nvgpu_mem_alloc_tracker {
 	unsigned long max_alloc;
 };
 
+void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
+void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
+
+void kmem_print_mem_alloc(struct gk20a *g,
+			 struct nvgpu_mem_alloc *alloc,
+			 struct seq_file *s);
 #endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
 
 #endif /* __KMEM_PRIV_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index d5fc40de..4f7fc3fa 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -29,6 +29,7 @@
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/debug.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -970,10 +971,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 
 	gk20a_user_deinit(dev, &nvgpu_class);
 
-#ifdef CONFIG_DEBUG_FS
-	debugfs_remove_recursive(platform->debugfs);
-	debugfs_remove_recursive(platform->debugfs_alias);
-#endif
+	gk20a_debug_deinit(g);
 
 	gk20a_remove_sysfs(dev);
 
diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 40ee199a..eae0475a 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -411,7 +411,9 @@ int nvgpu_bitmap_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	wmb();
 	a->inited = true;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_init_alloc_debug(g, __a);
+#endif
 	alloc_dbg(__a, "New allocator: type      bitmap\n");
 	alloc_dbg(__a, "               base      0x%llx\n", a->base);
 	alloc_dbg(__a, "               bit_offs  0x%llx\n", a->bit_offs);
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 34bc51df..0ef94c10 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -251,7 +251,9 @@ static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
 
 	alloc_lock(__a);
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_fini_alloc_debug(__a);
+#endif
 
 	/*
 	 * Free the fixed allocs first.
@@ -1290,7 +1292,9 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	wmb();
 	a->initialized = 1;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_init_alloc_debug(g, __a);
+#endif
 	alloc_dbg(__a, "New allocator: type      buddy\n");
 	alloc_dbg(__a, "               base      0x%llx\n", a->base);
 	alloc_dbg(__a, "               size      0x%llx\n", a->length);
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
index 234ae4a3..944b4b0f 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -99,7 +99,9 @@ static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a)
 {
 	struct nvgpu_lockless_allocator *pa = a->priv;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_fini_alloc_debug(a);
+#endif
 
 	nvgpu_vfree(a->g, pa->next);
 	nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa);
@@ -191,7 +193,9 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	wmb();
 	a->inited = true;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_init_alloc_debug(g, __a);
+#endif
 	alloc_dbg(__a, "New allocator: type          lockless\n");
 	alloc_dbg(__a, "               base          0x%llx\n", a->base);
 	alloc_dbg(__a, "               nodes         %d\n", a->nr_nodes);
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index 211b353b..1646d2b1 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -20,11 +20,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
-#ifdef CONFIG_DEBUG_FS
-#include "gk20a/platform_gk20a.h"
-#endif
-
-u32 nvgpu_alloc_tracing_on;
 
 u64 nvgpu_alloc_length(struct nvgpu_allocator *a)
 {
@@ -151,68 +146,3 @@ int __nvgpu_alloc_common_init(struct nvgpu_allocator *a, struct gk20a *g,
 
 	return 0;
 }
-
-#ifdef CONFIG_DEBUG_FS
-void nvgpu_alloc_print_stats(struct nvgpu_allocator *__a,
-			     struct seq_file *s, int lock)
-{
-	__a->ops->print_stats(__a, s, lock);
-}
-
-static int __alloc_show(struct seq_file *s, void *unused)
-{
-	struct nvgpu_allocator *a = s->private;
-
-	nvgpu_alloc_print_stats(a, s, 1);
-
-	return 0;
-}
-
-static int __alloc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, __alloc_show, inode->i_private);
-}
-
-static const struct file_operations __alloc_fops = {
-	.open = __alloc_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-#endif
-
-void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
-{
-#ifdef CONFIG_DEBUG_FS
-	if (!g->debugfs_allocators)
-		return;
-
-	a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
-					       g->debugfs_allocators,
-					       a, &__alloc_fops);
-#endif
-}
-
-void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
-{
-#ifdef CONFIG_DEBUG_FS
-	if (!IS_ERR_OR_NULL(a->debugfs_entry))
-		debugfs_remove(a->debugfs_entry);
-#endif
-}
-
-#ifdef CONFIG_DEBUG_FS
-void nvgpu_alloc_debugfs_init(struct device *dev)
-{
-	struct gk20a_platform *platform = dev_get_drvdata(dev);
-	struct dentry *gpu_root = platform->debugfs;
-	struct gk20a *g = get_gk20a(dev);
-
-	g->debugfs_allocators = debugfs_create_dir("allocators", gpu_root);
-	if (IS_ERR_OR_NULL(g->debugfs_allocators))
-		return;
-
-	debugfs_create_u32("tracing", 0664, g->debugfs_allocators,
-			   &nvgpu_alloc_tracing_on);
-}
-#endif
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 14b5da3c..3f4f3706 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -916,7 +916,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	if (err)
 		goto fail;
 
+#ifdef CONFIG_DEBUG_FS
 	nvgpu_init_alloc_debug(g, __a);
+#endif
 	palloc_dbg(a, "New allocator: type      page\n");
 	palloc_dbg(a, "               base      0x%llx\n", a->base);
 	palloc_dbg(a, "               size      0x%llx\n", a->length);
-- 
cgit v1.2.2