Linux 5.17+ support and allow including nvdebug.h independently

- Move Linux-specific functions to nvdebug_linux.h and .c
- Work around PDE_DATA() being renamed to pde_data() on Linux 5.17+
author: Joshua Bakita <jbakita@cs.unc.edu> 2024-04-11 13:42:54 -0400
committer: Joshua Bakita <jbakita@cs.unc.edu> 2024-04-11 13:42:54 -0400
commit: 7cfa24cebeaf144b446d07e15fb25e78bb14841e (patch)
tree: 639db3695c54a1e50b3f10e1abb2bfaf5b8dd906
parent: a8fd5a8dee066d0008e7667b0c9e6a60cd5f3a2e (diff)
8 files changed, 140 insertions, 108 deletions
diff --git a/Makefile b/Makefile
index c2d2e96..972bc0b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 obj-m += nvdebug.o
 nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \
-               nvdebug_entry.o bus.o copy_topology_procfs.o
+               nvdebug_entry.o bus.o nvdebug_linux.o copy_topology_procfs.o
 KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\"
 # -mfentry above if not building due to mcount missing
diff --git a/copy_topology_procfs.c b/copy_topology_procfs.c
index cfedce7..a1aa0ad 100644
--- a/copy_topology_procfs.c
+++ b/copy_topology_procfs.c
@@ -1,6 +1,6 @@
 // Copyright 2024 Joshua Bakita
-#include "nvdebug.h"
+#include "nvdebug_linux.h"
 // Maximum number of LCEs that we will print
 #define MAX_LCES 32
diff --git a/device_info_procfs.c b/device_info_procfs.c
index 168905f..8fe9709 100644
--- a/device_info_procfs.c
+++ b/device_info_procfs.c
@@ -1,4 +1,4 @@
-#include "nvdebug.h"
+#include "nvdebug_linux.h"
 #include <linux/seq_file.h> // For seq_* functions and types
 #include <linux/uaccess.h> // For copy_to_user()
@@ -16,7 +16,7 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size,
        if (size < 16 || *off != 0)
                return 0;
        // 32 bit register will always take less than 16 characters to print
-        chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)PDE_DATA(file_inode(f))));
+        chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)pde_data(file_inode(f))));
        if (copy_to_user(buf, out, chars_written))
                printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);
        *off += chars_written;
@@ -30,7 +30,7 @@ static ssize_t nvdebug_reg_range_read(struct file *f, char __user *buf, size_t s
        struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
        // See comment in nvdebug_entry.c to understand `union reg_range`
        union reg_range range;
-        range.raw = (uintptr_t)PDE_DATA(file_inode(f));
+        range.raw = (uintptr_t)pde_data(file_inode(f));
        // "0x" + up to 32-bit register as hex + "\n\0" is at most 12 characters
        if (size < 12 || *off != 0)
@@ -55,7 +55,7 @@ struct file_operations nvdebug_read_reg32_file_ops = {
 };
 // Generic mechanism used for printing a subset of bits from a register
-// Please store a `union reg_range` rather than a `uintptr_t` in the PDE_DATA
+// Please store a `union reg_range` rather than a `uintptr_t` in the pde_data
 struct file_operations nvdebug_read_reg_range_file_ops = {
        .read = nvdebug_reg_range_read,
        .llseek = default_llseek,
diff --git a/nvdebug.h b/nvdebug.h
index eff1470..ff35f70 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -7,13 +7,11 @@
 * - Detailed GPU information (PTOP, FUSE, and CE)
 * - PRAMIN, BAR1/2, and page table status
 * - Helper functions for nvdebug
+ *
+ * This file should not depend on any Linux-internal headers, and may be
+ * included outside of nvdebug.
 */
+#include <linux/types.h>
-#include <linux/device.h>   // For dev_get_drvdata()
-#include <linux/seq_file.h> // For struct seq_file
-#include <linux/proc_fs.h>  // For PDE_DATA() macro
-#include <linux/version.h>  // For KERNEL_VERSION and LINUX_VERSION_CODE
-#include <asm/io.h>
 // Fully defined in include/nvgpu/gk20a.h. We only pass around pointers to
 // this, so declare as incomplete type to avoid pulling in the nvgpu headers.
@@ -1147,8 +1145,11 @@ typedef union {
 } page_tbl_entry_v0_t;
 */
-// TODO(jbakita): Maybe put the above GPU types in a different file.
+/* Begin nvdebug types and functions */
+// Vendor ID for PCI devices manufactured by NVIDIA
 #define NV_PCI_VENDOR 0x10de
 struct nvdebug_state {
        // Pointer to the mapped base address of the GPU control registers (obtained
@@ -1195,7 +1196,6 @@ struct nvdebug_state {
                        const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \
                        return entry->prop; \
                } else { \
-                        printk(KERN_WARNING "[nvdebug] " #prop " unavailable on GPU ID %x, which is older than Kepler.\n", g->chip_id); \
                        return (type)0; \
                } \
        }
@@ -1250,96 +1250,18 @@ uint64_t search_v1_page_directory(
        struct nvdebug_state *g,
        page_dir_config_t pd_config,
        uint64_t addr_to_find);
-static inline struct gk20a *get_gk20a(struct device *dev) {
-        // XXX: Only works because gk20a* is the first member of gk20a_platform
-        return *((struct gk20a**)dev_get_drvdata(dev));
-}
-// We us the data field of the proc_dir_entry ("PDE" in this function) to store
-// our index into the g_nvdebug_state array
-static inline int seq2gpuidx(struct seq_file *s) {
-        const struct file *f = s->file;
-        return (uintptr_t)PDE_DATA(file_inode(f));
-}
-static inline int file2gpuidx(const struct file *f) {
-        return (uintptr_t)PDE_DATA(file_inode(f));
-}
-static inline int file2parentgpuidx(const struct file *f) {
-        // Should be safe to call on ProcFS entries, as our parent should (?)
-        // still exist if we're called. If not, there are worse races in this
-        // module.
-        return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode);
-}
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
-// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry
-// of the gk20a struct (after a function pointer). This change was made as L4T
-// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+)
-// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA
-// i.e. if FUSA was enabled, this is wrong.
-#define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void))))
-#else
-#include <os/linux/os_linux.h>  // For struct nvgpu_os_linux, which holds regs
-#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs)
-#endif
-// Similar to nvgpu_readl()
-// (except we don't try to resolve situations where regs is NULL)
-static inline u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
-        u32 ret;
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
-                printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
-                return -1;
-        }
-        ret = readl(s->regs + r);
-        // It seems like the GPU returns this as a flag value for bad addresses
-        if (ret == 0xbadf5040) {
-                printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r);
-                return -1;
-        }
-        return ret;
-}
-// quadword version of nvdebug_readl()
-static inline u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
-        u64 ret;
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
-                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
-                return -1;
-        }
-        // readq seems to always return the uppermost 32 bits as 0, so workaround with readl
-        ret = readl(s->regs + r);
-        ret |= ((u64)readl(s->regs + r + 4)) << 32;
-        // It seems like the GPU returns this as a flag value for bad addresses
-        if ((ret & 0xffffffffull) == 0xbadf5040ull) {
-                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r);
-                return -1;
-        }
-        return ret;
-}
-// Similar to nvgpu_writel()
-static inline void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
-                printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n");
-                return;
-        }
-        writel_relaxed(v, s->regs + r);
-        wmb();
-}
-// quadword version of nvdebug_writel()
-// XXX: This probably doesn't work XXX: Untested
-static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
-        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
-                printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n");
-                return;
-        }
-        writeq_relaxed(v, s->regs + r);
-        wmb();
-}
 // Defined in bus.c
 int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target);
 int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd);
+// Some portions of nvdebug can be included from kernel- or user-space (just
+// this file at present). In order for these compiled object files to be
+// usable in either setting, the appropriate version of the following functions
+// must be selected at link-time. Unfortunately, this precludes inlining (as
+// the implementation of an inline function must be known at compile time)
+// Implementations of these functions are provided for kernel-space by
+// nvdebug_linux.c.
+uint32_t nvdebug_readl(struct nvdebug_state *s, uint32_t r);
+uint64_t nvdebug_readq(struct nvdebug_state *s, uint32_t r);
+void nvdebug_writel(struct nvdebug_state *s, uint32_t r, uint32_t v);
+void nvdebug_writeq(struct nvdebug_state *s, uint32_t r, uint64_t v);
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 68e4d71..0caa289 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -9,7 +9,7 @@
 #include <linux/pci.h>  // For PCI device scanning
 #include <linux/proc_fs.h>  // So we can set up entries in /proc
-#include "nvdebug.h"
+#include "nvdebug_linux.h"
 #include "stubs.h"
 // Enable to intercept and log GPU interrupts. Historically used to benchmark
@@ -93,7 +93,7 @@ int probe_and_cache_devices(void) {
        // Search for GP10B (Jetson TX2)
        while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b")))
                dev = temp_dev;
-        // Search for GM10A (Jetson TX1)
+        // Search for GM20B (Jetson TX1)
        while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "57000000.gpu")))
                dev = temp_dev;
        // TODO: Support other platform bus devices (gk20a - TK1)
@@ -173,7 +173,7 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
                        max_rl_id = info.runlist_enum;
        }
        // Create files to read each runlist. The read handling code looks at the
-        // PDE_DATA associated with the file to determine what the runlist ID is.
+        // `pde_data` associated with the file to determine what the runlist ID is.
        for (rl_id = 0; rl_id <= max_rl_id; rl_id++) {
                snprintf(runlist_name, 12, "runlist%d", rl_id);
                rl_entry = proc_create_data(
diff --git a/nvdebug_linux.c b/nvdebug_linux.c
new file mode 100644
index 0000000..1d76bc9
--- /dev/null
+++ b/nvdebug_linux.c
@@ -0,0 +1,61 @@
+/* Copyright 2024 Joshua Bakita
+ * Implementations of kernel-specific functions
+ */
+#include "nvdebug_linux.h"
+#include <asm/io.h> // For read[l,q] and write[l,q]
+// Similar to nvgpu_readl()
+// (except we don't try to resolve situations where regs is NULL)
+u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
+        u32 ret;
+        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+                printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
+                return -1;
+        }
+        ret = readl(s->regs + r);
+        // It seems like the GPU returns this as a flag value for bad addresses
+        if (ret == 0xbadf5040) {
+                printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r);
+                return -1;
+        }
+        return ret;
+}
+// quadword version of nvdebug_readl()
+u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
+        u64 ret;
+        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
+                return -1;
+        }
+        // readq seems to always return the uppermost 32 bits as 0, so workaround with readl
+        ret = readl(s->regs + r);
+        ret |= ((u64)readl(s->regs + r + 4)) << 32;
+        // It seems like the GPU returns this as a flag value for bad addresses
+        if ((ret & 0xffffffffull) == 0xbadf5040ull) {
+                printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r);
+                return -1;
+        }
+        return ret;
+}
+// Similar to nvgpu_writel()
+void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
+        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+                printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n");
+                return;
+        }
+        writel_relaxed(v, s->regs + r);
+        wmb();
+}
+// quadword version of nvdebug_writel()
+// XXX: This probably doesn't work XXX: Untested
+void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
+        if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
+                printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n");
+                return;
+        }
+        writeq_relaxed(v, s->regs + r);
+        wmb();
+}
diff --git a/nvdebug_linux.h b/nvdebug_linux.h
new file mode 100644
index 0000000..022d1cf
--- /dev/null
+++ b/nvdebug_linux.h
@@ -0,0 +1,49 @@
+/* Copyright 2024 Joshua Bakita
+ * SPDX-License-Identifier: MIT
+ *
+ * Helpers which are kernel-specific
+ */
+#include "nvdebug.h"
+#include <linux/device.h>   // For dev_get_drvdata()
+#include <linux/seq_file.h> // For struct seq_file
+#include <linux/proc_fs.h>  // For pde_data() macro
+#include <linux/version.h>  // For KERNEL_VERSION and LINUX_VERSION_CODE
+static inline struct gk20a *get_gk20a(struct device *dev) {
+        // Only works because gk20a* is the first member of gk20a_platform
+        return *((struct gk20a**)dev_get_drvdata(dev));
+}
+// PDE_DATA was renamed to pde_data in Linux 5.17 to deconflict with a driver
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5,17,0)
+#define pde_data PDE_DATA
+#endif
+// We use the data field of the proc_dir_entry ("PDE" in this function) to store
+// our index into the g_nvdebug_state array
+static inline int seq2gpuidx(struct seq_file *s) {
+        const struct file *f = s->file;
+        return (uintptr_t)pde_data(file_inode(f));
+}
+static inline int file2gpuidx(const struct file *f) {
+        return (uintptr_t)pde_data(file_inode(f));
+}
+static inline int file2parentgpuidx(const struct file *f) {
+        // Should be safe to call on ProcFS entries, as our parent should (?)
+        // still exist if we're called. If not, there are worse races in this
+        // module.
+        return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode);
+}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
+// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry
+// of the gk20a struct (after a function pointer). This change was made as L4T
+// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+)
+// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA
+// i.e. if FUSA was enabled, this is wrong.
+#define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void))))
+#else
+#include <os/linux/os_linux.h>  // For struct nvgpu_os_linux, which holds regs
+#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs)
+#endif
diff --git a/runlist_procfs.c b/runlist_procfs.c
index e50c34a..0087d90 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -1,7 +1,7 @@
 #include <linux/seq_file.h> // For seq_* functions and types
 #include <linux/version.h>  // Macros to detect kernel version
-#include "nvdebug.h"
+#include "nvdebug_linux.h"
 #define RUNLIST_PROCFS_NAME "runlist"
 #define DETAILED_CHANNEL_INFO
author	Joshua Bakita <jbakita@cs.unc.edu>	2024-04-11 13:42:54 -0400
committer	Joshua Bakita <jbakita@cs.unc.edu>	2024-04-11 13:42:54 -0400
commit	7cfa24cebeaf144b446d07e15fb25e78bb14841e (patch)
tree	639db3695c54a1e50b3f10e1abb2bfaf5b8dd906
parent	a8fd5a8dee066d0008e7667b0c9e6a60cd5f3a2e (diff)

diff --git a/Makefile b/Makefile index c2d2e96..972bc0b 100644 --- a/Makefile +++ b/Makefile
@@ -1,6 +1,6 @@
1	obj-m += nvdebug.o	1	obj-m += nvdebug.o
2	nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \	2	nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \
3	nvdebug_entry.o bus.o copy_topology_procfs.o	3	nvdebug_entry.o bus.o nvdebug_linux.o copy_topology_procfs.o
4	KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\"	4	KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\"
5	# -mfentry above if not building due to mcount missing	5	# -mfentry above if not building due to mcount missing
6		6


diff --git a/copy_topology_procfs.c b/copy_topology_procfs.c index cfedce7..a1aa0ad 100644 --- a/copy_topology_procfs.c +++ b/copy_topology_procfs.c
@@ -1,6 +1,6 @@
1	// Copyright 2024 Joshua Bakita	1	// Copyright 2024 Joshua Bakita
2		2
3	#include "nvdebug.h"	3	#include "nvdebug_linux.h"
4		4
5	// Maximum number of LCEs that we will print	5	// Maximum number of LCEs that we will print
6	#define MAX_LCES 32	6	#define MAX_LCES 32


diff --git a/device_info_procfs.c b/device_info_procfs.c index 168905f..8fe9709 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c
@@ -1,4 +1,4 @@
1	#include "nvdebug.h"	1	#include "nvdebug_linux.h"
2	#include <linux/seq_file.h> // For seq_* functions and types	2	#include <linux/seq_file.h> // For seq_* functions and types
3	#include <linux/uaccess.h> // For copy_to_user()	3	#include <linux/uaccess.h> // For copy_to_user()
4		4
@@ -16,7 +16,7 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size,
16	if (size < 16 || *off != 0)	16	if (size < 16 || *off != 0)
17	return 0;	17	return 0;
18	// 32 bit register will always take less than 16 characters to print	18	// 32 bit register will always take less than 16 characters to print
19	chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)PDE_DATA(file_inode(f))));	19	chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)pde_data(file_inode(f))));
20	if (copy_to_user(buf, out, chars_written))	20	if (copy_to_user(buf, out, chars_written))
21	printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);	21	printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);
22	*off += chars_written;	22	*off += chars_written;
@@ -30,7 +30,7 @@ static ssize_t nvdebug_reg_range_read(struct file *f, char __user *buf, size_t s
30	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];	30	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
31	// See comment in nvdebug_entry.c to understand `union reg_range`	31	// See comment in nvdebug_entry.c to understand `union reg_range`
32	union reg_range range;	32	union reg_range range;
33	range.raw = (uintptr_t)PDE_DATA(file_inode(f));	33	range.raw = (uintptr_t)pde_data(file_inode(f));
34		34
35	// "0x" + up to 32-bit register as hex + "\n\0" is at most 12 characters	35	// "0x" + up to 32-bit register as hex + "\n\0" is at most 12 characters
36	if (size < 12 || *off != 0)	36	if (size < 12 || *off != 0)
@@ -55,7 +55,7 @@ struct file_operations nvdebug_read_reg32_file_ops = {
55	};	55	};
56		56
57	// Generic mechanism used for printing a subset of bits from a register	57	// Generic mechanism used for printing a subset of bits from a register
58	// Please store a `union reg_range` rather than a `uintptr_t` in the PDE_DATA	58	// Please store a `union reg_range` rather than a `uintptr_t` in the pde_data
59	struct file_operations nvdebug_read_reg_range_file_ops = {	59	struct file_operations nvdebug_read_reg_range_file_ops = {
60	.read = nvdebug_reg_range_read,	60	.read = nvdebug_reg_range_read,
61	.llseek = default_llseek,	61	.llseek = default_llseek,


diff --git a/nvdebug.h b/nvdebug.h index eff1470..ff35f70 100644 --- a/nvdebug.h +++ b/nvdebug.h
@@ -7,13 +7,11 @@
7	* - Detailed GPU information (PTOP, FUSE, and CE)	7	* - Detailed GPU information (PTOP, FUSE, and CE)
8	* - PRAMIN, BAR1/2, and page table status	8	* - PRAMIN, BAR1/2, and page table status
9	* - Helper functions for nvdebug	9	* - Helper functions for nvdebug
		10	*
		11	* This file should not depend on any Linux-internal headers, and may be
		12	* included outside of nvdebug.
10	*/	13	*/
11		14	#include <linux/types.h>
12	#include <linux/device.h> // For dev_get_drvdata()
13	#include <linux/seq_file.h> // For struct seq_file
14	#include <linux/proc_fs.h> // For PDE_DATA() macro
15	#include <linux/version.h> // For KERNEL_VERSION and LINUX_VERSION_CODE
16	#include <asm/io.h>
17		15
18	// Fully defined in include/nvgpu/gk20a.h. We only pass around pointers to	16	// Fully defined in include/nvgpu/gk20a.h. We only pass around pointers to
19	// this, so declare as incomplete type to avoid pulling in the nvgpu headers.	17	// this, so declare as incomplete type to avoid pulling in the nvgpu headers.
@@ -1147,8 +1145,11 @@ typedef union {
1147	} page_tbl_entry_v0_t;	1145	} page_tbl_entry_v0_t;
1148	*/	1146	*/
1149		1147
1150	// TODO(jbakita): Maybe put the above GPU types in a different file.
1151		1148
		1149
		1150	/* Begin nvdebug types and functions */
		1151
		1152	// Vendor ID for PCI devices manufactured by NVIDIA
1152	#define NV_PCI_VENDOR 0x10de	1153	#define NV_PCI_VENDOR 0x10de
1153	struct nvdebug_state {	1154	struct nvdebug_state {
1154	// Pointer to the mapped base address of the GPU control registers (obtained	1155	// Pointer to the mapped base address of the GPU control registers (obtained
@@ -1195,7 +1196,6 @@ struct nvdebug_state {
1195	const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \	1196	const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \
1196	return entry->prop; \	1197	return entry->prop; \
1197	} else { \	1198	} else { \
1198	printk(KERN_WARNING "[nvdebug] " #prop " unavailable on GPU ID %x, which is older than Kepler.\n", g->chip_id); \
1199	return (type)0; \	1199	return (type)0; \
1200	} \	1200	} \
1201	}	1201	}
@@ -1250,96 +1250,18 @@ uint64_t search_v1_page_directory(
1250	struct nvdebug_state *g,	1250	struct nvdebug_state *g,
1251	page_dir_config_t pd_config,	1251	page_dir_config_t pd_config,
1252	uint64_t addr_to_find);	1252	uint64_t addr_to_find);
1253
1254
1255	static inline struct gk20a *get_gk20a(struct device *dev) {
1256	// XXX: Only works because gk20a* is the first member of gk20a_platform
1257	return *((struct gk20a**)dev_get_drvdata(dev));
1258	}
1259
1260	// We us the data field of the proc_dir_entry ("PDE" in this function) to store
1261	// our index into the g_nvdebug_state array
1262	static inline int seq2gpuidx(struct seq_file *s) {
1263	const struct file *f = s->file;
1264	return (uintptr_t)PDE_DATA(file_inode(f));
1265	}
1266	static inline int file2gpuidx(const struct file *f) {
1267	return (uintptr_t)PDE_DATA(file_inode(f));
1268	}
1269	static inline int file2parentgpuidx(const struct file *f) {
1270	// Should be safe to call on ProcFS entries, as our parent should (?)
1271	// still exist if we're called. If not, there are worse races in this
1272	// module.
1273	return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode);
1274	}
1275
1276	#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
1277	// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry
1278	// of the gk20a struct (after a function pointer). This change was made as L4T
1279	// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+)
1280	// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA
1281	// i.e. if FUSA was enabled, this is wrong.
1282	#define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void))))
1283	#else
1284	#include <os/linux/os_linux.h> // For struct nvgpu_os_linux, which holds regs
1285	#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs)
1286	#endif
1287
1288	// Similar to nvgpu_readl()
1289	// (except we don't try to resolve situations where regs is NULL)
1290	static inline u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
1291	u32 ret;
1292	if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
1293	printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
1294	return -1;
1295	}
1296	ret = readl(s->regs + r);
1297	// It seems like the GPU returns this as a flag value for bad addresses
1298	if (ret == 0xbadf5040) {
1299	printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r);
1300	return -1;
1301	}
1302	return ret;
1303	}
1304
1305	// quadword version of nvdebug_readl()
1306	static inline u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
1307	u64 ret;
1308	if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
1309	printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
1310	return -1;
1311	}
1312	// readq seems to always return the uppermost 32 bits as 0, so workaround with readl
1313	ret = readl(s->regs + r);
1314	ret |= ((u64)readl(s->regs + r + 4)) << 32;
1315	// It seems like the GPU returns this as a flag value for bad addresses
1316	if ((ret & 0xffffffffull) == 0xbadf5040ull) {
1317	printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r);
1318	return -1;
1319	}
1320	return ret;
1321	}
1322
1323	// Similar to nvgpu_writel()
1324	static inline void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
1325	if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
1326	printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n");
1327	return;
1328	}
1329	writel_relaxed(v, s->regs + r);
1330	wmb();
1331	}
1332
1333	// quadword version of nvdebug_writel()
1334	// XXX: This probably doesn't work XXX: Untested
1335	static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
1336	if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
1337	printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n");
1338	return;
1339	}
1340	writeq_relaxed(v, s->regs + r);
1341	wmb();
1342	}
1343	// Defined in bus.c	1253	// Defined in bus.c
1344	int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target);	1254	int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target);
1345	int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd);	1255	int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd);
		1256
		1257	// Some portions of nvdebug can be included from kernel- or user-space (just
		1258	// this file at present). In order for these compiled object files to be
		1259	// usable in either setting, the appropriate version of the following functions
		1260	// must be selected at link-time. Unfortunately, this precludes inlining (as
		1261	// the implementation of an inline function must be known at compile time)
		1262	// Implementations of these functions are provided for kernel-space by
		1263	// nvdebug_linux.c.
		1264	uint32_t nvdebug_readl(struct nvdebug_state *s, uint32_t r);
		1265	uint64_t nvdebug_readq(struct nvdebug_state *s, uint32_t r);
		1266	void nvdebug_writel(struct nvdebug_state *s, uint32_t r, uint32_t v);
		1267	void nvdebug_writeq(struct nvdebug_state *s, uint32_t r, uint64_t v);


diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 68e4d71..0caa289 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c
@@ -9,7 +9,7 @@
9	#include <linux/pci.h> // For PCI device scanning	9	#include <linux/pci.h> // For PCI device scanning
10	#include <linux/proc_fs.h> // So we can set up entries in /proc	10	#include <linux/proc_fs.h> // So we can set up entries in /proc
11		11
12	#include "nvdebug.h"	12	#include "nvdebug_linux.h"
13	#include "stubs.h"	13	#include "stubs.h"
14		14
15	// Enable to intercept and log GPU interrupts. Historically used to benchmark	15	// Enable to intercept and log GPU interrupts. Historically used to benchmark
@@ -93,7 +93,7 @@ int probe_and_cache_devices(void) {
93	// Search for GP10B (Jetson TX2)	93	// Search for GP10B (Jetson TX2)
94	while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b")))	94	while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b")))
95	dev = temp_dev;	95	dev = temp_dev;
96	// Search for GM10A (Jetson TX1)	96	// Search for GM20B (Jetson TX1)
97	while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "57000000.gpu")))	97	while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "57000000.gpu")))
98	dev = temp_dev;	98	dev = temp_dev;
99	// TODO: Support other platform bus devices (gk20a - TK1)	99	// TODO: Support other platform bus devices (gk20a - TK1)
@@ -173,7 +173,7 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
173	max_rl_id = info.runlist_enum;	173	max_rl_id = info.runlist_enum;
174	}	174	}
175	// Create files to read each runlist. The read handling code looks at the	175	// Create files to read each runlist. The read handling code looks at the
176	// PDE_DATA associated with the file to determine what the runlist ID is.	176	// `pde_data` associated with the file to determine what the runlist ID is.
177	for (rl_id = 0; rl_id <= max_rl_id; rl_id++) {	177	for (rl_id = 0; rl_id <= max_rl_id; rl_id++) {
178	snprintf(runlist_name, 12, "runlist%d", rl_id);	178	snprintf(runlist_name, 12, "runlist%d", rl_id);
179	rl_entry = proc_create_data(	179	rl_entry = proc_create_data(


diff --git a/nvdebug_linux.c b/nvdebug_linux.c new file mode 100644 index 0000000..1d76bc9 --- /dev/null +++ b/nvdebug_linux.c
@@ -0,0 +1,61 @@
		1	/* Copyright 2024 Joshua Bakita
		2	* Implementations of kernel-specific functions
		3	*/
		4	#include "nvdebug_linux.h"
		5	#include <asm/io.h> // For read[l,q] and write[l,q]
		6
		7	// Similar to nvgpu_readl()
		8	// (except we don't try to resolve situations where regs is NULL)
		9	u32 nvdebug_readl(struct nvdebug_state *s, u32 r) {
		10	u32 ret;
		11	if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
		12	printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n");
		13	return -1;
		14	}
		15	ret = readl(s->regs + r);
		16	// It seems like the GPU returns this as a flag value for bad addresses
		17	if (ret == 0xbadf5040) {
		18	printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r);
		19	return -1;
		20	}
		21	return ret;
		22	}
		23
		24	// quadword version of nvdebug_readl()
		25	u64 nvdebug_readq(struct nvdebug_state *s, u32 r) {
		26	u64 ret;
		27	if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
		28	printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n");
		29	return -1;
		30	}
		31	// readq seems to always return the uppermost 32 bits as 0, so workaround with readl
		32	ret = readl(s->regs + r);
		33	ret |= ((u64)readl(s->regs + r + 4)) << 32;
		34	// It seems like the GPU returns this as a flag value for bad addresses
		35	if ((ret & 0xffffffffull) == 0xbadf5040ull) {
		36	printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r);
		37	return -1;
		38	}
		39	return ret;
		40	}
		41
		42	// Similar to nvgpu_writel()
		43	void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) {
		44	if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
		45	printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n");
		46	return;
		47	}
		48	writel_relaxed(v, s->regs + r);
		49	wmb();
		50	}
		51
		52	// quadword version of nvdebug_writel()
		53	// XXX: This probably doesn't work XXX: Untested
		54	void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
		55	if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) {
		56	printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n");
		57	return;
		58	}
		59	writeq_relaxed(v, s->regs + r);
		60	wmb();
		61	}


diff --git a/nvdebug_linux.h b/nvdebug_linux.h new file mode 100644 index 0000000..022d1cf --- /dev/null +++ b/nvdebug_linux.h
@@ -0,0 +1,49 @@
		1	/* Copyright 2024 Joshua Bakita
		2	* SPDX-License-Identifier: MIT
		3	*
		4	* Helpers which are kernel-specific
		5	*/
		6	#include "nvdebug.h"
		7
		8	#include <linux/device.h> // For dev_get_drvdata()
		9	#include <linux/seq_file.h> // For struct seq_file
		10	#include <linux/proc_fs.h> // For pde_data() macro
		11	#include <linux/version.h> // For KERNEL_VERSION and LINUX_VERSION_CODE
		12
		13	static inline struct gk20a *get_gk20a(struct device *dev) {
		14	// Only works because gk20a* is the first member of gk20a_platform
		15	return *((struct gk20a**)dev_get_drvdata(dev));
		16	}
		17
		18	// PDE_DATA was renamed to pde_data in Linux 5.17 to deconflict with a driver
		19	#if LINUX_VERSION_CODE < KERNEL_VERSION(5,17,0)
		20	#define pde_data PDE_DATA
		21	#endif
		22
		23	// We use the data field of the proc_dir_entry ("PDE" in this function) to store
		24	// our index into the g_nvdebug_state array
		25	static inline int seq2gpuidx(struct seq_file *s) {
		26	const struct file *f = s->file;
		27	return (uintptr_t)pde_data(file_inode(f));
		28	}
		29	static inline int file2gpuidx(const struct file *f) {
		30	return (uintptr_t)pde_data(file_inode(f));
		31	}
		32	static inline int file2parentgpuidx(const struct file *f) {
		33	// Should be safe to call on ProcFS entries, as our parent should (?)
		34	// still exist if we're called. If not, there are worse races in this
		35	// module.
		36	return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode);
		37	}
		38
		39	#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
		40	// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry
		41	// of the gk20a struct (after a function pointer). This change was made as L4T
		42	// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+)
		43	// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA
		44	// i.e. if FUSA was enabled, this is wrong.
		45	#define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void))))
		46	#else
		47	#include <os/linux/os_linux.h> // For struct nvgpu_os_linux, which holds regs
		48	#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs)
		49	#endif


diff --git a/runlist_procfs.c b/runlist_procfs.c index e50c34a..0087d90 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c
@@ -1,7 +1,7 @@
1	#include <linux/seq_file.h> // For seq_* functions and types	1	#include <linux/seq_file.h> // For seq_* functions and types
2	#include <linux/version.h> // Macros to detect kernel version	2	#include <linux/version.h> // Macros to detect kernel version
3		3
4	#include "nvdebug.h"	4	#include "nvdebug_linux.h"
5		5
6	#define RUNLIST_PROCFS_NAME "runlist"	6	#define RUNLIST_PROCFS_NAME "runlist"
7	#define DETAILED_CHANNEL_INFO	7	#define DETAILED_CHANNEL_INFO