From 7cfa24cebeaf144b446d07e15fb25e78bb14841e Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Thu, 11 Apr 2024 13:42:54 -0400 Subject: Linux 5.17+ support and allow including nvdebug.h independently - Move Linux-specific functions to nvdebug_linux.h and .c - Workaround PDE_DATA() being pde_data() on Linux 5.17+ --- Makefile | 2 +- copy_topology_procfs.c | 2 +- device_info_procfs.c | 8 ++-- nvdebug.h | 118 +++++++++---------------------------------------- nvdebug_entry.c | 6 +-- nvdebug_linux.c | 61 +++++++++++++++++++++++++ nvdebug_linux.h | 49 ++++++++++++++++++++ runlist_procfs.c | 2 +- 8 files changed, 140 insertions(+), 108 deletions(-) create mode 100644 nvdebug_linux.c create mode 100644 nvdebug_linux.h diff --git a/Makefile b/Makefile index c2d2e96..972bc0b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ obj-m += nvdebug.o nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \ - nvdebug_entry.o bus.o copy_topology_procfs.o + nvdebug_entry.o bus.o nvdebug_linux.o copy_topology_procfs.o KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" # -mfentry above if not building due to mcount missing diff --git a/copy_topology_procfs.c b/copy_topology_procfs.c index cfedce7..a1aa0ad 100644 --- a/copy_topology_procfs.c +++ b/copy_topology_procfs.c @@ -1,6 +1,6 @@ // Copyright 2024 Joshua Bakita -#include "nvdebug.h" +#include "nvdebug_linux.h" // Maximum number of LCEs that we will print #define MAX_LCES 32 diff --git a/device_info_procfs.c b/device_info_procfs.c index 168905f..8fe9709 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c @@ -1,4 +1,4 @@ -#include "nvdebug.h" +#include "nvdebug_linux.h" #include // For seq_* functions and types #include // For copy_to_user() @@ -16,7 +16,7 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, if (size < 16 || *off != 0) return 0; // 32 bit register will always take less than 16 characters to print - chars_written = 
scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)PDE_DATA(file_inode(f)))); + chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)pde_data(file_inode(f)))); if (copy_to_user(buf, out, chars_written)) printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name); *off += chars_written; @@ -30,7 +30,7 @@ static ssize_t nvdebug_reg_range_read(struct file *f, char __user *buf, size_t s struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; // See comment in nvdebug_entry.c to understand `union reg_range` union reg_range range; - range.raw = (uintptr_t)PDE_DATA(file_inode(f)); + range.raw = (uintptr_t)pde_data(file_inode(f)); // "0x" + up to 32-bit register as hex + "\n\0" is at most 12 characters if (size < 12 || *off != 0) @@ -55,7 +55,7 @@ struct file_operations nvdebug_read_reg32_file_ops = { }; // Generic mechanism used for printing a subset of bits from a register -// Please store a `union reg_range` rather than a `uintptr_t` in the PDE_DATA +// Please store a `union reg_range` rather than a `uintptr_t` in the pde_data struct file_operations nvdebug_read_reg_range_file_ops = { .read = nvdebug_reg_range_read, .llseek = default_llseek, diff --git a/nvdebug.h b/nvdebug.h index eff1470..ff35f70 100644 --- a/nvdebug.h +++ b/nvdebug.h @@ -7,13 +7,11 @@ * - Detailed GPU information (PTOP, FUSE, and CE) * - PRAMIN, BAR1/2, and page table status * - Helper functions for nvdebug + * + * This file should not depend on any Linux-internal headers, and may be + * included outside of nvdebug. */ - -#include <linux/device.h> // For dev_get_drvdata() -#include <linux/seq_file.h> // For struct seq_file -#include <linux/proc_fs.h> // For PDE_DATA() macro -#include <linux/version.h> // For KERNEL_VERSION and LINUX_VERSION_CODE -#include +#include // Fully defined in include/nvgpu/gk20a.h. We only pass around pointers to // this, so declare as incomplete type to avoid pulling in the nvgpu headers. 
@@ -1147,8 +1145,11 @@ typedef union { } page_tbl_entry_v0_t; */ -// TODO(jbakita): Maybe put the above GPU types in a different file. + +/* Begin nvdebug types and functions */ + +// Vendor ID for PCI devices manufactured by NVIDIA #define NV_PCI_VENDOR 0x10de struct nvdebug_state { // Pointer to the mapped base address of the GPU control registers (obtained @@ -1195,7 +1196,6 @@ struct nvdebug_state { const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \ return entry->prop; \ } else { \ - printk(KERN_WARNING "[nvdebug] " #prop " unavailable on GPU ID %x, which is older than Kepler.\n", g->chip_id); \ return (type)0; \ } \ } @@ -1250,96 +1250,18 @@ uint64_t search_v1_page_directory( struct nvdebug_state *g, page_dir_config_t pd_config, uint64_t addr_to_find); - - -static inline struct gk20a *get_gk20a(struct device *dev) { - // XXX: Only works because gk20a* is the first member of gk20a_platform - return *((struct gk20a**)dev_get_drvdata(dev)); -} - -// We us the data field of the proc_dir_entry ("PDE" in this function) to store -// our index into the g_nvdebug_state array -static inline int seq2gpuidx(struct seq_file *s) { - const struct file *f = s->file; - return (uintptr_t)PDE_DATA(file_inode(f)); -} -static inline int file2gpuidx(const struct file *f) { - return (uintptr_t)PDE_DATA(file_inode(f)); -} -static inline int file2parentgpuidx(const struct file *f) { - // Should be safe to call on ProcFS entries, as our parent should (?) - // still exist if we're called. If not, there are worse races in this - // module. - return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode); -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) -// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry -// of the gk20a struct (after a function pointer). 
This change was made as L4T -// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+) -// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA -// i.e. if FUSA was enabled, this is wrong. -#define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void)))) -#else -#include // For struct nvgpu_os_linux, which holds regs -#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) -#endif - -// Similar to nvgpu_readl() -// (except we don't try to resolve situations where regs is NULL) -static inline u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { - u32 ret; - if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { - printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); - return -1; - } - ret = readl(s->regs + r); - // It seems like the GPU returns this as a flag value for bad addresses - if (ret == 0xbadf5040) { - printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r); - return -1; - } - return ret; -} - -// quadword version of nvdebug_readl() -static inline u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { - u64 ret; - if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { - printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. 
Is GPU on?\n"); - return -1; - } - // readq seems to always return the uppermost 32 bits as 0, so workaround with readl - ret = readl(s->regs + r); - ret |= ((u64)readl(s->regs + r + 4)) << 32; - // It seems like the GPU returns this as a flag value for bad addresses - if ((ret & 0xffffffffull) == 0xbadf5040ull) { - printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r); - return -1; - } - return ret; -} - -// Similar to nvgpu_writel() -static inline void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { - if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { - printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); - return; - } - writel_relaxed(v, s->regs + r); - wmb(); -} - -// quadword version of nvdebug_writel() -// XXX: This probably doesn't work XXX: Untested -static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { - if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { - printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); - return; - } - writeq_relaxed(v, s->regs + r); - wmb(); -} // Defined in bus.c int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); + +// Some portions of nvdebug can be included from kernel- or user-space (just +// this file at present). In order for these compiled object files to be +// usable in either setting, the appropriate version of the following functions +// must be selected at link-time. Unfortunately, this precludes inlining (as +// the implementation of an inline function must be known at compile time) +// Implementations of these functions are provided for kernel-space by +// nvdebug_linux.c. 
+uint32_t nvdebug_readl(struct nvdebug_state *s, uint32_t r); +uint64_t nvdebug_readq(struct nvdebug_state *s, uint32_t r); +void nvdebug_writel(struct nvdebug_state *s, uint32_t r, uint32_t v); +void nvdebug_writeq(struct nvdebug_state *s, uint32_t r, uint64_t v); diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 68e4d71..0caa289 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c @@ -9,7 +9,7 @@ #include <linux/pci.h> // For PCI device scanning #include <linux/proc_fs.h> // So we can set up entries in /proc -#include "nvdebug.h" +#include "nvdebug_linux.h" #include "stubs.h" // Enable to intercept and log GPU interrupts. Historically used to benchmark @@ -93,7 +93,7 @@ int probe_and_cache_devices(void) { // Search for GP10B (Jetson TX2) while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b"))) dev = temp_dev; - // Search for GM10A (Jetson TX1) + // Search for GM20B (Jetson TX1) while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "57000000.gpu"))) dev = temp_dev; // TODO: Support other platform bus devices (gk20a - TK1) @@ -173,7 +173,7 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) { max_rl_id = info.runlist_enum; } // Create files to read each runlist. The read handling code looks at the - // PDE_DATA associated with the file to determine what the runlist ID is. + // `pde_data` associated with the file to determine what the runlist ID is. 
for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { snprintf(runlist_name, 12, "runlist%d", rl_id); rl_entry = proc_create_data( diff --git a/nvdebug_linux.c b/nvdebug_linux.c new file mode 100644 index 0000000..1d76bc9 --- /dev/null +++ b/nvdebug_linux.c @@ -0,0 +1,61 @@ +/* Copyright 2024 Joshua Bakita + * Kernel-specific implementations of the functions declared in nvdebug.h + */ +#include "nvdebug_linux.h" +#include <linux/io.h> // For read[l,q] and write[l,q] + +// Similar to nvgpu_readl() +// (except we don't try to resolve situations where regs is NULL) +u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { + u32 ret; + if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { + printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); + return -1; + } + ret = readl(s->regs + r); + // It seems like the GPU returns this as a flag value for bad addresses + if (ret == 0xbadf5040) { + printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r); + return -1; + } + return ret; +} + +// quadword version of nvdebug_readl() +u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { + u64 ret; + if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { + printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); + return -1; + } + // readq seems to always return the uppermost 32 bits as 0, so workaround with readl + ret = readl(s->regs + r); + ret |= ((u64)readl(s->regs + r + 4)) << 32; + // It seems like the GPU returns this as a flag value for bad addresses + if ((ret & 0xffffffffull) == 0xbadf5040ull) { + printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r); + return -1; + } + return ret; +} + +// Similar to nvgpu_writel() +void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { + if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { + printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. 
Is GPU on?\n"); + return; + } + writel_relaxed(v, s->regs + r); + wmb(); +} + +// quadword version of nvdebug_writel() +// XXX: This probably doesn't work XXX: Untested +void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { + if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { + printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); + return; + } + writeq_relaxed(v, s->regs + r); + wmb(); +} diff --git a/nvdebug_linux.h b/nvdebug_linux.h new file mode 100644 index 0000000..022d1cf --- /dev/null +++ b/nvdebug_linux.h @@ -0,0 +1,49 @@ +/* Copyright 2024 Joshua Bakita + * SPDX-License-Identifier: MIT + * + * Helpers which are kernel-specific + */ +#include "nvdebug.h" + +#include <linux/device.h> // For dev_get_drvdata() +#include <linux/seq_file.h> // For struct seq_file +#include <linux/proc_fs.h> // For pde_data() macro +#include <linux/version.h> // For KERNEL_VERSION and LINUX_VERSION_CODE + +static inline struct gk20a *get_gk20a(struct device *dev) { + // Only works because gk20a* is the first member of gk20a_platform + return *((struct gk20a**)dev_get_drvdata(dev)); +} + +// PDE_DATA was renamed to pde_data in Linux 5.17 to deconflict with a driver +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,17,0) +#define pde_data PDE_DATA +#endif + +// We use the data field of the proc_dir_entry ("PDE" in this function) to store +// our index into the g_nvdebug_state array +static inline int seq2gpuidx(struct seq_file *s) { + const struct file *f = s->file; + return (uintptr_t)pde_data(file_inode(f)); +} +static inline int file2gpuidx(const struct file *f) { + return (uintptr_t)pde_data(file_inode(f)); +} +static inline int file2parentgpuidx(const struct file *f) { + // Should be safe to call on ProcFS entries, as our parent should (?) + // still exist if we're called. If not, there are worse races in this + // module. 
+ return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) +// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry +// of the gk20a struct (after a function pointer). This change was made as L4T +// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+) +// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA +// i.e. if FUSA was enabled, this is wrong. +#define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void)))) +#else +#include // For struct nvgpu_os_linux, which holds regs +#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) +#endif diff --git a/runlist_procfs.c b/runlist_procfs.c index e50c34a..0087d90 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c @@ -1,7 +1,7 @@ #include // For seq_* functions and types #include // Macros to detect kernel version -#include "nvdebug.h" +#include "nvdebug_linux.h" #define RUNLIST_PROCFS_NAME "runlist" #define DETAILED_CHANNEL_INFO -- cgit v1.2.2