From a3fe3788535dd099d15e0ddca455be58e1d63156 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Thu, 29 Jun 2023 17:09:54 -0400 Subject: Hacky support for Linux 5.6+ and the Jetson AGX Orin Works around change in parameters to proc initialization functions via a hacky function which rewrites the layout. This also required making all the struct file_operations writable. Also start reducing dependency on nvgpu headers. Known issues: - Incorrect message printed in log after module is loaded. Unclear if this is because the register detection logic is broken, or if the layout of the data at NV_MC_BOOT_0 has changed. - Not tested --- device_info_procfs.c | 4 +-- include/nvgpu/nvlink.h | 2 +- nvdebug.h | 14 ++++++++++- nvdebug_entry.c | 68 ++++++++++++++++++++++++++++++++------------------ runlist_procfs.c | 10 ++++---- 5 files changed, 65 insertions(+), 33 deletions(-) diff --git a/device_info_procfs.c b/device_info_procfs.c index cd6c53c..c96007a 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c @@ -22,7 +22,7 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, *off += chars_written; return chars_written; } -const struct file_operations nvdebug_read_reg32_file_ops = { +struct file_operations nvdebug_read_reg32_file_ops = { .read = nvdebug_reg32_read, }; @@ -118,7 +118,7 @@ static int device_info_file_open(struct inode *inode, struct file *f) { return seq_open(f, &device_info_file_seq_ops); } -const struct file_operations device_info_file_ops = { +struct file_operations device_info_file_ops = { .open = device_info_file_open, .read = seq_read, .llseek = seq_lseek, diff --git a/include/nvgpu/nvlink.h b/include/nvgpu/nvlink.h index a74111c..26c83f1 100644 --- a/include/nvgpu/nvlink.h +++ b/include/nvgpu/nvlink.h @@ -26,7 +26,7 @@ #include #ifdef __KERNEL__ -#include +//#include #elif defined(__NVGPU_POSIX__) #include #else diff --git a/nvdebug.h b/nvdebug.h index 1882756..968a60b 100644 --- a/nvdebug.h +++ b/nvdebug.h @@ -4,8 
+4,10 @@ // TODO(jbakita): Don't depend on these. #include // For struct gk20a -#include // For struct nvgpu_os_linux #include // For PDE_DATA() macro +#include // For dev_get_drvdata() +#include // For KERNEL_VERSION and LINUX_VERSION_CODE +#include /* Runlist Channel A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue @@ -943,7 +945,17 @@ static inline int file2parentgpuidx(const struct file *f) { return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode); } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) +// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry +// of the gk20a struct (after a function pointer). This change was made as L4T +// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+) +// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA +// i.e. if FUSA was enabled, this is wrong. The slot is dereferenced so the macro yields the stored register base (matching the legacy branch's ->regs value), not the address of the field itself. +#define gk20a_regs(gk20a) (*(void**)((void*)(gk20a) + sizeof(void(*)(void)))) +#else +#include // For struct nvgpu_os_linux, which holds regs #define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) +#endif // Similar to nvgpu_readl() // (except we don't try to resolve situations where regs is NULL) diff --git a/nvdebug_entry.c b/nvdebug_entry.c index fa35fb2..60fb7af 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c @@ -18,19 +18,38 @@ MODULE_LICENSE("Dual MIT/GPL"); MODULE_AUTHOR("Joshua Bakita"); MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); -extern const struct file_operations runlist_file_ops; -extern const struct file_operations preempt_tsg_file_ops; -extern const struct file_operations disable_channel_file_ops; -extern const struct file_operations enable_channel_file_ops; -extern const struct file_operations switch_to_tsg_file_ops; -extern const struct file_operations device_info_file_ops; -extern const struct file_operations nvdebug_read_reg32_file_ops; +extern struct file_operations runlist_file_ops; +extern struct file_operations preempt_tsg_file_ops; +extern
struct file_operations disable_channel_file_ops; +extern struct file_operations enable_channel_file_ops; +extern struct file_operations switch_to_tsg_file_ops; +extern struct file_operations device_info_file_ops; +extern struct file_operations nvdebug_read_reg32_file_ops; // Bus types are global symbols in the kernel extern struct bus_type platform_bus_type; struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; unsigned int g_nvdebug_devices = 0; +// Starting in Kernel 5.6, proc_ops is required instead of file_operations +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) +// Rewrites *ops in place to the proc_ops layout on newer kernels and returns it; members with no file_operations source are zeroed rather than left as stack garbage. NOTE(review): a second call on the same struct reinterprets proc_ops memory as file_operations - confirm repeated conversion is safe for every caller. +const struct proc_ops* compat_ops(const struct file_operations* ops) { + struct proc_ops new_ops = {}; + new_ops.proc_open = ops->open; + new_ops.proc_read = ops->read; + new_ops.proc_write = ops->write; + new_ops.proc_lseek = ops->llseek; + new_ops.proc_release = ops->release; + memcpy((void*)ops, &new_ops, sizeof(new_ops)); + return (struct proc_ops*)ops; +} +#else +const struct file_operations* compat_ops(const struct file_operations* ops) { + return ops; +} +#endif + // TEMP irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) { printk(KERN_INFO "[nvdebug] Interrupt tap triggered on IRQ %d.\n", irq_num); @@ -56,19 +75,20 @@ int probe_and_cache_device(void) { }; int i = 0; // Search the platform bus for the first device that matches our name - // Search for GV10B (Jetson Xavier) + // Search for GA10B (Jetson Orin) + while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b"))) + dev = temp_dev; + // Search for GV11B (Jetson Xavier) while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) dev = temp_dev; // Search for GP10B (Jetson TX2) while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b"))) dev = temp_dev; // TODO: Support other platform bus devices (gk20a
- TK1, gm20b - TX1) if (dev) { - struct nvgpu_os_linux *l; mc_boot_0_t ids; g_nvdebug_state[i].g = get_gk20a(dev); - l = container_of(g_nvdebug_state[i].g, struct nvgpu_os_linux, g); - g_nvdebug_state[i].regs = l->regs; + g_nvdebug_state[i].regs = gk20a_regs(g_nvdebug_state[i].g); if (!g_nvdebug_state[i].regs) return -EADDRNOTAVAIL; ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); @@ -139,7 +159,7 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) { for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { snprintf(runlist_name, 12, "runlist%d", rl_id); rl_entry = proc_create_data( - runlist_name, 0444, dir, &runlist_file_ops, + runlist_name, 0444, dir, compat_ops(&runlist_file_ops), (void*)(uintptr_t)rl_id); if (!rl_entry) return -ENOMEM; @@ -165,7 +185,7 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { // If GPC is enabled, create an entry to read disabled TPCs mask snprintf(file_name, 20, "gpc%d_tpc_mask", i); gpc_tpc_mask_entry = proc_create_data( - file_name, 0444, dir, &nvdebug_read_reg32_file_ops, + file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i)); if (!gpc_tpc_mask_entry) return -ENOMEM; @@ -197,46 +217,46 @@ int __init nvdebug_init(void) { tpc_masks_create_err = create_tpc_mask_files(device_id, dir); // Create file `/proc/gpu#/preempt_tsg`, world writable preempt_entry = proc_create_data( - "preempt_tsg", 0222, dir, &preempt_tsg_file_ops, + "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), (void*)device_id); // Create file `/proc/gpu#/disable_channel`, world writable disable_channel_entry = proc_create_data( - "disable_channel", 0222, dir, &disable_channel_file_ops, + "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), (void*)device_id); // Create file `/proc/gpu#/enable_channel`, world writable enable_channel_entry = proc_create_data( - "enable_channel", 0222, dir, &enable_channel_file_ops, + "enable_channel", 0222, dir, 
compat_ops(&enable_channel_file_ops), (void*)device_id); // Create file `/proc/gpu#/switch_to_tsg`, world writable switch_to_tsg_entry = proc_create_data( - "switch_to_tsg", 0222, dir, &switch_to_tsg_file_ops, + "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), (void*)device_id); // Create file `/proc/gpu#/device_info`, world readable device_info_entry = proc_create_data( - "device_info", 0444, dir, &device_info_file_ops, + "device_info", 0444, dir, compat_ops(&device_info_file_ops), (void*)device_id); // Create file `/proc/gpu#/num_gpcs`, world readable num_gpcs_entry = proc_create_data( - "num_gpcs", 0444, dir, &nvdebug_read_reg32_file_ops, + "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), (void*)NV_PTOP_SCAL_NUM_GPCS); // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable num_gpcs_entry = proc_create_data( - "num_tpc_per_gpc", 0444, dir, &nvdebug_read_reg32_file_ops, + "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC); // Create file `/proc/gpu#/num_ces`, world readable num_gpcs_entry = proc_create_data( - "num_ces", 0444, dir, &nvdebug_read_reg32_file_ops, + "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), (void*)NV_PTOP_SCAL_NUM_CES); // Create file `/proc/gpu#/num_ces`, world readable num_gpcs_entry = proc_create_data( - "gpc_mask", 0444, dir, &nvdebug_read_reg32_file_ops, + "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), (void*)NV_FUSE_GPC); // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+ if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) { // TODO: Redo to num_pces // Create file `/proc/gpu#/pce_map`, world readable num_gpcs_entry = proc_create_data( - "pce_map", 0444, dir, &nvdebug_read_reg32_file_ops, + "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), (void*)NV_CE_PCE_MAP); } // ProcFS entry creation only fails if out of memory diff --git a/runlist_procfs.c b/runlist_procfs.c 
index a6b0d94..a0e71b0 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c @@ -171,7 +171,7 @@ static int runlist_file_open(struct inode *inode, struct file *f) { return seq_open(f, &runlist_file_seq_ops); } -const struct file_operations runlist_file_ops = { +struct file_operations runlist_file_ops = { .open = runlist_file_open, .read = seq_read, .llseek = seq_lseek, @@ -199,7 +199,7 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, return count; } -const struct file_operations preempt_tsg_file_ops = { +struct file_operations preempt_tsg_file_ops = { .write = preempt_tsg_file_write, }; @@ -236,7 +236,7 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, return count; } -const struct file_operations disable_channel_file_ops = { +struct file_operations disable_channel_file_ops = { .write = disable_channel_file_write, }; @@ -262,7 +262,7 @@ ssize_t enable_channel_file_write(struct file *f, const char __user *buffer, return count; } -const struct file_operations enable_channel_file_ops = { +struct file_operations enable_channel_file_ops = { .write = enable_channel_file_write, }; @@ -317,6 +317,6 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, return count; } -const struct file_operations switch_to_tsg_file_ops = { +struct file_operations switch_to_tsg_file_ops = { .write = switch_to_tsg_file_write, }; -- cgit v1.2.2