From 4768fe31f114c5ad788012db5518ce8e37f79c7a Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Tue, 9 Apr 2024 13:07:19 -0400 Subject: Correctly handle startup errors and fix gpc*_mask APIs - Do not create gpc*_mask files on pre-Maxwell GPUs (tested unavailable on the K5000s) - Use correct register offsets for gpc*_mask files on Ampere+ GPUs - Document GPC and TPC count and fuse registers. - Correctly handle errors for creation of all ProcFS files - Remove unnecessary error-handling temp variables in nvdebug_entry - Misc naming, comment, and layout cleanup --- nvdebug_entry.c | 150 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 58 deletions(-) (limited to 'nvdebug_entry.c') diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 7593a3a..0cf5344 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c @@ -12,7 +12,8 @@ #include "nvdebug.h" #include "stubs.h" -// Enable to intercept and log GPU interrupts +// Enable to intercept and log GPU interrupts. Historically used to benchmark +// interrupt latency. #define INTERRUPT_DEBUG 0 // MIT is GPL-compatible. We need to be GPL-compatible for symbols like @@ -31,14 +32,16 @@ extern struct file_operations copy_topology_file_ops; extern struct file_operations nvdebug_read_reg32_file_ops; extern struct file_operations nvdebug_read_reg_range_file_ops; -// Bus types are global symbols in the kernel -extern struct bus_type platform_bus_type; struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; unsigned int g_nvdebug_devices = 0; +// Bus types are global symbols in the kernel +extern struct bus_type platform_bus_type; -// Starting in Kernel 5.6, proc_ops is required instead of file_operations +// Starting in Kernel 5.6, proc_ops is required instead of file_operations. +// As file_operations is larger than proc_ops, we can overwrite the memory +// backing the file_operations struct to follow the proc_ops layout, and then +// cast on newer kernels. 
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) -// This rewrites the struct to the proc_ops layout on newer kernels const struct proc_ops* compat_ops(const struct file_operations* ops) { struct proc_ops new_ops = {}; new_ops.proc_open = ops->open; @@ -64,7 +67,7 @@ irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) { // Find any and all NVIDIA GPUs in the system // Note: This function fails if any of them are in a bad state -int probe_and_cache_device(void) { +int probe_and_cache_devices(void) { // platform bus (SoC) iterators struct device *dev = NULL; struct device *temp_dev; @@ -143,13 +146,14 @@ int probe_and_cache_device(void) { #endif // INTERRUPT_DEBUG i++; } - // Return the number of devices we found + // Return the number of devices found if (i > 0) return i; return -ENODEV; } // Create files `/proc/gpu#/runlist#`, world readable +// Support: Fermi, Maxwell, Pascal, Volta, Turing int create_runlist_files(int device_id, struct proc_dir_entry *dir) { ptop_device_info_gk104_t info; struct proc_dir_entry *rl_entry; @@ -179,16 +183,24 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) { return 0; } -// Create files /proc/gpu# -// TODO: Don't run this on unsupported GPUs +// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable +// Support: Maxwell+ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { + struct nvdebug_state* g = &g_nvdebug_state[device_id]; char file_name[20]; int i; struct proc_dir_entry *gpc_tpc_mask_entry; - // Get a bitmask of which GPCs are disabled - uint32_t gpcs_mask = nvdebug_readl(&g_nvdebug_state[device_id], NV_FUSE_GPC); // Get maximum number of enabled GPCs for this chip uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS); + // Get a bitmask of which GPCs are disabled + uint32_t gpcs_mask; + if (g->chip_id < NV_CHIP_ID_AMPERE) + gpcs_mask = nvdebug_readl(g, NV_FUSE_GPC_GM107); + else + gpcs_mask = nvdebug_readl(g, NV_FUSE_GPC_GA100); + // Verify the reads 
succeeded + if (max_gpcs == -1 || gpcs_mask == -1) + return -EIO; // For each enabled GPC, expose a mask of disabled TPCs for (i = 0; i < max_gpcs; i++) { // Do nothing if GPC is disabled @@ -196,9 +208,14 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { continue; // If GPC is enabled, create an entry to read disabled TPCs mask snprintf(file_name, 20, "gpc%d_tpc_mask", i); - gpc_tpc_mask_entry = proc_create_data( - file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i)); + if (g->chip_id < NV_CHIP_ID_AMPERE) + gpc_tpc_mask_entry = proc_create_data( + file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC_GM107(i)); + else + gpc_tpc_mask_entry = proc_create_data( + file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC_GA100(i)); if (!gpc_tpc_mask_entry) return -ENOMEM; } @@ -206,64 +223,84 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { } int __init nvdebug_init(void) { - struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, - *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, - *num_gpcs_entry; - int rl_create_err, tpc_masks_create_err; + struct proc_dir_entry *dir; + int err, res; // Check that an NVIDIA GPU is present and initialize g_nvdebug_state - int res = probe_and_cache_device(); - if (res < 0) + if ((res = probe_and_cache_devices()) < 0) return res; g_nvdebug_devices = res; // Create seperate ProcFS directories for each gpu while (res--) { char device_id_str[7]; - uintptr_t device_id = res; // This is uintptr as we abuse the *data field on proc_dir_entry to store the GPU id + // Create a wider copy of the GPU ID to allow us to abuse the *data + // field of proc_dir_entry to store the GPU ID. 
+ uintptr_t device_id = res; // Create directory /proc/gpu# where # is the GPU number + // As ProcFS entry creation only fails if out of memory, we auto-skip + // to handling that on any error in creating ProcFS files. snprintf(device_id_str, 7, "gpu%ld", device_id); if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) goto out_nomem; // Create files `/proc/gpu#/runlist#`, world readable if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) - create_runlist_files(device_id, dir); - // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable - tpc_masks_create_err = create_tpc_mask_files(device_id, dir); + if ((err = create_runlist_files(device_id, dir))) + goto out_err; // Create file `/proc/gpu#/preempt_tsg`, world writable - preempt_entry = proc_create_data( - "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), - (void*)device_id); + if (!proc_create_data( + "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), + (void*)device_id)) + goto out_nomem; // Create file `/proc/gpu#/disable_channel`, world writable - disable_channel_entry = proc_create_data( - "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), - (void*)device_id); + if (!proc_create_data( + "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), + (void*)device_id)) + goto out_nomem; // Create file `/proc/gpu#/enable_channel`, world writable - enable_channel_entry = proc_create_data( - "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), - (void*)device_id); + if (!proc_create_data( + "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), + (void*)device_id)) + goto out_nomem; // Create file `/proc/gpu#/switch_to_tsg`, world writable - switch_to_tsg_entry = proc_create_data( - "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), - (void*)device_id); + if (!proc_create_data( + "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), + (void*)device_id)) + goto out_nomem; // Create file 
`/proc/gpu#/device_info`, world readable - device_info_entry = proc_create_data( - "device_info", 0444, dir, compat_ops(&device_info_file_ops), - (void*)device_id); + if (!proc_create_data( + "device_info", 0444, dir, compat_ops(&device_info_file_ops), + (void*)device_id)) + goto out_nomem; // Create file `/proc/gpu#/num_gpcs`, world readable - num_gpcs_entry = proc_create_data( - "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)NV_PTOP_SCAL_NUM_GPCS); + if (!proc_create_data( + "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)NV_PTOP_SCAL_NUM_GPCS)) + goto out_nomem; // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable - num_gpcs_entry = proc_create_data( - "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC); - // Create file `/proc/gpu#/num_ces`, world readable - num_gpcs_entry = proc_create_data( - "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)NV_PTOP_SCAL_NUM_CES); + if (!proc_create_data( + "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC)) + goto out_nomem; // Create file `/proc/gpu#/num_ces`, world readable - num_gpcs_entry = proc_create_data( - "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), - (void*)NV_FUSE_GPC); + if (!proc_create_data( + "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)NV_PTOP_SCAL_NUM_CES)) + goto out_nomem; + // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable (Maxwell+) + if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_MAXWELL) + if ((err = create_tpc_mask_files(device_id, dir))) + goto out_err; + // Create file `/proc/gpu#/gpc_mask`, world readable (Maxwell+) + if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_AMPERE) { + if (!proc_create_data( + "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)NV_FUSE_GPC_GA100)) + goto out_nomem; + } else if 
(g_nvdebug_state[res].chip_id >= NV_CHIP_ID_MAXWELL) { + if (!proc_create_data( + "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), + (void*)NV_FUSE_GPC_GM107)) + goto out_nomem; + } // Create files exposing LCE and PCE configuration (Pascal+) if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { // Create file `/proc/gpu#/copy_topology`, world readable @@ -277,16 +314,13 @@ int __init nvdebug_init(void) { (void*)NV_CE_PCE_MAP)) goto out_nomem; } - // ProcFS entry creation only fails if out of memory - if (rl_create_err || tpc_masks_create_err || !preempt_entry || - !disable_channel_entry || !enable_channel_entry || - !switch_to_tsg_entry || !device_info_entry || !num_gpcs_entry) - goto out_nomem; } // (See Makefile if you want to know the origin of GIT_HASH.) printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); return 0; out_nomem: + err = -ENOMEM; +out_err: // Make sure to clear all ProcFS directories on error while (res < g_nvdebug_devices) { char device_id_str[7]; @@ -294,7 +328,7 @@ out_nomem: remove_proc_subtree(device_id_str, NULL); res++; } - return -ENOMEM; + return err; } static void __exit nvdebug_exit(void) { -- cgit v1.2.2