diff options
| author | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-09 13:07:19 -0400 |
|---|---|---|
| committer | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-09 13:07:19 -0400 |
| commit | 4768fe31f114c5ad788012db5518ce8e37f79c7a (patch) | |
| tree | 03fe90108bf9341b8b9d299df3ba8a6245c509d0 | |
| parent | 31964208e4dc0243b6b31b9967c77a791aeb995c (diff) | |
Correctly handle startup errors and fix gpc*_mask APIs
- Do not create gpc*_mask files on pre-Maxwell GPUs (tested
unavailable on the K5000s)
- Use correct register offsets for gpc*_mask files on Ampere+ GPUs
- Document GPC and TPC count and fuse registers.
- Correctly handle errors for creation of all ProcFS files
- Remove unnecessary error-handling temp variables in nvdebug_entry
- Misc naming, comment, and layout cleanup
| -rw-r--r-- | nvdebug.h | 46 | ||||
| -rw-r--r-- | nvdebug_entry.c | 150 |
2 files changed, 125 insertions, 71 deletions
| @@ -436,7 +436,9 @@ typedef union { | |||
| 436 | #define NV_MC_BOOT_0 0x00000000 | 436 | #define NV_MC_BOOT_0 0x00000000 |
| 437 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 | 437 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 |
| 438 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | 438 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU |
| 439 | |||
| 439 | #define NV_CHIP_ID_KEPLER 0x0E0 | 440 | #define NV_CHIP_ID_KEPLER 0x0E0 |
| 441 | #define NV_CHIP_ID_MAXWELL 0x120 | ||
| 440 | #define NV_CHIP_ID_PASCAL 0x130 | 442 | #define NV_CHIP_ID_PASCAL 0x130 |
| 441 | #define NV_CHIP_ID_VOLTA 0x140 | 443 | #define NV_CHIP_ID_VOLTA 0x140 |
| 442 | #define NV_CHIP_ID_VOLTA_INTEGRATED 0x150 | 444 | #define NV_CHIP_ID_VOLTA_INTEGRATED 0x150 |
| @@ -700,29 +702,47 @@ typedef union { | |||
| 700 | uint32_t raw; | 702 | uint32_t raw; |
| 701 | } ptop_device_info_gk104_t; | 703 | } ptop_device_info_gk104_t; |
| 702 | 704 | ||
| 703 | /* Graphics Processing Cluster (GPC) information | 705 | /* Graphics Processing Cluster (GPC) on-chip information |
| 704 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing | 706 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing |
| 705 | Clusters (also known as GPU Processing Clusters, starting with Ampere). | 707 | Clusters (also known as GPU Processing Clusters, starting with Ampere). |
| 706 | 708 | ||
| 707 | Each GPC is subdivided into Texture Processing Clusters (TPCs) which contain | 709 | Each GPC is subdivided into Texture Processing Clusters (TPCs) which contain |
| 708 | Streaming Multiprocessors (SMs). | 710 | Streaming Multiprocessors (SMs). |
| 709 | 711 | ||
| 712 | The number of these units etched onto the chip may vary from the number | ||
| 713 | enabled and software-visible. These registers expose the number of on-chip | ||
| 714 | GPCs, and the number of on-chip TPCs inside a GPC. | ||
| 710 | 715 | ||
| 716 | Support: Fermi through (at least) Blackwell | ||
| 711 | */ | 717 | */ |
| 712 | // Support: Fermi through Blackwell | ||
| 713 | // Get the number of GPCs **on die** | ||
| 714 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 | 718 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 |
| 715 | // Get the number of TPCs per GPC **on die** | ||
| 716 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 | 719 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 |
| 717 | // GPC and TPC masks | 720 | |
| 718 | // Support: Maxwell, Pascal, Volta, Turing | 721 | /* Graphics Processing Cluster (GPC) enablement information |
| 719 | // Bitmask of which GPC **are enabled** of the max on die | 722 | (See above for a description of GPCs and TPCs.) |
| 720 | #define NV_FUSE_GPC 0x00021c1c | 723 | |
| 721 | // Bitmask of which TPCs **are enabled** on each GPC | 724 | The number of on-chip GPCs and TPCs enabled is driven by: |
| 722 | #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) | 725 | 1) Manufacturing errors which make some units nonfunctional. |
| 723 | // Support: Ampere, Ada, Hopper, Blackwell | 726 | 2) Commercialization decisions about how many units should be enabled for a |
| 724 | //#define NV_FUSE_GPC 0x00820c1c | 727 | specific GPU model. |
| 725 | //#define NV_FUSE_TPC_FOR_GPC(i) (0x00820c38+(i)*4) | 728 | |
| 729 | Generally, reason (1) drives disablement early in product manufacturing, | ||
| 730 | whereas, as the manufacturing process matures, (2) steps in to ensure | ||
| 731 | consistency between early-manufactured and late-manufactured products. | ||
| 732 | |||
| 733 | On-chip fuses are used to dictate which units are enabled and disabled. These | ||
| 734 | registers expose the fuse configuration for GPCs, and the TPCs in each GPC. | ||
| 735 | |||
| 736 | FUSE_GPC : Bitmask of which GPCs are enabled | ||
| 737 | FUSE_TPC_FOR_GPC(i) : Bitmask of which TPCs are enabled for GPC i | ||
| 738 | |||
| 739 | Support: Maxwell through Blackwell | ||
| 740 | Note the registers were relocated starting with Ampere. | ||
| 741 | */ | ||
| 742 | #define NV_FUSE_GPC_GM107 0x00021c1c | ||
| 743 | #define NV_FUSE_TPC_FOR_GPC_GM107(i) (0x00021c38+(i)*4) | ||
| 744 | #define NV_FUSE_GPC_GA100 0x00820c1c | ||
| 745 | #define NV_FUSE_TPC_FOR_GPC_GA100(i) (0x00820c38+(i)*4) | ||
| 726 | 746 | ||
| 727 | /* Logical Copy Engine (LCE) Information | 747 | /* Logical Copy Engine (LCE) Information |
| 728 | Every GPU has some number of copy engines which can process transfers to, | 748 | Every GPU has some number of copy engines which can process transfers to, |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 7593a3a..0cf5344 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
| @@ -12,7 +12,8 @@ | |||
| 12 | #include "nvdebug.h" | 12 | #include "nvdebug.h" |
| 13 | #include "stubs.h" | 13 | #include "stubs.h" |
| 14 | 14 | ||
| 15 | // Enable to intercept and log GPU interrupts | 15 | // Enable to intercept and log GPU interrupts. Historically used to benchmark |
| 16 | // interrupt latency. | ||
| 16 | #define INTERRUPT_DEBUG 0 | 17 | #define INTERRUPT_DEBUG 0 |
| 17 | 18 | ||
| 18 | // MIT is GPL-compatible. We need to be GPL-compatible for symbols like | 19 | // MIT is GPL-compatible. We need to be GPL-compatible for symbols like |
| @@ -31,14 +32,16 @@ extern struct file_operations copy_topology_file_ops; | |||
| 31 | extern struct file_operations nvdebug_read_reg32_file_ops; | 32 | extern struct file_operations nvdebug_read_reg32_file_ops; |
| 32 | extern struct file_operations nvdebug_read_reg_range_file_ops; | 33 | extern struct file_operations nvdebug_read_reg_range_file_ops; |
| 33 | 34 | ||
| 34 | // Bus types are global symbols in the kernel | ||
| 35 | extern struct bus_type platform_bus_type; | ||
| 36 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 35 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
| 37 | unsigned int g_nvdebug_devices = 0; | 36 | unsigned int g_nvdebug_devices = 0; |
| 37 | // Bus types are global symbols in the kernel | ||
| 38 | extern struct bus_type platform_bus_type; | ||
| 38 | 39 | ||
| 39 | // Starting in Kernel 5.6, proc_ops is required instead of file_operations | 40 | // Starting in Kernel 5.6, proc_ops is required instead of file_operations. |
| 41 | // As file_operations is larger than proc_ops, we can overwrite the memory | ||
| 42 | // backing the file_operations struct to follow the proc_ops layout, and then | ||
| 43 | // cast on newer kernels. | ||
| 40 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) | 44 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) |
| 41 | // This rewrites the struct to the proc_ops layout on newer kernels | ||
| 42 | const struct proc_ops* compat_ops(const struct file_operations* ops) { | 45 | const struct proc_ops* compat_ops(const struct file_operations* ops) { |
| 43 | struct proc_ops new_ops = {}; | 46 | struct proc_ops new_ops = {}; |
| 44 | new_ops.proc_open = ops->open; | 47 | new_ops.proc_open = ops->open; |
| @@ -64,7 +67,7 @@ irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) { | |||
| 64 | 67 | ||
| 65 | // Find any and all NVIDIA GPUs in the system | 68 | // Find any and all NVIDIA GPUs in the system |
| 66 | // Note: This function fails if any of them are in a bad state | 69 | // Note: This function fails if any of them are in a bad state |
| 67 | int probe_and_cache_device(void) { | 70 | int probe_and_cache_devices(void) { |
| 68 | // platform bus (SoC) iterators | 71 | // platform bus (SoC) iterators |
| 69 | struct device *dev = NULL; | 72 | struct device *dev = NULL; |
| 70 | struct device *temp_dev; | 73 | struct device *temp_dev; |
| @@ -143,13 +146,14 @@ int probe_and_cache_device(void) { | |||
| 143 | #endif // INTERRUPT_DEBUG | 146 | #endif // INTERRUPT_DEBUG |
| 144 | i++; | 147 | i++; |
| 145 | } | 148 | } |
| 146 | // Return the number of devices we found | 149 | // Return the number of devices found |
| 147 | if (i > 0) | 150 | if (i > 0) |
| 148 | return i; | 151 | return i; |
| 149 | return -ENODEV; | 152 | return -ENODEV; |
| 150 | } | 153 | } |
| 151 | 154 | ||
| 152 | // Create files `/proc/gpu#/runlist#`, world readable | 155 | // Create files `/proc/gpu#/runlist#`, world readable |
| 156 | // Support: Fermi, Maxwell, Pascal, Volta, Turing | ||
| 153 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | 157 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { |
| 154 | ptop_device_info_gk104_t info; | 158 | ptop_device_info_gk104_t info; |
| 155 | struct proc_dir_entry *rl_entry; | 159 | struct proc_dir_entry *rl_entry; |
| @@ -179,16 +183,24 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | |||
| 179 | return 0; | 183 | return 0; |
| 180 | } | 184 | } |
| 181 | 185 | ||
| 182 | // Create files /proc/gpu# | 186 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable |
| 183 | // TODO: Don't run this on unsupported GPUs | 187 | // Support: Maxwell+ |
| 184 | int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | 188 | int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { |
| 189 | struct nvdebug_state* g = &g_nvdebug_state[device_id]; | ||
| 185 | char file_name[20]; | 190 | char file_name[20]; |
| 186 | int i; | 191 | int i; |
| 187 | struct proc_dir_entry *gpc_tpc_mask_entry; | 192 | struct proc_dir_entry *gpc_tpc_mask_entry; |
| 188 | // Get a bitmask of which GPCs are disabled | ||
| 189 | uint32_t gpcs_mask = nvdebug_readl(&g_nvdebug_state[device_id], NV_FUSE_GPC); | ||
| 190 | // Get maximum number of enabled GPCs for this chip | 193 | // Get maximum number of enabled GPCs for this chip |
| 191 | uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS); | 194 | uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS); |
| 195 | // Get a bitmask of which GPCs are disabled | ||
| 196 | uint32_t gpcs_mask; | ||
| 197 | if (g->chip_id < NV_CHIP_ID_AMPERE) | ||
| 198 | gpcs_mask = nvdebug_readl(g, NV_FUSE_GPC_GM107); | ||
| 199 | else | ||
| 200 | gpcs_mask = nvdebug_readl(g, NV_FUSE_GPC_GA100); | ||
| 201 | // Verify the reads succeeded | ||
| 202 | if (max_gpcs == -1 || gpcs_mask == -1) | ||
| 203 | return -EIO; | ||
| 192 | // For each enabled GPC, expose a mask of disabled TPCs | 204 | // For each enabled GPC, expose a mask of disabled TPCs |
| 193 | for (i = 0; i < max_gpcs; i++) { | 205 | for (i = 0; i < max_gpcs; i++) { |
| 194 | // Do nothing if GPC is disabled | 206 | // Do nothing if GPC is disabled |
| @@ -196,9 +208,14 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | |||
| 196 | continue; | 208 | continue; |
| 197 | // If GPC is enabled, create an entry to read disabled TPCs mask | 209 | // If GPC is enabled, create an entry to read disabled TPCs mask |
| 198 | snprintf(file_name, 20, "gpc%d_tpc_mask", i); | 210 | snprintf(file_name, 20, "gpc%d_tpc_mask", i); |
| 199 | gpc_tpc_mask_entry = proc_create_data( | 211 | if (g->chip_id < NV_CHIP_ID_AMPERE) |
| 200 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 212 | gpc_tpc_mask_entry = proc_create_data( |
| 201 | (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i)); | 213 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
| 214 | (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC_GM107(i)); | ||
| 215 | else | ||
| 216 | gpc_tpc_mask_entry = proc_create_data( | ||
| 217 | file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 218 | (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC_GA100(i)); | ||
| 202 | if (!gpc_tpc_mask_entry) | 219 | if (!gpc_tpc_mask_entry) |
| 203 | return -ENOMEM; | 220 | return -ENOMEM; |
| 204 | } | 221 | } |
| @@ -206,64 +223,84 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | |||
| 206 | } | 223 | } |
| 207 | 224 | ||
| 208 | int __init nvdebug_init(void) { | 225 | int __init nvdebug_init(void) { |
| 209 | struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, | 226 | struct proc_dir_entry *dir; |
| 210 | *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, | 227 | int err, res; |
| 211 | *num_gpcs_entry; | ||
| 212 | int rl_create_err, tpc_masks_create_err; | ||
| 213 | // Check that an NVIDIA GPU is present and initialize g_nvdebug_state | 228 | // Check that an NVIDIA GPU is present and initialize g_nvdebug_state |
| 214 | int res = probe_and_cache_device(); | 229 | if ((res = probe_and_cache_devices()) < 0) |
| 215 | if (res < 0) | ||
| 216 | return res; | 230 | return res; |
| 217 | g_nvdebug_devices = res; | 231 | g_nvdebug_devices = res; |
| 218 | // Create separate ProcFS directories for each gpu | 232 | // Create separate ProcFS directories for each gpu |
| 219 | while (res--) { | 233 | while (res--) { |
| 220 | char device_id_str[7]; | 234 | char device_id_str[7]; |
| 221 | uintptr_t device_id = res; // This is uintptr as we abuse the *data field on proc_dir_entry to store the GPU id | 235 | // Create a wider copy of the GPU ID to allow us to abuse the *data |
| 236 | // field of proc_dir_entry to store the GPU ID. | ||
| 237 | uintptr_t device_id = res; | ||
| 222 | // Create directory /proc/gpu# where # is the GPU number | 238 | // Create directory /proc/gpu# where # is the GPU number |
| 239 | // As ProcFS entry creation only fails if out of memory, we auto-skip | ||
| 240 | // to handling that on any error in creating ProcFS files. | ||
| 223 | snprintf(device_id_str, 7, "gpu%ld", device_id); | 241 | snprintf(device_id_str, 7, "gpu%ld", device_id); |
| 224 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) | 242 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) |
| 225 | goto out_nomem; | 243 | goto out_nomem; |
| 226 | // Create files `/proc/gpu#/runlist#`, world readable | 244 | // Create files `/proc/gpu#/runlist#`, world readable |
| 227 | if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) | 245 | if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) |
| 228 | create_runlist_files(device_id, dir); | 246 | if ((err = create_runlist_files(device_id, dir))) |
| 229 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable | 247 | goto out_err; |
| 230 | tpc_masks_create_err = create_tpc_mask_files(device_id, dir); | ||
| 231 | // Create file `/proc/gpu#/preempt_tsg`, world writable | 248 | // Create file `/proc/gpu#/preempt_tsg`, world writable |
| 232 | preempt_entry = proc_create_data( | 249 | if (!proc_create_data( |
| 233 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), | 250 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), |
| 234 | (void*)device_id); | 251 | (void*)device_id)) |
| 252 | goto out_nomem; | ||
| 235 | // Create file `/proc/gpu#/disable_channel`, world writable | 253 | // Create file `/proc/gpu#/disable_channel`, world writable |
| 236 | disable_channel_entry = proc_create_data( | 254 | if (!proc_create_data( |
| 237 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), | 255 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), |
| 238 | (void*)device_id); | 256 | (void*)device_id)) |
| 257 | goto out_nomem; | ||
| 239 | // Create file `/proc/gpu#/enable_channel`, world writable | 258 | // Create file `/proc/gpu#/enable_channel`, world writable |
| 240 | enable_channel_entry = proc_create_data( | 259 | if (!proc_create_data( |
| 241 | "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), | 260 | "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), |
| 242 | (void*)device_id); | 261 | (void*)device_id)) |
| 262 | goto out_nomem; | ||
| 243 | // Create file `/proc/gpu#/switch_to_tsg`, world writable | 263 | // Create file `/proc/gpu#/switch_to_tsg`, world writable |
| 244 | switch_to_tsg_entry = proc_create_data( | 264 | if (!proc_create_data( |
| 245 | "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), | 265 | "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), |
| 246 | (void*)device_id); | 266 | (void*)device_id)) |
| 267 | goto out_nomem; | ||
| 247 | // Create file `/proc/gpu#/device_info`, world readable | 268 | // Create file `/proc/gpu#/device_info`, world readable |
| 248 | device_info_entry = proc_create_data( | 269 | if (!proc_create_data( |
| 249 | "device_info", 0444, dir, compat_ops(&device_info_file_ops), | 270 | "device_info", 0444, dir, compat_ops(&device_info_file_ops), |
| 250 | (void*)device_id); | 271 | (void*)device_id)) |
| 272 | goto out_nomem; | ||
| 251 | // Create file `/proc/gpu#/num_gpcs`, world readable | 273 | // Create file `/proc/gpu#/num_gpcs`, world readable |
| 252 | num_gpcs_entry = proc_create_data( | 274 | if (!proc_create_data( |
| 253 | "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 275 | "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
| 254 | (void*)NV_PTOP_SCAL_NUM_GPCS); | 276 | (void*)NV_PTOP_SCAL_NUM_GPCS)) |
| 277 | goto out_nomem; | ||
| 255 | // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable | 278 | // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable |
| 256 | num_gpcs_entry = proc_create_data( | 279 | if (!proc_create_data( |
| 257 | "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 280 | "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
| 258 | (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC); | 281 | (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC)) |
| 259 | // Create file `/proc/gpu#/num_ces`, world readable | 282 | goto out_nomem; |
| 260 | num_gpcs_entry = proc_create_data( | ||
| 261 | "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 262 | (void*)NV_PTOP_SCAL_NUM_CES); | ||
| 263 | // Create file `/proc/gpu#/num_ces`, world readable | 283 | // Create file `/proc/gpu#/num_ces`, world readable |
| 264 | num_gpcs_entry = proc_create_data( | 284 | if (!proc_create_data( |
| 265 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | 285 | "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), |
| 266 | (void*)NV_FUSE_GPC); | 286 | (void*)NV_PTOP_SCAL_NUM_CES)) |
| 287 | goto out_nomem; | ||
| 288 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable (Maxwell+) | ||
| 289 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_MAXWELL) | ||
| 290 | if ((err = create_tpc_mask_files(device_id, dir))) | ||
| 291 | goto out_err; | ||
| 292 | // Create file `/proc/gpu#/gpc_mask`, world readable (Maxwell+) | ||
| 293 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_AMPERE) { | ||
| 294 | if (!proc_create_data( | ||
| 295 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 296 | (void*)NV_FUSE_GPC_GA100)) | ||
| 297 | goto out_nomem; | ||
| 298 | } else if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_MAXWELL) { | ||
| 299 | if (!proc_create_data( | ||
| 300 | "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops), | ||
| 301 | (void*)NV_FUSE_GPC_GM107)) | ||
| 302 | goto out_nomem; | ||
| 303 | } | ||
| 267 | // Create files exposing LCE and PCE configuration (Pascal+) | 304 | // Create files exposing LCE and PCE configuration (Pascal+) |
| 268 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { | 305 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { |
| 269 | // Create file `/proc/gpu#/copy_topology`, world readable | 306 | // Create file `/proc/gpu#/copy_topology`, world readable |
| @@ -277,16 +314,13 @@ int __init nvdebug_init(void) { | |||
| 277 | (void*)NV_CE_PCE_MAP)) | 314 | (void*)NV_CE_PCE_MAP)) |
| 278 | goto out_nomem; | 315 | goto out_nomem; |
| 279 | } | 316 | } |
| 280 | // ProcFS entry creation only fails if out of memory | ||
| 281 | if (rl_create_err || tpc_masks_create_err || !preempt_entry || | ||
| 282 | !disable_channel_entry || !enable_channel_entry || | ||
| 283 | !switch_to_tsg_entry || !device_info_entry || !num_gpcs_entry) | ||
| 284 | goto out_nomem; | ||
| 285 | } | 317 | } |
| 286 | // (See Makefile if you want to know the origin of GIT_HASH.) | 318 | // (See Makefile if you want to know the origin of GIT_HASH.) |
| 287 | printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); | 319 | printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); |
| 288 | return 0; | 320 | return 0; |
| 289 | out_nomem: | 321 | out_nomem: |
| 322 | err = -ENOMEM; | ||
| 323 | out_err: | ||
| 290 | // Make sure to clear all ProcFS directories on error | 324 | // Make sure to clear all ProcFS directories on error |
| 291 | while (res < g_nvdebug_devices) { | 325 | while (res < g_nvdebug_devices) { |
| 292 | char device_id_str[7]; | 326 | char device_id_str[7]; |
| @@ -294,7 +328,7 @@ out_nomem: | |||
| 294 | remove_proc_subtree(device_id_str, NULL); | 328 | remove_proc_subtree(device_id_str, NULL); |
| 295 | res++; | 329 | res++; |
| 296 | } | 330 | } |
| 297 | return -ENOMEM; | 331 | return err; |
| 298 | } | 332 | } |
| 299 | 333 | ||
| 300 | static void __exit nvdebug_exit(void) { | 334 | static void __exit nvdebug_exit(void) { |
