From 51f808e092846a60ea6c88ea3a1d2e349c92977b Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Wed, 20 Sep 2023 13:09:17 -0400 Subject: Bug fixes and cleanup for new device_info logic - Update comments to match new structure - Make show() function idempotent - Skip empty table entries without aborting - Include names for new engine types - Add warning log messages for skipped table entries - Remove non-functional runlist file creation logic for Ampere+ --- device_info_procfs.c | 179 ++++++++++++++++++++++++++++----------------------- nvdebug.h | 52 ++++++++------- nvdebug_entry.c | 39 ++--------- 3 files changed, 134 insertions(+), 136 deletions(-) diff --git a/device_info_procfs.c b/device_info_procfs.c index b2bcd1a..195b3ff 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c @@ -28,49 +28,64 @@ struct file_operations nvdebug_read_reg32_file_ops = { }; typedef struct { - int total_entries; - int index; - int type_of_next_entry; + int idx; // Current index in the device_info table + int length; // Length of device_info table (including unpopulated entries) + int type_of_next_entry; // Only used on Ampere+ GPUs + bool has_next_entry; // Only used on Ampere+ GPUs for show() idempotence } device_info_iter; //// ==v== PTOP_DEVICE_INFO ==v== //// // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. -// Initializes iterator `idx` state and returns it. Ends sequence on NULL. +// Initializes iterator `iter` state and returns it. Ends sequence on NULL. static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { - static device_info_iter idx; - struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; - int is_ampere = g->chip_id >= NV_CHIP_ID_AMPERE; - // If start of sequence, reset `idx` - if (*pos == 0) { - idx.index = 0; - idx.type_of_next_entry = is_ampere ? 
0 : -1; + static device_info_iter iter; + // If freshly starting a sequence, reset the iterator + if (*pos == 0) { + struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; + iter.idx = 0; + iter.type_of_next_entry = 0; + iter.has_next_entry = 0; + // On Ampere+ (chip_id >= NV_CHIP_ID_AMPERE, as in device_info_file_open()), the device_info table length can vary + if (g->chip_id >= NV_CHIP_ID_AMPERE) + iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g); + else + iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104; } - idx.total_entries = is_ampere ? NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g) : NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS; - // Number of possible info entries is fixed, and list is sparse - if (idx.index >= idx.total_entries) - return NULL; - return &idx; + // Number of possible info entries is fixed, and list is sparse, so stop + // iterating only when all entries have been checked, rather than on the first + // empty entry. + if (iter.idx >= iter.length) + return NULL; + return &iter; } -// Steps to next record. Returns new value of `idx`. +// Steps to next record. Returns `&iter` (address should not change) // Calls show() on non-NULL return -static void* device_info_file_seq_next(struct seq_file *s, void *idx, +static void* device_info_file_seq_next(struct seq_file *s, void *iter_raw, loff_t *pos) { - device_info_iter *idx_iter = (device_info_iter*)idx; + device_info_iter *iter = (device_info_iter*)iter_raw; (*pos)++; // Required by seq interface - // Number of possible info entries is fixed, and list is sparse - if (idx_iter->index++ >= idx_iter->total_entries) + // Number of possible info entries is fixed, and list is sparse, so stop + // iterating only when all entries have been checked, rather than on the first + // empty entry. + if (++iter->idx >= iter->length) return NULL; - return idx; + // The info_type field is not available in the Ampere device_info data, so + // it must be inferred. NOP for older devices (cheaper than another branch). 
+ // This has to be here (rather than in the show function) to support the + // idempotence requirements of show() in the seq_file interface. + iter->type_of_next_entry = iter->has_next_entry ? iter->type_of_next_entry + 1 : 0; + return iter; } -// Print info at index *idx. Returns non-zero on error. -static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { - device_info_iter *idx_iter = (device_info_iter*)idx; - ptop_device_info_previous_t curr_info; +// Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error. +// Implementation of this function must be idempotent +static int device_info_file_seq_show_gk104(struct seq_file *s, void *iter_raw) { + device_info_iter *iter = (device_info_iter*)iter_raw; + ptop_device_info_gk104_t curr_info; struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; - curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_PREVIOUS(idx_iter->index)); + curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx)); // Check for read errors if (curr_info.raw == -1) return -EIO; @@ -102,7 +117,7 @@ static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { if (curr_info.engine_type < ENGINE_TYPES_LEN) seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); else - seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); + seq_printf(s, "Unknown Historical)\n"); break; case INFO_TYPE_NOT_VALID: default: @@ -116,69 +131,75 @@ static int device_info_file_seq_show_previous(struct seq_file *s, void *idx) { return 0; } -// Print info at index *idx for Ampere GPUs. Returns non-zero on error. 
-static int device_info_file_seq_show_ampere(struct seq_file *s, void *idx) { - device_info_iter *idx_iter = (device_info_iter*)idx; - ptop_device_info_ampere_t curr_info; - struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; - curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_AMPERE(idx_iter->index)); - // Check for read errors - if (curr_info.raw == -1) - return -EIO; - // The info_type field is not available in the Ampere device_info data, so it must be inferred - int info_type = curr_info.raw ? idx_iter->type_of_next_entry : -1; - // Parse and print the data - switch(info_type) { - case 0: - seq_printf(s, "| instance %d\n", curr_info.inst_id); - seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id); +// Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error. +// Implementation of this function must be idempotent +static int device_info_file_seq_show_ga100(struct seq_file *s, void *iter_raw) { + device_info_iter *iter = (device_info_iter*)iter_raw; + ptop_device_info_ga100_t curr_info; + struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; + curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx)); + // Check for read errors + if (curr_info.raw == -1) + return -EIO; + + // Update tracking data only used by next(); allows preserving idempotence + iter->has_next_entry = curr_info.has_next_entry; + // Silently skip empty entries + if (curr_info.raw == 0) + return 0; + // In nvdebug, an entry is considered invalid if it does not consist of at + // least two rows. So, if this is the first row of an entry, but another row + // is not indicated, this entry is invalid and should be skipped. 
+ if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) { + printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw); + return 0; + } + + // Parse and print the data + switch(iter->type_of_next_entry) { + case 0: + seq_printf(s, "| instance %d\n", curr_info.inst_id); + seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id); seq_printf(s, "| Engine Type: %2d (", curr_info.engine_type); - if (curr_info.engine_type < ENGINE_TYPES_LEN) - seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); - else - seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); - break; - case 1: - seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 12); - seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum); - break; - case 2: - seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum); - seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum); - break; - default: - // Device info records are sparse, so skip unset or unknown ones - return 0; - } - if(info_type != -1) idx_iter->type_of_next_entry++; - // Draw a line between each device entry - if (!curr_info.has_next_entry) { - idx_iter->type_of_next_entry = 0; - seq_printf(s, "+---------------------+\n"); + if (curr_info.engine_type < ENGINE_TYPES_LEN) + seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); + else + seq_printf(s, "Unknown, introduced post-Lovelace)\n"); + break; + case 1: + seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 12); + seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum); + break; + case 2: + seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum); + seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum); + break; + default: + printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw); } - return 0; + + // Draw a line between each device entry + if (!curr_info.has_next_entry) + 
seq_printf(s, "+---------------------+\n"); + return 0; } static void device_info_file_seq_stop(struct seq_file *s, void *idx) { - // No cleanup needed + // No cleanup needed } -static const struct seq_operations device_info_file_seq_ops = { +static struct seq_operations device_info_file_seq_ops = { .start = device_info_file_seq_start, .next = device_info_file_seq_next, - .stop = device_info_file_seq_stop + .stop = device_info_file_seq_stop, }; static int device_info_file_open(struct inode *inode, struct file *f) { - if(g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE) { - static struct seq_operations file_seq_ops_ampere = device_info_file_seq_ops; - file_seq_ops_ampere.show = device_info_file_seq_show_ampere; - return seq_open(f, &file_seq_ops_ampere); - } else { - static struct seq_operations file_seq_ops_previous = device_info_file_seq_ops; - file_seq_ops_previous.show = device_info_file_seq_show_previous; - return seq_open(f, &file_seq_ops_previous); - } + if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE) + device_info_file_seq_ops.show = device_info_file_seq_show_ga100; + else + device_info_file_seq_ops.show = device_info_file_seq_show_gk104; + return seq_open(f, &device_info_file_seq_ops); } struct file_operations device_info_file_ops = { diff --git a/nvdebug.h b/nvdebug.h index be718dd..39e07b4 100644 --- a/nvdebug.h +++ b/nvdebug.h @@ -457,7 +457,7 @@ enum ENGINE_TYPES { ENGINE_COPY2 = 3, // [raw/physical] COPY #2 ENGINE_MSPDEC = 8, // Picture DECoder - ENGINE_MSPPP = 9, // [Video] Post Processing + ENGINE_MSPPP = 9, // [Video] Picture Post Processor ENGINE_MSVLD = 10, // [Video] Variable Length Decoder ENGINE_MSENC = 11, // [Video] ENCoding ENGINE_VIC = 12, // Video Image Compositor @@ -468,10 +468,12 @@ enum ENGINE_TYPES { ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] ENGINE_LCE = 19, // Logical Copy Engine - ENGINE_GSP = 20, // Gpu System Processor - ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] 
(Ampere+) + ENGINE_GSP = 20, // Gpu System Processor (Volta+) + ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Turing+) + ENGINE_OFA = 22, // Optical Flow Accelerator (Turing+) + ENGINE_FLA = 23, // [NVLink] Fabric Logical Addressing [?] }; -#define ENGINE_TYPES_LEN 22 +#define ENGINE_TYPES_LEN 24 static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { "Graphics/Compute", "COPY0", @@ -495,9 +497,11 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { "LCE: Logical Copy Engine", "GSP: GPU System Processor", "NVJPG: NVIDIA JPEG Decoder", + "OFA: Optical Flow Accelerator", + "FLA: Fabric Logical Addressing", }; -/* GPU engine information and control register offsets +/* GPU engine information and control register offsets (GPU TOPology) Each engine is described by one or more entries (terminated by an entry with the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A typical device, such as the graphics/compute engine and any copy engines, are @@ -559,34 +563,34 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. 
*/ -#define NV_PTOP_DEVICE_INFO_AMPERE(i) (0x00022800+(i)*4) -#define NV_PTOP_DEVICE_INFO_PREVIOUS(i) (0x00022700+(i)*4) -#define NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(g) (nvdebug_readl(g, 0x0224fc) >> 20) -#define NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS 64 +#define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) +#define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) +#define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) +#define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 #define NV_PTOP_DEVICE_INFO_TYPE_COUNT 3 typedef union { struct { - uint32_t fault_id:7; - uint32_t padding0:9; - uint32_t inst_id:4; - uint32_t padding1:4; - enum ENGINE_TYPES engine_type:7; + uint32_t fault_id:11; + uint32_t padding0:5; + uint32_t inst_id:8; + enum ENGINE_TYPES engine_type:7; // "type_enum" bool has_next_entry:1; } __attribute__((packed)); struct { - uint32_t reset_enum:5; - uint32_t padding2:7; - uint32_t pri_base:12; - uint32_t padding3:8; + uint32_t reset_enum:8; // "reset_id" + uint32_t pri_base:18; // "device_pri_base" + uint32_t padding1:4; + uint32_t is_engine:1; + uint32_t padding2:1; } __attribute__((packed)); struct { - uint32_t engine_enum:2; - uint32_t padding4:8; - uint32_t runlist_enum:14; - uint32_t padding5:8; + uint32_t engine_enum:2; // "rleng_id" + uint32_t padding3:8; + uint32_t runlist_enum:16; // "runlist_pri_base" + uint32_t padding4:6; } __attribute__((packed)); uint32_t raw; -} ptop_device_info_ampere_t; +} ptop_device_info_ga100_t; typedef union { // DATA type fields @@ -625,7 +629,7 @@ typedef union { uint32_t padding9:1; } __attribute__((packed)); uint32_t raw; -} ptop_device_info_previous_t; +} ptop_device_info_gk104_t; #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 diff --git a/nvdebug_entry.c b/nvdebug_entry.c index cae5aea..0754f12 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c @@ -138,8 +138,8 @@ int probe_and_cache_device(void) { } // Create files `/proc/gpu#/runlist#`, world readable 
-int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { - ptop_device_info_previous_t info; +int create_runlist_files(int device_id, struct proc_dir_entry *dir) { + ptop_device_info_gk104_t info; struct proc_dir_entry *rl_entry; int i, rl_id; char runlist_name[12]; @@ -147,8 +147,8 @@ int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { // Figure out how many runlists there are by checking the device info // registers. Runlists are always numbered sequentially, so we just have // to find the highest-valued one and add 1 to get the number of runlists. - for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_PREVIOUS; i++) { - info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_PREVIOUS(i)); + for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) { + info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_GK104(i)); if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) continue; if (info.runlist_enum > max_rl_id) @@ -167,34 +167,6 @@ int create_runlist_files_previous(int device_id, struct proc_dir_entry *dir) { return 0; } -// Create files `/proc/gpu#/runlist#`, world readable -int create_runlist_files_ampere(int device_id, struct proc_dir_entry *dir) { - ptop_device_info_ampere_t info; - struct proc_dir_entry *rl_entry; - int i, rl_id; - char runlist_name[12]; - int max_rl_id = 0; // Always at least one runlist - // Figure out how many runlists there are by checking the device info - // registers. Runlists are always numbered sequentially, so we just have - // to find the highest-valued one and add 1 to get the number of runlists. - for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_AMPERE(&g_nvdebug_state[device_id]); i++) { - info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_AMPERE(i)); - if (info.runlist_enum > max_rl_id) - max_rl_id = info.runlist_enum; - } - // Create files to read each runlist. 
The read handling code looks at the - // PDE_DATA associated with the file to determine what the runlist ID is. - for (rl_id = 0; rl_id <= 0 * max_rl_id; rl_id++) { - snprintf(runlist_name, 12, "runlist%d", rl_id); - rl_entry = proc_create_data( - runlist_name, 0444, dir, compat_ops(&runlist_file_ops), - (void*)(uintptr_t)rl_id); - if (!rl_entry) - return -ENOMEM; - } - return 0; -} - // Create files /proc/gpu# // TODO: Don't run this on unsupported GPUs int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { @@ -240,7 +212,8 @@ int __init nvdebug_init(void) { if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) goto out_nomem; // Create files `/proc/gpu#/runlist#`, world readable - rl_create_err = (g_nvdebug_state[device_id].chip_id >= NV_CHIP_ID_AMPERE) ? create_runlist_files_ampere(device_id, dir) : create_runlist_files_previous(device_id, dir); + if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) + create_runlist_files(device_id, dir); // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable tpc_masks_create_err = create_tpc_mask_files(device_id, dir); // Create file `/proc/gpu#/preempt_tsg`, world writable -- cgit v1.2.2