#include "nvdebug.h"
#include <linux/seq_file.h> // For seq_* functions and types
#include <linux/uaccess.h> // For copy_to_user()

// Generic register printing function, used for PTOP_*_NUM registers (+more)
// Reads one 32-bit register and prints it as hex followed by a newline.
// @param f    File being read from. `data` field is register offset to read.
// @param buf  User buffer for result
// @param size Length of user buffer
// @param off  Requested offset. Updated by number of characters written.
// @return -errno on error (-EFAULT if the user buffer could not be written),
//         otherwise number of bytes written to *buf. Returns 0 (EOF) if the
//         buffer is smaller than 16 bytes or if *off is non-zero.
// Note: Parent `data` field MUST be the GPU index
static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off) {
	char out[16];
	int chars_written;
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
	// The whole value is emitted by a single read; any later read is EOF
	if (size < 16 || *off != 0)
		return 0;
	// 32 bit register will always take less than 16 characters to print
	chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)PDE_DATA(file_inode(f))));
	if (copy_to_user(buf, out, chars_written)) {
		printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);
		// Do not claim success (or advance *off) for data userspace never got
		return -EFAULT;
	}
	*off += chars_written;
	return chars_written;
}

// Prints a sub-range of bits from a 32-bit register as hex plus a newline.
// The register offset and bit range are packed in a `union reg_range` stored
// in the PDE_DATA of the file.
// @param f    File being read from
// @param buf  User buffer for result
// @param size Length of user buffer
// @param off  Requested offset. Updated by number of characters written.
// @return -EOPNOTSUPP if the register reads as all-ones, -EFAULT if the user
//         buffer could not be written, otherwise number of bytes written to
//         *buf. Returns 0 (EOF) if the buffer is smaller than 12 bytes or if
//         *off is non-zero.
static ssize_t nvdebug_reg_range_read(struct file *f, char __user *buf, size_t size, loff_t *off) {
	char out[12];
	int chars_written;
	uint32_t read, mask;
	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
	// See comment in nvdebug_entry.c to understand `union reg_range`
	union reg_range range;
	range.raw = (uintptr_t)PDE_DATA(file_inode(f));

	// "0x" + up to 32-bit register as hex + "\n\0" is at most 12 characters
	if (size < 12 || *off != 0)
		return 0;

	// Print bits `start_bit` to `stop_bit` from 32 bits at address `offset`
	if ((read = nvdebug_readl(g, range.offset)) == -1)
		return -EOPNOTSUPP;
	// Setup `mask` used to throw out unused upper bits
	// NOTE(review): this keeps `stop_bit - start_bit` bits; a zero-width range
	// would shift by 32, which is undefined. Confirm no caller registers one.
	mask = -1u >> (32 - range.stop_bit + range.start_bit);
	// Throw out unused lower bits via a shift, apply the mask, and print
	chars_written = scnprintf(out, 12, "%#0x\n", (read >> range.start_bit) & mask);
	if (copy_to_user(buf, out, chars_written)) {
		printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name);
		// Do not claim success (or advance *off) for data userspace never got
		return -EFAULT;
	}
	*off += chars_written;
	return chars_written;
}

// File operations for files which print a whole 32-bit register
struct file_operations nvdebug_read_reg32_file_ops = {
	.read = nvdebug_reg32_read,
	.llseek = default_llseek,
};

// Generic mechanism used for printing a subset of bits from a register
// Please store a `union reg_range` rather than a `uintptr_t` in the PDE_DATA
struct file_operations nvdebug_read_reg_range_file_ops = {
	.read = nvdebug_reg_range_read,
	.llseek = default_llseek,
};

// Iteration state for walking the device_info table via the seq_file interface
typedef struct {
	int idx; // Current index in the device_info table
	int length; // Length of device_info table (including unpopulated entries)
	int type_of_next_entry; // Only used on Ampere+ GPUs
	bool has_next_entry; // Only used on Ampere+ GPUs for show() idempotence
} device_info_iter;

//// ==v== PTOP_DEVICE_INFO ==v== ////

// Called to start or resume a sequence. Prior to 4.19, *pos is unreliable.
// Initializes iterator `iter` state and returns it. Ends sequence on NULL.
// NOTE(review): `iter` is a single function-static object, so every reader of
// every device_info file shares it; concurrent readers can disturb each other.
static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) {
	static device_info_iter iter;
	// If freshly starting a sequence, reset the iterator
	if (*pos == 0) {
		struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
		iter.idx = 0;
		iter.type_of_next_entry = 0;
		iter.has_next_entry = 0;
		// On Ampere+, the device_info table length can vary
		if (g->chip_id >= NV_CHIP_ID_AMPERE)
			iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g);
		else
			iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104;
	}
	// Number of possible info entries is fixed, and list is sparse, so stop
	// iterating only when all entries have been checked, rather than on the first
	// empty entry.
	if (iter.idx >= iter.length)
		return NULL;
	return &iter;
}

// Steps to next record. Returns `&iter` (address should not change)
// Calls show() on non-NULL return
static void* device_info_file_seq_next(struct seq_file *s, void *iter_raw,
                                       loff_t *pos) {
	device_info_iter *iter = (device_info_iter*)iter_raw;
	(*pos)++; // Required by seq interface
	// Number of possible info entries is fixed, and list is sparse, so stop
	// iterating only when all entries have been checked, rather than on the first
	// empty entry.
	if (++iter->idx >= iter->length)
		return NULL;
	// The info_type field is not available in the Ampere device_info data, so
	// it must be inferred. NOP for older devices (cheaper than another branch).
	// This has to be here (rather than in the show function) to support the
	// idempotence requirements of show() in the seq_file interface.
	iter->type_of_next_entry = iter->has_next_entry ? iter->type_of_next_entry + 1 : 0;
	return iter;
}

// Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error.
// Implementation of this function must be idempotent
static int device_info_file_seq_show_gk104(struct seq_file *s, void *iter_raw) {
	device_info_iter *iter = (device_info_iter*)iter_raw;
	ptop_device_info_gk104_t curr_info;
	struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];

	curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx));
	// Check for read errors
	if (curr_info.raw == -1)
		return -EIO;

	// Parse and print the data
	switch(curr_info.info_type) {
	case INFO_TYPE_DATA:
		// As of early 2022, only the ENUM2 format of this entry exists
		if (curr_info.is_not_enum2)
			break;
		seq_printf(s, "| BAR0 Base %#.8x\n"
		              "| instance %d\n",
		              curr_info.pri_base << 12, curr_info.inst_id);
		if (curr_info.fault_id_is_valid)
			seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id);
		break;
	case INFO_TYPE_ENUM:
		if (curr_info.engine_is_valid)
			seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum);
		if (curr_info.runlist_is_valid)
			seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum);
		if (curr_info.intr_is_valid)
			seq_printf(s, "| Interrupt ID: %2d\n", curr_info.intr_enum);
		if (curr_info.reset_is_valid)
			seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum);
		break;
	case INFO_TYPE_ENGINE_TYPE:
		seq_printf(s, "| Engine Type: %2d (", curr_info.engine_type);
		if (curr_info.engine_type < ENGINE_TYPES_LEN)
			seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
		else
			seq_printf(s, "Unknown Historical)\n");
		break;
	case INFO_TYPE_NOT_VALID:
	default:
		// Device info records are sparse, so skip unset or unknown ones
		return 0;
	}

	// Draw a line between each device entry
	if (!curr_info.has_next_entry)
		seq_printf(s, "+---------------------+\n");
	return 0;
}

// Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error.
// Implementation of this function must be idempotent
static int device_info_file_seq_show_ga100(struct seq_file *s, void *iter_raw) {
	device_info_iter *iter = (device_info_iter*)iter_raw;
	ptop_device_info_ga100_t curr_info;
	struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];

	curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx));
	// Check for read errors
	if (curr_info.raw == -1)
		return -EIO;

	// Update tracking data only used by next(); allows preserving idempotence
	iter->has_next_entry = curr_info.has_next_entry;

	// Silently skip empty entries
	if (curr_info.raw == 0)
		return 0;

	// In nvdebug, an entry is considered invalid if it does not consist of at
	// least two rows. So, if this is the first row of an entry, but another row
	// is not indicated, this entry is invalid and should be skipped.
	if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) {
		printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
		return 0;
	}

	// Parse and print the data
	// Note: The goal of this interface is to present useful information to
	// a human user, NOT to provide a stable format for scripts to parse.
	// Because of this, we favor accurately printing the data in each entry,
	// rather than providing stable (if imperfectly correct) field names
	switch(iter->type_of_next_entry) {
	case 0:
		seq_printf(s, "| Engine Type: %3d (", curr_info.engine_type);
		if (curr_info.engine_type < ENGINE_TYPES_LEN)
			seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
		else
			seq_printf(s, "Unknown, introduced post-Lovelace)\n");
		seq_printf(s, "| instance %d\n", curr_info.inst_id);
		seq_printf(s, "| Fault ID: %4d\n", curr_info.fault_id);
		break;
	case 1:
		seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 8);
		seq_printf(s, "| Reset ID: %3d\n", curr_info.reset_id);
		seq_printf(s, "| Is Engine: %1d\n", curr_info.is_engine);
		break;
	case 2:
		seq_printf(s, "| Runlist Eng. ID: %1d\n", curr_info.rleng_id);
		// Theoretically, we could extract an ID from the runlist RAM
		seq_printf(s, "| RL Base: %#.8x\n", curr_info.runlist_pri_base << 10);
		break;
	default:
		printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
		break;
	}

	// Draw a line between each device entry
	if (!curr_info.has_next_entry)
		seq_printf(s, "+---------------------+\n");
	return 0;
}

// Called when a sequence ends or is paused
static void device_info_file_seq_stop(struct seq_file *s, void *idx) {
	// No cleanup needed
}

// One immutable operations table per device_info layout. Selecting a table at
// open() time (rather than rewriting `.show` in one shared table) keeps an
// open on one GPU from changing the callback used by readers of another GPU.
static const struct seq_operations device_info_file_seq_ops_gk104 = {
	.start = device_info_file_seq_start,
	.next = device_info_file_seq_next,
	.stop = device_info_file_seq_stop,
	.show = device_info_file_seq_show_gk104,
};

static const struct seq_operations device_info_file_seq_ops_ga100 = {
	.start = device_info_file_seq_start,
	.next = device_info_file_seq_next,
	.stop = device_info_file_seq_stop,
	.show = device_info_file_seq_show_ga100,
};

// Opens a device_info file as a seq_file, using the Ampere+ printer when the
// parent GPU's chip_id is at least NV_CHIP_ID_AMPERE, else the Kepler--Turing
// printer. Returns the result of seq_open().
static int device_info_file_open(struct inode *inode, struct file *f) {
	if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE)
		return seq_open(f, &device_info_file_seq_ops_ga100);
	return seq_open(f, &device_info_file_seq_ops_gk104);
}

// File operations for the device_info file; reads go through seq_file
struct file_operations device_info_file_ops = {
	.open = device_info_file_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};