Support printing device info on Ampere+ GPUs. By Benjamin Hadad IV

commit c3d6f2c852eb046e9d4f4f1e6527b52c746b2693 Author: Joshua Bakita <bakitajoshua@gmail.com> Date: Sun Oct 29 14:37:51 2023 -0400 Print Ampere+ device_info fields with correct offsets/widths Everything now has been checked against how nvgpu handles it commit b70849d1ce67a58f9f69b37dc62122f789f4cdf7 Author: Joshua Bakita <jbakita@cs.unc.edu> Date: Wed Sep 20 14:27:38 2023 -0400 Rearrange, fix an off-by-one error, and remove an unused define The code in nvdebug.h has been rearranged to enable an easier merge against the jbakita-wip branch. commit 51f808e092846a60ea6c88ea3a1d2e349c92977b Author: Joshua Bakita <jbakita@cs.unc.edu> Date: Wed Sep 20 13:09:17 2023 -0400 Bug fixes and cleanup for new device_info logic - Update comments to match new structure - Make show() function idempotent - Skip empty table entries without aborting - Include names for new engine types - Add warning log messages for skipped table entries - Remove non-functional runlist file creation logic for Ampere+ commit 1d7adc3be1aef5ac9c144bb24008fd8cc5d688a5 Author: Benjamin Hadad IV <bh4@unc.edu> Date: Sat Aug 19 12:47:18 2023 -0400 Debugging changes made to restore functionality following refactoring. - Debugged data display errors. - Debugged crash bugs. - Debugged memory issue. commit 9e6cc03cdf736fbd817ed53fa9a7f506bc91a244 Author: Benjamin Hadad IV <bh4@unc.edu> Date: Wed Aug 16 22:00:20 2023 -0400 A variety of changes have been made as part of the code review. - Functions have been consolidated. - Code was clarified and tidied up overall. - Unnecessary elements were removed. 
commit 845960fc1b15995fdbd6d61c384567652a150bc4 Author: Benjamin Hadad IV <bh4@unc.edu> Date: Fri Jul 28 11:39:28 2023 -0400 Refactored various systems and debugged minor issues - Added device_info_iter - Merged functions in device_info_procfs.c - Separated device_info data structs by version in nvdebug.h - Fixed issue with device_info runlist ID data commit 8a57aaeba41c43233c323d7e0fc8bf1a81ebc65e Author: Benjamin Hadad IV <bh4@unc.edu> Date: Fri Jul 21 11:32:51 2023 -0400 I have updated the ptop_device_info_t comment in nvdebug.h. commit 33c915f08f5dc63674b158ecc18897494256a6d0 Author: Benjamin Hadad IV <bh4@unc.edu> Date: Wed Jul 19 13:02:52 2023 -0400 Debugged device_info functionality - Fixed device_info crash bugs - Made further edits to display functionality - Refactored code to enhance readability commit bfb4dcf0e78954c0163f3a06a5a088c4d1b437a8 Author: Benjamin Hadad IV <bh4@unc.edu> Date: Thu Jul 13 12:13:17 2023 -0400 This commit is to update the repo for display during a meeting. - Added an Ampere version of the device info data. - Added Ampere versions of auxiliary functions. - Modified display functions to accommodate Ampere data. - Made other various small modifications. commit 068e7f4e7208d6c9250ad72208e0b36fd9fdf2f6 Merge: 3725b15 073e897 Author: Benjamin Hadad IV <bh4@unc.edu> Date: Mon Jul 10 12:39:12 2023 -0400 Merge branch 'jbakita-wip' of ssh://rtsrv.cs.unc.edu/public/nvdebug into wip I am merging Mr. Bakita's changes (046d7d2) into this repository. commit 3725b15d5da3e06ef202045d710aa5f15eb72fcc Author: Benjamin Hadad IV <bh4@unc.edu> Date: Mon Jul 3 04:30:54 2023 -0400 I modified nvdebug.h for Ampere.
author: Joshua Bakita <bakitajoshua@gmail.com> 2023-10-29 14:43:40 -0400
committer: Joshua Bakita <bakitajoshua@gmail.com> 2023-10-29 15:43:16 -0400
commit: 8da37f4bbb30027a6efa289bd97d98b2acb5c160 (patch)
tree: 08ffee914a0ea9b37fc8da07bae2a3bc88ab5ed9
parent: 874aecaf06a96f5866f9bf31437a1ebd27c2f408 (diff)
3 files changed, 167 insertions, 35 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c
index 5fc417f..b139c36 100644
--- a/device_info_procfs.c
+++ b/device_info_procfs.c
@@ -27,38 +27,65 @@ struct file_operations nvdebug_read_reg32_file_ops = {
        .llseek = default_llseek,
 };
+typedef struct {
+        int idx; // Current index in the device_info table
+        int length; // Length of device_info table (including unpopulated entries)
+        int type_of_next_entry; // Only used on Ampere+ GPUs
+        bool has_next_entry; // Only used on Ampere+ GPUs for show() idempotence
+} device_info_iter;
 //// ==v== PTOP_DEVICE_INFO ==v== ////
 // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable.
-// Initializes iterator `idx` state and returns it. Ends sequence on NULL.
+// Initializes iterator `iter` state and returns it. Ends sequence on NULL.
 static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) {
-        static int idx;
+        static device_info_iter iter;
-        // If start of sequence, reset `idx`
+        // If freshly starting a sequence, reset the iterator
-        if (*pos == 0)
+        if (*pos == 0) {
-                idx = 0;
+                struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
-        // Number of possible info entries is fixed, and list is sparse
+                iter.idx = 0;
-        if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1)
+                iter.type_of_next_entry = 0;
+                iter.has_next_entry = 0;
+                // On Ampere+, the device_info table length can vary
+                if (g->chip_id >= NV_CHIP_ID_AMPERE)
+                        iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g);
+                else
+                        iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104;
+        }
+        // Number of possible info entries is fixed, and list is sparse, so stop
+        // iterating only when all entries have been checked, rather than on the first
+        // empty entry.
+        if (iter.idx >= iter.length)
                return NULL;
-        return &idx;
+        return &iter;
 }
-// Steps to next record. Returns new value of `idx`.
+// Steps to next record. Returns `&iter` (address should not change)
 // Calls show() on non-NULL return
-static void* device_info_file_seq_next(struct seq_file *s, void *idx,
+static void* device_info_file_seq_next(struct seq_file *s, void *iter_raw,
                                       loff_t *pos) {
+        device_info_iter *iter = (device_info_iter*)iter_raw;
        (*pos)++; // Required by seq interface
-        // Number of possible info entries is fixed, and list is sparse
+        // Number of possible info entries is fixed, and list is sparse, so stop
-        if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1)
+        // iterating only when all entries have been checked, rather than on the first
+        // empty entry.
+        if (++iter->idx >= iter->length)
                return NULL;
-        return idx;
+        // The info_type field is not available in the Ampere device_info data, so
+        // it must be inferred. NOP for older devices (cheaper than another branch).
+        // This has to be here (rather than in the show function) to support the
+        // idempotence requirements of show() in the seq_file interface.
+        iter->type_of_next_entry = iter->has_next_entry ? iter->type_of_next_entry + 1 : 0;
+        return iter;
 }
-// Print info at index *idx. Returns non-zero on error.
+// Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error.
-static int device_info_file_seq_show(struct seq_file *s, void *idx) {
+// Implementation of this function must be idempotent
-        ptop_device_info_t curr_info;
+static int device_info_file_seq_show_gk104(struct seq_file *s, void *iter_raw) {
+        device_info_iter *iter = (device_info_iter*)iter_raw;
+        ptop_device_info_gk104_t curr_info;
        struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
+        curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx));
-        curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx));
        // Check for read errors
        if (curr_info.raw == -1)
                return -EIO;
@@ -90,7 +117,7 @@ static int device_info_file_seq_show(struct seq_file *s, void *idx) {
                if (curr_info.engine_type < ENGINE_TYPES_LEN)
                        seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
                else
-                        seq_printf(s, "Unknown Engine, introduced post-Ampere)\n");
+                        seq_printf(s, "Unknown Historical)\n");
                break;
        case INFO_TYPE_NOT_VALID:
        default:
@@ -104,18 +131,81 @@ static int device_info_file_seq_show(struct seq_file *s, void *idx) {
        return 0;
 }
+// Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error.
+// Implementation of this function must be idempotent
+static int device_info_file_seq_show_ga100(struct seq_file *s, void *iter_raw) {
+        device_info_iter *iter = (device_info_iter*)iter_raw;
+        ptop_device_info_ga100_t curr_info;
+        struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
+        curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx));
+        // Check for read errors
+        if (curr_info.raw == -1)
+                return -EIO;
+        // Update tracking data only used by next(); allows preserving idempotence
+        iter->has_next_entry = curr_info.has_next_entry;
+        // Silently skip empty entries
+        if (curr_info.raw == 0)
+                return 0;
+        // In nvdebug, an entry is considered invalid if it does not consist of at
+        // least two rows. So, if this is the first row of an entry, but another row
+        // is not indicated, this entry is invalid and should be skipped.
+        if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) {
+                printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
+                return 0;
+        }
+        // Parse and print the data
+        // Note: The goal of this interface is to present useful information to
+        // a human user, NOT to provide a stable format for scripts to parse.
+        // Because of this, we favor accurately printing the data in each entry,
+        // rather than providing stable (if imperfectly correct) field names
+        switch(iter->type_of_next_entry) {
+        case 0:
+                seq_printf(s, "| Engine Type:     %3d (", curr_info.engine_type);
+                if (curr_info.engine_type < ENGINE_TYPES_LEN)
+                        seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
+                else
+                        seq_printf(s, "Unknown, introduced post-Lovelace)\n");
+                seq_printf(s, "|           instance %d\n", curr_info.inst_id);
+                seq_printf(s, "| Fault ID:       %4d\n", curr_info.fault_id);
+                break;
+        case 1:
+                seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 8);
+                seq_printf(s, "| Reset ID:        %3d\n", curr_info.reset_id);
+                seq_printf(s, "| Is Engine:         %1d\n", curr_info.is_engine);
+                break;
+        case 2:
+                seq_printf(s, "| Runlist Eng. ID:   %1d\n", curr_info.rleng_id);
+                // Theoretically, we could extract an ID from the runlist RAM
+                seq_printf(s, "| RL Base:  %#.8x\n", curr_info.runlist_pri_base << 10);
+                break;
+        default:
+                printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
+        }
+        // Draw a line between each device entry
+        if (!curr_info.has_next_entry)
+                seq_printf(s, "+---------------------+\n");
+        return 0;
+}
 static void device_info_file_seq_stop(struct seq_file *s, void *idx) {
-        // No cleanup needed
+        // No cleanup needed
 }
-static const struct seq_operations device_info_file_seq_ops = {
+static struct seq_operations device_info_file_seq_ops = {
        .start = device_info_file_seq_start,
        .next = device_info_file_seq_next,
        .stop = device_info_file_seq_stop,
-        .show = device_info_file_seq_show,
 };
 static int device_info_file_open(struct inode *inode, struct file *f) {
+        if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE)
+                device_info_file_seq_ops.show = device_info_file_seq_show_ga100;
+        else
+                device_info_file_seq_ops.show = device_info_file_seq_show_gk104;
        return seq_open(f, &device_info_file_seq_ops);
 }
diff --git a/nvdebug.h b/nvdebug.h
index 3860c2e..b79ede1 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -390,6 +390,7 @@ typedef union {
 #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU
 #define NV_CHIP_ID_KEPLER 0x0E0
 #define NV_CHIP_ID_VOLTA 0x140
+#define NV_CHIP_ID_AMPERE 0x170
 inline static const char* ARCH2NAME(uint32_t arch) {
        switch (arch) {
@@ -461,8 +462,7 @@ typedef union {
        } __attribute__((packed));
 } mc_boot_0_t;
+/* GPU engine information and control register offsets (GPU TOPology)
-/* GPU engine information and control register offsets
  Each engine is described by one or more entries (terminated by an entry with
  the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A
  typical device, such as the graphics/compute engine and any copy engines, are
@@ -473,6 +473,12 @@ typedef union {
  code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate
  upon reaching the first entry of INFO_TYPE_NOT_VALID.
+  The fields for the Ampere version of the GPU are a strict subset of those for
+  the earlier versions. They are in different positions within the register, so
+  they are declared in a separate union (ptop_device_info_ga100_t) rather than
+  being renamed. Other than that, each Ampere device info field is functionally
+  identical to the equivalent field in the previous version.
  INFO_TYPE          : Is this a DATA, ENUM, or ENGINE_TYPE table entry?
  HAS_NEXT_ENTRY     : Does the following entry refer to the same engine?
@@ -517,8 +523,11 @@ typedef union {
  Support: Kepler, Maxwell, Pascal, Volta, Ampere
  See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info.
 */
-#define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4)
-#define NV_PTOP_DEVICE_INFO__SIZE_1 64
+#define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4)
+#define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4)
+#define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20)
+#define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64
 enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3};
 enum ENGINE_TYPES {
        ENGINE_GRAPHICS = 0, // GRAPHICS [/compute]
@@ -527,7 +536,7 @@ enum ENGINE_TYPES {
        ENGINE_COPY2 = 3, // [raw/physical] COPY #2
        ENGINE_MSPDEC = 8, // Picture DECoder
-        ENGINE_MSPPP = 9, // [Video] Post Processing
+        ENGINE_MSPPP = 9, // [Video] Picture Post Processor
        ENGINE_MSVLD = 10, // [Video] Variable Length Decoder
        ENGINE_MSENC = 11, // [Video] ENCoding
        ENGINE_VIC = 12, // Video Image Compositor
@@ -538,10 +547,12 @@ enum ENGINE_TYPES {
        ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least]
        ENGINE_LCE = 19, // Logical Copy Engine
-        ENGINE_GSP = 20, // Gpu System Processor
+        ENGINE_GSP = 20, // Gpu System Processor (Volta+)
-        ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+)
+        ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Turing+)
+        ENGINE_OFA = 22, // Optical Flow Accelerator (Turing+)
+        ENGINE_FLA = 23, // [NVLink] Fabric Logical Addressing [?]
 };
-#define ENGINE_TYPES_LEN 22
+#define ENGINE_TYPES_LEN 24
 static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
        "Graphics/Compute",
        "COPY0",
@@ -565,8 +576,38 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
        "LCE: Logical Copy Engine",
        "GSP: GPU System Processor",
        "NVJPG: NVIDIA JPEG Decoder",
+        "OFA: Optical Flow Accelerator",
+        "FLA: Fabric Logical Addressing",
 };
+// These fields are from nvgpu/include/nvgpu/hw/ga100/hw_top_ga100.h
+typedef union {
+        // _info type fields
+        struct {
+                uint32_t fault_id:11;
+                 uint32_t padding0:5;
+                uint32_t inst_id:8;
+                enum ENGINE_TYPES engine_type:7; // "type_enum"
+                bool has_next_entry:1;
+        } __attribute__((packed));
+        // _info2 type fields
+        struct {
+                uint32_t reset_id:8;
+                uint32_t pri_base:18; // "device_pri_base"
+                 uint32_t padding1:4;
+                uint32_t is_engine:1;
+                 uint32_t padding2:1;
+        } __attribute__((packed));
+        struct {
+                uint32_t rleng_id:2;
+                 uint32_t padding3:8;
+                uint32_t runlist_pri_base:16;
+                 uint32_t padding4:6;
+        } __attribute__((packed));
+        uint32_t raw;
+} ptop_device_info_ga100_t;
+// These fields are from open-gpu-doc/manuals/volta/gv100/dev_top.ref.txt
 typedef union {
        // DATA type fields
        struct {
@@ -604,7 +645,7 @@ typedef union {
                 uint32_t padding9:1;
        } __attribute__((packed));
        uint32_t raw;
-} ptop_device_info_t;
+} ptop_device_info_gk104_t;
 /* Graphics Processing Cluster (GPC) information
  The GPU's Compute/Graphics engine is subdivided into Graphics Processing
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index c362209..0abe658 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -146,7 +146,7 @@ int probe_and_cache_device(void) {
 // Create files `/proc/gpu#/runlist#`, world readable
 int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
-        ptop_device_info_t info;
+        ptop_device_info_gk104_t info;
        struct proc_dir_entry *rl_entry;
        int i, rl_id;
        char runlist_name[12];
@@ -154,8 +154,8 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
        // Figure out how many runlists there are by checking the device info
        // registers. Runlists are always numbered sequentially, so we just have
        // to find the highest-valued one and add 1 to get the number of runlists.
-        for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) {
+        for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) {
-                info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO(i));
+                info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_GK104(i));
                if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid)
                        continue;
                if (info.runlist_enum > max_rl_id)
@@ -219,7 +219,8 @@ int __init nvdebug_init(void) {
                if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id)))
                        goto out_nomem;
                // Create files `/proc/gpu#/runlist#`, world readable
-                rl_create_err = create_runlist_files(device_id, dir);
+                if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE)
+                        create_runlist_files(device_id, dir);
                // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable
                tpc_masks_create_err = create_tpc_mask_files(device_id, dir);
                // Create file `/proc/gpu#/preempt_tsg`, world writable
author	Joshua Bakita <bakitajoshua@gmail.com>	2023-10-29 14:43:40 -0400
committer	Joshua Bakita <bakitajoshua@gmail.com>	2023-10-29 15:43:16 -0400
commit	8da37f4bbb30027a6efa289bd97d98b2acb5c160 (patch)
tree	08ffee914a0ea9b37fc8da07bae2a3bc88ab5ed9
parent	874aecaf06a96f5866f9bf31437a1ebd27c2f408 (diff)

diff --git a/device_info_procfs.c b/device_info_procfs.c index 5fc417f..b139c36 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c
@@ -27,38 +27,65 @@ struct file_operations nvdebug_read_reg32_file_ops = {
27	.llseek = default_llseek,	27	.llseek = default_llseek,
28	};	28	};
29		29
		30	typedef struct {
		31	int idx; // Current index in the device_info table
		32	int length; // Length of device_info table (including unpopulated entries)
		33	int type_of_next_entry; // Only used on Ampere+ GPUs
		34	bool has_next_entry; // Only used on Ampere+ GPUs for show() idempotence
		35	} device_info_iter;
		36
30	//// ==v== PTOP_DEVICE_INFO ==v== ////	37	//// ==v== PTOP_DEVICE_INFO ==v== ////
31		38
32	// Called to start or resume a sequence. Prior to 4.19, *pos is unreliable.	39	// Called to start or resume a sequence. Prior to 4.19, *pos is unreliable.
33	// Initializes iterator `idx` state and returns it. Ends sequence on NULL.	40	// Initializes iterator `iter` state and returns it. Ends sequence on NULL.
34	static void* device_info_file_seq_start(struct seq_file s, loff_t pos) {	41	static void* device_info_file_seq_start(struct seq_file s, loff_t pos) {
35	static int idx;	42	static device_info_iter iter;
36	// If start of sequence, reset `idx`	43	// If freshly starting a sequence, reset the iterator
37	if (*pos == 0)	44	if (*pos == 0) {
38	idx = 0;	45	struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
39	// Number of possible info entries is fixed, and list is sparse	46	iter.idx = 0;
40	if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1)	47	iter.type_of_next_entry = 0;
		48	iter.has_next_entry = 0;
		49	// On Ampere+, the device_info table length can vary
		50	if (g->chip_id >= NV_CHIP_ID_AMPERE)
		51	iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g);
		52	else
		53	iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104;
		54	}
		55	// Number of possible info entries is fixed, and list is sparse, so stop
		56	// iterating only when all entries have been checked, rather than on the first
		57	// empty entry.
		58	if (iter.idx >= iter.length)
41	return NULL;	59	return NULL;
42	return &idx;	60	return &iter;
43	}	61	}
44		62
45	// Steps to next record. Returns new value of `idx`.	63	// Steps to next record. Returns `&iter` (address should not change)
46	// Calls show() on non-NULL return	64	// Calls show() on non-NULL return
47	static void* device_info_file_seq_next(struct seq_file s, void idx,	65	static void* device_info_file_seq_next(struct seq_file s, void iter_raw,
48	loff_t *pos) {	66	loff_t *pos) {
		67	device_info_iter iter = (device_info_iter)iter_raw;
49	(*pos)++; // Required by seq interface	68	(*pos)++; // Required by seq interface
50	// Number of possible info entries is fixed, and list is sparse	69	// Number of possible info entries is fixed, and list is sparse, so stop
51	if (((int)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1)	70	// iterating only when all entries have been checked, rather than on the first
		71	// empty entry.
		72	if (++iter->idx >= iter->length)
52	return NULL;	73	return NULL;
53	return idx;	74	// The info_type field is not available in the Ampere device_info data, so
		75	// it must be inferred. NOP for older devices (cheaper than another branch).
		76	// This has to be here (rather than in the show function) to support the
		77	// idempotence requirements of show() in the seq_file interface.
		78	iter->type_of_next_entry = iter->has_next_entry ? iter->type_of_next_entry + 1 : 0;
		79	return iter;
54	}	80	}
55		81
56	// Print info at index *idx. Returns non-zero on error.	82	// Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error.
57	static int device_info_file_seq_show(struct seq_file s, void idx) {	83	// Implementation of this function must be idempotent
58	ptop_device_info_t curr_info;	84	static int device_info_file_seq_show_gk104(struct seq_file s, void iter_raw) {
		85	device_info_iter iter = (device_info_iter)iter_raw;
		86	ptop_device_info_gk104_t curr_info;
59	struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];	87	struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
60		88	curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx));
61	curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO((int)idx));
62	// Check for read errors	89	// Check for read errors
63	if (curr_info.raw == -1)	90	if (curr_info.raw == -1)
64	return -EIO;	91	return -EIO;
@@ -90,7 +117,7 @@ static int device_info_file_seq_show(struct seq_file s, void idx) {
90	if (curr_info.engine_type < ENGINE_TYPES_LEN)	117	if (curr_info.engine_type < ENGINE_TYPES_LEN)
91	seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);	118	seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
92	else	119	else
93	seq_printf(s, "Unknown Engine, introduced post-Ampere)\n");	120	seq_printf(s, "Unknown Historical)\n");
94	break;	121	break;
95	case INFO_TYPE_NOT_VALID:	122	case INFO_TYPE_NOT_VALID:
96	default:	123	default:
@@ -104,18 +131,81 @@ static int device_info_file_seq_show(struct seq_file s, void idx) {
104	return 0;	131	return 0;
105	}	132	}
106		133
		134	// Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error.
		135	// Implementation of this function must be idempotent
		136	static int device_info_file_seq_show_ga100(struct seq_file s, void iter_raw) {
		137	device_info_iter iter = (device_info_iter)iter_raw;
		138	ptop_device_info_ga100_t curr_info;
		139	struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)];
		140	curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx));
		141	// Check for read errors
		142	if (curr_info.raw == -1)
		143	return -EIO;
		144
		145	// Update tracking data only used by next(); allows preserving idempotence
		146	iter->has_next_entry = curr_info.has_next_entry;
		147	// Silently skip empty entries
		148	if (curr_info.raw == 0)
		149	return 0;
		150	// In nvdebug, an entry is considered invalid if it does not consist of at
		151	// least two rows. So, if this is the first row of an entry, but another row
		152	// is not indicated, this entry is invalid and should be skipped.
		153	if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) {
		154	printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
		155	return 0;
		156	}
		157
		158	// Parse and print the data
		159	// Note: The goal of this interface is to present useful information to
		160	// a human user, NOT to provide a stable format for scripts to parse.
		161	// Because of this, we favor accurately printing the data in each entry,
		162	// rather than providing stable (if imperfectly correct) field names
		163	switch(iter->type_of_next_entry) {
		164	case 0:
		165	seq_printf(s, "\| Engine Type: %3d (", curr_info.engine_type);
		166	if (curr_info.engine_type < ENGINE_TYPES_LEN)
		167	seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]);
		168	else
		169	seq_printf(s, "Unknown, introduced post-Lovelace)\n");
		170	seq_printf(s, "\| instance %d\n", curr_info.inst_id);
		171	seq_printf(s, "\| Fault ID: %4d\n", curr_info.fault_id);
		172	break;
		173	case 1:
		174	seq_printf(s, "\| BAR0 Base %#.8x\n", curr_info.pri_base << 8);
		175	seq_printf(s, "\| Reset ID: %3d\n", curr_info.reset_id);
		176	seq_printf(s, "\| Is Engine: %1d\n", curr_info.is_engine);
		177
		178	break;
		179	case 2:
		180	seq_printf(s, "\| Runlist Eng. ID: %1d\n", curr_info.rleng_id);
		181	// Theoretically, we could extract an ID from the runlist RAM
		182	seq_printf(s, "\| RL Base: %#.8x\n", curr_info.runlist_pri_base << 10);
		183	break;
		184	default:
		185	printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw);
		186	}
		187
		188	// Draw a line between each device entry
		189	if (!curr_info.has_next_entry)
		190	seq_printf(s, "+---------------------+\n");
		191	return 0;
		192	}
		193
107	static void device_info_file_seq_stop(struct seq_file s, void idx) {	194	static void device_info_file_seq_stop(struct seq_file s, void idx) {
108	// No cleanup needed	195	// No cleanup needed
109	}	196	}
110		197
111	static const struct seq_operations device_info_file_seq_ops = {	198	static struct seq_operations device_info_file_seq_ops = {
112	.start = device_info_file_seq_start,	199	.start = device_info_file_seq_start,
113	.next = device_info_file_seq_next,	200	.next = device_info_file_seq_next,
114	.stop = device_info_file_seq_stop,	201	.stop = device_info_file_seq_stop,
115	.show = device_info_file_seq_show,
116	};	202	};
117		203
118	static int device_info_file_open(struct inode inode, struct file f) {	204	static int device_info_file_open(struct inode inode, struct file f) {
		205	if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE)
		206	device_info_file_seq_ops.show = device_info_file_seq_show_ga100;
		207	else
		208	device_info_file_seq_ops.show = device_info_file_seq_show_gk104;
119	return seq_open(f, &device_info_file_seq_ops);	209	return seq_open(f, &device_info_file_seq_ops);
120	}	210	}
121		211


diff --git a/nvdebug.h b/nvdebug.h index 3860c2e..b79ede1 100644 --- a/nvdebug.h +++ b/nvdebug.h
@@ -390,6 +390,7 @@ typedef union {
390	#define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU	390	#define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU
391	#define NV_CHIP_ID_KEPLER 0x0E0	391	#define NV_CHIP_ID_KEPLER 0x0E0
392	#define NV_CHIP_ID_VOLTA 0x140	392	#define NV_CHIP_ID_VOLTA 0x140
		393	#define NV_CHIP_ID_AMPERE 0x170
393		394
394	inline static const char* ARCH2NAME(uint32_t arch) {	395	inline static const char* ARCH2NAME(uint32_t arch) {
395	switch (arch) {	396	switch (arch) {
@@ -461,8 +462,7 @@ typedef union {
461	} __attribute__((packed));	462	} __attribute__((packed));
462	} mc_boot_0_t;	463	} mc_boot_0_t;
463		464
464		465	/* GPU engine information and control register offsets (GPU TOPology)
465	/* GPU engine information and control register offsets
466	Each engine is described by one or more entries (terminated by an entry with	466	Each engine is described by one or more entries (terminated by an entry with
467	the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A	467	the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A
468	typical device, such as the graphics/compute engine and any copy engines, are	468	typical device, such as the graphics/compute engine and any copy engines, are
@@ -473,6 +473,12 @@ typedef union {
473	code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate	473	code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate
474	upon reaching the first entry of INFO_TYPE_NOT_VALID.	474	upon reaching the first entry of INFO_TYPE_NOT_VALID.
475		475
		476	The fields for the Ampere version of the GPU are a strict subset of those for
		477	the earlier versions. They are in different positions within the register, so
		478	they are declared in a separate union (ptop_device_info_ga100_t) rather than
		479	being renamed. Other than that, each Ampere device info field is functionally
		480	identical to the equivalent field in the previous version.
		481
476	INFO_TYPE : Is this a DATA, ENUM, or ENGINE_TYPE table entry?	482	INFO_TYPE : Is this a DATA, ENUM, or ENGINE_TYPE table entry?
477	HAS_NEXT_ENTRY : Does the following entry refer to the same engine?	483	HAS_NEXT_ENTRY : Does the following entry refer to the same engine?
478		484
@@ -517,8 +523,11 @@ typedef union {
517	Support: Kepler, Maxwell, Pascal, Volta, Ampere	523	Support: Kepler, Maxwell, Pascal, Volta, Ampere
518	See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info.	524	See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info.
519	*/	525	*/
520	#define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4)	526
521	#define NV_PTOP_DEVICE_INFO__SIZE_1 64	527	#define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4)
		528	#define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4)
		529	#define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20)
		530	#define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64
522	enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3};	531	enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3};
523	enum ENGINE_TYPES {	532	enum ENGINE_TYPES {
524	ENGINE_GRAPHICS = 0, // GRAPHICS [/compute]	533	ENGINE_GRAPHICS = 0, // GRAPHICS [/compute]
@@ -527,7 +536,7 @@ enum ENGINE_TYPES {
527	ENGINE_COPY2 = 3, // [raw/physical] COPY #2	536	ENGINE_COPY2 = 3, // [raw/physical] COPY #2
528		537
529	ENGINE_MSPDEC = 8, // Picture DECoder	538	ENGINE_MSPDEC = 8, // Picture DECoder
530	ENGINE_MSPPP = 9, // [Video] Post Processing	539	ENGINE_MSPPP = 9, // [Video] Picture Post Processor
531	ENGINE_MSVLD = 10, // [Video] Variable Length Decoder	540	ENGINE_MSVLD = 10, // [Video] Variable Length Decoder
532	ENGINE_MSENC = 11, // [Video] ENCoding	541	ENGINE_MSENC = 11, // [Video] ENCoding
533	ENGINE_VIC = 12, // Video Image Compositor	542	ENGINE_VIC = 12, // Video Image Compositor
@@ -538,10 +547,12 @@ enum ENGINE_TYPES {
538		547
539	ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least]	548	ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least]
540	ENGINE_LCE = 19, // Logical Copy Engine	549	ENGINE_LCE = 19, // Logical Copy Engine
541	ENGINE_GSP = 20, // Gpu System Processor	550	ENGINE_GSP = 20, // Gpu System Processor (Volta+)
542	ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+)	551	ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Turing+)
		552	ENGINE_OFA = 22, // Optical Flow Accelerator (Turing+)
		553	ENGINE_FLA = 23, // [NVLink] Fabric Logical Addressing [?]
543	};	554	};
544	#define ENGINE_TYPES_LEN 22	555	#define ENGINE_TYPES_LEN 24
545	static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {	556	static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
546	"Graphics/Compute",	557	"Graphics/Compute",
547	"COPY0",	558	"COPY0",
@@ -565,8 +576,38 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
565	"LCE: Logical Copy Engine",	576	"LCE: Logical Copy Engine",
566	"GSP: GPU System Processor",	577	"GSP: GPU System Processor",
567	"NVJPG: NVIDIA JPEG Decoder",	578	"NVJPG: NVIDIA JPEG Decoder",
		579	"OFA: Optical Flow Accelerator",
		580	"FLA: Fabric Logical Addressing",
568	};	581	};
569		582
		583	// These fields are from nvgpu/include/nvgpu/hw/ga100/hw_top_ga100.h
		584	typedef union {
		585	// _info type fields
		586	struct {
		587	uint32_t fault_id:11;
		588	uint32_t padding0:5;
		589	uint32_t inst_id:8;
		590	enum ENGINE_TYPES engine_type:7; // "type_enum"
		591	bool has_next_entry:1;
		592	} __attribute__((packed));
		593	// _info2 type fields
		594	struct {
		595	uint32_t reset_id:8;
		596	uint32_t pri_base:18; // "device_pri_base"
		597	uint32_t padding1:4;
		598	uint32_t is_engine:1;
		599	uint32_t padding2:1;
		600	} __attribute__((packed));
		601	struct {
		602	uint32_t rleng_id:2;
		603	uint32_t padding3:8;
		604	uint32_t runlist_pri_base:16;
		605	uint32_t padding4:6;
		606	} __attribute__((packed));
		607	uint32_t raw;
		608	} ptop_device_info_ga100_t;
		609
		610	// These fields are from open-gpu-doc/manuals/volta/gv100/dev_top.ref.txt
570	typedef union {	611	typedef union {
571	// DATA type fields	612	// DATA type fields
572	struct {	613	struct {
@@ -604,7 +645,7 @@ typedef union {
604	uint32_t padding9:1;	645	uint32_t padding9:1;
605	} __attribute__((packed));	646	} __attribute__((packed));
606	uint32_t raw;	647	uint32_t raw;
607	} ptop_device_info_t;	648	} ptop_device_info_gk104_t;
608		649
609	/* Graphics Processing Cluster (GPC) information	650	/* Graphics Processing Cluster (GPC) information
610	The GPU's Compute/Graphics engine is subdivided into Graphics Processing	651	The GPU's Compute/Graphics engine is subdivided into Graphics Processing


diff --git a/nvdebug_entry.c b/nvdebug_entry.c index c362209..0abe658 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c
@@ -146,7 +146,7 @@ int probe_and_cache_device(void) {
146		146
147	// Create files `/proc/gpu#/runlist#`, world readable	147	// Create files `/proc/gpu#/runlist#`, world readable
148	int create_runlist_files(int device_id, struct proc_dir_entry *dir) {	148	int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
149	ptop_device_info_t info;	149	ptop_device_info_gk104_t info;
150	struct proc_dir_entry *rl_entry;	150	struct proc_dir_entry *rl_entry;
151	int i, rl_id;	151	int i, rl_id;
152	char runlist_name[12];	152	char runlist_name[12];
@@ -154,8 +154,8 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
154	// Figure out how many runlists there are by checking the device info	154	// Figure out how many runlists there are by checking the device info
155	// registers. Runlists are always numbered sequentially, so we just have	155	// registers. Runlists are always numbered sequentially, so we just have
156	// to find the highest-valued one and add 1 to get the number of runlists.	156	// to find the highest-valued one and add 1 to get the number of runlists.
157	for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) {	157	for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) {
158	info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO(i));	158	info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_GK104(i));
159	if (info.info_type != INFO_TYPE_ENUM \|\| !info.runlist_is_valid)	159	if (info.info_type != INFO_TYPE_ENUM \|\| !info.runlist_is_valid)
160	continue;	160	continue;
161	if (info.runlist_enum > max_rl_id)	161	if (info.runlist_enum > max_rl_id)
@@ -219,7 +219,8 @@ int __init nvdebug_init(void) {
219	if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id)))	219	if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id)))
220	goto out_nomem;	220	goto out_nomem;
221	// Create files `/proc/gpu#/runlist#`, world readable	221	// Create files `/proc/gpu#/runlist#`, world readable
222	rl_create_err = create_runlist_files(device_id, dir);	222	if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE)
		223	create_runlist_files(device_id, dir);
223	// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable	224	// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable
224	tpc_masks_create_err = create_tpc_mask_files(device_id, dir);	225	tpc_masks_create_err = create_tpc_mask_files(device_id, dir);
225	// Create file `/proc/gpu#/preempt_tsg`, world writable	226	// Create file `/proc/gpu#/preempt_tsg`, world writable