diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2023-10-29 14:43:40 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-10-29 15:43:16 -0400 |
commit | 8da37f4bbb30027a6efa289bd97d98b2acb5c160 (patch) | |
tree | 08ffee914a0ea9b37fc8da07bae2a3bc88ab5ed9 | |
parent | 874aecaf06a96f5866f9bf31437a1ebd27c2f408 (diff) |
Support printing device info on Ampere+ GPUs. By Benjamin Hadad IV
commit c3d6f2c852eb046e9d4f4f1e6527b52c746b2693
Author: Joshua Bakita <bakitajoshua@gmail.com>
Date: Sun Oct 29 14:37:51 2023 -0400
Print Ampere+ device_info fields with correct offsets/widths
Everything now has been checked against how nvgpu handles it
commit b70849d1ce67a58f9f69b37dc62122f789f4cdf7
Author: Joshua Bakita <jbakita@cs.unc.edu>
Date: Wed Sep 20 14:27:38 2023 -0400
Rearrange, fix an off-by-one error, and remove an unused define
The code in nvdebug.h has been rearranged to enable an easier merge
against the jbakita-wip branch.
commit 51f808e092846a60ea6c88ea3a1d2e349c92977b
Author: Joshua Bakita <jbakita@cs.unc.edu>
Date: Wed Sep 20 13:09:17 2023 -0400
Bug fixes and cleanup for new device_info logic
- Update comments to match new structure
- Make show() function idempotent
- Skip empty table entries without aborting
- Include names for new engine types
- Add warning log messages for skipped table entries
- Remove non-functional runlist file creation logic for Ampere+
commit 1d7adc3be1aef5ac9c144bb24008fd8cc5d688a5
Author: Benjamin Hadad IV <bh4@unc.edu>
Date: Sat Aug 19 12:47:18 2023 -0400
Debugging changes made to restore functionality following refactoring.
- Debugged data display errors.
- Debugged crash bugs.
- Debugged memory issue.
commit 9e6cc03cdf736fbd817ed53fa9a7f506bc91a244
Author: Benjamin Hadad IV <bh4@unc.edu>
Date: Wed Aug 16 22:00:20 2023 -0400
A variety of changes have been made as part of the code review.
- Functions have been consolidated.
- Code was clarified and tidied up overall.
- Unnecessary elements were removed.
commit 845960fc1b15995fdbd6d61c384567652a150bc4
Author: Benjamin Hadad IV <bh4@unc.edu>
Date: Fri Jul 28 11:39:28 2023 -0400
Refactored various systems and debugged minor issues
- Added device_info_iter
- Merged functions in device_info_procfs.c
- Separated device_info data structs by version in nvdebug.h
- Fixed issue with device_info runlist ID data
commit 8a57aaeba41c43233c323d7e0fc8bf1a81ebc65e
Author: Benjamin Hadad IV <bh4@unc.edu>
Date: Fri Jul 21 11:32:51 2023 -0400
I have updated the ptop_device_info_t comment in nvdebug.h.
commit 33c915f08f5dc63674b158ecc18897494256a6d0
Author: Benjamin Hadad IV <bh4@unc.edu>
Date: Wed Jul 19 13:02:52 2023 -0400
Debugged device_info functionality
- Fixed device_info crash bugs
- Made further edits to display functionality
- Refactored code to enhance readability
commit bfb4dcf0e78954c0163f3a06a5a088c4d1b437a8
Author: Benjamin Hadad IV <bh4@unc.edu>
Date: Thu Jul 13 12:13:17 2023 -0400
This commit is to update the repo for display during a meeting.
- Added an Ampere version of the device info data.
- Added Ampere versions of auxiliary functions.
- Modified display functions to accommodate Ampere data.
- Made other various small modifications.
commit 068e7f4e7208d6c9250ad72208e0b36fd9fdf2f6
Merge: 3725b15 073e897
Author: Benjamin Hadad IV <bh4@unc.edu>
Date: Mon Jul 10 12:39:12 2023 -0400
Merge branch 'jbakita-wip' of ssh://rtsrv.cs.unc.edu/public/nvdebug into wip
I am merging Mr. Bakita's changes (046d7d2) into this repository.
commit 3725b15d5da3e06ef202045d710aa5f15eb72fcc
Author: Benjamin Hadad IV <bh4@unc.edu>
Date: Mon Jul 3 04:30:54 2023 -0400
I modified nvdebug.h for Ampere.
-rw-r--r-- | device_info_procfs.c | 134 | ||||
-rw-r--r-- | nvdebug.h | 59 | ||||
-rw-r--r-- | nvdebug_entry.c | 9 |
3 files changed, 167 insertions, 35 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c index 5fc417f..b139c36 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
@@ -27,38 +27,65 @@ struct file_operations nvdebug_read_reg32_file_ops = { | |||
27 | .llseek = default_llseek, | 27 | .llseek = default_llseek, |
28 | }; | 28 | }; |
29 | 29 | ||
30 | typedef struct { | ||
31 | int idx; // Current index in the device_info table | ||
32 | int length; // Length of device_info table (including unpopulated entries) | ||
33 | int type_of_next_entry; // Only used on Ampere+ GPUs | ||
34 | bool has_next_entry; // Only used on Ampere+ GPUs for show() idempotence | ||
35 | } device_info_iter; | ||
36 | |||
30 | //// ==v== PTOP_DEVICE_INFO ==v== //// | 37 | //// ==v== PTOP_DEVICE_INFO ==v== //// |
31 | 38 | ||
32 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. | 39 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. |
33 | // Initializes iterator `idx` state and returns it. Ends sequence on NULL. | 40 | // Initializes iterator `iter` state and returns it. Ends sequence on NULL. |
34 | static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { | 41 | static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { |
35 | static int idx; | 42 | static device_info_iter iter; |
36 | // If start of sequence, reset `idx` | 43 | // If freshly starting a sequence, reset the iterator |
37 | if (*pos == 0) | 44 | if (*pos == 0) { |
38 | idx = 0; | 45 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
39 | // Number of possible info entries is fixed, and list is sparse | 46 | iter.idx = 0; |
40 | if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1) | 47 | iter.type_of_next_entry = 0; |
48 | iter.has_next_entry = 0; | ||
49 | // On Ampere+, the device_info table length can vary | ||
50 | if (g->chip_id >= NV_CHIP_ID_AMPERE) | ||
51 | iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g); | ||
52 | else | ||
53 | iter.length = NV_PTOP_DEVICE_INFO__SIZE_1_GK104; | ||
54 | } | ||
55 | // Number of possible info entries is fixed, and list is sparse, so stop | ||
56 | // iterating only when all entries have been checked, rather than on the first | ||
57 | // empty entry. | ||
58 | if (iter.idx >= iter.length) | ||
41 | return NULL; | 59 | return NULL; |
42 | return &idx; | 60 | return &iter; |
43 | } | 61 | } |
44 | 62 | ||
45 | // Steps to next record. Returns new value of `idx`. | 63 | // Steps to next record. Returns `&iter` (address should not change) |
46 | // Calls show() on non-NULL return | 64 | // Calls show() on non-NULL return |
47 | static void* device_info_file_seq_next(struct seq_file *s, void *idx, | 65 | static void* device_info_file_seq_next(struct seq_file *s, void *iter_raw, |
48 | loff_t *pos) { | 66 | loff_t *pos) { |
67 | device_info_iter *iter = (device_info_iter*)iter_raw; | ||
49 | (*pos)++; // Required by seq interface | 68 | (*pos)++; // Required by seq interface |
50 | // Number of possible info entries is fixed, and list is sparse | 69 | // Number of possible info entries is fixed, and list is sparse, so stop |
51 | if ((*(int*)idx)++ >= NV_PTOP_DEVICE_INFO__SIZE_1) | 70 | // iterating only when all entries have been checked, rather than on the first |
71 | // empty entry. | ||
72 | if (++iter->idx >= iter->length) | ||
52 | return NULL; | 73 | return NULL; |
53 | return idx; | 74 | // The info_type field is not available in the Ampere device_info data, so |
75 | // it must be inferred. NOP for older devices (cheaper than another branch). | ||
76 | // This has to be here (rather than in the show function) to support the | ||
77 | // idempotence requirements of show() in the seq_file interface. | ||
78 | iter->type_of_next_entry = iter->has_next_entry ? iter->type_of_next_entry + 1 : 0; | ||
79 | return iter; | ||
54 | } | 80 | } |
55 | 81 | ||
56 | // Print info at index *idx. Returns non-zero on error. | 82 | // Print info at iter->idx for Kepler--Turing GPUs. Returns non-zero on error. |
57 | static int device_info_file_seq_show(struct seq_file *s, void *idx) { | 83 | // Implementation of this function must be idempotent |
58 | ptop_device_info_t curr_info; | 84 | static int device_info_file_seq_show_gk104(struct seq_file *s, void *iter_raw) { |
85 | device_info_iter *iter = (device_info_iter*)iter_raw; | ||
86 | ptop_device_info_gk104_t curr_info; | ||
59 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | 87 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; |
60 | 88 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(iter->idx)); | |
61 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); | ||
62 | // Check for read errors | 89 | // Check for read errors |
63 | if (curr_info.raw == -1) | 90 | if (curr_info.raw == -1) |
64 | return -EIO; | 91 | return -EIO; |
@@ -90,7 +117,7 @@ static int device_info_file_seq_show(struct seq_file *s, void *idx) { | |||
90 | if (curr_info.engine_type < ENGINE_TYPES_LEN) | 117 | if (curr_info.engine_type < ENGINE_TYPES_LEN) |
91 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); | 118 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); |
92 | else | 119 | else |
93 | seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); | 120 | seq_printf(s, "Unknown Historical)\n"); |
94 | break; | 121 | break; |
95 | case INFO_TYPE_NOT_VALID: | 122 | case INFO_TYPE_NOT_VALID: |
96 | default: | 123 | default: |
@@ -104,18 +131,81 @@ static int device_info_file_seq_show(struct seq_file *s, void *idx) { | |||
104 | return 0; | 131 | return 0; |
105 | } | 132 | } |
106 | 133 | ||
134 | // Print info at iter->idx for Ampere+ GPUs. Returns non-zero on error. | ||
135 | // Implementation of this function must be idempotent | ||
136 | static int device_info_file_seq_show_ga100(struct seq_file *s, void *iter_raw) { | ||
137 | device_info_iter *iter = (device_info_iter*)iter_raw; | ||
138 | ptop_device_info_ga100_t curr_info; | ||
139 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | ||
140 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(iter->idx)); | ||
141 | // Check for read errors | ||
142 | if (curr_info.raw == -1) | ||
143 | return -EIO; | ||
144 | |||
145 | // Update tracking data only used by next(); allows preserving idempotence | ||
146 | iter->has_next_entry = curr_info.has_next_entry; | ||
147 | // Silently skip empty entries | ||
148 | if (curr_info.raw == 0) | ||
149 | return 0; | ||
150 | // In nvdebug, an entry is considered invalid if it does not consist of at | ||
151 | // least two rows. So, if this is the first row of an entry, but another row | ||
152 | // is not indicated, this entry is invalid and should be skipped. | ||
153 | if (iter->type_of_next_entry == 0 && !curr_info.has_next_entry) { | ||
154 | printk(KERN_WARNING "[nvdebug] Skipping seemingly-invalid device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw); | ||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | // Parse and print the data | ||
159 | // Note: The goal of this interface is to present useful information to | ||
160 | // a human user, NOT to provide a stable format for scripts to parse. | ||
161 | // Because of this, we favor accurately printing the data in each entry, | ||
162 | // rather than providing stable (if imperfectly correct) field names | ||
163 | switch(iter->type_of_next_entry) { | ||
164 | case 0: | ||
165 | seq_printf(s, "| Engine Type: %3d (", curr_info.engine_type); | ||
166 | if (curr_info.engine_type < ENGINE_TYPES_LEN) | ||
167 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); | ||
168 | else | ||
169 | seq_printf(s, "Unknown, introduced post-Lovelace)\n"); | ||
170 | seq_printf(s, "| instance %d\n", curr_info.inst_id); | ||
171 | seq_printf(s, "| Fault ID: %4d\n", curr_info.fault_id); | ||
172 | break; | ||
173 | case 1: | ||
174 | seq_printf(s, "| BAR0 Base %#.8x\n", curr_info.pri_base << 8); | ||
175 | seq_printf(s, "| Reset ID: %3d\n", curr_info.reset_id); | ||
176 | seq_printf(s, "| Is Engine: %1d\n", curr_info.is_engine); | ||
177 | |||
178 | break; | ||
179 | case 2: | ||
180 | seq_printf(s, "| Runlist Eng. ID: %1d\n", curr_info.rleng_id); | ||
181 | // Theoretically, we could extract an ID from the runlist RAM | ||
182 | seq_printf(s, "| RL Base: %#.8x\n", curr_info.runlist_pri_base << 10); | ||
183 | break; | ||
184 | default: | ||
185 | printk(KERN_WARNING "[nvdebug] Skipping unexpected continuation of device_info entry (idx: %d, raw: %#0x)\n", iter->idx, curr_info.raw); | ||
186 | } | ||
187 | |||
188 | // Draw a line between each device entry | ||
189 | if (!curr_info.has_next_entry) | ||
190 | seq_printf(s, "+---------------------+\n"); | ||
191 | return 0; | ||
192 | } | ||
193 | |||
107 | static void device_info_file_seq_stop(struct seq_file *s, void *idx) { | 194 | static void device_info_file_seq_stop(struct seq_file *s, void *idx) { |
108 | // No cleanup needed | 195 | // No cleanup needed |
109 | } | 196 | } |
110 | 197 | ||
111 | static const struct seq_operations device_info_file_seq_ops = { | 198 | static struct seq_operations device_info_file_seq_ops = { |
112 | .start = device_info_file_seq_start, | 199 | .start = device_info_file_seq_start, |
113 | .next = device_info_file_seq_next, | 200 | .next = device_info_file_seq_next, |
114 | .stop = device_info_file_seq_stop, | 201 | .stop = device_info_file_seq_stop, |
115 | .show = device_info_file_seq_show, | ||
116 | }; | 202 | }; |
117 | 203 | ||
118 | static int device_info_file_open(struct inode *inode, struct file *f) { | 204 | static int device_info_file_open(struct inode *inode, struct file *f) { |
205 | if (g_nvdebug_state[file2parentgpuidx(f)].chip_id >= NV_CHIP_ID_AMPERE) | ||
206 | device_info_file_seq_ops.show = device_info_file_seq_show_ga100; | ||
207 | else | ||
208 | device_info_file_seq_ops.show = device_info_file_seq_show_gk104; | ||
119 | return seq_open(f, &device_info_file_seq_ops); | 209 | return seq_open(f, &device_info_file_seq_ops); |
120 | } | 210 | } |
121 | 211 | ||
@@ -390,6 +390,7 @@ typedef union { | |||
390 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | 390 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU |
391 | #define NV_CHIP_ID_KEPLER 0x0E0 | 391 | #define NV_CHIP_ID_KEPLER 0x0E0 |
392 | #define NV_CHIP_ID_VOLTA 0x140 | 392 | #define NV_CHIP_ID_VOLTA 0x140 |
393 | #define NV_CHIP_ID_AMPERE 0x170 | ||
393 | 394 | ||
394 | inline static const char* ARCH2NAME(uint32_t arch) { | 395 | inline static const char* ARCH2NAME(uint32_t arch) { |
395 | switch (arch) { | 396 | switch (arch) { |
@@ -461,8 +462,7 @@ typedef union { | |||
461 | } __attribute__((packed)); | 462 | } __attribute__((packed)); |
462 | } mc_boot_0_t; | 463 | } mc_boot_0_t; |
463 | 464 | ||
464 | 465 | /* GPU engine information and control register offsets (GPU TOPology) | |
465 | /* GPU engine information and control register offsets | ||
466 | Each engine is described by one or more entries (terminated by an entry with | 466 | Each engine is described by one or more entries (terminated by an entry with |
467 | the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A | 467 | the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A |
468 | typical device, such as the graphics/compute engine and any copy engines, are | 468 | typical device, such as the graphics/compute engine and any copy engines, are |
@@ -473,6 +473,12 @@ typedef union { | |||
473 | code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate | 473 | code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate |
474 | upon reaching the first entry of INFO_TYPE_NOT_VALID. | 474 | upon reaching the first entry of INFO_TYPE_NOT_VALID. |
475 | 475 | ||
476 | The fields for the Ampere version of the GPU are a strict subset of those for | ||
477 | the earlier versions. They are in different positions within the word, so | ||
478 | they are declared in a separate union (ptop_device_info_ga100_t). Otherwise, each | ||
479 | Ampere device info field is functionally identical to the equivalent field in | ||
480 | the previous version. | ||
481 | |||
476 | INFO_TYPE : Is this a DATA, ENUM, or ENGINE_TYPE table entry? | 482 | INFO_TYPE : Is this a DATA, ENUM, or ENGINE_TYPE table entry? |
477 | HAS_NEXT_ENTRY : Does the following entry refer to the same engine? | 483 | HAS_NEXT_ENTRY : Does the following entry refer to the same engine? |
478 | 484 | ||
@@ -517,8 +523,11 @@ typedef union { | |||
517 | Support: Kepler, Maxwell, Pascal, Volta, Ampere | 523 | Support: Kepler, Maxwell, Pascal, Volta, Ampere |
518 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. | 524 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. |
519 | */ | 525 | */ |
520 | #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) | 526 | |
521 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 | 527 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) |
528 | #define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) | ||
529 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) | ||
530 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 | ||
522 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | 531 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; |
523 | enum ENGINE_TYPES { | 532 | enum ENGINE_TYPES { |
524 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] | 533 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] |
@@ -527,7 +536,7 @@ enum ENGINE_TYPES { | |||
527 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | 536 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 |
528 | 537 | ||
529 | ENGINE_MSPDEC = 8, // Picture DECoder | 538 | ENGINE_MSPDEC = 8, // Picture DECoder |
530 | ENGINE_MSPPP = 9, // [Video] Post Processing | 539 | ENGINE_MSPPP = 9, // [Video] Picture Post Processor |
531 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | 540 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder |
532 | ENGINE_MSENC = 11, // [Video] ENCoding | 541 | ENGINE_MSENC = 11, // [Video] ENCoding |
533 | ENGINE_VIC = 12, // Video Image Compositor | 542 | ENGINE_VIC = 12, // Video Image Compositor |
@@ -538,10 +547,12 @@ enum ENGINE_TYPES { | |||
538 | 547 | ||
539 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | 548 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] |
540 | ENGINE_LCE = 19, // Logical Copy Engine | 549 | ENGINE_LCE = 19, // Logical Copy Engine |
541 | ENGINE_GSP = 20, // Gpu System Processor | 550 | ENGINE_GSP = 20, // Gpu System Processor (Volta+) |
542 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | 551 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Turing+) |
552 | ENGINE_OFA = 22, // Optical Flow Accelerator (Turing+) | ||
553 | ENGINE_FLA = 23, // [NVLink] Fabric Logical Addressing [?] | ||
543 | }; | 554 | }; |
544 | #define ENGINE_TYPES_LEN 22 | 555 | #define ENGINE_TYPES_LEN 24 |
545 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | 556 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { |
546 | "Graphics/Compute", | 557 | "Graphics/Compute", |
547 | "COPY0", | 558 | "COPY0", |
@@ -565,8 +576,38 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
565 | "LCE: Logical Copy Engine", | 576 | "LCE: Logical Copy Engine", |
566 | "GSP: GPU System Processor", | 577 | "GSP: GPU System Processor", |
567 | "NVJPG: NVIDIA JPEG Decoder", | 578 | "NVJPG: NVIDIA JPEG Decoder", |
579 | "OFA: Optical Flow Accelerator", | ||
580 | "FLA: Fabric Logical Addressing", | ||
568 | }; | 581 | }; |
569 | 582 | ||
583 | // These fields are from nvgpu/include/nvgpu/hw/ga100/hw_top_ga100.h | ||
584 | typedef union { | ||
585 | // _info type fields | ||
586 | struct { | ||
587 | uint32_t fault_id:11; | ||
588 | uint32_t padding0:5; | ||
589 | uint32_t inst_id:8; | ||
590 | enum ENGINE_TYPES engine_type:7; // "type_enum" | ||
591 | bool has_next_entry:1; | ||
592 | } __attribute__((packed)); | ||
593 | // _info2 type fields | ||
594 | struct { | ||
595 | uint32_t reset_id:8; | ||
596 | uint32_t pri_base:18; // "device_pri_base" | ||
597 | uint32_t padding1:4; | ||
598 | uint32_t is_engine:1; | ||
599 | uint32_t padding2:1; | ||
600 | } __attribute__((packed)); | ||
601 | struct { | ||
602 | uint32_t rleng_id:2; | ||
603 | uint32_t padding3:8; | ||
604 | uint32_t runlist_pri_base:16; | ||
605 | uint32_t padding4:6; | ||
606 | } __attribute__((packed)); | ||
607 | uint32_t raw; | ||
608 | } ptop_device_info_ga100_t; | ||
609 | |||
610 | // These fields are from open-gpu-doc/manuals/volta/gv100/dev_top.ref.txt | ||
570 | typedef union { | 611 | typedef union { |
571 | // DATA type fields | 612 | // DATA type fields |
572 | struct { | 613 | struct { |
@@ -604,7 +645,7 @@ typedef union { | |||
604 | uint32_t padding9:1; | 645 | uint32_t padding9:1; |
605 | } __attribute__((packed)); | 646 | } __attribute__((packed)); |
606 | uint32_t raw; | 647 | uint32_t raw; |
607 | } ptop_device_info_t; | 648 | } ptop_device_info_gk104_t; |
608 | 649 | ||
609 | /* Graphics Processing Cluster (GPC) information | 650 | /* Graphics Processing Cluster (GPC) information |
610 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing | 651 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing |
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index c362209..0abe658 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -146,7 +146,7 @@ int probe_and_cache_device(void) { | |||
146 | 146 | ||
147 | // Create files `/proc/gpu#/runlist#`, world readable | 147 | // Create files `/proc/gpu#/runlist#`, world readable |
148 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | 148 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { |
149 | ptop_device_info_t info; | 149 | ptop_device_info_gk104_t info; |
150 | struct proc_dir_entry *rl_entry; | 150 | struct proc_dir_entry *rl_entry; |
151 | int i, rl_id; | 151 | int i, rl_id; |
152 | char runlist_name[12]; | 152 | char runlist_name[12]; |
@@ -154,8 +154,8 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | |||
154 | // Figure out how many runlists there are by checking the device info | 154 | // Figure out how many runlists there are by checking the device info |
155 | // registers. Runlists are always numbered sequentially, so we just have | 155 | // registers. Runlists are always numbered sequentially, so we just have |
156 | // to find the highest-valued one and add 1 to get the number of runlists. | 156 | // to find the highest-valued one and add 1 to get the number of runlists. |
157 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) { | 157 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) { |
158 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO(i)); | 158 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_GK104(i)); |
159 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) | 159 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) |
160 | continue; | 160 | continue; |
161 | if (info.runlist_enum > max_rl_id) | 161 | if (info.runlist_enum > max_rl_id) |
@@ -219,7 +219,8 @@ int __init nvdebug_init(void) { | |||
219 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) | 219 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) |
220 | goto out_nomem; | 220 | goto out_nomem; |
221 | // Create files `/proc/gpu#/runlist#`, world readable | 221 | // Create files `/proc/gpu#/runlist#`, world readable |
222 | rl_create_err = create_runlist_files(device_id, dir); | 222 | if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) |
223 | create_runlist_files(device_id, dir); | ||
223 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable | 224 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable |
224 | tpc_masks_create_err = create_tpc_mask_files(device_id, dir); | 225 | tpc_masks_create_err = create_tpc_mask_files(device_id, dir); |
225 | // Create file `/proc/gpu#/preempt_tsg`, world writable | 226 | // Create file `/proc/gpu#/preempt_tsg`, world writable |