about summary refs log tree commit diff stats
path: root/nvdebug_entry.c
diff options
context:
space:
mode:
author Joshua Bakita <bakitajoshua@gmail.com> 2024-09-16 15:34:41 -0400
committer Joshua Bakita <bakitajoshua@gmail.com> 2024-09-16 15:34:41 -0400
commit 232eafd04f272ed69d97a250c50a7bbed4d2894c (patch)
tree bf1d03cd66e6f37b2c9ac9a9d48e4f359fcdd6b5 /nvdebug_entry.c
parent 0b1c304e53b88fe628d350d1380a88317f071e69 (diff)
Support printing the runlist and channels on Ampere+ GPUs
**Modifies the user API from `cat /proc/gpuX/runlist0` to `cat /proc/gpuX/runlist0/runlist` to support runlist-scoped registers**
- Count number of runlists via Ampere-style PTOP parsing.
- Create a ProcFS directory for each runlist, and create the runlist printing file in this directory.
- Document the newly-added/-formatted Runlist RAM and Channel RAM registers.
- Add a helper function `get_runlist_ram()` to obtain the location of each runlist's registers.
- Support printing Ampere-style Channel RAM entries.
Tested on Jetson Orin (ga10b), A100, H100, and AD102 (RTX 6000 Ada)
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- nvdebug_entry.c 77
1 files changed, 55 insertions, 22 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index eee7351..1f9e1c9 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -159,35 +159,53 @@ int probe_and_cache_devices(void) {
159 return -ENODEV; 159 return -ENODEV;
160} 160}
161 161
162// Create files `/proc/gpu#/runlist#`, world readable
163// Support: Fermi, Maxwell, Pascal, Volta, Turing 162// Support: Fermi, Maxwell, Pascal, Volta, Turing
164int create_runlist_files(int device_id, struct proc_dir_entry *dir) { 163int get_last_runlist_id_gk104(struct nvdebug_state *g) {
165 ptop_device_info_gk104_t info; 164 ptop_device_info_gk104_t info;
166 struct proc_dir_entry *rl_entry; 165 int i, max_rl_id = 0; // Always at least one runlist
167 int i, rl_id;
168 char runlist_name[12];
169 int max_rl_id = 0; // Always at least one runlist
170 // Figure out how many runlists there are by checking the device info 166 // Figure out how many runlists there are by checking the device info
171 // registers. Runlists are always numbered sequentially, so we just have 167 // registers. Runlists are always numbered sequentially, so we just have
172 // to find the highest-valued one and add 1 to get the number of runlists. 168 // to find the highest-valued one and add 1 to get the number of runlists.
173 for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) { 169 for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) {
174 info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_GK104(i)); 170 if ((info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(i))) == -1)
171 return -EIO;
175 if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) 172 if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid)
176 continue; 173 continue;
177 if (info.runlist_enum > max_rl_id) 174 if (info.runlist_enum > max_rl_id)
178 max_rl_id = info.runlist_enum; 175 max_rl_id = info.runlist_enum;
179 } 176 }
180 // Create files to read each runlist. The read handling code looks at the 177 return max_rl_id;
181 // `pde_data` associated with the file to determine what the runlist ID is. 178}
182 for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { 179
183 snprintf(runlist_name, 12, "runlist%d", rl_id); 180// Support: Ampere, Hopper, Ada (and newer likely)
184 rl_entry = proc_create_data( 181// Identical structure to get_runlist_ram() in runlist.c. See comments there.
185 runlist_name, 0444, dir, compat_ops(&runlist_file_ops), 182int get_last_runlist_id_ga100(struct nvdebug_state *g) {
186 (void*)(uintptr_t)rl_id); 183 ptop_device_info_ga100_t ptop_entry;
187 if (!rl_entry) 184 int i, runlist_count = 0;
188 return -ENOMEM; 185 int ptop_size = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g);
186 int ptop_entry_subrow = 0;
187 for (i = 0; i < ptop_size; i++) {
188 if ((ptop_entry.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(i))) == -1)
189 return -EIO;
190 if (!ptop_entry.raw)
191 continue;
192 if (ptop_entry_subrow == 2 && ptop_entry.rleng_id == 0)
193 runlist_count++;
194 if (ptop_entry.has_next_entry)
195 ptop_entry_subrow += 1;
196 else
197 ptop_entry_subrow = 0;
189 } 198 }
190 return 0; 199 return runlist_count - 1;
200}
201
202// Return the maximum runlist ID. For a two-runlist GPU, this would return 1.
203int get_last_runlist_id(int device_id) {
204 struct nvdebug_state* g = &g_nvdebug_state[device_id];
205 if (g->chip_id >= NV_CHIP_ID_AMPERE)
206 return get_last_runlist_id_ga100(g);
207 else
208 return get_last_runlist_id_gk104(g);
191} 209}
192 210
193// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable 211// Create files `/proc/gpu#/gpc#_tpc_mask`, world readable
@@ -238,6 +256,7 @@ int __init nvdebug_init(void) {
238 g_nvdebug_devices = res; 256 g_nvdebug_devices = res;
239 // Create seperate ProcFS directories for each gpu 257 // Create seperate ProcFS directories for each gpu
240 while (res--) { 258 while (res--) {
259 uintptr_t last_runlist = 0;
241 char device_id_str[7]; 260 char device_id_str[7];
242 // Create a wider copy of the GPU ID to allow us to abuse the *data 261 // Create a wider copy of the GPU ID to allow us to abuse the *data
243 // field of proc_dir_entry to store the GPU ID. 262 // field of proc_dir_entry to store the GPU ID.
@@ -248,10 +267,24 @@ int __init nvdebug_init(void) {
248 snprintf(device_id_str, 7, "gpu%ld", device_id); 267 snprintf(device_id_str, 7, "gpu%ld", device_id);
249 if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) 268 if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id)))
250 goto out_nomem; 269 goto out_nomem;
251 // Create files `/proc/gpu#/runlist#`, world readable 270 // Create files in the `/proc/gpu#/runlist#/` directory
252 if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) 271 // The read handling code looks at the `pde_data` associated with the parent
253 if ((err = create_runlist_files(device_id, dir))) 272 // directory to determine what the runlist ID is.
254 goto out_err; 273 if ((last_runlist = get_last_runlist_id(device_id)) < 0)
274 return last_runlist;
275 do {
276 char runlist_name[12];
277 struct proc_dir_entry *rl_dir;
278 // Create `/proc/gpu#/runlist#` directory
279 snprintf(runlist_name, 12, "runlist%lu", last_runlist);
280 if (!(rl_dir = proc_mkdir_data(runlist_name, 0555, dir, (void*)device_id)))
281 goto out_nomem;
282 // Create file `/proc/gpu#/runlist#/runlist`, world readable
283 if (!proc_create_data(
284 "runlist", 0444, rl_dir, compat_ops(&runlist_file_ops),
285 (void*)last_runlist))
286 goto out_nomem;
287 } while (last_runlist-- > 0);
255 // Create file `/proc/gpu#/preempt_tsg`, world writable 288 // Create file `/proc/gpu#/preempt_tsg`, world writable
256 if (!proc_create_data( 289 if (!proc_create_data(
257 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), 290 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops),
@@ -325,7 +358,7 @@ int __init nvdebug_init(void) {
325 "local_memory", 0444, dir, compat_ops(&local_memory_file_ops), 358 "local_memory", 0444, dir, compat_ops(&local_memory_file_ops),
326 (void*)0x00100ce0)) 359 (void*)0x00100ce0))
327 goto out_nomem; 360 goto out_nomem;
328 } 361 }
329 // Create files exposing LCE and PCE configuration (Pascal+) 362 // Create files exposing LCE and PCE configuration (Pascal+)
330 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { 363 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) {
331 // Create file `/proc/gpu#/copy_topology`, world readable 364 // Create file `/proc/gpu#/copy_topology`, world readable