diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-16 15:34:41 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-16 15:34:41 -0400 |
commit | 232eafd04f272ed69d97a250c50a7bbed4d2894c (patch) | |
tree | bf1d03cd66e6f37b2c9ac9a9d48e4f359fcdd6b5 /nvdebug_entry.c | |
parent | 0b1c304e53b88fe628d350d1380a88317f071e69 (diff) |
Support printing the runlist and channels on Ampere+ GPUs
**Modifies the user API from `cat /proc/gpuX/runlist0` to
`cat /proc/gpuX/runlist0/runlist` to support runlist-scoped
registers**
- Count number of runlists via Ampere-style PTOP parsing.
- Create a ProcFS directory for each runlist, and create the runlist
printing file in this directory.
- Document the newly-added/-formatted Runlist RAM and Channel RAM
registers.
- Add a helper function `get_runlist_ram()` to obtain the location
of each runlist's registers.
- Support printing Ampere-style Channel RAM entries.
Tested on Jetson Orin (ga10b), A100, H100, and AD102 (RTX 6000 Ada)
Diffstat (limited to 'nvdebug_entry.c')
-rw-r--r-- | nvdebug_entry.c | 77 |
1 files changed, 55 insertions, 22 deletions
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index eee7351..1f9e1c9 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -159,35 +159,53 @@ int probe_and_cache_devices(void) { | |||
159 | return -ENODEV; | 159 | return -ENODEV; |
160 | } | 160 | } |
161 | 161 | ||
162 | // Create files `/proc/gpu#/runlist#`, world readable | ||
163 | // Support: Fermi, Maxwell, Pascal, Volta, Turing | 162 | // Support: Fermi, Maxwell, Pascal, Volta, Turing |
164 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | 163 | int get_last_runlist_id_gk104(struct nvdebug_state *g) { |
165 | ptop_device_info_gk104_t info; | 164 | ptop_device_info_gk104_t info; |
166 | struct proc_dir_entry *rl_entry; | 165 | int i, max_rl_id = 0; // Always at least one runlist |
167 | int i, rl_id; | ||
168 | char runlist_name[12]; | ||
169 | int max_rl_id = 0; // Always at least one runlist | ||
170 | // Figure out how many runlists there are by checking the device info | 166 | // Figure out how many runlists there are by checking the device info |
171 | // registers. Runlists are always numbered sequentially, so we just have | 167 | // registers. Runlists are always numbered sequentially, so we just have |
172 | // to find the highest-valued one and add 1 to get the number of runlists. | 168 | // to find the highest-valued one and add 1 to get the number of runlists. |
173 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) { | 169 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1_GK104; i++) { |
174 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO_GK104(i)); | 170 | if ((info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GK104(i))) == -1) |
171 | return -EIO; | ||
175 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) | 172 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) |
176 | continue; | 173 | continue; |
177 | if (info.runlist_enum > max_rl_id) | 174 | if (info.runlist_enum > max_rl_id) |
178 | max_rl_id = info.runlist_enum; | 175 | max_rl_id = info.runlist_enum; |
179 | } | 176 | } |
180 | // Create files to read each runlist. The read handling code looks at the | 177 | return max_rl_id; |
181 | // `pde_data` associated with the file to determine what the runlist ID is. | 178 | } |
182 | for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { | 179 | |
183 | snprintf(runlist_name, 12, "runlist%d", rl_id); | 180 | // Support: Ampere, Hopper, Ada (and newer likely) |
184 | rl_entry = proc_create_data( | 181 | // Identical structure to get_runlist_ram() in runlist.c. See comments there. |
185 | runlist_name, 0444, dir, compat_ops(&runlist_file_ops), | 182 | int get_last_runlist_id_ga100(struct nvdebug_state *g) { |
186 | (void*)(uintptr_t)rl_id); | 183 | ptop_device_info_ga100_t ptop_entry; |
187 | if (!rl_entry) | 184 | int i, runlist_count = 0; |
188 | return -ENOMEM; | 185 | int ptop_size = NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g); |
186 | int ptop_entry_subrow = 0; | ||
187 | for (i = 0; i < ptop_size; i++) { | ||
188 | if ((ptop_entry.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO_GA100(i))) == -1) | ||
189 | return -EIO; | ||
190 | if (!ptop_entry.raw) | ||
191 | continue; | ||
192 | if (ptop_entry_subrow == 2 && ptop_entry.rleng_id == 0) | ||
193 | runlist_count++; | ||
194 | if (ptop_entry.has_next_entry) | ||
195 | ptop_entry_subrow += 1; | ||
196 | else | ||
197 | ptop_entry_subrow = 0; | ||
189 | } | 198 | } |
190 | return 0; | 199 | return runlist_count - 1; |
200 | } | ||
201 | |||
202 | // Return the maximum runlist ID. For a two-runlist GPU, this would return 1. | ||
203 | int get_last_runlist_id(int device_id) { | ||
204 | struct nvdebug_state* g = &g_nvdebug_state[device_id]; | ||
205 | if (g->chip_id >= NV_CHIP_ID_AMPERE) | ||
206 | return get_last_runlist_id_ga100(g); | ||
207 | else | ||
208 | return get_last_runlist_id_gk104(g); | ||
191 | } | 209 | } |
192 | 210 | ||
193 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable | 211 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable |
@@ -238,6 +256,7 @@ int __init nvdebug_init(void) { | |||
238 | g_nvdebug_devices = res; | 256 | g_nvdebug_devices = res; |
239 | // Create seperate ProcFS directories for each gpu | 257 | // Create seperate ProcFS directories for each gpu |
240 | while (res--) { | 258 | while (res--) { |
259 | uintptr_t last_runlist = 0; | ||
241 | char device_id_str[7]; | 260 | char device_id_str[7]; |
242 | // Create a wider copy of the GPU ID to allow us to abuse the *data | 261 | // Create a wider copy of the GPU ID to allow us to abuse the *data |
243 | // field of proc_dir_entry to store the GPU ID. | 262 | // field of proc_dir_entry to store the GPU ID. |
@@ -248,10 +267,24 @@ int __init nvdebug_init(void) { | |||
248 | snprintf(device_id_str, 7, "gpu%ld", device_id); | 267 | snprintf(device_id_str, 7, "gpu%ld", device_id); |
249 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) | 268 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) |
250 | goto out_nomem; | 269 | goto out_nomem; |
251 | // Create files `/proc/gpu#/runlist#`, world readable | 270 | // Create files in the `/proc/gpu#/runlist#/` directory |
252 | if (g_nvdebug_state[device_id].chip_id < NV_CHIP_ID_AMPERE) | 271 | // The read handling code looks at the `pde_data` associated with the parent |
253 | if ((err = create_runlist_files(device_id, dir))) | 272 | // directory to determine what the runlist ID is. |
254 | goto out_err; | 273 | if ((last_runlist = get_last_runlist_id(device_id)) < 0) |
274 | return last_runlist; | ||
275 | do { | ||
276 | char runlist_name[12]; | ||
277 | struct proc_dir_entry *rl_dir; | ||
278 | // Create `/proc/gpu#/runlist#` directory | ||
279 | snprintf(runlist_name, 12, "runlist%lu", last_runlist); | ||
280 | if (!(rl_dir = proc_mkdir_data(runlist_name, 0555, dir, (void*)device_id))) | ||
281 | goto out_nomem; | ||
282 | // Create file `/proc/gpu#/runlist#/runlist`, world readable | ||
283 | if (!proc_create_data( | ||
284 | "runlist", 0444, rl_dir, compat_ops(&runlist_file_ops), | ||
285 | (void*)last_runlist)) | ||
286 | goto out_nomem; | ||
287 | } while (last_runlist-- > 0); | ||
255 | // Create file `/proc/gpu#/preempt_tsg`, world writable | 288 | // Create file `/proc/gpu#/preempt_tsg`, world writable |
256 | if (!proc_create_data( | 289 | if (!proc_create_data( |
257 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), | 290 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), |
@@ -325,7 +358,7 @@ int __init nvdebug_init(void) { | |||
325 | "local_memory", 0444, dir, compat_ops(&local_memory_file_ops), | 358 | "local_memory", 0444, dir, compat_ops(&local_memory_file_ops), |
326 | (void*)0x00100ce0)) | 359 | (void*)0x00100ce0)) |
327 | goto out_nomem; | 360 | goto out_nomem; |
328 | } | 361 | } |
329 | // Create files exposing LCE and PCE configuration (Pascal+) | 362 | // Create files exposing LCE and PCE configuration (Pascal+) |
330 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { | 363 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_PASCAL) { |
331 | // Create file `/proc/gpu#/copy_topology`, world readable | 364 | // Create file `/proc/gpu#/copy_topology`, world readable |