diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 12:50:02 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-19 13:59:56 -0400 |
commit | 48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (patch) | |
tree | d63c3863e9b187fb1f62d2f3c58b8c6a9568b6cc | |
parent | ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (diff) |
Ampere: disable/enable_channel, preempt/switch_to_tsg, and resubmit_runlist
**Modifies the user API from `echo 1 > /proc/gpuX/switch_to_tsg` to
`echo 1 > /proc/gpuX/runlist0/switch_to_tsg` to switch to TSG 1 on
runlist 0 on GPU X for pre-Ampere GPUs (for example).**
Feature changes:
- switch_to_tsg only makes sense on a per-runlist level. Before, this
always operated on runlist0; this commit allows operating on any
runlist by moving the API to the per-runlist paths.
- On Ampere+, channel and TSG IDs are per-runlist, and no longer
GPU-global. Consequently, the disable/enable_channel and
preempt_tsg APIs have been moved from GPU-global to per-runlist
paths on Ampere+.
Bug fixes:
- `preempt_runlist()` is now supported on Maxwell and Pascal.
- `resubmit_runlist()` detects too-old GPUs.
- MAX_CHID corrected from 512 to 511 and documented.
- switch_to_tsg now includes a runlist resubmit, which appears to be
necessary on Turing+ GPUs.
Tested on GK104 (Quadro K5000), GM204 (GTX 970), GP106 (GTX 1060 3GB),
GP104 (GTX 1080 Ti), GP10B (Jetson TX2), GV11B (Jetson Xavier), GV100
(Titan V), TU102 (RTX 2080 Ti), and AD102 (RTX 6000 Ada).
-rw-r--r-- | nvdebug.h | 37 | ||||
-rw-r--r-- | nvdebug_entry.c | 53 | ||||
-rw-r--r-- | runlist.c | 69 | ||||
-rw-r--r-- | runlist_procfs.c | 198 |
4 files changed, 260 insertions, 97 deletions
@@ -172,7 +172,8 @@ enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | |||
172 | IS_PENDING : Is a context switch pending? (read-only) | 172 | IS_PENDING : Is a context switch pending? (read-only) |
173 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG | 173 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG |
174 | 174 | ||
175 | Support: Kepler, Maxwell, Pascal, Volta, Turing | 175 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta, Turing |
176 | *Fermi only supports PREEMPT_TYPE_CHANNEL. | ||
176 | */ | 177 | */ |
177 | #define NV_PFIFO_PREEMPT 0x00002634 | 178 | #define NV_PFIFO_PREEMPT 0x00002634 |
178 | typedef union { | 179 | typedef union { |
@@ -187,10 +188,26 @@ typedef union { | |||
187 | uint32_t raw; | 188 | uint32_t raw; |
188 | } pfifo_preempt_t; | 189 | } pfifo_preempt_t; |
189 | 190 | ||
191 | /* Preempt a TSG or Runlist by ID | ||
192 | Similar as on older GPUs (see above), but located at an offset in Runlist RAM. | ||
193 | This means that there's one instance of this register for each runlist. | ||
194 | |||
195 | IS_PENDING is now IS_TSG_PREEMPT_PENDING and IS_RUNLIST_PREEMPT_PENDING was | ||
196 | added in the following bit (bit 22). As these fields are unused in nvdebug, | ||
197 | we use the old structure for simplicity. | ||
198 | |||
199 | TYPE is now better described as IS_TSG_PREEMPT. TYPE == 0 requests a preempt | ||
200 | of the runlist (rather than a channel preemption, as on older GPUs). | ||
201 | |||
202 | Support: Ampere, Hopper, Ada, [newer untested] | ||
203 | */ | ||
204 | #define NV_RUNLIST_PREEMPT_GA100 0x098 | ||
205 | #define PREEMPT_TYPE_RUNLIST 0 | ||
206 | |||
190 | /* | 207 | /* |
191 | "Initiate a preempt of the engine by writing the bit associated with its | 208 | "Initiate a preempt of the engine by writing the bit associated with its |
192 | runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT | 209 | runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT |
193 | for the preempt to complete." | 210 | for the preempt to complete." (open-gpu-doc) |
194 | 211 | ||
195 | Useful for preempting multiple runlists at once. | 212 | Useful for preempting multiple runlists at once. |
196 | 213 | ||
@@ -204,7 +221,10 @@ typedef union { | |||
204 | rl_preempt.raw |= BIT(nr); | 221 | rl_preempt.raw |= BIT(nr); |
205 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | 222 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); |
206 | 223 | ||
207 | Support: Volta, Turing | 224 | Support: Maxwell, Pascal, Volta, Turing |
225 | |||
226 | This register was deleted starting with Ampere, with functionality subsumed by | ||
227 | the NV_RUNLIST_PREEMPT register. | ||
208 | */ | 228 | */ |
209 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 | 229 | #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638 |
210 | typedef union { | 230 | typedef union { |
@@ -285,7 +305,7 @@ typedef union { | |||
285 | TARGET : Aperture of runlist (video or system memory) | 305 | TARGET : Aperture of runlist (video or system memory) |
286 | 306 | ||
287 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta | 307 | Support: Fermi*, Kepler, Maxwell, Pascal, Volta |
288 | *Fermi may expose this information 8 bytes earlier, starting at 0x227C? | 308 | *Fermi may expose ENG_RUNLIST_* 8 bytes earlier, starting at 0x227C?
289 | */ | 309 | */ |
290 | #define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only | 310 | #define NV_PFIFO_RUNLIST_BASE_GF100 0x00002270 // Write-only |
291 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only | 311 | #define NV_PFIFO_ENG_RUNLIST_BASE_GF100(i) (0x00002280+(i)*8) // Read-only |
@@ -428,7 +448,11 @@ typedef union { | |||
428 | See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc | 448 | See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc |
429 | */ | 449 | */ |
430 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) | 450 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) |
431 | #define MAX_CHID 512 | 451 | // Maximum valid channel index in the PCCSR region |
452 | // Channel IDs start at 0, and there are 4096 bytes of 8-byte CCSR entries (per | ||
453 | // NV_PCCSR_CHANNEL_INST__SIZE_1 in at least Volta and Turing), yielding a total | ||
454 | // of 512 channel IDs, with a maximum ID of 511. | ||
455 | #define MAX_CHID 511 | ||
432 | typedef union { | 456 | typedef union { |
433 | struct { | 457 | struct { |
434 | // 0:31 | 458 | // 0:31 |
@@ -554,6 +578,7 @@ typedef union { | |||
554 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 | 578 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 |
555 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | 579 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU |
556 | 580 | ||
581 | #define NV_CHIP_ID_FERMI 0x0C0 | ||
557 | #define NV_CHIP_ID_KEPLER 0x0E0 | 582 | #define NV_CHIP_ID_KEPLER 0x0E0 |
558 | #define NV_CHIP_ID_MAXWELL 0x120 | 583 | #define NV_CHIP_ID_MAXWELL 0x120 |
559 | #define NV_CHIP_ID_PASCAL 0x130 | 584 | #define NV_CHIP_ID_PASCAL 0x130 |
@@ -1505,7 +1530,7 @@ int get_runlist_iter( | |||
1505 | struct nvdebug_state *g, | 1530 | struct nvdebug_state *g, |
1506 | int rl_id, | 1531 | int rl_id, |
1507 | struct runlist_iter *rl_iter /* out */); | 1532 | struct runlist_iter *rl_iter /* out */); |
1508 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); | 1533 | int preempt_tsg(struct nvdebug_state *g, uint32_t rl_id, uint32_t tsg_id); |
1509 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1534 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); |
1510 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1535 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); |
1511 | 1536 | ||
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 5f99976..d5df7db 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -289,17 +289,45 @@ int __init nvdebug_init(void) { | |||
289 | snprintf(runlist_name, 12, "runlist%lu", last_runlist); | 289 | snprintf(runlist_name, 12, "runlist%lu", last_runlist); |
290 | if (!(rl_dir = proc_mkdir_data(runlist_name, 0555, dir, (void*)device_id))) | 290 | if (!(rl_dir = proc_mkdir_data(runlist_name, 0555, dir, (void*)device_id))) |
291 | goto out_nomem; | 291 | goto out_nomem; |
292 | // Create one file for each runlist on Ampere+, or one file for each GPU on older | ||
293 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_AMPERE || last_runlist == 0) { | ||
294 | struct proc_dir_entry *chram_scope; | ||
295 | // preempt_tsg, enable_channel, and disable_channel refer to a GPU-global channel | ||
296 | // RAM on pre-Ampere GPUs | ||
297 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_AMPERE) | ||
298 | chram_scope = rl_dir; | ||
299 | else | ||
300 | chram_scope = dir; | ||
301 | // Create file `/proc/gpu#/runlist#/preempt_tsg`, world writable | ||
302 | // On Turing and older, `/proc/gpu#/preempt_tsg` | ||
303 | if (!proc_create_data( | ||
304 | "preempt_tsg", 0222, chram_scope, compat_ops(&preempt_tsg_file_ops), | ||
305 | (void*)last_runlist)) | ||
306 | goto out_nomem; | ||
307 | // Create file `/proc/gpu#/runlist#/disable_channel`, world writable | ||
308 | // On Turing and older, `/proc/gpu#/disable_channel` | ||
309 | if (!proc_create_data( | ||
310 | "disable_channel", 0222, chram_scope, compat_ops(&disable_channel_file_ops), | ||
311 | (void*)last_runlist)) | ||
312 | goto out_nomem; | ||
313 | // Create file `/proc/gpu#/runlist#/enable_channel`, world writable | ||
314 | // On Turing and older, `/proc/gpu#/enable_channel` | ||
315 | if (!proc_create_data( | ||
316 | "enable_channel", 0222, chram_scope, compat_ops(&enable_channel_file_ops), | ||
317 | (void*)last_runlist)) | ||
318 | goto out_nomem; | ||
319 | } | ||
292 | // Create file `/proc/gpu#/runlist#/runlist`, world readable | 320 | // Create file `/proc/gpu#/runlist#/runlist`, world readable |
293 | if (!proc_create_data( | 321 | if (!proc_create_data( |
294 | "runlist", 0444, rl_dir, compat_ops(&runlist_file_ops), | 322 | "runlist", 0444, rl_dir, compat_ops(&runlist_file_ops), |
295 | (void*)last_runlist)) | 323 | (void*)last_runlist)) |
296 | goto out_nomem; | 324 | goto out_nomem; |
325 | // Create file `/proc/gpu#/runlist#/switch_to_tsg`, world writable | ||
326 | if (!proc_create_data( | ||
327 | "switch_to_tsg", 0222, rl_dir, compat_ops(&switch_to_tsg_file_ops), | ||
328 | (void*)last_runlist)) | ||
329 | goto out_nomem; | ||
297 | } while (last_runlist-- > 0); | 330 | } while (last_runlist-- > 0); |
298 | // Create file `/proc/gpu#/preempt_tsg`, world writable | ||
299 | if (!proc_create_data( | ||
300 | "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops), | ||
301 | (void*)device_id)) | ||
302 | goto out_nomem; | ||
303 | /* On the TU104, the context scheduler (contained in the Host, aka | 331 | /* On the TU104, the context scheduler (contained in the Host, aka |
304 | * PFIFO, unit) has been observed to sometimes fail to schedule TSGs | 332 |
305 | * containing re-enabled channels. Resubmitting the runlist | 333 | * containing re-enabled channels. Resubmitting the runlist |
@@ -311,21 +339,6 @@ int __init nvdebug_init(void) { | |||
311 | "resubmit_runlist", 0222, dir, compat_ops(&resubmit_runlist_file_ops), | 339 | "resubmit_runlist", 0222, dir, compat_ops(&resubmit_runlist_file_ops), |
312 | (void*)device_id)) | 340 | (void*)device_id)) |
313 | goto out_nomem; | 341 | goto out_nomem; |
314 | // Create file `/proc/gpu#/disable_channel`, world writable | ||
315 | if (!proc_create_data( | ||
316 | "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops), | ||
317 | (void*)device_id)) | ||
318 | goto out_nomem; | ||
319 | // Create file `/proc/gpu#/enable_channel`, world writable | ||
320 | if (!proc_create_data( | ||
321 | "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops), | ||
322 | (void*)device_id)) | ||
323 | goto out_nomem; | ||
324 | // Create file `/proc/gpu#/switch_to_tsg`, world writable | ||
325 | if (!proc_create_data( | ||
326 | "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops), | ||
327 | (void*)device_id)) | ||
328 | goto out_nomem; | ||
329 | // Create file `/proc/gpu#/device_info`, world readable | 342 | // Create file `/proc/gpu#/device_info`, world readable |
330 | if (!proc_create_data( | 343 | if (!proc_create_data( |
331 | "device_info", 0444, dir, compat_ops(&device_info_file_ops), | 344 | "device_info", 0444, dir, compat_ops(&device_info_file_ops), |
@@ -169,24 +169,35 @@ attempt_pramin_access: | |||
169 | 169 | ||
170 | /* Trigger a preempt of the specified TSG | 170 | /* Trigger a preempt of the specified TSG |
171 | @param tsg_id ID of TSG to preempt. | 171 | @param tsg_id ID of TSG to preempt. |
172 | @param rl_id Which channel RAM address space to search? | ||
172 | @return 0 or -errno on error | 173 | @return 0 or -errno on error |
173 | 174 | ||
174 | Note: If no other TSGs exist in the associated runlist, this TSG may | 175 | Note: If no other TSGs exist in the associated runlist, this TSG may |
175 | continue executing, unless NV_PFIFO_SCHED_DISABLE is set, or all the | 176 | continue executing, unless NV_PFIFO_SCHED_DISABLE is set, or all the |
176 | channels of the TSG to be preempted are disabled. | 177 | channels of the TSG to be preempted are disabled. |
177 | */ | 178 | */ |
178 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) { | 179 | int preempt_tsg(struct nvdebug_state *g, uint32_t rl_id, uint32_t tsg_id) { |
179 | pfifo_preempt_t pfifo_preempt; | 180 | pfifo_preempt_t preempt; |
181 | // Fermi does not support time-slice groups | ||
180 | if (g->chip_id < NV_CHIP_ID_KEPLER) | 182 | if (g->chip_id < NV_CHIP_ID_KEPLER) |
181 | return -EOPNOTSUPP; | 183 | return -EOPNOTSUPP; |
182 | 184 | ||
183 | pfifo_preempt.raw = 0; | 185 | preempt.raw = 0; |
184 | pfifo_preempt.id = tsg_id; | 186 | preempt.id = tsg_id; |
185 | pfifo_preempt.is_pending = 0; | 187 | preempt.type = PREEMPT_TYPE_TSG; |
186 | pfifo_preempt.type = PREEMPT_TYPE_TSG; | ||
187 | 188 | ||
188 | // Actually trigger the preemption | 189 | // Actually trigger the preemption |
189 | nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw); | 190 | if (g->chip_id < NV_CHIP_ID_AMPERE) { |
191 | nvdebug_writel(g, NV_PFIFO_PREEMPT, preempt.raw); | ||
192 | } else { | ||
193 | uint32_t runlist_reg_base; | ||
194 | int err; | ||
195 | // As TSG and channel IDs are namespaced per-runlist starting with | ||
196 | // Ampere, the PREEMPT register is also per-runlist. | ||
197 | if ((err = get_runlist_ram(g, rl_id, &runlist_reg_base))) | ||
198 | return err; | ||
199 | nvdebug_writel(g, runlist_reg_base + NV_RUNLIST_PREEMPT_GA100, preempt.raw); | ||
200 | } | ||
190 | return 0; | 201 | return 0; |
191 | } | 202 | } |
192 | 203 | ||
@@ -195,18 +206,38 @@ int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) { | |||
195 | @return 0 or -errno on error | 206 | @return 0 or -errno on error |
196 | */ | 207 | */ |
197 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) { | 208 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) { |
198 | runlist_preempt_t rl_preempt; | 209 | // The runlist preempt register does not exist on Kepler (tested gk104) |
199 | if (g->chip_id < NV_CHIP_ID_VOLTA) | 210 | if (g->chip_id < NV_CHIP_ID_MAXWELL) |
200 | return -EOPNOTSUPP; | 211 | return -EOPNOTSUPP; |
201 | 212 | ||
202 | // Overwrite, as the register contains nothing to preserve | 213 | // Write to trigger the preemption (the register contains nothing to |
203 | rl_preempt.raw = BIT(rl_id); | 214 | // preserve, and can thus just be overwritten) |
204 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | 215 | if (g->chip_id < NV_CHIP_ID_AMPERE) { |
216 | runlist_preempt_t rl_preempt; | ||
217 | rl_preempt.raw = BIT(rl_id); | ||
218 | nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw); | ||
219 | } else { | ||
220 | int err; | ||
221 | uint32_t runlist_regs_base; | ||
222 | pfifo_preempt_t preempt; | ||
223 | // The RUNLIST_PREEMPT register was deleted, and the _PREEMPT register | ||
224 | // was extended to support runlist-level preemptions starting on Ampere | ||
225 | preempt.id = rl_id; | ||
226 | preempt.type = PREEMPT_TYPE_RUNLIST; | ||
227 | // The preempt register is scoped per-runlist on Ampere+ | ||
228 | if ((err = get_runlist_ram(g, rl_id, &runlist_regs_base))) | ||
229 | return err; | ||
230 | nvdebug_writel(g, runlist_regs_base + NV_RUNLIST_PREEMPT_GA100, preempt.raw); | ||
231 | } | ||
205 | return 0; | 232 | return 0; |
206 | } | 233 | } |
207 | 234 | ||
208 | // Read and write runlist configuration, triggering a resubmit | 235 | // Read and write runlist configuration, triggering a resubmit |
209 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { | 236 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { |
237 | // Necessary registers do not exist pre-Fermi | ||
238 | if (g->chip_id < NV_CHIP_ID_FERMI) | ||
239 | return -EOPNOTSUPP; | ||
240 | |||
210 | if (g->chip_id < NV_CHIP_ID_TURING) { | 241 | if (g->chip_id < NV_CHIP_ID_TURING) { |
211 | eng_runlist_gf100_t rl; | 242 | eng_runlist_gf100_t rl; |
212 | if (rl_id > MAX_RUNLISTS_GF100) | 243 | if (rl_id > MAX_RUNLISTS_GF100) |
@@ -223,7 +254,19 @@ int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { | |||
223 | return -EIO; | 254 | return -EIO; |
224 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw); | 255 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw); |
225 | } else { | 256 | } else { |
226 | return -EOPNOTSUPP; | 257 | int err; |
258 | uint32_t runlist_pri_base; | ||
259 | runlist_submit_tu102_t submit; | ||
260 | if ((err = get_runlist_ram(g, rl_id, &runlist_pri_base)) < 0) | ||
261 | return err; | ||
262 | if ((submit.raw = nvdebug_readq(g, runlist_pri_base + NV_RUNLIST_SUBMIT_GA100)) == -1) | ||
263 | return -EIO; | ||
264 | // On Ampere, this does not appear to trigger a preempt of the | ||
265 | // currently-running channel (even if the currently running channel | ||
266 | // becomes disabled), but will cause newly re-enabled channels | ||
267 | // (at least if nothing else is pending) to become ready (tested on | ||
268 | // Jetson Orin). | ||
269 | nvdebug_writeq(g, runlist_pri_base + NV_RUNLIST_SUBMIT_GA100, submit.raw); | ||
227 | } | 270 | } |
228 | return 0; | 271 | return 0; |
229 | } | 272 | } |
diff --git a/runlist_procfs.c b/runlist_procfs.c index c1cfc87..b2159f6 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
@@ -199,11 +199,11 @@ struct file_operations runlist_file_ops = { | |||
199 | }; | 199 | }; |
200 | 200 | ||
201 | ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, | 201 | ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, |
202 | size_t count, loff_t *off) { | 202 | size_t count, loff_t *off) { |
203 | uint32_t target_tsgid; | 203 | uint32_t target_tsgid, target_runlist_ram; |
204 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; | ||
204 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 205 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
205 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); | 206 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); |
206 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
207 | if (err) | 207 | if (err) |
208 | return err; | 208 | return err; |
209 | 209 | ||
@@ -211,8 +211,15 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, | |||
211 | if (target_tsgid > MAX_TSGID) | 211 | if (target_tsgid > MAX_TSGID) |
212 | return -ERANGE; | 212 | return -ERANGE; |
213 | 213 | ||
214 | // (Ab)use the PDE_DATA field for the index into which Runlist RAM this TSG | ||
215 | // ID is scoped to (only applicable on Ampere+) | ||
216 | if (g->chip_id >= NV_CHIP_ID_AMPERE) | ||
217 | target_runlist_ram = file2gpuidx(f); | ||
218 | else | ||
219 | target_runlist_ram = 0; | ||
220 | |||
214 | // Execute preemption | 221 | // Execute preemption |
215 | if ((err = preempt_tsg(g, target_tsgid))) | 222 | if ((err = preempt_tsg(g, target_runlist_ram, target_tsgid))) |
216 | return err; | 223 | return err; |
217 | 224 | ||
218 | return count; | 225 | return count; |
@@ -223,13 +230,12 @@ struct file_operations preempt_tsg_file_ops = { | |||
223 | .llseek = default_llseek, | 230 | .llseek = default_llseek, |
224 | }; | 231 | }; |
225 | 232 | ||
226 | |||
227 | ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer, | 233 | ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer, |
228 | size_t count, loff_t *off) { | 234 | size_t count, loff_t *off) { |
229 | uint32_t target_runlist; | 235 | uint32_t target_runlist; |
236 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
230 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 237 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
231 | int err = kstrtou32_from_user(buffer, count, 0, &target_runlist); | 238 | int err = kstrtou32_from_user(buffer, count, 0, &target_runlist); |
232 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
233 | if (err) | 239 | if (err) |
234 | return err; | 240 | return err; |
235 | 241 | ||
@@ -245,26 +251,48 @@ struct file_operations resubmit_runlist_file_ops = { | |||
245 | .llseek = default_llseek, | 251 | .llseek = default_llseek, |
246 | }; | 252 | }; |
247 | 253 | ||
254 | |||
248 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, | 255 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, |
249 | size_t count, loff_t *off) { | 256 | size_t count, loff_t *off) { |
250 | uint32_t target_channel; | 257 | uint32_t target_channel; |
251 | channel_ctrl_t chan; | 258 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; |
252 | int err; | ||
253 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
254 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 259 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
255 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); | 260 | int err = kstrtou32_from_user(buffer, count, 0, &target_channel); |
256 | if (err) | 261 | if (err) |
257 | return err; | 262 | return err; |
258 | 263 | ||
259 | if (target_channel > MAX_CHID) | 264 | if (g->chip_id < NV_CHIP_ID_AMPERE) { |
260 | return -ERANGE; | 265 | channel_ctrl_t chan; |
261 | 266 | if (target_channel > MAX_CHID) | |
262 | // Read current configuration | 267 | return -ERANGE; |
263 | if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1) | 268 | // Read current configuration |
264 | return -EIO; | 269 | if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1) |
265 | // Request disablement | 270 | return -EIO; |
266 | chan.enable_clear = true; | 271 | // Request disablement |
267 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | 272 | chan.enable_clear = true; |
273 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | ||
274 | } else { | ||
275 | uint32_t runlist_reg_base, chram_base, channel_max; | ||
276 | runlist_channel_config_t channel_config; | ||
277 | channel_ctrl_ga100_t chan; | ||
278 | // (Ab)use the PDE_DATA field for the runlist ID | ||
279 | if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base))) | ||
280 | return err; | ||
281 | // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere | ||
282 | if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1) | ||
283 | return -EIO; | ||
284 | channel_max = 1u << channel_config.num_channels_log2; | ||
285 | if (target_channel >= channel_max) | ||
286 | return -ERANGE; | ||
287 | chram_base = (uint32_t)channel_config.bar0_offset << 4; | ||
288 | // Writing zeros to any field of the Ampere+ channel control structure | ||
289 | // does nothing, so don't bother to read the structure first, and just | ||
290 | // write zeros to all the fields we don't care about. | ||
291 | chan.raw = 0; | ||
292 | chan.is_write_one_clears_bits = 1; // Invert meaning of writing 1 | ||
293 | chan.enable = 1; | ||
294 | nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw); | ||
295 | } | ||
268 | 296 | ||
269 | return count; | 297 | return count; |
270 | } | 298 | } |
@@ -275,23 +303,45 @@ struct file_operations disable_channel_file_ops = { | |||
275 | }; | 303 | }; |
276 | 304 | ||
277 | ssize_t enable_channel_file_write(struct file *f, const char __user *buffer, | 305 | ssize_t enable_channel_file_write(struct file *f, const char __user *buffer, |
278 | size_t count, loff_t *off) { | 306 | size_t count, loff_t *off) { |
279 | uint32_t target_channel; | 307 | uint32_t target_channel; |
280 | channel_ctrl_t chan; | 308 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; |
281 | int err; | ||
282 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
283 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 309 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
284 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); | 310 | int err = kstrtou32_from_user(buffer, count, 0, &target_channel); |
285 | if (err) | 311 | if (err) |
286 | return err; | 312 | return err; |
287 | 313 | ||
288 | if (target_channel > MAX_CHID) | 314 | if (g->chip_id < NV_CHIP_ID_AMPERE) { |
289 | return -ERANGE; | 315 | channel_ctrl_t chan; |
290 | 316 | if (target_channel > MAX_CHID) | |
291 | // Disable channel | 317 | return -ERANGE; |
292 | chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel)); | 318 | // Read current configuration |
293 | chan.enable_set = true; | 319 | if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1) |
294 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | 320 | return -EIO; |
321 | // Request enablement ||
322 | chan.enable_set = true; | ||
323 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | ||
324 | } else { | ||
325 | uint32_t runlist_reg_base, chram_base, channel_max; | ||
326 | runlist_channel_config_t channel_config; | ||
327 | channel_ctrl_ga100_t chan; | ||
328 | // (Ab)use the PDE_DATA field for the runlist ID | ||
329 | if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base))) | ||
330 | return err; | ||
331 | // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere | ||
332 | if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1) | ||
333 | return -EIO; | ||
334 | channel_max = 1u << channel_config.num_channels_log2; | ||
335 | if (target_channel >= channel_max) | ||
336 | return -ERANGE; | ||
337 | chram_base = (uint32_t)channel_config.bar0_offset << 4; | ||
338 | // Writing zeros to any field of the Ampere+ channel control structure | ||
339 | // does nothing, so don't bother to read the structure first, and just | ||
340 | // write zeros to all the fields we don't care about. | ||
341 | chan.raw = 0; | ||
342 | chan.enable = 1; | ||
343 | nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw); | ||
344 | } | ||
295 | 345 | ||
296 | return count; | 346 | return count; |
297 | } | 347 | } |
@@ -301,52 +351,84 @@ struct file_operations enable_channel_file_ops = { | |||
301 | .llseek = default_llseek, | 351 | .llseek = default_llseek, |
302 | }; | 352 | }; |
303 | 353 | ||
304 | // Note: Operates only on runlist 0 (Compute/Graphics) | 354 | // Tested working on Pascal (gp106) through Ada (ad102) |
305 | ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, | 355 | ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, |
306 | size_t count, loff_t *off) { | 356 | size_t count, loff_t *off) { |
307 | uint32_t target_tsgid; | 357 | uint32_t target_tsgid, target_runlist, channel_regs_base; |
308 | struct gv100_runlist_chan* chan; | 358 | struct gv100_runlist_chan* chan; |
309 | channel_ctrl_t chan_ctl; | 359 | channel_ctrl_t chan_ctl; |
360 | channel_ctrl_ga100_t chan_ctl_ga100; | ||
310 | struct runlist_iter rl_iter; | 361 | struct runlist_iter rl_iter; |
311 | int err; | ||
312 | loff_t pos = 0; | 362 | loff_t pos = 0; |
313 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | 363 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; |
314 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 364 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
315 | err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); | 365 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); |
316 | if (err) | 366 | if (err) |
317 | return err; | 367 | return err; |
318 | 368 | ||
319 | if (target_tsgid > MAX_TSGID) | 369 | if (target_tsgid > MAX_TSGID) |
320 | return -ERANGE; | 370 | return -ERANGE; |
321 | 371 | ||
322 | err = get_runlist_iter(g, 0, &rl_iter); | 372 | // (Ab)use the PDE_DATA field for the runlist ID |
323 | if (err) | 373 | target_runlist = file2gpuidx(f); |
374 | |||
375 | if ((err = get_runlist_iter(g, target_runlist, &rl_iter))) | ||
324 | return err; | 376 | return err; |
325 | 377 | ||
378 | // On Ampere, TSG and Channel IDs are only unique per-runlist, so we need | ||
379 | // to pull the per-runlist copy of Channel RAM. | ||
380 | if (g->chip_id >= NV_CHIP_ID_AMPERE) { | ||
381 | uint32_t runlist_regs_base; | ||
382 | runlist_channel_config_t chan_config; | ||
383 | if ((err = get_runlist_ram(g, target_runlist, &runlist_regs_base))) | ||
384 | return err; | ||
385 | // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere | ||
386 | if ((chan_config.raw = nvdebug_readl(g, runlist_regs_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1) | ||
387 | return -EIO; | ||
388 | channel_regs_base = (uint32_t)chan_config.bar0_offset << 4; | ||
389 | } | ||
390 | |||
326 | // Iterate through all TSGs | 391 | // Iterate through all TSGs |
327 | while (pos < rl_iter.len) { | 392 | while (pos < rl_iter.len) { |
328 | if (tsgid(g, rl_iter.curr_entry) == target_tsgid) { | 393 | bool enable = false; |
329 | // Enable channels of target TSG | 394 | if (tsgid(g, rl_iter.curr_entry) == target_tsgid) |
330 | for_chan_in_tsg(g, chan, rl_iter.curr_entry) { | 395 | enable = true; |
331 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); | 396 | |
332 | chan_ctl.enable_set = true; | 397 | // Either enable or disable all channels of each TSG, dependent on if |
333 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); | 398 | // they are contained within the target TSG or not. |
334 | } | 399 | for_chan_in_tsg(g, chan, rl_iter.curr_entry) { |
335 | } else { | 400 | if (g->chip_id < NV_CHIP_ID_AMPERE) { |
336 | // XXX: Fix for bare channels. Maybe a "for_chan_until_tsg" macro? | 401 | // Read, update, write for PCCSR |
337 | // Disable all other channels | 402 | if ((chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)))) == -1) |
338 | // (This is how the Jetson nvgpu driver disables TSGs) | 403 | return -EIO; |
339 | for_chan_in_tsg(g, chan, rl_iter.curr_entry) { | 404 | if (enable) |
340 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); | 405 | chan_ctl.enable_set = true; |
341 | chan_ctl.enable_clear = true; | 406 | else |
342 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); | 407 | chan_ctl.enable_clear = true; |
408 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)), chan_ctl.raw); | ||
409 | } else { | ||
410 | // Writing a 0 does nothing on Ampere+, so we can just write | ||
411 | chan_ctl_ga100.raw = 0; | ||
412 | chan_ctl_ga100.is_write_one_clears_bits = !enable; | ||
413 | chan_ctl_ga100.enable = true; | ||
414 | nvdebug_writel(g, channel_regs_base + sizeof(chan_ctl_ga100) * chid(g, chan), chan_ctl_ga100.raw); | ||
343 | } | 415 | } |
344 | } | 416 | } |
345 | pos += 1 + tsg_length(g, rl_iter.curr_entry); | 417 | pos += 1 + tsg_length(g, rl_iter.curr_entry); |
346 | rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry); | 418 | rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry); |
419 | |||
420 | // TODO: Fix the above for bare channels. Add "for_chan_until_tsg"? | ||
347 | } | 421 | } |
348 | // Trigger a runlist-level preempt to switch to `target_tsgid` | 422 | |
349 | if ((err = preempt_runlist(g, 0))) | 423 | // Resubmit the runlist to ensure that changes to channel enablement are |
424 | // picked up on Turing+ GPUs (channel enablements may not be otherwise). | ||
425 | if (g->chip_id >= NV_CHIP_ID_TURING) | ||
426 | if ((err = resubmit_runlist(g, target_runlist))) | ||
427 | return err; | ||
428 | |||
429 | // Trigger a runlist-level preempt to stop whatever was running, triggering | ||
430 | // the runlist scheduler to select and run the next-enabled channel. | ||
431 | if ((err = preempt_runlist(g, target_runlist))) | ||
350 | return err; | 432 | return err; |
351 | 433 | ||
352 | return count; | 434 | return count; |