author     Joshua Bakita <bakitajoshua@gmail.com>    2024-09-19 12:50:02 -0400
committer  Joshua Bakita <bakitajoshua@gmail.com>    2024-09-19 13:59:56 -0400
commit     48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (patch)
tree       d63c3863e9b187fb1f62d2f3c58b8c6a9568b6cc /runlist_procfs.c
parent     ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (diff)
Ampere: disable/enable_channel, preempt/switch_to_tsg, and resubmit_runlist
**Modifies the user API from `echo 1 > /proc/gpuX/switch_to_tsg` to
`echo 1 > /proc/gpuX/runlist0/switch_to_tsg` (for example, to switch to
TSG 1 on runlist 0 of GPU X on a pre-Ampere GPU).**
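For example, with an illustrative GPU index of 0, the old GPU-global path
versus the new per-runlist path:
```
# Illustrative GPU/runlist indices; substitute your own.
# Old API (GPU-global):
echo 1 > /proc/gpu0/switch_to_tsg
# New API (per-runlist):
echo 1 > /proc/gpu0/runlist0/switch_to_tsg
```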
Feature changes:
- switch_to_tsg only makes sense at a per-runlist level. Before, it
always operated on runlist0; this commit allows operating on any
runlist by moving the API to the per-runlist paths.
- On Ampere+, channel and TSG IDs are per-runlist, and no longer
GPU-global. Consequently, the disable/enable_channel and
preempt_tsg APIs have been moved from GPU-global to per-runlist
paths on Ampere+.
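For example, on an Ampere+ GPU the per-runlist paths look like the following
(GPU and runlist indices, and the channel/TSG IDs, are illustrative):
```
# Illustrative indices and IDs; channel/TSG IDs are scoped to the runlist.
echo 3 > /proc/gpu0/runlist1/preempt_tsg      # preempt TSG 3 on runlist 1 of GPU 0
echo 4 > /proc/gpu0/runlist1/disable_channel  # disable channel 4 on runlist 1
echo 4 > /proc/gpu0/runlist1/enable_channel   # re-enable channel 4 on runlist 1
```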
Bug fixes:
- `preempt_runlist()` is now supported on Maxwell and Pascal.
- `resubmit_runlist()` detects too-old GPUs.
- MAX_CHID corrected from 512 to 511 and documented.
- switch_to_tsg now includes a runlist resubmit, which appears to be
necessary on Turing+ GPUs.
Tested on GK104 (Quadro K5000), GM204 (GTX 970), GP106 (GTX 1060 3GB),
GP104 (GTX 1080 Ti), GP10B (Jetson TX2), GV11B (Jetson Xavier), GV100
(Titan V), TU102 (RTX 2080 Ti), and AD102 (RTX 6000 Ada).
Diffstat (limited to 'runlist_procfs.c')
-rw-r--r--  runlist_procfs.c  198
1 file changed, 140 insertions, 58 deletions
diff --git a/runlist_procfs.c b/runlist_procfs.c
index c1cfc87..b2159f6 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -199,11 +199,11 @@ struct file_operations runlist_file_ops = {
 };
 
 ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
                                size_t count, loff_t *off) {
-	uint32_t target_tsgid;
+	uint32_t target_tsgid, target_runlist_ram;
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
 	int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	if (err)
 		return err;
 
@@ -211,8 +211,15 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
 	if (target_tsgid > MAX_TSGID)
 		return -ERANGE;
 
+	// (Ab)use the PDE_DATA field for the index into which Runlist RAM this TSG
+	// ID is scoped to (only applicable on Ampere+)
+	if (g->chip_id >= NV_CHIP_ID_AMPERE)
+		target_runlist_ram = file2gpuidx(f);
+	else
+		target_runlist_ram = 0;
+
 	// Execute preemption
-	if ((err = preempt_tsg(g, target_tsgid)))
+	if ((err = preempt_tsg(g, target_runlist_ram, target_tsgid)))
 		return err;
 
 	return count;
@@ -223,13 +230,12 @@ struct file_operations preempt_tsg_file_ops = {
 	.llseek = default_llseek,
 };
 
-
 ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer,
                                     size_t count, loff_t *off) {
 	uint32_t target_runlist;
+	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
 	int err = kstrtou32_from_user(buffer, count, 0, &target_runlist);
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	if (err)
 		return err;
 
@@ -245,26 +251,48 @@ struct file_operations resubmit_runlist_file_ops = {
 	.llseek = default_llseek,
 };
 
+
 ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
                                    size_t count, loff_t *off) {
 	uint32_t target_channel;
-	channel_ctrl_t chan;
-	int err;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_channel);
 	if (err)
 		return err;
 
-	if (target_channel > MAX_CHID)
-		return -ERANGE;
-
-	// Read current configuration
-	if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
-		return -EIO;
-	// Request disablement
-	chan.enable_clear = true;
-	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	if (g->chip_id < NV_CHIP_ID_AMPERE) {
+		channel_ctrl_t chan;
+		if (target_channel > MAX_CHID)
+			return -ERANGE;
+		// Read current configuration
+		if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
+			return -EIO;
+		// Request disablement
+		chan.enable_clear = true;
+		nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	} else {
+		uint32_t runlist_reg_base, chram_base, channel_max;
+		runlist_channel_config_t channel_config;
+		channel_ctrl_ga100_t chan;
+		// (Ab)use the PDE_DATA field for the runlist ID
+		if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_max = 1u << channel_config.num_channels_log2;
+		if (target_channel >= channel_max)
+			return -ERANGE;
+		chram_base = (uint32_t)channel_config.bar0_offset << 4;
+		// Writing zeros to any field of the Ampere+ channel control structure
+		// does nothing, so don't bother to read the structure first, and just
+		// write zeros to all the fields we don't care about.
+		chan.raw = 0;
+		chan.is_write_one_clears_bits = 1; // Invert meaning of writing 1
+		chan.enable = 1;
+		nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw);
+	}
 
 	return count;
 }
@@ -275,23 +303,45 @@ struct file_operations disable_channel_file_ops = {
 };
 
 ssize_t enable_channel_file_write(struct file *f, const char __user *buffer,
                                   size_t count, loff_t *off) {
 	uint32_t target_channel;
-	channel_ctrl_t chan;
-	int err;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_channel);
 	if (err)
 		return err;
 
-	if (target_channel > MAX_CHID)
-		return -ERANGE;
-
-	// Disable channel
-	chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
-	chan.enable_set = true;
-	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	if (g->chip_id < NV_CHIP_ID_AMPERE) {
+		channel_ctrl_t chan;
+		if (target_channel > MAX_CHID)
+			return -ERANGE;
+		// Read current configuration
+		if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
+			return -EIO;
+		// Disable channel
+		chan.enable_set = true;
+		nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	} else {
+		uint32_t runlist_reg_base, chram_base, channel_max;
+		runlist_channel_config_t channel_config;
+		channel_ctrl_ga100_t chan;
+		// (Ab)use the PDE_DATA field for the runlist ID
+		if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_max = 1u << channel_config.num_channels_log2;
+		if (target_channel >= channel_max)
+			return -ERANGE;
+		chram_base = (uint32_t)channel_config.bar0_offset << 4;
+		// Writing zeros to any field of the Ampere+ channel control structure
+		// does nothing, so don't bother to read the structure first, and just
+		// write zeros to all the fields we don't care about.
+		chan.raw = 0;
+		chan.enable = 1;
+		nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw);
+	}
 
 	return count;
 }
@@ -301,52 +351,84 @@ struct file_operations enable_channel_file_ops = {
 	.llseek = default_llseek,
 };
 
-// Note: Operates only on runlist 0 (Compute/Graphics)
+// Tested working on Pascal (gp106) through Ada (ad102)
 ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
                                  size_t count, loff_t *off) {
-	uint32_t target_tsgid;
+	uint32_t target_tsgid, target_runlist, channel_regs_base;
 	struct gv100_runlist_chan* chan;
 	channel_ctrl_t chan_ctl;
+	channel_ctrl_ga100_t chan_ctl_ga100;
 	struct runlist_iter rl_iter;
-	int err;
 	loff_t pos = 0;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
 	if (err)
 		return err;
 
 	if (target_tsgid > MAX_TSGID)
 		return -ERANGE;
 
-	err = get_runlist_iter(g, 0, &rl_iter);
-	if (err)
+	// (Ab)use the PDE_DATA field for the runlist ID
+	target_runlist = file2gpuidx(f);
+
+	if ((err = get_runlist_iter(g, target_runlist, &rl_iter)))
 		return err;
 
+	// On Ampere, TSG and Channel IDs are only unique per-runlist, so we need
+	// to pull the per-runlist copy of Channel RAM.
+	if (g->chip_id >= NV_CHIP_ID_AMPERE) {
+		uint32_t runlist_regs_base;
+		runlist_channel_config_t chan_config;
+		if ((err = get_runlist_ram(g, target_runlist, &runlist_regs_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((chan_config.raw = nvdebug_readl(g, runlist_regs_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_regs_base = (uint32_t)chan_config.bar0_offset << 4;
+	}
+
 	// Iterate through all TSGs
 	while (pos < rl_iter.len) {
-		if (tsgid(g, rl_iter.curr_entry) == target_tsgid) {
-			// Enable channels of target TSG
-			for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
-				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
-				chan_ctl.enable_set = true;
-				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
-			}
-		} else {
-			// XXX: Fix for bare channels. Maybe a "for_chan_until_tsg" macro?
-			// Disable all other channels
-			// (This is how the Jetson nvgpu driver disables TSGs)
-			for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
-				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
-				chan_ctl.enable_clear = true;
-				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
+		bool enable = false;
+		if (tsgid(g, rl_iter.curr_entry) == target_tsgid)
+			enable = true;
+
+		// Either enable or disable all channels of each TSG, dependent on if
+		// they are contained within the target TSG or not.
+		for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
+			if (g->chip_id < NV_CHIP_ID_AMPERE) {
+				// Read, update, write for PCCSR
+				if ((chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)))) == -1)
+					return -EIO;
+				if (enable)
+					chan_ctl.enable_set = true;
+				else
+					chan_ctl.enable_clear = true;
+				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)), chan_ctl.raw);
+			} else {
+				// Writing a 0 does nothing on Ampere+, so we can just write
+				chan_ctl_ga100.raw = 0;
+				chan_ctl_ga100.is_write_one_clears_bits = !enable;
+				chan_ctl_ga100.enable = true;
+				nvdebug_writel(g, channel_regs_base + sizeof(chan_ctl_ga100) * chid(g, chan), chan_ctl_ga100.raw);
 			}
 		}
 		pos += 1 + tsg_length(g, rl_iter.curr_entry);
 		rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry);
+
+		// TODO: Fix the above for bare channels. Add "for_chan_until_tsg"?
 	}
-	// Trigger a runlist-level preempt to switch to `target_tsgid`
-	if ((err = preempt_runlist(g, 0)))
+
+	// Resubmit the runlist to ensure that changes to channel enablement are
+	// picked up on Turing+ GPUs (channel enablements may not be otherwise).
+	if (g->chip_id >= NV_CHIP_ID_TURING)
+		if ((err = resubmit_runlist(g, target_runlist)))
+			return err;
+
+	// Trigger a runlist-level preempt to stop whatever was running, triggering
+	// the runlist scheduler to select and run the next-enabled channel.
+	if ((err = preempt_runlist(g, target_runlist)))
 		return err;
 
 	return count;