path: root/runlist_procfs.c
author	Joshua Bakita <bakitajoshua@gmail.com>	2024-09-19 12:50:02 -0400
committer	Joshua Bakita <bakitajoshua@gmail.com>	2024-09-19 13:59:56 -0400
commit	48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (patch)
tree	d63c3863e9b187fb1f62d2f3c58b8c6a9568b6cc /runlist_procfs.c
parent	ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (diff)
Ampere: disable/enable_channel, preempt/switch_to_tsg, and resubmit_runlist
**Modifies the user API from `echo 1 > /proc/gpuX/switch_to_tsg` to `echo 1 > /proc/gpuX/runlist0/switch_to_tsg` to switch to TSG 1 on runlist 0 on GPU X for pre-Ampere GPUs (for example).**

Feature changes:
- switch_to_tsg only makes sense on a per-runlist level. Before, this always operated on runlist0; this commit allows operating on any runlist by moving the API to the per-runlist paths.
- On Ampere+, channel and TSG IDs are per-runlist, and no longer GPU-global. Consequently, the disable/enable_channel and preempt_tsg APIs have been moved from GPU-global to per-runlist paths on Ampere+.

Bug fixes:
- `preempt_runlist()` is now supported on Maxwell and Pascal.
- `resubmit_runlist()` detects too-old GPUs.
- MAX_CHID corrected from 512 to 511 and documented.
- switch_to_tsg now includes a runlist resubmit, which appears to be necessary on Turing+ GPUs.

Tested on GK104 (Quadro K5000), GM204 (GTX 970), GP106 (GTX 1060 3GB), GP104 (GTX 1080 Ti), GP10B (Jetson TX2), GV11B (Jetson Xavier), GV100 (Titan V), TU102 (RTX 2080 Ti), and AD102 (RTX 6000 Ada).
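For illustration only (not part of this commit), a minimal userspace sketch that drives the new per-runlist node. The `/proc/gpu0/runlist0/switch_to_tsg` path is assumed from the `/proc/gpuX/runlistN/` pattern described above; the write handler parses the value with kstrtou32 and reports failures (-ERANGE, -EIO, etc.) through the write itself, which this program surfaces at fclose():

/* Hypothetical usage sketch: ask nvdebug to switch runlist 0 of GPU 0 to a
 * given TSG ID by writing it to the per-runlist switch_to_tsg node. */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
	const char *path = "/proc/gpu0/runlist0/switch_to_tsg"; /* assumed path */
	int tsgid = (argc > 1) ? atoi(argv[1]) : 1;
	FILE *f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "%d\n", tsgid);
	/* Errors from the kernel-side write handler surface when the buffer is
	 * flushed, i.e. at fclose(). */
	if (fclose(f)) {
		perror(path);
		return 1;
	}
	return 0;
}

From a shell this is equivalent to `echo 1 > /proc/gpu0/runlist0/switch_to_tsg`.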
Diffstat (limited to 'runlist_procfs.c')
-rw-r--r--	runlist_procfs.c	198
1 file changed, 140 insertions(+), 58 deletions(-)
diff --git a/runlist_procfs.c b/runlist_procfs.c
index c1cfc87..b2159f6 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -199,11 +199,11 @@ struct file_operations runlist_file_ops = {
 };
 
 ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
                                size_t count, loff_t *off) {
-	uint32_t target_tsgid;
+	uint32_t target_tsgid, target_runlist_ram;
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
 	int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	if (err)
 		return err;
 
@@ -211,8 +211,15 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
 	if (target_tsgid > MAX_TSGID)
 		return -ERANGE;
 
+	// (Ab)use the PDE_DATA field for the index into which Runlist RAM this TSG
+	// ID is scoped to (only applicable on Ampere+)
+	if (g->chip_id >= NV_CHIP_ID_AMPERE)
+		target_runlist_ram = file2gpuidx(f);
+	else
+		target_runlist_ram = 0;
+
 	// Execute preemption
-	if ((err = preempt_tsg(g, target_tsgid)))
+	if ((err = preempt_tsg(g, target_runlist_ram, target_tsgid)))
 		return err;
 
 	return count;
@@ -223,13 +230,12 @@ struct file_operations preempt_tsg_file_ops = {
 	.llseek = default_llseek,
 };
 
-
 ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer,
                                     size_t count, loff_t *off) {
 	uint32_t target_runlist;
+	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
 	int err = kstrtou32_from_user(buffer, count, 0, &target_runlist);
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	if (err)
 		return err;
 
@@ -245,26 +251,48 @@ struct file_operations resubmit_runlist_file_ops = {
 	.llseek = default_llseek,
 };
 
+
 ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
                                    size_t count, loff_t *off) {
 	uint32_t target_channel;
-	channel_ctrl_t chan;
-	int err;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_channel);
 	if (err)
 		return err;
 
-	if (target_channel > MAX_CHID)
-		return -ERANGE;
-
-	// Read current configuration
-	if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
-		return -EIO;
-	// Request disablement
-	chan.enable_clear = true;
-	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	if (g->chip_id < NV_CHIP_ID_AMPERE) {
+		channel_ctrl_t chan;
+		if (target_channel > MAX_CHID)
+			return -ERANGE;
+		// Read current configuration
+		if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
+			return -EIO;
+		// Request disablement
+		chan.enable_clear = true;
+		nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	} else {
+		uint32_t runlist_reg_base, chram_base, channel_max;
+		runlist_channel_config_t channel_config;
+		channel_ctrl_ga100_t chan;
+		// (Ab)use the PDE_DATA field for the runlist ID
+		if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_max = 1u << channel_config.num_channels_log2;
+		if (target_channel >= channel_max)
+			return -ERANGE;
+		chram_base = (uint32_t)channel_config.bar0_offset << 4;
+		// Writing zeros to any field of the Ampere+ channel control structure
+		// does nothing, so don't bother to read the structure first, and just
+		// write zeros to all the fields we don't care about.
+		chan.raw = 0;
+		chan.is_write_one_clears_bits = 1; // Invert meaning of writing 1
+		chan.enable = 1;
+		nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw);
+	}
 
 	return count;
 }
@@ -275,23 +303,45 @@ struct file_operations disable_channel_file_ops = {
 };
 
 ssize_t enable_channel_file_write(struct file *f, const char __user *buffer,
                                   size_t count, loff_t *off) {
 	uint32_t target_channel;
-	channel_ctrl_t chan;
-	int err;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_channel);
 	if (err)
 		return err;
 
-	if (target_channel > MAX_CHID)
-		return -ERANGE;
-
-	// Disable channel
-	chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
-	chan.enable_set = true;
-	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	if (g->chip_id < NV_CHIP_ID_AMPERE) {
+		channel_ctrl_t chan;
+		if (target_channel > MAX_CHID)
+			return -ERANGE;
+		// Read current configuration
+		if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
+			return -EIO;
+		// Disable channel
+		chan.enable_set = true;
+		nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	} else {
+		uint32_t runlist_reg_base, chram_base, channel_max;
+		runlist_channel_config_t channel_config;
+		channel_ctrl_ga100_t chan;
+		// (Ab)use the PDE_DATA field for the runlist ID
+		if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_max = 1u << channel_config.num_channels_log2;
+		if (target_channel >= channel_max)
+			return -ERANGE;
+		chram_base = (uint32_t)channel_config.bar0_offset << 4;
+		// Writing zeros to any field of the Ampere+ channel control structure
+		// does nothing, so don't bother to read the structure first, and just
+		// write zeros to all the fields we don't care about.
+		chan.raw = 0;
+		chan.enable = 1;
+		nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw);
+	}
 
 	return count;
 }
@@ -301,52 +351,84 @@ struct file_operations enable_channel_file_ops = {
 	.llseek = default_llseek,
 };
 
-// Note: Operates only on runlist 0 (Compute/Graphics)
+// Tested working on Pascal (gp106) through Ada (ad102)
 ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
                                  size_t count, loff_t *off) {
-	uint32_t target_tsgid;
+	uint32_t target_tsgid, target_runlist, channel_regs_base;
 	struct gv100_runlist_chan* chan;
 	channel_ctrl_t chan_ctl;
+	channel_ctrl_ga100_t chan_ctl_ga100;
 	struct runlist_iter rl_iter;
-	int err;
 	loff_t pos = 0;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
 	if (err)
 		return err;
 
 	if (target_tsgid > MAX_TSGID)
 		return -ERANGE;
 
-	err = get_runlist_iter(g, 0, &rl_iter);
-	if (err)
+	// (Ab)use the PDE_DATA field for the runlist ID
+	target_runlist = file2gpuidx(f);
+
+	if ((err = get_runlist_iter(g, target_runlist, &rl_iter)))
 		return err;
 
+	// On Ampere, TSG and Channel IDs are only unique per-runlist, so we need
+	// to pull the per-runlist copy of Channel RAM.
+	if (g->chip_id >= NV_CHIP_ID_AMPERE) {
+		uint32_t runlist_regs_base;
+		runlist_channel_config_t chan_config;
+		if ((err = get_runlist_ram(g, target_runlist, &runlist_regs_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((chan_config.raw = nvdebug_readl(g, runlist_regs_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_regs_base = (uint32_t)chan_config.bar0_offset << 4;
+	}
+
 	// Iterate through all TSGs
 	while (pos < rl_iter.len) {
-		if (tsgid(g, rl_iter.curr_entry) == target_tsgid) {
-			// Enable channels of target TSG
-			for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
-				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
-				chan_ctl.enable_set = true;
-				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
-			}
-		} else {
-			// XXX: Fix for bare channels. Maybe a "for_chan_until_tsg" macro?
-			// Disable all other channels
-			// (This is how the Jetson nvgpu driver disables TSGs)
-			for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
-				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
-				chan_ctl.enable_clear = true;
-				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
+		bool enable = false;
+		if (tsgid(g, rl_iter.curr_entry) == target_tsgid)
+			enable = true;
+
+		// Either enable or disable all channels of each TSG, dependent on if
+		// they are contained within the target TSG or not.
+		for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
+			if (g->chip_id < NV_CHIP_ID_AMPERE) {
+				// Read, update, write for PCCSR
+				if ((chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)))) == -1)
+					return -EIO;
+				if (enable)
+					chan_ctl.enable_set = true;
+				else
+					chan_ctl.enable_clear = true;
+				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)), chan_ctl.raw);
+			} else {
+				// Writing a 0 does nothing on Ampere+, so we can just write
+				chan_ctl_ga100.raw = 0;
+				chan_ctl_ga100.is_write_one_clears_bits = !enable;
+				chan_ctl_ga100.enable = true;
+				nvdebug_writel(g, channel_regs_base + sizeof(chan_ctl_ga100) * chid(g, chan), chan_ctl_ga100.raw);
			}
 		}
 		pos += 1 + tsg_length(g, rl_iter.curr_entry);
 		rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry);
+
+		// TODO: Fix the above for bare channels. Add "for_chan_until_tsg"?
 	}
-	// Trigger a runlist-level preempt to switch to `target_tsgid`
-	if ((err = preempt_runlist(g, 0)))
+
+	// Resubmit the runlist to ensure that changes to channel enablement are
+	// picked up on Turing+ GPUs (channel enablements may not be otherwise).
+	if (g->chip_id >= NV_CHIP_ID_TURING)
+		if ((err = resubmit_runlist(g, target_runlist)))
+			return err;
+
+	// Trigger a runlist-level preempt to stop whatever was running, triggering
+	// the runlist scheduler to select and run the next-enabled channel.
+	if ((err = preempt_runlist(g, target_runlist)))
 		return err;
 
 	return count;
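The Ampere+ branches added to disable_channel_file_write, enable_channel_file_write, and switch_to_tsg_file_write above all repeat the same Channel RAM lookup: read NV_RUNLIST_CHANNEL_CONFIG_GA100 from the runlist's register space, shift bar0_offset left by 4 to get the Channel RAM base, then index by the per-runlist channel ID. A possible consolidation is sketched below; get_channel_ctrl_addr is a hypothetical helper, not part of this commit, and it only reuses types and accessors already present in nvdebug:

// Sketch: compute the BAR0 address of a channel's control entry in the
// per-runlist Channel RAM on Ampere+ (assumes the nvdebug definitions of
// runlist_channel_config_t, channel_ctrl_ga100_t, get_runlist_ram(),
// nvdebug_readl(), and NV_RUNLIST_CHANNEL_CONFIG_GA100 shown in this diff).
static int get_channel_ctrl_addr(struct nvdebug_state *g, uint32_t runlist_id,
                                 uint32_t channel_id, uint32_t *addr) {
	uint32_t runlist_reg_base;
	runlist_channel_config_t channel_config;
	int err;
	if ((err = get_runlist_ram(g, runlist_id, &runlist_reg_base)))
		return err;
	// Channel RAM location and size are described by a per-runlist register
	if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
		return -EIO;
	// Channel IDs are per-runlist on Ampere+; reject out-of-range IDs
	if (channel_id >= (1u << channel_config.num_channels_log2))
		return -ERANGE;
	// bar0_offset is in 16-byte units; entries are one channel_ctrl_ga100_t apart
	*addr = ((uint32_t)channel_config.bar0_offset << 4)
	        + sizeof(channel_ctrl_ga100_t) * channel_id;
	return 0;
}

With a helper like this, each Ampere+ branch would reduce to one address computation plus a single nvdebug_writel() of a channel_ctrl_ga100_t value (enable = 1, with is_write_one_clears_bits = 1 when disabling).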