diff options
Diffstat (limited to 'runlist_procfs.c')
-rw-r--r-- | runlist_procfs.c | 198 |
1 files changed, 140 insertions, 58 deletions
diff --git a/runlist_procfs.c b/runlist_procfs.c index c1cfc87..b2159f6 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
@@ -199,11 +199,11 @@ struct file_operations runlist_file_ops = { | |||
199 | }; | 199 | }; |
200 | 200 | ||
201 | ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, | 201 | ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, |
202 | size_t count, loff_t *off) { | 202 | size_t count, loff_t *off) { |
203 | uint32_t target_tsgid; | 203 | uint32_t target_tsgid, target_runlist_ram; |
204 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; | ||
204 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 205 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
205 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); | 206 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); |
206 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
207 | if (err) | 207 | if (err) |
208 | return err; | 208 | return err; |
209 | 209 | ||
@@ -211,8 +211,15 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, | |||
211 | if (target_tsgid > MAX_TSGID) | 211 | if (target_tsgid > MAX_TSGID) |
212 | return -ERANGE; | 212 | return -ERANGE; |
213 | 213 | ||
214 | // (Ab)use the PDE_DATA field for the index of the Runlist RAM that this | |
215 | // TSG ID is scoped to (only applicable on Ampere+) | |
216 | if (g->chip_id >= NV_CHIP_ID_AMPERE) | ||
217 | target_runlist_ram = file2gpuidx(f); | ||
218 | else | ||
219 | target_runlist_ram = 0; | ||
220 | |||
214 | // Execute preemption | 221 | // Execute preemption |
215 | if ((err = preempt_tsg(g, target_tsgid))) | 222 | if ((err = preempt_tsg(g, target_runlist_ram, target_tsgid))) |
216 | return err; | 223 | return err; |
217 | 224 | ||
218 | return count; | 225 | return count; |
@@ -223,13 +230,12 @@ struct file_operations preempt_tsg_file_ops = { | |||
223 | .llseek = default_llseek, | 230 | .llseek = default_llseek, |
224 | }; | 231 | }; |
225 | 232 | ||
226 | |||
227 | ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer, | 233 | ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer, |
228 | size_t count, loff_t *off) { | 234 | size_t count, loff_t *off) { |
229 | uint32_t target_runlist; | 235 | uint32_t target_runlist; |
236 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
230 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 237 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
231 | int err = kstrtou32_from_user(buffer, count, 0, &target_runlist); | 238 | int err = kstrtou32_from_user(buffer, count, 0, &target_runlist); |
232 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
233 | if (err) | 239 | if (err) |
234 | return err; | 240 | return err; |
235 | 241 | ||
@@ -245,26 +251,48 @@ struct file_operations resubmit_runlist_file_ops = { | |||
245 | .llseek = default_llseek, | 251 | .llseek = default_llseek, |
246 | }; | 252 | }; |
247 | 253 | ||
254 | |||
248 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, | 255 | ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, |
249 | size_t count, loff_t *off) { | 256 | size_t count, loff_t *off) { |
250 | uint32_t target_channel; | 257 | uint32_t target_channel; |
251 | channel_ctrl_t chan; | 258 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; |
252 | int err; | ||
253 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
254 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 259 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
255 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); | 260 | int err = kstrtou32_from_user(buffer, count, 0, &target_channel); |
256 | if (err) | 261 | if (err) |
257 | return err; | 262 | return err; |
258 | 263 | ||
259 | if (target_channel > MAX_CHID) | 264 | if (g->chip_id < NV_CHIP_ID_AMPERE) { |
260 | return -ERANGE; | 265 | channel_ctrl_t chan; |
261 | 266 | if (target_channel > MAX_CHID) | |
262 | // Read current configuration | 267 | return -ERANGE; |
263 | if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1) | 268 | // Read current configuration |
264 | return -EIO; | 269 | if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1) |
265 | // Request disablement | 270 | return -EIO; |
266 | chan.enable_clear = true; | 271 | // Request disablement |
267 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | 272 | chan.enable_clear = true; |
273 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | ||
274 | } else { | ||
275 | uint32_t runlist_reg_base, chram_base, channel_max; | ||
276 | runlist_channel_config_t channel_config; | ||
277 | channel_ctrl_ga100_t chan; | ||
278 | // (Ab)use the PDE_DATA field for the runlist ID | ||
279 | if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base))) | ||
280 | return err; | ||
281 | // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere | ||
282 | if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1) | ||
283 | return -EIO; | ||
284 | channel_max = 1u << channel_config.num_channels_log2; | ||
285 | if (target_channel >= channel_max) | ||
286 | return -ERANGE; | ||
287 | chram_base = (uint32_t)channel_config.bar0_offset << 4; | ||
288 | // Writing zeros to any field of the Ampere+ channel control structure | ||
289 | // does nothing, so don't bother to read the structure first, and just | ||
290 | // write zeros to all the fields we don't care about. | ||
291 | chan.raw = 0; | ||
292 | chan.is_write_one_clears_bits = 1; // Invert meaning of writing 1 | ||
293 | chan.enable = 1; | ||
294 | nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw); | ||
295 | } | ||
268 | 296 | ||
269 | return count; | 297 | return count; |
270 | } | 298 | } |
@@ -275,23 +303,45 @@ struct file_operations disable_channel_file_ops = { | |||
275 | }; | 303 | }; |
276 | 304 | ||
277 | ssize_t enable_channel_file_write(struct file *f, const char __user *buffer, | 305 | ssize_t enable_channel_file_write(struct file *f, const char __user *buffer, |
278 | size_t count, loff_t *off) { | 306 | size_t count, loff_t *off) { |
279 | uint32_t target_channel; | 307 | uint32_t target_channel; |
280 | channel_ctrl_t chan; | 308 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; |
281 | int err; | ||
282 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
283 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 309 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
284 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); | 310 | int err = kstrtou32_from_user(buffer, count, 0, &target_channel); |
285 | if (err) | 311 | if (err) |
286 | return err; | 312 | return err; |
287 | 313 | ||
288 | if (target_channel > MAX_CHID) | 314 | if (g->chip_id < NV_CHIP_ID_AMPERE) { |
289 | return -ERANGE; | 315 | channel_ctrl_t chan; |
290 | 316 | if (target_channel > MAX_CHID) | |
291 | // Disable channel | 317 | return -ERANGE; |
292 | chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel)); | 318 | // Read current configuration |
293 | chan.enable_set = true; | 319 | if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1) |
294 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | 320 | return -EIO; |
321 | // Request enablement | |
322 | chan.enable_set = true; | ||
323 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | ||
324 | } else { | ||
325 | uint32_t runlist_reg_base, chram_base, channel_max; | ||
326 | runlist_channel_config_t channel_config; | ||
327 | channel_ctrl_ga100_t chan; | ||
328 | // (Ab)use the PDE_DATA field for the runlist ID | ||
329 | if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base))) | ||
330 | return err; | ||
331 | // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere | ||
332 | if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1) | ||
333 | return -EIO; | ||
334 | channel_max = 1u << channel_config.num_channels_log2; | ||
335 | if (target_channel >= channel_max) | ||
336 | return -ERANGE; | ||
337 | chram_base = (uint32_t)channel_config.bar0_offset << 4; | ||
338 | // Writing zeros to any field of the Ampere+ channel control structure | ||
339 | // does nothing, so don't bother to read the structure first, and just | ||
340 | // write zeros to all the fields we don't care about. | ||
341 | chan.raw = 0; | ||
342 | chan.enable = 1; | ||
343 | nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw); | ||
344 | } | ||
295 | 345 | ||
296 | return count; | 346 | return count; |
297 | } | 347 | } |
@@ -301,52 +351,84 @@ struct file_operations enable_channel_file_ops = { | |||
301 | .llseek = default_llseek, | 351 | .llseek = default_llseek, |
302 | }; | 352 | }; |
303 | 353 | ||
304 | // Note: Operates only on runlist 0 (Compute/Graphics) | 354 | // Tested working on Pascal (gp106) through Ada (ad102) |
305 | ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, | 355 | ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, |
306 | size_t count, loff_t *off) { | 356 | size_t count, loff_t *off) { |
307 | uint32_t target_tsgid; | 357 | uint32_t target_tsgid, target_runlist, channel_regs_base; |
308 | struct gv100_runlist_chan* chan; | 358 | struct gv100_runlist_chan* chan; |
309 | channel_ctrl_t chan_ctl; | 359 | channel_ctrl_t chan_ctl; |
360 | channel_ctrl_ga100_t chan_ctl_ga100; | ||
310 | struct runlist_iter rl_iter; | 361 | struct runlist_iter rl_iter; |
311 | int err; | ||
312 | loff_t pos = 0; | 362 | loff_t pos = 0; |
313 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | 363 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; |
314 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 364 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
315 | err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); | 365 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); |
316 | if (err) | 366 | if (err) |
317 | return err; | 367 | return err; |
318 | 368 | ||
319 | if (target_tsgid > MAX_TSGID) | 369 | if (target_tsgid > MAX_TSGID) |
320 | return -ERANGE; | 370 | return -ERANGE; |
321 | 371 | ||
322 | err = get_runlist_iter(g, 0, &rl_iter); | 372 | // (Ab)use the PDE_DATA field for the runlist ID |
323 | if (err) | 373 | target_runlist = file2gpuidx(f); |
374 | |||
375 | if ((err = get_runlist_iter(g, target_runlist, &rl_iter))) | ||
324 | return err; | 376 | return err; |
325 | 377 | ||
378 | // On Ampere, TSG and Channel IDs are only unique per-runlist, so we need | ||
379 | // to pull the per-runlist copy of Channel RAM. | ||
380 | if (g->chip_id >= NV_CHIP_ID_AMPERE) { | ||
381 | uint32_t runlist_regs_base; | ||
382 | runlist_channel_config_t chan_config; | ||
383 | if ((err = get_runlist_ram(g, target_runlist, &runlist_regs_base))) | ||
384 | return err; | ||
385 | // Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere | ||
386 | if ((chan_config.raw = nvdebug_readl(g, runlist_regs_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1) | ||
387 | return -EIO; | ||
388 | channel_regs_base = (uint32_t)chan_config.bar0_offset << 4; | ||
389 | } | ||
390 | |||
326 | // Iterate through all TSGs | 391 | // Iterate through all TSGs |
327 | while (pos < rl_iter.len) { | 392 | while (pos < rl_iter.len) { |
328 | if (tsgid(g, rl_iter.curr_entry) == target_tsgid) { | 393 | bool enable = false; |
329 | // Enable channels of target TSG | 394 | if (tsgid(g, rl_iter.curr_entry) == target_tsgid) |
330 | for_chan_in_tsg(g, chan, rl_iter.curr_entry) { | 395 | enable = true; |
331 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); | 396 | |
332 | chan_ctl.enable_set = true; | 397 | // Either enable or disable all channels of each TSG, dependent on if |
333 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); | 398 | // they are contained within the target TSG or not. |
334 | } | 399 | for_chan_in_tsg(g, chan, rl_iter.curr_entry) { |
335 | } else { | 400 | if (g->chip_id < NV_CHIP_ID_AMPERE) { |
336 | // XXX: Fix for bare channels. Maybe a "for_chan_until_tsg" macro? | 401 | // Read, update, write for PCCSR |
337 | // Disable all other channels | 402 | if ((chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)))) == -1) |
338 | // (This is how the Jetson nvgpu driver disables TSGs) | 403 | return -EIO; |
339 | for_chan_in_tsg(g, chan, rl_iter.curr_entry) { | 404 | if (enable) |
340 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); | 405 | chan_ctl.enable_set = true; |
341 | chan_ctl.enable_clear = true; | 406 | else |
342 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); | 407 | chan_ctl.enable_clear = true; |
408 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)), chan_ctl.raw); | ||
409 | } else { | ||
410 | // Writing a 0 does nothing on Ampere+, so we can write without reading first | |
411 | chan_ctl_ga100.raw = 0; | ||
412 | chan_ctl_ga100.is_write_one_clears_bits = !enable; | ||
413 | chan_ctl_ga100.enable = true; | ||
414 | nvdebug_writel(g, channel_regs_base + sizeof(chan_ctl_ga100) * chid(g, chan), chan_ctl_ga100.raw); | ||
343 | } | 415 | } |
344 | } | 416 | } |
345 | pos += 1 + tsg_length(g, rl_iter.curr_entry); | 417 | pos += 1 + tsg_length(g, rl_iter.curr_entry); |
346 | rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry); | 418 | rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry); |
419 | |||
420 | // TODO: Fix the above for bare channels. Add "for_chan_until_tsg"? | ||
347 | } | 421 | } |
348 | // Trigger a runlist-level preempt to switch to `target_tsgid` | 422 | |
349 | if ((err = preempt_runlist(g, 0))) | 423 | // Resubmit the runlist to ensure that changes to channel enablement are |
424 | // picked up on Turing+ GPUs (channel enablements may not be otherwise). | ||
425 | if (g->chip_id >= NV_CHIP_ID_TURING) | ||
426 | if ((err = resubmit_runlist(g, target_runlist))) | ||
427 | return err; | ||
428 | |||
429 | // Trigger a runlist-level preempt to stop whatever was running, triggering | ||
430 | // the runlist scheduler to select and run the next-enabled channel. | ||
431 | if ((err = preempt_runlist(g, target_runlist))) | ||
350 | return err; | 432 | return err; |
351 | 433 | ||
352 | return count; | 434 | return count; |