author    Joshua Bakita <bakitajoshua@gmail.com>  2025-05-05 03:53:01 -0400
committer Joshua Bakita <bakitajoshua@gmail.com>  2025-05-05 03:53:13 -0400
commit    293430fcb5d4013b573556c58457ee706e482b7f (patch)
tree      9328fa680f55b4e1a08d24714275b8437be3be5d /runlist_procfs.c
parent    494df296bf4abe9b2b484bde1a4fad28c989afec (diff)

Snapshot for ECRTS'25 artifact evaluation

Diffstat (limited to 'runlist_procfs.c')
 -rw-r--r-- runlist_procfs.c | 645 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 636 insertions(+), 9 deletions(-)
diff --git a/runlist_procfs.c b/runlist_procfs.c
index b2159f6..a3a6df3 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -1,12 +1,117 @@
 #include <linux/seq_file.h> // For seq_* functions and types
 #include <linux/version.h> // Macros to detect kernel version
+#include <linux/platform_device.h> // For platform_get_resource()
+#include <linux/pci.h> // For pci_resource_start()
+#include <linux/iommu.h> // For iommu_ functions
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,10,0)
+#include <linux/dma-map-ops.h> // For get_dma_ops()
+#endif
 
 #include "nvdebug_linux.h"
 
-// Uncomment to expand channel status information when printing the runlist
+// We cannot touch PRAMIN (via page table operations or ctxsw access) if we're
+// using it to walk the runlist
+//#ifndef FALLBACK_TO_PRAMIN
+// Uncomment to expand channel status, instance, and context information when
+// printing the runlist
 #define DETAILED_CHANNEL_INFO
+//#endif
 
 #ifdef DETAILED_CHANNEL_INFO
+// Print the channel instance and context switch blocks
+// XXX: THIS IS UNSAFE ON KEPLER!
+// instance_deref() will call into the page table logic, which may move PRAMIN.
+// PRAMIN appears heavily utilized by the driver on Bonham (at least), and
+// moving it causes problems.
+static int runlist_detail_seq_show_inst(struct seq_file *s, struct nvdebug_state *g, char *prefix, uint64_t instance_ptr, enum INST_TARGET instance_target) {
+	instance_ctrl_t *inst = NULL;
+	context_switch_ctrl_t *ctxsw = NULL;
+	int i;
+
+#ifdef FALLBACK_TO_PRAMIN
+	bar0_window_t win;
+	win.raw = nvdebug_readl(g, NV_XAL_EP_BAR0_WINDOW_BASE);
+	inst = g->regs + NV_PRAMIN + addr_to_pramin_mut(g, instance_ptr, instance_target);
+#else
+	if (IS_ERR(inst = instance_deref(g, instance_ptr, instance_target)))
+		return PTR_ERR(inst);
+#endif // FALLBACK_TO_PRAMIN
+	// If unable to access instance block, skip
+	if (!inst)
+		return 0;
+
+	// Print the channel instance block
+	// As an ID, use upper 52 bits of the instance address (lower 12 are zero)
+	//seq_printf(s, "%s+- Inst %-13llx-+\n", prefix, instance_ptr >> 12);
+	seq_printf(s, "%s|= Instance Block ====|\n", prefix);
+	seq_printf(s, "%s| Target Engine: %2d|\n", prefix, inst->fc_target);
+	seq_printf(s, "%s| Privileged: %1d|\n", prefix, inst->fc_config_is_priv);
+	seq_printf(s, "%s| Channel VEID: %2d|\n", prefix, inst->fc_chan_info_veid);
+	seq_printf(s, "%s| WFI PTR: |\n", prefix);
+	seq_printf(s, "%s| %#018llx|\n", prefix, (uint64_t)inst->engine_wfi_ptr << 12);
+	seq_printf(s, "%s| %20s|\n", prefix, target_to_text(inst->engine_wfi_target));
+	seq_printf(s, "%s| Virtual address? %d|\n", prefix, inst->engine_wfi_is_virtual);
+	seq_printf(s, "%s| WFI VEID: %2d|\n", prefix, inst->engine_wfi_veid);
+	seq_printf(s, "%s| All PDB PTR: |\n", prefix);
+	seq_printf(s, "%s| %#018llx|\n", prefix, (u64)inst->pdb.page_dir << 12);
+	seq_printf(s, "%s| %20s|\n", prefix, target_to_text(inst->pdb.target));
+	seq_printf(s, "%s| %20s|\n", prefix, inst->pdb.is_volatile ? "volatile" : "non-volatile");
+	//seq_printf(s, "%s|raw: %0#10lx|\n", prefix, inst->pdb.raw);
+	seq_printf(s, "%s| Num subcontexts: %2u|\n", prefix, hweight64(inst->subcontext_pdb_valid));
+	// Print configuration of every enabled subcontext
+	for (i = 0; i < 64; i++) {
+		// Skip subcontexts without their enable bit set
+		if (!(1 & (inst->subcontext_pdb_valid >> i)))
+			continue;
+		seq_printf(s, "%s| CPU SC%02d ASID%7d|\n", prefix, i, inst->subcontext[i].pasid);
+		seq_printf(s, "%s| SC%02d PDB PTR: |\n", prefix, i);
+		seq_printf(s, "%s| %#018llx|\n", prefix, ((u64)inst->subcontext[i].pdb.page_dir_hi << 32) | ((u64)inst->subcontext[i].pdb.page_dir_lo << 12));
+		seq_printf(s, "%s| %20s|\n", prefix, target_to_text(inst->subcontext[i].pdb.target));
+		seq_printf(s, "%s| %20s|\n", prefix, inst->subcontext[i].pdb.is_volatile ? "volatile" : "non-volatile");
+		//seq_printf(s, "%s|raw: %0#10lx|\n", prefix, inst->subcontext[i].pdb.raw);
+	}
+
+	// XXX: CTXSW is only accessible via PRAMIN. Accessing PRAMIN appears to
+	// either be broken, or race with the driver on Kepler (gk104 tested). So,
+	// do not attempt to touch the CTXSW block on Kepler.
+	// TODO: This check should be moved into addr_to_pramin_mut().
+	if (g->chip_id < NV_CHIP_ID_MAXWELL)
+		return 0;
+	// End XXX
+
+	if (IS_ERR(ctxsw = get_ctxsw(g, inst))) {
+#ifdef FALLBACK_TO_PRAMIN
+		nvdebug_writel(g, NV_XAL_EP_BAR0_WINDOW_BASE, win.raw);
+#endif
+		return PTR_ERR(ctxsw);
+	}
+	// If unable to access CTXSW block, skip
+	if (!ctxsw) {
+#ifdef FALLBACK_TO_PRAMIN
+		nvdebug_writel(g, NV_XAL_EP_BAR0_WINDOW_BASE, win.raw);
+#endif
+		return 0;
+	}
+	// Access and print the preemption mode and context ID
+	seq_printf(s, "%s|= Context State =====|\n", prefix);
+	seq_printf(s, "%s| Ctx. ID: %#10x|\n", prefix, ctxsw->context_id);
+	// No other CTXSW fields are supported pre-Pascal
+	if (g->chip_id < NV_CHIP_ID_PASCAL)
+		return 0;
+	seq_printf(s, "%s| Gfx. Preemption:%4s|\n", prefix,
+	           graphics_preempt_type_to_text(ctxsw->graphics_preemption_options));
+	seq_printf(s, "%s| Cmp. Preemption:%4s|\n", prefix,
+	           compute_preempt_type_to_text(ctxsw->compute_preemption_options));
+	seq_printf(s, "%s| #WFI Saves:%9d|\n", prefix, ctxsw->num_wfi_save_operations);
+	seq_printf(s, "%s| #CTA Saves:%9d|\n", prefix, ctxsw->num_cta_save_operations);
+	seq_printf(s, "%s| #GFXP Saves:%8d|\n", prefix, ctxsw->num_gfxp_save_operations);
+	seq_printf(s, "%s| #CILP Saves:%8d|\n", prefix, ctxsw->num_cilp_save_operations);
+#ifdef FALLBACK_TO_PRAMIN
+	nvdebug_writel(g, NV_XAL_EP_BAR0_WINDOW_BASE, win.raw);
+#endif
+	return 0;
+}
+
 /* Print channel details using PCCSR (Programmable Channel Control System RAM?)
  @param s Pointer to state from seq_file subsystem to pass to seq_printf
  @param g Pointer to our internal GPU state
@@ -32,16 +137,19 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state
 	seq_printf(s, "%s| %#018llx|\n", prefix, instance_ptr);
 	seq_printf(s, "%s| %20s|\n", prefix, target_to_text(chan.inst_target));
 	seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind);
-	return 0;
+	// Print instance block
+	return runlist_detail_seq_show_inst(s, g, prefix, instance_ptr, chan.inst_target);
 }
 
 /* `runlist_detail_seq_show_chan()`, but for Ampere+
+ @param instance_ptr Address for the channel instance block
+ @param instance_target Aperture of `instance_ptr`
  @param runlist_pri_base Base of the RLRAM region for this runlist
 
  `runlist_pri_base` is necessary, since Channel RAM is now per-runlist on
  Ampere+, and its location is configured in Runlist RAM.
 */
-static int runlist_detail_seq_show_chan_ga100(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix, uint32_t runlist_pri_base) {
+static int runlist_detail_seq_show_chan_ga100(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix, uint32_t runlist_pri_base, uint64_t instance_ptr, enum INST_TARGET instance_target) {
 	runlist_channel_config_t channel_config;
 	channel_ctrl_ga100_t chan;
 
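As the doc comment notes, on Ampere+ the Channel RAM location is itself read out of Runlist RAM. A sketch of that derivation, mirroring the logic in map_mem_for_instance() later in this patch; the bitfield layout here is an assumption modeled on nvdebug's runlist_channel_config_t (consult nvdebug.h for the real definition):

/* Sketch only: hypothetical stand-in for runlist_channel_config_t. */
#include <stddef.h>
#include <stdint.h>

struct chan_config {
	uint32_t num_channels_log2 : 4;  /* CHRAM holds 2^n channel entries */
	uint32_t bar0_offset : 28;       /* CHRAM base in BAR0, in 16 B units */
};

/* CHRAM base in BAR0: bar0_offset scaled back up to bytes */
static inline uint32_t chram_bar0_off(struct chan_config c) {
	return c.bar0_offset << 4;
}

/* CHRAM size: entry count times the per-channel control-block size
 * (sizeof(channel_ctrl_ga100_t) in the real module) */
static inline size_t chram_size(struct chan_config c, size_t entry_size) {
	return ((size_t)1 << c.num_channels_log2) * entry_size;
}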
@@ -63,7 +171,7 @@ static int runlist_detail_seq_show_chan_ga100(struct seq_file *s, struct nvdebug
 	seq_printf(s, "%s| PBDMA Busy: %d|\n", prefix, chan.pbdma_busy);
 	seq_printf(s, "%s| ENG Busy: %d|\n", prefix, chan.eng_busy);
 	seq_printf(s, "%s| Acquire Fail: %d|\n", prefix, chan.acquire_fail);
-	return 0;
+	return runlist_detail_seq_show_inst(s, g, prefix, instance_ptr, instance_target);
 }
 #endif
 
@@ -173,7 +281,7 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
 		if (g->chip_id < NV_CHIP_ID_AMPERE)
 			runlist_detail_seq_show_chan(s, g, chid(g, entry), indt);
 		else
-			runlist_detail_seq_show_chan_ga100(s, g, chid(g, entry), indt, rl_iter->runlist_pri_base);
+			runlist_detail_seq_show_chan_ga100(s, g, chid(g, entry), indt, rl_iter->runlist_pri_base, instance_ptr, inst_target(g, entry));
 #endif
 		seq_printf(s, "%s+---------------------+\n", indt);
 	}
@@ -232,15 +340,17 @@ struct file_operations preempt_tsg_file_ops = {
 
 ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer,
 				    size_t count, loff_t *off) {
-	uint32_t target_runlist;
+	uint32_t target_runlist, target_offset;
 	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	int err = kstrtou32_from_user(buffer, count, 0, &target_runlist);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_offset);
 	if (err)
 		return err;
+	// (Ab)use the PDE_DATA field for the runlist ID
+	target_runlist = file2gpuidx(f);
 
 	// resubmit_runlist() checks that target_runlist is valid
-	if ((err = resubmit_runlist(g, target_runlist)))
+	if ((err = resubmit_runlist(g, target_runlist, target_offset)))
 		return err;
 
 	return count;
@@ -351,6 +461,54 @@ struct file_operations enable_channel_file_ops = {
 	.llseek = default_llseek,
 };
 
+ssize_t comm_preempt_channel_file_write(struct file *f, const char __user *buf,
+					size_t count, loff_t *off,
+					enum COMPUTE_PREEMPT_TYPE mode) {
+	uint32_t target_channel, target_runlist;
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	int err = kstrtou32_from_user(buf, count, 0, &target_channel);
+	if (err)
+		return err;
+	// (Ab)use the PDE_DATA field used by file2gpuidx() for the runlist ID
+	target_runlist = file2gpuidx(f);
+	// Set preemption mode for the context of this channel
+	if ((err = set_channel_preemption_mode(g, target_channel, target_runlist, mode)))
+		return err;
+
+	return count;
+}
+
+ssize_t wfi_preempt_channel_file_write(struct file *f, const char __user *buf,
+				       size_t count, loff_t *off) {
+	return comm_preempt_channel_file_write(f, buf, count, off, PREEMPT_WFI);
+}
+
+struct file_operations wfi_preempt_channel_file_ops = {
+	.write = wfi_preempt_channel_file_write,
+	.llseek = default_llseek,
+};
+
+ssize_t cta_preempt_channel_file_write(struct file *f, const char __user *buf,
+				       size_t count, loff_t *off) {
+	return comm_preempt_channel_file_write(f, buf, count, off, PREEMPT_CTA);
+}
+
+struct file_operations cta_preempt_channel_file_ops = {
+	.write = cta_preempt_channel_file_write,
+	.llseek = default_llseek,
+};
+
+ssize_t cil_preempt_channel_file_write(struct file *f, const char __user *buf,
+				       size_t count, loff_t *off) {
+	return comm_preempt_channel_file_write(f, buf, count, off, PREEMPT_CILP);
+}
+
+struct file_operations cil_preempt_channel_file_ops = {
+	.write = cil_preempt_channel_file_write,
+	.llseek = default_llseek,
+};
+
 // Tested working on Pascal (gp106) through Ada (ad102)
 ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
 				 size_t count, loff_t *off) {
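The three fops above back per-runlist proc files, and the comment in comm_preempt_channel_file_write() assumes each file's PDE_DATA carries the runlist ID for file2gpuidx() to recover. A sketch of how such files might be registered; `rl_dir` and `create_preempt_files` are hypothetical, and proc_create_data() takes a file_operations pointer only on kernels before 5.6 (5.6+ expects struct proc_ops), so this is illustrative rather than this module's actual init code:

/* Sketch only; requires <linux/proc_fs.h>. */
static int create_preempt_files(struct proc_dir_entry *rl_dir, long rl_id) {
	// Stash the runlist ID in each file's PDE_DATA; file2gpuidx() can then
	// retrieve it via PDE_DATA(file_inode(f)).
	if (!proc_create_data("wfi_preempt_channel", 0222, rl_dir,
			      &wfi_preempt_channel_file_ops, (void *)rl_id))
		return -ENOMEM;
	if (!proc_create_data("cta_preempt_channel", 0222, rl_dir,
			      &cta_preempt_channel_file_ops, (void *)rl_id))
		return -ENOMEM;
	if (!proc_create_data("cil_preempt_channel", 0222, rl_dir,
			      &cil_preempt_channel_file_ops, (void *)rl_id))
		return -ENOMEM;
	return 0;
}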
@@ -419,11 +577,13 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
 
 		// TODO: Fix the above for bare channels. Add "for_chan_until_tsg"?
 	}
+#warning switch_to_tsg has preempt_runlist omitted!
+	return count;
 
 	// Resubmit the runlist to ensure that changes to channel enablement are
 	// picked up on Turing+ GPUs (channel enablements may not be otherwise).
 	if (g->chip_id >= NV_CHIP_ID_TURING)
-		if ((err = resubmit_runlist(g, target_runlist)))
+		if ((err = resubmit_runlist(g, target_runlist, -1)))
 			return err;
 
 	// Trigger a runlist-level preempt to stop whatever was running, triggering
@@ -438,3 +598,470 @@ struct file_operations switch_to_tsg_file_ops = {
 	.write = switch_to_tsg_file_write,
 	.llseek = default_llseek,
 };
+
+ssize_t preempt_runlist_file_write(struct file *f, const char __user *buffer,
+				   size_t count, loff_t *off) {
+	uint32_t target_runlist;
+	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	int err = kstrtou32_from_user(buffer, count, 0, &target_runlist);
+	if (err)
+		return err;
+
+	// TODO: Check runlist is in-range
+	if ((err = preempt_runlist(g, target_runlist)))
+		return err;
+
+	return count;
+}
+
+struct file_operations preempt_runlist_file_ops = {
+	.write = preempt_runlist_file_write,
+	.llseek = default_llseek,
+};
+
+// Value written to this file is which runlist to ack the IRQ for
+ssize_t ack_bad_tsg_file_write(struct file *f, const char __user *buffer,
+			       size_t count, loff_t *off) {
+	uint32_t target_runlist;
+	uint32_t rl_ram_off;
+	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	int err = kstrtou32_from_user(buffer, count, 0, &target_runlist);
+	if (err)
+		return err;
+
+	if ((err = get_runlist_ram(g, target_runlist, &rl_ram_off)))
+		return err;
+
+	nvdebug_writel(g, rl_ram_off + 0x100, 1 << 12);
+
+	return count;
+}
+
+struct file_operations ack_bad_tsg_file_ops = {
+	.write = ack_bad_tsg_file_write,
+	.llseek = default_llseek,
+};
+
+// Rather than mapping all of BAR0, we just map:
+// - On Pascal, Volta, Turing: MC_BOOT, PFIFO, PCCSR, PTOP
+// - On Ampere: MC_BOOT, RAMRL(0), CHRAM(0), PTOP
+// "All CUDA-managed pointers are within the first 40 bits of the process's
+// VA space" (Sec. 4.1, GPUDirect RDMA Documentation)
+// - This means 0x00ff_ffff_ffff is the highest valid CUDA virtual address,
+//   and all higher addresses are unused.
+// - So we use 0x6000_0000_0000+; this falls within the first PDE3 entry, and
+//   at the end of the PDE2 entries
+//   + Using the second PDE3 entry did not appear to work on Jetson (IIRC)
+#define BAR0_USER_ADDR 0x0000700000000000llu
+#define MEM_USER_ADDR  0x0000600000000000llu
+
+/* Map all of GPU VRAM, and selected BAR0 regions, into a channel instance's
+ * virtual address space at predefined offsets (above).
+ *
+ * @param g        Pointer to the nvdebug state for the selected GPU
+ * @param inst_ptr Dereferenceable pointer to the channel's instance block
+ * @returns 0 on success, -errno on error
+ *
+ * Support: Pascal, Volta, Turing, Ampere
+ */
+int map_mem_for_instance(struct nvdebug_state *g, instance_ctrl_t *inst_ptr) {
+	int ret;
+	uintptr_t off, ram_size;
+	dma_addr_t bus_mc_boot_ram, bus_ptop_ram, bus_fifo_ram, bus_chan_ctrl_ram;
+	uint64_t mc_boot_ram, ptop_ram, fifo_ram, chan_ctrl_ram;
+	page_dir_config_t chan_pd_config;
+	memory_range_t mem_range;
+	uint32_t channel_ram_off, runlist_ram_off, channel_ram_size, bar0_base;
+	struct iommu_domain *dom;
+
+	if (g->chip_id >= NV_CHIP_ID_AMPERE) {
+		runlist_channel_config_t channel_config;
+		if ((ret = get_runlist_ram(g, 0, &runlist_ram_off))) {
+			printk(KERN_ERR "[nvdebug] %s: Unable to determine location of runlist0 RAM!\n", __func__);
+			return ret;
+		}
+		if (runlist_ram_off & 0xfff) {
+			printk(KERN_ERR "[nvdebug] %s: Runlist0 RAM is not page-aligned!\n", __func__);
+			return -EAFNOSUPPORT;
+		}
+		if ((channel_config.raw = nvdebug_readl(g, runlist_ram_off + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_ram_off = (uint32_t)channel_config.bar0_offset << 4;
+		if (channel_ram_off & 0xfff) {
+			printk(KERN_ERR "[nvdebug] %s: Runlist0 CHRAM is not page-aligned!\n", __func__);
+			return -EAFNOSUPPORT;
+		}
+		channel_ram_size = (1 << channel_config.num_channels_log2) * sizeof(channel_ctrl_ga100_t);
+		printk(KERN_DEBUG "[nvdebug] %s: Mapping CHRAM at %#018llx--%x and RLRAM at %#018llx--%x.\n", __func__, BAR0_USER_ADDR + channel_ram_off, channel_ram_size-1, BAR0_USER_ADDR + runlist_ram_off, 4095);
+	} else {
+		channel_ram_off = NV_PCCSR;
+		// MAX_CHID * sizeof(channel_ctrl_gf100_t) is < 4 KiB, so hardcode
+		channel_ram_size = 4096;
+		runlist_ram_off = NV_PFIFO;
+	}
+
+	// map_mem_by_chid() pulls the instance block via PRAMIN, so inst_ptr will
+	// be invalid after moving PRAMIN (e.g. as part of a page table operation).
+	// To avoid accessing inst_ptr after invalidation, keep a copy of what we
+	// need.
+	chan_pd_config = inst_ptr->pdb;
+
+	// map_page_directory_v1() is unimplemented, precluding Maxwell (or older)
+	// support (as they don't support v2 page tables).
+	if (!chan_pd_config.is_ver2)
+		return -EOPNOTSUPP;
+
+	// Determine the size of GPU physical memory (VRAM).
+	if ((mem_range.raw = nvdebug_readl(g, NV_FB_MMU_LOCAL_MEMORY_RANGE)) == -1)
+		return -EIO;
+	ram_size = memory_range_to_bytes(mem_range);
+
+	// We map memory using huge pages, and thus do not support GPUs with
+	// non-2-MiB-divisible VID_MEM sizes.
+	if (ram_size % (1 << 21) != 0) {
+		printk(KERN_ERR "[nvdebug] %s: GPU VID_MEM of %lu bytes is not a multiple of 2 MiB!\n", __func__, ram_size);
+		return -EAFNOSUPPORT;
+	}
+
+	// Map all of physical GPU memory (VID_MEM) into this channel's GPU virtual
+	// address space using huge (2 MiB) pages.
+	for (off = 0; off < ram_size; off += (1 << 21)) {
+		if ((ret = map_page_directory(g, chan_pd_config,
+					      MEM_USER_ADDR + off, off, TARGET_VID_MEM, true)) < 0)
+			return ret;
+		// If the mapping already exists for this page directory, the other
+		// mappings should already exist, and can be skipped.
+		if (ret == 1) {
+			printk(KERN_INFO "[nvdebug] %s: VRAM mapping from %llx to %lx already exists. Assuming all mappings already exist and returning early...\n", __func__, MEM_USER_ADDR + off, off);
+			return 0;
+		}
+	}
+
+	// Map Channel RAM to a GPU-accessible bus address (gets past any IOMMU or
+	// IOVA layers), then map that address into this channel's GPU virtual
+	// address space. NV_PCCSR_CHANNEL_INST(0) is 4k-aligned, so it can be
+	// directly mapped.
+	// XXX: All these mappings are currently returning -1 on all reads on
+	//      sunlight, jbakita-old, jetson-xavier, jetson-orin, and bonham,
+	//      which seems to be returned from the PCIe root (on PCIe GPUs).
+	if (g->pcid)
+		bar0_base = pci_resource_start(g->pcid, 0);
+	else if (g->platd)
+		bar0_base = platform_get_resource(g->platd, IORESOURCE_MEM, 0)->start;
+	else
+		return -ENOTRECOVERABLE;
+	mc_boot_ram = NV_MC_BOOT_0 + bar0_base;
+	// PTOP fits within a page, but is not page-aligned; round down.
+	ptop_ram = (NV_PTOP & ~0xfffu) + bar0_base;
+	fifo_ram = runlist_ram_off + bar0_base;
+	chan_ctrl_ram = channel_ram_off + bar0_base;
+
+	// Check if GPU-accessible bus addresses are the same as CPU-visible
+	// physical addresses. Logic from amdgpu_device_check_iommu_direct_map().
+	dom = iommu_get_domain_for_dev(g->dev);
+	if (!dom || dom->type == IOMMU_DOMAIN_IDENTITY) {
+		// Used for: jbakita-old, sunlight, jetson-xavier, jetson-orin integrated, bonham, ?
+		// (For all these, reads on the mapping return only -1.)
+		// (Forcing these through dma_map_resource()/iommu_map() changes nothing.)
+		// (Note that `ls -l /sys/class/iommu/*/devices` also reports that the
+		// GPU is not available under the I/O MMU on these platforms.)
+		// To fix this, please enable AMD-Vi/ARM SMMU/Intel VT-d in your BIOS
+		// settings, UEFI settings, or device-tree file. Supported on:
+		// - AMD: Bulldozer+ (or Phenom II w/ 890FX or 990FX chipset)
+		// - Intel: most CPUs since Core 2 Duo
+		// Note that while the Jetson Orin has an SMMU (I/O MMU), the GPU does
+		// not appear to be configured by any pre-provided device tree files to
+		// use the SMMU.
+		printk(KERN_INFO "[nvdebug] map_mem_ctxid: I/O MMU is unavailable/disabled for GPU %x. Assuming phys and bus addresses are identical...\n", g->chip_id);
+		bus_mc_boot_ram = mc_boot_ram;
+		bus_ptop_ram = ptop_ram;
+		bus_fifo_ram = fifo_ram;
+		bus_chan_ctrl_ram = chan_ctrl_ram;
+	} else {
+		printk(KERN_INFO "[nvdebug] map_mem_ctxid: I/O MMU is enabled. Attempting to use dma_map_resource()...\n");
+		// Used for: tama, yamaha
+		// Fails on tama, yamaha
+		// (Works on jetson-xavier, jetson-orin, and bonham, but appears to be
+		// a no-op, and yields inaccessible memory. Get `mc-err: (255)
+		// csr_nvl7r: EMEM address decode error` on access on jetson boards,
+		// and a -1 read on all.)
+		bus_mc_boot_ram = dma_map_resource(g->dev, mc_boot_ram, 4096*2 /* *2 is a XXX hack to include PBUS */, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+		bus_ptop_ram = dma_map_resource(g->dev, ptop_ram, 4096, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+		bus_fifo_ram = dma_map_resource(g->dev, fifo_ram, 4096*8 /* *8 is a XXX hack */, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+		bus_chan_ctrl_ram = dma_map_resource(g->dev, chan_ctrl_ram, 2*4096, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+		if (dma_mapping_error(g->dev, bus_mc_boot_ram) ||
+		    dma_mapping_error(g->dev, bus_ptop_ram) ||
+		    dma_mapping_error(g->dev, bus_fifo_ram) ||
+		    dma_mapping_error(g->dev, bus_chan_ctrl_ram)) {
+			// Used for: tama, yamaha
+			printk(KERN_WARNING "[nvdebug] map_mem_ctxid: Unable to map BAR0 addresses to device-accessible addresses via dma_map_resource(). Return codes: %d for MC_BOOT, %d for PFIFO, %d for PCCSR.\n",
+			       dma_mapping_error(g->dev, bus_mc_boot_ram),
+			       dma_mapping_error(g->dev, bus_fifo_ram),
+			       dma_mapping_error(g->dev, bus_chan_ctrl_ram));
+			// This fallback does not appear to work on jbakita-old (5.4,
+			// GART IOMMU), but works on tama.
+			if (!get_dma_ops(g->dev))
+				printk(KERN_WARNING "[nvdebug] Reason: No DMA `ops`, and direct mapping failed.\n");
+			else if (!get_dma_ops(g->dev)->map_resource)
+				// Fires on: tama, yamaha
+				printk(KERN_WARNING "[nvdebug] Reason: `map_resource` function undefined on this platform.\n");
+			if (!dom) {
+				printk(KERN_ERR "[nvdebug] map_mem_ctxid: No I/O MMU available and dma_map_resource() failed. Aborting mapping of BAR0 regions!\n");
+				return -ENOTRECOVERABLE;
+			}
+			printk(KERN_INFO "[nvdebug] map_mem_ctxid: Trying to fall back to direct I/O MMU manipulation...\n");
+			// XXX: Fall back to directly creating the I/O MMU mappings.
+			// This is necessary. Directly accessing BAR0 addresses throws
+			// I/O MMU errors in the kernel log on yamaha.
+			// See also: comment on kfd_mem_dmamap_sg_bo() in amdgpu.
+			// Note: dma_map_resource -> map_resource -> [arm_]iommu_map_resource
+			// -> __iommu_dma_map -> iommu_map is the happy path, but this
+			// seems to regularly fail, even though the iommu_map path works.
+			// One key difference is that the dma_map_resource() path also
+			// includes IOMMU_MMIO in the iommu_map() flags.
+			bus_mc_boot_ram = mc_boot_ram;
+			bus_ptop_ram = ptop_ram;
+			bus_fifo_ram = fifo_ram;
+			bus_chan_ctrl_ram = chan_ctrl_ram;
+			// Create identity mapping
+			ret = iommu_map(dom, mc_boot_ram, mc_boot_ram, 4096*2 /* *2 is a hack to fit in PBUS */, IOMMU_READ | IOMMU_WRITE);
+			if (ret < 0) {
+				printk(KERN_ERR "[nvdebug] map_mem_ctxid: Attempt to bypass and go directly to I/O MMU failed for MC_BOOT!\n");
+				return ret;
+			}
+			ret = iommu_map(dom, ptop_ram, ptop_ram, 4096, IOMMU_READ | IOMMU_WRITE);
+			if (ret < 0) {
+				printk(KERN_ERR "[nvdebug] map_mem_ctxid: Attempt to bypass and go directly to I/O MMU failed for PTOP!\n");
+				return ret;
+			}
+			ret = iommu_map(dom, fifo_ram, fifo_ram, 4096*8 /* *8 is a XXX hack */, IOMMU_READ | IOMMU_WRITE);
+			if (ret < 0) {
+				printk(KERN_ERR "[nvdebug] map_mem_ctxid: Attempt to bypass and go directly to I/O MMU failed for FIFO!\n");
+				return ret;
+			}
+			ret = iommu_map(dom, chan_ctrl_ram, chan_ctrl_ram, channel_ram_size, IOMMU_READ | IOMMU_WRITE);
+			if (ret < 0) {
+				printk(KERN_ERR "[nvdebug] map_mem_ctxid: Attempt to bypass and go directly to I/O MMU failed for PCCSR!\n");
+				return ret;
+			}
+		}
+	}
+	// TARGET_SYS_MEM_NONCOHERENT tells the GPU to bypass the CPU L2 cache for
+	// accesses to this memory.
+	// "Clients should normally use [SYS_MEM_NON_COHERENT]" (nvgpu)
+	//
+	// "Non-coherent system memory.
+	//  (GPU) MMU will NOT maintain coherence with CPU L2 cache.
+	//  Higher-level APIs should only allow this when it is known
+	//  the memory is not cacheable by CPU or the coherency is
+	//  managed explicitly (e.g. w/ flushes in SW).
+	//  Also consider that this path is not necessarily faster." (open-gpu-kernel-modules)
+	//
+	// "Coherent system memory.
+	//  (GPU) MMU will snoop CPU L2 cache if possible.
+	//  This is usually the safer choice over NONCOH since it works
+	//  whether the memory is cached by CPU L2 or not.
+	//  On some CPU architectures going through CPU L2 may
+	//  even be faster than the non-coherent path." (open-gpu-kernel-modules)
+	//
+	// I suspect that for SYS_MEM_NONCOHERENT mappings, the "no snoop"
+	// attribute bit will be set on associated PCIe read/write transactions.
+	//
+	// The only other bits in a PCIe read/write transaction that could be
+	// relevant are the two AT (Address Translation) bits added in PCIe 2.0.
+	if ((ret = map_page_directory(g, chan_pd_config, BAR0_USER_ADDR + NV_MC_BOOT_0,
+				      bus_mc_boot_ram, TARGET_SYS_MEM_NONCOHERENT, false)) < 0)
+		return ret;
+	// XXX
+	if ((ret = map_page_directory(g, chan_pd_config, BAR0_USER_ADDR + NV_MC_BOOT_0 + 4096,
+				      bus_mc_boot_ram + 4096, TARGET_SYS_MEM_NONCOHERENT, false)) < 0)
+		return ret;
+	if ((ret = map_page_directory(g, chan_pd_config, BAR0_USER_ADDR + (NV_PTOP & ~0xfffu),
+				      bus_ptop_ram, TARGET_SYS_MEM_NONCOHERENT, false)) < 0)
+		return ret;
+	if ((ret = map_page_directory(g, chan_pd_config, BAR0_USER_ADDR + runlist_ram_off,
+				      bus_fifo_ram, TARGET_SYS_MEM_NONCOHERENT, false)) < 0)
+		return ret;
+	// XXX
+	for (off = 4096; off < 8*4096; off += 4096)
+		if ((ret = map_page_directory(g, chan_pd_config, BAR0_USER_ADDR + runlist_ram_off + off,
+					      bus_fifo_ram + off, TARGET_SYS_MEM_NONCOHERENT, false)) < 0)
+			return ret;
+	// Channel control RAM can span two or more pages on Ampere+
+	for (off = 0; off < channel_ram_size; off += 4096)
+		if ((ret = map_page_directory(g, chan_pd_config, BAR0_USER_ADDR + channel_ram_off + off,
+					      bus_chan_ctrl_ram + off, TARGET_SYS_MEM_NONCOHERENT, false)) < 0)
+			return ret;
+	return 0;
+}
+
+// Map by context ID
+// See constituent functions for info on what they do; comments not repeated.
+// Tested on Pascal, Volta, Turing, and Kepler
+ssize_t map_mem_ctxid_file_write(struct file *f, const char __user *buffer,
+				 size_t count, loff_t *off) {
+	int err;
+	uint32_t target_context, target_runlist;
+	loff_t pos;
+	uint64_t instance_ptr;
+	enum INST_TARGET instance_target;
+	struct runlist_iter rl_iter;
+	instance_ctrl_t *inst;
+	context_switch_ctrl_t *ctx_block;
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	if ((err = kstrtou32_from_user(buffer, count, 0, &target_context)))
+		return err;
+	// (Ab)use the PDE_DATA field used by file2gpuidx() for the runlist ID
+	target_runlist = file2gpuidx(f);
+
+	// Get a dereferenceable pointer to the runlist
+	if ((err = get_runlist_iter(g, target_runlist, &rl_iter)))
+		return err;
+	// Find a channel in the runlist matching the provided context ID
+	for (pos = 0; pos < rl_iter.len; pos++, rl_iter.curr_entry += NV_RL_ENTRY_SIZE(g)) {
+		uint32_t ctxsw_timeout_pri_base = NV_PFIFO_ENG_CTXSW_TIMEOUT;
+		if (entry_type(g, rl_iter.curr_entry) == ENTRY_TYPE_TSG)
+			continue;
+		// Get instance block address
+		if (g->chip_id >= NV_CHIP_ID_AMPERE) {
+			instance_ptr = ((struct gv100_runlist_chan*)rl_iter.curr_entry)->inst_ptr_hi;
+			instance_ptr <<= 32;
+			instance_ptr |= (uint64_t)inst_ptr_lo(g, rl_iter.curr_entry) << 12;
+			instance_target = inst_target(g, rl_iter.curr_entry);
+			ctxsw_timeout_pri_base = rl_iter.runlist_pri_base + NV_RUNLIST_ENGINE_CTXSW_TIMEOUT_CONFIG(0);
+		} else {
+			channel_ctrl_t chan;
+			chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid(g, rl_iter.curr_entry)));
+			if (chan.raw == -1)
+				return -EIO;
+			instance_ptr = (uint64_t)chan.inst_ptr << 12;
+			instance_target = chan.inst_target;
+		}
+		// Skip channels with unconfigured or INVALID instance blocks
+		if (!instance_ptr || instance_target == 1) {
+			printk(KERN_WARNING "[nvdebug] Channel %d is in runlist %d, but "
+			       "lacks a valid instance block\n", chid(g, rl_iter.curr_entry),
+			       target_runlist);
+			continue;
+		}
+
+		// Get a dereferenceable pointer to the instance block
+		if (IS_ERR(inst = instance_deref(g, instance_ptr, instance_target)))
+			return PTR_ERR(inst);
+		// If unable to access instance block, skip
+		if (!inst)
+			continue;
+
+		// Get a dereferenceable pointer to the CTXSW block
+		if (IS_ERR(ctx_block = get_ctxsw(g, inst)))
+			return PTR_ERR(ctx_block);
+		// If unable to access CTXSW block, skip
+		if (!ctx_block)
+			continue;
+		// Check if the context ID matches
+		if (ctx_block->context_id != target_context)
+			continue;
+
+		// XXX: Disable the context switch timeout while we're here
+		ctxsw_timeout_t timeout_config;
+		if ((timeout_config.raw = nvdebug_readl(g, ctxsw_timeout_pri_base)) == -1)
+			return -EIO;
+		timeout_config.enabled = 0;
+		nvdebug_writel(g, ctxsw_timeout_pri_base, timeout_config.raw);
+		// XXX: Attempt setting preemption mode while we're here
+		ctx_block->compute_preemption_options = PREEMPT_CTA;
+
+		// Map memory and return
+		if ((err = map_mem_for_instance(g, inst)) < 0)
+			return err;
+		return count;
+	}
+	return -ESRCH;
+}
+
+struct file_operations map_mem_ctxid_file_ops = {
+	.write = map_mem_ctxid_file_write,
+	.llseek = default_llseek,
+};
+
+// Map by channel ID (LEGACY; unclear if this needs to be kept)
+// Support: Pascal, Volta, and Turing only
+ssize_t map_mem_chid_file_write(struct file *f, const char __user *buffer,
+				size_t count, loff_t *off) {
+	int ret, target_channel;
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
+	channel_ctrl_t chan;
+	instance_ctrl_t *inst_ptr;
+	bool all = false;
+	uint64_t inst_ptr_off;
+	page_dir_config_t bar2_pd_config;
+	// Passing 0 as the base to kstrtos32 indicates autodetect hex/octal/dec
+	if ((ret = kstrtos32_from_user(buffer, count, 0, &target_channel)))
+		return ret;
+
+	if (g->chip_id >= NV_CHIP_ID_AMPERE)
+		return -ENOSYS;
+
+	// This API is for nvsched, which is only supported on GPUs which support
+	// instruction-level preemption (Pascal+).
+	if (g->chip_id < NV_CHIP_ID_PASCAL)
+		return -EOPNOTSUPP;
+
+	if (target_channel > MAX_CHID || target_channel < -1)
+		return -ERANGE;
+
+	// Passing -1 indicates that all channels should be mapped
+	if (target_channel == -1) {
+		all = true;
+		target_channel = 0;
+	}
+
+	do {
+		printk(KERN_INFO "[nvdebug] Mapping channel %d\n", target_channel);
+		// Read the channel's configuration block, which includes the address
+		// of this channel's instance block, which contains a page table
+		// pointer.
+		// TODO: Verify this works with the channel RAM changes on Ampere+
+		chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
+		if (chan.raw == -1)
+			return -EIO;
+
+		// If the instance pointer is unconfigured or the target is 1
+		// (INVALID), this channel is not in-use on any runlist and can be
+		// skipped.
+		if (chan.inst_ptr == 0 || chan.inst_target == 1)
+			continue;
+
+		// Find the page tables which define how BAR2 offsets are translated
+		// to physical VID_MEM/SYS_MEM addresses. (We have to do this every
+		// time, since we reset PRAMIN.)
+		if ((ret = get_bar2_pdb(g, &bar2_pd_config)) < 0)
+			return ret;
+
+		// Pascal+ GPUs use Version 2 page tables, so this shouldn't be a problem
+		if (!bar2_pd_config.is_ver2)
+			return -ENOSYS;
+
+		// To read the instance block, first find where it is mapped in BAR2
+		if ((inst_ptr_off = search_page_directory(g, bar2_pd_config, (u64)chan.inst_ptr << 12, chan.inst_target)) == 0) {
+			// If no mapping can be found in BAR2, fall back to accessing the
+			// instance block via the PRAMIN window.
+			printk(KERN_WARNING "[nvdebug] Warning: Channel %d has no instance "
+			       "block mapped in BAR2. Falling back to PRAMIN...\n", target_channel);
+			if ((ret = addr_to_pramin_mut(g, (u64)chan.inst_ptr << 12, chan.inst_target)) < 0)
+				return -EOPNOTSUPP;
+			inst_ptr = g->regs + NV_PRAMIN + ret;
+		} else {
+			inst_ptr = g->bar2 + inst_ptr_off;
+		}
+
+		if ((ret = map_mem_for_instance(g, inst_ptr)))
+			return ret;
+
+		// If mapping all channels, start again at the next one
+	} while (all && ++target_channel <= MAX_CHID);
+
+	return count;
+}
+
+struct file_operations map_mem_chid_file_ops = {
+	.write = map_mem_chid_file_write,
+	.llseek = default_llseek,
+};