aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--bus.c73
-rw-r--r--nvdebug.h67
2 files changed, 111 insertions, 29 deletions
diff --git a/bus.c b/bus.c
index c4d991e..f2369eb 100644
--- a/bus.c
+++ b/bus.c
@@ -24,10 +24,11 @@
24/* Obtain the PRAMIN offset at which `addr` can be accessed 24/* Obtain the PRAMIN offset at which `addr` can be accessed
25 @param addr Address to find 25 @param addr Address to find
26 @param target Which address space to use (VRAM, SYS_MEM, PEER(?)) 26 @param target Which address space to use (VRAM, SYS_MEM, PEER(?))
27 @return positive offset or -EINVAL on invalid arguments 27 @return positive offset, -EINVAL on invalid arguments, or -EOPNOTSUPP on
28 an unsupported platform.
28 29
 29 Note: Will move the PRAMIN window to accommodate the request. Only guarantees 30 Note: Will move the PRAMIN window to accommodate the request. Only guarantees
30 that the surrounding 64KiB window will be accessible. 31 that the surrounding 64-KiB-aligned window will be accessible.
31 Note: Moving the PRAMIN window will cause problems if it races with driver 32 Note: Moving the PRAMIN window will cause problems if it races with driver
32 code that tries to do the same, or expects the window not to move. 33 code that tries to do the same, or expects the window not to move.
33 Bugs: Untested on PEER. 34 Bugs: Untested on PEER.
@@ -36,6 +37,7 @@ int addr_to_pramin_mut(struct nvdebug_state *g,
36 uint64_t addr, enum INST_TARGET target) { 37 uint64_t addr, enum INST_TARGET target) {
37 bar0_window_t window; 38 bar0_window_t window;
38 uint64_t pramin_base; 39 uint64_t pramin_base;
40 uint32_t window_reg;
39 // For us, accuracy and robustness is more important than speed 41 // For us, accuracy and robustness is more important than speed
40 // Check that the address is valid (49 bits are addressable on-GPU, but 42 // Check that the address is valid (49 bits are addressable on-GPU, but
41 // PRAMIN only supports up to 40 bits). 43 // PRAMIN only supports up to 40 bits).
@@ -44,21 +46,38 @@ int addr_to_pramin_mut(struct nvdebug_state *g,
44 addr, __func__); 46 addr, __func__);
45 return -EINVAL; 47 return -EINVAL;
46 } 48 }
47 window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW); 49 // Register relocated on Hopper and Blackwell+
48 if (window.target != target) 50 if ((g->chip_id >= NV_CHIP_ID_HOPPER && g->chip_id < NV_CHIP_ID_ADA) || g->chip_id >= NV_CHIP_ID_BLACKWELL)
49 goto relocate; 51 window_reg = NV_XAL_EP_BAR0_WINDOW_BASE;
52 else
53 window_reg = NV_PBUS_BAR0_WINDOW;
54 if ((window.raw = nvdebug_readl(g, window_reg)) == -1) {
55 printk(KERN_ERR "[nvdebug] PRAMIN window configuration inaccessible; "
56 "failing %s\n", __func__);
57 return -EOPNOTSUPP;
58 }
59 if (window.target != target) {
60 // On Hopper and Blackwell+, the window always points at VID_MEM
61 if ((g->chip_id >= NV_CHIP_ID_HOPPER && g->chip_id < NV_CHIP_ID_ADA) || g->chip_id >= NV_CHIP_ID_BLACKWELL)
62 return -EOPNOTSUPP;
63 else
64 goto relocate;
65 }
50 pramin_base = ((uint64_t)window.base) << 16; 66 pramin_base = ((uint64_t)window.base) << 16;
51 if (addr < pramin_base || addr > pramin_base + NV_PRAMIN_LEN) 67 if (addr < pramin_base || addr > pramin_base + NV_PRAMIN_LEN)
52 goto relocate; 68 goto relocate;
53 return addr - pramin_base; // Guaranteed to be < 1MiB, so safe for int 69 return addr - pramin_base; // Guaranteed to be < 1MiB, so safe for int
54relocate: 70relocate:
55 printk(KERN_INFO "[nvdebug] Moving PRAMIN win from base %llx (%s) to %llx (%s) to accomodate %#018llx\n", pramin_base, target_to_text(window.target), (addr >> 16) << 16, target_to_text(target), addr); 71 printk(KERN_INFO "[nvdebug] [SIDE EFFECT] Moving PRAMIN window from base "
72 "%llx (%s) to %llx (%s) to accomodate %#018llx\n",
73 ((uint64_t)window.base) << 16, target_to_text(window.target),
74 (addr >> 16) << 16, target_to_text(target), addr);
56 // Move PRAMIN window to a 64KiB-aligned address 75 // Move PRAMIN window to a 64KiB-aligned address
57 window.base = (u32)(addr >> 16); // Safe, due to above range check 76 window.base = (u32)(addr >> 16); // Safe, due to above range check
58 window.target = target; 77 window.target = target;
59 nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw); 78 nvdebug_writel(g, window_reg, window.raw);
60 // Wait for the window to move by re-reading (as done in nvgpu driver) 79 // Wait for the window to move by re-reading (as done in nvgpu driver)
61 (void) nvdebug_readl(g, NV_PBUS_BAR0_WINDOW); 80 (void) nvdebug_readl(g, window_reg);
62 return (int)(addr & 0xffffull); 81 return (int)(addr & 0xffffull);
63} 82}
64 83
@@ -72,7 +91,9 @@ relocate:
72*/ 91*/
73int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd) { 92int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd) {
74 int ret; 93 int ret;
75 bar_config_block_t bar2_block; 94 uint64_t bar2_ptr;
95 enum INST_TARGET bar2_target;
96 bool bar2_is_virtual;
76 97
77 if (!pd) 98 if (!pd)
78 return -EINVAL; 99 return -EINVAL;
@@ -85,17 +106,37 @@ int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd) {
85 // hierarchy used to translate BAR2 offsets to VRAM or SYS_MEM addresses. 106 // hierarchy used to translate BAR2 offsets to VRAM or SYS_MEM addresses.
86 107
87 // Determine location of BAR2 instance block 108 // Determine location of BAR2 instance block
88 if ((bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK)) == -1) { 109 if ((g->chip_id >= NV_CHIP_ID_HOPPER && g->chip_id < NV_CHIP_ID_ADA) || g->chip_id >= NV_CHIP_ID_BLACKWELL) {
89 printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 configuration! BAR2/3 inaccessible.\n"); 110 // Register layout updated on Hopper and Blackwell+ to support 52-bit
90 return -EOPNOTSUPP; 111 // instance block pointers (vs. 40 bits before)
112 bar_config_block_gh100_t bar2_block;
113 if ((bar2_block.raw = nvdebug_readq(g, NV_VIRTUAL_FUNCTION_PRIV_FUNC_BAR2_BLOCK)) == -1) {
114 printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 configuration! BAR2/3 inaccessible.\n");
115 return -EOPNOTSUPP;
116 }
117 bar2_ptr = (uint64_t)bar2_block.ptr << 12;
118 bar2_target = bar2_block.target;
119 bar2_is_virtual = bar2_block.is_virtual;
120 } else {
121 bar_config_block_t bar2_block;
122 if ((bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK)) == -1) {
123 printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 configuration! BAR2/3 inaccessible.\n");
124 return -EOPNOTSUPP;
125 }
126 bar2_ptr = (uint64_t)bar2_block.ptr << 12;
127 bar2_target = bar2_block.target;
128 bar2_is_virtual = bar2_block.is_virtual;
91 } 129 }
92 printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar2_block.is_virtual ? "virtual" : "physical"); 130 printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", bar2_ptr, target_to_text(bar2_target), bar2_is_virtual ? "virtual" : "physical");
93 // Setup PRAMIN to point at the BAR2 instance block 131 // Setup PRAMIN to point at the BAR2 instance block
94 if ((ret = addr_to_pramin_mut(g, (uint64_t)bar2_block.ptr << 12, bar2_block.target)) < 0) { 132 // TODO: This won't work if the instance block is in SYS_MEM on Hopper or
95 printk(KERN_ERR "[nvdebug] Invalid BAR2/3 Instance Block configuration! BAR2/3 inaccessible.\n"); 133 // Blackwell+. Going through the I/O MMU appears to be fairly
134 // reliable, so I need to switch to using that logic whenever
135 // SYS_MEM may be accessed.
136 if ((ret = addr_to_pramin_mut(g, bar2_ptr, bar2_target)) < 0) {
137 printk(KERN_ERR "[nvdebug] Unable to access BAR2/3 Instance Block configuration via PRAMIN! BAR2/3 inaccessible.\n");
96 return ret; 138 return ret;
97 } 139 }
98 printk(KERN_INFO "[nvdebug] BAR2 inst block at off %x in PRAMIN\n", ret);
99 // Pull the page directory base configuration from the instance block 140 // Pull the page directory base configuration from the instance block
100 if ((pd->raw = nvdebug_readq(g, NV_PRAMIN + ret + NV_PRAMIN_PDB_CONFIG_OFF)) == -1) { 141 if ((pd->raw = nvdebug_readq(g, NV_PRAMIN + ret + NV_PRAMIN_PDB_CONFIG_OFF)) == -1) {
101 printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 PDB configuration! BAR2/3 inaccessible.\n"); 142 printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 PDB configuration! BAR2/3 inaccessible.\n");
diff --git a/nvdebug.h b/nvdebug.h
index f644500..409b013 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -484,6 +484,7 @@ typedef union {
484#define NV_CHIP_ID_AMPERE 0x170 484#define NV_CHIP_ID_AMPERE 0x170
485#define NV_CHIP_ID_HOPPER 0x180 485#define NV_CHIP_ID_HOPPER 0x180
486#define NV_CHIP_ID_ADA 0x190 486#define NV_CHIP_ID_ADA 0x190
487#define NV_CHIP_ID_BLACKWELL 0x1A0
487 488
488inline static const char* ARCH2NAME(uint32_t arch) { 489inline static const char* ARCH2NAME(uint32_t arch) {
489 switch (arch) { 490 switch (arch) {
@@ -521,14 +522,18 @@ inline static const char* ARCH2NAME(uint32_t arch) {
521 return "Turing"; 522 return "Turing";
522 case 0x17: 523 case 0x17:
523 return "Ampere"; 524 return "Ampere";
 524 case 0x18: 525 case 0x18: // Despite the Chip ID, Hopper functionally follows Ada
525 return "Hopper"; 526 return "Hopper";
526 case 0x19: 527 case 0x19:
527 return "Ada Lovelace"; 528 return "Ada Lovelace";
528 case 0x20: 529 case 0x1A:
529 return "Blackwell (?)"; 530 return "Blackwell";
531 case 0x1B:
532 return "Rubin (?)";
533 case 0x1F: // NVIDIA-internal simulator
534 return "AMODEL";
530 default: 535 default:
531 if (arch < 0x19) 536 if (arch < 0x1A)
532 return "[unknown historical architecture]"; 537 return "[unknown historical architecture]";
533 else 538 else
534 return "[future]"; 539 return "[future]";
@@ -881,25 +886,57 @@ union reg_range {
881#define NV_CE_PCE_MAP_SIZE 32 886#define NV_CE_PCE_MAP_SIZE 32
882 887
883 888
884/* Location of the 1Kb instance block with page tables for BAR1 and BAR2. 889/* Location of the 1Kb instance block with page tables for the BAR1/2 regions.
885 Support: Fermi+ (?), Pascal 890
891 On the H100, the "BAR1 block" describes what is actually BAR2, and the
892 "BAR2 block" describes BAR4.
893
894 PTR : Upper 28 bits of the 40-bit, (4k-aligned) address where the instance
895 block configuration is for the listed BAR region.
896
897 "Hopper+ uses 64-bit BARs, so GPU BAR2 should be at BAR4/5 and GPU BAR1 is at
898 BAR2/3" (open-gpu-kernel-modules)
886*/ 899*/
900// Support: Fermi through Ampere, Ada
887#define NV_PBUS_BAR1_BLOCK 0x00001704 901#define NV_PBUS_BAR1_BLOCK 0x00001704
888#define NV_PBUS_BAR2_BLOCK 0x00001714 902#define NV_PBUS_BAR2_BLOCK 0x00001714
889typedef union { 903typedef union {
890 struct { 904 struct {
891 uint32_t ptr:28; 905 uint32_t ptr:28;
892 enum INST_TARGET target:2; 906 enum INST_TARGET target:2;
893 uint32_t padding0:1; 907 uint32_t :1;
894 bool is_virtual:1; 908 bool is_virtual:1;
895 } __attribute__((packed)); 909 } __attribute__((packed));
896 uint32_t raw; 910 uint32_t raw;
897 struct { 911 struct {
898 uint32_t map:30; 912 uint32_t map:30;
899 uint32_t padding1:2; 913 uint32_t :2;
900 } __attribute__((packed)); 914 } __attribute__((packed));
901} bar_config_block_t; 915} bar_config_block_t;
902 916
917// Support: Hopper, Blackwell+
918// This is a "VREG" (virtual register?) in the documentation, meaning that it
919// needs the VREG base added first.
920#define NV_VIRTUAL_FUNCTION_FULL_PHYS_OFFSET 0x00B80000
921#define NV_VIRTUAL_FUNCTION_PRIV_FUNC_BAR2_BLOCK (NV_VIRTUAL_FUNCTION_FULL_PHYS_OFFSET+0x00000F70)
922typedef union {
923 struct {
924 bool is_pending:1;
925 bool is_outstanding:1;
926 uint32_t :7;
927 bool is_virtual:1;
928 enum INST_TARGET target:2;
929 uint64_t ptr:40;
930 uint32_t :12;
931 } __attribute__((packed));
932 uint64_t raw;
933 struct {
934 uint32_t :10;
935 uint32_t map:22;
936 uint32_t :32;
937 } __attribute__((packed));
938} bar_config_block_gh100_t;
939
903/* BAR0 PRAMIN (Private RAM Instance) window configuration 940/* BAR0 PRAMIN (Private RAM Instance) window configuration
904 One of the oldest ways to access video memory on NVIDIA GPUs is by using 941 One of the oldest ways to access video memory on NVIDIA GPUs is by using
905 a configurable 1MB window into VRAM which is mapped into BAR0 (register) 942 a configurable 1MB window into VRAM which is mapped into BAR0 (register)
@@ -914,21 +951,25 @@ typedef union {
914 TARGET : Which address space BASE points into 951 TARGET : Which address space BASE points into
915 952
916 Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes 953 Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes
917
918 Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere
919*/ 954*/
955// Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere, Ada
920#define NV_PBUS_BAR0_WINDOW 0x00001700 956#define NV_PBUS_BAR0_WINDOW 0x00001700
921#define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) 957// On Hopper, and Blackwell+, TARGET must always be 0 (VIDMEM)
922#define NV_PRAMIN_LEN 0x00100000 958// Support: Hopper, Blackwell+
959#define NV_XAL_EP_BAR0_WINDOW_BASE 0x0010fd40
923typedef union { 960typedef union {
924 struct { 961 struct {
925 uint32_t base:24; 962 uint32_t base:24;
926 enum INST_TARGET target:2; 963 enum INST_TARGET target:2;
927 uint32_t padding0:6; 964 uint32_t :6;
928 } __attribute__((packed)); 965 } __attribute__((packed));
929 uint32_t raw; 966 uint32_t raw;
930} bar0_window_t; 967} bar0_window_t;
931 968
969// Support: Tesla 2.0 through (at least) Blackwell
970#define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window)
971#define NV_PRAMIN_LEN 0x00100000
972
932// Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere 973// Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere
933#define NV_PRAMIN_PDB_CONFIG_OFF 0x200 974#define NV_PRAMIN_PDB_CONFIG_OFF 0x200
934typedef union { 975typedef union {