diff options
| author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-10 13:39:59 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-10 16:37:09 -0400 |
| commit | 16163d89b564029cabf4842815590b0a484cc172 (patch) | |
| tree | 7057eaff355b54e8a454243b14f58732be279676 | |
| parent | 8b9c6400d0c88e127be2d31ab3fb507da49f9d6f (diff) | |
Add Hopper and Blackwell support to bus.c
- Handle relocation of PRAMIN window configuration register
- Handle new format for BAR2 configuration
- Catch unreadable PRAMIN configuration
Tested on A100, H100, and AD102 (RTX 6000 Ada).
| -rw-r--r-- | bus.c | 73 | ||||
| -rw-r--r-- | nvdebug.h | 67 |
2 files changed, 111 insertions, 29 deletions
| @@ -24,10 +24,11 @@ | |||
| 24 | /* Obtain the PRAMIN offset at which `addr` can be accessed | 24 | /* Obtain the PRAMIN offset at which `addr` can be accessed |
| 25 | @param addr Address to find | 25 | @param addr Address to find |
| 26 | @param target Which address space to use (VRAM, SYS_MEM, PEER(?)) | 26 | @param target Which address space to use (VRAM, SYS_MEM, PEER(?)) |
| 27 | @return positive offset or -EINVAL on invalid arguments | 27 | @return positive offset, -EINVAL on invalid arguments, or -EOPNOTSUPP on |
| 28 | an unsupported platform. | ||
| 28 | 29 | ||
| 29 | Note: Will move the PRAMIN window to accommodate the request. Only guarantees | 30 | Note: Will move the PRAMIN window to accommodate the request. Only guarantees |
| 30 | that the surrounding 64KiB window will be accessible. | 31 | that the surrounding 64-KiB-aligned window will be accessible. |
| 31 | Note: Moving the PRAMIN window will cause problems if it races with driver | 32 | Note: Moving the PRAMIN window will cause problems if it races with driver |
| 32 | code that tries to do the same, or expects the window not to move. | 33 | code that tries to do the same, or expects the window not to move. |
| 33 | Bugs: Untested on PEER. | 34 | Bugs: Untested on PEER. |
| @@ -36,6 +37,7 @@ int addr_to_pramin_mut(struct nvdebug_state *g, | |||
| 36 | uint64_t addr, enum INST_TARGET target) { | 37 | uint64_t addr, enum INST_TARGET target) { |
| 37 | bar0_window_t window; | 38 | bar0_window_t window; |
| 38 | uint64_t pramin_base; | 39 | uint64_t pramin_base; |
| 40 | uint32_t window_reg; | ||
| 39 | // For us, accuracy and robustness are more important than speed | 41 | // For us, accuracy and robustness are more important than speed |
| 40 | // Check that the address is valid (49 bits are addressable on-GPU, but | 42 | // Check that the address is valid (49 bits are addressable on-GPU, but |
| 41 | // PRAMIN only supports up to 40 bits). | 43 | // PRAMIN only supports up to 40 bits). |
| @@ -44,21 +46,38 @@ int addr_to_pramin_mut(struct nvdebug_state *g, | |||
| 44 | addr, __func__); | 46 | addr, __func__); |
| 45 | return -EINVAL; | 47 | return -EINVAL; |
| 46 | } | 48 | } |
| 47 | window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW); | 49 | // Register relocated on Hopper and Blackwell+ |
| 48 | if (window.target != target) | 50 | if ((g->chip_id >= NV_CHIP_ID_HOPPER && g->chip_id < NV_CHIP_ID_ADA) || g->chip_id >= NV_CHIP_ID_BLACKWELL) |
| 49 | goto relocate; | 51 | window_reg = NV_XAL_EP_BAR0_WINDOW_BASE; |
| 52 | else | ||
| 53 | window_reg = NV_PBUS_BAR0_WINDOW; | ||
| 54 | if ((window.raw = nvdebug_readl(g, window_reg)) == -1) { | ||
| 55 | printk(KERN_ERR "[nvdebug] PRAMIN window configuration inaccessible; " | ||
| 56 | "failing %s\n", __func__); | ||
| 57 | return -EOPNOTSUPP; | ||
| 58 | } | ||
| 59 | if (window.target != target) { | ||
| 60 | // On Hopper and Blackwell+, the window always points at VID_MEM | ||
| 61 | if ((g->chip_id >= NV_CHIP_ID_HOPPER && g->chip_id < NV_CHIP_ID_ADA) || g->chip_id >= NV_CHIP_ID_BLACKWELL) | ||
| 62 | return -EOPNOTSUPP; | ||
| 63 | else | ||
| 64 | goto relocate; | ||
| 65 | } | ||
| 50 | pramin_base = ((uint64_t)window.base) << 16; | 66 | pramin_base = ((uint64_t)window.base) << 16; |
| 51 | if (addr < pramin_base || addr > pramin_base + NV_PRAMIN_LEN) | 67 | if (addr < pramin_base || addr > pramin_base + NV_PRAMIN_LEN) |
| 52 | goto relocate; | 68 | goto relocate; |
| 53 | return addr - pramin_base; // Guaranteed to be < 1MiB, so safe for int | 69 | return addr - pramin_base; // Guaranteed to be < 1MiB, so safe for int |
| 54 | relocate: | 70 | relocate: |
| 55 | printk(KERN_INFO "[nvdebug] Moving PRAMIN win from base %llx (%s) to %llx (%s) to accomodate %#018llx\n", pramin_base, target_to_text(window.target), (addr >> 16) << 16, target_to_text(target), addr); | 71 | printk(KERN_INFO "[nvdebug] [SIDE EFFECT] Moving PRAMIN window from base " |
| 72 | "%llx (%s) to %llx (%s) to accomodate %#018llx\n", | ||
| 73 | ((uint64_t)window.base) << 16, target_to_text(window.target), | ||
| 74 | (addr >> 16) << 16, target_to_text(target), addr); | ||
| 56 | // Move PRAMIN window to a 64KiB-aligned address | 75 | // Move PRAMIN window to a 64KiB-aligned address |
| 57 | window.base = (u32)(addr >> 16); // Safe, due to above range check | 76 | window.base = (u32)(addr >> 16); // Safe, due to above range check |
| 58 | window.target = target; | 77 | window.target = target; |
| 59 | nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw); | 78 | nvdebug_writel(g, window_reg, window.raw); |
| 60 | // Wait for the window to move by re-reading (as done in nvgpu driver) | 79 | // Wait for the window to move by re-reading (as done in nvgpu driver) |
| 61 | (void) nvdebug_readl(g, NV_PBUS_BAR0_WINDOW); | 80 | (void) nvdebug_readl(g, window_reg); |
| 62 | return (int)(addr & 0xffffull); | 81 | return (int)(addr & 0xffffull); |
| 63 | } | 82 | } |
| 64 | 83 | ||
| @@ -72,7 +91,9 @@ relocate: | |||
| 72 | */ | 91 | */ |
| 73 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd) { | 92 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd) { |
| 74 | int ret; | 93 | int ret; |
| 75 | bar_config_block_t bar2_block; | 94 | uint64_t bar2_ptr; |
| 95 | enum INST_TARGET bar2_target; | ||
| 96 | bool bar2_is_virtual; | ||
| 76 | 97 | ||
| 77 | if (!pd) | 98 | if (!pd) |
| 78 | return -EINVAL; | 99 | return -EINVAL; |
| @@ -85,17 +106,37 @@ int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd) { | |||
| 85 | // hierarchy used to translate BAR2 offsets to VRAM or SYS_MEM addresses. | 106 | // hierarchy used to translate BAR2 offsets to VRAM or SYS_MEM addresses. |
| 86 | 107 | ||
| 87 | // Determine location of BAR2 instance block | 108 | // Determine location of BAR2 instance block |
| 88 | if ((bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK)) == -1) { | 109 | if ((g->chip_id >= NV_CHIP_ID_HOPPER && g->chip_id < NV_CHIP_ID_ADA) || g->chip_id >= NV_CHIP_ID_BLACKWELL) { |
| 89 | printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 configuration! BAR2/3 inaccessible.\n"); | 110 | // Register layout updated on Hopper and Blackwell+ to support 52-bit |
| 90 | return -EOPNOTSUPP; | 111 | // instance block pointers (vs. 40 bits before) |
| 112 | bar_config_block_gh100_t bar2_block; | ||
| 113 | if ((bar2_block.raw = nvdebug_readq(g, NV_VIRTUAL_FUNCTION_PRIV_FUNC_BAR2_BLOCK)) == -1) { | ||
| 114 | printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 configuration! BAR2/3 inaccessible.\n"); | ||
| 115 | return -EOPNOTSUPP; | ||
| 116 | } | ||
| 117 | bar2_ptr = (uint64_t)bar2_block.ptr << 12; | ||
| 118 | bar2_target = bar2_block.target; | ||
| 119 | bar2_is_virtual = bar2_block.is_virtual; | ||
| 120 | } else { | ||
| 121 | bar_config_block_t bar2_block; | ||
| 122 | if ((bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK)) == -1) { | ||
| 123 | printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 configuration! BAR2/3 inaccessible.\n"); | ||
| 124 | return -EOPNOTSUPP; | ||
| 125 | } | ||
| 126 | bar2_ptr = (uint64_t)bar2_block.ptr << 12; | ||
| 127 | bar2_target = bar2_block.target; | ||
| 128 | bar2_is_virtual = bar2_block.is_virtual; | ||
| 91 | } | 129 | } |
| 92 | printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar2_block.is_virtual ? "virtual" : "physical"); | 130 | printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", bar2_ptr, target_to_text(bar2_target), bar2_is_virtual ? "virtual" : "physical"); |
| 93 | // Setup PRAMIN to point at the BAR2 instance block | 131 | // Setup PRAMIN to point at the BAR2 instance block |
| 94 | if ((ret = addr_to_pramin_mut(g, (uint64_t)bar2_block.ptr << 12, bar2_block.target)) < 0) { | 132 | // TODO: This won't work if the instance block is in SYS_MEM on Hopper or |
| 95 | printk(KERN_ERR "[nvdebug] Invalid BAR2/3 Instance Block configuration! BAR2/3 inaccessible.\n"); | 133 | // Blackwell+. Going through the I/O MMU appears to be fairly |
| 134 | // reliable, so I need to switch to using that logic whenever | ||
| 135 | // SYS_MEM may be accessed. | ||
| 136 | if ((ret = addr_to_pramin_mut(g, bar2_ptr, bar2_target)) < 0) { | ||
| 137 | printk(KERN_ERR "[nvdebug] Unable to access BAR2/3 Instance Block configuration via PRAMIN! BAR2/3 inaccessible.\n"); | ||
| 96 | return ret; | 138 | return ret; |
| 97 | } | 139 | } |
| 98 | printk(KERN_INFO "[nvdebug] BAR2 inst block at off %x in PRAMIN\n", ret); | ||
| 99 | // Pull the page directory base configuration from the instance block | 140 | // Pull the page directory base configuration from the instance block |
| 100 | if ((pd->raw = nvdebug_readq(g, NV_PRAMIN + ret + NV_PRAMIN_PDB_CONFIG_OFF)) == -1) { | 141 | if ((pd->raw = nvdebug_readq(g, NV_PRAMIN + ret + NV_PRAMIN_PDB_CONFIG_OFF)) == -1) { |
| 101 | printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 PDB configuration! BAR2/3 inaccessible.\n"); | 142 | printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 PDB configuration! BAR2/3 inaccessible.\n"); |
| @@ -484,6 +484,7 @@ typedef union { | |||
| 484 | #define NV_CHIP_ID_AMPERE 0x170 | 484 | #define NV_CHIP_ID_AMPERE 0x170 |
| 485 | #define NV_CHIP_ID_HOPPER 0x180 | 485 | #define NV_CHIP_ID_HOPPER 0x180 |
| 486 | #define NV_CHIP_ID_ADA 0x190 | 486 | #define NV_CHIP_ID_ADA 0x190 |
| 487 | #define NV_CHIP_ID_BLACKWELL 0x1A0 | ||
| 487 | 488 | ||
| 488 | inline static const char* ARCH2NAME(uint32_t arch) { | 489 | inline static const char* ARCH2NAME(uint32_t arch) { |
| 489 | switch (arch) { | 490 | switch (arch) { |
| @@ -521,14 +522,18 @@ inline static const char* ARCH2NAME(uint32_t arch) { | |||
| 521 | return "Turing"; | 522 | return "Turing"; |
| 522 | case 0x17: | 523 | case 0x17: |
| 523 | return "Ampere"; | 524 | return "Ampere"; |
| 524 | case 0x18: | 525 | case 0x18: // Despite the Chip ID, Hopper functionally succeeds Ada |
| 525 | return "Hopper"; | 526 | return "Hopper"; |
| 526 | case 0x19: | 527 | case 0x19: |
| 527 | return "Ada Lovelace"; | 528 | return "Ada Lovelace"; |
| 528 | case 0x20: | 529 | case 0x1A: |
| 529 | return "Blackwell (?)"; | 530 | return "Blackwell"; |
| 531 | case 0x1B: | ||
| 532 | return "Rubin (?)"; | ||
| 533 | case 0x1F: // NVIDIA-internal simulator | ||
| 534 | return "AMODEL"; | ||
| 530 | default: | 535 | default: |
| 531 | if (arch < 0x19) | 536 | if (arch < 0x1A) |
| 532 | return "[unknown historical architecture]"; | 537 | return "[unknown historical architecture]"; |
| 533 | else | 538 | else |
| 534 | return "[future]"; | 539 | return "[future]"; |
| @@ -881,25 +886,57 @@ union reg_range { | |||
| 881 | #define NV_CE_PCE_MAP_SIZE 32 | 886 | #define NV_CE_PCE_MAP_SIZE 32 |
| 882 | 887 | ||
| 883 | 888 | ||
| 884 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | 889 | /* Location of the 1Kb instance block with page tables for the BAR1/2 regions. |
| 885 | Support: Fermi+ (?), Pascal | 890 | |
| 891 | On the H100, the "BAR1 block" describes what is actually BAR2, and the | ||
| 892 | "BAR2 block" describes BAR4. | ||
| 893 | |||
| 894 | PTR : Upper 28 bits of the 40-bit (4k-aligned) address at which the instance | ||
| 895 | block configuration for the listed BAR region resides. | ||
| 896 | |||
| 897 | "Hopper+ uses 64-bit BARs, so GPU BAR2 should be at BAR4/5 and GPU BAR1 is at | ||
| 898 | BAR2/3" (open-gpu-kernel-modules) | ||
| 886 | */ | 899 | */ |
| 900 | // Support: Fermi through Ampere, Ada | ||
| 887 | #define NV_PBUS_BAR1_BLOCK 0x00001704 | 901 | #define NV_PBUS_BAR1_BLOCK 0x00001704 |
| 888 | #define NV_PBUS_BAR2_BLOCK 0x00001714 | 902 | #define NV_PBUS_BAR2_BLOCK 0x00001714 |
| 889 | typedef union { | 903 | typedef union { |
| 890 | struct { | 904 | struct { |
| 891 | uint32_t ptr:28; | 905 | uint32_t ptr:28; |
| 892 | enum INST_TARGET target:2; | 906 | enum INST_TARGET target:2; |
| 893 | uint32_t padding0:1; | 907 | uint32_t :1; |
| 894 | bool is_virtual:1; | 908 | bool is_virtual:1; |
| 895 | } __attribute__((packed)); | 909 | } __attribute__((packed)); |
| 896 | uint32_t raw; | 910 | uint32_t raw; |
| 897 | struct { | 911 | struct { |
| 898 | uint32_t map:30; | 912 | uint32_t map:30; |
| 899 | uint32_t padding1:2; | 913 | uint32_t :2; |
| 900 | } __attribute__((packed)); | 914 | } __attribute__((packed)); |
| 901 | } bar_config_block_t; | 915 | } bar_config_block_t; |
| 902 | 916 | ||
| 917 | // Support: Hopper, Blackwell+ | ||
| 918 | // This is a "VREG" (virtual register?) in the documentation, meaning that it | ||
| 919 | // needs the VREG base added first. | ||
| 920 | #define NV_VIRTUAL_FUNCTION_FULL_PHYS_OFFSET 0x00B80000 | ||
| 921 | #define NV_VIRTUAL_FUNCTION_PRIV_FUNC_BAR2_BLOCK (NV_VIRTUAL_FUNCTION_FULL_PHYS_OFFSET+0x00000F70) | ||
| 922 | typedef union { | ||
| 923 | struct { | ||
| 924 | bool is_pending:1; | ||
| 925 | bool is_outstanding:1; | ||
| 926 | uint32_t :7; | ||
| 927 | bool is_virtual:1; | ||
| 928 | enum INST_TARGET target:2; | ||
| 929 | uint64_t ptr:40; | ||
| 930 | uint32_t :12; | ||
| 931 | } __attribute__((packed)); | ||
| 932 | uint64_t raw; | ||
| 933 | struct { | ||
| 934 | uint32_t :10; | ||
| 935 | uint32_t map:22; | ||
| 936 | uint32_t :32; | ||
| 937 | } __attribute__((packed)); | ||
| 938 | } bar_config_block_gh100_t; | ||
| 939 | |||
| 903 | /* BAR0 PRAMIN (Private RAM Instance) window configuration | 940 | /* BAR0 PRAMIN (Private RAM Instance) window configuration |
| 904 | One of the oldest ways to access video memory on NVIDIA GPUs is by using | 941 | One of the oldest ways to access video memory on NVIDIA GPUs is by using |
| 905 | a configurable 1MB window into VRAM which is mapped into BAR0 (register) | 942 | a configurable 1MB window into VRAM which is mapped into BAR0 (register) |
| @@ -914,21 +951,25 @@ typedef union { | |||
| 914 | TARGET : Which address space BASE points into | 951 | TARGET : Which address space BASE points into |
| 915 | 952 | ||
| 916 | Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes | 953 | Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes |
| 917 | |||
| 918 | Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | ||
| 919 | */ | 954 | */ |
| 955 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere, Ada | ||
| 920 | #define NV_PBUS_BAR0_WINDOW 0x00001700 | 956 | #define NV_PBUS_BAR0_WINDOW 0x00001700 |
| 921 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) | 957 | // On Hopper, and Blackwell+, TARGET must always be 0 (VIDMEM) |
| 922 | #define NV_PRAMIN_LEN 0x00100000 | 958 | // Support: Hopper, Blackwell+ |
| 959 | #define NV_XAL_EP_BAR0_WINDOW_BASE 0x0010fd40 | ||
| 923 | typedef union { | 960 | typedef union { |
| 924 | struct { | 961 | struct { |
| 925 | uint32_t base:24; | 962 | uint32_t base:24; |
| 926 | enum INST_TARGET target:2; | 963 | enum INST_TARGET target:2; |
| 927 | uint32_t padding0:6; | 964 | uint32_t :6; |
| 928 | } __attribute__((packed)); | 965 | } __attribute__((packed)); |
| 929 | uint32_t raw; | 966 | uint32_t raw; |
| 930 | } bar0_window_t; | 967 | } bar0_window_t; |
| 931 | 968 | ||
| 969 | // Support: Tesla 2.0 through (at least) Blackwell | ||
| 970 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) | ||
| 971 | #define NV_PRAMIN_LEN 0x00100000 | ||
| 972 | |||
| 932 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | 973 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere |
| 933 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 | 974 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 |
| 934 | typedef union { | 975 | typedef union { |
