diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 67 |
1 files changed, 54 insertions, 13 deletions
@@ -484,6 +484,7 @@ typedef union { | |||
484 | #define NV_CHIP_ID_AMPERE 0x170 | 484 | #define NV_CHIP_ID_AMPERE 0x170 |
485 | #define NV_CHIP_ID_HOPPER 0x180 | 485 | #define NV_CHIP_ID_HOPPER 0x180 |
486 | #define NV_CHIP_ID_ADA 0x190 | 486 | #define NV_CHIP_ID_ADA 0x190 |
487 | #define NV_CHIP_ID_BLACKWELL 0x1A0 | ||
487 | 488 | ||
488 | inline static const char* ARCH2NAME(uint32_t arch) { | 489 | inline static const char* ARCH2NAME(uint32_t arch) { |
489 | switch (arch) { | 490 | switch (arch) { |
@@ -521,14 +522,18 @@ inline static const char* ARCH2NAME(uint32_t arch) { | |||
521 | return "Turing"; | 522 | return "Turing"; |
522 | case 0x17: | 523 | case 0x17: |
523 | return "Ampere"; | 524 | return "Ampere"; |
524 | case 0x18: | 525 | case 0x18: // Despite the Chip ID, Hopper functionally comes after Ada |
525 | return "Hopper"; | 526 | return "Hopper"; |
526 | case 0x19: | 527 | case 0x19: |
527 | return "Ada Lovelace"; | 528 | return "Ada Lovelace"; |
528 | case 0x20: | 529 | case 0x1A: |
529 | return "Blackwell (?)"; | 530 | return "Blackwell"; |
531 | case 0x1B: | ||
532 | return "Rubin (?)"; | ||
533 | case 0x1F: // NVIDIA-internal simulator | ||
534 | return "AMODEL"; | ||
530 | default: | 535 | default: |
531 | if (arch < 0x19) | 536 | if (arch < 0x1A) |
532 | return "[unknown historical architecture]"; | 537 | return "[unknown historical architecture]"; |
533 | else | 538 | else |
534 | return "[future]"; | 539 | return "[future]"; |
@@ -881,25 +886,57 @@ union reg_range { | |||
881 | #define NV_CE_PCE_MAP_SIZE 32 | 886 | #define NV_CE_PCE_MAP_SIZE 32 |
882 | 887 | ||
883 | 888 | ||
884 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | 889 | /* Location of the 1Kb instance block with page tables for the BAR1/2 regions. |
885 | Support: Fermi+ (?), Pascal | 890 | |
891 | On the H100, the "BAR1 block" describes what is actually BAR2, and the | ||
892 | "BAR2 block" describes BAR4. | ||
893 | |||
894 | PTR : Upper 28 bits of the 40-bit (4k-aligned) address of the instance | |
895 | block configuration for the listed BAR region. | |
896 | |||
897 | "Hopper+ uses 64-bit BARs, so GPU BAR2 should be at BAR4/5 and GPU BAR1 is at | ||
898 | BAR2/3" (open-gpu-kernel-modules) | ||
886 | */ | 899 | */ |
900 | // Support: Fermi through Ampere, Ada | ||
887 | #define NV_PBUS_BAR1_BLOCK 0x00001704 | 901 | #define NV_PBUS_BAR1_BLOCK 0x00001704 |
888 | #define NV_PBUS_BAR2_BLOCK 0x00001714 | 902 | #define NV_PBUS_BAR2_BLOCK 0x00001714 |
889 | typedef union { | 903 | typedef union { |
890 | struct { | 904 | struct { |
891 | uint32_t ptr:28; | 905 | uint32_t ptr:28; |
892 | enum INST_TARGET target:2; | 906 | enum INST_TARGET target:2; |
893 | uint32_t padding0:1; | 907 | uint32_t :1; |
894 | bool is_virtual:1; | 908 | bool is_virtual:1; |
895 | } __attribute__((packed)); | 909 | } __attribute__((packed)); |
896 | uint32_t raw; | 910 | uint32_t raw; |
897 | struct { | 911 | struct { |
898 | uint32_t map:30; | 912 | uint32_t map:30; |
899 | uint32_t padding1:2; | 913 | uint32_t :2; |
900 | } __attribute__((packed)); | 914 | } __attribute__((packed)); |
901 | } bar_config_block_t; | 915 | } bar_config_block_t; |
902 | 916 | ||
917 | // Support: Hopper, Blackwell+ | ||
918 | // This is a "VREG" (virtual register?) in the documentation, meaning that it | ||
919 | // needs the VREG base added first. | ||
920 | #define NV_VIRTUAL_FUNCTION_FULL_PHYS_OFFSET 0x00B80000 | ||
921 | #define NV_VIRTUAL_FUNCTION_PRIV_FUNC_BAR2_BLOCK (NV_VIRTUAL_FUNCTION_FULL_PHYS_OFFSET+0x00000F70) | ||
922 | typedef union { | ||
923 | struct { | ||
924 | bool is_pending:1; | ||
925 | bool is_outstanding:1; | ||
926 | uint32_t :7; | ||
927 | bool is_virtual:1; | ||
928 | enum INST_TARGET target:2; | ||
929 | uint64_t ptr:40; | ||
930 | uint32_t :12; | ||
931 | } __attribute__((packed)); | ||
932 | uint64_t raw; | ||
933 | struct { | ||
934 | uint32_t :10; | ||
935 | uint32_t map:22; | ||
936 | uint32_t :32; | ||
937 | } __attribute__((packed)); | ||
938 | } bar_config_block_gh100_t; | ||
939 | |||
903 | /* BAR0 PRAMIN (Private RAM Instance) window configuration | 940 | /* BAR0 PRAMIN (Private RAM Instance) window configuration |
904 | One of the oldest ways to access video memory on NVIDIA GPUs is by using | 941 | One of the oldest ways to access video memory on NVIDIA GPUs is by using |
905 | a configurable 1MB window into VRAM which is mapped into BAR0 (register) | 942 | a configurable 1MB window into VRAM which is mapped into BAR0 (register) |
@@ -914,21 +951,25 @@ typedef union { | |||
914 | TARGET : Which address space BASE points into | 951 | TARGET : Which address space BASE points into |
915 | 952 | ||
916 | Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes | 953 | Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes |
917 | |||
918 | Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | ||
919 | */ | 954 | */ |
955 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere, Ada | ||
920 | #define NV_PBUS_BAR0_WINDOW 0x00001700 | 956 | #define NV_PBUS_BAR0_WINDOW 0x00001700 |
921 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) | 957 | // On Hopper, and Blackwell+, TARGET must always be 0 (VIDMEM) |
922 | #define NV_PRAMIN_LEN 0x00100000 | 958 | // Support: Hopper, Blackwell+ |
959 | #define NV_XAL_EP_BAR0_WINDOW_BASE 0x0010fd40 | ||
923 | typedef union { | 960 | typedef union { |
924 | struct { | 961 | struct { |
925 | uint32_t base:24; | 962 | uint32_t base:24; |
926 | enum INST_TARGET target:2; | 963 | enum INST_TARGET target:2; |
927 | uint32_t padding0:6; | 964 | uint32_t :6; |
928 | } __attribute__((packed)); | 965 | } __attribute__((packed)); |
929 | uint32_t raw; | 966 | uint32_t raw; |
930 | } bar0_window_t; | 967 | } bar0_window_t; |
931 | 968 | ||
969 | // Support: Tesla 2.0 through (at least) Blackwell | ||
970 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) | ||
971 | #define NV_PRAMIN_LEN 0x00100000 | ||
972 | |||
932 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | 973 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere |
933 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 | 974 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 |
934 | typedef union { | 975 | typedef union { |