aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h67
1 files changed, 54 insertions, 13 deletions
diff --git a/nvdebug.h b/nvdebug.h
index f644500..409b013 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -484,6 +484,7 @@ typedef union {
484#define NV_CHIP_ID_AMPERE 0x170 484#define NV_CHIP_ID_AMPERE 0x170
485#define NV_CHIP_ID_HOPPER 0x180 485#define NV_CHIP_ID_HOPPER 0x180
486#define NV_CHIP_ID_ADA 0x190 486#define NV_CHIP_ID_ADA 0x190
487#define NV_CHIP_ID_BLACKWELL 0x1A0
487 488
488inline static const char* ARCH2NAME(uint32_t arch) { 489inline static const char* ARCH2NAME(uint32_t arch) {
489 switch (arch) { 490 switch (arch) {
@@ -521,14 +522,18 @@ inline static const char* ARCH2NAME(uint32_t arch) {
521 return "Turing"; 522 return "Turing";
522 case 0x17: 523 case 0x17:
523 return "Ampere"; 524 return "Ampere";
524 case 0x18: 525 case 0x18: // Despite the Chip ID, Hopper functionally succeeds Ada
525 return "Hopper"; 526 return "Hopper";
526 case 0x19: 527 case 0x19:
527 return "Ada Lovelace"; 528 return "Ada Lovelace";
528 case 0x20: 529 case 0x1A:
529 return "Blackwell (?)"; 530 return "Blackwell";
531 case 0x1B:
532 return "Rubin (?)";
533 case 0x1F: // NVIDIA-internal simulator
534 return "AMODEL";
530 default: 535 default:
531 if (arch < 0x19) 536 if (arch < 0x1A)
532 return "[unknown historical architecture]"; 537 return "[unknown historical architecture]";
533 else 538 else
534 return "[future]"; 539 return "[future]";
@@ -881,25 +886,57 @@ union reg_range {
881#define NV_CE_PCE_MAP_SIZE 32 886#define NV_CE_PCE_MAP_SIZE 32
882 887
883 888
884/* Location of the 1Kb instance block with page tables for BAR1 and BAR2. 889/* Location of the 1Kb instance block with page tables for the BAR1/2 regions.
885 Support: Fermi+ (?), Pascal 890
891 On the H100, the "BAR1 block" describes what is actually BAR2, and the
892 "BAR2 block" describes BAR4.
893
894 PTR : Upper 28 bits of the 40-bit (4k-aligned) address of the instance
895 block which configures the listed BAR region.
896
897 "Hopper+ uses 64-bit BARs, so GPU BAR2 should be at BAR4/5 and GPU BAR1 is at
898 BAR2/3" (open-gpu-kernel-modules)
886*/ 899*/
900// Support: Fermi through Ampere, Ada
887#define NV_PBUS_BAR1_BLOCK 0x00001704 901#define NV_PBUS_BAR1_BLOCK 0x00001704
888#define NV_PBUS_BAR2_BLOCK 0x00001714 902#define NV_PBUS_BAR2_BLOCK 0x00001714
889typedef union { 903typedef union {
890 struct { 904 struct {
891 uint32_t ptr:28; 905 uint32_t ptr:28;
892 enum INST_TARGET target:2; 906 enum INST_TARGET target:2;
893 uint32_t padding0:1; 907 uint32_t :1;
894 bool is_virtual:1; 908 bool is_virtual:1;
895 } __attribute__((packed)); 909 } __attribute__((packed));
896 uint32_t raw; 910 uint32_t raw;
897 struct { 911 struct {
898 uint32_t map:30; 912 uint32_t map:30;
899 uint32_t padding1:2; 913 uint32_t :2;
900 } __attribute__((packed)); 914 } __attribute__((packed));
901} bar_config_block_t; 915} bar_config_block_t;
902 916
917// Support: Hopper, Blackwell+
918// This is a "VREG" (virtual register?) in the documentation, meaning that it
919// needs the VREG base added first.
920#define NV_VIRTUAL_FUNCTION_FULL_PHYS_OFFSET 0x00B80000
921#define NV_VIRTUAL_FUNCTION_PRIV_FUNC_BAR2_BLOCK (NV_VIRTUAL_FUNCTION_FULL_PHYS_OFFSET+0x00000F70)
922typedef union {
923 struct {
924 bool is_pending:1;
925 bool is_outstanding:1;
926 uint32_t :7;
927 bool is_virtual:1;
928 enum INST_TARGET target:2;
929 uint64_t ptr:40;
930 uint32_t :12;
931 } __attribute__((packed));
932 uint64_t raw;
933 struct {
934 uint32_t :10;
935 uint32_t map:22;
936 uint32_t :32;
937 } __attribute__((packed));
938} bar_config_block_gh100_t;
939
903/* BAR0 PRAMIN (Private RAM Instance) window configuration 940/* BAR0 PRAMIN (Private RAM Instance) window configuration
904 One of the oldest ways to access video memory on NVIDIA GPUs is by using 941 One of the oldest ways to access video memory on NVIDIA GPUs is by using
905 a configurable 1MB window into VRAM which is mapped into BAR0 (register) 942 a configurable 1MB window into VRAM which is mapped into BAR0 (register)
@@ -914,21 +951,25 @@ typedef union {
914 TARGET : Which address space BASE points into 951 TARGET : Which address space BASE points into
915 952
916 Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes 953 Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes
917
918 Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere
919*/ 954*/
955// Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere, Ada
920#define NV_PBUS_BAR0_WINDOW 0x00001700 956#define NV_PBUS_BAR0_WINDOW 0x00001700
921#define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) 957// On Hopper and Blackwell+, TARGET must always be 0 (VIDMEM)
922#define NV_PRAMIN_LEN 0x00100000 958// Support: Hopper, Blackwell+
959#define NV_XAL_EP_BAR0_WINDOW_BASE 0x0010fd40
923typedef union { 960typedef union {
924 struct { 961 struct {
925 uint32_t base:24; 962 uint32_t base:24;
926 enum INST_TARGET target:2; 963 enum INST_TARGET target:2;
927 uint32_t padding0:6; 964 uint32_t :6;
928 } __attribute__((packed)); 965 } __attribute__((packed));
929 uint32_t raw; 966 uint32_t raw;
930} bar0_window_t; 967} bar0_window_t;
931 968
969// Support: Tesla 2.0 through (at least) Blackwell
970#define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window)
971#define NV_PRAMIN_LEN 0x00100000
972
932// Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere 973// Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere
933#define NV_PRAMIN_PDB_CONFIG_OFF 0x200 974#define NV_PRAMIN_PDB_CONFIG_OFF 0x200
934typedef union { 975typedef union {