aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h158
1 files changed, 124 insertions, 34 deletions
diff --git a/nvdebug.h b/nvdebug.h
index 567806d..eff1470 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -818,6 +818,14 @@ typedef union {
818} bar_config_block_t; 818} bar_config_block_t;
819 819
820/* BAR0 PRAMIN (Private RAM Instance) window configuration 820/* BAR0 PRAMIN (Private RAM Instance) window configuration
821 One of the oldest ways to access video memory on NVIDIA GPUs is by using
822 a configurable 1MB window into VRAM which is mapped into BAR0 (register)
823 space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs
824 and appears to be used today to bootstrap page table configuration.
825
826 Why is it mapped at a location called NVIDIA Private RAM Instance? Because
827 this used to point to the entirety of instance RAM, which was separate from
828 VRAM on older NVIDIA GPUs.
821 829
822 BASE : Base of window >> 16 in [TARGET] virtual address space 830 BASE : Base of window >> 16 in [TARGET] virtual address space
823 TARGET : Which address space BASE points into 831 TARGET : Which address space BASE points into
@@ -843,7 +851,7 @@ typedef union {
843typedef union { 851typedef union {
844 struct { 852 struct {
845 uint32_t target:2; 853 uint32_t target:2;
846 uint32_t vol:1; 854 uint32_t is_volatile:1;
847 uint32_t padding0:1; 855 uint32_t padding0:1;
848 uint32_t fault_replay_tex:1; 856 uint32_t fault_replay_tex:1;
849 uint32_t fault_replay_gcc:1; 857 uint32_t fault_replay_gcc:1;
@@ -853,6 +861,10 @@ typedef union {
853 uint32_t page_dir_lo:20; 861 uint32_t page_dir_lo:20;
854 uint32_t page_dir_hi:32; 862 uint32_t page_dir_hi:32;
855 } __attribute__((packed)); 863 } __attribute__((packed));
864 struct {
865 uint32_t pad:12;
866 uint64_t page_dir:52; // Confirmed working on Xavier and tama
867 } __attribute__((packed));
856 uint64_t raw; 868 uint64_t raw;
857} page_dir_config_t; 869} page_dir_config_t;
858 870
@@ -888,6 +900,14 @@ typedef union {
888 The following arrays merely represent different projections of Fig. 1, and 900 The following arrays merely represent different projections of Fig. 1, and
889 only one is strictly needed to reconstruct all the others. However, due to 901 only one is strictly needed to reconstruct all the others. However, due to
890 the complexity of page tables, we include all of these to aid in readability. 902 the complexity of page tables, we include all of these to aid in readability.
903
904 Support: Pascal, Volta, Turing, Ampere, Ada, Hopper*, Blackwell*
905 Note: *Hopper introduces Version 3 Page Tables, but is backwards-compatible.
906 The newer version adds a PD4 level to support 57-bit virtual
907 addresses, and slightly shifts the PDE and PTE fields.
908
909 See also: gp100-mmu-format.pdf in open-gpu-doc. In open-gpu-kernel-modules
910 this is synonymously the "NEW" and "VER2" layout.
891*/ 911*/
892// How many nodes/entries per level in V2 of NVIDIA's page table format 912// How many nodes/entries per level in V2 of NVIDIA's page table format
893static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512}; 913static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
@@ -907,6 +927,12 @@ enum PD_TARGET {
907 PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101 927 PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101
908 PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111 928 PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111
909}; 929};
930// The low bit is unset on page directory (PD) targets
931#define IS_PD_TARGET(target) (!(target & 0x1u))
932// Convert from an enum INST_TARGET to an enum PD_TARGET
933#define INST2PD_TARGET(target) ((target & 0x2) ? (target << 1) : (!target) << 1)
934// Convert from an enum V1_PD_TARGET to an enum PD_TARGET
935#define V12PD_TARGET(target) (target << 1)
910static inline const char *pd_target_to_text(enum PD_TARGET t) { 936static inline const char *pd_target_to_text(enum PD_TARGET t) {
911 switch (t) { 937 switch (t) {
912 case PD_AND_TARGET_INVALID: 938 case PD_AND_TARGET_INVALID:
@@ -928,13 +954,10 @@ static inline const char *pd_target_to_text(enum PD_TARGET t) {
928} 954}
929 955
930// Page Directory Entry/Page Table Entry V2 type 956// Page Directory Entry/Page Table Entry V2 type
931// Note: As the meaning of target (bits 2:1) changes depending on if the entry 957// Note: The meaning of target (bits 2:1) at a PDE-level changes if the
932// is a PTE or not, this combines them into a single target field to 958// entry is a large-page PTE or not. To simplify the logic, we combine them
933// simplify comparisons. 959// into a single target field to simplify comparisons.
934// Support: Pascal, Volta, Turing, Ampere, Ada 960#define TARGET_PEER 1
935//
936// V3 introduced with Hopper, but Hopper and Blackwell also support V2
937//
938typedef union { 961typedef union {
939 // Page Directory Entry (PDE) 962 // Page Directory Entry (PDE)
940 struct { 963 struct {
@@ -965,21 +988,74 @@ typedef union {
965 uint64_t raw_w; 988 uint64_t raw_w;
966} page_dir_entry_t; 989} page_dir_entry_t;
967 990
968// Page Directory Entry/Page Table Entry V1 type 991/* GMMU Page Tables Version 1
969// Support: Fermi, Kepler, Maxwell 992 These page tables contain 2 levels and are used in the Fermi, Kepler, and
993 Maxwell architectures to support a 40-bit virtual address space.
994
995 Version 1 Page Tables may be configured to support either 64 KiB or 128 KiB
996 large pages. Table addressing differs between the modes---even if the table
997 contains no large pages. The format for 4 KiB pages in each mode is shown
998 below.
999
1000 V1 of NVIDIA's page table format uses 1 level of PDEs and a level of PTEs.
1001 How the virtual address is sliced to yield an index into each level and a
1002 page offset is shown by Fig 1 and Fig 2 (for 64 KiB and 128 KiB large page
1003 modes respectively).
1004
1005 == Figure 1: 64 KiB mode ==
1006 Page Offset (12 bits) <----------------------------------+
1007 Page Table Entry (PTE) (13 bits) <--------------+ |
1008 Page Directory Entry (PDE) (13 bits) <-+ | |
1009 ^ ^ ^
1010 Virtual address: [39, 25] [24, 12] [11, 0]
1011
1012 == Figure 2: 128 KiB mode ==
1013 Page Offset (12 bits) <----------------------------------+
1014 Page Table Entry (PTE) (14 bits) <--------------+ |
1015 Page Directory Entry (PDE) (12 bits) <-+ | |
1016 ^ ^ ^
1017 Virtual address: [39, 26] [25, 12] [11, 0]
1018
1019
1020 Support: Fermi, Kepler, Maxwell, Pascal*
1021 Note: *Pascal introduces Version 2 Page Tables, but is backwards-compatible.
1022 Note: We only implement the 64-KiB-large-page mode in nvdebug.
1023
1024 See also: mm_gk20a.c in nvgpu (Jetson GPU driver) and kern_gmmu_fmt_gm10x.c
1025 in open-gpu-kernel-modules (open-source NVRM variant). This is
1026 synonymously the "VER1" and unversioned layout in
1027 open-gpu-kernel-modules, with some differences noted in Appdx 1.
1028
1029 == Appdx 1 ==
1030 In open-gpu-kernel-modules, the unversioned MMU layout adds:
1031 - Bit 35: NV_MMU_PTE_LOCK synonym for NV_MMU_PTE_ATOMIC_DISABLE
1032 - Bit 62: NV_MMU_PTE_READ_DISABLE overlapping NV_MMU_PTE_COMPTAGLINE
1033 - Bit 63: NV_MMU_PTE_WRITE_DISABLE overlapping NV_MMU_PTE_COMPTAGLINE
1034 And removes:
1035 - Bit 40, 41, 42, 43 from NV_MMU_PTE_KIND
1036 The PDE layouts are identical. Given that the unversioned defines seem to
1037 predate renaming and/or field extension/relocation, they are likely artifacts
1038 from the page table development process, and have no meaning now.
1039*/
1040// Number of entries in the PDE and PTE levels
1041static const int NV_MMU_PT_V1_SZ[2] = {8192, 8192};
1042// Which bit index is the least significant in indexing each page level
1043static const int NV_MMU_PT_V1_LSB[2] = {25, 12};
1044
1045// V1 Page Directory Entry target
970enum V1_PD_TARGET { 1046enum V1_PD_TARGET {
971 PD_TARGET_INVALID = 0, 1047 PD_TARGET_INVALID = 0,
972 PD_TARGET_VID_MEM = 1, 1048 PD_TARGET_VID_MEM = 1,
973 PD_TARGET_SYS_MEM_COHERENT = 2, 1049 PD_TARGET_SYS_MEM_COHERENT = 2,
974 PD_TARGET_SYS_MEM_NONCOHERENT = 3, 1050 PD_TARGET_SYS_MEM_NONCOHERENT = 3,
975}; 1051};
976// Page Directory Entry (PDE) 1052// V1 Page Directory Entry (PDE)
977typedef union { 1053typedef union {
978// Large page fields 1054// Large page fields
979 struct { 1055 struct {
980// 0:32 1056// 0:32
981 enum V1_PD_TARGET target:2; 1057 enum V1_PD_TARGET target:2;
982 uint32_t padding0:2; 1058 uint32_t padding0:2; // Documented as "PDE_SIZE"?
983 uint64_t addr:28; // May be wider? 1059 uint64_t addr:28; // May be wider?
984// 32:63 1060// 32:63
985 uint32_t padding2:3; 1061 uint32_t padding2:3;
@@ -998,45 +1074,58 @@ typedef union {
998 } __attribute__((packed)); 1074 } __attribute__((packed));
999 uint64_t raw; 1075 uint64_t raw;
1000} page_dir_entry_v1_t; 1076} page_dir_entry_v1_t;
1001// Page Table Entry (PTE) 1077
1002// Reconstructed from info in Jetson nvgpu driver 1078// V1 Page Table Entry (PTE)
1003typedef union { 1079typedef union {
1004 struct { 1080 struct {
1005// 0:32 1081// 0:32
1006 bool is_present:1; 1082 bool is_present:1;
1007 bool is_privileged:1; 1083 bool is_privileged:1;
1008 bool is_readonly:1; 1084 bool is_readonly:1;
1009 uint32_t padding0:1; 1085 bool is_encrypted:1;
1010 uint64_t addr:28; 1086 uint64_t addr:28;
1011// 32:63 1087// 32:63
1012 bool is_volatile:1; 1088 bool is_volatile:1;
1013 enum INST_TARGET:2; 1089 enum INST_TARGET:2;
1014 uint32_t padding1:1; 1090 bool atomics_disabled:1;
1015 uint32_t kind:8; 1091 uint32_t kind:8;
1016 uint32_t comptag:17; 1092 uint32_t comptag:20;
1017 uint32_t padding2:1;
1018 bool is_read_disabled:1;
1019 bool is_write_disabled:1;
1020 } __attribute__((packed)); 1093 } __attribute__((packed));
1021 uint64_t raw; 1094 uint64_t raw;
1022} page_tbl_entry_v1_t; 1095} page_tbl_entry_v1_t;
1023//enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3}; 1096
1024//enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3}; 1097/* GMMU Page Tables Version 0
1025//static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024}; 1098 This page table contains 2 levels to support a 40-bit virtual address space,
1026/* PDE V0 (nv50/Tesla) 1099 and is used in the Tesla (2.0?) architecture.
1100
1101 It is unclear what NVIDIA calls this page table layout. It predates V1, so we
1102 call it V0.
1103
1104 See also: https://envytools.readthedocs.io/en/latest/hw/memory/g80-vm.html
1105 */
1106/*
1107// What size pages are in the pointed-to page table?
1108enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3};
1109// How large is the pointed-to page table?
1110enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3};
1111// Given a page table size, how many entries does it have?
1112static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024};
1113
1114// PDE V0 (nv50/Tesla)
1027typedef union { 1115typedef union {
1028 struct { 1116 struct {
1029 enum V1_PDE_TYPE type:2; 1117 enum V0_PDE_TYPE type:2;
1030 enum INST_TARGET target:2; 1118 enum INST_TARGET target:2;
1031 uint32_t padding0:1; 1119 uint32_t padding0:1;
1032 enum V1_PDE_SIZE sublevel_size:2; 1120 enum V0_PDE_SIZE sublevel_size:2;
1033 uint32_t padding1:5; 1121 uint32_t padding1:5;
1034 uint32_t addr:28; 1122 uint32_t addr:28;
1035 uint32_t padding2:24; 1123 uint32_t padding2:24;
1036 } __attribute__((packed)); 1124 } __attribute__((packed));
1037 uint64_t raw; 1125 uint64_t raw;
1038} page_dir_entry_v1_t;*/ 1126} page_dir_entry_v0_t;
1039/* PTE V0 (nv50) 1127
1128// PTE V0 (nv50) for small pages
1040typedef union { 1129typedef union {
1041 struct { 1130 struct {
1042 bool is_present:1; 1131 bool is_present:1;
@@ -1055,7 +1144,8 @@ typedef union {
1055 uint32_t padding5:1; 1144 uint32_t padding5:1;
1056 } __attribute__((packed)); 1145 } __attribute__((packed));
1057 uint64_t raw; 1146 uint64_t raw;
1058} page_tbl_entry_v1_t;*/ 1147} page_tbl_entry_v0_t;
1148*/
1059 1149
1060// TODO(jbakita): Maybe put the above GPU types in a different file. 1150// TODO(jbakita): Maybe put the above GPU types in a different file.
1061 1151
@@ -1077,6 +1167,8 @@ struct nvdebug_state {
1077 struct gk20a *g; 1167 struct gk20a *g;
1078 // Pointer to PCI device needed for pci_iounmap 1168 // Pointer to PCI device needed for pci_iounmap
1079 struct pci_dev *pcid; 1169 struct pci_dev *pcid;
1170 // Pointer to generic device struct (both platform and pcie devices)
1171 struct device *dev;
1080}; 1172};
1081 1173
1082/*const struct runlist_funcs { 1174/*const struct runlist_funcs {
@@ -1152,13 +1244,11 @@ int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id);
1152void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy); 1244void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy);
1153uint64_t search_page_directory( 1245uint64_t search_page_directory(
1154 struct nvdebug_state *g, 1246 struct nvdebug_state *g,
1155 void __iomem *pde_offset, 1247 page_dir_config_t pd_config,
1156 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
1157 uint64_t addr_to_find); 1248 uint64_t addr_to_find);
1158uint64_t search_v1_page_directory( 1249uint64_t search_v1_page_directory(
1159 struct nvdebug_state *g, 1250 struct nvdebug_state *g,
1160 void __iomem *pde_offset, 1251 page_dir_config_t pd_config,
1161 void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
1162 uint64_t addr_to_find); 1252 uint64_t addr_to_find);
1163 1253
1164 1254
@@ -1252,4 +1342,4 @@ static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) {
1252} 1342}
1253// Defined in bus.c 1343// Defined in bus.c
1254int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); 1344int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target);
1255int get_bar2_pdb(struct nvdebug_state *g, void **pdb, bool *is_v2_pdb); 1345int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd);