diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 158 |
1 files changed, 124 insertions, 34 deletions
@@ -818,6 +818,14 @@ typedef union { | |||
818 | } bar_config_block_t; | 818 | } bar_config_block_t; |
819 | 819 | ||
820 | /* BAR0 PRAMIN (Private RAM Instance) window configuration | 820 | /* BAR0 PRAMIN (Private RAM Instance) window configuration |
821 | One of the oldest ways to access video memory on NVIDIA GPUs is by using | ||
822 | a configurable 1MB window into VRAM which is mapped into BAR0 (register) | ||
823 | space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs | ||
824 | and appears to be used today to bootstrap page table configuration. | ||
825 | |||
826 | Why is it mapped at a location called NVIDIA Private RAM Instance? Because | ||
827 | this used to point to the entirety of instance RAM, which was separate from | ||
828 | VRAM on older NVIDIA GPUs. | ||
821 | 829 | ||
822 | BASE : Base of window >> 16 in [TARGET] virtual address space | 830 | BASE : Base of window >> 16 in [TARGET] virtual address space |
823 | TARGET : Which address space BASE points into | 831 | TARGET : Which address space BASE points into |
@@ -843,7 +851,7 @@ typedef union { | |||
843 | typedef union { | 851 | typedef union { |
844 | struct { | 852 | struct { |
845 | uint32_t target:2; | 853 | uint32_t target:2; |
846 | uint32_t vol:1; | 854 | uint32_t is_volatile:1; |
847 | uint32_t padding0:1; | 855 | uint32_t padding0:1; |
848 | uint32_t fault_replay_tex:1; | 856 | uint32_t fault_replay_tex:1; |
849 | uint32_t fault_replay_gcc:1; | 857 | uint32_t fault_replay_gcc:1; |
@@ -853,6 +861,10 @@ typedef union { | |||
853 | uint32_t page_dir_lo:20; | 861 | uint32_t page_dir_lo:20; |
854 | uint32_t page_dir_hi:32; | 862 | uint32_t page_dir_hi:32; |
855 | } __attribute__((packed)); | 863 | } __attribute__((packed)); |
864 | struct { | ||
865 | uint32_t pad:12; | ||
866 | uint64_t page_dir:52; // Confirmed working on Xavier and tama | ||
867 | } __attribute__((packed)); | ||
856 | uint64_t raw; | 868 | uint64_t raw; |
857 | } page_dir_config_t; | 869 | } page_dir_config_t; |
858 | 870 | ||
@@ -888,6 +900,14 @@ typedef union { | |||
888 | The following arrays merely represent different projections of Fig. 1, and | 900 | The following arrays merely represent different projections of Fig. 1, and |
889 | only one is strictly needed to reconstruct all the others. However, due to | 901 | only one is strictly needed to reconstruct all the others. However, due to |
890 | the complexity of page tables, we include all of these to aid in readability. | 902 | the complexity of page tables, we include all of these to aid in readability. |
903 | |||
904 | Support: Pascal, Volta, Turing, Ampere, Ada, Hopper*, Blackwell* | ||
905 | Note: *Hopper introduces Version 3 Page Tables, but is backwards-compatible. | ||
906 | The newer version adds a PD4 level to support 57-bit virtual | ||
907 | addresses, and slightly shifts the PDE and PTE fields. | ||
908 | |||
909 | See also: gp100-mmu-format.pdf in open-gpu-doc. In open-gpu-kernel-modules | ||
910 | this is synonymously the "NEW" and "VER2" layout. | ||
891 | */ | 911 | */ |
892 | // How many nodes/entries per level in V2 of NVIDIA's page table format | 912 | // How many nodes/entries per level in V2 of NVIDIA's page table format |
893 | static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512}; | 913 | static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512}; |
@@ -907,6 +927,12 @@ enum PD_TARGET { | |||
907 | PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101 | 927 | PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101 |
908 | PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111 | 928 | PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111 |
909 | }; | 929 | }; |
930 | // The low bit is unset on page directory (PD) targets | ||
931 | #define IS_PD_TARGET(target) (!(target & 0x1u)) | ||
932 | // Convert from an enum INST_TARGET to an enum PD_TARGET | ||
933 | #define INST2PD_TARGET(target) ((target & 0x2) ? (target << 1) : (!target) << 1) | ||
934 | // Convert from an enum V1_PD_TARGET to an enum PD_TARGET | ||
935 | #define V12PD_TARGET(target) (target << 1) | ||
910 | static inline const char *pd_target_to_text(enum PD_TARGET t) { | 936 | static inline const char *pd_target_to_text(enum PD_TARGET t) { |
911 | switch (t) { | 937 | switch (t) { |
912 | case PD_AND_TARGET_INVALID: | 938 | case PD_AND_TARGET_INVALID: |
@@ -928,13 +954,10 @@ static inline const char *pd_target_to_text(enum PD_TARGET t) { | |||
928 | } | 954 | } |
929 | 955 | ||
930 | // Page Directory Entry/Page Table Entry V2 type | 956 | // Page Directory Entry/Page Table Entry V2 type |
931 | // Note: As the meaning of target (bits 2:1) changes depending on if the entry | 957 | // Note: The meaning of target (bits 2:1) at a PDE-level changes if the |
932 | // is a PTE or not, this combines them into a single target field to | 958 | // entry is a large-page PTE or not. To simplify the logic, we combine them |
933 | // simplify comparisons. | 959 | // into a single target field to simplify comparisons. |
934 | // Support: Pascal, Volta, Turing, Ampere, Ada | 960 | #define TARGET_PEER 1 |
935 | // | ||
936 | // V3 introduced with Hopper, but Hopper and Blackwell also support V2 | ||
937 | // | ||
938 | typedef union { | 961 | typedef union { |
939 | // Page Directory Entry (PDE) | 962 | // Page Directory Entry (PDE) |
940 | struct { | 963 | struct { |
@@ -965,21 +988,74 @@ typedef union { | |||
965 | uint64_t raw_w; | 988 | uint64_t raw_w; |
966 | } page_dir_entry_t; | 989 | } page_dir_entry_t; |
967 | 990 | ||
968 | // Page Directory Entry/Page Table Entry V1 type | 991 | /* GMMU Page Tables Version 1 |
969 | // Support: Fermi, Kepler, Maxwell | 992 | These page tables contain 2 levels and are used in the Fermi, Kepler, and |
993 | Maxwell architectures to support a 40-bit virtual address space. | ||
994 | |||
995 | Version 1 Page Tables may be configured to support either 64 KiB or 128 KiB | ||
996 | large pages. Table addressing differs between the modes---even if the table | ||
997 | contains no large pages. The format for 4 KiB pages in each mode is shown | ||
998 | below. | ||
999 | |||
1000 | V1 of NVIDIA's page table format uses 1 level of PDEs and a level of PTEs. | ||
1001 | How the virtual address is sliced to yield an index into each level and a | ||
1002 | page offset is shown by Fig 1 and Fig 2 (for 64 KiB and 128 KiB large page | ||
1003 | modes respectively). | ||
1004 | |||
1005 | == Figure 1: 64 KiB mode == | ||
1006 | Page Offset (12 bits) <----------------------------------+ | ||
1007 | Page Table Entry (PTE) (13 bits) <--------------+ | | ||
1008 | Page Directory Entry (PDE) (13 bits) <-+ | | | ||
1009 | ^ ^ ^ | ||
1010 | Virtual address: [39, 25] [24, 12] [11, 0] | ||
1011 | |||
1012 | == Figure 2: 128 KiB mode == | ||
1013 | Page Offset (12 bits) <----------------------------------+ | ||
1014 | Page Table Entry (PTE) (14 bits) <--------------+ | | ||
1015 | Page Directory Entry (PDE) (12 bits) <-+ | | | ||
1016 | ^ ^ ^ | ||
1017 | Virtual address: [39, 26] [25, 12] [11, 0] | ||
1018 | |||
1019 | |||
1020 | Support: Fermi, Kepler, Maxwell, Pascal* | ||
1021 | Note: *Pascal introduces Version 2 Page Tables, but is backwards-compatible. | ||
1022 | Note: We only implement the 64-KiB-large-page mode in nvdebug. | ||
1023 | |||
1024 | See also: mm_gk20a.c in nvgpu (Jetson GPU driver) and kern_gmmu_fmt_gm10x.c | ||
1025 | in open-gpu-kernel-modules (open-source NVRM variant). This is | ||
1026 | synonymously the "VER1" and unversioned layout in | ||
1027 | open-gpu-kernel-modules, with some differences noted in Appdx 1. | ||
1028 | |||
1029 | == Appdx 1 == | ||
1030 | In open-gpu-kernel-modules, the unversioned MMU layout adds: | ||
1031 | - Bit 35: NV_MMU_PTE_LOCK synonym for NV_MMU_PTE_ATOMIC_DISABLE | ||
1032 | - Bit 62: NV_MMU_PTE_READ_DISABLE overlapping NV_MMU_PTE_COMPTAGLINE | ||
1033 | - Bit 63: NV_MMU_PTE_WRITE_DISABLE overlapping NV_MMU_PTE_COMPTAGLINE | ||
1034 | And removes: | ||
1035 | - Bit 40, 41, 42, 43 from NV_MMU_PTE_KIND | ||
1036 | The PDE layouts are identical. Given that the unversioned defines seem to | ||
1037 | predate renaming and/or field extension/relocation, they are likely artifacts | ||
1038 | from the page table development process, and have no meaning now. | ||
1039 | */ | ||
1040 | // Number of entries in the PDE and PTE levels | ||
1041 | static const int NV_MMU_PT_V1_SZ[2] = {8192, 8192}; | ||
1042 | // Which bit index is the least significant in indexing each page level | ||
1043 | static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; | ||
1044 | |||
1045 | // V1 Page Directory Entry target | ||
970 | enum V1_PD_TARGET { | 1046 | enum V1_PD_TARGET { |
971 | PD_TARGET_INVALID = 0, | 1047 | PD_TARGET_INVALID = 0, |
972 | PD_TARGET_VID_MEM = 1, | 1048 | PD_TARGET_VID_MEM = 1, |
973 | PD_TARGET_SYS_MEM_COHERENT = 2, | 1049 | PD_TARGET_SYS_MEM_COHERENT = 2, |
974 | PD_TARGET_SYS_MEM_NONCOHERENT = 3, | 1050 | PD_TARGET_SYS_MEM_NONCOHERENT = 3, |
975 | }; | 1051 | }; |
976 | // Page Directory Entry (PDE) | 1052 | // V1 Page Directory Entry (PDE) |
977 | typedef union { | 1053 | typedef union { |
978 | // Large page fields | 1054 | // Large page fields |
979 | struct { | 1055 | struct { |
980 | // 0:32 | 1056 | // 0:32 |
981 | enum V1_PD_TARGET target:2; | 1057 | enum V1_PD_TARGET target:2; |
982 | uint32_t padding0:2; | 1058 | uint32_t padding0:2; // Documented as "PDE_SIZE"? |
983 | uint64_t addr:28; // May be wider? | 1059 | uint64_t addr:28; // May be wider? |
984 | // 32:63 | 1060 | // 32:63 |
985 | uint32_t padding2:3; | 1061 | uint32_t padding2:3; |
@@ -998,45 +1074,58 @@ typedef union { | |||
998 | } __attribute__((packed)); | 1074 | } __attribute__((packed)); |
999 | uint64_t raw; | 1075 | uint64_t raw; |
1000 | } page_dir_entry_v1_t; | 1076 | } page_dir_entry_v1_t; |
1001 | // Page Table Entry (PTE) | 1077 | |
1002 | // Reconstructed from info in Jetson nvgpu driver | 1078 | // V1 Page Table Entry (PTE) |
1003 | typedef union { | 1079 | typedef union { |
1004 | struct { | 1080 | struct { |
1005 | // 0:32 | 1081 | // 0:32 |
1006 | bool is_present:1; | 1082 | bool is_present:1; |
1007 | bool is_privileged:1; | 1083 | bool is_privileged:1; |
1008 | bool is_readonly:1; | 1084 | bool is_readonly:1; |
1009 | uint32_t padding0:1; | 1085 | bool is_encrypted:1; |
1010 | uint64_t addr:28; | 1086 | uint64_t addr:28; |
1011 | // 32:63 | 1087 | // 32:63 |
1012 | bool is_volatile:1; | 1088 | bool is_volatile:1; |
1013 | enum INST_TARGET:2; | 1089 | enum INST_TARGET:2; |
1014 | uint32_t padding1:1; | 1090 | bool atomics_disabled:1; |
1015 | uint32_t kind:8; | 1091 | uint32_t kind:8; |
1016 | uint32_t comptag:17; | 1092 | uint32_t comptag:20; |
1017 | uint32_t padding2:1; | ||
1018 | bool is_read_disabled:1; | ||
1019 | bool is_write_disabled:1; | ||
1020 | } __attribute__((packed)); | 1093 | } __attribute__((packed)); |
1021 | uint64_t raw; | 1094 | uint64_t raw; |
1022 | } page_tbl_entry_v1_t; | 1095 | } page_tbl_entry_v1_t; |
1023 | //enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3}; | 1096 | |
1024 | //enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3}; | 1097 | /* GMMU Page Tables Version 0 |
1025 | //static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024}; | 1098 | This page table contains 2 levels to support a 40-bit virtual address space, |
1026 | /* PDE V0 (nv50/Tesla) | 1099 | and is used in the Tesla (2.0?) architecture. |
1100 | |||
1101 | It is unclear what NVIDIA calls this page table layout. It predates V1, so we | ||
1102 | call it V0. | ||
1103 | |||
1104 | See also: https://envytools.readthedocs.io/en/latest/hw/memory/g80-vm.html | ||
1105 | */ | ||
1106 | /* | ||
1107 | // What size pages are in the pointed-to page table? | ||
1108 | enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3}; | ||
1109 | // How large is the pointed-to page table? | ||
1110 | enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3}; | ||
1111 | // Given a page table size, how many entries does it have? | ||
1112 | static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024}; | ||
1113 | |||
1114 | // PDE V0 (nv50/Tesla) | ||
1027 | typedef union { | 1115 | typedef union { |
1028 | struct { | 1116 | struct { |
1029 | enum V1_PDE_TYPE type:2; | 1117 | enum V0_PDE_TYPE type:2; |
1030 | enum INST_TARGET target:2; | 1118 | enum INST_TARGET target:2; |
1031 | uint32_t padding0:1; | 1119 | uint32_t padding0:1; |
1032 | enum V1_PDE_SIZE sublevel_size:2; | 1120 | enum V0_PDE_SIZE sublevel_size:2; |
1033 | uint32_t padding1:5; | 1121 | uint32_t padding1:5; |
1034 | uint32_t addr:28; | 1122 | uint32_t addr:28; |
1035 | uint32_t padding2:24; | 1123 | uint32_t padding2:24; |
1036 | } __attribute__((packed)); | 1124 | } __attribute__((packed)); |
1037 | uint64_t raw; | 1125 | uint64_t raw; |
1038 | } page_dir_entry_v1_t;*/ | 1126 | } page_dir_entry_v0_t; |
1039 | /* PTE V0 (nv50) | 1127 | |
1128 | // PTE V0 (nv50) for small pages | ||
1040 | typedef union { | 1129 | typedef union { |
1041 | struct { | 1130 | struct { |
1042 | bool is_present:1; | 1131 | bool is_present:1; |
@@ -1055,7 +1144,8 @@ typedef union { | |||
1055 | uint32_t padding5:1; | 1144 | uint32_t padding5:1; |
1056 | } __attribute__((packed)); | 1145 | } __attribute__((packed)); |
1057 | uint64_t raw; | 1146 | uint64_t raw; |
1058 | } page_tbl_entry_v1_t;*/ | 1147 | } page_tbl_entry_v0_t; |
1148 | */ | ||
1059 | 1149 | ||
1060 | // TODO(jbakita): Maybe put the above GPU types in a different file. | 1150 | // TODO(jbakita): Maybe put the above GPU types in a different file. |
1061 | 1151 | ||
@@ -1077,6 +1167,8 @@ struct nvdebug_state { | |||
1077 | struct gk20a *g; | 1167 | struct gk20a *g; |
1078 | // Pointer to PCI device needed for pci_iounmap | 1168 | // Pointer to PCI device needed for pci_iounmap |
1079 | struct pci_dev *pcid; | 1169 | struct pci_dev *pcid; |
1170 | // Pointer to generic device struct (both platform and pcie devices) | ||
1171 | struct device *dev; | ||
1080 | }; | 1172 | }; |
1081 | 1173 | ||
1082 | /*const struct runlist_funcs { | 1174 | /*const struct runlist_funcs { |
@@ -1152,13 +1244,11 @@ int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); | |||
1152 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy); | 1244 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy); |
1153 | uint64_t search_page_directory( | 1245 | uint64_t search_page_directory( |
1154 | struct nvdebug_state *g, | 1246 | struct nvdebug_state *g, |
1155 | void __iomem *pde_offset, | 1247 | page_dir_config_t pd_config, |
1156 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
1157 | uint64_t addr_to_find); | 1248 | uint64_t addr_to_find); |
1158 | uint64_t search_v1_page_directory( | 1249 | uint64_t search_v1_page_directory( |
1159 | struct nvdebug_state *g, | 1250 | struct nvdebug_state *g, |
1160 | void __iomem *pde_offset, | 1251 | page_dir_config_t pd_config, |
1161 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
1162 | uint64_t addr_to_find); | 1252 | uint64_t addr_to_find); |
1163 | 1253 | ||
1164 | 1254 | ||
@@ -1252,4 +1342,4 @@ static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | |||
1252 | } | 1342 | } |
1253 | // Defined in bus.c | 1343 | // Defined in bus.c |
1254 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); | 1344 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); |
1255 | int get_bar2_pdb(struct nvdebug_state *g, void **pdb, bool *is_v2_pdb); | 1345 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); |