-rw-r--r--  mmu.c     | 54
-rw-r--r--  nvdebug.h | 57
2 files changed, 56 insertions(+), 55 deletions(-)
diff --git a/mmu.c b/mmu.c
--- a/mmu.c
+++ b/mmu.c
@@ -39,46 +39,6 @@ uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) {
 	return addr - pramin_base_va;
 }
 
-/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
-   straight-forward starting with Pascal ("page table version 2"), except for a
-   few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).
-
-   All you really need to know is that any given Page Directory Entry (PDE)
-   contains a pointer to the start of a 4k page densely filled with PDEs or Page
-   Table Entries (PTEs).
-
-   == Page Table Refresher ==
-   Page tables convert virtual addresses to physical addresses, and they do this
-   via a tree structure. Leafs (PTEs) contain a physical address, and the path
-   from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs.
-   When decending, the virtual address is sliced into pieces, and one slice is
-   used at each level (as an index) to select the next-visited node (in level+1).
-
-   V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
-   PTEs. How the virtual address is sliced to yield an index into each level and
-   a page offset is shown by Fig 1.
-
-   == Figure 1 ==
-   Page Offset (12 bits) <---------------------------------------+
-   Page Table Entry (PTE) (9 bits) <--------------------+        |
-   Page Directory Entry (PDE) 0 (8 bits) <-----+        |        |
-   PDE1 (8 bits) <--------------------+        |        |        |
-   PDE2 (8 bits) <-----------+        |        |        |        |
-   PDE3 (2 bits) <--+        |        |        |        |        |
-                    ^        ^        ^        ^        ^        ^
-   Virtual addr: [49, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]
-
-   The following arrays merely represent different projections of Fig. 1, and
-   only one is strictly needed to reconstruct all the others. However, due to
-   the complexity of page tables, we include all of these to aid in readability.
-*/
-// How many nodes/entries per level in V2 of NVIDIA's page table format
-static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
-// Size in bytes of an entry at a particular level
-static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
-// Which bit index is the least significant in indexing each page level
-static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
-
 // Convert a GPU physical address to CPU virtual address via the PRAMIN window
 void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) {
 	return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy);
@@ -90,6 +50,7 @@ void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) {
 }
 */
 
+// Internal helper for search_page_directory().
 uint64_t search_page_directory_subtree(struct nvdebug_state *g,
                                        void __iomem *pde_offset,
                                        void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t),
@@ -114,7 +75,7 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g,
 	}
 	printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw);
 	// Depth-first search of the page table
-	for (i = 0; i < NV_MMU_PT_V2_SZ[level]; i++) {
+	for (i = 0; i < NV_MMU_PT_V2_SZ[level + 1]; i++) {
 		next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i);
 		// off2addr can fail
 		if (!next) {
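
The corrected loop bound can be sanity-checked against the NV_MMU_PT_V2_* arrays: every directory level below the root densely fills exactly one 4 KiB page, so the entry count and the entry size of the child table must both be read at the same index, level + 1. A minimal standalone sketch of that invariant (userspace, assuming only the two arrays from this patch):

#include <assert.h>

static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};

int main(void) {
	int level;
	// Levels 1-4 each densely fill one 4 KiB page: 512*8, 512*8, 256*16, 512*8
	for (level = 1; level < 5; level++)
		assert(NV_MMU_PT_V2_SZ[level] * NV_MMU_PT_V2_ENTRY_SZ[level] == 4096);
	// The root (PDE3) is the exception: four 8-byte entries, not a full page
	assert(NV_MMU_PT_V2_SZ[0] * NV_MMU_PT_V2_ENTRY_SZ[0] == 32);
	return 0;
}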
@@ -128,12 +89,17 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g,
 	return 0;
 }
 
-/* Search a page directory of the GPU MMU
+/* GPU Physical address -> Virtual address ("reverse" translation)
+
+   Depth-first search a page directory of the GPU MMU for where a particular
+   physical address is mapped. Upon finding a mapping, the virtual address is
+   returned.
+
   @param pde_offset   Dereferenceable pointer to the start of the PDE3 entries
-  @param off2addr     Func to converts VRAM phys addresses to valid CPU VAs
+  @param off2addr     Func to convert VRAM phys addresses to valid CPU VAs
   @param addr_to_find Physical address to reconstruct the virtual address of
   @return 0 on error, otherwise the virtual address at which addr_to_find is
-          mapped into by this page table.
+          mapped by this page table. (Zero is not a valid virtual address)
 */
 uint64_t search_page_directory(struct nvdebug_state *g,
                                void __iomem *pde_offset,
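
A hypothetical call site for the documented function (a sketch, not part of the patch; pd_phys and buf_phys stand in for a page-directory base and a search target obtained elsewhere, with phy2PRAMIN serving as the off2addr callback, as in the mmu.c code above):

// Reverse-translate buf_phys through the page directory rooted at pd_phys
void __iomem *pd = phy2PRAMIN(g, pd_phys);  // pd_phys: base of the PDE3 entries
uint64_t buf_virt = search_page_directory(g, pd, phy2PRAMIN, buf_phys);
if (!buf_virt)
	printk(KERN_WARNING "[nvdebug] No mapping found for %llx\n", buf_phys);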
diff --git a/nvdebug.h b/nvdebug.h
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -856,13 +856,46 @@ typedef union {
 	uint64_t raw;
 } page_dir_config_t;
 
-/* Page directory entry
-
-   Note: Format changed with Pascal (how?)
-
-   Support: Pascal, Volta, Turing, Ampere, Ada
+/* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly
+   straightforward starting with Pascal ("page table version 2"), except for a
+   few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes).
+
+   All you really need to know is that any given Page Directory Entry (PDE)
+   contains a pointer to the start of a 4k page densely filled with PDEs or Page
+   Table Entries (PTEs).
+
+   == Page Table Refresher ==
+   Page tables convert virtual addresses to physical addresses, and they do this
+   via a tree structure. Leaves (PTEs) contain a physical address, and the path
+   from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs.
+   When descending, the virtual address is sliced into pieces, and one slice is
+   used at each level (as an index) to select the next-visited node (in level+1).
+
+   V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of
+   PTEs. How the virtual address is sliced to yield an index into each level and
+   a page offset is shown by Fig. 1.
+
+   == Figure 1 ==
+   Page Offset (12 bits) <---------------------------------------+
+   Page Table Entry (PTE) (9 bits) <--------------------+        |
+   Page Directory Entry (PDE) 0 (8 bits) <-----+        |        |
+   PDE1 (9 bits) <--------------------+        |        |        |
+   PDE2 (9 bits) <-----------+        |        |        |        |
+   PDE3 (2 bits) <--+        |        |        |        |        |
+                    ^        ^        ^        ^        ^        ^
+   Virtual addr: [48, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0]
+
+   The following arrays merely represent different projections of Fig. 1, and
+   only one is strictly needed to reconstruct all the others. However, due to
+   the complexity of page tables, we include all of these to aid in readability.
 */
-// FIXME: PDE/PTEs are actually 64 bits =S
+// How many nodes/entries per level in V2 of NVIDIA's page table format
+static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512};
+// Size in bytes of an entry at a particular level
+static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8};
+// Which bit index is the least significant in indexing each page level
+static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};
+
 // Important: Aperture keys are different with PDEs
 enum PD_TARGET {
 	PD_AND_TARGET_INVALID = 0, // b000
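
As a worked example of Fig. 1 and the arrays above (a sketch, not part of the patch): the index into any given level is the virtual address shifted down to that level's least-significant bit and masked to the level's entry count, which is always a power of two.

#include <stdint.h>

static const int NV_MMU_PT_V2_SZ[5]  = {4, 512, 512, 256, 512};
static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12};

// Index into the page directory/table at `level` for virtual address `virt`
static uint32_t pt_index(uint64_t virt, int level) {
	return (virt >> NV_MMU_PT_V2_LSB[level]) & (NV_MMU_PT_V2_SZ[level] - 1);
}

// E.g. virt = 0x123456000 yields indices {0, 0, 9, 26, 86} for levels 0-4
// (PDE3 through PTE), with page offset virt & 0xfff == 0.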
@@ -894,25 +927,27 @@ static inline const char *pd_target_to_text(enum PD_TARGET t) {
 	}
 }
 
-// PDE/PTE V2 type
+// Page Directory Entry/Page Table Entry V2 type
 // Note: As the meaning of target (bits 2:1) changes depending on if the entry
 //       is a PTE or not, this combines them into a single target field to
 //       simplify comparisons.
 // Support: Pascal, Volta, Turing, Ampere, Ada
 //
 // V3 introduced with Hopper, but Hopper and Blackwell also support V2
+//
+// FIXME: This structure is 32 bits, but PDE/PTEs are actually 64 bits!
 typedef union {
 	// Page Directory Entry (PDE)
 	struct {
-		bool is_pte:1;
-		uint32_t __target:2;
+		enum PD_TARGET target:3;
 		bool is_volatile:1;
 		uint32_t padding1:4;
 		uint32_t addr:24;
 	} __attribute__((packed));
 	// Page Table Entry (PTE)
 	struct {
-		enum PD_TARGET target:3;
+		bool is_pte:1;
+		enum INST_TARGET aperture:2;
 		uint32_t __is_volatile:1;
 		bool is_encrypted:1;
 		bool is_privileged:1;
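
The payoff of the combined target field is that PDEs and PTEs can be classified with a single comparison. A hedged decoding sketch (not code from this patch; assumes entry_addr is a hypothetical CPU-mapped pointer to one entry, and reads only the low 32 bits that this structure currently models):

page_dir_entry_t entry;
entry.raw = readl(entry_addr);  // low 32 bits of the (64-bit) entry
if (entry.target == PD_AND_TARGET_INVALID)
	return 0;  // unmapped; nothing to follow
// entry.addr holds a 4 KiB-aligned pointer to the child table or mapped page
printk(KERN_INFO "[nvdebug] Entry -> %llx (%s)\n",
       ((u64)entry.addr) << 12, pd_target_to_text(entry.target));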
@@ -923,7 +958,7 @@ typedef union {
 	uint32_t raw;
 } page_dir_entry_t;
 
-// PDE/PTE V1 types
+// Page Directory Entry/Page Table Entry V1 type
 // Support: Fermi, Kepler, Maxwell
 enum V1_PD_TARGET {
 	PD_TARGET_INVALID = 0,