diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-10 17:23:46 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-09-10 17:23:46 -0400 |
commit | 0b1c304e53b88fe628d350d1380a88317f071e69 (patch) | |
tree | 5c1d0fcb451313b57e23841dc189744f2d9085ef | |
parent | 16163d89b564029cabf4842815590b0a484cc172 (diff) |
Documentation and style cleanup. No functional changes.
-rw-r--r-- | nvdebug.h | 202 |
1 files changed, 116 insertions, 86 deletions
@@ -10,6 +10,9 @@ | |||
10 | * | 10 | * |
11 | * This file should not depend on any Linux-internal headers, and may be | 11 | * This file should not depend on any Linux-internal headers, and may be |
12 | * included outside of nvdebug. | 12 | * included outside of nvdebug. |
13 | * | ||
14 | * Style: This file uses up to 82-character lines to accommodate 2-character | ||
15 | * indented quotes from open-gpu-doc without reflowing. | ||
13 | */ | 16 | */ |
14 | #include <linux/types.h> | 17 | #include <linux/types.h> |
15 | 18 | ||
@@ -53,7 +56,7 @@ struct gk20a; | |||
53 | on either runqueue.)" | 56 | on either runqueue.)" |
54 | 57 | ||
55 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer | 58 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer |
56 | INST_PTR_HI : upper 32 bit of instance block pointer | 59 | INST_PTR_HI : upper 32 bits of instance block pointer |
57 | INST_TARGET (TGI) : aperture of the instance block | 60 | INST_TARGET (TGI) : aperture of the instance block |
58 | 61 | ||
59 | USERD_PTR_LO : upper 24 bits of the low 32 bits, of the 512-byte-aligned USERD pointer | 62 | USERD_PTR_LO : upper 24 bits of the low 32 bits, of the 512-byte-aligned USERD pointer |
@@ -78,14 +81,14 @@ static inline const char *target_to_text(enum INST_TARGET t) { | |||
78 | } | 81 | } |
79 | } | 82 | } |
80 | 83 | ||
81 | // Support: Volta, Ampere, Turing, Ampere | 84 | // Support: Volta, Turing, Ampere, Hopper, Ada |
82 | struct gv100_runlist_chan { | 85 | struct gv100_runlist_chan { |
83 | // 0:63 | 86 | // 0:63 |
84 | enum ENTRY_TYPE entry_type:1; | 87 | enum ENTRY_TYPE entry_type:1; |
85 | uint32_t runqueue_selector:1; | 88 | uint32_t runqueue_selector:1; |
86 | uint32_t padding:2; | 89 | uint32_t :2; |
87 | enum INST_TARGET inst_target:2; | 90 | enum INST_TARGET inst_target:2; |
88 | uint32_t padding2:2; | 91 | uint32_t :2; |
89 | uint32_t userd_ptr_lo:24; | 92 | uint32_t userd_ptr_lo:24; |
90 | uint32_t userd_ptr_hi:32; | 93 | uint32_t userd_ptr_hi:32; |
91 | // 64:128 | 94 | // 64:128 |
@@ -95,15 +98,15 @@ struct gv100_runlist_chan { | |||
95 | } __attribute__((packed)); | 98 | } __attribute__((packed)); |
96 | 99 | ||
97 | // Support: Fermi, Kepler*, Maxwell, Pascal | 100 | // Support: Fermi, Kepler*, Maxwell, Pascal |
98 | // *In Kepler, inst fields may be unpopulated? | 101 | // *On Kepler (and older?), inst fields are unpopulated (ex. gk104) |
99 | struct gm107_runlist_chan { | 102 | struct gm107_runlist_chan { |
100 | uint32_t chid:12; | 103 | uint32_t chid:12; |
101 | uint32_t padding0:1; | 104 | uint32_t :1; |
102 | enum ENTRY_TYPE entry_type:1; | 105 | enum ENTRY_TYPE entry_type:1; |
103 | uint32_t padding1:18; | 106 | uint32_t :18; |
104 | uint32_t inst_ptr_lo:20; | 107 | uint32_t inst_ptr_lo:20; |
105 | enum INST_TARGET inst_target:2; // Totally guessing on this | 108 | enum INST_TARGET inst_target:2; // Totally guessing on this |
106 | uint32_t padding2:10; | 109 | uint32_t :10; |
107 | } __attribute__((packed)); | 110 | } __attribute__((packed)); |
108 | 111 | ||
109 | #define gk110_runlist_chan gm107_runlist_chan | 112 | #define gk110_runlist_chan gm107_runlist_chan |
@@ -131,20 +134,20 @@ struct gm107_runlist_chan { | |||
131 | than 1GHz." | 134 | than 1GHz." |
132 | */ | 135 | */ |
133 | 136 | ||
134 | // Support: Volta, Turing*, Ampere* | 137 | // Support: Volta, Turing*, Ampere*, Hopper, Ada |
135 | // *These treat bits 4:11 (8 bits) as GFID (unused) | 138 | // *These treat bits 4:11 (8 bits) as GFID (unused) |
136 | struct gv100_runlist_tsg { | 139 | struct gv100_runlist_tsg { |
137 | // 0:63 | 140 | // 0:63 |
138 | enum ENTRY_TYPE entry_type:1; | 141 | enum ENTRY_TYPE entry_type:1; |
139 | uint64_t padding:15; | 142 | uint64_t :15; |
140 | uint32_t timeslice_scale:4; | 143 | uint32_t timeslice_scale:4; |
141 | uint64_t padding2:4; | 144 | uint64_t :4; |
142 | uint32_t timeslice_timeout:8; | 145 | uint32_t timeslice_timeout:8; |
143 | uint32_t tsg_length:8; | 146 | uint32_t tsg_length:8; |
144 | uint32_t padding3:24; | 147 | uint32_t :24; |
145 | // 64:128 | 148 | // 64:128 |
146 | uint32_t tsgid:12; | 149 | uint32_t tsgid:12; |
147 | uint64_t padding4:52; | 150 | uint64_t :52; |
148 | } __attribute__((packed)); | 151 | } __attribute__((packed)); |
149 | #define MAX_TSGID (1 << 12) | 152 | #define MAX_TSGID (1 << 12) |
150 | 153 | ||
@@ -153,12 +156,12 @@ struct gv100_runlist_tsg { | |||
153 | // Last 32 bits appear to contain an undocumented inst ptr | 156 | // Last 32 bits appear to contain an undocumented inst ptr |
154 | struct gk110_runlist_tsg { | 157 | struct gk110_runlist_tsg { |
155 | uint32_t tsgid:12; | 158 | uint32_t tsgid:12; |
156 | uint32_t padding0:1; | 159 | uint32_t :1; |
157 | enum ENTRY_TYPE entry_type:1; | 160 | enum ENTRY_TYPE entry_type:1; |
158 | uint32_t timeslice_scale:4; | 161 | uint32_t timeslice_scale:4; |
159 | uint32_t timeslice_timeout:8; | 162 | uint32_t timeslice_timeout:8; |
160 | uint32_t tsg_length:6; | 163 | uint32_t tsg_length:6; |
161 | uint32_t padding1:32; | 164 | uint32_t :32; |
162 | } __attribute__((packed)); | 165 | } __attribute__((packed)); |
163 | 166 | ||
164 | 167 | ||
@@ -175,11 +178,11 @@ enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | |||
175 | typedef union { | 178 | typedef union { |
176 | struct { | 179 | struct { |
177 | uint32_t id:12; | 180 | uint32_t id:12; |
178 | uint32_t padding:8; | 181 | uint32_t :8; |
179 | bool is_pending:1; | 182 | bool is_pending:1; |
180 | uint32_t padding2:3; | 183 | uint32_t :3; |
181 | enum PREEMPT_TYPE type:2; | 184 | enum PREEMPT_TYPE type:2; |
182 | uint32_t padding3:6; | 185 | uint32_t :6; |
183 | } __attribute__((packed)); | 186 | } __attribute__((packed)); |
184 | uint32_t raw; | 187 | uint32_t raw; |
185 | } pfifo_preempt_t; | 188 | } pfifo_preempt_t; |
@@ -220,7 +223,7 @@ typedef union { | |||
220 | bool runlist_11:1; | 223 | bool runlist_11:1; |
221 | bool runlist_12:1; | 224 | bool runlist_12:1; |
222 | bool runlist_13:1; | 225 | bool runlist_13:1; |
223 | uint32_t padding:18; | 226 | uint32_t :18; |
224 | } __attribute__((packed)); | 227 | } __attribute__((packed)); |
225 | uint32_t raw; | 228 | uint32_t raw; |
226 | } runlist_preempt_t; | 229 | } runlist_preempt_t; |
@@ -313,7 +316,8 @@ typedef union { | |||
313 | of this, the layout was modified to allow for larger runlist pointers (upper | 316 | of this, the layout was modified to allow for larger runlist pointers (upper |
314 | 52 of 64 bits). | 317 | 52 of 64 bits). |
315 | 318 | ||
316 | Support: Turing, Ampere, Lovelace?, Hopper? | 319 | Support: Turing, Ampere*, Hopper*, Ada* |
320 | *Only the register layout | ||
317 | */ | 321 | */ |
318 | // Support: Turing | 322 | // Support: Turing |
319 | #define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write | 323 | #define NV_PFIFO_RUNLIST_BASE_TU102(i) (0x00002B00+(i)*16) // Read/write |
@@ -324,9 +328,9 @@ typedef union { | |||
324 | typedef union { | 328 | typedef union { |
325 | struct { | 329 | struct { |
326 | enum INST_TARGET target:2; | 330 | enum INST_TARGET target:2; |
327 | uint32_t padding:10; | 331 | uint32_t :10; |
328 | uint64_t ptr:28; | 332 | uint64_t ptr:28; |
329 | uint32_t padding2:24; | 333 | uint32_t :24; |
330 | } __attribute__((packed)); | 334 | } __attribute__((packed)); |
331 | uint64_t raw; | 335 | uint64_t raw; |
332 | } runlist_base_tu102_t; | 336 | } runlist_base_tu102_t; |
@@ -453,7 +457,7 @@ typedef union { | |||
453 | bool runlist_8:1; | 457 | bool runlist_8:1; |
454 | bool runlist_9:1; | 458 | bool runlist_9:1; |
455 | bool runlist_10:1; | 459 | bool runlist_10:1; |
456 | uint32_t padding:21; | 460 | uint32_t :21; |
457 | } __attribute__((packed)); | 461 | } __attribute__((packed)); |
458 | uint32_t raw; | 462 | uint32_t raw; |
459 | } runlist_disable_t; | 463 | } runlist_disable_t; |
@@ -546,17 +550,17 @@ typedef union { | |||
546 | uint32_t minor_revision:4; | 550 | uint32_t minor_revision:4; |
547 | uint32_t major_revision:4; | 551 | uint32_t major_revision:4; |
548 | uint32_t reserved:4; | 552 | uint32_t reserved:4; |
549 | uint32_t padding0:8; | 553 | uint32_t :8; |
550 | uint32_t implementation:4; | 554 | uint32_t implementation:4; |
551 | uint32_t architecture:5; | 555 | uint32_t architecture:5; |
552 | uint32_t padding1:3; | 556 | uint32_t :3; |
553 | } __attribute__((packed)); | 557 | } __attribute__((packed)); |
554 | uint32_t raw; | 558 | uint32_t raw; |
555 | // Arch << 4 + impl is also often used | 559 | // Arch << 4 + impl is also often used |
556 | struct { | 560 | struct { |
557 | uint32_t padding2:20; | 561 | uint32_t :20; |
558 | uint32_t chip_id:9; | 562 | uint32_t chip_id:9; |
559 | uint32_t padding3:3; | 563 | uint32_t :3; |
560 | } __attribute__((packed)); | 564 | } __attribute__((packed)); |
561 | } mc_boot_0_t; | 565 | } mc_boot_0_t; |
562 | 566 | ||
@@ -679,35 +683,35 @@ typedef union { | |||
679 | enum DEVICE_INFO_TYPE info_type:2; | 683 | enum DEVICE_INFO_TYPE info_type:2; |
680 | bool fault_id_is_valid:1; | 684 | bool fault_id_is_valid:1; |
681 | uint32_t fault_id:7; | 685 | uint32_t fault_id:7; |
682 | uint32_t padding0:2; | 686 | uint32_t :2; |
683 | uint32_t pri_base:12; | 687 | uint32_t pri_base:12; |
684 | uint32_t padding1:2; | 688 | uint32_t :2; |
685 | uint32_t inst_id:4; | 689 | uint32_t inst_id:4; |
686 | uint32_t is_not_enum2:1; | 690 | uint32_t is_not_enum2:1; |
687 | bool has_next_entry:1; | 691 | bool has_next_entry:1; |
688 | } __attribute__((packed)); | 692 | } __attribute__((packed)); |
689 | // ENUM type fields | 693 | // ENUM type fields |
690 | struct { | 694 | struct { |
691 | uint32_t padding2:2; | 695 | uint32_t :2; |
692 | bool reset_is_valid:1; | 696 | bool reset_is_valid:1; |
693 | bool intr_is_valid:1; | 697 | bool intr_is_valid:1; |
694 | bool runlist_is_valid:1; | 698 | bool runlist_is_valid:1; |
695 | bool engine_is_valid:1; | 699 | bool engine_is_valid:1; |
696 | uint32_t padding3:3; | 700 | uint32_t :3; |
697 | uint32_t reset_enum:5; | 701 | uint32_t reset_enum:5; |
698 | uint32_t padding4:1; | 702 | uint32_t :1; |
699 | uint32_t intr_enum:5; | 703 | uint32_t intr_enum:5; |
700 | uint32_t padding5:1; | 704 | uint32_t :1; |
701 | uint32_t runlist_enum:4; | 705 | uint32_t runlist_enum:4; |
702 | uint32_t padding6:1; | 706 | uint32_t :1; |
703 | uint32_t engine_enum:4; | 707 | uint32_t engine_enum:4; |
704 | uint32_t padding7:2; | 708 | uint32_t :2; |
705 | } __attribute__((packed)); | 709 | } __attribute__((packed)); |
706 | // ENGINE_TYPE type fields | 710 | // ENGINE_TYPE type fields |
707 | struct { | 711 | struct { |
708 | uint32_t padding8:2; | 712 | uint32_t :2; |
709 | enum ENGINE_TYPES engine_type:29; | 713 | enum ENGINE_TYPES engine_type:29; |
710 | uint32_t padding9:1; | 714 | uint32_t :1; |
711 | } __attribute__((packed)); | 715 | } __attribute__((packed)); |
712 | uint32_t raw; | 716 | uint32_t raw; |
713 | } ptop_device_info_gk104_t; | 717 | } ptop_device_info_gk104_t; |
@@ -744,17 +748,19 @@ typedef union { | |||
744 | runlist of this engine. | 748 | runlist of this engine. |
745 | RLENG_ID : What is the per-runlist ID of this engine? | 749 | RLENG_ID : What is the per-runlist ID of this engine? |
746 | 750 | ||
747 | Support: Ampere, Ada, Hopper, (and newer likely) | 751 | Support: Ampere, Hopper, Ada (and newer likely) |
748 | See also: hw_top_ga100.h in nvgpu (NVIDIA's open-source Jetson GPU driver) | 752 | See also: hw_top_ga100.h in nvgpu (NVIDIA's open-source Jetson GPU driver) |
749 | */ | 753 | */ |
750 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) | 754 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) |
751 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) | 755 | #define NV_PTOP_DEVICE_INFO_LEN_GA100 0x000224fc |
756 | #define NV_PTOP_DEVICE_INFO_LEN_SHIFT_GA100 20 | ||
757 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, NV_PTOP_DEVICE_INFO_LEN_GA100) >> NV_PTOP_DEVICE_INFO_LEN_SHIFT_GA100) | ||
752 | 758 | ||
753 | typedef union { | 759 | typedef union { |
754 | // _info type fields | 760 | // _info type fields |
755 | struct { | 761 | struct { |
756 | uint32_t fault_id:11; | 762 | uint32_t fault_id:11; |
757 | uint32_t padding0:5; | 763 | uint32_t :5; |
758 | uint32_t inst_id:8; | 764 | uint32_t inst_id:8; |
759 | enum ENGINE_TYPES engine_type:7; // "type_enum" | 765 | enum ENGINE_TYPES engine_type:7; // "type_enum" |
760 | bool has_next_entry:1; | 766 | bool has_next_entry:1; |
@@ -763,15 +769,15 @@ typedef union { | |||
763 | struct { | 769 | struct { |
764 | uint32_t reset_id:8; | 770 | uint32_t reset_id:8; |
765 | uint32_t pri_base:18; // "device_pri_base" | 771 | uint32_t pri_base:18; // "device_pri_base" |
766 | uint32_t padding1:4; | 772 | uint32_t :4; |
767 | uint32_t is_engine:1; | 773 | uint32_t is_engine:1; |
768 | uint32_t padding2:1; | 774 | uint32_t :1; |
769 | } __attribute__((packed)); | 775 | } __attribute__((packed)); |
770 | struct { | 776 | struct { |
771 | uint32_t rleng_id:2; | 777 | uint32_t rleng_id:2; |
772 | uint32_t padding3:8; | 778 | uint32_t :8; |
773 | uint32_t runlist_pri_base:16; | 779 | uint32_t runlist_pri_base:16; |
774 | uint32_t padding4:6; | 780 | uint32_t :6; |
775 | } __attribute__((packed)); | 781 | } __attribute__((packed)); |
776 | uint32_t raw; | 782 | uint32_t raw; |
777 | } ptop_device_info_ga100_t; | 783 | } ptop_device_info_ga100_t; |
@@ -970,24 +976,31 @@ typedef union { | |||
970 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) | 976 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) |
971 | #define NV_PRAMIN_LEN 0x00100000 | 977 | #define NV_PRAMIN_LEN 0x00100000 |
972 | 978 | ||
973 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | 979 | /* Page Directory Base (PDB) configuration for an instance block |
980 | |||
981 | Note: "Volta only supports [the] new page table format [V2] and [a] 64KB big | ||
982 | page size" (kern_gmmu_gv100.c in open-gpu-kernel-modules). | ||
983 | Support: Tesla 2.0* through Ampere, Ada | ||
984 | *FAULT_REPLAY_* fields are Pascal+ only | ||
985 | See also: dev_ram.h (open-gpu-kernel-modules) or dev_ram.ref.txt (open-gpu-doc) | ||
986 | */ | ||
974 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 | 987 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 |
975 | typedef union { | 988 | typedef union { |
976 | struct { | 989 | struct { |
977 | uint32_t target:2; | 990 | enum INST_TARGET target:2; |
978 | uint32_t is_volatile:1; | 991 | uint32_t is_volatile:1; |
979 | uint32_t padding0:1; | 992 | uint32_t :1; |
980 | uint32_t fault_replay_tex:1; | 993 | bool fault_replay_tex:1; |
981 | uint32_t fault_replay_gcc:1; | 994 | bool fault_replay_gcc:1; |
982 | uint32_t padding1:4; | 995 | uint32_t :4; |
983 | bool is_ver2:1; | 996 | bool is_ver2:1; |
984 | bool is_64k_big_page:1; // 128KB otherwise | 997 | bool is_64k_big_page:1; // 128KB otherwise |
985 | uint32_t page_dir_lo:20; | 998 | uint32_t page_dir_lo:20; |
986 | uint32_t page_dir_hi:32; | 999 | uint32_t page_dir_hi:32; |
987 | } __attribute__((packed)); | 1000 | } __attribute__((packed)); |
988 | struct { | 1001 | struct { |
989 | uint32_t pad:12; | 1002 | uint32_t :12; |
990 | uint64_t page_dir:52; // Confirmed working on Xavier and tama | 1003 | uint64_t page_dir:52; |
991 | } __attribute__((packed)); | 1004 | } __attribute__((packed)); |
992 | uint64_t raw; | 1005 | uint64_t raw; |
993 | } page_dir_config_t; | 1006 | } page_dir_config_t; |
@@ -1025,7 +1038,7 @@ typedef union { | |||
1025 | only one is strictly needed to reconstruct all the others. However, due to | 1038 | only one is strictly needed to reconstruct all the others. However, due to |
1026 | the complexity of page tables, we include all of these to aid in readability. | 1039 | the complexity of page tables, we include all of these to aid in readability. |
1027 | 1040 | ||
1028 | Support: Pascal, Volta, Turing, Ampere, Ada, Ampere, Hopper*, Blackwell* | 1041 | Support: Pascal, Volta, Turing, Ampere, Hopper*, Ada, Blackwell* |
1029 | Note: *Hopper introduces Version 3 Page Tables, but is backwards-compatible. | 1042 | Note: *Hopper introduces Version 3 Page Tables, but is backwards-compatible. |
1030 | The newer version adds a PD4 level to support 57-bit virtual | 1043 | The newer version adds a PD4 level to support 57-bit virtual |
1031 | addresses, and slightly shifts the PDE and PTE fields. | 1044 | addresses, and slightly shifts the PDE and PTE fields. |
@@ -1077,17 +1090,46 @@ static inline const char *pd_target_to_text(enum PD_TARGET t) { | |||
1077 | } | 1090 | } |
1078 | } | 1091 | } |
1079 | 1092 | ||
1080 | // Page Directory Entry/Page Table Entry V2 type | 1093 | /* Page Directory Entry/Page Table Entry V2 type |
1081 | // Note: As the meaning of target (bits 2:1) at a PDE-level changes if the | 1094 | We consider the least-significant bit to be 0, and use interval notation. |
1082 | // entry is a large-page PTE or not. To simply the logic, we combine them | 1095 | Example: The first 8 bits of an address could be identically described as |
1083 | // into a single target field to simplify comparisons. | 1096 | (8, 0], [7, 0], [7, -1), or (8, -1). |
1097 | |||
1098 | ADDR : Bits [35, 12] of the physical address; bits [11, 0] are 0. This is the | ||
1099 | full 36-bit address for VID_MEM or PEER targets. For SYS_MEM targets, | ||
1100 | use ADDR_W to include bits [57, 36] (per gp100-mmu-format.pdf, Pascal | ||
1101 | only uses bits [46, 0]---a 2^47 = 128 TiB physical address space). | ||
1102 | Points to first entry of the next level of the page table (in a PDE), | ||
1103 | or the start of the physical frame (in a PTE). | ||
1104 | ADDR_W : Bits [57, 12] of a SYS_MEM address. Only necessary for physical | ||
1105 | addresses over 128 TiB. See ADDR. | ||
1106 | IS_VOL : If set, the pointed-to frame should not be cached in the GPU L2 cache. | ||
1107 | This applies to PDEs (then the pointed-to page table/directory frame | ||
1108 | will not be cached), and to PTEs (then the pointed to data frame will | ||
1109 | not be cached). This **does not apply to VID_MEM**, except on Tegra. | ||
1110 | NO_ATS : "GPUs which support ATS [Volta+] perform a parallel lookup on both | ||
1111 | ATS and GMMU page tables. The ATS lookup can be disabled by setting a | ||
1112 | bit in the GMMU page tables. All GPUs which support ATS use the same | ||
1113 | mechanism (a bit in PDE1), and have the same PDE1 coverage (512MB)." | ||
1114 | (nvidia-uvm/uvm_mmu.h) | ||
1115 | Other parts of the nvidia-uvm documentation note that disabling the | ||
1116 | ATS lookup helps performance. | ||
1117 | |||
1118 | Note: The meaning of target (bits 2:1) at a PDE-level changes if the | ||
1119 | entry is a large-page PTE or not. To simplify the logic, we combine them | ||
1120 | into a single target field for easier comparisons. | ||
1121 | |||
1122 | See also: gp100-mmu-format.pdf in open-gpu-doc. | ||
1123 | */ | ||
1084 | #define TARGET_PEER 1 | 1124 | #define TARGET_PEER 1 |
1085 | typedef union { | 1125 | typedef union { |
1086 | // Page Directory Entry (PDE) | 1126 | // Page Directory Entry (PDE) |
1087 | struct { | 1127 | struct { |
1088 | enum PD_TARGET target:3; | 1128 | enum PD_TARGET target:3; |
1089 | bool is_volatile:1; | 1129 | bool is_volatile:1; |
1090 | uint32_t padding1:4; | 1130 | uint32_t :1; |
1131 | bool no_ats:1; // Set to disable PCIe (?) Address Translation Services | ||
1132 | uint32_t :2; | ||
1091 | uint32_t addr:24; | 1133 | uint32_t addr:24; |
1092 | uint32_t __unused1; | 1134 | uint32_t __unused1; |
1093 | } __attribute__((packed)); | 1135 | } __attribute__((packed)); |
@@ -1179,21 +1221,21 @@ typedef union { | |||
1179 | struct { | 1221 | struct { |
1180 | // 0:32 | 1222 | // 0:32 |
1181 | enum V1_PD_TARGET target:2; | 1223 | enum V1_PD_TARGET target:2; |
1182 | uint32_t padding0:2; // Documented as "PDE_SIZE"? | 1224 | uint32_t :2; // Documented as "PDE_SIZE"? |
1183 | uint64_t addr:28; // May be wider? | 1225 | uint64_t addr:28; // May be wider? |
1184 | // 32:63 | 1226 | // 32:63 |
1185 | uint32_t padding2:3; | 1227 | uint32_t :3; |
1186 | uint32_t is_volatile:1; // Might have counted wrong? | 1228 | uint32_t is_volatile:1; // Might have counted wrong? |
1187 | uint32_t padding3:28; | 1229 | uint32_t :28; |
1188 | } __attribute__((packed)); | 1230 | } __attribute__((packed)); |
1189 | // Small page fields | 1231 | // Small page fields |
1190 | struct { | 1232 | struct { |
1191 | // 0:32 | 1233 | // 0:32 |
1192 | uint32_t padding00:32; | 1234 | uint32_t :32; |
1193 | // 32:63 | 1235 | // 32:63 |
1194 | enum V1_PD_TARGET alt_target:2; | 1236 | enum V1_PD_TARGET alt_target:2; |
1195 | uint32_t alt_is_volatile:1; // Might have counted wrong? | 1237 | uint32_t alt_is_volatile:1; // Might have counted wrong? |
1196 | uint32_t padding03:1; | 1238 | uint32_t :1; |
1197 | uint64_t alt_addr:28; | 1239 | uint64_t alt_addr:28; |
1198 | } __attribute__((packed)); | 1240 | } __attribute__((packed)); |
1199 | uint64_t raw; | 1241 | uint64_t raw; |
@@ -1240,11 +1282,11 @@ typedef union { | |||
1240 | struct { | 1282 | struct { |
1241 | enum V0_PDE_TYPE type:2; | 1283 | enum V0_PDE_TYPE type:2; |
1242 | enum INST_TARGET target:2; | 1284 | enum INST_TARGET target:2; |
1243 | uint32_t padding0:1; | 1285 | uint32_t :1; |
1244 | enum V0_PDE_SIZE sublevel_size:2; | 1286 | enum V0_PDE_SIZE sublevel_size:2; |
1245 | uint32_t padding1:5; | 1287 | uint32_t :5; |
1246 | uint32_t addr:28; | 1288 | uint32_t addr:28; // Bits [12, 39] of the 40-bit page table address |
1247 | uint32_t padding2:24; | 1289 | uint32_t :24; |
1248 | } __attribute__((packed)); | 1290 | } __attribute__((packed)); |
1249 | uint64_t raw; | 1291 | uint64_t raw; |
1250 | } page_dir_entry_v0_t; | 1292 | } page_dir_entry_v0_t; |
@@ -1253,19 +1295,19 @@ typedef union { | |||
1253 | typedef union { | 1295 | typedef union { |
1254 | struct { | 1296 | struct { |
1255 | bool is_present:1; | 1297 | bool is_present:1; |
1256 | uint32_t padding3:2; | 1298 | uint32_t :2; |
1257 | bool is_readonly:1; | 1299 | bool is_readonly:1; |
1258 | enum INST_TARGET target:2; | 1300 | enum INST_TARGET target:2; |
1259 | bool is_privileged:1; | 1301 | bool is_privileged:1; |
1260 | uint32_t contig_blk_sz:3; | 1302 | uint32_t contig_blk_sz:3; |
1261 | uint32_t padding4:2; | 1303 | uint32_t :2; |
1262 | uint32_t addr:28; | 1304 | uint32_t addr:28; // Bits [12, 39] of the 40-bit frame address |
1263 | uint32_t storage_type:7; // ??? | 1305 | uint32_t storage_type:7; // ??? |
1264 | uint32_t compression_mode:2; // ??? | 1306 | uint32_t compression_mode:2; // ??? |
1265 | uint32_t compression_tag:12; // ??? | 1307 | uint32_t compression_tag:12; // ??? |
1266 | bool is_long_partition_cycle:1; // ??? | 1308 | bool is_long_partition_cycle:1; // ??? |
1267 | bool is_encrypted:1; | 1309 | bool is_encrypted:1; |
1268 | uint32_t padding5:1; | 1310 | uint32_t :1; |
1269 | } __attribute__((packed)); | 1311 | } __attribute__((packed)); |
1270 | uint64_t raw; | 1312 | uint64_t raw; |
1271 | } page_tbl_entry_v0_t; | 1313 | } page_tbl_entry_v0_t; |
@@ -1316,27 +1358,14 @@ struct nvdebug_state { | |||
1316 | void __iomem *bar3; | 1358 | void __iomem *bar3; |
1317 | }; | 1359 | }; |
1318 | int chip_id; | 1360 | int chip_id; |
1319 | // Additional state from the built-in driver. Only set iff | 1361 | // Additional state from the built-in driver. Only set on Jetson boards |
1320 | // chip_id == NV_CHIP_ID_GV11B | ||
1321 | struct gk20a *g; | 1362 | struct gk20a *g; |
1322 | // Pointer to PCI device needed for pci_iounmap | 1363 | // Pointer to PCI device needed for pci_iounmap and pci_resource_start |
1323 | struct pci_dev *pcid; | 1364 | struct pci_dev *pcid; |
1324 | // Pointer to generic device struct (both platform and pcie devices) | 1365 | // Pointer to generic device struct (both platform and pcie devices) |
1325 | struct device *dev; | 1366 | struct device *dev; |
1326 | }; | 1367 | }; |
1327 | 1368 | ||
1328 | /*const struct runlist_funcs { | ||
1329 | u8 size; | ||
1330 | enum ENTRY_TYPE (*entry_type)(struct nvdebug_state *, void *); | ||
1331 | uint32_t (*chid)(struct nvdebug_state *, void *); | ||
1332 | uint32_t (*inst_ptr_lo)(struct nvdebug_state *, void *); | ||
1333 | enum INST_TARGET (*inst_target)(struct nvdebug_state *, void *): | ||
1334 | uint32_t (*tsgid)(struct nvdebug_state *, void *); | ||
1335 | uint32_t (*timeslice_scale)(struct nvdebug_state *, void *); | ||
1336 | uint32_t (*timeslice_timeout)(struct nvdebug_state *, void *); | ||
1337 | uint32_t (*tsg_length)(struct nvdebug_state *, void *); | ||
1338 | };*/ | ||
1339 | |||
1340 | // This disgusting macro is a crutch to work around the fact that runlists were | 1369 | // This disgusting macro is a crutch to work around the fact that runlists were |
1341 | // different prior to Volta. | 1370 | // different prior to Volta. |
1342 | #define VERSIONED_RL_ACCESSOR(_ENTRY_TYPE, type, prop) \ | 1371 | #define VERSIONED_RL_ACCESSOR(_ENTRY_TYPE, type, prop) \ |
@@ -1366,6 +1395,7 @@ VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length); | |||
1366 | #define NV_RL_ENTRY_SIZE(g) \ | 1395 | #define NV_RL_ENTRY_SIZE(g) \ |
1367 | ((g)->chip_id >= NV_CHIP_ID_VOLTA ? sizeof(struct gv100_runlist_tsg) : sizeof(struct gk110_runlist_tsg)) | 1396 | ((g)->chip_id >= NV_CHIP_ID_VOLTA ? sizeof(struct gv100_runlist_tsg) : sizeof(struct gk110_runlist_tsg)) |
1368 | 1397 | ||
1398 | // chan and tsg should be pointers | ||
1369 | #define for_chan_in_tsg(g, chan, tsg) \ | 1399 | #define for_chan_in_tsg(g, chan, tsg) \ |
1370 | for (chan = (typeof(chan))(((u8*)tsg) + NV_RL_ENTRY_SIZE(g)); \ | 1400 | for (chan = (typeof(chan))(((u8*)tsg) + NV_RL_ENTRY_SIZE(g)); \ |
1371 | (u8*)chan < ((u8*)tsg) + (1 + tsg_length(g, tsg)) * NV_RL_ENTRY_SIZE(g); \ | 1401 | (u8*)chan < ((u8*)tsg) + (1 + tsg_length(g, tsg)) * NV_RL_ENTRY_SIZE(g); \ |