diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 137 |
1 files changed, 95 insertions, 42 deletions
@@ -64,7 +64,7 @@ struct gk20a; | |||
64 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. | 64 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. |
65 | */ | 65 | */ |
66 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; | 66 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; |
67 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; | 67 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_INVALID = 1, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; |
68 | static inline const char *target_to_text(enum INST_TARGET t) { | 68 | static inline const char *target_to_text(enum INST_TARGET t) { |
69 | switch (t) { | 69 | switch (t) { |
70 | case TARGET_VID_MEM: | 70 | case TARGET_VID_MEM: |
@@ -78,7 +78,7 @@ static inline const char *target_to_text(enum INST_TARGET t) { | |||
78 | } | 78 | } |
79 | } | 79 | } |
80 | 80 | ||
81 | // Support: Volta, Ampere, Turing | 81 | // Support: Volta, Turing, Ampere |
82 | struct gv100_runlist_chan { | 82 | struct gv100_runlist_chan { |
83 | // 0:63 | 83 | // 0:63 |
84 | enum ENTRY_TYPE entry_type:1; | 84 | enum ENTRY_TYPE entry_type:1; |
@@ -308,7 +308,7 @@ typedef union { | |||
308 | } eng_runlist_gf100_t; | 308 | } eng_runlist_gf100_t; |
309 | 309 | ||
310 | /* | 310 | /* |
311 | Starting with Turing, the seperate registers for reading and writing runlist | 311 | Starting with Turing, the separate registers for reading and writing runlist |
312 | configuration were dropped in favor of read/write indexed registers. As part | 312 | configuration were dropped in favor of read/write indexed registers. As part |
313 | of this, the layout was modified to allow for larger runlist pointers (upper | 313 | of this, the layout was modified to allow for larger runlist pointers (upper |
314 | 52 of 64 bits). | 314 | 52 of 64 bits). |
@@ -362,7 +362,6 @@ enum CHANNEL_STATUS { | |||
362 | }; | 362 | }; |
363 | 363 | ||
364 | /* Programmable Channel Control System RAM (PCCSR) | 364 | /* Programmable Channel Control System RAM (PCCSR) |
365 | |||
366 | 512-entry array of channel control and status data structures. | 365 | 512-entry array of channel control and status data structures. |
367 | 366 | ||
368 | === Read/Write Fields === | 367 | === Read/Write Fields === |
@@ -391,6 +390,7 @@ enum CHANNEL_STATUS { | |||
391 | *Field only available on Turing. | 390 | *Field only available on Turing. |
392 | 391 | ||
393 | Support: Fermi, Maxwell, Pascal, Volta, Turing | 392 | Support: Fermi, Maxwell, Pascal, Volta, Turing |
393 | See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc | ||
394 | */ | 394 | */ |
395 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) | 395 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) |
396 | #define MAX_CHID 512 | 396 | #define MAX_CHID 512 |
@@ -611,12 +611,10 @@ typedef union { | |||
611 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) | 611 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) |
612 | 612 | ||
613 | Support: Kepler, Maxwell, Pascal, Volta, Turing, Ampere | 613 | Support: Kepler, Maxwell, Pascal, Volta, Turing, Ampere |
614 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. | 614 | See also: manuals/volta/gv100/dev_top.ref.txt in open-gpu-doc. |
615 | */ | 615 | */ |
616 | 616 | ||
617 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) | ||
618 | #define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) | 617 | #define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) |
619 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) | ||
620 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 | 618 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 |
621 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | 619 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; |
622 | enum ENGINE_TYPES { | 620 | enum ENGINE_TYPES { |
@@ -670,34 +668,6 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
670 | "FLA: Fabric Logical Addressing", | 668 | "FLA: Fabric Logical Addressing", |
671 | }; | 669 | }; |
672 | 670 | ||
673 | // These fields are from nvgpu/include/nvgpu/hw/ga100/hw_top_ga100.h | |
674 | typedef union { | ||
675 | // _info type fields | ||
676 | struct { | ||
677 | uint32_t fault_id:11; | ||
678 | uint32_t padding0:5; | ||
679 | uint32_t inst_id:8; | ||
680 | enum ENGINE_TYPES engine_type:7; // "type_enum" | ||
681 | bool has_next_entry:1; | ||
682 | } __attribute__((packed)); | ||
683 | // _info2 type fields | ||
684 | struct { | ||
685 | uint32_t reset_id:8; | ||
686 | uint32_t pri_base:18; // "device_pri_base" | ||
687 | uint32_t padding1:4; | ||
688 | uint32_t is_engine:1; | ||
689 | uint32_t padding2:1; | ||
690 | } __attribute__((packed)); | ||
691 | struct { | ||
692 | uint32_t rleng_id:2; | ||
693 | uint32_t padding3:8; | ||
694 | uint32_t runlist_pri_base:16; | ||
695 | uint32_t padding4:6; | ||
696 | } __attribute__((packed)); | ||
697 | uint32_t raw; | ||
698 | } ptop_device_info_ga100_t; | ||
699 | |||
700 | // These fields are from open-gpu-doc/manuals/volta/gv100/dev_top.ref.txt | |
701 | typedef union { | 671 | typedef union { |
702 | // DATA type fields | 672 | // DATA type fields |
703 | struct { | 673 | struct { |
@@ -737,6 +707,70 @@ typedef union { | |||
737 | uint32_t raw; | 707 | uint32_t raw; |
738 | } ptop_device_info_gk104_t; | 708 | } ptop_device_info_gk104_t; |
739 | 709 | ||
710 | /* GPU TOPology on Ampere and newer GPUs | ||
711 | On Ampere+, the array of device topology entries continues to describe all GPU | ||
712 | engines, but the layout is entirely different to principally accommodate a | |
713 | pointer to the runlist configuration region for each engine. (Runlist | ||
714 | configuration was moved out of the Host (PFIFO) region into per-engine spaces | ||
715 | starting with Ampere.) | ||
716 | |||
717 | Parsing is somewhat more difficult than with the older version, as entries | ||
718 | no longer include an `info_type`. Instead, each entry has 1--3 subrows, where | ||
719 | `has_next_entry` is 0 for the last subrow. | ||
720 | |||
721 | Empty rows should be skipped. | ||
722 | |||
723 | HAS_NEXT_ENTRY : Is the following entry a descriptor of the same engine? | ||
724 | |||
725 | == Subrow 1 fields == | ||
726 | FAULT_ID : [UNKNOWN] | ||
727 | INST_ID : [UNKNOWN] | ||
728 | ENGINE_TYPE : Enumerated name of the type of engine. (Seemingly identical | ||
729 | to ENGINE_ENUM in old PTOP layout.) | ||
730 | |||
731 | == Subrow 2 fields == | ||
732 | RESET_ID : [UNKNOWN] | ||
733 | PRI_BASE : [UNKNOWN] | ||
734 | IS_ENGINE : Does this entry describe an engine with a runlist? (Seemingly | ||
735 | identical to RUNLIST_IS_VALID in old PTOP layout.) | ||
736 | |||
737 | == Subrow 3 fields == | ||
738 | RUNLIST_PRI_BASE : Offset in BAR0 of the RunList RAM (RLRAM) region for the | ||
739 | runlist of this engine. | ||
740 | RLENG_ID : What is the per-runlist ID of this engine? | ||
741 | |||
742 | Support: Ampere, Ada, Hopper (and likely newer) | |
743 | See also: hw_top_ga100.h in nvgpu (NVIDIA's open-source Jetson GPU driver) | ||
744 | */ | ||
745 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) | ||
746 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) | ||
747 | |||
748 | typedef union { | ||
749 | // _info type fields | ||
750 | struct { | ||
751 | uint32_t fault_id:11; | ||
752 | uint32_t padding0:5; | ||
753 | uint32_t inst_id:8; | ||
754 | enum ENGINE_TYPES engine_type:7; // "type_enum" | ||
755 | bool has_next_entry:1; | ||
756 | } __attribute__((packed)); | ||
757 | // _info2 type fields | ||
758 | struct { | ||
759 | uint32_t reset_id:8; | ||
760 | uint32_t pri_base:18; // "device_pri_base" | ||
761 | uint32_t padding1:4; | ||
762 | uint32_t is_engine:1; | ||
763 | uint32_t padding2:1; | ||
764 | } __attribute__((packed)); | ||
765 | struct { | ||
766 | uint32_t rleng_id:2; | ||
767 | uint32_t padding3:8; | ||
768 | uint32_t runlist_pri_base:16; | ||
769 | uint32_t padding4:6; | ||
770 | } __attribute__((packed)); | ||
771 | uint32_t raw; | ||
772 | } ptop_device_info_ga100_t; | ||
773 | |||
740 | /* Graphics Processing Cluster (GPC) on-chip information | 774 | /* Graphics Processing Cluster (GPC) on-chip information |
741 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing | 775 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing |
742 | Clusters (also known as GPU Processing Clusters, starting with Ampere). | 776 | Clusters (also known as GPU Processing Clusters, starting with Ampere). |
@@ -792,21 +826,35 @@ typedef union { | |||
792 | SCAL_NUM_CES : Number of externally accessible copy engines | 826 | SCAL_NUM_CES : Number of externally accessible copy engines |
793 | 827 | ||
794 | Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be | 828 | Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be |
795 | consistent with PTOP data. | 829 | consistent with PTOP data. |
796 | 830 | ||
797 | Support: Kepler through (at least) Blackwell | 831 | Support: Kepler through (at least) Blackwell |
798 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 832 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
799 | */ | 833 | */ |
800 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | 834 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 |
835 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | ||
836 | #define NV_LCE_FOR_PCE_GP100 0x0010402c | ||
837 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | ||
838 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | ||
839 | /* GRaphics Copy Engine (GRCE) Information | ||
840 | "There's two types of CE... ASYNC_CEs which are copy engines with their own | ||
841 | runlists and GRCEs which are CEs that share a runlist with GR." (nvgpu, | ||
842 | ioctl_ctrl.c) | ||
843 | |||
844 | Starting with Pascal, the GRCEs are LCEs 0 and 1, but have the added capability | ||
845 | to share a PCE with another LCE. (Normally a PCE may only be associated with | ||
846 | one LCE.) These registers include that configuration, which should only be set | ||
847 | if no PCE has been directly associated with the specific GRCE. | ||
848 | |||
849 | Support: Pascal through (at least) Ada | ||
850 | Note that Volta through Ada use a different bit format than Pascal. | ||
851 | */ | ||
801 | // Defined max number of GRCEs for a GPU (TX2 has only one) | 852 | // Defined max number of GRCEs for a GPU (TX2 has only one) |
802 | # define NV_GRCE_MAX 2 | 853 | # define NV_GRCE_MAX 2 |
803 | // Defined GRCE->CE mapping offsets from nvgpu | 854 | // Defined GRCE->CE mapping offsets from nvgpu |
804 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) | 855 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) |
805 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) | 856 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) |
806 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | 857 | |
807 | #define NV_LCE_FOR_PCE_GP100 0x0010402c | ||
808 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | ||
809 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | ||
810 | // Struct for use with nvdebug_reg_range_read() | 858 | // Struct for use with nvdebug_reg_range_read() |
811 | union reg_range { | 859 | union reg_range { |
812 | struct { | 860 | struct { |
@@ -1294,13 +1342,18 @@ struct runlist_iter { | |||
1294 | int entries_left_in_tsg; | 1342 | int entries_left_in_tsg; |
1295 | // Number of entries in runlist | 1343 | // Number of entries in runlist |
1296 | int len; | 1344 | int len; |
1345 | // Offset to start of Channel RAM (as this is per-runlist on Ampere+) | ||
1346 | uint32_t channel_ram; | ||
1297 | }; | 1347 | }; |
1298 | 1348 | ||
1299 | #define NVDEBUG_MAX_DEVICES 8 | 1349 | #define NVDEBUG_MAX_DEVICES 8 |
1300 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 1350 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
1301 | 1351 | ||
1302 | // Defined in runlist.c | 1352 | // Defined in runlist.c |
1303 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); | 1353 | int get_runlist_iter( |
1354 | struct nvdebug_state *g, | ||
1355 | int rl_id, | ||
1356 | struct runlist_iter *rl_iter /* out */); | ||
1304 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); | 1357 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); |
1305 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1358 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); |
1306 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1359 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); |
@@ -1318,7 +1371,7 @@ uint64_t search_v1_page_directory( | |||
1318 | enum INST_TARGET addr_to_find_aperture); | 1371 | enum INST_TARGET addr_to_find_aperture); |
1319 | // Defined in bus.c | 1372 | // Defined in bus.c |
1320 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); | 1373 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); |
1321 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); | 1374 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd /* out */); |
1322 | 1375 | ||
1323 | // Some portions of nvdebug can be included from kernel- or user-space (just | 1376 | // Some portions of nvdebug can be included from kernel- or user-space (just |
1324 | // this file at present). In order for these compiled object files to be | 1377 | // this file at present). In order for these compiled object files to be |