about | summary | refs | log | tree | commit | diff | stats
path: root/nvdebug.h
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- nvdebug.h 137
1 files changed, 95 insertions, 42 deletions
diff --git a/nvdebug.h b/nvdebug.h
index e9ae3db..f644500 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -64,7 +64,7 @@ struct gk20a;
64 add a USERD pointer, a longer INST pointer, and a runqueue selector flag. 64 add a USERD pointer, a longer INST pointer, and a runqueue selector flag.
65*/ 65*/
66enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; 66enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1};
67enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; 67enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_INVALID = 1, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3};
68static inline const char *target_to_text(enum INST_TARGET t) { 68static inline const char *target_to_text(enum INST_TARGET t) {
69 switch (t) { 69 switch (t) {
70 case TARGET_VID_MEM: 70 case TARGET_VID_MEM:
@@ -78,7 +78,7 @@ static inline const char *target_to_text(enum INST_TARGET t) {
78 } 78 }
79} 79}
80 80
81// Support: Volta, Ampere, Turing 81// Support: Volta, Turing, Ampere
82struct gv100_runlist_chan { 82struct gv100_runlist_chan {
83// 0:63 83// 0:63
84 enum ENTRY_TYPE entry_type:1; 84 enum ENTRY_TYPE entry_type:1;
@@ -308,7 +308,7 @@ typedef union {
308} eng_runlist_gf100_t; 308} eng_runlist_gf100_t;
309 309
310/* 310/*
311 Starting with Turing, the seperate registers for reading and writing runlist 311 Starting with Turing, the separate registers for reading and writing runlist
312 configuration were dropped in favor of read/write indexed registers. As part 312 configuration were dropped in favor of read/write indexed registers. As part
313 of this, the layout was modified to allow for larger runlist pointers (upper 313 of this, the layout was modified to allow for larger runlist pointers (upper
314 52 of 64 bits). 314 52 of 64 bits).
@@ -362,7 +362,6 @@ enum CHANNEL_STATUS {
362}; 362};
363 363
364/* Programmable Channel Control System RAM (PCCSR) 364/* Programmable Channel Control System RAM (PCCSR)
365
366 512-entry array of channel control and status data structures. 365 512-entry array of channel control and status data structures.
367 366
368 === Read/Write Fields === 367 === Read/Write Fields ===
@@ -391,6 +390,7 @@ enum CHANNEL_STATUS {
391 *Field only available on Turing. 390 *Field only available on Turing.
392 391
393 Support: Fermi, Maxwell, Pascal, Volta, Turing 392 Support: Fermi, Maxwell, Pascal, Volta, Turing
393 See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc
394*/ 394*/
395#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) 395#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
396#define MAX_CHID 512 396#define MAX_CHID 512
@@ -611,12 +611,10 @@ typedef union {
611 ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) 611 ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES)
612 612
613 Support: Kepler, Maxwell, Pascal, Volta, Turing, Ampere 613 Support: Kepler, Maxwell, Pascal, Volta, Turing, Ampere
614 See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. 614 See also: manuals/volta/gv100/dev_top.ref.txt in open-gpu-doc.
615*/ 615*/
616 616
617#define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4)
618#define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) 617#define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4)
619#define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20)
620#define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 618#define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64
621enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; 619enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3};
622enum ENGINE_TYPES { 620enum ENGINE_TYPES {
@@ -670,34 +668,6 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = {
670 "FLA: Fabric Logical Addressing", 668 "FLA: Fabric Logical Addressing",
671}; 669};
672 670
673// These field are from nvgpu/include/nvgpu/hw/ga100/hw_top_ga100.h
674typedef union {
675 // _info type fields
676 struct {
677 uint32_t fault_id:11;
678 uint32_t padding0:5;
679 uint32_t inst_id:8;
680 enum ENGINE_TYPES engine_type:7; // "type_enum"
681 bool has_next_entry:1;
682 } __attribute__((packed));
683 // _info2 type fields
684 struct {
685 uint32_t reset_id:8;
686 uint32_t pri_base:18; // "device_pri_base"
687 uint32_t padding1:4;
688 uint32_t is_engine:1;
689 uint32_t padding2:1;
690 } __attribute__((packed));
691 struct {
692 uint32_t rleng_id:2;
693 uint32_t padding3:8;
694 uint32_t runlist_pri_base:16;
695 uint32_t padding4:6;
696 } __attribute__((packed));
697 uint32_t raw;
698} ptop_device_info_ga100_t;
699
700// These field are from open-gpu-doc/manuals/volta/gv100/dev_top.ref.txt
701typedef union { 671typedef union {
702 // DATA type fields 672 // DATA type fields
703 struct { 673 struct {
@@ -737,6 +707,70 @@ typedef union {
737 uint32_t raw; 707 uint32_t raw;
738} ptop_device_info_gk104_t; 708} ptop_device_info_gk104_t;
739 709
710/* GPU TOPology on Ampere and newer GPUs
711 On Ampere+, the array of device topology entries continues to describe all GPU
712 engines, but the layout is entirely different to principally accommodate a
713 pointer to the runlist configuration region for each engine. (Runlist
714 configuration was moved out of the Host (PFIFO) region into per-engine spaces
715 starting with Ampere.)
716
717 Parsing is somewhat more difficult than with the older version, as entries
718 no longer include an `info_type`. Instead, each entry has 1--3 subrows, where
719 `has_next_entry` is 0 for the last subrow.
720
721 Empty rows should be skipped.
722
723 HAS_NEXT_ENTRY : Is the following entry a descriptor of the same engine?
724
725 == Subrow 1 fields ==
726 FAULT_ID : [UNKNOWN]
727 INST_ID : [UNKNOWN]
728 ENGINE_TYPE : Enumerated name of the type of engine. (Seemingly identical
729 to ENGINE_ENUM in old PTOP layout.)
730
731 == Subrow 2 fields ==
732 RESET_ID : [UNKNOWN]
733 PRI_BASE : [UNKNOWN]
734 IS_ENGINE : Does this entry describe an engine with a runlist? (Seemingly
735 identical to RUNLIST_IS_VALID in old PTOP layout.)
736
737 == Subrow 3 fields ==
738 RUNLIST_PRI_BASE : Offset in BAR0 of the RunList RAM (RLRAM) region for the
739 runlist of this engine.
740 RLENG_ID : What is the per-runlist ID of this engine?
741
742 Support: Ampere, Ada, Hopper, (and newer likely)
743 See also: hw_top_ga100.h in nvgpu (NVIDIA's open-source Jetson GPU driver)
744*/
745#define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4)
746#define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20)
747
748typedef union {
749 // _info type fields
750 struct {
751 uint32_t fault_id:11;
752 uint32_t padding0:5;
753 uint32_t inst_id:8;
754 enum ENGINE_TYPES engine_type:7; // "type_enum"
755 bool has_next_entry:1;
756 } __attribute__((packed));
757 // _info2 type fields
758 struct {
759 uint32_t reset_id:8;
760 uint32_t pri_base:18; // "device_pri_base"
761 uint32_t padding1:4;
762 uint32_t is_engine:1;
763 uint32_t padding2:1;
764 } __attribute__((packed));
765 struct {
766 uint32_t rleng_id:2;
767 uint32_t padding3:8;
768 uint32_t runlist_pri_base:16;
769 uint32_t padding4:6;
770 } __attribute__((packed));
771 uint32_t raw;
772} ptop_device_info_ga100_t;
773
740/* Graphics Processing Cluster (GPC) on-chip information 774/* Graphics Processing Cluster (GPC) on-chip information
741 The GPU's Compute/Graphics engine is subdivided into Graphics Processing 775 The GPU's Compute/Graphics engine is subdivided into Graphics Processing
742 Clusters (also known as GPU Processing Clusters, starting with Ampere). 776 Clusters (also known as GPU Processing Clusters, starting with Ampere).
@@ -792,21 +826,35 @@ typedef union {
792 SCAL_NUM_CES : Number of externally accessible copy engines 826 SCAL_NUM_CES : Number of externally accessible copy engines
793 827
794 Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be 828 Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be
795 consistent with PTOP data. 829 consistent with PTOP data.
796 830
797 Support: Kepler through (at least) Blackwell 831 Support: Kepler through (at least) Blackwell
798 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. 832 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
799*/ 833*/
800#define NV_PTOP_SCAL_NUM_CES 0x00022444 834#define NV_PTOP_SCAL_NUM_CES 0x00022444
835// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu)
836#define NV_LCE_FOR_PCE_GP100 0x0010402c
837#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4)
838#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4)
839/* GRaphics Copy Engine (GRCE) Information
840 "There's two types of CE... ASYNC_CEs which are copy engines with their own
841 runlists and GRCEs which are CEs that share a runlist with GR." (nvgpu,
842 ioctl_ctrl.c)
843
844 Starting with Pascal, the GRCEs are LCEs 0 and 1, but have the added capability
845 to share a PCE with another LCE. (Normally a PCE may only be associated with
846 one LCE.) These registers include that configuration, which should only be set
847 if no PCE has been directly associated with the specific GRCE.
848
849 Support: Pascal through (at least) Ada
850 Note that Volta through Ada use a different bit format than Pascal.
851*/
801// Defined max number of GRCEs for a GPU (TX2 has only one) 852// Defined max number of GRCEs for a GPU (TX2 has only one)
802# define NV_GRCE_MAX 2 853# define NV_GRCE_MAX 2
803// Defined GRCE->CE mapping offsets from nvgpu 854// Defined GRCE->CE mapping offsets from nvgpu
804#define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) 855#define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4)
805#define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) 856#define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4)
806// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) 857
807#define NV_LCE_FOR_PCE_GP100 0x0010402c
808#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4)
809#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4)
810// Struct for use with nvdebug_reg_range_read() 858// Struct for use with nvdebug_reg_range_read()
811union reg_range { 859union reg_range {
812 struct { 860 struct {
@@ -1294,13 +1342,18 @@ struct runlist_iter {
1294 int entries_left_in_tsg; 1342 int entries_left_in_tsg;
1295 // Number of entries in runlist 1343 // Number of entries in runlist
1296 int len; 1344 int len;
1345 // Offset to start of Channel RAM (as this is per-runlist on Ampere+)
1346 uint32_t channel_ram;
1297}; 1347};
1298 1348
1299#define NVDEBUG_MAX_DEVICES 8 1349#define NVDEBUG_MAX_DEVICES 8
1300extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; 1350extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
1301 1351
1302// Defined in runlist.c 1352// Defined in runlist.c
1303int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); 1353int get_runlist_iter(
1354 struct nvdebug_state *g,
1355 int rl_id,
1356 struct runlist_iter *rl_iter /* out */);
1304int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); 1357int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id);
1305int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); 1358int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id);
1306int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); 1359int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id);
@@ -1318,7 +1371,7 @@ uint64_t search_v1_page_directory(
1318 enum INST_TARGET addr_to_find_aperture); 1371 enum INST_TARGET addr_to_find_aperture);
1319// Defined in bus.c 1372// Defined in bus.c
1320int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); 1373int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target);
1321int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); 1374int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd /* out */);
1322 1375
1323// Some portions of nvdebug can be included from kernel- or user-space (just 1376// Some portions of nvdebug can be included from kernel- or user-space (just
1324// this file at present). In order for these compiled object files to be 1377// this file at present). In order for these compiled object files to be