diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-04-23 17:44:59 -0400 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-04-23 17:44:59 -0400 |
commit | 8b9c6400d0c88e127be2d31ab3fb507da49f9d6f (patch) | |
tree | 98cc8cf93a0ba75c7864a783cb2c7c66b83fdf25 | |
parent | 71c6ab5ac5270d3e1c2fb809667225add109ec6b (diff) |
Style and documentation cleanup
- Document topology registers (PTOP) on Ampere+
- Document graphics copy engine configuration registers
- Move resubmit_runlist range checks into runlist.c
- Miscellaneous spacing, typo, and minor documentation fixes
-rw-r--r-- | mmu.c | 30 | ||||
-rw-r--r-- | nvdebug.h | 137 | ||||
-rw-r--r-- | runlist.c | 10 | ||||
-rw-r--r-- | runlist_procfs.c | 9 |
4 files changed, 120 insertions, 66 deletions
@@ -24,7 +24,8 @@ int g_verbose = 0; | |||
24 | @param pd_ap PD-type aperture (target address space) for `addr` | 24 | @param pd_ap PD-type aperture (target address space) for `addr` |
25 | @return A dereferencable kernel address, or an ERR_PTR-wrapped error | 25 | @return A dereferencable kernel address, or an ERR_PTR-wrapped error |
26 | */ | 26 | */ |
27 | static void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, enum PD_TARGET pd_ap) { | 27 | static void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, |
28 | enum PD_TARGET pd_ap) { | ||
28 | struct iommu_domain *dom; | 29 | struct iommu_domain *dom; |
29 | phys_addr_t phys; | 30 | phys_addr_t phys; |
30 | 31 | ||
@@ -67,11 +68,11 @@ static void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, enum PD_T | |||
67 | 68 | ||
68 | // Internal helper for search_page_directory(). | 69 | // Internal helper for search_page_directory(). |
69 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, | 70 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, |
70 | uintptr_t pde_addr, | 71 | uintptr_t pde_addr, |
71 | enum PD_TARGET pde_target, | 72 | enum PD_TARGET pde_target, |
72 | uint64_t addr_to_find, | 73 | uint64_t addr_to_find, |
73 | enum INST_TARGET addr_to_find_aperture, | 74 | enum INST_TARGET addr_to_find_aperture, |
74 | uint32_t level) { | 75 | uint32_t level) { |
75 | uint64_t res, i; | 76 | uint64_t res, i; |
76 | void __iomem *pde_kern; | 77 | void __iomem *pde_kern; |
77 | page_dir_entry_t entry; | 78 | page_dir_entry_t entry; |
@@ -110,13 +111,12 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g, | |||
110 | } | 111 | } |
111 | 112 | ||
112 | /* GPU Physical address -> Virtual address ("reverse" translation) for V2 tables | 113 | /* GPU Physical address -> Virtual address ("reverse" translation) for V2 tables |
113 | |||
114 | Depth-first search a page directory of the GPU MMU for where a particular | 114 | Depth-first search a page directory of the GPU MMU for where a particular |
115 | physical address is mapped. Upon finding a mapping, the virtual address is | 115 | physical address is mapped. Upon finding a mapping, the virtual address is |
116 | returned. | 116 | returned. |
117 | 117 | ||
118 | The page directory may be located in VID_MEM, SYS_MEM, or some combination of | 118 | The page directory and tables may be located in VID_MEM, SYS_MEM, or spread |
119 | the two. | 119 | across multiple apertures. |
120 | 120 | ||
121 | @param pd_config Page Directory configuration, containing pointer and | 121 | @param pd_config Page Directory configuration, containing pointer and |
122 | aperture for the start of the PDE3 entries | 122 | aperture for the start of the PDE3 entries |
@@ -126,9 +126,9 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g, | |||
126 | mapped into by this page table. (Zero is not a valid virtual address) | 126 | mapped into by this page table. (Zero is not a valid virtual address) |
127 | */ | 127 | */ |
128 | uint64_t search_page_directory(struct nvdebug_state *g, | 128 | uint64_t search_page_directory(struct nvdebug_state *g, |
129 | page_dir_config_t pd_config, | 129 | page_dir_config_t pd_config, |
130 | uint64_t addr_to_find, | 130 | uint64_t addr_to_find, |
131 | enum INST_TARGET addr_to_find_aperture) { | 131 | enum INST_TARGET addr_to_find_aperture) { |
132 | uint64_t res, i; | 132 | uint64_t res, i; |
133 | // Make sure that the query is page-aligned | 133 | // Make sure that the query is page-aligned |
134 | if (addr_to_find & 0xfff) { | 134 | if (addr_to_find & 0xfff) { |
@@ -147,9 +147,9 @@ uint64_t search_page_directory(struct nvdebug_state *g, | |||
147 | (See `search_page_directory()` for documentation.) | 147 | (See `search_page_directory()` for documentation.) |
148 | */ | 148 | */ |
149 | uint64_t search_v1_page_directory(struct nvdebug_state *g, | 149 | uint64_t search_v1_page_directory(struct nvdebug_state *g, |
150 | page_dir_config_t pd_config, | 150 | page_dir_config_t pd_config, |
151 | uint64_t addr_to_find, | 151 | uint64_t addr_to_find, |
152 | enum INST_TARGET addr_to_find_aperture) { | 152 | enum INST_TARGET addr_to_find_aperture) { |
153 | uint64_t j, i = 0; | 153 | uint64_t j, i = 0; |
154 | page_dir_entry_v1_t pde; | 154 | page_dir_entry_v1_t pde; |
155 | page_tbl_entry_v1_t pte; | 155 | page_tbl_entry_v1_t pte; |
@@ -64,7 +64,7 @@ struct gk20a; | |||
64 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. | 64 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. |
65 | */ | 65 | */ |
66 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; | 66 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; |
67 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; | 67 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_INVALID = 1, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; |
68 | static inline const char *target_to_text(enum INST_TARGET t) { | 68 | static inline const char *target_to_text(enum INST_TARGET t) { |
69 | switch (t) { | 69 | switch (t) { |
70 | case TARGET_VID_MEM: | 70 | case TARGET_VID_MEM: |
@@ -78,7 +78,7 @@ static inline const char *target_to_text(enum INST_TARGET t) { | |||
78 | } | 78 | } |
79 | } | 79 | } |
80 | 80 | ||
81 | // Support: Volta, Ampere, Turing | 81 | // Support: Volta, Turing, Ampere |
82 | struct gv100_runlist_chan { | 82 | struct gv100_runlist_chan { |
83 | // 0:63 | 83 | // 0:63 |
84 | enum ENTRY_TYPE entry_type:1; | 84 | enum ENTRY_TYPE entry_type:1; |
@@ -308,7 +308,7 @@ typedef union { | |||
308 | } eng_runlist_gf100_t; | 308 | } eng_runlist_gf100_t; |
309 | 309 | ||
310 | /* | 310 | /* |
311 | Starting with Turing, the seperate registers for reading and writing runlist | 311 | Starting with Turing, the separate registers for reading and writing runlist |
312 | configuration were dropped in favor of read/write indexed registers. As part | 312 | configuration were dropped in favor of read/write indexed registers. As part |
313 | of this, the layout was modified to allow for larger runlist pointers (upper | 313 | of this, the layout was modified to allow for larger runlist pointers (upper |
314 | 52 of 64 bits). | 314 | 52 of 64 bits). |
@@ -362,7 +362,6 @@ enum CHANNEL_STATUS { | |||
362 | }; | 362 | }; |
363 | 363 | ||
364 | /* Programmable Channel Control System RAM (PCCSR) | 364 | /* Programmable Channel Control System RAM (PCCSR) |
365 | |||
366 | 512-entry array of channel control and status data structures. | 365 | 512-entry array of channel control and status data structures. |
367 | 366 | ||
368 | === Read/Write Fields === | 367 | === Read/Write Fields === |
@@ -391,6 +390,7 @@ enum CHANNEL_STATUS { | |||
391 | *Field only available on Turing. | 390 | *Field only available on Turing. |
392 | 391 | ||
393 | Support: Fermi, Maxwell, Pascal, Volta, Turing | 392 | Support: Fermi, Maxwell, Pascal, Volta, Turing |
393 | See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc | ||
394 | */ | 394 | */ |
395 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) | 395 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) |
396 | #define MAX_CHID 512 | 396 | #define MAX_CHID 512 |
@@ -611,12 +611,10 @@ typedef union { | |||
611 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) | 611 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) |
612 | 612 | ||
613 | Support: Kepler, Maxwell, Pascal, Volta, Turing, Ampere | 613 | Support: Kepler, Maxwell, Pascal, Volta, Turing, Ampere |
614 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. | 614 | See also: manuals/volta/gv100/dev_top.ref.txt in open-gpu-doc. |
615 | */ | 615 | */ |
616 | 616 | ||
617 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) | ||
618 | #define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) | 617 | #define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) |
619 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) | ||
620 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 | 618 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 |
621 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | 619 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; |
622 | enum ENGINE_TYPES { | 620 | enum ENGINE_TYPES { |
@@ -670,34 +668,6 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
670 | "FLA: Fabric Logical Addressing", | 668 | "FLA: Fabric Logical Addressing", |
671 | }; | 669 | }; |
672 | 670 | ||
673 | // These field are from nvgpu/include/nvgpu/hw/ga100/hw_top_ga100.h | ||
674 | typedef union { | ||
675 | // _info type fields | ||
676 | struct { | ||
677 | uint32_t fault_id:11; | ||
678 | uint32_t padding0:5; | ||
679 | uint32_t inst_id:8; | ||
680 | enum ENGINE_TYPES engine_type:7; // "type_enum" | ||
681 | bool has_next_entry:1; | ||
682 | } __attribute__((packed)); | ||
683 | // _info2 type fields | ||
684 | struct { | ||
685 | uint32_t reset_id:8; | ||
686 | uint32_t pri_base:18; // "device_pri_base" | ||
687 | uint32_t padding1:4; | ||
688 | uint32_t is_engine:1; | ||
689 | uint32_t padding2:1; | ||
690 | } __attribute__((packed)); | ||
691 | struct { | ||
692 | uint32_t rleng_id:2; | ||
693 | uint32_t padding3:8; | ||
694 | uint32_t runlist_pri_base:16; | ||
695 | uint32_t padding4:6; | ||
696 | } __attribute__((packed)); | ||
697 | uint32_t raw; | ||
698 | } ptop_device_info_ga100_t; | ||
699 | |||
700 | // These field are from open-gpu-doc/manuals/volta/gv100/dev_top.ref.txt | ||
701 | typedef union { | 671 | typedef union { |
702 | // DATA type fields | 672 | // DATA type fields |
703 | struct { | 673 | struct { |
@@ -737,6 +707,70 @@ typedef union { | |||
737 | uint32_t raw; | 707 | uint32_t raw; |
738 | } ptop_device_info_gk104_t; | 708 | } ptop_device_info_gk104_t; |
739 | 709 | ||
710 | /* GPU TOPology on Ampere and newer GPUs | ||
711 | On Ampere+, the array of device topology entries continues to describe all GPU | ||
712 | engines, but the layout is entirely different to principally accommodate a | ||
713 | pointer to the runlist configuration region for each engine. (Runlist | ||
714 | configuration was moved out of the Host (PFIFO) region into per-engine spaces | ||
715 | starting with Ampere.) | ||
716 | |||
717 | Parsing is somewhat more difficult than with the older version, as entries | ||
718 | no longer include an `info_type`. Instead, each entry has 1--3 subrows, where | ||
719 | `has_next_entry` is 0 for the last subrow. | ||
720 | |||
721 | Empty rows should be skipped. | ||
722 | |||
723 | HAS_NEXT_ENTRY : Is the following entry a descriptor of the same engine? | ||
724 | |||
725 | == Subrow 1 fields == | ||
726 | FAULT_ID : [UNKNOWN] | ||
727 | INST_ID : [UNKNOWN] | ||
728 | ENGINE_TYPE : Enumerated name of the type of engine. (Seemingly identical | ||
729 | to ENGINE_ENUM in old PTOP layout.) | ||
730 | |||
731 | == Subrow 2 fields == | ||
732 | RESET_ID : [UNKNOWN] | ||
733 | PRI_BASE : [UNKNOWN] | ||
734 | IS_ENGINE : Does this entry describe an engine with a runlist? (Seemingly | ||
735 | identical to RUNLIST_IS_VALID in old PTOP layout.) | ||
736 | |||
737 | == Subrow 3 fields == | ||
738 | RUNLIST_PRI_BASE : Offset in BAR0 of the RunList RAM (RLRAM) region for the | ||
739 | runlist of this engine. | ||
740 | RLENG_ID : What is the per-runlist ID of this engine? | ||
741 | |||
742 | Support: Ampere, Ada, Hopper (and likely newer) | ||
743 | See also: hw_top_ga100.h in nvgpu (NVIDIA's open-source Jetson GPU driver) | ||
744 | */ | ||
745 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) | ||
746 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) | ||
747 | |||
748 | typedef union { | ||
749 | // _info type fields | ||
750 | struct { | ||
751 | uint32_t fault_id:11; | ||
752 | uint32_t padding0:5; | ||
753 | uint32_t inst_id:8; | ||
754 | enum ENGINE_TYPES engine_type:7; // "type_enum" | ||
755 | bool has_next_entry:1; | ||
756 | } __attribute__((packed)); | ||
757 | // _info2 type fields | ||
758 | struct { | ||
759 | uint32_t reset_id:8; | ||
760 | uint32_t pri_base:18; // "device_pri_base" | ||
761 | uint32_t padding1:4; | ||
762 | uint32_t is_engine:1; | ||
763 | uint32_t padding2:1; | ||
764 | } __attribute__((packed)); | ||
765 | struct { | ||
766 | uint32_t rleng_id:2; | ||
767 | uint32_t padding3:8; | ||
768 | uint32_t runlist_pri_base:16; | ||
769 | uint32_t padding4:6; | ||
770 | } __attribute__((packed)); | ||
771 | uint32_t raw; | ||
772 | } ptop_device_info_ga100_t; | ||
773 | |||
740 | /* Graphics Processing Cluster (GPC) on-chip information | 774 | /* Graphics Processing Cluster (GPC) on-chip information |
741 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing | 775 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing |
742 | Clusters (also known as GPU Processing Clusters, starting with Ampere). | 776 | Clusters (also known as GPU Processing Clusters, starting with Ampere). |
@@ -792,21 +826,35 @@ typedef union { | |||
792 | SCAL_NUM_CES : Number of externally accessible copy engines | 826 | SCAL_NUM_CES : Number of externally accessible copy engines |
793 | 827 | ||
794 | Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be | 828 | Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be |
795 | consistent with PTOP data. | 829 | consistent with PTOP data. |
796 | 830 | ||
797 | Support: Kepler through (at least) Blackwell | 831 | Support: Kepler through (at least) Blackwell |
798 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 832 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
799 | */ | 833 | */ |
800 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | 834 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 |
835 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | ||
836 | #define NV_LCE_FOR_PCE_GP100 0x0010402c | ||
837 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | ||
838 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | ||
839 | /* GRaphics Copy Engine (GRCE) Information | ||
840 | "There's two types of CE... ASYNC_CEs which are copy engines with their own | ||
841 | runlists and GRCEs which are CEs that share a runlist with GR." (nvgpu, | ||
842 | ioctl_ctrl.c) | ||
843 | |||
844 | Starting with Pascal, the GRCEs are LCEs 0 and 1, but have the added capability | ||
845 | to share a PCE with another LCE. (Normally a PCE may only be associated with | ||
846 | one LCE.) These registers include that configuration, which should only be set | ||
847 | if no PCE has been directly associated with the specific GRCE. | ||
848 | |||
849 | Support: Pascal through (at least) Ada | ||
850 | Note that Volta through Ada use a different bit format than Pascal. | ||
851 | */ | ||
801 | // Defined max number of GRCEs for a GPU (TX2 has only one) | 852 | // Defined max number of GRCEs for a GPU (TX2 has only one) |
802 | # define NV_GRCE_MAX 2 | 853 | # define NV_GRCE_MAX 2 |
803 | // Defined GRCE->CE mapping offsets from nvgpu | 854 | // Defined GRCE->CE mapping offsets from nvgpu |
804 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) | 855 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) |
805 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) | 856 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) |
806 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | 857 | |
807 | #define NV_LCE_FOR_PCE_GP100 0x0010402c | ||
808 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | ||
809 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | ||
810 | // Struct for use with nvdebug_reg_range_read() | 858 | // Struct for use with nvdebug_reg_range_read() |
811 | union reg_range { | 859 | union reg_range { |
812 | struct { | 860 | struct { |
@@ -1294,13 +1342,18 @@ struct runlist_iter { | |||
1294 | int entries_left_in_tsg; | 1342 | int entries_left_in_tsg; |
1295 | // Number of entries in runlist | 1343 | // Number of entries in runlist |
1296 | int len; | 1344 | int len; |
1345 | // Offset to start of Channel RAM (as this is per-runlist on Ampere+) | ||
1346 | uint32_t channel_ram; | ||
1297 | }; | 1347 | }; |
1298 | 1348 | ||
1299 | #define NVDEBUG_MAX_DEVICES 8 | 1349 | #define NVDEBUG_MAX_DEVICES 8 |
1300 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 1350 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
1301 | 1351 | ||
1302 | // Defined in runlist.c | 1352 | // Defined in runlist.c |
1303 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); | 1353 | int get_runlist_iter( |
1354 | struct nvdebug_state *g, | ||
1355 | int rl_id, | ||
1356 | struct runlist_iter *rl_iter /* out */); | ||
1304 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); | 1357 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); |
1305 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1358 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); |
1306 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1359 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); |
@@ -1318,7 +1371,7 @@ uint64_t search_v1_page_directory( | |||
1318 | enum INST_TARGET addr_to_find_aperture); | 1371 | enum INST_TARGET addr_to_find_aperture); |
1319 | // Defined in bus.c | 1372 | // Defined in bus.c |
1320 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); | 1373 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); |
1321 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); | 1374 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd /* out */); |
1322 | 1375 | ||
1323 | // Some portions of nvdebug can be included from kernel- or user-space (just | 1376 | // Some portions of nvdebug can be included from kernel- or user-space (just |
1324 | // this file at present). In order for these compiled object files to be | 1377 | // this file at present). In order for these compiled object files to be |
@@ -8,8 +8,10 @@ | |||
8 | #include "nvdebug.h" | 8 | #include "nvdebug.h" |
9 | 9 | ||
10 | // Uncomment to, upon BAR2 access failure, return a PRAMIN-based runlist pointer | 10 | // Uncomment to, upon BAR2 access failure, return a PRAMIN-based runlist pointer |
11 | // **If enabled, PRAMIN may not be otherwise used while walking the runlist!** | 11 | // in get_runlist_iter(). In order for this pointer to remain valid, PRAMIN |
12 | // Runlists can only be printed on the Jetson TX2 if this is enabled. | 12 | // **must** not be moved during runlist traversal. |
13 | // The Jetson TX2 has no BAR2, and stores the runlist in VID_MEM, so this must | ||
14 | // be enabled to print the runlist on the TX2. | ||
13 | //#define FALLBACK_TO_PRAMIN | 15 | //#define FALLBACK_TO_PRAMIN |
14 | 16 | ||
15 | /* Get runlist head and info (incl. length) | 17 | /* Get runlist head and info (incl. length) |
@@ -142,12 +144,16 @@ int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) { | |||
142 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { | 144 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { |
143 | if (g->chip_id < NV_CHIP_ID_TURING) { | 145 | if (g->chip_id < NV_CHIP_ID_TURING) { |
144 | eng_runlist_gf100_t rl; | 146 | eng_runlist_gf100_t rl; |
147 | if (rl_id > MAX_RUNLISTS_GF100) | ||
148 | return -EINVAL; | ||
145 | if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1) | 149 | if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1) |
146 | return -EIO; | 150 | return -EIO; |
147 | rl.id = rl_id; | 151 | rl.id = rl_id; |
148 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_BASE_GF100, rl.raw); | 152 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_BASE_GF100, rl.raw); |
149 | } else if (g->chip_id < NV_CHIP_ID_AMPERE) { | 153 | } else if (g->chip_id < NV_CHIP_ID_AMPERE) { |
150 | runlist_submit_tu102_t submit; | 154 | runlist_submit_tu102_t submit; |
155 | if (rl_id > MAX_RUNLISTS_TU102) | ||
156 | return -EINVAL; | ||
151 | if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1) | 157 | if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1) |
152 | return -EIO; | 158 | return -EIO; |
153 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw); | 159 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw); |
diff --git a/runlist_procfs.c b/runlist_procfs.c index 986465d..8152463 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #include "nvdebug_linux.h" | 4 | #include "nvdebug_linux.h" |
5 | 5 | ||
6 | #define RUNLIST_PROCFS_NAME "runlist" | 6 | // Uncomment to expand channel status information when printing the runlist |
7 | #define DETAILED_CHANNEL_INFO | 7 | #define DETAILED_CHANNEL_INFO |
8 | 8 | ||
9 | #ifdef DETAILED_CHANNEL_INFO | 9 | #ifdef DETAILED_CHANNEL_INFO |
@@ -197,12 +197,7 @@ ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer, | |||
197 | if (err) | 197 | if (err) |
198 | return err; | 198 | return err; |
199 | 199 | ||
200 | // Verify valid runlist (in terms of absolute maximums) | 200 | // resubmit_runlist() checks that target_runlist is valid |
201 | if (g->chip_id < NV_CHIP_ID_TURING && target_runlist > MAX_RUNLISTS_GF100) | ||
202 | return -ERANGE; | ||
203 | else if (g->chip_id < NV_CHIP_ID_AMPERE && target_runlist > MAX_RUNLISTS_TU102) | ||
204 | return -ERANGE; | ||
205 | |||
206 | if ((err = resubmit_runlist(g, target_runlist))) | 201 | if ((err = resubmit_runlist(g, target_runlist))) |
207 | return err; | 202 | return err; |
208 | 203 | ||