diff options
| author | Joshua Bakita <bakitajoshua@gmail.com> | 2024-04-23 17:44:59 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2024-04-23 17:44:59 -0400 |
| commit | 8b9c6400d0c88e127be2d31ab3fb507da49f9d6f (patch) | |
| tree | 98cc8cf93a0ba75c7864a783cb2c7c66b83fdf25 | |
| parent | 71c6ab5ac5270d3e1c2fb809667225add109ec6b (diff) | |
Style and documentation cleanup
- Document topology registers (PTOP) on Ampere+
- Document graphics copy engine configuration registers
- Move resubmit_runlist range checks into runlist.c
- Miscellaneous spacing, typo, and minor documentation fixes
| -rw-r--r-- | mmu.c | 30 | ||||
| -rw-r--r-- | nvdebug.h | 137 | ||||
| -rw-r--r-- | runlist.c | 10 | ||||
| -rw-r--r-- | runlist_procfs.c | 9 |
4 files changed, 120 insertions, 66 deletions
| @@ -24,7 +24,8 @@ int g_verbose = 0; | |||
| 24 | @param pd_ap PD-type aperture (target address space) for `addr` | 24 | @param pd_ap PD-type aperture (target address space) for `addr` |
| 25 | @return A dereferencable kernel address, or an ERR_PTR-wrapped error | 25 | @return A dereferencable kernel address, or an ERR_PTR-wrapped error |
| 26 | */ | 26 | */ |
| 27 | static void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, enum PD_TARGET pd_ap) { | 27 | static void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, |
| 28 | enum PD_TARGET pd_ap) { | ||
| 28 | struct iommu_domain *dom; | 29 | struct iommu_domain *dom; |
| 29 | phys_addr_t phys; | 30 | phys_addr_t phys; |
| 30 | 31 | ||
| @@ -67,11 +68,11 @@ static void __iomem *pd_deref(struct nvdebug_state *g, uintptr_t addr, enum PD_T | |||
| 67 | 68 | ||
| 68 | // Internal helper for search_page_directory(). | 69 | // Internal helper for search_page_directory(). |
| 69 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, | 70 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, |
| 70 | uintptr_t pde_addr, | 71 | uintptr_t pde_addr, |
| 71 | enum PD_TARGET pde_target, | 72 | enum PD_TARGET pde_target, |
| 72 | uint64_t addr_to_find, | 73 | uint64_t addr_to_find, |
| 73 | enum INST_TARGET addr_to_find_aperture, | 74 | enum INST_TARGET addr_to_find_aperture, |
| 74 | uint32_t level) { | 75 | uint32_t level) { |
| 75 | uint64_t res, i; | 76 | uint64_t res, i; |
| 76 | void __iomem *pde_kern; | 77 | void __iomem *pde_kern; |
| 77 | page_dir_entry_t entry; | 78 | page_dir_entry_t entry; |
| @@ -110,13 +111,12 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g, | |||
| 110 | } | 111 | } |
| 111 | 112 | ||
| 112 | /* GPU Physical address -> Virtual address ("reverse" translation) for V2 tables | 113 | /* GPU Physical address -> Virtual address ("reverse" translation) for V2 tables |
| 113 | |||
| 114 | Depth-first search a page directory of the GPU MMU for where a particular | 114 | Depth-first search a page directory of the GPU MMU for where a particular |
| 115 | physical address is mapped. Upon finding a mapping, the virtual address is | 115 | physical address is mapped. Upon finding a mapping, the virtual address is |
| 116 | returned. | 116 | returned. |
| 117 | 117 | ||
| 118 | The page directory may be located in VID_MEM, SYS_MEM, or some combination of | 118 | The page directory and tables may be located in VID_MEM, SYS_MEM, or spread |
| 119 | the two. | 119 | across multiple apertures. |
| 120 | 120 | ||
| 121 | @param pd_config Page Directory configuration, containing pointer and | 121 | @param pd_config Page Directory configuration, containing pointer and |
| 122 | aperture for the start of the PDE3 entries | 122 | aperture for the start of the PDE3 entries |
| @@ -126,9 +126,9 @@ uint64_t search_page_directory_subtree(struct nvdebug_state *g, | |||
| 126 | mapped into by this page table. (Zero is not a valid virtual address) | 126 | mapped into by this page table. (Zero is not a valid virtual address) |
| 127 | */ | 127 | */ |
| 128 | uint64_t search_page_directory(struct nvdebug_state *g, | 128 | uint64_t search_page_directory(struct nvdebug_state *g, |
| 129 | page_dir_config_t pd_config, | 129 | page_dir_config_t pd_config, |
| 130 | uint64_t addr_to_find, | 130 | uint64_t addr_to_find, |
| 131 | enum INST_TARGET addr_to_find_aperture) { | 131 | enum INST_TARGET addr_to_find_aperture) { |
| 132 | uint64_t res, i; | 132 | uint64_t res, i; |
| 133 | // Make sure that the query is page-aligned | 133 | // Make sure that the query is page-aligned |
| 134 | if (addr_to_find & 0xfff) { | 134 | if (addr_to_find & 0xfff) { |
| @@ -147,9 +147,9 @@ uint64_t search_page_directory(struct nvdebug_state *g, | |||
| 147 | (See `search_page_directory()` for documentation.) | 147 | (See `search_page_directory()` for documentation.) |
| 148 | */ | 148 | */ |
| 149 | uint64_t search_v1_page_directory(struct nvdebug_state *g, | 149 | uint64_t search_v1_page_directory(struct nvdebug_state *g, |
| 150 | page_dir_config_t pd_config, | 150 | page_dir_config_t pd_config, |
| 151 | uint64_t addr_to_find, | 151 | uint64_t addr_to_find, |
| 152 | enum INST_TARGET addr_to_find_aperture) { | 152 | enum INST_TARGET addr_to_find_aperture) { |
| 153 | uint64_t j, i = 0; | 153 | uint64_t j, i = 0; |
| 154 | page_dir_entry_v1_t pde; | 154 | page_dir_entry_v1_t pde; |
| 155 | page_tbl_entry_v1_t pte; | 155 | page_tbl_entry_v1_t pte; |
| @@ -64,7 +64,7 @@ struct gk20a; | |||
| 64 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. | 64 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. |
| 65 | */ | 65 | */ |
| 66 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; | 66 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; |
| 67 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; | 67 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_INVALID = 1, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; |
| 68 | static inline const char *target_to_text(enum INST_TARGET t) { | 68 | static inline const char *target_to_text(enum INST_TARGET t) { |
| 69 | switch (t) { | 69 | switch (t) { |
| 70 | case TARGET_VID_MEM: | 70 | case TARGET_VID_MEM: |
| @@ -78,7 +78,7 @@ static inline const char *target_to_text(enum INST_TARGET t) { | |||
| 78 | } | 78 | } |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | // Support: Volta, Ampere, Turing | 81 | // Support: Volta, Turing, Ampere |
| 82 | struct gv100_runlist_chan { | 82 | struct gv100_runlist_chan { |
| 83 | // 0:63 | 83 | // 0:63 |
| 84 | enum ENTRY_TYPE entry_type:1; | 84 | enum ENTRY_TYPE entry_type:1; |
| @@ -308,7 +308,7 @@ typedef union { | |||
| 308 | } eng_runlist_gf100_t; | 308 | } eng_runlist_gf100_t; |
| 309 | 309 | ||
| 310 | /* | 310 | /* |
| 311 | Starting with Turing, the seperate registers for reading and writing runlist | 311 | Starting with Turing, the separate registers for reading and writing runlist |
| 312 | configuration were dropped in favor of read/write indexed registers. As part | 312 | configuration were dropped in favor of read/write indexed registers. As part |
| 313 | of this, the layout was modified to allow for larger runlist pointers (upper | 313 | of this, the layout was modified to allow for larger runlist pointers (upper |
| 314 | 52 of 64 bits). | 314 | 52 of 64 bits). |
| @@ -362,7 +362,6 @@ enum CHANNEL_STATUS { | |||
| 362 | }; | 362 | }; |
| 363 | 363 | ||
| 364 | /* Programmable Channel Control System RAM (PCCSR) | 364 | /* Programmable Channel Control System RAM (PCCSR) |
| 365 | |||
| 366 | 512-entry array of channel control and status data structures. | 365 | 512-entry array of channel control and status data structures. |
| 367 | 366 | ||
| 368 | === Read/Write Fields === | 367 | === Read/Write Fields === |
| @@ -391,6 +390,7 @@ enum CHANNEL_STATUS { | |||
| 391 | *Field only available on Turing. | 390 | *Field only available on Turing. |
| 392 | 391 | ||
| 393 | Support: Fermi, Maxwell, Pascal, Volta, Turing | 392 | Support: Fermi, Maxwell, Pascal, Volta, Turing |
| 393 | See also: manuals/turing/tu104/dev_fifo.ref.txt in NVIDIA's open-gpu-doc | ||
| 394 | */ | 394 | */ |
| 395 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) | 395 | #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8) |
| 396 | #define MAX_CHID 512 | 396 | #define MAX_CHID 512 |
| @@ -611,12 +611,10 @@ typedef union { | |||
| 611 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) | 611 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) |
| 612 | 612 | ||
| 613 | Support: Kepler, Maxwell, Pascal, Volta, Turing, Ampere | 613 | Support: Kepler, Maxwell, Pascal, Volta, Turing, Ampere |
| 614 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. | 614 | See also: manuals/volta/gv100/dev_top.ref.txt in open-gpu-doc. |
| 615 | */ | 615 | */ |
| 616 | 616 | ||
| 617 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) | ||
| 618 | #define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) | 617 | #define NV_PTOP_DEVICE_INFO_GK104(i) (0x00022700+(i)*4) |
| 619 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) | ||
| 620 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 | 618 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GK104 64 |
| 621 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | 619 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; |
| 622 | enum ENGINE_TYPES { | 620 | enum ENGINE_TYPES { |
| @@ -670,34 +668,6 @@ static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | |||
| 670 | "FLA: Fabric Logical Addressing", | 668 | "FLA: Fabric Logical Addressing", |
| 671 | }; | 669 | }; |
| 672 | 670 | ||
| 673 | // These field are from nvgpu/include/nvgpu/hw/ga100/hw_top_ga100.h | ||
| 674 | typedef union { | ||
| 675 | // _info type fields | ||
| 676 | struct { | ||
| 677 | uint32_t fault_id:11; | ||
| 678 | uint32_t padding0:5; | ||
| 679 | uint32_t inst_id:8; | ||
| 680 | enum ENGINE_TYPES engine_type:7; // "type_enum" | ||
| 681 | bool has_next_entry:1; | ||
| 682 | } __attribute__((packed)); | ||
| 683 | // _info2 type fields | ||
| 684 | struct { | ||
| 685 | uint32_t reset_id:8; | ||
| 686 | uint32_t pri_base:18; // "device_pri_base" | ||
| 687 | uint32_t padding1:4; | ||
| 688 | uint32_t is_engine:1; | ||
| 689 | uint32_t padding2:1; | ||
| 690 | } __attribute__((packed)); | ||
| 691 | struct { | ||
| 692 | uint32_t rleng_id:2; | ||
| 693 | uint32_t padding3:8; | ||
| 694 | uint32_t runlist_pri_base:16; | ||
| 695 | uint32_t padding4:6; | ||
| 696 | } __attribute__((packed)); | ||
| 697 | uint32_t raw; | ||
| 698 | } ptop_device_info_ga100_t; | ||
| 699 | |||
| 700 | // These field are from open-gpu-doc/manuals/volta/gv100/dev_top.ref.txt | ||
| 701 | typedef union { | 671 | typedef union { |
| 702 | // DATA type fields | 672 | // DATA type fields |
| 703 | struct { | 673 | struct { |
| @@ -737,6 +707,70 @@ typedef union { | |||
| 737 | uint32_t raw; | 707 | uint32_t raw; |
| 738 | } ptop_device_info_gk104_t; | 708 | } ptop_device_info_gk104_t; |
| 739 | 709 | ||
| 710 | /* GPU TOPology on Ampere and newer GPUs | ||
| 711 | On Ampere+, the array of device topology entries continues to describe all GPU | ||
| 712 | engines, but the layout is entirely different, principally to accommodate a | ||
| 713 | pointer to the runlist configuration region for each engine. (Runlist | ||
| 714 | configuration was moved out of the Host (PFIFO) region into per-engine spaces | ||
| 715 | starting with Ampere.) | ||
| 716 | |||
| 717 | Parsing is somewhat more difficult than with the older version, as entries | ||
| 718 | no longer include an `info_type`. Instead, each entry has 1--3 subrows, where | ||
| 719 | `has_next_entry` is 0 for the last subrow. | ||
| 720 | |||
| 721 | Empty rows should be skipped. | ||
| 722 | |||
| 723 | HAS_NEXT_ENTRY : Is the following entry a descriptor of the same engine? | ||
| 724 | |||
| 725 | == Subrow 1 fields == | ||
| 726 | FAULT_ID : [UNKNOWN] | ||
| 727 | INST_ID : [UNKNOWN] | ||
| 728 | ENGINE_TYPE : Enumerated name of the type of engine. (Seemingly identical | ||
| 729 | to ENGINE_ENUM in old PTOP layout.) | ||
| 730 | |||
| 731 | == Subrow 2 fields == | ||
| 732 | RESET_ID : [UNKNOWN] | ||
| 733 | PRI_BASE : [UNKNOWN] | ||
| 734 | IS_ENGINE : Does this entry describe an engine with a runlist? (Seemingly | ||
| 735 | identical to RUNLIST_IS_VALID in old PTOP layout.) | ||
| 736 | |||
| 737 | == Subrow 3 fields == | ||
| 738 | RUNLIST_PRI_BASE : Offset in BAR0 of the RunList RAM (RLRAM) region for the | ||
| 739 | runlist of this engine. | ||
| 740 | RLENG_ID : What is the per-runlist ID of this engine? | ||
| 741 | |||
| 742 | Support: Ampere, Ada, Hopper (and likely newer) | ||
| 743 | See also: hw_top_ga100.h in nvgpu (NVIDIA's open-source Jetson GPU driver) | ||
| 744 | */ | ||
| 745 | #define NV_PTOP_DEVICE_INFO_GA100(i) (0x00022800+(i)*4) | ||
| 746 | #define NV_PTOP_DEVICE_INFO__SIZE_1_GA100(g) (nvdebug_readl(g, 0x0224fc) >> 20) | ||
| 747 | |||
| 748 | typedef union { | ||
| 749 | // _info type fields | ||
| 750 | struct { | ||
| 751 | uint32_t fault_id:11; | ||
| 752 | uint32_t padding0:5; | ||
| 753 | uint32_t inst_id:8; | ||
| 754 | enum ENGINE_TYPES engine_type:7; // "type_enum" | ||
| 755 | bool has_next_entry:1; | ||
| 756 | } __attribute__((packed)); | ||
| 757 | // _info2 type fields | ||
| 758 | struct { | ||
| 759 | uint32_t reset_id:8; | ||
| 760 | uint32_t pri_base:18; // "device_pri_base" | ||
| 761 | uint32_t padding1:4; | ||
| 762 | uint32_t is_engine:1; | ||
| 763 | uint32_t padding2:1; | ||
| 764 | } __attribute__((packed)); | ||
| 765 | struct { | ||
| 766 | uint32_t rleng_id:2; | ||
| 767 | uint32_t padding3:8; | ||
| 768 | uint32_t runlist_pri_base:16; | ||
| 769 | uint32_t padding4:6; | ||
| 770 | } __attribute__((packed)); | ||
| 771 | uint32_t raw; | ||
| 772 | } ptop_device_info_ga100_t; | ||
| 773 | |||
| 740 | /* Graphics Processing Cluster (GPC) on-chip information | 774 | /* Graphics Processing Cluster (GPC) on-chip information |
| 741 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing | 775 | The GPU's Compute/Graphics engine is subdivided into Graphics Processing |
| 742 | Clusters (also known as GPU Processing Clusters, starting with Ampere). | 776 | Clusters (also known as GPU Processing Clusters, starting with Ampere). |
| @@ -792,21 +826,35 @@ typedef union { | |||
| 792 | SCAL_NUM_CES : Number of externally accessible copy engines | 826 | SCAL_NUM_CES : Number of externally accessible copy engines |
| 793 | 827 | ||
| 794 | Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be | 828 | Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be |
| 795 | consistent with PTOP data. | 829 | consistent with PTOP data. |
| 796 | 830 | ||
| 797 | Support: Kepler through (at least) Blackwell | 831 | Support: Kepler through (at least) Blackwell |
| 798 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 832 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
| 799 | */ | 833 | */ |
| 800 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | 834 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 |
| 835 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | ||
| 836 | #define NV_LCE_FOR_PCE_GP100 0x0010402c | ||
| 837 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | ||
| 838 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | ||
| 839 | /* GRaphics Copy Engine (GRCE) Information | ||
| 840 | "There's two types of CE... ASYNC_CEs which are copy engines with their own | ||
| 841 | runlists and GRCEs which are CEs that share a runlist with GR." (nvgpu, | ||
| 842 | ioctl_ctrl.c) | ||
| 843 | |||
| 844 | Starting with Pascal, the GRCEs are LCEs 0 and 1, but have the added capability | ||
| 845 | to share a PCE with another LCE. (Normally a PCE may only be associated with | ||
| 846 | one LCE.) These registers include that configuration, which should only be set | ||
| 847 | if no PCE has been directly associated with the specific GRCE. | ||
| 848 | |||
| 849 | Support: Pascal through (at least) Ada | ||
| 850 | Note that Volta through Ada use a different bit format than Pascal. | ||
| 851 | */ | ||
| 801 | // Defined max number of GRCEs for a GPU (TX2 has only one) | 852 | // Defined max number of GRCEs for a GPU (TX2 has only one) |
| 802 | # define NV_GRCE_MAX 2 | 853 | # define NV_GRCE_MAX 2 |
| 803 | // Defined GRCE->CE mapping offsets from nvgpu | 854 | // Defined GRCE->CE mapping offsets from nvgpu |
| 804 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) | 855 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) |
| 805 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) | 856 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) |
| 806 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | 857 | |
| 807 | #define NV_LCE_FOR_PCE_GP100 0x0010402c | ||
| 808 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | ||
| 809 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | ||
| 810 | // Struct for use with nvdebug_reg_range_read() | 858 | // Struct for use with nvdebug_reg_range_read() |
| 811 | union reg_range { | 859 | union reg_range { |
| 812 | struct { | 860 | struct { |
| @@ -1294,13 +1342,18 @@ struct runlist_iter { | |||
| 1294 | int entries_left_in_tsg; | 1342 | int entries_left_in_tsg; |
| 1295 | // Number of entries in runlist | 1343 | // Number of entries in runlist |
| 1296 | int len; | 1344 | int len; |
| 1345 | // Offset to start of Channel RAM (as this is per-runlist on Ampere+) | ||
| 1346 | uint32_t channel_ram; | ||
| 1297 | }; | 1347 | }; |
| 1298 | 1348 | ||
| 1299 | #define NVDEBUG_MAX_DEVICES 8 | 1349 | #define NVDEBUG_MAX_DEVICES 8 |
| 1300 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 1350 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
| 1301 | 1351 | ||
| 1302 | // Defined in runlist.c | 1352 | // Defined in runlist.c |
| 1303 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); | 1353 | int get_runlist_iter( |
| 1354 | struct nvdebug_state *g, | ||
| 1355 | int rl_id, | ||
| 1356 | struct runlist_iter *rl_iter /* out */); | ||
| 1304 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); | 1357 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); |
| 1305 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1358 | int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id); |
| 1306 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); | 1359 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id); |
| @@ -1318,7 +1371,7 @@ uint64_t search_v1_page_directory( | |||
| 1318 | enum INST_TARGET addr_to_find_aperture); | 1371 | enum INST_TARGET addr_to_find_aperture); |
| 1319 | // Defined in bus.c | 1372 | // Defined in bus.c |
| 1320 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); | 1373 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); |
| 1321 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); | 1374 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd /* out */); |
| 1322 | 1375 | ||
| 1323 | // Some portions of nvdebug can be included from kernel- or user-space (just | 1376 | // Some portions of nvdebug can be included from kernel- or user-space (just |
| 1324 | // this file at present). In order for these compiled object files to be | 1377 | // this file at present). In order for these compiled object files to be |
| @@ -8,8 +8,10 @@ | |||
| 8 | #include "nvdebug.h" | 8 | #include "nvdebug.h" |
| 9 | 9 | ||
| 10 | // Uncomment to, upon BAR2 access failure, return a PRAMIN-based runlist pointer | 10 | // Uncomment to, upon BAR2 access failure, return a PRAMIN-based runlist pointer |
| 11 | // **If enabled, PRAMIN may not be otherwise used while walking the runlist!** | 11 | // in get_runlist_iter(). In order for this pointer to remain valid, PRAMIN |
| 12 | // Runlists can only be printed on the Jetson TX2 if this is enabled. | 12 | // **must** not be moved during runlist traversal. |
| 13 | // The Jetson TX2 has no BAR2, and stores the runlist in VID_MEM, so this must | ||
| 14 | // be enabled to print the runlist on the TX2. | ||
| 13 | //#define FALLBACK_TO_PRAMIN | 15 | //#define FALLBACK_TO_PRAMIN |
| 14 | 16 | ||
| 15 | /* Get runlist head and info (incl. length) | 17 | /* Get runlist head and info (incl. length) |
| @@ -142,12 +144,16 @@ int preempt_runlist(struct nvdebug_state *g, uint32_t rl_id) { | |||
| 142 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { | 144 | int resubmit_runlist(struct nvdebug_state *g, uint32_t rl_id) { |
| 143 | if (g->chip_id < NV_CHIP_ID_TURING) { | 145 | if (g->chip_id < NV_CHIP_ID_TURING) { |
| 144 | eng_runlist_gf100_t rl; | 146 | eng_runlist_gf100_t rl; |
| 147 | if (rl_id > MAX_RUNLISTS_GF100) | ||
| 148 | return -EINVAL; | ||
| 145 | if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1) | 149 | if ((rl.raw = nvdebug_readq(g, NV_PFIFO_ENG_RUNLIST_BASE_GF100(rl_id))) == -1) |
| 146 | return -EIO; | 150 | return -EIO; |
| 147 | rl.id = rl_id; | 151 | rl.id = rl_id; |
| 148 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_BASE_GF100, rl.raw); | 152 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_BASE_GF100, rl.raw); |
| 149 | } else if (g->chip_id < NV_CHIP_ID_AMPERE) { | 153 | } else if (g->chip_id < NV_CHIP_ID_AMPERE) { |
| 150 | runlist_submit_tu102_t submit; | 154 | runlist_submit_tu102_t submit; |
| 155 | if (rl_id > MAX_RUNLISTS_TU102) | ||
| 156 | return -EINVAL; | ||
| 151 | if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1) | 157 | if ((submit.raw = nvdebug_readq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id))) == -1) |
| 152 | return -EIO; | 158 | return -EIO; |
| 153 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw); | 159 | nvdebug_writeq(g, NV_PFIFO_RUNLIST_SUBMIT_TU102(rl_id), submit.raw); |
diff --git a/runlist_procfs.c b/runlist_procfs.c index 986465d..8152463 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include "nvdebug_linux.h" | 4 | #include "nvdebug_linux.h" |
| 5 | 5 | ||
| 6 | #define RUNLIST_PROCFS_NAME "runlist" | 6 | // Uncomment to expand channel status information when printing the runlist |
| 7 | #define DETAILED_CHANNEL_INFO | 7 | #define DETAILED_CHANNEL_INFO |
| 8 | 8 | ||
| 9 | #ifdef DETAILED_CHANNEL_INFO | 9 | #ifdef DETAILED_CHANNEL_INFO |
| @@ -197,12 +197,7 @@ ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer, | |||
| 197 | if (err) | 197 | if (err) |
| 198 | return err; | 198 | return err; |
| 199 | 199 | ||
| 200 | // Verify valid runlist (in terms of absolute maximums) | 200 | // resubmit_runlist() checks that target_runlist is valid |
| 201 | if (g->chip_id < NV_CHIP_ID_TURING && target_runlist > MAX_RUNLISTS_GF100) | ||
| 202 | return -ERANGE; | ||
| 203 | else if (g->chip_id < NV_CHIP_ID_AMPERE && target_runlist > MAX_RUNLISTS_TU102) | ||
| 204 | return -ERANGE; | ||
| 205 | |||
| 206 | if ((err = resubmit_runlist(g, target_runlist))) | 201 | if ((err = resubmit_runlist(g, target_runlist))) |
| 207 | return err; | 202 | return err; |
| 208 | 203 | ||
