author    Joshua Bakita <bakitajoshua@gmail.com>  2023-06-22 12:52:59 -0400
committer Joshua Bakita <bakitajoshua@gmail.com>  2023-06-22 12:52:59 -0400
commit    306a03d18b305e4e573be3b2931978fa10679eb9 (patch)
tree      349570dfbe5f531e903c949c3f663627ee1097a8
parent    f4b83713672acaf88a526b930b8e417453f6edc5 (diff)
Quick dump of current state for Ben to review.
-rw-r--r--  Makefile             |  13
-rw-r--r--  device_info_procfs.c | 126
-rw-r--r--  mmu.c                | 251
-rw-r--r--  nvdebug.h            | 719
-rw-r--r--  nvdebug_entry.c      | 288
-rw-r--r--  runlist.c            | 221
-rw-r--r--  runlist_procfs.c     | 188
-rw-r--r--  stubs.h              |  80
8 files changed, 1614 insertions, 272 deletions
diff --git a/Makefile b/Makefile
@@ -1,13 +1,14 @@
1 | obj-m += nvdebug.o | 1 | obj-m += nvdebug.o |
2 | nvdebug-objs = runlist_procfs.o runlist.o nvdebug_entry.o | 2 | nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o nvdebug_entry.o |
3 | KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" | 3 | KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" |
4 | # Add -mfentry above if the build fails due to missing mcount | ||
4 | 5 | ||
5 | # TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...) | 6 | # TODO: Avoid needing to distribute NVIDIA's headers (at least they're MIT...) |
6 | #ccflags-y += -I$(PWD)/include | 7 | ccflags-y += -I$(PWD)/include |
7 | ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include | 8 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu/include |
8 | ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu | 9 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/drivers/gpu/nvgpu |
9 | ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include | 10 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include |
10 | ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include/uapi | 11 | #ccflags-y += -I/playpen/Linux_for_Tegra/source/public/kernel/nvgpu/include/uapi |
11 | 12 | ||
12 | all: | 13 | all: |
13 | make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules | 14 | make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules |
diff --git a/device_info_procfs.c b/device_info_procfs.c
new file mode 100644
index 0000000..cd6c53c
--- /dev/null
+++ b/device_info_procfs.c
@@ -0,0 +1,126 @@
1 | #include "nvdebug.h" | ||
2 | #include <linux/seq_file.h> // For seq_* functions and types | ||
3 | #include <linux/uaccess.h> // For copy_to_user() | ||
4 | |||
5 | // Generic register printing function, used for PTOP_*_NUM registers (+more) | ||
6 | // @param f File being read from. `data` field is register offset to read. | ||
7 | // @param buf User buffer for result | ||
8 | // @param size Length of user buffer | ||
9 | // @param off Requested offset. Updated by number of characters written. | ||
10 | // @return -errno on error, otherwise number of bytes written to *buf | ||
11 | // Note: Parent `data` field MUST be the GPU index | ||
12 | static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, loff_t *off) { | ||
13 | char out[16]; | ||
14 | int chars_written; | ||
15 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; | ||
16 | if (size < 16 || *off != 0) | ||
17 | return 0; | ||
18 | // 32 bit register will always take less than 16 characters to print | ||
19 | chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)PDE_DATA(file_inode(f)))); | ||
20 | if (copy_to_user(buf, out, chars_written)) | ||
21 | printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name); | ||
22 | *off += chars_written; | ||
23 | return chars_written; | ||
24 | } | ||
25 | const struct file_operations nvdebug_read_reg32_file_ops = { | ||
26 | .read = nvdebug_reg32_read, | ||
27 | }; | ||
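For illustration, a minimal registration sketch (not part of this hunk; the directory and file names are hypothetical) showing the contract documented above: the GPU index goes in the parent directory's `data`, and the register offset goes in the file's `data`:

// Hypothetical registration sketch. Parent data = GPU index,
// file data = register offset (NV_PTOP_SCAL_NUM_GPCS from nvdebug.h).
static int __init example_procfs_setup(void) {
	struct proc_dir_entry *gpu_dir = proc_mkdir_data("gpu0", 0, NULL, (void*)0ul);
	if (!gpu_dir)
		return -ENOMEM;
	proc_create_data("num_gpcs", 0444, gpu_dir, &nvdebug_read_reg32_file_ops,
	                 (void*)(uintptr_t)NV_PTOP_SCAL_NUM_GPCS);
	return 0;
}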
28 | |||
29 | //// ==v== PTOP_DEVICE_INFO ==v== //// | ||
30 | |||
31 | // Called to start or resume a sequence. Prior to 4.19, *pos is unreliable. | ||
32 | // Initializes iterator `idx` state and returns it. Ends sequence on NULL. | ||
33 | static void* device_info_file_seq_start(struct seq_file *s, loff_t *pos) { | ||
34 | static int idx; | ||
35 | // If start of sequence, reset `idx` | ||
36 | if (*pos == 0) | ||
37 | idx = 0; | ||
38 | // Number of possible info entries is fixed, and list is sparse | ||
39 | if (idx >= NV_PTOP_DEVICE_INFO__SIZE_1) | ||
40 | return NULL; | ||
41 | return &idx; | ||
42 | } | ||
43 | |||
44 | // Steps to next record. Returns new value of `idx`. | ||
45 | // Calls show() on non-NULL return | ||
46 | static void* device_info_file_seq_next(struct seq_file *s, void *idx, | ||
47 | loff_t *pos) { | ||
48 | (*pos)++; // Required by seq interface | ||
49 | // Number of possible info entries is fixed, and list is sparse | ||
50 | if (++(*(int*)idx) >= NV_PTOP_DEVICE_INFO__SIZE_1) | ||
51 | return NULL; | ||
52 | return idx; | ||
53 | } | ||
54 | |||
55 | // Print info at index *idx. Returns non-zero on error. | ||
56 | static int device_info_file_seq_show(struct seq_file *s, void *idx) { | ||
57 | ptop_device_info_t curr_info; | ||
58 | struct nvdebug_state *g = &g_nvdebug_state[seq2gpuidx(s)]; | ||
59 | |||
60 | curr_info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(*(int*)idx)); | ||
61 | // Check for read errors | ||
62 | if (curr_info.raw == -1) | ||
63 | return -EIO; | ||
64 | |||
65 | // Parse and print the data | ||
66 | switch(curr_info.info_type) { | ||
67 | case INFO_TYPE_DATA: | ||
68 | // As of early 2022, only the ENUM2 format of this entry exists | ||
69 | if (curr_info.is_not_enum2) | ||
70 | break; | ||
71 | seq_printf(s, "| BAR0 Base %#.8x\n" | ||
72 | "| instance %d\n", | ||
73 | curr_info.pri_base << 12, curr_info.inst_id); | ||
74 | if (curr_info.fault_id_is_valid) | ||
75 | seq_printf(s, "| Fault ID: %3d\n", curr_info.fault_id); | ||
76 | break; | ||
77 | case INFO_TYPE_ENUM: | ||
78 | if (curr_info.engine_is_valid) | ||
79 | seq_printf(s, "| Host's Engine ID: %2d\n", curr_info.engine_enum); | ||
80 | if (curr_info.runlist_is_valid) | ||
81 | seq_printf(s, "| Runlist ID: %2d\n", curr_info.runlist_enum); | ||
82 | if (curr_info.intr_is_valid) | ||
83 | seq_printf(s, "| Interrupt ID: %2d\n", curr_info.intr_enum); | ||
84 | if (curr_info.reset_is_valid) | ||
85 | seq_printf(s, "| Reset ID: %2d\n", curr_info.reset_enum); | ||
86 | break; | ||
87 | case INFO_TYPE_ENGINE_TYPE: | ||
88 | seq_printf(s, "| Engine Type: %2d (", curr_info.engine_type); | ||
89 | if (curr_info.engine_type < ENGINE_TYPES_LEN) | ||
90 | seq_printf(s, "%s)\n", ENGINE_TYPES_NAMES[curr_info.engine_type]); | ||
91 | else | ||
92 | seq_printf(s, "Unknown Engine, introduced post-Ampere)\n"); | ||
93 | break; | ||
94 | case INFO_TYPE_NOT_VALID: | ||
95 | default: | ||
96 | // Device info records are sparse, so skip unset or unknown ones | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | // Draw a line between each device entry | ||
101 | if (!curr_info.has_next_entry) | ||
102 | seq_printf(s, "+---------------------+\n"); | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | static void device_info_file_seq_stop(struct seq_file *s, void *idx) { | ||
107 | // No cleanup needed | ||
108 | } | ||
109 | |||
110 | static const struct seq_operations device_info_file_seq_ops = { | ||
111 | .start = device_info_file_seq_start, | ||
112 | .next = device_info_file_seq_next, | ||
113 | .stop = device_info_file_seq_stop, | ||
114 | .show = device_info_file_seq_show, | ||
115 | }; | ||
116 | |||
117 | static int device_info_file_open(struct inode *inode, struct file *f) { | ||
118 | return seq_open(f, &device_info_file_seq_ops); | ||
119 | } | ||
120 | |||
121 | const struct file_operations device_info_file_ops = { | ||
122 | .open = device_info_file_open, | ||
123 | .read = seq_read, | ||
124 | .llseek = seq_lseek, | ||
125 | .release = seq_release, | ||
126 | }; | ||
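Unlike the reg32 files above, this file carries the GPU index in its own `data` field (read back via seq2gpuidx() in nvdebug.h), so a hypothetical registration in the same setup function would be:

// Sketch: here the file's own data holds the GPU index.
proc_create_data("device_info", 0444, gpu_dir, &device_info_file_ops, (void*)0ul);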
diff --git a/mmu.c b/mmu.c
new file mode 100644
@@ -0,0 +1,251 @@
1 | // Helpers to deal with NVIDIA's MMU and associated page tables | ||
2 | #include <linux/kernel.h> // Kernel types | ||
3 | |||
4 | #include "nvdebug.h" | ||
5 | |||
6 | /* One of the oldest ways to access video memory on NVIDIA GPUs is by using | ||
7 | a configurable 1MB window into VRAM which is mapped into BAR0 (register) | ||
8 | space starting at offset NV_PRAMIN. This is still supported on NVIDIA GPUs | ||
9 | and appears to be used today to bootstrap page table configuration. | ||
10 | |||
11 | Why is it mapped at a location called NVIDIA Private RAM Instance? Because | ||
12 | this used to point to the entirety of instance RAM, which was separate from | ||
13 | VRAM on older NVIDIA GPUs. | ||
14 | */ | ||
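To make the window mechanics concrete, here is a minimal sketch (not in this commit) that reads one word of VRAM through PRAMIN, using the register definitions from nvdebug.h:

// Sketch: point the 1 MB PRAMIN window at `phys` (the base has 64 KiB
// granularity), then read through BAR0. Assumes nothing else is
// concurrently repositioning the window.
static u32 read_vram_word(struct nvdebug_state *g, uint64_t phys) {
	bar0_window_t win;
	win.raw = 0;
	win.base = phys >> 16;         // window base is in 64 KiB units
	win.target = TARGET_VID_MEM;
	nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, win.raw);
	return nvdebug_readl(g, NV_PRAMIN + (phys & 0xffff));
}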
15 | |||
16 | /* Convert a physical VRAM address to an offset in the PRAMIN window | ||
17 | @param addr VRAM address to convert | ||
18 | @return 0 on error, PRAMIN offset on success | ||
19 | |||
20 | Note: Use phy2PRAMIN() instead if you want a dereferenceable address | ||
21 | */ | ||
22 | uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr) { | ||
23 | uint64_t pramin_base_va; | ||
24 | bar0_window_t window; | ||
25 | window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW); | ||
26 | // Check if the address is valid (49 bits are addressable on-GPU) | ||
27 | if (addr & ~0x0001ffffffffffff) { | ||
28 | printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n", | ||
29 | addr, __func__); | ||
30 | return 0; | ||
31 | } | ||
32 | // For unclear (debugging?) reasons, PRAMIN can point to SYSMEM | ||
33 | if (window.target != TARGET_VID_MEM) | ||
34 | return 0; | ||
35 | pramin_base_va = ((uint64_t)window.base) << 16; | ||
36 | // Protect against out-of-bounds accesses | ||
37 | if (addr < pramin_base_va || addr >= pramin_base_va + NV_PRAMIN_LEN) | ||
38 | return 0; | ||
39 | return addr - pramin_base_va; | ||
40 | } | ||
41 | |||
42 | /* NVIDIA GMMU (GPU Memory Management Unit) uses page tables that are mostly | ||
43 | straightforward starting with Pascal ("page table version 2"), except for a | ||
44 | few quirks (like 16-byte PDE0 entries, but all other entries are 8 bytes). | ||
45 | |||
46 | All you really need to know is that any given Page Directory Entry (PDE) | ||
47 | contains a pointer to the start of a 4k page densely filled with PDEs or Page | ||
48 | Table Entries (PTEs). | ||
49 | |||
50 | == Page Table Refresher == | ||
51 | Page tables convert virtual addresses to physical addresses, and they do this | ||
52 | via a tree structure. Leaves (PTEs) contain a physical address, and the path | ||
53 | from root to leaf is defined by the virtual address. Non-leaf nodes are PDEs. | ||
54 | When descending, the virtual address is sliced into pieces, and one slice is | ||
55 | used at each level (as an index) to select the next-visited node (in level+1). | ||
56 | |||
57 | V2 of NVIDIA's page table format uses 4 levels of PDEs and a final level of | ||
58 | PTEs. How the virtual address is sliced to yield an index into each level and | ||
59 | a page offset is shown by Fig 1. | ||
60 | |||
61 | == Figure 1 == | ||
62 | Page Offset (12 bits) <---------------------------------------+ | ||
63 | Page Table Entry (PTE) (9 bits) <--------------------+ | | ||
64 | Page Directory Entry (PDE) 0 (8 bits) <-----+ | | | ||
65 | PDE1 (9 bits) <--------------------+ | | | |
66 | PDE2 (9 bits) <-----------+ | | | | |
67 | PDE3 (2 bits) <--+ | | | | | | ||
68 | ^ ^ ^ ^ ^ ^ | ||
69 | Virtual addr: [48, 47] [46, 38] [37, 29] [28, 21] [20, 12] [11, 0] | ||
70 | |||
71 | The following arrays merely represent different projections of Fig. 1, and | ||
72 | only one is strictly needed to reconstruct all the others. However, due to | ||
73 | the complexity of page tables, we include all of these to aid in readability. | ||
74 | */ | ||
75 | // How many nodes/entries per level in V2 of NVIDIA's page table format | ||
76 | static const int NV_MMU_PT_V2_SZ[5] = {4, 512, 512, 256, 512}; | ||
77 | // Size in bytes of an entry at a particular level | ||
78 | static const int NV_MMU_PT_V2_ENTRY_SZ[5] = {8, 8, 8, 16, 8}; | ||
79 | // Which bit index is the least significant in indexing each page level | ||
80 | static const int NV_MMU_PT_V2_LSB[5] = {47, 38, 29, 21, 12}; | ||
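As a worked example of the slicing in Fig. 1, the per-level indices for a given virtual address can be recovered from these arrays (sketch, not in this commit):

// Sketch: decompose a V2 GPU virtual address into per-level table indices.
static void va_to_indices_v2(uint64_t va, int idx[5], uint32_t *page_off) {
	int lvl;
	for (lvl = 0; lvl < 5; lvl++)
		idx[lvl] = (va >> NV_MMU_PT_V2_LSB[lvl]) % NV_MMU_PT_V2_SZ[lvl];
	*page_off = va & 0xfff;  // low 12 bits offset into the 4 KiB page
}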
81 | |||
82 | // Convert a GPU physical address to CPU virtual address via the PRAMIN window | ||
83 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy) { | ||
84 | return g->regs + NV_PRAMIN + vram2PRAMIN(g, phy); | ||
85 | } | ||
86 | |||
87 | /* FIXME | ||
88 | void __iomem *off2BAR2(struct nvdebug_state* g, uint32_t off) { | ||
89 | return g->bar2 + off; | ||
90 | } | ||
91 | */ | ||
92 | |||
93 | uint64_t search_page_directory_subtree(struct nvdebug_state *g, | ||
94 | void __iomem *pde_offset, | ||
95 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
96 | uint64_t addr_to_find, | ||
97 | uint32_t level) { | ||
98 | uint64_t res, i; | ||
99 | void __iomem *next; | ||
100 | page_dir_entry_t entry; | ||
101 | if (level >= ARRAY_SIZE(NV_MMU_PT_V2_SZ)) | ||
102 | return 0; | ||
103 | // Hack to workaround PDE0 being double-size and strangely formatted | ||
104 | if (NV_MMU_PT_V2_ENTRY_SZ[level] == 16) | ||
105 | pde_offset += 8; | ||
106 | entry.raw = readl(pde_offset); | ||
107 | // If we reached an invalid (unpopulated) PDE, walk back up the tree | ||
108 | if (entry.target == PD_AND_TARGET_INVALID) | ||
109 | return 0; | ||
110 | // Succeed when we reach a PTE with the address we want | ||
111 | if (entry.is_pte) { | ||
112 | printk(KERN_INFO "[nvdebug] PTE for phy addr %llx (raw: %x)\n", ((u64)entry.addr) << 12, entry.raw); | ||
113 | return (uint64_t)entry.addr << 12 == addr_to_find; | ||
114 | } | ||
115 | printk(KERN_INFO "[nvdebug] Found PDE pointing to %llx in ap '%d' at lvl %d (raw: %x)\n", ((u64)entry.addr) << 12, entry.target, level, entry.raw); | ||
116 | // Depth-first search of the page table | ||
117 | for (i = 0; i < NV_MMU_PT_V2_SZ[level]; i++) { | ||
118 | next = off2addr(g, ((uint64_t)entry.addr << 12) + NV_MMU_PT_V2_ENTRY_SZ[level + 1] * i); | ||
119 | // off2addr can fail | ||
120 | if (!next) { | ||
121 | printk(KERN_ERR "[nvdebug] %s: Unable to resolve GPU PA to CPU PA\n", __func__); | ||
122 | return 0; | ||
123 | } | ||
124 | res = search_page_directory_subtree(g, next, off2addr, addr_to_find, level + 1); | ||
125 | if (res) | ||
126 | return res | (i << NV_MMU_PT_V2_LSB[level + 1]); | ||
127 | } | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | /* Search a page directory of the GPU MMU | ||
132 | @param pde_offset Dereferenceable pointer to the start of the PDE3 entries | ||
133 | @param off2addr Func to converts VRAM phys addresses to valid CPU VAs | ||
134 | @param addr_to_find Physical address to reconstruct the virtual address of | ||
135 | @return 0 on error, otherwise the virtual address at which addr_to_find is | ||
136 | mapped into by this page table. | ||
137 | */ | ||
138 | uint64_t search_page_directory(struct nvdebug_state *g, | ||
139 | void __iomem *pde_offset, | ||
140 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
141 | uint64_t addr_to_find) { | ||
142 | uint64_t res, i; | ||
143 | // Make sure that the query is page-aligned | ||
144 | if (addr_to_find & 0xfff) { | ||
145 | printk(KERN_WARNING "[nvdebug] Attempting to search for unaligned address %llx in search_page_directory()!\n", addr_to_find); | ||
146 | return 0; | ||
147 | } | ||
148 | // Search the top-level page directory (PDE3) | ||
149 | for (i = 0; i < NV_MMU_PT_V2_SZ[0]; i++) | ||
150 | if ((res = search_page_directory_subtree(g, pde_offset + NV_MMU_PT_V2_ENTRY_SZ[0] * i, off2addr, addr_to_find, 0))) | ||
151 | return (res & ~0xfff) | (i << NV_MMU_PT_V2_LSB[0]); | ||
152 | return 0; | ||
153 | } | ||
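A hypothetical caller (sketch; assumes the PRAMIN window currently covers the instance block holding the page directory configuration at NV_PRAMIN_PDB_CONFIG_OFF):

// Sketch: reverse-translate GPU physical address `pa` to a virtual address.
static uint64_t find_va_of(struct nvdebug_state *g, uint64_t pa) {
	page_dir_config_t pd;
	pd.raw = nvdebug_readq(g, NV_PRAMIN + NV_PRAMIN_PDB_CONFIG_OFF);
	// Ignores pd.page_dir_hi for brevity; real code should include it.
	return search_page_directory(g, phy2PRAMIN(g, (uint64_t)pd.page_dir_lo << 12),
	                             phy2PRAMIN, pa);
}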
154 | |||
155 | /* GMMU Page Tables Version 1 | ||
156 | This page table only contains 2 levels and is used in the Fermi, Kepler, and | ||
157 | Maxwell architectures | ||
158 | */ | ||
159 | // Number of entries in the PDE and PTE levels | ||
160 | static const int NV_MMU_PT_V1_SZ[2] = {512, 1<<13}; // 1<<13 is an educated guess!!! | ||
161 | // Which bit index is the least significant in indexing each page level | ||
162 | static const int NV_MMU_PT_V1_LSB[2] = {25, 12}; // 25 is an educated guess!!! | ||
163 | uint64_t search_v1_page_directory(struct nvdebug_state *g, | ||
164 | void __iomem *pde_offset, | ||
165 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
166 | uint64_t addr_to_find) { | ||
167 | uint64_t j, i = 0; | ||
168 | page_dir_entry_v1_t pde; | ||
169 | page_tbl_entry_v1_t pte; | ||
170 | void __iomem *pte_offset; | ||
171 | // For each PDE | ||
172 | do { | ||
173 | // readq doesn't seem to work on BAR0 | ||
174 | pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v1_t) + 4); | ||
175 | pde.raw <<= 32; | ||
176 | pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v1_t)); | ||
177 | // Verify PDE is present | ||
178 | if (pde.target == PD_TARGET_INVALID && pde.alt_target == PD_TARGET_INVALID) | ||
179 | continue; | ||
180 | // Convert to a dereferenceable pointer from CPU virtual address space | ||
181 | pte_offset = off2addr(g, (uint64_t)pde.alt_addr << 12); | ||
182 | if (!pte_offset) | ||
183 | continue; | ||
184 | // printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.is_volatile ? "volatile" : "non-volatile", ((u64)pde.addr) << 12, pde.target, pde.raw); | ||
185 | // printk(KERN_INFO "[nvdebug] Found %s PDE pointing to PTEs @ %llx in ap '%d' (raw: %llx)\n", pde.alt_is_volatile ? "volatile" : "non-volatile", ((u64)pde.alt_addr) << 12, pde.target, pde.raw); | ||
186 | // For each PTE | ||
187 | for (j = 0; j < NV_MMU_PT_V1_SZ[1]; j++) { | ||
188 | // Don't overrun the PRAMIN window | ||
189 | if (pte_offset >= g->regs + NV_PRAMIN + NV_PRAMIN_LEN) | ||
190 | return 0; | ||
191 | pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v1_t) + 4); | ||
192 | pte.raw <<= 32; | ||
193 | pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v1_t)); | ||
194 | // Skip non-present PTEs | ||
195 | if (!pte.is_present) | ||
196 | continue; | ||
197 | // printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s (raw: %llx)\n", ((u64)pte.addr) << 12, pte.is_present ? "present" : "non-present", pte.raw); | ||
198 | // If we find a matching PTE, return its virtual address | ||
199 | if ((uint64_t)pte.addr << 12 == addr_to_find) | ||
200 | return i << NV_MMU_PT_V1_LSB[0] | j << NV_MMU_PT_V1_LSB[1]; | ||
201 | |||
202 | } | ||
203 | } while (++i < NV_MMU_PT_V1_SZ[0]); | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | /* GMMU Page Tables Version 0 | ||
208 | This page table only contains 2 levels and is used in the Tesla architecture | ||
209 | */ | ||
210 | /* *** UNTESTED *** | ||
211 | #define NV_MMU_PT_V0_SZ 2048 | ||
212 | #define NV_MMU_PT_V0_LSB 29 | ||
213 | uint64_t search_v0_page_directory(struct nvdebug_state *g, | ||
214 | void __iomem *pde_offset, | ||
215 | void __iomem *(*off2addr)(struct nvdebug_state*, uint32_t), | ||
216 | uint32_t addr_to_find) { | ||
217 | int j, i = 0; | ||
218 | page_dir_entry_v0_t pde; | ||
219 | page_tbl_entry_v0_t pte; | ||
220 | void __iomem *pte_offset; | ||
221 | // For each PDE | ||
222 | do { | ||
223 | // readq doesn't seem to work on BAR0 | ||
224 | pde.raw = readl(pde_offset + i * sizeof(page_dir_entry_v0_t) + 4); | ||
225 | pde.raw <<= 32; | ||
226 | pde.raw |= readl(pde_offset + i * sizeof(page_dir_entry_v0_t)); | ||
227 | //if (pde.raw) | ||
228 | //printk(KERN_INFO "[nvdebug] Read raw PDE @ %x: %llx\n", pde_offset + i * sizeof(page_dir_entry_v0_t), pde.raw); | ||
229 | // Skip unpopulated PDEs | ||
230 | if (pde.type == NOT_PRESENT) | ||
231 | continue; | ||
232 | //printk(KERN_INFO "[nvdebug] PDE to %llx present\n", ((uint64_t)pde.addr) << 12); | ||
233 | pte_offset = off2addr(g, ((uint64_t)pde.addr) << 12); | ||
234 | // For each PTE | ||
235 | for (j = 0; j < V0_PDE_SIZE2NUM[pde.sublevel_size]; j++) { | ||
236 | pte.raw = readl(pte_offset + j * sizeof(page_tbl_entry_v0_t) + 4); | ||
237 | pte.raw <<= 32; | ||
238 | pte.raw |= readl(pte_offset + j * sizeof(page_tbl_entry_v0_t)); | ||
239 | // Skip non-present PTEs | ||
240 | if (!pte.is_present) | ||
241 | continue; | ||
242 | // If we find a matching PTE, return its virtual address | ||
243 | //if (pte.addr != 0x5555555) | ||
244 | // printk(KERN_INFO "[nvdebug] PTE for phy addr %llx %s\n", ((uint64_t)pte.addr) << 12, pte.is_present ? "present" : "non-present"); | ||
245 | if (pte.addr << 12 == addr_to_find) | ||
246 | return i << NV_MMU_PT_V0_LSB | j << 12; | ||
247 | } | ||
248 | } while (++i < NV_MMU_PT_V0_SZ); | ||
249 | return 0; // No match | ||
250 | } | ||
251 | */ | ||
diff --git a/nvdebug.h b/nvdebug.h
@@ -5,14 +5,18 @@
5 | // TODO(jbakita): Don't depend on these. | 5 | // TODO(jbakita): Don't depend on these. |
6 | #include <nvgpu/gk20a.h> // For struct gk20a | 6 | #include <nvgpu/gk20a.h> // For struct gk20a |
7 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux | 7 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux |
8 | #include <linux/proc_fs.h> // For PDE_DATA() macro | ||
8 | 9 | ||
9 | /* Runlist Channel | 10 | /* Runlist Channel |
10 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue | 11 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue |
11 | of GPU commands. These commands are typically queued from userspace. | 12 | of GPU commands. These commands are typically queued from userspace. |
12 | 13 | ||
13 | `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU | 14 | Prior to Volta, channels could also exist independent of a TSG. These are |
14 | virtual address space for this context. All channels in a TSG point to the | 15 | called "bare channels" in the Jetson nvgpu driver. |
15 | same GPU Instance Block (?). | 16 | |
17 | `INST_PTR` points to a GPU Instance Block which contains FIFO states, virtual | ||
18 | address space configuration for this context, and a pointer to the page | ||
19 | tables. All channels in a TSG point to the same GPU Instance Block (?). | ||
16 | 20 | ||
17 | "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and | 21 | "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and |
18 | thereby which PBDMA will run the channel. Increasing values select | 22 | thereby which PBDMA will run the channel. Increasing values select |
@@ -30,7 +34,13 @@
30 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN | 34 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN |
31 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) | 35 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) |
32 | RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if | 36 | RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if |
33 | more than one PBDMA is supported by the runlist | 37 | more than one PBDMA is supported by the runlist, |
38 | additionally, "A value of 0 targets the first FE | ||
39 | pipe, which can process all FE driven engines: | ||
40 | Graphics, Compute, Inline2Memory, and TwoD. A value | ||
41 | of 1 targets the second FE pipe, which can only | ||
42 | process Compute work. Note that GRCE work is allowed | ||
43 | on either runqueue.)" | ||
34 | 44 | ||
35 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer | 45 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer |
36 | INST_PTR_HI : upper 32 bit of instance block pointer | 46 | INST_PTR_HI : upper 32 bit of instance block pointer |
@@ -39,6 +49,9 @@
39 | USERD_PTR_LO : upper 24 bits of the low 32 bits of the 512-byte-aligned USERD pointer | 49 | USERD_PTR_LO : upper 24 bits of the low 32 bits of the 512-byte-aligned USERD pointer |
40 | USERD_PTR_HI : upper 32 bits of USERD pointer | 50 | USERD_PTR_HI : upper 32 bits of USERD pointer |
41 | USERD_TARGET (TGU) : aperture of the USERD data structure | 51 | USERD_TARGET (TGU) : aperture of the USERD data structure |
52 | |||
53 | Channels have existed since at least Fermi, but were rearranged with Volta to | ||
54 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. | ||
42 | */ | 55 | */ |
43 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; | 56 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; |
44 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; | 57 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; |
@@ -52,11 +65,12 @@ static inline char* target_to_text(enum INST_TARGET t) {
52 | return "SYS_MEM_NONCOHERENT"; | 65 | return "SYS_MEM_NONCOHERENT"; |
53 | default: | 66 | default: |
54 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); | 67 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); |
55 | return NULL; | 68 | return "INVALID"; |
56 | } | 69 | } |
57 | } | 70 | } |
58 | 71 | ||
59 | struct runlist_chan { | 72 | // Support: Volta, Ampere, Turing |
73 | struct gv100_runlist_chan { | ||
60 | // 0:63 | 74 | // 0:63 |
61 | enum ENTRY_TYPE entry_type:1; | 75 | enum ENTRY_TYPE entry_type:1; |
62 | uint32_t runqueue_selector:1; | 76 | uint32_t runqueue_selector:1; |
@@ -71,6 +85,20 @@ struct runlist_chan {
71 | uint32_t inst_ptr_hi:32; | 85 | uint32_t inst_ptr_hi:32; |
72 | } __attribute__((packed)); | 86 | } __attribute__((packed)); |
73 | 87 | ||
88 | // Support: Fermi, Kepler*, Maxwell, Pascal | ||
89 | // *In Kepler, inst fields may be unpopulated? | ||
90 | struct gm107_runlist_chan { | ||
91 | uint32_t chid:12; | ||
92 | uint32_t padding0:1; | ||
93 | enum ENTRY_TYPE entry_type:1; | ||
94 | uint32_t padding1:18; | ||
95 | uint32_t inst_ptr_lo:20; | ||
96 | enum INST_TARGET inst_target:2; // Totally guessing on this | ||
97 | uint32_t padding2:10; | ||
98 | } __attribute__((packed)); | ||
99 | |||
100 | #define gk110_runlist_chan gm107_runlist_chan | ||
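To illustrate how the split instance-pointer fields documented above combine on Volta+ entries (sketch, not in this commit):

// Sketch: reassemble the 4 KiB-aligned instance block address.
// INST_PTR_LO holds address bits 12..31; INST_PTR_HI holds bits 32..63.
static uint64_t chan_inst_ptr(const struct gv100_runlist_chan *chan) {
	return ((uint64_t)chan->inst_ptr_hi << 32)
	     | ((uint64_t)chan->inst_ptr_lo << 12);
}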
101 | |||
74 | /* Runlist TSG (TimeSlice Group) | 102 | /* Runlist TSG (TimeSlice Group) |
75 | The runlist is composed of timeslice groups (TSG). Each TSG corresponds | 103 | The runlist is composed of timeslice groups (TSG). Each TSG corresponds |
76 | to a single virtual address space on the GPU and contains `TSG_LENGTH` | 104 | to a single virtual address space on the GPU and contains `TSG_LENGTH` |
@@ -85,8 +113,15 @@ struct runlist_chan {
85 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice | 113 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice |
86 | TSG_LENGTH : number of channels that are part of this timeslice group | 114 | TSG_LENGTH : number of channels that are part of this timeslice group |
87 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) | 115 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) |
116 | |||
117 | TSGs appear to have been introduced with Kepler and stayed the same until | ||
118 | Volta, when they were rearranged alongside channels to support longer GPU | ||
119 | instance addresses. | ||
88 | */ | 120 | */ |
89 | struct entry_tsg { | 121 | |
122 | // Support: Volta, Ampere*, Turing* | ||
123 | // *These treat the top 8 bits of TSGID as GFID (unused) | ||
124 | struct gv100_runlist_tsg { | ||
90 | // 0:63 | 125 | // 0:63 |
91 | enum ENTRY_TYPE entry_type:1; | 126 | enum ENTRY_TYPE entry_type:1; |
92 | uint64_t padding:15; | 127 | uint64_t padding:15; |
@@ -101,14 +136,28 @@ struct entry_tsg {
101 | } __attribute__((packed)); | 136 | } __attribute__((packed)); |
102 | #define MAX_TSGID (1 << 12) | 137 | #define MAX_TSGID (1 << 12) |
103 | 138 | ||
139 | // Support: Kepler (v2?), Maxwell, Pascal | ||
140 | // Same fields as Volta except tsg_length is 6 bits rather than 8 | ||
141 | // Last 32 bits appear to contain an undocumented inst ptr | ||
142 | struct gk110_runlist_tsg { | ||
143 | uint32_t tsgid:12; | ||
144 | uint32_t padding0:1; | ||
145 | enum ENTRY_TYPE entry_type:1; | ||
146 | uint32_t timeslice_scale:4; | ||
147 | uint32_t timeslice_timeout:8; | ||
148 | uint32_t tsg_length:6; | ||
149 | uint32_t padding1:32; | ||
150 | } __attribute__((packed)); | ||
151 | |||
152 | |||
104 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | 153 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; |
105 | 154 | ||
106 | /* Preempt a TSG or Channel by ID | 155 | /* Preempt a TSG or Channel by ID |
107 | ID/CHID : Id of TSG or channel to preempt | 156 | ID/CHID : Id of TSG or channel to preempt |
108 | IS_PENDING : ???? | 157 | IS_PENDING : Is a context switch pending? |
109 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG | 158 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG |
110 | 159 | ||
111 | Support: Kepler, Maxwell, Pascal, Volta | 160 | Support: Kepler, Maxwell, Pascal, Volta, Turing |
112 | */ | 161 | */ |
113 | #define NV_PFIFO_PREEMPT 0x00002634 | 162 | #define NV_PFIFO_PREEMPT 0x00002634 |
114 | typedef union { | 163 | typedef union { |
@@ -195,26 +244,36 @@ typedef union {
195 | */ | 244 | */ |
196 | 245 | ||
197 | // Note: This is different with Turing | 246 | // Note: This is different with Turing |
198 | // Support: Kepler, Maxwell, Pascal, Volta | 247 | // Support: Fermi, Kepler, Maxwell, Pascal, Volta |
199 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 | 248 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 |
249 | #define NV_PFIFO_ENG_RUNLIST_BASE(i) (0x00002280+(i)*8) | ||
200 | typedef union { | 250 | typedef union { |
201 | struct { | 251 | struct { |
202 | uint32_t ptr:28; | 252 | uint32_t ptr:28; |
203 | uint32_t type:2; | 253 | enum INST_TARGET target:2; |
204 | uint32_t padding:2; | 254 | uint32_t padding:2; |
205 | } __attribute__((packed)); | 255 | } __attribute__((packed)); |
206 | uint32_t raw; | 256 | uint32_t raw; |
207 | } runlist_base_t; | 257 | } runlist_base_t; |
208 | 258 | ||
209 | // Support: Kepler, Maxwell, Pascal, Volta | 259 | // Support: Kepler, Maxwell, Pascal, Volta |
260 | // Works on Fermi, but id is one bit longer and is b11111 | ||
210 | #define NV_PFIFO_RUNLIST 0x00002274 | 261 | #define NV_PFIFO_RUNLIST 0x00002274 |
262 | #define NV_PFIFO_ENG_RUNLIST(i) (0x00002284+(i)*8) | ||
211 | typedef union { | 263 | typedef union { |
264 | // RUNLIST fields | ||
212 | struct { | 265 | struct { |
213 | uint32_t len:16; | 266 | uint32_t len:16; |
214 | uint32_t padding:4; | 267 | uint32_t padding:4; |
215 | uint32_t id:4; | 268 | uint32_t id:4; // Runlist ID (each engine may have a separate runlist) |
216 | uint32_t padding2:8; | 269 | uint32_t padding2:8; |
217 | } __attribute__((packed)); | 270 | } __attribute__((packed)); |
271 | // ENG_RUNLIST fields that differ | ||
272 | struct { | ||
273 | uint32_t padding3:20; | ||
274 | bool is_pending:1; // Is runlist not yet committed? | ||
275 | uint32_t padding4:11; | ||
276 | } __attribute__((packed)); | ||
218 | uint32_t raw; | 277 | uint32_t raw; |
219 | } runlist_info_t; | 278 | } runlist_info_t; |
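Reading both registers together locates a runlist in memory (sketch, not in this commit; assumes the pre-Turing register layout above):

// Sketch: find where runlist 0 lives and how many entries it holds.
static uint64_t runlist0_addr(struct nvdebug_state *g, uint16_t *len_out) {
	runlist_base_t base;
	runlist_info_t info;
	base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(0));
	info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(0));
	*len_out = info.len;
	// Physical address in the base.target aperture
	return (uint64_t)base.ptr << 12;
}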
220 | 279 | ||
@@ -301,63 +360,631 @@ typedef union {
301 | uint32_t raw; | 360 | uint32_t raw; |
302 | } runlist_disable_t; | 361 | } runlist_disable_t; |
303 | 362 | ||
363 | /* Read GPU descriptors from the Master Controller (MC) | ||
364 | |||
365 | MINOR_REVISION : Legacy (only used with Kelvin in Nouveau) | ||
366 | MAJOR_REVISION : Legacy (only used with Kelvin in Nouveau) | ||
367 | IMPLEMENTATION : Which implementation of the GPU architecture | ||
368 | ARCHITECTURE : Which GPU architecture | ||
369 | |||
370 | CHIP_ID = IMPLEMENTATION + (ARCHITECTURE << 4) | ||
371 | CHIP_ID : Unique ID of all chips since Kelvin | ||
372 | |||
373 | Support: Kelvin, Rankine, Curie, Tesla, Fermi, Kepler, Maxwell, Pascal, | ||
374 | Volta, Turing, Ampere | ||
375 | */ | ||
376 | #define NV_MC_BOOT_0 0x00000000 | ||
377 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 | ||
378 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | ||
379 | #define NV_CHIP_ID_KEPLER 0x0E0 | ||
380 | #define NV_CHIP_ID_VOLTA 0x140 | ||
381 | |||
382 | inline static const char* ARCH2NAME(uint32_t arch) { | ||
383 | switch (arch) { | ||
384 | case 0x01: | ||
385 | return "Celsius"; | ||
386 | case 0x02: | ||
387 | return "Kelvin"; | ||
388 | case 0x03: | ||
389 | return "Rankline"; | ||
390 | case 0x04: | ||
391 | case 0x06: // 0x06 is nForce 6XX integrated only | ||
392 | return "Curie"; | ||
393 | // 0x07 is unused/skipped | ||
394 | case 0x05: // First Tesla card was released before the nForce 6XX | ||
395 | case 0x08: | ||
396 | case 0x09: | ||
397 | case 0x0A: | ||
398 | return "Tesla"; | ||
399 | // 0x0B is unused/skipped | ||
400 | case 0x0C: | ||
401 | case 0x0D: | ||
402 | return "Fermi"; | ||
403 | case 0x0E: | ||
404 | case 0x0F: | ||
405 | case 0x11: | ||
406 | return "Kepler"; | ||
407 | case 0x12: | ||
408 | return "Maxwell"; | ||
409 | case 0x13: | ||
410 | return "Pascal"; | ||
411 | case 0x14: | ||
412 | case 0x15: // Volta integrated | ||
413 | return "Volta"; | ||
414 | case 0x16: | ||
415 | return "Turing"; | ||
416 | case 0x17: | ||
417 | return "Ampere"; | ||
418 | case 0x18: | ||
419 | case 0x19: | ||
420 | return "Hopper (?) or Lovelace (?)"; | ||
421 | default: | ||
422 | if (arch < 0x19) | ||
423 | return "[unknown historical architecture]"; | ||
424 | else | ||
425 | return "[future]"; | ||
426 | } | ||
427 | } | ||
428 | |||
429 | typedef union { | ||
430 | // Fields as defined in the NVIDIA reference | ||
431 | struct { | ||
432 | uint32_t minor_revision:4; | ||
433 | uint32_t major_revision:4; | ||
434 | uint32_t reserved:4; | ||
435 | uint32_t padding0:8; | ||
436 | uint32_t implementation:4; | ||
437 | uint32_t architecture:5; | ||
438 | uint32_t padding1:3; | ||
439 | } __attribute__((packed)); | ||
440 | uint32_t raw; | ||
441 | // Arch << 4 + impl is also often used | ||
442 | struct { | ||
443 | uint32_t padding2:20; | ||
444 | uint32_t chip_id:9; | ||
445 | uint32_t padding3:3; | ||
446 | } __attribute__((packed)); | ||
447 | } mc_boot_0_t; | ||
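For example, a minimal identification read (sketch, not in this commit):

// Sketch: identify the GPU from register 0 (NV_MC_BOOT_0).
static void print_gpu_id(struct nvdebug_state *g) {
	mc_boot_0_t ids;
	ids.raw = nvdebug_readl(g, NV_MC_BOOT_0);
	printk(KERN_INFO "[nvdebug] Found %s GPU, chip ID %#05x\n",
	       ARCH2NAME(ids.architecture), ids.chip_id);
}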
448 | |||
449 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | ||
450 | enum ENGINE_TYPES { | ||
451 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] | ||
452 | ENGINE_COPY0 = 1, // [raw/physical] COPY #0 | ||
453 | ENGINE_COPY1 = 2, // [raw/physical] COPY #1 | ||
454 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | ||
455 | |||
456 | ENGINE_MSPDEC = 8, // Picture DECoder | ||
457 | ENGINE_MSPPP = 9, // [Video] Post Processing | ||
458 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | ||
459 | ENGINE_MSENC = 11, // [Video] ENCoding | ||
460 | ENGINE_VIC = 12, // Video Image Compositor | ||
461 | ENGINE_SEC = 13, // SEquenCer [?] | ||
462 | ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 | ||
463 | ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 | ||
464 | ENGINE_NVDEC = 16, // Nvidia Video DECoder | ||
465 | |||
466 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | ||
467 | ENGINE_LCE = 19, // Logical Copy Engine | ||
468 | ENGINE_GSP = 20, // Gpu System Processor | ||
469 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | ||
470 | }; | ||
471 | #define ENGINE_TYPES_LEN 22 | ||
472 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | ||
473 | "Graphics/Compute", | ||
474 | "COPY0", | ||
475 | "COPY1", | ||
476 | "COPY2", | ||
477 | "Unknown Engine ID#4", | ||
478 | "Unknown Engine ID#5", | ||
479 | "Unknown Engine ID#6", | ||
480 | "Unknown Engine ID#7", | ||
481 | "MSPDEC: Picture Decoder", | ||
482 | "MSPPP: Post Processing", | ||
483 | "MSVLD: Variable Length Decoder", | ||
484 | "MSENC: Encoder", | ||
485 | "VIC: Video Image Compositor", | ||
486 | "SEC: Sequencer", | ||
487 | "NVENC0: NVIDIA Video Encoder #0", | ||
488 | "NVENC1: NVIDIA Video Encoder #1", | ||
489 | "NVDEC: NVIDIA Video Decoder", | ||
490 | "Unknown Engine ID#17", | ||
491 | "IOCTRL: I/O Controller", | ||
492 | "LCE: Logical Copy Engine", | ||
493 | "GSP: GPU System Processor", | ||
494 | "NVJPG: NVIDIA JPEG Decoder", | ||
495 | }; | ||
496 | |||
497 | /* GPU engine information and control register offsets | ||
498 | Each engine is described by one or more entries (terminated by an entry with | ||
499 | the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A | ||
500 | typical device, such as the graphics/compute engine or a copy engine, is | ||
501 | described by three entries, one of each type. | ||
502 | |||
503 | The PTOP_DEVICE_INFO table is sparsely populated (entries of type | ||
504 | INFO_TYPE_NOT_VALID may be intermingled with valid entries), so any traversal | ||
505 | code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate | ||
506 | upon reaching the first entry of INFO_TYPE_NOT_VALID. | ||
507 | |||
508 | INFO_TYPE : Is this a DATA, ENUM, or ENGINE_TYPE table entry? | ||
509 | HAS_NEXT_ENTRY : Does the following entry refer to the same engine? | ||
510 | |||
511 | == INFO_TYPE_DATA fields == | ||
512 | PRI_BASE : BAR0 base = (PRI_BASE << 12) aka 4k aligned. | ||
513 | INST_ID : "Note that some instanced [engines] (such as logical copy | ||
514 | engines aka LCE) share a PRI_BASE across all [engines] of | ||
515 | the same engine type; such [engines] require an additional | ||
516 | offset: instanced base = BAR0 base + stride * INST_ID." | ||
517 | FAULT_ID_IS_VALID : Does this engine have its own bind point and fault ID | ||
518 | with the MMU? | ||
519 | FAULT_ID : "The MMU fault id used by this [engine]. These IDs | ||
520 | correspond to the NV_PFAULT_MMU_ENG_ID define list." | ||
521 | |||
522 | == INFO_TYPE_ENUM fields == | ||
523 | ENGINE_IS_VALID : Is this engine a host engine? | ||
524 | ENGINE_ENUM : "[T]he host engine ID for the current [engine] if it is | ||
525 | a host engine, meaning Host can send methods to the | ||
526 | engine. This id is used to index into any register array | ||
527 | whose __SIZE_1 is equal to NV_HOST_NUM_ENGINES. A given | ||
528 | ENGINE_ENUM can be present for at most one device in the | ||
529 | table. Devices corresponding to all ENGINE_ENUM ids 0 | ||
530 | through NV_HOST_NUM_ENGINES - 1 must be present in the | ||
531 | device info table." | ||
532 | RUNLIST_IS_VALID : Is this engine a host engine with a runlist? | ||
533 | RUNLIST_ENUM : "[T]he Host runlist ID on which methods for the current | ||
534 | [engine] should be submitted... The runlist id is used to | ||
535 | index into any register array whose __SIZE_1 is equal to | ||
536 | NV_HOST_NUM_RUNLISTS. [Engines] corresponding to all | ||
537 | RUNLIST_ENUM ids 0 through NV_HOST_NUM_RUNLISTS - 1 must | ||
538 | be present in the device info table." | ||
539 | INTR_IS_VALID : Does this device have an interrupt? | ||
540 | INTR_ENUM : Interrupt ID for use with "the NV_PMC_INTR_*_DEVICE | ||
541 | register bitfields." | ||
542 | RESET_IS_VALID : Does this engine have a reset ID? | ||
543 | RESET_ENUM : Reset ID for use indexing the "NV_PMC_ENABLE_DEVICE(i) | ||
544 | and NV_PMC_ELPG_ENABLE_DEVICE(i) register bitfields." | ||
545 | |||
546 | == INFO_TYPE_ENGINE_TYPE fields == | ||
547 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) | ||
548 | |||
549 | Support: Kepler, Maxwell, Pascal, Volta, Ampere | ||
550 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. | ||
551 | */ | ||
552 | #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) | ||
553 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 | ||
554 | typedef union { | ||
555 | // DATA type fields | ||
556 | struct { | ||
557 | enum DEVICE_INFO_TYPE info_type:2; | ||
558 | bool fault_id_is_valid:1; | ||
559 | uint32_t fault_id:7; | ||
560 | uint32_t padding0:2; | ||
561 | uint32_t pri_base:12; | ||
562 | uint32_t padding1:2; | ||
563 | uint32_t inst_id:4; | ||
564 | uint32_t is_not_enum2:1; | ||
565 | bool has_next_entry:1; | ||
566 | } __attribute__((packed)); | ||
567 | // ENUM type fields | ||
568 | struct { | ||
569 | uint32_t padding2:2; | ||
570 | bool reset_is_valid:1; | ||
571 | bool intr_is_valid:1; | ||
572 | bool runlist_is_valid:1; | ||
573 | bool engine_is_valid:1; | ||
574 | uint32_t padding3:3; | ||
575 | uint32_t reset_enum:5; | ||
576 | uint32_t padding4:1; | ||
577 | uint32_t intr_enum:5; | ||
578 | uint32_t padding5:1; | ||
579 | uint32_t runlist_enum:4; | ||
580 | uint32_t padding6:1; | ||
581 | uint32_t engine_enum:4; | ||
582 | uint32_t padding7:2; | ||
583 | } __attribute__((packed)); | ||
584 | // ENGINE_TYPE type fields | ||
585 | struct { | ||
586 | uint32_t padding8:2; | ||
587 | enum ENGINE_TYPES engine_type:29; | ||
588 | uint32_t padding9:1; | ||
589 | } __attribute__((packed)); | ||
590 | uint32_t raw; | ||
591 | } ptop_device_info_t; | ||
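Putting these fields together, a hypothetical traversal (sketch; cf. device_info_procfs.c in this commit) that finds the runlist serving the graphics/compute engine:

// Sketch: scan the table for the GR engine's runlist ID. Entries for one
// engine are chained via has_next_entry (bit 31 across entry types, as the
// seq_file show() routine in this commit also assumes), so gather
// per-group state and act when a group ends.
static int find_gr_runlist(struct nvdebug_state *g) {
	int i, rl_id = -1, group_rl = -1;
	bool group_is_gr = false;
	ptop_device_info_t e;
	for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1 && rl_id == -1; i++) {
		e.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(i));
		if (e.info_type == INFO_TYPE_ENUM && e.runlist_is_valid)
			group_rl = e.runlist_enum;
		if (e.info_type == INFO_TYPE_ENGINE_TYPE && e.engine_type == ENGINE_GRAPHICS)
			group_is_gr = true;
		if (e.info_type != INFO_TYPE_NOT_VALID && !e.has_next_entry) {
			if (group_is_gr)
				rl_id = group_rl;
			group_rl = -1;
			group_is_gr = false;
		}
	}
	return rl_id;  // -1 if not found
}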
592 | |||
593 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 | ||
594 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 | ||
595 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | ||
596 | // PCE_MAP is Volta+ only | ||
597 | #define NV_CE_PCE_MAP 0x00104028 | ||
598 | |||
599 | // GPC and TPC masks | ||
600 | // Support: Maxwell+ | ||
601 | #define NV_FUSE_GPC 0x00021c1c | ||
602 | #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) | ||
603 | |||
604 | /* Location of the 1KB instance block with page tables for BAR1 and BAR2. | ||
605 | Support: Fermi+ (?); verified on Pascal | ||
606 | */ | ||
607 | #define NV_PBUS_BAR1_BLOCK 0x00001704 | ||
608 | #define NV_PBUS_BAR2_BLOCK 0x00001714 | ||
609 | typedef union { | ||
610 | struct { | ||
611 | uint32_t ptr:28; | ||
612 | enum INST_TARGET target:2; | ||
613 | uint32_t padding0:1; | ||
614 | bool is_virtual:1; | ||
615 | } __attribute__((packed)); | ||
616 | uint32_t raw; | ||
617 | struct { | ||
618 | uint32_t map:30; | ||
619 | uint32_t padding1:2; | ||
620 | } __attribute__((packed)); | ||
621 | } bar_config_block_t; | ||
622 | |||
623 | /* BAR0 PRAMIN (Private RAM Instance) window configuration | ||
624 | |||
625 | BASE : Base of window >> 16 in [TARGET] virtual address space | ||
626 | TARGET : Which address space BASE points into | ||
627 | |||
628 | Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes | ||
629 | |||
630 | Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | ||
631 | */ | ||
632 | #define NV_PBUS_BAR0_WINDOW 0x00001700 | ||
633 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) | ||
634 | #define NV_PRAMIN_LEN 0x00100000 | ||
635 | typedef union { | ||
636 | struct { | ||
637 | uint32_t base:24; | ||
638 | enum INST_TARGET target:2; | ||
639 | uint32_t padding0:6; | ||
640 | } __attribute__((packed)); | ||
641 | uint32_t raw; | ||
642 | } bar0_window_t; | ||
643 | |||
644 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | ||
645 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 | ||
646 | typedef union { | ||
647 | struct { | ||
648 | uint32_t target:2; | ||
649 | uint32_t vol:1; | ||
650 | uint32_t padding0:1; | ||
651 | uint32_t fault_replay_tex:1; | ||
652 | uint32_t fault_replay_gcc:1; | ||
653 | uint32_t padding1:4; | ||
654 | bool is_ver2:1; | ||
655 | bool is_64k_big_page:1; // 128KB otherwise | ||
656 | uint32_t page_dir_lo:20; | ||
657 | uint32_t page_dir_hi:32; | ||
658 | } __attribute__((packed)); | ||
659 | uint64_t raw; | ||
660 | } page_dir_config_t; | ||
661 | |||
662 | /* Page directory entry | ||
663 | |||
664 | Note: Format changed with Pascal (how?) | ||
665 | |||
666 | Support: Pascal, Volta, Turing, Ampere | ||
667 | */ | ||
668 | // FIXME: PDE/PTEs are actually 64 bits =S | ||
669 | // Important: Aperture keys are different with PDEs | ||
670 | enum PD_TARGET { | ||
671 | PD_AND_TARGET_INVALID = 0, // b000 | ||
672 | PD_AND_TARGET_VID_MEM = 2, // b010 | ||
673 | PD_AND_TARGET_SYS_MEM_COHERENT = 4, // b100 | ||
674 | PD_AND_TARGET_SYS_MEM_NONCOHERENT = 6, // b110 | ||
675 | PTE_AND_TARGET_VID_MEM = 1, // b001 | ||
676 | PTE_AND_TARGET_PEER = 3, // b011 | ||
677 | PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101 | ||
678 | PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111 | ||
679 | }; | ||
680 | static inline char* pd_target_to_text(enum PD_TARGET t) { | ||
681 | switch (t) { | ||
682 | case PD_AND_TARGET_INVALID: | ||
683 | return "INVALID"; | ||
684 | case PD_AND_TARGET_VID_MEM: | ||
685 | case PTE_AND_TARGET_VID_MEM: | ||
686 | return "VID_MEM"; | ||
687 | case PTE_AND_TARGET_PEER: | ||
688 | return "PEER"; | ||
689 | case PD_AND_TARGET_SYS_MEM_COHERENT: | ||
690 | case PTE_AND_TARGET_SYS_MEM_COHERENT: | ||
691 | return "SYS_MEM_COHERENT"; | ||
692 | case PD_AND_TARGET_SYS_MEM_NONCOHERENT: | ||
693 | case PTE_AND_TARGET_SYS_MEM_NONCOHERENT: | ||
694 | return "SYS_MEM_NONCOHERENT"; | ||
695 | default: | ||
696 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); | ||
697 | return "INVALID"; | ||
698 | } | ||
699 | } | ||
700 | |||
701 | // PDE/PTE V2 type | ||
702 | // Note: As the meaning of target (bits 2:1) changes depending on if the entry | ||
703 | // is a PTE or not, this combines them into a single target field to | ||
704 | // simplify comparisons. | ||
705 | // Support: Pascal, Turing, Ampere | ||
706 | typedef union { | ||
707 | // Page Directory Entry (PDE) | ||
708 | struct { | ||
709 | bool is_pte:1; | ||
710 | uint32_t __target:2; | ||
711 | bool is_volatile:1; | ||
712 | uint32_t padding1:4; | ||
713 | uint32_t addr:24; | ||
714 | } __attribute__((packed)); | ||
715 | // Page Table Entry (PTE) | ||
716 | struct { | ||
717 | enum PD_TARGET target:3; | ||
718 | uint32_t __is_volatile:1; | ||
719 | bool is_encrypted:1; | ||
720 | bool is_privileged:1; | ||
721 | bool is_readonly:1; | ||
722 | bool atomics_disabled:1; | ||
723 | uint32_t __addr:24; | ||
724 | } __attribute__((packed)); | ||
725 | uint32_t raw; | ||
726 | } page_dir_entry_t; | ||
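A decoding sketch (not in this commit) showing how the combined target field makes the PDE/PTE distinction and aperture checkable in one place:

// Sketch: classify a raw V2 entry read from a page directory.
// `entry_ptr` is a hypothetical __iomem pointer to the entry.
static void print_v2_entry(void __iomem *entry_ptr) {
	page_dir_entry_t e;
	e.raw = readl(entry_ptr);
	if (e.target != PD_AND_TARGET_INVALID)
		printk(KERN_INFO "[nvdebug] %s -> %llx (%s)\n",
		       e.is_pte ? "PTE" : "PDE", (u64)e.addr << 12,
		       pd_target_to_text(e.target));
}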
727 | |||
728 | // PDE/PTE V1 types | ||
729 | // Support: Fermi, Kepler, Maxwell | ||
730 | enum V1_PD_TARGET { | ||
731 | PD_TARGET_INVALID = 0, | ||
732 | PD_TARGET_VID_MEM = 1, | ||
733 | PD_TARGET_SYS_MEM_COHERENT = 2, | ||
734 | PD_TARGET_SYS_MEM_NONCOHERENT = 3, | ||
735 | }; | ||
736 | // Page Directory Entry (PDE) | ||
737 | typedef union { | ||
738 | // Large page fields | ||
739 | struct { | ||
740 | // 0:32 | ||
741 | enum V1_PD_TARGET target:2; | ||
742 | uint32_t padding0:2; | ||
743 | uint64_t addr:28; // May be wider? | ||
744 | // 32:63 | ||
745 | uint32_t padding2:3; | ||
746 | uint32_t is_volatile:1; // Might have counted wrong? | ||
747 | uint32_t padding3:28; | ||
748 | } __attribute__((packed)); | ||
749 | // Small page fields | ||
750 | struct { | ||
751 | // 0:32 | ||
752 | uint32_t padding00:32; | ||
753 | // 32:63 | ||
754 | enum V1_PD_TARGET alt_target:2; | ||
755 | uint32_t alt_is_volatile:1; // Might have counted wrong? | ||
756 | uint32_t padding03:1; | ||
757 | uint64_t alt_addr:28; | ||
758 | } __attribute__((packed)); | ||
759 | uint64_t raw; | ||
760 | } page_dir_entry_v1_t; | ||
761 | // Page Table Entry (PTE) | ||
762 | // Reconstructed from info in Jetson nvgpu driver | ||
763 | typedef union { | ||
764 | struct { | ||
765 | // 0:32 | ||
766 | bool is_present:1; | ||
767 | bool is_privileged:1; | ||
768 | bool is_readonly:1; | ||
769 | uint32_t padding0:1; | ||
770 | uint64_t addr:28; | ||
771 | // 32:63 | ||
772 | bool is_volatile:1; | ||
773 | enum INST_TARGET:2; | ||
774 | uint32_t padding1:1; | ||
775 | uint32_t kind:8; | ||
776 | uint32_t comptag:17; | ||
777 | uint32_t padding2:1; | ||
778 | bool is_read_disabled:1; | ||
779 | bool is_write_disabled:1; | ||
780 | } __attribute__((packed)); | ||
781 | uint64_t raw; | ||
782 | } page_tbl_entry_v1_t; | ||
783 | //enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3}; | ||
784 | //enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3}; | ||
785 | //static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024}; | ||
786 | /* PDE V0 (nv50/Tesla) | ||
787 | typedef union { | ||
788 | struct { | ||
789 | enum V0_PDE_TYPE type:2; | ||
790 | enum INST_TARGET target:2; | ||
791 | uint32_t padding0:1; | ||
792 | enum V0_PDE_SIZE sublevel_size:2; | ||
793 | uint32_t padding1:5; | ||
794 | uint32_t addr:28; | ||
795 | uint32_t padding2:24; | ||
796 | } __attribute__((packed)); | ||
797 | uint64_t raw; | ||
798 | } page_dir_entry_v0_t;*/ | ||
799 | /* PTE V0 (nv50) | ||
800 | typedef union { | ||
801 | struct { | ||
802 | bool is_present:1; | ||
803 | uint32_t padding3:2; | ||
804 | bool is_readonly:1; | ||
805 | enum INST_TARGET target:2; | ||
806 | bool is_privileged:1; | ||
807 | uint32_t contig_blk_sz:3; | ||
808 | uint32_t padding4:2; | ||
809 | uint32_t addr:28; | ||
810 | uint32_t storage_type:7; // ??? | ||
811 | uint32_t compression_mode:2; // ??? | ||
812 | uint32_t compression_tag:12; // ??? | ||
813 | bool is_long_partition_cycle:1; // ??? | ||
814 | bool is_encrypted:1; | ||
815 | uint32_t padding5:1; | ||
816 | } __attribute__((packed)); | ||
817 | uint64_t raw; | ||
818 | } page_tbl_entry_v0_t;*/ | ||
819 | |||
304 | // TODO(jbakita): Maybe put the above GPU types in a different file. | 820 | // TODO(jbakita): Maybe put the above GPU types in a different file. |
305 | 821 | ||
306 | #define for_chan_in_tsg(chan, tsg) \ | 822 | #define NV_PCI_VENDOR 0x10de |
307 | for (chan = (struct runlist_chan*)(tsg + 1); \ | 823 | struct nvdebug_state { |
308 | (void*)chan < (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length; \ | 824 | // Pointer to the mapped base address of the GPU control registers (obtained |
309 | chan++) | 825 | // via ioremap() originally). For embedded GPUs, we extract this from their |
826 | // struct nvgpu_os_linux. For discrete GPUs, we create our own mapping of | ||
827 | // BAR0 with pci_iomap(). Access via nvgpu_readl/writel functions. | ||
828 | void __iomem *regs; | ||
829 | // Depending on the architecture, BAR2 or BAR3 are used to access PRAMIN | ||
830 | union { | ||
831 | void __iomem *bar2; | ||
832 | void __iomem *bar3; | ||
833 | }; | ||
834 | int chip_id; | ||
835 | // Additional state from the built-in driver. Only set iff | ||
836 | // chip_id == NV_CHIP_ID_GV11B | ||
837 | struct gk20a *g; | ||
838 | // Pointer to PCI device needed for pci_iounmap | ||
839 | struct pci_dev *pcid; | ||
840 | }; | ||
841 | |||
842 | /*const struct runlist_funcs { | ||
843 | u8 size; | ||
844 | enum ENTRY_TYPE (*entry_type)(struct nvdebug_state *, void *); | ||
845 | uint32_t (*chid)(struct nvdebug_state *, void *); | ||
846 | uint32_t (*inst_ptr_lo)(struct nvdebug_state *, void *); | ||
847 | enum INST_TARGET (*inst_target)(struct nvdebug_state *, void *); | ||
848 | uint32_t (*tsgid)(struct nvdebug_state *, void *); | ||
849 | uint32_t (*timeslice_scale)(struct nvdebug_state *, void *); | ||
850 | uint32_t (*timeslice_timeout)(struct nvdebug_state *, void *); | ||
851 | uint32_t (*tsg_length)(struct nvdebug_state *, void *); | ||
852 | };*/ | ||
853 | |||
854 | // This disgusting macro is a crutch to work around the fact that runlists were | ||
855 | // different prior to Volta. | ||
856 | #define VERSIONED_RL_ACCESSOR(_ENTRY_TYPE, type, prop) \ | ||
857 | __attribute__((unused)) \ | ||
858 | static type (prop)(const struct nvdebug_state *g, const void *raw) { \ | ||
859 | if (g->chip_id >= NV_CHIP_ID_VOLTA) { \ | ||
860 | const struct gv100_runlist_ ## _ENTRY_TYPE *entry = (struct gv100_runlist_ ## _ENTRY_TYPE*)raw; \ | ||
861 | return entry->prop; \ | ||
862 | } else if (g->chip_id >= NV_CHIP_ID_KEPLER) { \ | ||
863 | const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \ | ||
864 | return entry->prop; \ | ||
865 | } else { \ | ||
866 | printk(KERN_WARNING "[nvdebug] " #prop " unavailable on GPU ID %x, which is older than Kepler.\n", g->chip_id); \ | ||
867 | return (type)0; \ | ||
868 | } \ | ||
869 | } | ||
870 | |||
871 | VERSIONED_RL_ACCESSOR(chan, uint32_t, chid); | ||
872 | VERSIONED_RL_ACCESSOR(chan, uint32_t, inst_ptr_lo); | ||
873 | VERSIONED_RL_ACCESSOR(chan, enum INST_TARGET, inst_target); | ||
874 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsgid); | ||
875 | VERSIONED_RL_ACCESSOR(tsg, enum ENTRY_TYPE, entry_type); | ||
876 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, timeslice_scale); | ||
877 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, timeslice_timeout); | ||
878 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length); | ||
310 | 879 | ||
311 | #define next_tsg(tsg) \ | 880 | |
312 | (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length | 881 | #define NV_RL_ENTRY_SIZE(g) \ |
882 | ((g)->chip_id >= NV_CHIP_ID_VOLTA ? sizeof(struct gv100_runlist_tsg) : sizeof(struct gk110_runlist_tsg)) | ||
883 | |||
884 | #define for_chan_in_tsg(g, chan, tsg) \ | ||
885 | for (chan = (typeof(chan))(((u8*)tsg) + NV_RL_ENTRY_SIZE(g)); \ | ||
886 | (u8*)chan < ((u8*)tsg) + (1 + tsg_length(g, tsg)) * NV_RL_ENTRY_SIZE(g); \ | ||
887 | chan = (typeof(chan))(((u8*)chan) + NV_RL_ENTRY_SIZE(g))) | ||
888 | |||
889 | #define next_tsg(g, tsg) \ | ||
890 | (typeof(tsg))((u8*)(tsg) + NV_RL_ENTRY_SIZE(g) * (tsg_length(g, tsg) + 1)) | ||
313 | 891 | ||
314 | struct runlist_iter { | 892 | struct runlist_iter { |
315 | struct entry_tsg *curr_tsg; | 893 | // Pointer to either a TSG or channel entry (they're the same size) |
894 | void *curr_entry; | ||
895 | // This should be set to tsg_length when a TSG is reached, and | ||
896 | // decremented as each subsequent channel is printed. This allows us to | ||
898 | track which channels are and are not part of the TSG. | ||
898 | int channels_left_in_tsg; | ||
899 | // Total runlist length, etc | ||
316 | runlist_info_t rl_info; | 900 | runlist_info_t rl_info; |
317 | }; | 901 | }; |
318 | 902 | ||
903 | #define NVDEBUG_MAX_DEVICES 8 | ||
904 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | ||
905 | |||
319 | // Defined in runlist.c | 906 | // Defined in runlist.c |
320 | struct gk20a* get_live_gk20a(void); | 907 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); |
321 | int get_runlist_iter(struct runlist_iter *rl_iter); | 908 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); |
322 | int preempt_tsg(uint32_t tsg_id); | 909 | |
910 | // Defined in mmu.c | ||
911 | uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr); | ||
912 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy); | ||
913 | uint64_t search_page_directory( | ||
914 | struct nvdebug_state *g, | ||
915 | void __iomem *pde_offset, | ||
916 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
917 | uint64_t addr_to_find); | ||
918 | uint64_t search_v1_page_directory( | ||
919 | struct nvdebug_state *g, | ||
920 | void __iomem *pde_offset, | ||
921 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
922 | uint64_t addr_to_find); | ||
923 | |||
323 | 924 | ||
324 | static inline struct gk20a *get_gk20a(struct device *dev) { | 925 | static inline struct gk20a *get_gk20a(struct device *dev) { |
325 | // XXX: Only works because gk20a* is the first member of gk20a_platform | 926 | // XXX: Only works because gk20a* is the first member of gk20a_platform |
326 | return *((struct gk20a**)dev_get_drvdata(dev)); | 927 | return *((struct gk20a**)dev_get_drvdata(dev)); |
327 | } | 928 | } |
328 | 929 | ||
329 | // Functionally identical to nvgpu_readl() | 930 | // We use the data field of the proc_dir_entry ("PDE" in this function) to store |
931 | // our index into the g_nvdebug_state array | ||
932 | static inline int seq2gpuidx(struct seq_file *s) { | ||
933 | const struct file *f = s->file; | ||
934 | return (uintptr_t)PDE_DATA(file_inode(f)); | ||
935 | } | ||
936 | static inline int file2gpuidx(const struct file *f) { | ||
937 | return (uintptr_t)PDE_DATA(file_inode(f)); | ||
938 | } | ||
939 | static inline int file2parentgpuidx(const struct file *f) { | ||
940 | // Should be safe to call on ProcFS entries, as our parent should (?) | ||
941 | // still exist if we're called. If not, there are worse races in this | ||
942 | // module. | ||
943 | return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode); | ||
944 | } | ||
945 | |||
946 | #define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) | ||
947 | |||
948 | // Similar to nvgpu_readl() | ||
330 | // (except we don't try to resolve situations where regs is NULL) | 949 | // (except we don't try to resolve situations where regs is NULL) |
331 | static inline u32 nvdebug_readl(struct gk20a* g, u32 r) { | 950 | static inline u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { |
332 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 951 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
333 | if (unlikely(!g_os->regs)) { | 952 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); |
334 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | 953 | return -1; |
335 | return -1; | 954 | } |
336 | } | 955 | return readl(s->regs + r); |
337 | return readl(g_os->regs + r); | ||
338 | } | 956 | } |
339 | 957 | ||
340 | // quadword version of nvdebug_readl() | 958 | // quadword version of nvdebug_readl() |
341 | static inline u64 nvdebug_readq(struct gk20a* g, u32 r) { | 959 | static inline u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { |
342 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 960 | u64 ret; |
343 | u64 ret; | 961 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
344 | if (unlikely(!g_os->regs)) { | 962 | printk(KERN_ERR "[nvdebug] Attempted nvdebug_readq on non-existent registers!\n"); |
345 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | 963 | return -1; |
346 | return -1; | 964 | } |
347 | } | ||
348 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl | 965 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl |
349 | ret = readl(g_os->regs + r); | 966 | ret = readl(s->regs + r); |
350 | ret |= ((u64)readl(g_os->regs + r + 4)) << 32; | 967 | ret |= ((u64)readl(s->regs + r + 4)) << 32; |
351 | return ret; | 968 | return ret; |
352 | } | 969 | } |
353 | 970 | ||
354 | // Functionally identical to nvgpu_writel() | 971 | // Similar to nvgpu_writel() |
355 | static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { | 972 | static inline void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { |
356 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 973 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
357 | if (unlikely(!g_os->regs)) { | 974 | printk(KERN_ERR "[nvdebug] Attempted nvdebug_writel on non-existent registers!\n"); |
975 | return; | ||
976 | } | ||
977 | writel_relaxed(v, s->regs + r); | ||
978 | wmb(); | ||
979 | } | ||
980 | |||
981 | // quadword version of nvdebug_writel() | ||
982 | // XXX: Untested; this probably doesn't work | ||
983 | static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | ||
984 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
358 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); | 985 | printk(KERN_ERR "[nvdebug] Attempted nvdebug_writeq on non-existent registers!\n"); |
359 | return; | 986 | return; |
360 | } | 987 | } |
361 | writel_relaxed(v, g_os->regs + r); | 988 | writeq_relaxed(v, s->regs + r); |
362 | wmb(); | 989 | wmb(); |
363 | } | 990 | } |
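The quadword helpers above reduce to the following pattern; a minimal sketch (not from the patch), with the same low-word-first ordering. Note the composed value can tear if the device updates the register between the two 32-bit reads, a limitation the workaround accepts.

        #include <asm/io.h>
        #include <linux/types.h>

        // Compose a 64-bit read from two 32-bit MMIO reads, as nvdebug_readq()
        // does. Not atomic: the register may change between the two readl()s.
        static inline u64 mmio_read64_split(void __iomem *base, u32 r)
        {
                u64 lo = readl(base + r);
                u64 hi = readl(base + r + 4);
                return (hi << 32) | lo;
        }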
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 0854b8b..695b5fd 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
@@ -2,64 +2,282 @@ | |||
2 | * SPDX-License-Identifier: MIT | 2 | * SPDX-License-Identifier: MIT |
3 | */ | 3 | */ |
4 | 4 | ||
5 | /* TODO | ||
6 | * - Add sysfs trigger for a preemption | ||
7 | */ | ||
8 | |||
9 | #include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type | 5 | #include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type |
6 | #include <linux/interrupt.h> // For hooking the nvidia driver interrupts | ||
10 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
11 | #include <linux/module.h> | 8 | #include <linux/module.h> |
12 | #include <linux/proc_fs.h> // So we can set up entries in /proc | 9 | #include <linux/pci.h> // For PCI device scanning |
10 | #include <linux/proc_fs.h> // So we can set up entries in /proc | ||
13 | 11 | ||
14 | #include "nvdebug.h" | 12 | #include "nvdebug.h" |
13 | #include "stubs.h" | ||
15 | 14 | ||
16 | // LIAR. But without this we can't use GPL-only exported symbols like | 15 | // MIT is GPL-compatible. We need to be GPL-compatible for symbols like |
17 | // platform_bus_type or bus_find_device_by_name... | 16 | // platform_bus_type or bus_find_device_by_name... |
18 | MODULE_LICENSE("GPL"); | 17 | MODULE_LICENSE("Dual MIT/GPL"); |
19 | MODULE_AUTHOR("Joshua Bakita"); | 18 | MODULE_AUTHOR("Joshua Bakita"); |
20 | MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); | 19 | MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); |
21 | MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now | ||
22 | 20 | ||
23 | extern const struct file_operations runlist_file_ops; | 21 | extern const struct file_operations runlist_file_ops; |
24 | extern const struct file_operations preempt_tsg_file_ops; | 22 | extern const struct file_operations preempt_tsg_file_ops; |
25 | extern const struct file_operations disable_channel_file_ops; | 23 | extern const struct file_operations disable_channel_file_ops; |
26 | extern const struct file_operations enable_channel_file_ops; | 24 | extern const struct file_operations enable_channel_file_ops; |
27 | extern const struct file_operations switch_to_tsg_file_ops; | 25 | extern const struct file_operations switch_to_tsg_file_ops; |
26 | extern const struct file_operations device_info_file_ops; | ||
27 | extern const struct file_operations nvdebug_read_reg32_file_ops; | ||
28 | |||
29 | // Bus types are global symbols in the kernel | ||
30 | extern struct bus_type platform_bus_type; | ||
31 | struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | ||
32 | unsigned int g_nvdebug_devices = 0; | ||
33 | |||
34 | // TEMP | ||
35 | irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) { | ||
36 | printk(KERN_INFO "[nvdebug] Interrupt tap triggered on IRQ %d.\n", irq_num); | ||
37 | return IRQ_NONE; // We don't actually handle any interrupts. Pass them on. | ||
38 | } | ||
39 | |||
40 | // Find any and all NVIDIA GPUs in the system | ||
41 | // Note: This function fails if any of them are in a bad state | ||
42 | int probe_and_cache_device(void) { | ||
43 | // platform bus (SoC) iterators | ||
44 | struct device *dev = NULL; | ||
45 | struct device *temp_dev; | ||
46 | // PCI search iterator and search query | ||
47 | struct pci_dev *pcid = NULL; | ||
48 | // This query pattern is modeled on nouveau's | ||
49 | struct pci_device_id query = { | ||
50 | .vendor = NV_PCI_VENDOR, // Match NVIDIA devices | ||
51 | .device = PCI_ANY_ID, | ||
52 | .subvendor = PCI_ANY_ID, | ||
53 | .subdevice = PCI_ANY_ID, | ||
54 | .class_mask = 0xff << 16, | ||
55 | .class = PCI_BASE_CLASS_DISPLAY << 16, // Match display devs | ||
56 | }; | ||
57 | int i = 0; | ||
58 | // Search the platform bus for the first device that matches our name | ||
59 | // Search for GV11B (Jetson Xavier) | ||
60 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) | ||
61 | dev = temp_dev; | ||
62 | // Search for GP10B (Jetson TX2) | ||
63 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b"))) | ||
64 | dev = temp_dev; | ||
65 | // TODO: Support other platform bus devices (gk20a, gm20b) | ||
66 | if (dev) { | ||
67 | struct nvgpu_os_linux *l; | ||
68 | mc_boot_0_t ids; | ||
69 | g_nvdebug_state[i].g = get_gk20a(dev); | ||
70 | l = container_of(g_nvdebug_state[i].g, struct nvgpu_os_linux, g); | ||
71 | g_nvdebug_state[i].regs = l->regs; | ||
72 | if (!g_nvdebug_state[i].regs) | ||
73 | return -EADDRNOTAVAIL; | ||
74 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
75 | if (ids.raw == -1) | ||
76 | return -EADDRNOTAVAIL; | ||
77 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
78 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on platform bus and initialized.\n", | ||
79 | ids.chip_id, ARCH2NAME(ids.architecture)); | ||
80 | i++; | ||
81 | } | ||
82 | // Search the PCI bus and iterate through all matches | ||
83 | // FIXME: State rollback | ||
84 | while ((pcid = pci_get_dev_by_id(&query, pcid)) && i < NVDEBUG_MAX_DEVICES) { | ||
85 | mc_boot_0_t ids; | ||
86 | g_nvdebug_state[i].g = NULL; | ||
87 | // Map BAR0 (GPU control registers) | ||
88 | g_nvdebug_state[i].regs = pci_iomap(pcid, 0, 0); | ||
89 | if (!g_nvdebug_state[i].regs) { | ||
90 | pci_err(pcid, "[nvdebug] Unable to map BAR0 on this GPU\n"); | ||
91 | return -EADDRNOTAVAIL; | ||
92 | } | ||
93 | // Map BAR3 (CPU-accessible mappings of GPU DRAM) | ||
94 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, 0); | ||
95 | // Try mapping only the lower half of BAR3 on fail | ||
96 | // (vesafb may map the top half for display) | ||
97 | if (!g_nvdebug_state[i].bar3) | ||
98 | g_nvdebug_state[i].bar3 = pci_iomap(pcid, 3, pci_resource_len(pcid, 3)/2); | ||
99 | g_nvdebug_state[i].pcid = pcid; | ||
100 | ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); | ||
101 | if (ids.raw == -1) { | ||
102 | pci_err(pcid, "[nvdebug] Unable to read config from Master Controller on this GPU\n"); | ||
103 | return -EADDRNOTAVAIL; | ||
104 | } | ||
105 | g_nvdebug_state[i].chip_id = ids.chip_id; | ||
106 | printk(KERN_INFO "[nvdebug] Chip ID %x (architecture %s) detected on PCI bus and initialized.\n", | ||
107 | ids.chip_id, ARCH2NAME(ids.architecture)); | ||
108 | // TEMP | ||
109 | if (request_irq(pcid->irq, nvdebug_irq_tap, IRQF_SHARED, "nvdebug tap", pcid)) { | ||
110 | printk(KERN_WARNING "[nvdebug] Unable to initialize IRQ tap\n"); | ||
111 | } | ||
112 | i++; | ||
113 | } | ||
114 | // Return the number of devices we found | ||
115 | if (i > 0) | ||
116 | return i; | ||
117 | return -ENODEV; | ||
118 | } | ||
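One detail of the query above worth spelling out: with .class_mask = 0xff << 16, only the base-class byte of the 24-bit class code participates in matching, so VGA controllers (0x0300) and 3D controllers (0x0302) both satisfy it. A sketch of the test, mirroring pci_match_one_device() in stubs.h:

        #include <linux/pci.h>
        #include <linux/types.h>

        // Returns true for any display-class device; class codes 0x030000
        // (VGA controller) and 0x030200 (3D controller) both pass.
        static bool matches_display_base_class(u32 dev_class)
        {
                const u32 class = PCI_BASE_CLASS_DISPLAY << 16;
                const u32 mask = 0xff << 16;
                return !((class ^ dev_class) & mask);
        }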
119 | |||
120 | // Create files `/proc/gpu#/runlist#`, world readable | ||
121 | int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | ||
122 | ptop_device_info_t info; | ||
123 | struct proc_dir_entry *rl_entry; | ||
124 | int i, rl_id; | ||
125 | char runlist_name[12]; | ||
126 | int max_rl_id = 0; // Always at least one runlist | ||
127 | // Figure out how many runlists there are by checking the device info | ||
128 | // registers. Runlists are always numbered sequentially, so we just have | ||
129 | // to find the highest-valued one and add 1 to get the number of runlists. | ||
130 | for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) { | ||
131 | info.raw = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_DEVICE_INFO(i)); | ||
132 | if (info.info_type != INFO_TYPE_ENUM || !info.runlist_is_valid) | ||
133 | continue; | ||
134 | if (info.runlist_enum > max_rl_id) | ||
135 | max_rl_id = info.runlist_enum; | ||
136 | } | ||
137 | // Create files to read each runlist. The read handling code looks at the | ||
138 | // PDE_DATA associated with the file to determine what the runlist ID is. | ||
139 | for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { | ||
140 | snprintf(runlist_name, 12, "runlist%d", rl_id); | ||
141 | rl_entry = proc_create_data( | ||
142 | runlist_name, 0444, dir, &runlist_file_ops, | ||
143 | (void*)(uintptr_t)rl_id); | ||
144 | if (!rl_entry) | ||
145 | return -ENOMEM; | ||
146 | } | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable | ||
151 | // TODO: Don't run this on unsupported GPUs | ||
152 | int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) { | ||
153 | char file_name[20]; | ||
154 | int i; | ||
155 | struct proc_dir_entry *gpc_tpc_mask_entry; | ||
156 | // Get a bitmask of which GPCs are disabled | ||
157 | uint32_t gpcs_mask = nvdebug_readl(&g_nvdebug_state[device_id], NV_FUSE_GPC); | ||
158 | // Get the maximum possible number of GPCs for this chip | ||
159 | uint32_t max_gpcs = nvdebug_readl(&g_nvdebug_state[device_id], NV_PTOP_SCAL_NUM_GPCS); | ||
160 | // For each enabled GPC, expose a mask of disabled TPCs | ||
161 | for (i = 0; i < max_gpcs; i++) { | ||
162 | // Do nothing if GPC is disabled | ||
163 | if ((1 << i) & gpcs_mask) | ||
164 | continue; | ||
165 | // If GPC is enabled, create an entry to read disabled TPCs mask | ||
166 | snprintf(file_name, 20, "gpc%d_tpc_mask", i); | ||
167 | gpc_tpc_mask_entry = proc_create_data( | ||
168 | file_name, 0444, dir, &nvdebug_read_reg32_file_ops, | ||
169 | (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i)); | ||
170 | if (!gpc_tpc_mask_entry) | ||
171 | return -ENOMEM; | ||
172 | } | ||
173 | return 0; | ||
174 | } | ||
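For illustration, a user-space reader for one of these files might look like the sketch below (not part of the module); the path assumes a GPU 0 exposing GPC 0, and base-0 parsing tolerates either decimal or 0x-prefixed output from nvdebug_reg32_read().

        #include <stdio.h>
        #include <stdlib.h>

        int main(void)
        {
                char buf[16];
                unsigned long mask;
                FILE *f = fopen("/proc/gpu0/gpc0_tpc_mask", "r");
                if (!f || !fgets(buf, sizeof(buf), f))
                        return 1;
                // Set bits mark disabled TPCs (same convention as the GPC fuse)
                mask = strtoul(buf, NULL, 0);
                printf("disabled TPCs in GPC0: %d\n", __builtin_popcountl(mask));
                fclose(f);
                return 0;
        }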
28 | 175 | ||
29 | int __init nvdebug_init(void) { | 176 | int __init nvdebug_init(void) { |
30 | struct proc_dir_entry *rl_entry, *preempt_entry, *disable_channel_entry, | 177 | struct proc_dir_entry *dir, *preempt_entry, *disable_channel_entry, |
31 | *enable_channel_entry, *switch_to_tsg_entry; | 178 | *enable_channel_entry, *switch_to_tsg_entry, *device_info_entry, |
32 | // Create file `/proc/preempt_tsg`, world readable | 179 | *num_gpcs_entry; |
33 | rl_entry = proc_create("runlist", 0444, NULL, &runlist_file_ops); | 180 | int rl_create_err, tpc_masks_create_err; |
34 | // Create file `/proc/preempt_tsg`, world writable | 181 | // Check that an NVIDIA GPU is present and initialize g_nvdebug_state |
35 | preempt_entry = proc_create("preempt_tsg", 0222, NULL, &preempt_tsg_file_ops); | 182 | int res = probe_and_cache_device(); |
36 | // Create file `/proc/disable_channel`, world writable | 183 | if (res < 0) |
37 | disable_channel_entry = proc_create("disable_channel", 0222, NULL, &disable_channel_file_ops); | 184 | return res; |
38 | // Create file `/proc/enable_channel`, world writable | 185 | g_nvdebug_devices = res; |
39 | enable_channel_entry = proc_create("enable_channel", 0222, NULL, &enable_channel_file_ops); | 186 | // Create separate ProcFS directories for each GPU |
40 | // Create file `/proc/switch_to_tsg`, world writable | 187 | while (res--) { |
41 | switch_to_tsg_entry = proc_create("switch_to_tsg", 0222, NULL, &switch_to_tsg_file_ops); | 188 | char device_id_str[7]; |
42 | // ProcFS entry creation only fails if out of memory | 189 | uintptr_t device_id = res; // This is uintptr as we abuse the *data field on proc_dir_entry to store the GPU id |
43 | if (!rl_entry || !preempt_entry || !disable_channel_entry || !enable_channel_entry || !switch_to_tsg_entry) { | 190 | // Create directory /proc/gpu# where # is the GPU number |
44 | remove_proc_entry("runlist", NULL); | 191 | snprintf(device_id_str, 7, "gpu%ld", device_id); |
45 | remove_proc_entry("preempt_tsg", NULL); | 192 | if (!(dir = proc_mkdir_data(device_id_str, 0555, NULL, (void*)device_id))) |
46 | remove_proc_entry("disable_channel", NULL); | 193 | goto out_nomem; |
47 | remove_proc_entry("enable_channel", NULL); | 194 | // Create files `/proc/gpu#/runlist#`, world readable |
48 | remove_proc_entry("switch_to_tsg", NULL); | 195 | rl_create_err = create_runlist_files(device_id, dir); |
49 | printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n"); | 196 | // Create files `/proc/gpu#/gpc#_tpc_mask`, world readable |
50 | return -ENOMEM; | 197 | tpc_masks_create_err = create_tpc_mask_files(device_id, dir); |
198 | // Create file `/proc/gpu#/preempt_tsg`, world writable | ||
199 | preempt_entry = proc_create_data( | ||
200 | "preempt_tsg", 0222, dir, &preempt_tsg_file_ops, | ||
201 | (void*)device_id); | ||
202 | // Create file `/proc/gpu#/disable_channel`, world writable | ||
203 | disable_channel_entry = proc_create_data( | ||
204 | "disable_channel", 0222, dir, &disable_channel_file_ops, | ||
205 | (void*)device_id); | ||
206 | // Create file `/proc/gpu#/enable_channel`, world writable | ||
207 | enable_channel_entry = proc_create_data( | ||
208 | "enable_channel", 0222, dir, &enable_channel_file_ops, | ||
209 | (void*)device_id); | ||
210 | // Create file `/proc/gpu#/switch_to_tsg`, world writable | ||
211 | switch_to_tsg_entry = proc_create_data( | ||
212 | "switch_to_tsg", 0222, dir, &switch_to_tsg_file_ops, | ||
213 | (void*)device_id); | ||
214 | // Create file `/proc/gpu#/device_info`, world readable | ||
215 | device_info_entry = proc_create_data( | ||
216 | "device_info", 0444, dir, &device_info_file_ops, | ||
217 | (void*)device_id); | ||
218 | // Create file `/proc/gpu#/num_gpcs`, world readable | ||
219 | num_gpcs_entry = proc_create_data( | ||
220 | "num_gpcs", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
221 | (void*)NV_PTOP_SCAL_NUM_GPCS); | ||
222 | // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable | ||
223 | num_gpcs_entry = proc_create_data( | ||
224 | "num_tpc_per_gpc", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
225 | (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC); | ||
226 | // Create file `/proc/gpu#/num_ces`, world readable | ||
227 | num_gpcs_entry = proc_create_data( | ||
228 | "num_ces", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
229 | (void*)NV_PTOP_SCAL_NUM_CES); | ||
230 | // Create file `/proc/gpu#/gpc_mask`, world readable | ||
231 | num_gpcs_entry = proc_create_data( | ||
232 | "gpc_mask", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
233 | (void*)NV_FUSE_GPC); | ||
234 | // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+ | ||
235 | if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) { | ||
236 | // TODO: Redo to num_pces | ||
237 | // Create file `/proc/gpu#/pce_map`, world readable | ||
238 | num_gpcs_entry = proc_create_data( | ||
239 | "pce_map", 0444, dir, &nvdebug_read_reg32_file_ops, | ||
240 | (void*)NV_CE_PCE_MAP); | ||
241 | } | ||
242 | // ProcFS entry creation only fails if out of memory | ||
243 | if (rl_create_err || tpc_masks_create_err || !preempt_entry || | ||
244 | !disable_channel_entry || !enable_channel_entry || | ||
245 | !switch_to_tsg_entry || !device_info_entry || !num_gpcs_entry) | ||
246 | goto out_nomem; | ||
51 | } | 247 | } |
248 | // (See Makefile if you want to know the origin of GIT_HASH.) | ||
52 | printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); | 249 | printk(KERN_INFO "[nvdebug] Module version "GIT_HASH" initialized\n"); |
53 | return 0; | 250 | return 0; |
251 | out_nomem: | ||
252 | // Make sure to clear all ProcFS directories on error | ||
253 | while (res < g_nvdebug_devices) { | ||
254 | char device_id_str[7]; | ||
255 | snprintf(device_id_str, 7, "gpu%d", res); | ||
256 | remove_proc_subtree(device_id_str, NULL); | ||
257 | res++; | ||
258 | } | ||
259 | return -ENOMEM; | ||
54 | } | 260 | } |
55 | 261 | ||
56 | static void __exit nvdebug_exit(void) { | 262 | static void __exit nvdebug_exit(void) { |
57 | remove_proc_entry("runlist", NULL); | 263 | struct nvdebug_state *g; |
58 | remove_proc_entry("preempt_tsg", NULL); | 264 | // Deinitialize each device |
59 | remove_proc_entry("disable_channel", NULL); | 265 | while (g_nvdebug_devices--) { |
60 | remove_proc_entry("enable_channel", NULL); | 266 | // Remove procfs directory |
61 | remove_proc_entry("switch_to_tsg", NULL); | 267 | char device_id[7]; |
62 | printk(KERN_INFO "[nvdebug] Exiting...\n"); | 268 | snprintf(device_id, 7, "gpu%d", g_nvdebug_devices); |
269 | remove_proc_subtree(device_id, NULL); | ||
270 | // Free BAR mappings | ||
271 | g = &g_nvdebug_state[g_nvdebug_devices]; | ||
272 | if (g->pcid && g->regs) | ||
273 | pci_iounmap(g->pcid, g->regs); | ||
274 | if (g->pcid && g->bar3) | ||
275 | pci_iounmap(g->pcid, g->bar3); | ||
276 | // TEMP | ||
277 | if (g->pcid) free_irq(g->pcid->irq, g->pcid); | ||
278 | printk(KERN_INFO "[nvdebug] Chip ID %x deinitialized.\n", g->chip_id); | ||
279 | } | ||
280 | printk(KERN_INFO "[nvdebug] Module exit complete.\n"); | ||
63 | } | 281 | } |
64 | 282 | ||
65 | module_init(nvdebug_init); | 283 | module_init(nvdebug_init); |
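A matching user-space trigger for the writable files created above; the path and TSG ID are illustrative, and kstrtou32_from_user() in the write handler auto-detects decimal, hex, or octal:

        #include <fcntl.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                const char *tsgid = "5"; // illustrative TSG ID
                int fd = open("/proc/gpu0/preempt_tsg", O_WRONLY);
                if (fd < 0)
                        return 1;
                // Each write() triggers one preemption via preempt_tsg_file_write()
                if (write(fd, tsgid, strlen(tsgid)) < 0)
                        return 1;
                close(fd);
                return 0;
        }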
@@ -1,122 +1,127 @@ | |||
1 | #include <linux/device.h> // For struct device, bus_find_device*(), struct bus_type | ||
2 | //#include <linux/iommu.h> // For struct iommu_domain | ||
3 | #include <linux/kernel.h> // Kernel types | 1 | #include <linux/kernel.h> // Kernel types |
4 | #include <asm/io.h> | ||
5 | 2 | ||
6 | #include "nvdebug.h" | 3 | #include "nvdebug.h" |
7 | 4 | ||
8 | // Bus types are global symbols in the kernel | ||
9 | extern struct bus_type platform_bus_type; | ||
10 | |||
11 | struct gk20a* get_live_gk20a(void) { | ||
12 | struct device *dev = NULL; | ||
13 | struct device *temp_dev; | ||
14 | struct gk20a *g; | ||
15 | struct nvgpu_os_linux *l; | ||
16 | // Get the last device that matches our name | ||
17 | while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) { | ||
18 | dev = temp_dev; | ||
19 | printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev)); | ||
20 | } | ||
21 | if (!dev) | ||
22 | return NULL; | ||
23 | g = get_gk20a(dev); | ||
24 | // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be: | ||
25 | // - A GPU address (type is sysmem_coherent) | ||
26 | // - A physical address (dereferencing after ioremap crashes) | ||
27 | // - A kernel virtual address (dereferencing segfaults) | ||
28 | // So maybe it's some sort of custom thing? This is an address that the GPU | ||
29 | // can use, so it would make most sense for it to be a physical address. | ||
30 | // | ||
31 | // BUT, it can't possibly be a physical address, as it would refer to an | ||
32 | // address greater than the maximum one on our system (by a lot!). | ||
33 | // Maybe I'm reading the runlist base wrong? | ||
34 | // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual | ||
35 | // address! So, what's this I/O address space? All I know is that it's what | ||
36 | // nvgpu_mem_get_addr() returns. That function returns the result of either: | ||
37 | // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) | ||
38 | // converts an IPA to a PA? | ||
39 | // - nvgpu_mem_iommu_translate | ||
40 | // | ||
41 | // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which | ||
42 | // returns SYSMEM. | ||
43 | // | ||
44 | // To convert a physical address to a IOMMU address, we add a bit | ||
45 | // | ||
46 | // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working | ||
47 | // before because the GPU had simply gone to sleep and invalidated its | ||
48 | // register state, so nvgpu_readl() was simply returning garbage. | ||
49 | l = container_of(g, struct nvgpu_os_linux, g); | ||
50 | if (!l->regs) | ||
51 | return NULL; | ||
52 | return g; | ||
53 | } | ||
54 | |||
55 | /* Get runlist head and info (incl. length) | 5 | /* Get runlist head and info (incl. length) |
56 | @param rl_iter Location at which to store output | 6 | @param rl_iter Location at which to store output |
7 | @param rl_id ID of the runlist to obtain | ||
57 | */ | 8 | */ |
58 | int get_runlist_iter(struct runlist_iter *rl_iter) { | 9 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter) { |
59 | struct entry_tsg head; | 10 | runlist_base_t rl_base; |
60 | runlist_base_t rl_base; | 11 | runlist_info_t rl_info; |
61 | runlist_info_t rl_info; | 12 | u64 runlist_iova; |
62 | u64 runlist_iova; | 13 | *rl_iter = (struct runlist_iter){0}; |
63 | struct gk20a *g = get_live_gk20a(); | 14 | rl_base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(rl_id)); |
64 | if (!g) | 15 | // Check that reads are working |
16 | if (rl_base.raw == -1) | ||
65 | return -EIO; | 17 | return -EIO; |
66 | rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); | 18 | // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be: |
67 | rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); | 19 | // - A GPU address (type is sysmem_coherent) |
68 | runlist_iova = ((u64)rl_base.ptr) << 12; | 20 | // - A physical address (dereferencing after ioremap crashes) |
69 | printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n", | 21 | // - A kernel virtual address (dereferencing segfaults) |
70 | rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova); | 22 | // So maybe it's some sort of custom thing? This is an address that the GPU |
71 | // TODO: Support reading video memory | 23 | // can use, so it would make most sense for it to be a physical address. |
72 | if (rl_base.type == TARGET_VID_MEM) { | 24 | // |
73 | printk(KERN_ERR "[nvdebug] Runlist is located in video memory. Access to video memory is unimplemented."); | 25 | // BUT, it can't possibly be a physical address, as it would refer to an |
74 | return -ENOTSUPP; | 26 | // address greater than the maximum one on our system (by a lot!). |
27 | // Maybe I'm reading the runlist base wrong? | ||
28 | // Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual | ||
29 | // address! So, what's this I/O address space? All I know is that it's what | ||
30 | // nvgpu_mem_get_addr() returns. That function returns the result of either: | ||
31 | // - gpu_phys_addr which is __nvgpu_sgl_phys on our platform which (?) | ||
32 | // converts an IPA to a PA? | ||
33 | // - nvgpu_mem_iommu_translate | ||
34 | // | ||
35 | // The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which | ||
36 | // returns SYSMEM. | ||
37 | // | ||
38 | // To convert a physical address to a IOMMU address, we add a bit | ||
39 | // | ||
40 | // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working | ||
41 | // before because the GPU had simply gone to sleep and invalidated its | ||
42 | // register state, so nvgpu_readl() was simply returning garbage. | ||
43 | rl_info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(rl_id)); | ||
44 | runlist_iova = ((u64)rl_base.ptr) << 12; | ||
45 | printk(KERN_INFO "[nvdebug] Runlist %d @ %llx in %s (config raw: %x)\n", | ||
46 | rl_id, runlist_iova, target_to_text(rl_base.target), rl_base.raw); | ||
47 | printk(KERN_INFO "[nvdebug] Runlist length %d, ID %d\n", rl_info.len, rl_info.id); | ||
48 | // Return early on an empty runlist | ||
49 | if (!rl_info.len) | ||
50 | return 0; | ||
51 | // If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping | ||
52 | if (rl_base.target == TARGET_VID_MEM) { | ||
53 | printk(KERN_WARNING "[nvdebug] Runlist is located in video memory. Access to video memory is experimental."); | ||
54 | bar_config_block_t bar1_block, bar2_block; | ||
55 | bar1_block.raw = nvdebug_readl(g, NV_PBUS_BAR1_BLOCK); | ||
56 | printk(KERN_INFO "[nvdebug] BAR1 inst block @ %llx in %s's %s address space.\n", ((u64)bar1_block.ptr) << 12, target_to_text(bar1_block.target), bar1_block.is_virtual ? "virtual" : "physical"); | ||
57 | bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK); | ||
58 | printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar2_block.is_virtual ? "virtual" : "physical"); | ||
59 | uint32_t bar_inst_pramin_offset = vram2PRAMIN(g, (uint64_t)bar2_block.ptr << 12); | ||
60 | if (!bar_inst_pramin_offset) { | ||
61 | printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n"); | ||
62 | return -EOPNOTSUPP; | ||
63 | } | ||
64 | /* TODO: Support BAR1? | ||
65 | bar_inst_pramin_offset = vram2PRAMIN(g, bar1_block.ptr << 12); | ||
66 | if (!bar_inst_pramin_offset) { | ||
67 | printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR1 in the current NV_PRAMIN window. VRAM inaccessible.\n"); | ||
68 | return -EOPNOTSUPP; | ||
69 | }*/ | ||
70 | // Instance blocks (size == 1kb) contain many things, but we only care about | ||
71 | // the section which describes the location of the page directory (page table) | ||
72 | uint32_t bar_pdb_config_pramin_offset = bar_inst_pramin_offset + NV_PRAMIN_PDB_CONFIG_OFF; | ||
73 | page_dir_config_t pd_config; | ||
74 | pd_config.raw = nvdebug_readq(g, bar_pdb_config_pramin_offset + NV_PRAMIN); | ||
75 | uint64_t bar_pdb_vram_addr = pd_config.page_dir_hi; | ||
76 | bar_pdb_vram_addr <<= 20; | ||
77 | bar_pdb_vram_addr |= pd_config.page_dir_lo; | ||
78 | bar_pdb_vram_addr <<= 12; | ||
79 | printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx in %s of version %s (config raw: %llx)\n", bar_pdb_vram_addr, target_to_text(pd_config.target), pd_config.is_ver2 ? "2" : "1", pd_config.raw); | ||
80 | // TODO: SYSMEM support for page table location | ||
81 | if (pd_config.target != TARGET_VID_MEM) { | ||
82 | printk(KERN_WARNING "[nvdebug] BAR2 PDB is in an unsupported location.\n"); | ||
83 | return -EOPNOTSUPP; | ||
84 | } | ||
85 | uint32_t bar_pdb_pramin_offset = vram2PRAMIN(g, bar_pdb_vram_addr); | ||
86 | if (!bar_pdb_pramin_offset) { | ||
87 | printk(KERN_WARNING "[nvdebug] Unable to find page directory BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n"); | ||
88 | return -EOPNOTSUPP; | ||
89 | } | ||
90 | uint64_t runlist_bar_vaddr; | ||
91 | if (pd_config.is_ver2) | ||
92 | runlist_bar_vaddr = search_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova); | ||
93 | else | ||
94 | runlist_bar_vaddr = search_v1_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova); | ||
95 | if (!runlist_bar_vaddr) { | ||
96 | printk(KERN_WARNING "[nvdebug] Unable to find runlist mapping in BAR2/3 page tables.\n"); | ||
97 | return -EOPNOTSUPP; | ||
98 | } | ||
99 | printk(KERN_INFO "[nvdebug] Runlist @ %llx in BAR2 virtual address space.\n", runlist_bar_vaddr); | ||
100 | /* XXX: Old test code | ||
101 | uint32_t bar2_pd_pramin_offset = vram_to_pramin_off(bar2_pd); | ||
102 | //walk_pd_subtree(bar2_pd_pramin_offset); | ||
103 | uint64_t runlist_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, runlist_iova); | ||
104 | page_dir_entry_t pde_0; | ||
105 | pde_0.raw = nvdebug_readl(g, NV_PRAMIN + bar2_pd_pramin_offset); | ||
106 | uint32_t pde_1 = nvdebug_readl(g, NV_PRAMIN + vram_to_pramin_off(((u64)pde_0.addr) << 12)); | ||
107 | uint64_t pde_bar2_vaddr = search_pd_subtree(bar2_pd_pramin_offset, ((u64)pde_0.addr) << 12); | ||
108 | uint32_t pde_2 = readl(g->bar3 + pde_bar2_vaddr); | ||
109 | printk(KERN_INFO "[nvdebug] PDE0 via PRAMIN: %x, via BAR3: %x\n", pde_1, pde_2); | ||
110 | */ | ||
111 | if (!g->bar3) { | ||
112 | printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped.\n"); | ||
113 | return -ENODEV; | ||
114 | } | ||
115 | rl_iter->curr_entry = g->bar3 + runlist_bar_vaddr; | ||
116 | } else { | ||
117 | // Directly access the runlist if stored in SYS_MEM (physically addressed) | ||
118 | rl_iter->curr_entry = phys_to_virt(runlist_iova); | ||
75 | } | 119 | } |
76 | // Segfaults | 120 | rl_iter->rl_info = rl_info; |
77 | //u32 attempted_read = ioread32(runlist_iova); | 121 | return 0; |
78 | //printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read); | ||
79 | |||
80 | // Errors out | ||
81 | //u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg)); | ||
82 | //printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr); | ||
83 | |||
84 | /* Overcomplicated? | ||
85 | struct iommu_domain *domain = iommu_get_domain_for_dev(dev); | ||
86 | if (!domain) { | ||
87 | printk(KERN_INFO "[nvdebug] No IOMMU domain!\n"); | ||
88 | return -EIO; | ||
89 | } | ||
90 | u64 phys_addr = platform_bus_type.iommu_ops->iova_to_phys(domain, runlist_iova); | ||
91 | printk(KERN_INFO "[nvdebug] Runlist PA: %px\n", phys_addr); | ||
92 | */ | ||
93 | |||
94 | printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova)); | ||
95 | printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova)); | ||
96 | head = *(struct entry_tsg*)phys_to_virt(runlist_iova); | ||
97 | |||
98 | rl_iter->curr_tsg = (struct entry_tsg*)phys_to_virt(runlist_iova); | ||
99 | rl_iter->rl_info = rl_info; | ||
100 | return 0; | ||
101 | //printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); | ||
102 | //printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); | ||
103 | //printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); | ||
104 | //printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | ||
105 | //printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); | ||
106 | |||
107 | //printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL)); | ||
108 | //printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL)); | ||
109 | //printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes | ||
110 | //read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg)); | ||
111 | /*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type); | ||
112 | printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale); | ||
113 | printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout); | ||
114 | printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length); | ||
115 | printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */ | ||
116 | } | 122 | } |
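The PDB address reassembly above is easy to misread, so here it is in isolation; a sketch using the page_dir_hi/page_dir_lo field names from page_dir_config_t:

        #include <stdint.h>

        // The instance block splits the page-directory base across two fields;
        // the full VRAM byte address is ((hi << 20) | lo) << 12.
        static inline uint64_t pdb_vram_addr(uint32_t page_dir_hi, uint32_t page_dir_lo)
        {
                uint64_t addr = page_dir_hi;
                addr <<= 20;
                addr |= page_dir_lo;
                return addr << 12;
        }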
117 | 123 | ||
118 | int preempt_tsg(uint32_t tsg_id) { | 124 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) { |
119 | struct gk20a *g = get_live_gk20a(); | ||
120 | runlist_info_t rl_info; | 125 | runlist_info_t rl_info; |
121 | pfifo_preempt_t pfifo_preempt; | 126 | pfifo_preempt_t pfifo_preempt; |
122 | runlist_disable_t rl_disable; | 127 | runlist_disable_t rl_disable; |
diff --git a/runlist_procfs.c b/runlist_procfs.c index 411f844..a6b0d94 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
@@ -6,7 +6,14 @@ | |||
6 | #define RUNLIST_PROCFS_NAME "runlist" | 6 | #define RUNLIST_PROCFS_NAME "runlist" |
7 | #define DETAILED_CHANNEL_INFO | 7 | #define DETAILED_CHANNEL_INFO |
8 | 8 | ||
9 | static int runlist_detail_seq_show_chan(struct seq_file *s, struct gk20a *g, uint32_t chid) { | 9 | /* Print channel details using PCCSR (Programmable Channel Control System RAM?) |
10 | * @param s Pointer to state from seq_file subsystem to pass to seq_printf | ||
11 | * @param g Pointer to our internal GPU state | ||
12 | * @param chid ID of channel to print details on, range [0, 512) | ||
13 | * @param prefix Text string to prefix each line with, or empty string | ||
14 | */ | ||
15 | #ifdef DETAILED_CHANNEL_INFO | ||
16 | static int runlist_detail_seq_show_chan(struct seq_file *s, struct nvdebug_state *g, uint32_t chid, char *prefix) { | ||
10 | channel_ctrl_t chan; | 17 | channel_ctrl_t chan; |
11 | char *loc_txt; | 18 | char *loc_txt; |
12 | u64 instance_ptr; | 19 | u64 instance_ptr; |
@@ -16,23 +23,37 @@ static int runlist_detail_seq_show_chan(struct seq_file *s, struct gk20a *g, uin | |||
16 | return -EIO; | 23 | return -EIO; |
17 | instance_ptr = chan.inst_ptr; | 24 | instance_ptr = chan.inst_ptr; |
18 | instance_ptr <<= 12; | 25 | instance_ptr <<= 12; |
19 | seq_printf(s, " +- Channel Info %-4d -+\n", chid); | 26 | seq_printf(s, "%s+- Channel Info %-4d -+\n", prefix, chid); |
20 | seq_printf(s, " | Enabled: %d|\n", chan.enable); | 27 | seq_printf(s, "%s| Enabled: %d|\n", prefix, chan.enable); |
21 | seq_printf(s, " | Next: %d|\n", chan.next); | 28 | seq_printf(s, "%s| Next: %d|\n", prefix, chan.next); |
22 | seq_printf(s, " | Force CTX Reload: %d|\n", chan.force_ctx_reload); | 29 | seq_printf(s, "%s| Force CTX Reload: %d|\n", prefix, chan.force_ctx_reload); |
23 | seq_printf(s, " | Enable set: %d|\n", chan.enable_set); | 30 | seq_printf(s, "%s| Enable set: %d|\n", prefix, chan.enable_set); |
24 | seq_printf(s, " | Enable clear: %d|\n", chan.enable_clear); | 31 | seq_printf(s, "%s| Enable clear: %d|\n", prefix, chan.enable_clear); |
25 | seq_printf(s, " | PBDMA Faulted: %d|\n", chan.pbdma_faulted); | 32 | seq_printf(s, "%s| PBDMA Faulted: %d|\n", prefix, chan.pbdma_faulted); |
26 | seq_printf(s, " | ENG Faulted: %d|\n", chan.eng_faulted); | 33 | seq_printf(s, "%s| ENG Faulted: %d|\n", prefix, chan.eng_faulted); |
27 | seq_printf(s, " | Status: %2d|\n", chan.status); | 34 | seq_printf(s, "%s| Status: %2d|\n", prefix, chan.status); |
28 | seq_printf(s, " | Busy: %d|\n", chan.busy); | 35 | seq_printf(s, "%s| Busy: %d|\n", prefix, chan.busy); |
29 | seq_printf(s, " | Instance PTR: |\n"); | 36 | seq_printf(s, "%s| Instance PTR: |\n", prefix); |
30 | seq_printf(s, " | %#018llx |\n", instance_ptr); | 37 | seq_printf(s, "%s| %#018llx |\n", prefix, instance_ptr); |
31 | seq_printf(s, " | %-20s|\n", loc_txt); | 38 | seq_printf(s, "%s| %-20s|\n", prefix, loc_txt); |
32 | seq_printf(s, " | Instance bound: %d|\n", chan.inst_bind); | 39 | seq_printf(s, "%s| Instance bound: %d|\n", prefix, chan.inst_bind); |
33 | seq_printf(s, " +---------------------+\n"); | 40 | // START TEMP |
41 | // "runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id" | ||
42 | // GR, GRCE, and ASYNC_CE | ||
43 | // Note that this appears to be broken?? | ||
44 | // Peek into the channel instance RAM | ||
45 | if (chan.inst_target == TARGET_SYS_MEM_COHERENT) { | ||
46 | seq_printf(s, "%s| Target Engine: %2d|\n", prefix, *(uint32_t*)phys_to_virt(instance_ptr + 4/*bytes for 32bits*/*43/*NV_RAMFC_TARGET*/) & 0x1f); | ||
47 | seq_printf(s, "%s| PDB LO: %#08x|\n", prefix, *(uint32_t*)phys_to_virt(instance_ptr + 4/*bytes for 32bits*/*128/*NV_RAMIN_PAGE_DIR_BASE_LO*/) & 0xfffff000); | ||
48 | seq_printf(s, "%s| Num subcontexts: %2ld|\n", prefix, hweight64(*(uint64_t*)phys_to_virt(instance_ptr + 4/*bytes for 32bits*/*166/*NV_RAMIN_SC_PDB_VALID*/))); | ||
49 | // This appears to be unset on Xavier | ||
50 | //seq_printf(s, "%s| PAS ID: %8ld|\n", prefix, *(uint32_t*)phys_to_virt(instance_ptr + 4/*bytes for 32bits*/*135/*NV_RAMIN_PASID*/) & 0xfffff); | ||
51 | } | ||
52 | // END TEMP | ||
53 | seq_printf(s, "%s+---------------------+\n", prefix); | ||
34 | return 0; | 54 | return 0; |
35 | } | 55 | } |
56 | #endif | ||
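The TEMP block above addresses channel instance RAM as an array of 32-bit words (word 43 for NV_RAMFC_TARGET, word 128 for NV_RAMIN_PAGE_DIR_BASE_LO, and so on). A hedged helper capturing that pattern; it is only meaningful when the instance block lives in coherent sysmem, where phys_to_virt() applies:

        // Word index -> byte offset (x4), read through the kernel linear map.
        // Only valid when chan.inst_target == TARGET_SYS_MEM_COHERENT.
        static inline uint32_t inst_ram_word(uint64_t instance_ptr, unsigned int word_idx)
        {
                return *(uint32_t *)phys_to_virt(instance_ptr + 4ull * word_idx);
        }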
36 | 57 | ||
37 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0) | 58 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0) |
38 | // Bug workaround. See comment in runlist_file_seq_start() | 59 | // Bug workaround. See comment in runlist_file_seq_start() |
@@ -41,10 +62,14 @@ static loff_t pos_fixup; | |||
41 | 62 | ||
42 | static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) { | 63 | static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) { |
43 | static struct runlist_iter rl_iter; | 64 | static struct runlist_iter rl_iter; |
65 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)]; | ||
44 | // *pos == 0 for first call after read of file | 66 | // *pos == 0 for first call after read of file |
45 | if (*pos == 0) { | 67 | if (*pos == 0) { |
46 | int err = get_runlist_iter(&rl_iter); | 68 | int err = get_runlist_iter(g, seq2gpuidx(s), &rl_iter); |
47 | if (err) | 69 | if (err) |
70 | return ERR_PTR(err); | ||
71 | // Don't try to print an empty runlist | ||
72 | if (rl_iter.rl_info.len <= 0) | ||
48 | return NULL; | 73 | return NULL; |
49 | return &rl_iter; | 74 | return &rl_iter; |
50 | } | 75 | } |
@@ -68,12 +93,13 @@ static void* runlist_file_seq_next(struct seq_file *s, void *raw_rl_iter, | |||
68 | loff_t *pos) { | 93 | loff_t *pos) { |
69 | struct runlist_iter* rl_iter = raw_rl_iter; | 94 | struct runlist_iter* rl_iter = raw_rl_iter; |
70 | void *ret = NULL; | 95 | void *ret = NULL; |
71 | // Advance by one TSG + channels under last TSG | 96 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)]; |
72 | *pos += 1 + rl_iter->curr_tsg->tsg_length; | 97 | // Advance by one TSG or channel |
98 | (*pos)++; | ||
99 | rl_iter->curr_entry += NV_RL_ENTRY_SIZE(g); | ||
73 | // Verify we haven't reached the end of the runlist | 100 | // Verify we haven't reached the end of the runlist |
74 | // rl_info.len is the num of tsg entries + total num of channel entries | 101 | // rl_info.len is the num of tsg entries + total num of channel entries |
75 | if (*pos < rl_iter->rl_info.len) { | 102 | if (*pos < rl_iter->rl_info.len) { |
76 | rl_iter->curr_tsg = next_tsg(rl_iter->curr_tsg); | ||
77 | ret = rl_iter; | 103 | ret = rl_iter; |
78 | } | 104 | } |
79 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0) | 105 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0) |
@@ -88,57 +114,57 @@ static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter) { | |||
88 | } | 114 | } |
89 | 115 | ||
90 | static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { | 116 | static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { |
91 | struct entry_tsg* tsg = ((struct runlist_iter*)raw_rl_iter)->curr_tsg; | 117 | struct runlist_iter *rl_iter = raw_rl_iter; |
92 | struct runlist_chan* chan; | 118 | void *entry = rl_iter->curr_entry; |
93 | struct gk20a *g = get_live_gk20a(); | 119 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(s->file)]; |
94 | if (!g) | 120 | if (entry_type(g, entry) == ENTRY_TYPE_TSG) { |
95 | return -EIO; | 121 | if (rl_iter->channels_left_in_tsg) { |
96 | if (tsg->entry_type != ENTRY_TYPE_TSG) { | 122 | printk(KERN_WARNING "[nvdebug] Found a TSG @ %px when %d channels were still expected under the previous TSG in the runlist!\n", entry, rl_iter->channels_left_in_tsg); |
97 | printk(KERN_WARNING "[nvdebug] Attempted to print non-TSG in tsg print logic!\n"); | 123 | return -EIO; |
98 | return -EIO; | 124 | } |
99 | } | 125 | rl_iter->channels_left_in_tsg = tsg_length(g, entry); |
100 | seq_printf(s, "+---- TSG Entry %-2d----+\n", tsg->tsgid); | 126 | seq_printf(s, "+---- TSG Entry %-3d---+\n", tsgid(g, entry)); |
101 | seq_printf(s, "| Scale: %-13d|\n", tsg->timeslice_scale); | 127 | seq_printf(s, "| Scale: %-13d|\n", timeslice_scale(g, entry)); |
102 | seq_printf(s, "| Timeout: %-11d|\n", tsg->timeslice_timeout); | 128 | seq_printf(s, "| Timeout: %-11d|\n", timeslice_timeout(g, entry)); |
103 | seq_printf(s, "+---------------------+\n"); | 129 | seq_printf(s, "| Length: %-12d|\n", tsg_length(g, entry)); |
104 | for_chan_in_tsg(chan, tsg) { | 130 | seq_printf(s, "+---------------------+\n"); |
131 | } else { | ||
132 | char *indt = ""; | ||
105 | #ifndef DETAILED_CHANNEL_INFO | 133 | #ifndef DETAILED_CHANNEL_INFO |
106 | char* loc_txt; | 134 | u64 instance_ptr = 0; |
107 | u64 instance_ptr; | ||
108 | #endif | 135 | #endif |
109 | if (chan->entry_type != ENTRY_TYPE_CHAN) { | 136 | if (rl_iter->channels_left_in_tsg) { |
110 | printk(KERN_WARNING "[nvdebug] Attempted to print non-channel in channel print logic!\n"); | 137 | indt = " "; |
111 | return -EIO; | 138 | rl_iter->channels_left_in_tsg--; |
112 | } | 139 | } |
113 | #ifdef DETAILED_CHANNEL_INFO | 140 | #ifdef DETAILED_CHANNEL_INFO |
114 | runlist_detail_seq_show_chan(s, g, chan->chid); | 141 | runlist_detail_seq_show_chan(s, g, chid(g, entry), indt); |
115 | #else | 142 | #else |
116 | loc_txt = target_to_text(chan->inst_target); | ||
117 | if (!loc_txt) { | ||
118 | printk(KERN_WARNING "[nvdebug] Invalid apature in channel print logic!\n"); | ||
119 | return -EIO; | ||
120 | } | ||
121 | // Reconstruct pointer to channel instance block | 143 | // Reconstruct pointer to channel instance block |
122 | instance_ptr = chan->inst_ptr_hi; | 144 | if (g->chip_id >= NV_CHIP_ID_VOLTA) { |
123 | instance_ptr <<= 32; | 145 | instance_ptr = ((struct gv100_runlist_chan*)entry)->inst_ptr_hi; |
124 | instance_ptr |= chan->inst_ptr_lo << 12; | 146 | instance_ptr <<= 32; |
125 | 147 | } | |
126 | seq_printf(s, " +- Channel Entry %-4d-+\n", chan->chid); | 148 | instance_ptr |= inst_ptr_lo(g, entry) << 12; |
127 | seq_printf(s, " | Runqueue Selector: %d|\n", chan->runqueue_selector); | 149 | |
128 | seq_printf(s, " | Instance PTR: |\n"); | 150 | seq_printf(s, "%s+- Channel Entry %-4d-+\n", indt, chid(g, entry)); |
129 | seq_printf(s, " | %#018llx |\n", instance_ptr); | 151 | if (g->chip_id >= NV_CHIP_ID_VOLTA) |
130 | seq_printf(s, " | %-20s|\n", loc_txt); | 152 | seq_printf(s, "%s| Runqueue Selector: %d|\n", indt, |
131 | seq_printf(s, " +---------------------+\n"); | 153 | ((struct gv100_runlist_chan*)entry)->runqueue_selector); |
154 | seq_printf(s, "%s| Instance PTR: |\n", indt); | ||
155 | seq_printf(s, "%s| %#018llx |\n", indt, instance_ptr); | ||
156 | seq_printf(s, "%s| %-20s|\n", indt, target_to_text(inst_target(g, entry))); | ||
157 | seq_printf(s, "%s+---------------------+\n", indt); | ||
132 | #endif | 158 | #endif |
133 | } | 159 | } |
134 | return 0; | 160 | return 0; |
135 | } | 161 | } |
136 | 162 | ||
137 | static const struct seq_operations runlist_file_seq_ops = { | 163 | static const struct seq_operations runlist_file_seq_ops = { |
138 | .start = runlist_file_seq_start, | 164 | .start = runlist_file_seq_start, |
139 | .next = runlist_file_seq_next, | 165 | .next = runlist_file_seq_next, |
140 | .stop = runlist_file_seq_stop, | 166 | .stop = runlist_file_seq_stop, |
141 | .show = runlist_file_seq_show, | 167 | .show = runlist_file_seq_show, |
142 | }; | 168 | }; |
143 | 169 | ||
144 | static int runlist_file_open(struct inode *inode, struct file *f) { | 170 | static int runlist_file_open(struct inode *inode, struct file *f) { |
@@ -157,6 +183,7 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, | |||
157 | uint32_t target_tsgid; | 183 | uint32_t target_tsgid; |
158 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 184 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
159 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); | 185 | int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); |
186 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; | ||
160 | if (err) | 187 | if (err) |
161 | return err; | 188 | return err; |
162 | 189 | ||
@@ -165,7 +192,7 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer, | |||
165 | return -ERANGE; | 192 | return -ERANGE; |
166 | 193 | ||
167 | // Execute preemption | 194 | // Execute preemption |
168 | err = preempt_tsg(target_tsgid); | 195 | err = preempt_tsg(g, target_tsgid); |
169 | if (err) | 196 | if (err) |
170 | return err; | 197 | return err; |
171 | 198 | ||
@@ -181,9 +208,9 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, | |||
181 | uint32_t target_channel; | 208 | uint32_t target_channel; |
182 | channel_ctrl_t chan; | 209 | channel_ctrl_t chan; |
183 | int err; | 210 | int err; |
184 | struct gk20a *g = get_live_gk20a(); | 211 | runlist_info_t rl_info; |
185 | if (!g) | 212 | runlist_disable_t rl_disable; |
186 | return -EIO; | 213 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; |
187 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 214 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
188 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); | 215 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); |
189 | if (err) | 216 | if (err) |
@@ -195,7 +222,16 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer, | |||
195 | // Disable channel | 222 | // Disable channel |
196 | chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel)); | 223 | chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel)); |
197 | chan.enable_clear = true; | 224 | chan.enable_clear = true; |
225 | // disable sched | ||
226 | rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); | ||
227 | rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE); | ||
228 | rl_disable.raw |= BIT(rl_info.id); | ||
229 | nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); | ||
230 | // disable chan | ||
198 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); | 231 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw); |
232 | // enable sched | ||
233 | rl_disable.raw &= ~BIT(rl_info.id); | ||
234 | nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw); | ||
199 | 235 | ||
200 | return count; | 236 | return count; |
201 | } | 237 | } |
@@ -209,9 +245,7 @@ ssize_t enable_channel_file_write(struct file *f, const char __user *buffer, | |||
209 | uint32_t target_channel; | 245 | uint32_t target_channel; |
210 | channel_ctrl_t chan; | 246 | channel_ctrl_t chan; |
211 | int err; | 247 | int err; |
212 | struct gk20a *g = get_live_gk20a(); | 248 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; |
213 | if (!g) | ||
214 | return -EIO; | ||
215 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 249 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
216 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); | 250 | err = kstrtou32_from_user(buffer, count, 0, &target_channel); |
217 | if (err) | 251 | if (err) |
@@ -235,14 +269,12 @@ const struct file_operations enable_channel_file_ops = { | |||
235 | ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, | 269 | ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, |
236 | size_t count, loff_t *off) { | 270 | size_t count, loff_t *off) { |
237 | uint32_t target_tsgid; | 271 | uint32_t target_tsgid; |
238 | struct runlist_chan* chan; | 272 | struct gv100_runlist_chan* chan; |
239 | channel_ctrl_t chan_ctl; | 273 | channel_ctrl_t chan_ctl; |
240 | struct runlist_iter rl_iter; | 274 | struct runlist_iter rl_iter; |
241 | int err; | 275 | int err; |
242 | loff_t pos = 0; | 276 | loff_t pos = 0; |
243 | struct gk20a *g = get_live_gk20a(); | 277 | struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)]; |
244 | if (!g) | ||
245 | return -EIO; | ||
246 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec | 278 | // Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec |
247 | err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); | 279 | err = kstrtou32_from_user(buffer, count, 0, &target_tsgid); |
248 | if (err) | 280 | if (err) |
@@ -251,32 +283,34 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer, | |||
251 | if (target_tsgid > MAX_TSGID) | 283 | if (target_tsgid > MAX_TSGID) |
252 | return -ERANGE; | 284 | return -ERANGE; |
253 | 285 | ||
254 | err = get_runlist_iter(&rl_iter); | 286 | err = get_runlist_iter(g, 0, &rl_iter); |
255 | if (err) | 287 | if (err) |
256 | return err; | 288 | return err; |
257 | 289 | ||
258 | // Iterate through all TSGs | 290 | // Iterate through all TSGs |
259 | while (pos < rl_iter.rl_info.len) { | 291 | while (pos < rl_iter.rl_info.len) { |
260 | if (rl_iter.curr_tsg->tsgid == target_tsgid) { | 292 | if (tsgid(g, rl_iter.curr_entry) == target_tsgid) { |
261 | // Enable channels of target TSG | 293 | // Enable channels of target TSG |
262 | for_chan_in_tsg(chan, rl_iter.curr_tsg) { | 294 | for_chan_in_tsg(g, chan, rl_iter.curr_entry) { |
263 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); | 295 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); |
264 | chan_ctl.enable_set = true; | 296 | chan_ctl.enable_set = true; |
265 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); | 297 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); |
266 | } | 298 | } |
267 | } else { | 299 | } else { |
300 | // XXX: Fix for bare channels. Maybe a "for_chan_until_tsg" macro? | ||
268 | // Disable all other channels | 301 | // Disable all other channels |
269 | for_chan_in_tsg(chan, rl_iter.curr_tsg) { | 302 | // (This is how the Jetson nvgpu driver disables TSGs) |
303 | for_chan_in_tsg(g, chan, rl_iter.curr_entry) { | ||
270 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); | 304 | chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid)); |
271 | chan_ctl.enable_clear = true; | 305 | chan_ctl.enable_clear = true; |
272 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); | 306 | nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw); |
273 | } | 307 | } |
274 | } | 308 | } |
275 | pos += 1 + rl_iter.curr_tsg->tsg_length; | 309 | pos += 1 + tsg_length(g, rl_iter.curr_entry); |
276 | rl_iter.curr_tsg = next_tsg(rl_iter.curr_tsg); | 310 | rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry); |
277 | } | 311 | } |
278 | // Switch to next TSG with active channels (should be our TSG) | 312 | // Switch to next TSG with active channels (should be our TSG) |
279 | err = preempt_tsg(target_tsgid); | 313 | err = preempt_tsg(g, target_tsgid); |
280 | if (err) | 314 | if (err) |
281 | return err; | 315 | return err; |
282 | 316 | ||
@@ -0,0 +1,80 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Helpful private functions copied from elsewhere in the kernel tree | ||
4 | * DO NOT MODIFY | ||
5 | */ | ||
6 | #include <linux/version.h> | ||
7 | |||
8 | // Functions from drivers/pci/pci.h | ||
9 | /** | ||
10 | * pci_match_one_device - Tell if a PCI device structure has a matching | ||
11 | * PCI device id structure | ||
12 | * @id: single PCI device id structure to match | ||
13 | * @dev: the PCI device structure to match against | ||
14 | * | ||
15 | * Returns the matching pci_device_id structure or %NULL if there is no match. | ||
16 | */ | ||
17 | static inline const struct pci_device_id * | ||
18 | pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) | ||
19 | { | ||
20 | if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && | ||
21 | (id->device == PCI_ANY_ID || id->device == dev->device) && | ||
22 | (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) && | ||
23 | (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) && | ||
24 | !((id->class ^ dev->class) & id->class_mask)) | ||
25 | return id; | ||
26 | return NULL; | ||
27 | } | ||
28 | |||
29 | // Functions from drivers/pci/search.c | ||
30 | #include <linux/device.h> | ||
31 | #include <linux/pci.h> | ||
32 | extern struct bus_type pci_bus_type; | ||
33 | |||
34 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0) | ||
35 | static int match_pci_dev_by_id(struct device *dev, void *data) | ||
36 | #else | ||
37 | static int match_pci_dev_by_id(struct device *dev, const void *data) | ||
38 | #endif | ||
39 | { | ||
40 | struct pci_dev *pdev = to_pci_dev(dev); | ||
41 | const struct pci_device_id *id = data; | ||
42 | |||
43 | if (pci_match_one_device(id, pdev)) | ||
44 | return 1; | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | /* | ||
49 | * pci_get_dev_by_id - begin or continue searching for a PCI device by id | ||
50 | * @id: pointer to struct pci_device_id to match for the device | ||
51 | * @from: Previous PCI device found in search, or %NULL for new search. | ||
52 | * | ||
53 | * Iterates through the list of known PCI devices. If a PCI device is found | ||
54 | * with a matching id a pointer to its device structure is returned, and the | ||
55 | * reference count to the device is incremented. Otherwise, %NULL is returned. | ||
56 | * A new search is initiated by passing %NULL as the @from argument. Otherwise | ||
57 | * if @from is not %NULL, searches continue from next device on the global | ||
58 | * list. The reference count for @from is always decremented if it is not | ||
59 | * %NULL. | ||
60 | * | ||
61 | * This is an internal function for use by the other search functions in | ||
62 | * this file. | ||
63 | */ | ||
64 | static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id, | ||
65 | struct pci_dev *from) | ||
66 | { | ||
67 | struct device *dev; | ||
68 | struct device *dev_start = NULL; | ||
69 | struct pci_dev *pdev = NULL; | ||
70 | |||
71 | if (from) | ||
72 | dev_start = &from->dev; | ||
73 | dev = bus_find_device(&pci_bus_type, dev_start, (void *)id, | ||
74 | match_pci_dev_by_id); | ||
75 | if (dev) | ||
76 | pdev = to_pci_dev(dev); | ||
77 | pci_dev_put(from); | ||
78 | return pdev; | ||
79 | } | ||
80 | |||
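To make the reference-counting contract above concrete: each call drops the reference on @from and takes one on the device it returns, so a loop that runs to exhaustion needs no cleanup, and only a caller that stops early while keeping the device must manage the final reference itself. A sketch (want_to_keep() is hypothetical; query as in probe_and_cache_device()):

        struct pci_dev *pcid = NULL;
        while ((pcid = pci_get_dev_by_id(&query, pcid))) {
                if (want_to_keep(pcid))
                        break; // keep our reference; pci_dev_put() it when done
        }
        // If the loop ran to completion, pcid is NULL and no references remain.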