aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--device_info_procfs.c4
-rw-r--r--include/nvgpu/nvlink.h2
-rw-r--r--nvdebug.h14
-rw-r--r--nvdebug_entry.c68
-rw-r--r--runlist_procfs.c10
5 files changed, 65 insertions, 33 deletions
diff --git a/device_info_procfs.c b/device_info_procfs.c
index cd6c53c..c96007a 100644
--- a/device_info_procfs.c
+++ b/device_info_procfs.c
@@ -22,7 +22,7 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size,
22 *off += chars_written; 22 *off += chars_written;
23 return chars_written; 23 return chars_written;
24} 24}
25const struct file_operations nvdebug_read_reg32_file_ops = { 25struct file_operations nvdebug_read_reg32_file_ops = {
26 .read = nvdebug_reg32_read, 26 .read = nvdebug_reg32_read,
27}; 27};
28 28
@@ -118,7 +118,7 @@ static int device_info_file_open(struct inode *inode, struct file *f) {
118 return seq_open(f, &device_info_file_seq_ops); 118 return seq_open(f, &device_info_file_seq_ops);
119} 119}
120 120
121const struct file_operations device_info_file_ops = { 121struct file_operations device_info_file_ops = {
122 .open = device_info_file_open, 122 .open = device_info_file_open,
123 .read = seq_read, 123 .read = seq_read,
124 .llseek = seq_lseek, 124 .llseek = seq_lseek,
diff --git a/include/nvgpu/nvlink.h b/include/nvgpu/nvlink.h
index a74111c..26c83f1 100644
--- a/include/nvgpu/nvlink.h
+++ b/include/nvgpu/nvlink.h
@@ -26,7 +26,7 @@
26#include <nvgpu/types.h> 26#include <nvgpu/types.h>
27 27
28#ifdef __KERNEL__ 28#ifdef __KERNEL__
29#include <nvgpu/linux/nvlink.h> 29//#include <nvgpu/linux/nvlink.h>
30#elif defined(__NVGPU_POSIX__) 30#elif defined(__NVGPU_POSIX__)
31#include <nvgpu/posix/nvlink.h> 31#include <nvgpu/posix/nvlink.h>
32#else 32#else
diff --git a/nvdebug.h b/nvdebug.h
index 1882756..968a60b 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -4,8 +4,10 @@
4 4
5// TODO(jbakita): Don't depend on these. 5// TODO(jbakita): Don't depend on these.
6#include <nvgpu/gk20a.h> // For struct gk20a 6#include <nvgpu/gk20a.h> // For struct gk20a
7#include <os/linux/os_linux.h> // For struct nvgpu_os_linux
8#include <linux/proc_fs.h> // For PDE_DATA() macro 7#include <linux/proc_fs.h> // For PDE_DATA() macro
8#include <linux/device.h> // For dev_get_drvdata()
9#include <linux/version.h> // For KERNEL_VERSION and LINUX_VERSION_CODE
10#include <asm/io.h>
9 11
10/* Runlist Channel 12/* Runlist Channel
11 A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue 13 A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue
@@ -943,7 +945,17 @@ static inline int file2parentgpuidx(const struct file *f) {
943 return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode); 945 return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode);
944} 946}
945 947
948#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
949// Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry
950// of the gk20a struct (after a function pointer). This change was made as L4T
951// was upgraded from Linux 4.9 to 5.10 (r32 -> r34+)
952// Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA
953// i.e. if FUSA was enabled, this is wrong.
954#define gk20a_regs(gk20a) ((void*)gk20a + sizeof(void(*)(void)))
955#else
956#include <os/linux/os_linux.h> // For struct nvgpu_os_linux, which holds regs
946#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) 957#define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs)
958#endif
947 959
948// Similar to nvgpu_readl() 960// Similar to nvgpu_readl()
949// (except we don't try to resolve situations where regs is NULL) 961// (except we don't try to resolve situations where regs is NULL)
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index fa35fb2..60fb7af 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -18,19 +18,38 @@ MODULE_LICENSE("Dual MIT/GPL");
18MODULE_AUTHOR("Joshua Bakita"); 18MODULE_AUTHOR("Joshua Bakita");
19MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs"); 19MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs");
20 20
21extern const struct file_operations runlist_file_ops; 21extern struct file_operations runlist_file_ops;
22extern const struct file_operations preempt_tsg_file_ops; 22extern struct file_operations preempt_tsg_file_ops;
23extern const struct file_operations disable_channel_file_ops; 23extern struct file_operations disable_channel_file_ops;
24extern const struct file_operations enable_channel_file_ops; 24extern struct file_operations enable_channel_file_ops;
25extern const struct file_operations switch_to_tsg_file_ops; 25extern struct file_operations switch_to_tsg_file_ops;
26extern const struct file_operations device_info_file_ops; 26extern struct file_operations device_info_file_ops;
27extern const struct file_operations nvdebug_read_reg32_file_ops; 27extern struct file_operations nvdebug_read_reg32_file_ops;
28 28
29// Bus types are global symbols in the kernel 29// Bus types are global symbols in the kernel
30extern struct bus_type platform_bus_type; 30extern struct bus_type platform_bus_type;
31struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; 31struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
32unsigned int g_nvdebug_devices = 0; 32unsigned int g_nvdebug_devices = 0;
33 33
34// Starting in Kernel 5.6, proc_ops is required instead of file_operations
35#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0)
36// This rewrites the struct to the proc_ops layout on newer kernels
37const struct proc_ops* compat_ops(const struct file_operations* ops) {
38 struct proc_ops new_ops;
39 new_ops.proc_open = ops->open;
40 new_ops.proc_read = ops->read;
41 new_ops.proc_write = ops->write;
42 new_ops.proc_lseek = ops->llseek;
43 new_ops.proc_release = ops->release;
44 memcpy((void*)ops, &new_ops, sizeof(new_ops));
45 return (struct proc_ops*)ops;
46}
47#else
48const struct file_operations* compat_ops(const struct file_operations* ops) {
49 return ops;
50}
51#endif
52
34// TEMP 53// TEMP
35irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) { 54irqreturn_t nvdebug_irq_tap(int irq_num, void * dev) {
36 printk(KERN_INFO "[nvdebug] Interrupt tap triggered on IRQ %d.\n", irq_num); 55 printk(KERN_INFO "[nvdebug] Interrupt tap triggered on IRQ %d.\n", irq_num);
@@ -56,19 +75,20 @@ int probe_and_cache_device(void) {
56 }; 75 };
57 int i = 0; 76 int i = 0;
58 // Search the platform bus for the first device that matches our name 77 // Search the platform bus for the first device that matches our name
59 // Search for GV10B (Jetson Xavier) 78 // Search for GA10B (Jetson Orin)
79 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.ga10b")))
80 dev = temp_dev;
81 // Search for GV11B (Jetson Xavier)
60 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) 82 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b")))
61 dev = temp_dev; 83 dev = temp_dev;
62 // Search for GP10B (Jetson TX2) 84 // Search for GP10B (Jetson TX2)
63 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b"))) 85 while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b")))
64 dev = temp_dev; 86 dev = temp_dev;
65 // TODO: Support other platform bus devices (gk20a, gm20b) 87 // TODO: Support other platform bus devices (gk20a - TK1, gm20b - TX1)
66 if (dev) { 88 if (dev) {
67 struct nvgpu_os_linux *l;
68 mc_boot_0_t ids; 89 mc_boot_0_t ids;
69 g_nvdebug_state[i].g = get_gk20a(dev); 90 g_nvdebug_state[i].g = get_gk20a(dev);
70 l = container_of(g_nvdebug_state[i].g, struct nvgpu_os_linux, g); 91 g_nvdebug_state[i].regs = gk20a_regs(g_nvdebug_state[i].g);
71 g_nvdebug_state[i].regs = l->regs;
72 if (!g_nvdebug_state[i].regs) 92 if (!g_nvdebug_state[i].regs)
73 return -EADDRNOTAVAIL; 93 return -EADDRNOTAVAIL;
74 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0); 94 ids.raw = nvdebug_readl(&g_nvdebug_state[i], NV_MC_BOOT_0);
@@ -139,7 +159,7 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) {
139 for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { 159 for (rl_id = 0; rl_id <= max_rl_id; rl_id++) {
140 snprintf(runlist_name, 12, "runlist%d", rl_id); 160 snprintf(runlist_name, 12, "runlist%d", rl_id);
141 rl_entry = proc_create_data( 161 rl_entry = proc_create_data(
142 runlist_name, 0444, dir, &runlist_file_ops, 162 runlist_name, 0444, dir, compat_ops(&runlist_file_ops),
143 (void*)(uintptr_t)rl_id); 163 (void*)(uintptr_t)rl_id);
144 if (!rl_entry) 164 if (!rl_entry)
145 return -ENOMEM; 165 return -ENOMEM;
@@ -165,7 +185,7 @@ int create_tpc_mask_files(int device_id, struct proc_dir_entry *dir) {
165 // If GPC is enabled, create an entry to read disabled TPCs mask 185 // If GPC is enabled, create an entry to read disabled TPCs mask
166 snprintf(file_name, 20, "gpc%d_tpc_mask", i); 186 snprintf(file_name, 20, "gpc%d_tpc_mask", i);
167 gpc_tpc_mask_entry = proc_create_data( 187 gpc_tpc_mask_entry = proc_create_data(
168 file_name, 0444, dir, &nvdebug_read_reg32_file_ops, 188 file_name, 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
169 (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i)); 189 (void*)(uintptr_t)NV_FUSE_TPC_FOR_GPC(i));
170 if (!gpc_tpc_mask_entry) 190 if (!gpc_tpc_mask_entry)
171 return -ENOMEM; 191 return -ENOMEM;
@@ -197,46 +217,46 @@ int __init nvdebug_init(void) {
197 tpc_masks_create_err = create_tpc_mask_files(device_id, dir); 217 tpc_masks_create_err = create_tpc_mask_files(device_id, dir);
198 // Create file `/proc/gpu#/preempt_tsg`, world writable 218 // Create file `/proc/gpu#/preempt_tsg`, world writable
199 preempt_entry = proc_create_data( 219 preempt_entry = proc_create_data(
200 "preempt_tsg", 0222, dir, &preempt_tsg_file_ops, 220 "preempt_tsg", 0222, dir, compat_ops(&preempt_tsg_file_ops),
201 (void*)device_id); 221 (void*)device_id);
202 // Create file `/proc/gpu#/disable_channel`, world writable 222 // Create file `/proc/gpu#/disable_channel`, world writable
203 disable_channel_entry = proc_create_data( 223 disable_channel_entry = proc_create_data(
204 "disable_channel", 0222, dir, &disable_channel_file_ops, 224 "disable_channel", 0222, dir, compat_ops(&disable_channel_file_ops),
205 (void*)device_id); 225 (void*)device_id);
206 // Create file `/proc/gpu#/enable_channel`, world writable 226 // Create file `/proc/gpu#/enable_channel`, world writable
207 enable_channel_entry = proc_create_data( 227 enable_channel_entry = proc_create_data(
208 "enable_channel", 0222, dir, &enable_channel_file_ops, 228 "enable_channel", 0222, dir, compat_ops(&enable_channel_file_ops),
209 (void*)device_id); 229 (void*)device_id);
210 // Create file `/proc/gpu#/switch_to_tsg`, world writable 230 // Create file `/proc/gpu#/switch_to_tsg`, world writable
211 switch_to_tsg_entry = proc_create_data( 231 switch_to_tsg_entry = proc_create_data(
212 "switch_to_tsg", 0222, dir, &switch_to_tsg_file_ops, 232 "switch_to_tsg", 0222, dir, compat_ops(&switch_to_tsg_file_ops),
213 (void*)device_id); 233 (void*)device_id);
214 // Create file `/proc/gpu#/device_info`, world readable 234 // Create file `/proc/gpu#/device_info`, world readable
215 device_info_entry = proc_create_data( 235 device_info_entry = proc_create_data(
216 "device_info", 0444, dir, &device_info_file_ops, 236 "device_info", 0444, dir, compat_ops(&device_info_file_ops),
217 (void*)device_id); 237 (void*)device_id);
218 // Create file `/proc/gpu#/num_gpcs`, world readable 238 // Create file `/proc/gpu#/num_gpcs`, world readable
219 num_gpcs_entry = proc_create_data( 239 num_gpcs_entry = proc_create_data(
220 "num_gpcs", 0444, dir, &nvdebug_read_reg32_file_ops, 240 "num_gpcs", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
221 (void*)NV_PTOP_SCAL_NUM_GPCS); 241 (void*)NV_PTOP_SCAL_NUM_GPCS);
222 // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable 242 // Create file `/proc/gpu#/num_tpc_per_gpc`, world readable
223 num_gpcs_entry = proc_create_data( 243 num_gpcs_entry = proc_create_data(
224 "num_tpc_per_gpc", 0444, dir, &nvdebug_read_reg32_file_ops, 244 "num_tpc_per_gpc", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
225 (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC); 245 (void*)NV_PTOP_SCAL_NUM_TPC_PER_GPC);
226 // Create file `/proc/gpu#/num_ces`, world readable 246 // Create file `/proc/gpu#/num_ces`, world readable
227 num_gpcs_entry = proc_create_data( 247 num_gpcs_entry = proc_create_data(
228 "num_ces", 0444, dir, &nvdebug_read_reg32_file_ops, 248 "num_ces", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
229 (void*)NV_PTOP_SCAL_NUM_CES); 249 (void*)NV_PTOP_SCAL_NUM_CES);
230 // Create file `/proc/gpu#/num_ces`, world readable 250 // Create file `/proc/gpu#/num_ces`, world readable
231 num_gpcs_entry = proc_create_data( 251 num_gpcs_entry = proc_create_data(
232 "gpc_mask", 0444, dir, &nvdebug_read_reg32_file_ops, 252 "gpc_mask", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
233 (void*)NV_FUSE_GPC); 253 (void*)NV_FUSE_GPC);
234 // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+ 254 // In both nouveau and nvgpu, the PCE_MAP register is only available on Volta+
235 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) { 255 if (g_nvdebug_state[res].chip_id >= NV_CHIP_ID_VOLTA) {
236 // TODO: Redo to num_pces 256 // TODO: Redo to num_pces
237 // Create file `/proc/gpu#/pce_map`, world readable 257 // Create file `/proc/gpu#/pce_map`, world readable
238 num_gpcs_entry = proc_create_data( 258 num_gpcs_entry = proc_create_data(
239 "pce_map", 0444, dir, &nvdebug_read_reg32_file_ops, 259 "pce_map", 0444, dir, compat_ops(&nvdebug_read_reg32_file_ops),
240 (void*)NV_CE_PCE_MAP); 260 (void*)NV_CE_PCE_MAP);
241 } 261 }
242 // ProcFS entry creation only fails if out of memory 262 // ProcFS entry creation only fails if out of memory
diff --git a/runlist_procfs.c b/runlist_procfs.c
index a6b0d94..a0e71b0 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -171,7 +171,7 @@ static int runlist_file_open(struct inode *inode, struct file *f) {
171 return seq_open(f, &runlist_file_seq_ops); 171 return seq_open(f, &runlist_file_seq_ops);
172} 172}
173 173
174const struct file_operations runlist_file_ops = { 174struct file_operations runlist_file_ops = {
175 .open = runlist_file_open, 175 .open = runlist_file_open,
176 .read = seq_read, 176 .read = seq_read,
177 .llseek = seq_lseek, 177 .llseek = seq_lseek,
@@ -199,7 +199,7 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
199 return count; 199 return count;
200} 200}
201 201
202const struct file_operations preempt_tsg_file_ops = { 202struct file_operations preempt_tsg_file_ops = {
203 .write = preempt_tsg_file_write, 203 .write = preempt_tsg_file_write,
204}; 204};
205 205
@@ -236,7 +236,7 @@ ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
236 return count; 236 return count;
237} 237}
238 238
239const struct file_operations disable_channel_file_ops = { 239struct file_operations disable_channel_file_ops = {
240 .write = disable_channel_file_write, 240 .write = disable_channel_file_write,
241}; 241};
242 242
@@ -262,7 +262,7 @@ ssize_t enable_channel_file_write(struct file *f, const char __user *buffer,
262 return count; 262 return count;
263} 263}
264 264
265const struct file_operations enable_channel_file_ops = { 265struct file_operations enable_channel_file_ops = {
266 .write = enable_channel_file_write, 266 .write = enable_channel_file_write,
267}; 267};
268 268
@@ -317,6 +317,6 @@ ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
317 return count; 317 return count;
318} 318}
319 319
320const struct file_operations switch_to_tsg_file_ops = { 320struct file_operations switch_to_tsg_file_ops = {
321 .write = switch_to_tsg_file_write, 321 .write = switch_to_tsg_file_write,
322}; 322};