diff options
| author | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-11 13:42:54 -0400 |
|---|---|---|
| committer | Joshua Bakita <jbakita@cs.unc.edu> | 2024-04-11 13:42:54 -0400 |
| commit | 7cfa24cebeaf144b446d07e15fb25e78bb14841e (patch) | |
| tree | 639db3695c54a1e50b3f10e1abb2bfaf5b8dd906 | |
| parent | a8fd5a8dee066d0008e7667b0c9e6a60cd5f3a2e (diff) | |
Linux 5.17+ support and allow including nvdebug.h independently
- Move Linux-specific functions to nvdebug_linux.h and .c
- Workaround PDE_DATA() being pde_data() on Linux 5.17+
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | copy_topology_procfs.c | 2 | ||||
| -rw-r--r-- | device_info_procfs.c | 8 | ||||
| -rw-r--r-- | nvdebug.h | 118 | ||||
| -rw-r--r-- | nvdebug_entry.c | 6 | ||||
| -rw-r--r-- | nvdebug_linux.c | 61 | ||||
| -rw-r--r-- | nvdebug_linux.h | 49 | ||||
| -rw-r--r-- | runlist_procfs.c | 2 |
8 files changed, 140 insertions, 108 deletions
| @@ -1,6 +1,6 @@ | |||
| 1 | obj-m += nvdebug.o | 1 | obj-m += nvdebug.o |
| 2 | nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \ | 2 | nvdebug-objs = runlist_procfs.o device_info_procfs.o runlist.o mmu.o \ |
| 3 | nvdebug_entry.o bus.o copy_topology_procfs.o | 3 | nvdebug_entry.o bus.o nvdebug_linux.o copy_topology_procfs.o |
| 4 | KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" | 4 | KBUILD_CFLAGS += -DGIT_HASH=\"$(shell git --git-dir=$(PWD)/.git rev-parse --short HEAD)\" |
| 5 | # -mfentry above if not building due to mcount missing | 5 | # -mfentry above if not building due to mcount missing |
| 6 | 6 | ||
diff --git a/copy_topology_procfs.c b/copy_topology_procfs.c index cfedce7..a1aa0ad 100644 --- a/copy_topology_procfs.c +++ b/copy_topology_procfs.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | // Copyright 2024 Joshua Bakita | 1 | // Copyright 2024 Joshua Bakita |
| 2 | 2 | ||
| 3 | #include "nvdebug.h" | 3 | #include "nvdebug_linux.h" |
| 4 | 4 | ||
| 5 | // Maximum number of LCEs that we will print | 5 | // Maximum number of LCEs that we will print |
| 6 | #define MAX_LCES 32 | 6 | #define MAX_LCES 32 |
diff --git a/device_info_procfs.c b/device_info_procfs.c index 168905f..8fe9709 100644 --- a/device_info_procfs.c +++ b/device_info_procfs.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "nvdebug.h" | 1 | #include "nvdebug_linux.h" |
| 2 | #include <linux/seq_file.h> // For seq_* functions and types | 2 | #include <linux/seq_file.h> // For seq_* functions and types |
| 3 | #include <linux/uaccess.h> // For copy_to_user() | 3 | #include <linux/uaccess.h> // For copy_to_user() |
| 4 | 4 | ||
| @@ -16,7 +16,7 @@ static ssize_t nvdebug_reg32_read(struct file *f, char __user *buf, size_t size, | |||
| 16 | if (size < 16 || *off != 0) | 16 | if (size < 16 || *off != 0) |
| 17 | return 0; | 17 | return 0; |
| 18 | // 32 bit register will always take less than 16 characters to print | 18 | // 32 bit register will always take less than 16 characters to print |
| 19 | chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)PDE_DATA(file_inode(f)))); | 19 | chars_written = scnprintf(out, 16, "%#0x\n", nvdebug_readl(g, (uintptr_t)pde_data(file_inode(f)))); |
| 20 | if (copy_to_user(buf, out, chars_written)) | 20 | if (copy_to_user(buf, out, chars_written)) |
| 21 | printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name); | 21 | printk(KERN_WARNING "Unable to copy all data for %s\n", file_dentry(f)->d_name.name); |
| 22 | *off += chars_written; | 22 | *off += chars_written; |
| @@ -30,7 +30,7 @@ static ssize_t nvdebug_reg_range_read(struct file *f, char __user *buf, size_t s | |||
| 30 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; | 30 | struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)]; |
| 31 | // See comment in nvdebug_entry.c to understand `union reg_range` | 31 | // See comment in nvdebug_entry.c to understand `union reg_range` |
| 32 | union reg_range range; | 32 | union reg_range range; |
| 33 | range.raw = (uintptr_t)PDE_DATA(file_inode(f)); | 33 | range.raw = (uintptr_t)pde_data(file_inode(f)); |
| 34 | 34 | ||
| 35 | // "0x" + up to 32-bit register as hex + "\n\0" is at most 12 characters | 35 | // "0x" + up to 32-bit register as hex + "\n\0" is at most 12 characters |
| 36 | if (size < 12 || *off != 0) | 36 | if (size < 12 || *off != 0) |
| @@ -55,7 +55,7 @@ struct file_operations nvdebug_read_reg32_file_ops = { | |||
| 55 | }; | 55 | }; |
| 56 | 56 | ||
| 57 | // Generic mechanism used for printing a subset of bits from a register | 57 | // Generic mechanism used for printing a subset of bits from a register |
| 58 | // Please store a `union reg_range` rather than a `uintptr_t` in the PDE_DATA | 58 | // Please store a `union reg_range` rather than a `uintptr_t` in the pde_data |
| 59 | struct file_operations nvdebug_read_reg_range_file_ops = { | 59 | struct file_operations nvdebug_read_reg_range_file_ops = { |
| 60 | .read = nvdebug_reg_range_read, | 60 | .read = nvdebug_reg_range_read, |
| 61 | .llseek = default_llseek, | 61 | .llseek = default_llseek, |
| @@ -7,13 +7,11 @@ | |||
| 7 | * - Detailed GPU information (PTOP, FUSE, and CE) | 7 | * - Detailed GPU information (PTOP, FUSE, and CE) |
| 8 | * - PRAMIN, BAR1/2, and page table status | 8 | * - PRAMIN, BAR1/2, and page table status |
| 9 | * - Helper functions for nvdebug | 9 | * - Helper functions for nvdebug |
| 10 | * | ||
| 11 | * This file should not depend on any Linux-internal headers, and may be | ||
| 12 | * included outside of nvdebug. | ||
| 10 | */ | 13 | */ |
| 11 | 14 | #include <linux/types.h> | |
| 12 | #include <linux/device.h> // For dev_get_drvdata() | ||
| 13 | #include <linux/seq_file.h> // For struct seq_file | ||
| 14 | #include <linux/proc_fs.h> // For PDE_DATA() macro | ||
| 15 | #include <linux/version.h> // For KERNEL_VERSION and LINUX_VERSION_CODE | ||
| 16 | #include <asm/io.h> | ||
| 17 | 15 | ||
| 18 | // Fully defined in include/nvgpu/gk20a.h. We only pass around pointers to | 16 | // Fully defined in include/nvgpu/gk20a.h. We only pass around pointers to |
| 19 | // this, so declare as incomplete type to avoid pulling in the nvgpu headers. | 17 | // this, so declare as incomplete type to avoid pulling in the nvgpu headers. |
| @@ -1147,8 +1145,11 @@ typedef union { | |||
| 1147 | } page_tbl_entry_v0_t; | 1145 | } page_tbl_entry_v0_t; |
| 1148 | */ | 1146 | */ |
| 1149 | 1147 | ||
| 1150 | // TODO(jbakita): Maybe put the above GPU types in a different file. | ||
| 1151 | 1148 | ||
| 1149 | |||
| 1150 | /* Begin nvdebug types and functions */ | ||
| 1151 | |||
| 1152 | // Vendor ID for PCI devices manufactured by NVIDIA | ||
| 1152 | #define NV_PCI_VENDOR 0x10de | 1153 | #define NV_PCI_VENDOR 0x10de |
| 1153 | struct nvdebug_state { | 1154 | struct nvdebug_state { |
| 1154 | // Pointer to the mapped base address of the GPU control registers (obtained | 1155 | // Pointer to the mapped base address of the GPU control registers (obtained |
| @@ -1195,7 +1196,6 @@ struct nvdebug_state { | |||
| 1195 | const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \ | 1196 | const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \ |
| 1196 | return entry->prop; \ | 1197 | return entry->prop; \ |
| 1197 | } else { \ | 1198 | } else { \ |
| 1198 | printk(KERN_WARNING "[nvdebug] " #prop " unavailable on GPU ID %x, which is older than Kepler.\n", g->chip_id); \ | ||
| 1199 | return (type)0; \ | 1199 | return (type)0; \ |
| 1200 | } \ | 1200 | } \ |
| 1201 | } | 1201 | } |
| @@ -1250,96 +1250,18 @@ uint64_t search_v1_page_directory( | |||
| 1250 | struct nvdebug_state *g, | 1250 | struct nvdebug_state *g, |
| 1251 | page_dir_config_t pd_config, | 1251 | page_dir_config_t pd_config, |
| 1252 | uint64_t addr_to_find); | 1252 | uint64_t addr_to_find); |
| 1253 | |||
| 1254 | |||
| 1255 | static inline struct gk20a *get_gk20a(struct device *dev) { | ||
| 1256 | // XXX: Only works because gk20a* is the first member of gk20a_platform | ||
| 1257 | return *((struct gk20a**)dev_get_drvdata(dev)); | ||
| 1258 | } | ||
| 1259 | |||
| 1260 | // We us the data field of the proc_dir_entry ("PDE" in this function) to store | ||
| 1261 | // our index into the g_nvdebug_state array | ||
| 1262 | static inline int seq2gpuidx(struct seq_file *s) { | ||
| 1263 | const struct file *f = s->file; | ||
| 1264 | return (uintptr_t)PDE_DATA(file_inode(f)); | ||
| 1265 | } | ||
| 1266 | static inline int file2gpuidx(const struct file *f) { | ||
| 1267 | return (uintptr_t)PDE_DATA(file_inode(f)); | ||
| 1268 | } | ||
| 1269 | static inline int file2parentgpuidx(const struct file *f) { | ||
| 1270 | // Should be safe to call on ProcFS entries, as our parent should (?) | ||
| 1271 | // still exist if we're called. If not, there are worse races in this | ||
| 1272 | // module. | ||
| 1273 | return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode); | ||
| 1274 | } | ||
| 1275 | |||
| 1276 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) | ||
| 1277 | // Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry | ||
| 1278 | // of the gk20a struct (after a function pointer). This change was made as L4T | ||
| 1279 | // was upgraded from Linux 4.9 to 5.10 (r32 -> r34+) | ||
| 1280 | // Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA | ||
| 1281 | // i.e. if FUSA was enabled, this is wrong. | ||
| 1282 | #define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void)))) | ||
| 1283 | #else | ||
| 1284 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux, which holds regs | ||
| 1285 | #define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) | ||
| 1286 | #endif | ||
| 1287 | |||
| 1288 | // Similar to nvgpu_readl() | ||
| 1289 | // (except we don't try to resolve situations where regs is NULL) | ||
| 1290 | static inline u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | ||
| 1291 | u32 ret; | ||
| 1292 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 1293 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); | ||
| 1294 | return -1; | ||
| 1295 | } | ||
| 1296 | ret = readl(s->regs + r); | ||
| 1297 | // It seems like the GPU returns this as a flag value for bad addresses | ||
| 1298 | if (ret == 0xbadf5040) { | ||
| 1299 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r); | ||
| 1300 | return -1; | ||
| 1301 | } | ||
| 1302 | return ret; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | // quadword version of nvdebug_readl() | ||
| 1306 | static inline u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | ||
| 1307 | u64 ret; | ||
| 1308 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 1309 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); | ||
| 1310 | return -1; | ||
| 1311 | } | ||
| 1312 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl | ||
| 1313 | ret = readl(s->regs + r); | ||
| 1314 | ret |= ((u64)readl(s->regs + r + 4)) << 32; | ||
| 1315 | // It seems like the GPU returns this as a flag value for bad addresses | ||
| 1316 | if ((ret & 0xffffffffull) == 0xbadf5040ull) { | ||
| 1317 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r); | ||
| 1318 | return -1; | ||
| 1319 | } | ||
| 1320 | return ret; | ||
| 1321 | } | ||
| 1322 | |||
| 1323 | // Similar to nvgpu_writel() | ||
| 1324 | static inline void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { | ||
| 1325 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 1326 | printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); | ||
| 1327 | return; | ||
| 1328 | } | ||
| 1329 | writel_relaxed(v, s->regs + r); | ||
| 1330 | wmb(); | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | // quadword version of nvdebug_writel() | ||
| 1334 | // XXX: This probably doesn't work XXX: Untested | ||
| 1335 | static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | ||
| 1336 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 1337 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); | ||
| 1338 | return; | ||
| 1339 | } | ||
| 1340 | writeq_relaxed(v, s->regs + r); | ||
| 1341 | wmb(); | ||
| 1342 | } | ||
| 1343 | // Defined in bus.c | 1253 | // Defined in bus.c |
| 1344 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); | 1254 | int addr_to_pramin_mut(struct nvdebug_state *g, uint64_t addr, enum INST_TARGET target); |
| 1345 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); | 1255 | int get_bar2_pdb(struct nvdebug_state *g, page_dir_config_t* pd); |
| 1256 | |||
| 1257 | // Some portions of nvdebug can be included from kernel- or user-space (just | ||
| 1258 | // this file at present). In order for these compiled object files to be | ||
| 1259 | // usable in either setting, the appropriate version of the following functions | ||
| 1260 | // must be selected at link-time. Unfortunately, this precludes inlining (as | ||
| 1261 | // the implementation of an inline function must be known at compile time). | ||
| 1262 | // Implementations of these functions are provided for kernel-space by | ||
| 1263 | // nvdebug_linux.c. | ||
| 1264 | uint32_t nvdebug_readl(struct nvdebug_state *s, uint32_t r); | ||
| 1265 | uint64_t nvdebug_readq(struct nvdebug_state *s, uint32_t r); | ||
| 1266 | void nvdebug_writel(struct nvdebug_state *s, uint32_t r, uint32_t v); | ||
| 1267 | void nvdebug_writeq(struct nvdebug_state *s, uint32_t r, uint64_t v); | ||
diff --git a/nvdebug_entry.c b/nvdebug_entry.c index 68e4d71..0caa289 100644 --- a/nvdebug_entry.c +++ b/nvdebug_entry.c | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include <linux/pci.h> // For PCI device scanning | 9 | #include <linux/pci.h> // For PCI device scanning |
| 10 | #include <linux/proc_fs.h> // So we can set up entries in /proc | 10 | #include <linux/proc_fs.h> // So we can set up entries in /proc |
| 11 | 11 | ||
| 12 | #include "nvdebug.h" | 12 | #include "nvdebug_linux.h" |
| 13 | #include "stubs.h" | 13 | #include "stubs.h" |
| 14 | 14 | ||
| 15 | // Enable to intercept and log GPU interrupts. Historically used to benchmark | 15 | // Enable to intercept and log GPU interrupts. Historically used to benchmark |
| @@ -93,7 +93,7 @@ int probe_and_cache_devices(void) { | |||
| 93 | // Search for GP10B (Jetson TX2) | 93 | // Search for GP10B (Jetson TX2) |
| 94 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b"))) | 94 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gp10b"))) |
| 95 | dev = temp_dev; | 95 | dev = temp_dev; |
| 96 | // Search for GM10A (Jetson TX1) | 96 | // Search for GM20B (Jetson TX1) |
| 97 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "57000000.gpu"))) | 97 | while (!dev && (temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "57000000.gpu"))) |
| 98 | dev = temp_dev; | 98 | dev = temp_dev; |
| 99 | // TODO: Support other platform bus devices (gk20a - TK1) | 99 | // TODO: Support other platform bus devices (gk20a - TK1) |
| @@ -173,7 +173,7 @@ int create_runlist_files(int device_id, struct proc_dir_entry *dir) { | |||
| 173 | max_rl_id = info.runlist_enum; | 173 | max_rl_id = info.runlist_enum; |
| 174 | } | 174 | } |
| 175 | // Create files to read each runlist. The read handling code looks at the | 175 | // Create files to read each runlist. The read handling code looks at the |
| 176 | // PDE_DATA associated with the file to determine what the runlist ID is. | 176 | // `pde_data` associated with the file to determine what the runlist ID is. |
| 177 | for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { | 177 | for (rl_id = 0; rl_id <= max_rl_id; rl_id++) { |
| 178 | snprintf(runlist_name, 12, "runlist%d", rl_id); | 178 | snprintf(runlist_name, 12, "runlist%d", rl_id); |
| 179 | rl_entry = proc_create_data( | 179 | rl_entry = proc_create_data( |
diff --git a/nvdebug_linux.c b/nvdebug_linux.c new file mode 100644 index 0000000..1d76bc9 --- /dev/null +++ b/nvdebug_linux.c | |||
| @@ -0,0 +1,61 @@ | |||
| 1 | /* Copyright 2024 Joshua Bakita | ||
| 2 | * Kernel-specific implementations of functions declared in nvdebug.h | ||
| 3 | */ | ||
| 4 | #include "nvdebug_linux.h" | ||
| 5 | #include <asm/io.h> // For read[l,q] and write[l,q] | ||
| 6 | |||
| 7 | // Similar to nvgpu_readl() | ||
| 8 | // (except we don't try to resolve situations where regs is NULL) | ||
| 9 | u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { | ||
| 10 | u32 ret; | ||
| 11 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 12 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read; registers unavailable. Is GPU on?\n"); | ||
| 13 | return -1; | ||
| 14 | } | ||
| 15 | ret = readl(s->regs + r); | ||
| 16 | // It seems like the GPU returns this as a flag value for bad addresses | ||
| 17 | if (ret == 0xbadf5040) { | ||
| 18 | printk(KERN_ERR "[nvdebug] nvdebug_readl: Unable to read from register offset %#x; bad data\n", r); | ||
| 19 | return -1; | ||
| 20 | } | ||
| 21 | return ret; | ||
| 22 | } | ||
| 23 | |||
| 24 | // quadword version of nvdebug_readl() | ||
| 25 | u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { | ||
| 26 | u64 ret; | ||
| 27 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 28 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read; registers unavailable. Is GPU on?\n"); | ||
| 29 | return -1; | ||
| 30 | } | ||
| 31 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl | ||
| 32 | ret = readl(s->regs + r); | ||
| 33 | ret |= ((u64)readl(s->regs + r + 4)) << 32; | ||
| 34 | // It seems like the GPU returns this as a flag value for bad addresses | ||
| 35 | if ((ret & 0xffffffffull) == 0xbadf5040ull) { | ||
| 36 | printk(KERN_ERR "[nvdebug] nvdebug_readq: Unable to read from register offset %#x; bad data\n", r); | ||
| 37 | return -1; | ||
| 38 | } | ||
| 39 | return ret; | ||
| 40 | } | ||
| 41 | |||
| 42 | // Similar to nvgpu_writel() | ||
| 43 | void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { | ||
| 44 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 45 | printk(KERN_ERR "[nvdebug] nvdebug_writel: Unable to write; registers unavailable. Is GPU on?\n"); | ||
| 46 | return; | ||
| 47 | } | ||
| 48 | writel_relaxed(v, s->regs + r); | ||
| 49 | wmb(); | ||
| 50 | } | ||
| 51 | |||
| 52 | // quadword version of nvdebug_writel() | ||
| 53 | // XXX: This probably doesn't work XXX: Untested | ||
| 54 | void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | ||
| 55 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 56 | printk(KERN_ERR "[nvdebug] nvdebug_writeq: Unable to write; registers unavailable. Is GPU on?\n"); | ||
| 57 | return; | ||
| 58 | } | ||
| 59 | writeq_relaxed(v, s->regs + r); | ||
| 60 | wmb(); | ||
| 61 | } | ||
diff --git a/nvdebug_linux.h b/nvdebug_linux.h new file mode 100644 index 0000000..022d1cf --- /dev/null +++ b/nvdebug_linux.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | /* Copyright 2024 Joshua Bakita | ||
| 2 | * SPDX-License-Identifier: MIT | ||
| 3 | * | ||
| 4 | * Helpers which are kernel-specific | ||
| 5 | */ | ||
| 6 | #include "nvdebug.h" | ||
| 7 | |||
| 8 | #include <linux/device.h> // For dev_get_drvdata() | ||
| 9 | #include <linux/seq_file.h> // For struct seq_file | ||
| 10 | #include <linux/proc_fs.h> // For pde_data() macro | ||
| 11 | #include <linux/version.h> // For KERNEL_VERSION and LINUX_VERSION_CODE | ||
| 12 | |||
| 13 | static inline struct gk20a *get_gk20a(struct device *dev) { | ||
| 14 | // Only works because gk20a* is the first member of gk20a_platform | ||
| 15 | return *((struct gk20a**)dev_get_drvdata(dev)); | ||
| 16 | } | ||
| 17 | |||
| 18 | // PDE_DATA was renamed to pde_data in Linux 5.17 to deconflict with a driver | ||
| 19 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5,17,0) | ||
| 20 | #define pde_data PDE_DATA | ||
| 21 | #endif | ||
| 22 | |||
| 23 | // We use the data field of the proc_dir_entry ("PDE" in this function) to store | ||
| 24 | // our index into the g_nvdebug_state array | ||
| 25 | static inline int seq2gpuidx(struct seq_file *s) { | ||
| 26 | const struct file *f = s->file; | ||
| 27 | return (uintptr_t)pde_data(file_inode(f)); | ||
| 28 | } | ||
| 29 | static inline int file2gpuidx(const struct file *f) { | ||
| 30 | return (uintptr_t)pde_data(file_inode(f)); | ||
| 31 | } | ||
| 32 | static inline int file2parentgpuidx(const struct file *f) { | ||
| 33 | // Should be safe to call on ProcFS entries, as our parent should (?) | ||
| 34 | // still exist if we're called. If not, there are worse races in this | ||
| 35 | // module. | ||
| 36 | return (uintptr_t)pde_data(file_dentry(f)->d_parent->d_inode); | ||
| 37 | } | ||
| 38 | |||
| 39 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) | ||
| 40 | // Commit 643eb158a3 in nvgpu moved the mapped registers to the second entry | ||
| 41 | // of the gk20a struct (after a function pointer). This change was made as L4T | ||
| 42 | // was upgraded from Linux 4.9 to 5.10 (r32 -> r34+) | ||
| 43 | // Note that this is wrong if nvgpu was built without CONFIG_NVGPU_NON_FUSA | ||
| 44 | // i.e. if FUSA was enabled, this is wrong. | ||
| 45 | #define gk20a_regs(gk20a) (*(void**)((void*)gk20a + sizeof(void(*)(void)))) | ||
| 46 | #else | ||
| 47 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux, which holds regs | ||
| 48 | #define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) | ||
| 49 | #endif | ||
diff --git a/runlist_procfs.c b/runlist_procfs.c index e50c34a..0087d90 100644 --- a/runlist_procfs.c +++ b/runlist_procfs.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | #include <linux/seq_file.h> // For seq_* functions and types | 1 | #include <linux/seq_file.h> // For seq_* functions and types |
| 2 | #include <linux/version.h> // Macros to detect kernel version | 2 | #include <linux/version.h> // Macros to detect kernel version |
| 3 | 3 | ||
| 4 | #include "nvdebug.h" | 4 | #include "nvdebug_linux.h" |
| 5 | 5 | ||
| 6 | #define RUNLIST_PROCFS_NAME "runlist" | 6 | #define RUNLIST_PROCFS_NAME "runlist" |
| 7 | #define DETAILED_CHANNEL_INFO | 7 | #define DETAILED_CHANNEL_INFO |
