summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Joshua Bakita <jbakita@cs.unc.edu>, 2021-09-22 11:02:45 -0400
committer: Joshua Bakita <jbakita@cs.unc.edu>, 2021-09-22 11:16:58 -0400
commit: b69863043538d9fd4590acb249124526772a80ea (patch)
tree: 5f1329a97c7c1ff80823362e7538ab5fc6ea6eaf
parent: 54e783959b5d3622556bbf34a3a7ad8e481d9e25 (diff)
Fix a pre-4.19 bug in seq procfs files and add detailed channel print
- The sequence file infrastructure prior to kernel version 4.19 has a bug in the retry code when the write buffer overflows that causes our private iterator state to be corrupted. Work around this by tracking some info out-of-band.
- Now supports including detailed channel status information from channel RAM when printing the runlist.
- Adds a helper function to probe for and return struct gk20a*.
-rw-r--r-- nvdebug.h 93
-rw-r--r-- runlist.c 35
-rw-r--r-- runlist_procfs.c 87
3 files changed, 179 insertions, 36 deletions
diff --git a/nvdebug.h b/nvdebug.h
index b4ff0a4..cd0dc90 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -12,7 +12,20 @@
12 12
13 `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU 13 `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU
14 virtual address space for this context. All channels in a TSG point to the 14 virtual address space for this context. All channels in a TSG point to the
15 same GPU Instance Block. 15 same GPU Instance Block (?).
16
17 "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and
18 thereby which PBDMA will run the channel. Increasing values select
19 increasingly numbered PBDMA IDs serving the runlist. If the selector value
20 exceeds the number of PBDMAs on the runlist, the hardware will silently
21 reassign the channel to run on the first PBDMA as though RUNQUEUE_SELECTOR had
22 been set to 0. (In current hardware, this is used by SCG on the graphics
23 runlist only to determine which FE pipe should service a given channel. A
24 value of 0 targets the first FE pipe, which can process all FE driven engines:
25 Graphics, Compute, Inline2Memory, and TwoD. A value of 1 targets the second
26 FE pipe, which can only process Compute work. Note that GRCE work is allowed
27 on either runqueue.)" (NVIDIA) Note that it appears runqueue 1 is the default
28 for CUDA work on the Jetson Xavier.
16 29
17 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN 30 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN
18 CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) 31 CHID (ID) : identifier of the channel to run (overlays ENTRY_ID)
@@ -29,6 +42,19 @@
29*/ 42*/
30enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; 43enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1};
31enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; 44enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3};
45static inline char* target_to_text(enum INST_TARGET t) {
46 switch (t) {
47 case TARGET_VID_MEM:
48 return "VID_MEM";
49 case TARGET_SYS_MEM_COHERENT:
50 return "SYS_MEM_COHERENT";
51 case TARGET_SYS_MEM_NONCOHERENT:
52 return "SYS_MEM_NONCOHERENT";
53 default:
54 printk(KERN_WARNING "[nvdebug] Invalid aperture!\n");
55 return NULL;
56 }
57}
32 58
33struct runlist_chan { 59struct runlist_chan {
34// 0:63 60// 0:63
@@ -55,10 +81,10 @@ struct runlist_chan {
55 timeslice = (TSG_TIMESLICE_TIMEOUT << TSG_TIMESLICE_SCALE) * 1024 nanoseconds 81 timeslice = (TSG_TIMESLICE_TIMEOUT << TSG_TIMESLICE_SCALE) * 1024 nanoseconds
56 82
57 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_TSG 83 ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_TSG
58 TSGID : identifier of the Timeslice group (overlays ENTRY_ID)
59 TSG_LENGTH : number of channels that are part of this timeslice group
60 TIMESLICE_SCALE : scale factor for the TSG's timeslice 84 TIMESLICE_SCALE : scale factor for the TSG's timeslice
61 TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice 85 TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice
86 TSG_LENGTH : number of channels that are part of this timeslice group
87 TSGID : identifier of the Timeslice group (overlays ENTRY_ID)
62*/ 88*/
63struct entry_tsg { 89struct entry_tsg {
64// 0:63 90// 0:63
@@ -130,6 +156,52 @@ typedef union {
130 uint32_t raw; 156 uint32_t raw;
131} runlist_info_t; 157} runlist_info_t;
132 158
159enum CHANNEL_STATUS {
160 CHANNEL_STATUS_IDLE = 0,
161 CHANNEL_STATUS_PENDING = 1,
162 CHANNEL_STATUS_PENDING_CTX_RELOAD = 2,
163 CHANNEL_STATUS_PENDING_ACQUIRE = 3,
164 CHANNEL_STATUS_PENDING_ACQ_CTX_RELOAD = 4,
165 CHANNEL_STATUS_ON_PBDMA = 5,
166 CHANNEL_STATUS_ON_PBDMA_AND_ENG = 6,
167 CHANNEL_STATUS_ON_ENG = 7,
168 CHANNEL_STATUS_ON_ENG_PENDING_ACQUIRE = 8,
169 CHANNEL_STATUS_ON_ENG_PENDING = 9,
170 CHANNEL_STATUS_ON_PBDMA_CTX_RELOAD = 10,
171 CHANNEL_STATUS_ON_PBDMA_AND_ENG_CTX_RELOAD = 11,
172 CHANNEL_STATUS_ON_ENG_CTX_RELOAD = 12,
173 CHANNEL_STATUS_ON_ENG_PENDING_CTX_RELOAD = 13,
174 CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14,
175};
176
177#define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
178#define MAX_CHID 512 // TODO: Double-check this is right
179// There are a total of 512 possible channels
180typedef union {
181 struct {
182// 0:31
183 uint32_t inst_ptr:28;
184 enum INST_TARGET inst_target:2;
185 uint32_t padding0:1;
186 bool inst_bind:1;
187// 32:63
188 bool enable:1;
189 bool next:1;
190 uint32_t padding:6;
191 bool force_ctx_reload:1;
192 uint32_t padding2:1;
193 bool enable_set:1;
194 bool enable_clear:1;
195 uint32_t padding3:10;
196 bool pbdma_faulted:1;
197 bool eng_faulted:1;
198 enum CHANNEL_STATUS status:4;
199 bool busy:1;
200 uint32_t padding4:3;
201 } __attribute__((packed));
202 uint64_t raw;
203} channel_ctrl_t;
204
133// TODO(jbakita): Maybe put the above GPU types in a different file. 205// TODO(jbakita): Maybe put the above GPU types in a different file.
134 206
135#define for_chan_in_tsg(chan, tsg) \ 207#define for_chan_in_tsg(chan, tsg) \
@@ -146,6 +218,7 @@ struct runlist_iter {
146}; 218};
147 219
148// Defined in runlist.c 220// Defined in runlist.c
221struct gk20a* get_live_gk20a(void);
149int get_runlist_iter(struct runlist_iter *rl_iter); 222int get_runlist_iter(struct runlist_iter *rl_iter);
150 223
151static inline struct gk20a *get_gk20a(struct device *dev) { 224static inline struct gk20a *get_gk20a(struct device *dev) {
@@ -164,6 +237,20 @@ static inline u32 nvdebug_readl(struct gk20a* g, u32 r) {
164 return readl(g_os->regs + r); 237 return readl(g_os->regs + r);
165} 238}
166 239
240// quadword version of nvdebug_readl()
241static inline u64 nvdebug_readq(struct gk20a* g, u32 r) {
242 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g);
243 u64 ret;
244 if (unlikely(!g_os->regs)) {
245 printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n");
246 return -1;
247 }
248 // readq seems to always return the uppermost 32 bits as 0, so workaround with readl
249 ret = readl(g_os->regs + r);
250 ret |= ((u64)readl(g_os->regs + r + 4)) << 32;
251 return ret;
252}
253
167// Functionally identical to nvgpu_writel() 254// Functionally identical to nvgpu_writel()
168static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { 255static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) {
169 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); 256 struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g);
diff --git a/runlist.c b/runlist.c
index 8691b51..8dfa1c7 100644
--- a/runlist.c
+++ b/runlist.c
@@ -8,23 +8,20 @@
8// Bus types are global symbols in the kernel 8// Bus types are global symbols in the kernel
9extern struct bus_type platform_bus_type; 9extern struct bus_type platform_bus_type;
10 10
11int get_runlist_iter(struct runlist_iter *rl_iter) { 11struct gk20a* get_live_gk20a(void) {
12 struct device *dev = NULL; 12 struct device *dev = NULL;
13 struct device *temp_dev; 13 struct device *temp_dev;
14 struct gk20a *g; 14 struct gk20a *g;
15 struct entry_tsg head; 15 struct nvgpu_os_linux *l;
16 runlist_base_t rl_base;
17 runlist_info_t rl_info;
18 u64 runlist_iova;
19 // Get the last device that matches our name 16 // Get the last device that matches our name
20 while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) { 17 while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) {
21 dev = temp_dev; 18 dev = temp_dev;
22 printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev)); 19 printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev));
23 } 20 }
24 if (!dev) 21 if (!dev)
25 return -EIO; 22 return NULL;
26 g = get_gk20a(dev); 23 g = get_gk20a(dev);
27 // This address seems to not be: 24 // The address pointed to `regs` + NV_PFIFO_RUNLIST_BASE seems to not be:
28 // - A GPU address (type is sysmem_coherent) 25 // - A GPU address (type is sysmem_coherent)
29 // - A physical address (dereferencing after ioremap crashes) 26 // - A physical address (dereferencing after ioremap crashes)
30 // - A kernel virtual address (dereferencing segfaults) 27 // - A kernel virtual address (dereferencing segfaults)
@@ -49,15 +46,23 @@ int get_runlist_iter(struct runlist_iter *rl_iter) {
49 // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working 46 // BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working
50 // before because the GPU had simply gone to sleep and invalidated its 47 // before because the GPU had simply gone to sleep and invalidated its
51 // register state, so nvgpu_readl() was simply returning garbage. 48 // register state, so nvgpu_readl() was simply returning garbage.
52 49 l = container_of(g, struct nvgpu_os_linux, g);
53 printk(KERN_INFO "[nvdebug] Pulling runlist base address from %x\n", NV_PFIFO_RUNLIST_BASE);
54 printk(KERN_INFO "[nvdebug] Using struct gk20a* of %px\n", g);
55 printk(KERN_INFO "[nvdebug] g->name: %s, g->power_on: %d, g->sw_ready: %d, g->is_virtual %d\n",
56 g->name, g->power_on, g->sw_ready, g->is_virtual);
57 struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g);
58 printk(KERN_INFO "[nvdebug] l->regs %px, l->regs_saved %px\n", l->regs, l->regs_saved);
59 if (!l->regs) 50 if (!l->regs)
60 return -EIO; 51 return NULL;
52 return g;
53}
54
55/* Get runlist head and info (incl. length)
56 @param rl_iter Location at which to store output
57*/
58int get_runlist_iter(struct runlist_iter *rl_iter) {
59 struct entry_tsg head;
60 runlist_base_t rl_base;
61 runlist_info_t rl_info;
62 u64 runlist_iova;
63 struct gk20a *g = get_live_gk20a();
64 if (!g)
65 return -EIO;
61 rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE); 66 rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE);
62 rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST); 67 rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
63 runlist_iova = ((u64)rl_base.ptr) << 12; 68 runlist_iova = ((u64)rl_base.ptr) << 12;
diff --git a/runlist_procfs.c b/runlist_procfs.c
index 2107bd4..183eab6 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -1,12 +1,46 @@
1#include <linux/seq_file.h> // For seq_* functions and types 1#include <linux/seq_file.h> // For seq_* functions and types
2#include <linux/version.h> // Macros to detect kernel version
2 3
3#include "nvdebug.h" 4#include "nvdebug.h"
4 5
5#define RUNLIST_PROCFS_NAME "runlist" 6#define RUNLIST_PROCFS_NAME "runlist"
7#define DETAILED_CHANNEL_INFO
8
9static int runlist_detail_seq_show_chan(struct seq_file *s, struct gk20a *g, uint32_t chid) {
10 channel_ctrl_t chan;
11 char *loc_txt;
12 u64 instance_ptr;
13 chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid));
14 loc_txt = target_to_text(chan.inst_target);
15 if (!loc_txt)
16 return -EIO;
17 instance_ptr = chan.inst_ptr;
18 instance_ptr <<= 12;
19 seq_printf(s, " +- Channel Info %-4d -+\n", chid);
20 seq_printf(s, " | Enabled: %d|\n", chan.enable);
21 seq_printf(s, " | Next: %d|\n", chan.next);
22 seq_printf(s, " | Force CTX Reload: %d|\n", chan.force_ctx_reload);
23 seq_printf(s, " | Enable set: %d|\n", chan.enable_set);
24 seq_printf(s, " | Enable clear: %d|\n", chan.enable_clear);
25 seq_printf(s, " | PBDMA Faulted: %d|\n", chan.pbdma_faulted);
26 seq_printf(s, " | ENG Faulted: %d|\n", chan.eng_faulted);
27 seq_printf(s, " | Status: %2d|\n", chan.status);
28 seq_printf(s, " | Busy: %d|\n", chan.busy);
29 seq_printf(s, " | Instance PTR: |\n");
30 seq_printf(s, " | %#018llx |\n", instance_ptr);
31 seq_printf(s, " | %-20s|\n", loc_txt);
32 seq_printf(s, " | Instance bound: %d|\n", chan.inst_bind);
33 seq_printf(s, " +---------------------+\n");
34 return 0;
35}
36
37#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
38// Bug workaround. See comment in runlist_file_seq_start()
39static loff_t pos_fixup;
40#endif
6 41
7static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) { 42static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) {
8 static struct runlist_iter rl_iter; 43 static struct runlist_iter rl_iter;
9
10 // *pos == 0 for first call after read of file 44 // *pos == 0 for first call after read of file
11 if (*pos == 0) { 45 if (*pos == 0) {
12 int err = get_runlist_iter(&rl_iter); 46 int err = get_runlist_iter(&rl_iter);
@@ -14,6 +48,18 @@ static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) {
14 return NULL; 48 return NULL;
15 return &rl_iter; 49 return &rl_iter;
16 } 50 }
51 // If we're resuming an earlier print
52 if (*pos < rl_iter.rl_info.len) {
53#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
54 // There's a nasty bug prior to 4.19-rc1 that if the buffer overflows, the
55 // last update to `pos` is not saved. Work around that here by reloading a
56 // saved copy of `pos`.
57 if (!pos_fixup)
58 return NULL;
59 *pos = pos_fixup;
60#endif
61 return &rl_iter;
62 }
17 // When called with *pos != 0, we already traversed the runlist 63 // When called with *pos != 0, we already traversed the runlist
18 return NULL; 64 return NULL;
19} 65}
@@ -21,15 +67,20 @@ static void *runlist_file_seq_start(struct seq_file *s, loff_t *pos) {
21static void* runlist_file_seq_next(struct seq_file *s, void *raw_rl_iter, 67static void* runlist_file_seq_next(struct seq_file *s, void *raw_rl_iter,
22 loff_t *pos) { 68 loff_t *pos) {
23 struct runlist_iter* rl_iter = raw_rl_iter; 69 struct runlist_iter* rl_iter = raw_rl_iter;
70 void *ret = NULL;
24 // Advance by one TSG + channels under last TSG 71 // Advance by one TSG + channels under last TSG
25 *pos += 1 + rl_iter->curr_tsg->tsg_length; 72 *pos += 1 + rl_iter->curr_tsg->tsg_length;
26 // Verify we haven't reached the end of the runlist 73 // Verify we haven't reached the end of the runlist
27 // rl_info.len is the num of tsg entries + total num of channel entries 74 // rl_info.len is the num of tsg entries + total num of channel entries
28 if (*pos < rl_iter->rl_info.len) { 75 if (*pos < rl_iter->rl_info.len) {
29 rl_iter->curr_tsg = next_tsg(rl_iter->curr_tsg); 76 rl_iter->curr_tsg = next_tsg(rl_iter->curr_tsg);
30 return rl_iter; 77 ret = rl_iter;
31 } 78 }
32 return NULL; 79#if LINUX_VERSION_CODE < KERNEL_VERSION(4,19,0)
80 // Bug workaround. See comment in runlist_file_seq_start()
81 pos_fixup = ret ? *pos : 0;
82#endif
83 return ret;
33} 84}
34 85
35static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter) { 86static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter) {
@@ -39,8 +90,11 @@ static void runlist_file_seq_stop(struct seq_file *s, void *raw_rl_iter) {
39static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) { 90static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
40 struct entry_tsg* tsg = ((struct runlist_iter*)raw_rl_iter)->curr_tsg; 91 struct entry_tsg* tsg = ((struct runlist_iter*)raw_rl_iter)->curr_tsg;
41 struct runlist_chan* chan; 92 struct runlist_chan* chan;
93 struct gk20a *g = get_live_gk20a();
94 if (!g)
95 return -EIO;
42 if (tsg->entry_type != ENTRY_TYPE_TSG) { 96 if (tsg->entry_type != ENTRY_TYPE_TSG) {
43 printk(KERN_WARNING "[nvdebug] Attempted to print non-TSG in nvdebug_print_tsg()!\n"); 97 printk(KERN_WARNING "[nvdebug] Attempted to print non-TSG in tsg print logic!\n");
44 return -EIO; 98 return -EIO;
45 } 99 }
46 seq_printf(s, "+---- TSG Entry %-2d----+\n", tsg->tsgid); 100 seq_printf(s, "+---- TSG Entry %-2d----+\n", tsg->tsgid);
@@ -48,25 +102,21 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
48 seq_printf(s, "| Timeout: %-11d|\n", tsg->timeslice_timeout); 102 seq_printf(s, "| Timeout: %-11d|\n", tsg->timeslice_timeout);
49 seq_printf(s, "+---------------------+\n"); 103 seq_printf(s, "+---------------------+\n");
50 for_chan_in_tsg(chan, tsg) { 104 for_chan_in_tsg(chan, tsg) {
105#ifndef DETAILED_CHANNEL_INFO
51 char* loc_txt; 106 char* loc_txt;
52 u64 instance_ptr; 107 u64 instance_ptr;
108#endif
53 if (chan->entry_type != ENTRY_TYPE_CHAN) { 109 if (chan->entry_type != ENTRY_TYPE_CHAN) {
54 printk(KERN_WARNING "[nvdebug] Attempted to print non-channel in nvdebug_print_channel()!\n"); 110 printk(KERN_WARNING "[nvdebug] Attempted to print non-channel in channel print logic!\n");
55 return -EIO; 111 return -EIO;
56 } 112 }
57 switch (chan->inst_target) { 113#ifdef DETAILED_CHANNEL_INFO
58 case TARGET_VID_MEM: 114 runlist_detail_seq_show_chan(s, g, chan->chid);
59 loc_txt = "VID_MEM"; 115#else
60 break; 116 loc_txt = target_to_text(chan->inst_target);
61 case TARGET_SYS_MEM_COHERENT: 117 if (!loc_txt) {
62 loc_txt = "SYS_MEM_COHERENT"; 118 printk(KERN_WARNING "[nvdebug] Invalid apature in channel print logic!\n");
63 break; 119 return -EIO;
64 case TARGET_SYS_MEM_NONCOHERENT:
65 loc_txt = "SYS_MEM_NONCOHERENT";
66 break;
67 default:
68 printk(KERN_WARNING "[nvdebug] Invalid aperture in runlist channel!\n");
69 return -EIO;
70 } 120 }
71 // Reconstruct pointer to channel instance block 121 // Reconstruct pointer to channel instance block
72 instance_ptr = chan->inst_ptr_hi; 122 instance_ptr = chan->inst_ptr_hi;
@@ -79,6 +129,7 @@ static int runlist_file_seq_show(struct seq_file *s, void *raw_rl_iter) {
79 seq_printf(s, " | %#018llx |\n", instance_ptr); 129 seq_printf(s, " | %#018llx |\n", instance_ptr);
80 seq_printf(s, " | %-20s|\n", loc_txt); 130 seq_printf(s, " | %-20s|\n", loc_txt);
81 seq_printf(s, " +---------------------+\n"); 131 seq_printf(s, " +---------------------+\n");
132#endif
82 } 133 }
83 return 0; 134 return 0;
84} 135}