From 0596e479ffcca43957f8d32127cea5527460b983 Mon Sep 17 00:00:00 2001
From: Joshua Bakita
Date: Thu, 23 Sep 2021 12:49:27 -0400
Subject: Add APIs to enable/disable a channel and switch to or preempt a
 specific TSG

Adds:
- /proc/preempt_tsg which takes a TSG ID
- /proc/disable_channel which takes a channel ID
- /proc/enable_channel which takes a channel ID
- /proc/switch_to_tsg which takes a TSG ID

Also significantly expands documentation and structs available in nvdebug.h.
---
 nvdebug.h        | 154 ++++++++++++++++++++++++++++++++++++++++++++----------
 nvdebug_entry.c  |  28 +++++++++-
 runlist.c        |  25 +++++++++
 runlist_procfs.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 313 insertions(+), 29 deletions(-)

diff --git a/nvdebug.h b/nvdebug.h
index cd0dc90..9ac71da 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -99,42 +99,103 @@ struct entry_tsg {
 	uint32_t tsgid:12;
 	uint64_t padding4:52;
 } __attribute__((packed));
+#define MAX_TSGID (1 << 12)
 
 enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1};
 
-/* Preempt
+/* Preempt a TSG or Channel by ID
   ID/CHID : Id of TSG or channel to preempt
+  IS_PENDING : ????
+  TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG
+
+  Support: Kepler, Maxwell, Pascal, Volta
 */
 #define NV_PFIFO_PREEMPT 0x00002634
-struct pfifo_preempt {
-	uint32_t id:12;
-	uint32_t padding:8;
-	bool is_pending:1;
-	uint32_t padding2:3;
-	enum PREEMPT_TYPE type:2;
-	uint32_t padding3:6;
-} __attribute__((packed));
+typedef union {
+	struct {
+		uint32_t id:12;
+		uint32_t padding:8;
+		bool is_pending:1;
+		uint32_t padding2:3;
+		enum PREEMPT_TYPE type:2;
+		uint32_t padding3:6;
+	} __attribute__((packed));
+	uint32_t raw;
+} pfifo_preempt_t;
+
+/*
+  "Initiate a preempt of the engine by writing the bit associated with its
+  runlist to NV_PFIFO_RUNLIST_PREEMPT... Do not poll NV_PFIFO_RUNLIST_PREEMPT
+  for the preempt to complete."
+  Useful for preempting multiple runlists at once.
+
+  Appears to trigger an interrupt or some other side-effect on the Jetson
+  Xavier, as the built-in nvgpu driver seems to be disturbed by writing to this.
+
+  To select the runlist dynamically, use the BIT(nr) kernel macro.
+  Example:
+    runlist_preempt_t rl_preempt;
+    rl_preempt.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_PREEMPT);
+    rl_preempt.raw |= BIT(nr);
+    nvdebug_writel(g, NV_PFIFO_RUNLIST_PREEMPT, rl_preempt.raw);
+
+  Support: Volta
+*/
 #define NV_PFIFO_RUNLIST_PREEMPT 0x00002638
-struct runlist_preempt {
-	bool runlist_0:1;
-	bool runlist_1:1;
-	bool runlist_2:1;
-	bool runlist_3:1;
-	bool runlist_4:1;
-	bool runlist_5:1;
-	bool runlist_6:1;
-	bool runlist_7:1;
-	bool runlist_8:1;
-	bool runlist_9:1;
-	bool runlist_10:1;
-	bool runlist_11:1;
-	bool runlist_12:1;
-	bool runlist_13:1;
-	uint32_t padding:28;
-} __attribute__((packed));
+typedef union {
+	struct {
+		bool runlist_0:1;
+		bool runlist_1:1;
+		bool runlist_2:1;
+		bool runlist_3:1;
+		bool runlist_4:1;
+		bool runlist_5:1;
+		bool runlist_6:1;
+		bool runlist_7:1;
+		bool runlist_8:1;
+		bool runlist_9:1;
+		bool runlist_10:1;
+		bool runlist_11:1;
+		bool runlist_12:1;
+		bool runlist_13:1;
+		uint32_t padding:18;
+	} __attribute__((packed));
+	uint32_t raw;
+} runlist_preempt_t;
+
+/* Additional information on preempting from NVIDIA's driver (commit b1d0d8ece)
+ * "From h/w team
+ * Engine save can be blocked by eng stalling interrupts.
+ * FIFO interrupts shouldn’t block an engine save from
+ * finishing, but could block FIFO from reporting preempt done.
+ * No immediate reason to reset the engine if FIFO interrupt is
+ * pending.
+ * The hub, priv_ring, and ltc interrupts could block context
+ * switch (or memory), but doesn’t necessarily have to.
+ * For Hub interrupts they just report access counters and page
+ * faults. Neither of these necessarily block context switch
+ * or preemption, but they could.
+ * For example a page fault for graphics would prevent graphics
+ * from saving out. An access counter interrupt is a
+ * notification and has no effect.
+ * SW should handle page faults though for preempt to complete.
+ * PRI interrupt (due to a failed PRI transaction) will result
+ * in ctxsw failure reported to HOST.
+ * LTC interrupts are generally ECC related and if so,
+ * certainly don’t block preemption/ctxsw but they could.
+ * Bus interrupts shouldn’t have anything to do with preemption
+ * state as they are part of the Host EXT pipe, though they may
+ * exhibit a symptom that indicates that GPU is in a bad state.
+ * To be completely fair, when an engine is preempting SW
+ * really should just handle other interrupts as they come in.
+ * It’s generally bad to just poll and wait on a preempt
+ * to complete since there are many things in the GPU which may
+ * cause a system to hang/stop responding."
+ */
 // Note: This is different with Turing
+// Support: Kepler, Maxwell, Pascal, Volta
 #define NV_PFIFO_RUNLIST_BASE 0x00002270
 typedef union {
 	struct {
@@ -145,6 +206,7 @@ typedef union {
 	uint32_t raw;
 } runlist_base_t;
 
+// Support: Kepler, Maxwell, Pascal, Volta
 #define NV_PFIFO_RUNLIST 0x00002274
 typedef union {
 	struct {
@@ -175,8 +237,8 @@ enum CHANNEL_STATUS {
 };
 
 #define NV_PCCSR_CHANNEL_INST(i) (0x00800000+(i)*8)
-#define MAX_CHID 512 // TODO: Double-check this is right
 // There are a total of 512 possible channels
+#define MAX_CHID 512
 typedef union {
 	struct {
 		// 0:31
@@ -202,6 +264,43 @@ typedef union {
 	uint64_t raw;
 } channel_ctrl_t;
 
+/* Control word for runlist enable/disable.
+
+   RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled)
+
+   To select the runlist dynamically, use the BIT(nr) kernel macro.
+   Disabling example:
+     runlist_disable_t rl_disable;
+     rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
+     rl_disable.raw |= BIT(nr);
+     nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+   Enabling example:
+     runlist_disable_t rl_disable;
+     rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
+     rl_disable.raw &= ~BIT(nr);
+     nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+
+   Support: Fermi, Kepler, Maxwell, Pascal, Volta, Turing
+*/
+#define NV_PFIFO_SCHED_DISABLE 0x00002630
+typedef union {
+	struct {
+		bool runlist_0:1;
+		bool runlist_1:1;
+		bool runlist_2:1;
+		bool runlist_3:1;
+		bool runlist_4:1;
+		bool runlist_5:1;
+		bool runlist_6:1;
+		bool runlist_7:1;
+		bool runlist_8:1;
+		bool runlist_9:1;
+		bool runlist_10:1;
+		uint32_t padding:21;
+	} __attribute__((packed));
+	uint32_t raw;
+} runlist_disable_t;
+
 // TODO(jbakita): Maybe put the above GPU types in a different file.
 #define for_chan_in_tsg(chan, tsg) \
@@ -220,6 +319,7 @@ struct runlist_iter {
 // Defined in runlist.c
 struct gk20a* get_live_gk20a(void);
 int get_runlist_iter(struct runlist_iter *rl_iter);
+int preempt_tsg(uint32_t tsg_id);
 
 static inline struct gk20a *get_gk20a(struct device *dev) {
 	// XXX: Only works because gk20a* is the first member of gk20a_platform
diff --git a/nvdebug_entry.c b/nvdebug_entry.c
index 6346659..14ad6e9 100644
--- a/nvdebug_entry.c
+++ b/nvdebug_entry.c
@@ -20,11 +20,31 @@ MODULE_DESCRIPTION("A scheduling debugging module for NVIDIA GPUs");
 MODULE_SOFTDEP("pre: nvgpu"); // We only support the Jetson boards for now
 
 extern const struct file_operations runlist_file_ops;
+extern const struct file_operations preempt_tsg_file_ops;
+extern const struct file_operations disable_channel_file_ops;
+extern const struct file_operations enable_channel_file_ops;
+extern const struct file_operations switch_to_tsg_file_ops;
 
 int __init nvdebug_init(void) {
-	struct proc_dir_entry *entry = proc_create("runlist", 0444, NULL, &runlist_file_ops);
-	if (!entry) {
+	struct proc_dir_entry *rl_entry, *preempt_entry, *disable_channel_entry,
+		*enable_channel_entry, *switch_to_tsg_entry;
+	// Create file `/proc/runlist`, world readable
+	rl_entry = proc_create("runlist", 0444, NULL, &runlist_file_ops);
+	// Create file `/proc/preempt_tsg`, world writable
+	preempt_entry = proc_create("preempt_tsg", 0222, NULL, &preempt_tsg_file_ops);
+	// Create file `/proc/disable_channel`, world writable
+	disable_channel_entry = proc_create("disable_channel", 0222, NULL, &disable_channel_file_ops);
+	// Create file `/proc/enable_channel`, world writable
+	enable_channel_entry = proc_create("enable_channel", 0222, NULL, &enable_channel_file_ops);
+	// Create file `/proc/switch_to_tsg`, world writable
+	switch_to_tsg_entry = proc_create("switch_to_tsg", 0222, NULL, &switch_to_tsg_file_ops);
+	// ProcFS entry creation only fails if out of memory
+	if (!rl_entry || !preempt_entry || !disable_channel_entry || !enable_channel_entry || !switch_to_tsg_entry) {
 		remove_proc_entry("runlist", NULL);
+		remove_proc_entry("preempt_tsg", NULL);
+		remove_proc_entry("disable_channel", NULL);
+		remove_proc_entry("enable_channel", NULL);
+		remove_proc_entry("switch_to_tsg", NULL);
 		printk(KERN_ERR "[nvdebug] Unable to initialize procfs entries!\n");
 		return -ENOMEM;
 	}
@@ -34,6 +54,10 @@ int __init nvdebug_init(void) {
 
 static void __exit nvdebug_exit(void) {
 	remove_proc_entry("runlist", NULL);
+	remove_proc_entry("preempt_tsg", NULL);
+	remove_proc_entry("disable_channel", NULL);
+	remove_proc_entry("enable_channel", NULL);
+	remove_proc_entry("switch_to_tsg", NULL);
 	printk(KERN_INFO "[nvdebug] Exiting...\n");
 }
 
diff --git a/runlist.c b/runlist.c
index 8dfa1c7..03528af 100644
--- a/runlist.c
+++ b/runlist.c
@@ -109,3 +109,28 @@ int get_runlist_iter(struct runlist_iter *rl_iter) {
 	printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
 	printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */
 }
+
+int preempt_tsg(uint32_t tsg_id) {
+	struct gk20a *g = get_live_gk20a();
+	runlist_info_t rl_info;
+	pfifo_preempt_t pfifo_preempt;
+	runlist_disable_t rl_disable;
+	if (!g)
+		return -EIO;
+	rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
+	pfifo_preempt.id = tsg_id;
+	pfifo_preempt.is_pending = 0;
+	pfifo_preempt.type = PREEMPT_TYPE_TSG;
+	// There may be a bug (?) that requires us to disable scheduling before preempting
+	rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
+	rl_disable.raw |= BIT(rl_info.id); // Disable runlist rl_info.id
+	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+	// Actually trigger the preemption
+	nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw);
+	// Re-enable scheduling
+	rl_disable.raw &= ~BIT(rl_info.id); // Enable runlist rl_info.id
+	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
+
+	printk(KERN_INFO "[nvdebug] TSG %d preempted (runlist %d)\n", tsg_id, rl_info.id);
+	return 0;
+}
diff --git a/runlist_procfs.c b/runlist_procfs.c
index 183eab6..411f844 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -151,3 +151,138 @@ const struct file_operations runlist_file_ops = {
 	.llseek = seq_lseek,
 	.release = seq_release,
 };
+
+ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
+		size_t count, loff_t *off) {
+	uint32_t target_tsgid;
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
+	if (err)
+		return err;
+
+	// TSG IDs are a 12-bit field, so make sure the request is in-range
+	if (target_tsgid >= MAX_TSGID)
+		return -ERANGE;
+
+	// Execute preemption
+	err = preempt_tsg(target_tsgid);
+	if (err)
+		return err;
+
+	return count;
+}
+
+const struct file_operations preempt_tsg_file_ops = {
+	.write = preempt_tsg_file_write,
+};
+
+ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
+		size_t count, loff_t *off) {
+	uint32_t target_channel;
+	channel_ctrl_t chan;
+	int err;
+	struct gk20a *g = get_live_gk20a();
+	if (!g)
+		return -EIO;
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	if (err)
+		return err;
+
+	if (target_channel >= MAX_CHID)
+		return -ERANGE;
+
+	// Disable channel
+	chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
+	chan.enable_clear = true;
+	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+
+	return count;
+}
+
+const struct file_operations disable_channel_file_ops = {
+	.write = disable_channel_file_write,
+};
+
+ssize_t enable_channel_file_write(struct file *f, const char __user *buffer,
+		size_t count, loff_t *off) {
+	uint32_t target_channel;
+	channel_ctrl_t chan;
+	int err;
+	struct gk20a *g = get_live_gk20a();
+	if (!g)
+		return -EIO;
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	if (err)
+		return err;
+
+	if (target_channel >= MAX_CHID)
+		return -ERANGE;
+
+	// Enable channel
+	chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
+	chan.enable_set = true;
+	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+
+	return count;
+}
+
+const struct file_operations enable_channel_file_ops = {
+	.write = enable_channel_file_write,
+};
+
+ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
+		size_t count, loff_t *off) {
+	uint32_t target_tsgid;
+	struct runlist_chan* chan;
+	channel_ctrl_t chan_ctl;
+	struct runlist_iter rl_iter;
+	int err;
+	loff_t pos = 0;
+	struct gk20a *g = get_live_gk20a();
+	if (!g)
+		return -EIO;
+	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
+	err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
+	if (err)
+		return err;
+
+	if (target_tsgid >= MAX_TSGID)
+		return -ERANGE;
+
+	err = get_runlist_iter(&rl_iter);
+	if (err)
+		return err;
+
+	// Iterate through all TSGs
+	while (pos < rl_iter.rl_info.len) {
+		if (rl_iter.curr_tsg->tsgid == target_tsgid) {
+			// Enable channels of target TSG
+			for_chan_in_tsg(chan, rl_iter.curr_tsg) {
+				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
+				chan_ctl.enable_set = true;
+				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
+			}
+		} else {
+			// Disable all other channels
+			for_chan_in_tsg(chan, rl_iter.curr_tsg) {
+				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
+				chan_ctl.enable_clear = true;
+				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
+			}
+		}
+		pos += 1 + rl_iter.curr_tsg->tsg_length;
+		rl_iter.curr_tsg = next_tsg(rl_iter.curr_tsg);
+	}
+	// Switch to next TSG with active channels (should be our TSG)
+	err = preempt_tsg(target_tsgid);
+	if (err)
+		return err;
+
+	return count;
+}
+
+const struct file_operations switch_to_tsg_file_ops = {
+	.write = switch_to_tsg_file_write,
+};
-- 
cgit v1.2.2
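
Editor's usage sketch (not part of the patch above): the new /proc files are plain
world-writable text interfaces, and because the module parses input with
kstrtou32_from_user() using base 0, the ID may be written in decimal, hex, or octal.
The following minimal userspace program is a hypothetical illustration; the
write_id() helper and the default TSG ID are invented for this example and do not
appear in the module.

/* preempt_tsg_example.c - hypothetical userspace sketch, not included in the patch */
#include <stdio.h>
#include <stdlib.h>

/* Write a single numeric ID to one of the nvdebug procfs files. */
static int write_id(const char *path, unsigned int id)
{
	FILE *f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	/* One short write; the module rejects out-of-range IDs with -ERANGE. */
	fprintf(f, "%u\n", id);
	/* fclose() flushes the stream, so a failed write is reported here. */
	return fclose(f) ? -1 : 0;
}

int main(int argc, char **argv)
{
	/* The default of 1 is only a placeholder; pass the real TSG ID as argv[1]. */
	unsigned int tsgid = (argc > 1) ? strtoul(argv[1], NULL, 0) : 1;

	/* Preempt the given TSG. The same helper works for /proc/switch_to_tsg,
	 * and for /proc/enable_channel and /proc/disable_channel, which take a
	 * channel ID instead of a TSG ID. */
	if (write_id("/proc/preempt_tsg", tsgid))
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}

Running the program requires root (the files are mode 0222), e.g. "./a.out 3" to
preempt TSG 3.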