author     Joshua Bakita <bakitajoshua@gmail.com>    2024-09-19 12:50:02 -0400
committer  Joshua Bakita <bakitajoshua@gmail.com>    2024-09-19 13:59:56 -0400
commit     48f9e45b9d9ebfca7d3c673597f7fbed9427a5af (patch)
tree       d63c3863e9b187fb1f62d2f3c58b8c6a9568b6cc /runlist_procfs.c
parent     ac0113ab52d2ca12a5c10feeaa887d10c37ee4f1 (diff)
Ampere: disable/enable_channel, preempt/switch_to_tsg, and resubmit_runlist
**Modifies the user API from `echo 1 > /proc/gpuX/switch_to_tsg` to
`echo 1 > /proc/gpuX/runlist0/switch_to_tsg` (for example, to switch to
TSG 1 on runlist 0 of GPU X on a pre-Ampere GPU).**
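For example, with an illustrative GPU index of 0, the old GPU-global path
versus the new per-runlist path:
```
# Illustrative GPU/runlist indices; substitute your own.
# Old API (GPU-global):
echo 1 > /proc/gpu0/switch_to_tsg
# New API (per-runlist):
echo 1 > /proc/gpu0/runlist0/switch_to_tsg
```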
Feature changes:
- switch_to_tsg only makes sense at a per-runlist level. Before, it
always operated on runlist0; this commit allows operating on any
runlist by moving the API to the per-runlist paths.
- On Ampere+, channel and TSG IDs are per-runlist, and no longer
GPU-global. Consequently, the disable/enable_channel and
preempt_tsg APIs have been moved from GPU-global to per-runlist
paths on Ampere+.
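For example, on an Ampere+ GPU the per-runlist paths look like the following
(GPU and runlist indices, and the channel/TSG IDs, are illustrative):
```
# Illustrative indices and IDs; channel/TSG IDs are scoped to the runlist.
echo 3 > /proc/gpu0/runlist1/preempt_tsg      # preempt TSG 3 on runlist 1 of GPU 0
echo 4 > /proc/gpu0/runlist1/disable_channel  # disable channel 4 on runlist 1
echo 4 > /proc/gpu0/runlist1/enable_channel   # re-enable channel 4 on runlist 1
```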
Bug fixes:
- `preempt_runlist()` is now supported on Maxwell and Pascal.
- `resubmit_runlist()` detects too-old GPUs.
- MAX_CHID corrected from 512 to 511 and documented.
- switch_to_tsg now includes a runlist resubmit, which appears to be
necessary on Turing+ GPUs.
Tested on GK104 (Quadro K5000), GM204 (GTX 970), GP106 (GTX 1060 3GB),
GP104 (GTX 1080 Ti), GP10B (Jetson TX2), GV11B (Jetson Xavier), GV100
(Titan V), TU102 (RTX 2080 Ti), and AD102 (RTX 6000 Ada).
Diffstat (limited to 'runlist_procfs.c')
-rw-r--r--  runlist_procfs.c  198
1 file changed, 140 insertions, 58 deletions
diff --git a/runlist_procfs.c b/runlist_procfs.c
index c1cfc87..b2159f6 100644
--- a/runlist_procfs.c
+++ b/runlist_procfs.c
@@ -199,11 +199,11 @@ struct file_operations runlist_file_ops = {
 };
 
 ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
                                size_t count, loff_t *off) {
-	uint32_t target_tsgid;
+	uint32_t target_tsgid, target_runlist_ram;
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
 	int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	if (err)
 		return err;
 
@@ -211,8 +211,15 @@ ssize_t preempt_tsg_file_write(struct file *f, const char __user *buffer,
 	if (target_tsgid > MAX_TSGID)
 		return -ERANGE;
 
+	// (Ab)use the PDE_DATA field for the index into which Runlist RAM this TSG
+	// ID is scoped to (only applicable on Ampere+)
+	if (g->chip_id >= NV_CHIP_ID_AMPERE)
+		target_runlist_ram = file2gpuidx(f);
+	else
+		target_runlist_ram = 0;
+
 	// Execute preemption
-	if ((err = preempt_tsg(g, target_tsgid)))
+	if ((err = preempt_tsg(g, target_runlist_ram, target_tsgid)))
 		return err;
 
 	return count;
@@ -223,13 +230,12 @@ struct file_operations preempt_tsg_file_ops = {
 	.llseek = default_llseek,
 };
 
-
 ssize_t resubmit_runlist_file_write(struct file *f, const char __user *buffer,
                                     size_t count, loff_t *off) {
 	uint32_t target_runlist;
+	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
 	int err = kstrtou32_from_user(buffer, count, 0, &target_runlist);
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
 	if (err)
 		return err;
 
@@ -245,26 +251,48 @@ struct file_operations resubmit_runlist_file_ops = {
 	.llseek = default_llseek,
 };
 
+
 ssize_t disable_channel_file_write(struct file *f, const char __user *buffer,
                                    size_t count, loff_t *off) {
 	uint32_t target_channel;
-	channel_ctrl_t chan;
-	int err;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_channel);
 	if (err)
 		return err;
 
-	if (target_channel > MAX_CHID)
-		return -ERANGE;
-
-	// Read current configuration
-	if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
-		return -EIO;
-	// Request disablement
-	chan.enable_clear = true;
-	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	if (g->chip_id < NV_CHIP_ID_AMPERE) {
+		channel_ctrl_t chan;
+		if (target_channel > MAX_CHID)
+			return -ERANGE;
+		// Read current configuration
+		if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
+			return -EIO;
+		// Request disablement
+		chan.enable_clear = true;
+		nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	} else {
+		uint32_t runlist_reg_base, chram_base, channel_max;
+		runlist_channel_config_t channel_config;
+		channel_ctrl_ga100_t chan;
+		// (Ab)use the PDE_DATA field for the runlist ID
+		if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_max = 1u << channel_config.num_channels_log2;
+		if (target_channel >= channel_max)
+			return -ERANGE;
+		chram_base = (uint32_t)channel_config.bar0_offset << 4;
+		// Writing zeros to any field of the Ampere+ channel control structure
+		// does nothing, so don't bother to read the structure first, and just
+		// write zeros to all the fields we don't care about.
+		chan.raw = 0;
+		chan.is_write_one_clears_bits = 1; // Invert meaning of writing 1
+		chan.enable = 1;
+		nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw);
+	}
 
 	return count;
 }
@@ -275,23 +303,45 @@ struct file_operations disable_channel_file_ops = {
 };
 
 ssize_t enable_channel_file_write(struct file *f, const char __user *buffer,
                                   size_t count, loff_t *off) {
 	uint32_t target_channel;
-	channel_ctrl_t chan;
-	int err;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_channel);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_channel);
 	if (err)
 		return err;
 
-	if (target_channel > MAX_CHID)
-		return -ERANGE;
-
-	// Disable channel
-	chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel));
-	chan.enable_set = true;
-	nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	if (g->chip_id < NV_CHIP_ID_AMPERE) {
+		channel_ctrl_t chan;
+		if (target_channel > MAX_CHID)
+			return -ERANGE;
+		// Read current configuration
+		if ((chan.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(target_channel))) == -1)
+			return -EIO;
+		// Disable channel
+		chan.enable_set = true;
+		nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(target_channel), chan.raw);
+	} else {
+		uint32_t runlist_reg_base, chram_base, channel_max;
+		runlist_channel_config_t channel_config;
+		channel_ctrl_ga100_t chan;
+		// (Ab)use the PDE_DATA field for the runlist ID
+		if ((err = get_runlist_ram(g, file2gpuidx(f), &runlist_reg_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((channel_config.raw = nvdebug_readl(g, runlist_reg_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_max = 1u << channel_config.num_channels_log2;
+		if (target_channel >= channel_max)
+			return -ERANGE;
+		chram_base = (uint32_t)channel_config.bar0_offset << 4;
+		// Writing zeros to any field of the Ampere+ channel control structure
+		// does nothing, so don't bother to read the structure first, and just
+		// write zeros to all the fields we don't care about.
+		chan.raw = 0;
+		chan.enable = 1;
+		nvdebug_writel(g, chram_base + sizeof(channel_ctrl_ga100_t) * target_channel, chan.raw);
+	}
 
 	return count;
 }
@@ -301,52 +351,84 @@ struct file_operations enable_channel_file_ops = {
 	.llseek = default_llseek,
 };
 
-// Note: Operates only on runlist 0 (Compute/Graphics)
+// Tested working on Pascal (gp106) through Ada (ad102)
 ssize_t switch_to_tsg_file_write(struct file *f, const char __user *buffer,
                                  size_t count, loff_t *off) {
-	uint32_t target_tsgid;
+	uint32_t target_tsgid, target_runlist, channel_regs_base;
 	struct gv100_runlist_chan* chan;
 	channel_ctrl_t chan_ctl;
+	channel_ctrl_ga100_t chan_ctl_ga100;
 	struct runlist_iter rl_iter;
-	int err;
 	loff_t pos = 0;
-	struct nvdebug_state *g = &g_nvdebug_state[file2gpuidx(f)];
+	struct nvdebug_state *g = &g_nvdebug_state[file2parentgpuidx(f)];
 	// Passing 0 as the base to kstrtou32 indicates autodetect hex/octal/dec
-	err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
+	int err = kstrtou32_from_user(buffer, count, 0, &target_tsgid);
 	if (err)
 		return err;
 
 	if (target_tsgid > MAX_TSGID)
 		return -ERANGE;
 
-	err = get_runlist_iter(g, 0, &rl_iter);
-	if (err)
+	// (Ab)use the PDE_DATA field for the runlist ID
+	target_runlist = file2gpuidx(f);
+
+	if ((err = get_runlist_iter(g, target_runlist, &rl_iter)))
 		return err;
 
+	// On Ampere, TSG and Channel IDs are only unique per-runlist, so we need
+	// to pull the per-runlist copy of Channel RAM.
+	if (g->chip_id >= NV_CHIP_ID_AMPERE) {
+		uint32_t runlist_regs_base;
+		runlist_channel_config_t chan_config;
+		if ((err = get_runlist_ram(g, target_runlist, &runlist_regs_base)))
+			return err;
+		// Channel RAM is subsidiary to Runlist RAM (ie. per-runlist) on Ampere
+		if ((chan_config.raw = nvdebug_readl(g, runlist_regs_base + NV_RUNLIST_CHANNEL_CONFIG_GA100)) == -1)
+			return -EIO;
+		channel_regs_base = (uint32_t)chan_config.bar0_offset << 4;
+	}
+
 	// Iterate through all TSGs
 	while (pos < rl_iter.len) {
-		if (tsgid(g, rl_iter.curr_entry) == target_tsgid) {
-			// Enable channels of target TSG
-			for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
-				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
-				chan_ctl.enable_set = true;
-				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
-			}
-		} else {
-			// XXX: Fix for bare channels. Maybe a "for_chan_until_tsg" macro?
-			// Disable all other channels
-			// (This is how the Jetson nvgpu driver disables TSGs)
-			for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
-				chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chan->chid));
-				chan_ctl.enable_clear = true;
-				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chan->chid), chan_ctl.raw);
+		bool enable = false;
+		if (tsgid(g, rl_iter.curr_entry) == target_tsgid)
+			enable = true;
+
+		// Either enable or disable all channels of each TSG, dependent on if
+		// they are contained within the target TSG or not.
+		for_chan_in_tsg(g, chan, rl_iter.curr_entry) {
+			if (g->chip_id < NV_CHIP_ID_AMPERE) {
+				// Read, update, write for PCCSR
+				if ((chan_ctl.raw = nvdebug_readq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)))) == -1)
+					return -EIO;
+				if (enable)
+					chan_ctl.enable_set = true;
+				else
+					chan_ctl.enable_clear = true;
+				nvdebug_writeq(g, NV_PCCSR_CHANNEL_INST(chid(g, chan)), chan_ctl.raw);
+			} else {
+				// Writing a 0 does nothing on Ampere+, so we can just write
+				chan_ctl_ga100.raw = 0;
+				chan_ctl_ga100.is_write_one_clears_bits = !enable;
+				chan_ctl_ga100.enable = true;
+				nvdebug_writel(g, channel_regs_base + sizeof(chan_ctl_ga100) * chid(g, chan), chan_ctl_ga100.raw);
 			}
 		}
 		pos += 1 + tsg_length(g, rl_iter.curr_entry);
 		rl_iter.curr_entry = next_tsg(g, rl_iter.curr_entry);
+
+		// TODO: Fix the above for bare channels. Add "for_chan_until_tsg"?
 	}
-	// Trigger a runlist-level preempt to switch to `target_tsgid`
-	if ((err = preempt_runlist(g, 0)))
+
+	// Resubmit the runlist to ensure that changes to channel enablement are
+	// picked up on Turing+ GPUs (channel enablements may not be otherwise).
+	if (g->chip_id >= NV_CHIP_ID_TURING)
+		if ((err = resubmit_runlist(g, target_runlist)))
+			return err;
+
+	// Trigger a runlist-level preempt to stop whatever was running, triggering
+	// the runlist scheduler to select and run the next-enabled channel.
+	if ((err = preempt_runlist(g, target_runlist)))
 		return err;
 
 	return count;