From 2a58d3c27b45ca9d0d9dc2136377b7a41b9ed82d Mon Sep 17 00:00:00 2001
From: Aingara Paramakuru <aparamakuru@nvidia.com>
Date: Mon, 22 Feb 2016 12:35:49 -0500
Subject: gpu: nvgpu: improve channel interleave support

Previously, only "high" priority bare channels were interleaved
between all other bare channels and TSGs. This patch decouples
priority from interleaving and introduces 3 levels for interleaving
a bare channel or TSG: high, medium, and low. The levels define
the number of times a channel or TSG will appear on a runlist (see
nvgpu.h for details).

By default, all bare channels and TSGs are set to interleave level
low. Userspace can then request a higher interleave level via the
CHANNEL_SET_RUNLIST_INTERLEAVE ioctl (a TSG-specific ioctl will be
added later).

As timeslice settings will soon be coming from userspace, the default
timeslice for "high" priority channels has been restored.

JIRA VFND-1302
Bug 1729664

Change-Id: I178bc1cecda23f5002fec6d791e6dcaedfa05c0c
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/1014962
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c |  85 +++++----
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h |   3 +-
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c    | 300 ++++++++++++++++----------------
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h    |   8 +-
 drivers/gpu/nvgpu/gk20a/gk20a.c         |  16 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  12 +-
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.c     |   1 +
 drivers/gpu/nvgpu/gk20a/tsg_gk20a.h     |   2 +
 drivers/gpu/nvgpu/gm20b/fifo_gm20b.c    |   3 +-
 drivers/gpu/nvgpu/vgpu/fifo_vgpu.c      |  11 +-
 10 files changed, 216 insertions(+), 225 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 2c2850c6..6eecebf5 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -177,7 +177,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
 }
 
 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
-				u32 timeslice_period, bool interleave)
+				u32 timeslice_period)
 {
 	void *inst_ptr;
 	int shift = 0, value = 0;
@@ -205,30 +205,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
 		ccsr_channel_enable_set_true_f());
 
-	if (c->interleave != interleave) {
-		mutex_lock(&c->g->interleave_lock);
-		c->interleave = interleave;
-		if (interleave)
-			if (c->g->num_interleaved_channels >=
-					MAX_INTERLEAVED_CHANNELS) {
-				gk20a_err(dev_from_gk20a(c->g),
-					"Change of priority would exceed runlist length, only changing timeslice\n");
-				c->interleave = false;
-			} else
-				c->g->num_interleaved_channels += 1;
-		else
-			c->g->num_interleaved_channels -= 1;
-
-		mutex_unlock(&c->g->interleave_lock);
-		gk20a_dbg_info("Set channel %d to interleave %d",
-			c->hw_chid, c->interleave);
-
-		gk20a_fifo_set_channel_priority(
-				c->g, 0, c->hw_chid, c->interleave);
-		c->g->ops.fifo.update_runlist(
-				c->g, 0, ~0, true, false);
-	}
-
 	return 0;
 }
 
@@ -711,6 +687,32 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
 	return 0;
 }
 
+static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
+						u32 level)
+{
+	struct gk20a *g = ch->g;
+	int ret;
+
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		gk20a_err(dev_from_gk20a(g), "invalid operation for TSG!\n");
+		return -EINVAL;
+	}
+
+	switch (level) {
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
+		ret = g->ops.fifo.set_runlist_interleave(g, ch->hw_chid,
+							false, 0, level);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+}
+
 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 		struct nvgpu_set_error_notifier *args)
 {
@@ -899,17 +901,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	}
 	mutex_unlock(&f->deferred_reset_mutex);
 
-	if (ch->interleave) {
-		ch->interleave = false;
-		gk20a_fifo_set_channel_priority(
-				ch->g, 0, ch->hw_chid, ch->interleave);
-
-		mutex_lock(&f->g->interleave_lock);
-		WARN_ON(f->g->num_interleaved_channels == 0);
-		f->g->num_interleaved_channels -= 1;
-		mutex_unlock(&f->g->interleave_lock);
-	}
-
 	if (!ch->bound)
 		goto release;
 
@@ -1154,11 +1145,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 	ch->has_timedout = false;
 	ch->wdt_enabled = true;
 	ch->obj_class = 0;
-	ch->interleave = false;
 	ch->clean_up.scheduled = false;
-	gk20a_fifo_set_channel_priority(
-			ch->g, 0, ch->hw_chid, ch->interleave);
-
+	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 
 	/* The channel is *not* runnable at this point. It still needs to have
 	 * an address space bound and allocate a gpfifo and grctx. */
@@ -2613,7 +2601,6 @@ unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
 int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 {
 	u32 timeslice_timeout;
-	bool interleave = false;
 
 	if (gk20a_is_channel_marked_as_tsg(ch)) {
 		gk20a_err(dev_from_gk20a(ch->g),
@@ -2630,8 +2617,6 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 		timeslice_timeout = ch->g->timeslice_medium_priority_us;
 		break;
 	case NVGPU_PRIORITY_HIGH:
-		if (ch->g->interleave_high_priority)
-			interleave = true;
 		timeslice_timeout = ch->g->timeslice_high_priority_us;
 		break;
 	default:
@@ -2640,7 +2625,7 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 	}
 
 	return channel_gk20a_set_schedule_params(ch,
-			timeslice_timeout, interleave);
+			timeslice_timeout);
 }
 
 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
@@ -3045,6 +3030,18 @@ long gk20a_channel_ioctl(struct file *filp,
 		err = gk20a_channel_set_wdt_status(ch,
 				(struct nvgpu_channel_wdt_args *)buf);
 		break;
+	case NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE:
+		err = gk20a_busy(dev);
+		if (err) {
+			dev_err(&dev->dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_channel_set_runlist_interleave(ch,
+			((struct nvgpu_runlist_interleave_args *)buf)->level);
+		gk20a_idle(dev);
+		break;
 	default:
 		dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 4aea9d19..3f5a657a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -188,8 +188,7 @@ struct channel_gk20a {
 	spinlock_t update_fn_lock; /* make access to the two above atomic */
 	struct work_struct update_fn_work;
 
-	/* true if channel is interleaved with lower priority channels */
-	bool interleave;
+	u32 interleave_level;
 };
 
 static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af..28cc3086 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_tsgs)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size  = ram_rl_entry_size_v() * f->num_runlist_entries;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -337,9 +331,6 @@ clean_up_runlist_info:
 	kfree(runlist->active_tsgs);
 	runlist->active_tsgs = NULL;
 
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(f->runlist_info);
 	f->runlist_info = NULL;
 
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg)
 	return runlist_entry_0;
 }
 
-/* add all active high priority channels */
-static inline u32 gk20a_fifo_runlist_add_high_prio_entries(
-		struct fifo_gk20a *f,
-		struct fifo_runlist_info_gk20a *runlist,
-		u32 *runlist_entry)
+/* recursively construct a runlist with interleaved bare channels and TSGs */
+static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
+				struct fifo_runlist_info_gk20a *runlist,
+				u32 cur_level,
+				u32 *runlist_entry,
+				bool interleave_enabled,
+				bool prev_empty,
+				u32 *entries_left)
 {
-	struct channel_gk20a *ch = NULL;
-	unsigned long high_prio_chid;
-	u32 count = 0;
+	bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
+	struct channel_gk20a *ch;
+	bool skip_next = false;
+	u32 chid, tsgid, count = 0;
+
+	gk20a_dbg_fn("");
 
-	for_each_set_bit(high_prio_chid,
-			runlist->high_prio_channels, f->num_channels) {
-		ch = &f->channel[high_prio_chid];
+	/* for each bare channel, CH, on this level, insert all higher-level
+	   channels and TSGs before inserting CH. */
+	for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
+		ch = &f->channel[chid];
+
+		if (ch->interleave_level != cur_level)
+			continue;
+
+		if (gk20a_is_channel_marked_as_tsg(ch))
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+							runlist,
+							cur_level + 1,
+							runlist_entry,
+							interleave_enabled,
+							false,
+							entries_left);
+			/* if interleaving is disabled, higher-level channels
+			   and TSGs only need to be inserted once */
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		gk20a_dbg_info("add channel %d to runlist", chid);
+		runlist_entry[0] = ram_rl_entry_chid_f(chid);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+	}
 
-		if (!gk20a_is_channel_marked_as_tsg(ch) &&
-		     test_bit(high_prio_chid, runlist->active_channels) == 1) {
-			gk20a_dbg_info("add high prio channel %lu to runlist",
-					high_prio_chid);
-			runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid);
+	/* for each TSG, T, on this level, insert all higher-level channels
+	   and TSGs before inserting T. */
+	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
+		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+
+		if (tsg->interleave_level != cur_level)
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+							runlist,
+							cur_level + 1,
+							runlist_entry,
+							interleave_enabled,
+							false,
+							entries_left);
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		/* add TSG entry */
+		gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
+		runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+
+		mutex_lock(&tsg->ch_list_lock);
+		/* add runnable channels bound to this TSG */
+		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+			if (!test_bit(ch->hw_chid,
+				      runlist->active_channels))
+				continue;
+
+			if (!(*entries_left)) {
+				mutex_unlock(&tsg->ch_list_lock);
+				return NULL;
+			}
+
+			gk20a_dbg_info("add channel %d to runlist",
+				ch->hw_chid);
+			runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid);
 			runlist_entry[1] = 0;
 			runlist_entry += 2;
 			count++;
+			(*entries_left)--;
 		}
+		mutex_unlock(&tsg->ch_list_lock);
 	}
 
-	return count;
+	/* append entries from higher level if this level is empty */
+	if (!count && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+							runlist,
+							cur_level + 1,
+							runlist_entry,
+							interleave_enabled,
+							true,
+							entries_left);
+
+	/*
+	 * if both the previous level and this level have entries,
+	 * append the entries from the next higher level.
+	 *
+	 * e.g. when dropping from MEDIUM to LOW, HIGH must be inserted
+	 */
+	if (interleave_enabled && count && !prev_empty && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+							runlist,
+							cur_level + 1,
+							runlist_entry,
+							interleave_enabled,
+							false,
+							entries_left);
+	return runlist_entry;
+}
+
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+				u32 id,
+				bool is_tsg,
+				u32 runlist_id,
+				u32 new_level)
+{
+	gk20a_dbg_fn("");
+
+	if (is_tsg)
+		g->fifo.tsg[id].interleave_level = new_level;
+	else
+		g->fifo.channel[id].interleave_level = new_level;
+
+	return 0;
 }
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 *runlist_entry_base = NULL;
-	u32 *runlist_entry = NULL;
 	u64 runlist_iova;
 	u32 old_buf, new_buf;
-	u32 chid, tsgid;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
 	u32 count = 0;
-	u32 count_channels_in_tsg;
 	runlist = &f->runlist_info[runlist_id];
 
 	/* valid channel, add/remove it from active list.
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 
 	if (hw_chid != ~0 || /* add/remove a valid channel */
 	    add /* resume to add all channels back */) {
-		runlist_entry = runlist_entry_base;
-
-		/* Runlist manipulation:
-		   Insert an entry of all high priority channels inbetween
-		   all lower priority channels. This ensure that the maximum
-		   delay a runnable high priority channel has to wait is one
-		   medium timeslice + any context switching overhead +
-		   wait on other high priority channels.
-		   add non-TSG channels first */
-		for_each_set_bit(chid,
-			runlist->active_channels, f->num_channels) {
-			ch = &f->channel[chid];
-
-			if (!gk20a_is_channel_marked_as_tsg(ch) &&
-				!ch->interleave) {
-				u32 added;
-
-				gk20a_dbg_info("add normal prio channel %d to runlist",
-					chid);
-				runlist_entry[0] = ram_rl_entry_chid_f(chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-
-				added =	gk20a_fifo_runlist_add_high_prio_entries(
-						f,
-						runlist,
-						runlist_entry);
-				count += added;
-				runlist_entry += 2 * added;
-			}
-		}
+		u32 max_entries = f->num_runlist_entries;
+		u32 *runlist_end;
 
-		/* if there were no lower priority channels, then just
-		 * add the high priority channels once. */
-		if (count == 0) {
-			count =	gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-			runlist_entry += 2 * count;
+		runlist_end = gk20a_runlist_construct_locked(f,
+						runlist,
+						0,
+						runlist_entry_base,
+						g->runlist_interleave,
+						true,
+						&max_entries);
+		if (!runlist_end) {
+			ret = -E2BIG;
+			goto clean_up;
 		}
 
-		/* now add TSG entries and channels bound to TSG */
-		mutex_lock(&f->tsg_inuse_mutex);
-		for_each_set_bit(tsgid,
-				runlist->active_tsgs, f->num_channels) {
-			u32 added;
-			tsg = &f->tsg[tsgid];
-			/* add TSG entry */
-			gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
-			runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
-			runlist_entry[1] = 0;
-			runlist_entry += 2;
-			count++;
-
-			/* add runnable channels bound to this TSG */
-			count_channels_in_tsg = 0;
-			mutex_lock(&tsg->ch_list_lock);
-			list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
-				if (!test_bit(ch->hw_chid,
-						runlist->active_channels))
-					continue;
-				gk20a_dbg_info("add channel %d to runlist",
-					ch->hw_chid);
-				runlist_entry[0] =
-					ram_rl_entry_chid_f(ch->hw_chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-				count_channels_in_tsg++;
-			}
-			mutex_unlock(&tsg->ch_list_lock);
-
-			WARN_ON(tsg->num_active_channels !=
-				count_channels_in_tsg);
-
-			added = gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-			count += added;
-			runlist_entry += 2 * added;
-		}
-		mutex_unlock(&f->tsg_inuse_mutex);
+		count = (runlist_end - runlist_entry_base) / 2;
+		WARN_ON(count > f->num_runlist_entries);
 	} else	/* suspend to remove all channels */
 		count = 0;
 
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
 	return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
 }
 
-int gk20a_fifo_set_channel_priority(
-		struct gk20a *g,
-		u32 runlist_id,
-		u32 hw_chid,
-		bool interleave)
-{
-	struct fifo_runlist_info_gk20a *runlist = NULL;
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = NULL;
-
-	if (hw_chid >= f->num_channels)
-		return -EINVAL;
-
-	if (runlist_id >= f->max_runlists)
-		return -EINVAL;
-
-	ch = &f->channel[hw_chid];
-
-	gk20a_dbg_fn("");
-
-	runlist = &f->runlist_info[runlist_id];
-
-	mutex_lock(&runlist->mutex);
-
-	if (ch->interleave)
-		set_bit(hw_chid, runlist->high_prio_channels);
-	else
-		clear_bit(hw_chid, runlist->high_prio_channels);
-
-	gk20a_dbg_fn("done");
-
-	mutex_unlock(&runlist->mutex);
-
-	return 0;
-}
-
 struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid)
 {
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index ee4e7328..0979bf2b 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -31,7 +31,6 @@
 struct fifo_runlist_info_gk20a {
 	unsigned long *active_channels;
 	unsigned long *active_tsgs;
-	unsigned long *high_prio_channels;
 	/* Each engine has its own SW and HW runlist buffer.*/
 	struct mem_desc mem[MAX_RUNLIST_BUFFERS];
 	u32  cur_buffer;
@@ -184,8 +183,6 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
 int gk20a_fifo_wait_engine_idle(struct gk20a *g);
 u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
 u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
-int gk20a_fifo_set_channel_priority(struct gk20a *g, u32 runlist_id,
-		u32 hw_chid, bool interleave);
 u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
 		int *__id, bool *__is_tsg);
 bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -198,4 +195,9 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid);
 
 void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg);
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+				u32 id,
+				bool is_tsg,
+				u32 runlist_id,
+				u32 new_level);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index fa2c61e1..0fee58e8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -672,9 +672,6 @@ static int gk20a_init_support(struct platform_device *dev)
 	mutex_init(&g->ch_wdt_lock);
 	mutex_init(&g->poweroff_lock);
 
-	mutex_init(&g->interleave_lock);
-	g->num_interleaved_channels = 0;
-
 	g->remove_support = gk20a_remove_support;
 	return 0;
 
@@ -1439,14 +1436,11 @@ static int gk20a_probe(struct platform_device *dev)
 	if (tegra_platform_is_silicon())
 		gk20a->timeouts_enabled = true;
 
-	gk20a->interleave_high_priority = true;
+	gk20a->runlist_interleave = true;
 
 	gk20a->timeslice_low_priority_us = 1300;
 	gk20a->timeslice_medium_priority_us = 2600;
-	if (gk20a->interleave_high_priority)
-		gk20a->timeslice_high_priority_us = 3000;
-	else
-		gk20a->timeslice_high_priority_us = 5200;
+	gk20a->timeslice_high_priority_us = 5200;
 
 	/* Set up initial power settings. For non-slicon platforms, disable *
 	 * power features and for silicon platforms, read from platform data */
@@ -1527,11 +1521,11 @@ static int gk20a_probe(struct platform_device *dev)
 					platform->debugfs,
 					&gk20a->timeslice_high_priority_us);
 
-	gk20a->debugfs_interleave_high_priority =
-			debugfs_create_bool("interleave_high_priority",
+	gk20a->debugfs_runlist_interleave =
+			debugfs_create_bool("runlist_interleave",
 					S_IRUGO|S_IWUSR,
 					platform->debugfs,
-					&gk20a->interleave_high_priority);
+					&gk20a->runlist_interleave);
 
 	gr_gk20a_debugfs_init(gk20a);
 	gk20a_pmu_debugfs_init(dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index afdbeef7..faccf04a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -54,8 +54,6 @@ struct acr_gm20b;
     32 ns is the resolution of ptimer. */
 #define PTIMER_REF_FREQ_HZ                      31250000
 
-#define MAX_INTERLEAVED_CHANNELS                32
-
 struct cooling_device_gk20a {
 	struct thermal_cooling_device *gk20a_cooling_dev;
 	unsigned int gk20a_freq_state;
@@ -268,6 +266,9 @@ struct gpu_ops {
 		u32 (*get_num_fifos)(struct gk20a *g);
 		u32 (*get_pbdma_signature)(struct gk20a *g);
 		int (*channel_set_priority)(struct channel_gk20a *ch, u32 priority);
+		int (*set_runlist_interleave)(struct gk20a *g, u32 id,
+					bool is_tsg, u32 runlist_id,
+					u32 new_level);
 	} fifo;
 	struct pmu_v {
 		/*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -536,10 +537,7 @@ struct gk20a {
 	u32 timeslice_low_priority_us;
 	u32 timeslice_medium_priority_us;
 	u32 timeslice_high_priority_us;
-	u32 interleave_high_priority;
-
-	struct mutex interleave_lock;
-	u32 num_interleaved_channels;
+	u32 runlist_interleave;
 
 	bool slcg_enabled;
 	bool blcg_enabled;
@@ -564,7 +562,7 @@ struct gk20a {
 	struct dentry *debugfs_timeslice_low_priority_us;
 	struct dentry *debugfs_timeslice_medium_priority_us;
 	struct dentry *debugfs_timeslice_high_priority_us;
-	struct dentry *debugfs_interleave_high_priority;
+	struct dentry *debugfs_runlist_interleave;
 
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 4421744c..b41cca08 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -228,6 +228,7 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 
 	tsg->tsg_gr_ctx = NULL;
 	tsg->vm = NULL;
+	tsg->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 
 	filp->private_data = tsg;
 
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index bcc4d0c4..7e0a75d1 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -49,6 +49,8 @@ struct tsg_gk20a {
 	struct gr_ctx_desc *tsg_gr_ctx;
 
 	struct vm_gk20a *vm;
+
+	u32 interleave_level;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index d1deffb9..3fded03c 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B Fifo
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -121,4 +121,5 @@ void gm20b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gm20b_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index e776e97c..b4bb7f38 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Fifo
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -194,12 +194,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_channels)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size  = sizeof(u16) * f->num_channels;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -222,9 +216,6 @@ clean_up_runlist:
 		gk20a_gmmu_free(g, &runlist->mem[i]);
 
 clean_up_runlist_info:
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(runlist->active_channels);
 	runlist->active_channels = NULL;
 
-- 
cgit v1.2.2