gpu: nvgpu: add high priority channel interleave

Interleave all high priority channels between all other channels. This reduces the latency for high priority work when there is a lot of lower priority work present, imposing an upper bound on the latency. Change the default high priority timeslice from 5.2ms to 3.0ms in the process, to prevent long running high priority apps from hogging the GPU too much. Introduce a new debugfs node to enable/disable high priority channel interleaving. It is currently enabled by default. Add a new runlist length max register, used for allocating a suitably sized runlist. Limit the number of interleaved channels to 32. This change reduces the maximum time a lower priority job is running (one timeslice) before we check that high priority jobs are running. Tested with gles2_context_priority (still passes). Basic sanity testing is done with graphics_submit (one app is high priority). Also more functional testing using lots of parallel runs with: NVRM_GPU_CHANNEL_PRIORITY=3 ./gles2_expensive_draw --drawsperframe 20000 --triangles 50 --runtime 30 --finish plus multiple: NVRM_GPU_CHANNEL_PRIORITY=2 ./gles2_expensive_draw --drawsperframe 20000 --triangles 50 --runtime 30 --finish Prior to this change, the relative performance between high priority work and normal priority work comes down to timeslice value. This means that when there are many low priority channels, the high priority work will still drop quite a lot. But with this change, the high priority work will roughly get about half the entire GPU time, meaning that after the initial lower performance, it is less likely to get lower in performance due to more apps running on the system. This change makes a large step towards real priority levels. It is not perfect and there are no guarantees on anything, but it is a step forwards without any additional CPU overhead or other complications. It will also serve as a baseline to judge other algorithms against. Support for priorities with TSG is future work. 
Support for interleaving mid + high priority channels, instead of just high, is also future work. Bug 1419900 Change-Id: I0f7d0ce83b6598fe86000577d72e14d312fdad98 Signed-off-by: Peter Pipkorn <ppipkorn@nvidia.com> Reviewed-on: http://git-master/r/805961 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: Peter Pipkorn <ppipkorn@nvidia.com> 2015-09-28 07:49:53 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2016-01-11 12:04:01 -0500
commit: 2b064ce65e0035a860d1bc3bcccfcf8aac1f31c7 (patch)
tree: 1f20c0e608efcca51ef321d308df8e8cb059ad8c /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent: a9c6f595399074e88c16f3557e5acb29db1d52d5 (diff)
1 files changed, 46 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a5c2efb3..0421c0f6 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -175,7 +175,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
 }
 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
-                                u32 timeslice_period)
+                                u32 timeslice_period, bool interleave)
 {
        void *inst_ptr;
        int shift = 0, value = 0;
@@ -203,6 +203,30 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
                gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
                ccsr_channel_enable_set_true_f());
+        if (c->interleave != interleave) {
+                mutex_lock(&c->g->interleave_lock);
+                c->interleave = interleave;
+                if (interleave)
+                        if (c->g->num_interleaved_channels >=
+                                        MAX_INTERLEAVED_CHANNELS) {
+                                gk20a_err(dev_from_gk20a(c->g),
+                                        "Change of priority would exceed runlist length, only changing timeslice\n");
+                                c->interleave = false;
+                        } else
+                                c->g->num_interleaved_channels += 1;
+                else
+                        c->g->num_interleaved_channels -= 1;
+                mutex_unlock(&c->g->interleave_lock);
+                gk20a_dbg_info("Set channel %d to interleave %d",
+                        c->hw_chid, c->interleave);
+                gk20a_fifo_set_channel_priority(
+                                c->g, 0, c->hw_chid, c->interleave);
+                c->g->ops.fifo.update_runlist(
+                                c->g, 0, ~0, true, false);
+        }
        return 0;
 }
@@ -836,6 +860,17 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
        }
        mutex_unlock(&f->deferred_reset_mutex);
+        if (ch->interleave) {
+                ch->interleave = false;
+                gk20a_fifo_set_channel_priority(
+                                ch->g, 0, ch->hw_chid, ch->interleave);
+                mutex_lock(&f->g->interleave_lock);
+                WARN_ON(f->g->num_interleaved_channels == 0);
+                f->g->num_interleaved_channels -= 1;
+                mutex_unlock(&f->g->interleave_lock);
+        }
        if (!ch->bound)
                goto release;
@@ -1079,6 +1114,10 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
        ch->timeout_debug_dump = true;
        ch->has_timedout = false;
        ch->obj_class = 0;
+        ch->interleave = false;
+        gk20a_fifo_set_channel_priority(
+                        ch->g, 0, ch->hw_chid, ch->interleave);
        /* The channel is *not* runnable at this point. It still needs to have
         * an address space bound and allocate a gpfifo and grctx. */
@@ -2458,6 +2497,7 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch,
                u32 priority)
 {
        u32 timeslice_timeout;
+        bool interleave = false;
        if (gk20a_is_channel_marked_as_tsg(ch)) {
                gk20a_err(dev_from_gk20a(ch->g),
@@ -2474,15 +2514,17 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch,
                timeslice_timeout = ch->g->timeslice_medium_priority_us;
                break;
        case NVGPU_PRIORITY_HIGH:
+                if (ch->g->interleave_high_priority)
+                        interleave = true;
                timeslice_timeout = ch->g->timeslice_high_priority_us;
                break;
        default:
                pr_err("Unsupported priority");
                return -EINVAL;
        }
-        channel_gk20a_set_schedule_params(ch,
-                        timeslice_timeout);
+        return channel_gk20a_set_schedule_params(ch,
-        return 0;
+                        timeslice_timeout, interleave);
 }
 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
author	Peter Pipkorn <ppipkorn@nvidia.com>	2015-09-28 07:49:53 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-01-11 12:04:01 -0500
commit	2b064ce65e0035a860d1bc3bcccfcf8aac1f31c7 (patch)
tree	1f20c0e608efcca51ef321d308df8e8cb059ad8c /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent	a9c6f595399074e88c16f3557e5acb29db1d52d5 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index a5c2efb3..0421c0f6 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -175,7 +175,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
175	}	175	}
176		176
177	static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,	177	static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
178	u32 timeslice_period)	178	u32 timeslice_period, bool interleave)
179	{	179	{
180	void *inst_ptr;	180	void *inst_ptr;
181	int shift = 0, value = 0;	181	int shift = 0, value = 0;
@@ -203,6 +203,30 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
203	gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) \|	203	gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) \|
204	ccsr_channel_enable_set_true_f());	204	ccsr_channel_enable_set_true_f());
205		205
		206	if (c->interleave != interleave) {
		207	mutex_lock(&c->g->interleave_lock);
		208	c->interleave = interleave;
		209	if (interleave)
		210	if (c->g->num_interleaved_channels >=
		211	MAX_INTERLEAVED_CHANNELS) {
		212	gk20a_err(dev_from_gk20a(c->g),
		213	"Change of priority would exceed runlist length, only changing timeslice\n");
		214	c->interleave = false;
		215	} else
		216	c->g->num_interleaved_channels += 1;
		217	else
		218	c->g->num_interleaved_channels -= 1;
		219
		220	mutex_unlock(&c->g->interleave_lock);
		221	gk20a_dbg_info("Set channel %d to interleave %d",
		222	c->hw_chid, c->interleave);
		223
		224	gk20a_fifo_set_channel_priority(
		225	c->g, 0, c->hw_chid, c->interleave);
		226	c->g->ops.fifo.update_runlist(
		227	c->g, 0, ~0, true, false);
		228	}
		229
206	return 0;	230	return 0;
207	}	231	}
208		232
@@ -836,6 +860,17 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
836	}	860	}
837	mutex_unlock(&f->deferred_reset_mutex);	861	mutex_unlock(&f->deferred_reset_mutex);
838		862
		863	if (ch->interleave) {
		864	ch->interleave = false;
		865	gk20a_fifo_set_channel_priority(
		866	ch->g, 0, ch->hw_chid, ch->interleave);
		867
		868	mutex_lock(&f->g->interleave_lock);
		869	WARN_ON(f->g->num_interleaved_channels == 0);
		870	f->g->num_interleaved_channels -= 1;
		871	mutex_unlock(&f->g->interleave_lock);
		872	}
		873
839	if (!ch->bound)	874	if (!ch->bound)
840	goto release;	875	goto release;
841		876
@@ -1079,6 +1114,10 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
1079	ch->timeout_debug_dump = true;	1114	ch->timeout_debug_dump = true;
1080	ch->has_timedout = false;	1115	ch->has_timedout = false;
1081	ch->obj_class = 0;	1116	ch->obj_class = 0;
		1117	ch->interleave = false;
		1118	gk20a_fifo_set_channel_priority(
		1119	ch->g, 0, ch->hw_chid, ch->interleave);
		1120
1082		1121
1083	/* The channel is *not* runnable at this point. It still needs to have	1122	/* The channel is *not* runnable at this point. It still needs to have
1084	* an address space bound and allocate a gpfifo and grctx. */	1123	* an address space bound and allocate a gpfifo and grctx. */
@@ -2458,6 +2497,7 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2458	u32 priority)	2497	u32 priority)
2459	{	2498	{
2460	u32 timeslice_timeout;	2499	u32 timeslice_timeout;
		2500	bool interleave = false;
2461		2501
2462	if (gk20a_is_channel_marked_as_tsg(ch)) {	2502	if (gk20a_is_channel_marked_as_tsg(ch)) {
2463	gk20a_err(dev_from_gk20a(ch->g),	2503	gk20a_err(dev_from_gk20a(ch->g),
@@ -2474,15 +2514,17 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2474	timeslice_timeout = ch->g->timeslice_medium_priority_us;	2514	timeslice_timeout = ch->g->timeslice_medium_priority_us;
2475	break;	2515	break;
2476	case NVGPU_PRIORITY_HIGH:	2516	case NVGPU_PRIORITY_HIGH:
		2517	if (ch->g->interleave_high_priority)
		2518	interleave = true;
2477	timeslice_timeout = ch->g->timeslice_high_priority_us;	2519	timeslice_timeout = ch->g->timeslice_high_priority_us;
2478	break;	2520	break;
2479	default:	2521	default:
2480	pr_err("Unsupported priority");	2522	pr_err("Unsupported priority");
2481	return -EINVAL;	2523	return -EINVAL;
2482	}	2524	}
2483	channel_gk20a_set_schedule_params(ch,	2525
2484	timeslice_timeout);	2526	return channel_gk20a_set_schedule_params(ch,
2485	return 0;	2527	timeslice_timeout, interleave);
2486	}	2528	}
2487		2529
2488	static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,	2530	static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,