summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
authorPeter Pipkorn <ppipkorn@nvidia.com>2015-09-28 07:49:53 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-01-11 12:04:01 -0500
commit2b064ce65e0035a860d1bc3bcccfcf8aac1f31c7 (patch)
tree1f20c0e608efcca51ef321d308df8e8cb059ad8c /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parenta9c6f595399074e88c16f3557e5acb29db1d52d5 (diff)
gpu: nvgpu: add high priority channel interleave
Interleave all high priority channels between all other channels. This reduces the latency for high priority work when there is a lot of lower priority work present, imposing an upper bound on the latency. Change the default high priority timeslice from 5.2 ms to 3.0 ms in the process, to prevent long-running high priority apps from hogging the GPU too much. Introduce a new debugfs node to enable/disable high priority channel interleaving. It is currently enabled by default. Add a new runlist length max register, used for allocating a suitably sized runlist. Limit the number of interleaved channels to 32. This change reduces the maximum time a lower priority job is running (one timeslice) before we check whether high priority jobs are running. Tested with gles2_context_priority (still passes). Basic sanity testing is done with graphics_submit (one app is high priority). Also more functional testing using lots of parallel runs with: NVRM_GPU_CHANNEL_PRIORITY=3 ./gles2_expensive_draw --drawsperframe 20000 --triangles 50 --runtime 30 --finish plus multiple: NVRM_GPU_CHANNEL_PRIORITY=2 ./gles2_expensive_draw --drawsperframe 20000 --triangles 50 --runtime 30 --finish Previous to this change, the relative performance between high priority work and normal priority work comes down to the timeslice value. This means that when there are many low priority channels, the performance of high priority work will still drop quite a lot. But with this change, the high priority work will get roughly half of the entire GPU time, meaning that after the initial lower performance, it is less likely to drop further in performance due to more apps running on the system. This change makes a large step towards real priority levels. It is not perfect and there are no guarantees on anything, but it is a step forwards without any additional CPU overhead or other complications. It will also serve as a baseline to judge other algorithms against. Support for priorities with TSG is future work. 
Support for interleaving mid + high priority channels, instead of just high, is also future work. Bug 1419900 Change-Id: I0f7d0ce83b6598fe86000577d72e14d312fdad98 Signed-off-by: Peter Pipkorn <ppipkorn@nvidia.com> Reviewed-on: http://git-master/r/805961 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c50
1 files changed, 46 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a5c2efb3..0421c0f6 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -175,7 +175,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
175} 175}
176 176
177static int channel_gk20a_set_schedule_params(struct channel_gk20a *c, 177static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
178 u32 timeslice_period) 178 u32 timeslice_period, bool interleave)
179{ 179{
180 void *inst_ptr; 180 void *inst_ptr;
181 int shift = 0, value = 0; 181 int shift = 0, value = 0;
@@ -203,6 +203,30 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
203 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) | 203 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
204 ccsr_channel_enable_set_true_f()); 204 ccsr_channel_enable_set_true_f());
205 205
206 if (c->interleave != interleave) {
207 mutex_lock(&c->g->interleave_lock);
208 c->interleave = interleave;
209 if (interleave)
210 if (c->g->num_interleaved_channels >=
211 MAX_INTERLEAVED_CHANNELS) {
212 gk20a_err(dev_from_gk20a(c->g),
213 "Change of priority would exceed runlist length, only changing timeslice\n");
214 c->interleave = false;
215 } else
216 c->g->num_interleaved_channels += 1;
217 else
218 c->g->num_interleaved_channels -= 1;
219
220 mutex_unlock(&c->g->interleave_lock);
221 gk20a_dbg_info("Set channel %d to interleave %d",
222 c->hw_chid, c->interleave);
223
224 gk20a_fifo_set_channel_priority(
225 c->g, 0, c->hw_chid, c->interleave);
226 c->g->ops.fifo.update_runlist(
227 c->g, 0, ~0, true, false);
228 }
229
206 return 0; 230 return 0;
207} 231}
208 232
@@ -836,6 +860,17 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
836 } 860 }
837 mutex_unlock(&f->deferred_reset_mutex); 861 mutex_unlock(&f->deferred_reset_mutex);
838 862
863 if (ch->interleave) {
864 ch->interleave = false;
865 gk20a_fifo_set_channel_priority(
866 ch->g, 0, ch->hw_chid, ch->interleave);
867
868 mutex_lock(&f->g->interleave_lock);
869 WARN_ON(f->g->num_interleaved_channels == 0);
870 f->g->num_interleaved_channels -= 1;
871 mutex_unlock(&f->g->interleave_lock);
872 }
873
839 if (!ch->bound) 874 if (!ch->bound)
840 goto release; 875 goto release;
841 876
@@ -1079,6 +1114,10 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
1079 ch->timeout_debug_dump = true; 1114 ch->timeout_debug_dump = true;
1080 ch->has_timedout = false; 1115 ch->has_timedout = false;
1081 ch->obj_class = 0; 1116 ch->obj_class = 0;
1117 ch->interleave = false;
1118 gk20a_fifo_set_channel_priority(
1119 ch->g, 0, ch->hw_chid, ch->interleave);
1120
1082 1121
1083 /* The channel is *not* runnable at this point. It still needs to have 1122 /* The channel is *not* runnable at this point. It still needs to have
1084 * an address space bound and allocate a gpfifo and grctx. */ 1123 * an address space bound and allocate a gpfifo and grctx. */
@@ -2458,6 +2497,7 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2458 u32 priority) 2497 u32 priority)
2459{ 2498{
2460 u32 timeslice_timeout; 2499 u32 timeslice_timeout;
2500 bool interleave = false;
2461 2501
2462 if (gk20a_is_channel_marked_as_tsg(ch)) { 2502 if (gk20a_is_channel_marked_as_tsg(ch)) {
2463 gk20a_err(dev_from_gk20a(ch->g), 2503 gk20a_err(dev_from_gk20a(ch->g),
@@ -2474,15 +2514,17 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2474 timeslice_timeout = ch->g->timeslice_medium_priority_us; 2514 timeslice_timeout = ch->g->timeslice_medium_priority_us;
2475 break; 2515 break;
2476 case NVGPU_PRIORITY_HIGH: 2516 case NVGPU_PRIORITY_HIGH:
2517 if (ch->g->interleave_high_priority)
2518 interleave = true;
2477 timeslice_timeout = ch->g->timeslice_high_priority_us; 2519 timeslice_timeout = ch->g->timeslice_high_priority_us;
2478 break; 2520 break;
2479 default: 2521 default:
2480 pr_err("Unsupported priority"); 2522 pr_err("Unsupported priority");
2481 return -EINVAL; 2523 return -EINVAL;
2482 } 2524 }
2483 channel_gk20a_set_schedule_params(ch, 2525
2484 timeslice_timeout); 2526 return channel_gk20a_set_schedule_params(ch,
2485 return 0; 2527 timeslice_timeout, interleave);
2486} 2528}
2487 2529
2488static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, 2530static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,