From 2b064ce65e0035a860d1bc3bcccfcf8aac1f31c7 Mon Sep 17 00:00:00 2001
From: Peter Pipkorn <ppipkorn@nvidia.com>
Date: Mon, 28 Sep 2015 13:49:53 +0200
Subject: gpu: nvgpu: add high priority channel interleave
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Interleave all high priority channels between all other channels.
This reduces the latency for high priority work when there
are a lot of lower priority work present, imposing an upper
bound on the latency. Change the default high priority timeslice
from 5.2ms to 3.0 in the process, to prevent long running high priority
apps from hogging the GPU too much.

Introduce a new debugfs node to enable/disable high priority
channel interleaving. It is currently enabled by default.

Adds new runlist length max register, used for allocating
suitable sized runlist.

Limit the number of interleaved channels to 32.

This change reduces the maximum time a lower priority job
is running (one timeslice) before we check that high priority
jobs are running.

Tested with gles2_context_priority (still passes)
Basic sanity testing is done with graphics_submit
(one app is high priority)

Also more functional testing using lots of parallel runs with:
NVRM_GPU_CHANNEL_PRIORITY=3 ./gles2_expensive_draw
 –drawsperframe 20000 –triangles 50 –runtime 30 –finish
plus multiple:
NVRM_GPU_CHANNEL_PRIORITY=2 ./gles2_expensive_draw
–drawsperframe 20000 –triangles 50 –runtime 30 -finish

Previous to this change, the relative performance between
high priority work and normal priority work comes down
to timeslice value. This means that when there are many
low priority channels, the high priority work will still
drop quite a lot. But with this change, the high priority
work will roughly get about half the entire GPU time, meaning
that after the initial lower performance, it is less likely
to get lower in performance due to more apps running on the system.

This change makes a large step towards real priority levels.
It is not perfect and there are no guarantees on anything,
but it is a step forwards without any additional CPU overhead
or other complications. It will also serve as a baseline to
judge other algorithms against.

Support for priorities with TSG is future work.
Support for interleave mid + high priority channels,
instead of just high, is also future work.

Bug 1419900

Change-Id: I0f7d0ce83b6598fe86000577d72e14d312fdad98
Signed-off-by: Peter Pipkorn <ppipkorn@nvidia.com>
Reviewed-on: http://git-master/r/805961
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/gk20a.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a/gk20a.h')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index f7b98e39..da115fa8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -54,6 +54,8 @@ struct acr_gm20b;
     32 ns is the resolution of ptimer. */
 #define PTIMER_REF_FREQ_HZ                      31250000
 
+#define MAX_INTERLEAVED_CHANNELS                32
+
 struct cooling_device_gk20a {
 	struct thermal_cooling_device *gk20a_cooling_dev;
 	unsigned int gk20a_freq_state;
@@ -512,6 +514,10 @@ struct gk20a {
 	u32 timeslice_low_priority_us;
 	u32 timeslice_medium_priority_us;
 	u32 timeslice_high_priority_us;
+	u32 interleave_high_priority;
+
+	struct mutex interleave_lock;
+	u32 num_interleaved_channels;
 
 	bool slcg_enabled;
 	bool blcg_enabled;
@@ -533,9 +539,11 @@ struct gk20a {
 	struct dentry *debugfs_disable_bigpage;
 	struct dentry *debugfs_gr_default_attrib_cb_size;
 
-	struct dentry * debugfs_timeslice_low_priority_us;
-	struct dentry * debugfs_timeslice_medium_priority_us;
-	struct dentry * debugfs_timeslice_high_priority_us;
+	struct dentry *debugfs_timeslice_low_priority_us;
+	struct dentry *debugfs_timeslice_medium_priority_us;
+	struct dentry *debugfs_timeslice_high_priority_us;
+	struct dentry *debugfs_interleave_high_priority;
+
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
 
-- 
cgit v1.2.2