summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/gk20a.c
diff options
context:
space:
mode:
authorPeter Pipkorn <ppipkorn@nvidia.com>2015-09-28 07:49:53 -0400
committerTerje Bergstrom <tbergstrom@nvidia.com>2016-01-11 12:04:01 -0500
commit2b064ce65e0035a860d1bc3bcccfcf8aac1f31c7 (patch)
tree1f20c0e608efcca51ef321d308df8e8cb059ad8c /drivers/gpu/nvgpu/gk20a/gk20a.c
parenta9c6f595399074e88c16f3557e5acb29db1d52d5 (diff)
gpu: nvgpu: add high priority channel interleave
Interleave all high priority channels between all other channels. This reduces the latency for high priority work when there is a lot of lower priority work present, imposing an upper bound on the latency. Change the default high priority timeslice from 5.2ms to 3.0ms in the process, to prevent long running high priority apps from hogging the GPU too much. Introduce a new debugfs node to enable/disable high priority channel interleaving. It is currently enabled by default. Add a new runlist length max register, used for allocating a suitably sized runlist. Limit the number of interleaved channels to 32. This change reduces the maximum time a lower priority job is running (one timeslice) before we check that high priority jobs are running. Tested with gles2_context_priority (still passes). Basic sanity testing is done with graphics_submit (one app is high priority). Also more functional testing using lots of parallel runs with: NVRM_GPU_CHANNEL_PRIORITY=3 ./gles2_expensive_draw --drawsperframe 20000 --triangles 50 --runtime 30 --finish plus multiple: NVRM_GPU_CHANNEL_PRIORITY=2 ./gles2_expensive_draw --drawsperframe 20000 --triangles 50 --runtime 30 --finish. Prior to this change, the relative performance between high priority work and normal priority work comes down to the timeslice value. This means that when there are many low priority channels, the performance of the high priority work will still drop quite a lot. But with this change, the high priority work will get roughly half the entire GPU time, meaning that after the initial lower performance, it is less likely to get lower in performance due to more apps running on the system. This change makes a large step towards real priority levels. It is not perfect and there are no guarantees on anything, but it is a step forwards without any additional CPU overhead or other complications. It will also serve as a baseline to judge other algorithms against. Support for priorities with TSG is future work.
Support for interleaving mid + high priority channels, instead of just high, is also future work. Bug 1419900 Change-Id: I0f7d0ce83b6598fe86000577d72e14d312fdad98 Signed-off-by: Peter Pipkorn <ppipkorn@nvidia.com> Reviewed-on: http://git-master/r/805961 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.c16
1 files changed, 15 insertions, 1 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 9bbc9bd8..c5124c51 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -670,6 +670,9 @@ static int gk20a_init_support(struct platform_device *dev)
670 mutex_init(&g->client_lock); 670 mutex_init(&g->client_lock);
671 mutex_init(&g->ch_wdt_lock); 671 mutex_init(&g->ch_wdt_lock);
672 672
673 mutex_init(&g->interleave_lock);
674 g->num_interleaved_channels = 0;
675
673 g->remove_support = gk20a_remove_support; 676 g->remove_support = gk20a_remove_support;
674 return 0; 677 return 0;
675 678
@@ -1437,9 +1440,14 @@ static int gk20a_probe(struct platform_device *dev)
1437 if (tegra_platform_is_silicon()) 1440 if (tegra_platform_is_silicon())
1438 gk20a->timeouts_enabled = true; 1441 gk20a->timeouts_enabled = true;
1439 1442
1443 gk20a->interleave_high_priority = true;
1444
1440 gk20a->timeslice_low_priority_us = 1300; 1445 gk20a->timeslice_low_priority_us = 1300;
1441 gk20a->timeslice_medium_priority_us = 2600; 1446 gk20a->timeslice_medium_priority_us = 2600;
1442 gk20a->timeslice_high_priority_us = 5200; 1447 if (gk20a->interleave_high_priority)
1448 gk20a->timeslice_high_priority_us = 3000;
1449 else
1450 gk20a->timeslice_high_priority_us = 5200;
1443 1451
1444 /* Set up initial power settings. For non-slicon platforms, disable * 1452 /* Set up initial power settings. For non-slicon platforms, disable *
1445 * power features and for silicon platforms, read from platform data */ 1453 * power features and for silicon platforms, read from platform data */
@@ -1512,6 +1520,12 @@ static int gk20a_probe(struct platform_device *dev)
1512 platform->debugfs, 1520 platform->debugfs,
1513 &gk20a->timeslice_high_priority_us); 1521 &gk20a->timeslice_high_priority_us);
1514 1522
1523 gk20a->debugfs_interleave_high_priority =
1524 debugfs_create_bool("interleave_high_priority",
1525 S_IRUGO|S_IWUSR,
1526 platform->debugfs,
1527 &gk20a->interleave_high_priority);
1528
1515 gr_gk20a_debugfs_init(gk20a); 1529 gr_gk20a_debugfs_init(gk20a);
1516 gk20a_pmu_debugfs_init(dev); 1530 gk20a_pmu_debugfs_init(dev);
1517 gk20a_cde_debugfs_init(dev); 1531 gk20a_cde_debugfs_init(dev);