author	Sachit Kadle <skadle@nvidia.com>	2016-09-19 17:18:11 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-09-20 13:43:35 -0400
commit	246dcb824bc299b836be92fb74416c07a57e085b (patch)
tree	f1ce23cdea84e873fbf965ce903e31c7effa4739 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent	3180ed70489113365203abc049223ad5956cb22e (diff)
gpu: nvgpu: improve sync create/destroy logic
This change improves the aggressive sync creation & destruction logic
to avoid lock contention in the submit path. It does the following:

1) Removes the global sync destruction (channel) threshold, and adds
   a per-platform parameter.
2) Avoids lock contention in the clean-up/submit path when aggressive
   sync destruction is disabled.
3) Creates the sync object at gpfifo allocation time (as long as we
   are not in aggressive sync destroy mode), to enable faster first
   submits.

Bug 1795076

Change-Id: Ifdb680100b08d00f37338063355bb2123ceb1b9f
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1202425
(cherry picked from commit ac0978711943a59c6f28c98c76b10759e0bff610)
Reviewed-on: http://git-master/r/1202427
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
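In outline, the new gating behaves as below. This is a minimal sketch
under simplified standalone types, not driver code: only the
aggressive_sync_destroy_thresh/aggressive_sync_destroy fields and the
compare-against-used-channel-count rule come from the diff; every other
name and type here is illustrative.

#include <stdbool.h>

/*
 * Illustrative stand-in for the per-platform data touched by this
 * change; not the driver's struct gk20a_platform.
 */
struct platform_sketch {
	unsigned int aggressive_sync_destroy_thresh;	/* 0 = mode disabled */
	bool aggressive_sync_destroy;
};

/* Called (conceptually) whenever the used-channel count changes. */
static void update_sync_destroy_mode(struct platform_sketch *p,
				     unsigned int used_channels)
{
	if (!p->aggressive_sync_destroy_thresh)
		return;	/* platform opted out: mode never toggles */

	if (used_channels > p->aggressive_sync_destroy_thresh)
		p->aggressive_sync_destroy = true;	/* allocate_channel() side */
	else if (used_channels < p->aggressive_sync_destroy_thresh)
		p->aggressive_sync_destroy = false;	/* free_channel() side */
}

A zero threshold thus doubles as an off switch, which is what lets
gk20a_alloc_channel_gpfifo() create the sync object up front and the
submit/clean-up paths skip the sync_lock entirely.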
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	77
1 file changed, 53 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 7df794bf..79d449e6 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -43,8 +43,6 @@
 
 #define NVMAP_HANDLE_PARAM_SIZE 1
 
-#define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT 64 /* channels */
-
 #define NVGPU_CHANNEL_MIN_TIMESLICE_US 1000
 #define NVGPU_CHANNEL_MAX_TIMESLICE_US 50000
 
@@ -91,7 +89,9 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
 	}
 	mutex_unlock(&f->free_chs_mutex);
 
-	if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
+	if (platform->aggressive_sync_destroy_thresh &&
+	    (f->used_channels >
+	     platform->aggressive_sync_destroy_thresh))
 		platform->aggressive_sync_destroy = true;
 
 	return ch;
@@ -110,7 +110,9 @@ static void free_channel(struct fifo_gk20a *f,
 	f->used_channels--;
 	mutex_unlock(&f->free_chs_mutex);
 
-	if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
+	if (platform->aggressive_sync_destroy_thresh &&
+	    (f->used_channels <
+	     platform->aggressive_sync_destroy_thresh))
 		platform->aggressive_sync_destroy = false;
 }
 
@@ -1424,6 +1426,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 {
 	struct gk20a *g = c->g;
 	struct device *d = dev_from_gk20a(g);
+	struct gk20a_platform *platform = gk20a_get_platform(d);
 	struct vm_gk20a *ch_vm;
 	u32 gpfifo_size;
 	int err = 0;
@@ -1487,26 +1490,46 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 
 	channel_gk20a_setup_userd(c);
 
+	if (!platform->aggressive_sync_destroy_thresh) {
+		mutex_lock(&c->sync_lock);
+		c->sync = gk20a_channel_sync_create(c);
+		if (!c->sync) {
+			err = -ENOMEM;
+			mutex_unlock(&c->sync_lock);
+			goto clean_up_unmap;
+		}
+		mutex_unlock(&c->sync_lock);
+
+		if (g->ops.fifo.resetup_ramfc) {
+			err = g->ops.fifo.resetup_ramfc(c);
+			if (err)
+				goto clean_up_sync;
+		}
+	}
+
 	err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
 			c->gpfifo.entry_num, args->flags);
 	if (err)
-		goto clean_up_unmap;
+		goto clean_up_sync;
 
 	/* TBD: setup engine contexts */
 
 	err = channel_gk20a_alloc_priv_cmdbuf(c);
 	if (err)
-		goto clean_up_unmap;
+		goto clean_up_sync;
 
 	err = channel_gk20a_update_runlist(c, true);
 	if (err)
-		goto clean_up_unmap;
+		goto clean_up_sync;
 
 	g->ops.fifo.bind_channel(c);
 
 	gk20a_dbg_fn("done");
 	return 0;
 
+clean_up_sync:
+	gk20a_channel_sync_destroy(c->sync);
+	c->sync = NULL;
 clean_up_unmap:
 	nvgpu_free(c->gpfifo.pipe);
 	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
@@ -1911,18 +1934,21 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 
 		gk20a_channel_timeout_stop(c);
 
-		mutex_lock(&c->sync_lock);
+		WARN_ON(!c->sync);
+
 		if (c->sync) {
 			c->sync->signal_timeline(c->sync);
-			if (atomic_dec_and_test(&c->sync->refcount) &&
-					platform->aggressive_sync_destroy) {
-				gk20a_channel_sync_destroy(c->sync);
-				c->sync = NULL;
+
+			if (platform->aggressive_sync_destroy_thresh) {
+				mutex_lock(&c->sync_lock);
+				if (atomic_dec_and_test(&c->sync->refcount) &&
+						platform->aggressive_sync_destroy) {
+					gk20a_channel_sync_destroy(c->sync);
+					c->sync = NULL;
+				}
+				mutex_unlock(&c->sync_lock);
 			}
-		} else {
-			WARN_ON(1);
 		}
-		mutex_unlock(&c->sync_lock);
 
 		if (job->num_mapped_buffers)
 			gk20a_vm_put_buffers(vm, job->mapped_buffers,
@@ -2099,6 +2125,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 		u32 flags)
 {
 	struct gk20a *g = c->g;
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 	bool need_sync_fence = false;
 	bool new_sync_created = false;
 	int wait_fence_fd = -1;
@@ -2112,18 +2139,20 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	if (force_need_sync_fence)
 		need_sync_fence = true;
 
-	mutex_lock(&c->sync_lock);
-	if (!c->sync) {
-		c->sync = gk20a_channel_sync_create(c);
+	if (platform->aggressive_sync_destroy_thresh) {
+		mutex_lock(&c->sync_lock);
 		if (!c->sync) {
-			err = -ENOMEM;
-			mutex_unlock(&c->sync_lock);
-			goto fail;
+			c->sync = gk20a_channel_sync_create(c);
+			if (!c->sync) {
+				err = -ENOMEM;
+				mutex_unlock(&c->sync_lock);
+				goto fail;
+			}
+			new_sync_created = true;
 		}
-		new_sync_created = true;
+		atomic_inc(&c->sync->refcount);
+		mutex_unlock(&c->sync_lock);
 	}
-	atomic_inc(&c->sync->refcount);
-	mutex_unlock(&c->sync_lock);
 
 	if (g->ops.fifo.resetup_ramfc && new_sync_created) {
 		err = g->ops.fifo.resetup_ramfc(c);
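For reference, the resulting submit-path behavior can be sketched as
follows. This is a hypothetical reduction of gk20a_submit_prepare_syncs()
using pthreads and plain counters in place of the kernel's mutex and
atomics; only the control flow mirrors the hunk above, all names and
types are illustrative.

#include <stdlib.h>
#include <stdbool.h>
#include <pthread.h>

/* Hypothetical reductions of the driver structures; illustrative only. */
struct sync_sketch { int refcount; };

struct channel_sketch {
	pthread_mutex_t sync_lock;
	struct sync_sketch *sync;	/* pre-created when thresh == 0 */
};

/*
 * Control flow of the submit path after this change: the sync_lock is
 * only taken when the platform uses a non-zero destroy threshold;
 * otherwise c->sync already exists.
 */
static int prepare_syncs_sketch(struct channel_sketch *c,
				unsigned int destroy_thresh,
				bool *new_sync_created)
{
	*new_sync_created = false;

	if (destroy_thresh) {
		pthread_mutex_lock(&c->sync_lock);
		if (!c->sync) {
			c->sync = calloc(1, sizeof(*c->sync));
			if (!c->sync) {
				pthread_mutex_unlock(&c->sync_lock);
				return -1;	/* -ENOMEM in the driver */
			}
			*new_sync_created = true;	/* triggers RAMFC re-setup */
		}
		c->sync->refcount++;	/* atomic_inc() in the driver */
		pthread_mutex_unlock(&c->sync_lock);
	}

	/* destroy_thresh == 0: sync was created at gpfifo allocation,
	 * so the submit fast path takes no lock here at all. */
	return 0;
}

With the threshold at zero, c->sync was already created during
gk20a_alloc_channel_gpfifo(), so the first and every subsequent submit
avoids both the lock and the resetup_ramfc() call.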