diff options
author | Sachit Kadle <skadle@nvidia.com> | 2016-09-19 17:18:11 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-20 13:43:35 -0400 |
commit | 246dcb824bc299b836be92fb74416c07a57e085b (patch) | |
tree | f1ce23cdea84e873fbf965ce903e31c7effa4739 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | 3180ed70489113365203abc049223ad5956cb22e (diff) |
gpu: nvgpu: improve sync create/destroy logic
This change improves the aggressive sync creation
& destruction logic to avoid lock contention in
the submit path. It does the following:
1) Removes the global sync destruction (channel)
threshold, and adds a per-platform parameter.
2) Avoids lock contention in the clean-up/submit
path when aggressive sync destruction is disabled.
3) Creates sync object at gpfifo
allocation time (as long as we are not in aggressive
sync destroy mode), to enable faster first submits
Bug 1795076
Change-Id: Ifdb680100b08d00f37338063355bb2123ceb1b9f
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1202425
(cherry picked from commit ac0978711943a59c6f28c98c76b10759e0bff610)
Reviewed-on: http://git-master/r/1202427
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 77 |
1 file changed, 53 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 7df794bf..79d449e6 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -43,8 +43,6 @@ | |||
43 | 43 | ||
44 | #define NVMAP_HANDLE_PARAM_SIZE 1 | 44 | #define NVMAP_HANDLE_PARAM_SIZE 1 |
45 | 45 | ||
46 | #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT 64 /* channels */ | ||
47 | |||
48 | #define NVGPU_CHANNEL_MIN_TIMESLICE_US 1000 | 46 | #define NVGPU_CHANNEL_MIN_TIMESLICE_US 1000 |
49 | #define NVGPU_CHANNEL_MAX_TIMESLICE_US 50000 | 47 | #define NVGPU_CHANNEL_MAX_TIMESLICE_US 50000 |
50 | 48 | ||
@@ -91,7 +89,9 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) | |||
91 | } | 89 | } |
92 | mutex_unlock(&f->free_chs_mutex); | 90 | mutex_unlock(&f->free_chs_mutex); |
93 | 91 | ||
94 | if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT) | 92 | if (platform->aggressive_sync_destroy_thresh && |
93 | (f->used_channels > | ||
94 | platform->aggressive_sync_destroy_thresh)) | ||
95 | platform->aggressive_sync_destroy = true; | 95 | platform->aggressive_sync_destroy = true; |
96 | 96 | ||
97 | return ch; | 97 | return ch; |
@@ -110,7 +110,9 @@ static void free_channel(struct fifo_gk20a *f, | |||
110 | f->used_channels--; | 110 | f->used_channels--; |
111 | mutex_unlock(&f->free_chs_mutex); | 111 | mutex_unlock(&f->free_chs_mutex); |
112 | 112 | ||
113 | if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT) | 113 | if (platform->aggressive_sync_destroy_thresh && |
114 | (f->used_channels < | ||
115 | platform->aggressive_sync_destroy_thresh)) | ||
114 | platform->aggressive_sync_destroy = false; | 116 | platform->aggressive_sync_destroy = false; |
115 | } | 117 | } |
116 | 118 | ||
@@ -1424,6 +1426,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1424 | { | 1426 | { |
1425 | struct gk20a *g = c->g; | 1427 | struct gk20a *g = c->g; |
1426 | struct device *d = dev_from_gk20a(g); | 1428 | struct device *d = dev_from_gk20a(g); |
1429 | struct gk20a_platform *platform = gk20a_get_platform(d); | ||
1427 | struct vm_gk20a *ch_vm; | 1430 | struct vm_gk20a *ch_vm; |
1428 | u32 gpfifo_size; | 1431 | u32 gpfifo_size; |
1429 | int err = 0; | 1432 | int err = 0; |
@@ -1487,26 +1490,46 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1487 | 1490 | ||
1488 | channel_gk20a_setup_userd(c); | 1491 | channel_gk20a_setup_userd(c); |
1489 | 1492 | ||
1493 | if (!platform->aggressive_sync_destroy_thresh) { | ||
1494 | mutex_lock(&c->sync_lock); | ||
1495 | c->sync = gk20a_channel_sync_create(c); | ||
1496 | if (!c->sync) { | ||
1497 | err = -ENOMEM; | ||
1498 | mutex_unlock(&c->sync_lock); | ||
1499 | goto clean_up_unmap; | ||
1500 | } | ||
1501 | mutex_unlock(&c->sync_lock); | ||
1502 | |||
1503 | if (g->ops.fifo.resetup_ramfc) { | ||
1504 | err = g->ops.fifo.resetup_ramfc(c); | ||
1505 | if (err) | ||
1506 | goto clean_up_sync; | ||
1507 | } | ||
1508 | } | ||
1509 | |||
1490 | err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va, | 1510 | err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va, |
1491 | c->gpfifo.entry_num, args->flags); | 1511 | c->gpfifo.entry_num, args->flags); |
1492 | if (err) | 1512 | if (err) |
1493 | goto clean_up_unmap; | 1513 | goto clean_up_sync; |
1494 | 1514 | ||
1495 | /* TBD: setup engine contexts */ | 1515 | /* TBD: setup engine contexts */ |
1496 | 1516 | ||
1497 | err = channel_gk20a_alloc_priv_cmdbuf(c); | 1517 | err = channel_gk20a_alloc_priv_cmdbuf(c); |
1498 | if (err) | 1518 | if (err) |
1499 | goto clean_up_unmap; | 1519 | goto clean_up_sync; |
1500 | 1520 | ||
1501 | err = channel_gk20a_update_runlist(c, true); | 1521 | err = channel_gk20a_update_runlist(c, true); |
1502 | if (err) | 1522 | if (err) |
1503 | goto clean_up_unmap; | 1523 | goto clean_up_sync; |
1504 | 1524 | ||
1505 | g->ops.fifo.bind_channel(c); | 1525 | g->ops.fifo.bind_channel(c); |
1506 | 1526 | ||
1507 | gk20a_dbg_fn("done"); | 1527 | gk20a_dbg_fn("done"); |
1508 | return 0; | 1528 | return 0; |
1509 | 1529 | ||
1530 | clean_up_sync: | ||
1531 | gk20a_channel_sync_destroy(c->sync); | ||
1532 | c->sync = NULL; | ||
1510 | clean_up_unmap: | 1533 | clean_up_unmap: |
1511 | nvgpu_free(c->gpfifo.pipe); | 1534 | nvgpu_free(c->gpfifo.pipe); |
1512 | gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); | 1535 | gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); |
@@ -1911,18 +1934,21 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work) | |||
1911 | 1934 | ||
1912 | gk20a_channel_timeout_stop(c); | 1935 | gk20a_channel_timeout_stop(c); |
1913 | 1936 | ||
1914 | mutex_lock(&c->sync_lock); | 1937 | WARN_ON(!c->sync); |
1938 | |||
1915 | if (c->sync) { | 1939 | if (c->sync) { |
1916 | c->sync->signal_timeline(c->sync); | 1940 | c->sync->signal_timeline(c->sync); |
1917 | if (atomic_dec_and_test(&c->sync->refcount) && | 1941 | |
1918 | platform->aggressive_sync_destroy) { | 1942 | if (platform->aggressive_sync_destroy_thresh) { |
1919 | gk20a_channel_sync_destroy(c->sync); | 1943 | mutex_lock(&c->sync_lock); |
1920 | c->sync = NULL; | 1944 | if (atomic_dec_and_test(&c->sync->refcount) && |
1945 | platform->aggressive_sync_destroy) { | ||
1946 | gk20a_channel_sync_destroy(c->sync); | ||
1947 | c->sync = NULL; | ||
1948 | } | ||
1949 | mutex_unlock(&c->sync_lock); | ||
1921 | } | 1950 | } |
1922 | } else { | ||
1923 | WARN_ON(1); | ||
1924 | } | 1951 | } |
1925 | mutex_unlock(&c->sync_lock); | ||
1926 | 1952 | ||
1927 | if (job->num_mapped_buffers) | 1953 | if (job->num_mapped_buffers) |
1928 | gk20a_vm_put_buffers(vm, job->mapped_buffers, | 1954 | gk20a_vm_put_buffers(vm, job->mapped_buffers, |
@@ -2099,6 +2125,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2099 | u32 flags) | 2125 | u32 flags) |
2100 | { | 2126 | { |
2101 | struct gk20a *g = c->g; | 2127 | struct gk20a *g = c->g; |
2128 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
2102 | bool need_sync_fence = false; | 2129 | bool need_sync_fence = false; |
2103 | bool new_sync_created = false; | 2130 | bool new_sync_created = false; |
2104 | int wait_fence_fd = -1; | 2131 | int wait_fence_fd = -1; |
@@ -2112,18 +2139,20 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2112 | if (force_need_sync_fence) | 2139 | if (force_need_sync_fence) |
2113 | need_sync_fence = true; | 2140 | need_sync_fence = true; |
2114 | 2141 | ||
2115 | mutex_lock(&c->sync_lock); | 2142 | if (platform->aggressive_sync_destroy_thresh) { |
2116 | if (!c->sync) { | 2143 | mutex_lock(&c->sync_lock); |
2117 | c->sync = gk20a_channel_sync_create(c); | ||
2118 | if (!c->sync) { | 2144 | if (!c->sync) { |
2119 | err = -ENOMEM; | 2145 | c->sync = gk20a_channel_sync_create(c); |
2120 | mutex_unlock(&c->sync_lock); | 2146 | if (!c->sync) { |
2121 | goto fail; | 2147 | err = -ENOMEM; |
2148 | mutex_unlock(&c->sync_lock); | ||
2149 | goto fail; | ||
2150 | } | ||
2151 | new_sync_created = true; | ||
2122 | } | 2152 | } |
2123 | new_sync_created = true; | 2153 | atomic_inc(&c->sync->refcount); |
2154 | mutex_unlock(&c->sync_lock); | ||
2124 | } | 2155 | } |
2125 | atomic_inc(&c->sync->refcount); | ||
2126 | mutex_unlock(&c->sync_lock); | ||
2127 | 2156 | ||
2128 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { | 2157 | if (g->ops.fifo.resetup_ramfc && new_sync_created) { |
2129 | err = g->ops.fifo.resetup_ramfc(c); | 2158 | err = g->ops.fifo.resetup_ramfc(c); |