author     Sachit Kadle <skadle@nvidia.com>                      2016-08-22 21:06:30 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2016-10-20 11:14:04 -0400
commit     63e8592e06939e20c7b9e56b430353ebbee31ad6 (patch)
tree       b91247eebf886f4e987d38eb4069aceace284ecf
parent     3c2656c8c6ebf7cef7376d3a28451249643121c4 (diff)
gpu: nvgpu: use inplace allocation in sync framework
This change is the first of a series of changes to support the usage of
pre-allocated job tracking resources in the submit path. With this change,
we still maintain a dynamically-allocated joblist, but make the necessary
changes in the channel_sync & fence framework to use in-place allocations.

Specifically, we:
1) Update channel sync framework routines to take in pre-allocated
   priv_cmd_entry(s) & gk20a_fence(s) rather than allocating them
   dynamically themselves
2) Move allocation of priv_cmd_entry(s) & gk20a_fence(s) to
   gk20a_submit_prepare_syncs
3) Modify the fence framework to have separate allocation and init APIs.
   Allocation is exposed as a separate API, so the client can allocate the
   object before passing it into the channel sync framework.
4) Fix clean_up logic in the channel sync framework

Bug 1795076

Change-Id: I96db457683cd207fd029c31c45f548f98055e844
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1206725
(cherry picked from commit 9d196fd10db6c2f934c2a53b1fc0500eb4626624)
Reviewed-on: http://git-master/r/1223933
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
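
For illustration, a minimal sketch of how the submit path looks with the
split allocation/initialization model introduced here. It condenses the flow
of gk20a_submit_channel_gpfifo() and gk20a_submit_prepare_syncs() from the
diff below and is not the literal patch code; the clean_up_job label and the
need_sync_fence flag are taken from the surrounding submit path.

	struct channel_gk20a_job *job;
	int err;

	/* Allocate the tracking objects up front... */
	job = channel_gk20a_alloc_job(c);
	if (!job)
		return -ENOMEM;

	job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
	job->post_fence = gk20a_alloc_fence(c);	/* allocation only, no init */
	if (!job->incr_cmd || !job->post_fence) {
		err = -ENOMEM;
		goto clean_up_job;
	}

	/* ...then let the sync framework initialize them in place, instead of
	 * returning freshly allocated objects through double pointers. */
	err = c->sync->incr(c->sync, job->incr_cmd, job->post_fence,
			    need_sync_fence);
	if (err)
		goto clean_up_job;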
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       | 156
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h       |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  | 168
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h  |  20
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.c         |  88
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.h         |  15
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h            |   1
7 files changed, 275 insertions, 175 deletions
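
A recurring pattern in the diff below is publishing a pre-allocated object
through a valid flag: the producer fills in the payload, issues a write
barrier, then sets valid; the consumer checks valid and issues a read barrier
before trusting the payload. A condensed sketch of that pairing, using only
names that appear in the patch:

	/* producer (cf. gk20a_channel_alloc_priv_cmdbuf) */
	e->size = orig_size;
	e->mem  = &q->mem;
	/* ... remaining entry fields ... */
	wmb();		/* commit the payload before the flag */
	e->valid = true;

	/* consumer (cf. gk20a_free_priv_cmdbuf) */
	if (e->valid) {
		rmb();	/* read the flag before the payload */
		q->get = e->off + e->size;
	}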
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f60a92b4..4019721a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1376,16 +1376,20 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1376 1376
1377/* allocate a cmd buffer with given size. size is number of u32 entries */ 1377/* allocate a cmd buffer with given size. size is number of u32 entries */
1378int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, 1378int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1379 struct priv_cmd_entry **entry) 1379 struct priv_cmd_entry *e)
1380{ 1380{
1381 struct priv_cmd_queue *q = &c->priv_cmd_q; 1381 struct priv_cmd_queue *q = &c->priv_cmd_q;
1382 struct priv_cmd_entry *e;
1383 u32 free_count; 1382 u32 free_count;
1384 u32 size = orig_size; 1383 u32 size = orig_size;
1385 1384
1386 gk20a_dbg_fn("size %d", orig_size); 1385 gk20a_dbg_fn("size %d", orig_size);
1387 1386
1388 *entry = NULL; 1387 if (!e) {
1388 gk20a_err(dev_from_gk20a(c->g),
1389 "ch %d: priv cmd entry is null",
1390 c->hw_chid);
1391 return -EINVAL;
1392 }
1389 1393
1390 /* if free space in the end is less than requested, increase the size 1394 /* if free space in the end is less than requested, increase the size
1391 * to make the real allocated space start from beginning. */ 1395 * to make the real allocated space start from beginning. */
@@ -1400,14 +1404,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1400 if (size > free_count) 1404 if (size > free_count)
1401 return -EAGAIN; 1405 return -EAGAIN;
1402 1406
1403 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1404 if (!e) {
1405 gk20a_err(dev_from_gk20a(c->g),
1406 "ch %d: fail to allocate priv cmd entry",
1407 c->hw_chid);
1408 return -ENOMEM;
1409 }
1410
1411 e->size = orig_size; 1407 e->size = orig_size;
1412 e->mem = &q->mem; 1408 e->mem = &q->mem;
1413 1409
@@ -1426,8 +1422,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1426 /* we already handled q->put + size > q->size so BUG_ON this */ 1422 /* we already handled q->put + size > q->size so BUG_ON this */
1427 BUG_ON(q->put > q->size); 1423 BUG_ON(q->put > q->size);
1428 1424
1429 *entry = e; 1425 /* commit the previous writes before making the entry valid */
1426 wmb();
1430 1427
1428 e->valid = true;
1431 gk20a_dbg_fn("done"); 1429 gk20a_dbg_fn("done");
1432 1430
1433 return 0; 1431 return 0;
@@ -1441,6 +1439,21 @@ static void free_priv_cmdbuf(struct channel_gk20a *c,
1441 kfree(e); 1439 kfree(e);
1442} 1440}
1443 1441
1442static struct channel_gk20a_job *channel_gk20a_alloc_job(
1443 struct channel_gk20a *c)
1444{
1445 struct channel_gk20a_job *job = NULL;
1446
1447 job = kzalloc(sizeof(*job), GFP_KERNEL);
1448 return job;
1449}
1450
1451static void channel_gk20a_free_job(struct channel_gk20a *c,
1452 struct channel_gk20a_job *job)
1453{
1454 kfree(job);
1455}
1456
1444int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, 1457int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1445 struct nvgpu_alloc_gpfifo_args *args) 1458 struct nvgpu_alloc_gpfifo_args *args)
1446{ 1459{
@@ -1818,10 +1831,15 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
1818 if (!e) 1831 if (!e)
1819 return 0; 1832 return 0;
1820 1833
1821 if ((q->get != e->off) && e->off != 0) 1834 if (e->valid) {
1822 gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); 1835 /* read the entry's valid flag before reading its contents */
1836 rmb();
1837 if ((q->get != e->off) && e->off != 0)
1838 gk20a_err(d, "requests out-of-order, ch=%d\n",
1839 c->hw_chid);
1840 q->get = e->off + e->size;
1841 }
1823 1842
1824 q->get = e->off + e->size;
1825 free_priv_cmdbuf(c, e); 1843 free_priv_cmdbuf(c, e);
1826 1844
1827 return 0; 1845 return 0;
@@ -1854,14 +1872,10 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
1854} 1872}
1855 1873
1856static int gk20a_channel_add_job(struct channel_gk20a *c, 1874static int gk20a_channel_add_job(struct channel_gk20a *c,
1857 struct gk20a_fence *pre_fence, 1875 struct channel_gk20a_job *job,
1858 struct gk20a_fence *post_fence,
1859 struct priv_cmd_entry *wait_cmd,
1860 struct priv_cmd_entry *incr_cmd,
1861 bool skip_buffer_refcounting) 1876 bool skip_buffer_refcounting)
1862{ 1877{
1863 struct vm_gk20a *vm = c->vm; 1878 struct vm_gk20a *vm = c->vm;
1864 struct channel_gk20a_job *job = NULL;
1865 struct mapped_buffer_node **mapped_buffers = NULL; 1879 struct mapped_buffer_node **mapped_buffers = NULL;
1866 int err = 0, num_mapped_buffers = 0; 1880 int err = 0, num_mapped_buffers = 0;
1867 1881
@@ -1875,22 +1889,12 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1875 goto err_put_vm; 1889 goto err_put_vm;
1876 } 1890 }
1877 1891
1878 job = kzalloc(sizeof(*job), GFP_KERNEL);
1879 if (!job) {
1880 err = -ENOMEM;
1881 goto err_put_buffers;
1882 }
1883
1884 /* put() is done in gk20a_channel_update() when the job is done */ 1892 /* put() is done in gk20a_channel_update() when the job is done */
1885 c = gk20a_channel_get(c); 1893 c = gk20a_channel_get(c);
1886 1894
1887 if (c) { 1895 if (c) {
1888 job->num_mapped_buffers = num_mapped_buffers; 1896 job->num_mapped_buffers = num_mapped_buffers;
1889 job->mapped_buffers = mapped_buffers; 1897 job->mapped_buffers = mapped_buffers;
1890 job->pre_fence = pre_fence;
1891 job->post_fence = post_fence;
1892 job->wait_cmd = wait_cmd;
1893 job->incr_cmd = incr_cmd;
1894 1898
1895 gk20a_channel_timeout_start(c, job); 1899 gk20a_channel_timeout_start(c, job);
1896 1900
@@ -1899,13 +1903,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1899 spin_unlock(&c->jobs_lock); 1903 spin_unlock(&c->jobs_lock);
1900 } else { 1904 } else {
1901 err = -ETIMEDOUT; 1905 err = -ETIMEDOUT;
1902 goto err_free_job; 1906 goto err_put_buffers;
1903 } 1907 }
1904 1908
1905 return 0; 1909 return 0;
1906 1910
1907err_free_job:
1908 kfree(job);
1909err_put_buffers: 1911err_put_buffers:
1910 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); 1912 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1911err_put_vm: 1913err_put_vm:
@@ -2000,7 +2002,7 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
2000 list_del_init(&job->list); 2002 list_del_init(&job->list);
2001 spin_unlock(&c->jobs_lock); 2003 spin_unlock(&c->jobs_lock);
2002 2004
2003 kfree(job); 2005 channel_gk20a_free_job(c, job);
2004 job_finished = 1; 2006 job_finished = 1;
2005 gk20a_idle(g->dev); 2007 gk20a_idle(g->dev);
2006 } 2008 }
@@ -2143,6 +2145,7 @@ out:
2143 */ 2145 */
2144static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, 2146static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2145 struct nvgpu_fence *fence, 2147 struct nvgpu_fence *fence,
2148 struct channel_gk20a_job *job,
2146 struct priv_cmd_entry **wait_cmd, 2149 struct priv_cmd_entry **wait_cmd,
2147 struct priv_cmd_entry **incr_cmd, 2150 struct priv_cmd_entry **incr_cmd,
2148 struct gk20a_fence **pre_fence, 2151 struct gk20a_fence **pre_fence,
@@ -2194,18 +2197,32 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2194 * this condition. 2197 * this condition.
2195 */ 2198 */
2196 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { 2199 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
2200 job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
2201 GFP_KERNEL);
2202 job->pre_fence = gk20a_alloc_fence(c);
2203
2204 if (!job->wait_cmd || !job->pre_fence) {
2205 err = -ENOMEM;
2206 goto clean_up_pre_fence;
2207 }
2208
2197 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { 2209 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2198 wait_fence_fd = fence->id; 2210 wait_fence_fd = fence->id;
2199 err = c->sync->wait_fd(c->sync, wait_fence_fd, 2211 err = c->sync->wait_fd(c->sync, wait_fence_fd,
2200 wait_cmd, pre_fence); 2212 job->wait_cmd, job->pre_fence);
2201 } else { 2213 } else {
2202 err = c->sync->wait_syncpt(c->sync, fence->id, 2214 err = c->sync->wait_syncpt(c->sync, fence->id,
2203 fence->value, wait_cmd, 2215 fence->value, job->wait_cmd,
2204 pre_fence); 2216 job->pre_fence);
2205 } 2217 }
2218
2219 if (!err) {
2220 if (job->wait_cmd->valid)
2221 *wait_cmd = job->wait_cmd;
2222 *pre_fence = job->pre_fence;
2223 } else
2224 goto clean_up_pre_fence;
2206 } 2225 }
2207 if (err)
2208 goto fail;
2209 2226
2210 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) && 2227 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
2211 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) 2228 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
@@ -2216,22 +2233,41 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2216 * is used to keep track of method completion for idle railgating. The 2233 * is used to keep track of method completion for idle railgating. The
2217 * sync_pt/semaphore PB is added to the GPFIFO later on in submit. 2234 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
2218 */ 2235 */
2236 job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
2237 job->post_fence = gk20a_alloc_fence(c);
2238
2239 if (!job->incr_cmd || !job->post_fence) {
2240 err = -ENOMEM;
2241 goto clean_up_post_fence;
2242 }
2243
2219 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) 2244 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
2220 err = c->sync->incr_user(c->sync, wait_fence_fd, incr_cmd, 2245 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
2221 post_fence, need_wfi, need_sync_fence); 2246 job->post_fence, need_wfi, need_sync_fence);
2222 else 2247 else
2223 err = c->sync->incr(c->sync, incr_cmd, 2248 err = c->sync->incr(c->sync, job->incr_cmd,
2224 post_fence, need_sync_fence); 2249 job->post_fence, need_sync_fence);
2225 if (err) 2250 if (!err) {
2226 goto fail; 2251 *incr_cmd = job->incr_cmd;
2252 *post_fence = job->post_fence;
2253 } else
2254 goto clean_up_post_fence;
2227 2255
2228 return 0; 2256 return 0;
2229 2257
2258clean_up_post_fence:
2259 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
2260 gk20a_fence_put(job->post_fence);
2261 job->incr_cmd = NULL;
2262 job->post_fence = NULL;
2263clean_up_pre_fence:
2264 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
2265 gk20a_fence_put(job->pre_fence);
2266 job->wait_cmd = NULL;
2267 job->pre_fence = NULL;
2268 *wait_cmd = NULL;
2269 *pre_fence = NULL;
2230fail: 2270fail:
2231 /*
2232 * Cleanup is handled by gk20a_submit_channel_gpfifo() since it is the
2233 * real owner of the objects we make here.
2234 */
2235 return err; 2271 return err;
2236} 2272}
2237 2273
@@ -2250,6 +2286,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2250 struct priv_cmd_entry *incr_cmd = NULL; 2286 struct priv_cmd_entry *incr_cmd = NULL;
2251 struct gk20a_fence *pre_fence = NULL; 2287 struct gk20a_fence *pre_fence = NULL;
2252 struct gk20a_fence *post_fence = NULL; 2288 struct gk20a_fence *post_fence = NULL;
2289 struct channel_gk20a_job *job = NULL;
2253 /* we might need two extra gpfifo entries - one for pre fence 2290 /* we might need two extra gpfifo entries - one for pre fence
2254 * and one for post fence. */ 2291 * and one for post fence. */
2255 const int extra_entries = 2; 2292 const int extra_entries = 2;
@@ -2351,11 +2388,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2351 } 2388 }
2352 2389
2353 if (need_job_tracking) { 2390 if (need_job_tracking) {
2354 err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd, 2391 job = channel_gk20a_alloc_job(c);
2392 if (!job) {
2393 err = -ENOMEM;
2394 goto clean_up;
2395 }
2396
2397 err = gk20a_submit_prepare_syncs(c, fence, job,
2398 &wait_cmd, &incr_cmd,
2355 &pre_fence, &post_fence, 2399 &pre_fence, &post_fence,
2356 force_need_sync_fence, flags); 2400 force_need_sync_fence, flags);
2357 if (err) 2401 if (err)
2358 goto clean_up; 2402 goto clean_up_job;
2359 } 2403 }
2360 2404
2361 if (wait_cmd) 2405 if (wait_cmd)
@@ -2365,7 +2409,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2365 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, 2409 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
2366 num_entries); 2410 num_entries);
2367 if (err) 2411 if (err)
2368 goto clean_up; 2412 goto clean_up_job;
2369 2413
2370 /* 2414 /*
2371 * And here's where we add the incr_cmd we generated earlier. It should 2415 * And here's where we add the incr_cmd we generated earlier. It should
@@ -2379,9 +2423,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2379 2423
2380 if (need_job_tracking) 2424 if (need_job_tracking)
2381 /* TODO! Check for errors... */ 2425 /* TODO! Check for errors... */
2382 gk20a_channel_add_job(c, pre_fence, post_fence, 2426 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
2383 wait_cmd, incr_cmd,
2384 skip_buffer_refcounting);
2385 2427
2386 g->ops.fifo.userd_gp_put(g, c); 2428 g->ops.fifo.userd_gp_put(g, c);
2387 2429
@@ -2398,10 +2440,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2398 gk20a_dbg_fn("done"); 2440 gk20a_dbg_fn("done");
2399 return err; 2441 return err;
2400 2442
2443clean_up_job:
2444 channel_gk20a_free_job(c, job);
2401clean_up: 2445clean_up:
2402 gk20a_dbg_fn("fail"); 2446 gk20a_dbg_fn("fail");
2403 free_priv_cmdbuf(c, wait_cmd);
2404 free_priv_cmdbuf(c, incr_cmd);
2405 gk20a_fence_put(pre_fence); 2447 gk20a_fence_put(pre_fence);
2406 gk20a_fence_put(post_fence); 2448 gk20a_fence_put(post_fence);
2407 if (need_job_tracking) 2449 if (need_job_tracking)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index f6571b6f..0d8746b8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -218,7 +218,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
218void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error); 218void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
219void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); 219void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
220int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size, 220int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
221 struct priv_cmd_entry **entry); 221 struct priv_cmd_entry *entry);
222int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e); 222int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
223 223
224int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch); 224int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 7a71c4eb..767738ea 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -57,12 +57,11 @@ static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
57} 57}
58 58
59static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, 59static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
60 u32 id, u32 thresh, struct priv_cmd_entry **entry, 60 u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
61 struct gk20a_fence **fence) 61 struct gk20a_fence *fence)
62{ 62{
63 struct gk20a_channel_syncpt *sp = 63 struct gk20a_channel_syncpt *sp =
64 container_of(s, struct gk20a_channel_syncpt, ops); 64 container_of(s, struct gk20a_channel_syncpt, ops);
65 struct priv_cmd_entry *wait_cmd = NULL;
66 struct channel_gk20a *c = sp->c; 65 struct channel_gk20a *c = sp->c;
67 int err = 0; 66 int err = 0;
68 67
@@ -75,7 +74,7 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
75 if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) 74 if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
76 return 0; 75 return 0;
77 76
78 err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd); 77 err = gk20a_channel_alloc_priv_cmdbuf(c, 4, wait_cmd);
79 if (err) { 78 if (err) {
80 gk20a_err(dev_from_gk20a(c->g), 79 gk20a_err(dev_from_gk20a(c->g),
81 "not enough priv cmd buffer space"); 80 "not enough priv cmd buffer space");
@@ -84,21 +83,18 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
84 83
85 add_wait_cmd(c->g, wait_cmd, 0, id, thresh); 84 add_wait_cmd(c->g, wait_cmd, 0, id, thresh);
86 85
87 *entry = wait_cmd;
88 *fence = NULL;
89 return 0; 86 return 0;
90} 87}
91 88
92static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, 89static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
93 struct priv_cmd_entry **entry, 90 struct priv_cmd_entry *wait_cmd,
94 struct gk20a_fence **fence) 91 struct gk20a_fence *fence)
95{ 92{
96#ifdef CONFIG_SYNC 93#ifdef CONFIG_SYNC
97 int i; 94 int i;
98 int num_wait_cmds; 95 int num_wait_cmds;
99 struct sync_fence *sync_fence; 96 struct sync_fence *sync_fence;
100 struct sync_pt *pt; 97 struct sync_pt *pt;
101 struct priv_cmd_entry *wait_cmd = NULL;
102 struct gk20a_channel_syncpt *sp = 98 struct gk20a_channel_syncpt *sp =
103 container_of(s, struct gk20a_channel_syncpt, ops); 99 container_of(s, struct gk20a_channel_syncpt, ops);
104 struct channel_gk20a *c = sp->c; 100 struct channel_gk20a *c = sp->c;
@@ -134,7 +130,7 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
134 return 0; 130 return 0;
135 } 131 }
136 132
137 err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd); 133 err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, wait_cmd);
138 if (err) { 134 if (err) {
139 gk20a_err(dev_from_gk20a(c->g), 135 gk20a_err(dev_from_gk20a(c->g),
140 "not enough priv cmd buffer space"); 136 "not enough priv cmd buffer space");
@@ -172,8 +168,6 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
172 WARN_ON(i != num_wait_cmds); 168 WARN_ON(i != num_wait_cmds);
173 sync_fence_put(sync_fence); 169 sync_fence_put(sync_fence);
174 170
175 *entry = wait_cmd;
176 *fence = NULL;
177 return 0; 171 return 0;
178#else 172#else
179 return -ENODEV; 173 return -ENODEV;
@@ -193,15 +187,14 @@ static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
193static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, 187static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
194 bool wfi_cmd, 188 bool wfi_cmd,
195 bool register_irq, 189 bool register_irq,
196 struct priv_cmd_entry **entry, 190 struct priv_cmd_entry *incr_cmd,
197 struct gk20a_fence **fence, 191 struct gk20a_fence *fence,
198 bool need_sync_fence) 192 bool need_sync_fence)
199{ 193{
200 u32 thresh; 194 u32 thresh;
201 int incr_cmd_size; 195 int incr_cmd_size;
202 int off; 196 int off;
203 int err; 197 int err;
204 struct priv_cmd_entry *incr_cmd = NULL;
205 struct gk20a_channel_syncpt *sp = 198 struct gk20a_channel_syncpt *sp =
206 container_of(s, struct gk20a_channel_syncpt, ops); 199 container_of(s, struct gk20a_channel_syncpt, ops);
207 struct channel_gk20a *c = sp->c; 200 struct channel_gk20a *c = sp->c;
@@ -210,7 +203,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
210 if (wfi_cmd) 203 if (wfi_cmd)
211 incr_cmd_size += 2; 204 incr_cmd_size += 2;
212 205
213 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); 206 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
214 if (err) 207 if (err)
215 return err; 208 return err;
216 209
@@ -267,15 +260,21 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
267 } 260 }
268 } 261 }
269 262
270 *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, 263 err = gk20a_fence_from_syncpt(fence, sp->host1x_pdev, sp->id, thresh,
271 wfi_cmd, need_sync_fence); 264 wfi_cmd, need_sync_fence);
272 *entry = incr_cmd; 265 if (err)
266 goto clean_up_priv_cmd;
267
273 return 0; 268 return 0;
269
270clean_up_priv_cmd:
271 gk20a_free_priv_cmdbuf(c, incr_cmd);
272 return err;
274} 273}
275 274
276static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, 275static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
277 struct priv_cmd_entry **entry, 276 struct priv_cmd_entry *entry,
278 struct gk20a_fence **fence) 277 struct gk20a_fence *fence)
279{ 278{
280 return __gk20a_channel_syncpt_incr(s, 279 return __gk20a_channel_syncpt_incr(s,
281 true /* wfi */, 280 true /* wfi */,
@@ -284,8 +283,8 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
284} 283}
285 284
286static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, 285static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
287 struct priv_cmd_entry **entry, 286 struct priv_cmd_entry *entry,
288 struct gk20a_fence **fence, 287 struct gk20a_fence *fence,
289 bool need_sync_fence) 288 bool need_sync_fence)
290{ 289{
291 /* Don't put wfi cmd to this one since we're not returning 290 /* Don't put wfi cmd to this one since we're not returning
@@ -298,8 +297,8 @@ static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
298 297
299static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s, 298static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
300 int wait_fence_fd, 299 int wait_fence_fd,
301 struct priv_cmd_entry **entry, 300 struct priv_cmd_entry *entry,
302 struct gk20a_fence **fence, 301 struct gk20a_fence *fence,
303 bool wfi, 302 bool wfi,
304 bool need_sync_fence) 303 bool need_sync_fence)
305{ 304{
@@ -500,8 +499,8 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
500 499
501static int gk20a_channel_semaphore_wait_syncpt( 500static int gk20a_channel_semaphore_wait_syncpt(
502 struct gk20a_channel_sync *s, u32 id, 501 struct gk20a_channel_sync *s, u32 id,
503 u32 thresh, struct priv_cmd_entry **entry, 502 u32 thresh, struct priv_cmd_entry *entry,
504 struct gk20a_fence **fence) 503 struct gk20a_fence *fence)
505{ 504{
506 struct gk20a_channel_semaphore *sema = 505 struct gk20a_channel_semaphore *sema =
507 container_of(s, struct gk20a_channel_semaphore, ops); 506 container_of(s, struct gk20a_channel_semaphore, ops);
@@ -525,7 +524,7 @@ static int gk20a_channel_semaphore_wait_syncpt(
525 */ 524 */
526static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, 525static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
527 struct sync_fence *fence, 526 struct sync_fence *fence,
528 struct priv_cmd_entry **wait_cmd, 527 struct priv_cmd_entry *wait_cmd,
529 struct gk20a_semaphore **fp_sema) 528 struct gk20a_semaphore **fp_sema)
530{ 529{
531 struct gk20a_semaphore *sema; 530 struct gk20a_semaphore *sema;
@@ -551,7 +550,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
551 550
552 gk20a_semaphore_get(sema); 551 gk20a_semaphore_get(sema);
553 BUG_ON(!atomic_read(&sema->value)); 552 BUG_ON(!atomic_read(&sema->value));
554 add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false); 553 add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
555 554
556 /* 555 /*
557 * Make sure that gk20a_channel_semaphore_wait_fd() can create another 556 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
@@ -565,8 +564,8 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
565 564
566static int gk20a_channel_semaphore_wait_fd( 565static int gk20a_channel_semaphore_wait_fd(
567 struct gk20a_channel_sync *s, int fd, 566 struct gk20a_channel_sync *s, int fd,
568 struct priv_cmd_entry **entry, 567 struct priv_cmd_entry *entry,
569 struct gk20a_fence **fence) 568 struct gk20a_fence *fence)
570{ 569{
571 struct gk20a_channel_semaphore *sema = 570 struct gk20a_channel_semaphore *sema =
572 container_of(s, struct gk20a_channel_semaphore, ops); 571 container_of(s, struct gk20a_channel_semaphore, ops);
@@ -574,7 +573,7 @@ static int gk20a_channel_semaphore_wait_fd(
574#ifdef CONFIG_SYNC 573#ifdef CONFIG_SYNC
575 struct gk20a_semaphore *fp_sema; 574 struct gk20a_semaphore *fp_sema;
576 struct sync_fence *sync_fence; 575 struct sync_fence *sync_fence;
577 struct priv_cmd_entry *wait_cmd = NULL; 576 struct priv_cmd_entry *wait_cmd = entry;
578 struct wait_fence_work *w = NULL; 577 struct wait_fence_work *w = NULL;
579 int err, ret, status; 578 int err, ret, status;
580 579
@@ -582,19 +581,24 @@ static int gk20a_channel_semaphore_wait_fd(
582 if (!sync_fence) 581 if (!sync_fence)
583 return -EINVAL; 582 return -EINVAL;
584 583
585 ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema); 584 ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
586 if (ret == 0) { 585 if (ret == 0) {
587 if (fp_sema) 586 if (fp_sema) {
588 *fence = gk20a_fence_from_semaphore(sema->timeline, 587 err = gk20a_fence_from_semaphore(fence,
589 fp_sema, 588 sema->timeline,
590 &c->semaphore_wq, 589 fp_sema,
591 NULL, false, false); 590 &c->semaphore_wq,
592 else 591 NULL, false, false);
592 if (err) {
593 gk20a_semaphore_put(fp_sema);
594 goto clean_up_priv_cmd;
595 }
596 } else
593 /* 597 /*
594 * Allocate an empty fence. It will instantly return 598 * Init an empty fence. It will instantly return
595 * from gk20a_fence_wait(). 599 * from gk20a_fence_wait().
596 */ 600 */
597 *fence = gk20a_alloc_fence(NULL, NULL, false); 601 gk20a_init_fence(fence, NULL, NULL, false);
598 602
599 sync_fence_put(sync_fence); 603 sync_fence_put(sync_fence);
600 goto skip_slow_path; 604 goto skip_slow_path;
@@ -611,18 +615,17 @@ static int gk20a_channel_semaphore_wait_fd(
611 goto skip_slow_path; 615 goto skip_slow_path;
612 } 616 }
613 617
614 err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd); 618 err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
615 if (err) { 619 if (err) {
616 gk20a_err(dev_from_gk20a(c->g), 620 gk20a_err(dev_from_gk20a(c->g),
617 "not enough priv cmd buffer space"); 621 "not enough priv cmd buffer space");
618 sync_fence_put(sync_fence); 622 goto clean_up_sync_fence;
619 return -ENOMEM;
620 } 623 }
621 624
622 w = kzalloc(sizeof(*w), GFP_KERNEL); 625 w = kzalloc(sizeof(*w), GFP_KERNEL);
623 if (!w) { 626 if (!w) {
624 err = -ENOMEM; 627 err = -ENOMEM;
625 goto fail_free_cmdbuf; 628 goto clean_up_priv_cmd;
626 } 629 }
627 630
628 sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); 631 sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
@@ -631,7 +634,7 @@ static int gk20a_channel_semaphore_wait_fd(
631 if (!w->sema) { 634 if (!w->sema) {
632 gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores"); 635 gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
633 err = -ENOMEM; 636 err = -ENOMEM;
634 goto fail_free_worker; 637 goto clean_up_worker;
635 } 638 }
636 639
637 /* worker takes one reference */ 640 /* worker takes one reference */
@@ -641,6 +644,16 @@ static int gk20a_channel_semaphore_wait_fd(
641 /* GPU unblocked when the semaphore value increments. */ 644 /* GPU unblocked when the semaphore value increments. */
642 add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); 645 add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
643 646
647 /*
648 * We need to create the fence before adding the waiter to ensure
649 * that we properly clean up in the event the sync_fence has
650 * already signaled
651 */
652 err = gk20a_fence_from_semaphore(fence, sema->timeline, w->sema,
653 &c->semaphore_wq, NULL, false, false);
654 if (err)
655 goto clean_up_sema;
656
644 ret = sync_fence_wait_async(sync_fence, &w->waiter); 657 ret = sync_fence_wait_async(sync_fence, &w->waiter);
645 658
646 /* 659 /*
@@ -655,24 +668,22 @@ static int gk20a_channel_semaphore_wait_fd(
655 gk20a_semaphore_put(w->sema); 668 gk20a_semaphore_put(w->sema);
656 } 669 }
657 670
658 /* XXX - this fixes an actual bug, we need to hold a ref to this
659 semaphore while the job is in flight. */
660 *fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
661 &c->semaphore_wq,
662 NULL, false, false);
663
664skip_slow_path: 671skip_slow_path:
665 *entry = wait_cmd;
666 return 0; 672 return 0;
667 673
668fail_free_worker: 674clean_up_sema:
669 if (w && w->sema) 675 /*
670 gk20a_semaphore_put(w->sema); 676 * Release the refs to the semaphore, including
677 * the one for the worker since it will never run.
678 */
679 gk20a_semaphore_put(w->sema);
680 gk20a_semaphore_put(w->sema);
681clean_up_worker:
671 kfree(w); 682 kfree(w);
683clean_up_priv_cmd:
684 gk20a_free_priv_cmdbuf(c, entry);
685clean_up_sync_fence:
672 sync_fence_put(sync_fence); 686 sync_fence_put(sync_fence);
673fail_free_cmdbuf:
674 if (wait_cmd)
675 gk20a_free_priv_cmdbuf(c, wait_cmd);
676 return err; 687 return err;
677#else 688#else
678 gk20a_err(dev_from_gk20a(c->g), 689 gk20a_err(dev_from_gk20a(c->g),
@@ -684,12 +695,11 @@ fail_free_cmdbuf:
684static int __gk20a_channel_semaphore_incr( 695static int __gk20a_channel_semaphore_incr(
685 struct gk20a_channel_sync *s, bool wfi_cmd, 696 struct gk20a_channel_sync *s, bool wfi_cmd,
686 struct sync_fence *dependency, 697 struct sync_fence *dependency,
687 struct priv_cmd_entry **entry, 698 struct priv_cmd_entry *incr_cmd,
688 struct gk20a_fence **fence, 699 struct gk20a_fence *fence,
689 bool need_sync_fence) 700 bool need_sync_fence)
690{ 701{
691 int incr_cmd_size; 702 int incr_cmd_size;
692 struct priv_cmd_entry *incr_cmd = NULL;
693 struct gk20a_channel_semaphore *sp = 703 struct gk20a_channel_semaphore *sp =
694 container_of(s, struct gk20a_channel_semaphore, ops); 704 container_of(s, struct gk20a_channel_semaphore, ops);
695 struct channel_gk20a *c = sp->c; 705 struct channel_gk20a *c = sp->c;
@@ -704,29 +714,37 @@ static int __gk20a_channel_semaphore_incr(
704 } 714 }
705 715
706 incr_cmd_size = 10; 716 incr_cmd_size = 10;
707 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); 717 err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
708 if (err) { 718 if (err) {
709 gk20a_err(dev_from_gk20a(c->g), 719 gk20a_err(dev_from_gk20a(c->g),
710 "not enough priv cmd buffer space"); 720 "not enough priv cmd buffer space");
711 gk20a_semaphore_put(semaphore); 721 goto clean_up_sema;
712 return err;
713 } 722 }
714 723
715 /* Release the completion semaphore. */ 724 /* Release the completion semaphore. */
716 add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd); 725 add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
717 726
718 *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, 727 err = gk20a_fence_from_semaphore(fence,
719 &c->semaphore_wq, 728 sp->timeline, semaphore,
720 dependency, wfi_cmd, 729 &c->semaphore_wq,
721 need_sync_fence); 730 dependency, wfi_cmd,
722 *entry = incr_cmd; 731 need_sync_fence);
732 if (err)
733 goto clean_up_priv_cmd;
734
723 return 0; 735 return 0;
736
737clean_up_priv_cmd:
738 gk20a_free_priv_cmdbuf(c, incr_cmd);
739clean_up_sema:
740 gk20a_semaphore_put(semaphore);
741 return err;
724} 742}
725 743
726static int gk20a_channel_semaphore_incr_wfi( 744static int gk20a_channel_semaphore_incr_wfi(
727 struct gk20a_channel_sync *s, 745 struct gk20a_channel_sync *s,
728 struct priv_cmd_entry **entry, 746 struct priv_cmd_entry *entry,
729 struct gk20a_fence **fence) 747 struct gk20a_fence *fence)
730{ 748{
731 return __gk20a_channel_semaphore_incr(s, 749 return __gk20a_channel_semaphore_incr(s,
732 true /* wfi */, 750 true /* wfi */,
@@ -736,8 +754,8 @@ static int gk20a_channel_semaphore_incr_wfi(
736 754
737static int gk20a_channel_semaphore_incr( 755static int gk20a_channel_semaphore_incr(
738 struct gk20a_channel_sync *s, 756 struct gk20a_channel_sync *s,
739 struct priv_cmd_entry **entry, 757 struct priv_cmd_entry *entry,
740 struct gk20a_fence **fence, 758 struct gk20a_fence *fence,
741 bool need_sync_fence) 759 bool need_sync_fence)
742{ 760{
743 /* Don't put wfi cmd to this one since we're not returning 761 /* Don't put wfi cmd to this one since we're not returning
@@ -751,8 +769,8 @@ static int gk20a_channel_semaphore_incr(
751static int gk20a_channel_semaphore_incr_user( 769static int gk20a_channel_semaphore_incr_user(
752 struct gk20a_channel_sync *s, 770 struct gk20a_channel_sync *s,
753 int wait_fence_fd, 771 int wait_fence_fd,
754 struct priv_cmd_entry **entry, 772 struct priv_cmd_entry *entry,
755 struct gk20a_fence **fence, 773 struct gk20a_fence *fence,
756 bool wfi, 774 bool wfi,
757 bool need_sync_fence) 775 bool need_sync_fence)
758{ 776{
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 4b0918de..c3a92ad2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -36,8 +36,8 @@ struct gk20a_channel_sync {
36 * cmdbuf is executed. 36 * cmdbuf is executed.
37 */ 37 */
38 int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh, 38 int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
39 struct priv_cmd_entry **entry, 39 struct priv_cmd_entry *entry,
40 struct gk20a_fence **fence); 40 struct gk20a_fence *fence);
41 41
42 /* Generate a gpu wait cmdbuf from sync fd. 42 /* Generate a gpu wait cmdbuf from sync fd.
43 * Returns 43 * Returns
@@ -46,8 +46,8 @@ struct gk20a_channel_sync {
46 * cmdbuf is executed. 46 * cmdbuf is executed.
47 */ 47 */
48 int (*wait_fd)(struct gk20a_channel_sync *s, int fd, 48 int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
49 struct priv_cmd_entry **entry, 49 struct priv_cmd_entry *entry,
50 struct gk20a_fence **fence); 50 struct gk20a_fence *fence);
51 51
52 /* Increment syncpoint/semaphore. 52 /* Increment syncpoint/semaphore.
53 * Returns 53 * Returns
@@ -55,8 +55,8 @@ struct gk20a_channel_sync {
55 * - a fence that can be passed to wait_cpu() and is_expired(). 55 * - a fence that can be passed to wait_cpu() and is_expired().
56 */ 56 */
57 int (*incr)(struct gk20a_channel_sync *s, 57 int (*incr)(struct gk20a_channel_sync *s,
58 struct priv_cmd_entry **entry, 58 struct priv_cmd_entry *entry,
59 struct gk20a_fence **fence, 59 struct gk20a_fence *fence,
60 bool need_sync_fence); 60 bool need_sync_fence);
61 61
62 /* Increment syncpoint/semaphore, preceded by a wfi. 62 /* Increment syncpoint/semaphore, preceded by a wfi.
@@ -65,8 +65,8 @@ struct gk20a_channel_sync {
65 * - a fence that can be passed to wait_cpu() and is_expired(). 65 * - a fence that can be passed to wait_cpu() and is_expired().
66 */ 66 */
67 int (*incr_wfi)(struct gk20a_channel_sync *s, 67 int (*incr_wfi)(struct gk20a_channel_sync *s,
68 struct priv_cmd_entry **entry, 68 struct priv_cmd_entry *entry,
69 struct gk20a_fence **fence); 69 struct gk20a_fence *fence);
70 70
71 /* Increment syncpoint/semaphore, so that the returned fence represents 71 /* Increment syncpoint/semaphore, so that the returned fence represents
72 * work completion (may need wfi) and can be returned to user space. 72 * work completion (may need wfi) and can be returned to user space.
@@ -77,8 +77,8 @@ struct gk20a_channel_sync {
77 */ 77 */
78 int (*incr_user)(struct gk20a_channel_sync *s, 78 int (*incr_user)(struct gk20a_channel_sync *s,
79 int wait_fence_fd, 79 int wait_fence_fd,
80 struct priv_cmd_entry **entry, 80 struct priv_cmd_entry *entry,
81 struct gk20a_fence **fence, 81 struct gk20a_fence *fence,
82 bool wfi, 82 bool wfi,
83 bool need_sync_fence); 83 bool need_sync_fence);
84 84
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 596dc549..f788829f 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -63,16 +63,27 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
63 return f; 63 return f;
64} 64}
65 65
66static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
67{
68 bool valid = f->valid;
69
70 rmb();
71 return valid;
72}
73
66int gk20a_fence_wait(struct gk20a_fence *f, int timeout) 74int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
67{ 75{
68 if (!tegra_platform_is_silicon()) 76 if (f && gk20a_fence_is_valid(f)) {
69 timeout = (u32)MAX_SCHEDULE_TIMEOUT; 77 if (!tegra_platform_is_silicon())
70 return f->ops->wait(f, timeout); 78 timeout = (u32)MAX_SCHEDULE_TIMEOUT;
79 return f->ops->wait(f, timeout);
80 }
81 return 0;
71} 82}
72 83
73bool gk20a_fence_is_expired(struct gk20a_fence *f) 84bool gk20a_fence_is_expired(struct gk20a_fence *f)
74{ 85{
75 if (f && f->ops) 86 if (f && gk20a_fence_is_valid(f) && f->ops)
76 return f->ops->is_expired(f); 87 return f->ops->is_expired(f);
77 else 88 else
78 return true; 89 return true;
@@ -83,7 +94,7 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
83#ifdef CONFIG_SYNC 94#ifdef CONFIG_SYNC
84 int fd; 95 int fd;
85 96
86 if (!f->sync_fence) 97 if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence)
87 return -EINVAL; 98 return -EINVAL;
88 99
89 fd = get_unused_fd_flags(O_RDWR); 100 fd = get_unused_fd_flags(O_RDWR);
@@ -98,18 +109,28 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
98#endif 109#endif
99} 110}
100 111
101struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops, 112struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
102 struct sync_fence *sync_fence, bool wfi)
103{ 113{
104 struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL); 114 struct gk20a_fence *fence;
105 if (!f) 115
116 fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
117 if (!fence)
106 return NULL; 118 return NULL;
107 kref_init(&f->ref); 119
120 kref_init(&fence->ref);
121 return fence;
122}
123
124void gk20a_init_fence(struct gk20a_fence *f,
125 const struct gk20a_fence_ops *ops,
126 struct sync_fence *sync_fence, bool wfi)
127{
128 if (!f)
129 return;
108 f->ops = ops; 130 f->ops = ops;
109 f->sync_fence = sync_fence; 131 f->sync_fence = sync_fence;
110 f->wfi = wfi; 132 f->wfi = wfi;
111 f->syncpt_id = -1; 133 f->syncpt_id = -1;
112 return f;
113} 134}
114 135
115/* Fences that are backed by GPU semaphores: */ 136/* Fences that are backed by GPU semaphores: */
@@ -143,14 +164,15 @@ static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
143}; 164};
144 165
145/* This function takes ownership of the semaphore */ 166/* This function takes ownership of the semaphore */
146struct gk20a_fence *gk20a_fence_from_semaphore( 167int gk20a_fence_from_semaphore(
168 struct gk20a_fence *fence_out,
147 struct sync_timeline *timeline, 169 struct sync_timeline *timeline,
148 struct gk20a_semaphore *semaphore, 170 struct gk20a_semaphore *semaphore,
149 wait_queue_head_t *semaphore_wq, 171 wait_queue_head_t *semaphore_wq,
150 struct sync_fence *dependency, 172 struct sync_fence *dependency,
151 bool wfi, bool need_sync_fence) 173 bool wfi, bool need_sync_fence)
152{ 174{
153 struct gk20a_fence *f; 175 struct gk20a_fence *f = fence_out;
154 struct sync_fence *sync_fence = NULL; 176 struct sync_fence *sync_fence = NULL;
155 177
156#ifdef CONFIG_SYNC 178#ifdef CONFIG_SYNC
@@ -159,21 +181,26 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
159 dependency, "f-gk20a-0x%04x", 181 dependency, "f-gk20a-0x%04x",
160 gk20a_semaphore_gpu_ro_va(semaphore)); 182 gk20a_semaphore_gpu_ro_va(semaphore));
161 if (!sync_fence) 183 if (!sync_fence)
162 return NULL; 184 return -1;
163 } 185 }
164#endif 186#endif
165 187
166 f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi); 188 gk20a_init_fence(f, &gk20a_semaphore_fence_ops, sync_fence, wfi);
167 if (!f) { 189 if (!f) {
168#ifdef CONFIG_SYNC 190#ifdef CONFIG_SYNC
169 sync_fence_put(sync_fence); 191 sync_fence_put(sync_fence);
170#endif 192#endif
171 return NULL; 193 return -EINVAL;
172 } 194 }
173 195
174 f->semaphore = semaphore; 196 f->semaphore = semaphore;
175 f->semaphore_wq = semaphore_wq; 197 f->semaphore_wq = semaphore_wq;
176 return f; 198
199 /* commit previous writes before setting the valid flag */
200 wmb();
201 f->valid = true;
202
203 return 0;
177} 204}
178 205
179#ifdef CONFIG_TEGRA_GK20A 206#ifdef CONFIG_TEGRA_GK20A
@@ -197,11 +224,13 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
197 .is_expired = &gk20a_syncpt_fence_is_expired, 224 .is_expired = &gk20a_syncpt_fence_is_expired,
198}; 225};
199 226
200struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, 227int gk20a_fence_from_syncpt(
201 u32 id, u32 value, bool wfi, 228 struct gk20a_fence *fence_out,
202 bool need_sync_fence) 229 struct platform_device *host1x_pdev,
230 u32 id, u32 value, bool wfi,
231 bool need_sync_fence)
203{ 232{
204 struct gk20a_fence *f; 233 struct gk20a_fence *f = fence_out;
205 struct sync_fence *sync_fence = NULL; 234 struct sync_fence *sync_fence = NULL;
206 235
207#ifdef CONFIG_SYNC 236#ifdef CONFIG_SYNC
@@ -214,27 +243,32 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
214 sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1, 243 sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
215 "fence"); 244 "fence");
216 if (IS_ERR(sync_fence)) 245 if (IS_ERR(sync_fence))
217 return NULL; 246 return -1;
218 } 247 }
219#endif 248#endif
220 249
221 f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi); 250 gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi);
222 if (!f) { 251 if (!f) {
223#ifdef CONFIG_SYNC 252#ifdef CONFIG_SYNC
224 if (sync_fence) 253 if (sync_fence)
225 sync_fence_put(sync_fence); 254 sync_fence_put(sync_fence);
226#endif 255#endif
227 return NULL; 256 return -EINVAL;
228 } 257 }
229 f->host1x_pdev = host1x_pdev; 258 f->host1x_pdev = host1x_pdev;
230 f->syncpt_id = id; 259 f->syncpt_id = id;
231 f->syncpt_value = value; 260 f->syncpt_value = value;
232 return f; 261
262 /* commit previous writes before setting the valid flag */
263 wmb();
264 f->valid = true;
265
266 return 0;
233} 267}
234#else 268#else
235struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, 269int gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
236 u32 id, u32 value, bool wfi) 270 u32 id, u32 value, bool wfi)
237{ 271{
238 return NULL; 272 return -EINVAL;
239} 273}
240#endif 274#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index 35488ea3..3fe2d8b2 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -31,6 +31,7 @@ struct gk20a_fence_ops;
31 31
32struct gk20a_fence { 32struct gk20a_fence {
33 /* Valid for all fence types: */ 33 /* Valid for all fence types: */
34 bool valid;
34 struct kref ref; 35 struct kref ref;
35 bool wfi; 36 bool wfi;
36 struct sync_fence *sync_fence; 37 struct sync_fence *sync_fence;
@@ -47,21 +48,25 @@ struct gk20a_fence {
47}; 48};
48 49
49/* Fences can be created from semaphores or syncpoint (id, value) pairs */ 50/* Fences can be created from semaphores or syncpoint (id, value) pairs */
50struct gk20a_fence *gk20a_fence_from_semaphore( 51int gk20a_fence_from_semaphore(
52 struct gk20a_fence *fence_out,
51 struct sync_timeline *timeline, 53 struct sync_timeline *timeline,
52 struct gk20a_semaphore *semaphore, 54 struct gk20a_semaphore *semaphore,
53 wait_queue_head_t *semaphore_wq, 55 wait_queue_head_t *semaphore_wq,
54 struct sync_fence *dependency, 56 struct sync_fence *dependency,
55 bool wfi, bool need_sync_fence); 57 bool wfi, bool need_sync_fence);
56 58
57struct gk20a_fence *gk20a_fence_from_syncpt( 59int gk20a_fence_from_syncpt(
60 struct gk20a_fence *fence_out,
58 struct platform_device *host1x_pdev, 61 struct platform_device *host1x_pdev,
59 u32 id, u32 value, bool wfi, 62 u32 id, u32 value, bool wfi,
60 bool need_sync_fence); 63 bool need_sync_fence);
61 64
62struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops, 65struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c);
63 struct sync_fence *sync_fence, 66
64 bool wfi); 67void gk20a_init_fence(struct gk20a_fence *f,
68 const struct gk20a_fence_ops *ops,
69 struct sync_fence *sync_fence, bool wfi);
65 70
66/* Fence operations */ 71/* Fence operations */
67void gk20a_fence_put(struct gk20a_fence *f); 72void gk20a_fence_put(struct gk20a_fence *f);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b34ff4a7..b2cca072 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -198,6 +198,7 @@ struct priv_cmd_queue {
198}; 198};
199 199
200struct priv_cmd_entry { 200struct priv_cmd_entry {
201 bool valid;
201 struct mem_desc *mem; 202 struct mem_desc *mem;
202 u32 off; /* offset in mem, in u32 entries */ 203 u32 off; /* offset in mem, in u32 entries */
203 u64 gva; 204 u64 gva;