-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       79
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h        8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  424
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h   19
4 files changed, 492 insertions, 38 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 68a30392..651ea08c 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -33,6 +33,7 @@
 
 #include "gk20a.h"
 #include "dbg_gpu_gk20a.h"
+#include "semaphore_gk20a.h"
 
 #include "hw_ram_gk20a.h"
 #include "hw_fifo_gk20a.h"
@@ -340,7 +341,7 @@ static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
          * resource at this point
          * if not, then it will be destroyed at channel_free()
          */
-        if (ch_gk20a->sync && ch_gk20a->sync->syncpt_aggressive_destroy) {
+        if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) {
                 ch_gk20a->sync->destroy(ch_gk20a->sync);
                 ch_gk20a->sync = NULL;
         }
@@ -657,6 +658,8 @@ unbind:
         ch->vpr = false;
         ch->vm = NULL;
 
+        gk20a_channel_fence_close(&ch->last_submit.pre_fence);
+        gk20a_channel_fence_close(&ch->last_submit.post_fence);
         if (ch->sync) {
                 ch->sync->destroy(ch->sync);
                 ch->sync = NULL;
@@ -1089,7 +1092,8 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
         ch_vm = c->vm;
 
         c->cmds_pending = false;
-        c->last_submit_fence.valid = false;
+        gk20a_channel_fence_close(&c->last_submit.pre_fence);
+        gk20a_channel_fence_close(&c->last_submit.post_fence);
 
         c->ramfc.offset = 0;
         c->ramfc.size = ram_in_ramfc_s() / 8;
@@ -1272,13 +1276,16 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
                 }
         }
 
-        err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
+        gk20a_channel_fence_close(&c->last_submit.pre_fence);
+        gk20a_channel_fence_close(&c->last_submit.post_fence);
+
+        err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
         if (unlikely(err)) {
                 mutex_unlock(&c->submit_lock);
                 return err;
         }
 
-        WARN_ON(!c->last_submit_fence.wfi);
+        WARN_ON(!c->last_submit.post_fence.wfi);
 
         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
@@ -1344,7 +1351,8 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
 }
 
 static int gk20a_channel_add_job(struct channel_gk20a *c,
-                struct gk20a_channel_fence *fence)
+                struct gk20a_channel_fence *pre_fence,
+                struct gk20a_channel_fence *post_fence)
 {
         struct vm_gk20a *vm = c->vm;
         struct channel_gk20a_job *job = NULL;
@@ -1369,7 +1377,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 
         job->num_mapped_buffers = num_mapped_buffers;
         job->mapped_buffers = mapped_buffers;
-        job->fence = *fence;
+        gk20a_channel_fence_dup(pre_fence, &job->pre_fence);
+        gk20a_channel_fence_dup(post_fence, &job->post_fence);
 
         mutex_lock(&c->jobs_lock);
         list_add_tail(&job->list, &c->jobs);
@@ -1391,13 +1400,18 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
         mutex_lock(&c->jobs_lock);
         list_for_each_entry_safe(job, n, &c->jobs, list) {
                 bool completed = WARN_ON(!c->sync) ||
-                        c->sync->is_expired(c->sync, &job->fence);
+                        c->sync->is_expired(c->sync, &job->post_fence);
                 if (!completed)
                         break;
 
                 gk20a_vm_put_buffers(vm, job->mapped_buffers,
                                 job->num_mapped_buffers);
 
+                /* Close the fences (this will unref the semaphores and release
+                 * them to the pool). */
+                gk20a_channel_fence_close(&job->pre_fence);
+                gk20a_channel_fence_close(&job->post_fence);
+
                 /* job is done. release its reference to vm */
                 gk20a_vm_put(vm);
 
@@ -1413,8 +1427,8 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
          * the sync resource
          */
         if (list_empty(&c->jobs)) {
-                if (c->sync && c->sync->syncpt_aggressive_destroy &&
-                        c->sync->is_expired(c->sync, &c->last_submit_fence)) {
+                if (c->sync && c->sync->aggressive_destroy &&
+                        c->sync->is_expired(c->sync, &c->last_submit.post_fence)) {
                         c->sync->destroy(c->sync);
                         c->sync = NULL;
                 }
@@ -1448,8 +1462,11 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         struct device *d = dev_from_gk20a(g);
         int err = 0;
         int i;
+        int wait_fence_fd = -1;
         struct priv_cmd_entry *wait_cmd = NULL;
         struct priv_cmd_entry *incr_cmd = NULL;
+        struct gk20a_channel_fence pre_fence = { 0 };
+        struct gk20a_channel_fence post_fence = { 0 };
         /* we might need two extra gpfifo entries - one for pre fence
          * and one for post fence. */
         const int extra_entries = 2;
@@ -1534,12 +1551,14 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
          * keep running some tests which trigger this condition
          */
         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
-                if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
-                        err = c->sync->wait_fd(c->sync, fence->syncpt_id,
-                                        &wait_cmd);
-                else
+                if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+                        wait_fence_fd = fence->syncpt_id;
+                        err = c->sync->wait_fd(c->sync, wait_fence_fd,
+                                        &wait_cmd, &pre_fence);
+                } else {
                         err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
-                                        fence->value, &wait_cmd);
+                                        fence->value, &wait_cmd, &pre_fence);
+                }
         }
         if (err) {
                 mutex_unlock(&c->submit_lock);
@@ -1551,19 +1570,19 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
            to keep track of method completion for idle railgating */
         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
             flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
-                err = c->sync->incr_user_fd(c->sync, &incr_cmd,
-                                            &c->last_submit_fence,
+                err = c->sync->incr_user_fd(c->sync, wait_fence_fd, &incr_cmd,
+                                            &post_fence,
                                             need_wfi,
                                             &fence->syncpt_id);
         else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
                 err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
-                                                &c->last_submit_fence,
+                                                &post_fence,
                                                 need_wfi,
                                                 &fence->syncpt_id,
                                                 &fence->value);
         else
                 err = c->sync->incr(c->sync, &incr_cmd,
-                                    &c->last_submit_fence);
+                                    &post_fence);
         if (err) {
                 mutex_unlock(&c->submit_lock);
                 goto clean_up;
@@ -1611,8 +1630,13 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                 incr_cmd->gp_put = c->gpfifo.put;
         }
 
+        gk20a_channel_fence_close(&c->last_submit.pre_fence);
+        gk20a_channel_fence_close(&c->last_submit.post_fence);
+        c->last_submit.pre_fence = pre_fence;
+        c->last_submit.post_fence = post_fence;
+
         /* TODO! Check for errors... */
-        gk20a_channel_add_job(c, &c->last_submit_fence);
+        gk20a_channel_add_job(c, &pre_fence, &post_fence);
 
         c->cmds_pending = true;
         gk20a_bar1_writel(g,
@@ -1637,6 +1661,8 @@ clean_up:
         gk20a_err(d, "fail");
         free_priv_cmdbuf(c, wait_cmd);
         free_priv_cmdbuf(c, incr_cmd);
+        gk20a_channel_fence_close(&pre_fence);
+        gk20a_channel_fence_close(&post_fence);
         gk20a_idle(g->dev);
         return err;
 }
@@ -1669,6 +1695,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
 {
         int err = 0;
+        struct gk20a_channel_fence *fence = &ch->last_submit.post_fence;
 
         if (!ch->cmds_pending)
                 return 0;
@@ -1677,21 +1704,20 @@ int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
         if (ch->has_timedout)
                 return -ETIMEDOUT;
 
-        if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
+        if (!(fence->valid && fence->wfi)) {
                 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
                 err = gk20a_channel_submit_wfi(ch);
         }
         if (err)
                 return err;
 
-        BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
+        BUG_ON(!(fence->valid && fence->wfi));
 
-        gk20a_dbg_fn("waiting for channel to finish thresh:%d",
-                     ch->last_submit_fence.thresh);
+        gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
+                     fence->thresh, fence->semaphore);
 
         if (ch->sync) {
-                err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence,
-                                         timeout);
+                err = ch->sync->wait_cpu(ch->sync, fence, timeout);
                 if (WARN_ON(err))
                         dev_warn(dev_from_gk20a(ch->g),
                                  "timed out waiting for gk20a channel to finish");
@@ -1900,7 +1926,8 @@ int gk20a_channel_suspend(struct gk20a *g)
 
                         if (c->sync)
                                 c->sync->wait_cpu(c->sync,
-                                                  &c->last_submit_fence, 500000);
+                                                  &c->last_submit.post_fence,
+                                                  500000);
                         break;
                 }
         }
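
For context, the fence lifecycle that the channel_gk20a.c changes above establish can be summarized in the following sketch. This is illustrative only, not part of the patch; locking, error handling, and the gpfifo programming are omitted, and the wrapper function name is hypothetical.

/* Sketch of the fence ownership rules after this change. */
static void submit_fence_lifecycle_sketch(struct channel_gk20a *c,
                                          int wait_fence_fd)
{
        struct priv_cmd_entry *wait_cmd = NULL, *incr_cmd = NULL;
        struct gk20a_channel_fence pre_fence = { 0 };
        struct gk20a_channel_fence post_fence = { 0 };

        /* The wait/incr hooks may attach a refcounted semaphore to the
         * fences they fill in (the syncpt backend leaves pre_fence
         * invalid). */
        c->sync->wait_fd(c->sync, wait_fence_fd, &wait_cmd, &pre_fence);
        c->sync->incr(c->sync, &incr_cmd, &post_fence);

        /* Drop the previous submit's references, then move (not dup)
         * the new fences into last_submit. */
        gk20a_channel_fence_close(&c->last_submit.pre_fence);
        gk20a_channel_fence_close(&c->last_submit.post_fence);
        c->last_submit.pre_fence = pre_fence;
        c->last_submit.post_fence = post_fence;

        /* The job takes its own references via gk20a_channel_fence_dup();
         * gk20a_channel_update() closes them once post_fence expires. */
        gk20a_channel_add_job(c, &pre_fence, &post_fence);
}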
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index dd0197d6..84983cc6 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -68,7 +68,8 @@ struct channel_ctx_gk20a {
 struct channel_gk20a_job {
         struct mapped_buffer_node **mapped_buffers;
         int num_mapped_buffers;
-        struct gk20a_channel_fence fence;
+        struct gk20a_channel_fence pre_fence;
+        struct gk20a_channel_fence post_fence;
         struct list_head list;
 };
 
@@ -112,7 +113,10 @@ struct channel_gk20a {
         u32 timeout_gpfifo_get;
 
         bool cmds_pending;
-        struct gk20a_channel_fence last_submit_fence;
+        struct {
+                struct gk20a_channel_fence pre_fence;
+                struct gk20a_channel_fence post_fence;
+        } last_submit;
 
         void (*remove_support)(struct channel_gk20a *);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
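
A minimal sketch of the dup/close discipline the new pre_fence/post_fence copies rely on (illustrative only; the helpers are defined in channel_sync_gk20a.c below):

/* Every copy of a fence that can outlive its source takes its own
 * semaphore reference via dup and drops it via close. */
struct gk20a_channel_fence copy = { 0 };

gk20a_channel_fence_dup(&c->last_submit.post_fence, &copy); /* get ref */
/* ... the copy stays valid even if last_submit is overwritten ... */
gk20a_channel_fence_close(&copy);                           /* put ref */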
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index f91dd52d..677c4b49 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -19,6 +19,9 @@
 
 #include "channel_sync_gk20a.h"
 #include "gk20a.h"
+#include "semaphore_gk20a.h"
+#include "sync_gk20a.h"
+#include "mm_gk20a.h"
 
 #ifdef CONFIG_SYNC
 #include "../../../staging/android/sync.h"
@@ -74,7 +77,8 @@ bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s,
 }
 
 int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
-                       u32 thresh, struct priv_cmd_entry **entry)
+                       u32 thresh, struct priv_cmd_entry **entry,
+                       struct gk20a_channel_fence *fence)
 {
         struct gk20a_channel_syncpt *sp =
                 container_of(s, struct gk20a_channel_syncpt, ops);
@@ -99,11 +103,13 @@ int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
         add_wait_cmd(&wait_cmd->ptr[0], id, thresh);
 
         *entry = wait_cmd;
+        fence->valid = false;
         return 0;
 }
 
 int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
-                       struct priv_cmd_entry **entry)
+                       struct priv_cmd_entry **entry,
+                       struct gk20a_channel_fence *fence)
 {
 #ifdef CONFIG_SYNC
         int i;
@@ -158,6 +164,7 @@ int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
         sync_fence_put(sync_fence);
 
         *entry = wait_cmd;
+        fence->valid = false;
         return 0;
 #else
         return -ENODEV;
@@ -301,6 +308,7 @@ int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
 }
 
 int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
+                            int wait_fence_fd,
                             struct priv_cmd_entry **entry,
                             struct gk20a_channel_fence *fence,
                             bool wfi,
@@ -366,18 +374,424 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
         sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
         sp->ops.destroy = gk20a_channel_syncpt_destroy;
 
-        sp->ops.syncpt_aggressive_destroy = true;
+        sp->ops.aggressive_destroy = true;
 
         return &sp->ops;
 }
 #endif /* CONFIG_TEGRA_GK20A */
 
+struct gk20a_channel_semaphore {
+        struct gk20a_channel_sync ops;
+        struct channel_gk20a *c;
+
+        /* A semaphore pool owned by this channel. */
+        struct gk20a_semaphore_pool *pool;
+
+        /* A sync timeline that advances when gpu completes work. */
+        struct sync_timeline *timeline;
+};
+
+#ifdef CONFIG_SYNC
+struct wait_fence_work {
+        struct sync_fence_waiter waiter;
+        struct channel_gk20a *ch;
+        struct gk20a_semaphore *sema;
+};
+
+static void gk20a_channel_semaphore_launcher(
+                struct sync_fence *fence,
+                struct sync_fence_waiter *waiter)
+{
+        int err;
+        struct wait_fence_work *w =
+                container_of(waiter, struct wait_fence_work, waiter);
+        struct gk20a *g = w->ch->g;
+
+        gk20a_dbg_info("waiting for pre fence %p '%s'",
+                        fence, fence->name);
+        err = sync_fence_wait(fence, -1);
+        if (err < 0)
+                dev_err(&g->dev->dev, "error waiting pre-fence: %d\n", err);
+
+        gk20a_dbg_info(
+                "wait completed (%d) for fence %p '%s', triggering gpu work",
+                err, fence, fence->name);
+        sync_fence_put(fence);
+        gk20a_semaphore_release(w->sema);
+        gk20a_semaphore_put(w->sema);
+        kfree(w);
+}
+#endif
+
+static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload,
+                        bool acquire, bool wfi)
+{
+        int i = 0;
+        /* semaphore_a */
+        ptr[i++] = 0x20010004;
+        /* offset_upper */
+        ptr[i++] = (sema >> 32) & 0xff;
+        /* semaphore_b */
+        ptr[i++] = 0x20010005;
+        /* offset */
+        ptr[i++] = sema & 0xffffffff;
+        /* semaphore_c */
+        ptr[i++] = 0x20010006;
+        /* payload */
+        ptr[i++] = payload;
+        if (acquire) {
+                /* semaphore_d */
+                ptr[i++] = 0x20010007;
+                /* operation: acq_geq, switch_en */
+                ptr[i++] = 0x4 | (0x1 << 12);
+        } else {
+                /* semaphore_d */
+                ptr[i++] = 0x20010007;
+                /* operation: release, wfi */
+                ptr[i++] = 0x2 | ((wfi ? 0x0 : 0x1) << 20);
+                /* non_stall_int */
+                ptr[i++] = 0x20010008;
+                /* ignored */
+                ptr[i++] = 0;
+        }
+        return i;
+}
+
+static int gk20a_channel_semaphore_wait_cpu(
+                struct gk20a_channel_sync *s,
+                struct gk20a_channel_fence *fence,
+                int timeout)
+{
+        int remain;
+        struct gk20a_channel_semaphore *sp =
+                container_of(s, struct gk20a_channel_semaphore, ops);
+        if (!fence->valid || WARN_ON(!fence->semaphore))
+                return 0;
+
+        remain = wait_event_interruptible_timeout(
+                sp->c->semaphore_wq,
+                !gk20a_semaphore_is_acquired(fence->semaphore),
+                timeout);
+        if (remain == 0 && gk20a_semaphore_is_acquired(fence->semaphore))
+                return -ETIMEDOUT;
+        else if (remain < 0)
+                return remain;
+        return 0;
+}
+
+static bool gk20a_channel_semaphore_is_expired(
+                struct gk20a_channel_sync *s,
+                struct gk20a_channel_fence *fence)
+{
+        bool expired;
+        struct gk20a_channel_semaphore *sp =
+                container_of(s, struct gk20a_channel_semaphore, ops);
+        if (!fence->valid || WARN_ON(!fence->semaphore))
+                return true;
+
+        expired = !gk20a_semaphore_is_acquired(fence->semaphore);
+        if (expired)
+                gk20a_sync_timeline_signal(sp->timeline);
+        return expired;
+}
+
+static int gk20a_channel_semaphore_wait_syncpt(
+                struct gk20a_channel_sync *s, u32 id,
+                u32 thresh, struct priv_cmd_entry **entry,
+                struct gk20a_channel_fence *fence)
+{
+        struct gk20a_channel_semaphore *sema =
+                container_of(s, struct gk20a_channel_semaphore, ops);
+        struct device *dev = dev_from_gk20a(sema->c->g);
+        gk20a_err(dev, "trying to use syncpoint synchronization");
+        return -ENODEV;
+}
+
+static int gk20a_channel_semaphore_wait_fd(
+                struct gk20a_channel_sync *s, int fd,
+                struct priv_cmd_entry **entry,
+                struct gk20a_channel_fence *fence)
+{
+        struct gk20a_channel_semaphore *sema =
+                container_of(s, struct gk20a_channel_semaphore, ops);
+        struct channel_gk20a *c = sema->c;
+#ifdef CONFIG_SYNC
+        struct sync_fence *sync_fence;
+        struct priv_cmd_entry *wait_cmd = NULL;
+        struct wait_fence_work *w;
+        int written;
+        int err;
+        u64 va;
+
+        sync_fence = gk20a_sync_fence_fdget(fd);
+        if (!sync_fence)
+                return -EINVAL;
+
+        w = kzalloc(sizeof(*w), GFP_KERNEL);
+        if (!w) {
+                err = -ENOMEM;
+                goto fail;
+        }
+        sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
+        w->ch = c;
+        w->sema = gk20a_semaphore_alloc(sema->pool);
+        if (!w->sema) {
+                gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
+                err = -EAGAIN;
+                goto fail;
+        }
+
+        gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
+        if (wait_cmd == NULL) {
+                gk20a_err(dev_from_gk20a(c->g),
+                          "not enough priv cmd buffer space");
+                err = -EAGAIN;
+                goto fail;
+        }
+
+        va = gk20a_semaphore_gpu_va(w->sema, c->vm);
+        /* GPU unblocked when the semaphore value becomes 1. */
+        written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false);
+        WARN_ON(written != wait_cmd->size);
+        sync_fence_wait_async(sync_fence, &w->waiter);
+
+        *entry = wait_cmd;
+        return 0;
+fail:
+        if (w && w->sema)
+                gk20a_semaphore_put(w->sema);
+        kfree(w);
+        sync_fence_put(sync_fence);
+        return err;
+#else
+        gk20a_err(dev_from_gk20a(c->g),
+                  "trying to use sync fds with CONFIG_SYNC disabled");
+        return -ENODEV;
+#endif
+}
+
+static int __gk20a_channel_semaphore_incr(
+                struct gk20a_channel_sync *s, bool wfi_cmd,
+                struct priv_cmd_entry **entry,
+                struct gk20a_channel_fence *fence)
+{
+        u64 va;
+        int incr_cmd_size;
+        int written;
+        struct priv_cmd_entry *incr_cmd = NULL;
+        struct gk20a_channel_semaphore *sp =
+                container_of(s, struct gk20a_channel_semaphore, ops);
+        struct channel_gk20a *c = sp->c;
+        struct gk20a_semaphore *semaphore;
+
+        semaphore = gk20a_semaphore_alloc(sp->pool);
+        if (!semaphore) {
+                gk20a_err(dev_from_gk20a(c->g),
+                          "ran out of semaphores");
+                return -EAGAIN;
+        }
+
+        incr_cmd_size = 10;
+        gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
+        if (incr_cmd == NULL) {
+                gk20a_err(dev_from_gk20a(c->g),
+                          "not enough priv cmd buffer space");
+                gk20a_semaphore_put(semaphore);
+                return -EAGAIN;
+        }
+
+        /* Release the completion semaphore. */
+        va = gk20a_semaphore_gpu_va(semaphore, c->vm);
+        written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd);
+        WARN_ON(written != incr_cmd_size);
+
+        fence->valid = true;
+        fence->wfi = wfi_cmd;
+        fence->semaphore = semaphore;
+        *entry = incr_cmd;
+        return 0;
+}
+
+static int gk20a_channel_semaphore_incr_wfi(
+                struct gk20a_channel_sync *s,
+                struct priv_cmd_entry **entry,
+                struct gk20a_channel_fence *fence)
+{
+        return __gk20a_channel_semaphore_incr(s,
+                        true /* wfi */,
+                        entry, fence);
+}
+
+static int gk20a_channel_semaphore_incr(
+                struct gk20a_channel_sync *s,
+                struct priv_cmd_entry **entry,
+                struct gk20a_channel_fence *fence)
+{
+        /* Don't put wfi cmd to this one since we're not returning
+         * a fence to user space. */
+        return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
+                        entry, fence);
+}
+
+static int gk20a_channel_semaphore_incr_user_syncpt(
+                struct gk20a_channel_sync *s,
+                struct priv_cmd_entry **entry,
+                struct gk20a_channel_fence *fence,
+                bool wfi,
+                u32 *id, u32 *thresh)
+{
+        struct gk20a_channel_semaphore *sema =
+                container_of(s, struct gk20a_channel_semaphore, ops);
+        struct device *dev = dev_from_gk20a(sema->c->g);
+        gk20a_err(dev, "trying to use syncpoint synchronization");
+        return -ENODEV;
+}
+
+static int gk20a_channel_semaphore_incr_user_fd(
+                struct gk20a_channel_sync *s,
+                int wait_fence_fd,
+                struct priv_cmd_entry **entry,
+                struct gk20a_channel_fence *fence,
+                bool wfi,
+                int *fd)
+{
+        struct gk20a_channel_semaphore *sema =
+                container_of(s, struct gk20a_channel_semaphore, ops);
+#ifdef CONFIG_SYNC
+        struct sync_fence *dependency = NULL;
+        int err;
+
+        err = __gk20a_channel_semaphore_incr(s, wfi,
+                        entry, fence);
+        if (err)
+                return err;
+
+        if (wait_fence_fd >= 0) {
+                dependency = gk20a_sync_fence_fdget(wait_fence_fd);
+                if (!dependency)
+                        return -EINVAL;
+        }
+
+        *fd = gk20a_sync_fence_create(sema->timeline, fence->semaphore,
+                                      dependency, "fence");
+        if (*fd < 0) {
+                if (dependency)
+                        sync_fence_put(dependency);
+                return *fd;
+        }
+        return 0;
+#else
+        gk20a_err(dev_from_gk20a(sema->c->g),
+                  "trying to use sync fds with CONFIG_SYNC disabled");
+        return -ENODEV;
+#endif
+}
+
+static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
+{
+        /* Nothing to do. */
+}
+
+static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
+{
+        struct gk20a_channel_semaphore *sema =
+                container_of(s, struct gk20a_channel_semaphore, ops);
+        if (sema->timeline)
+                gk20a_sync_timeline_destroy(sema->timeline);
+        if (sema->pool) {
+                gk20a_semaphore_pool_unmap(sema->pool, sema->c->vm);
+                gk20a_semaphore_pool_put(sema->pool);
+        }
+        kfree(sema);
+}
+
+static struct gk20a_channel_sync *
+gk20a_channel_semaphore_create(struct channel_gk20a *c)
+{
+        int err;
+        int asid = -1;
+        struct gk20a_channel_semaphore *sema;
+        char pool_name[20];
+
+        if (WARN_ON(!c->vm))
+                return NULL;
+
+        sema = kzalloc(sizeof(*sema), GFP_KERNEL);
+        if (!sema)
+                return NULL;
+        sema->c = c;
+
+        if (c->vm->as_share)
+                asid = c->vm->as_share->id;
+
+        /* A pool of 256 semaphores fits into one 4k page. */
+        sprintf(pool_name, "semaphore_pool-%d", c->hw_chid);
+        sema->pool = gk20a_semaphore_pool_alloc(dev_from_gk20a(c->g),
+                                                pool_name, 256);
+        if (!sema->pool)
+                goto clean_up;
+
+        /* Map the semaphore pool to the channel vm. Map as read-write to the
+         * owner channel (all other channels should map as read only!). */
+        err = gk20a_semaphore_pool_map(sema->pool, c->vm, gk20a_mem_flag_none);
+        if (err)
+                goto clean_up;
+
+#ifdef CONFIG_SYNC
+        sema->timeline = gk20a_sync_timeline_create(
+                        "gk20a_ch%d_as%d", c->hw_chid, asid);
+        if (!sema->timeline)
+                goto clean_up;
+#endif
+        sema->ops.wait_cpu = gk20a_channel_semaphore_wait_cpu;
+        sema->ops.is_expired = gk20a_channel_semaphore_is_expired;
+        sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
+        sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
+        sema->ops.incr = gk20a_channel_semaphore_incr;
+        sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
+        sema->ops.incr_user_syncpt = gk20a_channel_semaphore_incr_user_syncpt;
+        sema->ops.incr_user_fd = gk20a_channel_semaphore_incr_user_fd;
+        sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
+        sema->ops.destroy = gk20a_channel_semaphore_destroy;
+
+        /* Aggressively destroying the semaphore sync would cause overhead
+         * since the pool needs to be mapped to GMMU. */
+        sema->ops.aggressive_destroy = false;
+
+        return &sema->ops;
+clean_up:
+        gk20a_channel_semaphore_destroy(&sema->ops);
+        return NULL;
+}
+
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
 {
 #ifdef CONFIG_TEGRA_GK20A
         if (gk20a_platform_has_syncpoints(c->g->dev))
                 return gk20a_channel_syncpt_create(c);
 #endif
-        WARN_ON(1);
-        return NULL;
+        return gk20a_channel_semaphore_create(c);
+}
+
+static inline bool gk20a_channel_fence_is_closed(struct gk20a_channel_fence *f)
+{
+        if (f->valid || f->semaphore)
+                return false;
+        return true;
+}
+
+void gk20a_channel_fence_close(struct gk20a_channel_fence *f)
+{
+        if (f->semaphore)
+                gk20a_semaphore_put(f->semaphore);
+        memset(f, 0, sizeof(*f));
+}
+
+void gk20a_channel_fence_dup(struct gk20a_channel_fence *from,
+                             struct gk20a_channel_fence *to)
+{
+        WARN_ON(!gk20a_channel_fence_is_closed(to));
+        *to = *from;
+        if (to->semaphore)
+                gk20a_semaphore_get(to->semaphore);
 }
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 90b61bfd..baa4a151 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -23,11 +23,13 @@
 struct gk20a_channel_sync;
 struct priv_cmd_entry;
 struct channel_gk20a;
+struct gk20a_semaphore;
 
 struct gk20a_channel_fence {
         bool valid;
         bool wfi; /* was issued with preceding wfi */
-        u32 thresh; /* either semaphore or syncpoint value */
+        u32 thresh; /* syncpoint fences only */
+        struct gk20a_semaphore *semaphore; /* semaphore fences only */
 };
 
 struct gk20a_channel_sync {
@@ -43,11 +45,13 @@ struct gk20a_channel_sync {
 
         /* Generate a gpu wait cmdbuf from syncpoint. */
         int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
-                           struct priv_cmd_entry **entry);
+                           struct priv_cmd_entry **entry,
+                           struct gk20a_channel_fence *fence);
 
         /* Generate a gpu wait cmdbuf from sync fd. */
         int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
-                       struct priv_cmd_entry **entry);
+                       struct priv_cmd_entry **entry,
+                       struct gk20a_channel_fence *fence);
 
         /* Increment syncpoint/semaphore.
          * Returns
@@ -88,6 +92,7 @@ struct gk20a_channel_sync {
          * - a sync fd that can be returned to user space.
          */
         int (*incr_user_fd)(struct gk20a_channel_sync *s,
+                            int wait_fence_fd,
                             struct priv_cmd_entry **entry,
                             struct gk20a_channel_fence *fence,
                             bool wfi,
@@ -96,12 +101,16 @@ struct gk20a_channel_sync {
         /* Reset the channel syncpoint/semaphore. */
         void (*set_min_eq_max)(struct gk20a_channel_sync *s);
 
-        /* flag to set syncpt destroy aggressiveness */
-        bool syncpt_aggressive_destroy;
+        /* flag to set sync destroy aggressiveness */
+        bool aggressive_destroy;
 
         /* Free the resources allocated by gk20a_channel_sync_create. */
         void (*destroy)(struct gk20a_channel_sync *s);
 };
 
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
+
+void gk20a_channel_fence_close(struct gk20a_channel_fence *f);
+void gk20a_channel_fence_dup(struct gk20a_channel_fence *from,
+                             struct gk20a_channel_fence *to);
 #endif