author:    Lauri Peltonen <lpeltonen@nvidia.com>  2014-02-25 06:31:47 -0500
committer: Dan Willemsen <dwillemsen@nvidia.com>  2015-03-18 15:10:08 -0400
commit:    e204224b26e6b5f609bc4e542368c1a13aeece61
tree:      9d351eb734a54ff677a2f26fec2d6f96adc1e220
parent:    4d278fdfd70082da3c020a15ba0dd722f9da1b3b
gpu: nvgpu: Add semaphore based gk20a_channel_sync
Add a semaphore-based implementation of the gk20a_channel_sync interface. Each channel owns one semaphore pool, which is mapped read-write to the channel vm. One or two semaphores are allocated from the pool for each submit.

The first semaphore is needed only when the submit must wait for an opaque sync fd. In that case we allocate the semaphore and ask the GPU to wait for its value to become 1 (semaphore acquire method). We also queue a kernel work item that waits on the fence fd and then releases the semaphore (sets its value to 1) so that the command buffer can proceed.

The second semaphore is used on every submit and tracks work completion: the GPU sets its value to 1 when the command buffer has been processed.

Channel jobs must hold references to both semaphores so that their backing semaphore pool slots are not reused while the job is in flight. Therefore gk20a_channel_fence keeps a reference to the semaphore it represents (channel fences are stored in the job structure). This in turn means we must diligently close and dup gk20a_channel_fence objects to avoid leaking semaphores.

Bug 1450122
Bug 1445450

Change-Id: Ib61091a1b7632fa36efe0289011040ef7c4ae8f8
Signed-off-by: Lauri Peltonen <lpeltonen@nvidia.com>
Reviewed-on: http://git-master/r/374844
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
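In outline, a submit under this scheme boils down to the following sequence (a condensed sketch of the calls this patch adds; locking, error handling and the syncpoint-only paths are omitted):

	struct gk20a_channel_fence pre_fence = { 0 };
	struct gk20a_channel_fence post_fence = { 0 };

	/* Pre fence, only when waiting on an opaque sync fd: emits a
	 * semaphore acquire and queues kernel work that releases the
	 * semaphore once the fd signals. */
	err = c->sync->wait_fd(c->sync, wait_fence_fd, &wait_cmd, &pre_fence);

	/* Post fence, on every submit: the GPU releases it (sets the
	 * value to 1) when the command buffer has been processed. */
	err = c->sync->incr(c->sync, &incr_cmd, &post_fence);

	/* The job dups both fences so the backing pool slots stay alive
	 * until gk20a_channel_update() observes completion. */
	err = gk20a_channel_add_job(c, &pre_fence, &post_fence);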
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c        79
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h         8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  424
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h   19
4 files changed, 492 insertions, 38 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 68a30392..651ea08c 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -33,6 +33,7 @@
 
 #include "gk20a.h"
 #include "dbg_gpu_gk20a.h"
+#include "semaphore_gk20a.h"
 
 #include "hw_ram_gk20a.h"
 #include "hw_fifo_gk20a.h"
@@ -340,7 +341,7 @@ static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
 	 * resource at this point
 	 * if not, then it will be destroyed at channel_free()
 	 */
-	if (ch_gk20a->sync && ch_gk20a->sync->syncpt_aggressive_destroy) {
+	if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) {
 		ch_gk20a->sync->destroy(ch_gk20a->sync);
 		ch_gk20a->sync = NULL;
 	}
@@ -657,6 +658,8 @@ unbind:
 	ch->vpr = false;
 	ch->vm = NULL;
 
+	gk20a_channel_fence_close(&ch->last_submit.pre_fence);
+	gk20a_channel_fence_close(&ch->last_submit.post_fence);
 	if (ch->sync) {
 		ch->sync->destroy(ch->sync);
 		ch->sync = NULL;
@@ -1089,7 +1092,8 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	ch_vm = c->vm;
 
 	c->cmds_pending = false;
-	c->last_submit_fence.valid = false;
+	gk20a_channel_fence_close(&c->last_submit.pre_fence);
+	gk20a_channel_fence_close(&c->last_submit.post_fence);
 
 	c->ramfc.offset = 0;
 	c->ramfc.size = ram_in_ramfc_s() / 8;
@@ -1272,13 +1276,16 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
 		}
 	}
 
-	err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
+	gk20a_channel_fence_close(&c->last_submit.pre_fence);
+	gk20a_channel_fence_close(&c->last_submit.post_fence);
+
+	err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
 	if (unlikely(err)) {
 		mutex_unlock(&c->submit_lock);
 		return err;
 	}
 
-	WARN_ON(!c->last_submit_fence.wfi);
+	WARN_ON(!c->last_submit.post_fence.wfi);
 
 	c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
 	c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
@@ -1344,7 +1351,8 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
 }
 
 static int gk20a_channel_add_job(struct channel_gk20a *c,
-				 struct gk20a_channel_fence *fence)
+				 struct gk20a_channel_fence *pre_fence,
+				 struct gk20a_channel_fence *post_fence)
 {
 	struct vm_gk20a *vm = c->vm;
 	struct channel_gk20a_job *job = NULL;
@@ -1369,7 +1377,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 
 	job->num_mapped_buffers = num_mapped_buffers;
 	job->mapped_buffers = mapped_buffers;
-	job->fence = *fence;
+	gk20a_channel_fence_dup(pre_fence, &job->pre_fence);
+	gk20a_channel_fence_dup(post_fence, &job->post_fence);
 
 	mutex_lock(&c->jobs_lock);
 	list_add_tail(&job->list, &c->jobs);
@@ -1391,13 +1400,18 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	mutex_lock(&c->jobs_lock);
 	list_for_each_entry_safe(job, n, &c->jobs, list) {
 		bool completed = WARN_ON(!c->sync) ||
-			c->sync->is_expired(c->sync, &job->fence);
+			c->sync->is_expired(c->sync, &job->post_fence);
 		if (!completed)
 			break;
 
 		gk20a_vm_put_buffers(vm, job->mapped_buffers,
 				job->num_mapped_buffers);
 
+		/* Close the fences (this will unref the semaphores and
+		 * release them to the pool). */
+		gk20a_channel_fence_close(&job->pre_fence);
+		gk20a_channel_fence_close(&job->post_fence);
+
 		/* job is done. release its reference to vm */
 		gk20a_vm_put(vm);
 
@@ -1413,8 +1427,8 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	 * the sync resource
 	 */
 	if (list_empty(&c->jobs)) {
-		if (c->sync && c->sync->syncpt_aggressive_destroy &&
-		    c->sync->is_expired(c->sync, &c->last_submit_fence)) {
+		if (c->sync && c->sync->aggressive_destroy &&
+		    c->sync->is_expired(c->sync, &c->last_submit.post_fence)) {
			c->sync->destroy(c->sync);
			c->sync = NULL;
		}
@@ -1448,8 +1462,11 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	struct device *d = dev_from_gk20a(g);
 	int err = 0;
 	int i;
+	int wait_fence_fd = -1;
 	struct priv_cmd_entry *wait_cmd = NULL;
 	struct priv_cmd_entry *incr_cmd = NULL;
+	struct gk20a_channel_fence pre_fence = { 0 };
+	struct gk20a_channel_fence post_fence = { 0 };
 	/* we might need two extra gpfifo entries - one for pre fence
 	 * and one for post fence. */
 	const int extra_entries = 2;
@@ -1534,12 +1551,14 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	 * keep running some tests which trigger this condition
 	 */
 	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
-		if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
-			err = c->sync->wait_fd(c->sync, fence->syncpt_id,
-					&wait_cmd);
-		else
+		if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+			wait_fence_fd = fence->syncpt_id;
+			err = c->sync->wait_fd(c->sync, wait_fence_fd,
+					&wait_cmd, &pre_fence);
+		} else {
 			err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
-					fence->value, &wait_cmd);
+					fence->value, &wait_cmd, &pre_fence);
+		}
 	}
 	if (err) {
 		mutex_unlock(&c->submit_lock);
@@ -1551,19 +1570,19 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	   to keep track of method completion for idle railgating */
 	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
 	    flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
-		err = c->sync->incr_user_fd(c->sync, &incr_cmd,
-					    &c->last_submit_fence,
+		err = c->sync->incr_user_fd(c->sync, wait_fence_fd, &incr_cmd,
+					    &post_fence,
 					    need_wfi,
 					    &fence->syncpt_id);
 	else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
 		err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
-						&c->last_submit_fence,
+						&post_fence,
 						need_wfi,
 						&fence->syncpt_id,
 						&fence->value);
 	else
 		err = c->sync->incr(c->sync, &incr_cmd,
-				    &c->last_submit_fence);
+				    &post_fence);
 	if (err) {
 		mutex_unlock(&c->submit_lock);
 		goto clean_up;
@@ -1611,8 +1630,13 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		incr_cmd->gp_put = c->gpfifo.put;
 	}
 
+	gk20a_channel_fence_close(&c->last_submit.pre_fence);
+	gk20a_channel_fence_close(&c->last_submit.post_fence);
+	c->last_submit.pre_fence = pre_fence;
+	c->last_submit.post_fence = post_fence;
+
 	/* TODO! Check for errors... */
-	gk20a_channel_add_job(c, &c->last_submit_fence);
+	gk20a_channel_add_job(c, &pre_fence, &post_fence);
 
 	c->cmds_pending = true;
 	gk20a_bar1_writel(g,
@@ -1637,6 +1661,8 @@ clean_up:
 	gk20a_err(d, "fail");
 	free_priv_cmdbuf(c, wait_cmd);
 	free_priv_cmdbuf(c, incr_cmd);
+	gk20a_channel_fence_close(&pre_fence);
+	gk20a_channel_fence_close(&post_fence);
 	gk20a_idle(g->dev);
 	return err;
 }
@@ -1669,6 +1695,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
 {
 	int err = 0;
+	struct gk20a_channel_fence *fence = &ch->last_submit.post_fence;
 
 	if (!ch->cmds_pending)
 		return 0;
@@ -1677,21 +1704,20 @@ int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
 	if (ch->has_timedout)
 		return -ETIMEDOUT;
 
-	if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
+	if (!(fence->valid && fence->wfi)) {
 		gk20a_dbg_fn("issuing wfi, incr to finish the channel");
 		err = gk20a_channel_submit_wfi(ch);
 	}
 	if (err)
 		return err;
 
-	BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
+	BUG_ON(!(fence->valid && fence->wfi));
 
-	gk20a_dbg_fn("waiting for channel to finish thresh:%d",
-		     ch->last_submit_fence.thresh);
+	gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
+		     fence->thresh, fence->semaphore);
 
 	if (ch->sync) {
-		err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence,
-					 timeout);
+		err = ch->sync->wait_cpu(ch->sync, fence, timeout);
 		if (WARN_ON(err))
 			dev_warn(dev_from_gk20a(ch->g),
 				 "timed out waiting for gk20a channel to finish");
@@ -1900,7 +1926,8 @@ int gk20a_channel_suspend(struct gk20a *g)
 
 			if (c->sync)
 				c->sync->wait_cpu(c->sync,
-						  &c->last_submit_fence, 500000);
+						  &c->last_submit.post_fence,
+						  500000);
 			break;
 		}
 	}
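Taken together, the channel_gk20a.c changes enforce a simple ownership rule. A sketch, with the reference flow in comments (the names are the ones introduced above):

	c->sync->incr(c->sync, &incr_cmd, &post_fence);
					/* submit-local fence: 1 ref */
	gk20a_channel_add_job(c, &pre_fence, &post_fence);
					/* dup: the job takes its own ref */
	c->last_submit.post_fence = post_fence;
					/* the submit-local ref moves to
					 * the channel */

	/* When the post fence expires, gk20a_channel_update() closes the
	 * job's copies; the channel's last_submit copies are closed on the
	 * next submit or at teardown, after which the pool slot is free
	 * for reuse. */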
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index dd0197d6..84983cc6 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -68,7 +68,8 @@ struct channel_ctx_gk20a {
 struct channel_gk20a_job {
 	struct mapped_buffer_node **mapped_buffers;
 	int num_mapped_buffers;
-	struct gk20a_channel_fence fence;
+	struct gk20a_channel_fence pre_fence;
+	struct gk20a_channel_fence post_fence;
 	struct list_head list;
 };
 
@@ -112,7 +113,10 @@ struct channel_gk20a {
 	u32 timeout_gpfifo_get;
 
 	bool cmds_pending;
-	struct gk20a_channel_fence last_submit_fence;
+	struct {
+		struct gk20a_channel_fence pre_fence;
+		struct gk20a_channel_fence post_fence;
+	} last_submit;
 
 	void (*remove_support)(struct channel_gk20a *);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index f91dd52d..677c4b49 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -19,6 +19,9 @@
 
 #include "channel_sync_gk20a.h"
 #include "gk20a.h"
+#include "semaphore_gk20a.h"
+#include "sync_gk20a.h"
+#include "mm_gk20a.h"
 
 #ifdef CONFIG_SYNC
 #include "../../../staging/android/sync.h"
@@ -74,7 +77,8 @@ bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s,
 }
 
 int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
-		u32 thresh, struct priv_cmd_entry **entry)
+		u32 thresh, struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
 {
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
@@ -99,11 +103,13 @@ int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
 	add_wait_cmd(&wait_cmd->ptr[0], id, thresh);
 
 	*entry = wait_cmd;
+	fence->valid = false;
 	return 0;
 }
 
 int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
-		       struct priv_cmd_entry **entry)
+		       struct priv_cmd_entry **entry,
+		       struct gk20a_channel_fence *fence)
 {
 #ifdef CONFIG_SYNC
 	int i;
@@ -158,6 +164,7 @@ int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 	sync_fence_put(sync_fence);
 
 	*entry = wait_cmd;
+	fence->valid = false;
 	return 0;
 #else
 	return -ENODEV;
@@ -301,6 +308,7 @@ int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
 }
 
 int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
+			       int wait_fence_fd,
 			       struct priv_cmd_entry **entry,
 			       struct gk20a_channel_fence *fence,
 			       bool wfi,
@@ -366,18 +374,424 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
 	sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
 	sp->ops.destroy = gk20a_channel_syncpt_destroy;
 
-	sp->ops.syncpt_aggressive_destroy = true;
+	sp->ops.aggressive_destroy = true;
 
 	return &sp->ops;
 }
 #endif /* CONFIG_TEGRA_GK20A */
 
+struct gk20a_channel_semaphore {
+	struct gk20a_channel_sync ops;
+	struct channel_gk20a *c;
+
+	/* A semaphore pool owned by this channel. */
+	struct gk20a_semaphore_pool *pool;
+
+	/* A sync timeline that advances when gpu completes work. */
+	struct sync_timeline *timeline;
+};
+
+#ifdef CONFIG_SYNC
+struct wait_fence_work {
+	struct sync_fence_waiter waiter;
+	struct channel_gk20a *ch;
+	struct gk20a_semaphore *sema;
+};
+
+static void gk20a_channel_semaphore_launcher(
+		struct sync_fence *fence,
+		struct sync_fence_waiter *waiter)
+{
+	int err;
+	struct wait_fence_work *w =
+		container_of(waiter, struct wait_fence_work, waiter);
+	struct gk20a *g = w->ch->g;
+
+	gk20a_dbg_info("waiting for pre fence %p '%s'",
+			fence, fence->name);
+	err = sync_fence_wait(fence, -1);
+	if (err < 0)
+		dev_err(&g->dev->dev, "error waiting pre-fence: %d\n", err);
+
+	gk20a_dbg_info(
+		  "wait completed (%d) for fence %p '%s', triggering gpu work",
+		  err, fence, fence->name);
+	sync_fence_put(fence);
+	gk20a_semaphore_release(w->sema);
+	gk20a_semaphore_put(w->sema);
+	kfree(w);
+}
+#endif
+
+static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload,
+			bool acquire, bool wfi)
+{
+	int i = 0;
+	/* semaphore_a */
+	ptr[i++] = 0x20010004;
+	/* offset_upper */
+	ptr[i++] = (sema >> 32) & 0xff;
+	/* semaphore_b */
+	ptr[i++] = 0x20010005;
+	/* offset */
+	ptr[i++] = sema & 0xffffffff;
+	/* semaphore_c */
+	ptr[i++] = 0x20010006;
+	/* payload */
+	ptr[i++] = payload;
+	if (acquire) {
+		/* semaphore_d */
+		ptr[i++] = 0x20010007;
+		/* operation: acq_geq, switch_en */
+		ptr[i++] = 0x4 | (0x1 << 12);
+	} else {
+		/* semaphore_d */
+		ptr[i++] = 0x20010007;
+		/* operation: release, wfi */
+		ptr[i++] = 0x2 | ((wfi ? 0x0 : 0x1) << 20);
+		/* non_stall_int */
+		ptr[i++] = 0x20010008;
+		/* ignored */
+		ptr[i++] = 0;
+	}
+	return i;
+}
+
+static int gk20a_channel_semaphore_wait_cpu(
+		struct gk20a_channel_sync *s,
+		struct gk20a_channel_fence *fence,
+		int timeout)
+{
+	int remain;
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	if (!fence->valid || WARN_ON(!fence->semaphore))
+		return 0;
+
+	remain = wait_event_interruptible_timeout(
+			sp->c->semaphore_wq,
+			!gk20a_semaphore_is_acquired(fence->semaphore),
+			timeout);
+	if (remain == 0 && gk20a_semaphore_is_acquired(fence->semaphore))
+		return -ETIMEDOUT;
+	else if (remain < 0)
+		return remain;
+	return 0;
+}
+
+static bool gk20a_channel_semaphore_is_expired(
+		struct gk20a_channel_sync *s,
+		struct gk20a_channel_fence *fence)
+{
+	bool expired;
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	if (!fence->valid || WARN_ON(!fence->semaphore))
+		return true;
+
+	expired = !gk20a_semaphore_is_acquired(fence->semaphore);
+	if (expired)
+		gk20a_sync_timeline_signal(sp->timeline);
+	return expired;
+}
+
+static int gk20a_channel_semaphore_wait_syncpt(
+		struct gk20a_channel_sync *s, u32 id,
+		u32 thresh, struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct device *dev = dev_from_gk20a(sema->c->g);
+	gk20a_err(dev, "trying to use syncpoint synchronization");
+	return -ENODEV;
+}
+
+static int gk20a_channel_semaphore_wait_fd(
+		struct gk20a_channel_sync *s, int fd,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sema->c;
+#ifdef CONFIG_SYNC
+	struct sync_fence *sync_fence;
+	struct priv_cmd_entry *wait_cmd = NULL;
+	struct wait_fence_work *w;
+	int written;
+	int err;
+	u64 va;
+
+	sync_fence = gk20a_sync_fence_fdget(fd);
+	if (!sync_fence)
+		return -EINVAL;
+
+	w = kzalloc(sizeof(*w), GFP_KERNEL);
+	if (!w) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
+	w->ch = c;
+	w->sema = gk20a_semaphore_alloc(sema->pool);
+	if (!w->sema) {
+		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
+		err = -EAGAIN;
+		goto fail;
+	}
+
+	gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
+	if (wait_cmd == NULL) {
+		gk20a_err(dev_from_gk20a(c->g),
+				"not enough priv cmd buffer space");
+		err = -EAGAIN;
+		goto fail;
+	}
+
+	va = gk20a_semaphore_gpu_va(w->sema, c->vm);
+	/* GPU unblocked when the semaphore value becomes 1. */
+	written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false);
+	WARN_ON(written != wait_cmd->size);
+	sync_fence_wait_async(sync_fence, &w->waiter);
+
+	*entry = wait_cmd;
+	return 0;
+fail:
+	if (w && w->sema)
+		gk20a_semaphore_put(w->sema);
+	kfree(w);
+	sync_fence_put(sync_fence);
+	return err;
+#else
+	gk20a_err(dev_from_gk20a(c->g),
+		  "trying to use sync fds with CONFIG_SYNC disabled");
+	return -ENODEV;
+#endif
+}
+
+static int __gk20a_channel_semaphore_incr(
+		struct gk20a_channel_sync *s, bool wfi_cmd,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	u64 va;
+	int incr_cmd_size;
+	int written;
+	struct priv_cmd_entry *incr_cmd = NULL;
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sp->c;
+	struct gk20a_semaphore *semaphore;
+
+	semaphore = gk20a_semaphore_alloc(sp->pool);
+	if (!semaphore) {
+		gk20a_err(dev_from_gk20a(c->g),
+				"ran out of semaphores");
+		return -EAGAIN;
+	}
+
+	incr_cmd_size = 10;
+	gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
+	if (incr_cmd == NULL) {
+		gk20a_err(dev_from_gk20a(c->g),
+				"not enough priv cmd buffer space");
+		gk20a_semaphore_put(semaphore);
+		return -EAGAIN;
+	}
+
+	/* Release the completion semaphore. */
+	va = gk20a_semaphore_gpu_va(semaphore, c->vm);
+	written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd);
+	WARN_ON(written != incr_cmd_size);
+
+	fence->valid = true;
+	fence->wfi = wfi_cmd;
+	fence->semaphore = semaphore;
+	*entry = incr_cmd;
+	return 0;
+}
+
+static int gk20a_channel_semaphore_incr_wfi(
+		struct gk20a_channel_sync *s,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	return __gk20a_channel_semaphore_incr(s,
+			true /* wfi */,
+			entry, fence);
+}
+
+static int gk20a_channel_semaphore_incr(
+		struct gk20a_channel_sync *s,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	/* Don't put wfi cmd to this one since we're not returning
+	 * a fence to user space. */
+	return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
+			entry, fence);
+}
+
+static int gk20a_channel_semaphore_incr_user_syncpt(
+		struct gk20a_channel_sync *s,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence,
+		bool wfi,
+		u32 *id, u32 *thresh)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct device *dev = dev_from_gk20a(sema->c->g);
+	gk20a_err(dev, "trying to use syncpoint synchronization");
+	return -ENODEV;
+}
+
+static int gk20a_channel_semaphore_incr_user_fd(
+		struct gk20a_channel_sync *s,
+		int wait_fence_fd,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence,
+		bool wfi,
+		int *fd)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+#ifdef CONFIG_SYNC
+	struct sync_fence *dependency = NULL;
+	int err;
+
+	err = __gk20a_channel_semaphore_incr(s, wfi,
+			entry, fence);
+	if (err)
+		return err;
+
+	if (wait_fence_fd >= 0) {
+		dependency = gk20a_sync_fence_fdget(wait_fence_fd);
+		if (!dependency)
+			return -EINVAL;
+	}
+
+	*fd = gk20a_sync_fence_create(sema->timeline, fence->semaphore,
+				      dependency, "fence");
+	if (*fd < 0) {
+		if (dependency)
+			sync_fence_put(dependency);
+		return *fd;
+	}
+	return 0;
+#else
+	gk20a_err(dev_from_gk20a(sema->c->g),
+		  "trying to use sync fds with CONFIG_SYNC disabled");
+	return -ENODEV;
+#endif
+}
+
+static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
+{
+	/* Nothing to do. */
+}
+
+static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	if (sema->timeline)
+		gk20a_sync_timeline_destroy(sema->timeline);
+	if (sema->pool) {
+		gk20a_semaphore_pool_unmap(sema->pool, sema->c->vm);
+		gk20a_semaphore_pool_put(sema->pool);
+	}
+	kfree(sema);
+}
+
+static struct gk20a_channel_sync *
+gk20a_channel_semaphore_create(struct channel_gk20a *c)
+{
+	int err;
+	int asid = -1;
+	struct gk20a_channel_semaphore *sema;
+	char pool_name[20];
+
+	if (WARN_ON(!c->vm))
+		return NULL;
+
+	sema = kzalloc(sizeof(*sema), GFP_KERNEL);
+	if (!sema)
+		return NULL;
+	sema->c = c;
+
+	if (c->vm->as_share)
+		asid = c->vm->as_share->id;
+
+	/* A pool of 256 semaphores fits into one 4k page. */
+	sprintf(pool_name, "semaphore_pool-%d", c->hw_chid);
+	sema->pool = gk20a_semaphore_pool_alloc(dev_from_gk20a(c->g),
+						pool_name, 256);
+	if (!sema->pool)
+		goto clean_up;
+
+	/* Map the semaphore pool to the channel vm. Map as read-write to the
+	 * owner channel (all other channels should map as read only!). */
+	err = gk20a_semaphore_pool_map(sema->pool, c->vm, gk20a_mem_flag_none);
+	if (err)
+		goto clean_up;
+
+#ifdef CONFIG_SYNC
+	sema->timeline = gk20a_sync_timeline_create(
+			"gk20a_ch%d_as%d", c->hw_chid, asid);
+	if (!sema->timeline)
+		goto clean_up;
+#endif
+	sema->ops.wait_cpu = gk20a_channel_semaphore_wait_cpu;
+	sema->ops.is_expired = gk20a_channel_semaphore_is_expired;
+	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
+	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
+	sema->ops.incr = gk20a_channel_semaphore_incr;
+	sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
+	sema->ops.incr_user_syncpt = gk20a_channel_semaphore_incr_user_syncpt;
+	sema->ops.incr_user_fd = gk20a_channel_semaphore_incr_user_fd;
+	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
+	sema->ops.destroy = gk20a_channel_semaphore_destroy;
+
+	/* Aggressively destroying the semaphore sync would cause overhead
+	 * since the pool needs to be mapped to GMMU. */
+	sema->ops.aggressive_destroy = false;
+
+	return &sema->ops;
+clean_up:
+	gk20a_channel_semaphore_destroy(&sema->ops);
+	return NULL;
+}
+
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
 {
 #ifdef CONFIG_TEGRA_GK20A
 	if (gk20a_platform_has_syncpoints(c->g->dev))
 		return gk20a_channel_syncpt_create(c);
 #endif
-	WARN_ON(1);
-	return NULL;
+	return gk20a_channel_semaphore_create(c);
+}
+
+static inline bool gk20a_channel_fence_is_closed(struct gk20a_channel_fence *f)
+{
+	if (f->valid || f->semaphore)
+		return false;
+	return true;
+}
+
+void gk20a_channel_fence_close(struct gk20a_channel_fence *f)
+{
+	if (f->semaphore)
+		gk20a_semaphore_put(f->semaphore);
+	memset(f, 0, sizeof(*f));
+}
+
+void gk20a_channel_fence_dup(struct gk20a_channel_fence *from,
+			     struct gk20a_channel_fence *to)
+{
+	WARN_ON(!gk20a_channel_fence_is_closed(to));
+	*to = *from;
+	if (to->semaphore)
+		gk20a_semaphore_get(to->semaphore);
 }
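For reference, the magic words emitted by add_sema_cmd() are host method headers. A hedged decode of the first word, assuming the usual Fermi-and-later pushbuffer header layout (opcode in bits 31:29, count in 28:16, subchannel in 15:13, method dword address in 12:0); the variable names here are illustrative, not taken from this patch:

	u32 hdr    = 0x20010004;
	u32 op     = hdr >> 29;            /* 1: incrementing methods */
	u32 count  = (hdr >> 16) & 0x1fff; /* 1 data word follows */
	u32 subch  = (hdr >> 13) & 0x7;    /* 0 */
	u32 method = (hdr & 0x1fff) << 2;  /* 0x10: SEMAPHORE_A (offset upper) */

Read this way, 0x20010005, 0x20010006 and 0x20010007 address SEMAPHORE_B (offset lower), SEMAPHORE_C (payload) and SEMAPHORE_D (operation) in the same manner, matching the comments in add_sema_cmd().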
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 90b61bfd..baa4a151 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -23,11 +23,13 @@
 struct gk20a_channel_sync;
 struct priv_cmd_entry;
 struct channel_gk20a;
+struct gk20a_semaphore;
 
 struct gk20a_channel_fence {
 	bool valid;
 	bool wfi; /* was issued with preceding wfi */
-	u32 thresh; /* either semaphore or syncpoint value */
+	u32 thresh; /* syncpoint fences only */
+	struct gk20a_semaphore *semaphore; /* semaphore fences only */
 };
 
 struct gk20a_channel_sync {
@@ -43,11 +45,13 @@ struct gk20a_channel_sync {
 
 	/* Generate a gpu wait cmdbuf from syncpoint. */
 	int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
-			   struct priv_cmd_entry **entry);
+			   struct priv_cmd_entry **entry,
+			   struct gk20a_channel_fence *fence);
 
 	/* Generate a gpu wait cmdbuf from sync fd. */
 	int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
-		       struct priv_cmd_entry **entry);
+		       struct priv_cmd_entry **entry,
+		       struct gk20a_channel_fence *fence);
 
 	/* Increment syncpoint/semaphore.
 	 * Returns
@@ -88,6 +92,7 @@ struct gk20a_channel_sync {
 	 *  - a sync fd that can be returned to user space.
 	 */
 	int (*incr_user_fd)(struct gk20a_channel_sync *s,
+			    int wait_fence_fd,
 			    struct priv_cmd_entry **entry,
 			    struct gk20a_channel_fence *fence,
 			    bool wfi,
@@ -96,12 +101,16 @@ struct gk20a_channel_sync {
 	/* Reset the channel syncpoint/semaphore. */
 	void (*set_min_eq_max)(struct gk20a_channel_sync *s);
 
-	/* flag to set syncpt destroy aggressiveness */
-	bool syncpt_aggressive_destroy;
+	/* flag to set sync destroy aggressiveness */
+	bool aggressive_destroy;
 
 	/* Free the resources allocated by gk20a_channel_sync_create. */
 	void (*destroy)(struct gk20a_channel_sync *s);
 };
 
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
+
+void gk20a_channel_fence_close(struct gk20a_channel_fence *f);
+void gk20a_channel_fence_dup(struct gk20a_channel_fence *from,
+			     struct gk20a_channel_fence *to);
 #endif
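Together, gk20a_channel_fence_close() and gk20a_channel_fence_dup() give the fence struct value semantics over the refcounted semaphore. A minimal usage sketch:

	struct gk20a_channel_fence a = { 0 };
	struct gk20a_channel_fence b = { 0 };

	err = c->sync->incr(c->sync, &cmd, &a); /* a references a fresh
						 * semaphore */
	gk20a_channel_fence_dup(&a, &b);        /* b takes its own reference;
						 * b must start out closed
						 * (all zeroes) */
	gk20a_channel_fence_close(&a);          /* drops a's reference and
						 * zeroes a */
	gk20a_channel_fence_close(&b);          /* last reference: the slot
						 * returns to the pool */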