author    Lauri Peltonen <lpeltonen@nvidia.com>  2014-07-17 19:21:34 -0400
committer Dan Willemsen <dwillemsen@nvidia.com>  2015-03-18 15:10:40 -0400
commit    bcf60a22c3e8671468517d34aa37548272455c1f (patch)
tree      c3544f6714c291e611e33a8d0e39c5cb2c795821 /drivers/gpu
parent    55295c6087ed975be12e92f9be799269aef94678 (diff)
gpu: nvgpu: Add gk20a_fence type
When moving compression state tracking and compbit management ops to the
kernel, we need to attach a fence to dma-buf metadata, along with the compbit
state.

To make in-kernel fence management easier, introduce a new gk20a_fence
abstraction. A gk20a_fence may be backed by a semaphore or a syncpoint
(id, value) pair. If the kernel is configured with CONFIG_SYNC, it will also
contain a sync_fence. The gk20a_fence can easily be converted back to a
syncpoint (id, value) pair or a sync FD when we need to return it to user
space.

Change gk20a_submit_channel_gpfifo to return a gk20a_fence instead of
nvhost_fence. This is to facilitate work submission initiated from the
kernel.

Bug 1509620

Change-Id: I6154764a279dba83f5e91ba9e0cb5e227ca08e1b
Signed-off-by: Lauri Peltonen <lpeltonen@nvidia.com>
Reviewed-on: http://git-master/r/439846
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
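For illustration, here is a minimal sketch (not part of the patch) of how an
in-kernel submitter can drive the new abstraction. example_submit_and_wait and
its timeout_jiffies parameter are hypothetical names; the submit and fence
calls are the ones declared in channel_gk20a.h and fence_gk20a.h as changed or
added below.

/* Sketch only: submit a gpfifo and wait for completion from the kernel,
 * using the refcounted gk20a_fence instead of a raw nvhost_fence. */
static int example_submit_and_wait(struct channel_gk20a *c,
				   struct nvhost_gpfifo *gpfifo,
				   u32 num_entries, int timeout_jiffies)
{
	struct gk20a_fence *post = NULL;
	int err;

	/* flags == 0: no user-space fence is requested, but the returned
	 * gk20a_fence still tracks completion of this submit. */
	err = gk20a_submit_channel_gpfifo(c, gpfifo, num_entries,
					  0 /* flags */, NULL /* fence */,
					  &post);
	if (err)
		return err;

	/* The fence is refcounted: wait on it, then drop our reference. */
	err = gk20a_fence_wait(post, timeout_jiffies);
	gk20a_fence_put(post);
	return err;
}

The ioctl path performs the user-space half of the same conversion: with
NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE it turns the post-fence into a sync FD
via gk20a_fence_install_fd(), otherwise it copies out the fence's syncpoint
(id, value) pair.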
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/Makefile               1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c      131
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h       13
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c 233
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h  74
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.c        229
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.h         68
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.c          17
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.h           2
9 files changed, 476 insertions(+), 292 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
index d234db50..246f9447 100644
--- a/drivers/gpu/nvgpu/gk20a/Makefile
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -21,6 +21,7 @@ nvgpu-y := \
 	pmu_gk20a.o \
 	priv_ring_gk20a.o \
 	semaphore_gk20a.o \
+	fence_gk20a.o \
 	clk_gk20a.o \
 	therm_gk20a.o \
 	gr_ctx_gk20a_sim.o \
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index de82be36..9f8876c3 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -33,6 +33,7 @@
 
 #include "gk20a.h"
 #include "dbg_gpu_gk20a.h"
+#include "fence_gk20a.h"
 #include "semaphore_gk20a.h"
 
 #include "hw_ram_gk20a.h"
@@ -418,8 +419,8 @@ void gk20a_channel_abort(struct channel_gk20a *ch)
 	   semaphore synchronization) */
 	mutex_lock(&ch->jobs_lock);
 	list_for_each_entry_safe(job, n, &ch->jobs, list) {
-		if (job->post_fence.semaphore) {
-			gk20a_semaphore_release(job->post_fence.semaphore);
+		if (job->post_fence->semaphore) {
+			gk20a_semaphore_release(job->post_fence->semaphore);
 			released_job_semaphore = true;
 		}
 	}
@@ -685,8 +686,12 @@ unbind:
 	ch->vpr = false;
 	ch->vm = NULL;
 
-	gk20a_channel_fence_close(&ch->last_submit.pre_fence);
-	gk20a_channel_fence_close(&ch->last_submit.post_fence);
+	mutex_lock(&ch->submit_lock);
+	gk20a_fence_put(ch->last_submit.pre_fence);
+	gk20a_fence_put(ch->last_submit.post_fence);
+	ch->last_submit.pre_fence = NULL;
+	ch->last_submit.post_fence = NULL;
+	mutex_unlock(&ch->submit_lock);
 	WARN_ON(ch->sync);
 
 	/* unlink all debug sessions */
@@ -1119,8 +1124,12 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	ch_vm = c->vm;
 
 	c->cmds_pending = false;
-	gk20a_channel_fence_close(&c->last_submit.pre_fence);
-	gk20a_channel_fence_close(&c->last_submit.post_fence);
+	mutex_lock(&c->submit_lock);
+	gk20a_fence_put(c->last_submit.pre_fence);
+	gk20a_fence_put(c->last_submit.post_fence);
+	c->last_submit.pre_fence = NULL;
+	c->last_submit.post_fence = NULL;
+	mutex_unlock(&c->submit_lock);
 
 	c->ramfc.offset = 0;
 	c->ramfc.size = ram_in_ramfc_s() / 8;
@@ -1303,8 +1312,10 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
 		}
 	}
 
-	gk20a_channel_fence_close(&c->last_submit.pre_fence);
-	gk20a_channel_fence_close(&c->last_submit.post_fence);
+	gk20a_fence_put(c->last_submit.pre_fence);
+	gk20a_fence_put(c->last_submit.post_fence);
+	c->last_submit.pre_fence = NULL;
+	c->last_submit.post_fence = NULL;
 
 	err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
 	if (unlikely(err)) {
@@ -1312,7 +1323,7 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
 		return err;
 	}
 
-	WARN_ON(!c->last_submit.post_fence.wfi);
+	WARN_ON(!c->last_submit.post_fence->wfi);
 
 	c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
 	c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
@@ -1378,8 +1389,8 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
 }
 
 static int gk20a_channel_add_job(struct channel_gk20a *c,
-				 struct gk20a_channel_fence *pre_fence,
-				 struct gk20a_channel_fence *post_fence)
+				 struct gk20a_fence *pre_fence,
+				 struct gk20a_fence *post_fence)
 {
 	struct vm_gk20a *vm = c->vm;
 	struct channel_gk20a_job *job = NULL;
@@ -1404,8 +1415,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 
 	job->num_mapped_buffers = num_mapped_buffers;
 	job->mapped_buffers = mapped_buffers;
-	gk20a_channel_fence_dup(pre_fence, &job->pre_fence);
-	gk20a_channel_fence_dup(post_fence, &job->post_fence);
+	job->pre_fence = gk20a_fence_get(pre_fence);
+	job->post_fence = gk20a_fence_get(post_fence);
 
 	mutex_lock(&c->jobs_lock);
 	list_add_tail(&job->list, &c->jobs);
@@ -1424,18 +1435,19 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	mutex_lock(&c->submit_lock);
 	mutex_lock(&c->jobs_lock);
 	list_for_each_entry_safe(job, n, &c->jobs, list) {
-		bool completed = WARN_ON(!c->sync) ||
-			c->sync->is_expired(c->sync, &job->post_fence);
+		bool completed = gk20a_fence_is_expired(job->post_fence);
 		if (!completed)
 			break;
 
+		c->sync->signal_timeline(c->sync);
+
 		gk20a_vm_put_buffers(vm, job->mapped_buffers,
 				job->num_mapped_buffers);
 
 		/* Close the fences (this will unref the semaphores and release
 		 * them to the pool). */
-		gk20a_channel_fence_close(&job->pre_fence);
-		gk20a_channel_fence_close(&job->post_fence);
+		gk20a_fence_put(job->pre_fence);
+		gk20a_fence_put(job->post_fence);
 
 		/* job is done. release its reference to vm */
 		gk20a_vm_put(vm);
@@ -1453,7 +1465,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	 */
 	if (list_empty(&c->jobs)) {
 		if (c->sync && c->sync->aggressive_destroy &&
-		    c->sync->is_expired(c->sync, &c->last_submit.post_fence)) {
+		    gk20a_fence_is_expired(c->last_submit.post_fence)) {
 			c->sync->destroy(c->sync);
 			c->sync = NULL;
 		}
@@ -1477,8 +1489,9 @@ void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 				struct nvhost_gpfifo *gpfifo,
 				u32 num_entries,
+				u32 flags,
 				struct nvhost_fence *fence,
-				u32 flags)
+				struct gk20a_fence **fence_out)
 {
 	struct gk20a *g = c->g;
 	struct device *d = dev_from_gk20a(g);
@@ -1487,8 +1500,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	int wait_fence_fd = -1;
 	struct priv_cmd_entry *wait_cmd = NULL;
 	struct priv_cmd_entry *incr_cmd = NULL;
-	struct gk20a_channel_fence pre_fence = { 0 };
-	struct gk20a_channel_fence post_fence = { 0 };
+	struct gk20a_fence *pre_fence = NULL;
+	struct gk20a_fence *post_fence = NULL;
 	/* we might need two extra gpfifo entries - one for pre fence
 	 * and one for post fence. */
 	const int extra_entries = 2;
@@ -1591,18 +1604,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	/* always insert syncpt increment at end of gpfifo submission
 	   to keep track of method completion for idle railgating */
-	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
-	    flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
-		err = c->sync->incr_user_fd(c->sync, wait_fence_fd, &incr_cmd,
-					    &post_fence,
-					    need_wfi,
-					    &fence->syncpt_id);
-	else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
-		err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
-						&post_fence,
-						need_wfi,
-						&fence->syncpt_id,
-						&fence->value);
+	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
+		err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
+					 &post_fence, need_wfi);
 	else
 		err = c->sync->incr(c->sync, &incr_cmd,
 				    &post_fence);
@@ -1653,13 +1657,15 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		incr_cmd->gp_put = c->gpfifo.put;
 	}
 
-	gk20a_channel_fence_close(&c->last_submit.pre_fence);
-	gk20a_channel_fence_close(&c->last_submit.post_fence);
+	gk20a_fence_put(c->last_submit.pre_fence);
+	gk20a_fence_put(c->last_submit.post_fence);
 	c->last_submit.pre_fence = pre_fence;
 	c->last_submit.post_fence = post_fence;
+	if (fence_out)
+		*fence_out = gk20a_fence_get(post_fence);
 
 	/* TODO! Check for errors... */
-	gk20a_channel_add_job(c, &pre_fence, &post_fence);
+	gk20a_channel_add_job(c, pre_fence, post_fence);
 
 	c->cmds_pending = true;
 	gk20a_bar1_writel(g,
@@ -1672,8 +1678,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 				  c->hw_chid,
 				  num_entries,
 				  flags,
-				  fence ? fence->syncpt_id : 0,
-				  fence ? fence->value : 0);
+				  post_fence->syncpt_id,
+				  post_fence->syncpt_value);
 
 	gk20a_dbg_info("post-submit put %d, get %d, size %d",
 		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
@@ -1685,8 +1691,8 @@ clean_up:
 	gk20a_err(d, "fail");
 	free_priv_cmdbuf(c, wait_cmd);
 	free_priv_cmdbuf(c, incr_cmd);
-	gk20a_channel_fence_close(&pre_fence);
-	gk20a_channel_fence_close(&post_fence);
+	gk20a_fence_put(pre_fence);
+	gk20a_fence_put(post_fence);
 	gk20a_idle(g->dev);
 	return err;
 }
@@ -1719,7 +1725,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
 {
 	int err = 0;
-	struct gk20a_channel_fence *fence = &ch->last_submit.post_fence;
+	struct gk20a_fence *fence = ch->last_submit.post_fence;
 
 	if (!ch->cmds_pending)
 		return 0;
@@ -1728,26 +1734,25 @@ int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
 	if (ch->has_timedout)
 		return -ETIMEDOUT;
 
-	if (!(fence->valid && fence->wfi) && ch->obj_class != KEPLER_C) {
+	if (!(fence && fence->wfi) && ch->obj_class != KEPLER_C) {
 		gk20a_dbg_fn("issuing wfi, incr to finish the channel");
 		err = gk20a_channel_submit_wfi(ch);
+		fence = ch->last_submit.post_fence;
 	}
 	if (err)
 		return err;
 
-	BUG_ON(!(fence->valid && fence->wfi) && ch->obj_class != KEPLER_C);
+	BUG_ON(!(fence && fence->wfi) && ch->obj_class != KEPLER_C);
 
 	gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
-		     fence->thresh, fence->semaphore);
+		     fence->syncpt_value, fence->semaphore);
 
-	if (ch->sync) {
-		err = ch->sync->wait_cpu(ch->sync, fence, timeout);
-		if (WARN_ON(err))
-			dev_warn(dev_from_gk20a(ch->g),
-				 "timed out waiting for gk20a channel to finish");
-		else
-			ch->cmds_pending = false;
-	}
+	err = gk20a_fence_wait(fence, timeout);
+	if (WARN_ON(err))
+		dev_warn(dev_from_gk20a(ch->g),
+			 "timed out waiting for gk20a channel to finish");
+	else
+		ch->cmds_pending = false;
 
 	return err;
 }
@@ -2014,6 +2019,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 		struct channel_gk20a *ch,
 		struct nvhost_submit_gpfifo_args *args)
 {
+	struct gk20a_fence *fence_out;
 	void *gpfifo;
 	u32 size;
 	int ret = 0;
@@ -2036,7 +2042,26 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	}
 
 	ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
-					  &args->fence, args->flags);
+					  args->flags, &args->fence,
+					  &fence_out);
+
+	if (ret)
+		goto clean_up;
+
+	/* Convert fence_out to something we can pass back to user space. */
+	if (args->flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
+		if (args->flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+			int fd = gk20a_fence_install_fd(fence_out);
+			if (fd < 0)
+				ret = fd;
+			else
+				args->fence.syncpt_id = fd;
+		} else {
+			args->fence.syncpt_id = fence_out->syncpt_id;
+			args->fence.value = fence_out->syncpt_value;
+		}
+	}
+	gk20a_fence_put(fence_out);
 
 clean_up:
 	kfree(gpfifo);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 161d2b83..2ea3eccb 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -29,6 +29,7 @@
 struct gk20a;
 struct gr_gk20a;
 struct dbg_session_gk20a;
+struct gk20a_fence;
 
 #include "channel_sync_gk20a.h"
 
@@ -68,8 +69,8 @@ struct channel_ctx_gk20a {
 struct channel_gk20a_job {
 	struct mapped_buffer_node **mapped_buffers;
 	int num_mapped_buffers;
-	struct gk20a_channel_fence pre_fence;
-	struct gk20a_channel_fence post_fence;
+	struct gk20a_fence *pre_fence;
+	struct gk20a_fence *post_fence;
 	struct list_head list;
 };
 
@@ -117,8 +118,9 @@ struct channel_gk20a {
 
 	bool cmds_pending;
 	struct {
-		struct gk20a_channel_fence pre_fence;
-		struct gk20a_channel_fence post_fence;
+		/* These fences should be accessed with submit_lock held. */
+		struct gk20a_fence *pre_fence;
+		struct gk20a_fence *post_fence;
 	} last_submit;
 
 	void (*remove_support)(struct channel_gk20a *);
@@ -184,8 +186,9 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 				struct nvhost_gpfifo *gpfifo,
 				u32 num_entries,
+				u32 flags,
 				struct nvhost_fence *fence,
-				u32 flags);
+				struct gk20a_fence **fence_out);
 
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvhost_alloc_gpfifo_args *args);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 0dcc5abb..4d366fa9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -19,6 +19,7 @@
 
 #include "channel_sync_gk20a.h"
 #include "gk20a.h"
+#include "fence_gk20a.h"
 #include "semaphore_gk20a.h"
 #include "sync_gk20a.h"
 #include "mm_gk20a.h"
@@ -52,33 +53,9 @@ static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
 	ptr[3] = (id << 8) | 0x10;
 }
 
-int gk20a_channel_syncpt_wait_cpu(struct gk20a_channel_sync *s,
-				  struct gk20a_channel_fence *fence,
-				  int timeout)
-{
-	struct gk20a_channel_syncpt *sp =
-		container_of(s, struct gk20a_channel_syncpt, ops);
-	if (!fence->valid)
-		return 0;
-	return nvhost_syncpt_wait_timeout_ext(
-			sp->host1x_pdev, sp->id, fence->thresh,
-			timeout, NULL, NULL);
-}
-
-bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s,
-				     struct gk20a_channel_fence *fence)
-{
-	struct gk20a_channel_syncpt *sp =
-		container_of(s, struct gk20a_channel_syncpt, ops);
-	if (!fence->valid)
-		return true;
-	return nvhost_syncpt_is_expired_ext(sp->host1x_pdev, sp->id,
-					    fence->thresh);
-}
-
 int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
 		       u32 thresh, struct priv_cmd_entry **entry,
-		       struct gk20a_channel_fence *fence)
+		       struct gk20a_fence **fence)
 {
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
@@ -103,13 +80,13 @@ int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id,
 	add_wait_cmd(&wait_cmd->ptr[0], id, thresh);
 
 	*entry = wait_cmd;
-	fence->valid = false;
+	*fence = NULL;
 	return 0;
 }
 
 int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 			       struct priv_cmd_entry **entry,
-			       struct gk20a_channel_fence *fence)
+			       struct gk20a_fence **fence)
 {
 #ifdef CONFIG_SYNC
 	int i;
@@ -164,7 +141,7 @@ int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 	sync_fence_put(sync_fence);
 
 	*entry = wait_cmd;
-	fence->valid = false;
+	*fence = NULL;
 	return 0;
 #else
 	return -ENODEV;
@@ -181,7 +158,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 				       bool gfx_class, bool wfi_cmd,
 				       bool register_irq,
 				       struct priv_cmd_entry **entry,
-				       struct gk20a_channel_fence *fence)
+				       struct gk20a_fence **fence)
 {
 	u32 thresh;
 	int incr_cmd_size;
@@ -253,16 +230,15 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 		WARN(err, "failed to set submit complete interrupt");
 	}
 
-	fence->thresh = thresh;
-	fence->valid = true;
-	fence->wfi = wfi_cmd;
+	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
+					 wfi_cmd);
 	*entry = incr_cmd;
 	return 0;
 }
 
 int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
 				  struct priv_cmd_entry **entry,
-				  struct gk20a_channel_fence *fence)
+				  struct gk20a_fence **fence)
 {
 	return __gk20a_channel_syncpt_incr(s,
 			false /* use host class */,
@@ -273,7 +249,7 @@ int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
 
 int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 			      struct priv_cmd_entry **entry,
-			      struct gk20a_channel_fence *fence)
+			      struct gk20a_fence **fence)
 {
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
@@ -286,58 +262,36 @@ int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 			entry, fence);
 }
 
-int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
-					  struct priv_cmd_entry **entry,
-					  struct gk20a_channel_fence *fence,
-					  bool wfi,
-					  u32 *id, u32 *thresh)
+int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
+				   int wait_fence_fd,
+				   struct priv_cmd_entry **entry,
+				   struct gk20a_fence **fence,
+				   bool wfi)
 {
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	/* Need to do 'host incr + wfi' or 'gfx incr' since we return the fence
 	 * to user space. */
-	int err = __gk20a_channel_syncpt_incr(s,
+	return __gk20a_channel_syncpt_incr(s,
 			wfi &&
 			sp->c->obj_class == KEPLER_C /* use gfx class? */,
 			wfi &&
 			sp->c->obj_class != KEPLER_C /* wfi if host class */,
 			true /* register irq */,
 			entry, fence);
-	if (err)
-		return err;
-	*id = sp->id;
-	*thresh = fence->thresh;
-	return 0;
 }
 
-int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
-				      int wait_fence_fd,
-				      struct priv_cmd_entry **entry,
-				      struct gk20a_channel_fence *fence,
-				      bool wfi,
-				      int *fd)
+void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
 {
-#ifdef CONFIG_SYNC
-	int err;
-	struct nvhost_ctrl_sync_fence_info pt;
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
-	err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence, wfi,
-						    &pt.id, &pt.thresh);
-	if (err)
-		return err;
-	return nvhost_sync_create_fence_fd(sp->host1x_pdev, &pt, 1,
-					   "fence", fd);
-#else
-	return -ENODEV;
-#endif
+	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
 }
 
-void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
+static void gk20a_channel_syncpt_signal_timeline(
+		struct gk20a_channel_sync *s)
 {
-	struct gk20a_channel_syncpt *sp =
-		container_of(s, struct gk20a_channel_syncpt, ops);
-	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
+	/* Nothing to do. */
 }
 
 static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
@@ -366,15 +320,13 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
 		return NULL;
 	}
 
-	sp->ops.wait_cpu = gk20a_channel_syncpt_wait_cpu;
-	sp->ops.is_expired = gk20a_channel_syncpt_is_expired;
 	sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
 	sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
 	sp->ops.incr = gk20a_channel_syncpt_incr;
 	sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi;
-	sp->ops.incr_user_syncpt = gk20a_channel_syncpt_incr_user_syncpt;
-	sp->ops.incr_user_fd = gk20a_channel_syncpt_incr_user_fd;
+	sp->ops.incr_user = gk20a_channel_syncpt_incr_user;
 	sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
+	sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline;
 	sp->ops.destroy = gk20a_channel_syncpt_destroy;
 
 	sp->ops.aggressive_destroy = true;
@@ -460,48 +412,10 @@ static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload,
 	return i;
 }
 
-static int gk20a_channel_semaphore_wait_cpu(
-		struct gk20a_channel_sync *s,
-		struct gk20a_channel_fence *fence,
-		int timeout)
-{
-	int remain;
-	struct gk20a_channel_semaphore *sp =
-		container_of(s, struct gk20a_channel_semaphore, ops);
-	if (!fence->valid || WARN_ON(!fence->semaphore))
-		return 0;
-
-	remain = wait_event_interruptible_timeout(
-		sp->c->semaphore_wq,
-		!gk20a_semaphore_is_acquired(fence->semaphore),
-		timeout);
-	if (remain == 0 && gk20a_semaphore_is_acquired(fence->semaphore))
-		return -ETIMEDOUT;
-	else if (remain < 0)
-		return remain;
-	return 0;
-}
-
-static bool gk20a_channel_semaphore_is_expired(
-		struct gk20a_channel_sync *s,
-		struct gk20a_channel_fence *fence)
-{
-	bool expired;
-	struct gk20a_channel_semaphore *sp =
-		container_of(s, struct gk20a_channel_semaphore, ops);
-	if (!fence->valid || WARN_ON(!fence->semaphore))
-		return true;
-
-	expired = !gk20a_semaphore_is_acquired(fence->semaphore);
-	if (expired)
-		gk20a_sync_timeline_signal(sp->timeline);
-	return expired;
-}
-
 static int gk20a_channel_semaphore_wait_syncpt(
 		struct gk20a_channel_sync *s, u32 id,
 		u32 thresh, struct priv_cmd_entry **entry,
-		struct gk20a_channel_fence *fence)
+		struct gk20a_fence **fence)
 {
 	struct gk20a_channel_semaphore *sema =
 		container_of(s, struct gk20a_channel_semaphore, ops);
@@ -513,7 +427,7 @@ static int gk20a_channel_semaphore_wait_syncpt(
 static int gk20a_channel_semaphore_wait_fd(
 		struct gk20a_channel_sync *s, int fd,
 		struct priv_cmd_entry **entry,
-		struct gk20a_channel_fence *fence)
+		struct gk20a_fence **fence)
 {
 	struct gk20a_channel_semaphore *sema =
 		container_of(s, struct gk20a_channel_semaphore, ops);
@@ -558,6 +472,11 @@ static int gk20a_channel_semaphore_wait_fd(
 	WARN_ON(written != wait_cmd->size);
 	sync_fence_wait_async(sync_fence, &w->waiter);
 
+	/* XXX - this fixes an actual bug, we need to hold a ref to this
+	   semaphore while the job is in flight. */
+	*fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
+					    &c->semaphore_wq,
+					    NULL, false);
 	*entry = wait_cmd;
 	return 0;
 fail:
@@ -575,8 +494,9 @@ fail:
 
 static int __gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s, bool wfi_cmd,
+		struct sync_fence *dependency,
 		struct priv_cmd_entry **entry,
-		struct gk20a_channel_fence *fence)
+		struct gk20a_fence **fence)
 {
 	u64 va;
 	int incr_cmd_size;
@@ -608,9 +528,9 @@ static int __gk20a_channel_semaphore_incr(
 	written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd);
 	WARN_ON(written != incr_cmd_size);
 
-	fence->valid = true;
-	fence->wfi = wfi_cmd;
-	fence->semaphore = semaphore;
+	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
+					    &c->semaphore_wq,
+					    dependency, wfi_cmd);
 	*entry = incr_cmd;
 	return 0;
 }
@@ -618,72 +538,54 @@ static int __gk20a_channel_semaphore_incr(
 static int gk20a_channel_semaphore_incr_wfi(
 		struct gk20a_channel_sync *s,
 		struct priv_cmd_entry **entry,
-		struct gk20a_channel_fence *fence)
+		struct gk20a_fence **fence)
 {
 	return __gk20a_channel_semaphore_incr(s,
 			true /* wfi */,
+			NULL,
 			entry, fence);
 }
 
 static int gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s,
 		struct priv_cmd_entry **entry,
-		struct gk20a_channel_fence *fence)
+		struct gk20a_fence **fence)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
 	return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
-			entry, fence);
-}
-
-static int gk20a_channel_semaphore_incr_user_syncpt(
-		struct gk20a_channel_sync *s,
-		struct priv_cmd_entry **entry,
-		struct gk20a_channel_fence *fence,
-		bool wfi,
-		u32 *id, u32 *thresh)
-{
-	struct gk20a_channel_semaphore *sema =
-		container_of(s, struct gk20a_channel_semaphore, ops);
-	struct device *dev = dev_from_gk20a(sema->c->g);
-	gk20a_err(dev, "trying to use syncpoint synchronization");
-	return -ENODEV;
+			NULL, entry, fence);
 }
 
-static int gk20a_channel_semaphore_incr_user_fd(
+static int gk20a_channel_semaphore_incr_user(
 		struct gk20a_channel_sync *s,
 		int wait_fence_fd,
 		struct priv_cmd_entry **entry,
-		struct gk20a_channel_fence *fence,
-		bool wfi,
-		int *fd)
+		struct gk20a_fence **fence,
+		bool wfi)
 {
-	struct gk20a_channel_semaphore *sema =
-		container_of(s, struct gk20a_channel_semaphore, ops);
 #ifdef CONFIG_SYNC
 	struct sync_fence *dependency = NULL;
 	int err;
 
-	err = __gk20a_channel_semaphore_incr(s, wfi,
-					     entry, fence);
-	if (err)
-		return err;
-
 	if (wait_fence_fd >= 0) {
 		dependency = gk20a_sync_fence_fdget(wait_fence_fd);
 		if (!dependency)
 			return -EINVAL;
 	}
 
-	*fd = gk20a_sync_fence_create(sema->timeline, fence->semaphore,
-				      dependency, "fence");
-	if (*fd < 0) {
+	err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
+					     entry, fence);
+	if (err) {
 		if (dependency)
 			sync_fence_put(dependency);
-		return *fd;
+		return err;
 	}
+
 	return 0;
 #else
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
 	gk20a_err(dev_from_gk20a(sema->c->g),
 		  "trying to use sync fds with CONFIG_SYNC disabled");
 	return -ENODEV;
@@ -695,6 +597,14 @@ static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
 	/* Nothing to do. */
 }
 
+static void gk20a_channel_semaphore_signal_timeline(
+		struct gk20a_channel_sync *s)
+{
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	gk20a_sync_timeline_signal(sp->timeline);
+}
+
 static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
 {
 	struct gk20a_channel_semaphore *sema =
@@ -746,15 +656,13 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
 	if (!sema->timeline)
 		goto clean_up;
 #endif
-	sema->ops.wait_cpu = gk20a_channel_semaphore_wait_cpu;
-	sema->ops.is_expired = gk20a_channel_semaphore_is_expired;
 	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
 	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
 	sema->ops.incr = gk20a_channel_semaphore_incr;
 	sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
-	sema->ops.incr_user_syncpt = gk20a_channel_semaphore_incr_user_syncpt;
-	sema->ops.incr_user_fd = gk20a_channel_semaphore_incr_user_fd;
+	sema->ops.incr_user = gk20a_channel_semaphore_incr_user;
 	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
+	sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
 	sema->ops.destroy = gk20a_channel_semaphore_destroy;
 
 	/* Aggressively destroying the semaphore sync would cause overhead
@@ -775,26 +683,3 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
 #endif
 	return gk20a_channel_semaphore_create(c);
 }
-
-static inline bool gk20a_channel_fence_is_closed(struct gk20a_channel_fence *f)
-{
-	if (f->valid || f->semaphore)
-		return false;
-	return true;
-}
-
-void gk20a_channel_fence_close(struct gk20a_channel_fence *f)
-{
-	if (f->semaphore)
-		gk20a_semaphore_put(f->semaphore);
-	memset(f, 0, sizeof(*f));
-}
-
-void gk20a_channel_fence_dup(struct gk20a_channel_fence *from,
-			     struct gk20a_channel_fence *to)
-{
-	WARN_ON(!gk20a_channel_fence_is_closed(to));
-	*to = *from;
-	if (to->semaphore)
-		gk20a_semaphore_get(to->semaphore);
-}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index baa4a151..a3cd8208 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -24,34 +24,28 @@ struct gk20a_channel_sync;
 struct priv_cmd_entry;
 struct channel_gk20a;
 struct gk20a_semaphore;
-
-struct gk20a_channel_fence {
-	bool valid;
-	bool wfi; /* was issued with preceding wfi */
-	u32 thresh; /* syncpoint fences only */
-	struct gk20a_semaphore *semaphore; /* semaphore fences only */
-};
+struct gk20a_fence;
 
 struct gk20a_channel_sync {
-	/* CPU wait for a fence returned by incr_syncpt() or incr_fd(). */
-	int (*wait_cpu)(struct gk20a_channel_sync *s,
-			struct gk20a_channel_fence *fence,
-			int timeout);
-
-	/* Test whether a fence returned by incr_syncpt() or incr_fd() is
-	 * expired. */
-	bool (*is_expired)(struct gk20a_channel_sync *s,
-			   struct gk20a_channel_fence *fence);
-
-	/* Generate a gpu wait cmdbuf from syncpoint. */
+	/* Generate a gpu wait cmdbuf from syncpoint.
+	 * Returns
+	 *  - a gpu cmdbuf that performs the wait when executed,
+	 *  - possibly a helper fence that the caller must hold until the
+	 *    cmdbuf is executed.
+	 */
 	int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
 			   struct priv_cmd_entry **entry,
-			   struct gk20a_channel_fence *fence);
+			   struct gk20a_fence **fence);
 
-	/* Generate a gpu wait cmdbuf from sync fd. */
+	/* Generate a gpu wait cmdbuf from sync fd.
+	 * Returns
+	 *  - a gpu cmdbuf that performs the wait when executed,
+	 *  - possibly a helper fence that the caller must hold until the
+	 *    cmdbuf is executed.
+	 */
 	int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
 		       struct priv_cmd_entry **entry,
-		       struct gk20a_channel_fence *fence);
+		       struct gk20a_fence **fence);
 
 	/* Increment syncpoint/semaphore.
 	 * Returns
@@ -60,7 +54,7 @@ struct gk20a_channel_sync {
 	 */
 	int (*incr)(struct gk20a_channel_sync *s,
 		    struct priv_cmd_entry **entry,
-		    struct gk20a_channel_fence *fence);
+		    struct gk20a_fence **fence);
 
 	/* Increment syncpoint/semaphore, preceded by a wfi.
 	 * Returns
@@ -69,38 +63,29 @@ struct gk20a_channel_sync {
 	 */
 	int (*incr_wfi)(struct gk20a_channel_sync *s,
 			struct priv_cmd_entry **entry,
-			struct gk20a_channel_fence *fence);
-
-	/* Increment syncpoint, so that the returned fence represents
-	 * work completion (may need wfi) and can be returned to user space.
-	 * Returns
-	 *  - a gpu cmdbuf that performs the increment when executed,
-	 *  - a fence that can be passed to wait_cpu() and is_expired(),
-	 *  - a syncpoint id/value pair that can be returned to user space.
-	 */
-	int (*incr_user_syncpt)(struct gk20a_channel_sync *s,
-				struct priv_cmd_entry **entry,
-				struct gk20a_channel_fence *fence,
-				bool wfi,
-				u32 *id, u32 *thresh);
+			struct gk20a_fence **fence);
 
 	/* Increment syncpoint/semaphore, so that the returned fence represents
 	 * work completion (may need wfi) and can be returned to user space.
 	 * Returns
 	 *  - a gpu cmdbuf that performs the increment when executed,
 	 *  - a fence that can be passed to wait_cpu() and is_expired(),
-	 *  - a sync fd that can be returned to user space.
+	 *  - a gk20a_fence that signals when the incr has happened.
 	 */
-	int (*incr_user_fd)(struct gk20a_channel_sync *s,
+	int (*incr_user)(struct gk20a_channel_sync *s,
 			 int wait_fence_fd,
 			 struct priv_cmd_entry **entry,
-			 struct gk20a_channel_fence *fence,
-			 bool wfi,
-			 int *fd);
+			 struct gk20a_fence **fence,
+			 bool wfi);
 
 	/* Reset the channel syncpoint/semaphore. */
 	void (*set_min_eq_max)(struct gk20a_channel_sync *s);
 
+	/* Signals the sync timeline (if owned by the gk20a_channel_sync
+	 * layer). This should be called when we notice that a gk20a_fence is
+	 * expired. */
+	void (*signal_timeline)(struct gk20a_channel_sync *s);
+
 	/* flag to set sync destroy aggressiveness */
 	bool aggressive_destroy;
 
@@ -110,7 +95,4 @@ struct gk20a_channel_sync {
 
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
 
-void gk20a_channel_fence_close(struct gk20a_channel_fence *f);
-void gk20a_channel_fence_dup(struct gk20a_channel_fence *from,
-			     struct gk20a_channel_fence *to);
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
new file mode 100644
index 00000000..1a28e660
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -0,0 +1,229 @@
+/*
+ * drivers/video/tegra/host/gk20a/fence_gk20a.c
+ *
+ * GK20A Fences
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "fence_gk20a.h"
+
+#include <linux/gk20a.h>
+#include <linux/file.h>
+
+#include "gk20a.h"
+#include "semaphore_gk20a.h"
+#include "channel_gk20a.h"
+#include "sync_gk20a.h"
+
+#ifdef CONFIG_SYNC
+#include "../../../staging/android/sync.h"
+#endif
+
+#ifdef CONFIG_TEGRA_GK20A
+#include <linux/nvhost.h>
+#endif
+
+struct gk20a_fence_ops {
+	int (*wait)(struct gk20a_fence *, int timeout);
+	bool (*is_expired)(struct gk20a_fence *);
+	void *(*free)(struct kref *);
+};
+
+static void gk20a_fence_free(struct kref *ref)
+{
+	struct gk20a_fence *f =
+		container_of(ref, struct gk20a_fence, ref);
+#ifdef CONFIG_SYNC
+	if (f->sync_fence)
+		sync_fence_put(f->sync_fence);
+#endif
+	if (f->semaphore)
+		gk20a_semaphore_put(f->semaphore);
+	kfree(f);
+}
+
+void gk20a_fence_put(struct gk20a_fence *f)
+{
+	if (f)
+		kref_put(&f->ref, gk20a_fence_free);
+}
+
+struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
+{
+	if (f)
+		kref_get(&f->ref);
+	return f;
+}
+
+int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
+{
+	return f->ops->wait(f, timeout);
+}
+
+bool gk20a_fence_is_expired(struct gk20a_fence *f)
+{
+	return f->ops->is_expired(f);
+}
+
+int gk20a_fence_install_fd(struct gk20a_fence *f)
+{
+#ifdef CONFIG_SYNC
+	int fd;
+
+	if (!f->sync_fence)
+		return -EINVAL;
+
+	fd = get_unused_fd();
+	if (fd < 0)
+		return fd;
+
+	sync_fence_get(f->sync_fence);
+	sync_fence_install(f->sync_fence, fd);
+	return fd;
+#else
+	return -ENODEV;
+#endif
+}
+
+static struct gk20a_fence *alloc_fence(const struct gk20a_fence_ops *ops,
+				       struct sync_fence *sync_fence, bool wfi)
+{
+	struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);
+	if (!f)
+		return NULL;
+	kref_init(&f->ref);
+	f->ops = ops;
+	f->sync_fence = sync_fence;
+	f->wfi = wfi;
+	f->syncpt_id = -1;
+	return f;
+}
+
+/* Fences that are backed by GPU semaphores: */
+
+static int gk20a_semaphore_fence_wait(struct gk20a_fence *f, int timeout)
+{
+	int remain;
+
+	if (!gk20a_semaphore_is_acquired(f->semaphore))
+		return 0;
+
+	remain = wait_event_interruptible_timeout(
+		*f->semaphore_wq,
+		!gk20a_semaphore_is_acquired(f->semaphore),
+		timeout);
+	if (remain == 0 && gk20a_semaphore_is_acquired(f->semaphore))
+		return -ETIMEDOUT;
+	else if (remain < 0)
+		return remain;
+	return 0;
+}
+
+static bool gk20a_semaphore_fence_is_expired(struct gk20a_fence *f)
+{
+	return !gk20a_semaphore_is_acquired(f->semaphore);
+}
+
+static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
+	.wait = &gk20a_semaphore_fence_wait,
+	.is_expired = &gk20a_semaphore_fence_is_expired,
+};
+
+struct gk20a_fence *gk20a_fence_from_semaphore(
+		struct sync_timeline *timeline,
+		struct gk20a_semaphore *semaphore,
+		wait_queue_head_t *semaphore_wq,
+		struct sync_fence *dependency,
+		bool wfi)
+{
+	struct gk20a_fence *f;
+	struct sync_fence *sync_fence = NULL;
+
+#ifdef CONFIG_SYNC
+	sync_fence = gk20a_sync_fence_create(timeline, semaphore,
+					     dependency, "fence");
+	if (!sync_fence)
+		return NULL;
+#endif
+
+	f = alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi);
+	if (!f) {
+#ifdef CONFIG_SYNC
+		sync_fence_put(sync_fence);
+#endif
+		return NULL;
+	}
+	gk20a_semaphore_get(semaphore);
+	f->semaphore = semaphore;
+	f->semaphore_wq = semaphore_wq;
+	return f;
+}
+
+#ifdef CONFIG_TEGRA_GK20A
+/* Fences that are backed by host1x syncpoints: */
+
+static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, int timeout)
+{
+	return nvhost_syncpt_wait_timeout_ext(
+			f->host1x_pdev, f->syncpt_id, f->syncpt_value,
+			timeout, NULL, NULL);
+}
+
+static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
+{
+	return nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id,
+					    f->syncpt_value);
+}
+
+static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
+	.wait = &gk20a_syncpt_fence_wait,
+	.is_expired = &gk20a_syncpt_fence_is_expired,
+};
+
+struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
+					    u32 id, u32 value, bool wfi)
+{
+	struct gk20a_fence *f;
+	struct sync_fence *sync_fence = NULL;
+
+#ifdef CONFIG_SYNC
+	struct nvhost_ctrl_sync_fence_info pt = {
+		.id = id,
+		.thresh = value
+	};
+
+	sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
+					      "fence");
+	if (!sync_fence)
+		return NULL;
+#endif
+
+	f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
+	if (!f) {
+#ifdef CONFIG_SYNC
+		sync_fence_put(sync_fence);
+#endif
+		return NULL;
+	}
+	f->host1x_pdev = host1x_pdev;
+	f->syncpt_id = id;
+	f->syncpt_value = value;
+	return f;
+}
+#else
+struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
+					    u32 id, u32 value, bool wfi)
+{
+	return NULL;
+}
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
new file mode 100644
index 00000000..629dc694
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -0,0 +1,68 @@
+/*
+ * drivers/video/tegra/host/gk20a/fence_gk20a.h
+ *
+ * GK20A Fences
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _GK20A_FENCE_H_
+#define _GK20A_FENCE_H_
+
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/wait.h>
+
+struct platform_device;
+struct sync_timeline;
+struct sync_fence;
+struct gk20a_semaphore;
+struct channel_gk20a;
+
+struct gk20a_fence_ops;
+
+struct gk20a_fence {
+	/* Valid for all fence types: */
+	struct kref ref;
+	bool wfi;
+	struct sync_fence *sync_fence;
+	const struct gk20a_fence_ops *ops;
+
+	/* Valid for fences created from semaphores: */
+	struct gk20a_semaphore *semaphore;
+	wait_queue_head_t *semaphore_wq;
+
+	/* Valid for fences created from syncpoints: */
+	struct platform_device *host1x_pdev;
+	u32 syncpt_id;
+	u32 syncpt_value;
+};
+
+/* Fences can be created from semaphores or syncpoint (id, value) pairs */
+struct gk20a_fence *gk20a_fence_from_semaphore(
+		struct sync_timeline *timeline,
+		struct gk20a_semaphore *semaphore,
+		wait_queue_head_t *semaphore_wq,
+		struct sync_fence *dependency,
+		bool wfi);
+
+struct gk20a_fence *gk20a_fence_from_syncpt(
+		struct platform_device *host1x_pdev,
+		u32 id, u32 value, bool wfi);
+
+/* Fence operations */
+void gk20a_fence_put(struct gk20a_fence *f);
+struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f);
+int gk20a_fence_wait(struct gk20a_fence *f, int timeout);
+bool gk20a_fence_is_expired(struct gk20a_fence *f);
+int gk20a_fence_install_fd(struct gk20a_fence *f);
+
+#endif
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index 1f2eae1a..da9a0f5e 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -371,12 +371,11 @@ struct sync_timeline *gk20a_sync_timeline_create(
 	return &obj->obj;
 }
 
-int gk20a_sync_fence_create(struct sync_timeline *obj,
+struct sync_fence *gk20a_sync_fence_create(struct sync_timeline *obj,
 		struct gk20a_semaphore *sema,
 		struct sync_fence *dependency,
 		const char *fmt, ...)
 {
-	int fd;
 	char name[30];
 	va_list args;
 	struct sync_pt *pt;
@@ -385,7 +384,7 @@ int gk20a_sync_fence_create(struct sync_timeline *obj,
 
 	pt = gk20a_sync_pt_create_inst(timeline, sema, dependency);
 	if (pt == NULL)
-		return -ENOMEM;
+		return NULL;
 
 	va_start(args, fmt);
 	vsnprintf(name, sizeof(name), fmt, args);
@@ -394,15 +393,7 @@ int gk20a_sync_fence_create(struct sync_timeline *obj,
 	fence = sync_fence_create(name, pt);
 	if (fence == NULL) {
 		sync_pt_free(pt);
-		return -ENOMEM;
+		return NULL;
 	}
-
-	fd = get_unused_fd();
-	if (fd < 0) {
-		sync_fence_put(fence);
-		return fd;
-	}
-
-	sync_fence_install(fence, fd);
-	return fd;
+	return fence;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index 49597f06..e9c26221 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -29,7 +29,7 @@ struct gk20a_semaphore;
 struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...);
 void gk20a_sync_timeline_destroy(struct sync_timeline *);
 void gk20a_sync_timeline_signal(struct sync_timeline *);
-int gk20a_sync_fence_create(struct sync_timeline *,
+struct sync_fence *gk20a_sync_fence_create(struct sync_timeline *,
 		struct gk20a_semaphore *,
 		struct sync_fence *dependency,
 		const char *fmt, ...);