summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
authorLauri Peltonen <lpeltonen@nvidia.com>2014-02-25 06:31:47 -0500
committerDan Willemsen <dwillemsen@nvidia.com>2015-03-18 15:10:08 -0400
commite204224b26e6b5f609bc4e542368c1a13aeece61 (patch)
tree9d351eb734a54ff677a2f26fec2d6f96adc1e220 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent4d278fdfd70082da3c020a15ba0dd722f9da1b3b (diff)
gpu: nvgpu: Add semaphore based gk20a_channel_sync
Add semaphore implementation of the gk20a_channel_sync interface. Each channel has one semaphore pool, which is mapped as read-write to the channel vm. We allocate one or two semaphores from the pool for each submit. The first semaphore is only needed if we need to wait for an opaque sync fd. In that case, we allocate the semaphore, and ask GPU to wait for its value to become 1 (semaphore acquire method). We also queue a kernel work that waits on the fence fd, and subsequently releases the semaphore (sets its value to 1) so that the command buffer can proceed. The second semaphore is used on every submit, and is used for work completion tracking. The GPU sets its value to 1 when the command buffer has been processed. The channel jobs need to hold references to both semaphores so that their backing semaphore pool slots are not reused while the job is in flight. Therefore gk20a_channel_fence will keep a reference to the semaphore that it represents (channel fences are stored in the job structure). This means that we must diligently close and dup the gk20a_channel_fence objects to avoid leaking semaphores. Bug 1450122 Bug 1445450 Change-Id: Ib61091a1b7632fa36efe0289011040ef7c4ae8f8 Signed-off-by: Lauri Peltonen <lpeltonen@nvidia.com> Reviewed-on: http://git-master/r/374844 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c79
1 files changed, 53 insertions, 26 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 68a30392..651ea08c 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -33,6 +33,7 @@
33 33
34#include "gk20a.h" 34#include "gk20a.h"
35#include "dbg_gpu_gk20a.h" 35#include "dbg_gpu_gk20a.h"
36#include "semaphore_gk20a.h"
36 37
37#include "hw_ram_gk20a.h" 38#include "hw_ram_gk20a.h"
38#include "hw_fifo_gk20a.h" 39#include "hw_fifo_gk20a.h"
@@ -340,7 +341,7 @@ static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
340 * resource at this point 341 * resource at this point
341 * if not, then it will be destroyed at channel_free() 342 * if not, then it will be destroyed at channel_free()
342 */ 343 */
343 if (ch_gk20a->sync && ch_gk20a->sync->syncpt_aggressive_destroy) { 344 if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) {
344 ch_gk20a->sync->destroy(ch_gk20a->sync); 345 ch_gk20a->sync->destroy(ch_gk20a->sync);
345 ch_gk20a->sync = NULL; 346 ch_gk20a->sync = NULL;
346 } 347 }
@@ -657,6 +658,8 @@ unbind:
657 ch->vpr = false; 658 ch->vpr = false;
658 ch->vm = NULL; 659 ch->vm = NULL;
659 660
661 gk20a_channel_fence_close(&ch->last_submit.pre_fence);
662 gk20a_channel_fence_close(&ch->last_submit.post_fence);
660 if (ch->sync) { 663 if (ch->sync) {
661 ch->sync->destroy(ch->sync); 664 ch->sync->destroy(ch->sync);
662 ch->sync = NULL; 665 ch->sync = NULL;
@@ -1089,7 +1092,8 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1089 ch_vm = c->vm; 1092 ch_vm = c->vm;
1090 1093
1091 c->cmds_pending = false; 1094 c->cmds_pending = false;
1092 c->last_submit_fence.valid = false; 1095 gk20a_channel_fence_close(&c->last_submit.pre_fence);
1096 gk20a_channel_fence_close(&c->last_submit.post_fence);
1093 1097
1094 c->ramfc.offset = 0; 1098 c->ramfc.offset = 0;
1095 c->ramfc.size = ram_in_ramfc_s() / 8; 1099 c->ramfc.size = ram_in_ramfc_s() / 8;
@@ -1272,13 +1276,16 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1272 } 1276 }
1273 } 1277 }
1274 1278
1275 err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence); 1279 gk20a_channel_fence_close(&c->last_submit.pre_fence);
1280 gk20a_channel_fence_close(&c->last_submit.post_fence);
1281
1282 err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
1276 if (unlikely(err)) { 1283 if (unlikely(err)) {
1277 mutex_unlock(&c->submit_lock); 1284 mutex_unlock(&c->submit_lock);
1278 return err; 1285 return err;
1279 } 1286 }
1280 1287
1281 WARN_ON(!c->last_submit_fence.wfi); 1288 WARN_ON(!c->last_submit.post_fence.wfi);
1282 1289
1283 c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva); 1290 c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1284 c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) | 1291 c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
@@ -1344,7 +1351,8 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1344} 1351}
1345 1352
1346static int gk20a_channel_add_job(struct channel_gk20a *c, 1353static int gk20a_channel_add_job(struct channel_gk20a *c,
1347 struct gk20a_channel_fence *fence) 1354 struct gk20a_channel_fence *pre_fence,
1355 struct gk20a_channel_fence *post_fence)
1348{ 1356{
1349 struct vm_gk20a *vm = c->vm; 1357 struct vm_gk20a *vm = c->vm;
1350 struct channel_gk20a_job *job = NULL; 1358 struct channel_gk20a_job *job = NULL;
@@ -1369,7 +1377,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1369 1377
1370 job->num_mapped_buffers = num_mapped_buffers; 1378 job->num_mapped_buffers = num_mapped_buffers;
1371 job->mapped_buffers = mapped_buffers; 1379 job->mapped_buffers = mapped_buffers;
1372 job->fence = *fence; 1380 gk20a_channel_fence_dup(pre_fence, &job->pre_fence);
1381 gk20a_channel_fence_dup(post_fence, &job->post_fence);
1373 1382
1374 mutex_lock(&c->jobs_lock); 1383 mutex_lock(&c->jobs_lock);
1375 list_add_tail(&job->list, &c->jobs); 1384 list_add_tail(&job->list, &c->jobs);
@@ -1391,13 +1400,18 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1391 mutex_lock(&c->jobs_lock); 1400 mutex_lock(&c->jobs_lock);
1392 list_for_each_entry_safe(job, n, &c->jobs, list) { 1401 list_for_each_entry_safe(job, n, &c->jobs, list) {
1393 bool completed = WARN_ON(!c->sync) || 1402 bool completed = WARN_ON(!c->sync) ||
1394 c->sync->is_expired(c->sync, &job->fence); 1403 c->sync->is_expired(c->sync, &job->post_fence);
1395 if (!completed) 1404 if (!completed)
1396 break; 1405 break;
1397 1406
1398 gk20a_vm_put_buffers(vm, job->mapped_buffers, 1407 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1399 job->num_mapped_buffers); 1408 job->num_mapped_buffers);
1400 1409
1410 /* Close the fences (this will unref the semaphores and release
1411 * them to the pool). */
1412 gk20a_channel_fence_close(&job->pre_fence);
1413 gk20a_channel_fence_close(&job->post_fence);
1414
1401 /* job is done. release its reference to vm */ 1415 /* job is done. release its reference to vm */
1402 gk20a_vm_put(vm); 1416 gk20a_vm_put(vm);
1403 1417
@@ -1413,8 +1427,8 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1413 * the sync resource 1427 * the sync resource
1414 */ 1428 */
1415 if (list_empty(&c->jobs)) { 1429 if (list_empty(&c->jobs)) {
1416 if (c->sync && c->sync->syncpt_aggressive_destroy && 1430 if (c->sync && c->sync->aggressive_destroy &&
1417 c->sync->is_expired(c->sync, &c->last_submit_fence)) { 1431 c->sync->is_expired(c->sync, &c->last_submit.post_fence)) {
1418 c->sync->destroy(c->sync); 1432 c->sync->destroy(c->sync);
1419 c->sync = NULL; 1433 c->sync = NULL;
1420 } 1434 }
@@ -1448,8 +1462,11 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1448 struct device *d = dev_from_gk20a(g); 1462 struct device *d = dev_from_gk20a(g);
1449 int err = 0; 1463 int err = 0;
1450 int i; 1464 int i;
1465 int wait_fence_fd = -1;
1451 struct priv_cmd_entry *wait_cmd = NULL; 1466 struct priv_cmd_entry *wait_cmd = NULL;
1452 struct priv_cmd_entry *incr_cmd = NULL; 1467 struct priv_cmd_entry *incr_cmd = NULL;
1468 struct gk20a_channel_fence pre_fence = { 0 };
1469 struct gk20a_channel_fence post_fence = { 0 };
1453 /* we might need two extra gpfifo entries - one for pre fence 1470 /* we might need two extra gpfifo entries - one for pre fence
1454 * and one for post fence. */ 1471 * and one for post fence. */
1455 const int extra_entries = 2; 1472 const int extra_entries = 2;
@@ -1534,12 +1551,14 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1534 * keep running some tests which trigger this condition 1551 * keep running some tests which trigger this condition
1535 */ 1552 */
1536 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { 1553 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1537 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) 1554 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1538 err = c->sync->wait_fd(c->sync, fence->syncpt_id, 1555 wait_fence_fd = fence->syncpt_id;
1539 &wait_cmd); 1556 err = c->sync->wait_fd(c->sync, wait_fence_fd,
1540 else 1557 &wait_cmd, &pre_fence);
1558 } else {
1541 err = c->sync->wait_syncpt(c->sync, fence->syncpt_id, 1559 err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
1542 fence->value, &wait_cmd); 1560 fence->value, &wait_cmd, &pre_fence);
1561 }
1543 } 1562 }
1544 if (err) { 1563 if (err) {
1545 mutex_unlock(&c->submit_lock); 1564 mutex_unlock(&c->submit_lock);
@@ -1551,19 +1570,19 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1551 to keep track of method completion for idle railgating */ 1570 to keep track of method completion for idle railgating */
1552 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET && 1571 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
1553 flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) 1572 flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1554 err = c->sync->incr_user_fd(c->sync, &incr_cmd, 1573 err = c->sync->incr_user_fd(c->sync, wait_fence_fd, &incr_cmd,
1555 &c->last_submit_fence, 1574 &post_fence,
1556 need_wfi, 1575 need_wfi,
1557 &fence->syncpt_id); 1576 &fence->syncpt_id);
1558 else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) 1577 else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1559 err = c->sync->incr_user_syncpt(c->sync, &incr_cmd, 1578 err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
1560 &c->last_submit_fence, 1579 &post_fence,
1561 need_wfi, 1580 need_wfi,
1562 &fence->syncpt_id, 1581 &fence->syncpt_id,
1563 &fence->value); 1582 &fence->value);
1564 else 1583 else
1565 err = c->sync->incr(c->sync, &incr_cmd, 1584 err = c->sync->incr(c->sync, &incr_cmd,
1566 &c->last_submit_fence); 1585 &post_fence);
1567 if (err) { 1586 if (err) {
1568 mutex_unlock(&c->submit_lock); 1587 mutex_unlock(&c->submit_lock);
1569 goto clean_up; 1588 goto clean_up;
@@ -1611,8 +1630,13 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1611 incr_cmd->gp_put = c->gpfifo.put; 1630 incr_cmd->gp_put = c->gpfifo.put;
1612 } 1631 }
1613 1632
1633 gk20a_channel_fence_close(&c->last_submit.pre_fence);
1634 gk20a_channel_fence_close(&c->last_submit.post_fence);
1635 c->last_submit.pre_fence = pre_fence;
1636 c->last_submit.post_fence = post_fence;
1637
1614 /* TODO! Check for errors... */ 1638 /* TODO! Check for errors... */
1615 gk20a_channel_add_job(c, &c->last_submit_fence); 1639 gk20a_channel_add_job(c, &pre_fence, &post_fence);
1616 1640
1617 c->cmds_pending = true; 1641 c->cmds_pending = true;
1618 gk20a_bar1_writel(g, 1642 gk20a_bar1_writel(g,
@@ -1637,6 +1661,8 @@ clean_up:
1637 gk20a_err(d, "fail"); 1661 gk20a_err(d, "fail");
1638 free_priv_cmdbuf(c, wait_cmd); 1662 free_priv_cmdbuf(c, wait_cmd);
1639 free_priv_cmdbuf(c, incr_cmd); 1663 free_priv_cmdbuf(c, incr_cmd);
1664 gk20a_channel_fence_close(&pre_fence);
1665 gk20a_channel_fence_close(&post_fence);
1640 gk20a_idle(g->dev); 1666 gk20a_idle(g->dev);
1641 return err; 1667 return err;
1642} 1668}
@@ -1669,6 +1695,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1669int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) 1695int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1670{ 1696{
1671 int err = 0; 1697 int err = 0;
1698 struct gk20a_channel_fence *fence = &ch->last_submit.post_fence;
1672 1699
1673 if (!ch->cmds_pending) 1700 if (!ch->cmds_pending)
1674 return 0; 1701 return 0;
@@ -1677,21 +1704,20 @@ int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1677 if (ch->has_timedout) 1704 if (ch->has_timedout)
1678 return -ETIMEDOUT; 1705 return -ETIMEDOUT;
1679 1706
1680 if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) { 1707 if (!(fence->valid && fence->wfi)) {
1681 gk20a_dbg_fn("issuing wfi, incr to finish the channel"); 1708 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
1682 err = gk20a_channel_submit_wfi(ch); 1709 err = gk20a_channel_submit_wfi(ch);
1683 } 1710 }
1684 if (err) 1711 if (err)
1685 return err; 1712 return err;
1686 1713
1687 BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)); 1714 BUG_ON(!(fence->valid && fence->wfi));
1688 1715
1689 gk20a_dbg_fn("waiting for channel to finish thresh:%d", 1716 gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
1690 ch->last_submit_fence.thresh); 1717 fence->thresh, fence->semaphore);
1691 1718
1692 if (ch->sync) { 1719 if (ch->sync) {
1693 err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, 1720 err = ch->sync->wait_cpu(ch->sync, fence, timeout);
1694 timeout);
1695 if (WARN_ON(err)) 1721 if (WARN_ON(err))
1696 dev_warn(dev_from_gk20a(ch->g), 1722 dev_warn(dev_from_gk20a(ch->g),
1697 "timed out waiting for gk20a channel to finish"); 1723 "timed out waiting for gk20a channel to finish");
@@ -1900,7 +1926,8 @@ int gk20a_channel_suspend(struct gk20a *g)
1900 1926
1901 if (c->sync) 1927 if (c->sync)
1902 c->sync->wait_cpu(c->sync, 1928 c->sync->wait_cpu(c->sync,
1903 &c->last_submit_fence, 500000); 1929 &c->last_submit.post_fence,
1930 500000);
1904 break; 1931 break;
1905 } 1932 }
1906 } 1933 }