diff options
author | Lauri Peltonen <lpeltonen@nvidia.com> | 2014-02-25 06:31:47 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:10:08 -0400 |
commit | e204224b26e6b5f609bc4e542368c1a13aeece61 (patch) | |
tree | 9d351eb734a54ff677a2f26fec2d6f96adc1e220 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | 4d278fdfd70082da3c020a15ba0dd722f9da1b3b (diff) |
gpu: nvgpu: Add semaphore based gk20a_channel_sync
Add semaphore implementation of the gk20a_channel_sync interface.
Each channel has one semaphore pool, which is mapped as read-write to
the channel vm. We allocate one or two semaphores from the pool for each
submit.
The first semaphore is only needed if we need to wait for an opaque sync
fd. In that case, we allocate the semaphore, and ask GPU to wait for
its value to become 1 (semaphore acquire method). We also queue a
kernel work that waits on the fence fd, and subsequently releases the
semaphore (sets its value to 1) so that the command buffer can proceed.
The second semaphore is used on every submit, and is used for work
completion tracking. The GPU sets its value to 1 when the command buffer
has been processed.
The channel jobs need to hold references to both semaphores so that
their backing semaphore pool slots are not reused while the job is in
flight. Therefore gk20a_channel_fence will keep a reference to the
semaphore that it represents (channel fences are stored in the job
structure). This means that we must diligently close and dup the
gk20a_channel_fence objects to avoid leaking semaphores.
Bug 1450122
Bug 1445450
Change-Id: Ib61091a1b7632fa36efe0289011040ef7c4ae8f8
Signed-off-by: Lauri Peltonen <lpeltonen@nvidia.com>
Reviewed-on: http://git-master/r/374844
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 79 |
1 file changed, 53 insertions, 26 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 68a30392..651ea08c 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -33,6 +33,7 @@ | |||
33 | 33 | ||
34 | #include "gk20a.h" | 34 | #include "gk20a.h" |
35 | #include "dbg_gpu_gk20a.h" | 35 | #include "dbg_gpu_gk20a.h" |
36 | #include "semaphore_gk20a.h" | ||
36 | 37 | ||
37 | #include "hw_ram_gk20a.h" | 38 | #include "hw_ram_gk20a.h" |
38 | #include "hw_fifo_gk20a.h" | 39 | #include "hw_fifo_gk20a.h" |
@@ -340,7 +341,7 @@ static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) | |||
340 | * resource at this point | 341 | * resource at this point |
341 | * if not, then it will be destroyed at channel_free() | 342 | * if not, then it will be destroyed at channel_free() |
342 | */ | 343 | */ |
343 | if (ch_gk20a->sync && ch_gk20a->sync->syncpt_aggressive_destroy) { | 344 | if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) { |
344 | ch_gk20a->sync->destroy(ch_gk20a->sync); | 345 | ch_gk20a->sync->destroy(ch_gk20a->sync); |
345 | ch_gk20a->sync = NULL; | 346 | ch_gk20a->sync = NULL; |
346 | } | 347 | } |
@@ -657,6 +658,8 @@ unbind: | |||
657 | ch->vpr = false; | 658 | ch->vpr = false; |
658 | ch->vm = NULL; | 659 | ch->vm = NULL; |
659 | 660 | ||
661 | gk20a_channel_fence_close(&ch->last_submit.pre_fence); | ||
662 | gk20a_channel_fence_close(&ch->last_submit.post_fence); | ||
660 | if (ch->sync) { | 663 | if (ch->sync) { |
661 | ch->sync->destroy(ch->sync); | 664 | ch->sync->destroy(ch->sync); |
662 | ch->sync = NULL; | 665 | ch->sync = NULL; |
@@ -1089,7 +1092,8 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1089 | ch_vm = c->vm; | 1092 | ch_vm = c->vm; |
1090 | 1093 | ||
1091 | c->cmds_pending = false; | 1094 | c->cmds_pending = false; |
1092 | c->last_submit_fence.valid = false; | 1095 | gk20a_channel_fence_close(&c->last_submit.pre_fence); |
1096 | gk20a_channel_fence_close(&c->last_submit.post_fence); | ||
1093 | 1097 | ||
1094 | c->ramfc.offset = 0; | 1098 | c->ramfc.offset = 0; |
1095 | c->ramfc.size = ram_in_ramfc_s() / 8; | 1099 | c->ramfc.size = ram_in_ramfc_s() / 8; |
@@ -1272,13 +1276,16 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c) | |||
1272 | } | 1276 | } |
1273 | } | 1277 | } |
1274 | 1278 | ||
1275 | err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence); | 1279 | gk20a_channel_fence_close(&c->last_submit.pre_fence); |
1280 | gk20a_channel_fence_close(&c->last_submit.post_fence); | ||
1281 | |||
1282 | err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence); | ||
1276 | if (unlikely(err)) { | 1283 | if (unlikely(err)) { |
1277 | mutex_unlock(&c->submit_lock); | 1284 | mutex_unlock(&c->submit_lock); |
1278 | return err; | 1285 | return err; |
1279 | } | 1286 | } |
1280 | 1287 | ||
1281 | WARN_ON(!c->last_submit_fence.wfi); | 1288 | WARN_ON(!c->last_submit.post_fence.wfi); |
1282 | 1289 | ||
1283 | c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva); | 1290 | c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva); |
1284 | c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) | | 1291 | c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) | |
@@ -1344,7 +1351,8 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g) | |||
1344 | } | 1351 | } |
1345 | 1352 | ||
1346 | static int gk20a_channel_add_job(struct channel_gk20a *c, | 1353 | static int gk20a_channel_add_job(struct channel_gk20a *c, |
1347 | struct gk20a_channel_fence *fence) | 1354 | struct gk20a_channel_fence *pre_fence, |
1355 | struct gk20a_channel_fence *post_fence) | ||
1348 | { | 1356 | { |
1349 | struct vm_gk20a *vm = c->vm; | 1357 | struct vm_gk20a *vm = c->vm; |
1350 | struct channel_gk20a_job *job = NULL; | 1358 | struct channel_gk20a_job *job = NULL; |
@@ -1369,7 +1377,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1369 | 1377 | ||
1370 | job->num_mapped_buffers = num_mapped_buffers; | 1378 | job->num_mapped_buffers = num_mapped_buffers; |
1371 | job->mapped_buffers = mapped_buffers; | 1379 | job->mapped_buffers = mapped_buffers; |
1372 | job->fence = *fence; | 1380 | gk20a_channel_fence_dup(pre_fence, &job->pre_fence); |
1381 | gk20a_channel_fence_dup(post_fence, &job->post_fence); | ||
1373 | 1382 | ||
1374 | mutex_lock(&c->jobs_lock); | 1383 | mutex_lock(&c->jobs_lock); |
1375 | list_add_tail(&job->list, &c->jobs); | 1384 | list_add_tail(&job->list, &c->jobs); |
@@ -1391,13 +1400,18 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) | |||
1391 | mutex_lock(&c->jobs_lock); | 1400 | mutex_lock(&c->jobs_lock); |
1392 | list_for_each_entry_safe(job, n, &c->jobs, list) { | 1401 | list_for_each_entry_safe(job, n, &c->jobs, list) { |
1393 | bool completed = WARN_ON(!c->sync) || | 1402 | bool completed = WARN_ON(!c->sync) || |
1394 | c->sync->is_expired(c->sync, &job->fence); | 1403 | c->sync->is_expired(c->sync, &job->post_fence); |
1395 | if (!completed) | 1404 | if (!completed) |
1396 | break; | 1405 | break; |
1397 | 1406 | ||
1398 | gk20a_vm_put_buffers(vm, job->mapped_buffers, | 1407 | gk20a_vm_put_buffers(vm, job->mapped_buffers, |
1399 | job->num_mapped_buffers); | 1408 | job->num_mapped_buffers); |
1400 | 1409 | ||
1410 | /* Close the fences (this will unref the semaphores and release | ||
1411 | * them to the pool). */ | ||
1412 | gk20a_channel_fence_close(&job->pre_fence); | ||
1413 | gk20a_channel_fence_close(&job->post_fence); | ||
1414 | |||
1401 | /* job is done. release its reference to vm */ | 1415 | /* job is done. release its reference to vm */ |
1402 | gk20a_vm_put(vm); | 1416 | gk20a_vm_put(vm); |
1403 | 1417 | ||
@@ -1413,8 +1427,8 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) | |||
1413 | * the sync resource | 1427 | * the sync resource |
1414 | */ | 1428 | */ |
1415 | if (list_empty(&c->jobs)) { | 1429 | if (list_empty(&c->jobs)) { |
1416 | if (c->sync && c->sync->syncpt_aggressive_destroy && | 1430 | if (c->sync && c->sync->aggressive_destroy && |
1417 | c->sync->is_expired(c->sync, &c->last_submit_fence)) { | 1431 | c->sync->is_expired(c->sync, &c->last_submit.post_fence)) { |
1418 | c->sync->destroy(c->sync); | 1432 | c->sync->destroy(c->sync); |
1419 | c->sync = NULL; | 1433 | c->sync = NULL; |
1420 | } | 1434 | } |
@@ -1448,8 +1462,11 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1448 | struct device *d = dev_from_gk20a(g); | 1462 | struct device *d = dev_from_gk20a(g); |
1449 | int err = 0; | 1463 | int err = 0; |
1450 | int i; | 1464 | int i; |
1465 | int wait_fence_fd = -1; | ||
1451 | struct priv_cmd_entry *wait_cmd = NULL; | 1466 | struct priv_cmd_entry *wait_cmd = NULL; |
1452 | struct priv_cmd_entry *incr_cmd = NULL; | 1467 | struct priv_cmd_entry *incr_cmd = NULL; |
1468 | struct gk20a_channel_fence pre_fence = { 0 }; | ||
1469 | struct gk20a_channel_fence post_fence = { 0 }; | ||
1453 | /* we might need two extra gpfifo entries - one for pre fence | 1470 | /* we might need two extra gpfifo entries - one for pre fence |
1454 | * and one for post fence. */ | 1471 | * and one for post fence. */ |
1455 | const int extra_entries = 2; | 1472 | const int extra_entries = 2; |
@@ -1534,12 +1551,14 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1534 | * keep running some tests which trigger this condition | 1551 | * keep running some tests which trigger this condition |
1535 | */ | 1552 | */ |
1536 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { | 1553 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { |
1537 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) | 1554 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { |
1538 | err = c->sync->wait_fd(c->sync, fence->syncpt_id, | 1555 | wait_fence_fd = fence->syncpt_id; |
1539 | &wait_cmd); | 1556 | err = c->sync->wait_fd(c->sync, wait_fence_fd, |
1540 | else | 1557 | &wait_cmd, &pre_fence); |
1558 | } else { | ||
1541 | err = c->sync->wait_syncpt(c->sync, fence->syncpt_id, | 1559 | err = c->sync->wait_syncpt(c->sync, fence->syncpt_id, |
1542 | fence->value, &wait_cmd); | 1560 | fence->value, &wait_cmd, &pre_fence); |
1561 | } | ||
1543 | } | 1562 | } |
1544 | if (err) { | 1563 | if (err) { |
1545 | mutex_unlock(&c->submit_lock); | 1564 | mutex_unlock(&c->submit_lock); |
@@ -1551,19 +1570,19 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1551 | to keep track of method completion for idle railgating */ | 1570 | to keep track of method completion for idle railgating */ |
1552 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET && | 1571 | if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET && |
1553 | flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) | 1572 | flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) |
1554 | err = c->sync->incr_user_fd(c->sync, &incr_cmd, | 1573 | err = c->sync->incr_user_fd(c->sync, wait_fence_fd, &incr_cmd, |
1555 | &c->last_submit_fence, | 1574 | &post_fence, |
1556 | need_wfi, | 1575 | need_wfi, |
1557 | &fence->syncpt_id); | 1576 | &fence->syncpt_id); |
1558 | else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | 1577 | else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) |
1559 | err = c->sync->incr_user_syncpt(c->sync, &incr_cmd, | 1578 | err = c->sync->incr_user_syncpt(c->sync, &incr_cmd, |
1560 | &c->last_submit_fence, | 1579 | &post_fence, |
1561 | need_wfi, | 1580 | need_wfi, |
1562 | &fence->syncpt_id, | 1581 | &fence->syncpt_id, |
1563 | &fence->value); | 1582 | &fence->value); |
1564 | else | 1583 | else |
1565 | err = c->sync->incr(c->sync, &incr_cmd, | 1584 | err = c->sync->incr(c->sync, &incr_cmd, |
1566 | &c->last_submit_fence); | 1585 | &post_fence); |
1567 | if (err) { | 1586 | if (err) { |
1568 | mutex_unlock(&c->submit_lock); | 1587 | mutex_unlock(&c->submit_lock); |
1569 | goto clean_up; | 1588 | goto clean_up; |
@@ -1611,8 +1630,13 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1611 | incr_cmd->gp_put = c->gpfifo.put; | 1630 | incr_cmd->gp_put = c->gpfifo.put; |
1612 | } | 1631 | } |
1613 | 1632 | ||
1633 | gk20a_channel_fence_close(&c->last_submit.pre_fence); | ||
1634 | gk20a_channel_fence_close(&c->last_submit.post_fence); | ||
1635 | c->last_submit.pre_fence = pre_fence; | ||
1636 | c->last_submit.post_fence = post_fence; | ||
1637 | |||
1614 | /* TODO! Check for errors... */ | 1638 | /* TODO! Check for errors... */ |
1615 | gk20a_channel_add_job(c, &c->last_submit_fence); | 1639 | gk20a_channel_add_job(c, &pre_fence, &post_fence); |
1616 | 1640 | ||
1617 | c->cmds_pending = true; | 1641 | c->cmds_pending = true; |
1618 | gk20a_bar1_writel(g, | 1642 | gk20a_bar1_writel(g, |
@@ -1637,6 +1661,8 @@ clean_up: | |||
1637 | gk20a_err(d, "fail"); | 1661 | gk20a_err(d, "fail"); |
1638 | free_priv_cmdbuf(c, wait_cmd); | 1662 | free_priv_cmdbuf(c, wait_cmd); |
1639 | free_priv_cmdbuf(c, incr_cmd); | 1663 | free_priv_cmdbuf(c, incr_cmd); |
1664 | gk20a_channel_fence_close(&pre_fence); | ||
1665 | gk20a_channel_fence_close(&post_fence); | ||
1640 | gk20a_idle(g->dev); | 1666 | gk20a_idle(g->dev); |
1641 | return err; | 1667 | return err; |
1642 | } | 1668 | } |
@@ -1669,6 +1695,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) | |||
1669 | int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) | 1695 | int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) |
1670 | { | 1696 | { |
1671 | int err = 0; | 1697 | int err = 0; |
1698 | struct gk20a_channel_fence *fence = &ch->last_submit.post_fence; | ||
1672 | 1699 | ||
1673 | if (!ch->cmds_pending) | 1700 | if (!ch->cmds_pending) |
1674 | return 0; | 1701 | return 0; |
@@ -1677,21 +1704,20 @@ int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) | |||
1677 | if (ch->has_timedout) | 1704 | if (ch->has_timedout) |
1678 | return -ETIMEDOUT; | 1705 | return -ETIMEDOUT; |
1679 | 1706 | ||
1680 | if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) { | 1707 | if (!(fence->valid && fence->wfi)) { |
1681 | gk20a_dbg_fn("issuing wfi, incr to finish the channel"); | 1708 | gk20a_dbg_fn("issuing wfi, incr to finish the channel"); |
1682 | err = gk20a_channel_submit_wfi(ch); | 1709 | err = gk20a_channel_submit_wfi(ch); |
1683 | } | 1710 | } |
1684 | if (err) | 1711 | if (err) |
1685 | return err; | 1712 | return err; |
1686 | 1713 | ||
1687 | BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)); | 1714 | BUG_ON(!(fence->valid && fence->wfi)); |
1688 | 1715 | ||
1689 | gk20a_dbg_fn("waiting for channel to finish thresh:%d", | 1716 | gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p", |
1690 | ch->last_submit_fence.thresh); | 1717 | fence->thresh, fence->semaphore); |
1691 | 1718 | ||
1692 | if (ch->sync) { | 1719 | if (ch->sync) { |
1693 | err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, | 1720 | err = ch->sync->wait_cpu(ch->sync, fence, timeout); |
1694 | timeout); | ||
1695 | if (WARN_ON(err)) | 1721 | if (WARN_ON(err)) |
1696 | dev_warn(dev_from_gk20a(ch->g), | 1722 | dev_warn(dev_from_gk20a(ch->g), |
1697 | "timed out waiting for gk20a channel to finish"); | 1723 | "timed out waiting for gk20a channel to finish"); |
@@ -1900,7 +1926,8 @@ int gk20a_channel_suspend(struct gk20a *g) | |||
1900 | 1926 | ||
1901 | if (c->sync) | 1927 | if (c->sync) |
1902 | c->sync->wait_cpu(c->sync, | 1928 | c->sync->wait_cpu(c->sync, |
1903 | &c->last_submit_fence, 500000); | 1929 | &c->last_submit.post_fence, |
1930 | 500000); | ||
1904 | break; | 1931 | break; |
1905 | } | 1932 | } |
1906 | } | 1933 | } |