author | Sachit Kadle <skadle@nvidia.com> | 2016-08-15 17:32:39 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-10-20 11:14:11 -0400 |
commit | 733fb79b39869665addcd80ccdf1c15f4a5aaa29 (patch) | |
tree | b3c9c1ba32406ed9c2af4bebee820e83ea6172e4 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |
parent | 63e8592e06939e20c7b9e56b430353ebbee31ad6 (diff) |
gpu: nvgpu: add support for pre-allocated resources
Add support for pre-allocation of job tracking resources
with a new (extended) ioctl. The goal is to avoid dynamic memory
allocation in the submit path. This patch does the following:
1) Introduces a new ioctl, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX,
which enables pre-allocation of tracking resources per job:
a) 2x priv_cmd_entry
b) 2x gk20a_fence
2) Implements a circular ring buffer for job
tracking to avoid lock contention between the producer
(submitter) and consumer (clean-up); a simplified sketch
of this ring follows below
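
Below is a minimal, single-threaded user-space sketch of the pre-allocated
job ring buffer described in item 2. It is illustrative only: the struct and
function names, the ring length, and the retire-on-full fallback are
assumptions made for the demo (the real submit path returns -EAGAIN when the
ring is full), and the wmb()/rmb() pairing that the driver uses between the
submit path and the clean-up worker is omitted since this model runs in one
thread.

```c
/*
 * Illustrative sketch only -- not the driver code.  The submit path
 * (producer) claims the slot at 'put'; the clean-up worker (consumer)
 * retires the slot at 'get'.
 */
#include <stdio.h>

#define RING_LEN 4u                     /* stands in for num_inflight_jobs */

struct job {
	int id;                         /* placeholder for fences/cmd entries */
};

struct joblist {
	struct job jobs[RING_LEN];      /* pre-allocated once at gpfifo alloc */
	unsigned int get;               /* consumer index */
	unsigned int put;               /* producer index */
};

/* Filled/free slot counts, mirroring CIRC_CNT()/CIRC_SPACE(). */
static unsigned int ring_cnt(const struct joblist *l)
{
	return (l->put + RING_LEN - l->get) % RING_LEN;
}

static unsigned int ring_space(const struct joblist *l)
{
	/* one slot stays empty so that put == get always means "empty" */
	return RING_LEN - 1 - ring_cnt(l);
}

/* Producer: reserve the slot at 'put' instead of kzalloc()ing a job. */
static struct job *joblist_alloc(struct joblist *l)
{
	return ring_space(l) ? &l->jobs[l->put] : NULL;
}

static void joblist_add(struct joblist *l)
{
	l->put = (l->put + 1) % RING_LEN;
}

/* Consumer: the oldest job sits at 'get'; deleting just advances 'get'. */
static struct job *joblist_peek(struct joblist *l)
{
	return ring_cnt(l) ? &l->jobs[l->get] : NULL;
}

static void joblist_delete(struct joblist *l)
{
	l->get = (l->get + 1) % RING_LEN;
}

int main(void)
{
	struct joblist l = { .get = 0, .put = 0 };

	for (int i = 0; i < 6; i++) {
		struct job *j = joblist_alloc(&l);

		if (!j) {               /* ring full: retire the oldest job first */
			printf("retiring job %d\n", joblist_peek(&l)->id);
			joblist_delete(&l);
			j = joblist_alloc(&l);
		}
		j->id = i;
		joblist_add(&l);
		printf("submitted job %d\n", i);
	}
	return 0;
}
```

Keeping one slot empty makes put == get unambiguously mean "empty"; the
driver gets the same bookkeeping from the CIRC_SPACE()/CIRC_CNT() helpers in
<linux/circ_buf.h>, which is why the patch adds that include.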
Bug 1795076
Change-Id: I6b52e5c575871107ff380f9a5790f440a6969347
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1203300
(cherry picked from commit 9fd270c22b860935dffe244753dabd87454bef39)
Reviewed-on: http://git-master/r/1223934
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 390 |
1 file changed, 345 insertions, 45 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 4019721a..cc3bbbd2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/anon_inodes.h> | 26 | #include <linux/anon_inodes.h> |
27 | #include <linux/dma-buf.h> | 27 | #include <linux/dma-buf.h> |
28 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
29 | #include <linux/circ_buf.h> | ||
29 | 30 | ||
30 | #include "debug_gk20a.h" | 31 | #include "debug_gk20a.h" |
31 | #include "ctxsw_trace_gk20a.h" | 32 | #include "ctxsw_trace_gk20a.h" |
@@ -55,6 +56,15 @@ static void free_priv_cmdbuf(struct channel_gk20a *c, | |||
55 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); | 56 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); |
56 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); | 57 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); |
57 | 58 | ||
59 | static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c); | ||
60 | |||
61 | static void channel_gk20a_joblist_add(struct channel_gk20a *c, | ||
62 | struct channel_gk20a_job *job); | ||
63 | static void channel_gk20a_joblist_delete(struct channel_gk20a *c, | ||
64 | struct channel_gk20a_job *job); | ||
65 | static struct channel_gk20a_job *channel_gk20a_joblist_peek( | ||
66 | struct channel_gk20a *c); | ||
67 | |||
58 | static int channel_gk20a_commit_userd(struct channel_gk20a *c); | 68 | static int channel_gk20a_commit_userd(struct channel_gk20a *c); |
59 | static int channel_gk20a_setup_userd(struct channel_gk20a *c); | 69 | static int channel_gk20a_setup_userd(struct channel_gk20a *c); |
60 | 70 | ||
@@ -460,6 +470,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
460 | { | 470 | { |
461 | struct channel_gk20a_job *job, *n; | 471 | struct channel_gk20a_job *job, *n; |
462 | bool released_job_semaphore = false; | 472 | bool released_job_semaphore = false; |
473 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch); | ||
463 | 474 | ||
464 | gk20a_channel_cancel_job_clean_up(ch, true); | 475 | gk20a_channel_cancel_job_clean_up(ch, true); |
465 | 476 | ||
@@ -471,14 +482,37 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
471 | 482 | ||
472 | /* release all job semaphores (applies only to jobs that use | 483 | /* release all job semaphores (applies only to jobs that use |
473 | semaphore synchronization) */ | 484 | semaphore synchronization) */ |
474 | spin_lock(&ch->jobs_lock); | 485 | channel_gk20a_joblist_lock(ch); |
475 | list_for_each_entry_safe(job, n, &ch->jobs, list) { | 486 | if (pre_alloc_enabled) { |
476 | if (job->post_fence->semaphore) { | 487 | int tmp_get = ch->joblist.pre_alloc.get; |
477 | gk20a_semaphore_release(job->post_fence->semaphore); | 488 | int put = ch->joblist.pre_alloc.put; |
478 | released_job_semaphore = true; | 489 | |
490 | /* | ||
491 | * ensure put is read before any subsequent reads. | ||
492 | * see corresponding wmb in gk20a_channel_add_job() | ||
493 | */ | ||
494 | rmb(); | ||
495 | |||
496 | while (tmp_get != put) { | ||
497 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; | ||
498 | if (job->post_fence->semaphore) { | ||
499 | gk20a_semaphore_release( | ||
500 | job->post_fence->semaphore); | ||
501 | released_job_semaphore = true; | ||
502 | } | ||
503 | tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length; | ||
504 | } | ||
505 | } else { | ||
506 | list_for_each_entry_safe(job, n, | ||
507 | &ch->joblist.dynamic.jobs, list) { | ||
508 | if (job->post_fence->semaphore) { | ||
509 | gk20a_semaphore_release( | ||
510 | job->post_fence->semaphore); | ||
511 | released_job_semaphore = true; | ||
512 | } | ||
479 | } | 513 | } |
480 | } | 514 | } |
481 | spin_unlock(&ch->jobs_lock); | 515 | channel_gk20a_joblist_unlock(ch); |
482 | 516 | ||
483 | if (released_job_semaphore) | 517 | if (released_job_semaphore) |
484 | wake_up_interruptible_all(&ch->semaphore_wq); | 518 | wake_up_interruptible_all(&ch->semaphore_wq); |
@@ -511,9 +545,9 @@ int gk20a_wait_channel_idle(struct channel_gk20a *ch) | |||
511 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g)); | 545 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g)); |
512 | 546 | ||
513 | do { | 547 | do { |
514 | spin_lock(&ch->jobs_lock); | 548 | channel_gk20a_joblist_lock(ch); |
515 | channel_idle = list_empty(&ch->jobs); | 549 | channel_idle = channel_gk20a_joblist_is_empty(ch); |
516 | spin_unlock(&ch->jobs_lock); | 550 | channel_gk20a_joblist_unlock(ch); |
517 | if (channel_idle) | 551 | if (channel_idle) |
518 | break; | 552 | break; |
519 | 553 | ||
@@ -1016,6 +1050,10 @@ unbind: | |||
1016 | 1050 | ||
1017 | mutex_unlock(&g->dbg_sessions_lock); | 1051 | mutex_unlock(&g->dbg_sessions_lock); |
1018 | 1052 | ||
1053 | /* free pre-allocated resources, if applicable */ | ||
1054 | if (channel_gk20a_is_prealloc_enabled(ch)) | ||
1055 | channel_gk20a_free_prealloc_resources(ch); | ||
1056 | |||
1019 | /* make sure we catch accesses of unopened channels in case | 1057 | /* make sure we catch accesses of unopened channels in case |
1020 | * there's non-refcounted channel pointers hanging around */ | 1058 | * there's non-refcounted channel pointers hanging around */ |
1021 | ch->g = NULL; | 1059 | ch->g = NULL; |
@@ -1422,7 +1460,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1422 | /* we already handled q->put + size > q->size so BUG_ON this */ | 1460 | /* we already handled q->put + size > q->size so BUG_ON this */ |
1423 | BUG_ON(q->put > q->size); | 1461 | BUG_ON(q->put > q->size); |
1424 | 1462 | ||
1425 | /* commit the previous writes before making the entry valid */ | 1463 | /* |
1464 | * commit the previous writes before making the entry valid. | ||
1465 | * see the corresponding rmb() in gk20a_free_priv_cmdbuf(). | ||
1466 | */ | ||
1426 | wmb(); | 1467 | wmb(); |
1427 | 1468 | ||
1428 | e->valid = true; | 1469 | e->valid = true; |
@@ -1436,26 +1477,222 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1436 | static void free_priv_cmdbuf(struct channel_gk20a *c, | 1477 | static void free_priv_cmdbuf(struct channel_gk20a *c, |
1437 | struct priv_cmd_entry *e) | 1478 | struct priv_cmd_entry *e) |
1438 | { | 1479 | { |
1439 | kfree(e); | 1480 | if (channel_gk20a_is_prealloc_enabled(c)) |
1481 | memset(e, 0, sizeof(struct priv_cmd_entry)); | ||
1482 | else | ||
1483 | kfree(e); | ||
1484 | } | ||
1485 | |||
1486 | static int channel_gk20a_alloc_job(struct channel_gk20a *c, | ||
1487 | struct channel_gk20a_job **job_out) | ||
1488 | { | ||
1489 | int err = 0; | ||
1490 | |||
1491 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1492 | int put = c->joblist.pre_alloc.put; | ||
1493 | int get = c->joblist.pre_alloc.get; | ||
1494 | |||
1495 | /* | ||
1496 | * ensure all subsequent reads happen after reading get. | ||
1497 | * see corresponding wmb in gk20a_channel_clean_up_jobs() | ||
1498 | */ | ||
1499 | rmb(); | ||
1500 | |||
1501 | if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) | ||
1502 | *job_out = &c->joblist.pre_alloc.jobs[put]; | ||
1503 | else { | ||
1504 | gk20a_warn(dev_from_gk20a(c->g), | ||
1505 | "out of job ringbuffer space\n"); | ||
1506 | err = -EAGAIN; | ||
1507 | } | ||
1508 | } else { | ||
1509 | *job_out = kzalloc(sizeof(struct channel_gk20a_job), | ||
1510 | GFP_KERNEL); | ||
1511 | if (!job_out) | ||
1512 | err = -ENOMEM; | ||
1513 | } | ||
1514 | |||
1515 | return err; | ||
1516 | } | ||
1517 | |||
1518 | static void channel_gk20a_free_job(struct channel_gk20a *c, | ||
1519 | struct channel_gk20a_job *job) | ||
1520 | { | ||
1521 | /* | ||
1522 | * In case of pre_allocated jobs, we need to clean out | ||
1523 | * the job but maintain the pointers to the priv_cmd_entry, | ||
1524 | * since they're inherently tied to the job node. | ||
1525 | */ | ||
1526 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1527 | struct priv_cmd_entry *wait_cmd = job->wait_cmd; | ||
1528 | struct priv_cmd_entry *incr_cmd = job->incr_cmd; | ||
1529 | memset(job, 0, sizeof(*job)); | ||
1530 | job->wait_cmd = wait_cmd; | ||
1531 | job->incr_cmd = incr_cmd; | ||
1532 | } else | ||
1533 | kfree(job); | ||
1534 | } | ||
1535 | |||
1536 | void channel_gk20a_joblist_lock(struct channel_gk20a *c) | ||
1537 | { | ||
1538 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
1539 | mutex_lock(&c->joblist.pre_alloc.read_lock); | ||
1540 | else | ||
1541 | spin_lock(&c->joblist.dynamic.lock); | ||
1440 | } | 1542 | } |
1441 | 1543 | ||
1442 | static struct channel_gk20a_job *channel_gk20a_alloc_job( | 1544 | void channel_gk20a_joblist_unlock(struct channel_gk20a *c) |
1545 | { | ||
1546 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
1547 | mutex_unlock(&c->joblist.pre_alloc.read_lock); | ||
1548 | else | ||
1549 | spin_unlock(&c->joblist.dynamic.lock); | ||
1550 | } | ||
1551 | |||
1552 | static struct channel_gk20a_job *channel_gk20a_joblist_peek( | ||
1443 | struct channel_gk20a *c) | 1553 | struct channel_gk20a *c) |
1444 | { | 1554 | { |
1555 | int get; | ||
1445 | struct channel_gk20a_job *job = NULL; | 1556 | struct channel_gk20a_job *job = NULL; |
1446 | 1557 | ||
1447 | job = kzalloc(sizeof(*job), GFP_KERNEL); | 1558 | if (channel_gk20a_is_prealloc_enabled(c)) { |
1559 | if (!channel_gk20a_joblist_is_empty(c)) { | ||
1560 | get = c->joblist.pre_alloc.get; | ||
1561 | job = &c->joblist.pre_alloc.jobs[get]; | ||
1562 | } | ||
1563 | } else { | ||
1564 | if (!list_empty(&c->joblist.dynamic.jobs)) | ||
1565 | job = list_first_entry(&c->joblist.dynamic.jobs, | ||
1566 | struct channel_gk20a_job, list); | ||
1567 | } | ||
1568 | |||
1448 | return job; | 1569 | return job; |
1449 | } | 1570 | } |
1450 | 1571 | ||
1451 | static void channel_gk20a_free_job(struct channel_gk20a *c, | 1572 | static void channel_gk20a_joblist_add(struct channel_gk20a *c, |
1452 | struct channel_gk20a_job *job) | 1573 | struct channel_gk20a_job *job) |
1453 | { | 1574 | { |
1454 | kfree(job); | 1575 | if (channel_gk20a_is_prealloc_enabled(c)) { |
1576 | c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) % | ||
1577 | (c->joblist.pre_alloc.length); | ||
1578 | } else { | ||
1579 | list_add_tail(&job->list, &c->joblist.dynamic.jobs); | ||
1580 | } | ||
1581 | } | ||
1582 | |||
1583 | static void channel_gk20a_joblist_delete(struct channel_gk20a *c, | ||
1584 | struct channel_gk20a_job *job) | ||
1585 | { | ||
1586 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1587 | c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) % | ||
1588 | (c->joblist.pre_alloc.length); | ||
1589 | } else { | ||
1590 | list_del_init(&job->list); | ||
1591 | } | ||
1592 | } | ||
1593 | |||
1594 | bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c) | ||
1595 | { | ||
1596 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1597 | int get = c->joblist.pre_alloc.get; | ||
1598 | int put = c->joblist.pre_alloc.put; | ||
1599 | return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length)); | ||
1600 | } | ||
1601 | |||
1602 | return list_empty(&c->joblist.dynamic.jobs); | ||
1603 | } | ||
1604 | |||
1605 | bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) | ||
1606 | { | ||
1607 | bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; | ||
1608 | |||
1609 | rmb(); | ||
1610 | return pre_alloc_enabled; | ||
1611 | } | ||
1612 | |||
1613 | static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, | ||
1614 | unsigned int num_jobs) | ||
1615 | { | ||
1616 | int i, err; | ||
1617 | size_t size; | ||
1618 | struct priv_cmd_entry *entries = NULL; | ||
1619 | |||
1620 | if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs) | ||
1621 | return -EINVAL; | ||
1622 | |||
1623 | /* | ||
1624 | * pre-allocate the job list. | ||
1625 | * since vmalloc take in an unsigned long, we need | ||
1626 | * to make sure we don't hit an overflow condition | ||
1627 | */ | ||
1628 | size = sizeof(struct channel_gk20a_job); | ||
1629 | if (num_jobs <= ULONG_MAX / size) | ||
1630 | c->joblist.pre_alloc.jobs = vzalloc(num_jobs * size); | ||
1631 | if (!c->joblist.pre_alloc.jobs) { | ||
1632 | err = -ENOMEM; | ||
1633 | goto clean_up; | ||
1634 | } | ||
1635 | |||
1636 | /* | ||
1637 | * pre-allocate 2x priv_cmd_entry for each job up front. | ||
1638 | * since vmalloc take in an unsigned long, we need | ||
1639 | * to make sure we don't hit an overflow condition | ||
1640 | */ | ||
1641 | size = sizeof(struct priv_cmd_entry); | ||
1642 | if (num_jobs <= ULONG_MAX / (size << 1)) | ||
1643 | entries = vzalloc((num_jobs << 1) * size); | ||
1644 | if (!entries) { | ||
1645 | err = -ENOMEM; | ||
1646 | goto clean_up_joblist; | ||
1647 | } | ||
1648 | |||
1649 | for (i = 0; i < num_jobs; i++) { | ||
1650 | c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i]; | ||
1651 | c->joblist.pre_alloc.jobs[i].incr_cmd = | ||
1652 | &entries[i + num_jobs]; | ||
1653 | } | ||
1654 | |||
1655 | /* pre-allocate a fence pool */ | ||
1656 | err = gk20a_alloc_fence_pool(c, num_jobs); | ||
1657 | if (err) | ||
1658 | goto clean_up_priv_cmd; | ||
1659 | |||
1660 | c->joblist.pre_alloc.length = num_jobs; | ||
1661 | |||
1662 | /* | ||
1663 | * commit the previous writes before setting the flag. | ||
1664 | * see corresponding rmb in channel_gk20a_is_prealloc_enabled() | ||
1665 | */ | ||
1666 | wmb(); | ||
1667 | c->joblist.pre_alloc.enabled = true; | ||
1668 | |||
1669 | return 0; | ||
1670 | |||
1671 | clean_up_priv_cmd: | ||
1672 | vfree(entries); | ||
1673 | clean_up_joblist: | ||
1674 | vfree(c->joblist.pre_alloc.jobs); | ||
1675 | clean_up: | ||
1676 | memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc)); | ||
1677 | return err; | ||
1678 | } | ||
1679 | |||
1680 | static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) | ||
1681 | { | ||
1682 | vfree(c->joblist.pre_alloc.jobs[0].wait_cmd); | ||
1683 | vfree(c->joblist.pre_alloc.jobs); | ||
1684 | gk20a_free_fence_pool(c); | ||
1685 | |||
1686 | /* | ||
1687 | * commit the previous writes before disabling the flag. | ||
1688 | * see corresponding rmb in channel_gk20a_is_prealloc_enabled() | ||
1689 | */ | ||
1690 | wmb(); | ||
1691 | c->joblist.pre_alloc.enabled = false; | ||
1455 | } | 1692 | } |
1456 | 1693 | ||
1457 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 1694 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
1458 | struct nvgpu_alloc_gpfifo_args *args) | 1695 | struct nvgpu_alloc_gpfifo_ex_args *args) |
1459 | { | 1696 | { |
1460 | struct gk20a *g = c->g; | 1697 | struct gk20a *g = c->g; |
1461 | struct device *d = dev_from_gk20a(g); | 1698 | struct device *d = dev_from_gk20a(g); |
@@ -1539,19 +1776,30 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1539 | 1776 | ||
1540 | /* TBD: setup engine contexts */ | 1777 | /* TBD: setup engine contexts */ |
1541 | 1778 | ||
1779 | if (args->num_inflight_jobs) { | ||
1780 | err = channel_gk20a_prealloc_resources(c, | ||
1781 | args->num_inflight_jobs); | ||
1782 | if (err) | ||
1783 | goto clean_up_sync; | ||
1784 | } | ||
1785 | |||
1542 | err = channel_gk20a_alloc_priv_cmdbuf(c); | 1786 | err = channel_gk20a_alloc_priv_cmdbuf(c); |
1543 | if (err) | 1787 | if (err) |
1544 | goto clean_up_sync; | 1788 | goto clean_up_prealloc; |
1545 | 1789 | ||
1546 | err = channel_gk20a_update_runlist(c, true); | 1790 | err = channel_gk20a_update_runlist(c, true); |
1547 | if (err) | 1791 | if (err) |
1548 | goto clean_up_sync; | 1792 | goto clean_up_priv_cmd; |
1549 | 1793 | ||
1550 | g->ops.fifo.bind_channel(c); | 1794 | g->ops.fifo.bind_channel(c); |
1551 | 1795 | ||
1552 | gk20a_dbg_fn("done"); | 1796 | gk20a_dbg_fn("done"); |
1553 | return 0; | 1797 | return 0; |
1554 | 1798 | ||
1799 | clean_up_priv_cmd: | ||
1800 | channel_gk20a_free_priv_cmdbuf(c); | ||
1801 | clean_up_prealloc: | ||
1802 | channel_gk20a_free_prealloc_resources(c); | ||
1555 | clean_up_sync: | 1803 | clean_up_sync: |
1556 | gk20a_channel_sync_destroy(c->sync); | 1804 | gk20a_channel_sync_destroy(c->sync); |
1557 | c->sync = NULL; | 1805 | c->sync = NULL; |
@@ -1878,6 +2126,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1878 | struct vm_gk20a *vm = c->vm; | 2126 | struct vm_gk20a *vm = c->vm; |
1879 | struct mapped_buffer_node **mapped_buffers = NULL; | 2127 | struct mapped_buffer_node **mapped_buffers = NULL; |
1880 | int err = 0, num_mapped_buffers = 0; | 2128 | int err = 0, num_mapped_buffers = 0; |
2129 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
1881 | 2130 | ||
1882 | /* job needs reference to this vm (released in channel_update) */ | 2131 | /* job needs reference to this vm (released in channel_update) */ |
1883 | gk20a_vm_get(vm); | 2132 | gk20a_vm_get(vm); |
@@ -1898,9 +2147,19 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1898 | 2147 | ||
1899 | gk20a_channel_timeout_start(c, job); | 2148 | gk20a_channel_timeout_start(c, job); |
1900 | 2149 | ||
1901 | spin_lock(&c->jobs_lock); | 2150 | if (!pre_alloc_enabled) |
1902 | list_add_tail(&job->list, &c->jobs); | 2151 | channel_gk20a_joblist_lock(c); |
1903 | spin_unlock(&c->jobs_lock); | 2152 | |
2153 | /* | ||
2154 | * ensure all pending write complete before adding to the list. | ||
2155 | * see corresponding rmb in gk20a_channel_clean_up_jobs() & | ||
2156 | * gk20a_channel_abort_clean_up() | ||
2157 | */ | ||
2158 | wmb(); | ||
2159 | channel_gk20a_joblist_add(c, job); | ||
2160 | |||
2161 | if (!pre_alloc_enabled) | ||
2162 | channel_gk20a_joblist_unlock(c); | ||
1904 | } else { | 2163 | } else { |
1905 | err = -ETIMEDOUT; | 2164 | err = -ETIMEDOUT; |
1906 | goto err_put_buffers; | 2165 | goto err_put_buffers; |
@@ -1945,14 +2204,20 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work) | |||
1945 | while (1) { | 2204 | while (1) { |
1946 | bool completed; | 2205 | bool completed; |
1947 | 2206 | ||
1948 | spin_lock(&c->jobs_lock); | 2207 | channel_gk20a_joblist_lock(c); |
1949 | if (list_empty(&c->jobs)) { | 2208 | if (channel_gk20a_joblist_is_empty(c)) { |
1950 | spin_unlock(&c->jobs_lock); | 2209 | channel_gk20a_joblist_unlock(c); |
1951 | break; | 2210 | break; |
1952 | } | 2211 | } |
1953 | job = list_first_entry(&c->jobs, | 2212 | |
1954 | struct channel_gk20a_job, list); | 2213 | /* |
1955 | spin_unlock(&c->jobs_lock); | 2214 | * ensure that all subsequent reads occur after checking |
2215 | * that we have a valid node. see corresponding wmb in | ||
2216 | * gk20a_channel_add_job(). | ||
2217 | */ | ||
2218 | rmb(); | ||
2219 | job = channel_gk20a_joblist_peek(c); | ||
2220 | channel_gk20a_joblist_unlock(c); | ||
1956 | 2221 | ||
1957 | completed = gk20a_fence_is_expired(job->post_fence); | 2222 | completed = gk20a_fence_is_expired(job->post_fence); |
1958 | if (!completed) { | 2223 | if (!completed) { |
@@ -1998,9 +2263,14 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work) | |||
1998 | * so this wouldn't get freed here. */ | 2263 | * so this wouldn't get freed here. */ |
1999 | gk20a_channel_put(c); | 2264 | gk20a_channel_put(c); |
2000 | 2265 | ||
2001 | spin_lock(&c->jobs_lock); | 2266 | /* |
2002 | list_del_init(&job->list); | 2267 | * ensure all pending writes complete before deleting the node. |
2003 | spin_unlock(&c->jobs_lock); | 2268 | * see corresponding rmb in channel_gk20a_alloc_job(). |
2269 | */ | ||
2270 | wmb(); | ||
2271 | channel_gk20a_joblist_lock(c); | ||
2272 | channel_gk20a_joblist_delete(c, job); | ||
2273 | channel_gk20a_joblist_unlock(c); | ||
2004 | 2274 | ||
2005 | channel_gk20a_free_job(c, job); | 2275 | channel_gk20a_free_job(c, job); |
2006 | job_finished = 1; | 2276 | job_finished = 1; |
@@ -2160,6 +2430,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2160 | int wait_fence_fd = -1; | 2430 | int wait_fence_fd = -1; |
2161 | int err = 0; | 2431 | int err = 0; |
2162 | bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI); | 2432 | bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI); |
2433 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
2163 | 2434 | ||
2164 | /* | 2435 | /* |
2165 | * If user wants to always allocate sync_fence_fds then respect that; | 2436 | * If user wants to always allocate sync_fence_fds then respect that; |
@@ -2197,9 +2468,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2197 | * this condition. | 2468 | * this condition. |
2198 | */ | 2469 | */ |
2199 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { | 2470 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { |
2200 | job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry), | ||
2201 | GFP_KERNEL); | ||
2202 | job->pre_fence = gk20a_alloc_fence(c); | 2471 | job->pre_fence = gk20a_alloc_fence(c); |
2472 | if (!pre_alloc_enabled) | ||
2473 | job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry), | ||
2474 | GFP_KERNEL); | ||
2203 | 2475 | ||
2204 | if (!job->wait_cmd || !job->pre_fence) { | 2476 | if (!job->wait_cmd || !job->pre_fence) { |
2205 | err = -ENOMEM; | 2477 | err = -ENOMEM; |
@@ -2233,8 +2505,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2233 | * is used to keep track of method completion for idle railgating. The | 2505 | * is used to keep track of method completion for idle railgating. The |
2234 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | 2506 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. |
2235 | */ | 2507 | */ |
2236 | job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); | ||
2237 | job->post_fence = gk20a_alloc_fence(c); | 2508 | job->post_fence = gk20a_alloc_fence(c); |
2509 | if (!pre_alloc_enabled) | ||
2510 | job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), | ||
2511 | GFP_KERNEL); | ||
2238 | 2512 | ||
2239 | if (!job->incr_cmd || !job->post_fence) { | 2513 | if (!job->incr_cmd || !job->post_fence) { |
2240 | err = -ENOMEM; | 2514 | err = -ENOMEM; |
@@ -2256,15 +2530,17 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2256 | return 0; | 2530 | return 0; |
2257 | 2531 | ||
2258 | clean_up_post_fence: | 2532 | clean_up_post_fence: |
2259 | gk20a_free_priv_cmdbuf(c, job->incr_cmd); | ||
2260 | gk20a_fence_put(job->post_fence); | 2533 | gk20a_fence_put(job->post_fence); |
2261 | job->incr_cmd = NULL; | ||
2262 | job->post_fence = NULL; | 2534 | job->post_fence = NULL; |
2535 | free_priv_cmdbuf(c, job->incr_cmd); | ||
2536 | if (!pre_alloc_enabled) | ||
2537 | job->incr_cmd = NULL; | ||
2263 | clean_up_pre_fence: | 2538 | clean_up_pre_fence: |
2264 | gk20a_free_priv_cmdbuf(c, job->wait_cmd); | ||
2265 | gk20a_fence_put(job->pre_fence); | 2539 | gk20a_fence_put(job->pre_fence); |
2266 | job->wait_cmd = NULL; | ||
2267 | job->pre_fence = NULL; | 2540 | job->pre_fence = NULL; |
2541 | free_priv_cmdbuf(c, job->wait_cmd); | ||
2542 | if (!pre_alloc_enabled) | ||
2543 | job->wait_cmd = NULL; | ||
2268 | *wait_cmd = NULL; | 2544 | *wait_cmd = NULL; |
2269 | *pre_fence = NULL; | 2545 | *pre_fence = NULL; |
2270 | fail: | 2546 | fail: |
@@ -2388,11 +2664,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2388 | } | 2664 | } |
2389 | 2665 | ||
2390 | if (need_job_tracking) { | 2666 | if (need_job_tracking) { |
2391 | job = channel_gk20a_alloc_job(c); | 2667 | err = channel_gk20a_alloc_job(c, &job); |
2392 | if (!job) { | 2668 | if (err) |
2393 | err = -ENOMEM; | ||
2394 | goto clean_up; | 2669 | goto clean_up; |
2395 | } | ||
2396 | 2670 | ||
2397 | err = gk20a_submit_prepare_syncs(c, fence, job, | 2671 | err = gk20a_submit_prepare_syncs(c, fence, job, |
2398 | &wait_cmd, &incr_cmd, | 2672 | &wait_cmd, &incr_cmd, |
@@ -2463,13 +2737,14 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) | |||
2463 | init_waitqueue_head(&c->ref_count_dec_wq); | 2737 | init_waitqueue_head(&c->ref_count_dec_wq); |
2464 | mutex_init(&c->ioctl_lock); | 2738 | mutex_init(&c->ioctl_lock); |
2465 | mutex_init(&c->error_notifier_mutex); | 2739 | mutex_init(&c->error_notifier_mutex); |
2466 | spin_lock_init(&c->jobs_lock); | 2740 | spin_lock_init(&c->joblist.dynamic.lock); |
2741 | mutex_init(&c->joblist.pre_alloc.read_lock); | ||
2467 | raw_spin_lock_init(&c->timeout.lock); | 2742 | raw_spin_lock_init(&c->timeout.lock); |
2468 | mutex_init(&c->sync_lock); | 2743 | mutex_init(&c->sync_lock); |
2469 | INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); | 2744 | INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); |
2470 | INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs); | 2745 | INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs); |
2471 | mutex_init(&c->clean_up.lock); | 2746 | mutex_init(&c->clean_up.lock); |
2472 | INIT_LIST_HEAD(&c->jobs); | 2747 | INIT_LIST_HEAD(&c->joblist.dynamic.jobs); |
2473 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 2748 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
2474 | mutex_init(&c->cyclestate.cyclestate_buffer_mutex); | 2749 | mutex_init(&c->cyclestate.cyclestate_buffer_mutex); |
2475 | mutex_init(&c->cs_client_mutex); | 2750 | mutex_init(&c->cs_client_mutex); |
@@ -3119,7 +3394,7 @@ long gk20a_channel_ioctl(struct file *filp, | |||
3119 | (struct nvgpu_free_obj_ctx_args *)buf); | 3394 | (struct nvgpu_free_obj_ctx_args *)buf); |
3120 | gk20a_idle(dev); | 3395 | gk20a_idle(dev); |
3121 | break; | 3396 | break; |
3122 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: | 3397 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: |
3123 | err = gk20a_busy(dev); | 3398 | err = gk20a_busy(dev); |
3124 | if (err) { | 3399 | if (err) { |
3125 | dev_err(dev, | 3400 | dev_err(dev, |
@@ -3128,9 +3403,34 @@ long gk20a_channel_ioctl(struct file *filp, | |||
3128 | break; | 3403 | break; |
3129 | } | 3404 | } |
3130 | err = gk20a_alloc_channel_gpfifo(ch, | 3405 | err = gk20a_alloc_channel_gpfifo(ch, |
3131 | (struct nvgpu_alloc_gpfifo_args *)buf); | 3406 | (struct nvgpu_alloc_gpfifo_ex_args *)buf); |
3407 | gk20a_idle(dev); | ||
3408 | break; | ||
3409 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: | ||
3410 | { | ||
3411 | struct nvgpu_alloc_gpfifo_ex_args alloc_gpfifo_ex_args; | ||
3412 | struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = | ||
3413 | (struct nvgpu_alloc_gpfifo_args *)buf; | ||
3414 | |||
3415 | err = gk20a_busy(dev); | ||
3416 | if (err) { | ||
3417 | dev_err(dev, | ||
3418 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
3419 | __func__, cmd); | ||
3420 | break; | ||
3421 | } | ||
3422 | |||
3423 | /* prepare new args structure */ | ||
3424 | memset(&alloc_gpfifo_ex_args, 0, | ||
3425 | sizeof(struct nvgpu_alloc_gpfifo_ex_args)); | ||
3426 | alloc_gpfifo_ex_args.num_entries = | ||
3427 | alloc_gpfifo_args->num_entries; | ||
3428 | alloc_gpfifo_ex_args.flags = alloc_gpfifo_args->flags; | ||
3429 | |||
3430 | err = gk20a_alloc_channel_gpfifo(ch, &alloc_gpfifo_ex_args); | ||
3132 | gk20a_idle(dev); | 3431 | gk20a_idle(dev); |
3133 | break; | 3432 | break; |
3433 | } | ||
3134 | case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: | 3434 | case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: |
3135 | err = gk20a_ioctl_channel_submit_gpfifo(ch, | 3435 | err = gk20a_ioctl_channel_submit_gpfifo(ch, |
3136 | (struct nvgpu_submit_gpfifo_args *)buf); | 3436 | (struct nvgpu_submit_gpfifo_args *)buf); |