author	Sachit Kadle <skadle@nvidia.com>	2016-08-15 17:32:39 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-10-20 11:14:11 -0400
commit	733fb79b39869665addcd80ccdf1c15f4a5aaa29 (patch)
tree	b3c9c1ba32406ed9c2af4bebee820e83ea6172e4 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent	63e8592e06939e20c7b9e56b430353ebbee31ad6 (diff)
gpu: nvgpu: add support for pre-allocated resources
Add support for pre-allocation of job tracking resources w/ a new
(extended) ioctl. The goal is to avoid dynamic memory allocation in
the submit path. This patch does the following:

1) Introduces a new ioctl, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX,
   which enables pre-allocation of tracking resources per job:
   a) 2x priv_cmd_entry
   b) 2x gk20a_fence

2) Implements a circular ring buffer for job tracking to avoid
   lock contention between the producer (submitter) and the
   consumer (clean-up)

Bug 1795076

Change-Id: I6b52e5c575871107ff380f9a5790f440a6969347
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1203300
(cherry picked from commit 9fd270c22b860935dffe244753dabd87454bef39)
Reviewed-on: http://git-master/r/1223934
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	390
1 file changed, 345 insertions(+), 45 deletions(-)
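For context on the scheme described in the commit message, below is a minimal, self-contained user-space sketch of the single-producer/single-consumer ring-buffer indexing this patch adopts for job tracking: the submit path is the only writer of `put`, the clean-up worker is the only writer of `get`, so each side can check for space/occupancy without taking the other side's lock. It is illustrative only: the struct and helper names (and the fixed length of 4) are made up here, and the driver itself uses CIRC_SPACE()/CIRC_CNT() from <linux/circ_buf.h> together with wmb()/rmb() barrier pairs rather than the plain arithmetic shown.

/*
 * Illustrative sketch only, not driver code.
 * One slot is intentionally kept empty so that "full" and "empty"
 * can be distinguished without a separate counter.
 */
#include <stdio.h>

struct job { int id; };

struct joblist {
	struct job *jobs;	/* pre-allocated array of length len */
	unsigned int len;
	unsigned int put;	/* next free slot, advanced by the submitter */
	unsigned int get;	/* oldest in-flight job, advanced by clean-up */
};

/* number of jobs currently tracked */
static unsigned int ring_count(const struct joblist *l)
{
	return (l->put + l->len - l->get) % l->len;
}

/* producer: reserve the slot at put, or fail (the driver returns -EAGAIN) */
static struct job *ring_reserve(struct joblist *l)
{
	if (ring_count(l) == l->len - 1)
		return NULL;
	return &l->jobs[l->put];
}

/* producer: publish the reserved slot (the driver issues wmb() before this) */
static void ring_publish(struct joblist *l)
{
	l->put = (l->put + 1) % l->len;
}

/* consumer: retire the oldest job once its fence has expired */
static void ring_retire(struct joblist *l)
{
	l->get = (l->get + 1) % l->len;
}

int main(void)
{
	struct job slots[4] = { 0 };
	struct joblist l = { .jobs = slots, .len = 4 };
	struct job *j = ring_reserve(&l);

	if (j) {
		j->id = 1;
		ring_publish(&l);
	}
	printf("in flight: %u\n", ring_count(&l));	/* 1 */
	ring_retire(&l);
	printf("in flight: %u\n", ring_count(&l));	/* 0 */
	return 0;
}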
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 4019721a..cc3bbbd2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -26,6 +26,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/dma-buf.h>
 #include <linux/vmalloc.h>
+#include <linux/circ_buf.h>
 
 #include "debug_gk20a.h"
 #include "ctxsw_trace_gk20a.h"
@@ -55,6 +56,15 @@ static void free_priv_cmdbuf(struct channel_gk20a *c,
 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
 
+static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c);
+
+static void channel_gk20a_joblist_add(struct channel_gk20a *c,
+		struct channel_gk20a_job *job);
+static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
+		struct channel_gk20a_job *job);
+static struct channel_gk20a_job *channel_gk20a_joblist_peek(
+		struct channel_gk20a *c);
+
 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
 
@@ -460,6 +470,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 {
 	struct channel_gk20a_job *job, *n;
 	bool released_job_semaphore = false;
+	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
 
 	gk20a_channel_cancel_job_clean_up(ch, true);
 
@@ -471,14 +482,37 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 
 	/* release all job semaphores (applies only to jobs that use
 	   semaphore synchronization) */
-	spin_lock(&ch->jobs_lock);
-	list_for_each_entry_safe(job, n, &ch->jobs, list) {
-		if (job->post_fence->semaphore) {
-			gk20a_semaphore_release(job->post_fence->semaphore);
-			released_job_semaphore = true;
+	channel_gk20a_joblist_lock(ch);
+	if (pre_alloc_enabled) {
+		int tmp_get = ch->joblist.pre_alloc.get;
+		int put = ch->joblist.pre_alloc.put;
+
+		/*
+		 * ensure put is read before any subsequent reads.
+		 * see corresponding wmb in gk20a_channel_add_job()
+		 */
+		rmb();
+
+		while (tmp_get != put) {
+			job = &ch->joblist.pre_alloc.jobs[tmp_get];
+			if (job->post_fence->semaphore) {
+				gk20a_semaphore_release(
+					job->post_fence->semaphore);
+				released_job_semaphore = true;
+			}
+			tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
+		}
+	} else {
+		list_for_each_entry_safe(job, n,
+				&ch->joblist.dynamic.jobs, list) {
+			if (job->post_fence->semaphore) {
+				gk20a_semaphore_release(
+					job->post_fence->semaphore);
+				released_job_semaphore = true;
+			}
 		}
 	}
-	spin_unlock(&ch->jobs_lock);
+	channel_gk20a_joblist_unlock(ch);
 
 	if (released_job_semaphore)
 		wake_up_interruptible_all(&ch->semaphore_wq);
@@ -511,9 +545,9 @@ int gk20a_wait_channel_idle(struct channel_gk20a *ch)
 		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
 
 	do {
-		spin_lock(&ch->jobs_lock);
-		channel_idle = list_empty(&ch->jobs);
-		spin_unlock(&ch->jobs_lock);
+		channel_gk20a_joblist_lock(ch);
+		channel_idle = channel_gk20a_joblist_is_empty(ch);
+		channel_gk20a_joblist_unlock(ch);
 		if (channel_idle)
 			break;
 
@@ -1016,6 +1050,10 @@ unbind:
 
 	mutex_unlock(&g->dbg_sessions_lock);
 
+	/* free pre-allocated resources, if applicable */
+	if (channel_gk20a_is_prealloc_enabled(ch))
+		channel_gk20a_free_prealloc_resources(ch);
+
 	/* make sure we catch accesses of unopened channels in case
 	 * there's non-refcounted channel pointers hanging around */
 	ch->g = NULL;
@@ -1422,7 +1460,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	/* we already handled q->put + size > q->size so BUG_ON this */
 	BUG_ON(q->put > q->size);
 
-	/* commit the previous writes before making the entry valid */
+	/*
+	 * commit the previous writes before making the entry valid.
+	 * see the corresponding rmb() in gk20a_free_priv_cmdbuf().
+	 */
 	wmb();
 
 	e->valid = true;
@@ -1436,26 +1477,222 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 			     struct priv_cmd_entry *e)
 {
-	kfree(e);
+	if (channel_gk20a_is_prealloc_enabled(c))
+		memset(e, 0, sizeof(struct priv_cmd_entry));
+	else
+		kfree(e);
+}
+
+static int channel_gk20a_alloc_job(struct channel_gk20a *c,
+		struct channel_gk20a_job **job_out)
+{
+	int err = 0;
+
+	if (channel_gk20a_is_prealloc_enabled(c)) {
+		int put = c->joblist.pre_alloc.put;
+		int get = c->joblist.pre_alloc.get;
+
+		/*
+		 * ensure all subsequent reads happen after reading get.
+		 * see corresponding wmb in gk20a_channel_clean_up_jobs()
+		 */
+		rmb();
+
+		if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
+			*job_out = &c->joblist.pre_alloc.jobs[put];
+		else {
+			gk20a_warn(dev_from_gk20a(c->g),
+					"out of job ringbuffer space\n");
+			err = -EAGAIN;
+		}
+	} else {
+		*job_out = kzalloc(sizeof(struct channel_gk20a_job),
+				   GFP_KERNEL);
+		if (!job_out)
+			err = -ENOMEM;
+	}
+
+	return err;
+}
+
+static void channel_gk20a_free_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job)
+{
+	/*
+	 * In case of pre_allocated jobs, we need to clean out
+	 * the job but maintain the pointers to the priv_cmd_entry,
+	 * since they're inherently tied to the job node.
+	 */
+	if (channel_gk20a_is_prealloc_enabled(c)) {
+		struct priv_cmd_entry *wait_cmd = job->wait_cmd;
+		struct priv_cmd_entry *incr_cmd = job->incr_cmd;
+		memset(job, 0, sizeof(*job));
+		job->wait_cmd = wait_cmd;
+		job->incr_cmd = incr_cmd;
+	} else
+		kfree(job);
+}
+
+void channel_gk20a_joblist_lock(struct channel_gk20a *c)
+{
+	if (channel_gk20a_is_prealloc_enabled(c))
+		mutex_lock(&c->joblist.pre_alloc.read_lock);
+	else
+		spin_lock(&c->joblist.dynamic.lock);
 }
 
-static struct channel_gk20a_job *channel_gk20a_alloc_job(
+void channel_gk20a_joblist_unlock(struct channel_gk20a *c)
+{
+	if (channel_gk20a_is_prealloc_enabled(c))
+		mutex_unlock(&c->joblist.pre_alloc.read_lock);
+	else
+		spin_unlock(&c->joblist.dynamic.lock);
+}
+
+static struct channel_gk20a_job *channel_gk20a_joblist_peek(
 		struct channel_gk20a *c)
 {
+	int get;
 	struct channel_gk20a_job *job = NULL;
 
-	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (channel_gk20a_is_prealloc_enabled(c)) {
+		if (!channel_gk20a_joblist_is_empty(c)) {
+			get = c->joblist.pre_alloc.get;
+			job = &c->joblist.pre_alloc.jobs[get];
+		}
+	} else {
+		if (!list_empty(&c->joblist.dynamic.jobs))
+			job = list_first_entry(&c->joblist.dynamic.jobs,
+				       struct channel_gk20a_job, list);
+	}
+
 	return job;
 }
 
-static void channel_gk20a_free_job(struct channel_gk20a *c,
+static void channel_gk20a_joblist_add(struct channel_gk20a *c,
 		struct channel_gk20a_job *job)
 {
-	kfree(job);
+	if (channel_gk20a_is_prealloc_enabled(c)) {
+		c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) %
+				(c->joblist.pre_alloc.length);
+	} else {
+		list_add_tail(&job->list, &c->joblist.dynamic.jobs);
+	}
+}
+
+static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
+		struct channel_gk20a_job *job)
+{
+	if (channel_gk20a_is_prealloc_enabled(c)) {
+		c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) %
+				(c->joblist.pre_alloc.length);
+	} else {
+		list_del_init(&job->list);
+	}
+}
+
+bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c)
+{
+	if (channel_gk20a_is_prealloc_enabled(c)) {
+		int get = c->joblist.pre_alloc.get;
+		int put = c->joblist.pre_alloc.put;
+		return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length));
+	}
+
+	return list_empty(&c->joblist.dynamic.jobs);
+}
+
+bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
+{
+	bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
+
+	rmb();
+	return pre_alloc_enabled;
+}
+
+static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
+		unsigned int num_jobs)
+{
+	int i, err;
+	size_t size;
+	struct priv_cmd_entry *entries = NULL;
+
+	if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs)
+		return -EINVAL;
+
+	/*
+	 * pre-allocate the job list.
+	 * since vmalloc take in an unsigned long, we need
+	 * to make sure we don't hit an overflow condition
+	 */
+	size = sizeof(struct channel_gk20a_job);
+	if (num_jobs <= ULONG_MAX / size)
+		c->joblist.pre_alloc.jobs = vzalloc(num_jobs * size);
+	if (!c->joblist.pre_alloc.jobs) {
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	/*
+	 * pre-allocate 2x priv_cmd_entry for each job up front.
+	 * since vmalloc take in an unsigned long, we need
+	 * to make sure we don't hit an overflow condition
+	 */
+	size = sizeof(struct priv_cmd_entry);
+	if (num_jobs <= ULONG_MAX / (size << 1))
+		entries = vzalloc((num_jobs << 1) * size);
+	if (!entries) {
+		err = -ENOMEM;
+		goto clean_up_joblist;
+	}
+
+	for (i = 0; i < num_jobs; i++) {
+		c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
+		c->joblist.pre_alloc.jobs[i].incr_cmd =
+			&entries[i + num_jobs];
+	}
+
+	/* pre-allocate a fence pool */
+	err = gk20a_alloc_fence_pool(c, num_jobs);
+	if (err)
+		goto clean_up_priv_cmd;
+
+	c->joblist.pre_alloc.length = num_jobs;
+
+	/*
+	 * commit the previous writes before setting the flag.
+	 * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+	 */
+	wmb();
+	c->joblist.pre_alloc.enabled = true;
+
+	return 0;
+
+clean_up_priv_cmd:
+	vfree(entries);
+clean_up_joblist:
+	vfree(c->joblist.pre_alloc.jobs);
+clean_up:
+	memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
+	return err;
+}
+
+static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
+{
+	vfree(c->joblist.pre_alloc.jobs[0].wait_cmd);
+	vfree(c->joblist.pre_alloc.jobs);
+	gk20a_free_fence_pool(c);
+
+	/*
+	 * commit the previous writes before disabling the flag.
+	 * see corresponding rmb in channel_gk20a_is_prealloc_enabled()
+	 */
+	wmb();
+	c->joblist.pre_alloc.enabled = false;
 }
 
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
-			       struct nvgpu_alloc_gpfifo_args *args)
+			       struct nvgpu_alloc_gpfifo_ex_args *args)
 {
 	struct gk20a *g = c->g;
 	struct device *d = dev_from_gk20a(g);
@@ -1539,19 +1776,30 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 
 	/* TBD: setup engine contexts */
 
+	if (args->num_inflight_jobs) {
+		err = channel_gk20a_prealloc_resources(c,
+				args->num_inflight_jobs);
+		if (err)
+			goto clean_up_sync;
+	}
+
 	err = channel_gk20a_alloc_priv_cmdbuf(c);
 	if (err)
-		goto clean_up_sync;
+		goto clean_up_prealloc;
 
 	err = channel_gk20a_update_runlist(c, true);
 	if (err)
-		goto clean_up_sync;
+		goto clean_up_priv_cmd;
 
 	g->ops.fifo.bind_channel(c);
 
 	gk20a_dbg_fn("done");
 	return 0;
 
+clean_up_priv_cmd:
+	channel_gk20a_free_priv_cmdbuf(c);
+clean_up_prealloc:
+	channel_gk20a_free_prealloc_resources(c);
 clean_up_sync:
 	gk20a_channel_sync_destroy(c->sync);
 	c->sync = NULL;
@@ -1878,6 +2126,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	struct vm_gk20a *vm = c->vm;
 	struct mapped_buffer_node **mapped_buffers = NULL;
 	int err = 0, num_mapped_buffers = 0;
+	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
 
 	/* job needs reference to this vm (released in channel_update) */
 	gk20a_vm_get(vm);
@@ -1898,9 +2147,19 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 
 		gk20a_channel_timeout_start(c, job);
 
-		spin_lock(&c->jobs_lock);
-		list_add_tail(&job->list, &c->jobs);
-		spin_unlock(&c->jobs_lock);
+		if (!pre_alloc_enabled)
+			channel_gk20a_joblist_lock(c);
+
+		/*
+		 * ensure all pending write complete before adding to the list.
+		 * see corresponding rmb in gk20a_channel_clean_up_jobs() &
+		 * gk20a_channel_abort_clean_up()
+		 */
+		wmb();
+		channel_gk20a_joblist_add(c, job);
+
+		if (!pre_alloc_enabled)
+			channel_gk20a_joblist_unlock(c);
 	} else {
 		err = -ETIMEDOUT;
 		goto err_put_buffers;
@@ -1945,14 +2204,20 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 	while (1) {
 		bool completed;
 
-		spin_lock(&c->jobs_lock);
-		if (list_empty(&c->jobs)) {
-			spin_unlock(&c->jobs_lock);
+		channel_gk20a_joblist_lock(c);
+		if (channel_gk20a_joblist_is_empty(c)) {
+			channel_gk20a_joblist_unlock(c);
 			break;
 		}
-		job = list_first_entry(&c->jobs,
-				       struct channel_gk20a_job, list);
-		spin_unlock(&c->jobs_lock);
+
+		/*
+		 * ensure that all subsequent reads occur after checking
+		 * that we have a valid node. see corresponding wmb in
+		 * gk20a_channel_add_job().
+		 */
+		rmb();
+		job = channel_gk20a_joblist_peek(c);
+		channel_gk20a_joblist_unlock(c);
 
 		completed = gk20a_fence_is_expired(job->post_fence);
 		if (!completed) {
@@ -1998,9 +2263,14 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 		 * so this wouldn't get freed here. */
 		gk20a_channel_put(c);
 
-		spin_lock(&c->jobs_lock);
-		list_del_init(&job->list);
-		spin_unlock(&c->jobs_lock);
+		/*
+		 * ensure all pending writes complete before deleting the node.
+		 * see corresponding rmb in channel_gk20a_alloc_job().
+		 */
+		wmb();
+		channel_gk20a_joblist_lock(c);
+		channel_gk20a_joblist_delete(c, job);
+		channel_gk20a_joblist_unlock(c);
 
 		channel_gk20a_free_job(c, job);
 		job_finished = 1;
@@ -2160,6 +2430,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	int wait_fence_fd = -1;
 	int err = 0;
 	bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
+	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
 
 	/*
 	 * If user wants to always allocate sync_fence_fds then respect that;
@@ -2197,9 +2468,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	 * this condition.
 	 */
 	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
-		job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
-					GFP_KERNEL);
 		job->pre_fence = gk20a_alloc_fence(c);
+		if (!pre_alloc_enabled)
+			job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
+						GFP_KERNEL);
 
 		if (!job->wait_cmd || !job->pre_fence) {
 			err = -ENOMEM;
@@ -2233,8 +2505,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	 * is used to keep track of method completion for idle railgating. The
 	 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
 	 */
-	job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
 	job->post_fence = gk20a_alloc_fence(c);
+	if (!pre_alloc_enabled)
+		job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry),
+					GFP_KERNEL);
 
 	if (!job->incr_cmd || !job->post_fence) {
 		err = -ENOMEM;
@@ -2256,15 +2530,17 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	return 0;
 
 clean_up_post_fence:
-	gk20a_free_priv_cmdbuf(c, job->incr_cmd);
 	gk20a_fence_put(job->post_fence);
-	job->incr_cmd = NULL;
 	job->post_fence = NULL;
+	free_priv_cmdbuf(c, job->incr_cmd);
+	if (!pre_alloc_enabled)
+		job->incr_cmd = NULL;
 clean_up_pre_fence:
-	gk20a_free_priv_cmdbuf(c, job->wait_cmd);
 	gk20a_fence_put(job->pre_fence);
-	job->wait_cmd = NULL;
 	job->pre_fence = NULL;
+	free_priv_cmdbuf(c, job->wait_cmd);
+	if (!pre_alloc_enabled)
+		job->wait_cmd = NULL;
 	*wait_cmd = NULL;
 	*pre_fence = NULL;
 fail:
@@ -2388,11 +2664,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	}
 
 	if (need_job_tracking) {
-		job = channel_gk20a_alloc_job(c);
-		if (!job) {
-			err = -ENOMEM;
+		err = channel_gk20a_alloc_job(c, &job);
+		if (err)
 			goto clean_up;
-		}
 
 		err = gk20a_submit_prepare_syncs(c, fence, job,
 						 &wait_cmd, &incr_cmd,
@@ -2463,13 +2737,14 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	init_waitqueue_head(&c->ref_count_dec_wq);
 	mutex_init(&c->ioctl_lock);
 	mutex_init(&c->error_notifier_mutex);
-	spin_lock_init(&c->jobs_lock);
+	spin_lock_init(&c->joblist.dynamic.lock);
+	mutex_init(&c->joblist.pre_alloc.read_lock);
 	raw_spin_lock_init(&c->timeout.lock);
 	mutex_init(&c->sync_lock);
 	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
 	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
 	mutex_init(&c->clean_up.lock);
-	INIT_LIST_HEAD(&c->jobs);
+	INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
 	mutex_init(&c->cs_client_mutex);
@@ -3119,7 +3394,7 @@ long gk20a_channel_ioctl(struct file *filp,
 			(struct nvgpu_free_obj_ctx_args *)buf);
 		gk20a_idle(dev);
 		break;
-	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
+	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX:
 		err = gk20a_busy(dev);
 		if (err) {
 			dev_err(dev,
@@ -3128,9 +3403,34 @@ long gk20a_channel_ioctl(struct file *filp,
 			break;
 		}
 		err = gk20a_alloc_channel_gpfifo(ch,
-				(struct nvgpu_alloc_gpfifo_args *)buf);
+				(struct nvgpu_alloc_gpfifo_ex_args *)buf);
+		gk20a_idle(dev);
+		break;
+	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
+	{
+		struct nvgpu_alloc_gpfifo_ex_args alloc_gpfifo_ex_args;
+		struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args =
+				(struct nvgpu_alloc_gpfifo_args *)buf;
+
+		err = gk20a_busy(dev);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+
+		/* prepare new args structure */
+		memset(&alloc_gpfifo_ex_args, 0,
+				sizeof(struct nvgpu_alloc_gpfifo_ex_args));
+		alloc_gpfifo_ex_args.num_entries =
+				alloc_gpfifo_args->num_entries;
+		alloc_gpfifo_ex_args.flags = alloc_gpfifo_args->flags;
+
+		err = gk20a_alloc_channel_gpfifo(ch, &alloc_gpfifo_ex_args);
 		gk20a_idle(dev);
 		break;
+	}
 	case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
 		err = gk20a_ioctl_channel_submit_gpfifo(ch,
 			(struct nvgpu_submit_gpfifo_args *)buf);