diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/cde_gk20a.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 390 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 28 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 70 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.h | 15 |
6 files changed, 454 insertions, 65 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index ca785b19..17453489 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c | |||
@@ -1126,9 +1126,9 @@ __releases(&cde_app->mutex) | |||
1126 | struct gk20a_cde_app *cde_app = &g->cde_app; | 1126 | struct gk20a_cde_app *cde_app = &g->cde_app; |
1127 | bool channel_idle; | 1127 | bool channel_idle; |
1128 | 1128 | ||
1129 | spin_lock(&ch->jobs_lock); | 1129 | channel_gk20a_joblist_lock(ch); |
1130 | channel_idle = list_empty(&ch->jobs); | 1130 | channel_idle = channel_gk20a_joblist_is_empty(ch); |
1131 | spin_unlock(&ch->jobs_lock); | 1131 | channel_gk20a_joblist_unlock(ch); |
1132 | 1132 | ||
1133 | if (!channel_idle) | 1133 | if (!channel_idle) |
1134 | return; | 1134 | return; |
@@ -1207,7 +1207,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) | |||
1207 | 1207 | ||
1208 | /* allocate gpfifo (1024 should be more than enough) */ | 1208 | /* allocate gpfifo (1024 should be more than enough) */ |
1209 | err = gk20a_alloc_channel_gpfifo(ch, | 1209 | err = gk20a_alloc_channel_gpfifo(ch, |
1210 | &(struct nvgpu_alloc_gpfifo_args){1024, 0}); | 1210 | &(struct nvgpu_alloc_gpfifo_ex_args){1024, 0, 0, {}}); |
1211 | if (err) { | 1211 | if (err) { |
1212 | gk20a_warn(cde_ctx->dev, "cde: unable to allocate gpfifo"); | 1212 | gk20a_warn(cde_ctx->dev, "cde: unable to allocate gpfifo"); |
1213 | goto err_alloc_gpfifo; | 1213 | goto err_alloc_gpfifo; |
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index 109ec240..bfd183fb 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c | |||
@@ -126,9 +126,9 @@ static void gk20a_ce_finished_ctx_cb(struct channel_gk20a *ch, void *data) | |||
126 | bool channel_idle; | 126 | bool channel_idle; |
127 | u32 event; | 127 | u32 event; |
128 | 128 | ||
129 | spin_lock(&ch->jobs_lock); | 129 | channel_gk20a_joblist_lock(ch); |
130 | channel_idle = list_empty(&ch->jobs); | 130 | channel_idle = channel_gk20a_joblist_is_empty(ch); |
131 | spin_unlock(&ch->jobs_lock); | 131 | channel_gk20a_joblist_unlock(ch); |
132 | 132 | ||
133 | if (!channel_idle) | 133 | if (!channel_idle) |
134 | return; | 134 | return; |
@@ -462,7 +462,7 @@ u32 gk20a_ce_create_context_with_cb(struct device *dev, | |||
462 | 462 | ||
463 | /* allocate gpfifo (1024 should be more than enough) */ | 463 | /* allocate gpfifo (1024 should be more than enough) */ |
464 | err = gk20a_alloc_channel_gpfifo(ce_ctx->ch, | 464 | err = gk20a_alloc_channel_gpfifo(ce_ctx->ch, |
465 | &(struct nvgpu_alloc_gpfifo_args){1024, 0}); | 465 | &(struct nvgpu_alloc_gpfifo_ex_args){1024, 0, 0, {}}); |
466 | if (err) { | 466 | if (err) { |
467 | gk20a_err(ce_ctx->dev, "ce: unable to allocate gpfifo"); | 467 | gk20a_err(ce_ctx->dev, "ce: unable to allocate gpfifo"); |
468 | goto end; | 468 | goto end; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 4019721a..cc3bbbd2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/anon_inodes.h> | 26 | #include <linux/anon_inodes.h> |
27 | #include <linux/dma-buf.h> | 27 | #include <linux/dma-buf.h> |
28 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
29 | #include <linux/circ_buf.h> | ||
29 | 30 | ||
30 | #include "debug_gk20a.h" | 31 | #include "debug_gk20a.h" |
31 | #include "ctxsw_trace_gk20a.h" | 32 | #include "ctxsw_trace_gk20a.h" |
@@ -55,6 +56,15 @@ static void free_priv_cmdbuf(struct channel_gk20a *c, | |||
55 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); | 56 | static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c); |
56 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); | 57 | static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c); |
57 | 58 | ||
59 | static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c); | ||
60 | |||
61 | static void channel_gk20a_joblist_add(struct channel_gk20a *c, | ||
62 | struct channel_gk20a_job *job); | ||
63 | static void channel_gk20a_joblist_delete(struct channel_gk20a *c, | ||
64 | struct channel_gk20a_job *job); | ||
65 | static struct channel_gk20a_job *channel_gk20a_joblist_peek( | ||
66 | struct channel_gk20a *c); | ||
67 | |||
58 | static int channel_gk20a_commit_userd(struct channel_gk20a *c); | 68 | static int channel_gk20a_commit_userd(struct channel_gk20a *c); |
59 | static int channel_gk20a_setup_userd(struct channel_gk20a *c); | 69 | static int channel_gk20a_setup_userd(struct channel_gk20a *c); |
60 | 70 | ||
@@ -460,6 +470,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
460 | { | 470 | { |
461 | struct channel_gk20a_job *job, *n; | 471 | struct channel_gk20a_job *job, *n; |
462 | bool released_job_semaphore = false; | 472 | bool released_job_semaphore = false; |
473 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch); | ||
463 | 474 | ||
464 | gk20a_channel_cancel_job_clean_up(ch, true); | 475 | gk20a_channel_cancel_job_clean_up(ch, true); |
465 | 476 | ||
@@ -471,14 +482,37 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
471 | 482 | ||
472 | /* release all job semaphores (applies only to jobs that use | 483 | /* release all job semaphores (applies only to jobs that use |
473 | semaphore synchronization) */ | 484 | semaphore synchronization) */ |
474 | spin_lock(&ch->jobs_lock); | 485 | channel_gk20a_joblist_lock(ch); |
475 | list_for_each_entry_safe(job, n, &ch->jobs, list) { | 486 | if (pre_alloc_enabled) { |
476 | if (job->post_fence->semaphore) { | 487 | int tmp_get = ch->joblist.pre_alloc.get; |
477 | gk20a_semaphore_release(job->post_fence->semaphore); | 488 | int put = ch->joblist.pre_alloc.put; |
478 | released_job_semaphore = true; | 489 | |
490 | /* | ||
491 | * ensure put is read before any subsequent reads. | ||
492 | * see corresponding wmb in gk20a_channel_add_job() | ||
493 | */ | ||
494 | rmb(); | ||
495 | |||
496 | while (tmp_get != put) { | ||
497 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; | ||
498 | if (job->post_fence->semaphore) { | ||
499 | gk20a_semaphore_release( | ||
500 | job->post_fence->semaphore); | ||
501 | released_job_semaphore = true; | ||
502 | } | ||
503 | tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length; | ||
504 | } | ||
505 | } else { | ||
506 | list_for_each_entry_safe(job, n, | ||
507 | &ch->joblist.dynamic.jobs, list) { | ||
508 | if (job->post_fence->semaphore) { | ||
509 | gk20a_semaphore_release( | ||
510 | job->post_fence->semaphore); | ||
511 | released_job_semaphore = true; | ||
512 | } | ||
479 | } | 513 | } |
480 | } | 514 | } |
481 | spin_unlock(&ch->jobs_lock); | 515 | channel_gk20a_joblist_unlock(ch); |
482 | 516 | ||
483 | if (released_job_semaphore) | 517 | if (released_job_semaphore) |
484 | wake_up_interruptible_all(&ch->semaphore_wq); | 518 | wake_up_interruptible_all(&ch->semaphore_wq); |
@@ -511,9 +545,9 @@ int gk20a_wait_channel_idle(struct channel_gk20a *ch) | |||
511 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g)); | 545 | msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g)); |
512 | 546 | ||
513 | do { | 547 | do { |
514 | spin_lock(&ch->jobs_lock); | 548 | channel_gk20a_joblist_lock(ch); |
515 | channel_idle = list_empty(&ch->jobs); | 549 | channel_idle = channel_gk20a_joblist_is_empty(ch); |
516 | spin_unlock(&ch->jobs_lock); | 550 | channel_gk20a_joblist_unlock(ch); |
517 | if (channel_idle) | 551 | if (channel_idle) |
518 | break; | 552 | break; |
519 | 553 | ||
@@ -1016,6 +1050,10 @@ unbind: | |||
1016 | 1050 | ||
1017 | mutex_unlock(&g->dbg_sessions_lock); | 1051 | mutex_unlock(&g->dbg_sessions_lock); |
1018 | 1052 | ||
1053 | /* free pre-allocated resources, if applicable */ | ||
1054 | if (channel_gk20a_is_prealloc_enabled(ch)) | ||
1055 | channel_gk20a_free_prealloc_resources(ch); | ||
1056 | |||
1019 | /* make sure we catch accesses of unopened channels in case | 1057 | /* make sure we catch accesses of unopened channels in case |
1020 | * there's non-refcounted channel pointers hanging around */ | 1058 | * there's non-refcounted channel pointers hanging around */ |
1021 | ch->g = NULL; | 1059 | ch->g = NULL; |
@@ -1422,7 +1460,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1422 | /* we already handled q->put + size > q->size so BUG_ON this */ | 1460 | /* we already handled q->put + size > q->size so BUG_ON this */ |
1423 | BUG_ON(q->put > q->size); | 1461 | BUG_ON(q->put > q->size); |
1424 | 1462 | ||
1425 | /* commit the previous writes before making the entry valid */ | 1463 | /* |
1464 | * commit the previous writes before making the entry valid. | ||
1465 | * see the corresponding rmb() in gk20a_free_priv_cmdbuf(). | ||
1466 | */ | ||
1426 | wmb(); | 1467 | wmb(); |
1427 | 1468 | ||
1428 | e->valid = true; | 1469 | e->valid = true; |
@@ -1436,26 +1477,222 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1436 | static void free_priv_cmdbuf(struct channel_gk20a *c, | 1477 | static void free_priv_cmdbuf(struct channel_gk20a *c, |
1437 | struct priv_cmd_entry *e) | 1478 | struct priv_cmd_entry *e) |
1438 | { | 1479 | { |
1439 | kfree(e); | 1480 | if (channel_gk20a_is_prealloc_enabled(c)) |
1481 | memset(e, 0, sizeof(struct priv_cmd_entry)); | ||
1482 | else | ||
1483 | kfree(e); | ||
1484 | } | ||
1485 | |||
1486 | static int channel_gk20a_alloc_job(struct channel_gk20a *c, | ||
1487 | struct channel_gk20a_job **job_out) | ||
1488 | { | ||
1489 | int err = 0; | ||
1490 | |||
1491 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1492 | int put = c->joblist.pre_alloc.put; | ||
1493 | int get = c->joblist.pre_alloc.get; | ||
1494 | |||
1495 | /* | ||
1496 | * ensure all subsequent reads happen after reading get. | ||
1497 | * see corresponding wmb in gk20a_channel_clean_up_jobs() | ||
1498 | */ | ||
1499 | rmb(); | ||
1500 | |||
1501 | if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length)) | ||
1502 | *job_out = &c->joblist.pre_alloc.jobs[put]; | ||
1503 | else { | ||
1504 | gk20a_warn(dev_from_gk20a(c->g), | ||
1505 | "out of job ringbuffer space\n"); | ||
1506 | err = -EAGAIN; | ||
1507 | } | ||
1508 | } else { | ||
1509 | *job_out = kzalloc(sizeof(struct channel_gk20a_job), | ||
1510 | GFP_KERNEL); | ||
1511 | if (!job_out) | ||
1512 | err = -ENOMEM; | ||
1513 | } | ||
1514 | |||
1515 | return err; | ||
1516 | } | ||
1517 | |||
1518 | static void channel_gk20a_free_job(struct channel_gk20a *c, | ||
1519 | struct channel_gk20a_job *job) | ||
1520 | { | ||
1521 | /* | ||
1522 | * In case of pre_allocated jobs, we need to clean out | ||
1523 | * the job but maintain the pointers to the priv_cmd_entry, | ||
1524 | * since they're inherently tied to the job node. | ||
1525 | */ | ||
1526 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1527 | struct priv_cmd_entry *wait_cmd = job->wait_cmd; | ||
1528 | struct priv_cmd_entry *incr_cmd = job->incr_cmd; | ||
1529 | memset(job, 0, sizeof(*job)); | ||
1530 | job->wait_cmd = wait_cmd; | ||
1531 | job->incr_cmd = incr_cmd; | ||
1532 | } else | ||
1533 | kfree(job); | ||
1534 | } | ||
1535 | |||
1536 | void channel_gk20a_joblist_lock(struct channel_gk20a *c) | ||
1537 | { | ||
1538 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
1539 | mutex_lock(&c->joblist.pre_alloc.read_lock); | ||
1540 | else | ||
1541 | spin_lock(&c->joblist.dynamic.lock); | ||
1440 | } | 1542 | } |
1441 | 1543 | ||
1442 | static struct channel_gk20a_job *channel_gk20a_alloc_job( | 1544 | void channel_gk20a_joblist_unlock(struct channel_gk20a *c) |
1545 | { | ||
1546 | if (channel_gk20a_is_prealloc_enabled(c)) | ||
1547 | mutex_unlock(&c->joblist.pre_alloc.read_lock); | ||
1548 | else | ||
1549 | spin_unlock(&c->joblist.dynamic.lock); | ||
1550 | } | ||
1551 | |||
1552 | static struct channel_gk20a_job *channel_gk20a_joblist_peek( | ||
1443 | struct channel_gk20a *c) | 1553 | struct channel_gk20a *c) |
1444 | { | 1554 | { |
1555 | int get; | ||
1445 | struct channel_gk20a_job *job = NULL; | 1556 | struct channel_gk20a_job *job = NULL; |
1446 | 1557 | ||
1447 | job = kzalloc(sizeof(*job), GFP_KERNEL); | 1558 | if (channel_gk20a_is_prealloc_enabled(c)) { |
1559 | if (!channel_gk20a_joblist_is_empty(c)) { | ||
1560 | get = c->joblist.pre_alloc.get; | ||
1561 | job = &c->joblist.pre_alloc.jobs[get]; | ||
1562 | } | ||
1563 | } else { | ||
1564 | if (!list_empty(&c->joblist.dynamic.jobs)) | ||
1565 | job = list_first_entry(&c->joblist.dynamic.jobs, | ||
1566 | struct channel_gk20a_job, list); | ||
1567 | } | ||
1568 | |||
1448 | return job; | 1569 | return job; |
1449 | } | 1570 | } |
1450 | 1571 | ||
1451 | static void channel_gk20a_free_job(struct channel_gk20a *c, | 1572 | static void channel_gk20a_joblist_add(struct channel_gk20a *c, |
1452 | struct channel_gk20a_job *job) | 1573 | struct channel_gk20a_job *job) |
1453 | { | 1574 | { |
1454 | kfree(job); | 1575 | if (channel_gk20a_is_prealloc_enabled(c)) { |
1576 | c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) % | ||
1577 | (c->joblist.pre_alloc.length); | ||
1578 | } else { | ||
1579 | list_add_tail(&job->list, &c->joblist.dynamic.jobs); | ||
1580 | } | ||
1581 | } | ||
1582 | |||
1583 | static void channel_gk20a_joblist_delete(struct channel_gk20a *c, | ||
1584 | struct channel_gk20a_job *job) | ||
1585 | { | ||
1586 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1587 | c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) % | ||
1588 | (c->joblist.pre_alloc.length); | ||
1589 | } else { | ||
1590 | list_del_init(&job->list); | ||
1591 | } | ||
1592 | } | ||
1593 | |||
1594 | bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c) | ||
1595 | { | ||
1596 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
1597 | int get = c->joblist.pre_alloc.get; | ||
1598 | int put = c->joblist.pre_alloc.put; | ||
1599 | return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length)); | ||
1600 | } | ||
1601 | |||
1602 | return list_empty(&c->joblist.dynamic.jobs); | ||
1603 | } | ||
1604 | |||
1605 | bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c) | ||
1606 | { | ||
1607 | bool pre_alloc_enabled = c->joblist.pre_alloc.enabled; | ||
1608 | |||
1609 | rmb(); | ||
1610 | return pre_alloc_enabled; | ||
1611 | } | ||
1612 | |||
1613 | static int channel_gk20a_prealloc_resources(struct channel_gk20a *c, | ||
1614 | unsigned int num_jobs) | ||
1615 | { | ||
1616 | int i, err; | ||
1617 | size_t size; | ||
1618 | struct priv_cmd_entry *entries = NULL; | ||
1619 | |||
1620 | if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs) | ||
1621 | return -EINVAL; | ||
1622 | |||
1623 | /* | ||
1624 | * pre-allocate the job list. | ||
1625 | * since vmalloc takes an unsigned long, we need | ||
1626 | * to make sure we don't hit an overflow condition | ||
1627 | */ | ||
1628 | size = sizeof(struct channel_gk20a_job); | ||
1629 | if (num_jobs <= ULONG_MAX / size) | ||
1630 | c->joblist.pre_alloc.jobs = vzalloc(num_jobs * size); | ||
1631 | if (!c->joblist.pre_alloc.jobs) { | ||
1632 | err = -ENOMEM; | ||
1633 | goto clean_up; | ||
1634 | } | ||
1635 | |||
1636 | /* | ||
1637 | * pre-allocate 2x priv_cmd_entry for each job up front. | ||
1638 | * since vmalloc takes an unsigned long, we need | ||
1639 | * to make sure we don't hit an overflow condition | ||
1640 | */ | ||
1641 | size = sizeof(struct priv_cmd_entry); | ||
1642 | if (num_jobs <= ULONG_MAX / (size << 1)) | ||
1643 | entries = vzalloc((num_jobs << 1) * size); | ||
1644 | if (!entries) { | ||
1645 | err = -ENOMEM; | ||
1646 | goto clean_up_joblist; | ||
1647 | } | ||
1648 | |||
1649 | for (i = 0; i < num_jobs; i++) { | ||
1650 | c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i]; | ||
1651 | c->joblist.pre_alloc.jobs[i].incr_cmd = | ||
1652 | &entries[i + num_jobs]; | ||
1653 | } | ||
1654 | |||
1655 | /* pre-allocate a fence pool */ | ||
1656 | err = gk20a_alloc_fence_pool(c, num_jobs); | ||
1657 | if (err) | ||
1658 | goto clean_up_priv_cmd; | ||
1659 | |||
1660 | c->joblist.pre_alloc.length = num_jobs; | ||
1661 | |||
1662 | /* | ||
1663 | * commit the previous writes before setting the flag. | ||
1664 | * see corresponding rmb in channel_gk20a_is_prealloc_enabled() | ||
1665 | */ | ||
1666 | wmb(); | ||
1667 | c->joblist.pre_alloc.enabled = true; | ||
1668 | |||
1669 | return 0; | ||
1670 | |||
1671 | clean_up_priv_cmd: | ||
1672 | vfree(entries); | ||
1673 | clean_up_joblist: | ||
1674 | vfree(c->joblist.pre_alloc.jobs); | ||
1675 | clean_up: | ||
1676 | memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc)); | ||
1677 | return err; | ||
1678 | } | ||
1679 | |||
1680 | static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c) | ||
1681 | { | ||
1682 | vfree(c->joblist.pre_alloc.jobs[0].wait_cmd); | ||
1683 | vfree(c->joblist.pre_alloc.jobs); | ||
1684 | gk20a_free_fence_pool(c); | ||
1685 | |||
1686 | /* | ||
1687 | * commit the previous writes before disabling the flag. | ||
1688 | * see corresponding rmb in channel_gk20a_is_prealloc_enabled() | ||
1689 | */ | ||
1690 | wmb(); | ||
1691 | c->joblist.pre_alloc.enabled = false; | ||
1455 | } | 1692 | } |
1456 | 1693 | ||
1457 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 1694 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
1458 | struct nvgpu_alloc_gpfifo_args *args) | 1695 | struct nvgpu_alloc_gpfifo_ex_args *args) |
1459 | { | 1696 | { |
1460 | struct gk20a *g = c->g; | 1697 | struct gk20a *g = c->g; |
1461 | struct device *d = dev_from_gk20a(g); | 1698 | struct device *d = dev_from_gk20a(g); |
@@ -1539,19 +1776,30 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1539 | 1776 | ||
1540 | /* TBD: setup engine contexts */ | 1777 | /* TBD: setup engine contexts */ |
1541 | 1778 | ||
1779 | if (args->num_inflight_jobs) { | ||
1780 | err = channel_gk20a_prealloc_resources(c, | ||
1781 | args->num_inflight_jobs); | ||
1782 | if (err) | ||
1783 | goto clean_up_sync; | ||
1784 | } | ||
1785 | |||
1542 | err = channel_gk20a_alloc_priv_cmdbuf(c); | 1786 | err = channel_gk20a_alloc_priv_cmdbuf(c); |
1543 | if (err) | 1787 | if (err) |
1544 | goto clean_up_sync; | 1788 | goto clean_up_prealloc; |
1545 | 1789 | ||
1546 | err = channel_gk20a_update_runlist(c, true); | 1790 | err = channel_gk20a_update_runlist(c, true); |
1547 | if (err) | 1791 | if (err) |
1548 | goto clean_up_sync; | 1792 | goto clean_up_priv_cmd; |
1549 | 1793 | ||
1550 | g->ops.fifo.bind_channel(c); | 1794 | g->ops.fifo.bind_channel(c); |
1551 | 1795 | ||
1552 | gk20a_dbg_fn("done"); | 1796 | gk20a_dbg_fn("done"); |
1553 | return 0; | 1797 | return 0; |
1554 | 1798 | ||
1799 | clean_up_priv_cmd: | ||
1800 | channel_gk20a_free_priv_cmdbuf(c); | ||
1801 | clean_up_prealloc: | ||
1802 | channel_gk20a_free_prealloc_resources(c); | ||
1555 | clean_up_sync: | 1803 | clean_up_sync: |
1556 | gk20a_channel_sync_destroy(c->sync); | 1804 | gk20a_channel_sync_destroy(c->sync); |
1557 | c->sync = NULL; | 1805 | c->sync = NULL; |
@@ -1878,6 +2126,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1878 | struct vm_gk20a *vm = c->vm; | 2126 | struct vm_gk20a *vm = c->vm; |
1879 | struct mapped_buffer_node **mapped_buffers = NULL; | 2127 | struct mapped_buffer_node **mapped_buffers = NULL; |
1880 | int err = 0, num_mapped_buffers = 0; | 2128 | int err = 0, num_mapped_buffers = 0; |
2129 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
1881 | 2130 | ||
1882 | /* job needs reference to this vm (released in channel_update) */ | 2131 | /* job needs reference to this vm (released in channel_update) */ |
1883 | gk20a_vm_get(vm); | 2132 | gk20a_vm_get(vm); |
@@ -1898,9 +2147,19 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1898 | 2147 | ||
1899 | gk20a_channel_timeout_start(c, job); | 2148 | gk20a_channel_timeout_start(c, job); |
1900 | 2149 | ||
1901 | spin_lock(&c->jobs_lock); | 2150 | if (!pre_alloc_enabled) |
1902 | list_add_tail(&job->list, &c->jobs); | 2151 | channel_gk20a_joblist_lock(c); |
1903 | spin_unlock(&c->jobs_lock); | 2152 | |
2153 | /* | ||
2154 | * ensure all pending writes complete before adding to the list. | ||
2155 | * see corresponding rmb in gk20a_channel_clean_up_jobs() & | ||
2156 | * gk20a_channel_abort_clean_up() | ||
2157 | */ | ||
2158 | wmb(); | ||
2159 | channel_gk20a_joblist_add(c, job); | ||
2160 | |||
2161 | if (!pre_alloc_enabled) | ||
2162 | channel_gk20a_joblist_unlock(c); | ||
1904 | } else { | 2163 | } else { |
1905 | err = -ETIMEDOUT; | 2164 | err = -ETIMEDOUT; |
1906 | goto err_put_buffers; | 2165 | goto err_put_buffers; |
@@ -1945,14 +2204,20 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work) | |||
1945 | while (1) { | 2204 | while (1) { |
1946 | bool completed; | 2205 | bool completed; |
1947 | 2206 | ||
1948 | spin_lock(&c->jobs_lock); | 2207 | channel_gk20a_joblist_lock(c); |
1949 | if (list_empty(&c->jobs)) { | 2208 | if (channel_gk20a_joblist_is_empty(c)) { |
1950 | spin_unlock(&c->jobs_lock); | 2209 | channel_gk20a_joblist_unlock(c); |
1951 | break; | 2210 | break; |
1952 | } | 2211 | } |
1953 | job = list_first_entry(&c->jobs, | 2212 | |
1954 | struct channel_gk20a_job, list); | 2213 | /* |
1955 | spin_unlock(&c->jobs_lock); | 2214 | * ensure that all subsequent reads occur after checking |
2215 | * that we have a valid node. see corresponding wmb in | ||
2216 | * gk20a_channel_add_job(). | ||
2217 | */ | ||
2218 | rmb(); | ||
2219 | job = channel_gk20a_joblist_peek(c); | ||
2220 | channel_gk20a_joblist_unlock(c); | ||
1956 | 2221 | ||
1957 | completed = gk20a_fence_is_expired(job->post_fence); | 2222 | completed = gk20a_fence_is_expired(job->post_fence); |
1958 | if (!completed) { | 2223 | if (!completed) { |
@@ -1998,9 +2263,14 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work) | |||
1998 | * so this wouldn't get freed here. */ | 2263 | * so this wouldn't get freed here. */ |
1999 | gk20a_channel_put(c); | 2264 | gk20a_channel_put(c); |
2000 | 2265 | ||
2001 | spin_lock(&c->jobs_lock); | 2266 | /* |
2002 | list_del_init(&job->list); | 2267 | * ensure all pending writes complete before deleting the node. |
2003 | spin_unlock(&c->jobs_lock); | 2268 | * see corresponding rmb in channel_gk20a_alloc_job(). |
2269 | */ | ||
2270 | wmb(); | ||
2271 | channel_gk20a_joblist_lock(c); | ||
2272 | channel_gk20a_joblist_delete(c, job); | ||
2273 | channel_gk20a_joblist_unlock(c); | ||
2004 | 2274 | ||
2005 | channel_gk20a_free_job(c, job); | 2275 | channel_gk20a_free_job(c, job); |
2006 | job_finished = 1; | 2276 | job_finished = 1; |
@@ -2160,6 +2430,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2160 | int wait_fence_fd = -1; | 2430 | int wait_fence_fd = -1; |
2161 | int err = 0; | 2431 | int err = 0; |
2162 | bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI); | 2432 | bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI); |
2433 | bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); | ||
2163 | 2434 | ||
2164 | /* | 2435 | /* |
2165 | * If user wants to always allocate sync_fence_fds then respect that; | 2436 | * If user wants to always allocate sync_fence_fds then respect that; |
@@ -2197,9 +2468,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2197 | * this condition. | 2468 | * this condition. |
2198 | */ | 2469 | */ |
2199 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { | 2470 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { |
2200 | job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry), | ||
2201 | GFP_KERNEL); | ||
2202 | job->pre_fence = gk20a_alloc_fence(c); | 2471 | job->pre_fence = gk20a_alloc_fence(c); |
2472 | if (!pre_alloc_enabled) | ||
2473 | job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry), | ||
2474 | GFP_KERNEL); | ||
2203 | 2475 | ||
2204 | if (!job->wait_cmd || !job->pre_fence) { | 2476 | if (!job->wait_cmd || !job->pre_fence) { |
2205 | err = -ENOMEM; | 2477 | err = -ENOMEM; |
@@ -2233,8 +2505,10 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2233 | * is used to keep track of method completion for idle railgating. The | 2505 | * is used to keep track of method completion for idle railgating. The |
2234 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | 2506 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. |
2235 | */ | 2507 | */ |
2236 | job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); | ||
2237 | job->post_fence = gk20a_alloc_fence(c); | 2508 | job->post_fence = gk20a_alloc_fence(c); |
2509 | if (!pre_alloc_enabled) | ||
2510 | job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), | ||
2511 | GFP_KERNEL); | ||
2238 | 2512 | ||
2239 | if (!job->incr_cmd || !job->post_fence) { | 2513 | if (!job->incr_cmd || !job->post_fence) { |
2240 | err = -ENOMEM; | 2514 | err = -ENOMEM; |
@@ -2256,15 +2530,17 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2256 | return 0; | 2530 | return 0; |
2257 | 2531 | ||
2258 | clean_up_post_fence: | 2532 | clean_up_post_fence: |
2259 | gk20a_free_priv_cmdbuf(c, job->incr_cmd); | ||
2260 | gk20a_fence_put(job->post_fence); | 2533 | gk20a_fence_put(job->post_fence); |
2261 | job->incr_cmd = NULL; | ||
2262 | job->post_fence = NULL; | 2534 | job->post_fence = NULL; |
2535 | free_priv_cmdbuf(c, job->incr_cmd); | ||
2536 | if (!pre_alloc_enabled) | ||
2537 | job->incr_cmd = NULL; | ||
2263 | clean_up_pre_fence: | 2538 | clean_up_pre_fence: |
2264 | gk20a_free_priv_cmdbuf(c, job->wait_cmd); | ||
2265 | gk20a_fence_put(job->pre_fence); | 2539 | gk20a_fence_put(job->pre_fence); |
2266 | job->wait_cmd = NULL; | ||
2267 | job->pre_fence = NULL; | 2540 | job->pre_fence = NULL; |
2541 | free_priv_cmdbuf(c, job->wait_cmd); | ||
2542 | if (!pre_alloc_enabled) | ||
2543 | job->wait_cmd = NULL; | ||
2268 | *wait_cmd = NULL; | 2544 | *wait_cmd = NULL; |
2269 | *pre_fence = NULL; | 2545 | *pre_fence = NULL; |
2270 | fail: | 2546 | fail: |
@@ -2388,11 +2664,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2388 | } | 2664 | } |
2389 | 2665 | ||
2390 | if (need_job_tracking) { | 2666 | if (need_job_tracking) { |
2391 | job = channel_gk20a_alloc_job(c); | 2667 | err = channel_gk20a_alloc_job(c, &job); |
2392 | if (!job) { | 2668 | if (err) |
2393 | err = -ENOMEM; | ||
2394 | goto clean_up; | 2669 | goto clean_up; |
2395 | } | ||
2396 | 2670 | ||
2397 | err = gk20a_submit_prepare_syncs(c, fence, job, | 2671 | err = gk20a_submit_prepare_syncs(c, fence, job, |
2398 | &wait_cmd, &incr_cmd, | 2672 | &wait_cmd, &incr_cmd, |
@@ -2463,13 +2737,14 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) | |||
2463 | init_waitqueue_head(&c->ref_count_dec_wq); | 2737 | init_waitqueue_head(&c->ref_count_dec_wq); |
2464 | mutex_init(&c->ioctl_lock); | 2738 | mutex_init(&c->ioctl_lock); |
2465 | mutex_init(&c->error_notifier_mutex); | 2739 | mutex_init(&c->error_notifier_mutex); |
2466 | spin_lock_init(&c->jobs_lock); | 2740 | spin_lock_init(&c->joblist.dynamic.lock); |
2741 | mutex_init(&c->joblist.pre_alloc.read_lock); | ||
2467 | raw_spin_lock_init(&c->timeout.lock); | 2742 | raw_spin_lock_init(&c->timeout.lock); |
2468 | mutex_init(&c->sync_lock); | 2743 | mutex_init(&c->sync_lock); |
2469 | INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); | 2744 | INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); |
2470 | INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs); | 2745 | INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs); |
2471 | mutex_init(&c->clean_up.lock); | 2746 | mutex_init(&c->clean_up.lock); |
2472 | INIT_LIST_HEAD(&c->jobs); | 2747 | INIT_LIST_HEAD(&c->joblist.dynamic.jobs); |
2473 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 2748 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
2474 | mutex_init(&c->cyclestate.cyclestate_buffer_mutex); | 2749 | mutex_init(&c->cyclestate.cyclestate_buffer_mutex); |
2475 | mutex_init(&c->cs_client_mutex); | 2750 | mutex_init(&c->cs_client_mutex); |
@@ -3119,7 +3394,7 @@ long gk20a_channel_ioctl(struct file *filp, | |||
3119 | (struct nvgpu_free_obj_ctx_args *)buf); | 3394 | (struct nvgpu_free_obj_ctx_args *)buf); |
3120 | gk20a_idle(dev); | 3395 | gk20a_idle(dev); |
3121 | break; | 3396 | break; |
3122 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: | 3397 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX: |
3123 | err = gk20a_busy(dev); | 3398 | err = gk20a_busy(dev); |
3124 | if (err) { | 3399 | if (err) { |
3125 | dev_err(dev, | 3400 | dev_err(dev, |
@@ -3128,9 +3403,34 @@ long gk20a_channel_ioctl(struct file *filp, | |||
3128 | break; | 3403 | break; |
3129 | } | 3404 | } |
3130 | err = gk20a_alloc_channel_gpfifo(ch, | 3405 | err = gk20a_alloc_channel_gpfifo(ch, |
3131 | (struct nvgpu_alloc_gpfifo_args *)buf); | 3406 | (struct nvgpu_alloc_gpfifo_ex_args *)buf); |
3407 | gk20a_idle(dev); | ||
3408 | break; | ||
3409 | case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO: | ||
3410 | { | ||
3411 | struct nvgpu_alloc_gpfifo_ex_args alloc_gpfifo_ex_args; | ||
3412 | struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args = | ||
3413 | (struct nvgpu_alloc_gpfifo_args *)buf; | ||
3414 | |||
3415 | err = gk20a_busy(dev); | ||
3416 | if (err) { | ||
3417 | dev_err(dev, | ||
3418 | "%s: failed to host gk20a for ioctl cmd: 0x%x", | ||
3419 | __func__, cmd); | ||
3420 | break; | ||
3421 | } | ||
3422 | |||
3423 | /* prepare new args structure */ | ||
3424 | memset(&alloc_gpfifo_ex_args, 0, | ||
3425 | sizeof(struct nvgpu_alloc_gpfifo_ex_args)); | ||
3426 | alloc_gpfifo_ex_args.num_entries = | ||
3427 | alloc_gpfifo_args->num_entries; | ||
3428 | alloc_gpfifo_ex_args.flags = alloc_gpfifo_args->flags; | ||
3429 | |||
3430 | err = gk20a_alloc_channel_gpfifo(ch, &alloc_gpfifo_ex_args); | ||
3132 | gk20a_idle(dev); | 3431 | gk20a_idle(dev); |
3133 | break; | 3432 | break; |
3433 | } | ||
3134 | case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: | 3434 | case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO: |
3135 | err = gk20a_ioctl_channel_submit_gpfifo(ch, | 3435 | err = gk20a_ioctl_channel_submit_gpfifo(ch, |
3136 | (struct nvgpu_submit_gpfifo_args *)buf); | 3436 | (struct nvgpu_submit_gpfifo_args *)buf); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 0d8746b8..8cceb6b2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -70,6 +70,22 @@ struct channel_gk20a_job { | |||
70 | struct list_head list; | 70 | struct list_head list; |
71 | }; | 71 | }; |
72 | 72 | ||
73 | struct channel_gk20a_joblist { | ||
74 | struct { | ||
75 | bool enabled; | ||
76 | unsigned int length; | ||
77 | unsigned int put; | ||
78 | unsigned int get; | ||
79 | struct channel_gk20a_job *jobs; | ||
80 | struct mutex read_lock; | ||
81 | } pre_alloc; | ||
82 | |||
83 | struct { | ||
84 | struct list_head jobs; | ||
85 | spinlock_t lock; | ||
86 | } dynamic; | ||
87 | }; | ||
88 | |||
73 | struct channel_gk20a_timeout { | 89 | struct channel_gk20a_timeout { |
74 | struct delayed_work wq; | 90 | struct delayed_work wq; |
75 | raw_spinlock_t lock; | 91 | raw_spinlock_t lock; |
@@ -115,6 +131,7 @@ struct channel_gk20a { | |||
115 | bool bound; | 131 | bool bound; |
116 | bool first_init; | 132 | bool first_init; |
117 | bool vpr; | 133 | bool vpr; |
134 | bool no_block; | ||
118 | bool cde; | 135 | bool cde; |
119 | pid_t pid; | 136 | pid_t pid; |
120 | pid_t tgid; | 137 | pid_t tgid; |
@@ -123,8 +140,8 @@ struct channel_gk20a { | |||
123 | int tsgid; | 140 | int tsgid; |
124 | struct list_head ch_entry; /* channel's entry in TSG */ | 141 | struct list_head ch_entry; /* channel's entry in TSG */ |
125 | 142 | ||
126 | struct list_head jobs; | 143 | struct channel_gk20a_joblist joblist; |
127 | spinlock_t jobs_lock; | 144 | struct gk20a_allocator fence_allocator; |
128 | 145 | ||
129 | struct vm_gk20a *vm; | 146 | struct vm_gk20a *vm; |
130 | 147 | ||
@@ -272,7 +289,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
272 | bool force_need_sync_fence); | 289 | bool force_need_sync_fence); |
273 | 290 | ||
274 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 291 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
275 | struct nvgpu_alloc_gpfifo_args *args); | 292 | struct nvgpu_alloc_gpfifo_ex_args *args); |
276 | 293 | ||
277 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); | 294 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a); |
278 | void channel_gk20a_disable(struct channel_gk20a *ch); | 295 | void channel_gk20a_disable(struct channel_gk20a *ch); |
@@ -284,6 +301,11 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c, | |||
284 | void channel_gk20a_enable(struct channel_gk20a *ch); | 301 | void channel_gk20a_enable(struct channel_gk20a *ch); |
285 | void gk20a_channel_timeout_restart_all_channels(struct gk20a *g); | 302 | void gk20a_channel_timeout_restart_all_channels(struct gk20a *g); |
286 | 303 | ||
304 | bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c); | ||
305 | void channel_gk20a_joblist_lock(struct channel_gk20a *c); | ||
306 | void channel_gk20a_joblist_unlock(struct channel_gk20a *c); | ||
307 | bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c); | ||
308 | |||
287 | int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, | 309 | int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, |
288 | int timeslice_period, | 310 | int timeslice_period, |
289 | int *__timeslice_timeout, int *__timeslice_scale); | 311 | int *__timeslice_timeout, int *__timeslice_scale); |
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index f788829f..c11d363e 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |||
@@ -47,7 +47,12 @@ static void gk20a_fence_free(struct kref *ref) | |||
47 | #endif | 47 | #endif |
48 | if (f->semaphore) | 48 | if (f->semaphore) |
49 | gk20a_semaphore_put(f->semaphore); | 49 | gk20a_semaphore_put(f->semaphore); |
50 | kfree(f); | 50 | |
51 | if (f->allocator) { | ||
52 | if (gk20a_alloc_initialized(f->allocator)) | ||
53 | gk20a_free(f->allocator, (u64)f); | ||
54 | } else | ||
55 | kfree(f); | ||
51 | } | 56 | } |
52 | 57 | ||
53 | void gk20a_fence_put(struct gk20a_fence *f) | 58 | void gk20a_fence_put(struct gk20a_fence *f) |
@@ -109,15 +114,66 @@ int gk20a_fence_install_fd(struct gk20a_fence *f) | |||
109 | #endif | 114 | #endif |
110 | } | 115 | } |
111 | 116 | ||
112 | struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c) | 117 | int gk20a_alloc_fence_pool(struct channel_gk20a *c, int count) |
118 | { | ||
119 | int err; | ||
120 | size_t size; | ||
121 | struct gk20a_fence *fence_pool = NULL; | ||
122 | |||
123 | size = sizeof(struct gk20a_fence); | ||
124 | if (count <= ULONG_MAX / size) { | ||
125 | size = count * size; | ||
126 | fence_pool = vzalloc(size); | ||
127 | } | ||
128 | |||
129 | if (!fence_pool) | ||
130 | return -ENOMEM; | ||
131 | |||
132 | err = gk20a_lockless_allocator_init(&c->fence_allocator, | ||
133 | "fence_pool", (u64)fence_pool, size, | ||
134 | sizeof(struct gk20a_fence), 0); | ||
135 | if (err) | ||
136 | goto fail; | ||
137 | |||
138 | return 0; | ||
139 | |||
140 | fail: | ||
141 | vfree(fence_pool); | ||
142 | return err; | ||
143 | } | ||
144 | |||
145 | void gk20a_free_fence_pool(struct channel_gk20a *c) | ||
113 | { | 146 | { |
114 | struct gk20a_fence *fence; | 147 | if (gk20a_alloc_initialized(&c->fence_allocator)) { |
148 | void *base = (void *)gk20a_alloc_base(&c->fence_allocator); | ||
149 | |||
150 | gk20a_alloc_destroy(&c->fence_allocator); | ||
151 | vfree(base); | ||
152 | } | ||
153 | } | ||
115 | 154 | ||
116 | fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL); | 155 | struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c) |
117 | if (!fence) | 156 | { |
118 | return NULL; | 157 | struct gk20a_fence *fence = NULL; |
158 | |||
159 | if (channel_gk20a_is_prealloc_enabled(c)) { | ||
160 | if (gk20a_alloc_initialized(&c->fence_allocator)) { | ||
161 | fence = (struct gk20a_fence *) | ||
162 | gk20a_alloc(&c->fence_allocator, | ||
163 | sizeof(struct gk20a_fence)); | ||
164 | |||
165 | /* clear the node and reset the allocator pointer */ | ||
166 | if (fence) { | ||
167 | memset(fence, 0, sizeof(*fence)); | ||
168 | fence->allocator = &c->fence_allocator; | ||
169 | } | ||
170 | } | ||
171 | } else | ||
172 | fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL); | ||
173 | |||
174 | if (fence) | ||
175 | kref_init(&fence->ref); | ||
119 | 176 | ||
120 | kref_init(&fence->ref); | ||
121 | return fence; | 177 | return fence; |
122 | } | 178 | } |
123 | 179 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h index 3fe2d8b2..97a7d957 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * GK20A Fences | 4 | * GK20A Fences |
5 | * | 5 | * |
6 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. | 6 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
7 | * | 7 | * |
8 | * This program is free software; you can redistribute it and/or modify it | 8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms and conditions of the GNU General Public License, | 9 | * under the terms and conditions of the GNU General Public License, |
@@ -45,6 +45,9 @@ struct gk20a_fence { | |||
45 | struct platform_device *host1x_pdev; | 45 | struct platform_device *host1x_pdev; |
46 | u32 syncpt_id; | 46 | u32 syncpt_id; |
47 | u32 syncpt_value; | 47 | u32 syncpt_value; |
48 | |||
49 | /* Valid for fences part of a pre-allocated fence pool */ | ||
50 | struct gk20a_allocator *allocator; | ||
48 | }; | 51 | }; |
49 | 52 | ||
50 | /* Fences can be created from semaphores or syncpoint (id, value) pairs */ | 53 | /* Fences can be created from semaphores or syncpoint (id, value) pairs */ |
@@ -62,7 +65,15 @@ int gk20a_fence_from_syncpt( | |||
62 | u32 id, u32 value, bool wfi, | 65 | u32 id, u32 value, bool wfi, |
63 | bool need_sync_fence); | 66 | bool need_sync_fence); |
64 | 67 | ||
65 | struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c); | 68 | int gk20a_alloc_fence_pool( |
69 | struct channel_gk20a *c, | ||
70 | int size); | ||
71 | |||
72 | void gk20a_free_fence_pool( | ||
73 | struct channel_gk20a *c); | ||
74 | |||
75 | struct gk20a_fence *gk20a_alloc_fence( | ||
76 | struct channel_gk20a *c); | ||
66 | 77 | ||
67 | void gk20a_init_fence(struct gk20a_fence *f, | 78 | void gk20a_init_fence(struct gk20a_fence *f, |
68 | const struct gk20a_fence_ops *ops, | 79 | const struct gk20a_fence_ops *ops, |