diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 359 |
1 files changed, 153 insertions, 206 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4e7c36ee..e7e6662a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -97,22 +97,18 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, | |||
97 | u32 *ctx_id) | 97 | u32 *ctx_id) |
98 | { | 98 | { |
99 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 99 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
100 | void *ctx_ptr = NULL; | ||
101 | 100 | ||
102 | /* Channel gr_ctx buffer is gpu cacheable. | 101 | /* Channel gr_ctx buffer is gpu cacheable. |
103 | Flush and invalidate before cpu update. */ | 102 | Flush and invalidate before cpu update. */ |
104 | g->ops.mm.l2_flush(g, true); | 103 | g->ops.mm.l2_flush(g, true); |
105 | 104 | ||
106 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 105 | if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) |
107 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
108 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
109 | if (!ctx_ptr) | ||
110 | return -ENOMEM; | 106 | return -ENOMEM; |
111 | 107 | ||
112 | *ctx_id = gk20a_mem_rd32(ctx_ptr + | 108 | *ctx_id = gk20a_mem_rd(g, &ch_ctx->gr_ctx->mem, |
113 | ctxsw_prog_main_image_context_id_o(), 0); | 109 | ctxsw_prog_main_image_context_id_o()); |
114 | 110 | ||
115 | vunmap(ctx_ptr); | 111 | gk20a_mem_end(g, &ch_ctx->gr_ctx->mem); |
116 | 112 | ||
117 | return 0; | 113 | return 0; |
118 | } | 114 | } |
@@ -619,22 +615,17 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
619 | { | 615 | { |
620 | u32 addr_lo; | 616 | u32 addr_lo; |
621 | u32 addr_hi; | 617 | u32 addr_hi; |
622 | void *inst_ptr = NULL; | ||
623 | 618 | ||
624 | gk20a_dbg_fn(""); | 619 | gk20a_dbg_fn(""); |
625 | 620 | ||
626 | inst_ptr = c->inst_block.cpu_va; | ||
627 | if (!inst_ptr) | ||
628 | return -ENOMEM; | ||
629 | |||
630 | addr_lo = u64_lo32(gpu_va) >> 12; | 621 | addr_lo = u64_lo32(gpu_va) >> 12; |
631 | addr_hi = u64_hi32(gpu_va); | 622 | addr_hi = u64_hi32(gpu_va); |
632 | 623 | ||
633 | gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(), | 624 | gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(), |
634 | ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | | 625 | ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | |
635 | ram_in_gr_wfi_ptr_lo_f(addr_lo)); | 626 | ram_in_gr_wfi_ptr_lo_f(addr_lo)); |
636 | 627 | ||
637 | gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), | 628 | gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(), |
638 | ram_in_gr_wfi_ptr_hi_f(addr_hi)); | 629 | ram_in_gr_wfi_ptr_hi_f(addr_hi)); |
639 | 630 | ||
640 | return 0; | 631 | return 0; |
@@ -658,11 +649,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | |||
658 | return -EBUSY; | 649 | return -EBUSY; |
659 | } | 650 | } |
660 | 651 | ||
661 | ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages, | 652 | if (gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem)) |
662 | PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT, | ||
663 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
664 | |||
665 | if (!ch_ctx->patch_ctx.mem.cpu_va) | ||
666 | return -ENOMEM; | 653 | return -ENOMEM; |
667 | 654 | ||
668 | return 0; | 655 | return 0; |
@@ -677,8 +664,7 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g, | |||
677 | return -EINVAL; | 664 | return -EINVAL; |
678 | } | 665 | } |
679 | 666 | ||
680 | vunmap(ch_ctx->patch_ctx.mem.cpu_va); | 667 | gk20a_mem_end(g, &ch_ctx->patch_ctx.mem); |
681 | ch_ctx->patch_ctx.mem.cpu_va = NULL; | ||
682 | return 0; | 668 | return 0; |
683 | } | 669 | } |
684 | 670 | ||
@@ -687,7 +673,6 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, | |||
687 | u32 addr, u32 data, bool patch) | 673 | u32 addr, u32 data, bool patch) |
688 | { | 674 | { |
689 | u32 patch_slot = 0; | 675 | u32 patch_slot = 0; |
690 | void *patch_ptr = NULL; | ||
691 | bool mapped_here = false; | 676 | bool mapped_here = false; |
692 | 677 | ||
693 | BUG_ON(patch != 0 && ch_ctx == NULL); | 678 | BUG_ON(patch != 0 && ch_ctx == NULL); |
@@ -708,11 +693,10 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, | |||
708 | } else | 693 | } else |
709 | mapped_here = false; | 694 | mapped_here = false; |
710 | 695 | ||
711 | patch_ptr = ch_ctx->patch_ctx.mem.cpu_va; | ||
712 | patch_slot = ch_ctx->patch_ctx.data_count * 2; | 696 | patch_slot = ch_ctx->patch_ctx.data_count * 2; |
713 | 697 | ||
714 | gk20a_mem_wr32(patch_ptr, patch_slot++, addr); | 698 | gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, addr); |
715 | gk20a_mem_wr32(patch_ptr, patch_slot++, data); | 699 | gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, data); |
716 | 700 | ||
717 | ch_ctx->patch_ctx.data_count++; | 701 | ch_ctx->patch_ctx.data_count++; |
718 | 702 | ||
@@ -760,16 +744,13 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, | |||
760 | static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | 744 | static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) |
761 | { | 745 | { |
762 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 746 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
747 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; | ||
763 | u32 va_lo, va_hi, va; | 748 | u32 va_lo, va_hi, va; |
764 | int ret = 0; | 749 | int ret = 0; |
765 | void *ctx_ptr = NULL; | ||
766 | 750 | ||
767 | gk20a_dbg_fn(""); | 751 | gk20a_dbg_fn(""); |
768 | 752 | ||
769 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 753 | if (gk20a_mem_begin(g, mem)) |
770 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
771 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
772 | if (!ctx_ptr) | ||
773 | return -ENOMEM; | 754 | return -ENOMEM; |
774 | 755 | ||
775 | if (ch_ctx->zcull_ctx.gpu_va == 0 && | 756 | if (ch_ctx->zcull_ctx.gpu_va == 0 && |
@@ -792,15 +773,17 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | |||
792 | goto clean_up; | 773 | goto clean_up; |
793 | } | 774 | } |
794 | 775 | ||
795 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, | 776 | gk20a_mem_wr(g, mem, |
777 | ctxsw_prog_main_image_zcull_o(), | ||
796 | ch_ctx->zcull_ctx.ctx_sw_mode); | 778 | ch_ctx->zcull_ctx.ctx_sw_mode); |
797 | 779 | ||
798 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va); | 780 | gk20a_mem_wr(g, mem, |
781 | ctxsw_prog_main_image_zcull_ptr_o(), va); | ||
799 | 782 | ||
800 | c->g->ops.fifo.enable_channel(c); | 783 | c->g->ops.fifo.enable_channel(c); |
801 | 784 | ||
802 | clean_up: | 785 | clean_up: |
803 | vunmap(ctx_ptr); | 786 | gk20a_mem_end(g, mem); |
804 | 787 | ||
805 | return ret; | 788 | return ret; |
806 | } | 789 | } |
@@ -1500,8 +1483,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1500 | u32 ctx_header_words; | 1483 | u32 ctx_header_words; |
1501 | u32 i; | 1484 | u32 i; |
1502 | u32 data; | 1485 | u32 data; |
1503 | void *ctx_ptr = NULL; | 1486 | struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; |
1504 | void *gold_ptr = NULL; | 1487 | struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem; |
1505 | u32 err = 0; | 1488 | u32 err = 0; |
1506 | 1489 | ||
1507 | gk20a_dbg_fn(""); | 1490 | gk20a_dbg_fn(""); |
@@ -1527,16 +1510,10 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1527 | if (err) | 1510 | if (err) |
1528 | goto clean_up; | 1511 | goto clean_up; |
1529 | 1512 | ||
1530 | gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].mem.pages, | 1513 | if (gk20a_mem_begin(g, gold_mem)) |
1531 | PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].mem.size) >> | ||
1532 | PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1533 | if (!gold_ptr) | ||
1534 | goto clean_up; | 1514 | goto clean_up; |
1535 | 1515 | ||
1536 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 1516 | if (gk20a_mem_begin(g, gr_mem)) |
1537 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1538 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1539 | if (!ctx_ptr) | ||
1540 | goto clean_up; | 1517 | goto clean_up; |
1541 | 1518 | ||
1542 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); | 1519 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); |
@@ -1545,14 +1522,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1545 | g->ops.mm.l2_flush(g, true); | 1522 | g->ops.mm.l2_flush(g, true); |
1546 | 1523 | ||
1547 | for (i = 0; i < ctx_header_words; i++) { | 1524 | for (i = 0; i < ctx_header_words; i++) { |
1548 | data = gk20a_mem_rd32(ctx_ptr, i); | 1525 | data = gk20a_mem_rd32(g, gr_mem, i); |
1549 | gk20a_mem_wr32(gold_ptr, i, data); | 1526 | gk20a_mem_wr32(g, gold_mem, i, data); |
1550 | } | 1527 | } |
1551 | 1528 | ||
1552 | gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0, | 1529 | gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(), |
1553 | ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); | 1530 | ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); |
1554 | 1531 | ||
1555 | gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0); | 1532 | gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0); |
1556 | 1533 | ||
1557 | gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); | 1534 | gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); |
1558 | 1535 | ||
@@ -1568,12 +1545,12 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1568 | goto clean_up; | 1545 | goto clean_up; |
1569 | } | 1546 | } |
1570 | 1547 | ||
1571 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) | 1548 | gk20a_mem_rd_n(g, gold_mem, 0, |
1572 | gr->ctx_vars.local_golden_image[i] = | 1549 | gr->ctx_vars.local_golden_image, |
1573 | gk20a_mem_rd32(gold_ptr, i); | 1550 | gr->ctx_vars.golden_image_size); |
1574 | } | 1551 | } |
1575 | 1552 | ||
1576 | gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); | 1553 | gr_gk20a_commit_inst(c, gr_mem->gpu_va); |
1577 | 1554 | ||
1578 | gr->ctx_vars.golden_image_initialized = true; | 1555 | gr->ctx_vars.golden_image_initialized = true; |
1579 | 1556 | ||
@@ -1586,10 +1563,8 @@ clean_up: | |||
1586 | else | 1563 | else |
1587 | gk20a_dbg_fn("done"); | 1564 | gk20a_dbg_fn("done"); |
1588 | 1565 | ||
1589 | if (gold_ptr) | 1566 | gk20a_mem_end(g, gold_mem); |
1590 | vunmap(gold_ptr); | 1567 | gk20a_mem_end(g, gr_mem); |
1591 | if (ctx_ptr) | ||
1592 | vunmap(ctx_ptr); | ||
1593 | 1568 | ||
1594 | mutex_unlock(&gr->ctx_mutex); | 1569 | mutex_unlock(&gr->ctx_mutex); |
1595 | return err; | 1570 | return err; |
@@ -1600,7 +1575,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1600 | bool enable_smpc_ctxsw) | 1575 | bool enable_smpc_ctxsw) |
1601 | { | 1576 | { |
1602 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1577 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
1603 | void *ctx_ptr = NULL; | 1578 | struct mem_desc *mem; |
1604 | u32 data; | 1579 | u32 data; |
1605 | int ret; | 1580 | int ret; |
1606 | 1581 | ||
@@ -1611,46 +1586,39 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1611 | return -EFAULT; | 1586 | return -EFAULT; |
1612 | } | 1587 | } |
1613 | 1588 | ||
1589 | mem = &ch_ctx->gr_ctx->mem; | ||
1590 | |||
1614 | c->g->ops.fifo.disable_channel(c); | 1591 | c->g->ops.fifo.disable_channel(c); |
1615 | ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); | 1592 | ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); |
1616 | if (ret) { | 1593 | if (ret) { |
1617 | c->g->ops.fifo.enable_channel(c); | 1594 | gk20a_err(dev_from_gk20a(g), "failed to preempt channel"); |
1618 | gk20a_err(dev_from_gk20a(g), | 1595 | goto out; |
1619 | "failed to preempt channel\n"); | ||
1620 | return ret; | ||
1621 | } | 1596 | } |
1622 | 1597 | ||
1623 | /* Channel gr_ctx buffer is gpu cacheable. | 1598 | /* Channel gr_ctx buffer is gpu cacheable. |
1624 | Flush and invalidate before cpu update. */ | 1599 | Flush and invalidate before cpu update. */ |
1625 | g->ops.mm.l2_flush(g, true); | 1600 | g->ops.mm.l2_flush(g, true); |
1626 | 1601 | ||
1627 | if (!ch_ctx->gr_ctx) { | 1602 | if (gk20a_mem_begin(g, mem)) { |
1628 | gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); | 1603 | ret = -ENOMEM; |
1629 | return -EFAULT; | 1604 | goto out; |
1630 | } | ||
1631 | |||
1632 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | ||
1633 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1634 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1635 | if (!ctx_ptr) { | ||
1636 | c->g->ops.fifo.enable_channel(c); | ||
1637 | return -ENOMEM; | ||
1638 | } | 1605 | } |
1639 | 1606 | ||
1640 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | 1607 | data = gk20a_mem_rd(g, mem, |
1608 | ctxsw_prog_main_image_pm_o()); | ||
1641 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); | 1609 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); |
1642 | data |= enable_smpc_ctxsw ? | 1610 | data |= enable_smpc_ctxsw ? |
1643 | ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : | 1611 | ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : |
1644 | ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); | 1612 | ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); |
1645 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, | 1613 | gk20a_mem_wr(g, mem, |
1646 | data); | 1614 | ctxsw_prog_main_image_pm_o(), |
1615 | data); | ||
1647 | 1616 | ||
1648 | vunmap(ctx_ptr); | 1617 | gk20a_mem_end(g, mem); |
1649 | 1618 | ||
1650 | /* enable channel */ | 1619 | out: |
1651 | c->g->ops.fifo.enable_channel(c); | 1620 | c->g->ops.fifo.enable_channel(c); |
1652 | 1621 | return ret; | |
1653 | return 0; | ||
1654 | } | 1622 | } |
1655 | 1623 | ||
1656 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | 1624 | int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, |
@@ -1659,8 +1627,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1659 | { | 1627 | { |
1660 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1628 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
1661 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 1629 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; |
1662 | void *ctx_ptr = NULL; | 1630 | struct mem_desc *gr_mem; |
1663 | void *pm_ctx_ptr; | ||
1664 | u32 data, virt_addr; | 1631 | u32 data, virt_addr; |
1665 | int ret; | 1632 | int ret; |
1666 | 1633 | ||
@@ -1671,6 +1638,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1671 | return -EFAULT; | 1638 | return -EFAULT; |
1672 | } | 1639 | } |
1673 | 1640 | ||
1641 | gr_mem = &ch_ctx->gr_ctx->mem; | ||
1642 | |||
1674 | if (enable_hwpm_ctxsw) { | 1643 | if (enable_hwpm_ctxsw) { |
1675 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | 1644 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) |
1676 | return 0; | 1645 | return 0; |
@@ -1721,29 +1690,22 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1721 | } | 1690 | } |
1722 | 1691 | ||
1723 | /* Now clear the buffer */ | 1692 | /* Now clear the buffer */ |
1724 | pm_ctx_ptr = vmap(pm_ctx->mem.pages, | 1693 | if (gk20a_mem_begin(g, &pm_ctx->mem)) { |
1725 | PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT, | ||
1726 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1727 | |||
1728 | if (!pm_ctx_ptr) { | ||
1729 | ret = -ENOMEM; | 1694 | ret = -ENOMEM; |
1730 | goto cleanup_pm_buf; | 1695 | goto cleanup_pm_buf; |
1731 | } | 1696 | } |
1732 | 1697 | ||
1733 | memset(pm_ctx_ptr, 0, pm_ctx->mem.size); | 1698 | gk20a_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size); |
1734 | 1699 | ||
1735 | vunmap(pm_ctx_ptr); | 1700 | gk20a_mem_end(g, &pm_ctx->mem); |
1736 | } | 1701 | } |
1737 | 1702 | ||
1738 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 1703 | if (gk20a_mem_begin(g, gr_mem)) { |
1739 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1740 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1741 | if (!ctx_ptr) { | ||
1742 | ret = -ENOMEM; | 1704 | ret = -ENOMEM; |
1743 | goto cleanup_pm_buf; | 1705 | goto cleanup_pm_buf; |
1744 | } | 1706 | } |
1745 | 1707 | ||
1746 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | 1708 | data = gk20a_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); |
1747 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | 1709 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); |
1748 | 1710 | ||
1749 | if (enable_hwpm_ctxsw) { | 1711 | if (enable_hwpm_ctxsw) { |
@@ -1760,10 +1722,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1760 | 1722 | ||
1761 | data |= pm_ctx->pm_mode; | 1723 | data |= pm_ctx->pm_mode; |
1762 | 1724 | ||
1763 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); | 1725 | gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data); |
1764 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); | 1726 | gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr); |
1765 | 1727 | ||
1766 | vunmap(ctx_ptr); | 1728 | gk20a_mem_end(g, gr_mem); |
1767 | 1729 | ||
1768 | /* enable channel */ | 1730 | /* enable channel */ |
1769 | c->g->ops.fifo.enable_channel(c); | 1731 | c->g->ops.fifo.enable_channel(c); |
@@ -1788,9 +1750,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1788 | u32 virt_addr_lo; | 1750 | u32 virt_addr_lo; |
1789 | u32 virt_addr_hi; | 1751 | u32 virt_addr_hi; |
1790 | u32 virt_addr = 0; | 1752 | u32 virt_addr = 0; |
1791 | u32 i, v, data; | 1753 | u32 v, data; |
1792 | int ret = 0; | 1754 | int ret = 0; |
1793 | void *ctx_ptr = NULL; | 1755 | struct mem_desc *mem = &ch_ctx->gr_ctx->mem; |
1794 | 1756 | ||
1795 | gk20a_dbg_fn(""); | 1757 | gk20a_dbg_fn(""); |
1796 | 1758 | ||
@@ -1801,20 +1763,18 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1801 | Flush and invalidate before cpu update. */ | 1763 | Flush and invalidate before cpu update. */ |
1802 | g->ops.mm.l2_flush(g, true); | 1764 | g->ops.mm.l2_flush(g, true); |
1803 | 1765 | ||
1804 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 1766 | if (gk20a_mem_begin(g, mem)) |
1805 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
1806 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
1807 | if (!ctx_ptr) | ||
1808 | return -ENOMEM; | 1767 | return -ENOMEM; |
1809 | 1768 | ||
1810 | for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) | 1769 | gk20a_mem_wr_n(g, mem, 0, |
1811 | gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); | 1770 | gr->ctx_vars.local_golden_image, |
1771 | gr->ctx_vars.golden_image_size); | ||
1812 | 1772 | ||
1813 | if (g->ops.gr.enable_cde_in_fecs && c->cde) | 1773 | if (g->ops.gr.enable_cde_in_fecs && c->cde) |
1814 | g->ops.gr.enable_cde_in_fecs(ctx_ptr); | 1774 | g->ops.gr.enable_cde_in_fecs(g, mem); |
1815 | 1775 | ||
1816 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); | 1776 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0); |
1817 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); | 1777 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0); |
1818 | 1778 | ||
1819 | /* set priv access map */ | 1779 | /* set priv access map */ |
1820 | virt_addr_lo = | 1780 | virt_addr_lo = |
@@ -1827,29 +1787,29 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1827 | else | 1787 | else |
1828 | data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); | 1788 | data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); |
1829 | 1789 | ||
1830 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0, | 1790 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), |
1831 | data); | 1791 | data); |
1832 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0, | 1792 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_lo_o(), |
1833 | virt_addr_lo); | 1793 | virt_addr_lo); |
1834 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0, | 1794 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_hi_o(), |
1835 | virt_addr_hi); | 1795 | virt_addr_hi); |
1836 | /* disable verif features */ | 1796 | /* disable verif features */ |
1837 | v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0); | 1797 | v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); |
1838 | v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); | 1798 | v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); |
1839 | v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); | 1799 | v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); |
1840 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); | 1800 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); |
1841 | 1801 | ||
1842 | if (g->ops.gr.update_ctxsw_preemption_mode) | 1802 | if (g->ops.gr.update_ctxsw_preemption_mode) |
1843 | g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr); | 1803 | g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); |
1844 | 1804 | ||
1845 | virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); | 1805 | virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); |
1846 | virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); | 1806 | virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); |
1847 | 1807 | ||
1848 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0, | 1808 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), |
1849 | ch_ctx->patch_ctx.data_count); | 1809 | ch_ctx->patch_ctx.data_count); |
1850 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0, | 1810 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(), |
1851 | virt_addr_lo); | 1811 | virt_addr_lo); |
1852 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, | 1812 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(), |
1853 | virt_addr_hi); | 1813 | virt_addr_hi); |
1854 | 1814 | ||
1855 | /* Update main header region of the context buffer with the info needed | 1815 | /* Update main header region of the context buffer with the info needed |
@@ -1860,7 +1820,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1860 | if (ch_ctx->pm_ctx.mem.gpu_va == 0) { | 1820 | if (ch_ctx->pm_ctx.mem.gpu_va == 0) { |
1861 | gk20a_err(dev_from_gk20a(g), | 1821 | gk20a_err(dev_from_gk20a(g), |
1862 | "context switched pm with no pm buffer!"); | 1822 | "context switched pm with no pm buffer!"); |
1863 | vunmap(ctx_ptr); | 1823 | gk20a_mem_end(g, mem); |
1864 | return -EFAULT; | 1824 | return -EFAULT; |
1865 | } | 1825 | } |
1866 | 1826 | ||
@@ -1871,14 +1831,14 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1871 | } else | 1831 | } else |
1872 | virt_addr = 0; | 1832 | virt_addr = 0; |
1873 | 1833 | ||
1874 | data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); | 1834 | data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); |
1875 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | 1835 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); |
1876 | data |= ch_ctx->pm_ctx.pm_mode; | 1836 | data |= ch_ctx->pm_ctx.pm_mode; |
1877 | 1837 | ||
1878 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); | 1838 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data); |
1879 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); | 1839 | gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr); |
1880 | 1840 | ||
1881 | vunmap(ctx_ptr); | 1841 | gk20a_mem_end(g, mem); |
1882 | 1842 | ||
1883 | if (tegra_platform_is_linsim()) { | 1843 | if (tegra_platform_is_linsim()) { |
1884 | u32 inst_base_ptr = | 1844 | u32 inst_base_ptr = |
@@ -1978,16 +1938,20 @@ static void gr_gk20a_init_ctxsw_ucode_segments( | |||
1978 | } | 1938 | } |
1979 | 1939 | ||
1980 | static int gr_gk20a_copy_ctxsw_ucode_segments( | 1940 | static int gr_gk20a_copy_ctxsw_ucode_segments( |
1981 | u8 *buf, | 1941 | struct gk20a *g, |
1942 | struct mem_desc *dst, | ||
1982 | struct gk20a_ctxsw_ucode_segments *segments, | 1943 | struct gk20a_ctxsw_ucode_segments *segments, |
1983 | u32 *bootimage, | 1944 | u32 *bootimage, |
1984 | u32 *code, u32 *data) | 1945 | u32 *code, u32 *data) |
1985 | { | 1946 | { |
1986 | int i; | 1947 | int i; |
1987 | 1948 | ||
1988 | memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); | 1949 | gk20a_mem_wr_n(g, dst, segments->boot.offset, bootimage, |
1989 | memcpy(buf + segments->code.offset, code, segments->code.size); | 1950 | segments->boot.size); |
1990 | memcpy(buf + segments->data.offset, data, segments->data.size); | 1951 | gk20a_mem_wr_n(g, dst, segments->code.offset, code, |
1952 | segments->code.size); | ||
1953 | gk20a_mem_wr_n(g, dst, segments->data.offset, data, | ||
1954 | segments->data.size); | ||
1991 | 1955 | ||
1992 | /* compute a "checksum" for the boot binary to detect its version */ | 1956 | /* compute a "checksum" for the boot binary to detect its version */ |
1993 | segments->boot_signature = 0; | 1957 | segments->boot_signature = 0; |
@@ -2009,7 +1973,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2009 | u32 *fecs_boot_image; | 1973 | u32 *fecs_boot_image; |
2010 | u32 *gpccs_boot_image; | 1974 | u32 *gpccs_boot_image; |
2011 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; | 1975 | struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; |
2012 | u8 *buf; | ||
2013 | u32 ucode_size; | 1976 | u32 ucode_size; |
2014 | int err = 0; | 1977 | int err = 0; |
2015 | 1978 | ||
@@ -2049,14 +2012,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2049 | if (err) | 2012 | if (err) |
2050 | goto clean_up; | 2013 | goto clean_up; |
2051 | 2014 | ||
2052 | buf = (u8 *)ucode_info->surface_desc.cpu_va; | 2015 | gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, |
2053 | if (!buf) { | 2016 | &ucode_info->fecs, |
2054 | gk20a_err(d, "failed to map surface desc buffer"); | ||
2055 | err = -ENOMEM; | ||
2056 | goto clean_up; | ||
2057 | } | ||
2058 | |||
2059 | gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs, | ||
2060 | fecs_boot_image, | 2017 | fecs_boot_image, |
2061 | g->gr.ctx_vars.ucode.fecs.inst.l, | 2018 | g->gr.ctx_vars.ucode.fecs.inst.l, |
2062 | g->gr.ctx_vars.ucode.fecs.data.l); | 2019 | g->gr.ctx_vars.ucode.fecs.data.l); |
@@ -2064,7 +2021,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2064 | release_firmware(fecs_fw); | 2021 | release_firmware(fecs_fw); |
2065 | fecs_fw = NULL; | 2022 | fecs_fw = NULL; |
2066 | 2023 | ||
2067 | gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs, | 2024 | gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, |
2025 | &ucode_info->gpccs, | ||
2068 | gpccs_boot_image, | 2026 | gpccs_boot_image, |
2069 | g->gr.ctx_vars.ucode.gpccs.inst.l, | 2027 | g->gr.ctx_vars.ucode.gpccs.inst.l, |
2070 | g->gr.ctx_vars.ucode.gpccs.data.l); | 2028 | g->gr.ctx_vars.ucode.gpccs.data.l); |
@@ -4690,41 +4648,38 @@ out: | |||
4690 | static int gr_gk20a_init_access_map(struct gk20a *g) | 4648 | static int gr_gk20a_init_access_map(struct gk20a *g) |
4691 | { | 4649 | { |
4692 | struct gr_gk20a *gr = &g->gr; | 4650 | struct gr_gk20a *gr = &g->gr; |
4693 | void *data; | 4651 | struct mem_desc *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; |
4694 | int err = 0; | ||
4695 | u32 w, nr_pages = | 4652 | u32 w, nr_pages = |
4696 | DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, | 4653 | DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, |
4697 | PAGE_SIZE); | 4654 | PAGE_SIZE); |
4698 | u32 *whitelist = NULL; | 4655 | u32 *whitelist = NULL; |
4699 | int num_entries = 0; | 4656 | int num_entries = 0; |
4700 | 4657 | ||
4701 | data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.pages, | 4658 | if (gk20a_mem_begin(g, mem)) { |
4702 | PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size) >> | ||
4703 | PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL)); | ||
4704 | if (!data) { | ||
4705 | gk20a_err(dev_from_gk20a(g), | 4659 | gk20a_err(dev_from_gk20a(g), |
4706 | "failed to map priv access map memory"); | 4660 | "failed to map priv access map memory"); |
4707 | err = -ENOMEM; | 4661 | return -ENOMEM; |
4708 | goto clean_up; | ||
4709 | } | 4662 | } |
4710 | 4663 | ||
4711 | memset(data, 0x0, PAGE_SIZE * nr_pages); | 4664 | gk20a_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages); |
4712 | 4665 | ||
4713 | g->ops.gr.get_access_map(g, &whitelist, &num_entries); | 4666 | g->ops.gr.get_access_map(g, &whitelist, &num_entries); |
4714 | 4667 | ||
4715 | for (w = 0; w < num_entries; w++) { | 4668 | for (w = 0; w < num_entries; w++) { |
4716 | u32 map_bit, map_byte, map_shift; | 4669 | u32 map_bit, map_byte, map_shift, x; |
4717 | map_bit = whitelist[w] >> 2; | 4670 | map_bit = whitelist[w] >> 2; |
4718 | map_byte = map_bit >> 3; | 4671 | map_byte = map_bit >> 3; |
4719 | map_shift = map_bit & 0x7; /* i.e. 0-7 */ | 4672 | map_shift = map_bit & 0x7; /* i.e. 0-7 */ |
4720 | gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", | 4673 | gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", |
4721 | whitelist[w], map_byte, map_shift); | 4674 | whitelist[w], map_byte, map_shift); |
4722 | ((u8 *)data)[map_byte] |= 1 << map_shift; | 4675 | x = gk20a_mem_rd32(g, mem, map_byte / sizeof(u32)); |
4676 | x |= 1 << ( | ||
4677 | (map_byte % sizeof(u32) * BITS_PER_BYTE) | ||
4678 | + map_shift); | ||
4679 | gk20a_mem_wr32(g, mem, map_byte / sizeof(u32), x); | ||
4723 | } | 4680 | } |
4724 | 4681 | ||
4725 | clean_up: | 4682 | gk20a_mem_end(g, mem); |
4726 | if (data) | ||
4727 | vunmap(data); | ||
4728 | return 0; | 4683 | return 0; |
4729 | } | 4684 | } |
4730 | 4685 | ||
@@ -6659,7 +6614,7 @@ static void gr_gk20a_init_sm_dsm_reg_info(void) | |||
6659 | static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | 6614 | static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, |
6660 | struct channel_ctx_gk20a *ch_ctx, | 6615 | struct channel_ctx_gk20a *ch_ctx, |
6661 | u32 addr, u32 data, | 6616 | u32 addr, u32 data, |
6662 | u8 *context) | 6617 | struct mem_desc *mem) |
6663 | { | 6618 | { |
6664 | u32 num_gpc = g->gr.gpc_count; | 6619 | u32 num_gpc = g->gr.gpc_count; |
6665 | u32 num_tpc; | 6620 | u32 num_tpc; |
@@ -6688,8 +6643,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6688 | /* reset the patch count from previous | 6643 | /* reset the patch count from previous |
6689 | runs,if ucode has already processed | 6644 | runs,if ucode has already processed |
6690 | it */ | 6645 | it */ |
6691 | tmp = gk20a_mem_rd32(context + | 6646 | tmp = gk20a_mem_rd(g, mem, |
6692 | ctxsw_prog_main_image_patch_count_o(), 0); | 6647 | ctxsw_prog_main_image_patch_count_o()); |
6693 | 6648 | ||
6694 | if (!tmp) | 6649 | if (!tmp) |
6695 | ch_ctx->patch_ctx.data_count = 0; | 6650 | ch_ctx->patch_ctx.data_count = 0; |
@@ -6700,15 +6655,15 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6700 | vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); | 6655 | vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); |
6701 | vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); | 6656 | vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); |
6702 | 6657 | ||
6703 | gk20a_mem_wr32(context + | 6658 | gk20a_mem_wr(g, mem, |
6704 | ctxsw_prog_main_image_patch_count_o(), | 6659 | ctxsw_prog_main_image_patch_count_o(), |
6705 | 0, ch_ctx->patch_ctx.data_count); | 6660 | ch_ctx->patch_ctx.data_count); |
6706 | gk20a_mem_wr32(context + | 6661 | gk20a_mem_wr(g, mem, |
6707 | ctxsw_prog_main_image_patch_adr_lo_o(), | 6662 | ctxsw_prog_main_image_patch_adr_lo_o(), |
6708 | 0, vaddr_lo); | 6663 | vaddr_lo); |
6709 | gk20a_mem_wr32(context + | 6664 | gk20a_mem_wr(g, mem, |
6710 | ctxsw_prog_main_image_patch_adr_hi_o(), | 6665 | ctxsw_prog_main_image_patch_adr_hi_o(), |
6711 | 0, vaddr_hi); | 6666 | vaddr_hi); |
6712 | 6667 | ||
6713 | /* we're not caching these on cpu side, | 6668 | /* we're not caching these on cpu side, |
6714 | but later watch for it */ | 6669 | but later watch for it */ |
@@ -6760,17 +6715,15 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) | |||
6760 | 6715 | ||
6761 | #define ILLEGAL_ID (~0) | 6716 | #define ILLEGAL_ID (~0) |
6762 | 6717 | ||
6763 | static inline bool check_main_image_header_magic(void *context) | 6718 | static inline bool check_main_image_header_magic(u8 *context) |
6764 | { | 6719 | { |
6765 | u32 magic = gk20a_mem_rd32(context + | 6720 | u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o()); |
6766 | ctxsw_prog_main_image_magic_value_o(), 0); | ||
6767 | gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); | 6721 | gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); |
6768 | return magic == ctxsw_prog_main_image_magic_value_v_value_v(); | 6722 | return magic == ctxsw_prog_main_image_magic_value_v_value_v(); |
6769 | } | 6723 | } |
6770 | static inline bool check_local_header_magic(void *context) | 6724 | static inline bool check_local_header_magic(u8 *context) |
6771 | { | 6725 | { |
6772 | u32 magic = gk20a_mem_rd32(context + | 6726 | u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o()); |
6773 | ctxsw_prog_local_magic_value_o(), 0); | ||
6774 | gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); | 6727 | gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); |
6775 | return magic == ctxsw_prog_local_magic_value_v_value_v(); | 6728 | return magic == ctxsw_prog_local_magic_value_v_value_v(); |
6776 | 6729 | ||
@@ -6814,7 +6767,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6814 | u32 num_gpcs, num_tpcs; | 6767 | u32 num_gpcs, num_tpcs; |
6815 | u32 chk_addr; | 6768 | u32 chk_addr; |
6816 | u32 ext_priv_offset, ext_priv_size; | 6769 | u32 ext_priv_offset, ext_priv_size; |
6817 | void *context; | 6770 | u8 *context; |
6818 | u32 offset_to_segment, offset_to_segment_end; | 6771 | u32 offset_to_segment, offset_to_segment_end; |
6819 | u32 sm_dsm_perf_reg_id = ILLEGAL_ID; | 6772 | u32 sm_dsm_perf_reg_id = ILLEGAL_ID; |
6820 | u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; | 6773 | u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; |
@@ -6856,14 +6809,14 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6856 | /* note below is in words/num_registers */ | 6809 | /* note below is in words/num_registers */ |
6857 | marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; | 6810 | marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; |
6858 | 6811 | ||
6859 | context = context_buffer; | 6812 | context = (u8 *)context_buffer; |
6860 | /* sanity check main header */ | 6813 | /* sanity check main header */ |
6861 | if (!check_main_image_header_magic(context)) { | 6814 | if (!check_main_image_header_magic(context)) { |
6862 | gk20a_err(dev_from_gk20a(g), | 6815 | gk20a_err(dev_from_gk20a(g), |
6863 | "Invalid main header: magic value"); | 6816 | "Invalid main header: magic value"); |
6864 | return -EINVAL; | 6817 | return -EINVAL; |
6865 | } | 6818 | } |
6866 | num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); | 6819 | num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); |
6867 | if (gpc_num >= num_gpcs) { | 6820 | if (gpc_num >= num_gpcs) { |
6868 | gk20a_err(dev_from_gk20a(g), | 6821 | gk20a_err(dev_from_gk20a(g), |
6869 | "GPC 0x%08x is greater than total count 0x%08x!\n", | 6822 | "GPC 0x%08x is greater than total count 0x%08x!\n", |
@@ -6871,7 +6824,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, | |||
6871 | return -EINVAL; | 6824 | return -EINVAL; |
6872 | } | 6825 | } |
6873 | 6826 | ||
6874 | data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0); | 6827 | data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o()); |
6875 | ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); | 6828 | ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); |
6876 | if (0 == ext_priv_size) { | 6829 | if (0 == ext_priv_size) { |
6877 | gk20a_dbg_info(" No extended memory in context buffer"); | 6830 | gk20a_dbg_info(" No extended memory in context buffer"); |
@@ -7149,7 +7102,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, | |||
7149 | } | 7102 | } |
7150 | 7103 | ||
7151 | static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | 7104 | static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, |
7152 | void *context, | 7105 | u8 *context, |
7153 | u32 *num_ppcs, u32 *ppc_mask, | 7106 | u32 *num_ppcs, u32 *ppc_mask, |
7154 | u32 *reg_ppc_count) | 7107 | u32 *reg_ppc_count) |
7155 | { | 7108 | { |
@@ -7165,7 +7118,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
7165 | (num_pes_per_gpc > 1))) | 7118 | (num_pes_per_gpc > 1))) |
7166 | return -EINVAL; | 7119 | return -EINVAL; |
7167 | 7120 | ||
7168 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); | 7121 | data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o()); |
7169 | 7122 | ||
7170 | *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); | 7123 | *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); |
7171 | *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); | 7124 | *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); |
@@ -7177,7 +7130,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, | |||
7177 | 7130 | ||
7178 | /* | 7131 | /* |
7179 | * This function will return the 32 bit offset for a priv register if it is | 7132 | * This function will return the 32 bit offset for a priv register if it is |
7180 | * present in the context buffer. | 7133 | * present in the context buffer. The context buffer is in CPU memory. |
7181 | */ | 7134 | */ |
7182 | static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | 7135 | static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, |
7183 | u32 addr, | 7136 | u32 addr, |
@@ -7196,7 +7149,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7196 | u32 offset; | 7149 | u32 offset; |
7197 | u32 sys_priv_offset, gpc_priv_offset; | 7150 | u32 sys_priv_offset, gpc_priv_offset; |
7198 | u32 ppc_mask, reg_list_ppc_count; | 7151 | u32 ppc_mask, reg_list_ppc_count; |
7199 | void *context; | 7152 | u8 *context; |
7200 | u32 offset_to_segment; | 7153 | u32 offset_to_segment; |
7201 | 7154 | ||
7202 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); | 7155 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); |
@@ -7207,13 +7160,13 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7207 | if (err) | 7160 | if (err) |
7208 | return err; | 7161 | return err; |
7209 | 7162 | ||
7210 | context = context_buffer; | 7163 | context = (u8 *)context_buffer; |
7211 | if (!check_main_image_header_magic(context)) { | 7164 | if (!check_main_image_header_magic(context)) { |
7212 | gk20a_err(dev_from_gk20a(g), | 7165 | gk20a_err(dev_from_gk20a(g), |
7213 | "Invalid main header: magic value"); | 7166 | "Invalid main header: magic value"); |
7214 | return -EINVAL; | 7167 | return -EINVAL; |
7215 | } | 7168 | } |
7216 | num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); | 7169 | num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); |
7217 | 7170 | ||
7218 | /* Parse the FECS local header. */ | 7171 | /* Parse the FECS local header. */ |
7219 | context += ctxsw_prog_ucode_header_size_in_bytes(); | 7172 | context += ctxsw_prog_ucode_header_size_in_bytes(); |
@@ -7222,7 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7222 | "Invalid FECS local header: magic value\n"); | 7175 | "Invalid FECS local header: magic value\n"); |
7223 | return -EINVAL; | 7176 | return -EINVAL; |
7224 | } | 7177 | } |
7225 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); | 7178 | data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); |
7226 | sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); | 7179 | sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); |
7227 | 7180 | ||
7228 | /* If found in Ext buffer, ok. | 7181 | /* If found in Ext buffer, ok. |
@@ -7268,7 +7221,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7268 | return -EINVAL; | 7221 | return -EINVAL; |
7269 | 7222 | ||
7270 | } | 7223 | } |
7271 | data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); | 7224 | data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); |
7272 | gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); | 7225 | gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); |
7273 | 7226 | ||
7274 | err = gr_gk20a_determine_ppc_configuration(g, context, | 7227 | err = gr_gk20a_determine_ppc_configuration(g, context, |
@@ -7277,7 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7277 | if (err) | 7230 | if (err) |
7278 | return err; | 7231 | return err; |
7279 | 7232 | ||
7280 | num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0); | 7233 | num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o()); |
7281 | 7234 | ||
7282 | if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { | 7235 | if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { |
7283 | gk20a_err(dev_from_gk20a(g), | 7236 | gk20a_err(dev_from_gk20a(g), |
@@ -7689,9 +7642,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7689 | { | 7642 | { |
7690 | struct gk20a *g = ch->g; | 7643 | struct gk20a *g = ch->g; |
7691 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 7644 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; |
7692 | void *ctx_ptr = NULL; | 7645 | bool gr_ctx_ready = false; |
7693 | void *pm_ctx_ptr = NULL; | 7646 | bool pm_ctx_ready = false; |
7694 | void *base_ptr = NULL; | 7647 | struct mem_desc *current_mem = NULL; |
7695 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; | 7648 | bool ch_is_curr_ctx, restart_gr_ctxsw = false; |
7696 | u32 i, j, offset, v; | 7649 | u32 i, j, offset, v; |
7697 | struct gr_gk20a *gr = &g->gr; | 7650 | struct gr_gk20a *gr = &g->gr; |
@@ -7821,20 +7774,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7821 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), | 7774 | ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), |
7822 | ctx_ops[i].quad); | 7775 | ctx_ops[i].quad); |
7823 | if (!err) { | 7776 | if (!err) { |
7824 | if (!ctx_ptr) { | 7777 | if (!gr_ctx_ready) { |
7825 | /* would have been a variant of | 7778 | /* would have been a variant of |
7826 | * gr_gk20a_apply_instmem_overrides, | 7779 | * gr_gk20a_apply_instmem_overrides, |
7827 | * recoded in-place instead. | 7780 | * recoded in-place instead. |
7828 | */ | 7781 | */ |
7829 | ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, | 7782 | if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) { |
7830 | PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, | ||
7831 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
7832 | if (!ctx_ptr) { | ||
7833 | err = -ENOMEM; | 7783 | err = -ENOMEM; |
7834 | goto cleanup; | 7784 | goto cleanup; |
7835 | } | 7785 | } |
7786 | gr_ctx_ready = true; | ||
7836 | } | 7787 | } |
7837 | base_ptr = ctx_ptr; | 7788 | current_mem = &ch_ctx->gr_ctx->mem; |
7838 | } else { | 7789 | } else { |
7839 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | 7790 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, |
7840 | ctx_ops[i].offset, | 7791 | ctx_ops[i].offset, |
@@ -7849,7 +7800,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7849 | NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; | 7800 | NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; |
7850 | continue; | 7801 | continue; |
7851 | } | 7802 | } |
7852 | if (!pm_ctx_ptr) { | 7803 | if (!pm_ctx_ready) { |
7853 | /* Make sure ctx buffer was initialized */ | 7804 | /* Make sure ctx buffer was initialized */ |
7854 | if (!ch_ctx->pm_ctx.mem.pages) { | 7805 | if (!ch_ctx->pm_ctx.mem.pages) { |
7855 | gk20a_err(dev_from_gk20a(g), | 7806 | gk20a_err(dev_from_gk20a(g), |
@@ -7857,15 +7808,13 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7857 | err = -EINVAL; | 7808 | err = -EINVAL; |
7858 | goto cleanup; | 7809 | goto cleanup; |
7859 | } | 7810 | } |
7860 | pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages, | 7811 | if (gk20a_mem_begin(g, &ch_ctx->pm_ctx.mem)) { |
7861 | PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT, | ||
7862 | 0, pgprot_writecombine(PAGE_KERNEL)); | ||
7863 | if (!pm_ctx_ptr) { | ||
7864 | err = -ENOMEM; | 7812 | err = -ENOMEM; |
7865 | goto cleanup; | 7813 | goto cleanup; |
7866 | } | 7814 | } |
7815 | pm_ctx_ready = true; | ||
7867 | } | 7816 | } |
7868 | base_ptr = pm_ctx_ptr; | 7817 | current_mem = &ch_ctx->pm_ctx.mem; |
7869 | } | 7818 | } |
7870 | 7819 | ||
7871 | /* if this is a quad access, setup for special access*/ | 7820 | /* if this is a quad access, setup for special access*/ |
@@ -7878,24 +7827,24 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7878 | /* sanity check gr ctxt offsets, | 7827 | /* sanity check gr ctxt offsets, |
7879 | * don't write outside, worst case | 7828 | * don't write outside, worst case |
7880 | */ | 7829 | */ |
7881 | if ((base_ptr == ctx_ptr) && | 7830 | if ((current_mem == &ch_ctx->gr_ctx->mem) && |
7882 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) | 7831 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) |
7883 | continue; | 7832 | continue; |
7884 | if (pass == 0) { /* write pass */ | 7833 | if (pass == 0) { /* write pass */ |
7885 | v = gk20a_mem_rd32(base_ptr + offsets[j], 0); | 7834 | v = gk20a_mem_rd(g, current_mem, offsets[j]); |
7886 | v &= ~ctx_ops[i].and_n_mask_lo; | 7835 | v &= ~ctx_ops[i].and_n_mask_lo; |
7887 | v |= ctx_ops[i].value_lo; | 7836 | v |= ctx_ops[i].value_lo; |
7888 | gk20a_mem_wr32(base_ptr + offsets[j], 0, v); | 7837 | gk20a_mem_wr(g, current_mem, offsets[j], v); |
7889 | 7838 | ||
7890 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7839 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7891 | "context wr: offset=0x%x v=0x%x", | 7840 | "context wr: offset=0x%x v=0x%x", |
7892 | offsets[j], v); | 7841 | offsets[j], v); |
7893 | 7842 | ||
7894 | if (ctx_ops[i].op == REGOP(WRITE_64)) { | 7843 | if (ctx_ops[i].op == REGOP(WRITE_64)) { |
7895 | v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0); | 7844 | v = gk20a_mem_rd(g, current_mem, offsets[j] + 4); |
7896 | v &= ~ctx_ops[i].and_n_mask_hi; | 7845 | v &= ~ctx_ops[i].and_n_mask_hi; |
7897 | v |= ctx_ops[i].value_hi; | 7846 | v |= ctx_ops[i].value_hi; |
7898 | gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v); | 7847 | gk20a_mem_wr(g, current_mem, offsets[j] + 4, v); |
7899 | 7848 | ||
7900 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7849 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7901 | "context wr: offset=0x%x v=0x%x", | 7850 | "context wr: offset=0x%x v=0x%x", |
@@ -7905,18 +7854,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7905 | /* check to see if we need to add a special WAR | 7854 | /* check to see if we need to add a special WAR |
7906 | for some of the SMPC perf regs */ | 7855 | for some of the SMPC perf regs */ |
7907 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], | 7856 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], |
7908 | v, base_ptr); | 7857 | v, current_mem); |
7909 | 7858 | ||
7910 | } else { /* read pass */ | 7859 | } else { /* read pass */ |
7911 | ctx_ops[i].value_lo = | 7860 | ctx_ops[i].value_lo = |
7912 | gk20a_mem_rd32(base_ptr + offsets[0], 0); | 7861 | gk20a_mem_rd(g, current_mem, offsets[0]); |
7913 | 7862 | ||
7914 | gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", | 7863 | gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", |
7915 | offsets[0], ctx_ops[i].value_lo); | 7864 | offsets[0], ctx_ops[i].value_lo); |
7916 | 7865 | ||
7917 | if (ctx_ops[i].op == REGOP(READ_64)) { | 7866 | if (ctx_ops[i].op == REGOP(READ_64)) { |
7918 | ctx_ops[i].value_hi = | 7867 | ctx_ops[i].value_hi = |
7919 | gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0); | 7868 | gk20a_mem_rd(g, current_mem, offsets[0] + 4); |
7920 | 7869 | ||
7921 | gk20a_dbg(gpu_dbg_gpu_dbg, | 7870 | gk20a_dbg(gpu_dbg_gpu_dbg, |
7922 | "context rd: offset=0x%x v=0x%x", | 7871 | "context rd: offset=0x%x v=0x%x", |
@@ -7943,12 +7892,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7943 | 7892 | ||
7944 | if (ch_ctx->patch_ctx.mem.cpu_va) | 7893 | if (ch_ctx->patch_ctx.mem.cpu_va) |
7945 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); | 7894 | gr_gk20a_ctx_patch_write_end(g, ch_ctx); |
7946 | 7895 | if (gr_ctx_ready) | |
7947 | if (ctx_ptr) | 7896 | gk20a_mem_end(g, &ch_ctx->gr_ctx->mem); |
7948 | vunmap(ctx_ptr); | 7897 | if (pm_ctx_ready) |
7949 | 7898 | gk20a_mem_end(g, &ch_ctx->pm_ctx.mem); | |
7950 | if (pm_ctx_ptr) | ||
7951 | vunmap(pm_ctx_ptr); | ||
7952 | 7899 | ||
7953 | if (restart_gr_ctxsw) { | 7900 | if (restart_gr_ctxsw) { |
7954 | int tmp_err = gr_gk20a_enable_ctxsw(g); | 7901 | int tmp_err = gr_gk20a_enable_ctxsw(g); |