Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  359
1 file changed, 153 insertions, 206 deletions
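
For reference, a minimal sketch (not part of the commit) of the read-modify-write pattern the hunks below convert to: every vmap()/vunmap() pair plus raw pointer arithmetic is replaced by gk20a_mem_begin()/gk20a_mem_end() around offset-based gk20a_mem_rd()/gk20a_mem_wr() accessors. The helper signatures are inferred from their uses in this diff; the function name and its new_mode_f parameter are illustrative only.

	/* hypothetical example, not from gr_gk20a.c */
	static int example_patch_pm_mode(struct gk20a *g, struct mem_desc *mem,
					 u32 new_mode_f)
	{
		u32 data;

		/* map the buffer for CPU access; non-zero return means failure */
		if (gk20a_mem_begin(g, mem))
			return -ENOMEM;

		/* read-modify-write a word at a byte offset within the buffer */
		data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
		data &= ~ctxsw_prog_main_image_pm_mode_m();
		data |= new_mode_f;
		gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);

		/* balance every successful gk20a_mem_begin() */
		gk20a_mem_end(g, mem);
		return 0;
	}
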
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4e7c36ee..e7e6662a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -97,22 +97,18 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
 			u32 *ctx_id)
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	void *ctx_ptr = NULL;
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr)
+	if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem))
 		return -ENOMEM;
 
-	*ctx_id = gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_context_id_o(), 0);
+	*ctx_id = gk20a_mem_rd(g, &ch_ctx->gr_ctx->mem,
+			ctxsw_prog_main_image_context_id_o());
 
-	vunmap(ctx_ptr);
+	gk20a_mem_end(g, &ch_ctx->gr_ctx->mem);
 
 	return 0;
 }
@@ -619,22 +615,17 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 {
 	u32 addr_lo;
 	u32 addr_hi;
-	void *inst_ptr = NULL;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-	if (!inst_ptr)
-		return -ENOMEM;
-
 	addr_lo = u64_lo32(gpu_va) >> 12;
 	addr_hi = u64_hi32(gpu_va);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(),
+	gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(),
 		 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
 		 ram_in_gr_wfi_ptr_lo_f(addr_lo));
 
-	gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
+	gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(),
 		 ram_in_gr_wfi_ptr_hi_f(addr_hi));
 
 	return 0;
@@ -658,11 +649,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 		return -EBUSY;
 	}
 
-	ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages,
-			PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-
-	if (!ch_ctx->patch_ctx.mem.cpu_va)
+	if (gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem))
 		return -ENOMEM;
 
 	return 0;
@@ -677,8 +664,7 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 		return -EINVAL;
 	}
 
-	vunmap(ch_ctx->patch_ctx.mem.cpu_va);
-	ch_ctx->patch_ctx.mem.cpu_va = NULL;
+	gk20a_mem_end(g, &ch_ctx->patch_ctx.mem);
 	return 0;
 }
 
@@ -687,7 +673,6 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 		u32 addr, u32 data, bool patch)
 {
 	u32 patch_slot = 0;
-	void *patch_ptr = NULL;
 	bool mapped_here = false;
 
 	BUG_ON(patch != 0 && ch_ctx == NULL);
@@ -708,11 +693,10 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
 	} else
 		mapped_here = false;
 
-	patch_ptr = ch_ctx->patch_ctx.mem.cpu_va;
 	patch_slot = ch_ctx->patch_ctx.data_count * 2;
 
-	gk20a_mem_wr32(patch_ptr, patch_slot++, addr);
-	gk20a_mem_wr32(patch_ptr, patch_slot++, data);
+	gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, addr);
+	gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, data);
 
 	ch_ctx->patch_ctx.data_count++;
 
@@ -760,16 +744,13 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
 static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
 	u32 va_lo, va_hi, va;
 	int ret = 0;
-	void *ctx_ptr = NULL;
 
 	gk20a_dbg_fn("");
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr)
+	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
 
 	if (ch_ctx->zcull_ctx.gpu_va == 0 &&
@@ -792,15 +773,17 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 		goto clean_up;
 	}
 
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
+	gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_zcull_o(),
 		 ch_ctx->zcull_ctx.ctx_sw_mode);
 
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va);
+	gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_zcull_ptr_o(), va);
 
 	c->g->ops.fifo.enable_channel(c);
 
 clean_up:
-	vunmap(ctx_ptr);
+	gk20a_mem_end(g, mem);
 
 	return ret;
 }
@@ -1500,8 +1483,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	u32 ctx_header_words;
 	u32 i;
 	u32 data;
-	void *ctx_ptr = NULL;
-	void *gold_ptr = NULL;
+	struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
+	struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem;
 	u32 err = 0;
 
 	gk20a_dbg_fn("");
@@ -1527,16 +1510,10 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	if (err)
 		goto clean_up;
 
-	gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].mem.pages,
-			PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].mem.size) >>
-			PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL));
-	if (!gold_ptr)
+	if (gk20a_mem_begin(g, gold_mem))
 		goto clean_up;
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr)
+	if (gk20a_mem_begin(g, gr_mem))
 		goto clean_up;
 
 	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
@@ -1545,14 +1522,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	g->ops.mm.l2_flush(g, true);
 
 	for (i = 0; i < ctx_header_words; i++) {
-		data = gk20a_mem_rd32(ctx_ptr, i);
-		gk20a_mem_wr32(gold_ptr, i, data);
+		data = gk20a_mem_rd32(g, gr_mem, i);
+		gk20a_mem_wr32(g, gold_mem, i, data);
 	}
 
-	gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0,
+	gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
 		 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
 
-	gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0);
+	gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0);
 
 	gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
 
@@ -1568,12 +1545,12 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 			goto clean_up;
 		}
 
-		for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
-			gr->ctx_vars.local_golden_image[i] =
-				gk20a_mem_rd32(gold_ptr, i);
+		gk20a_mem_rd_n(g, gold_mem, 0,
+			gr->ctx_vars.local_golden_image,
+			gr->ctx_vars.golden_image_size);
 	}
 
-	gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
+	gr_gk20a_commit_inst(c, gr_mem->gpu_va);
 
 	gr->ctx_vars.golden_image_initialized = true;
 
@@ -1586,10 +1563,8 @@ clean_up:
 	else
 		gk20a_dbg_fn("done");
 
-	if (gold_ptr)
-		vunmap(gold_ptr);
-	if (ctx_ptr)
-		vunmap(ctx_ptr);
+	gk20a_mem_end(g, gold_mem);
+	gk20a_mem_end(g, gr_mem);
 
 	mutex_unlock(&gr->ctx_mutex);
 	return err;
@@ -1600,7 +1575,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 			       bool enable_smpc_ctxsw)
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	void *ctx_ptr = NULL;
+	struct mem_desc *mem;
 	u32 data;
 	int ret;
 
@@ -1611,46 +1586,39 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 		return -EFAULT;
 	}
 
+	mem = &ch_ctx->gr_ctx->mem;
+
 	c->g->ops.fifo.disable_channel(c);
 	ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
 	if (ret) {
-		c->g->ops.fifo.enable_channel(c);
-		gk20a_err(dev_from_gk20a(g),
-			"failed to preempt channel\n");
-		return ret;
+		gk20a_err(dev_from_gk20a(g), "failed to preempt channel");
+		goto out;
 	}
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (!ch_ctx->gr_ctx) {
-		gk20a_err(dev_from_gk20a(g), "no graphics context allocated");
-		return -EFAULT;
-	}
-
-	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr) {
-		c->g->ops.fifo.enable_channel(c);
-		return -ENOMEM;
+	if (gk20a_mem_begin(g, mem)) {
+		ret = -ENOMEM;
+		goto out;
 	}
 
-	data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+	data = gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_pm_o());
 	data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
 	data |= enable_smpc_ctxsw ?
 		ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
 		ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
-		 data);
+	gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_pm_o(),
+			data);
 
-	vunmap(ctx_ptr);
+	gk20a_mem_end(g, mem);
 
-	/* enable channel */
+out:
 	c->g->ops.fifo.enable_channel(c);
-
-	return 0;
+	return ret;
 }
 
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
@@ -1659,8 +1627,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
 	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
-	void *ctx_ptr = NULL;
-	void *pm_ctx_ptr;
+	struct mem_desc *gr_mem;
 	u32 data, virt_addr;
 	int ret;
 
@@ -1671,6 +1638,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 		return -EFAULT;
 	}
 
+	gr_mem = &ch_ctx->gr_ctx->mem;
+
 	if (enable_hwpm_ctxsw) {
 		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
 			return 0;
@@ -1721,29 +1690,22 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	}
 
 	/* Now clear the buffer */
-		pm_ctx_ptr = vmap(pm_ctx->mem.pages,
-				PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT,
-				0, pgprot_writecombine(PAGE_KERNEL));
-
-		if (!pm_ctx_ptr) {
+		if (gk20a_mem_begin(g, &pm_ctx->mem)) {
 			ret = -ENOMEM;
 			goto cleanup_pm_buf;
 		}
 
-		memset(pm_ctx_ptr, 0, pm_ctx->mem.size);
+		gk20a_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size);
 
-		vunmap(pm_ctx_ptr);
+		gk20a_mem_end(g, &pm_ctx->mem);
 	}
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr) {
+	if (gk20a_mem_begin(g, gr_mem)) {
 		ret = -ENOMEM;
 		goto cleanup_pm_buf;
 	}
 
-	data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+	data = gk20a_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
 	data = data & ~ctxsw_prog_main_image_pm_mode_m();
 
 	if (enable_hwpm_ctxsw) {
@@ -1788,9 +1750,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	u32 virt_addr_lo;
 	u32 virt_addr_hi;
 	u32 virt_addr = 0;
-	u32 i, v, data;
+	u32 v, data;
 	int ret = 0;
-	void *ctx_ptr = NULL;
+	struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
 
 	gk20a_dbg_fn("");
 
@@ -1801,20 +1763,18 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr)
+	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
 
-	for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
-		gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);
+	gk20a_mem_wr_n(g, mem, 0,
+		gr->ctx_vars.local_golden_image,
+		gr->ctx_vars.golden_image_size);
 
 	if (g->ops.gr.enable_cde_in_fecs && c->cde)
-		g->ops.gr.enable_cde_in_fecs(ctx_ptr);
+		g->ops.gr.enable_cde_in_fecs(g, mem);
 
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0);
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0);
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0);
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0);
 
 	/* set priv access map */
 	virt_addr_lo =
@@ -1827,29 +1787,29 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	else
 		data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
 
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0,
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
 		 data);
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0,
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
 		 virt_addr_lo);
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0,
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
 		 virt_addr_hi);
 	/* disable verif features */
-	v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0);
+	v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
 	v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
 	v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v);
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
 
 	if (g->ops.gr.update_ctxsw_preemption_mode)
-		g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr);
+		g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem);
 
 	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
 	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0,
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
 		 ch_ctx->patch_ctx.data_count);
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0,
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(),
 		 virt_addr_lo);
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0,
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(),
 		 virt_addr_hi);
 
 	/* Update main header region of the context buffer with the info needed
@@ -1860,7 +1820,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 		if (ch_ctx->pm_ctx.mem.gpu_va == 0) {
 			gk20a_err(dev_from_gk20a(g),
 				"context switched pm with no pm buffer!");
-			vunmap(ctx_ptr);
+			gk20a_mem_end(g, mem);
 			return -EFAULT;
 		}
 
@@ -1871,14 +1831,14 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	} else
 		virt_addr = 0;
 
-	data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+	data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
 	data = data & ~ctxsw_prog_main_image_pm_mode_m();
 	data |= ch_ctx->pm_ctx.pm_mode;
 
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data);
-	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr);
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
 
-	vunmap(ctx_ptr);
+	gk20a_mem_end(g, mem);
 
 	if (tegra_platform_is_linsim()) {
 		u32 inst_base_ptr =
@@ -1978,16 +1938,20 @@ static void gr_gk20a_init_ctxsw_ucode_segments(
 }
 
 static int gr_gk20a_copy_ctxsw_ucode_segments(
-	u8 *buf,
+	struct gk20a *g,
+	struct mem_desc *dst,
 	struct gk20a_ctxsw_ucode_segments *segments,
 	u32 *bootimage,
 	u32 *code, u32 *data)
 {
 	int i;
 
-	memcpy(buf + segments->boot.offset, bootimage, segments->boot.size);
-	memcpy(buf + segments->code.offset, code, segments->code.size);
-	memcpy(buf + segments->data.offset, data, segments->data.size);
+	gk20a_mem_wr_n(g, dst, segments->boot.offset, bootimage,
+			segments->boot.size);
+	gk20a_mem_wr_n(g, dst, segments->code.offset, code,
+			segments->code.size);
+	gk20a_mem_wr_n(g, dst, segments->data.offset, data,
+			segments->data.size);
 
 	/* compute a "checksum" for the boot binary to detect its version */
 	segments->boot_signature = 0;
@@ -2009,7 +1973,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 	u32 *fecs_boot_image;
 	u32 *gpccs_boot_image;
 	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
-	u8 *buf;
 	u32 ucode_size;
 	int err = 0;
 
@@ -2049,14 +2012,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 	if (err)
 		goto clean_up;
 
-	buf = (u8 *)ucode_info->surface_desc.cpu_va;
-	if (!buf) {
-		gk20a_err(d, "failed to map surface desc buffer");
-		err = -ENOMEM;
-		goto clean_up;
-	}
-
-	gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs,
+	gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
+		&ucode_info->fecs,
 		fecs_boot_image,
 		g->gr.ctx_vars.ucode.fecs.inst.l,
 		g->gr.ctx_vars.ucode.fecs.data.l);
@@ -2064,7 +2021,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 	release_firmware(fecs_fw);
 	fecs_fw = NULL;
 
-	gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs,
+	gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
+		&ucode_info->gpccs,
 		gpccs_boot_image,
 		g->gr.ctx_vars.ucode.gpccs.inst.l,
 		g->gr.ctx_vars.ucode.gpccs.data.l);
@@ -4690,41 +4648,38 @@ out:
 static int gr_gk20a_init_access_map(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
-	void *data;
-	int err = 0;
+	struct mem_desc *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
 	u32 w, nr_pages =
 		DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
 			     PAGE_SIZE);
 	u32 *whitelist = NULL;
 	int num_entries = 0;
 
-	data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.pages,
-		    PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size) >>
-		    PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL));
-	if (!data) {
+	if (gk20a_mem_begin(g, mem)) {
 		gk20a_err(dev_from_gk20a(g),
 			  "failed to map priv access map memory");
-		err = -ENOMEM;
-		goto clean_up;
+		return -ENOMEM;
 	}
 
-	memset(data, 0x0, PAGE_SIZE * nr_pages);
+	gk20a_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);
 
 	g->ops.gr.get_access_map(g, &whitelist, &num_entries);
 
 	for (w = 0; w < num_entries; w++) {
-		u32 map_bit, map_byte, map_shift;
+		u32 map_bit, map_byte, map_shift, x;
 		map_bit = whitelist[w] >> 2;
 		map_byte = map_bit >> 3;
 		map_shift = map_bit & 0x7; /* i.e. 0-7 */
 		gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d",
 			       whitelist[w], map_byte, map_shift);
-		((u8 *)data)[map_byte] |= 1 << map_shift;
+		x = gk20a_mem_rd32(g, mem, map_byte / sizeof(u32));
+		x |= 1 << (
+			   (map_byte % sizeof(u32) * BITS_PER_BYTE)
+			  + map_shift);
+		gk20a_mem_wr32(g, mem, map_byte / sizeof(u32), x);
 	}
 
-clean_up:
-	if (data)
-		vunmap(data);
+	gk20a_mem_end(g, mem);
 	return 0;
 }
 
@@ -6659,7 +6614,7 @@ static void gr_gk20a_init_sm_dsm_reg_info(void)
 static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 			    struct channel_ctx_gk20a *ch_ctx,
 			    u32 addr, u32 data,
-			    u8 *context)
+			    struct mem_desc *mem)
 {
 	u32 num_gpc = g->gr.gpc_count;
 	u32 num_tpc;
@@ -6688,8 +6643,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 			/* reset the patch count from previous
 			   runs,if ucode has already processed
 			   it */
-			tmp = gk20a_mem_rd32(context +
-				ctxsw_prog_main_image_patch_count_o(), 0);
+			tmp = gk20a_mem_rd(g, mem,
+				ctxsw_prog_main_image_patch_count_o());
 
 			if (!tmp)
 				ch_ctx->patch_ctx.data_count = 0;
@@ -6700,15 +6655,15 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 			vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
 			vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
 
-			gk20a_mem_wr32(context +
+			gk20a_mem_wr(g, mem,
 				 ctxsw_prog_main_image_patch_count_o(),
-				 0, ch_ctx->patch_ctx.data_count);
-			gk20a_mem_wr32(context +
+				 ch_ctx->patch_ctx.data_count);
+			gk20a_mem_wr(g, mem,
 				 ctxsw_prog_main_image_patch_adr_lo_o(),
-				 0, vaddr_lo);
-			gk20a_mem_wr32(context +
+				 vaddr_lo);
+			gk20a_mem_wr(g, mem,
				 ctxsw_prog_main_image_patch_adr_hi_o(),
-				 0, vaddr_hi);
+				 vaddr_hi);
 
 			/* we're not caching these on cpu side,
 			   but later watch for it */
@@ -6760,17 +6715,15 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
 
 #define ILLEGAL_ID (~0)
 
-static inline bool check_main_image_header_magic(void *context)
+static inline bool check_main_image_header_magic(u8 *context)
 {
-	u32 magic = gk20a_mem_rd32(context +
-			ctxsw_prog_main_image_magic_value_o(), 0);
+	u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());
 	gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic);
 	return magic == ctxsw_prog_main_image_magic_value_v_value_v();
 }
-static inline bool check_local_header_magic(void *context)
+static inline bool check_local_header_magic(u8 *context)
 {
-	u32 magic = gk20a_mem_rd32(context +
-			ctxsw_prog_local_magic_value_o(), 0);
+	u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());
 	gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic);
 	return magic == ctxsw_prog_local_magic_value_v_value_v();
 
@@ -6814,7 +6767,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
 	u32 num_gpcs, num_tpcs;
 	u32 chk_addr;
 	u32 ext_priv_offset, ext_priv_size;
-	void *context;
+	u8 *context;
 	u32 offset_to_segment, offset_to_segment_end;
 	u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
 	u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
@@ -6856,14 +6809,14 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
 	/* note below is in words/num_registers */
 	marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
 
-	context = context_buffer;
+	context = (u8 *)context_buffer;
 	/* sanity check main header */
 	if (!check_main_image_header_magic(context)) {
 		gk20a_err(dev_from_gk20a(g),
 			   "Invalid main header: magic value");
 		return -EINVAL;
 	}
-	num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
+	num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
 	if (gpc_num >= num_gpcs) {
 		gk20a_err(dev_from_gk20a(g),
 		   "GPC 0x%08x is greater than total count 0x%08x!\n",
@@ -6871,7 +6824,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
 		return -EINVAL;
 	}
 
-	data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0);
+	data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());
 	ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
 	if (0 == ext_priv_size) {
 		gk20a_dbg_info(" No extended memory in context buffer");
@@ -7149,7 +7102,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
 }
 
 static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
-					       void *context,
+					       u8 *context,
 					       u32 *num_ppcs, u32 *ppc_mask,
 					       u32 *reg_ppc_count)
 {
@@ -7165,7 +7118,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
 	      (num_pes_per_gpc > 1)))
 		return -EINVAL;
 
-	data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0);
+	data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
 
 	*num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
 	*ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
@@ -7177,7 +7130,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
 
 /*
  * This function will return the 32 bit offset for a priv register if it is
- * present in the context buffer.
+ * present in the context buffer. The context buffer is in CPU memory.
  */
 static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 						u32 addr,
@@ -7196,7 +7149,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 	u32 offset;
 	u32 sys_priv_offset, gpc_priv_offset;
 	u32 ppc_mask, reg_list_ppc_count;
-	void *context;
+	u8 *context;
 	u32 offset_to_segment;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
@@ -7207,13 +7160,13 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 	if (err)
 		return err;
 
-	context = context_buffer;
+	context = (u8 *)context_buffer;
 	if (!check_main_image_header_magic(context)) {
 		gk20a_err(dev_from_gk20a(g),
 			   "Invalid main header: magic value");
 		return -EINVAL;
 	}
-	num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0);
+	num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
 
 	/* Parse the FECS local header. */
 	context += ctxsw_prog_ucode_header_size_in_bytes();
@@ -7222,7 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 			   "Invalid FECS local header: magic value\n");
 		return -EINVAL;
 	}
-	data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0);
+	data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
 	sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
 
 	/* If found in Ext buffer, ok.
@@ -7268,7 +7221,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 			return -EINVAL;
 
 		}
-		data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0);
+		data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
 		gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
 
 		err = gr_gk20a_determine_ppc_configuration(g, context,
@@ -7277,7 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 		if (err)
 			return err;
 
-		num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0);
+		num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
 
 		if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
 			gk20a_err(dev_from_gk20a(g),
@@ -7689,9 +7642,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 {
 	struct gk20a *g = ch->g;
 	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
-	void *ctx_ptr = NULL;
-	void *pm_ctx_ptr = NULL;
-	void *base_ptr = NULL;
+	bool gr_ctx_ready = false;
+	bool pm_ctx_ready = false;
+	struct mem_desc *current_mem = NULL;
 	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
 	u32 i, j, offset, v;
 	struct gr_gk20a *gr = &g->gr;
@@ -7821,20 +7774,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 					ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
 					ctx_ops[i].quad);
 		if (!err) {
-			if (!ctx_ptr) {
+			if (!gr_ctx_ready) {
 				/* would have been a variant of
 				 * gr_gk20a_apply_instmem_overrides,
 				 * recoded in-place instead.
 				 */
-				ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
-					PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-					0, pgprot_writecombine(PAGE_KERNEL));
-				if (!ctx_ptr) {
+				if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) {
 					err = -ENOMEM;
 					goto cleanup;
 				}
+				gr_ctx_ready = true;
 			}
-			base_ptr = ctx_ptr;
+			current_mem = &ch_ctx->gr_ctx->mem;
 		} else {
 			err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
 					ctx_ops[i].offset,
@@ -7849,7 +7800,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 					NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
 				continue;
 			}
-			if (!pm_ctx_ptr) {
+			if (!pm_ctx_ready) {
 				/* Make sure ctx buffer was initialized */
 				if (!ch_ctx->pm_ctx.mem.pages) {
 					gk20a_err(dev_from_gk20a(g),
@@ -7857,15 +7808,13 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 					err = -EINVAL;
 					goto cleanup;
 				}
-				pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages,
-					PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT,
-					0, pgprot_writecombine(PAGE_KERNEL));
-				if (!pm_ctx_ptr) {
+				if (gk20a_mem_begin(g, &ch_ctx->pm_ctx.mem)) {
 					err = -ENOMEM;
 					goto cleanup;
 				}
+				pm_ctx_ready = true;
 			}
-			base_ptr = pm_ctx_ptr;
+			current_mem = &ch_ctx->pm_ctx.mem;
 		}
 
 		/* if this is a quad access, setup for special access*/
@@ -7878,24 +7827,24 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			/* sanity check gr ctxt offsets,
 			 * don't write outside, worst case
 			 */
-			if ((base_ptr == ctx_ptr) &&
+			if ((current_mem == &ch_ctx->gr_ctx->mem) &&
 				(offsets[j] >= g->gr.ctx_vars.golden_image_size))
 				continue;
 			if (pass == 0) { /* write pass */
-				v = gk20a_mem_rd32(base_ptr + offsets[j], 0);
+				v = gk20a_mem_rd(g, current_mem, offsets[j]);
 				v &= ~ctx_ops[i].and_n_mask_lo;
 				v |= ctx_ops[i].value_lo;
-				gk20a_mem_wr32(base_ptr + offsets[j], 0, v);
+				gk20a_mem_wr(g, current_mem, offsets[j], v);
 
 				gk20a_dbg(gpu_dbg_gpu_dbg,
 					   "context wr: offset=0x%x v=0x%x",
 					   offsets[j], v);
 
 				if (ctx_ops[i].op == REGOP(WRITE_64)) {
-					v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0);
+					v = gk20a_mem_rd(g, current_mem, offsets[j] + 4);
 					v &= ~ctx_ops[i].and_n_mask_hi;
 					v |= ctx_ops[i].value_hi;
-					gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v);
+					gk20a_mem_wr(g, current_mem, offsets[j] + 4, v);
 
 					gk20a_dbg(gpu_dbg_gpu_dbg,
 						   "context wr: offset=0x%x v=0x%x",
@@ -7905,18 +7854,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 				/* check to see if we need to add a special WAR
 				   for some of the SMPC perf regs */
 				gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
-							v, base_ptr);
+							v, current_mem);
 
 			} else { /* read pass */
 				ctx_ops[i].value_lo =
-					gk20a_mem_rd32(base_ptr + offsets[0], 0);
+					gk20a_mem_rd(g, current_mem, offsets[0]);
 
 				gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
 					   offsets[0], ctx_ops[i].value_lo);
 
 				if (ctx_ops[i].op == REGOP(READ_64)) {
 					ctx_ops[i].value_hi =
-						gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0);
+						gk20a_mem_rd(g, current_mem, offsets[0] + 4);
 
 					gk20a_dbg(gpu_dbg_gpu_dbg,
 						   "context rd: offset=0x%x v=0x%x",
@@ -7943,12 +7892,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 
 	if (ch_ctx->patch_ctx.mem.cpu_va)
 		gr_gk20a_ctx_patch_write_end(g, ch_ctx);
-
-	if (ctx_ptr)
-		vunmap(ctx_ptr);
-
-	if (pm_ctx_ptr)
-		vunmap(pm_ctx_ptr);
+	if (gr_ctx_ready)
+		gk20a_mem_end(g, &ch_ctx->gr_ctx->mem);
+	if (pm_ctx_ready)
+		gk20a_mem_end(g, &ch_ctx->pm_ctx.mem);
 
 	if (restart_gr_ctxsw) {
 		int tmp_err = gr_gk20a_enable_ctxsw(g);