author     Konsta Holtta <kholtta@nvidia.com>    2016-05-12 02:32:05 -0400
committer  Ken Adams <kadams@nvidia.com>         2016-05-13 10:11:33 -0400
commit     6eebc87d99f9f04b2b68e0bc0142c161ab3e669d (patch)
tree       08e437890869d76072f291ea66f709f05ea07c8a /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent     14ef0dacc94077bc3dae4c942ff8c279cc4c92ba (diff)
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor functions. This allows the functions to select the memory access method based on the buffer aperture instead of using the cpu pointer directly (as has been done until now). The selection and aperture support will come in another patch; this patch only refactors these accessors and keeps the underlying functionality as-is.

gk20a_mem_{rd,wr}32() work as before; also add gk20a_mem_{rd,wr}() for byte-indexed accesses, gk20a_mem_{rd,wr}_n() for memcpy()-like functionality, and gk20a_memset() for filling buffers with a constant. The 8- and 16-bit accessor functions are removed. vmap()/vunmap() pairs are abstracted into gk20a_mem_{begin,end}() to support other types of mappings or conditions where mapping the buffer is unnecessary or different.

Several function arguments that would access these buffers are also changed to take a mem_desc instead of a plain cpu pointer. Some relevant call sites are changed to use the accessor functions instead of raw cpu pointers (e.g., memcpying to and from), but the majority of direct accesses will be adjusted later, when the buffers are moved to support vidmem.

JIRA DNVGPU-23

Change-Id: I3dd22e14290c4ab742d42e2dd327ebeb5cd3f25a
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121143
Reviewed-by: Ken Adams <kadams@nvidia.com>
Tested-by: Ken Adams <kadams@nvidia.com>
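For orientation, the prototypes below are an illustrative sketch of the accessor family described above, reconstructed from how the calls appear in this diff; the authoritative declarations live in the nvgpu gk20a headers and may differ in exact types and parameter names.

/* Mapping scope: replaces raw vmap()/vunmap() pairs. gk20a_mem_begin()
 * returns 0 on success, nonzero if the buffer cannot be made accessible. */
int  gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem);
void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem);

/* Word-indexed accessors, as before, but now taking g and the mem_desc so
 * the implementation can pick the access method from the buffer aperture. */
u32  gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w);
void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data);

/* Byte-offset accessors (new). */
u32  gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset);
void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);

/* memcpy()-like bulk transfers and constant fill (new). */
void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
		    void *dest, u32 size);
void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
		    void *src, u32 size);
void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
		  u32 value, u32 size);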
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  359
1 file changed, 153 insertions(+), 206 deletions(-)
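The recurring transformation in this file is easiest to see side by side. The snippet below is a condensed sketch based on the SMPC mode hunk further down (error handling and channel enable/disable trimmed); g, ch_ctx and mem stand for the gk20a device, the channel context and the gr_ctx mem_desc.

/* Old pattern: map the gr_ctx pages and poke them through a CPU pointer. */
ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
	       PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
	       0, pgprot_writecombine(PAGE_KERNEL));
if (!ctx_ptr)
	return -ENOMEM;
data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
data &= ~ctxsw_prog_main_image_pm_smpc_mode_m();
gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data);
vunmap(ctx_ptr);

/* New pattern: hand the mem_desc to the accessors and let them decide how
 * to reach the buffer; the mapping is bracketed by gk20a_mem_{begin,end}(). */
if (gk20a_mem_begin(g, mem))
	return -ENOMEM;
data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
data &= ~ctxsw_prog_main_image_pm_smpc_mode_m();
gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
gk20a_mem_end(g, mem);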
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4e7c36ee..e7e6662a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -97,22 +97,18 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
97 u32 *ctx_id) 97 u32 *ctx_id)
98{ 98{
99 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 99 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
100 void *ctx_ptr = NULL;
101 100
102 /* Channel gr_ctx buffer is gpu cacheable. 101 /* Channel gr_ctx buffer is gpu cacheable.
103 Flush and invalidate before cpu update. */ 102 Flush and invalidate before cpu update. */
104 g->ops.mm.l2_flush(g, true); 103 g->ops.mm.l2_flush(g, true);
105 104
106 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 105 if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem))
107 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
108 0, pgprot_writecombine(PAGE_KERNEL));
109 if (!ctx_ptr)
110 return -ENOMEM; 106 return -ENOMEM;
111 107
112 *ctx_id = gk20a_mem_rd32(ctx_ptr + 108 *ctx_id = gk20a_mem_rd(g, &ch_ctx->gr_ctx->mem,
113 ctxsw_prog_main_image_context_id_o(), 0); 109 ctxsw_prog_main_image_context_id_o());
114 110
115 vunmap(ctx_ptr); 111 gk20a_mem_end(g, &ch_ctx->gr_ctx->mem);
116 112
117 return 0; 113 return 0;
118} 114}
@@ -619,22 +615,17 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
619{ 615{
620 u32 addr_lo; 616 u32 addr_lo;
621 u32 addr_hi; 617 u32 addr_hi;
622 void *inst_ptr = NULL;
623 618
624 gk20a_dbg_fn(""); 619 gk20a_dbg_fn("");
625 620
626 inst_ptr = c->inst_block.cpu_va;
627 if (!inst_ptr)
628 return -ENOMEM;
629
630 addr_lo = u64_lo32(gpu_va) >> 12; 621 addr_lo = u64_lo32(gpu_va) >> 12;
631 addr_hi = u64_hi32(gpu_va); 622 addr_hi = u64_hi32(gpu_va);
632 623
633 gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(), 624 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(),
634 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | 625 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
635 ram_in_gr_wfi_ptr_lo_f(addr_lo)); 626 ram_in_gr_wfi_ptr_lo_f(addr_lo));
636 627
637 gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), 628 gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(),
638 ram_in_gr_wfi_ptr_hi_f(addr_hi)); 629 ram_in_gr_wfi_ptr_hi_f(addr_hi));
639 630
640 return 0; 631 return 0;
@@ -658,11 +649,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
658 return -EBUSY; 649 return -EBUSY;
659 } 650 }
660 651
661 ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages, 652 if (gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem))
662 PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT,
663 0, pgprot_writecombine(PAGE_KERNEL));
664
665 if (!ch_ctx->patch_ctx.mem.cpu_va)
666 return -ENOMEM; 653 return -ENOMEM;
667 654
668 return 0; 655 return 0;
@@ -677,8 +664,7 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
677 return -EINVAL; 664 return -EINVAL;
678 } 665 }
679 666
680 vunmap(ch_ctx->patch_ctx.mem.cpu_va); 667 gk20a_mem_end(g, &ch_ctx->patch_ctx.mem);
681 ch_ctx->patch_ctx.mem.cpu_va = NULL;
682 return 0; 668 return 0;
683} 669}
684 670
@@ -687,7 +673,6 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
687 u32 addr, u32 data, bool patch) 673 u32 addr, u32 data, bool patch)
688{ 674{
689 u32 patch_slot = 0; 675 u32 patch_slot = 0;
690 void *patch_ptr = NULL;
691 bool mapped_here = false; 676 bool mapped_here = false;
692 677
693 BUG_ON(patch != 0 && ch_ctx == NULL); 678 BUG_ON(patch != 0 && ch_ctx == NULL);
@@ -708,11 +693,10 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g,
708 } else 693 } else
709 mapped_here = false; 694 mapped_here = false;
710 695
711 patch_ptr = ch_ctx->patch_ctx.mem.cpu_va;
712 patch_slot = ch_ctx->patch_ctx.data_count * 2; 696 patch_slot = ch_ctx->patch_ctx.data_count * 2;
713 697
714 gk20a_mem_wr32(patch_ptr, patch_slot++, addr); 698 gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, addr);
715 gk20a_mem_wr32(patch_ptr, patch_slot++, data); 699 gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, data);
716 700
717 ch_ctx->patch_ctx.data_count++; 701 ch_ctx->patch_ctx.data_count++;
718 702
@@ -760,16 +744,13 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
760static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) 744static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
761{ 745{
762 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 746 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
747 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
763 u32 va_lo, va_hi, va; 748 u32 va_lo, va_hi, va;
764 int ret = 0; 749 int ret = 0;
765 void *ctx_ptr = NULL;
766 750
767 gk20a_dbg_fn(""); 751 gk20a_dbg_fn("");
768 752
769 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 753 if (gk20a_mem_begin(g, mem))
770 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
771 0, pgprot_writecombine(PAGE_KERNEL));
772 if (!ctx_ptr)
773 return -ENOMEM; 754 return -ENOMEM;
774 755
775 if (ch_ctx->zcull_ctx.gpu_va == 0 && 756 if (ch_ctx->zcull_ctx.gpu_va == 0 &&
@@ -792,15 +773,17 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
792 goto clean_up; 773 goto clean_up;
793 } 774 }
794 775
795 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, 776 gk20a_mem_wr(g, mem,
777 ctxsw_prog_main_image_zcull_o(),
796 ch_ctx->zcull_ctx.ctx_sw_mode); 778 ch_ctx->zcull_ctx.ctx_sw_mode);
797 779
798 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va); 780 gk20a_mem_wr(g, mem,
781 ctxsw_prog_main_image_zcull_ptr_o(), va);
799 782
800 c->g->ops.fifo.enable_channel(c); 783 c->g->ops.fifo.enable_channel(c);
801 784
802clean_up: 785clean_up:
803 vunmap(ctx_ptr); 786 gk20a_mem_end(g, mem);
804 787
805 return ret; 788 return ret;
806} 789}
@@ -1500,8 +1483,8 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1500 u32 ctx_header_words; 1483 u32 ctx_header_words;
1501 u32 i; 1484 u32 i;
1502 u32 data; 1485 u32 data;
1503 void *ctx_ptr = NULL; 1486 struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
1504 void *gold_ptr = NULL; 1487 struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem;
1505 u32 err = 0; 1488 u32 err = 0;
1506 1489
1507 gk20a_dbg_fn(""); 1490 gk20a_dbg_fn("");
@@ -1527,16 +1510,10 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1527 if (err) 1510 if (err)
1528 goto clean_up; 1511 goto clean_up;
1529 1512
1530 gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].mem.pages, 1513 if (gk20a_mem_begin(g, gold_mem))
1531 PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].mem.size) >>
1532 PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL));
1533 if (!gold_ptr)
1534 goto clean_up; 1514 goto clean_up;
1535 1515
1536 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1516 if (gk20a_mem_begin(g, gr_mem))
1537 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1538 0, pgprot_writecombine(PAGE_KERNEL));
1539 if (!ctx_ptr)
1540 goto clean_up; 1517 goto clean_up;
1541 1518
1542 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); 1519 ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
@@ -1545,14 +1522,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1545 g->ops.mm.l2_flush(g, true); 1522 g->ops.mm.l2_flush(g, true);
1546 1523
1547 for (i = 0; i < ctx_header_words; i++) { 1524 for (i = 0; i < ctx_header_words; i++) {
1548 data = gk20a_mem_rd32(ctx_ptr, i); 1525 data = gk20a_mem_rd32(g, gr_mem, i);
1549 gk20a_mem_wr32(gold_ptr, i, data); 1526 gk20a_mem_wr32(g, gold_mem, i, data);
1550 } 1527 }
1551 1528
1552 gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0, 1529 gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
1553 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); 1530 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1554 1531
1555 gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0); 1532 gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0);
1556 1533
1557 gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); 1534 gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1558 1535
@@ -1568,12 +1545,12 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1568 goto clean_up; 1545 goto clean_up;
1569 } 1546 }
1570 1547
1571 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) 1548 gk20a_mem_rd_n(g, gold_mem, 0,
1572 gr->ctx_vars.local_golden_image[i] = 1549 gr->ctx_vars.local_golden_image,
1573 gk20a_mem_rd32(gold_ptr, i); 1550 gr->ctx_vars.golden_image_size);
1574 } 1551 }
1575 1552
1576 gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); 1553 gr_gk20a_commit_inst(c, gr_mem->gpu_va);
1577 1554
1578 gr->ctx_vars.golden_image_initialized = true; 1555 gr->ctx_vars.golden_image_initialized = true;
1579 1556
@@ -1586,10 +1563,8 @@ clean_up:
1586 else 1563 else
1587 gk20a_dbg_fn("done"); 1564 gk20a_dbg_fn("done");
1588 1565
1589 if (gold_ptr) 1566 gk20a_mem_end(g, gold_mem);
1590 vunmap(gold_ptr); 1567 gk20a_mem_end(g, gr_mem);
1591 if (ctx_ptr)
1592 vunmap(ctx_ptr);
1593 1568
1594 mutex_unlock(&gr->ctx_mutex); 1569 mutex_unlock(&gr->ctx_mutex);
1595 return err; 1570 return err;
@@ -1600,7 +1575,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1600 bool enable_smpc_ctxsw) 1575 bool enable_smpc_ctxsw)
1601{ 1576{
1602 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1577 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1603 void *ctx_ptr = NULL; 1578 struct mem_desc *mem;
1604 u32 data; 1579 u32 data;
1605 int ret; 1580 int ret;
1606 1581
@@ -1611,46 +1586,39 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1611 return -EFAULT; 1586 return -EFAULT;
1612 } 1587 }
1613 1588
1589 mem = &ch_ctx->gr_ctx->mem;
1590
1614 c->g->ops.fifo.disable_channel(c); 1591 c->g->ops.fifo.disable_channel(c);
1615 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); 1592 ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid);
1616 if (ret) { 1593 if (ret) {
1617 c->g->ops.fifo.enable_channel(c); 1594 gk20a_err(dev_from_gk20a(g), "failed to preempt channel");
1618 gk20a_err(dev_from_gk20a(g), 1595 goto out;
1619 "failed to preempt channel\n");
1620 return ret;
1621 } 1596 }
1622 1597
1623 /* Channel gr_ctx buffer is gpu cacheable. 1598 /* Channel gr_ctx buffer is gpu cacheable.
1624 Flush and invalidate before cpu update. */ 1599 Flush and invalidate before cpu update. */
1625 g->ops.mm.l2_flush(g, true); 1600 g->ops.mm.l2_flush(g, true);
1626 1601
1627 if (!ch_ctx->gr_ctx) { 1602 if (gk20a_mem_begin(g, mem)) {
1628 gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); 1603 ret = -ENOMEM;
1629 return -EFAULT; 1604 goto out;
1630 }
1631
1632 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages,
1633 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1634 0, pgprot_writecombine(PAGE_KERNEL));
1635 if (!ctx_ptr) {
1636 c->g->ops.fifo.enable_channel(c);
1637 return -ENOMEM;
1638 } 1605 }
1639 1606
1640 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1607 data = gk20a_mem_rd(g, mem,
1608 ctxsw_prog_main_image_pm_o());
1641 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); 1609 data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
1642 data |= enable_smpc_ctxsw ? 1610 data |= enable_smpc_ctxsw ?
1643 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : 1611 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
1644 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); 1612 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
1645 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, 1613 gk20a_mem_wr(g, mem,
1646 data); 1614 ctxsw_prog_main_image_pm_o(),
1615 data);
1647 1616
1648 vunmap(ctx_ptr); 1617 gk20a_mem_end(g, mem);
1649 1618
1650 /* enable channel */ 1619out:
1651 c->g->ops.fifo.enable_channel(c); 1620 c->g->ops.fifo.enable_channel(c);
1652 1621 return ret;
1653 return 0;
1654} 1622}
1655 1623
1656int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, 1624int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
@@ -1659,8 +1627,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1659{ 1627{
1660 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1628 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1661 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; 1629 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
1662 void *ctx_ptr = NULL; 1630 struct mem_desc *gr_mem;
1663 void *pm_ctx_ptr;
1664 u32 data, virt_addr; 1631 u32 data, virt_addr;
1665 int ret; 1632 int ret;
1666 1633
@@ -1671,6 +1638,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1671 return -EFAULT; 1638 return -EFAULT;
1672 } 1639 }
1673 1640
1641 gr_mem = &ch_ctx->gr_ctx->mem;
1642
1674 if (enable_hwpm_ctxsw) { 1643 if (enable_hwpm_ctxsw) {
1675 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) 1644 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
1676 return 0; 1645 return 0;
@@ -1721,29 +1690,22 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1721 } 1690 }
1722 1691
1723 /* Now clear the buffer */ 1692 /* Now clear the buffer */
1724 pm_ctx_ptr = vmap(pm_ctx->mem.pages, 1693 if (gk20a_mem_begin(g, &pm_ctx->mem)) {
1725 PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT,
1726 0, pgprot_writecombine(PAGE_KERNEL));
1727
1728 if (!pm_ctx_ptr) {
1729 ret = -ENOMEM; 1694 ret = -ENOMEM;
1730 goto cleanup_pm_buf; 1695 goto cleanup_pm_buf;
1731 } 1696 }
1732 1697
1733 memset(pm_ctx_ptr, 0, pm_ctx->mem.size); 1698 gk20a_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size);
1734 1699
1735 vunmap(pm_ctx_ptr); 1700 gk20a_mem_end(g, &pm_ctx->mem);
1736 } 1701 }
1737 1702
1738 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1703 if (gk20a_mem_begin(g, gr_mem)) {
1739 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1740 0, pgprot_writecombine(PAGE_KERNEL));
1741 if (!ctx_ptr) {
1742 ret = -ENOMEM; 1704 ret = -ENOMEM;
1743 goto cleanup_pm_buf; 1705 goto cleanup_pm_buf;
1744 } 1706 }
1745 1707
1746 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1708 data = gk20a_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
1747 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1709 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1748 1710
1749 if (enable_hwpm_ctxsw) { 1711 if (enable_hwpm_ctxsw) {
@@ -1760,10 +1722,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1760 1722
1761 data |= pm_ctx->pm_mode; 1723 data |= pm_ctx->pm_mode;
1762 1724
1763 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); 1725 gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
1764 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); 1726 gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
1765 1727
1766 vunmap(ctx_ptr); 1728 gk20a_mem_end(g, gr_mem);
1767 1729
1768 /* enable channel */ 1730 /* enable channel */
1769 c->g->ops.fifo.enable_channel(c); 1731 c->g->ops.fifo.enable_channel(c);
@@ -1788,9 +1750,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1788 u32 virt_addr_lo; 1750 u32 virt_addr_lo;
1789 u32 virt_addr_hi; 1751 u32 virt_addr_hi;
1790 u32 virt_addr = 0; 1752 u32 virt_addr = 0;
1791 u32 i, v, data; 1753 u32 v, data;
1792 int ret = 0; 1754 int ret = 0;
1793 void *ctx_ptr = NULL; 1755 struct mem_desc *mem = &ch_ctx->gr_ctx->mem;
1794 1756
1795 gk20a_dbg_fn(""); 1757 gk20a_dbg_fn("");
1796 1758
@@ -1801,20 +1763,18 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1801 Flush and invalidate before cpu update. */ 1763 Flush and invalidate before cpu update. */
1802 g->ops.mm.l2_flush(g, true); 1764 g->ops.mm.l2_flush(g, true);
1803 1765
1804 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 1766 if (gk20a_mem_begin(g, mem))
1805 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
1806 0, pgprot_writecombine(PAGE_KERNEL));
1807 if (!ctx_ptr)
1808 return -ENOMEM; 1767 return -ENOMEM;
1809 1768
1810 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) 1769 gk20a_mem_wr_n(g, mem, 0,
1811 gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); 1770 gr->ctx_vars.local_golden_image,
1771 gr->ctx_vars.golden_image_size);
1812 1772
1813 if (g->ops.gr.enable_cde_in_fecs && c->cde) 1773 if (g->ops.gr.enable_cde_in_fecs && c->cde)
1814 g->ops.gr.enable_cde_in_fecs(ctx_ptr); 1774 g->ops.gr.enable_cde_in_fecs(g, mem);
1815 1775
1816 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); 1776 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0);
1817 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); 1777 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0);
1818 1778
1819 /* set priv access map */ 1779 /* set priv access map */
1820 virt_addr_lo = 1780 virt_addr_lo =
@@ -1827,29 +1787,29 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1827 else 1787 else
1828 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); 1788 data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
1829 1789
1830 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0, 1790 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
1831 data); 1791 data);
1832 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0, 1792 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
1833 virt_addr_lo); 1793 virt_addr_lo);
1834 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0, 1794 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
1835 virt_addr_hi); 1795 virt_addr_hi);
1836 /* disable verif features */ 1796 /* disable verif features */
1837 v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0); 1797 v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
1838 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); 1798 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
1839 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); 1799 v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
1840 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); 1800 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
1841 1801
1842 if (g->ops.gr.update_ctxsw_preemption_mode) 1802 if (g->ops.gr.update_ctxsw_preemption_mode)
1843 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr); 1803 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem);
1844 1804
1845 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 1805 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
1846 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 1806 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
1847 1807
1848 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0, 1808 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
1849 ch_ctx->patch_ctx.data_count); 1809 ch_ctx->patch_ctx.data_count);
1850 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0, 1810 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(),
1851 virt_addr_lo); 1811 virt_addr_lo);
1852 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, 1812 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(),
1853 virt_addr_hi); 1813 virt_addr_hi);
1854 1814
1855 /* Update main header region of the context buffer with the info needed 1815 /* Update main header region of the context buffer with the info needed
@@ -1860,7 +1820,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1860 if (ch_ctx->pm_ctx.mem.gpu_va == 0) { 1820 if (ch_ctx->pm_ctx.mem.gpu_va == 0) {
1861 gk20a_err(dev_from_gk20a(g), 1821 gk20a_err(dev_from_gk20a(g),
1862 "context switched pm with no pm buffer!"); 1822 "context switched pm with no pm buffer!");
1863 vunmap(ctx_ptr); 1823 gk20a_mem_end(g, mem);
1864 return -EFAULT; 1824 return -EFAULT;
1865 } 1825 }
1866 1826
@@ -1871,14 +1831,14 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1871 } else 1831 } else
1872 virt_addr = 0; 1832 virt_addr = 0;
1873 1833
1874 data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); 1834 data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
1875 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1835 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1876 data |= ch_ctx->pm_ctx.pm_mode; 1836 data |= ch_ctx->pm_ctx.pm_mode;
1877 1837
1878 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); 1838 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
1879 gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); 1839 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr);
1880 1840
1881 vunmap(ctx_ptr); 1841 gk20a_mem_end(g, mem);
1882 1842
1883 if (tegra_platform_is_linsim()) { 1843 if (tegra_platform_is_linsim()) {
1884 u32 inst_base_ptr = 1844 u32 inst_base_ptr =
@@ -1978,16 +1938,20 @@ static void gr_gk20a_init_ctxsw_ucode_segments(
1978} 1938}
1979 1939
1980static int gr_gk20a_copy_ctxsw_ucode_segments( 1940static int gr_gk20a_copy_ctxsw_ucode_segments(
1981 u8 *buf, 1941 struct gk20a *g,
1942 struct mem_desc *dst,
1982 struct gk20a_ctxsw_ucode_segments *segments, 1943 struct gk20a_ctxsw_ucode_segments *segments,
1983 u32 *bootimage, 1944 u32 *bootimage,
1984 u32 *code, u32 *data) 1945 u32 *code, u32 *data)
1985{ 1946{
1986 int i; 1947 int i;
1987 1948
1988 memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); 1949 gk20a_mem_wr_n(g, dst, segments->boot.offset, bootimage,
1989 memcpy(buf + segments->code.offset, code, segments->code.size); 1950 segments->boot.size);
1990 memcpy(buf + segments->data.offset, data, segments->data.size); 1951 gk20a_mem_wr_n(g, dst, segments->code.offset, code,
1952 segments->code.size);
1953 gk20a_mem_wr_n(g, dst, segments->data.offset, data,
1954 segments->data.size);
1991 1955
1992 /* compute a "checksum" for the boot binary to detect its version */ 1956 /* compute a "checksum" for the boot binary to detect its version */
1993 segments->boot_signature = 0; 1957 segments->boot_signature = 0;
@@ -2009,7 +1973,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2009 u32 *fecs_boot_image; 1973 u32 *fecs_boot_image;
2010 u32 *gpccs_boot_image; 1974 u32 *gpccs_boot_image;
2011 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; 1975 struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
2012 u8 *buf;
2013 u32 ucode_size; 1976 u32 ucode_size;
2014 int err = 0; 1977 int err = 0;
2015 1978
@@ -2049,14 +2012,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2049 if (err) 2012 if (err)
2050 goto clean_up; 2013 goto clean_up;
2051 2014
2052 buf = (u8 *)ucode_info->surface_desc.cpu_va; 2015 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2053 if (!buf) { 2016 &ucode_info->fecs,
2054 gk20a_err(d, "failed to map surface desc buffer");
2055 err = -ENOMEM;
2056 goto clean_up;
2057 }
2058
2059 gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs,
2060 fecs_boot_image, 2017 fecs_boot_image,
2061 g->gr.ctx_vars.ucode.fecs.inst.l, 2018 g->gr.ctx_vars.ucode.fecs.inst.l,
2062 g->gr.ctx_vars.ucode.fecs.data.l); 2019 g->gr.ctx_vars.ucode.fecs.data.l);
@@ -2064,7 +2021,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
2064 release_firmware(fecs_fw); 2021 release_firmware(fecs_fw);
2065 fecs_fw = NULL; 2022 fecs_fw = NULL;
2066 2023
2067 gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs, 2024 gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
2025 &ucode_info->gpccs,
2068 gpccs_boot_image, 2026 gpccs_boot_image,
2069 g->gr.ctx_vars.ucode.gpccs.inst.l, 2027 g->gr.ctx_vars.ucode.gpccs.inst.l,
2070 g->gr.ctx_vars.ucode.gpccs.data.l); 2028 g->gr.ctx_vars.ucode.gpccs.data.l);
@@ -4690,41 +4648,38 @@ out:
4690static int gr_gk20a_init_access_map(struct gk20a *g) 4648static int gr_gk20a_init_access_map(struct gk20a *g)
4691{ 4649{
4692 struct gr_gk20a *gr = &g->gr; 4650 struct gr_gk20a *gr = &g->gr;
4693 void *data; 4651 struct mem_desc *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem;
4694 int err = 0;
4695 u32 w, nr_pages = 4652 u32 w, nr_pages =
4696 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, 4653 DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size,
4697 PAGE_SIZE); 4654 PAGE_SIZE);
4698 u32 *whitelist = NULL; 4655 u32 *whitelist = NULL;
4699 int num_entries = 0; 4656 int num_entries = 0;
4700 4657
4701 data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.pages, 4658 if (gk20a_mem_begin(g, mem)) {
4702 PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size) >>
4703 PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL));
4704 if (!data) {
4705 gk20a_err(dev_from_gk20a(g), 4659 gk20a_err(dev_from_gk20a(g),
4706 "failed to map priv access map memory"); 4660 "failed to map priv access map memory");
4707 err = -ENOMEM; 4661 return -ENOMEM;
4708 goto clean_up;
4709 } 4662 }
4710 4663
4711 memset(data, 0x0, PAGE_SIZE * nr_pages); 4664 gk20a_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);
4712 4665
4713 g->ops.gr.get_access_map(g, &whitelist, &num_entries); 4666 g->ops.gr.get_access_map(g, &whitelist, &num_entries);
4714 4667
4715 for (w = 0; w < num_entries; w++) { 4668 for (w = 0; w < num_entries; w++) {
4716 u32 map_bit, map_byte, map_shift; 4669 u32 map_bit, map_byte, map_shift, x;
4717 map_bit = whitelist[w] >> 2; 4670 map_bit = whitelist[w] >> 2;
4718 map_byte = map_bit >> 3; 4671 map_byte = map_bit >> 3;
4719 map_shift = map_bit & 0x7; /* i.e. 0-7 */ 4672 map_shift = map_bit & 0x7; /* i.e. 0-7 */
4720 gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", 4673 gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d",
4721 whitelist[w], map_byte, map_shift); 4674 whitelist[w], map_byte, map_shift);
4722 ((u8 *)data)[map_byte] |= 1 << map_shift; 4675 x = gk20a_mem_rd32(g, mem, map_byte / sizeof(u32));
4676 x |= 1 << (
4677 (map_byte % sizeof(u32) * BITS_PER_BYTE)
4678 + map_shift);
4679 gk20a_mem_wr32(g, mem, map_byte / sizeof(u32), x);
4723 } 4680 }
4724 4681
4725clean_up: 4682 gk20a_mem_end(g, mem);
4726 if (data)
4727 vunmap(data);
4728 return 0; 4683 return 0;
4729} 4684}
4730 4685
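The access-map hunk above trades a byte-wise bit set for a 32-bit read-modify-write, since the map is now touched only through the word accessors. A sketch of the equivalence, using a made-up whitelisted register offset purely for illustration:

/* Hypothetical whitelisted register offset, for illustration only. */
u32 reg = 0x00419e10;
u32 map_bit   = reg >> 2;        /* one map bit per 32-bit register */
u32 map_byte  = map_bit >> 3;    /* byte of the map holding that bit */
u32 map_shift = map_bit & 0x7;   /* bit position within that byte */

/* Old: ((u8 *)data)[map_byte] |= 1 << map_shift;
 * New: address the same bit inside the 32-bit word containing map_byte so
 * the update can go through gk20a_mem_rd32()/gk20a_mem_wr32(): */
u32 word = map_byte / sizeof(u32);
u32 bit  = (map_byte % sizeof(u32)) * BITS_PER_BYTE + map_shift;
u32 x    = gk20a_mem_rd32(g, mem, word);
x |= 1u << bit;
gk20a_mem_wr32(g, mem, word, x);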
@@ -6659,7 +6614,7 @@ static void gr_gk20a_init_sm_dsm_reg_info(void)
6659static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, 6614static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6660 struct channel_ctx_gk20a *ch_ctx, 6615 struct channel_ctx_gk20a *ch_ctx,
6661 u32 addr, u32 data, 6616 u32 addr, u32 data,
6662 u8 *context) 6617 struct mem_desc *mem)
6663{ 6618{
6664 u32 num_gpc = g->gr.gpc_count; 6619 u32 num_gpc = g->gr.gpc_count;
6665 u32 num_tpc; 6620 u32 num_tpc;
@@ -6688,8 +6643,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6688 /* reset the patch count from previous 6643 /* reset the patch count from previous
6689 runs,if ucode has already processed 6644 runs,if ucode has already processed
6690 it */ 6645 it */
6691 tmp = gk20a_mem_rd32(context + 6646 tmp = gk20a_mem_rd(g, mem,
6692 ctxsw_prog_main_image_patch_count_o(), 0); 6647 ctxsw_prog_main_image_patch_count_o());
6693 6648
6694 if (!tmp) 6649 if (!tmp)
6695 ch_ctx->patch_ctx.data_count = 0; 6650 ch_ctx->patch_ctx.data_count = 0;
@@ -6700,15 +6655,15 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6700 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 6655 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
6701 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 6656 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
6702 6657
6703 gk20a_mem_wr32(context + 6658 gk20a_mem_wr(g, mem,
6704 ctxsw_prog_main_image_patch_count_o(), 6659 ctxsw_prog_main_image_patch_count_o(),
6705 0, ch_ctx->patch_ctx.data_count); 6660 ch_ctx->patch_ctx.data_count);
6706 gk20a_mem_wr32(context + 6661 gk20a_mem_wr(g, mem,
6707 ctxsw_prog_main_image_patch_adr_lo_o(), 6662 ctxsw_prog_main_image_patch_adr_lo_o(),
6708 0, vaddr_lo); 6663 vaddr_lo);
6709 gk20a_mem_wr32(context + 6664 gk20a_mem_wr(g, mem,
6710 ctxsw_prog_main_image_patch_adr_hi_o(), 6665 ctxsw_prog_main_image_patch_adr_hi_o(),
6711 0, vaddr_hi); 6666 vaddr_hi);
6712 6667
6713 /* we're not caching these on cpu side, 6668 /* we're not caching these on cpu side,
6714 but later watch for it */ 6669 but later watch for it */
@@ -6760,17 +6715,15 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset)
6760 6715
6761#define ILLEGAL_ID (~0) 6716#define ILLEGAL_ID (~0)
6762 6717
6763static inline bool check_main_image_header_magic(void *context) 6718static inline bool check_main_image_header_magic(u8 *context)
6764{ 6719{
6765 u32 magic = gk20a_mem_rd32(context + 6720 u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());
6766 ctxsw_prog_main_image_magic_value_o(), 0);
6767 gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); 6721 gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic);
6768 return magic == ctxsw_prog_main_image_magic_value_v_value_v(); 6722 return magic == ctxsw_prog_main_image_magic_value_v_value_v();
6769} 6723}
6770static inline bool check_local_header_magic(void *context) 6724static inline bool check_local_header_magic(u8 *context)
6771{ 6725{
6772 u32 magic = gk20a_mem_rd32(context + 6726 u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());
6773 ctxsw_prog_local_magic_value_o(), 0);
6774 gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); 6727 gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic);
6775 return magic == ctxsw_prog_local_magic_value_v_value_v(); 6728 return magic == ctxsw_prog_local_magic_value_v_value_v();
6776 6729
@@ -6814,7 +6767,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6814 u32 num_gpcs, num_tpcs; 6767 u32 num_gpcs, num_tpcs;
6815 u32 chk_addr; 6768 u32 chk_addr;
6816 u32 ext_priv_offset, ext_priv_size; 6769 u32 ext_priv_offset, ext_priv_size;
6817 void *context; 6770 u8 *context;
6818 u32 offset_to_segment, offset_to_segment_end; 6771 u32 offset_to_segment, offset_to_segment_end;
6819 u32 sm_dsm_perf_reg_id = ILLEGAL_ID; 6772 u32 sm_dsm_perf_reg_id = ILLEGAL_ID;
6820 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; 6773 u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID;
@@ -6856,14 +6809,14 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6856 /* note below is in words/num_registers */ 6809 /* note below is in words/num_registers */
6857 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; 6810 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
6858 6811
6859 context = context_buffer; 6812 context = (u8 *)context_buffer;
6860 /* sanity check main header */ 6813 /* sanity check main header */
6861 if (!check_main_image_header_magic(context)) { 6814 if (!check_main_image_header_magic(context)) {
6862 gk20a_err(dev_from_gk20a(g), 6815 gk20a_err(dev_from_gk20a(g),
6863 "Invalid main header: magic value"); 6816 "Invalid main header: magic value");
6864 return -EINVAL; 6817 return -EINVAL;
6865 } 6818 }
6866 num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); 6819 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
6867 if (gpc_num >= num_gpcs) { 6820 if (gpc_num >= num_gpcs) {
6868 gk20a_err(dev_from_gk20a(g), 6821 gk20a_err(dev_from_gk20a(g),
6869 "GPC 0x%08x is greater than total count 0x%08x!\n", 6822 "GPC 0x%08x is greater than total count 0x%08x!\n",
@@ -6871,7 +6824,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
6871 return -EINVAL; 6824 return -EINVAL;
6872 } 6825 }
6873 6826
6874 data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0); 6827 data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());
6875 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); 6828 ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
6876 if (0 == ext_priv_size) { 6829 if (0 == ext_priv_size) {
6877 gk20a_dbg_info(" No extended memory in context buffer"); 6830 gk20a_dbg_info(" No extended memory in context buffer");
@@ -7149,7 +7102,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g,
7149} 7102}
7150 7103
7151static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, 7104static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7152 void *context, 7105 u8 *context,
7153 u32 *num_ppcs, u32 *ppc_mask, 7106 u32 *num_ppcs, u32 *ppc_mask,
7154 u32 *reg_ppc_count) 7107 u32 *reg_ppc_count)
7155{ 7108{
@@ -7165,7 +7118,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7165 (num_pes_per_gpc > 1))) 7118 (num_pes_per_gpc > 1)))
7166 return -EINVAL; 7119 return -EINVAL;
7167 7120
7168 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); 7121 data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
7169 7122
7170 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); 7123 *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
7171 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); 7124 *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
@@ -7177,7 +7130,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
7177 7130
7178/* 7131/*
7179 * This function will return the 32 bit offset for a priv register if it is 7132 * This function will return the 32 bit offset for a priv register if it is
7180 * present in the context buffer. 7133 * present in the context buffer. The context buffer is in CPU memory.
7181 */ 7134 */
7182static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, 7135static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7183 u32 addr, 7136 u32 addr,
@@ -7196,7 +7149,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7196 u32 offset; 7149 u32 offset;
7197 u32 sys_priv_offset, gpc_priv_offset; 7150 u32 sys_priv_offset, gpc_priv_offset;
7198 u32 ppc_mask, reg_list_ppc_count; 7151 u32 ppc_mask, reg_list_ppc_count;
7199 void *context; 7152 u8 *context;
7200 u32 offset_to_segment; 7153 u32 offset_to_segment;
7201 7154
7202 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); 7155 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
@@ -7207,13 +7160,13 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7207 if (err) 7160 if (err)
7208 return err; 7161 return err;
7209 7162
7210 context = context_buffer; 7163 context = (u8 *)context_buffer;
7211 if (!check_main_image_header_magic(context)) { 7164 if (!check_main_image_header_magic(context)) {
7212 gk20a_err(dev_from_gk20a(g), 7165 gk20a_err(dev_from_gk20a(g),
7213 "Invalid main header: magic value"); 7166 "Invalid main header: magic value");
7214 return -EINVAL; 7167 return -EINVAL;
7215 } 7168 }
7216 num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); 7169 num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
7217 7170
7218 /* Parse the FECS local header. */ 7171 /* Parse the FECS local header. */
7219 context += ctxsw_prog_ucode_header_size_in_bytes(); 7172 context += ctxsw_prog_ucode_header_size_in_bytes();
@@ -7222,7 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7222 "Invalid FECS local header: magic value\n"); 7175 "Invalid FECS local header: magic value\n");
7223 return -EINVAL; 7176 return -EINVAL;
7224 } 7177 }
7225 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); 7178 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7226 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); 7179 sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7227 7180
7228 /* If found in Ext buffer, ok. 7181 /* If found in Ext buffer, ok.
@@ -7268,7 +7221,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7268 return -EINVAL; 7221 return -EINVAL;
7269 7222
7270 } 7223 }
7271 data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); 7224 data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
7272 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); 7225 gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
7273 7226
7274 err = gr_gk20a_determine_ppc_configuration(g, context, 7227 err = gr_gk20a_determine_ppc_configuration(g, context,
@@ -7277,7 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7277 if (err) 7230 if (err)
7278 return err; 7231 return err;
7279 7232
7280 num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0); 7233 num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
7281 7234
7282 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { 7235 if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) {
7283 gk20a_err(dev_from_gk20a(g), 7236 gk20a_err(dev_from_gk20a(g),
@@ -7689,9 +7642,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7689{ 7642{
7690 struct gk20a *g = ch->g; 7643 struct gk20a *g = ch->g;
7691 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 7644 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
7692 void *ctx_ptr = NULL; 7645 bool gr_ctx_ready = false;
7693 void *pm_ctx_ptr = NULL; 7646 bool pm_ctx_ready = false;
7694 void *base_ptr = NULL; 7647 struct mem_desc *current_mem = NULL;
7695 bool ch_is_curr_ctx, restart_gr_ctxsw = false; 7648 bool ch_is_curr_ctx, restart_gr_ctxsw = false;
7696 u32 i, j, offset, v; 7649 u32 i, j, offset, v;
7697 struct gr_gk20a *gr = &g->gr; 7650 struct gr_gk20a *gr = &g->gr;
@@ -7821,20 +7774,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7821 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), 7774 ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
7822 ctx_ops[i].quad); 7775 ctx_ops[i].quad);
7823 if (!err) { 7776 if (!err) {
7824 if (!ctx_ptr) { 7777 if (!gr_ctx_ready) {
7825 /* would have been a variant of 7778 /* would have been a variant of
7826 * gr_gk20a_apply_instmem_overrides, 7779 * gr_gk20a_apply_instmem_overrides,
7827 * recoded in-place instead. 7780 * recoded in-place instead.
7828 */ 7781 */
7829 ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, 7782 if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) {
7830 PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
7831 0, pgprot_writecombine(PAGE_KERNEL));
7832 if (!ctx_ptr) {
7833 err = -ENOMEM; 7783 err = -ENOMEM;
7834 goto cleanup; 7784 goto cleanup;
7835 } 7785 }
7786 gr_ctx_ready = true;
7836 } 7787 }
7837 base_ptr = ctx_ptr; 7788 current_mem = &ch_ctx->gr_ctx->mem;
7838 } else { 7789 } else {
7839 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 7790 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
7840 ctx_ops[i].offset, 7791 ctx_ops[i].offset,
@@ -7849,7 +7800,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7849 NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; 7800 NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET;
7850 continue; 7801 continue;
7851 } 7802 }
7852 if (!pm_ctx_ptr) { 7803 if (!pm_ctx_ready) {
7853 /* Make sure ctx buffer was initialized */ 7804 /* Make sure ctx buffer was initialized */
7854 if (!ch_ctx->pm_ctx.mem.pages) { 7805 if (!ch_ctx->pm_ctx.mem.pages) {
7855 gk20a_err(dev_from_gk20a(g), 7806 gk20a_err(dev_from_gk20a(g),
@@ -7857,15 +7808,13 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7857 err = -EINVAL; 7808 err = -EINVAL;
7858 goto cleanup; 7809 goto cleanup;
7859 } 7810 }
7860 pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages, 7811 if (gk20a_mem_begin(g, &ch_ctx->pm_ctx.mem)) {
7861 PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT,
7862 0, pgprot_writecombine(PAGE_KERNEL));
7863 if (!pm_ctx_ptr) {
7864 err = -ENOMEM; 7812 err = -ENOMEM;
7865 goto cleanup; 7813 goto cleanup;
7866 } 7814 }
7815 pm_ctx_ready = true;
7867 } 7816 }
7868 base_ptr = pm_ctx_ptr; 7817 current_mem = &ch_ctx->pm_ctx.mem;
7869 } 7818 }
7870 7819
7871 /* if this is a quad access, setup for special access*/ 7820 /* if this is a quad access, setup for special access*/
@@ -7878,24 +7827,24 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7878 /* sanity check gr ctxt offsets, 7827 /* sanity check gr ctxt offsets,
7879 * don't write outside, worst case 7828 * don't write outside, worst case
7880 */ 7829 */
7881 if ((base_ptr == ctx_ptr) && 7830 if ((current_mem == &ch_ctx->gr_ctx->mem) &&
7882 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) 7831 (offsets[j] >= g->gr.ctx_vars.golden_image_size))
7883 continue; 7832 continue;
7884 if (pass == 0) { /* write pass */ 7833 if (pass == 0) { /* write pass */
7885 v = gk20a_mem_rd32(base_ptr + offsets[j], 0); 7834 v = gk20a_mem_rd(g, current_mem, offsets[j]);
7886 v &= ~ctx_ops[i].and_n_mask_lo; 7835 v &= ~ctx_ops[i].and_n_mask_lo;
7887 v |= ctx_ops[i].value_lo; 7836 v |= ctx_ops[i].value_lo;
7888 gk20a_mem_wr32(base_ptr + offsets[j], 0, v); 7837 gk20a_mem_wr(g, current_mem, offsets[j], v);
7889 7838
7890 gk20a_dbg(gpu_dbg_gpu_dbg, 7839 gk20a_dbg(gpu_dbg_gpu_dbg,
7891 "context wr: offset=0x%x v=0x%x", 7840 "context wr: offset=0x%x v=0x%x",
7892 offsets[j], v); 7841 offsets[j], v);
7893 7842
7894 if (ctx_ops[i].op == REGOP(WRITE_64)) { 7843 if (ctx_ops[i].op == REGOP(WRITE_64)) {
7895 v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0); 7844 v = gk20a_mem_rd(g, current_mem, offsets[j] + 4);
7896 v &= ~ctx_ops[i].and_n_mask_hi; 7845 v &= ~ctx_ops[i].and_n_mask_hi;
7897 v |= ctx_ops[i].value_hi; 7846 v |= ctx_ops[i].value_hi;
7898 gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v); 7847 gk20a_mem_wr(g, current_mem, offsets[j] + 4, v);
7899 7848
7900 gk20a_dbg(gpu_dbg_gpu_dbg, 7849 gk20a_dbg(gpu_dbg_gpu_dbg,
7901 "context wr: offset=0x%x v=0x%x", 7850 "context wr: offset=0x%x v=0x%x",
@@ -7905,18 +7854,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7905 /* check to see if we need to add a special WAR 7854 /* check to see if we need to add a special WAR
7906 for some of the SMPC perf regs */ 7855 for some of the SMPC perf regs */
7907 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], 7856 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j],
7908 v, base_ptr); 7857 v, current_mem);
7909 7858
7910 } else { /* read pass */ 7859 } else { /* read pass */
7911 ctx_ops[i].value_lo = 7860 ctx_ops[i].value_lo =
7912 gk20a_mem_rd32(base_ptr + offsets[0], 0); 7861 gk20a_mem_rd(g, current_mem, offsets[0]);
7913 7862
7914 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", 7863 gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x",
7915 offsets[0], ctx_ops[i].value_lo); 7864 offsets[0], ctx_ops[i].value_lo);
7916 7865
7917 if (ctx_ops[i].op == REGOP(READ_64)) { 7866 if (ctx_ops[i].op == REGOP(READ_64)) {
7918 ctx_ops[i].value_hi = 7867 ctx_ops[i].value_hi =
7919 gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0); 7868 gk20a_mem_rd(g, current_mem, offsets[0] + 4);
7920 7869
7921 gk20a_dbg(gpu_dbg_gpu_dbg, 7870 gk20a_dbg(gpu_dbg_gpu_dbg,
7922 "context rd: offset=0x%x v=0x%x", 7871 "context rd: offset=0x%x v=0x%x",
@@ -7943,12 +7892,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7943 7892
7944 if (ch_ctx->patch_ctx.mem.cpu_va) 7893 if (ch_ctx->patch_ctx.mem.cpu_va)
7945 gr_gk20a_ctx_patch_write_end(g, ch_ctx); 7894 gr_gk20a_ctx_patch_write_end(g, ch_ctx);
7946 7895 if (gr_ctx_ready)
7947 if (ctx_ptr) 7896 gk20a_mem_end(g, &ch_ctx->gr_ctx->mem);
7948 vunmap(ctx_ptr); 7897 if (pm_ctx_ready)
7949 7898 gk20a_mem_end(g, &ch_ctx->pm_ctx.mem);
7950 if (pm_ctx_ptr)
7951 vunmap(pm_ctx_ptr);
7952 7899
7953 if (restart_gr_ctxsw) { 7900 if (restart_gr_ctxsw) {
7954 int tmp_err = gr_gk20a_enable_ctxsw(g); 7901 int tmp_err = gr_gk20a_enable_ctxsw(g);