Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.c   306
1 file changed, 121 insertions(+), 185 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b7b68575..558a1b06 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -777,31 +777,6 @@ int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 	return vm->mmu_levels[0].lo_bit[0];
 }
 
-/* given address range (inclusive) determine the pdes crossed */
-void pde_range_from_vaddr_range(struct vm_gk20a *vm,
-				u64 addr_lo, u64 addr_hi,
-				u32 *pde_lo, u32 *pde_hi)
-{
-	int pde_shift = gk20a_mm_pde_coverage_bit_count(vm);
-
-	*pde_lo = (u32)(addr_lo >> pde_shift);
-	*pde_hi = (u32)(addr_hi >> pde_shift);
-	gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
-		  addr_lo, addr_hi, pde_shift);
-	gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
-		  *pde_lo, *pde_hi);
-}
-
-static u32 pde_from_index(u32 i)
-{
-	return i * gmmu_pde__size_v() / sizeof(u32);
-}
-
-static u32 pte_from_index(u32 i)
-{
-	return i * gmmu_pte__size_v() / sizeof(u32);
-}
-
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 			 struct nvgpu_mapped_buf ***mapped_buffers,
 			 int *num_buffers)
@@ -1478,7 +1453,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
  * If mem is in VIDMEM, return base address in vidmem
  * else return IOVA address for SYSMEM
  */
-u64 gk20a_mem_get_base_addr(struct gk20a *g, struct nvgpu_mem *mem,
+u64 nvgpu_mem_get_base_addr(struct gk20a *g, struct nvgpu_mem *mem,
 			    u32 flags)
 {
 	struct nvgpu_page_alloc *alloc;
@@ -1580,203 +1555,168 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-void gk20a_pde_wr32(struct gk20a *g, struct gk20a_mm_entry *entry,
-		size_t w, size_t data)
-{
-	nvgpu_mem_wr32(g, &entry->mem, entry->woffset + w, data);
-}
-
-u64 gk20a_pde_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
-{
-	u64 base;
-
-	if (g->mm.has_physical_mode)
-		base = sg_phys(entry->mem.priv.sgt->sgl);
-	else
-		base = gk20a_mem_get_base_addr(g, &entry->mem, 0);
-
-	return base + entry->woffset * sizeof(u32);
-}
-
 /* for gk20a the "video memory" apertures here are misnomers. */
 static inline u32 big_valid_pde0_bits(struct gk20a *g,
-				      struct gk20a_mm_entry *entry)
+				      struct nvgpu_gmmu_pd *pd, u64 addr)
 {
-	u64 pte_addr = gk20a_pde_addr(g, entry);
 	u32 pde0_bits =
-		nvgpu_aperture_mask(g, &entry->mem,
+		nvgpu_aperture_mask(g, &pd->mem,
 			gmmu_pde_aperture_big_sys_mem_ncoh_f(),
 			gmmu_pde_aperture_big_video_memory_f()) |
 		gmmu_pde_address_big_sys_f(
-			(u32)(pte_addr >> gmmu_pde_address_shift_v()));
+			(u32)(addr >> gmmu_pde_address_shift_v()));
 
 	return pde0_bits;
 }
 
 static inline u32 small_valid_pde1_bits(struct gk20a *g,
-					struct gk20a_mm_entry *entry)
+					struct nvgpu_gmmu_pd *pd, u64 addr)
 {
-	u64 pte_addr = gk20a_pde_addr(g, entry);
 	u32 pde1_bits =
-		nvgpu_aperture_mask(g, &entry->mem,
+		nvgpu_aperture_mask(g, &pd->mem,
 			gmmu_pde_aperture_small_sys_mem_ncoh_f(),
 			gmmu_pde_aperture_small_video_memory_f()) |
 		gmmu_pde_vol_small_true_f() | /* tbd: why? */
 		gmmu_pde_address_small_sys_f(
-			(u32)(pte_addr >> gmmu_pde_address_shift_v()));
+			(u32)(addr >> gmmu_pde_address_shift_v()));
 
 	return pde1_bits;
 }
 
-/* Given the current state of the ptes associated with a pde,
-   determine value and write it out.  There's no checking
-   here to determine whether or not a change was actually
-   made.  So, superfluous updates will cause unnecessary
-   pde invalidations.
-*/
-static int update_gmmu_pde_locked(struct vm_gk20a *vm,
-			   struct gk20a_mm_entry *pte,
-			   u32 i, u32 gmmu_pgsz_idx,
-			   struct scatterlist **sgl,
-			   u64 *offset,
-			   u64 *iova,
-			   u32 kind_v, u64 *ctag,
-			   bool cacheable, bool unammped_pte,
-			   int rw_flag, bool sparse, bool priv,
-			   enum nvgpu_aperture aperture)
+static void update_gmmu_pde_locked(struct vm_gk20a *vm,
+				   const struct gk20a_mmu_level *l,
+				   struct nvgpu_gmmu_pd *pd,
+				   u32 pd_idx,
+				   u64 virt_addr,
+				   u64 phys_addr,
+				   struct nvgpu_gmmu_attrs *attrs)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	bool small_valid, big_valid;
-	struct gk20a_mm_entry *entry = vm->pdb.entries + i;
+	u32 pd_offset = pd_offset_from_index(l, pd_idx);
 	u32 pde_v[2] = {0, 0};
-	u32 pde;
 
-	gk20a_dbg_fn("");
-
-	small_valid = entry->mem.size && entry->pgsz == gmmu_page_size_small;
-	big_valid = entry->mem.size && entry->pgsz == gmmu_page_size_big;
+	small_valid = attrs->pgsz == gmmu_page_size_small;
+	big_valid = attrs->pgsz == gmmu_page_size_big;
 
 	pde_v[0] = gmmu_pde_size_full_f();
 	pde_v[0] |= big_valid ?
-		big_valid_pde0_bits(g, entry) :
+		big_valid_pde0_bits(g, pd, phys_addr) :
 		gmmu_pde_aperture_big_invalid_f();
 
-	pde_v[1] |= (small_valid ?
-		small_valid_pde1_bits(g, entry) :
+	pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) :
 		     (gmmu_pde_aperture_small_invalid_f() |
 		      gmmu_pde_vol_small_false_f()))
 		    |
 		    (big_valid ? (gmmu_pde_vol_big_true_f()) :
 		     gmmu_pde_vol_big_false_f());
 
-	pde = pde_from_index(i);
+	pte_dbg(g, attrs,
+		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
+		"GPU %#-12llx phys %#-12llx "
+		"[0x%08x, 0x%08x]",
+		pd_idx, l->entry_size, pd_offset,
+		small_valid ? 'S' : '-',
+		big_valid ? 'B' : '-',
+		virt_addr, phys_addr,
+		pde_v[1], pde_v[0]);
 
-	gk20a_pde_wr32(g, &vm->pdb, pde + 0, pde_v[0]);
-	gk20a_pde_wr32(g, &vm->pdb, pde + 1, pde_v[1]);
+	pd_write(g, &vm->pdb, pd_offset + 0, pde_v[0]);
+	pd_write(g, &vm->pdb, pd_offset + 1, pde_v[1]);
+}
 
-	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
-		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
-	return 0;
+static void __update_pte_sparse(u32 *pte_w)
+{
+	pte_w[0] = gmmu_pte_valid_false_f();
+	pte_w[1] |= gmmu_pte_vol_true_f();
 }
 
-static int update_gmmu_pte_locked(struct vm_gk20a *vm,
-			   struct gk20a_mm_entry *pte,
-			   u32 i, u32 gmmu_pgsz_idx,
-			   struct scatterlist **sgl,
-			   u64 *offset,
-			   u64 *iova,
-			   u32 kind_v, u64 *ctag,
-			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse, bool priv,
-			   enum nvgpu_aperture aperture)
+static void __update_pte(struct vm_gk20a *vm,
+			 u32 *pte_w,
+			 u64 phys_addr,
+			 struct nvgpu_gmmu_attrs *attrs)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
+	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
+	u32 pte_valid = attrs->valid ?
+		gmmu_pte_valid_true_f() :
+		gmmu_pte_valid_false_f();
+	u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v();
+	u32 addr = attrs->aperture == APERTURE_SYSMEM ?
+		   gmmu_pte_address_sys_f(phys_shifted) :
+		   gmmu_pte_address_vid_f(phys_shifted);
 	int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));
-	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
-	u32 pte_w[2] = {0, 0}; /* invalid pte */
-
-	if (*iova) {
-		u32 pte_valid = unmapped_pte ?
-			gmmu_pte_valid_false_f() :
-			gmmu_pte_valid_true_f();
-		u32 iova_v = *iova >> gmmu_pte_address_shift_v();
-		u32 pte_addr = aperture == APERTURE_SYSMEM ?
-				gmmu_pte_address_sys_f(iova_v) :
-				gmmu_pte_address_vid_f(iova_v);
-
-		pte_w[0] = pte_valid | pte_addr;
-
-		if (priv)
-			pte_w[0] |= gmmu_pte_privilege_true_f();
-
-		pte_w[1] = __nvgpu_aperture_mask(g, aperture,
-			gmmu_pte_aperture_sys_mem_ncoh_f(),
-			gmmu_pte_aperture_video_memory_f()) |
-			gmmu_pte_kind_f(kind_v) |
-			gmmu_pte_comptagline_f((u32)(*ctag >> ctag_shift));
-
-		if (*ctag && vm->mm->use_full_comp_tag_line && *iova & 0x10000)
-			pte_w[1] |= gmmu_pte_comptagline_f(
-					1 << (gmmu_pte_comptagline_s() - 1));
-
-		if (rw_flag == gk20a_mem_flag_read_only) {
-			pte_w[0] |= gmmu_pte_read_only_true_f();
-			pte_w[1] |=
-				gmmu_pte_write_disable_true_f();
-		} else if (rw_flag ==
-			   gk20a_mem_flag_write_only) {
-			pte_w[1] |=
-				gmmu_pte_read_disable_true_f();
-		}
-		if (!unmapped_pte) {
-			if (!cacheable)
-				pte_w[1] |=
-					gmmu_pte_vol_true_f();
-		} else {
-			/* Store cacheable value behind
-			 * gmmu_pte_write_disable_true_f */
-			if (!cacheable)
-				pte_w[1] |=
-					gmmu_pte_write_disable_true_f();
-		}
 
-		gk20a_dbg(gpu_dbg_pte,
-			"pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]",
-			   i, *iova,
-			   kind_v, (u32)(*ctag >> ctag_shift), !cacheable,
-			   pte_w[1], pte_w[0]);
+	pte_w[0] = pte_valid | addr;
 
-		if (*ctag)
-			*ctag += page_size;
-	} else if (sparse) {
-		pte_w[0] = gmmu_pte_valid_false_f();
-		pte_w[1] |= gmmu_pte_vol_true_f();
-	} else {
-		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
-	}
+	if (attrs->priv)
+		pte_w[0] |= gmmu_pte_privilege_true_f();
 
-	gk20a_pde_wr32(g, pte, pte_from_index(i) + 0, pte_w[0]);
-	gk20a_pde_wr32(g, pte, pte_from_index(i) + 1, pte_w[1]);
-
-	if (*iova) {
-		*iova += page_size;
-		*offset += page_size;
-		if (*sgl && *offset + page_size > (*sgl)->length) {
-			u64 new_iova;
-			*sgl = sg_next(*sgl);
-			if (*sgl) {
-				new_iova = sg_phys(*sgl);
-				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
-					  new_iova, (*sgl)->length);
-				if (new_iova) {
-					*offset = 0;
-					*iova = new_iova;
-				}
-			}
-		}
+	pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture,
+		gmmu_pte_aperture_sys_mem_ncoh_f(),
+		gmmu_pte_aperture_video_memory_f()) |
+		gmmu_pte_kind_f(attrs->kind_v) |
+		gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
+
+	if (attrs->ctag && vm->mm->use_full_comp_tag_line &&
+	    phys_addr & 0x10000)
+		pte_w[1] |= gmmu_pte_comptagline_f(
+			1 << (gmmu_pte_comptagline_s() - 1));
+
+	if (attrs->rw_flag == gk20a_mem_flag_read_only) {
+		pte_w[0] |= gmmu_pte_read_only_true_f();
+		pte_w[1] |= gmmu_pte_write_disable_true_f();
+	} else if (attrs->rw_flag == gk20a_mem_flag_write_only) {
+		pte_w[1] |= gmmu_pte_read_disable_true_f();
 	}
 
-	return 0;
+	if (!attrs->cacheable)
+		pte_w[1] |= gmmu_pte_vol_true_f();
+
+	if (attrs->ctag)
+		attrs->ctag += page_size;
+}
+
+static void update_gmmu_pte_locked(struct vm_gk20a *vm,
+				   const struct gk20a_mmu_level *l,
+				   struct nvgpu_gmmu_pd *pd,
+				   u32 pd_idx,
+				   u64 virt_addr,
+				   u64 phys_addr,
+				   struct nvgpu_gmmu_attrs *attrs)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
+	u32 pd_offset = pd_offset_from_index(l, pd_idx);
+	u32 pte_w[2] = {0, 0};
+	int ctag_shift = ilog2(g->ops.fb.compression_page_size(g));
+
+	if (phys_addr)
+		__update_pte(vm, pte_w, phys_addr, attrs);
+	else if (attrs->sparse)
+		__update_pte_sparse(pte_w);
+
+	pte_dbg(g, attrs,
+		"PTE: i=%-4u size=%-2u offs=%-4u | "
+		"GPU %#-12llx phys %#-12llx "
+		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
+		"ctag=0x%08x "
+		"[0x%08x, 0x%08x]",
+		pd_idx, l->entry_size, pd_offset,
+		virt_addr, phys_addr,
+		page_size >> 10,
+		nvgpu_gmmu_perm_str(attrs->rw_flag),
+		attrs->kind_v,
+		nvgpu_aperture_str(attrs->aperture),
+		attrs->valid ? 'V' : '-',
+		attrs->cacheable ? 'C' : '-',
+		attrs->sparse ? 'S' : '-',
+		attrs->priv ? 'P' : '-',
+		(u32)attrs->ctag >> ctag_shift,
+		pte_w[1], pte_w[0]);
+
+	pd_write(g, pd, pd_offset + 0, pte_w[0]);
+	pd_write(g, pd, pd_offset + 1, pte_w[1]);
 }
 
 /* NOTE! mapped_buffers lock must be held */
@@ -1809,13 +1749,6 @@ void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
 				mapped_buffer->vm_area->sparse : false,
 				batch);
 
-	gk20a_dbg(gpu_dbg_map,
-		"gv: 0x%04x_%08x pgsz=%-3dKb as=%-2d own_mem_ref=%d",
-		u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
-		vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
-		vm_aspace_id(vm),
-		mapped_buffer->own_mem_ref);
-
 	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
 		       mapped_buffer->sgt);
 
@@ -1942,6 +1875,9 @@ int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 	if (err)
 		ch->vm = NULL;
 
+	nvgpu_log(gk20a_from_vm(vm), gpu_dbg_map, "Binding ch=%d -> VM:%s",
+		  ch->chid, vm->name);
+
 	return err;
 }
 
@@ -2114,7 +2050,7 @@ u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
 	if (g->mm.has_physical_mode)
 		addr = gk20a_mem_phys(inst_block);
 	else
-		addr = gk20a_mem_get_base_addr(g, inst_block, 0);
+		addr = nvgpu_mem_get_base_addr(g, inst_block, 0);
 
 	return addr;
 }
@@ -2237,7 +2173,7 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm)
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		struct vm_gk20a *vm)
 {
-	u64 pdb_addr = gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0);
+	u64 pdb_addr = nvgpu_mem_get_base_addr(g, &vm->pdb.mem, 0);
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 