Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	658
1 file changed, 331 insertions, 327 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 80c766b6..d8bd3e70 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -98,7 +98,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   struct sg_table *sgt, u64 buffer_offset,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
-				   int rw_flag);
+				   int rw_flag,
+				   bool sparse);
 static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
@@ -605,34 +606,46 @@ void unmap_gmmu_pages(struct gk20a_mm_entry *entry)
 
 static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm,
 				 enum gmmu_pgsz_gk20a pgsz_idx,
+				 const struct gk20a_mmu_level *l,
 				 struct gk20a_mm_entry *entry)
 {
 	int err;
-	u32 pte_order;
+	int order;
 
 	gk20a_dbg_fn("");
 
 	/* allocate enough pages for the table */
-	pte_order = vm->page_table_sizing[pgsz_idx].order;
+	order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
+	order += ilog2(l->entry_size);
+	order -= PAGE_SHIFT;
+	order = max(0, order);
 
-	err = alloc_gmmu_pages(vm, pte_order, entry);
+	err = alloc_gmmu_pages(vm, order, entry);
 	gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d",
-		  entry, gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl),
-		  pte_order);
+		  entry, gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl), order);
+	if (err)
+		return err;
 	entry->pgsz = pgsz_idx;
 
 	return err;
 }
 
+int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
+{
+	return vm->mmu_levels[0].lo_bit[0];
+}
+
 /* given address range (inclusive) determine the pdes crossed */
 void pde_range_from_vaddr_range(struct vm_gk20a *vm,
 				u64 addr_lo, u64 addr_hi,
 				u32 *pde_lo, u32 *pde_hi)
 {
-	*pde_lo = (u32)(addr_lo >> vm->pde_stride_shift);
-	*pde_hi = (u32)(addr_hi >> vm->pde_stride_shift);
+	int pde_shift = gk20a_mm_pde_coverage_bit_count(vm);
+
+	*pde_lo = (u32)(addr_lo >> pde_shift);
+	*pde_hi = (u32)(addr_hi >> pde_shift);
 	gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
-			addr_lo, addr_hi, vm->pde_stride_shift);
+			addr_lo, addr_hi, pde_shift);
 	gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
 			*pde_lo, *pde_hi);
 }
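
For context on the hunk above: the new gk20a_zalloc_gmmu_page_table() derives the backing allocation size from the MMU level description instead of the old page_table_sizing table. A rough sketch of that arithmetic, not part of the patch, using assumed values from the gk20a_mm_levels_64k big-page PTE level added later in this diff (hi_bit 25, lo_bit 16, 8-byte entries) and PAGE_SHIFT == 12:

	/* Sketch only: worked example of the allocation-order math above. */
	static int sketch_pte_table_order(int hi_bit, int lo_bit, int entry_size)
	{
		int order = hi_bit - lo_bit + 1;	/* 25 - 16 + 1 = 10 index bits */

		order += ilog2(entry_size);		/* 1024 entries * 8 B = 8 KiB */
		order -= PAGE_SHIFT;			/* 13 - 12 = page order 1 */
		return max(0, order);			/* never less than one page */
	}
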
@@ -647,7 +660,7 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm,
 {
 	u32 ret;
 	/* mask off pde part */
-	addr = addr & ((((u64)1) << vm->pde_stride_shift) - ((u64)1));
+	addr = addr & ((1ULL << gk20a_mm_pde_coverage_bit_count(vm)) - 1ULL);
 
 	/* shift over to get pte index. note assumption that pte index
 	 * doesn't leak over into the high 32b */
@@ -657,57 +670,6 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm,
 	return ret;
 }
 
-static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
-						    u32 *pte_offset)
-{
-	/* ptes are 8B regardless of pagesize */
-	/* pte space pages are 4KB. so 512 ptes per 4KB page*/
-	*pte_page = i >> 9;
-
-	/* this offset is a pte offset, not a byte offset */
-	*pte_offset = i & ((1<<9)-1);
-
-	gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
-		  i, *pte_page, *pte_offset);
-}
-
-
-/*
- * given a pde index/page table number make sure it has
- * backing store and if not go ahead allocate it and
- * record it in the appropriate pde
- */
-int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
-				u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
-{
-	int err;
-	struct gk20a_mm_entry *entry = vm->pdb.entries + i;
-
-	gk20a_dbg_fn("");
-
-	/* if it's already in place it's valid */
-	if (entry->size)
-		return 0;
-
-	gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
-		  vm->gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
-
-	err = gk20a_zalloc_gmmu_page_table(vm, gmmu_pgsz_idx, entry);
-	if (err)
-		return err;
-
-	/* rewrite pde */
-	err = map_gmmu_pages(&vm->pdb);
-	if (err)
-		return err;
-
-	update_gmmu_pde_locked(vm, i);
-
-	unmap_gmmu_pages(&vm->pdb);
-
-	return 0;
-}
-
 static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
 						       u64 addr)
 {
@@ -1117,11 +1079,11 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			u32 ctag_offset,
 			u32 flags,
 			int rw_flag,
-			bool clear_ctags)
+			bool clear_ctags,
+			bool sparse)
 {
-	int err = 0, i = 0;
+	int err = 0;
 	bool allocated = false;
-	u32 pde_lo, pde_hi;
 	struct device *d = dev_from_vm(vm);
 	struct gk20a *g = gk20a_from_vm(vm);
 	int ctag_granularity = g->ops.fb.compression_page_size(g);
@@ -1146,31 +1108,16 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		allocated = true;
 	}
 
-	pde_range_from_vaddr_range(vm,
-				   map_offset,
-				   map_offset + size - 1,
-				   &pde_lo, &pde_hi);
-
-	/* mark the addr range valid (but with 0 phys addr, which will fault) */
-	for (i = pde_lo; i <= pde_hi; i++) {
-		err = validate_gmmu_page_table_gk20a_locked(vm, i,
-							    pgsz_idx);
-		if (err) {
-			gk20a_err(d, "failed to validate page table %d: %d",
-				   i, err);
-			goto fail_validate;
-		}
-	}
-
 	err = update_gmmu_ptes_locked(vm, pgsz_idx,
 				      sgt,
 				      buffer_offset,
-				      map_offset, map_offset + size - 1,
+				      map_offset, map_offset + size,
 				      kind_v,
 				      ctag_offset,
 				      flags &
 				      NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
-				      rw_flag);
+				      rw_flag,
+				      sparse);
 	if (err) {
 		gk20a_err(d, "failed to update ptes on map");
 		goto fail_validate;
@@ -1192,7 +1139,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 			u64 size,
 			int pgsz_idx,
 			bool va_allocated,
-			int rw_flag)
+			int rw_flag,
+			bool sparse)
 {
 	int err = 0;
 	struct gk20a *g = gk20a_from_vm(vm);
@@ -1212,9 +1160,10 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 			NULL, /* n/a for unmap */
 			0,
 			vaddr,
-			vaddr + size - 1,
+			vaddr + size,
 			0, 0, false /* n/a for unmap */,
-			rw_flag);
+			rw_flag,
+			sparse);
 	if (err)
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
@@ -1439,7 +1388,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 					bfr.kind_v,
 					bfr.ctag_offset,
 					flags, rw_flag,
-					clear_ctags);
+					clear_ctags,
+					false);
 	if (!map_offset)
 		goto clean_up;
 
@@ -1555,7 +1505,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 				0, /* page size index = 0 i.e. SZ_4K */
 				0, /* kind */
 				0, /* ctag_offset */
-				flags, rw_flag, false);
+				flags, rw_flag, false, false);
 	mutex_unlock(&vm->update_gmmu_lock);
 	if (!vaddr) {
 		gk20a_err(dev_from_vm(vm), "failed to allocate va space");
@@ -1642,7 +1592,8 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
 			size,
 			0, /* page size 4K */
 			true, /*va_allocated */
-			rw_flag);
+			rw_flag,
+			false);
 	mutex_unlock(&vm->update_gmmu_lock);
 }
 
@@ -1748,157 +1699,6 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl)
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-				   enum gmmu_pgsz_gk20a pgsz_idx,
-				   struct sg_table *sgt,
-				   u64 buffer_offset,
-				   u64 first_vaddr, u64 last_vaddr,
-				   u8 kind_v, u32 ctag_offset,
-				   bool cacheable,
-				   int rw_flag)
-{
-	int err;
-	u32 pde_lo, pde_hi, pde_i;
-	struct scatterlist *cur_chunk;
-	unsigned int cur_offset;
-	u32 pte_w[2] = {0, 0}; /* invalid pte */
-	struct gk20a *g = gk20a_from_vm(vm);
-	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
-	u32 ctag = ctag_offset * ctag_granularity;
-	u32 ctag_incr;
-	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
-	u64 addr = 0;
-	u64 space_to_skip = buffer_offset;
-
-	pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
-				   &pde_lo, &pde_hi);
-
-	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
-		   pgsz_idx, pde_lo, pde_hi);
-
-	ctag_incr = ctag_offset ? page_size : 0;
-
-	cur_offset = 0;
-	if (sgt) {
-		cur_chunk = sgt->sgl;
-		/* space_to_skip must be page aligned */
-		BUG_ON(space_to_skip & (page_size - 1));
-
-		while (space_to_skip > 0 && cur_chunk) {
-			u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, cur_chunk);
-			if (new_addr) {
-				addr = new_addr;
-				addr += cur_offset;
-			}
-			cur_offset += page_size;
-			addr += page_size;
-			while (cur_chunk &&
-				cur_offset >= cur_chunk->length) {
-				cur_offset -= cur_chunk->length;
-				cur_chunk = sg_next(cur_chunk);
-			}
-			space_to_skip -= page_size;
-		}
-	}
-	else
-		cur_chunk = NULL;
-
-	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
-		u32 pte_lo, pte_hi;
-		u32 pte_cur;
-
-		struct gk20a_mm_entry *entry = vm->pdb.entries + pde_i;
-
-		if (pde_i == pde_lo)
-			pte_lo = pte_index_from_vaddr(vm, first_vaddr,
-						      pgsz_idx);
-		else
-			pte_lo = 0;
-
-		if ((pde_i != pde_hi) && (pde_hi != pde_lo))
-			pte_hi = vm->page_table_sizing[pgsz_idx].num_ptes-1;
-		else
-			pte_hi = pte_index_from_vaddr(vm, last_vaddr,
-						      pgsz_idx);
-
-		/* get cpu access to the ptes */
-		err = map_gmmu_pages(entry);
-		if (err) {
-			gk20a_err(dev_from_vm(vm),
-				   "couldn't map ptes for update as=%d",
-				   vm_aspace_id(vm));
-			goto clean_up;
-		}
-
-		gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
-		for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
-			if (likely(sgt)) {
-				u64 new_addr = gk20a_mm_iova_addr(vm->mm->g,
-								  cur_chunk);
-				if (new_addr) {
-					addr = new_addr;
-					addr += cur_offset;
-				}
-				pte_w[0] = gmmu_pte_valid_true_f() |
-					gmmu_pte_address_sys_f(addr
-						>> gmmu_pte_address_shift_v());
-				pte_w[1] = gmmu_pte_aperture_video_memory_f() |
-					gmmu_pte_kind_f(kind_v) |
-					gmmu_pte_comptagline_f(ctag
-							     / ctag_granularity);
-
-				if (rw_flag == gk20a_mem_flag_read_only) {
-					pte_w[0] |= gmmu_pte_read_only_true_f();
-					pte_w[1] |=
-						gmmu_pte_write_disable_true_f();
-				} else if (rw_flag ==
-					   gk20a_mem_flag_write_only) {
-					pte_w[1] |=
-						gmmu_pte_read_disable_true_f();
-				}
-				if (!cacheable)
-					pte_w[1] |= gmmu_pte_vol_true_f();
-
-				gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
-					   " ctag=%d vol=%d"
-					   " [0x%08x,0x%08x]",
-					   pte_cur, hi32(addr), lo32(addr),
-					   kind_v, ctag, !cacheable,
-					   pte_w[1], pte_w[0]);
-				ctag += ctag_incr;
-				cur_offset += page_size;
-				addr += page_size;
-				while (cur_chunk &&
-					cur_offset >= cur_chunk->length) {
-					cur_offset -= cur_chunk->length;
-					cur_chunk = sg_next(cur_chunk);
-				}
-
-			} else {
-				gk20a_dbg(gpu_dbg_pte,
-					   "pte_cur=%d [0x0,0x0]",
-					   pte_cur);
-			}
-
-			gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 0, pte_w[0]);
-			gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 1, pte_w[1]);
-		}
-
-		unmap_gmmu_pages(entry);
-	}
-
-	smp_mb();
-
-	return 0;
-
-clean_up:
-	/*TBD: potentially rewrite above to pre-map everything it needs to
-	 * as that's the only way it can fail */
-	return err;
-
-}
-
-
 /* for gk20a the "video memory" apertures here are misnomers. */
 static inline u32 big_valid_pde0_bits(u64 pte_addr)
 {
@@ -1908,6 +1708,7 @@ static inline u32 big_valid_pde0_bits(u64 pte_addr)
 		(u32)(pte_addr >> gmmu_pde_address_shift_v()));
 	return pde0_bits;
 }
+
 static inline u32 small_valid_pde1_bits(u64 pte_addr)
 {
 	u32 pde1_bits =
@@ -1924,10 +1725,15 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
    made. So, superfluous updates will cause unnecessary
    pde invalidations.
 */
-void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
+int update_gmmu_pde_locked(struct vm_gk20a *vm,
+			   struct gk20a_mm_entry *pte,
+			   u32 i, u32 gmmu_pgsz_idx,
+			   u64 iova,
+			   u32 kind_v, u32 *ctag,
+			   bool cacheable, int rw_flag, bool sparse)
 {
 	bool small_valid, big_valid;
-	u64 pte_addr[2] = {0, 0};
+	u64 pte_addr_small = 0, pte_addr_big = 0;
 	struct gk20a_mm_entry *entry = vm->pdb.entries + i;
 	u32 pde_v[2] = {0, 0};
 	u32 *pde;
@@ -1938,44 +1744,227 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
-		pte_addr[gmmu_page_size_small] =
-			gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl);
+		pte_addr_small = gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl);
 
 	if (big_valid)
-		pte_addr[gmmu_page_size_big] =
-			gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl);
+		pte_addr_big = gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl);
 
 	pde_v[0] = gmmu_pde_size_full_f();
-	pde_v[0] |= big_valid ?
-		big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
-		:
+	pde_v[0] |= big_valid ? big_valid_pde0_bits(pte_addr_big) :
 		(gmmu_pde_aperture_big_invalid_f());
 
 	pde_v[1] |= (small_valid ?
-		     small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
-		     :
+		     small_valid_pde1_bits(pte_addr_small) :
 		     (gmmu_pde_aperture_small_invalid_f() |
-		      gmmu_pde_vol_small_false_f())
-		     )
-		     |
-		     (big_valid ? (gmmu_pde_vol_big_true_f()) :
-		      gmmu_pde_vol_big_false_f());
+		      gmmu_pde_vol_small_false_f()))
+		    |
+		    (big_valid ? (gmmu_pde_vol_big_true_f()) :
+		     gmmu_pde_vol_big_false_f());
 
 	pde = pde_from_index(vm, i);
 
 	gk20a_mem_wr32(pde, 0, pde_v[0]);
 	gk20a_mem_wr32(pde, 1, pde_v[1]);
 
-	smp_mb();
+	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
+		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
+	return 0;
+}
+
+int update_gmmu_pte_locked(struct vm_gk20a *vm,
+			   struct gk20a_mm_entry *pte,
+			   u32 i, u32 gmmu_pgsz_idx,
+			   u64 iova,
+			   u32 kind_v, u32 *ctag,
+			   bool cacheable, int rw_flag, bool sparse)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
+	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
+	u32 pte_w[2] = {0, 0}; /* invalid pte */
+
+	if (iova) {
+		pte_w[0] = gmmu_pte_valid_true_f() |
+			gmmu_pte_address_sys_f(iova
+				>> gmmu_pte_address_shift_v());
+		pte_w[1] = gmmu_pte_aperture_video_memory_f() |
+			gmmu_pte_kind_f(kind_v) |
+			gmmu_pte_comptagline_f(*ctag / ctag_granularity);
+
+		if (rw_flag == gk20a_mem_flag_read_only) {
+			pte_w[0] |= gmmu_pte_read_only_true_f();
+			pte_w[1] |=
+				gmmu_pte_write_disable_true_f();
+		} else if (rw_flag ==
+			   gk20a_mem_flag_write_only) {
+			pte_w[1] |=
+				gmmu_pte_read_disable_true_f();
+		}
+		if (!cacheable)
+			pte_w[1] |= gmmu_pte_vol_true_f();
+
+		gk20a_dbg(gpu_dbg_pte,
+			"pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]",
+			i, iova,
+			kind_v, *ctag, !cacheable,
+			pte_w[1], pte_w[0]);
+
+		if (*ctag)
+			*ctag += page_size;
+	} else if (sparse) {
+		pte_w[0] = gmmu_pte_valid_false_f();
+		pte_w[1] |= gmmu_pte_vol_true_f();
+	} else {
+		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
+	}
+
+	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
+	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
+
+	return 0;
+}
+
+static int update_gmmu_level_locked(struct vm_gk20a *vm,
+				    struct gk20a_mm_entry *pte,
+				    enum gmmu_pgsz_gk20a pgsz_idx,
+				    u64 iova,
+				    u64 gpu_va, u64 gpu_end,
+				    u8 kind_v, u32 *ctag,
+				    bool cacheable,
+				    int rw_flag,
+				    bool sparse,
+				    int lvl)
+{
+	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
+	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
+	int err = 0;
+	u32 pde_i;
+	u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx];
+
+	gk20a_dbg_fn("");
+
+	pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL))
+		>> (u64)l->lo_bit[pgsz_idx];
+
+	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
+		  pgsz_idx, lvl, gpu_va, gpu_end-1, iova);
+
+	while (gpu_va < gpu_end) {
+		struct gk20a_mm_entry *next_pte = NULL;
+		u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end);
+
+		/* Allocate next level */
+		if (next_l->update_entry) {
+			if (!pte->entries) {
+				int num_entries =
+					1 <<
+					 (l->hi_bit[pgsz_idx]
+					  - l->lo_bit[pgsz_idx]);
+				pte->entries =
+					kzalloc(sizeof(struct gk20a_mm_entry) *
+						num_entries, GFP_KERNEL);
+				pte->pgsz = pgsz_idx;
+				if (!pte->entries)
+					return -ENOMEM;
+			}
+			next_pte = pte->entries + pde_i;
+
+			if (!next_pte->size) {
+				err = gk20a_zalloc_gmmu_page_table(vm,
+					pgsz_idx, next_l, next_pte);
+				if (err)
+					return err;
+			}
+		}
+
+		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
+				iova, kind_v, ctag, cacheable,
+				rw_flag, sparse);
+		if (err)
+			return err;
+
+		if (next_l->update_entry) {
+			/* get cpu access to the ptes */
+			err = map_gmmu_pages(next_pte);
+			if (err) {
+				gk20a_err(dev_from_vm(vm),
+					   "couldn't map ptes for update as=%d",
+					   vm_aspace_id(vm));
+				return err;
+			}
+			err = update_gmmu_level_locked(vm, next_pte,
+				pgsz_idx,
+				iova,
+				gpu_va,
+				next,
+				kind_v, ctag,
+				cacheable, rw_flag, sparse, lvl+1);
+			unmap_gmmu_pages(next_pte);
+
+			if (err)
+				return err;
+		}
+
+		if (iova)
+			iova += next - gpu_va;
+		pde_i++;
+		gpu_va = next;
+	}
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+}
+
+static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
+				   enum gmmu_pgsz_gk20a pgsz_idx,
+				   struct sg_table *sgt,
+				   u64 buffer_offset,
+				   u64 gpu_va, u64 gpu_end,
+				   u8 kind_v, u32 ctag_offset,
+				   bool cacheable,
+				   int rw_flag,
+				   bool sparse)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	int ctag_granularity = g->ops.fb.compression_page_size(g);
+	u32 ctag = ctag_offset * ctag_granularity;
+	u64 iova = 0;
+	u64 space_to_skip = buffer_offset;
+	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+	int err;
+
+	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx",
+		   pgsz_idx,
+		   sgt ? gk20a_mm_iova_addr(vm->mm->g, sgt->sgl) : 0ULL);
 
-	FLUSH_CPU_DCACHE(pde,
-			 sg_phys(vm->pdb.sgt->sgl) + (i*gmmu_pde__size_v()),
-			 sizeof(u32)*2);
+	if (space_to_skip & (page_size - 1))
+		return -EINVAL;
+
+	if (sgt)
+		iova = gk20a_mm_iova_addr(vm->mm->g, sgt->sgl) + space_to_skip;
 
-	gk20a_mm_l2_invalidate(vm->mm->g);
+	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
+		  pgsz_idx, gpu_va, gpu_end-1, iova);
+	err = map_gmmu_pages(&vm->pdb);
+	if (err) {
+		gk20a_err(dev_from_vm(vm),
+			   "couldn't map ptes for update as=%d",
+			   vm_aspace_id(vm));
+		return err;
+	}
+	err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
+			iova,
+			gpu_va, gpu_end,
+			kind_v, &ctag,
+			cacheable, rw_flag, sparse, 0);
+	unmap_gmmu_pages(&vm->pdb);
+
+	smp_mb();
 
-	gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
 	gk20a_dbg_fn("done");
+
+	return err;
 }
 
 /* NOTE! mapped_buffers lock must be held */
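
Not part of the patch: a rough sketch of how update_gmmu_level_locked() above turns a GPU virtual address into a per-level entry index. The worked values assume the gk20a_mm_levels_64k layout added later in this diff and a small-page mapping.

	/* Sketch only: keep the bits a level translates, then shift them down. */
	static u32 sketch_level_index(u64 gpu_va, u32 hi_bit, u32 lo_bit)
	{
		return (u32)((gpu_va & ((1ULL << (hi_bit + 1)) - 1ULL)) >> lo_bit);
	}

	/* e.g. gpu_va = 0x12345000 with 4 KiB small pages:
	 *   PDE index = sketch_level_index(0x12345000, NV_GMMU_VA_RANGE - 1, 26) = 0x4
	 *   PTE index = sketch_level_index(0x12345000, 25, 12) = 0x2345
	 */
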
@@ -1984,29 +1973,14 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
-	if (mapped_buffer->va_node &&
-	    mapped_buffer->va_node->sparse) {
-		u64 vaddr = mapped_buffer->addr;
-		u32 pgsz_idx = mapped_buffer->pgsz_idx;
-		u32 num_pages = mapped_buffer->size >>
-			ilog2(vm->gmmu_page_sizes[pgsz_idx]);
-
-		/* there is little we can do if this fails... */
-		g->ops.mm.gmmu_unmap(vm,
-				     mapped_buffer->addr,
-				     mapped_buffer->size,
-				     mapped_buffer->pgsz_idx,
-				     mapped_buffer->va_allocated,
-				     gk20a_mem_flag_none);
-		g->ops.mm.set_sparse(vm, vaddr,
-				     num_pages, pgsz_idx, false);
-	} else
-		g->ops.mm.gmmu_unmap(vm,
-				     mapped_buffer->addr,
-				     mapped_buffer->size,
-				     mapped_buffer->pgsz_idx,
-				     mapped_buffer->va_allocated,
-				     gk20a_mem_flag_none);
+	g->ops.mm.gmmu_unmap(vm,
+			     mapped_buffer->addr,
+			     mapped_buffer->size,
+			     mapped_buffer->pgsz_idx,
+			     mapped_buffer->va_allocated,
+			     gk20a_mem_flag_none,
+			     mapped_buffer->va_node ?
+			       mapped_buffer->va_node->sparse : false);
 
 	gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
 		  vm_aspace_id(vm),
@@ -2057,7 +2031,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	struct vm_reserved_va_node *va_node, *va_node_tmp;
 	struct rb_node *node;
 	int i;
-	u32 pde_lo, pde_hi;
+	u32 pde_lo = 0, pde_hi = 0;
 
 	gk20a_dbg_fn("");
 	mutex_lock(&vm->update_gmmu_lock);
@@ -2082,7 +2056,8 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 
 	/* unmapping all buffers above may not actually free
 	 * all vm ptes. jettison them here for certain... */
-	pde_range_from_vaddr_range(vm, 0, vm->va_limit-1,
+	pde_range_from_vaddr_range(vm,
+				   0, vm->va_limit-1,
 				   &pde_lo, &pde_hi);
 	for (i = 0; i < pde_hi + 1; i++) {
 		struct gk20a_mm_entry *entry = &vm->pdb.entries[i];
@@ -2125,6 +2100,30 @@ void gk20a_vm_put(struct vm_gk20a *vm)
 	kref_put(&vm->ref, gk20a_vm_remove_support_kref);
 }
 
+const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
+	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
+	 .lo_bit = {26, 26},
+	 .update_entry = update_gmmu_pde_locked,
+	 .entry_size = 8},
+	{.hi_bit = {25, 25},
+	 .lo_bit = {12, 16},
+	 .update_entry = update_gmmu_pte_locked,
+	 .entry_size = 8},
+	{.update_entry = NULL}
+};
+
+const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
+	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
+	 .lo_bit = {27, 27},
+	 .update_entry = update_gmmu_pde_locked,
+	 .entry_size = 8},
+	{.hi_bit = {26, 26},
+	 .lo_bit = {12, 17},
+	 .update_entry = update_gmmu_pte_locked,
+	 .entry_size = 8},
+	{.update_entry = NULL}
+};
+
 int gk20a_init_vm(struct mm_gk20a *mm,
 		  struct vm_gk20a *vm,
 		  u32 big_page_size,
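
Reading the level tables just added: each level-0 entry spans 1 << lo_bit[0] bytes of GPU VA, i.e. 64 MiB per PDE for the 64 KiB big-page layout and 128 MiB for the 128 KiB layout, which matches the old vm->pde_stride of big_page_size << 10 that gk20a_init_vm() drops below. A small sketch, not part of the patch:

	/* Sketch only: bytes of GPU VA covered by one top-level PDE. */
	static u64 sketch_pde_coverage(const struct gk20a_mmu_level *levels)
	{
		return 1ULL << levels[0].lo_bit[0];	/* 64k layout: 1 << 26 = 64 MiB */
	}
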
@@ -2149,38 +2148,18 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	vm->big_pages = big_pages;
 
 	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
-	vm->pde_stride = vm->big_page_size << 10;
-	vm->pde_stride_shift = ilog2(vm->pde_stride);
 
-	for (i = 0; i < gmmu_nr_page_sizes; i++) {
-		u32 num_ptes, pte_space, num_pages;
+	vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,
+			vm->big_page_size);
 
+	for (i = 0; i < gmmu_nr_page_sizes; i++)
 		vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
 
-		/* assuming "full" page tables */
-		num_ptes = vm->pde_stride / gmmu_page_sizes[i];
-
-		pte_space = num_ptes * gmmu_pte__size_v();
-		/* allocate whole pages */
-		pte_space = roundup(pte_space, PAGE_SIZE);
+	gk20a_dbg_info("small page-size (%dKB)",
+			vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
 
-		num_pages = pte_space / PAGE_SIZE;
-		/* make sure "order" is viable */
-		BUG_ON(!is_power_of_2(num_pages));
-
-		vm->page_table_sizing[i].num_ptes = num_ptes;
-		vm->page_table_sizing[i].order = ilog2(num_pages);
-	}
-
-	gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
-			vm->gmmu_page_sizes[gmmu_page_size_small] >> 10,
-			(vm->page_table_sizing[gmmu_page_size_small].num_ptes *
-			 gmmu_pte__size_v()) >> 10);
-
-	gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
-			vm->gmmu_page_sizes[gmmu_page_size_big] >> 10,
-			(vm->page_table_sizing[gmmu_page_size_big].num_ptes *
-			 gmmu_pte__size_v()) >> 10);
+	gk20a_dbg_info("big page-size (%dKB)",
+			vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
 
 	pde_range_from_vaddr_range(vm,
 			0, vm->va_limit-1,
@@ -2197,7 +2176,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		   name, vm->va_limit, pde_hi + 1);
 
 	/* allocate the page table directory */
-	err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->pdb);
+	err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0], &vm->pdb);
 	if (err)
 		goto clean_up_ptes;
 
@@ -2382,9 +2361,18 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 
 	/* mark that we need to use sparse mappings here */
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) {
-		err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,
-					 pgsz_idx, true);
-		if (err) {
+		u64 map_offset = g->ops.mm.gmmu_map(vm, vaddr_start,
+					 NULL,
+					 0,
+					 va_node->size,
+					 pgsz_idx,
+					 0,
+					 0,
+					 args->flags,
+					 gk20a_mem_flag_none,
+					 false,
+					 true);
+		if (!map_offset) {
 			mutex_unlock(&vm->update_gmmu_lock);
 			vma->free(vma, start_page_nr, args->pages, 1);
 			kfree(va_node);
@@ -2462,7 +2450,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 				va_node->size,
 				va_node->pgsz_idx,
 				true,
-				gk20a_mem_flag_none);
+				gk20a_mem_flag_none,
+				true);
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -2741,13 +2730,25 @@ static int gk20a_init_hwpm(struct mm_gk20a *mm)
 	return 0;
 }
 
+void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+{
+	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
+	u32 pdb_addr_hi = u64_hi32(pdb_addr);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+		ram_in_page_dir_base_target_vid_mem_f() |
+		ram_in_page_dir_base_vol_true_f() |
+		ram_in_page_dir_base_lo_f(pdb_addr_lo));
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+		ram_in_page_dir_base_hi_f(pdb_addr_hi));
+}
+
 void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm,
 		u32 big_page_size)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl);
-	u32 pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v());
-	u32 pde_addr_hi = u64_hi32(pde_addr);
 	phys_addr_t inst_pa = inst_block->cpu_pa;
 	void *inst_ptr = inst_block->cpuva;
 
@@ -2756,13 +2757,7 @@ void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm,
 
 	gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
-		ram_in_page_dir_base_target_vid_mem_f() |
-		ram_in_page_dir_base_vol_true_f() |
-		ram_in_page_dir_base_lo_f(pde_addr_lo));
-
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
-		ram_in_page_dir_base_hi_f(pde_addr_hi));
+	g->ops.mm.init_pdb(g, inst_ptr, pde_addr);
 
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
 		u64_lo32(vm->va_limit) | 0xFFF);
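
Not part of the patch: a rough sketch of the address split done by the new gk20a_mm_init_pdb() helper above. The value of ram_in_base_shift_v() is an assumption here (12, i.e. 4 KiB alignment), not taken from this diff.

	/* Sketch only: how the PDB base is split into the two instance-block words. */
	static void sketch_split_pdb_addr(u64 pdb_addr, u32 *lo, u32 *hi)
	{
		*lo = u64_lo32(pdb_addr >> 12);	/* assumed ram_in_base_shift_v() == 12 */
		*hi = u64_hi32(pdb_addr);	/* high word is written unshifted */
	}
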
@@ -3030,6 +3025,13 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
 	return 34;
 }
 
+const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
+						      u32 big_page_size)
+{
+	return (big_page_size == SZ_64K) ?
+		 gk20a_mm_levels_64k : gk20a_mm_levels_128k;
+}
+
 void gk20a_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
@@ -3043,5 +3045,7 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
+	gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels;
+	gops->mm.init_pdb = gk20a_mm_init_pdb;
 }
 