path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author    Deepak Nibade <dnibade@nvidia.com>  2015-03-03 01:42:58 -0500
committer Dan Willemsen <dwillemsen@nvidia.com>  2015-04-04 22:02:17 -0400
commit    38fc3a48a0c2fbdda4f2e69dc7c4c619a534e468 (patch)
tree      580db3c38b292245184e0a8b119cdb0eeeb63369 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent    87ccc6a02f9133805c58ad7b1791baf4f33249f7 (diff)
gpu: nvgpu: add platform specific get_iova_addr()
Add a platform specific API pointer (*get_iova_addr)() which can be used to
get the IOVA/physical address from a given scatterlist and flags.

Use this API as g->ops.mm.get_iova_addr() instead of calling
gk20a_mm_iova_addr() directly, which makes the address lookup platform
specific.

Bug 1605653

Change-Id: I798763db1501bd0b16e84daab68f6093a83caac2
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/713089
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
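The header-side declaration of the new hook is not part of this file's diff.
As a rough sketch only (the surrounding gpu_ops layout and the other member
names are assumptions, not shown in this change), the op pointer and its gk20a
wiring look like this:

	/* Sketch, not the real header: only the get_iova_addr member type and
	 * its gk20a assignment below are confirmed by the hunks in this diff. */
	struct gpu_ops {
		struct {
			/* ... other mm ops ... */
			u64 (*get_iova_addr)(struct gk20a *g,
					     struct scatterlist *sgl,
					     u32 flags);
		} mm;
		/* ... other op groups ... */
	};

	/* gk20a keeps the generic implementation as its platform hook
	 * (see the gk20a_init_mm() hunk below): */
	gops->mm.get_iova_addr = gk20a_mm_iova_addr;

	/* Callers then resolve IOVA/physical addresses through the op instead
	 * of calling gk20a_mm_iova_addr() directly, for example: */
	u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.sgt->sgl, 0);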
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  53
1 file changed, 33 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 9a61a779..3d2e5450 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -99,7 +99,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				u64 first_vaddr, u64 last_vaddr,
 				u8 kind_v, u32 ctag_offset, bool cacheable,
 				bool umapped_pte, int rw_flag,
-				bool sparse);
+				bool sparse,
+				u32 flags);
 static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
@@ -610,6 +611,7 @@ static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm,
 {
 	int err;
 	int order;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("");
 
@@ -621,7 +623,8 @@ static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm,
 
 	err = alloc_gmmu_pages(vm, order, entry);
 	gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d",
-		  entry, gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl), order);
+		  entry, g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0),
+		  order);
 	if (err)
 		return err;
 	entry->pgsz = pgsz_idx;
@@ -1118,7 +1121,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 				      flags &
 				      NVGPU_GPU_FLAGS_SUPPORT_UNMAPPED_PTE,
 				      rw_flag,
-				      sparse);
+				      sparse,
+				      flags);
 	if (err) {
 		gk20a_err(d, "failed to update ptes on map");
 		goto fail_validate;
@@ -1164,7 +1168,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 				vaddr + size,
 				0, 0, false /* n/a for unmap */,
 				false, rw_flag,
-				sparse);
+				sparse, 0);
 	if (err)
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
@@ -1645,11 +1649,13 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 {
 	struct mapped_buffer_node *buffer;
 	dma_addr_t addr = 0;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	mutex_lock(&vm->update_gmmu_lock);
 	buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr);
 	if (buffer)
-		addr = gk20a_mm_iova_addr(vm->mm->g, buffer->sgt->sgl);
+		addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl,
+				buffer->flags);
 	mutex_unlock(&vm->update_gmmu_lock);
 
 	return addr;
@@ -1761,7 +1767,8 @@ u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova)
 	return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g);
 }
 
-u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl)
+u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
+		u32 flags)
 {
 	if (!device_is_iommuable(dev_from_gk20a(g)))
 		return sg_phys(sgl);
@@ -1807,8 +1814,9 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 			   u64 iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unammped_pte,
-			   int rw_flag, bool sparse)
+			   int rw_flag, bool sparse, u32 flags)
 {
+	struct gk20a *g = gk20a_from_vm(vm);
 	bool small_valid, big_valid;
 	u64 pte_addr_small = 0, pte_addr_big = 0;
 	struct gk20a_mm_entry *entry = vm->pdb.entries + i;
@@ -1821,10 +1829,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 	big_valid = entry->size && entry->pgsz == gmmu_page_size_big;
 
 	if (small_valid)
-		pte_addr_small = gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl);
+		pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0);
 
 	if (big_valid)
-		pte_addr_big = gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl);
+		pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0);
 
 	pde_v[0] = gmmu_pde_size_full_f();
 	pde_v[0] |= big_valid ? big_valid_pde0_bits(pte_addr_big) :
@@ -1854,7 +1862,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   u64 iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
-			   int rw_flag, bool sparse)
+			   int rw_flag, bool sparse, u32 flags)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
@@ -1927,7 +1935,8 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			      bool cacheable, bool unmapped_pte,
 			      int rw_flag,
 			      bool sparse,
-			      int lvl)
+			      int lvl,
+			      u32 flags)
 {
 	const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl];
 	const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1];
@@ -1973,7 +1982,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 
 		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
 				iova, kind_v, ctag, cacheable, unmapped_pte,
-				rw_flag, sparse);
+				rw_flag, sparse, flags);
 		if (err)
 			return err;
 
@@ -1992,7 +2001,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 				gpu_va,
 				next,
 				kind_v, ctag, cacheable, unmapped_pte,
-				rw_flag, sparse, lvl+1);
+				rw_flag, sparse, lvl+1, flags);
 			unmap_gmmu_pages(next_pte);
 
 			if (err)
@@ -2018,7 +2027,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				u8 kind_v, u32 ctag_offset,
 				bool cacheable, bool unmapped_pte,
 				int rw_flag,
-				bool sparse)
+				bool sparse,
+				u32 flags)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	int ctag_granularity = g->ops.fb.compression_page_size(g);
@@ -2030,13 +2040,15 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx",
 		   pgsz_idx,
-		   sgt ? gk20a_mm_iova_addr(vm->mm->g, sgt->sgl) : 0ULL);
+		   sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags)
+		       : 0ULL);
 
 	if (space_to_skip & (page_size - 1))
 		return -EINVAL;
 
 	if (sgt)
-		iova = gk20a_mm_iova_addr(vm->mm->g, sgt->sgl) + space_to_skip;
+		iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags)
+				+ space_to_skip;
 
 	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
 		   pgsz_idx, gpu_va, gpu_end-1, iova);
@@ -2051,7 +2063,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			iova,
 			gpu_va, gpu_end,
 			kind_v, &ctag,
-			cacheable, unmapped_pte, rw_flag, sparse, 0);
+			cacheable, unmapped_pte, rw_flag, sparse, 0, flags);
 	unmap_gmmu_pages(&vm->pdb);
 
 	smp_mb();
@@ -2824,7 +2836,7 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm,
 		u32 big_page_size)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl);
+	u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.sgt->sgl, 0);
 	phys_addr_t inst_pa = gk20a_mem_phys(inst_block);
 	void *inst_ptr = inst_block->cpu_va;
 
@@ -3019,8 +3031,8 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->mm->g,
-				vm->pdb.sgt->sgl) >> 12);
+	u32 addr_lo = u64_lo32(g->ops.mm.get_iova_addr(vm->mm->g,
+				vm->pdb.sgt->sgl, 0) >> 12);
 	u32 data;
 	s32 retry = 2000;
 	static DEFINE_MUTEX(tlb_lock);
@@ -3122,6 +3134,7 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
+	gops->mm.get_iova_addr = gk20a_mm_iova_addr;
 	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
 	gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels;
 	gops->mm.init_pdb = gk20a_mm_init_pdb;