author    Terje Bergstrom <tbergstrom@nvidia.com>  2014-12-12 06:52:28 -0500
committer Dan Willemsen <dwillemsen@nvidia.com>    2015-03-18 15:12:32 -0400
commit    0bc513fc4618feb5262079b4ef8842ba419d8111 (patch)
tree      f1741f426400c9cdaadb46113a62b0a064e38811 /drivers
parent    e462c6a7ad0ce05775bc15c58963df4a1a5606e8 (diff)
gpu: nvgpu: Remove gk20a sparse texture & PTE freeing
Remove support for gk20a sparse textures. We're using the implementation
from user space, so the gk20a code is never invoked. Also remove ref_cnt
for PTEs, so we never free PTEs when unmapping pages, but only at VM
delete time.

Change-Id: I04d7d43d9bff23ee46fd0570ad189faece35dd14
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/663294
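As background for the change described above, a minimal, self-contained sketch of the lifetime model this patch moves to (illustrative only; the toy_* names and structures below are stand-ins, not nvgpu's types): unmapping merely zeroes PTE entries in place, and the page-table backing memory is released in a single pass when the VM itself is torn down, rather than being reference-counted and freed per unmap.

/* Illustrative stand-in, not nvgpu code: PTE blocks carry no ref_cnt.
 * Unmap only clears entries; memory is reclaimed at VM teardown. */
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

struct toy_pte_block {
	unsigned int *words;	/* PTE backing storage, 2 words per entry */
	size_t nentries;	/* number of PTEs in this block */
};

struct toy_vm {
	struct toy_pte_block *blocks;
	size_t nblocks;
};

/* Unmapping after this change: just write zero PTEs, keep the block. */
static void toy_unmap(struct toy_pte_block *b, size_t first, size_t count)
{
	memset(&b->words[first * 2], 0, count * 2 * sizeof(*b->words));
}

/* Page-table memory is freed only here, when the whole VM goes away. */
static void toy_vm_remove(struct toy_vm *vm)
{
	size_t i;

	for (i = 0; i < vm->nblocks; i++)
		free(vm->blocks[i].words);
	free(vm->blocks);
	vm->blocks = NULL;
	vm->nblocks = 0;
}

int main(void)
{
	struct toy_vm vm;

	vm.nblocks = 1;
	vm.blocks = calloc(vm.nblocks, sizeof(*vm.blocks));
	if (!vm.blocks)
		return 1;

	vm.blocks[0].nentries = 128;
	vm.blocks[0].words = calloc(vm.blocks[0].nentries * 2,
				    sizeof(*vm.blocks[0].words));
	if (!vm.blocks[0].words) {
		free(vm.blocks);
		return 1;
	}

	toy_unmap(&vm.blocks[0], 0, 16);	/* nothing is freed here */
	toy_vm_remove(&vm);			/* all freeing happens here */
	return 0;
}

The trade-off, as the comment deleted from mm_gk20a.c notes, is between holding empty page tables until VM teardown and avoiding free/alloc churn in pathological unmap/map/unmap/map sequences.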
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c    |   4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h    |   4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 149
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h |   5
-rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c |  40
5 files changed, 23 insertions(+), 179 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 419b8675..2b8276e3 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1786,9 +1786,11 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
 
 	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
-		| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
 		| NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
 
+	if (g->ops.mm.set_sparse)
+		gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
+
 	if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
 	    gk20a_platform_has_syncpoints(g->dev))
 		gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 4333cd20..4ddea431 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -292,10 +292,6 @@ struct gpu_ops {
 	struct {
 		int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
 			       u32 num_pages, u32 pgsz_idx, bool refplus);
-		int (*put_empty)(struct vm_gk20a *vm, u64 vaddr,
-			       u32 num_pages, u32 pgsz_idx);
-		void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
-			       u64 size, u32 pgsz_idx);
 		bool (*is_debug_mode_enabled)(struct gk20a *g);
 		u64 (*gmmu_map)(struct vm_gk20a *vm,
 				u64 map_offset,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5e925d65..e51ce7c5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1806,8 +1806,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 					 pte->size);
 		if (err) {
 			gk20a_err(dev_from_vm(vm),
-				  "couldn't map ptes for update as=%d pte_ref_cnt=%d",
-				  vm_aspace_id(vm), pte->ref_cnt);
+				  "couldn't map ptes for update as=%d",
+				  vm_aspace_id(vm));
 			goto clean_up;
 		}
 
@@ -1839,13 +1839,12 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			if (!cacheable)
 				pte_w[1] |= gmmu_pte_vol_true_f();
 
-			pte->ref_cnt++;
 			gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
-				   " ctag=%d vol=%d refs=%d"
+				   " ctag=%d vol=%d"
 				   " [0x%08x,0x%08x]",
 				   pte_cur, hi32(addr), lo32(addr),
 				   kind_v, ctag, !cacheable,
-				   pte->ref_cnt, pte_w[1], pte_w[0]);
+				   pte_w[1], pte_w[0]);
 			ctag += ctag_incr;
 			cur_offset += page_size;
 			addr += page_size;
@@ -1856,10 +1855,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			}
 
 		} else {
-			pte->ref_cnt--;
 			gk20a_dbg(gpu_dbg_pte,
-				   "pte_cur=%d ref=%d [0x0,0x0]",
-				   pte_cur, pte->ref_cnt);
+				   "pte_cur=%d [0x0,0x0]",
+				   pte_cur);
 		}
 
 		gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
@@ -1867,24 +1865,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		}
 
 		unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
-
-		if (pte->ref_cnt == 0) {
-			/* It can make sense to keep around one page table for
-			 * each flavor (empty)... in case a new map is coming
-			 * right back to alloc (and fill it in) again.
-			 * But: deferring unmapping should help with pathologic
-			 * unmap/map/unmap/map cases where we'd trigger pte
-			 * free/alloc/free/alloc.
-			 */
-			free_gmmu_pages(vm, pte->ref, pte->sgt,
-				vm->page_table_sizing[pgsz_idx].order,
-				pte->size);
-			pte->ref = NULL;
-
-			/* rewrite pde */
-			update_gmmu_pde_locked(vm, pde_i);
-		}
-
 	}
 
 	smp_mb();
@@ -1982,85 +1962,6 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	vm->tlb_dirty = true;
 }
 
-static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
-	       u32 num_pages, u32 pgsz_idx)
-{
-	struct mm_gk20a *mm = vm->mm;
-	struct gk20a *g = mm->g;
-	u32 pgsz = vm->gmmu_page_sizes[pgsz_idx];
-	u32 i;
-	dma_addr_t iova;
-
-	/* allocate the zero page if the va does not already have one */
-	if (!vm->zero_page_cpuva) {
-		int err = 0;
-		vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
-							 vm->big_page_size,
-							 &iova,
-							 GFP_KERNEL);
-		if (!vm->zero_page_cpuva) {
-			dev_err(&g->dev->dev, "failed to allocate zero page\n");
-			return -ENOMEM;
-		}
-
-		vm->zero_page_iova = iova;
-		err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
-					vm->zero_page_cpuva, vm->zero_page_iova,
-					vm->big_page_size);
-		if (err) {
-			dma_free_coherent(&g->dev->dev, vm->big_page_size,
-					  vm->zero_page_cpuva,
-					  vm->zero_page_iova);
-			vm->zero_page_iova = 0;
-			vm->zero_page_cpuva = NULL;
-
-			dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
-			return -ENOMEM;
-		}
-	}
-
-	for (i = 0; i < num_pages; i++) {
-		u64 page_vaddr = g->ops.mm.gmmu_map(vm, vaddr,
-			vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
-			NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
-			gk20a_mem_flag_none, false);
-
-		if (!page_vaddr) {
-			gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
-			goto err_unmap;
-		}
-		vaddr += pgsz;
-	}
-
-	return 0;
-
-err_unmap:
-
-	WARN_ON(1);
-	/* something went wrong. unmap pages */
-	while (i--) {
-		vaddr -= pgsz;
-		g->ops.mm.gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
-				     gk20a_mem_flag_none);
-	}
-
-	return -EINVAL;
-}
-
-static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
-	       u32 num_pages, u32 pgsz_idx, bool refplus)
-{
-	return gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
-}
-
-static void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
-			       u64 size, u32 pgsz_idx) {
-	struct gk20a *g = vm->mm->g;
-
-	g->ops.mm.gmmu_unmap(vm, vaddr, size, pgsz_idx,
-			false, gk20a_mem_flag_none);
-}
-
 /* NOTE! mapped_buffers lock must be held */
 void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
@@ -2075,18 +1976,14 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 			ilog2(vm->gmmu_page_sizes[pgsz_idx]);
 
 		/* there is little we can do if this fails... */
-		if (g->ops.mm.put_empty) {
-			g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
-		} else {
-			g->ops.mm.gmmu_unmap(vm,
-				mapped_buffer->addr,
-				mapped_buffer->size,
-				mapped_buffer->pgsz_idx,
-				mapped_buffer->va_allocated,
-				gk20a_mem_flag_none);
-			g->ops.mm.set_sparse(vm, vaddr,
-					num_pages, pgsz_idx, false);
-		}
+		g->ops.mm.gmmu_unmap(vm,
+			mapped_buffer->addr,
+			mapped_buffer->size,
+			mapped_buffer->pgsz_idx,
+			mapped_buffer->va_allocated,
+			gk20a_mem_flag_none);
+		g->ops.mm.set_sparse(vm, vaddr,
+				num_pages, pgsz_idx, false);
 	} else
 		g->ops.mm.gmmu_unmap(vm,
 			mapped_buffer->addr,
@@ -2140,7 +2037,6 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
 
 static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 {
-	struct gk20a *g = vm->mm->g;
 	struct mapped_buffer_node *mapped_buffer;
 	struct vm_reserved_va_node *va_node, *va_node_tmp;
 	struct rb_node *node;
@@ -2197,11 +2093,6 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 
 	mutex_unlock(&vm->update_gmmu_lock);
-
-	/* release zero page if used */
-	if (vm->zero_page_cpuva)
-		dma_free_coherent(&g->dev->dev, vm->big_page_size,
-				  vm->zero_page_cpuva, vm->zero_page_iova);
 }
 
 void gk20a_vm_remove_support(struct vm_gk20a *vm)
@@ -2582,10 +2473,12 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 
 		/* if this was a sparse mapping, free the va */
 		if (va_node->sparse)
-			g->ops.mm.clear_sparse(vm,
+			g->ops.mm.gmmu_unmap(vm,
 					va_node->vaddr_start,
 					va_node->size,
-					va_node->pgsz_idx);
+					va_node->pgsz_idx,
+					true,
+					gk20a_mem_flag_none);
 		kfree(va_node);
 	}
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -3180,12 +3073,6 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
 
 void gk20a_init_mm(struct gpu_ops *gops)
 {
-	/* remember to remove NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS in
-	 * characteristics flags if sparse support is removed */
-	gops->mm.set_sparse = gk20a_vm_put_sparse;
-	gops->mm.put_empty = gk20a_vm_put_empty;
-	gops->mm.clear_sparse = gk20a_vm_clear_sparse;
-
 	gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
 	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
 	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 4dbde580..b3564409 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -193,7 +193,6 @@ struct page_table_gk20a {
 	/* Either a *page or a *mem_handle */
 	void *ref;
 	/* track mapping cnt on this page table */
-	u32 ref_cnt;
 	struct sg_table *sgt;
 	size_t size;
 };
@@ -308,10 +307,6 @@ struct vm_gk20a {
 
 	struct list_head reserved_va_list;
 
-	dma_addr_t zero_page_iova;
-	void *zero_page_cpuva;
-	struct sg_table *zero_page_sgt;
-
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	u64 handle;
 #endif
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index cd40132f..5b1a9a04 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -47,8 +47,6 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	BUG_ON(pde_lo != pde_hi);
 
 	pte = vm->pdes.ptes[pgsz_idx] + pde_lo;
-	if (refplus)
-		pte->ref_cnt++;
 
 	pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
 	pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);
@@ -64,10 +62,10 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 		pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f();
 
 		gk20a_dbg(gpu_dbg_pte,
-			   "pte_cur=%d addr=%llx refs=%d"
+			   "pte_cur=%d addr=%llx"
 			   " [0x%08x,0x%08x]",
 			   pte_cur, addr,
-			   pte->ref_cnt, pte_w[1], pte_w[0]);
+			   pte_w[1], pte_w[0]);
 
 		gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
 		gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
@@ -220,39 +218,6 @@ fail:
 	return ret;
 }
 
-static void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
-			       u64 size, u32 pgsz_idx) {
-	u64 vaddr_hi;
-	u32 pde_lo, pde_hi, pde_i;
-
-	gk20a_dbg_fn("");
-	vaddr_hi = vaddr + size - 1;
-	pde_range_from_vaddr_range(vm,
-				   vaddr,
-				   vaddr_hi,
-				   &pde_lo, &pde_hi);
-
-	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
-			"pde_hi: 0x%x, pgsz_idx: %d, pde_stride_shift: %d",
-			vaddr, vaddr_hi, pde_lo, pde_hi, pgsz_idx,
-			vm->pde_stride_shift);
-
-	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
-		struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
-		pte->ref_cnt--;
-
-		if (pte->ref_cnt == 0) {
-			free_gmmu_pages(vm, pte->ref, pte->sgt,
-				vm->page_table_sizing[pgsz_idx].order,
-				pte->size);
-			pte->ref = NULL;
-			update_gmmu_pde_locked(vm, pde_i);
-		}
-	}
-
-	return;
-}
-
 static bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g)
 {
 	u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
@@ -288,7 +253,6 @@ static u32 gm20b_mm_get_big_page_sizes(void)
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
-	gops->mm.clear_sparse = gm20b_vm_clear_sparse;
 	gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled;
 	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
 	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;