-rw-r--r--   drivers/gpu/nvgpu/gk20a/as_gk20a.c          10
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.c             23
-rw-r--r--   drivers/gpu/nvgpu/gk20a/gk20a.h              2
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.c         139
-rw-r--r--   drivers/gpu/nvgpu/gk20a/mm_gk20a.h           6
-rw-r--r--   drivers/gpu/nvgpu/gk20a/platform_gk20a.h     6
6 files changed, 144 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 3d690e01..038fa4c8 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -183,12 +183,16 @@ static int gk20a_as_ioctl_get_va_regions(
 	unsigned int write_entries;
 	struct nvgpu_as_va_region __user *user_region_ptr;
 	struct vm_gk20a *vm = as_share->vm;
+	int page_sizes = gmmu_nr_page_sizes;
 
 	gk20a_dbg_fn("");
 
+	if (!vm->big_pages)
+		page_sizes--;
+
 	write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region);
-	if (write_entries > gmmu_nr_page_sizes)
-		write_entries = gmmu_nr_page_sizes;
+	if (write_entries > page_sizes)
+		write_entries = page_sizes;
 
 	user_region_ptr =
 		(struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr;
@@ -216,7 +220,7 @@ static int gk20a_as_ioctl_get_va_regions(
 	}
 
 	args->buf_size =
-		gmmu_nr_page_sizes * sizeof(struct nvgpu_as_va_region);
+		page_sizes * sizeof(struct nvgpu_as_va_region);
 
 	return 0;
 }
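With big pages disabled the VM exposes one fewer VA region, so the ioctl clamps write_entries to the reduced count. A minimal sketch of that clamping with plain C stand-ins (va_regions_to_write and GMMU_NR_PAGE_SIZES are illustrative names, not nvgpu identifiers):

    #include <stddef.h>

    enum { GMMU_NR_PAGE_SIZES = 2 };   /* small + big */

    /* How many region descriptors fit in the user buffer, capped at the
     * number of page-size regions the VM actually has. */
    static size_t va_regions_to_write(size_t buf_size, size_t region_size,
                                      int big_pages_enabled)
    {
            size_t page_sizes = GMMU_NR_PAGE_SIZES;
            size_t write_entries;

            if (!big_pages_enabled)
                    page_sizes--;          /* only the small-page region exists */

            write_entries = buf_size / region_size;
            if (write_entries > page_sizes)
                    write_entries = page_sizes;

            return write_entries;
    }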
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index f3a333f3..f6c9f901 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1482,6 +1482,8 @@ static int gk20a_probe(struct platform_device *dev)
 	spin_lock_init(&gk20a->debugfs_lock);
 	gk20a->mm.ltc_enabled = true;
 	gk20a->mm.ltc_enabled_debug = true;
+	gk20a->mm.bypass_smmu = platform->bypass_smmu;
+	gk20a->mm.disable_bigpage = platform->disable_bigpage;
 	gk20a->debugfs_ltc_enabled =
 		debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
 				platform->debugfs,
@@ -1496,6 +1498,16 @@ static int gk20a_probe(struct platform_device *dev)
 				S_IRUGO|S_IWUSR,
 				platform->debugfs,
 				&gk20a->timeouts_enabled);
+	gk20a->debugfs_bypass_smmu =
+		debugfs_create_bool("bypass_smmu",
+				S_IRUGO|S_IWUSR,
+				platform->debugfs,
+				&gk20a->mm.bypass_smmu);
+	gk20a->debugfs_disable_bigpage =
+		debugfs_create_bool("disable_bigpage",
+				S_IRUGO|S_IWUSR,
+				platform->debugfs,
+				&gk20a->mm.disable_bigpage);
 	gk20a_pmu_debugfs_init(dev);
 	gk20a_cde_debugfs_init(dev);
 #endif
@@ -1929,9 +1941,14 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->pde_coverage_bit_count =
 		gk20a_mm_pde_coverage_bit_count(&g->mm.pmu.vm);
 
-	gpu->available_big_page_sizes = gpu->big_page_size;
-	if (g->ops.mm.get_big_page_sizes)
-		gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
+	if (g->mm.disable_bigpage) {
+		gpu->big_page_size = 0;
+		gpu->available_big_page_sizes = 0;
+	} else {
+		gpu->available_big_page_sizes = gpu->big_page_size;
+		if (g->ops.mm.get_big_page_sizes)
+			gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
+	}
 
 	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
 		| NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 7e919e2e..defc5904 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -452,6 +452,8 @@ struct gk20a {
 	struct dentry *debugfs_ltc_enabled;
 	struct dentry *debugfs_timeouts_enabled;
 	struct dentry *debugfs_gr_idle_timeout_default;
+	struct dentry *debugfs_bypass_smmu;
+	struct dentry *debugfs_disable_bigpage;
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 452e064e..37d47c18 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1274,8 +1274,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	int err = 0;
 	struct buffer_attrs bfr = {NULL};
 	struct gk20a_comptags comptags;
-	u64 buf_addr;
 	bool clear_ctags = false;
+	struct scatterlist *sgl;
+	u64 buf_addr;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1308,10 +1309,24 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	bfr.kind_v = kind;
 	bfr.size = dmabuf->size;
+	sgl = bfr.sgt->sgl;
 	buf_addr = (u64)sg_dma_address(bfr.sgt->sgl);
-	if (unlikely(!buf_addr))
-		buf_addr = (u64)sg_phys(bfr.sgt->sgl);
-	bfr.align = 1 << __ffs(buf_addr);
+	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
+		while (sgl) {
+			u64 align;
+
+			buf_addr = (u64)sg_phys(sgl);
+
+			align = 1 << __ffs(buf_addr | (u64)sgl->length);
+			if (bfr.align)
+				bfr.align = min_t(u64, align, bfr.align);
+			else
+				bfr.align = align;
+			sgl = sg_next(sgl);
+		}
+	} else
+		bfr.align = 1 << __ffs(buf_addr);
+
 	bfr.pgsz_idx = -1;
 	mapping_size = mapping_size ? mapping_size : bfr.size;
 
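The alignment loop above computes, for each scatterlist chunk, the largest power of two that divides both the chunk's physical address and its length (1 << __ffs(addr | len)) and keeps the minimum across chunks. A minimal standalone sketch of that arithmetic, with made-up chunk values and plain C stand-ins instead of scatterlists:

    #include <stdint.h>
    #include <stdio.h>

    struct chunk { uint64_t addr; uint64_t len; };

    /* Largest power of two dividing both addr and len: the lowest set bit
     * of (addr | len), i.e. 1 << __ffs(addr | len) in kernel terms. */
    static uint64_t chunk_align(uint64_t addr, uint64_t len)
    {
            uint64_t v = addr | len;
            return v & ~(v - 1);
    }

    /* Minimum alignment over all chunks of the buffer. */
    static uint64_t buffer_align(const struct chunk *c, int n)
    {
            uint64_t align = 0;
            int i;

            for (i = 0; i < n; i++) {
                    uint64_t a = chunk_align(c[i].addr, c[i].len);
                    align = align ? (a < align ? a : align) : a;
            }
            return align;
    }

    int main(void)
    {
            struct chunk c[] = { { 0x80001000, 0x2000 }, { 0x80200000, 0x10000 } };
            /* prints 0x1000: the first chunk limits the common alignment */
            printf("align = 0x%llx\n", (unsigned long long)buffer_align(c, 2));
            return 0;
    }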
@@ -1829,7 +1844,9 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
 static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *pte,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unammped_pte,
 			   int rw_flag, bool sparse, u32 flags)
@@ -1877,7 +1894,9 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *pte,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, u32 flags)
@@ -1887,14 +1906,14 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
 
-	if (iova) {
+	if (*iova) {
 		if (unmapped_pte)
 			pte_w[0] = gmmu_pte_valid_false_f() |
-				gmmu_pte_address_sys_f(iova
+				gmmu_pte_address_sys_f(*iova
 				>> gmmu_pte_address_shift_v());
 		else
 			pte_w[0] = gmmu_pte_valid_true_f() |
-				gmmu_pte_address_sys_f(iova
+				gmmu_pte_address_sys_f(*iova
 				>> gmmu_pte_address_shift_v());
 
 		pte_w[1] = gmmu_pte_aperture_video_memory_f() |
@@ -1925,7 +1944,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 
 	gk20a_dbg(gpu_dbg_pte,
 		"pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]",
-		i, iova,
+		i, *iova,
 		kind_v, *ctag / ctag_granularity, !cacheable,
 		pte_w[1], pte_w[0]);
 
@@ -1941,13 +1960,33 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
 	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
 
+	if (*iova) {
+		*iova += page_size;
+		*offset += page_size;
+		if (*sgl && *offset + page_size > (*sgl)->length) {
+			u64 new_iova;
+			*sgl = sg_next(*sgl);
+			if (*sgl) {
+				new_iova = sg_phys(*sgl);
+				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+					  new_iova, (*sgl)->length);
+				if (new_iova) {
+					*offset = 0;
+					*iova = new_iova;
+				}
+			}
+		}
+	}
+
 	return 0;
 }
 
 static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			    struct gk20a_mm_entry *pte,
 			    enum gmmu_pgsz_gk20a pgsz_idx,
-			    u64 iova,
+			    struct scatterlist **sgl,
+			    u64 *offset,
+			    u64 *iova,
 			    u64 gpu_va, u64 gpu_end,
 			    u8 kind_v, u32 *ctag,
 			    bool cacheable, bool unmapped_pte,
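The block added above advances the physical cursor after every PTE write: the iova and the intra-chunk offset move by page_size, and when the offset would run past the current chunk the walk hops to the next chunk and resets the offset. A rough sketch of the same bookkeeping over a plain array of chunks (stand-in types, no scatterlist):

    #include <stdint.h>
    #include <stddef.h>

    struct chunk { uint64_t addr; uint64_t len; };

    struct cursor {
            const struct chunk *chunks;
            size_t nr, idx;
            uint64_t offset;              /* offset into the current chunk */
            uint64_t iova;                /* physical address for the next PTE */
    };

    /* Move to the next page; cross into the following chunk when the
     * current one cannot supply another full page. */
    static void cursor_advance(struct cursor *c, uint64_t page_size)
    {
            c->iova += page_size;
            c->offset += page_size;
            if (c->idx < c->nr && c->offset + page_size > c->chunks[c->idx].len) {
                    c->idx++;
                    if (c->idx < c->nr) {
                            c->offset = 0;
                            c->iova = c->chunks[c->idx].addr;
                    }
            }
    }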
@@ -1968,7 +2007,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		>> (u64)l->lo_bit[pgsz_idx];
 
 	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
-		  pgsz_idx, lvl, gpu_va, gpu_end-1, iova);
+		  pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
 
 	while (gpu_va < gpu_end) {
 		struct gk20a_mm_entry *next_pte = NULL;
@@ -2000,7 +2039,8 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		}
 
 		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-				iova, kind_v, ctag, cacheable, unmapped_pte,
+				sgl, offset, iova,
+				kind_v, ctag, cacheable, unmapped_pte,
 				rw_flag, sparse, flags);
 		if (err)
 			return err;
@@ -2016,6 +2056,8 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		}
 		err = update_gmmu_level_locked(vm, next_pte,
 				pgsz_idx,
+				sgl,
+				offset,
 				iova,
 				gpu_va,
 				next,
@@ -2027,8 +2069,6 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			return err;
 		}
 
-		if (iova)
-			iova += next - gpu_va;
 		pde_i++;
 		gpu_va = next;
 	}
@@ -2056,18 +2096,39 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	u64 space_to_skip = buffer_offset;
 	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
 	int err;
+	struct scatterlist *sgl = NULL;
 
-	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx",
+	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
 		   pgsz_idx,
 		   sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags)
-		       : 0ULL);
+		       : 0ULL,
+		   buffer_offset,
+		   sgt ? sgt->nents : 0);
 
 	if (space_to_skip & (page_size - 1))
 		return -EINVAL;
 
-	if (sgt)
-		iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags)
-				+ space_to_skip;
+	if (sgt) {
+		iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags);
+		if (!vm->mm->bypass_smmu && iova) {
+			iova += space_to_skip;
+		} else {
+			sgl = sgt->sgl;
+
+			gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+					(u64)sg_phys(sgl),
+					sgl->length);
+			while (space_to_skip && sgl &&
+			       space_to_skip + page_size > sgl->length) {
+				space_to_skip -= sgl->length;
+				sgl = sg_next(sgl);
+				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+						(u64)sg_phys(sgl),
+						sgl->length);
+			}
+			iova = sg_phys(sgl) + space_to_skip;
+		}
+	}
 
 	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
 		   pgsz_idx, gpu_va, gpu_end-1, iova);
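When the walk has to start from physical chunk addresses, the code above consumes buffer_offset by skipping whole chunks and then starting space_to_skip bytes into the first chunk that still has room for a page. A compact sketch of that skip logic, again over a plain chunk array rather than a scatterlist:

    #include <stdint.h>
    #include <stddef.h>

    struct chunk { uint64_t addr; uint64_t len; };

    /* Physical address at which the mapping starts, given a byte offset
     * into a chunked buffer; returns 0 if the offset exhausts the chunks. */
    static uint64_t start_iova(const struct chunk *c, size_t n,
                               uint64_t space_to_skip, uint64_t page_size)
    {
            size_t i = 0;

            while (space_to_skip && i < n &&
                   space_to_skip + page_size > c[i].len) {
                    space_to_skip -= c[i].len;
                    i++;
            }
            return (i < n) ? c[i].addr + space_to_skip : 0;
    }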
@@ -2079,7 +2140,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		return err;
 	}
 	err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-			iova,
+			&sgl,
+			&space_to_skip,
+			&iova,
 			gpu_va, gpu_end,
 			kind_v, &ctag,
 			cacheable, unmapped_pte, rw_flag, sparse, 0, flags);
@@ -2370,15 +2433,16 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 
 	gk20a_dbg_fn("");
 
-	if (big_page_size == 0)
+	if (big_page_size == 0) {
 		big_page_size =
 			gk20a_get_platform(g->dev)->default_big_page_size;
+	} else {
+		if (!is_power_of_2(big_page_size))
+			return -EINVAL;
 
-	if (!is_power_of_2(big_page_size))
-		return -EINVAL;
-
-	if (!(big_page_size & g->gpu_characteristics.available_big_page_sizes))
-		return -EINVAL;
+		if (!(big_page_size & g->gpu_characteristics.available_big_page_sizes))
+			return -EINVAL;
+	}
 
 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
 	if (!vm)
@@ -2391,7 +2455,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
 
 	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
-			    mm->channel.size, true, name);
+			    mm->channel.size, !mm->disable_bigpage, name);
 
 	return err;
 }
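The reshuffled check above only validates an explicitly requested big page size: zero falls back to the platform default, anything else must be a power of two and present in the advertised bitmask. A minimal sketch of that validation with made-up names (pick_big_page_size is not an nvgpu function):

    #include <stdint.h>

    /* Returns 0 and writes the chosen size to *out, or -1 on an invalid request. */
    static int pick_big_page_size(uint32_t requested, uint32_t available_mask,
                                  uint32_t platform_default, uint32_t *out)
    {
            if (requested == 0) {
                    *out = platform_default;          /* use the platform default */
                    return 0;
            }
            if (requested & (requested - 1))          /* not a power of two */
                    return -1;
            if (!(requested & available_mask))        /* not advertised by the GPU */
                    return -1;
            *out = requested;
            return 0;
    }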
@@ -2417,27 +2481,28 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 			 struct nvgpu_as_alloc_space_args *args)
 
 { int err = -ENOMEM;
-	int pgsz_idx;
+	int pgsz_idx = gmmu_page_size_small;
 	u32 start_page_nr;
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct gk20a *g = vm->mm->g;
 	struct vm_reserved_va_node *va_node;
 	u64 vaddr_start = 0;
+	int page_sizes = gmmu_nr_page_sizes;
 
 	gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
 			args->flags, args->page_size, args->pages,
 			args->o_a.offset);
 
-	/* determine pagesz idx */
-	for (pgsz_idx = gmmu_page_size_small;
-	     pgsz_idx < gmmu_nr_page_sizes;
-	     pgsz_idx++) {
+	if (!vm->big_pages)
+		page_sizes--;
+
+	for (; pgsz_idx < page_sizes; pgsz_idx++) {
 		if (vm->gmmu_page_sizes[pgsz_idx] == args->page_size)
 			break;
 	}
 
-	if (pgsz_idx >= gmmu_nr_page_sizes) {
+	if (pgsz_idx >= page_sizes) {
 		err = -EINVAL;
 		goto clean_up;
 	}
@@ -2720,7 +2785,8 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset)
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+	if (vm->big_pages)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 
 	gk20a_vm_free_entries(vm, &vm->pdb, 0);
@@ -2731,12 +2797,15 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block)
 	struct device *dev = dev_from_gk20a(g);
 	int err;
 
+	gk20a_dbg_fn("");
+
 	err = gk20a_gmmu_alloc(g, ram_in_alloc_size_v(), inst_block);
 	if (err) {
 		gk20a_err(dev, "%s: memory allocation failed\n", __func__);
 		return err;
 	}
 
+	gk20a_dbg_fn("done");
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index f6806309..895e52ff 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -210,7 +210,9 @@ struct gk20a_mmu_level {
 	int (*update_entry)(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *pte,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, u32 flags);
@@ -303,6 +305,8 @@ struct mm_gk20a {
 #ifdef CONFIG_DEBUG_FS
 	u32 ltc_enabled;
 	u32 ltc_enabled_debug;
+	u32 bypass_smmu;
+	u32 disable_bigpage;
 #endif
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index f4301dab..f142cb9f 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -76,6 +76,12 @@ struct gk20a_platform {
 	/* Adaptative ELPG: true = enable flase = disable */
 	bool enable_aelpg;
 
+	/* Enable SMMU bypass by default */
+	bool bypass_smmu;
+
+	/* Disable big page support */
+	bool disable_bigpage;
+
 	/*
 	 * gk20a_do_idle() API can take GPU either into rail gate or CAR reset
 	 * This flag can be used to force CAR reset case instead of rail gate