Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c        |  10
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c           |  23
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h           |   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c        | 139
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h        |   6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a.h  |   6
6 files changed, 144 insertions, 42 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 3d690e01..038fa4c8 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -183,12 +183,16 @@ static int gk20a_as_ioctl_get_va_regions(
 	unsigned int write_entries;
 	struct nvgpu_as_va_region __user *user_region_ptr;
 	struct vm_gk20a *vm = as_share->vm;
+	int page_sizes = gmmu_nr_page_sizes;
 
 	gk20a_dbg_fn("");
 
+	if (!vm->big_pages)
+		page_sizes--;
+
 	write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region);
-	if (write_entries > gmmu_nr_page_sizes)
-		write_entries = gmmu_nr_page_sizes;
+	if (write_entries > page_sizes)
+		write_entries = page_sizes;
 
 	user_region_ptr =
 		(struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr;
@@ -216,7 +220,7 @@ static int gk20a_as_ioctl_get_va_regions(
 	}
 
 	args->buf_size =
-		gmmu_nr_page_sizes * sizeof(struct nvgpu_as_va_region);
+		page_sizes * sizeof(struct nvgpu_as_va_region);
 
 	return 0;
 }
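The as_gk20a.c hunks cap how many VA regions the GET_VA_REGIONS ioctl reports: gmmu_nr_page_sizes counts both the small- and big-page VMAs, so a VM created without big pages now reports one region fewer. A minimal userspace sketch of the clamping arithmetic (the struct and constant below are stand-ins, not the nvgpu UAPI):

#include <stdio.h>

/* Stand-ins for gmmu_nr_page_sizes and struct nvgpu_as_va_region. */
enum { NR_PAGE_SIZES = 2 };        /* small + big */
struct va_region { unsigned long long offset, pages; unsigned page_size; };

static unsigned clamp_entries(unsigned long long buf_size, int big_pages)
{
	int page_sizes = NR_PAGE_SIZES;
	unsigned write_entries;

	if (!big_pages)
		page_sizes--;   /* no big-page VMA to report */

	write_entries = buf_size / sizeof(struct va_region);
	if (write_entries > (unsigned)page_sizes)
		write_entries = page_sizes;
	return write_entries;
}

int main(void)
{
	printf("%u\n", clamp_entries(4 * sizeof(struct va_region), 0)); /* 1 */
	printf("%u\n", clamp_entries(4 * sizeof(struct va_region), 1)); /* 2 */
	return 0;
}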
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index f3a333f3..f6c9f901 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1482,6 +1482,8 @@ static int gk20a_probe(struct platform_device *dev)
 	spin_lock_init(&gk20a->debugfs_lock);
 	gk20a->mm.ltc_enabled = true;
 	gk20a->mm.ltc_enabled_debug = true;
+	gk20a->mm.bypass_smmu = platform->bypass_smmu;
+	gk20a->mm.disable_bigpage = platform->disable_bigpage;
 	gk20a->debugfs_ltc_enabled =
 		debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
 				platform->debugfs,
@@ -1496,6 +1498,16 @@ static int gk20a_probe(struct platform_device *dev)
 				S_IRUGO|S_IWUSR,
 				platform->debugfs,
 				&gk20a->timeouts_enabled);
+	gk20a->debugfs_bypass_smmu =
+		debugfs_create_bool("bypass_smmu",
+				S_IRUGO|S_IWUSR,
+				platform->debugfs,
+				&gk20a->mm.bypass_smmu);
+	gk20a->debugfs_disable_bigpage =
+		debugfs_create_bool("disable_bigpage",
+				S_IRUGO|S_IWUSR,
+				platform->debugfs,
+				&gk20a->mm.disable_bigpage);
 	gk20a_pmu_debugfs_init(dev);
 	gk20a_cde_debugfs_init(dev);
 #endif
@@ -1929,9 +1941,14 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->pde_coverage_bit_count =
 		gk20a_mm_pde_coverage_bit_count(&g->mm.pmu.vm);
 
-	gpu->available_big_page_sizes = gpu->big_page_size;
-	if (g->ops.mm.get_big_page_sizes)
-		gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
+	if (g->mm.disable_bigpage) {
+		gpu->big_page_size = 0;
+		gpu->available_big_page_sizes = 0;
+	} else {
+		gpu->available_big_page_sizes = gpu->big_page_size;
+		if (g->ops.mm.get_big_page_sizes)
+			gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
+	}
 
 	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
 		| NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
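In gk20a_init_gpu_characteristics(), available_big_page_sizes works because every big page size is a power of two, so the set of supported sizes fits in one bitmask that per-chip code can OR into; with disable_bigpage set, both fields are zeroed and userspace sees no big-page support at all. A rough model of the mask logic (the sizes are illustrative, not taken from the commit):

#include <stdio.h>

int main(void)
{
	unsigned big_page_size = 64 << 10;    /* default advertised size */
	unsigned extra_sizes = 128 << 10;     /* as if from get_big_page_sizes() */
	int disable_bigpage = 0;
	unsigned available;

	if (disable_bigpage) {
		big_page_size = 0;
		available = 0;
	} else {
		available = big_page_size;    /* each size is a distinct bit */
		available |= extra_sizes;
	}
	printf("available mask: 0x%x\n", available);  /* 0x30000 */
	return 0;
}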
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 7e919e2e..defc5904 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -452,6 +452,8 @@ struct gk20a {
 	struct dentry *debugfs_ltc_enabled;
 	struct dentry *debugfs_timeouts_enabled;
 	struct dentry *debugfs_gr_idle_timeout_default;
+	struct dentry *debugfs_bypass_smmu;
+	struct dentry *debugfs_disable_bigpage;
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 452e064e..37d47c18 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1274,8 +1274,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	int err = 0;
 	struct buffer_attrs bfr = {NULL};
 	struct gk20a_comptags comptags;
-	u64 buf_addr;
 	bool clear_ctags = false;
+	struct scatterlist *sgl;
+	u64 buf_addr;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1308,10 +1309,24 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	bfr.kind_v = kind;
 	bfr.size = dmabuf->size;
+	sgl = bfr.sgt->sgl;
 	buf_addr = (u64)sg_dma_address(bfr.sgt->sgl);
-	if (unlikely(!buf_addr))
-		buf_addr = (u64)sg_phys(bfr.sgt->sgl);
-	bfr.align = 1 << __ffs(buf_addr);
+	if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
+		while (sgl) {
+			u64 align;
+
+			buf_addr = (u64)sg_phys(sgl);
+
+			align = 1 << __ffs(buf_addr | (u64)sgl->length);
+			if (bfr.align)
+				bfr.align = min_t(u64, align, bfr.align);
+			else
+				bfr.align = align;
+			sgl = sg_next(sgl);
+		}
+	} else
+		bfr.align = 1 << __ffs(buf_addr);
+
 	bfr.pgsz_idx = -1;
 	mapping_size = mapping_size ? mapping_size : bfr.size;
 
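The gk20a_vm_map() hunk is the core of SMMU bypass: without the SMMU there is no single contiguous IOVA, so the mapping alignment must hold for every scatterlist chunk. For each chunk, 1 << __ffs(addr | len) is the largest power of two dividing both its start address and its length, and the minimum across chunks bounds the GMMU page size usable for the buffer. A standalone sketch, using __builtin_ctzll in place of the kernel's __ffs (chunk values invented):

#include <stdio.h>
#include <stdint.h>

struct chunk { uint64_t phys; uint64_t len; };  /* stand-in for scatterlist */

static uint64_t buffer_align(const struct chunk *c, int n)
{
	uint64_t align = 0;
	for (int i = 0; i < n; i++) {
		/* largest power of two dividing both address and length */
		uint64_t a = 1ULL << __builtin_ctzll(c[i].phys | c[i].len);
		align = align ? (a < align ? a : align) : a;
	}
	return align;
}

int main(void)
{
	struct chunk sg[] = {
		{ 0x100000, 0x20000 },  /* 1 MiB-aligned, 128K long */
		{ 0x204000, 0x4000 },   /* only 16K-aligned */
	};
	printf("align = 0x%llx\n",
	       (unsigned long long)buffer_align(sg, 2)); /* 0x4000 */
	return 0;
}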
@@ -1829,7 +1844,9 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr)
 static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *pte,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unammped_pte,
 			   int rw_flag, bool sparse, u32 flags)
@@ -1877,7 +1894,9 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm,
 static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *pte,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, u32 flags)
@@ -1887,14 +1906,14 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
 
-	if (iova) {
+	if (*iova) {
 		if (unmapped_pte)
 			pte_w[0] = gmmu_pte_valid_false_f() |
-				gmmu_pte_address_sys_f(iova
+				gmmu_pte_address_sys_f(*iova
 				>> gmmu_pte_address_shift_v());
 		else
 			pte_w[0] = gmmu_pte_valid_true_f() |
-				gmmu_pte_address_sys_f(iova
+				gmmu_pte_address_sys_f(*iova
 				>> gmmu_pte_address_shift_v());
 
 		pte_w[1] = gmmu_pte_aperture_video_memory_f() |
@@ -1925,7 +1944,7 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 
 		gk20a_dbg(gpu_dbg_pte,
 			"pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]",
-			i, iova,
+			i, *iova,
 			kind_v, *ctag / ctag_granularity, !cacheable,
 			pte_w[1], pte_w[0]);
 
@@ -1941,13 +1960,33 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
 	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);
 
+	if (*iova) {
+		*iova += page_size;
+		*offset += page_size;
+		if (*sgl && *offset + page_size > (*sgl)->length) {
+			u64 new_iova;
+			*sgl = sg_next(*sgl);
+			if (*sgl) {
+				new_iova = sg_phys(*sgl);
+				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+					  new_iova, (*sgl)->length);
+				if (new_iova) {
+					*offset = 0;
+					*iova = new_iova;
+				}
+			}
+		}
+	}
+
 	return 0;
 }
 
 static int update_gmmu_level_locked(struct vm_gk20a *vm,
 				struct gk20a_mm_entry *pte,
 				enum gmmu_pgsz_gk20a pgsz_idx,
-				u64 iova,
+				struct scatterlist **sgl,
+				u64 *offset,
+				u64 *iova,
 				u64 gpu_va, u64 gpu_end,
 				u8 kind_v, u32 *ctag,
 				bool cacheable, bool unmapped_pte,
@@ -1968,7 +2007,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		>> (u64)l->lo_bit[pgsz_idx];
 
 	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx",
-		  pgsz_idx, lvl, gpu_va, gpu_end-1, iova);
+		  pgsz_idx, lvl, gpu_va, gpu_end-1, *iova);
 
 	while (gpu_va < gpu_end) {
 		struct gk20a_mm_entry *next_pte = NULL;
@@ -2000,7 +2039,8 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		}
 
 		err = l->update_entry(vm, pte, pde_i, pgsz_idx,
-				iova, kind_v, ctag, cacheable, unmapped_pte,
+				sgl, offset, iova,
+				kind_v, ctag, cacheable, unmapped_pte,
 				rw_flag, sparse, flags);
 		if (err)
 			return err;
@@ -2016,6 +2056,8 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			}
 			err = update_gmmu_level_locked(vm, next_pte,
 				pgsz_idx,
+				sgl,
+				offset,
 				iova,
 				gpu_va,
 				next,
@@ -2027,8 +2069,6 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 			return err;
 		}
 
-		if (iova)
-			iova += next - gpu_va;
 		pde_i++;
 		gpu_va = next;
 	}
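With the signature change, update_gmmu_pte_locked() advances the position itself: after each PTE it bumps *iova and the intra-chunk *offset by page_size, and when the next page would overrun the current chunk it hops to sg_next() and restarts from that chunk's physical address. That is why the old per-PDE bump (iova += next - gpu_va) could be deleted above. A simplified userspace model of that walk, flattening the recursive level traversal into one loop (struct chunk stands in for struct scatterlist):

#include <stdio.h>
#include <stdint.h>

struct chunk { uint64_t phys; uint64_t len; struct chunk *next; };

static void walk_pages(struct chunk *sgl, uint64_t page_size)
{
	uint64_t iova = sgl->phys, offset = 0;

	while (sgl) {
		printf("pte -> %#llx\n", (unsigned long long)iova);
		iova += page_size;
		offset += page_size;
		if (offset + page_size > sgl->len) {   /* chunk exhausted */
			sgl = sgl->next;
			if (sgl) {
				offset = 0;
				iova = sgl->phys;      /* restart in new chunk */
			}
		}
	}
}

int main(void)
{
	struct chunk b = { 0x800000, 0x2000, NULL };
	struct chunk a = { 0x100000, 0x2000, &b };
	walk_pages(&a, 0x1000);   /* 4K pages: two PTEs per chunk */
	return 0;
}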
@@ -2056,18 +2096,39 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	u64 space_to_skip = buffer_offset;
 	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
 	int err;
+	struct scatterlist *sgl = NULL;
 
-	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx",
+	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx, buffer offset %lld, nents %d",
 		  pgsz_idx,
 		  sgt ? g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags)
-		      : 0ULL);
+		      : 0ULL,
+		  buffer_offset,
+		  sgt ? sgt->nents : 0);
 
 	if (space_to_skip & (page_size - 1))
 		return -EINVAL;
 
-	if (sgt)
-		iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags)
-				+ space_to_skip;
+	if (sgt) {
+		iova = g->ops.mm.get_iova_addr(vm->mm->g, sgt->sgl, flags);
+		if (!vm->mm->bypass_smmu && iova) {
+			iova += space_to_skip;
+		} else {
+			sgl = sgt->sgl;
+
+			gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+					(u64)sg_phys(sgl),
+					sgl->length);
+			while (space_to_skip && sgl &&
+			       space_to_skip + page_size > sgl->length) {
+				space_to_skip -= sgl->length;
+				sgl = sg_next(sgl);
+				gk20a_dbg(gpu_dbg_pte, "chunk address %llx, size %d",
+						(u64)sg_phys(sgl),
+						sgl->length);
+			}
+			iova = sg_phys(sgl) + space_to_skip;
+		}
+	}
 
 	gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx",
 		  pgsz_idx, gpu_va, gpu_end-1, iova);
@@ -2079,7 +2140,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		return err;
 	}
 	err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx,
-			iova,
+			&sgl,
+			&space_to_skip,
+			&iova,
 			gpu_va, gpu_end,
 			kind_v, &ctag,
 			cacheable, unmapped_pte, rw_flag, sparse, 0, flags);
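update_gmmu_ptes_locked() has the matching start-of-walk logic: behind the SMMU, buffer_offset can simply be added to the contiguous IOVA, but with physical addresses it must be consumed chunk by chunk until it lands inside one. A sketch of that skip loop under the same stand-in types (like the kernel path, it assumes the offset lies within the buffer):

#include <stdio.h>
#include <stdint.h>

struct chunk { uint64_t phys; uint64_t len; struct chunk *next; };

static uint64_t start_iova(struct chunk *sgl, uint64_t skip, uint64_t page_size)
{
	/* consume whole chunks until the first page to map fits in one */
	while (skip && sgl && skip + page_size > sgl->len) {
		skip -= sgl->len;
		sgl = sgl->next;
	}
	return sgl->phys + skip;
}

int main(void)
{
	struct chunk b = { 0x800000, 0x10000, NULL };
	struct chunk a = { 0x100000, 0x10000, &b };
	/* skip 0x14000 into the buffer: lands 0x4000 into chunk b */
	printf("%#llx\n", (unsigned long long)start_iova(&a, 0x14000, 0x1000));
	return 0;
}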
@@ -2370,15 +2433,16 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 
 	gk20a_dbg_fn("");
 
-	if (big_page_size == 0)
+	if (big_page_size == 0) {
 		big_page_size =
 			gk20a_get_platform(g->dev)->default_big_page_size;
+	} else {
+		if (!is_power_of_2(big_page_size))
+			return -EINVAL;
 
-	if (!is_power_of_2(big_page_size))
-		return -EINVAL;
-
-	if (!(big_page_size & g->gpu_characteristics.available_big_page_sizes))
-		return -EINVAL;
+		if (!(big_page_size & g->gpu_characteristics.available_big_page_sizes))
+			return -EINVAL;
+	}
 
 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
 	if (!vm)
@@ -2391,7 +2455,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
 
 	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
-			mm->channel.size, true, name);
+			mm->channel.size, !mm->disable_bigpage, name);
 
 	return err;
 }
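gk20a_vm_alloc_share() now validates the requested size only when one was actually requested: 0 still selects the platform default, while anything else must be a power of two present in the advertised mask. A compact model of that check (default and mask values invented):

#include <stdio.h>

#define DEFAULT_BIG_PAGE (64 << 10)   /* stand-in for the platform default */

static int pick_big_page(unsigned requested, unsigned available, unsigned *out)
{
	if (requested == 0) {
		*out = DEFAULT_BIG_PAGE;
	} else {
		if (requested & (requested - 1))      /* !is_power_of_2() */
			return -1;
		if (!(requested & available))         /* not advertised */
			return -1;
		*out = requested;
	}
	return 0;
}

int main(void)
{
	unsigned sz;
	unsigned avail = (64 << 10) | (128 << 10);
	printf("%d\n", pick_big_page(0, avail, &sz));          /*  0: default */
	printf("%d\n", pick_big_page(128 << 10, avail, &sz));  /*  0 */
	printf("%d\n", pick_big_page(96 << 10, avail, &sz));   /* -1: not pow2 */
	return 0;
}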
@@ -2417,27 +2481,28 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 			 struct nvgpu_as_alloc_space_args *args)
 
 { int err = -ENOMEM;
-	int pgsz_idx;
+	int pgsz_idx = gmmu_page_size_small;
 	u32 start_page_nr;
 	struct gk20a_allocator *vma;
 	struct vm_gk20a *vm = as_share->vm;
 	struct gk20a *g = vm->mm->g;
 	struct vm_reserved_va_node *va_node;
 	u64 vaddr_start = 0;
+	int page_sizes = gmmu_nr_page_sizes;
 
 	gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
 			args->flags, args->page_size, args->pages,
 			args->o_a.offset);
 
-	/* determine pagesz idx */
-	for (pgsz_idx = gmmu_page_size_small;
-	     pgsz_idx < gmmu_nr_page_sizes;
-	     pgsz_idx++) {
+	if (!vm->big_pages)
+		page_sizes--;
+
+	for (; pgsz_idx < page_sizes; pgsz_idx++) {
 		if (vm->gmmu_page_sizes[pgsz_idx] == args->page_size)
 			break;
 	}
 
-	if (pgsz_idx >= gmmu_nr_page_sizes) {
+	if (pgsz_idx >= page_sizes) {
 		err = -EINVAL;
 		goto clean_up;
 	}
@@ -2720,7 +2785,8 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset)
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+	if (vm->big_pages)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 
 	gk20a_vm_free_entries(vm, &vm->pdb, 0);
@@ -2731,12 +2797,15 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct mem_desc *inst_block)
 	struct device *dev = dev_from_gk20a(g);
 	int err;
 
+	gk20a_dbg_fn("");
+
 	err = gk20a_gmmu_alloc(g, ram_in_alloc_size_v(), inst_block);
 	if (err) {
 		gk20a_err(dev, "%s: memory allocation failed\n", __func__);
 		return err;
 	}
 
+	gk20a_dbg_fn("done");
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index f6806309..895e52ff 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -210,7 +210,9 @@ struct gk20a_mmu_level {
 	int (*update_entry)(struct vm_gk20a *vm,
 			   struct gk20a_mm_entry *pte,
 			   u32 i, u32 gmmu_pgsz_idx,
-			   u64 iova,
+			   struct scatterlist **sgl,
+			   u64 *offset,
+			   u64 *iova,
 			   u32 kind_v, u32 *ctag,
 			   bool cacheable, bool unmapped_pte,
 			   int rw_flag, bool sparse, u32 flags);
@@ -303,6 +305,8 @@ struct mm_gk20a {
 #ifdef CONFIG_DEBUG_FS
 	u32 ltc_enabled;
 	u32 ltc_enabled_debug;
+	u32 bypass_smmu;
+	u32 disable_bigpage;
 #endif
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index f4301dab..f142cb9f 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -76,6 +76,12 @@ struct gk20a_platform {
 	/* Adaptative ELPG: true = enable flase = disable */
 	bool enable_aelpg;
 
+	/* Enable SMMU bypass by default */
+	bool bypass_smmu;
+
+	/* Disable big page support */
+	bool disable_bigpage;
+
 	/*
 	 * gk20a_do_idle() API can take GPU either into rail gate or CAR reset
 	 * This flag can be used to force CAR reset case instead of rail gate