path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author     Terje Bergstrom <tbergstrom@nvidia.com>    2014-10-03 00:32:19 -0400
committer  Dan Willemsen <dwillemsen@nvidia.com>      2015-03-18 15:11:46 -0400
commit     ecc6f27fd13e7560d124faf67d114b93d47b73de (patch)
tree       4d0d065b630976db87f21ee99a63f1477499fbd5 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent     5200902f57d0223e30dfce548355b5fe06a25203 (diff)
gpu: nvgpu: Common VM initializer
Merge initialization code from gk20a_init_system_vm(), gk20a_init_bar1_vm()
and gk20a_vm_alloc_share() into gk20a_init_vm(). Remove redundant page size
data, and move the page size fields to be VM specific.

Bug 1558739
Bug 1560370

Change-Id: I4557d9e04d65ccb48fe1f2b116dd1bfa74cae98e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  528
1 file changed, 228 insertions(+), 300 deletions(-)
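
For quick orientation before the hunks: all three VMs are now set up through the single gk20a_init_vm() helper that this patch introduces. The condensed call sites below are copied from the diff (error handling and surrounding code elided); this is an excerpt for reference, not standalone code.

    /* New common initializer (signature as added by this patch): */
    static int gk20a_init_vm(struct mm_gk20a *mm, struct vm_gk20a *vm,
                             u64 low_hole, u64 aperture_size,
                             bool big_pages, char *name);

    /* gk20a_vm_alloc_share(): per-channel address space, big pages enabled */
    err = gk20a_init_vm(mm, vm, SZ_128K << 10, mm->channel.size, true, name);

    /* gk20a_init_bar1_vm(): BAR1 aperture, small pages only */
    gk20a_init_vm(mm, vm, SZ_4K, mm->bar1.aperture_size, false, "bar1");

    /* gk20a_init_system_vm(): PMU/system VM, small pages only */
    gk20a_init_vm(mm, vm, SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system");

The page-size bookkeeping (gmmu_page_sizes, page_table_sizing, pde_stride) also moves from struct mm_gk20a to struct vm_gk20a, which is why most hunks below replace vm->mm->... accesses with vm->... equivalents.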
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b6a556ac..70f4294b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -98,15 +98,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
98 u64 first_vaddr, u64 last_vaddr, 98 u64 first_vaddr, u64 last_vaddr,
99 u8 kind_v, u32 ctag_offset, bool cacheable, 99 u8 kind_v, u32 ctag_offset, bool cacheable,
100 int rw_flag); 100 int rw_flag);
101static int gk20a_init_system_vm(struct mm_gk20a *mm); 101static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
102static int gk20a_init_bar1_vm(struct mm_gk20a *mm); 102static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
103 103
104/* note: keep the page sizes sorted lowest to highest here */
105static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
106static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
107static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
108 0x1ffffLL };
109static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
110 104
111struct gk20a_dmabuf_priv { 105struct gk20a_dmabuf_priv {
112 struct mutex lock; 106 struct mutex lock;
@@ -290,7 +284,7 @@ void gk20a_remove_mm_support(struct mm_gk20a *mm)
290int gk20a_init_mm_setup_sw(struct gk20a *g) 284int gk20a_init_mm_setup_sw(struct gk20a *g)
291{ 285{
292 struct mm_gk20a *mm = &g->mm; 286 struct mm_gk20a *mm = &g->mm;
293 int i; 287 int err;
294 288
295 gk20a_dbg_fn(""); 289 gk20a_dbg_fn("");
296 290
@@ -301,49 +295,19 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
301 295
302 mm->g = g; 296 mm->g = g;
303 mutex_init(&mm->l2_op_lock); 297 mutex_init(&mm->l2_op_lock);
304 mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
305 mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
306 mm->pde_stride = mm->big_page_size << 10;
307 mm->pde_stride_shift = ilog2(mm->pde_stride);
308 BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
309
310 for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
311
312 u32 num_ptes, pte_space, num_pages;
313
314 /* assuming "full" page tables */
315 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
316
317 pte_space = num_ptes * gmmu_pte__size_v();
318 /* allocate whole pages */
319 pte_space = roundup(pte_space, PAGE_SIZE);
320
321 num_pages = pte_space / PAGE_SIZE;
322 /* make sure "order" is viable */
323 BUG_ON(!is_power_of_2(num_pages));
324
325 mm->page_table_sizing[i].num_ptes = num_ptes;
326 mm->page_table_sizing[i].order = ilog2(num_pages);
327 }
328 298
329 /*TBD: make channel vm size configurable */ 299 /*TBD: make channel vm size configurable */
330 mm->channel.size = 1ULL << NV_GMMU_VA_RANGE; 300 mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
331 301
332 gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20)); 302 gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
333 303
334 gk20a_dbg_info("small page-size (%dKB) pte array: %dKB", 304 err = gk20a_init_bar1_vm(mm);
335 gmmu_page_sizes[gmmu_page_size_small] >> 10, 305 if (err)
336 (mm->page_table_sizing[gmmu_page_size_small].num_ptes * 306 return err;
337 gmmu_pte__size_v()) >> 10);
338
339 gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
340 gmmu_page_sizes[gmmu_page_size_big] >> 10,
341 (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
342 gmmu_pte__size_v()) >> 10);
343
344 307
345 gk20a_init_bar1_vm(mm); 308 err = gk20a_init_system_vm(mm);
346 gk20a_init_system_vm(mm); 309 if (err)
310 return err;
347 311
348 /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ 312 /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
349 g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; 313 g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
@@ -368,12 +332,9 @@ static int gk20a_init_mm_setup_hw(struct gk20a *g)
368 { 332 {
369 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r()); 333 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
370 334
371 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K) 335 fb_mmu_ctrl = (fb_mmu_ctrl &
372 fb_mmu_ctrl = (fb_mmu_ctrl & 336 ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
373 ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) | 337 fb_mmu_ctrl_vm_pg_size_128kb_f();
374 fb_mmu_ctrl_vm_pg_size_128kb_f();
375 else
376 BUG_ON(1); /* no support/testing for larger ones yet */
377 338
378 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl); 339 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
379 } 340 }
@@ -633,7 +594,7 @@ int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
633 gk20a_dbg_fn(""); 594 gk20a_dbg_fn("");
634 595
635 /* allocate enough pages for the table */ 596 /* allocate enough pages for the table */
636 pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order; 597 pte_order = vm->page_table_sizing[gmmu_pgsz_idx].order;
637 598
638 err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size); 599 err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
639 if (err) 600 if (err)
@@ -654,10 +615,10 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
654 u64 addr_lo, u64 addr_hi, 615 u64 addr_lo, u64 addr_hi,
655 u32 *pde_lo, u32 *pde_hi) 616 u32 *pde_lo, u32 *pde_hi)
656{ 617{
657 *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift); 618 *pde_lo = (u32)(addr_lo >> vm->pde_stride_shift);
658 *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift); 619 *pde_hi = (u32)(addr_hi >> vm->pde_stride_shift);
659 gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d", 620 gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
660 addr_lo, addr_hi, vm->mm->pde_stride_shift); 621 addr_lo, addr_hi, vm->pde_stride_shift);
661 gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d", 622 gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
662 *pde_lo, *pde_hi); 623 *pde_lo, *pde_hi);
663} 624}
@@ -672,10 +633,11 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm,
672{ 633{
673 u32 ret; 634 u32 ret;
674 /* mask off pde part */ 635 /* mask off pde part */
675 addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1)); 636 addr = addr & ((((u64)1) << vm->pde_stride_shift) - ((u64)1));
637
676 /* shift over to get pte index. note assumption that pte index 638 /* shift over to get pte index. note assumption that pte index
677 * doesn't leak over into the high 32b */ 639 * doesn't leak over into the high 32b */
678 ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]); 640 ret = (u32)(addr >> ilog2(vm->gmmu_page_sizes[pgsz_idx]));
679 641
680 gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret); 642 gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
681 return ret; 643 return ret;
@@ -715,7 +677,7 @@ int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
715 return 0; 677 return 0;
716 678
717 gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d", 679 gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
718 gmmu_page_sizes[gmmu_pgsz_idx]/1024, i); 680 vm->gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
719 681
720 err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte); 682 err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
721 if (err) 683 if (err)
@@ -854,18 +816,18 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
854 int err; 816 int err;
855 u64 offset; 817 u64 offset;
856 u32 start_page_nr = 0, num_pages; 818 u32 start_page_nr = 0, num_pages;
857 u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx]; 819 u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
858 820
859 if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) { 821 if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
860 dev_warn(dev_from_vm(vm), 822 dev_warn(dev_from_vm(vm),
861 "invalid page size requested in gk20a vm alloc"); 823 "invalid page size requested in gk20a vm alloc");
862 return -EINVAL; 824 return 0;
863 } 825 }
864 826
865 if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) { 827 if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
866 dev_warn(dev_from_vm(vm), 828 dev_warn(dev_from_vm(vm),
867 "unsupportd page size requested"); 829 "unsupportd page size requested");
868 return -EINVAL; 830 return 0;
869 831
870 } 832 }
871 833
@@ -874,10 +836,10 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
874 size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1); 836 size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
875 837
876 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size, 838 gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
877 gmmu_page_sizes[gmmu_pgsz_idx]>>10); 839 vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
878 840
879 /* The vma allocator represents page accounting. */ 841 /* The vma allocator represents page accounting. */
880 num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx]; 842 num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
881 843
882 err = vma->alloc(vma, &start_page_nr, num_pages); 844 err = vma->alloc(vma, &start_page_nr, num_pages);
883 845
@@ -887,7 +849,8 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
887 return 0; 849 return 0;
888 } 850 }
889 851
890 offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx]; 852 offset = (u64)start_page_nr <<
853 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
891 gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset); 854 gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
892 855
893 return offset; 856 return offset;
@@ -898,8 +861,8 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
898 enum gmmu_pgsz_gk20a pgsz_idx) 861 enum gmmu_pgsz_gk20a pgsz_idx)
899{ 862{
900 struct gk20a_allocator *vma = &vm->vma[pgsz_idx]; 863 struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
901 u32 page_size = gmmu_page_sizes[pgsz_idx]; 864 u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
902 u32 page_shift = gmmu_page_shifts[pgsz_idx]; 865 u32 page_shift = ilog2(page_size);
903 u32 start_page_nr, num_pages; 866 u32 start_page_nr, num_pages;
904 int err; 867 int err;
905 868
@@ -1011,26 +974,25 @@ struct buffer_attrs {
1011 u8 uc_kind_v; 974 u8 uc_kind_v;
1012}; 975};
1013 976
1014static void gmmu_select_page_size(struct buffer_attrs *bfr) 977static void gmmu_select_page_size(struct vm_gk20a *vm,
978 struct buffer_attrs *bfr)
1015{ 979{
1016 int i; 980 int i;
1017 /* choose the biggest first (top->bottom) */ 981 /* choose the biggest first (top->bottom) */
1018 for (i = (gmmu_nr_page_sizes-1); i >= 0; i--) 982 for (i = gmmu_nr_page_sizes-1; i >= 0; i--)
1019 if (!(gmmu_page_offset_masks[i] & bfr->align)) { 983 if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
1020 /* would like to add this too but nvmap returns the
1021 * original requested size not the allocated size.
1022 * (!(gmmu_page_offset_masks[i] & bfr->size)) */
1023 bfr->pgsz_idx = i; 984 bfr->pgsz_idx = i;
1024 break; 985 break;
1025 } 986 }
1026} 987}
1027 988
1028static int setup_buffer_kind_and_compression(struct device *d, 989static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
1029 u32 flags, 990 u32 flags,
1030 struct buffer_attrs *bfr, 991 struct buffer_attrs *bfr,
1031 enum gmmu_pgsz_gk20a pgsz_idx) 992 enum gmmu_pgsz_gk20a pgsz_idx)
1032{ 993{
1033 bool kind_compressible; 994 bool kind_compressible;
995 struct device *d = dev_from_gk20a(vm->mm->g);
1034 996
1035 if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v())) 997 if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
1036 bfr->kind_v = gmmu_pte_kind_pitch_v(); 998 bfr->kind_v = gmmu_pte_kind_pitch_v();
@@ -1055,7 +1017,7 @@ static int setup_buffer_kind_and_compression(struct device *d,
1055 } 1017 }
1056 /* comptags only supported for suitable kinds, 128KB pagesize */ 1018 /* comptags only supported for suitable kinds, 128KB pagesize */
1057 if (unlikely(kind_compressible && 1019 if (unlikely(kind_compressible &&
1058 (gmmu_page_sizes[pgsz_idx] != 128*1024))) { 1020 (vm->gmmu_page_sizes[pgsz_idx] != 128*1024))) {
1059 /* 1021 /*
1060 gk20a_warn(d, "comptags specified" 1022 gk20a_warn(d, "comptags specified"
1061 " but pagesize being used doesn't support it");*/ 1023 " but pagesize being used doesn't support it");*/
@@ -1088,7 +1050,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
1088 return -EINVAL; 1050 return -EINVAL;
1089 } 1051 }
1090 1052
1091 if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) { 1053 if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
1092 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx", 1054 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
1093 map_offset); 1055 map_offset);
1094 return -EINVAL; 1056 return -EINVAL;
@@ -1381,7 +1343,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
1381 gmmu_page_size_big : gmmu_page_size_small; 1343 gmmu_page_size_big : gmmu_page_size_small;
1382 } else { 1344 } else {
1383 if (vm->big_pages) 1345 if (vm->big_pages)
1384 gmmu_select_page_size(&bfr); 1346 gmmu_select_page_size(vm, &bfr);
1385 else 1347 else
1386 bfr.pgsz_idx = gmmu_page_size_small; 1348 bfr.pgsz_idx = gmmu_page_size_small;
1387 } 1349 }
@@ -1398,7 +1360,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
1398 err = -EINVAL; 1360 err = -EINVAL;
1399 goto clean_up; 1361 goto clean_up;
1400 } 1362 }
1401 gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx]; 1363 gmmu_page_size = vm->gmmu_page_sizes[bfr.pgsz_idx];
1402 1364
1403 /* Check if we should use a fixed offset for mapping this buffer */ 1365 /* Check if we should use a fixed offset for mapping this buffer */
1404 1366
@@ -1416,7 +1378,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
1416 if (sgt) 1378 if (sgt)
1417 *sgt = bfr.sgt; 1379 *sgt = bfr.sgt;
1418 1380
1419 err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx); 1381 err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
1420 if (unlikely(err)) { 1382 if (unlikely(err)) {
1421 gk20a_err(d, "failure setting up kind and compression"); 1383 gk20a_err(d, "failure setting up kind and compression");
1422 goto clean_up; 1384 goto clean_up;
@@ -1729,7 +1691,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1729 u32 pte_w[2] = {0, 0}; /* invalid pte */ 1691 u32 pte_w[2] = {0, 0}; /* invalid pte */
1730 u32 ctag = ctag_offset; 1692 u32 ctag = ctag_offset;
1731 u32 ctag_incr; 1693 u32 ctag_incr;
1732 u32 page_size = gmmu_page_sizes[pgsz_idx]; 1694 u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
1733 u64 addr = 0; 1695 u64 addr = 0;
1734 u64 space_to_skip = buffer_offset; 1696 u64 space_to_skip = buffer_offset;
1735 1697
@@ -1783,7 +1745,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1783 pte_lo = 0; 1745 pte_lo = 0;
1784 1746
1785 if ((pde_i != pde_hi) && (pde_hi != pde_lo)) 1747 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1786 pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1; 1748 pte_hi = vm->page_table_sizing[pgsz_idx].num_ptes-1;
1787 else 1749 else
1788 pte_hi = pte_index_from_vaddr(vm, last_vaddr, 1750 pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1789 pgsz_idx); 1751 pgsz_idx);
@@ -1863,7 +1825,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1863 * free/alloc/free/alloc. 1825 * free/alloc/free/alloc.
1864 */ 1826 */
1865 free_gmmu_pages(vm, pte->ref, pte->sgt, 1827 free_gmmu_pages(vm, pte->ref, pte->sgt,
1866 vm->mm->page_table_sizing[pgsz_idx].order, 1828 vm->page_table_sizing[pgsz_idx].order,
1867 pte->size); 1829 pte->size);
1868 pte->ref = NULL; 1830 pte->ref = NULL;
1869 1831
@@ -1973,7 +1935,7 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1973{ 1935{
1974 struct mm_gk20a *mm = vm->mm; 1936 struct mm_gk20a *mm = vm->mm;
1975 struct gk20a *g = mm->g; 1937 struct gk20a *g = mm->g;
1976 u32 pgsz = gmmu_page_sizes[pgsz_idx]; 1938 u32 pgsz = vm->gmmu_page_sizes[pgsz_idx];
1977 u32 i; 1939 u32 i;
1978 dma_addr_t iova; 1940 dma_addr_t iova;
1979 1941
@@ -1981,7 +1943,7 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1981 if (!vm->zero_page_cpuva) { 1943 if (!vm->zero_page_cpuva) {
1982 int err = 0; 1944 int err = 0;
1983 vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev, 1945 vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
1984 mm->big_page_size, 1946 vm->big_page_size,
1985 &iova, 1947 &iova,
1986 GFP_KERNEL); 1948 GFP_KERNEL);
1987 if (!vm->zero_page_cpuva) { 1949 if (!vm->zero_page_cpuva) {
@@ -1992,9 +1954,9 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1992 vm->zero_page_iova = iova; 1954 vm->zero_page_iova = iova;
1993 err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt, 1955 err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
1994 vm->zero_page_cpuva, vm->zero_page_iova, 1956 vm->zero_page_cpuva, vm->zero_page_iova,
1995 mm->big_page_size); 1957 vm->big_page_size);
1996 if (err) { 1958 if (err) {
1997 dma_free_coherent(&g->dev->dev, mm->big_page_size, 1959 dma_free_coherent(&g->dev->dev, vm->big_page_size,
1998 vm->zero_page_cpuva, 1960 vm->zero_page_cpuva,
1999 vm->zero_page_iova); 1961 vm->zero_page_iova);
2000 vm->zero_page_iova = 0; 1962 vm->zero_page_iova = 0;
@@ -2058,7 +2020,7 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
2058 u64 vaddr = mapped_buffer->addr; 2020 u64 vaddr = mapped_buffer->addr;
2059 u32 pgsz_idx = mapped_buffer->pgsz_idx; 2021 u32 pgsz_idx = mapped_buffer->pgsz_idx;
2060 u32 num_pages = mapped_buffer->size >> 2022 u32 num_pages = mapped_buffer->size >>
2061 gmmu_page_shifts[pgsz_idx]; 2023 ilog2(vm->gmmu_page_sizes[pgsz_idx]);
2062 2024
2063 /* there is little we can do if this fails... */ 2025 /* there is little we can do if this fails... */
2064 if (g->ops.mm.put_empty) { 2026 if (g->ops.mm.put_empty) {
@@ -2082,7 +2044,8 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
2082 gk20a_mem_flag_none); 2044 gk20a_mem_flag_none);
2083 2045
2084 gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d", 2046 gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
2085 vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx], 2047 vm_aspace_id(vm),
2048 vm->gmmu_page_sizes[mapped_buffer->pgsz_idx],
2086 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr), 2049 hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
2087 mapped_buffer->own_mem_ref); 2050 mapped_buffer->own_mem_ref);
2088 2051
@@ -2159,14 +2122,14 @@ void gk20a_vm_remove_support(struct vm_gk20a *vm)
2159 &vm->pdes.ptes[gmmu_page_size_small][i]; 2122 &vm->pdes.ptes[gmmu_page_size_small][i];
2160 if (pte->ref) { 2123 if (pte->ref) {
2161 free_gmmu_pages(vm, pte->ref, pte->sgt, 2124 free_gmmu_pages(vm, pte->ref, pte->sgt,
2162 vm->mm->page_table_sizing[gmmu_page_size_small].order, 2125 vm->page_table_sizing[gmmu_page_size_small].order,
2163 pte->size); 2126 pte->size);
2164 pte->ref = NULL; 2127 pte->ref = NULL;
2165 } 2128 }
2166 pte = &vm->pdes.ptes[gmmu_page_size_big][i]; 2129 pte = &vm->pdes.ptes[gmmu_page_size_big][i];
2167 if (pte->ref) { 2130 if (pte->ref) {
2168 free_gmmu_pages(vm, pte->ref, pte->sgt, 2131 free_gmmu_pages(vm, pte->ref, pte->sgt,
2169 vm->mm->page_table_sizing[gmmu_page_size_big].order, 2132 vm->page_table_sizing[gmmu_page_size_big].order,
2170 pte->size); 2133 pte->size);
2171 pte->ref = NULL; 2134 pte->ref = NULL;
2172 } 2135 }
@@ -2184,7 +2147,7 @@ void gk20a_vm_remove_support(struct vm_gk20a *vm)
2184 2147
2185 /* release zero page if used */ 2148 /* release zero page if used */
2186 if (vm->zero_page_cpuva) 2149 if (vm->zero_page_cpuva)
2187 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size, 2150 dma_free_coherent(&g->dev->dev, vm->big_page_size,
2188 vm->zero_page_cpuva, vm->zero_page_iova); 2151 vm->zero_page_cpuva, vm->zero_page_iova);
2189 2152
2190 /* vm is not used anymore. release it. */ 2153 /* vm is not used anymore. release it. */
@@ -2208,34 +2171,62 @@ void gk20a_vm_put(struct vm_gk20a *vm)
2208 kref_put(&vm->ref, gk20a_vm_remove_support_kref); 2171 kref_put(&vm->ref, gk20a_vm_remove_support_kref);
2209} 2172}
2210 2173
2211/* address space interfaces for the gk20a module */ 2174static int gk20a_init_vm(struct mm_gk20a *mm,
2212int gk20a_vm_alloc_share(struct gk20a_as_share *as_share) 2175 struct vm_gk20a *vm,
2176 u64 low_hole,
2177 u64 aperture_size,
2178 bool big_pages,
2179 char *name)
2213{ 2180{
2214 struct gk20a_as *as = as_share->as; 2181 int err, i;
2215 struct gk20a *g = gk20a_from_as(as);
2216 struct mm_gk20a *mm = &g->mm;
2217 struct vm_gk20a *vm;
2218 u64 vma_size;
2219 u32 num_pages, low_hole_pages; 2182 u32 num_pages, low_hole_pages;
2220 char name[32]; 2183 char alloc_name[32];
2221 int err; 2184 size_t vma_size;
2222 2185
2223 gk20a_dbg_fn(""); 2186 /* note: keep the page sizes sorted lowest to highest here */
2187 u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
2224 2188
2225 vm = kzalloc(sizeof(*vm), GFP_KERNEL); 2189 vm->mm = mm;
2226 if (!vm)
2227 return -ENOMEM;
2228 2190
2229 as_share->vm = vm; 2191 vm->va_start = low_hole;
2192 vm->va_limit = aperture_size;
2193 vm->big_pages = big_pages;
2230 2194
2231 vm->mm = mm; 2195 vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
2232 vm->as_share = as_share; 2196 vm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
2197 vm->pde_stride = vm->big_page_size << 10;
2198 vm->pde_stride_shift = ilog2(vm->pde_stride);
2199
2200 for (i = 0; i < gmmu_nr_page_sizes; i++) {
2201 u32 num_ptes, pte_space, num_pages;
2202
2203 vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
2204
2205 /* assuming "full" page tables */
2206 num_ptes = vm->pde_stride / gmmu_page_sizes[i];
2207
2208 pte_space = num_ptes * gmmu_pte__size_v();
2209 /* allocate whole pages */
2210 pte_space = roundup(pte_space, PAGE_SIZE);
2211
2212 num_pages = pte_space / PAGE_SIZE;
2213 /* make sure "order" is viable */
2214 BUG_ON(!is_power_of_2(num_pages));
2233 2215
2234 vm->big_pages = true; 2216 vm->page_table_sizing[i].num_ptes = num_ptes;
2217 vm->page_table_sizing[i].order = ilog2(num_pages);
2218 }
2219
2220 gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
2221 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10,
2222 (vm->page_table_sizing[gmmu_page_size_small].num_ptes *
2223 gmmu_pte__size_v()) >> 10);
2224
2225 gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
2226 vm->gmmu_page_sizes[gmmu_page_size_big] >> 10,
2227 (vm->page_table_sizing[gmmu_page_size_big].num_ptes *
2228 gmmu_pte__size_v()) >> 10);
2235 2229
2236 vm->va_start = mm->pde_stride; /* create a one pde hole */
2237 vm->va_limit = mm->channel.size; /* note this means channel.size is
2238 really just the max */
2239 { 2230 {
2240 u32 pde_lo, pde_hi; 2231 u32 pde_lo, pde_hi;
2241 pde_range_from_vaddr_range(vm, 2232 pde_range_from_vaddr_range(vm,
@@ -2248,61 +2239,75 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2248 kzalloc(sizeof(struct page_table_gk20a) * 2239 kzalloc(sizeof(struct page_table_gk20a) *
2249 vm->pdes.num_pdes, GFP_KERNEL); 2240 vm->pdes.num_pdes, GFP_KERNEL);
2250 2241
2242 if (!vm->pdes.ptes[gmmu_page_size_small]) {
2243 err = -ENOMEM;
2244 goto clean_up_pdes;
2245 }
2246
2251 vm->pdes.ptes[gmmu_page_size_big] = 2247 vm->pdes.ptes[gmmu_page_size_big] =
2252 kzalloc(sizeof(struct page_table_gk20a) * 2248 kzalloc(sizeof(struct page_table_gk20a) *
2253 vm->pdes.num_pdes, GFP_KERNEL); 2249 vm->pdes.num_pdes, GFP_KERNEL);
2254 2250
2255 if (!(vm->pdes.ptes[gmmu_page_size_small] && 2251 if (!vm->pdes.ptes[gmmu_page_size_big]) {
2256 vm->pdes.ptes[gmmu_page_size_big])) 2252 err = -ENOMEM;
2257 return -ENOMEM; 2253 goto clean_up_pdes;
2254 }
2258 2255
2259 gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d", 2256 gk20a_dbg_info("init space for %s va_limit=0x%llx num_pdes=%d",
2260 vm->va_limit, vm->pdes.num_pdes); 2257 name, vm->va_limit, vm->pdes.num_pdes);
2261 2258
2262 /* allocate the page table directory */ 2259 /* allocate the page table directory */
2263 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref, 2260 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2264 &vm->pdes.sgt, &vm->pdes.size); 2261 &vm->pdes.sgt, &vm->pdes.size);
2265 if (err) 2262 if (err)
2266 return -ENOMEM; 2263 goto clean_up_pdes;
2267 2264
2268 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, 2265 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2269 vm->pdes.size); 2266 vm->pdes.size);
2270 if (err) { 2267 if (err) {
2271 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, 2268 goto clean_up_ptes;
2272 vm->pdes.size);
2273 return -ENOMEM;
2274 } 2269 }
2275 gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx", 2270 gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
2276 vm->pdes.kv, 2271 vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2277 gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2278 /* we could release vm->pdes.kv but it's only one page... */ 2272 /* we could release vm->pdes.kv but it's only one page... */
2279 2273
2280
2281 /* low-half: alloc small pages */ 2274 /* low-half: alloc small pages */
2282 /* high-half: alloc big pages */ 2275 /* high-half: alloc big pages */
2283 vma_size = mm->channel.size >> 1; 2276 vma_size = vm->va_limit;
2277 if (big_pages)
2278 vma_size /= 2;
2284 2279
2285 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 2280 num_pages = (u32)(vma_size >>
2286 gmmu_page_sizes[gmmu_page_size_small]>>10); 2281 ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
2287 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
2288 2282
2289 /* num_pages above is without regard to the low-side hole. */ 2283 /* num_pages above is without regard to the low-side hole. */
2290 low_hole_pages = (vm->va_start >> 2284 low_hole_pages = (vm->va_start >>
2291 gmmu_page_shifts[gmmu_page_size_small]); 2285 ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
2292 2286
2293 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name, 2287 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
2294 low_hole_pages, /* start */ 2288 vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
2295 num_pages - low_hole_pages, /* length */ 2289 err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
2296 1); /* align */ 2290 alloc_name,
2297 2291 low_hole_pages, /*start*/
2298 snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id, 2292 num_pages - low_hole_pages,/* length*/
2299 gmmu_page_sizes[gmmu_page_size_big]>>10); 2293 1); /* align */
2300 2294 if (err)
2301 num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]); 2295 goto clean_up_map_pde;
2302 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name, 2296
2303 num_pages, /* start */ 2297 if (big_pages) {
2304 num_pages, /* length */ 2298 num_pages = (u32)((vm->va_limit / 2) >>
2305 1); /* align */ 2299 ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
2300
2301 snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
2302 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
2303 err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
2304 alloc_name,
2305 num_pages, /* start */
2306 num_pages, /* length */
2307 1); /* align */
2308 if (err)
2309 goto clean_up_small_allocator;
2310 }
2306 2311
2307 vm->mapped_buffers = RB_ROOT; 2312 vm->mapped_buffers = RB_ROOT;
2308 2313
@@ -2310,12 +2315,48 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2310 kref_init(&vm->ref); 2315 kref_init(&vm->ref);
2311 INIT_LIST_HEAD(&vm->reserved_va_list); 2316 INIT_LIST_HEAD(&vm->reserved_va_list);
2312 2317
2318 return 0;
2319
2320clean_up_small_allocator:
2321 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
2322clean_up_map_pde:
2323 unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
2324clean_up_ptes:
2325 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2326 vm->pdes.size);
2327clean_up_pdes:
2328 kfree(vm->pdes.ptes[gmmu_page_size_small]);
2329 kfree(vm->pdes.ptes[gmmu_page_size_big]);
2330 return err;
2331}
2332
2333/* address space interfaces for the gk20a module */
2334int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2335{
2336 struct gk20a_as *as = as_share->as;
2337 struct gk20a *g = gk20a_from_as(as);
2338 struct mm_gk20a *mm = &g->mm;
2339 struct vm_gk20a *vm;
2340 char name[32];
2341 int err;
2342
2343 gk20a_dbg_fn("");
2344
2345 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
2346 if (!vm)
2347 return -ENOMEM;
2348
2349 as_share->vm = vm;
2350 vm->as_share = as_share;
2313 vm->enable_ctag = true; 2351 vm->enable_ctag = true;
2314 2352
2353 snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
2354 err = gk20a_init_vm(mm, vm,
2355 SZ_128K << 10, mm->channel.size, true, name);
2356
2315 return 0; 2357 return 0;
2316} 2358}
2317 2359
2318
2319int gk20a_vm_release_share(struct gk20a_as_share *as_share) 2360int gk20a_vm_release_share(struct gk20a_as_share *as_share)
2320{ 2361{
2321 struct vm_gk20a *vm = as_share->vm; 2362 struct vm_gk20a *vm = as_share->vm;
@@ -2353,7 +2394,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2353 for (pgsz_idx = gmmu_page_size_small; 2394 for (pgsz_idx = gmmu_page_size_small;
2354 pgsz_idx < gmmu_nr_page_sizes; 2395 pgsz_idx < gmmu_nr_page_sizes;
2355 pgsz_idx++) { 2396 pgsz_idx++) {
2356 if (gmmu_page_sizes[pgsz_idx] == args->page_size) 2397 if (vm->gmmu_page_sizes[pgsz_idx] == args->page_size)
2357 break; 2398 break;
2358 } 2399 }
2359 2400
@@ -2378,7 +2419,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2378 start_page_nr = 0; 2419 start_page_nr = 0;
2379 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) 2420 if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
2380 start_page_nr = (u32)(args->o_a.offset >> 2421 start_page_nr = (u32)(args->o_a.offset >>
2381 gmmu_page_shifts[pgsz_idx]); 2422 ilog2(vm->gmmu_page_sizes[pgsz_idx]));
2382 2423
2383 vma = &vm->vma[pgsz_idx]; 2424 vma = &vm->vma[pgsz_idx];
2384 err = vma->alloc(vma, &start_page_nr, args->pages); 2425 err = vma->alloc(vma, &start_page_nr, args->pages);
@@ -2387,7 +2428,8 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2387 goto clean_up; 2428 goto clean_up;
2388 } 2429 }
2389 2430
2390 vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx]; 2431 vaddr_start = (u64)start_page_nr <<
2432 ilog2(vm->gmmu_page_sizes[pgsz_idx]);
2391 2433
2392 va_node->vaddr_start = vaddr_start; 2434 va_node->vaddr_start = vaddr_start;
2393 va_node->size = (u64)args->page_size * (u64)args->pages; 2435 va_node->size = (u64)args->page_size * (u64)args->pages;
@@ -2438,7 +2480,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2438 for (pgsz_idx = gmmu_page_size_small; 2480 for (pgsz_idx = gmmu_page_size_small;
2439 pgsz_idx < gmmu_nr_page_sizes; 2481 pgsz_idx < gmmu_nr_page_sizes;
2440 pgsz_idx++) { 2482 pgsz_idx++) {
2441 if (gmmu_page_sizes[pgsz_idx] == args->page_size) 2483 if (vm->gmmu_page_sizes[pgsz_idx] == args->page_size)
2442 break; 2484 break;
2443 } 2485 }
2444 2486
@@ -2448,7 +2490,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2448 } 2490 }
2449 2491
2450 start_page_nr = (u32)(args->offset >> 2492 start_page_nr = (u32)(args->offset >>
2451 gmmu_page_shifts[pgsz_idx]); 2493 ilog2(vm->gmmu_page_sizes[pgsz_idx]));
2452 2494
2453 vma = &vm->vma[pgsz_idx]; 2495 vma = &vm->vma[pgsz_idx];
2454 err = vma->free(vma, start_page_nr, args->pages); 2496 err = vma->free(vma, start_page_nr, args->pages);
@@ -2643,6 +2685,17 @@ int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
2643 return 0; 2685 return 0;
2644} 2686}
2645 2687
2688static void gk20a_deinit_vm(struct vm_gk20a *vm)
2689{
2690 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
2691 gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
2692 unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
2693 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2694 vm->pdes.size);
2695 kfree(vm->pdes.ptes[gmmu_page_size_small]);
2696 kfree(vm->pdes.ptes[gmmu_page_size_big]);
2697}
2698
2646static int gk20a_init_bar1_vm(struct mm_gk20a *mm) 2699static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2647{ 2700{
2648 int err; 2701 int err;
@@ -2650,73 +2703,24 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2650 void *inst_ptr; 2703 void *inst_ptr;
2651 struct vm_gk20a *vm = &mm->bar1.vm; 2704 struct vm_gk20a *vm = &mm->bar1.vm;
2652 struct gk20a *g = gk20a_from_mm(mm); 2705 struct gk20a *g = gk20a_from_mm(mm);
2653 struct device *d = dev_from_gk20a(g);
2654 struct inst_desc *inst_block = &mm->bar1.inst_block;
2655 u64 pde_addr; 2706 u64 pde_addr;
2656 u32 pde_addr_lo; 2707 u32 pde_addr_lo;
2657 u32 pde_addr_hi; 2708 u32 pde_addr_hi;
2709 struct device *d = dev_from_gk20a(g);
2710 struct inst_desc *inst_block = &mm->bar1.inst_block;
2658 dma_addr_t iova; 2711 dma_addr_t iova;
2659 2712
2660 vm->mm = mm;
2661
2662 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; 2713 mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
2663
2664 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); 2714 gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
2715 gk20a_init_vm(mm, vm, SZ_4K, mm->bar1.aperture_size, false, "bar1");
2665 2716
2666 vm->va_start = mm->pde_stride * 1; 2717 gk20a_dbg_info("pde pa=0x%llx",
2667 vm->va_limit = mm->bar1.aperture_size; 2718 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2668
2669 {
2670 u32 pde_lo, pde_hi;
2671 pde_range_from_vaddr_range(vm,
2672 0, vm->va_limit-1,
2673 &pde_lo, &pde_hi);
2674 vm->pdes.num_pdes = pde_hi + 1;
2675 }
2676
2677 /* bar1 is likely only to ever use/need small page sizes. */
2678 /* But just in case, for now... arrange for both.*/
2679 vm->pdes.ptes[gmmu_page_size_small] =
2680 kzalloc(sizeof(struct page_table_gk20a) *
2681 vm->pdes.num_pdes, GFP_KERNEL);
2682
2683 vm->pdes.ptes[gmmu_page_size_big] =
2684 kzalloc(sizeof(struct page_table_gk20a) *
2685 vm->pdes.num_pdes, GFP_KERNEL);
2686
2687 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2688 vm->pdes.ptes[gmmu_page_size_big]))
2689 return -ENOMEM;
2690
2691 gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
2692 vm->va_limit, vm->pdes.num_pdes);
2693
2694
2695 /* allocate the page table directory */
2696 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2697 &vm->pdes.sgt, &vm->pdes.size);
2698 if (err)
2699 goto clean_up;
2700
2701 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2702 vm->pdes.size);
2703 if (err) {
2704 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2705 vm->pdes.size);
2706 goto clean_up;
2707 }
2708 gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
2709 vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2710 /* we could release vm->pdes.kv but it's only one page... */
2711 2719
2712 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); 2720 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2713 pde_addr_lo = u64_lo32(pde_addr >> 12); 2721 pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v());
2714 pde_addr_hi = u64_hi32(pde_addr); 2722 pde_addr_hi = u64_hi32(pde_addr);
2715 2723
2716 gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2717 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
2718 pde_addr_lo, pde_addr_hi);
2719
2720 /* allocate instance mem for bar1 */ 2724 /* allocate instance mem for bar1 */
2721 inst_block->size = ram_in_alloc_size_v(); 2725 inst_block->size = ram_in_alloc_size_v();
2722 inst_block->cpuva = dma_alloc_coherent(d, inst_block->size, 2726 inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
@@ -2724,7 +2728,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2724 if (!inst_block->cpuva) { 2728 if (!inst_block->cpuva) {
2725 gk20a_err(d, "%s: memory allocation failed\n", __func__); 2729 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2726 err = -ENOMEM; 2730 err = -ENOMEM;
2727 goto clean_up; 2731 goto clean_up_va;
2728 } 2732 }
2729 2733
2730 inst_block->iova = iova; 2734 inst_block->iova = iova;
@@ -2732,7 +2736,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2732 if (!inst_block->cpu_pa) { 2736 if (!inst_block->cpu_pa) {
2733 gk20a_err(d, "%s: failed to get phys address\n", __func__); 2737 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2734 err = -ENOMEM; 2738 err = -ENOMEM;
2735 goto clean_up; 2739 goto clean_up_inst_block;
2736 } 2740 }
2737 2741
2738 inst_pa = inst_block->cpu_pa; 2742 inst_pa = inst_block->cpu_pa;
@@ -2741,7 +2745,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2741 gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p", 2745 gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p",
2742 (u64)inst_pa, inst_ptr); 2746 (u64)inst_pa, inst_ptr);
2743 2747
2744 memset(inst_ptr, 0, ram_fc_size_val_v()); 2748 memset(inst_ptr, 0, inst_block->size);
2745 2749
2746 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), 2750 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2747 ram_in_page_dir_base_target_vid_mem_f() | 2751 ram_in_page_dir_base_target_vid_mem_f() |
@@ -2758,31 +2762,16 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2758 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); 2762 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2759 2763
2760 gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa); 2764 gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
2761 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
2762 1,/*start*/
2763 (vm->va_limit >> 12) - 1 /* length*/,
2764 1); /* align */
2765 /* initialize just in case we try to use it anyway */
2766 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
2767 0x0badc0de, /* start */
2768 1, /* length */
2769 1); /* align */
2770
2771 vm->mapped_buffers = RB_ROOT;
2772
2773 mutex_init(&vm->update_gmmu_lock);
2774 kref_init(&vm->ref);
2775 INIT_LIST_HEAD(&vm->reserved_va_list);
2776
2777 return 0; 2765 return 0;
2778 2766
2779clean_up: 2767clean_up_inst_block:
2780 /* free, etc */
2781 if (inst_block->cpuva) 2768 if (inst_block->cpuva)
2782 dma_free_coherent(d, inst_block->size, 2769 dma_free_coherent(d, inst_block->size,
2783 inst_block->cpuva, inst_block->iova); 2770 inst_block->cpuva, inst_block->iova);
2784 inst_block->cpuva = NULL; 2771 inst_block->cpuva = NULL;
2785 inst_block->iova = 0; 2772 inst_block->iova = 0;
2773clean_up_va:
2774 gk20a_deinit_vm(vm);
2786 return err; 2775 return err;
2787} 2776}
2788 2777
@@ -2794,79 +2783,34 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
2794 void *inst_ptr; 2783 void *inst_ptr;
2795 struct vm_gk20a *vm = &mm->pmu.vm; 2784 struct vm_gk20a *vm = &mm->pmu.vm;
2796 struct gk20a *g = gk20a_from_mm(mm); 2785 struct gk20a *g = gk20a_from_mm(mm);
2797 struct device *d = dev_from_gk20a(g);
2798 struct inst_desc *inst_block = &mm->pmu.inst_block;
2799 u64 pde_addr; 2786 u64 pde_addr;
2800 u32 pde_addr_lo; 2787 u32 pde_addr_lo;
2801 u32 pde_addr_hi; 2788 u32 pde_addr_hi;
2789 struct device *d = dev_from_gk20a(g);
2790 struct inst_desc *inst_block = &mm->pmu.inst_block;
2802 dma_addr_t iova; 2791 dma_addr_t iova;
2803 2792
2804 vm->mm = mm;
2805
2806 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; 2793 mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
2807
2808 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); 2794 gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
2809 2795
2810 vm->va_start = GK20A_PMU_VA_START; 2796 gk20a_init_vm(mm, vm,
2811 vm->va_limit = vm->va_start + mm->pmu.aperture_size; 2797 SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system");
2812
2813 {
2814 u32 pde_lo, pde_hi;
2815 pde_range_from_vaddr_range(vm,
2816 0, vm->va_limit-1,
2817 &pde_lo, &pde_hi);
2818 vm->pdes.num_pdes = pde_hi + 1;
2819 }
2820
2821 /* The pmu is likely only to ever use/need small page sizes. */
2822 /* But just in case, for now... arrange for both.*/
2823 vm->pdes.ptes[gmmu_page_size_small] =
2824 kzalloc(sizeof(struct page_table_gk20a) *
2825 vm->pdes.num_pdes, GFP_KERNEL);
2826
2827 vm->pdes.ptes[gmmu_page_size_big] =
2828 kzalloc(sizeof(struct page_table_gk20a) *
2829 vm->pdes.num_pdes, GFP_KERNEL);
2830
2831 if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2832 vm->pdes.ptes[gmmu_page_size_big]))
2833 return -ENOMEM;
2834
2835 gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
2836 vm->va_limit, vm->pdes.num_pdes);
2837
2838 /* allocate the page table directory */
2839 err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2840 &vm->pdes.sgt, &vm->pdes.size);
2841 if (err)
2842 goto clean_up;
2843 2798
2844 err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv, 2799 gk20a_dbg_info("pde pa=0x%llx",
2845 vm->pdes.size); 2800 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2846 if (err) {
2847 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2848 vm->pdes.size);
2849 goto clean_up;
2850 }
2851 gk20a_dbg_info("pmu pdes phys @ 0x%llx",
2852 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2853 /* we could release vm->pdes.kv but it's only one page... */
2854 2801
2855 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl); 2802 pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2856 pde_addr_lo = u64_lo32(pde_addr >> 12); 2803 pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v());
2857 pde_addr_hi = u64_hi32(pde_addr); 2804 pde_addr_hi = u64_hi32(pde_addr);
2858 2805
2859 gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2860 (u64)pde_addr, pde_addr_lo, pde_addr_hi);
2861
2862 /* allocate instance mem for pmu */ 2806 /* allocate instance mem for pmu */
2863 inst_block->size = GK20A_PMU_INST_SIZE; 2807 inst_block->size = ram_in_alloc_size_v();
2864 inst_block->cpuva = dma_alloc_coherent(d, inst_block->size, 2808 inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2865 &iova, GFP_KERNEL); 2809 &iova, GFP_KERNEL);
2866 if (!inst_block->cpuva) { 2810 if (!inst_block->cpuva) {
2867 gk20a_err(d, "%s: memory allocation failed\n", __func__); 2811 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2868 err = -ENOMEM; 2812 err = -ENOMEM;
2869 goto clean_up; 2813 goto clean_up_va;
2870 } 2814 }
2871 2815
2872 inst_block->iova = iova; 2816 inst_block->iova = iova;
@@ -2874,7 +2818,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
2874 if (!inst_block->cpu_pa) { 2818 if (!inst_block->cpu_pa) {
2875 gk20a_err(d, "%s: failed to get phys address\n", __func__); 2819 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2876 err = -ENOMEM; 2820 err = -ENOMEM;
2877 goto clean_up; 2821 goto clean_up_inst_block;
2878 } 2822 }
2879 2823
2880 inst_pa = inst_block->cpu_pa; 2824 inst_pa = inst_block->cpu_pa;
@@ -2882,7 +2826,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
2882 2826
2883 gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa); 2827 gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
2884 2828
2885 memset(inst_ptr, 0, GK20A_PMU_INST_SIZE); 2829 memset(inst_ptr, 0, inst_block->size);
2886 2830
2887 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), 2831 gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2888 ram_in_page_dir_base_target_vid_mem_f() | 2832 ram_in_page_dir_base_target_vid_mem_f() |
@@ -2898,32 +2842,16 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
2898 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), 2842 gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2899 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit))); 2843 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2900 2844
2901 gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
2902 (vm->va_start >> 12), /* start */
2903 (vm->va_limit - vm->va_start) >> 12, /*length*/
2904 1); /* align */
2905 /* initialize just in case we try to use it anyway */
2906 gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
2907 0x0badc0de, /* start */
2908 1, /* length */
2909 1); /* align */
2910
2911
2912 vm->mapped_buffers = RB_ROOT;
2913
2914 mutex_init(&vm->update_gmmu_lock);
2915 kref_init(&vm->ref);
2916 INIT_LIST_HEAD(&vm->reserved_va_list);
2917
2918 return 0; 2845 return 0;
2919 2846
2920clean_up: 2847clean_up_inst_block:
2921 /* free, etc */
2922 if (inst_block->cpuva) 2848 if (inst_block->cpuva)
2923 dma_free_coherent(d, inst_block->size, 2849 dma_free_coherent(d, inst_block->size,
2924 inst_block->cpuva, inst_block->iova); 2850 inst_block->cpuva, inst_block->iova);
2925 inst_block->cpuva = NULL; 2851 inst_block->cpuva = NULL;
2926 inst_block->iova = 0; 2852 inst_block->iova = 0;
2853clean_up_va:
2854 gk20a_deinit_vm(vm);
2927 return err; 2855 return err;
2928} 2856}
2929 2857