Diffstat (limited to 'drivers/gpu')

 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c     |   6
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  | 528
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h  |  22
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/pmu_gk20a.h |   2
 -rw-r--r--  drivers/gpu/nvgpu/gm20b/mm_gm20b.c  |  20
 5 files changed, 249 insertions(+), 329 deletions(-)
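The commit's core refactor: the GMMU page-size tables (gmmu_page_sizes[], gmmu_page_shifts[], gmmu_page_offset_masks[]) and the PDE-stride and page-table-sizing state move from the per-device struct mm_gk20a into each struct vm_gk20a, and the duplicated BAR1/PMU/channel address-space setup collapses into a shared gk20a_init_vm()/gk20a_deinit_vm() pair with staged error unwinding. As a sanity check on the sizing arithmetic gk20a_init_vm() now runs per VM, here is a minimal standalone C sketch; the 8-byte PTE size is an assumption standing in for gmmu_pte__size_v(), and a 4 KB kernel PAGE_SIZE is assumed.

#include <stdio.h>

#define SZ_4K    (4u * 1024u)
#define SZ_128K  (128u * 1024u)
#define PAGE_SZ  SZ_4K  /* assumption: 4 KB kernel PAGE_SIZE */
#define PTE_SZ   8u     /* assumption: gmmu_pte__size_v() == 8 bytes */

/* userspace stand-in for the kernel's ilog2() */
static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int s = 0;
	while (v >>= 1)
		s++;
	return s;
}

int main(void)
{
	const unsigned int page_sizes[2] = { SZ_4K, SZ_128K };
	unsigned int big_page_size = SZ_128K;
	unsigned int pde_stride = big_page_size << 10;  /* 128 MB per PDE */
	int i;

	printf("pde_stride_shift = %u\n", ilog2_u32(pde_stride)); /* 27 */

	for (i = 0; i < 2; i++) {
		/* a "full" page table covers one whole PDE stride */
		unsigned int num_ptes = pde_stride / page_sizes[i];
		unsigned int pte_space = num_ptes * PTE_SZ;
		unsigned int num_pages;

		/* round up to whole pages, as roundup(pte_space, PAGE_SIZE) does */
		pte_space = (pte_space + PAGE_SZ - 1) / PAGE_SZ * PAGE_SZ;
		num_pages = pte_space / PAGE_SZ;
		printf("%3uKB pages: num_ptes=%u order=%u\n",
		       page_sizes[i] >> 10, num_ptes, ilog2_u32(num_pages));
	}
	return 0;
}

With a 128 KB big page the PDE stride is 128 MB (shift 27), so under these assumptions a full 4 KB-page table takes 64 pages (order 6) and a full 128 KB-page table takes 2 pages (order 1), which are the values vm->page_table_sizing[] ends up holding.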
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 05877159..76985994 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1887,9 +1887,9 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 
 	gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
 
-	gpu->big_page_size = g->mm.big_page_size;
-	gpu->compression_page_size = g->mm.compression_page_size;
-	gpu->pde_coverage_bit_count = g->mm.pde_stride_shift;
+	gpu->big_page_size = g->mm.pmu.vm.big_page_size;
+	gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
+	gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
 
 	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
 		| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b6a556ac..70f4294b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -98,15 +98,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
 				   int rw_flag);
-static int gk20a_init_system_vm(struct mm_gk20a *mm);
-static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
+static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
+static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
 
-/* note: keep the page sizes sorted lowest to highest here */
-static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
-static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
-static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
-						0x1ffffLL };
-static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
 
 struct gk20a_dmabuf_priv {
 	struct mutex lock;
@@ -290,7 +284,7 @@ void gk20a_remove_mm_support(struct mm_gk20a *mm)
 int gk20a_init_mm_setup_sw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	int i;
+	int err;
 
 	gk20a_dbg_fn("");
 
@@ -301,49 +295,19 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 
 	mm->g = g;
 	mutex_init(&mm->l2_op_lock);
-	mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
-	mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
-	mm->pde_stride = mm->big_page_size << 10;
-	mm->pde_stride_shift = ilog2(mm->pde_stride);
-	BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
-
-	for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
-
-		u32 num_ptes, pte_space, num_pages;
-
-		/* assuming "full" page tables */
-		num_ptes = mm->pde_stride / gmmu_page_sizes[i];
-
-		pte_space = num_ptes * gmmu_pte__size_v();
-		/* allocate whole pages */
-		pte_space = roundup(pte_space, PAGE_SIZE);
-
-		num_pages = pte_space / PAGE_SIZE;
-		/* make sure "order" is viable */
-		BUG_ON(!is_power_of_2(num_pages));
-
-		mm->page_table_sizing[i].num_ptes = num_ptes;
-		mm->page_table_sizing[i].order = ilog2(num_pages);
-	}
 
 	/*TBD: make channel vm size configurable */
 	mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
 
 	gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
 
-	gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
-		       gmmu_page_sizes[gmmu_page_size_small] >> 10,
-		       (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
-			gmmu_pte__size_v()) >> 10);
-
-	gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
-		       gmmu_page_sizes[gmmu_page_size_big] >> 10,
-		       (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
-			gmmu_pte__size_v()) >> 10);
-
+	err = gk20a_init_bar1_vm(mm);
+	if (err)
+		return err;
 
-	gk20a_init_bar1_vm(mm);
-	gk20a_init_system_vm(mm);
+	err = gk20a_init_system_vm(mm);
+	if (err)
+		return err;
 
 	/* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
 	g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
@@ -368,12 +332,9 @@ static int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
 	u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
 
-	if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
-		fb_mmu_ctrl = (fb_mmu_ctrl &
-			~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
-			fb_mmu_ctrl_vm_pg_size_128kb_f();
-	else
-		BUG_ON(1); /* no support/testing for larger ones yet */
+	fb_mmu_ctrl = (fb_mmu_ctrl &
+		~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
+		fb_mmu_ctrl_vm_pg_size_128kb_f();
 
 	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
 }
@@ -633,7 +594,7 @@ int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
 	gk20a_dbg_fn("");
 
 	/* allocate enough pages for the table */
-	pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
+	pte_order = vm->page_table_sizing[gmmu_pgsz_idx].order;
 
 	err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
 	if (err)
@@ -654,10 +615,10 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
 				u64 addr_lo, u64 addr_hi,
 				u32 *pde_lo, u32 *pde_hi)
 {
-	*pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
-	*pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
+	*pde_lo = (u32)(addr_lo >> vm->pde_stride_shift);
+	*pde_hi = (u32)(addr_hi >> vm->pde_stride_shift);
 	gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
-		  addr_lo, addr_hi, vm->mm->pde_stride_shift);
+		  addr_lo, addr_hi, vm->pde_stride_shift);
 	gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
 		  *pde_lo, *pde_hi);
 }
@@ -672,10 +633,11 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm,
 {
 	u32 ret;
 	/* mask off pde part */
-	addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
+	addr = addr & ((((u64)1) << vm->pde_stride_shift) - ((u64)1));
+
 	/* shift over to get pte index. note assumption that pte index
 	 * doesn't leak over into the high 32b */
-	ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
+	ret = (u32)(addr >> ilog2(vm->gmmu_page_sizes[pgsz_idx]));
 
 	gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
 	return ret;
@@ -715,7 +677,7 @@ int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
 		return 0;
 
 	gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
-		  gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
+		  vm->gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
 
 	err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
 	if (err)
@@ -854,18 +816,18 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 	int err;
 	u64 offset;
 	u32 start_page_nr = 0, num_pages;
-	u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
+	u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
-	if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
+	if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
 		dev_warn(dev_from_vm(vm),
 			 "invalid page size requested in gk20a vm alloc");
-		return -EINVAL;
+		return 0;
 	}
 
 	if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
 		dev_warn(dev_from_vm(vm),
			 "unsupportd page size requested");
-		return -EINVAL;
+		return 0;
 
 	}
 
@@ -874,10 +836,10 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 	size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
 
 	gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
-		       gmmu_page_sizes[gmmu_pgsz_idx]>>10);
+		       vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
 
 	/* The vma allocator represents page accounting. */
-	num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
+	num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
 
 	err = vma->alloc(vma, &start_page_nr, num_pages);
 
@@ -887,7 +849,8 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 		return 0;
 	}
 
-	offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
+	offset = (u64)start_page_nr <<
+		 ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
 	gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
 
 	return offset;
@@ -898,8 +861,8 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 		     enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
-	u32 page_size = gmmu_page_sizes[pgsz_idx];
-	u32 page_shift = gmmu_page_shifts[pgsz_idx];
+	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+	u32 page_shift = ilog2(page_size);
 	u32 start_page_nr, num_pages;
 	int err;
 
@@ -1011,26 +974,25 @@ struct buffer_attrs {
 	u8 uc_kind_v;
 };
 
-static void gmmu_select_page_size(struct buffer_attrs *bfr)
+static void gmmu_select_page_size(struct vm_gk20a *vm,
+				  struct buffer_attrs *bfr)
 {
 	int i;
 	/* choose the biggest first (top->bottom) */
-	for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
-		if (!(gmmu_page_offset_masks[i] & bfr->align)) {
-			/* would like to add this too but nvmap returns the
-			 * original requested size not the allocated size.
-			 * (!(gmmu_page_offset_masks[i] & bfr->size)) */
+	for (i = gmmu_nr_page_sizes-1; i >= 0; i--)
+		if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
 			bfr->pgsz_idx = i;
 			break;
 		}
 }
 
-static int setup_buffer_kind_and_compression(struct device *d,
+static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 					     u32 flags,
 					     struct buffer_attrs *bfr,
 					     enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	bool kind_compressible;
+	struct device *d = dev_from_gk20a(vm->mm->g);
 
 	if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
 		bfr->kind_v = gmmu_pte_kind_pitch_v();
@@ -1055,7 +1017,7 @@ static int setup_buffer_kind_and_compression(struct device *d,
 	}
 	/* comptags only supported for suitable kinds, 128KB pagesize */
 	if (unlikely(kind_compressible &&
-		     (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
+		     (vm->gmmu_page_sizes[pgsz_idx] != 128*1024))) {
 		/*
 		gk20a_warn(d, "comptags specified"
 		" but pagesize being used doesn't support it");*/
@@ -1088,7 +1050,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 		return -EINVAL;
 	}
 
-	if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
+	if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
 		gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
 			  map_offset);
 		return -EINVAL;
@@ -1381,7 +1343,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			gmmu_page_size_big : gmmu_page_size_small;
 	} else {
 		if (vm->big_pages)
-			gmmu_select_page_size(&bfr);
+			gmmu_select_page_size(vm, &bfr);
 		else
 			bfr.pgsz_idx = gmmu_page_size_small;
 	}
@@ -1398,7 +1360,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 		err = -EINVAL;
 		goto clean_up;
 	}
-	gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
+	gmmu_page_size = vm->gmmu_page_sizes[bfr.pgsz_idx];
 
 	/* Check if we should use a fixed offset for mapping this buffer */
 
@@ -1416,7 +1378,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	if (sgt)
 		*sgt = bfr.sgt;
 
-	err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
+	err = setup_buffer_kind_and_compression(vm, flags, &bfr, bfr.pgsz_idx);
 	if (unlikely(err)) {
 		gk20a_err(d, "failure setting up kind and compression");
 		goto clean_up;
@@ -1729,7 +1691,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
 	u32 ctag = ctag_offset;
 	u32 ctag_incr;
-	u32 page_size = gmmu_page_sizes[pgsz_idx];
+	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
 	u64 addr = 0;
 	u64 space_to_skip = buffer_offset;
 
@@ -1783,7 +1745,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			pte_lo = 0;
 
 		if ((pde_i != pde_hi) && (pde_hi != pde_lo))
-			pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
+			pte_hi = vm->page_table_sizing[pgsz_idx].num_ptes-1;
 		else
 			pte_hi = pte_index_from_vaddr(vm, last_vaddr,
 						      pgsz_idx);
@@ -1863,7 +1825,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			 * free/alloc/free/alloc.
 			 */
 			free_gmmu_pages(vm, pte->ref, pte->sgt,
-				vm->mm->page_table_sizing[pgsz_idx].order,
+				vm->page_table_sizing[pgsz_idx].order,
 				pte->size);
 			pte->ref = NULL;
 
@@ -1973,7 +1935,7 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 {
 	struct mm_gk20a *mm = vm->mm;
 	struct gk20a *g = mm->g;
-	u32 pgsz = gmmu_page_sizes[pgsz_idx];
+	u32 pgsz = vm->gmmu_page_sizes[pgsz_idx];
 	u32 i;
 	dma_addr_t iova;
 
@@ -1981,7 +1943,7 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 	if (!vm->zero_page_cpuva) {
 		int err = 0;
 		vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
-							 mm->big_page_size,
+							 vm->big_page_size,
 							 &iova,
 							 GFP_KERNEL);
 		if (!vm->zero_page_cpuva) {
@@ -1992,9 +1954,9 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 		vm->zero_page_iova = iova;
 		err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
 					vm->zero_page_cpuva, vm->zero_page_iova,
-					mm->big_page_size);
+					vm->big_page_size);
 		if (err) {
-			dma_free_coherent(&g->dev->dev, mm->big_page_size,
+			dma_free_coherent(&g->dev->dev, vm->big_page_size,
 					  vm->zero_page_cpuva,
 					  vm->zero_page_iova);
 			vm->zero_page_iova = 0;
@@ -2058,7 +2020,7 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 	u64 vaddr = mapped_buffer->addr;
 	u32 pgsz_idx = mapped_buffer->pgsz_idx;
 	u32 num_pages = mapped_buffer->size >>
-		gmmu_page_shifts[pgsz_idx];
+		ilog2(vm->gmmu_page_sizes[pgsz_idx]);
 
 	/* there is little we can do if this fails... */
 	if (g->ops.mm.put_empty) {
@@ -2082,7 +2044,8 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 			gk20a_mem_flag_none);
 
 	gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
-		  vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
+		  vm_aspace_id(vm),
+		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx],
 		  hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
 		  mapped_buffer->own_mem_ref);
 
@@ -2159,14 +2122,14 @@ void gk20a_vm_remove_support(struct vm_gk20a *vm)
 			&vm->pdes.ptes[gmmu_page_size_small][i];
 		if (pte->ref) {
 			free_gmmu_pages(vm, pte->ref, pte->sgt,
-				vm->mm->page_table_sizing[gmmu_page_size_small].order,
+				vm->page_table_sizing[gmmu_page_size_small].order,
 				pte->size);
 			pte->ref = NULL;
 		}
 		pte = &vm->pdes.ptes[gmmu_page_size_big][i];
 		if (pte->ref) {
 			free_gmmu_pages(vm, pte->ref, pte->sgt,
-				vm->mm->page_table_sizing[gmmu_page_size_big].order,
+				vm->page_table_sizing[gmmu_page_size_big].order,
 				pte->size);
 			pte->ref = NULL;
 		}
@@ -2184,7 +2147,7 @@ void gk20a_vm_remove_support(struct vm_gk20a *vm)
 
 	/* release zero page if used */
 	if (vm->zero_page_cpuva)
-		dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
+		dma_free_coherent(&g->dev->dev, vm->big_page_size,
 				  vm->zero_page_cpuva, vm->zero_page_iova);
 
 	/* vm is not used anymore. release it. */
@@ -2208,34 +2171,62 @@ void gk20a_vm_put(struct vm_gk20a *vm)
 	kref_put(&vm->ref, gk20a_vm_remove_support_kref);
 }
 
-/* address space interfaces for the gk20a module */
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
+static int gk20a_init_vm(struct mm_gk20a *mm,
+		struct vm_gk20a *vm,
+		u64 low_hole,
+		u64 aperture_size,
+		bool big_pages,
+		char *name)
 {
-	struct gk20a_as *as = as_share->as;
-	struct gk20a *g = gk20a_from_as(as);
-	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm;
-	u64 vma_size;
+	int err, i;
 	u32 num_pages, low_hole_pages;
-	char name[32];
-	int err;
+	char alloc_name[32];
+	size_t vma_size;
 
-	gk20a_dbg_fn("");
+	/* note: keep the page sizes sorted lowest to highest here */
+	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
 
-	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
-	if (!vm)
-		return -ENOMEM;
+	vm->mm = mm;
 
-	as_share->vm = vm;
+	vm->va_start = low_hole;
+	vm->va_limit = aperture_size;
+	vm->big_pages = big_pages;
 
-	vm->mm = mm;
-	vm->as_share = as_share;
+	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
+	vm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
+	vm->pde_stride = vm->big_page_size << 10;
+	vm->pde_stride_shift = ilog2(vm->pde_stride);
+
+	for (i = 0; i < gmmu_nr_page_sizes; i++) {
+		u32 num_ptes, pte_space, num_pages;
+
+		vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
+
+		/* assuming "full" page tables */
+		num_ptes = vm->pde_stride / gmmu_page_sizes[i];
+
+		pte_space = num_ptes * gmmu_pte__size_v();
+		/* allocate whole pages */
+		pte_space = roundup(pte_space, PAGE_SIZE);
+
+		num_pages = pte_space / PAGE_SIZE;
+		/* make sure "order" is viable */
+		BUG_ON(!is_power_of_2(num_pages));
 
-	vm->big_pages = true;
+		vm->page_table_sizing[i].num_ptes = num_ptes;
+		vm->page_table_sizing[i].order = ilog2(num_pages);
+	}
+
+	gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
+			vm->gmmu_page_sizes[gmmu_page_size_small] >> 10,
			(vm->page_table_sizing[gmmu_page_size_small].num_ptes *
+			 gmmu_pte__size_v()) >> 10);
+
+	gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
+			vm->gmmu_page_sizes[gmmu_page_size_big] >> 10,
+			(vm->page_table_sizing[gmmu_page_size_big].num_ptes *
+			 gmmu_pte__size_v()) >> 10);
 
-	vm->va_start = mm->pde_stride; /* create a one pde hole */
-	vm->va_limit = mm->channel.size; /* note this means channel.size is
-					    really just the max */
 	{
 		u32 pde_lo, pde_hi;
 		pde_range_from_vaddr_range(vm,
@@ -2248,61 +2239,75 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
 		kzalloc(sizeof(struct page_table_gk20a) *
 			vm->pdes.num_pdes, GFP_KERNEL);
 
+	if (!vm->pdes.ptes[gmmu_page_size_small]) {
+		err = -ENOMEM;
+		goto clean_up_pdes;
+	}
+
 	vm->pdes.ptes[gmmu_page_size_big] =
 		kzalloc(sizeof(struct page_table_gk20a) *
 			vm->pdes.num_pdes, GFP_KERNEL);
 
-	if (!(vm->pdes.ptes[gmmu_page_size_small] &&
-	      vm->pdes.ptes[gmmu_page_size_big]))
-		return -ENOMEM;
+	if (!vm->pdes.ptes[gmmu_page_size_big]) {
+		err = -ENOMEM;
+		goto clean_up_pdes;
+	}
 
-	gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
-		   vm->va_limit, vm->pdes.num_pdes);
+	gk20a_dbg_info("init space for %s va_limit=0x%llx num_pdes=%d",
+		   name, vm->va_limit, vm->pdes.num_pdes);
 
 	/* allocate the page table directory */
 	err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
 			       &vm->pdes.sgt, &vm->pdes.size);
 	if (err)
-		return -ENOMEM;
+		goto clean_up_pdes;
 
 	err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
 			     vm->pdes.size);
 	if (err) {
-		free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
-				vm->pdes.size);
-		return -ENOMEM;
+		goto clean_up_ptes;
 	}
-	gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
-			vm->pdes.kv,
-			gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
+	gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
			vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
 	/* we could release vm->pdes.kv but it's only one page... */
 
-
 	/* low-half: alloc small pages */
 	/* high-half: alloc big pages */
-	vma_size = mm->channel.size >> 1;
+	vma_size = vm->va_limit;
+	if (big_pages)
+		vma_size /= 2;
 
-	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-		 gmmu_page_sizes[gmmu_page_size_small]>>10);
-	num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
+	num_pages = (u32)(vma_size >>
+		    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
 
 	/* num_pages above is without regard to the low-side hole. */
 	low_hole_pages = (vm->va_start >>
-			  gmmu_page_shifts[gmmu_page_size_small]);
+			  ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
 
-	gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
-			     low_hole_pages,             /* start */
-			     num_pages - low_hole_pages, /* length */
-			     1);                         /* align */
-
-	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-		 gmmu_page_sizes[gmmu_page_size_big]>>10);
-
-	num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
-	gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
-			      num_pages, /* start */
-			      num_pages, /* length */
-			      1); /* align */
+	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
+		 vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
+	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+			alloc_name,
+			low_hole_pages,		   /*start*/
+			num_pages - low_hole_pages,/* length*/
+			1);			   /* align */
+	if (err)
+		goto clean_up_map_pde;
+
+	if (big_pages) {
+		num_pages = (u32)((vm->va_limit / 2) >>
+			ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
+			 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
+		err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+				alloc_name,
+				num_pages, /* start */
+				num_pages, /* length */
+				1); /* align */
+		if (err)
+			goto clean_up_small_allocator;
+	}
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -2310,12 +2315,48 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
 	kref_init(&vm->ref);
 	INIT_LIST_HEAD(&vm->reserved_va_list);
 
+	return 0;
+
+clean_up_small_allocator:
+	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_map_pde:
+	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
+clean_up_ptes:
+	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+			vm->pdes.size);
+clean_up_pdes:
+	kfree(vm->pdes.ptes[gmmu_page_size_small]);
+	kfree(vm->pdes.ptes[gmmu_page_size_big]);
+	return err;
+}
+
+/* address space interfaces for the gk20a module */
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
+{
+	struct gk20a_as *as = as_share->as;
+	struct gk20a *g = gk20a_from_as(as);
+	struct mm_gk20a *mm = &g->mm;
+	struct vm_gk20a *vm;
+	char name[32];
+	int err;
+
+	gk20a_dbg_fn("");
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return -ENOMEM;
+
+	as_share->vm = vm;
+	vm->as_share = as_share;
 	vm->enable_ctag = true;
 
+	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
+	err = gk20a_init_vm(mm, vm,
+			SZ_128K << 10, mm->channel.size, true, name);
+
 	return 0;
 }
 
-
 int gk20a_vm_release_share(struct gk20a_as_share *as_share)
 {
 	struct vm_gk20a *vm = as_share->vm;
@@ -2353,7 +2394,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	for (pgsz_idx = gmmu_page_size_small;
 	     pgsz_idx < gmmu_nr_page_sizes;
 	     pgsz_idx++) {
-		if (gmmu_page_sizes[pgsz_idx] == args->page_size)
+		if (vm->gmmu_page_sizes[pgsz_idx] == args->page_size)
 			break;
 	}
 
@@ -2378,7 +2419,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 	start_page_nr = 0;
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
 		start_page_nr = (u32)(args->o_a.offset >>
-				      gmmu_page_shifts[pgsz_idx]);
+				      ilog2(vm->gmmu_page_sizes[pgsz_idx]));
 
 	vma = &vm->vma[pgsz_idx];
 	err = vma->alloc(vma, &start_page_nr, args->pages);
@@ -2387,7 +2428,8 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 		goto clean_up;
 	}
 
-	vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
+	vaddr_start = (u64)start_page_nr <<
+		      ilog2(vm->gmmu_page_sizes[pgsz_idx]);
 
 	va_node->vaddr_start = vaddr_start;
 	va_node->size = (u64)args->page_size * (u64)args->pages;
@@ -2438,7 +2480,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	for (pgsz_idx = gmmu_page_size_small;
 	     pgsz_idx < gmmu_nr_page_sizes;
 	     pgsz_idx++) {
-		if (gmmu_page_sizes[pgsz_idx] == args->page_size)
+		if (vm->gmmu_page_sizes[pgsz_idx] == args->page_size)
 			break;
 	}
 
@@ -2448,7 +2490,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 	}
 
 	start_page_nr = (u32)(args->offset >>
-			      gmmu_page_shifts[pgsz_idx]);
+			      ilog2(vm->gmmu_page_sizes[pgsz_idx]));
 
 	vma = &vm->vma[pgsz_idx];
 	err = vma->free(vma, start_page_nr, args->pages);
@@ -2643,6 +2685,17 @@ int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
 	return 0;
 }
 
+static void gk20a_deinit_vm(struct vm_gk20a *vm)
+{
+	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
+	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+			vm->pdes.size);
+	kfree(vm->pdes.ptes[gmmu_page_size_small]);
+	kfree(vm->pdes.ptes[gmmu_page_size_big]);
+}
+
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 {
 	int err;
@@ -2650,73 +2703,24 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	void *inst_ptr;
 	struct vm_gk20a *vm = &mm->bar1.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
-	struct device *d = dev_from_gk20a(g);
-	struct inst_desc *inst_block = &mm->bar1.inst_block;
 	u64 pde_addr;
 	u32 pde_addr_lo;
 	u32 pde_addr_hi;
+	struct device *d = dev_from_gk20a(g);
+	struct inst_desc *inst_block = &mm->bar1.inst_block;
 	dma_addr_t iova;
 
-	vm->mm = mm;
-
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
-
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
+	gk20a_init_vm(mm, vm, SZ_4K, mm->bar1.aperture_size, false, "bar1");
 
-	vm->va_start = mm->pde_stride * 1;
-	vm->va_limit = mm->bar1.aperture_size;
-
-	{
-		u32 pde_lo, pde_hi;
-		pde_range_from_vaddr_range(vm,
-					   0, vm->va_limit-1,
-					   &pde_lo, &pde_hi);
-		vm->pdes.num_pdes = pde_hi + 1;
-	}
-
-	/* bar1 is likely only to ever use/need small page sizes. */
-	/* But just in case, for now... arrange for both.*/
-	vm->pdes.ptes[gmmu_page_size_small] =
-		kzalloc(sizeof(struct page_table_gk20a) *
-			vm->pdes.num_pdes, GFP_KERNEL);
-
-	vm->pdes.ptes[gmmu_page_size_big] =
-		kzalloc(sizeof(struct page_table_gk20a) *
-			vm->pdes.num_pdes, GFP_KERNEL);
-
-	if (!(vm->pdes.ptes[gmmu_page_size_small] &&
-	      vm->pdes.ptes[gmmu_page_size_big]))
-		return -ENOMEM;
-
-	gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
-		   vm->va_limit, vm->pdes.num_pdes);
-
-
-	/* allocate the page table directory */
-	err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
-			       &vm->pdes.sgt, &vm->pdes.size);
-	if (err)
-		goto clean_up;
-
-	err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
-			     vm->pdes.size);
-	if (err) {
-		free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
-				vm->pdes.size);
-		goto clean_up;
-	}
-	gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
-			vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
-	/* we could release vm->pdes.kv but it's only one page... */
+	gk20a_dbg_info("pde pa=0x%llx",
+			(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
 
 	pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
-	pde_addr_lo = u64_lo32(pde_addr >> 12);
+	pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v());
 	pde_addr_hi = u64_hi32(pde_addr);
 
-	gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
-		(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
-		pde_addr_lo, pde_addr_hi);
-
 	/* allocate instance mem for bar1 */
 	inst_block->size = ram_in_alloc_size_v();
 	inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
@@ -2724,7 +2728,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	if (!inst_block->cpuva) {
 		gk20a_err(d, "%s: memory allocation failed\n", __func__);
 		err = -ENOMEM;
-		goto clean_up;
+		goto clean_up_va;
 	}
 
 	inst_block->iova = iova;
@@ -2732,7 +2736,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	if (!inst_block->cpu_pa) {
 		gk20a_err(d, "%s: failed to get phys address\n", __func__);
 		err = -ENOMEM;
-		goto clean_up;
+		goto clean_up_inst_block;
 	}
 
 	inst_pa = inst_block->cpu_pa;
@@ -2741,7 +2745,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p",
 		(u64)inst_pa, inst_ptr);
 
-	memset(inst_ptr, 0, ram_fc_size_val_v());
+	memset(inst_ptr, 0, inst_block->size);
 
 	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
 		ram_in_page_dir_base_target_vid_mem_f() |
@@ -2758,31 +2762,16 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
 
 	gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
-	gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
-			     1,/*start*/
-			     (vm->va_limit >> 12) - 1 /* length*/,
-			     1); /* align */
-	/* initialize just in case we try to use it anyway */
-	gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
-			     0x0badc0de, /* start */
-			     1, /* length */
-			     1); /* align */
-
-	vm->mapped_buffers = RB_ROOT;
-
-	mutex_init(&vm->update_gmmu_lock);
-	kref_init(&vm->ref);
-	INIT_LIST_HEAD(&vm->reserved_va_list);
-
 	return 0;
 
-clean_up:
-	/* free, etc */
+clean_up_inst_block:
 	if (inst_block->cpuva)
 		dma_free_coherent(d, inst_block->size,
 			inst_block->cpuva, inst_block->iova);
 	inst_block->cpuva = NULL;
 	inst_block->iova = 0;
+clean_up_va:
+	gk20a_deinit_vm(vm);
 	return err;
 }
 
@@ -2794,79 +2783,34 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	void *inst_ptr;
 	struct vm_gk20a *vm = &mm->pmu.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
-	struct device *d = dev_from_gk20a(g);
-	struct inst_desc *inst_block = &mm->pmu.inst_block;
 	u64 pde_addr;
 	u32 pde_addr_lo;
 	u32 pde_addr_hi;
+	struct device *d = dev_from_gk20a(g);
+	struct inst_desc *inst_block = &mm->pmu.inst_block;
 	dma_addr_t iova;
 
-	vm->mm = mm;
-
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
-
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
-	vm->va_start = GK20A_PMU_VA_START;
-	vm->va_limit = vm->va_start + mm->pmu.aperture_size;
-
-	{
-		u32 pde_lo, pde_hi;
-		pde_range_from_vaddr_range(vm,
-					   0, vm->va_limit-1,
-					   &pde_lo, &pde_hi);
-		vm->pdes.num_pdes = pde_hi + 1;
-	}
-
-	/* The pmu is likely only to ever use/need small page sizes. */
-	/* But just in case, for now... arrange for both.*/
-	vm->pdes.ptes[gmmu_page_size_small] =
-		kzalloc(sizeof(struct page_table_gk20a) *
-			vm->pdes.num_pdes, GFP_KERNEL);
-
-	vm->pdes.ptes[gmmu_page_size_big] =
-		kzalloc(sizeof(struct page_table_gk20a) *
-			vm->pdes.num_pdes, GFP_KERNEL);
-
-	if (!(vm->pdes.ptes[gmmu_page_size_small] &&
-	      vm->pdes.ptes[gmmu_page_size_big]))
-		return -ENOMEM;
-
-	gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
-		   vm->va_limit, vm->pdes.num_pdes);
-
-	/* allocate the page table directory */
-	err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
-			       &vm->pdes.sgt, &vm->pdes.size);
-	if (err)
-		goto clean_up;
+	gk20a_init_vm(mm, vm,
+			SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system");
 
-	err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
-			     vm->pdes.size);
-	if (err) {
-		free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
-				vm->pdes.size);
-		goto clean_up;
-	}
-	gk20a_dbg_info("pmu pdes phys @ 0x%llx",
-		       (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
-	/* we could release vm->pdes.kv but it's only one page... */
+	gk20a_dbg_info("pde pa=0x%llx",
+			(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
 
 	pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
-	pde_addr_lo = u64_lo32(pde_addr >> 12);
+	pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v());
 	pde_addr_hi = u64_hi32(pde_addr);
 
-	gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
-			(u64)pde_addr, pde_addr_lo, pde_addr_hi);
-
 	/* allocate instance mem for pmu */
-	inst_block->size = GK20A_PMU_INST_SIZE;
+	inst_block->size = ram_in_alloc_size_v();
 	inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
 					&iova, GFP_KERNEL);
 	if (!inst_block->cpuva) {
 		gk20a_err(d, "%s: memory allocation failed\n", __func__);
 		err = -ENOMEM;
-		goto clean_up;
+		goto clean_up_va;
 	}
 
 	inst_block->iova = iova;
@@ -2874,7 +2818,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	if (!inst_block->cpu_pa) {
 		gk20a_err(d, "%s: failed to get phys address\n", __func__);
 		err = -ENOMEM;
-		goto clean_up;
+		goto clean_up_inst_block;
 	}
 
 	inst_pa = inst_block->cpu_pa;
@@ -2882,7 +2826,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 
 	gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
 
-	memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
+	memset(inst_ptr, 0, inst_block->size);
 
 	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
 		ram_in_page_dir_base_target_vid_mem_f() |
@@ -2898,32 +2842,16 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
 
-	gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
-			     (vm->va_start >> 12), /* start */
-			     (vm->va_limit - vm->va_start) >> 12, /*length*/
-			     1); /* align */
-	/* initialize just in case we try to use it anyway */
-	gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
-			     0x0badc0de, /* start */
-			     1, /* length */
-			     1); /* align */
-
-
-	vm->mapped_buffers = RB_ROOT;
-
-	mutex_init(&vm->update_gmmu_lock);
-	kref_init(&vm->ref);
-	INIT_LIST_HEAD(&vm->reserved_va_list);
-
 	return 0;
 
-clean_up:
-	/* free, etc */
+clean_up_inst_block:
 	if (inst_block->cpuva)
 		dma_free_coherent(d, inst_block->size,
 			inst_block->cpuva, inst_block->iova);
 	inst_block->cpuva = NULL;
 	inst_block->iova = 0;
+clean_up_va:
+	gk20a_deinit_vm(vm);
 	return err;
 }
 
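A note on the error handling introduced above: gk20a_init_vm() replaces the old single clean_up: label with staged labels (clean_up_small_allocator, clean_up_map_pde, clean_up_ptes, clean_up_pdes), and the BAR1/system initializers gain clean_up_inst_block/clean_up_va. This is the standard kernel goto-unwind pattern, where each failure releases only what was already set up, in reverse order. A schematic C sketch; alloc_a/alloc_b/alloc_c and free_a/free_b are hypothetical stand-ins, not driver functions:

/* Schematic of the staged goto-unwind used by gk20a_init_vm(). */
static int alloc_a(void) { return 0; }	/* e.g. the pde page-table arrays */
static int alloc_b(void) { return 0; }	/* e.g. mapping the pde pages */
static int alloc_c(void) { return 0; }	/* e.g. the small-page allocator */
static void free_b(void) { }
static void free_a(void) { }

static int init_staged(void)
{
	int err;

	err = alloc_a();
	if (err)
		return err;		/* nothing acquired yet, plain return */

	err = alloc_b();
	if (err)
		goto clean_up_a;	/* undo only a */

	err = alloc_c();
	if (err)
		goto clean_up_b;	/* undo b, then fall through to a */

	return 0;

clean_up_b:
	free_b();
clean_up_a:
	free_a();
	return err;
}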
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 6c46e113..b28daef7 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -296,6 +296,16 @@ struct vm_gk20a {
 	bool tlb_dirty;
 	bool mapped;
 
+	u32 compression_page_size;
+	u32 big_page_size;
+	u32 pde_stride;
+	u32 pde_stride_shift;
+
+	struct {
+		u32 order;
+		u32 num_ptes;
+	} page_table_sizing[gmmu_nr_page_sizes];
+
 	struct kref ref;
 
 	struct mutex update_gmmu_lock;
@@ -314,6 +324,7 @@ struct vm_gk20a {
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	u64 handle;
 #endif
+	u32 gmmu_page_sizes[gmmu_nr_page_sizes];
 };
 
 struct gk20a;
@@ -329,17 +340,6 @@ void gk20a_mm_l2_invalidate(struct gk20a *g);
 struct mm_gk20a {
 	struct gk20a *g;
 
-	u32 compression_page_size;
-	u32 big_page_size;
-	u32 pde_stride;
-	u32 pde_stride_shift;
-
-	struct {
-		u32 order;
-		u32 num_ptes;
-	} page_table_sizing[gmmu_nr_page_sizes];
-
-
 	struct {
 		u64 size;
 	} channel;
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 0745136c..6dd1ad3b 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -22,9 +22,7 @@
 #define __PMU_GK20A_H__
 
 /* defined by pmu hw spec */
-#define GK20A_PMU_VA_START		((128 * 1024) << 10)
 #define GK20A_PMU_VA_SIZE		(512 * 1024 * 1024)
-#define GK20A_PMU_INST_SIZE		(4 * 1024)
 #define GK20A_PMU_UCODE_SIZE_MAX	(256 * 1024)
 #define GK20A_PMU_SEQ_BUF_SIZE		4096
 
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 278ae9a6..b4622c0b 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -20,12 +20,6 @@
 #include "hw_fb_gm20b.h"
 #include "hw_gr_gm20b.h"
 
-static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
-static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
-static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
-						0x1ffffLL };
-static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
-
 static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 				     enum gmmu_pgsz_gk20a pgsz_idx,
 				     u64 first_vaddr, u64 last_vaddr,
@@ -97,9 +91,9 @@ static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo,
 
 	gk20a_dbg_fn("");
 
-	pde_vaddr_lo = (u64)pde << vm->mm->pde_stride_shift;
+	pde_vaddr_lo = (u64)pde << vm->pde_stride_shift;
 	pde_vaddr_hi = pde_vaddr_lo |
-		((0x1UL << (vm->mm->pde_stride_shift)) - 1);
+		((0x1UL << (vm->pde_stride_shift)) - 1);
 
 	return ((vaddr_lo <= pde_vaddr_lo) && (vaddr_hi) >= pde_vaddr_hi);
 }
@@ -108,8 +102,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 		u32 num_pages, u32 pgsz_idx, bool refplus)
 {
 	struct mm_gk20a *mm = vm->mm;
-	u32 pgsz = gmmu_page_sizes[pgsz_idx];
-	u32 pde_shift = vm->mm->pde_stride_shift;
+	u32 pgsz = vm->gmmu_page_sizes[pgsz_idx];
+	u32 pde_shift = vm->pde_stride_shift;
 	u64 vaddr_hi;
 	u64 vaddr_pde_start;
 	u32 i;
@@ -127,7 +121,7 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
 			"pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
 			vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
-			vm->mm->pde_stride_shift);
+			vm->pde_stride_shift);
 
 	for (i = pde_lo; i <= pde_hi; i++) {
 		/* Mark all ptes as sparse. */
@@ -240,7 +234,7 @@ void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
 	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
 			"pde_hi: 0x%x, pgsz_idx: %d, pde_stride_shift: %d",
 			vaddr, vaddr_hi, pde_lo, pde_hi, pgsz_idx,
-			vm->mm->pde_stride_shift);
+			vm->pde_stride_shift);
 
 	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
 		struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
@@ -248,7 +242,7 @@ void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 		if (pte->ref_cnt == 0) {
 			free_gmmu_pages(vm, pte->ref, pte->sgt,
-				vm->mm->page_table_sizing[pgsz_idx].order,
+				vm->page_table_sizing[pgsz_idx].order,
 				pte->size);
 			pte->ref = NULL;
 			update_gmmu_pde_locked(vm, pde_i);
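One pattern repeated across both mm_gk20a.c and mm_gm20b.c above: the deleted gmmu_page_shifts[] and gmmu_page_offset_masks[] tables are re-derived from the per-VM page size, which is valid because both supported sizes are powers of two. A minimal sketch of the two identities (hypothetical helper names; the patch open-codes these expressions inline):

#include <linux/log2.h>
#include <linux/types.h>

/* page-size shift: SZ_4K -> 12, SZ_128K -> 17 (was gmmu_page_shifts[]) */
static inline u32 pgsz_shift(u32 page_size)
{
	return ilog2(page_size);
}

/* offset mask: SZ_4K -> 0xfff, SZ_128K -> 0x1ffff
 * (was gmmu_page_offset_masks[]) */
static inline u64 pgsz_offset_mask(u32 page_size)
{
	return (u64)page_size - 1;
}

The derived values match the removed constant tables exactly, so holding only vm->gmmu_page_sizes[] per VM loses no information.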