diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 658 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 37 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 155 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 10 |
6 files changed, 382 insertions, 497 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 57d5f09a..76237fae 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A Graphics | 2 | * GK20A Graphics |
3 | * | 3 | * |
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -1789,7 +1789,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
1789 | 1789 | ||
1790 | gpu->big_page_size = g->mm.pmu.vm.big_page_size; | 1790 | gpu->big_page_size = g->mm.pmu.vm.big_page_size; |
1791 | gpu->compression_page_size = g->ops.fb.compression_page_size(g); | 1791 | gpu->compression_page_size = g->ops.fb.compression_page_size(g); |
1792 | gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift; | 1792 | gpu->pde_coverage_bit_count = |
1793 | gk20a_mm_pde_coverage_bit_count(&g->mm.pmu.vm); | ||
1793 | 1794 | ||
1794 | gpu->available_big_page_sizes = gpu->big_page_size; | 1795 | gpu->available_big_page_sizes = gpu->big_page_size; |
1795 | if (g->ops.mm.get_big_page_sizes) | 1796 | if (g->ops.mm.get_big_page_sizes) |
@@ -1798,7 +1799,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
1798 | gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS | 1799 | gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS |
1799 | | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS; | 1800 | | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS; |
1800 | 1801 | ||
1801 | if (g->ops.mm.set_sparse) | 1802 | if (g->ops.mm.support_sparse && g->ops.mm.support_sparse(g)) |
1802 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS; | 1803 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS; |
1803 | 1804 | ||
1804 | if (IS_ENABLED(CONFIG_TEGRA_GK20A) && | 1805 | if (IS_ENABLED(CONFIG_TEGRA_GK20A) && |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index fa80f010..ef8068e5 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -296,8 +296,7 @@ struct gpu_ops { | |||
296 | bool (*is_fw_defined)(void); | 296 | bool (*is_fw_defined)(void); |
297 | } gr_ctx; | 297 | } gr_ctx; |
298 | struct { | 298 | struct { |
299 | int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr, | 299 | bool (*support_sparse)(struct gk20a *g); |
300 | u32 num_pages, u32 pgsz_idx, bool refplus); | ||
301 | bool (*is_debug_mode_enabled)(struct gk20a *g); | 300 | bool (*is_debug_mode_enabled)(struct gk20a *g); |
302 | u64 (*gmmu_map)(struct vm_gk20a *vm, | 301 | u64 (*gmmu_map)(struct vm_gk20a *vm, |
303 | u64 map_offset, | 302 | u64 map_offset, |
@@ -309,13 +308,15 @@ struct gpu_ops { | |||
309 | u32 ctag_offset, | 308 | u32 ctag_offset, |
310 | u32 flags, | 309 | u32 flags, |
311 | int rw_flag, | 310 | int rw_flag, |
312 | bool clear_ctags); | 311 | bool clear_ctags, |
312 | bool sparse); | ||
313 | void (*gmmu_unmap)(struct vm_gk20a *vm, | 313 | void (*gmmu_unmap)(struct vm_gk20a *vm, |
314 | u64 vaddr, | 314 | u64 vaddr, |
315 | u64 size, | 315 | u64 size, |
316 | int pgsz_idx, | 316 | int pgsz_idx, |
317 | bool va_allocated, | 317 | bool va_allocated, |
318 | int rw_flag); | 318 | int rw_flag, |
319 | bool sparse); | ||
319 | void (*vm_remove)(struct vm_gk20a *vm); | 320 | void (*vm_remove)(struct vm_gk20a *vm); |
320 | int (*vm_alloc_share)(struct gk20a_as_share *as_share, | 321 | int (*vm_alloc_share)(struct gk20a_as_share *as_share, |
321 | u32 flags); | 322 | u32 flags); |
@@ -331,6 +332,9 @@ struct gpu_ops { | |||
331 | u32 (*get_physical_addr_bits)(struct gk20a *g); | 332 | u32 (*get_physical_addr_bits)(struct gk20a *g); |
332 | int (*init_bar2_vm)(struct gk20a *g); | 333 | int (*init_bar2_vm)(struct gk20a *g); |
333 | int (*init_bar2_mm_hw_setup)(struct gk20a *g); | 334 | int (*init_bar2_mm_hw_setup)(struct gk20a *g); |
335 | const struct gk20a_mmu_level * | ||
336 | (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); | ||
337 | void (*init_pdb)(struct gk20a *g, void *inst_ptr, u64 pdb_addr); | ||
334 | } mm; | 338 | } mm; |
335 | struct { | 339 | struct { |
336 | int (*prepare_ucode)(struct gk20a *g); | 340 | int (*prepare_ucode)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 80c766b6..d8bd3e70 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -98,7 +98,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
98 | struct sg_table *sgt, u64 buffer_offset, | 98 | struct sg_table *sgt, u64 buffer_offset, |
99 | u64 first_vaddr, u64 last_vaddr, | 99 | u64 first_vaddr, u64 last_vaddr, |
100 | u8 kind_v, u32 ctag_offset, bool cacheable, | 100 | u8 kind_v, u32 ctag_offset, bool cacheable, |
101 | int rw_flag); | 101 | int rw_flag, |
102 | bool sparse); | ||
102 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); | 103 | static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm); |
103 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); | 104 | static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm); |
104 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); | 105 | static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm); |
@@ -605,34 +606,46 @@ void unmap_gmmu_pages(struct gk20a_mm_entry *entry) | |||
605 | 606 | ||
606 | static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm, | 607 | static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm, |
607 | enum gmmu_pgsz_gk20a pgsz_idx, | 608 | enum gmmu_pgsz_gk20a pgsz_idx, |
609 | const struct gk20a_mmu_level *l, | ||
608 | struct gk20a_mm_entry *entry) | 610 | struct gk20a_mm_entry *entry) |
609 | { | 611 | { |
610 | int err; | 612 | int err; |
611 | u32 pte_order; | 613 | int order; |
612 | 614 | ||
613 | gk20a_dbg_fn(""); | 615 | gk20a_dbg_fn(""); |
614 | 616 | ||
615 | /* allocate enough pages for the table */ | 617 | /* allocate enough pages for the table */ |
616 | pte_order = vm->page_table_sizing[pgsz_idx].order; | 618 | order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1; |
619 | order += ilog2(l->entry_size); | ||
620 | order -= PAGE_SHIFT; | ||
621 | order = max(0, order); | ||
617 | 622 | ||
618 | err = alloc_gmmu_pages(vm, pte_order, entry); | 623 | err = alloc_gmmu_pages(vm, order, entry); |
619 | gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d", | 624 | gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d", |
620 | entry, gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl), | 625 | entry, gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl), order); |
621 | pte_order); | 626 | if (err) |
627 | return err; | ||
622 | entry->pgsz = pgsz_idx; | 628 | entry->pgsz = pgsz_idx; |
623 | 629 | ||
624 | return err; | 630 | return err; |
625 | } | 631 | } |
626 | 632 | ||
633 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm) | ||
634 | { | ||
635 | return vm->mmu_levels[0].lo_bit[0]; | ||
636 | } | ||
637 | |||
627 | /* given address range (inclusive) determine the pdes crossed */ | 638 | /* given address range (inclusive) determine the pdes crossed */ |
628 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, | 639 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, |
629 | u64 addr_lo, u64 addr_hi, | 640 | u64 addr_lo, u64 addr_hi, |
630 | u32 *pde_lo, u32 *pde_hi) | 641 | u32 *pde_lo, u32 *pde_hi) |
631 | { | 642 | { |
632 | *pde_lo = (u32)(addr_lo >> vm->pde_stride_shift); | 643 | int pde_shift = gk20a_mm_pde_coverage_bit_count(vm); |
633 | *pde_hi = (u32)(addr_hi >> vm->pde_stride_shift); | 644 | |
645 | *pde_lo = (u32)(addr_lo >> pde_shift); | ||
646 | *pde_hi = (u32)(addr_hi >> pde_shift); | ||
634 | gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d", | 647 | gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d", |
635 | addr_lo, addr_hi, vm->pde_stride_shift); | 648 | addr_lo, addr_hi, pde_shift); |
636 | gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d", | 649 | gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d", |
637 | *pde_lo, *pde_hi); | 650 | *pde_lo, *pde_hi); |
638 | } | 651 | } |
@@ -647,7 +660,7 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm, | |||
647 | { | 660 | { |
648 | u32 ret; | 661 | u32 ret; |
649 | /* mask off pde part */ | 662 | /* mask off pde part */ |
650 | addr = addr & ((((u64)1) << vm->pde_stride_shift) - ((u64)1)); | 663 | addr = addr & ((1ULL << gk20a_mm_pde_coverage_bit_count(vm)) - 1ULL); |
651 | 664 | ||
652 | /* shift over to get pte index. note assumption that pte index | 665 | /* shift over to get pte index. note assumption that pte index |
653 | * doesn't leak over into the high 32b */ | 666 | * doesn't leak over into the high 32b */ |
@@ -657,57 +670,6 @@ u32 pte_index_from_vaddr(struct vm_gk20a *vm, | |||
657 | return ret; | 670 | return ret; |
658 | } | 671 | } |
659 | 672 | ||
660 | static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page, | ||
661 | u32 *pte_offset) | ||
662 | { | ||
663 | /* ptes are 8B regardless of pagesize */ | ||
664 | /* pte space pages are 4KB. so 512 ptes per 4KB page*/ | ||
665 | *pte_page = i >> 9; | ||
666 | |||
667 | /* this offset is a pte offset, not a byte offset */ | ||
668 | *pte_offset = i & ((1<<9)-1); | ||
669 | |||
670 | gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x", | ||
671 | i, *pte_page, *pte_offset); | ||
672 | } | ||
673 | |||
674 | |||
675 | /* | ||
676 | * given a pde index/page table number make sure it has | ||
677 | * backing store and if not go ahead allocate it and | ||
678 | * record it in the appropriate pde | ||
679 | */ | ||
680 | int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, | ||
681 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | ||
682 | { | ||
683 | int err; | ||
684 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; | ||
685 | |||
686 | gk20a_dbg_fn(""); | ||
687 | |||
688 | /* if it's already in place it's valid */ | ||
689 | if (entry->size) | ||
690 | return 0; | ||
691 | |||
692 | gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d", | ||
693 | vm->gmmu_page_sizes[gmmu_pgsz_idx]/1024, i); | ||
694 | |||
695 | err = gk20a_zalloc_gmmu_page_table(vm, gmmu_pgsz_idx, entry); | ||
696 | if (err) | ||
697 | return err; | ||
698 | |||
699 | /* rewrite pde */ | ||
700 | err = map_gmmu_pages(&vm->pdb); | ||
701 | if (err) | ||
702 | return err; | ||
703 | |||
704 | update_gmmu_pde_locked(vm, i); | ||
705 | |||
706 | unmap_gmmu_pages(&vm->pdb); | ||
707 | |||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, | 673 | static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, |
712 | u64 addr) | 674 | u64 addr) |
713 | { | 675 | { |
@@ -1117,11 +1079,11 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
1117 | u32 ctag_offset, | 1079 | u32 ctag_offset, |
1118 | u32 flags, | 1080 | u32 flags, |
1119 | int rw_flag, | 1081 | int rw_flag, |
1120 | bool clear_ctags) | 1082 | bool clear_ctags, |
1083 | bool sparse) | ||
1121 | { | 1084 | { |
1122 | int err = 0, i = 0; | 1085 | int err = 0; |
1123 | bool allocated = false; | 1086 | bool allocated = false; |
1124 | u32 pde_lo, pde_hi; | ||
1125 | struct device *d = dev_from_vm(vm); | 1087 | struct device *d = dev_from_vm(vm); |
1126 | struct gk20a *g = gk20a_from_vm(vm); | 1088 | struct gk20a *g = gk20a_from_vm(vm); |
1127 | int ctag_granularity = g->ops.fb.compression_page_size(g); | 1089 | int ctag_granularity = g->ops.fb.compression_page_size(g); |
@@ -1146,31 +1108,16 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
1146 | allocated = true; | 1108 | allocated = true; |
1147 | } | 1109 | } |
1148 | 1110 | ||
1149 | pde_range_from_vaddr_range(vm, | ||
1150 | map_offset, | ||
1151 | map_offset + size - 1, | ||
1152 | &pde_lo, &pde_hi); | ||
1153 | |||
1154 | /* mark the addr range valid (but with 0 phys addr, which will fault) */ | ||
1155 | for (i = pde_lo; i <= pde_hi; i++) { | ||
1156 | err = validate_gmmu_page_table_gk20a_locked(vm, i, | ||
1157 | pgsz_idx); | ||
1158 | if (err) { | ||
1159 | gk20a_err(d, "failed to validate page table %d: %d", | ||
1160 | i, err); | ||
1161 | goto fail_validate; | ||
1162 | } | ||
1163 | } | ||
1164 | |||
1165 | err = update_gmmu_ptes_locked(vm, pgsz_idx, | 1111 | err = update_gmmu_ptes_locked(vm, pgsz_idx, |
1166 | sgt, | 1112 | sgt, |
1167 | buffer_offset, | 1113 | buffer_offset, |
1168 | map_offset, map_offset + size - 1, | 1114 | map_offset, map_offset + size, |
1169 | kind_v, | 1115 | kind_v, |
1170 | ctag_offset, | 1116 | ctag_offset, |
1171 | flags & | 1117 | flags & |
1172 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | 1118 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, |
1173 | rw_flag); | 1119 | rw_flag, |
1120 | sparse); | ||
1174 | if (err) { | 1121 | if (err) { |
1175 | gk20a_err(d, "failed to update ptes on map"); | 1122 | gk20a_err(d, "failed to update ptes on map"); |
1176 | goto fail_validate; | 1123 | goto fail_validate; |
@@ -1192,7 +1139,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
1192 | u64 size, | 1139 | u64 size, |
1193 | int pgsz_idx, | 1140 | int pgsz_idx, |
1194 | bool va_allocated, | 1141 | bool va_allocated, |
1195 | int rw_flag) | 1142 | int rw_flag, |
1143 | bool sparse) | ||
1196 | { | 1144 | { |
1197 | int err = 0; | 1145 | int err = 0; |
1198 | struct gk20a *g = gk20a_from_vm(vm); | 1146 | struct gk20a *g = gk20a_from_vm(vm); |
@@ -1212,9 +1160,10 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
1212 | NULL, /* n/a for unmap */ | 1160 | NULL, /* n/a for unmap */ |
1213 | 0, | 1161 | 0, |
1214 | vaddr, | 1162 | vaddr, |
1215 | vaddr + size - 1, | 1163 | vaddr + size, |
1216 | 0, 0, false /* n/a for unmap */, | 1164 | 0, 0, false /* n/a for unmap */, |
1217 | rw_flag); | 1165 | rw_flag, |
1166 | sparse); | ||
1218 | if (err) | 1167 | if (err) |
1219 | dev_err(dev_from_vm(vm), | 1168 | dev_err(dev_from_vm(vm), |
1220 | "failed to update gmmu ptes on unmap"); | 1169 | "failed to update gmmu ptes on unmap"); |
@@ -1439,7 +1388,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1439 | bfr.kind_v, | 1388 | bfr.kind_v, |
1440 | bfr.ctag_offset, | 1389 | bfr.ctag_offset, |
1441 | flags, rw_flag, | 1390 | flags, rw_flag, |
1442 | clear_ctags); | 1391 | clear_ctags, |
1392 | false); | ||
1443 | if (!map_offset) | 1393 | if (!map_offset) |
1444 | goto clean_up; | 1394 | goto clean_up; |
1445 | 1395 | ||
@@ -1555,7 +1505,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm, | |||
1555 | 0, /* page size index = 0 i.e. SZ_4K */ | 1505 | 0, /* page size index = 0 i.e. SZ_4K */ |
1556 | 0, /* kind */ | 1506 | 0, /* kind */ |
1557 | 0, /* ctag_offset */ | 1507 | 0, /* ctag_offset */ |
1558 | flags, rw_flag, false); | 1508 | flags, rw_flag, false, false); |
1559 | mutex_unlock(&vm->update_gmmu_lock); | 1509 | mutex_unlock(&vm->update_gmmu_lock); |
1560 | if (!vaddr) { | 1510 | if (!vaddr) { |
1561 | gk20a_err(dev_from_vm(vm), "failed to allocate va space"); | 1511 | gk20a_err(dev_from_vm(vm), "failed to allocate va space"); |
@@ -1642,7 +1592,8 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm, | |||
1642 | size, | 1592 | size, |
1643 | 0, /* page size 4K */ | 1593 | 0, /* page size 4K */ |
1644 | true, /*va_allocated */ | 1594 | true, /*va_allocated */ |
1645 | rw_flag); | 1595 | rw_flag, |
1596 | false); | ||
1646 | mutex_unlock(&vm->update_gmmu_lock); | 1597 | mutex_unlock(&vm->update_gmmu_lock); |
1647 | } | 1598 | } |
1648 | 1599 | ||
@@ -1748,157 +1699,6 @@ u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl) | |||
1748 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); | 1699 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); |
1749 | } | 1700 | } |
1750 | 1701 | ||
1751 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
1752 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
1753 | struct sg_table *sgt, | ||
1754 | u64 buffer_offset, | ||
1755 | u64 first_vaddr, u64 last_vaddr, | ||
1756 | u8 kind_v, u32 ctag_offset, | ||
1757 | bool cacheable, | ||
1758 | int rw_flag) | ||
1759 | { | ||
1760 | int err; | ||
1761 | u32 pde_lo, pde_hi, pde_i; | ||
1762 | struct scatterlist *cur_chunk; | ||
1763 | unsigned int cur_offset; | ||
1764 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | ||
1765 | struct gk20a *g = gk20a_from_vm(vm); | ||
1766 | u32 ctag_granularity = g->ops.fb.compression_page_size(g); | ||
1767 | u32 ctag = ctag_offset * ctag_granularity; | ||
1768 | u32 ctag_incr; | ||
1769 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
1770 | u64 addr = 0; | ||
1771 | u64 space_to_skip = buffer_offset; | ||
1772 | |||
1773 | pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, | ||
1774 | &pde_lo, &pde_hi); | ||
1775 | |||
1776 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", | ||
1777 | pgsz_idx, pde_lo, pde_hi); | ||
1778 | |||
1779 | ctag_incr = ctag_offset ? page_size : 0; | ||
1780 | |||
1781 | cur_offset = 0; | ||
1782 | if (sgt) { | ||
1783 | cur_chunk = sgt->sgl; | ||
1784 | /* space_to_skip must be page aligned */ | ||
1785 | BUG_ON(space_to_skip & (page_size - 1)); | ||
1786 | |||
1787 | while (space_to_skip > 0 && cur_chunk) { | ||
1788 | u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, cur_chunk); | ||
1789 | if (new_addr) { | ||
1790 | addr = new_addr; | ||
1791 | addr += cur_offset; | ||
1792 | } | ||
1793 | cur_offset += page_size; | ||
1794 | addr += page_size; | ||
1795 | while (cur_chunk && | ||
1796 | cur_offset >= cur_chunk->length) { | ||
1797 | cur_offset -= cur_chunk->length; | ||
1798 | cur_chunk = sg_next(cur_chunk); | ||
1799 | } | ||
1800 | space_to_skip -= page_size; | ||
1801 | } | ||
1802 | } | ||
1803 | else | ||
1804 | cur_chunk = NULL; | ||
1805 | |||
1806 | for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) { | ||
1807 | u32 pte_lo, pte_hi; | ||
1808 | u32 pte_cur; | ||
1809 | |||
1810 | struct gk20a_mm_entry *entry = vm->pdb.entries + pde_i; | ||
1811 | |||
1812 | if (pde_i == pde_lo) | ||
1813 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, | ||
1814 | pgsz_idx); | ||
1815 | else | ||
1816 | pte_lo = 0; | ||
1817 | |||
1818 | if ((pde_i != pde_hi) && (pde_hi != pde_lo)) | ||
1819 | pte_hi = vm->page_table_sizing[pgsz_idx].num_ptes-1; | ||
1820 | else | ||
1821 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, | ||
1822 | pgsz_idx); | ||
1823 | |||
1824 | /* get cpu access to the ptes */ | ||
1825 | err = map_gmmu_pages(entry); | ||
1826 | if (err) { | ||
1827 | gk20a_err(dev_from_vm(vm), | ||
1828 | "couldn't map ptes for update as=%d", | ||
1829 | vm_aspace_id(vm)); | ||
1830 | goto clean_up; | ||
1831 | } | ||
1832 | |||
1833 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | ||
1834 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | ||
1835 | if (likely(sgt)) { | ||
1836 | u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, | ||
1837 | cur_chunk); | ||
1838 | if (new_addr) { | ||
1839 | addr = new_addr; | ||
1840 | addr += cur_offset; | ||
1841 | } | ||
1842 | pte_w[0] = gmmu_pte_valid_true_f() | | ||
1843 | gmmu_pte_address_sys_f(addr | ||
1844 | >> gmmu_pte_address_shift_v()); | ||
1845 | pte_w[1] = gmmu_pte_aperture_video_memory_f() | | ||
1846 | gmmu_pte_kind_f(kind_v) | | ||
1847 | gmmu_pte_comptagline_f(ctag | ||
1848 | / ctag_granularity); | ||
1849 | |||
1850 | if (rw_flag == gk20a_mem_flag_read_only) { | ||
1851 | pte_w[0] |= gmmu_pte_read_only_true_f(); | ||
1852 | pte_w[1] |= | ||
1853 | gmmu_pte_write_disable_true_f(); | ||
1854 | } else if (rw_flag == | ||
1855 | gk20a_mem_flag_write_only) { | ||
1856 | pte_w[1] |= | ||
1857 | gmmu_pte_read_disable_true_f(); | ||
1858 | } | ||
1859 | if (!cacheable) | ||
1860 | pte_w[1] |= gmmu_pte_vol_true_f(); | ||
1861 | |||
1862 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d" | ||
1863 | " ctag=%d vol=%d" | ||
1864 | " [0x%08x,0x%08x]", | ||
1865 | pte_cur, hi32(addr), lo32(addr), | ||
1866 | kind_v, ctag, !cacheable, | ||
1867 | pte_w[1], pte_w[0]); | ||
1868 | ctag += ctag_incr; | ||
1869 | cur_offset += page_size; | ||
1870 | addr += page_size; | ||
1871 | while (cur_chunk && | ||
1872 | cur_offset >= cur_chunk->length) { | ||
1873 | cur_offset -= cur_chunk->length; | ||
1874 | cur_chunk = sg_next(cur_chunk); | ||
1875 | } | ||
1876 | |||
1877 | } else { | ||
1878 | gk20a_dbg(gpu_dbg_pte, | ||
1879 | "pte_cur=%d [0x0,0x0]", | ||
1880 | pte_cur); | ||
1881 | } | ||
1882 | |||
1883 | gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 0, pte_w[0]); | ||
1884 | gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 1, pte_w[1]); | ||
1885 | } | ||
1886 | |||
1887 | unmap_gmmu_pages(entry); | ||
1888 | } | ||
1889 | |||
1890 | smp_mb(); | ||
1891 | |||
1892 | return 0; | ||
1893 | |||
1894 | clean_up: | ||
1895 | /*TBD: potentially rewrite above to pre-map everything it needs to | ||
1896 | * as that's the only way it can fail */ | ||
1897 | return err; | ||
1898 | |||
1899 | } | ||
1900 | |||
1901 | |||
1902 | /* for gk20a the "video memory" apertures here are misnomers. */ | 1702 | /* for gk20a the "video memory" apertures here are misnomers. */ |
1903 | static inline u32 big_valid_pde0_bits(u64 pte_addr) | 1703 | static inline u32 big_valid_pde0_bits(u64 pte_addr) |
1904 | { | 1704 | { |
@@ -1908,6 +1708,7 @@ static inline u32 big_valid_pde0_bits(u64 pte_addr) | |||
1908 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); | 1708 | (u32)(pte_addr >> gmmu_pde_address_shift_v())); |
1909 | return pde0_bits; | 1709 | return pde0_bits; |
1910 | } | 1710 | } |
1711 | |||
1911 | static inline u32 small_valid_pde1_bits(u64 pte_addr) | 1712 | static inline u32 small_valid_pde1_bits(u64 pte_addr) |
1912 | { | 1713 | { |
1913 | u32 pde1_bits = | 1714 | u32 pde1_bits = |
@@ -1924,10 +1725,15 @@ static inline u32 small_valid_pde1_bits(u64 pte_addr) | |||
1924 | made. So, superfluous updates will cause unnecessary | 1725 | made. So, superfluous updates will cause unnecessary |
1925 | pde invalidations. | 1726 | pde invalidations. |
1926 | */ | 1727 | */ |
1927 | void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) | 1728 | int update_gmmu_pde_locked(struct vm_gk20a *vm, |
1729 | struct gk20a_mm_entry *pte, | ||
1730 | u32 i, u32 gmmu_pgsz_idx, | ||
1731 | u64 iova, | ||
1732 | u32 kind_v, u32 *ctag, | ||
1733 | bool cacheable, int rw_flag, bool sparse) | ||
1928 | { | 1734 | { |
1929 | bool small_valid, big_valid; | 1735 | bool small_valid, big_valid; |
1930 | u64 pte_addr[2] = {0, 0}; | 1736 | u64 pte_addr_small = 0, pte_addr_big = 0; |
1931 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; | 1737 | struct gk20a_mm_entry *entry = vm->pdb.entries + i; |
1932 | u32 pde_v[2] = {0, 0}; | 1738 | u32 pde_v[2] = {0, 0}; |
1933 | u32 *pde; | 1739 | u32 *pde; |
@@ -1938,44 +1744,227 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i) | |||
1938 | big_valid = entry->size && entry->pgsz == gmmu_page_size_big; | 1744 | big_valid = entry->size && entry->pgsz == gmmu_page_size_big; |
1939 | 1745 | ||
1940 | if (small_valid) | 1746 | if (small_valid) |
1941 | pte_addr[gmmu_page_size_small] = | 1747 | pte_addr_small = gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl); |
1942 | gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl); | ||
1943 | 1748 | ||
1944 | if (big_valid) | 1749 | if (big_valid) |
1945 | pte_addr[gmmu_page_size_big] = | 1750 | pte_addr_big = gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl); |
1946 | gk20a_mm_iova_addr(vm->mm->g, entry->sgt->sgl); | ||
1947 | 1751 | ||
1948 | pde_v[0] = gmmu_pde_size_full_f(); | 1752 | pde_v[0] = gmmu_pde_size_full_f(); |
1949 | pde_v[0] |= big_valid ? | 1753 | pde_v[0] |= big_valid ? big_valid_pde0_bits(pte_addr_big) : |
1950 | big_valid_pde0_bits(pte_addr[gmmu_page_size_big]) | ||
1951 | : | ||
1952 | (gmmu_pde_aperture_big_invalid_f()); | 1754 | (gmmu_pde_aperture_big_invalid_f()); |
1953 | 1755 | ||
1954 | pde_v[1] |= (small_valid ? | 1756 | pde_v[1] |= (small_valid ? |
1955 | small_valid_pde1_bits(pte_addr[gmmu_page_size_small]) | 1757 | small_valid_pde1_bits(pte_addr_small) : |
1956 | : | ||
1957 | (gmmu_pde_aperture_small_invalid_f() | | 1758 | (gmmu_pde_aperture_small_invalid_f() | |
1958 | gmmu_pde_vol_small_false_f()) | 1759 | gmmu_pde_vol_small_false_f())) |
1959 | ) | 1760 | | |
1960 | | | 1761 | (big_valid ? (gmmu_pde_vol_big_true_f()) : |
1961 | (big_valid ? (gmmu_pde_vol_big_true_f()) : | 1762 | gmmu_pde_vol_big_false_f()); |
1962 | gmmu_pde_vol_big_false_f()); | ||
1963 | 1763 | ||
1964 | pde = pde_from_index(vm, i); | 1764 | pde = pde_from_index(vm, i); |
1965 | 1765 | ||
1966 | gk20a_mem_wr32(pde, 0, pde_v[0]); | 1766 | gk20a_mem_wr32(pde, 0, pde_v[0]); |
1967 | gk20a_mem_wr32(pde, 1, pde_v[1]); | 1767 | gk20a_mem_wr32(pde, 1, pde_v[1]); |
1968 | 1768 | ||
1969 | smp_mb(); | 1769 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", |
1770 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); | ||
1771 | return 0; | ||
1772 | } | ||
1773 | |||
1774 | int update_gmmu_pte_locked(struct vm_gk20a *vm, | ||
1775 | struct gk20a_mm_entry *pte, | ||
1776 | u32 i, u32 gmmu_pgsz_idx, | ||
1777 | u64 iova, | ||
1778 | u32 kind_v, u32 *ctag, | ||
1779 | bool cacheable, int rw_flag, bool sparse) | ||
1780 | { | ||
1781 | struct gk20a *g = gk20a_from_vm(vm); | ||
1782 | u32 ctag_granularity = g->ops.fb.compression_page_size(g); | ||
1783 | u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; | ||
1784 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | ||
1785 | |||
1786 | if (iova) { | ||
1787 | pte_w[0] = gmmu_pte_valid_true_f() | | ||
1788 | gmmu_pte_address_sys_f(iova | ||
1789 | >> gmmu_pte_address_shift_v()); | ||
1790 | pte_w[1] = gmmu_pte_aperture_video_memory_f() | | ||
1791 | gmmu_pte_kind_f(kind_v) | | ||
1792 | gmmu_pte_comptagline_f(*ctag / ctag_granularity); | ||
1793 | |||
1794 | if (rw_flag == gk20a_mem_flag_read_only) { | ||
1795 | pte_w[0] |= gmmu_pte_read_only_true_f(); | ||
1796 | pte_w[1] |= | ||
1797 | gmmu_pte_write_disable_true_f(); | ||
1798 | } else if (rw_flag == | ||
1799 | gk20a_mem_flag_write_only) { | ||
1800 | pte_w[1] |= | ||
1801 | gmmu_pte_read_disable_true_f(); | ||
1802 | } | ||
1803 | if (!cacheable) | ||
1804 | pte_w[1] |= gmmu_pte_vol_true_f(); | ||
1805 | |||
1806 | gk20a_dbg(gpu_dbg_pte, | ||
1807 | "pte=%d iova=0x%llx kind=%d ctag=%d vol=%d [0x%08x, 0x%08x]", | ||
1808 | i, iova, | ||
1809 | kind_v, *ctag, !cacheable, | ||
1810 | pte_w[1], pte_w[0]); | ||
1811 | |||
1812 | if (*ctag) | ||
1813 | *ctag += page_size; | ||
1814 | } else if (sparse) { | ||
1815 | pte_w[0] = gmmu_pte_valid_false_f(); | ||
1816 | pte_w[1] |= gmmu_pte_vol_true_f(); | ||
1817 | } else { | ||
1818 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | ||
1819 | } | ||
1820 | |||
1821 | gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]); | ||
1822 | gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]); | ||
1823 | |||
1824 | return 0; | ||
1825 | } | ||
1826 | |||
1827 | static int update_gmmu_level_locked(struct vm_gk20a *vm, | ||
1828 | struct gk20a_mm_entry *pte, | ||
1829 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
1830 | u64 iova, | ||
1831 | u64 gpu_va, u64 gpu_end, | ||
1832 | u8 kind_v, u32 *ctag, | ||
1833 | bool cacheable, | ||
1834 | int rw_flag, | ||
1835 | bool sparse, | ||
1836 | int lvl) | ||
1837 | { | ||
1838 | const struct gk20a_mmu_level *l = &vm->mmu_levels[lvl]; | ||
1839 | const struct gk20a_mmu_level *next_l = &vm->mmu_levels[lvl+1]; | ||
1840 | int err = 0; | ||
1841 | u32 pde_i; | ||
1842 | u64 pde_size = 1ULL << (u64)l->lo_bit[pgsz_idx]; | ||
1843 | |||
1844 | gk20a_dbg_fn(""); | ||
1845 | |||
1846 | pde_i = (gpu_va & ((1ULL << ((u64)l->hi_bit[pgsz_idx]+1)) - 1ULL)) | ||
1847 | >> (u64)l->lo_bit[pgsz_idx]; | ||
1848 | |||
1849 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, l: %d, [%llx,%llx], iova=%llx", | ||
1850 | pgsz_idx, lvl, gpu_va, gpu_end-1, iova); | ||
1851 | |||
1852 | while (gpu_va < gpu_end) { | ||
1853 | struct gk20a_mm_entry *next_pte = NULL; | ||
1854 | u64 next = min((gpu_va + pde_size) & ~(pde_size-1), gpu_end); | ||
1855 | |||
1856 | /* Allocate next level */ | ||
1857 | if (next_l->update_entry) { | ||
1858 | if (!pte->entries) { | ||
1859 | int num_entries = | ||
1860 | 1 << | ||
1861 | (l->hi_bit[pgsz_idx] | ||
1862 | - l->lo_bit[pgsz_idx]); | ||
1863 | pte->entries = | ||
1864 | kzalloc(sizeof(struct gk20a_mm_entry) * | ||
1865 | num_entries, GFP_KERNEL); | ||
1866 | pte->pgsz = pgsz_idx; | ||
1867 | if (!pte->entries) | ||
1868 | return -ENOMEM; | ||
1869 | } | ||
1870 | next_pte = pte->entries + pde_i; | ||
1871 | |||
1872 | if (!next_pte->size) { | ||
1873 | err = gk20a_zalloc_gmmu_page_table(vm, | ||
1874 | pgsz_idx, next_l, next_pte); | ||
1875 | if (err) | ||
1876 | return err; | ||
1877 | } | ||
1878 | } | ||
1879 | |||
1880 | err = l->update_entry(vm, pte, pde_i, pgsz_idx, | ||
1881 | iova, kind_v, ctag, cacheable, | ||
1882 | rw_flag, sparse); | ||
1883 | if (err) | ||
1884 | return err; | ||
1885 | |||
1886 | if (next_l->update_entry) { | ||
1887 | /* get cpu access to the ptes */ | ||
1888 | err = map_gmmu_pages(next_pte); | ||
1889 | if (err) { | ||
1890 | gk20a_err(dev_from_vm(vm), | ||
1891 | "couldn't map ptes for update as=%d", | ||
1892 | vm_aspace_id(vm)); | ||
1893 | return err; | ||
1894 | } | ||
1895 | err = update_gmmu_level_locked(vm, next_pte, | ||
1896 | pgsz_idx, | ||
1897 | iova, | ||
1898 | gpu_va, | ||
1899 | next, | ||
1900 | kind_v, ctag, | ||
1901 | cacheable, rw_flag, sparse, lvl+1); | ||
1902 | unmap_gmmu_pages(next_pte); | ||
1903 | |||
1904 | if (err) | ||
1905 | return err; | ||
1906 | } | ||
1907 | |||
1908 | if (iova) | ||
1909 | iova += next - gpu_va; | ||
1910 | pde_i++; | ||
1911 | gpu_va = next; | ||
1912 | } | ||
1913 | |||
1914 | gk20a_dbg_fn("done"); | ||
1915 | |||
1916 | return 0; | ||
1917 | } | ||
1918 | |||
1919 | static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | ||
1920 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
1921 | struct sg_table *sgt, | ||
1922 | u64 buffer_offset, | ||
1923 | u64 gpu_va, u64 gpu_end, | ||
1924 | u8 kind_v, u32 ctag_offset, | ||
1925 | bool cacheable, | ||
1926 | int rw_flag, | ||
1927 | bool sparse) | ||
1928 | { | ||
1929 | struct gk20a *g = gk20a_from_vm(vm); | ||
1930 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
1931 | u32 ctag = ctag_offset * ctag_granularity; | ||
1932 | u64 iova = 0; | ||
1933 | u64 space_to_skip = buffer_offset; | ||
1934 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | ||
1935 | int err; | ||
1936 | |||
1937 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, iova=%llx", | ||
1938 | pgsz_idx, | ||
1939 | sgt ? gk20a_mm_iova_addr(vm->mm->g, sgt->sgl) : 0ULL); | ||
1970 | 1940 | ||
1971 | FLUSH_CPU_DCACHE(pde, | 1941 | if (space_to_skip & (page_size - 1)) |
1972 | sg_phys(vm->pdb.sgt->sgl) + (i*gmmu_pde__size_v()), | 1942 | return -EINVAL; |
1973 | sizeof(u32)*2); | 1943 | |
1944 | if (sgt) | ||
1945 | iova = gk20a_mm_iova_addr(vm->mm->g, sgt->sgl) + space_to_skip; | ||
1974 | 1946 | ||
1975 | gk20a_mm_l2_invalidate(vm->mm->g); | 1947 | gk20a_dbg(gpu_dbg_map, "size_idx=%d, gpu_va=[%llx,%llx], iova=%llx", |
1948 | pgsz_idx, gpu_va, gpu_end-1, iova); | ||
1949 | err = map_gmmu_pages(&vm->pdb); | ||
1950 | if (err) { | ||
1951 | gk20a_err(dev_from_vm(vm), | ||
1952 | "couldn't map ptes for update as=%d", | ||
1953 | vm_aspace_id(vm)); | ||
1954 | return err; | ||
1955 | } | ||
1956 | err = update_gmmu_level_locked(vm, &vm->pdb, pgsz_idx, | ||
1957 | iova, | ||
1958 | gpu_va, gpu_end, | ||
1959 | kind_v, &ctag, | ||
1960 | cacheable, rw_flag, sparse, 0); | ||
1961 | unmap_gmmu_pages(&vm->pdb); | ||
1962 | |||
1963 | smp_mb(); | ||
1976 | 1964 | ||
1977 | gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]); | ||
1978 | gk20a_dbg_fn("done"); | 1965 | gk20a_dbg_fn("done"); |
1966 | |||
1967 | return err; | ||
1979 | } | 1968 | } |
1980 | 1969 | ||
1981 | /* NOTE! mapped_buffers lock must be held */ | 1970 | /* NOTE! mapped_buffers lock must be held */ |
@@ -1984,29 +1973,14 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | |||
1984 | struct vm_gk20a *vm = mapped_buffer->vm; | 1973 | struct vm_gk20a *vm = mapped_buffer->vm; |
1985 | struct gk20a *g = vm->mm->g; | 1974 | struct gk20a *g = vm->mm->g; |
1986 | 1975 | ||
1987 | if (mapped_buffer->va_node && | 1976 | g->ops.mm.gmmu_unmap(vm, |
1988 | mapped_buffer->va_node->sparse) { | 1977 | mapped_buffer->addr, |
1989 | u64 vaddr = mapped_buffer->addr; | 1978 | mapped_buffer->size, |
1990 | u32 pgsz_idx = mapped_buffer->pgsz_idx; | 1979 | mapped_buffer->pgsz_idx, |
1991 | u32 num_pages = mapped_buffer->size >> | 1980 | mapped_buffer->va_allocated, |
1992 | ilog2(vm->gmmu_page_sizes[pgsz_idx]); | 1981 | gk20a_mem_flag_none, |
1993 | 1982 | mapped_buffer->va_node ? | |
1994 | /* there is little we can do if this fails... */ | 1983 | mapped_buffer->va_node->sparse : false); |
1995 | g->ops.mm.gmmu_unmap(vm, | ||
1996 | mapped_buffer->addr, | ||
1997 | mapped_buffer->size, | ||
1998 | mapped_buffer->pgsz_idx, | ||
1999 | mapped_buffer->va_allocated, | ||
2000 | gk20a_mem_flag_none); | ||
2001 | g->ops.mm.set_sparse(vm, vaddr, | ||
2002 | num_pages, pgsz_idx, false); | ||
2003 | } else | ||
2004 | g->ops.mm.gmmu_unmap(vm, | ||
2005 | mapped_buffer->addr, | ||
2006 | mapped_buffer->size, | ||
2007 | mapped_buffer->pgsz_idx, | ||
2008 | mapped_buffer->va_allocated, | ||
2009 | gk20a_mem_flag_none); | ||
2010 | 1984 | ||
2011 | gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d", | 1985 | gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d", |
2012 | vm_aspace_id(vm), | 1986 | vm_aspace_id(vm), |
@@ -2057,7 +2031,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) | |||
2057 | struct vm_reserved_va_node *va_node, *va_node_tmp; | 2031 | struct vm_reserved_va_node *va_node, *va_node_tmp; |
2058 | struct rb_node *node; | 2032 | struct rb_node *node; |
2059 | int i; | 2033 | int i; |
2060 | u32 pde_lo, pde_hi; | 2034 | u32 pde_lo = 0, pde_hi = 0; |
2061 | 2035 | ||
2062 | gk20a_dbg_fn(""); | 2036 | gk20a_dbg_fn(""); |
2063 | mutex_lock(&vm->update_gmmu_lock); | 2037 | mutex_lock(&vm->update_gmmu_lock); |
@@ -2082,7 +2056,8 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) | |||
2082 | 2056 | ||
2083 | /* unmapping all buffers above may not actually free | 2057 | /* unmapping all buffers above may not actually free |
2084 | * all vm ptes. jettison them here for certain... */ | 2058 | * all vm ptes. jettison them here for certain... */ |
2085 | pde_range_from_vaddr_range(vm, 0, vm->va_limit-1, | 2059 | pde_range_from_vaddr_range(vm, |
2060 | 0, vm->va_limit-1, | ||
2086 | &pde_lo, &pde_hi); | 2061 | &pde_lo, &pde_hi); |
2087 | for (i = 0; i < pde_hi + 1; i++) { | 2062 | for (i = 0; i < pde_hi + 1; i++) { |
2088 | struct gk20a_mm_entry *entry = &vm->pdb.entries[i]; | 2063 | struct gk20a_mm_entry *entry = &vm->pdb.entries[i]; |
@@ -2125,6 +2100,30 @@ void gk20a_vm_put(struct vm_gk20a *vm) | |||
2125 | kref_put(&vm->ref, gk20a_vm_remove_support_kref); | 2100 | kref_put(&vm->ref, gk20a_vm_remove_support_kref); |
2126 | } | 2101 | } |
2127 | 2102 | ||
2103 | const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { | ||
2104 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, | ||
2105 | .lo_bit = {26, 26}, | ||
2106 | .update_entry = update_gmmu_pde_locked, | ||
2107 | .entry_size = 8}, | ||
2108 | {.hi_bit = {25, 25}, | ||
2109 | .lo_bit = {12, 16}, | ||
2110 | .update_entry = update_gmmu_pte_locked, | ||
2111 | .entry_size = 8}, | ||
2112 | {.update_entry = NULL} | ||
2113 | }; | ||
2114 | |||
2115 | const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { | ||
2116 | {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, | ||
2117 | .lo_bit = {27, 27}, | ||
2118 | .update_entry = update_gmmu_pde_locked, | ||
2119 | .entry_size = 8}, | ||
2120 | {.hi_bit = {26, 26}, | ||
2121 | .lo_bit = {12, 17}, | ||
2122 | .update_entry = update_gmmu_pte_locked, | ||
2123 | .entry_size = 8}, | ||
2124 | {.update_entry = NULL} | ||
2125 | }; | ||
2126 | |||
2128 | int gk20a_init_vm(struct mm_gk20a *mm, | 2127 | int gk20a_init_vm(struct mm_gk20a *mm, |
2129 | struct vm_gk20a *vm, | 2128 | struct vm_gk20a *vm, |
2130 | u32 big_page_size, | 2129 | u32 big_page_size, |
@@ -2149,38 +2148,18 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2149 | vm->big_pages = big_pages; | 2148 | vm->big_pages = big_pages; |
2150 | 2149 | ||
2151 | vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; | 2150 | vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; |
2152 | vm->pde_stride = vm->big_page_size << 10; | ||
2153 | vm->pde_stride_shift = ilog2(vm->pde_stride); | ||
2154 | 2151 | ||
2155 | for (i = 0; i < gmmu_nr_page_sizes; i++) { | 2152 | vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g, |
2156 | u32 num_ptes, pte_space, num_pages; | 2153 | vm->big_page_size); |
2157 | 2154 | ||
2155 | for (i = 0; i < gmmu_nr_page_sizes; i++) | ||
2158 | vm->gmmu_page_sizes[i] = gmmu_page_sizes[i]; | 2156 | vm->gmmu_page_sizes[i] = gmmu_page_sizes[i]; |
2159 | 2157 | ||
2160 | /* assuming "full" page tables */ | 2158 | gk20a_dbg_info("small page-size (%dKB)", |
2161 | num_ptes = vm->pde_stride / gmmu_page_sizes[i]; | 2159 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10); |
2162 | |||
2163 | pte_space = num_ptes * gmmu_pte__size_v(); | ||
2164 | /* allocate whole pages */ | ||
2165 | pte_space = roundup(pte_space, PAGE_SIZE); | ||
2166 | 2160 | ||
2167 | num_pages = pte_space / PAGE_SIZE; | 2161 | gk20a_dbg_info("big page-size (%dKB)", |
2168 | /* make sure "order" is viable */ | 2162 | vm->gmmu_page_sizes[gmmu_page_size_big] >> 10); |
2169 | BUG_ON(!is_power_of_2(num_pages)); | ||
2170 | |||
2171 | vm->page_table_sizing[i].num_ptes = num_ptes; | ||
2172 | vm->page_table_sizing[i].order = ilog2(num_pages); | ||
2173 | } | ||
2174 | |||
2175 | gk20a_dbg_info("small page-size (%dKB) pte array: %dKB", | ||
2176 | vm->gmmu_page_sizes[gmmu_page_size_small] >> 10, | ||
2177 | (vm->page_table_sizing[gmmu_page_size_small].num_ptes * | ||
2178 | gmmu_pte__size_v()) >> 10); | ||
2179 | |||
2180 | gk20a_dbg_info("big page-size (%dKB) pte array: %dKB", | ||
2181 | vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, | ||
2182 | (vm->page_table_sizing[gmmu_page_size_big].num_ptes * | ||
2183 | gmmu_pte__size_v()) >> 10); | ||
2184 | 2163 | ||
2185 | pde_range_from_vaddr_range(vm, | 2164 | pde_range_from_vaddr_range(vm, |
2186 | 0, vm->va_limit-1, | 2165 | 0, vm->va_limit-1, |
@@ -2197,7 +2176,7 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2197 | name, vm->va_limit, pde_hi + 1); | 2176 | name, vm->va_limit, pde_hi + 1); |
2198 | 2177 | ||
2199 | /* allocate the page table directory */ | 2178 | /* allocate the page table directory */ |
2200 | err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->pdb); | 2179 | err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0], &vm->pdb); |
2201 | if (err) | 2180 | if (err) |
2202 | goto clean_up_ptes; | 2181 | goto clean_up_ptes; |
2203 | 2182 | ||
@@ -2382,9 +2361,18 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2382 | 2361 | ||
2383 | /* mark that we need to use sparse mappings here */ | 2362 | /* mark that we need to use sparse mappings here */ |
2384 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) { | 2363 | if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) { |
2385 | err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages, | 2364 | u64 map_offset = g->ops.mm.gmmu_map(vm, vaddr_start, |
2386 | pgsz_idx, true); | 2365 | NULL, |
2387 | if (err) { | 2366 | 0, |
2367 | va_node->size, | ||
2368 | pgsz_idx, | ||
2369 | 0, | ||
2370 | 0, | ||
2371 | args->flags, | ||
2372 | gk20a_mem_flag_none, | ||
2373 | false, | ||
2374 | true); | ||
2375 | if (!map_offset) { | ||
2388 | mutex_unlock(&vm->update_gmmu_lock); | 2376 | mutex_unlock(&vm->update_gmmu_lock); |
2389 | vma->free(vma, start_page_nr, args->pages, 1); | 2377 | vma->free(vma, start_page_nr, args->pages, 1); |
2390 | kfree(va_node); | 2378 | kfree(va_node); |
@@ -2462,7 +2450,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share, | |||
2462 | va_node->size, | 2450 | va_node->size, |
2463 | va_node->pgsz_idx, | 2451 | va_node->pgsz_idx, |
2464 | true, | 2452 | true, |
2465 | gk20a_mem_flag_none); | 2453 | gk20a_mem_flag_none, |
2454 | true); | ||
2466 | kfree(va_node); | 2455 | kfree(va_node); |
2467 | } | 2456 | } |
2468 | mutex_unlock(&vm->update_gmmu_lock); | 2457 | mutex_unlock(&vm->update_gmmu_lock); |
@@ -2741,13 +2730,25 @@ static int gk20a_init_hwpm(struct mm_gk20a *mm) | |||
2741 | return 0; | 2730 | return 0; |
2742 | } | 2731 | } |
2743 | 2732 | ||
2733 | void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | ||
2734 | { | ||
2735 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | ||
2736 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | ||
2737 | |||
2738 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | ||
2739 | ram_in_page_dir_base_target_vid_mem_f() | | ||
2740 | ram_in_page_dir_base_vol_true_f() | | ||
2741 | ram_in_page_dir_base_lo_f(pdb_addr_lo)); | ||
2742 | |||
2743 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | ||
2744 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | ||
2745 | } | ||
2746 | |||
2744 | void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, | 2747 | void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, |
2745 | u32 big_page_size) | 2748 | u32 big_page_size) |
2746 | { | 2749 | { |
2747 | struct gk20a *g = gk20a_from_vm(vm); | 2750 | struct gk20a *g = gk20a_from_vm(vm); |
2748 | u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl); | 2751 | u64 pde_addr = gk20a_mm_iova_addr(g, vm->pdb.sgt->sgl); |
2749 | u32 pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v()); | ||
2750 | u32 pde_addr_hi = u64_hi32(pde_addr); | ||
2751 | phys_addr_t inst_pa = inst_block->cpu_pa; | 2752 | phys_addr_t inst_pa = inst_block->cpu_pa; |
2752 | void *inst_ptr = inst_block->cpuva; | 2753 | void *inst_ptr = inst_block->cpuva; |
2753 | 2754 | ||
@@ -2756,13 +2757,7 @@ void gk20a_init_inst_block(struct inst_desc *inst_block, struct vm_gk20a *vm, | |||
2756 | 2757 | ||
2757 | gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); | 2758 | gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); |
2758 | 2759 | ||
2759 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | 2760 | g->ops.mm.init_pdb(g, inst_ptr, pde_addr); |
2760 | ram_in_page_dir_base_target_vid_mem_f() | | ||
2761 | ram_in_page_dir_base_vol_true_f() | | ||
2762 | ram_in_page_dir_base_lo_f(pde_addr_lo)); | ||
2763 | |||
2764 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | ||
2765 | ram_in_page_dir_base_hi_f(pde_addr_hi)); | ||
2766 | 2761 | ||
2767 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), | 2762 | gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), |
2768 | u64_lo32(vm->va_limit) | 0xFFF); | 2763 | u64_lo32(vm->va_limit) | 0xFFF); |
@@ -3030,6 +3025,13 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g) | |||
3030 | return 34; | 3025 | return 34; |
3031 | } | 3026 | } |
3032 | 3027 | ||
3028 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, | ||
3029 | u32 big_page_size) | ||
3030 | { | ||
3031 | return (big_page_size == SZ_64K) ? | ||
3032 | gk20a_mm_levels_64k : gk20a_mm_levels_128k; | ||
3033 | } | ||
3034 | |||
3033 | void gk20a_init_mm(struct gpu_ops *gops) | 3035 | void gk20a_init_mm(struct gpu_ops *gops) |
3034 | { | 3036 | { |
3035 | gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; | 3037 | gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled; |
@@ -3043,5 +3045,7 @@ void gk20a_init_mm(struct gpu_ops *gops) | |||
3043 | gops->mm.l2_flush = gk20a_mm_l2_flush; | 3045 | gops->mm.l2_flush = gk20a_mm_l2_flush; |
3044 | gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; | 3046 | gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate; |
3045 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; | 3047 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; |
3048 | gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels; | ||
3049 | gops->mm.init_pdb = gk20a_mm_init_pdb; | ||
3046 | } | 3050 | } |
3047 | 3051 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7b355436..42c164be 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -268,6 +268,18 @@ struct vm_reserved_va_node { | |||
268 | bool sparse; | 268 | bool sparse; |
269 | }; | 269 | }; |
270 | 270 | ||
271 | struct gk20a_mmu_level { | ||
272 | int hi_bit[2]; | ||
273 | int lo_bit[2]; | ||
274 | int (*update_entry)(struct vm_gk20a *vm, | ||
275 | struct gk20a_mm_entry *pte, | ||
276 | u32 i, u32 gmmu_pgsz_idx, | ||
277 | u64 iova, | ||
278 | u32 kind_v, u32 *ctag, | ||
279 | bool cacheable, int rw_flag, bool sparse); | ||
280 | size_t entry_size; | ||
281 | }; | ||
282 | |||
271 | struct vm_gk20a { | 283 | struct vm_gk20a { |
272 | struct mm_gk20a *mm; | 284 | struct mm_gk20a *mm; |
273 | struct gk20a_as_share *as_share; /* as_share this represents */ | 285 | struct gk20a_as_share *as_share; /* as_share this represents */ |
@@ -282,13 +294,8 @@ struct vm_gk20a { | |||
282 | bool mapped; | 294 | bool mapped; |
283 | 295 | ||
284 | u32 big_page_size; | 296 | u32 big_page_size; |
285 | u32 pde_stride; | ||
286 | u32 pde_stride_shift; | ||
287 | 297 | ||
288 | struct { | 298 | const struct gk20a_mmu_level *mmu_levels; |
289 | u32 order; | ||
290 | u32 num_ptes; | ||
291 | } page_table_sizing[gmmu_nr_page_sizes]; | ||
292 | 299 | ||
293 | struct kref ref; | 300 | struct kref ref; |
294 | 301 | ||
@@ -450,7 +457,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
450 | u32 ctag_offset, | 457 | u32 ctag_offset, |
451 | u32 flags, | 458 | u32 flags, |
452 | int rw_flag, | 459 | int rw_flag, |
453 | bool clear_ctags); | 460 | bool clear_ctags, |
461 | bool sparse); | ||
454 | 462 | ||
455 | void gk20a_gmmu_unmap(struct vm_gk20a *vm, | 463 | void gk20a_gmmu_unmap(struct vm_gk20a *vm, |
456 | u64 vaddr, | 464 | u64 vaddr, |
@@ -462,7 +470,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
462 | u64 size, | 470 | u64 size, |
463 | int pgsz_idx, | 471 | int pgsz_idx, |
464 | bool va_allocated, | 472 | bool va_allocated, |
465 | int rw_flag); | 473 | int rw_flag, |
474 | bool sparse); | ||
466 | 475 | ||
467 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); | 476 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); |
468 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | 477 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, |
@@ -557,13 +566,10 @@ void unmap_gmmu_pages(struct gk20a_mm_entry *entry); | |||
557 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, | 566 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, |
558 | u64 addr_lo, u64 addr_hi, | 567 | u64 addr_lo, u64 addr_hi, |
559 | u32 *pde_lo, u32 *pde_hi); | 568 | u32 *pde_lo, u32 *pde_hi); |
569 | int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); | ||
560 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i); | 570 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i); |
561 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, | 571 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, |
562 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); | 572 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); |
563 | int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, | ||
564 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx); | ||
565 | |||
566 | void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i); | ||
567 | void free_gmmu_pages(struct vm_gk20a *vm, | 573 | void free_gmmu_pages(struct vm_gk20a *vm, |
568 | struct gk20a_mm_entry *entry); | 574 | struct gk20a_mm_entry *entry); |
569 | 575 | ||
@@ -571,4 +577,11 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g); | |||
571 | 577 | ||
572 | struct gpu_ops; | 578 | struct gpu_ops; |
573 | void gk20a_init_mm(struct gpu_ops *gops); | 579 | void gk20a_init_mm(struct gpu_ops *gops); |
580 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, | ||
581 | u32 big_page_size); | ||
582 | void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr); | ||
583 | |||
584 | extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; | ||
585 | extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; | ||
586 | |||
574 | #endif /* MM_GK20A_H */ | 587 | #endif /* MM_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 37ab70fa..f85a1718 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -21,152 +21,6 @@ | |||
21 | #include "hw_gr_gm20b.h" | 21 | #include "hw_gr_gm20b.h" |
22 | #include "hw_ram_gm20b.h" | 22 | #include "hw_ram_gm20b.h" |
23 | 23 | ||
24 | static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | ||
25 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
26 | u64 first_vaddr, u64 last_vaddr, | ||
27 | bool clear, bool refplus) | ||
28 | { | ||
29 | int err; | ||
30 | u32 pte_lo, pte_hi; | ||
31 | u32 pde_lo, pde_hi; | ||
32 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | ||
33 | u64 addr = 0; | ||
34 | u32 pte_cur; | ||
35 | struct gk20a_mm_entry *entry; | ||
36 | struct gk20a *g = gk20a_from_vm(vm); | ||
37 | |||
38 | gk20a_dbg_fn(""); | ||
39 | |||
40 | pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, | ||
41 | &pde_lo, &pde_hi); | ||
42 | |||
43 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", | ||
44 | pgsz_idx, pde_lo, pde_hi); | ||
45 | |||
46 | /* Expect ptes of the same pde */ | ||
47 | BUG_ON(pde_lo != pde_hi); | ||
48 | |||
49 | entry = vm->pdb.entries + pde_lo; | ||
50 | |||
51 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); | ||
52 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); | ||
53 | |||
54 | /* get cpu access to the ptes */ | ||
55 | err = map_gmmu_pages(entry); | ||
56 | if (err) | ||
57 | goto fail; | ||
58 | |||
59 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | ||
60 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | ||
61 | pte_w[0] = gmmu_pte_valid_false_f(); | ||
62 | pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f(); | ||
63 | |||
64 | gk20a_dbg(gpu_dbg_pte, | ||
65 | "pte_cur=%d addr=%llx" | ||
66 | " [0x%08x,0x%08x]", | ||
67 | pte_cur, addr, | ||
68 | pte_w[1], pte_w[0]); | ||
69 | |||
70 | gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 0, pte_w[0]); | ||
71 | gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 1, pte_w[1]); | ||
72 | } | ||
73 | |||
74 | unmap_gmmu_pages(entry); | ||
75 | |||
76 | smp_mb(); | ||
77 | g->ops.mm.tlb_invalidate(vm); | ||
78 | |||
79 | return 0; | ||
80 | fail: | ||
81 | return err; | ||
82 | |||
83 | } | ||
84 | |||
85 | static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo, | ||
86 | u64 vaddr_hi, u32 pde) | ||
87 | { | ||
88 | u64 pde_vaddr_lo, pde_vaddr_hi; | ||
89 | |||
90 | gk20a_dbg_fn(""); | ||
91 | |||
92 | pde_vaddr_lo = (u64)pde << vm->pde_stride_shift; | ||
93 | pde_vaddr_hi = pde_vaddr_lo | | ||
94 | ((0x1UL << (vm->pde_stride_shift)) - 1); | ||
95 | |||
96 | return ((vaddr_lo <= pde_vaddr_lo) && (vaddr_hi) >= pde_vaddr_hi); | ||
97 | } | ||
98 | |||
99 | static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | ||
100 | u32 num_pages, u32 pgsz_idx, bool refplus) | ||
101 | { | ||
102 | struct mm_gk20a *mm = vm->mm; | ||
103 | u32 pgsz = vm->gmmu_page_sizes[pgsz_idx]; | ||
104 | u32 pde_shift = vm->pde_stride_shift; | ||
105 | u64 vaddr_hi; | ||
106 | u64 vaddr_pde_start; | ||
107 | u32 i; | ||
108 | u32 pde_lo, pde_hi; | ||
109 | int err; | ||
110 | |||
111 | gk20a_dbg_fn(""); | ||
112 | |||
113 | vaddr_hi = vaddr + pgsz * (u64)num_pages - 1; | ||
114 | pde_range_from_vaddr_range(vm, | ||
115 | vaddr, | ||
116 | vaddr_hi, | ||
117 | &pde_lo, &pde_hi); | ||
118 | |||
119 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " | ||
120 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", | ||
121 | vaddr, vaddr_hi, pde_lo, pde_hi, pgsz, | ||
122 | vm->pde_stride_shift); | ||
123 | |||
124 | for (i = pde_lo; i <= pde_hi; i++) { | ||
125 | /* Mark all ptes as sparse. */ | ||
126 | err = validate_gmmu_page_table_gk20a_locked(vm, i, | ||
127 | pgsz_idx); | ||
128 | if (err) { | ||
129 | gk20a_err(dev_from_vm(vm), | ||
130 | "failed to validate page table %d: %d", | ||
131 | i, err); | ||
132 | goto fail; | ||
133 | } | ||
134 | |||
135 | if (gm20b_vm_is_pde_in_range(vm, vaddr, vaddr_hi, i)) { | ||
136 | /* entire pde is marked as sparse */ | ||
137 | vaddr_pde_start = (u64)i << pde_shift; | ||
138 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | ||
139 | vaddr_pde_start, | ||
140 | PDE_ADDR_END(vaddr_pde_start, | ||
141 | pde_shift), false, refplus); | ||
142 | } else { | ||
143 | /* Check leading and trailing spaces which doesn't fit | ||
144 | * into entire pde. */ | ||
145 | if (pde_lo == pde_hi) | ||
146 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | ||
147 | vaddr_hi, false, refplus); | ||
148 | else if (i == pde_lo) | ||
149 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | ||
150 | PDE_ADDR_END(vaddr, pde_shift), false, | ||
151 | refplus); | ||
152 | else | ||
153 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | ||
154 | PDE_ADDR_START(vaddr_hi, pde_shift), | ||
155 | vaddr_hi, false, | ||
156 | refplus); | ||
157 | } | ||
158 | } | ||
159 | |||
160 | gk20a_mm_l2_flush(mm->g, true); | ||
161 | |||
162 | return 0; | ||
163 | |||
164 | fail: | ||
165 | WARN_ON(1); | ||
166 | |||
167 | return err; | ||
168 | } | ||
169 | |||
170 | static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, | 24 | static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, |
171 | const unsigned int msec) | 25 | const unsigned int msec) |
172 | { | 26 | { |
@@ -249,9 +103,14 @@ static u32 gm20b_mm_get_big_page_sizes(void) | |||
249 | return SZ_64K | SZ_128K; | 103 | return SZ_64K | SZ_128K; |
250 | } | 104 | } |
251 | 105 | ||
106 | static bool gm20b_mm_support_sparse(struct gk20a *g) | ||
107 | { | ||
108 | return true; | ||
109 | } | ||
110 | |||
252 | void gm20b_init_mm(struct gpu_ops *gops) | 111 | void gm20b_init_mm(struct gpu_ops *gops) |
253 | { | 112 | { |
254 | gops->mm.set_sparse = gm20b_vm_put_sparse; | 113 | gops->mm.support_sparse = gm20b_mm_support_sparse; |
255 | gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; | 114 | gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; |
256 | gops->mm.gmmu_map = gk20a_locked_gmmu_map; | 115 | gops->mm.gmmu_map = gk20a_locked_gmmu_map; |
257 | gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; | 116 | gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; |
@@ -265,4 +124,6 @@ void gm20b_init_mm(struct gpu_ops *gops) | |||
265 | gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; | 124 | gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; |
266 | gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; | 125 | gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; |
267 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; | 126 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; |
127 | gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels; | ||
128 | gops->mm.init_pdb = gk20a_mm_init_pdb; | ||
268 | } | 129 | } |
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 57814f1b..9b7c7dbd 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -40,8 +40,8 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g) | |||
40 | 40 | ||
41 | /* gk20a_init_gpu_characteristics expects this to be populated */ | 41 | /* gk20a_init_gpu_characteristics expects this to be populated */ |
42 | vm->big_page_size = big_page_size; | 42 | vm->big_page_size = big_page_size; |
43 | vm->pde_stride = vm->big_page_size << 10; | 43 | vm->mmu_levels = (vm->big_page_size == SZ_64K) ? |
44 | vm->pde_stride_shift = ilog2(vm->pde_stride); | 44 | gk20a_mm_levels_64k : gk20a_mm_levels_128k; |
45 | 45 | ||
46 | mm->sw_ready = true; | 46 | mm->sw_ready = true; |
47 | 47 | ||
@@ -65,7 +65,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
65 | u32 ctag_offset, | 65 | u32 ctag_offset, |
66 | u32 flags, | 66 | u32 flags, |
67 | int rw_flag, | 67 | int rw_flag, |
68 | bool clear_ctags) | 68 | bool clear_ctags, |
69 | bool sparse) | ||
69 | { | 70 | { |
70 | int err = 0; | 71 | int err = 0; |
71 | struct device *d = dev_from_vm(vm); | 72 | struct device *d = dev_from_vm(vm); |
@@ -128,7 +129,8 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm, | |||
128 | u64 size, | 129 | u64 size, |
129 | int pgsz_idx, | 130 | int pgsz_idx, |
130 | bool va_allocated, | 131 | bool va_allocated, |
131 | int rw_flag) | 132 | int rw_flag, |
133 | bool sparse) | ||
132 | { | 134 | { |
133 | struct gk20a *g = gk20a_from_vm(vm); | 135 | struct gk20a *g = gk20a_from_vm(vm); |
134 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | 136 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); |