diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-01-14 11:54:26 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:08:16 -0400 |
commit | f3a920cb01d1517db5432c8062b660d6b60eb4de (patch) | |
tree | bc366a7df51745ea2d6b740395403cf2add2ebef /drivers/gpu/nvgpu/gm20b | |
parent | 8d1ab756ed8a7f4d3138dc5da9d2de9f52915261 (diff) |
gpu: nvgpu: Refactor page mapping code
Always pass the directory structure to mm functions instead of
pointers to its members. Also split update_gmmu_ptes_locked()
into smaller functions, and turn the hard-coded
MMU levels (PDE, PTE) into run-time parameters.
Change-Id: I315ef7aebbea1e61156705361f2e2a63b5fb7bf1
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/672485
Reviewed-by: Automatic_Commit_Validation_User
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/mm_gm20b.c | 155 |
1 file changed, 8 insertions, 147 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 37ab70fa..f85a1718 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c | |||
@@ -21,152 +21,6 @@ | |||
21 | #include "hw_gr_gm20b.h" | 21 | #include "hw_gr_gm20b.h" |
22 | #include "hw_ram_gm20b.h" | 22 | #include "hw_ram_gm20b.h" |
23 | 23 | ||
24 | static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm, | ||
25 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
26 | u64 first_vaddr, u64 last_vaddr, | ||
27 | bool clear, bool refplus) | ||
28 | { | ||
29 | int err; | ||
30 | u32 pte_lo, pte_hi; | ||
31 | u32 pde_lo, pde_hi; | ||
32 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | ||
33 | u64 addr = 0; | ||
34 | u32 pte_cur; | ||
35 | struct gk20a_mm_entry *entry; | ||
36 | struct gk20a *g = gk20a_from_vm(vm); | ||
37 | |||
38 | gk20a_dbg_fn(""); | ||
39 | |||
40 | pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, | ||
41 | &pde_lo, &pde_hi); | ||
42 | |||
43 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", | ||
44 | pgsz_idx, pde_lo, pde_hi); | ||
45 | |||
46 | /* Expect ptes of the same pde */ | ||
47 | BUG_ON(pde_lo != pde_hi); | ||
48 | |||
49 | entry = vm->pdb.entries + pde_lo; | ||
50 | |||
51 | pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); | ||
52 | pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx); | ||
53 | |||
54 | /* get cpu access to the ptes */ | ||
55 | err = map_gmmu_pages(entry); | ||
56 | if (err) | ||
57 | goto fail; | ||
58 | |||
59 | gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi); | ||
60 | for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) { | ||
61 | pte_w[0] = gmmu_pte_valid_false_f(); | ||
62 | pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f(); | ||
63 | |||
64 | gk20a_dbg(gpu_dbg_pte, | ||
65 | "pte_cur=%d addr=%llx" | ||
66 | " [0x%08x,0x%08x]", | ||
67 | pte_cur, addr, | ||
68 | pte_w[1], pte_w[0]); | ||
69 | |||
70 | gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 0, pte_w[0]); | ||
71 | gk20a_mem_wr32(entry->cpu_va + pte_cur*8, 1, pte_w[1]); | ||
72 | } | ||
73 | |||
74 | unmap_gmmu_pages(entry); | ||
75 | |||
76 | smp_mb(); | ||
77 | g->ops.mm.tlb_invalidate(vm); | ||
78 | |||
79 | return 0; | ||
80 | fail: | ||
81 | return err; | ||
82 | |||
83 | } | ||
84 | |||
85 | static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo, | ||
86 | u64 vaddr_hi, u32 pde) | ||
87 | { | ||
88 | u64 pde_vaddr_lo, pde_vaddr_hi; | ||
89 | |||
90 | gk20a_dbg_fn(""); | ||
91 | |||
92 | pde_vaddr_lo = (u64)pde << vm->pde_stride_shift; | ||
93 | pde_vaddr_hi = pde_vaddr_lo | | ||
94 | ((0x1UL << (vm->pde_stride_shift)) - 1); | ||
95 | |||
96 | return ((vaddr_lo <= pde_vaddr_lo) && (vaddr_hi) >= pde_vaddr_hi); | ||
97 | } | ||
98 | |||
99 | static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr, | ||
100 | u32 num_pages, u32 pgsz_idx, bool refplus) | ||
101 | { | ||
102 | struct mm_gk20a *mm = vm->mm; | ||
103 | u32 pgsz = vm->gmmu_page_sizes[pgsz_idx]; | ||
104 | u32 pde_shift = vm->pde_stride_shift; | ||
105 | u64 vaddr_hi; | ||
106 | u64 vaddr_pde_start; | ||
107 | u32 i; | ||
108 | u32 pde_lo, pde_hi; | ||
109 | int err; | ||
110 | |||
111 | gk20a_dbg_fn(""); | ||
112 | |||
113 | vaddr_hi = vaddr + pgsz * (u64)num_pages - 1; | ||
114 | pde_range_from_vaddr_range(vm, | ||
115 | vaddr, | ||
116 | vaddr_hi, | ||
117 | &pde_lo, &pde_hi); | ||
118 | |||
119 | gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, " | ||
120 | "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d", | ||
121 | vaddr, vaddr_hi, pde_lo, pde_hi, pgsz, | ||
122 | vm->pde_stride_shift); | ||
123 | |||
124 | for (i = pde_lo; i <= pde_hi; i++) { | ||
125 | /* Mark all ptes as sparse. */ | ||
126 | err = validate_gmmu_page_table_gk20a_locked(vm, i, | ||
127 | pgsz_idx); | ||
128 | if (err) { | ||
129 | gk20a_err(dev_from_vm(vm), | ||
130 | "failed to validate page table %d: %d", | ||
131 | i, err); | ||
132 | goto fail; | ||
133 | } | ||
134 | |||
135 | if (gm20b_vm_is_pde_in_range(vm, vaddr, vaddr_hi, i)) { | ||
136 | /* entire pde is marked as sparse */ | ||
137 | vaddr_pde_start = (u64)i << pde_shift; | ||
138 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | ||
139 | vaddr_pde_start, | ||
140 | PDE_ADDR_END(vaddr_pde_start, | ||
141 | pde_shift), false, refplus); | ||
142 | } else { | ||
143 | /* Check leading and trailing spaces which doesn't fit | ||
144 | * into entire pde. */ | ||
145 | if (pde_lo == pde_hi) | ||
146 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | ||
147 | vaddr_hi, false, refplus); | ||
148 | else if (i == pde_lo) | ||
149 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr, | ||
150 | PDE_ADDR_END(vaddr, pde_shift), false, | ||
151 | refplus); | ||
152 | else | ||
153 | allocate_gmmu_ptes_sparse(vm, pgsz_idx, | ||
154 | PDE_ADDR_START(vaddr_hi, pde_shift), | ||
155 | vaddr_hi, false, | ||
156 | refplus); | ||
157 | } | ||
158 | } | ||
159 | |||
160 | gk20a_mm_l2_flush(mm->g, true); | ||
161 | |||
162 | return 0; | ||
163 | |||
164 | fail: | ||
165 | WARN_ON(1); | ||
166 | |||
167 | return err; | ||
168 | } | ||
169 | |||
170 | static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, | 24 | static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g, |
171 | const unsigned int msec) | 25 | const unsigned int msec) |
172 | { | 26 | { |
@@ -249,9 +103,14 @@ static u32 gm20b_mm_get_big_page_sizes(void) | |||
249 | return SZ_64K | SZ_128K; | 103 | return SZ_64K | SZ_128K; |
250 | } | 104 | } |
251 | 105 | ||
106 | static bool gm20b_mm_support_sparse(struct gk20a *g) | ||
107 | { | ||
108 | return true; | ||
109 | } | ||
110 | |||
252 | void gm20b_init_mm(struct gpu_ops *gops) | 111 | void gm20b_init_mm(struct gpu_ops *gops) |
253 | { | 112 | { |
254 | gops->mm.set_sparse = gm20b_vm_put_sparse; | 113 | gops->mm.support_sparse = gm20b_mm_support_sparse; |
255 | gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; | 114 | gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled; |
256 | gops->mm.gmmu_map = gk20a_locked_gmmu_map; | 115 | gops->mm.gmmu_map = gk20a_locked_gmmu_map; |
257 | gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; | 116 | gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; |
@@ -265,4 +124,6 @@ void gm20b_init_mm(struct gpu_ops *gops) | |||
265 | gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; | 124 | gops->mm.set_big_page_size = gm20b_mm_set_big_page_size; |
266 | gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; | 125 | gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes; |
267 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; | 126 | gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits; |
127 | gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels; | ||
128 | gops->mm.init_pdb = gk20a_mm_init_pdb; | ||
268 | } | 129 | } |