diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-01-29 14:29:56 -0500 |
---|---|---|
committer | Deepak Nibade <dnibade@nvidia.com> | 2016-12-27 04:52:05 -0500 |
commit | ac0cd782ab539d3a89ac2cc50955f80b8be456d1 (patch) | |
tree | e7feacdff08a9798db8ff9bf343f6895eb6d3035 /drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |
parent | 1fcd7fd547daac5374993f243fad77a822a5a048 (diff) |
gpu: nvgpu: gp10b: Implement new page table format
Implement the 5-level Pascal page table format. It is enabled
only for simulation.
Change-Id: I6767fac8b52fe0f6a2e2f86312de5fc93af6518e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/682114
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/mm_gp10b.c')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 197 |
1 file changed, 197 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index e3e2c173..1608b176 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include "rpfb_gp10b.h" | 20 | #include "rpfb_gp10b.h" |
21 | #include "hw_ram_gp10b.h" | 21 | #include "hw_ram_gp10b.h" |
22 | #include "hw_bus_gp10b.h" | 22 | #include "hw_bus_gp10b.h" |
23 | #include "hw_gmmu_gp10b.h" | ||
24 | #include "gk20a/semaphore_gk20a.h" | ||
23 | 25 | ||
24 | static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g) | 26 | static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g) |
25 | { | 27 | { |
@@ -138,6 +140,197 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl, | |||
138 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); | 140 | return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl)); |
139 | } | 141 | } |
140 | 142 | ||
143 | u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i) | ||
144 | { | ||
145 | return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_pde__size_v()); | ||
146 | } | ||
147 | |||
148 | static int update_gmmu_pde3_locked(struct vm_gk20a *vm, | ||
149 | struct gk20a_mm_entry *parent, | ||
150 | u32 i, u32 gmmu_pgsz_idx, | ||
151 | u64 iova, | ||
152 | u32 kind_v, u32 *ctag, | ||
153 | bool cacheable, bool unmapped_pte, | ||
154 | int rw_flag, bool sparse, u32 flags) | ||
155 | { | ||
156 | u64 pte_addr = 0; | ||
157 | u64 pde_addr = 0; | ||
158 | struct gk20a_mm_entry *pte = parent->entries + i; | ||
159 | u32 pde_v[2] = {0, 0}; | ||
160 | u32 *pde; | ||
161 | struct gk20a *g = vm->mm->g; | ||
162 | |||
163 | gk20a_dbg_fn(""); | ||
164 | |||
165 | pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0) | ||
166 | >> gmmu_new_pde_address_shift_v(); | ||
167 | pde_addr = g->ops.mm.get_iova_addr(g, parent->sgt->sgl, 0); | ||
168 | |||
169 | pde_v[0] |= gmmu_new_pde_aperture_video_memory_f(); | ||
170 | pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr)); | ||
171 | |||
172 | pde = pde3_from_index(parent, i); | ||
173 | |||
174 | gk20a_mem_wr32(pde, 0, pde_v[0]); | ||
175 | gk20a_mem_wr32(pde, 1, pde_v[1]); | ||
176 | |||
177 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", | ||
178 | i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); | ||
179 | gk20a_dbg_fn("done"); | ||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i) | ||
184 | { | ||
185 | return (u32 *) (((u8 *)entry->cpu_va) + i*gmmu_new_dual_pde__size_v()); | ||
186 | } | ||
187 | |||
188 | static int update_gmmu_pde0_locked(struct vm_gk20a *vm, | ||
189 | struct gk20a_mm_entry *pte, | ||
190 | u32 i, u32 gmmu_pgsz_idx, | ||
191 | u64 iova, | ||
192 | u32 kind_v, u32 *ctag, | ||
193 | bool cacheable, bool unmapped_pte, | ||
194 | int rw_flag, bool sparse, u32 flags) | ||
195 | { | ||
196 | bool small_valid, big_valid; | ||
197 | u32 pte_addr_small = 0, pte_addr_big = 0; | ||
198 | struct gk20a_mm_entry *entry = pte->entries + i; | ||
199 | u32 pde_v[4] = {0, 0, 0, 0}; | ||
200 | u32 *pde; | ||
201 | struct gk20a *g = vm->mm->g; | ||
202 | |||
203 | gk20a_dbg_fn(""); | ||
204 | |||
205 | small_valid = entry->size && entry->pgsz == gmmu_page_size_small; | ||
206 | big_valid = entry->size && entry->pgsz == gmmu_page_size_big; | ||
207 | |||
208 | if (small_valid) | ||
209 | pte_addr_small = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0) | ||
210 | >> gmmu_new_dual_pde_address_shift_v(); | ||
211 | |||
212 | if (big_valid) | ||
213 | pte_addr_big = g->ops.mm.get_iova_addr(g, entry->sgt->sgl, 0) | ||
214 | >> gmmu_new_dual_pde_address_big_shift_v(); | ||
215 | |||
216 | if (small_valid) { | ||
217 | pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small); | ||
218 | pde_v[2] |= gmmu_new_dual_pde_aperture_small_video_memory_f(); | ||
219 | pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); | ||
220 | } | ||
221 | |||
222 | if (big_valid) { | ||
223 | pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big); | ||
224 | pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); | ||
225 | pde_v[0] |= gmmu_new_dual_pde_aperture_big_video_memory_f(); | ||
226 | } | ||
227 | |||
228 | pde = pde0_from_index(pte, i); | ||
229 | |||
230 | gk20a_mem_wr32(pde, 0, pde_v[0]); | ||
231 | gk20a_mem_wr32(pde, 1, pde_v[1]); | ||
232 | gk20a_mem_wr32(pde, 2, pde_v[2]); | ||
233 | gk20a_mem_wr32(pde, 3, pde_v[3]); | ||
234 | |||
235 | gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]", | ||
236 | i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]); | ||
237 | gk20a_dbg_fn("done"); | ||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | static int update_gmmu_pte_locked(struct vm_gk20a *vm, | ||
242 | struct gk20a_mm_entry *pte, | ||
243 | u32 i, u32 gmmu_pgsz_idx, | ||
244 | u64 iova, | ||
245 | u32 kind_v, u32 *ctag, | ||
246 | bool cacheable, bool unmapped_pte, | ||
247 | int rw_flag, bool sparse, u32 flags) | ||
248 | { | ||
249 | u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; | ||
250 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | ||
251 | |||
252 | gk20a_dbg_fn(""); | ||
253 | |||
254 | if (iova) { | ||
255 | pte_w[0] = gmmu_new_pte_valid_true_f() | | ||
256 | gmmu_new_pte_address_sys_f(iova | ||
257 | >> gmmu_new_pte_address_shift_v()); | ||
258 | pte_w[1] = gmmu_new_pte_aperture_video_memory_f() | | ||
259 | gmmu_new_pte_kind_f(kind_v) | | ||
260 | gmmu_new_pte_comptagline_f(*ctag / SZ_128K); | ||
261 | |||
262 | if (rw_flag == gk20a_mem_flag_read_only) | ||
263 | pte_w[0] |= gmmu_new_pte_read_only_true_f(); | ||
264 | if (!cacheable) | ||
265 | pte_w[1] |= gmmu_new_pte_vol_true_f(); | ||
266 | |||
267 | gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d" | ||
268 | " ctag=%d vol=%d" | ||
269 | " [0x%08x, 0x%08x]", | ||
270 | i, iova, | ||
271 | kind_v, *ctag, !cacheable, | ||
272 | pte_w[1], pte_w[0]); | ||
273 | |||
274 | if (*ctag) | ||
275 | *ctag += page_size; | ||
276 | } else if (sparse) { | ||
277 | pte_w[0] = gmmu_new_pte_valid_false_f(); | ||
278 | pte_w[1] |= gmmu_new_pte_vol_true_f(); | ||
279 | } else { | ||
280 | gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); | ||
281 | } | ||
282 | |||
283 | gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]); | ||
284 | gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]); | ||
285 | |||
286 | gk20a_dbg_fn("done"); | ||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | const struct gk20a_mmu_level gp10b_mm_levels[] = { | ||
291 | {.hi_bit = {48, 48}, | ||
292 | .lo_bit = {47, 47}, | ||
293 | .update_entry = update_gmmu_pde3_locked, | ||
294 | .entry_size = 8}, | ||
295 | {.hi_bit = {46, 46}, | ||
296 | .lo_bit = {38, 38}, | ||
297 | .update_entry = update_gmmu_pde3_locked, | ||
298 | .entry_size = 8}, | ||
299 | {.hi_bit = {37, 37}, | ||
300 | .lo_bit = {29, 29}, | ||
301 | .update_entry = update_gmmu_pde3_locked, | ||
302 | .entry_size = 8}, | ||
303 | {.hi_bit = {28, 28}, | ||
304 | .lo_bit = {21, 21}, | ||
305 | .update_entry = update_gmmu_pde0_locked, | ||
306 | .entry_size = 16}, | ||
307 | {.hi_bit = {20, 20}, | ||
308 | .lo_bit = {12, 16}, | ||
309 | .update_entry = update_gmmu_pte_locked, | ||
310 | .entry_size = 8}, | ||
311 | {.update_entry = NULL} | ||
312 | }; | ||
313 | |||
314 | const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, u32 big_page_size) | ||
315 | { | ||
316 | return gp10b_mm_levels; | ||
317 | } | ||
318 | |||
319 | static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) | ||
320 | { | ||
321 | u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); | ||
322 | u32 pdb_addr_hi = u64_hi32(pdb_addr); | ||
323 | |||
324 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), | ||
325 | ram_in_page_dir_base_target_vid_mem_f() | | ||
326 | ram_in_page_dir_base_vol_true_f() | | ||
327 | ram_in_page_dir_base_lo_f(pdb_addr_lo) | | ||
328 | 1 << 10); | ||
329 | |||
330 | gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), | ||
331 | ram_in_page_dir_base_hi_f(pdb_addr_hi)); | ||
332 | } | ||
333 | |||
141 | void gp10b_init_mm(struct gpu_ops *gops) | 334 | void gp10b_init_mm(struct gpu_ops *gops) |
142 | { | 335 | { |
143 | gm20b_init_mm(gops); | 336 | gm20b_init_mm(gops); |
@@ -146,4 +339,8 @@ void gp10b_init_mm(struct gpu_ops *gops) | |||
146 | gops->mm.init_bar2_vm = gb10b_init_bar2_vm; | 339 | gops->mm.init_bar2_vm = gb10b_init_bar2_vm; |
147 | gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup; | 340 | gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup; |
148 | gops->mm.get_iova_addr = gp10b_mm_iova_addr; | 341 | gops->mm.get_iova_addr = gp10b_mm_iova_addr; |
342 | if (tegra_platform_is_linsim()) { | ||
343 | gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels; | ||
344 | gops->mm.init_pdb = gp10b_mm_init_pdb; | ||
345 | } | ||
149 | } | 346 | } |