Diffstat (limited to 'drivers/gpu'):

 drivers/gpu/nvgpu/common/mm/gmmu.c      | 138
 drivers/gpu/nvgpu/common/mm/vm.c        | 344
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c |   4
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c      | 485
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h      |   2
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c      |   2
 drivers/gpu/nvgpu/include/nvgpu/gmmu.h  |   6
 drivers/gpu/nvgpu/include/nvgpu/vm.h    |   6
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c        |   4
 9 files changed, 493 insertions(+), 498 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index a2ed3f3a..695347bc 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -15,12 +15,150 @@
  */
 
 #include <nvgpu/log.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/gmmu.h>
 #include <nvgpu/nvgpu_mem.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
+				 struct gk20a_mm_entry *entry)
+{
+	u32 num_pages = 1 << order;
+	u32 len = num_pages * PAGE_SIZE;
+	int err;
+	struct page *pages;
+	struct gk20a *g = vm->mm->g;
+
+	/* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */
+
+	pages = alloc_pages(GFP_KERNEL, order);
+	if (!pages) {
+		nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed");
+		goto err_out;
+	}
+	entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
+	if (!entry->mem.priv.sgt) {
+		nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table");
+		goto err_alloced;
+	}
+	err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
+	if (err) {
+		nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed");
+		goto err_sg_table;
+	}
+	sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
+	entry->mem.cpu_va = page_address(pages);
+	memset(entry->mem.cpu_va, 0, len);
+	entry->mem.size = len;
+	entry->mem.aperture = APERTURE_SYSMEM;
+	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
+			 sg_phys(entry->mem.priv.sgt->sgl), len);
+
+	return 0;
+
+err_sg_table:
+	nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
+err_alloced:
+	__free_pages(pages, order);
+err_out:
+	return -ENOMEM;
+}
+
+static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
+				  struct gk20a_mm_entry *entry)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	u32 num_pages = 1 << order;
+	u32 len = num_pages * PAGE_SIZE;
+	int err;
+
+	if (g->is_fmodel)
+		return alloc_gmmu_phys_pages(vm, order, entry);
+
+	/*
+	 * On arm32 we're limited by vmalloc space, so we do not map pages by
+	 * default.
+	 */
+	if (IS_ENABLED(CONFIG_ARM64))
+		err = nvgpu_dma_alloc(g, len, &entry->mem);
+	else
+		err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
+					    len, &entry->mem);
+
+
+	if (err) {
+		nvgpu_err(g, "memory allocation failed");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a physically contiguous region big enough for a full-
+ * sized GMMU page table for the given gmmu_page_size.
+ * The whole range is zeroed so it's "invalid"/will fault.
+ *
+ * If a previous entry is supplied, its memory will be used for
+ * suballocation for this next entry too, if there is space.
+ */
+int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
+				 enum gmmu_pgsz_gk20a pgsz_idx,
+				 const struct gk20a_mmu_level *l,
+				 struct gk20a_mm_entry *entry,
+				 struct gk20a_mm_entry *prev_entry)
+{
+	int err = -ENOMEM;
+	int order;
+	struct gk20a *g = gk20a_from_vm(vm);
+	u32 bytes;
+
+	/* allocate enough pages for the table */
+	order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
+	order += ilog2(l->entry_size);
+	bytes = 1 << order;
+	order -= PAGE_SHIFT;
+	if (order < 0 && prev_entry) {
+		/* try to suballocate from previous chunk */
+		u32 capacity = prev_entry->mem.size / bytes;
+		u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
+		u32 free = capacity - prev - 1;
+
+		nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
+			  capacity, prev, free, bytes);
+
+		if (free) {
+			memcpy(&entry->mem, &prev_entry->mem,
+			       sizeof(entry->mem));
+			entry->woffset = prev_entry->woffset
+				+ bytes / sizeof(u32);
+			err = 0;
+		}
+	}
+
+	if (err) {
+		/* no suballoc space */
+		order = max(0, order);
+		err = nvgpu_alloc_gmmu_pages(vm, order, entry);
+		entry->woffset = 0;
+	}
+
+	nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
+		  entry,
+		  (entry->mem.priv.sgt &&
+		   entry->mem.aperture == APERTURE_SYSMEM) ?
+		  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
+		  order, entry->woffset);
+	if (err)
+		return err;
+	entry->pgsz = pgsz_idx;
+	entry->mem.skip_wmb = true;
+
+	return err;
+}
+
 /*
  * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
  * VA will be allocated for you. If addr is non-zero then the buffer will be
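The table-sizing arithmetic in nvgpu_zalloc_gmmu_page_table() above is easy to misread: "order" starts out as the log2 of the table size in bytes (index bits plus ilog2(entry_size)) and only becomes a page-allocation order once PAGE_SHIFT is subtracted, so a negative value means the table is smaller than one page and can be suballocated out of the previous entry's memory. A minimal standalone sketch of that computation follows; the level parameters (4 index bits, 8-byte entries, a 4 KB previous chunk) are illustrative assumptions, not values taken from a real gk20a_mmu_level table.

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)

int main(void)
{
        /* Hypothetical level: 4 index bits (VA bits 26..29), 8-byte entries. */
        int hi_bit = 29, lo_bit = 26;

        int order = hi_bit - lo_bit + 1;        /* log2(number of entries)   */
        order += 3;                             /* + ilog2(8-byte entry)     */
        unsigned int bytes = 1u << order;       /* table size: 128 bytes     */
        order -= PAGE_SHIFT;                    /* page order: -5 (< 1 page) */

        printf("table bytes = %u, page order = %d\n", bytes, order);

        if (order < 0) {
                /*
                 * Suballocate from a previously allocated 4 KB chunk, as the
                 * driver does. 'woffset' counts 32-bit words into the chunk,
                 * hence the sizeof(u32) scaling in the original code.
                 */
                unsigned int prev_size = PAGE_SIZE;   /* prev_entry->mem.size */
                unsigned int prev_woffset = 0;        /* first table in chunk */

                unsigned int capacity = prev_size / bytes;        /* 32 tables */
                unsigned int prev = prev_woffset * 4 / bytes;     /* slot 0    */
                unsigned int free_slots = capacity - prev - 1;    /* 31 left   */
                unsigned int next_woffset = prev_woffset + bytes / 4;

                printf("capacity %u, free %u, next woffset %u words\n",
                       capacity, free_slots, next_woffset);
        }
        return 0;
}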
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3b3b7a10..e42c7c5a 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -14,6 +14,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <nvgpu/log.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
 #include <nvgpu/lock.h>
@@ -23,6 +25,7 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
+#include "gk20a/platform_gk20a.h"
 
 int vm_aspace_id(struct vm_gk20a *vm)
 {
@@ -104,6 +107,341 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
 
+static int nvgpu_vm_init_page_tables(struct vm_gk20a *vm)
+{
+	u32 pde_lo, pde_hi;
+	int err;
+
+	pde_range_from_vaddr_range(vm,
+				   0, vm->va_limit - 1,
+				   &pde_lo, &pde_hi);
+	vm->pdb.entries = nvgpu_vzalloc(vm->mm->g,
+					sizeof(struct gk20a_mm_entry) *
+					(pde_hi + 1));
+	vm->pdb.num_entries = pde_hi + 1;
+
+	if (!vm->pdb.entries)
+		return -ENOMEM;
+
+	err = nvgpu_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0],
+					   &vm->pdb, NULL);
+	if (err) {
+		nvgpu_vfree(vm->mm->g, vm->pdb.entries);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Determine if the passed address space can support big pages or not.
+ */
+int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
+{
+	u64 mask = ((u64)vm->big_page_size << 10) - 1;
+
+	if (base & mask || size & mask)
+		return 0;
+	return 1;
+}
+
+/*
+ * Initialize a semaphore pool. Just return successfully if we do not need
+ * semaphores (i.e. when sync-pts are active).
+ */
+static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
+{
+	struct nvgpu_semaphore_sea *sema_sea;
+	struct mm_gk20a *mm = vm->mm;
+	struct gk20a *g = mm->g;
+	int err;
+
+	/*
+	 * Don't waste the memory on semaphores if we don't need them.
+	 */
+	if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)
+		return 0;
+
+	if (vm->sema_pool)
+		return 0;
+
+	sema_sea = nvgpu_semaphore_sea_create(g);
+	if (!sema_sea)
+		return -ENOMEM;
+
+	vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea);
+	if (!vm->sema_pool)
+		return -ENOMEM;
+
+	/*
+	 * Allocate a chunk of GPU VA space for mapping the semaphores. We will
+	 * do a fixed alloc in the kernel VM so that all channels have the same
+	 * RO address range for the semaphores.
+	 *
+	 * !!! TODO: cleanup.
+	 */
+	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
+					     vm->va_limit -
+					     mm->channel.kernel_size,
+					     512 * PAGE_SIZE,
+					     SZ_4K);
+	if (!sema_sea->gpu_va) {
+		nvgpu_free(&vm->kernel, sema_sea->gpu_va);
+		nvgpu_vm_put(vm);
+		return -ENOMEM;
+	}
+
+	err = nvgpu_semaphore_pool_map(vm->sema_pool, vm);
+	if (err) {
+		nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
+		nvgpu_free(vm->vma[gmmu_page_size_small],
+			   vm->sema_pool->gpu_va);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * nvgpu_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *             the address space).
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - Ignored. Will be set based on other passed params.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefore whatever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always present at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
+int nvgpu_init_vm(struct mm_gk20a *mm,
+		  struct vm_gk20a *vm,
+		  u32 big_page_size,
+		  u64 low_hole,
+		  u64 kernel_reserved,
+		  u64 aperture_size,
+		  bool big_pages,
+		  bool userspace_managed,
+		  char *name)
+{
+	int err;
+	char alloc_name[32];
+	u64 kernel_vma_flags;
+	u64 user_vma_start, user_vma_limit;
+	u64 user_lp_vma_start, user_lp_vma_limit;
+	u64 kernel_vma_start, kernel_vma_limit;
+	struct gk20a *g = mm->g;
+	struct gk20a_platform *p = gk20a_get_platform(g->dev);
+
+	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
+		return -ENOMEM;
+
+	nvgpu_log_info(g, "Init space for %s: valimit=0x%llx, "
+		       "LP size=0x%x lowhole=0x%llx",
+		       name, aperture_size,
+		       (unsigned int)big_page_size, low_hole);
+
+	vm->mm = mm;
+
+	vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K;
+	vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size;
+	vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;
+
+	/* Set up vma pointers. */
+	vm->vma[gmmu_page_size_small] = &vm->user;
+	vm->vma[gmmu_page_size_big] = &vm->user;
+	vm->vma[gmmu_page_size_kernel] = &vm->kernel;
+	if (!p->unify_address_spaces)
+		vm->vma[gmmu_page_size_big] = &vm->user_lp;
+
+	vm->va_start = low_hole;
+	vm->va_limit = aperture_size;
+	vm->big_pages = big_pages;
+
+	vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
+	vm->userspace_managed = userspace_managed;
+	vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
+
+	/* Initialize the page table data structures. */
+	err = nvgpu_vm_init_page_tables(vm);
+	if (err)
+		return err;
+
+	/* Setup vma limits. */
+	if (kernel_reserved + low_hole < aperture_size) {
+		if (p->unify_address_spaces) {
+			user_vma_start = low_hole;
+			user_vma_limit = vm->va_limit - kernel_reserved;
+			user_lp_vma_start = user_vma_limit;
+			user_lp_vma_limit = user_vma_limit;
+		} else {
+			user_vma_start = low_hole;
+			user_vma_limit = __nv_gmmu_va_small_page_limit();
+			user_lp_vma_start = __nv_gmmu_va_small_page_limit();
+			user_lp_vma_limit = vm->va_limit - kernel_reserved;
+		}
+	} else {
+		user_vma_start = 0;
+		user_vma_limit = 0;
+		user_lp_vma_start = 0;
+		user_lp_vma_limit = 0;
+	}
+	kernel_vma_start = vm->va_limit - kernel_reserved;
+	kernel_vma_limit = vm->va_limit;
+
+	nvgpu_log_info(g, "user_vma [0x%llx,0x%llx)",
+		       user_vma_start, user_vma_limit);
+	nvgpu_log_info(g, "user_lp_vma [0x%llx,0x%llx)",
+		       user_lp_vma_start, user_lp_vma_limit);
+	nvgpu_log_info(g, "kernel_vma [0x%llx,0x%llx)",
+		       kernel_vma_start, kernel_vma_limit);
+
+	if (WARN_ON(user_vma_start > user_vma_limit) ||
+	    WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
+	    WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
+		err = -EINVAL;
+		goto clean_up_page_tables;
+	}
+
+	kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
+		0 : GPU_ALLOC_GVA_SPACE;
+
+	/*
+	 * A "user" area only makes sense for the GVA spaces. For VMs where
+	 * there is no "user" area user_vma_start will be equal to
+	 * user_vma_limit (i.e. a 0 sized space). In such a situation the kernel
+	 * area must be non-zero in length.
+	 */
+	if (user_vma_start >= user_vma_limit &&
+	    kernel_vma_start >= kernel_vma_limit) {
+		err = -EINVAL;
+		goto clean_up_page_tables;
+	}
+
+	/*
+	 * Determine if big pages are possible in this VM. If a split address
+	 * space is used then check the user_lp vma instead of the user vma.
+	 */
+	if (p->unify_address_spaces)
+		vm->big_pages = nvgpu_big_pages_possible(vm, user_vma_start,
+					user_vma_limit - user_vma_start);
+	else
+		vm->big_pages = nvgpu_big_pages_possible(vm, user_lp_vma_start,
+					user_lp_vma_limit - user_lp_vma_start);
+
+	/*
+	 * User VMA.
+	 */
+	if (user_vma_start < user_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
+		err = __nvgpu_buddy_allocator_init(g, &vm->user,
+						   vm, alloc_name,
+						   user_vma_start,
+						   user_vma_limit -
+						   user_vma_start,
+						   SZ_4K,
+						   GPU_BALLOC_MAX_ORDER,
+						   GPU_ALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_page_tables;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
+	}
+
+	/*
+	 * User VMA for large pages when a split address range is used.
+	 */
+	if (user_lp_vma_start < user_lp_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
+		err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
+						   vm, alloc_name,
+						   user_lp_vma_start,
+						   user_lp_vma_limit -
+						   user_lp_vma_start,
+						   vm->big_page_size,
+						   GPU_BALLOC_MAX_ORDER,
+						   GPU_ALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_allocators;
+	}
+
+	/*
+	 * Kernel VMA. Must always exist for an address space.
+	 */
+	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
+	err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
+					   vm, alloc_name,
+					   kernel_vma_start,
+					   kernel_vma_limit - kernel_vma_start,
+					   SZ_4K,
+					   GPU_BALLOC_MAX_ORDER,
+					   kernel_vma_flags);
+	if (err)
+		goto clean_up_allocators;
+
+	vm->mapped_buffers = NULL;
+
+	nvgpu_mutex_init(&vm->update_gmmu_lock);
+	kref_init(&vm->ref);
+	nvgpu_init_list_node(&vm->vm_area_list);
+
+	/*
+	 * This is only necessary for channel address spaces. The best way to
+	 * distinguish channel address spaces from other address spaces is by
+	 * size - if the address space is 4GB or less, it's not a channel.
+	 */
+	if (vm->va_limit > SZ_4G) {
+		err = nvgpu_init_sema_pool(vm);
+		if (err)
+			goto clean_up_allocators;
+	}
+
+	return 0;
+
+clean_up_allocators:
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->user_lp))
+		nvgpu_alloc_destroy(&vm->user_lp);
+clean_up_page_tables:
+	/* Cleans up nvgpu_vm_init_page_tables() */
+	nvgpu_vfree(g, vm->pdb.entries);
+	free_gmmu_pages(vm, &vm->pdb);
+	return err;
+}
+
 void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
 {
 	struct nvgpu_mapped_buf *mapped_buffer;
@@ -111,8 +449,6 @@ void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
 	struct nvgpu_rbtree_node *node = NULL;
 	struct gk20a *g = vm->mm->g;
 
-	gk20a_dbg_fn("");
-
 	/*
 	 * Do this outside of the update_gmmu_lock since unmapping the semaphore
 	 * pool involves unmapping a GMMU mapping which means acquiring the
@@ -172,12 +508,10 @@ void nvgpu_vm_put(struct vm_gk20a *vm)
 	kref_put(&vm->ref, nvgpu_vm_remove_support_kref);
 }
 
-void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
+void nvgpu_vm_remove(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
 {
 	struct gk20a *g = vm->mm->g;
 
-	gk20a_dbg_fn("");
-
 	gk20a_free_inst_block(g, inst_block);
 	nvgpu_vm_remove_support_nofree(vm);
 }
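The VA carving performed by nvgpu_init_vm() reduces to a few subtractions once the hunks above are read together. A standalone sketch of the unified-address-space case follows; the sizes (1 MB low hole, 4 GB kernel reserve, 128 GB aperture) are illustrative assumptions, not values the driver uses.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t low_hole        = 1ull << 20;  /* 1 MB   */
        uint64_t kernel_reserved = 1ull << 32;  /* 4 GB   */
        uint64_t aperture_size   = 1ull << 37;  /* 128 GB */

        if (kernel_reserved + low_hole > aperture_size)
                return 1; /* invalid configuration, as the WARN_ON rejects */

        /* User range: everything between the low hole and the kernel carve-out. */
        uint64_t user_vma_start = low_hole;
        uint64_t user_vma_limit = aperture_size - kernel_reserved;

        /* Kernel range: always the top of the aperture. */
        uint64_t kernel_vma_start = aperture_size - kernel_reserved;
        uint64_t kernel_vma_limit = aperture_size;

        printf("user   [0x%llx, 0x%llx)\n",
               (unsigned long long)user_vma_start,
               (unsigned long long)user_vma_limit);
        printf("kernel [0x%llx, 0x%llx)\n",
               (unsigned long long)kernel_vma_start,
               (unsigned long long)kernel_vma_limit);
        return 0;
}

In the split (non-unified) case the function instead ends the small-page user range at __nv_gmmu_va_small_page_limit() and gives big pages their own user_lp range running up to the kernel carve-out.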
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 54317195..82bf7b3e 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1924,7 +1924,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 err_unmap:
 	nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
 err_remove_vm:
-	nvgpu_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_vm_remove(vm, &mm->perfbuf.inst_block);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -1962,7 +1962,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 	err = gk20a_perfbuf_disable_locked(g);
 
 	nvgpu_vm_unmap_buffer(vm, offset, NULL);
-	nvgpu_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_vm_remove(vm, &mm->perfbuf.inst_block);
 
 	g->perfbuf.owner = NULL;
 	g->perfbuf.offset = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 2642a0b1..17f1622f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -476,9 +476,9 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 		g->ops.mm.remove_bar2_vm(g);
 
 	if (g->ops.mm.is_bar1_supported(g))
-		nvgpu_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
+		nvgpu_vm_remove(&mm->bar1.vm, &mm->bar1.inst_block);
 
-	nvgpu_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
+	nvgpu_vm_remove(&mm->pmu.vm, &mm->pmu.inst_block);
 	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
 	nvgpu_vm_remove_support_nofree(&mm->cde.vm);
 
@@ -779,52 +779,6 @@ void gk20a_init_mm_ce_context(struct gk20a *g)
 #endif
 }
 
-static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
-				 struct gk20a_mm_entry *entry)
-{
-	u32 num_pages = 1 << order;
-	u32 len = num_pages * PAGE_SIZE;
-	int err;
-	struct page *pages;
-	struct gk20a *g = vm->mm->g;
-
-	gk20a_dbg_fn("");
-
-	/* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */
-
-	pages = alloc_pages(GFP_KERNEL, order);
-	if (!pages) {
-		gk20a_dbg(gpu_dbg_pte, "alloc_pages failed");
-		goto err_out;
-	}
-	entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
-	if (!entry->mem.priv.sgt) {
-		gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
-		goto err_alloced;
-	}
-	err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
-	if (err) {
-		gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed");
-		goto err_sg_table;
-	}
-	sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
-	entry->mem.cpu_va = page_address(pages);
-	memset(entry->mem.cpu_va, 0, len);
-	entry->mem.size = len;
-	entry->mem.aperture = APERTURE_SYSMEM;
-	FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-			 sg_phys(entry->mem.priv.sgt->sgl), len);
-
-	return 0;
-
-err_sg_table:
-	nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
-err_alloced:
-	__free_pages(pages, order);
-err_out:
-	return -ENOMEM;
-}
-
 static void free_gmmu_phys_pages(struct vm_gk20a *vm,
 				 struct gk20a_mm_entry *entry)
 {
@@ -857,38 +811,6 @@ static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry)
 			 entry->mem.priv.sgt->sgl->length);
 }
 
-static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
-			    struct gk20a_mm_entry *entry)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	u32 num_pages = 1 << order;
-	u32 len = num_pages * PAGE_SIZE;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (g->is_fmodel)
-		return alloc_gmmu_phys_pages(vm, order, entry);
-
-	/*
-	 * On arm32 we're limited by vmalloc space, so we do not map pages by
-	 * default.
-	 */
-	if (IS_ENABLED(CONFIG_ARM64))
-		err = nvgpu_dma_alloc(g, len, &entry->mem);
-	else
-		err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
-					    len, &entry->mem);
-
-
-	if (err) {
-		nvgpu_err(g, "memory allocation failed");
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
 void free_gmmu_pages(struct vm_gk20a *vm,
 		     struct gk20a_mm_entry *entry)
 {
@@ -955,72 +877,6 @@ void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 	}
 }
 
-/*
- * Allocate a physically contiguous region big enough for a full-
- * sized GMMU page table for the given gmmu_page_size.
- * The whole range is zeroed so it's "invalid"/will fault.
- *
- * If a previous entry is supplied, its memory will be used for
- * suballocation for this next entry too, if there is space.
- */
-
-static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm,
-					enum gmmu_pgsz_gk20a pgsz_idx,
-					const struct gk20a_mmu_level *l,
-					struct gk20a_mm_entry *entry,
-					struct gk20a_mm_entry *prev_entry)
-{
-	int err = -ENOMEM;
-	int order;
-	struct gk20a *g = gk20a_from_vm(vm);
-	u32 bytes;
-
-	gk20a_dbg_fn("");
-
-	/* allocate enough pages for the table */
-	order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
-	order += ilog2(l->entry_size);
-	bytes = 1 << order;
-	order -= PAGE_SHIFT;
-	if (order < 0 && prev_entry) {
-		/* try to suballocate from previous chunk */
-		u32 capacity = prev_entry->mem.size / bytes;
-		u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
-		u32 free = capacity - prev - 1;
-
-		gk20a_dbg(gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
-			  capacity, prev, free, bytes);
-
-		if (free) {
-			memcpy(&entry->mem, &prev_entry->mem,
-			       sizeof(entry->mem));
-			entry->woffset = prev_entry->woffset
-				+ bytes / sizeof(u32);
-			err = 0;
-		}
-	}
-
-	if (err) {
-		/* no suballoc space */
-		order = max(0, order);
-		err = alloc_gmmu_pages(vm, order, entry);
-		entry->woffset = 0;
-	}
-
-	gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
-		  entry,
-		  (entry->mem.priv.sgt &&
-		   entry->mem.aperture == APERTURE_SYSMEM) ?
-		  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
-		  order, entry->woffset);
-	if (err)
-		return err;
-	entry->pgsz = pgsz_idx;
-	entry->mem.skip_wmb = true;
-
-	return err;
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -2230,7 +2086,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		next_pte = pte->entries + pde_i;
 
 		if (!next_pte->mem.size) {
-			err = gk20a_zalloc_gmmu_page_table(vm,
+			err = nvgpu_zalloc_gmmu_page_table(vm,
 				pgsz_idx, next_l, next_pte, prev_pte);
 			if (err)
 				return err;
@@ -2523,75 +2379,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 };
 
 /*
- * Initialize a semaphore pool. Just return successfully if we do not need
- * semaphores (i.e. when sync-pts are active).
- */
-static int gk20a_init_sema_pool(struct vm_gk20a *vm)
-{
-	struct nvgpu_semaphore_sea *sema_sea;
-	struct mm_gk20a *mm = vm->mm;
-	struct gk20a *g = mm->g;
-	int err;
-
-	/*
-	 * Don't waste the memory on semaphores if we don't need them.
-	 */
-	if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)
-		return 0;
-
-	if (vm->sema_pool)
-		return 0;
-
-	sema_sea = nvgpu_semaphore_sea_create(g);
-	if (!sema_sea)
-		return -ENOMEM;
-
-	vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea);
-	if (!vm->sema_pool)
-		return -ENOMEM;
-
-	/*
-	 * Allocate a chunk of GPU VA space for mapping the semaphores. We will
-	 * do a fixed alloc in the kernel VM so that all channels have the same
-	 * RO address range for the semaphores.
-	 *
-	 * !!! TODO: cleanup.
-	 */
-	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
-					     vm->va_limit -
-					     mm->channel.kernel_size,
-					     512 * PAGE_SIZE,
-					     SZ_4K);
-	if (!sema_sea->gpu_va) {
-		nvgpu_free(&vm->kernel, sema_sea->gpu_va);
-		nvgpu_vm_put(vm);
-		return -ENOMEM;
-	}
-
-	err = nvgpu_semaphore_pool_map(vm->sema_pool, vm);
-	if (err) {
-		nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
-		nvgpu_free(vm->vma[gmmu_page_size_small],
-			   vm->sema_pool->gpu_va);
-		return err;
-	}
-
-	return 0;
-}
-
-/*
- * Determine if the passed address space can support big pages or not.
- */
-int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
-{
-	u64 mask = ((u64)vm->big_page_size << 10) - 1;
-
-	if (base & mask || size & mask)
-		return 0;
-	return 1;
-}
-
-/*
  * Attempt to find a reserved memory area to determine PTE size for the passed
  * mapping. If no reserved area can be found use small pages.
  */
@@ -2661,272 +2448,6 @@ enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
 	return gmmu_page_size_small;
 }
 
-static int init_vm_page_tables(struct vm_gk20a *vm)
-{
-	u32 pde_lo, pde_hi;
-	int err;
-
-	pde_range_from_vaddr_range(vm,
-				   0, vm->va_limit - 1,
-				   &pde_lo, &pde_hi);
-	vm->pdb.entries = nvgpu_vzalloc(vm->mm->g,
-					sizeof(struct gk20a_mm_entry) *
-					(pde_hi + 1));
-	vm->pdb.num_entries = pde_hi + 1;
-
-	if (!vm->pdb.entries)
-		return -ENOMEM;
-
-	err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0],
-					   &vm->pdb, NULL);
-	if (err) {
-		nvgpu_vfree(vm->mm->g, vm->pdb.entries);
-		return err;
-	}
-
-	return 0;
-}
-
-/**
- * nvgpu_init_vm() - Initialize an address space.
- *
- * @mm - Parent MM.
- * @vm - The VM to init.
- * @big_page_size - Size of big pages associated with this VM.
- * @low_hole - The size of the low hole (unaddressable memory at the bottom of
- *             the address space).
- * @kernel_reserved - Space reserved for kernel only allocations.
- * @aperture_size - Total size of the aperture.
- * @big_pages - Ignored. Will be set based on other passed params.
- * @name - Name of the address space.
- *
- * This function initializes an address space according to the following map:
- *
- *     +--+ 0x0
- *     |  |
- *     +--+ @low_hole
- *     |  |
- *     ~  ~   This is the "user" section.
- *     |  |
- *     +--+ @aperture_size - @kernel_reserved
- *     |  |
- *     ~  ~   This is the "kernel" section.
- *     |  |
- *     +--+ @aperture_size
- *
- * The user section is therefore whatever is left over after the @low_hole and
- * @kernel_reserved memory have been portioned out. The @kernel_reserved is
- * always present at the top of the memory space and the @low_hole is always at
- * the bottom.
- *
- * For certain address spaces a "user" section makes no sense (bar1, etc) so in
- * such cases the @kernel_reserved and @low_hole should sum to exactly
- * @aperture_size.
- */
-int nvgpu_init_vm(struct mm_gk20a *mm,
-		  struct vm_gk20a *vm,
-		  u32 big_page_size,
-		  u64 low_hole,
-		  u64 kernel_reserved,
-		  u64 aperture_size,
-		  bool big_pages,
-		  bool userspace_managed,
-		  char *name)
-{
-	int err;
-	char alloc_name[32];
-	u64 kernel_vma_flags;
-	u64 user_vma_start, user_vma_limit;
-	u64 user_lp_vma_start, user_lp_vma_limit;
-	u64 kernel_vma_start, kernel_vma_limit;
-	struct gk20a *g = mm->g;
-	struct gk20a_platform *p = gk20a_get_platform(g->dev);
-
-	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
-		return -ENOMEM;
-
-	gk20a_dbg_info("Init space for %s: va_limit=0x%llx, "
-		       "big_page_size=0x%x low_hole=0x%llx",
-		       name, aperture_size,
-		       (unsigned int)big_page_size, low_hole);
-
-	vm->mm = mm;
-
-	vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K;
-	vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size;
-	vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;
-
-	/* Set up vma pointers. */
-	vm->vma[gmmu_page_size_small] = &vm->user;
-	vm->vma[gmmu_page_size_big] = &vm->user;
-	vm->vma[gmmu_page_size_kernel] = &vm->kernel;
-	if (!p->unify_address_spaces)
-		vm->vma[gmmu_page_size_big] = &vm->user_lp;
-
-	vm->va_start = low_hole;
-	vm->va_limit = aperture_size;
-	vm->big_pages = big_pages;
-
-	vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
-	vm->userspace_managed = userspace_managed;
-	vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
-
-	/* Initialize the page table data structures. */
-	err = init_vm_page_tables(vm);
-	if (err)
-		return err;
-
-	/* Setup vma limits. */
-	if (kernel_reserved + low_hole < aperture_size) {
-		if (p->unify_address_spaces) {
-			user_vma_start = low_hole;
-			user_vma_limit = vm->va_limit - kernel_reserved;
-			user_lp_vma_start = user_vma_limit;
-			user_lp_vma_limit = user_vma_limit;
-		} else {
-			user_vma_start = low_hole;
-			user_vma_limit = __nv_gmmu_va_small_page_limit();
-			user_lp_vma_start = __nv_gmmu_va_small_page_limit();
-			user_lp_vma_limit = vm->va_limit - kernel_reserved;
-		}
-	} else {
-		user_vma_start = 0;
-		user_vma_limit = 0;
-		user_lp_vma_start = 0;
-		user_lp_vma_limit = 0;
-	}
-	kernel_vma_start = vm->va_limit - kernel_reserved;
-	kernel_vma_limit = vm->va_limit;
-
-	gk20a_dbg_info("user_vma [0x%llx,0x%llx)",
-		       user_vma_start, user_vma_limit);
-	gk20a_dbg_info("user_lp_vma [0x%llx,0x%llx)",
-		       user_lp_vma_start, user_lp_vma_limit);
-	gk20a_dbg_info("kernel_vma [0x%llx,0x%llx)",
-		       kernel_vma_start, kernel_vma_limit);
-
-	if (WARN_ON(user_vma_start > user_vma_limit) ||
-	    WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
-	    WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
-		err = -EINVAL;
-		goto clean_up_page_tables;
-	}
-
-	kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
-		0 : GPU_ALLOC_GVA_SPACE;
-
-	/*
-	 * A "user" area only makes sense for the GVA spaces. For VMs where
-	 * there is no "user" area user_vma_start will be equal to
-	 * user_vma_limit (i.e. a 0 sized space). In such a situation the kernel
-	 * area must be non-zero in length.
-	 */
-	if (user_vma_start >= user_vma_limit &&
-	    kernel_vma_start >= kernel_vma_limit) {
-		err = -EINVAL;
-		goto clean_up_page_tables;
-	}
-
-	/*
-	 * Determine if big pages are possible in this VM. If a split address
-	 * space is used then check the user_lp vma instead of the user vma.
-	 */
-	if (p->unify_address_spaces)
-		vm->big_pages = gk20a_big_pages_possible(vm, user_vma_start,
-					user_vma_limit - user_vma_start);
-	else
-		vm->big_pages = gk20a_big_pages_possible(vm, user_lp_vma_start,
-					user_lp_vma_limit - user_lp_vma_start);
-
-	/*
-	 * User VMA.
-	 */
-	if (user_vma_start < user_vma_limit) {
-		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
-		err = __nvgpu_buddy_allocator_init(g, &vm->user,
-						   vm, alloc_name,
-						   user_vma_start,
-						   user_vma_limit -
-						   user_vma_start,
-						   SZ_4K,
-						   GPU_BALLOC_MAX_ORDER,
-						   GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_page_tables;
-	} else {
-		/*
-		 * Make these allocator pointers point to the kernel allocator
-		 * since we still use the legacy notion of page size to choose
-		 * the allocator.
-		 */
-		vm->vma[0] = &vm->kernel;
-		vm->vma[1] = &vm->kernel;
-	}
-
-	/*
-	 * User VMA for large pages when a split address range is used.
-	 */
-	if (user_lp_vma_start < user_lp_vma_limit) {
-		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
-		err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
-						   vm, alloc_name,
-						   user_lp_vma_start,
-						   user_lp_vma_limit -
-						   user_lp_vma_start,
-						   vm->big_page_size,
-						   GPU_BALLOC_MAX_ORDER,
-						   GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_allocators;
-	}
-
-	/*
-	 * Kernel VMA. Must always exist for an address space.
-	 */
-	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
-	err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
-					   vm, alloc_name,
-					   kernel_vma_start,
-					   kernel_vma_limit - kernel_vma_start,
-					   SZ_4K,
-					   GPU_BALLOC_MAX_ORDER,
-					   kernel_vma_flags);
-	if (err)
-		goto clean_up_allocators;
-
-	vm->mapped_buffers = NULL;
-
-	nvgpu_mutex_init(&vm->update_gmmu_lock);
-	kref_init(&vm->ref);
-	nvgpu_init_list_node(&vm->vm_area_list);
-
-	/*
-	 * This is only necessary for channel address spaces. The best way to
-	 * distinguish channel address spaces from other address spaces is by
-	 * size - if the address space is 4GB or less, it's not a channel.
-	 */
-	if (vm->va_limit > SZ_4G) {
-		err = gk20a_init_sema_pool(vm);
-		if (err)
-			goto clean_up_allocators;
-	}
-
-	return 0;
-
-clean_up_allocators:
-	if (nvgpu_alloc_initialized(&vm->kernel))
-		nvgpu_alloc_destroy(&vm->kernel);
-	if (nvgpu_alloc_initialized(&vm->user))
-		nvgpu_alloc_destroy(&vm->user);
-	if (nvgpu_alloc_initialized(&vm->user_lp))
-		nvgpu_alloc_destroy(&vm->user_lp);
-clean_up_page_tables:
-	/* Cleans up init_vm_page_tables() */
-	nvgpu_vfree(g, vm->pdb.entries);
-	free_gmmu_pages(vm, &vm->pdb);
-	return err;
-}
-
 /* address space interfaces for the gk20a module */
 int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
 			 u32 flags)
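The gk20a_big_pages_possible() helper removed above lives on as nvgpu_big_pages_possible() in common/mm/vm.c; it is a pure alignment test on the mapping's base and size. A standalone version follows, assuming the common 64 KB big-page size, so the mask covers 64 MB of VA (presumably the span of one big-page PDE; that interpretation is an assumption, not stated in the patch).

#include <stdio.h>
#include <stdint.h>

static int big_pages_possible(uint32_t big_page_size, uint64_t base,
                              uint64_t size)
{
        /* Mask covers big_page_size << 10 bytes: 64 MB for 64 KB pages. */
        uint64_t mask = ((uint64_t)big_page_size << 10) - 1;

        /* Both the start and the length must be mask-aligned. */
        if ((base & mask) || (size & mask))
                return 0;
        return 1;
}

int main(void)
{
        uint32_t bps = 64 * 1024; /* 64 KB big pages */

        printf("%d\n", big_pages_possible(bps, 1ull << 26, 1ull << 30)); /* 1 */
        printf("%d\n", big_pages_possible(bps, 1ull << 20, 1ull << 30)); /* 0 */
        return 0;
}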
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 27681199..90a72811 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -456,8 +456,6 @@ const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
 		       struct vm_gk20a *vm);
 
-int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
-
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 75dfcc86..6acea549 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -401,7 +401,7 @@ static void gp10b_remove_bar2_vm(struct gk20a *g)
 	struct mm_gk20a *mm = &g->mm;
 
 	gp10b_replayable_pagefault_buffer_deinit(g);
-	nvgpu_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block);
+	nvgpu_vm_remove(&mm->bar2.vm, &mm->bar2.inst_block);
 }
 
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index 6d8aa025..45c5def4 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -63,6 +63,12 @@ struct gk20a_mmu_level {
 	size_t entry_size;
 };
 
+int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
+				 enum gmmu_pgsz_gk20a pgsz_idx,
+				 const struct gk20a_mmu_level *l,
+				 struct gk20a_mm_entry *entry,
+				 struct gk20a_mm_entry *prev_entry);
+
 /**
  * nvgpu_gmmu_map - Map memory into the GMMU.
  *
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index fb55483d..c89282bf 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -181,6 +181,7 @@ void nvgpu_vm_get(struct vm_gk20a *vm);
 void nvgpu_vm_put(struct vm_gk20a *vm);
 
 int vm_aspace_id(struct vm_gk20a *vm);
+int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
 
 /* batching eliminates redundant cache flushes and invalidates */
 void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
@@ -194,7 +195,6 @@ void nvgpu_vm_mapping_batch_finish_locked(
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 			 struct nvgpu_mapped_buf ***mapped_buffers,
 			 int *num_buffers);
-
 /* put references on the given buffers */
 void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 			  struct nvgpu_mapped_buf **mapped_buffers,
@@ -220,7 +220,6 @@ struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
 int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 		      struct dma_buf **dmabuf,
 		      u64 *offset);
-
 int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
 			    struct nvgpu_mapped_buf *mapped_buffer);
 void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
@@ -228,8 +227,7 @@ void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
 
 void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm);
 void nvgpu_vm_remove_support(struct vm_gk20a *vm);
-
-void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block);
+void nvgpu_vm_remove(struct vm_gk20a *vm, struct nvgpu_mem *inst_block);
 
 int nvgpu_init_vm(struct mm_gk20a *mm,
 		  struct vm_gk20a *vm,
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index b8b5985c..63490aa5 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -364,7 +364,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	if (user_vma_start < user_vma_limit) {
 		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 			 gmmu_page_sizes[gmmu_page_size_small] >> 10);
-		if (!gk20a_big_pages_possible(vm, user_vma_start,
+		if (!nvgpu_big_pages_possible(vm, user_vma_start,
 					      user_vma_limit - user_vma_start))
 			vm->big_pages = false;
 
@@ -391,7 +391,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 	snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
 		 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
-	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+	if (!nvgpu_big_pages_possible(vm, kernel_vma_start,
 				      kernel_vma_limit - kernel_vma_start))
 		vm->big_pages = false;
 