Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/page_allocator.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c  937
1 file changed, 937 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
new file mode 100644
index 00000000..c61b2238
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -0,0 +1,937 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/mm.h>

#include <nvgpu/allocator.h>
#include <nvgpu/page_allocator.h>

#include "buddy_allocator_priv.h"

#define palloc_dbg(a, fmt, arg...)			\
	alloc_dbg(palloc_owner(a), fmt, ##arg)

static struct kmem_cache *page_alloc_cache;
static struct kmem_cache *page_alloc_chunk_cache;
static struct kmem_cache *page_alloc_slab_page_cache;
static DEFINE_MUTEX(meta_data_cache_lock);

/*
 * Handle the book-keeping for the slab page lists.
 */
static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
					  struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	list_add(&page->list_entry, &slab->empty);
	slab->nr_empty++;
	page->state = SP_EMPTY;
}
static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
					    struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	list_add(&page->list_entry, &slab->partial);
	slab->nr_partial++;
	page->state = SP_PARTIAL;
}
static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
					 struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	list_add(&page->list_entry, &slab->full);
	slab->nr_full++;
	page->state = SP_FULL;
}

static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
					    struct page_alloc_slab_page *page)
{
	list_del_init(&page->list_entry);
	slab->nr_empty--;
	page->state = SP_NONE;
}
static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
					      struct page_alloc_slab_page *page)
{
	list_del_init(&page->list_entry);
	slab->nr_partial--;
	page->state = SP_NONE;
}
static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
					   struct page_alloc_slab_page *page)
{
	list_del_init(&page->list_entry);
	slab->nr_full--;
	page->state = SP_NONE;
}

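/*
 * A slab page is always detached from its current list (del_*, state back
 * to SP_NONE) before being re-added elsewhere (add_*), which is why every
 * add_* helper asserts SP_NONE. Illustrative lifecycle of one page as
 * objects are allocated and freed:
 *
 *   SP_NONE -> SP_EMPTY -> SP_PARTIAL -> SP_FULL -> SP_PARTIAL -> ...
 *
 * with each arrow passing through SP_NONE via the matching del_* helper.
 */
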
static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_length(&va->source_allocator);
}

static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_base(&va->source_allocator);
}

static int nvgpu_page_alloc_inited(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_initialized(&va->source_allocator);
}

static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_end(&va->source_allocator);
}

static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_space(&va->source_allocator);
}

static int nvgpu_page_reserve_co(struct nvgpu_allocator *a,
				 struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_reserve_carveout(&va->source_allocator, co);
}

static void nvgpu_page_release_co(struct nvgpu_allocator *a,
				  struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_page_allocator *va = a->priv;

	nvgpu_alloc_release_carveout(&va->source_allocator, co);
}

static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
			       struct nvgpu_page_alloc *alloc,
			       bool free_buddy_alloc)
{
	struct page_alloc_chunk *chunk;

	while (!list_empty(&alloc->alloc_chunks)) {
		chunk = list_first_entry(&alloc->alloc_chunks,
					 struct page_alloc_chunk,
					 list_entry);
		list_del(&chunk->list_entry);

		if (free_buddy_alloc)
			nvgpu_free(&a->source_allocator, chunk->base);
		kfree(chunk);
	}

	kfree(alloc);
}

static int __insert_page_alloc(struct nvgpu_page_allocator *a,
			       struct nvgpu_page_alloc *alloc)
{
	struct rb_node **new = &a->allocs.rb_node;
	struct rb_node *parent = NULL;

	while (*new) {
		struct nvgpu_page_alloc *tmp =
			container_of(*new, struct nvgpu_page_alloc,
				     tree_entry);

		parent = *new;
		if (alloc->base < tmp->base) {
			new = &((*new)->rb_left);
		} else if (alloc->base > tmp->base) {
			new = &((*new)->rb_right);
		} else {
			WARN(1, "Duplicate entries in allocated list!\n");
			return 0;
		}
	}

	rb_link_node(&alloc->tree_entry, parent, new);
	rb_insert_color(&alloc->tree_entry, &a->allocs);

	return 0;
}

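/*
 * Look up the outstanding alloc containing @addr. Note that a successful
 * lookup also erases the node from the tree: the caller takes ownership
 * of the returned struct and is expected to free (or re-insert) it.
 */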
static struct nvgpu_page_alloc *__find_page_alloc(
	struct nvgpu_page_allocator *a,
	u64 addr)
{
	struct rb_node *node = a->allocs.rb_node;
	struct nvgpu_page_alloc *alloc;

	while (node) {
		alloc = container_of(node, struct nvgpu_page_alloc, tree_entry);

		if (addr < alloc->base)
			node = node->rb_left;
		else if (addr > alloc->base)
			node = node->rb_right;
		else
			break;
	}

	if (!node)
		return NULL;

	rb_erase(node, &a->allocs);

	return alloc;
}

static struct page_alloc_slab_page *alloc_slab_page(
	struct nvgpu_page_allocator *a,
	struct page_alloc_slab *slab)
{
	struct page_alloc_slab_page *slab_page;

	slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
	if (!slab_page) {
		palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
		return ERR_PTR(-ENOMEM);
	}

	memset(slab_page, 0, sizeof(*slab_page));

	slab_page->page_addr = nvgpu_alloc(&a->source_allocator, a->page_size);
	if (!slab_page->page_addr) {
		kfree(slab_page);
		palloc_dbg(a, "OOM: vidmem is full!\n");
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&slab_page->list_entry);
	slab_page->slab_size = slab->slab_size;
	slab_page->nr_objects = (u32)a->page_size / slab->slab_size;
	slab_page->nr_objects_alloced = 0;
	slab_page->owner = slab;
	slab_page->state = SP_NONE;

	a->pages_alloced++;

	palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
		   slab_page->page_addr, slab_page->slab_size);

	return slab_page;
}
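
/*
 * Example (illustrative): with a 64K page_size and the 4K slab, a slab
 * page carries 64K / 4K = 16 objects, so the object bitmap (a single
 * word, as the &slab_page->bitmap usage suggests) needs only 16 bits.
 */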

static void free_slab_page(struct nvgpu_page_allocator *a,
			   struct page_alloc_slab_page *slab_page)
{
	palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);

	BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
	       slab_page->nr_objects_alloced != 0 ||
	       slab_page->bitmap != 0);

	nvgpu_free(&a->source_allocator, slab_page->page_addr);
	a->pages_freed++;

	kmem_cache_free(page_alloc_slab_page_cache, slab_page);
}

/*
 * This expects @alloc to have 1 empty page_alloc_chunk already added to the
 * alloc_chunks list.
 */
static int __do_slab_alloc(struct nvgpu_page_allocator *a,
			   struct page_alloc_slab *slab,
			   struct nvgpu_page_alloc *alloc)
{
	struct page_alloc_slab_page *slab_page = NULL;
	struct page_alloc_chunk *chunk;
	unsigned long offs;

	/*
	 * Check the partial and empty lists to see if we have some space
	 * readily available. Take the slab_page out of whatever list it
	 * was in since it may be put back into a different list later.
	 */
	if (!list_empty(&slab->partial)) {
		slab_page = list_first_entry(&slab->partial,
					     struct page_alloc_slab_page,
					     list_entry);
		del_slab_page_from_partial(slab, slab_page);
	} else if (!list_empty(&slab->empty)) {
		slab_page = list_first_entry(&slab->empty,
					     struct page_alloc_slab_page,
					     list_entry);
		del_slab_page_from_empty(slab, slab_page);
	}

	if (!slab_page) {
		slab_page = alloc_slab_page(a, slab);
		if (IS_ERR(slab_page))
			return PTR_ERR(slab_page);
	}

	/*
	 * We now have a slab_page. Do the alloc.
	 */
	offs = bitmap_find_next_zero_area(&slab_page->bitmap,
					  slab_page->nr_objects,
					  0, 1, 0);
	if (offs >= slab_page->nr_objects) {
		WARN(1, "Empty/partial slab with no free objects?");

		/* Add the buggy page to the full list... This isn't ideal. */
		add_slab_page_to_full(slab, slab_page);
		return -ENOMEM;
	}

	bitmap_set(&slab_page->bitmap, offs, 1);
	slab_page->nr_objects_alloced++;

	if (slab_page->nr_objects_alloced < slab_page->nr_objects)
		add_slab_page_to_partial(slab, slab_page);
	else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
		add_slab_page_to_full(slab, slab_page);
	else
		BUG(); /* Should be impossible to hit this. */

	/*
	 * Handle building the nvgpu_page_alloc struct. We expect one
	 * page_alloc_chunk to be present.
	 */
	alloc->slab_page = slab_page;
	alloc->nr_chunks = 1;
	alloc->length = slab_page->slab_size;
	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);

	chunk = list_first_entry(&alloc->alloc_chunks,
				 struct page_alloc_chunk, list_entry);
	chunk->base = alloc->base;
	chunk->length = alloc->length;

	return 0;
}

/*
 * Allocate from a slab instead of directly from the page allocator.
 */
static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
	struct nvgpu_page_allocator *a, u64 len)
{
	int err, slab_nr;
	struct page_alloc_slab *slab;
	struct nvgpu_page_alloc *alloc = NULL;
	struct page_alloc_chunk *chunk = NULL;

	/*
	 * Align the length to a page and then divide by the page size (4k for
	 * this code). ilog2() of that then gets us the correct slab to use.
	 */
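	/*
	 * Example (illustrative, assuming 4K kernel pages): len = 16K
	 * gives PAGE_ALIGN(len) >> 12 == 4 and ilog2(4) == 2, which
	 * selects the 16K slab.
	 */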
	slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
	slab = &a->slabs[slab_nr];

	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
	if (!alloc) {
		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
		goto fail;
	}
	chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
	if (!chunk) {
		palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
		goto fail;
	}

	INIT_LIST_HEAD(&alloc->alloc_chunks);
	list_add(&chunk->list_entry, &alloc->alloc_chunks);

	err = __do_slab_alloc(a, slab, alloc);
	if (err)
		goto fail;

	palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
		   len, slab_nr, alloc->base);
	a->nr_slab_allocs++;

	return alloc;

fail:
	kfree(alloc);
	kfree(chunk);
	return NULL;
}

static void __nvgpu_free_slab(struct nvgpu_page_allocator *a,
			      struct nvgpu_page_alloc *alloc)
{
	struct page_alloc_slab_page *slab_page = alloc->slab_page;
	struct page_alloc_slab *slab = slab_page->owner;
	enum slab_page_state new_state;
	int offs;

	offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size;
	bitmap_clear(&slab_page->bitmap, offs, 1);

	slab_page->nr_objects_alloced--;

	if (slab_page->nr_objects_alloced == 0)
		new_state = SP_EMPTY;
	else
		new_state = SP_PARTIAL;

	/*
	 * Need to migrate the page to a different list.
	 */
	if (new_state != slab_page->state) {
		/* Delete - can't be in empty. */
		if (slab_page->state == SP_PARTIAL)
			del_slab_page_from_partial(slab, slab_page);
		else
			del_slab_page_from_full(slab, slab_page);

		/* And add. */
		if (new_state == SP_EMPTY) {
			if (list_empty(&slab->empty))
				add_slab_page_to_empty(slab, slab_page);
			else
				free_slab_page(a, slab_page);
		} else {
			add_slab_page_to_partial(slab, slab_page);
		}
	}

	/*
	 * Now handle the page_alloc.
	 */
	__nvgpu_free_pages(a, alloc, false);
	a->nr_slab_frees++;
}

/*
 * Allocate physical pages. Any single allocation from the underlying buddy
 * allocator is contiguous; however, if the space is fragmented this allocator
 * will stitch the request together from several smaller, mutually
 * discontiguous chunks as needed.
 */
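/*
 * Example (illustrative): a request for 7 pages is attempted as a 4 page
 * chunk first (1 << __fls(7)), then 2 pages, then 1. Any chunk the buddy
 * allocator cannot provide is retried at half its size until it would
 * drop below page_size, at which point the allocator is OOM and the
 * partial allocation is unwound via fail_cleanup.
 */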
static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
	struct nvgpu_page_allocator *a, u64 pages)
{
	struct nvgpu_page_alloc *alloc;
	struct page_alloc_chunk *c;
	u64 max_chunk_len = pages << a->page_shift;
	int i = 0;

	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
	if (!alloc)
		goto fail;

	memset(alloc, 0, sizeof(*alloc));

	INIT_LIST_HEAD(&alloc->alloc_chunks);
	alloc->length = pages << a->page_shift;

	while (pages) {
		u64 chunk_addr = 0;
		u64 chunk_pages = (u64)1 << __fls(pages);
		u64 chunk_len = chunk_pages << a->page_shift;

		/*
		 * Take care of the possibility that the allocation must be
		 * contiguous. If this is not the first iteration then that
		 * means the first iteration failed to alloc the entire
		 * requested size. The buddy allocator guarantees any given
		 * single alloc is contiguous.
		 */
		if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
			goto fail_cleanup;

		if (chunk_len > max_chunk_len)
			chunk_len = max_chunk_len;

		/*
		 * Keep attempting to allocate in smaller chunks until the
		 * alloc either succeeds or is smaller than the page_size of
		 * the allocator (i.e. the allocator is OOM).
		 */
		do {
			chunk_addr = nvgpu_alloc(&a->source_allocator,
						 chunk_len);

			/* Divide by 2 and try again */
			if (!chunk_addr) {
				palloc_dbg(a, "balloc failed: 0x%llx\n",
					   chunk_len);
				chunk_len >>= 1;
				max_chunk_len = chunk_len;
			}
		} while (!chunk_addr && chunk_len >= a->page_size);

		chunk_pages = chunk_len >> a->page_shift;

		if (!chunk_addr) {
			palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
			goto fail_cleanup;
		}

		c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
		if (!c) {
			nvgpu_free(&a->source_allocator, chunk_addr);
			goto fail_cleanup;
		}

		pages -= chunk_pages;

		c->base = chunk_addr;
		c->length = chunk_len;
		list_add(&c->list_entry, &alloc->alloc_chunks);

		i++;
	}

	alloc->nr_chunks = i;
	c = list_first_entry(&alloc->alloc_chunks,
			     struct page_alloc_chunk, list_entry);
	alloc->base = c->base;

	return alloc;

fail_cleanup:
	while (!list_empty(&alloc->alloc_chunks)) {
		c = list_first_entry(&alloc->alloc_chunks,
				     struct page_alloc_chunk, list_entry);
		list_del(&c->list_entry);
		nvgpu_free(&a->source_allocator, c->base);
		kfree(c);
	}
	kfree(alloc);
fail:
	return ERR_PTR(-ENOMEM);
}

static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
	struct nvgpu_page_allocator *a, u64 len)
{
	struct nvgpu_page_alloc *alloc = NULL;
	struct page_alloc_chunk *c;
	u64 pages;
	int i = 0;

	pages = ALIGN(len, a->page_size) >> a->page_shift;

	alloc = __do_nvgpu_alloc_pages(a, pages);
	if (IS_ERR(alloc)) {
		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
			   pages << a->page_shift, pages);
		return NULL;
	}

	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
		   pages << a->page_shift, pages, alloc->base);
	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
			   i++, c->base, c->length);
	}

	return alloc;
}

/*
 * Allocate enough pages to satisfy @len. Page size is determined at
 * initialization of the allocator.
 *
 * The return is actually a pointer to a struct nvgpu_page_alloc. This is
 * because it doesn't make a lot of sense to return the address of the first
 * page in the list of pages (since they could be discontiguous). This has
 * precedent in the dma_alloc APIs, though; it's really just an annoying
 * artifact of the fact that the nvgpu_alloc() API requires a u64 return type.
 */
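/*
 * Illustrative caller pattern (hypothetical helper name): when
 * GPU_ALLOC_NO_SCATTER_GATHER is not set, the returned u64 must be cast
 * back before the chunks can be walked:
 *
 *	u64 handle = nvgpu_alloc(na, len);
 *	struct nvgpu_page_alloc *alloc =
 *		(struct nvgpu_page_alloc *)(uintptr_t)handle;
 *	struct page_alloc_chunk *c;
 *
 *	list_for_each_entry(c, &alloc->alloc_chunks, list_entry)
 *		map_gmmu_pages(c->base, c->length);	// hypothetical
 */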
static u64 nvgpu_page_alloc(struct nvgpu_allocator *__a, u64 len)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc = NULL;
	u64 real_len;

	/*
	 * If we want contig pages we have to round up to a power of two. It's
	 * easier to do that here than in the buddy allocator.
	 */
	real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
		roundup_pow_of_two(len) : len;

	alloc_lock(__a);
	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
	    real_len <= (a->page_size / 2))
		alloc = __nvgpu_alloc_slab(a, real_len);
	else
		alloc = __nvgpu_alloc_pages(a, real_len);

	if (!alloc) {
		alloc_unlock(__a);
		return 0;
	}

	__insert_page_alloc(a, alloc);

	a->nr_allocs++;
	if (real_len > a->page_size / 2)
		a->pages_alloced += alloc->length >> a->page_shift;
	alloc_unlock(__a);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
		return alloc->base;
	else
		return (u64) (uintptr_t) alloc;
}

/*
 * Note: this will remove the nvgpu_page_alloc struct from the RB tree
 * if it's found.
 */
static void nvgpu_page_free(struct nvgpu_allocator *__a, u64 base)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc;

	alloc_lock(__a);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
		alloc = __find_page_alloc(a, base);
	else
		alloc = __find_page_alloc(a,
			((struct nvgpu_page_alloc *)(uintptr_t)base)->base);

	if (!alloc) {
		palloc_dbg(a, "Hrm, found no alloc?\n");
		goto done;
	}

	a->nr_frees++;

	palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
		   alloc->length, alloc->base);

	/*
	 * Frees *alloc.
	 */
	if (alloc->slab_page) {
		__nvgpu_free_slab(a, alloc);
	} else {
		a->pages_freed += (alloc->length >> a->page_shift);
		__nvgpu_free_pages(a, alloc, true);
	}

done:
	alloc_unlock(__a);
}

static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
	struct nvgpu_page_allocator *a, u64 base, u64 length)
{
	struct nvgpu_page_alloc *alloc;
	struct page_alloc_chunk *c;

	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
	c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
	if (!alloc || !c)
		goto fail;

	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length);
	if (!alloc->base) {
		WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
		goto fail;
	}

	alloc->nr_chunks = 1;
	alloc->length = length;
	INIT_LIST_HEAD(&alloc->alloc_chunks);

	c->base = alloc->base;
	c->length = length;
	list_add(&c->list_entry, &alloc->alloc_chunks);

	return alloc;

fail:
	kfree(c);
	kfree(alloc);
	return ERR_PTR(-ENOMEM);
}

static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
				  u64 base, u64 len)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc = NULL;
	struct page_alloc_chunk *c;
	u64 aligned_len, pages;
	int i = 0;

	aligned_len = ALIGN(len, a->page_size);
	pages = aligned_len >> a->page_shift;

	alloc_lock(__a);

	alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len);
	if (IS_ERR(alloc)) {
		alloc_unlock(__a);
		return 0;
	}

	__insert_page_alloc(a, alloc);
	alloc_unlock(__a);

	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
		   alloc->base, aligned_len, pages);
	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
			   i++, c->base, c->length);
	}

	a->nr_fixed_allocs++;
	a->pages_alloced += pages;

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
		return alloc->base;
	else
		return (u64) (uintptr_t) alloc;
}

static void nvgpu_page_free_fixed(struct nvgpu_allocator *__a,
				  u64 base, u64 len)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc;

	alloc_lock(__a);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
		alloc = __find_page_alloc(a, base);
		if (!alloc)
			goto done;
	} else {
		alloc = (struct nvgpu_page_alloc *) (uintptr_t) base;
		/*
		 * Fixed allocs are inserted into the RB tree as well, so
		 * drop the tree entry before the struct is freed below.
		 */
		__find_page_alloc(a, alloc->base);
	}

	palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
		   alloc->base, alloc->length);

	a->nr_fixed_frees++;
	a->pages_freed += (alloc->length >> a->page_shift);

	/*
	 * This works for the time being since the buddy allocator
	 * uses the same free function for both fixed and regular
	 * allocs. This would have to be updated if the underlying
	 * allocator were to change.
	 */
	__nvgpu_free_pages(a, alloc, true);

done:
	alloc_unlock(__a);
}

static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *__a)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);

	alloc_lock(__a);
	kfree(a->slabs);
	kfree(a);
	__a->priv = NULL;
	alloc_unlock(__a);
}

static void nvgpu_page_print_stats(struct nvgpu_allocator *__a,
				   struct seq_file *s, int lock)
{
	struct nvgpu_page_allocator *a = page_allocator(__a);
	int i;

	if (lock)
		alloc_lock(__a);

	__alloc_pstat(s, __a, "Page allocator:\n");
	__alloc_pstat(s, __a, "  allocs         %lld\n", a->nr_allocs);
	__alloc_pstat(s, __a, "  frees          %lld\n", a->nr_frees);
	__alloc_pstat(s, __a, "  fixed_allocs   %lld\n", a->nr_fixed_allocs);
	__alloc_pstat(s, __a, "  fixed_frees    %lld\n", a->nr_fixed_frees);
	__alloc_pstat(s, __a, "  slab_allocs    %lld\n", a->nr_slab_allocs);
	__alloc_pstat(s, __a, "  slab_frees     %lld\n", a->nr_slab_frees);
	__alloc_pstat(s, __a, "  pages alloced  %lld\n", a->pages_alloced);
	__alloc_pstat(s, __a, "  pages freed    %lld\n", a->pages_freed);
	__alloc_pstat(s, __a, "\n");

	/*
	 * Slab info.
	 */
	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
		__alloc_pstat(s, __a, "Slabs:\n");
		__alloc_pstat(s, __a, "  size      empty     partial   full\n");
		__alloc_pstat(s, __a, "  ----      -----     -------   ----\n");

		for (i = 0; i < a->nr_slabs; i++) {
			struct page_alloc_slab *slab = &a->slabs[i];

			__alloc_pstat(s, __a, "  %-9u %-9d %-9u %u\n",
				      slab->slab_size,
				      slab->nr_empty, slab->nr_partial,
				      slab->nr_full);
		}
		__alloc_pstat(s, __a, "\n");
	}

	__alloc_pstat(s, __a, "Source alloc: %s\n",
		      a->source_allocator.name);
	nvgpu_alloc_print_stats(&a->source_allocator, s, lock);

	if (lock)
		alloc_unlock(__a);
}

static const struct nvgpu_allocator_ops page_ops = {
	.alloc		= nvgpu_page_alloc,
	.free		= nvgpu_page_free,

	.alloc_fixed	= nvgpu_page_alloc_fixed,
	.free_fixed	= nvgpu_page_free_fixed,

	.reserve_carveout	= nvgpu_page_reserve_co,
	.release_carveout	= nvgpu_page_release_co,

	.base		= nvgpu_page_alloc_base,
	.length		= nvgpu_page_alloc_length,
	.end		= nvgpu_page_alloc_end,
	.inited		= nvgpu_page_alloc_inited,
	.space		= nvgpu_page_alloc_space,

	.fini		= nvgpu_page_allocator_destroy,

	.print_stats	= nvgpu_page_print_stats,
};

/*
 * nr_slabs is computed as follows: divide page_size by 4096 to get the number
 * of 4k pages in page_size. Then take the base 2 log of that to get the
 * number of slabs. For a 64k page_size that works out like so:
 *
 *   (64 * 1024) / (4 * 1024) = 16
 *   ilog2(16) = 4
 *
 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
 */
static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a)
{
	size_t nr_slabs = ilog2(a->page_size >> 12);
	unsigned int i;

	a->slabs = kcalloc(nr_slabs,
			   sizeof(struct page_alloc_slab),
			   GFP_KERNEL);
	if (!a->slabs)
		return -ENOMEM;
	a->nr_slabs = nr_slabs;

	for (i = 0; i < nr_slabs; i++) {
		struct page_alloc_slab *slab = &a->slabs[i];

		slab->slab_size = SZ_4K * (1 << i);
		INIT_LIST_HEAD(&slab->empty);
		INIT_LIST_HEAD(&slab->partial);
		INIT_LIST_HEAD(&slab->full);
		slab->nr_empty = 0;
		slab->nr_partial = 0;
		slab->nr_full = 0;
	}

	return 0;
}

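/*
 * Illustrative use (hypothetical sizes and allocator slot): set up a page
 * allocator over a 4GB vidmem region with 64K pages and 4K slabs enabled:
 *
 *	err = nvgpu_page_allocator_init(g, &g->mm.vidmem.allocator, "vidmem",
 *					base, SZ_4G, SZ_64K,
 *					GPU_ALLOC_4K_VIDMEM_PAGES);
 */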
int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
			      const char *name, u64 base, u64 length,
			      u64 blk_size, u64 flags)
{
	struct nvgpu_page_allocator *a;
	char buddy_name[sizeof(__a->name)];
	int err;

	mutex_lock(&meta_data_cache_lock);
	if (!page_alloc_cache)
		page_alloc_cache = KMEM_CACHE(nvgpu_page_alloc, 0);
	if (!page_alloc_chunk_cache)
		page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
	if (!page_alloc_slab_page_cache)
		page_alloc_slab_page_cache =
			KMEM_CACHE(page_alloc_slab_page, 0);
	mutex_unlock(&meta_data_cache_lock);

	if (!page_alloc_cache || !page_alloc_chunk_cache ||
	    !page_alloc_slab_page_cache)
		return -ENOMEM;

	if (blk_size < SZ_4K)
		return -EINVAL;

	a = kzalloc(sizeof(struct nvgpu_page_allocator), GFP_KERNEL);
	if (!a)
		return -ENOMEM;

	err = __nvgpu_alloc_common_init(__a, name, a, false, &page_ops);
	if (err)
		goto fail;

	a->base = base;
	a->length = length;
	a->page_size = blk_size;
	a->page_shift = __ffs(blk_size);
	a->allocs = RB_ROOT;
	a->owner = __a;
	a->flags = flags;

	if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
		err = nvgpu_page_alloc_init_slabs(a);
		if (err)
			goto fail;
	}

	snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);

	err = nvgpu_buddy_allocator_init(g, &a->source_allocator, buddy_name,
					 base, length, blk_size, 0);
	if (err)
		goto fail;

	nvgpu_init_alloc_debug(g, __a);
	palloc_dbg(a, "New allocator: type      page\n");
	palloc_dbg(a, "               base      0x%llx\n", a->base);
	palloc_dbg(a, "               size      0x%llx\n", a->length);
	palloc_dbg(a, "               page_size 0x%llx\n", a->page_size);
	palloc_dbg(a, "               flags     0x%llx\n", a->flags);
	palloc_dbg(a, "               slabs:    %d\n", a->nr_slabs);

	return 0;

fail:
	kfree(a->slabs);
	kfree(a);
	return err;
}