Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c | 936
1 file changed, 0 insertions, 936 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
deleted file mode 100644
index 9717a726..00000000
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
+++ /dev/null
@@ -1,936 +0,0 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/mm.h>

#include "gk20a_allocator.h"
#include "buddy_allocator_priv.h"
#include "page_allocator_priv.h"

#define palloc_dbg(a, fmt, arg...)			\
        alloc_dbg(palloc_owner(a), fmt, ##arg)

static struct kmem_cache *page_alloc_cache;
static struct kmem_cache *page_alloc_chunk_cache;
static struct kmem_cache *page_alloc_slab_page_cache;
static DEFINE_MUTEX(meta_data_cache_lock);

/*
 * Handle the book-keeping for these operations. Each slab page sits on
 * exactly one of the slab's empty, partial, or full lists while in the
 * corresponding SP_* state, and on no list at all while in SP_NONE.
 */
static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
                                          struct page_alloc_slab_page *page)
{
        BUG_ON(page->state != SP_NONE);
        list_add(&page->list_entry, &slab->empty);
        slab->nr_empty++;
        page->state = SP_EMPTY;
}

static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
                                            struct page_alloc_slab_page *page)
{
        BUG_ON(page->state != SP_NONE);
        list_add(&page->list_entry, &slab->partial);
        slab->nr_partial++;
        page->state = SP_PARTIAL;
}

static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
                                         struct page_alloc_slab_page *page)
{
        BUG_ON(page->state != SP_NONE);
        list_add(&page->list_entry, &slab->full);
        slab->nr_full++;
        page->state = SP_FULL;
}

static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
                                            struct page_alloc_slab_page *page)
{
        list_del_init(&page->list_entry);
        slab->nr_empty--;
        page->state = SP_NONE;
}

static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
                                              struct page_alloc_slab_page *page)
{
        list_del_init(&page->list_entry);
        slab->nr_partial--;
        page->state = SP_NONE;
}

static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
                                           struct page_alloc_slab_page *page)
{
        list_del_init(&page->list_entry);
        slab->nr_full--;
        page->state = SP_NONE;
}

static u64 gk20a_page_alloc_length(struct gk20a_allocator *a)
{
        struct gk20a_page_allocator *va = a->priv;

        return gk20a_alloc_length(&va->source_allocator);
}

static u64 gk20a_page_alloc_base(struct gk20a_allocator *a)
{
        struct gk20a_page_allocator *va = a->priv;

        return gk20a_alloc_base(&va->source_allocator);
}

static int gk20a_page_alloc_inited(struct gk20a_allocator *a)
{
        struct gk20a_page_allocator *va = a->priv;

        return gk20a_alloc_initialized(&va->source_allocator);
}

static u64 gk20a_page_alloc_end(struct gk20a_allocator *a)
{
        struct gk20a_page_allocator *va = a->priv;

        return gk20a_alloc_end(&va->source_allocator);
}

static u64 gk20a_page_alloc_space(struct gk20a_allocator *a)
{
        struct gk20a_page_allocator *va = a->priv;

        return gk20a_alloc_space(&va->source_allocator);
}

static int gk20a_page_reserve_co(struct gk20a_allocator *a,
                                 struct gk20a_alloc_carveout *co)
{
        struct gk20a_page_allocator *va = a->priv;

        return gk20a_alloc_reserve_carveout(&va->source_allocator, co);
}

static void gk20a_page_release_co(struct gk20a_allocator *a,
                                  struct gk20a_alloc_carveout *co)
{
        struct gk20a_page_allocator *va = a->priv;

        gk20a_alloc_release_carveout(&va->source_allocator, co);
}

static void __gk20a_free_pages(struct gk20a_page_allocator *a,
                               struct gk20a_page_alloc *alloc,
                               bool free_buddy_alloc)
{
        struct page_alloc_chunk *chunk;

        while (!list_empty(&alloc->alloc_chunks)) {
                chunk = list_first_entry(&alloc->alloc_chunks,
                                         struct page_alloc_chunk,
                                         list_entry);
                list_del(&chunk->list_entry);

                if (free_buddy_alloc)
                        gk20a_free(&a->source_allocator, chunk->base);
                kmem_cache_free(page_alloc_chunk_cache, chunk);
        }

        kmem_cache_free(page_alloc_cache, alloc);
}

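/*
 * Outstanding allocations are tracked in an rbtree keyed by the base
 * address of the allocation; gk20a_page_free() uses this to map a base
 * address back to its gk20a_page_alloc struct.
 */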
static int __insert_page_alloc(struct gk20a_page_allocator *a,
                               struct gk20a_page_alloc *alloc)
{
        struct rb_node **new = &a->allocs.rb_node;
        struct rb_node *parent = NULL;

        while (*new) {
                struct gk20a_page_alloc *tmp =
                        container_of(*new, struct gk20a_page_alloc,
                                     tree_entry);

                parent = *new;
                if (alloc->base < tmp->base) {
                        new = &((*new)->rb_left);
                } else if (alloc->base > tmp->base) {
                        new = &((*new)->rb_right);
                } else {
                        WARN(1, "Duplicate entries in allocated list!\n");
                        return 0;
                }
        }

        rb_link_node(&alloc->tree_entry, parent, new);
        rb_insert_color(&alloc->tree_entry, &a->allocs);

        return 0;
}

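/*
 * Note that a successful lookup also erases the node from the rbtree; the
 * caller takes ownership of the returned gk20a_page_alloc.
 */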
static struct gk20a_page_alloc *__find_page_alloc(
        struct gk20a_page_allocator *a,
        u64 addr)
{
        struct rb_node *node = a->allocs.rb_node;
        struct gk20a_page_alloc *alloc;

        while (node) {
                alloc = container_of(node, struct gk20a_page_alloc, tree_entry);

                if (addr < alloc->base)
                        node = node->rb_left;
                else if (addr > alloc->base)
                        node = node->rb_right;
                else
                        break;
        }

        if (!node)
                return NULL;

        rb_erase(node, &a->allocs);

        return alloc;
}

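/*
 * Carve a new slab page out of the source allocator. A slab page is
 * page_size bytes and holds page_size / slab_size objects; for example, a
 * 64K page split into 4K slabs holds 16 objects.
 */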
static struct page_alloc_slab_page *alloc_slab_page(
        struct gk20a_page_allocator *a,
        struct page_alloc_slab *slab)
{
        struct page_alloc_slab_page *slab_page;

        slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
        if (!slab_page) {
                palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
                return ERR_PTR(-ENOMEM);
        }

        memset(slab_page, 0, sizeof(*slab_page));

        slab_page->page_addr = gk20a_alloc(&a->source_allocator, a->page_size);
        if (!slab_page->page_addr) {
                kmem_cache_free(page_alloc_slab_page_cache, slab_page);
                palloc_dbg(a, "OOM: vidmem is full!\n");
                return ERR_PTR(-ENOMEM);
        }

        INIT_LIST_HEAD(&slab_page->list_entry);
        slab_page->slab_size = slab->slab_size;
        slab_page->nr_objects = (u32)a->page_size / slab->slab_size;
        slab_page->nr_objects_alloced = 0;
        slab_page->owner = slab;
        slab_page->state = SP_NONE;

        a->pages_alloced++;

        palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
                   slab_page->page_addr, slab_page->slab_size);

        return slab_page;
}

static void free_slab_page(struct gk20a_page_allocator *a,
                           struct page_alloc_slab_page *slab_page)
{
        palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);

        BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
               slab_page->nr_objects_alloced != 0 ||
               slab_page->bitmap != 0);

        gk20a_free(&a->source_allocator, slab_page->page_addr);
        a->pages_freed++;

        kmem_cache_free(page_alloc_slab_page_cache, slab_page);
}

/*
 * This expects @alloc to have 1 empty page_alloc_chunk already added to the
 * alloc_chunks list.
 */
static int __do_slab_alloc(struct gk20a_page_allocator *a,
                           struct page_alloc_slab *slab,
                           struct gk20a_page_alloc *alloc)
{
        struct page_alloc_slab_page *slab_page = NULL;
        struct page_alloc_chunk *chunk;
        unsigned long offs;

        /*
         * Check the partial and empty lists to see if we have some space
         * readily available. Take the slab_page out of whatever list it
         * was in since it may be put back into a different list later.
         */
        if (!list_empty(&slab->partial)) {
                slab_page = list_first_entry(&slab->partial,
                                             struct page_alloc_slab_page,
                                             list_entry);
                del_slab_page_from_partial(slab, slab_page);
        } else if (!list_empty(&slab->empty)) {
                slab_page = list_first_entry(&slab->empty,
                                             struct page_alloc_slab_page,
                                             list_entry);
                del_slab_page_from_empty(slab, slab_page);
        }

        if (!slab_page) {
                slab_page = alloc_slab_page(a, slab);
                if (IS_ERR(slab_page))
                        return PTR_ERR(slab_page);
        }

        /*
         * We now have a slab_page. Do the alloc.
         */
        offs = bitmap_find_next_zero_area(&slab_page->bitmap,
                                          slab_page->nr_objects,
                                          0, 1, 0);
        if (offs >= slab_page->nr_objects) {
                WARN(1, "Empty/partial slab with no free objects?");

                /* Add the buggy page to the full list... This isn't ideal. */
                add_slab_page_to_full(slab, slab_page);
                return -ENOMEM;
        }

        bitmap_set(&slab_page->bitmap, offs, 1);
        slab_page->nr_objects_alloced++;

        if (slab_page->nr_objects_alloced < slab_page->nr_objects)
                add_slab_page_to_partial(slab, slab_page);
        else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
                add_slab_page_to_full(slab, slab_page);
        else
                BUG(); /* Should be impossible to hit this. */

        /*
         * Handle building the gk20a_page_alloc struct. We expect one
         * page_alloc_chunk to be present.
         */
        alloc->slab_page = slab_page;
        alloc->nr_chunks = 1;
        alloc->length = slab_page->slab_size;
        alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);

        chunk = list_first_entry(&alloc->alloc_chunks,
                                 struct page_alloc_chunk, list_entry);
        chunk->base = alloc->base;
        chunk->length = alloc->length;

        return 0;
}

/*
 * Allocate from a slab instead of directly from the page allocator.
 */
static struct gk20a_page_alloc *__gk20a_alloc_slab(
        struct gk20a_page_allocator *a, u64 len)
{
        int err, slab_nr;
        struct page_alloc_slab *slab;
        struct gk20a_page_alloc *alloc = NULL;
        struct page_alloc_chunk *chunk = NULL;

        /*
         * Align the length to a page and then divide by the page size (4k for
         * this code). ilog2() of that then gets us the correct slab to use.
         */
        slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
        slab = &a->slabs[slab_nr];
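        /*
         * For example: len = 4K gives slab_nr = 0 (the 4K slab); len = 5K
         * aligns up to 8K and gives slab_nr = 1 (the 8K slab); len = 16K
         * gives slab_nr = 2 (the 16K slab).
         */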

        alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
        if (!alloc) {
                palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
                goto fail;
        }
        chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
        if (!chunk) {
                palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
                goto fail;
        }

        INIT_LIST_HEAD(&alloc->alloc_chunks);
        list_add(&chunk->list_entry, &alloc->alloc_chunks);

        err = __do_slab_alloc(a, slab, alloc);
        if (err)
                goto fail;

        palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
                   len, slab_nr, alloc->base);
        a->nr_slab_allocs++;

        return alloc;

fail:
        if (alloc)
                kmem_cache_free(page_alloc_cache, alloc);
        if (chunk)
                kmem_cache_free(page_alloc_chunk_cache, chunk);
        return NULL;
}

static void __gk20a_free_slab(struct gk20a_page_allocator *a,
                              struct gk20a_page_alloc *alloc)
{
        struct page_alloc_slab_page *slab_page = alloc->slab_page;
        struct page_alloc_slab *slab = slab_page->owner;
        enum slab_page_state new_state;
        int offs;

        offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size;
        bitmap_clear(&slab_page->bitmap, offs, 1);

        slab_page->nr_objects_alloced--;

        if (slab_page->nr_objects_alloced == 0)
                new_state = SP_EMPTY;
        else
                new_state = SP_PARTIAL;

        /*
         * Need to migrate the page to a different list.
         */
        if (new_state != slab_page->state) {
                /* Delete - can't be in empty. */
                if (slab_page->state == SP_PARTIAL)
                        del_slab_page_from_partial(slab, slab_page);
                else
                        del_slab_page_from_full(slab, slab_page);

                /* And add. */
                if (new_state == SP_EMPTY) {
                        if (list_empty(&slab->empty))
                                add_slab_page_to_empty(slab, slab_page);
                        else
                                free_slab_page(a, slab_page);
                } else {
                        add_slab_page_to_partial(slab, slab_page);
                }
        }

        /*
         * Now handle the page_alloc.
         */
        __gk20a_free_pages(a, alloc, false);
        a->nr_slab_frees++;
}
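
/*
 * Note that a slab page which becomes fully empty is handed back to the
 * source allocator unless the slab's empty list is empty; i.e. at most one
 * fully empty page is kept cached per slab size.
 */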

/*
 * Allocate physical pages. Since the underlying allocator is a buddy
 * allocator, each individual chunk it returns is contiguous. However, if
 * the space is fragmented, this allocator will collate multiple smaller,
 * mutually non-contiguous chunks into a single allocation when necessary.
 */
static struct gk20a_page_alloc *__do_gk20a_alloc_pages(
        struct gk20a_page_allocator *a, u64 pages)
{
        struct gk20a_page_alloc *alloc;
        struct page_alloc_chunk *c;
        u64 max_chunk_len = pages << a->page_shift;
        int i = 0;

        alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
        if (!alloc)
                goto fail;

        memset(alloc, 0, sizeof(*alloc));

        INIT_LIST_HEAD(&alloc->alloc_chunks);
        alloc->length = pages << a->page_shift;

        while (pages) {
                u64 chunk_addr = 0;
                u64 chunk_pages = (u64)1 << __fls(pages);
                u64 chunk_len = chunk_pages << a->page_shift;

                /*
                 * Take care of the possibility that the allocation must be
                 * contiguous. If this is not the first iteration then that
                 * means the first iteration failed to alloc the entire
                 * requested size. The buddy allocator guarantees any given
                 * single alloc is contiguous.
                 */
                if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
                        goto fail_cleanup;

                if (chunk_len > max_chunk_len)
                        chunk_len = max_chunk_len;

                /*
                 * Keep attempting to allocate in smaller chunks until the
                 * alloc either succeeds or is smaller than the page_size of
                 * the allocator (i.e. the allocator is OOM).
                 */
                do {
                        chunk_addr = gk20a_alloc(&a->source_allocator,
                                                 chunk_len);

                        /* Divide by 2 and try again. */
                        if (!chunk_addr) {
                                palloc_dbg(a, "balloc failed: 0x%llx\n",
                                           chunk_len);
                                chunk_len >>= 1;
                                max_chunk_len = chunk_len;
                        }
                } while (!chunk_addr && chunk_len >= a->page_size);

                chunk_pages = chunk_len >> a->page_shift;

                if (!chunk_addr) {
                        palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
                        goto fail_cleanup;
                }

                c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
                if (!c) {
                        gk20a_free(&a->source_allocator, chunk_addr);
                        goto fail_cleanup;
                }

                pages -= chunk_pages;

                c->base = chunk_addr;
                c->length = chunk_len;
                list_add(&c->list_entry, &alloc->alloc_chunks);

                i++;
        }

        alloc->nr_chunks = i;
        c = list_first_entry(&alloc->alloc_chunks,
                             struct page_alloc_chunk, list_entry);
        alloc->base = c->base;

        return alloc;

fail_cleanup:
        while (!list_empty(&alloc->alloc_chunks)) {
                c = list_first_entry(&alloc->alloc_chunks,
                                     struct page_alloc_chunk, list_entry);
                list_del(&c->list_entry);
                gk20a_free(&a->source_allocator, c->base);
                kmem_cache_free(page_alloc_chunk_cache, c);
        }
        kmem_cache_free(page_alloc_cache, alloc);
fail:
        return ERR_PTR(-ENOMEM);
}
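
/*
 * For example, a request to __do_gk20a_alloc_pages() for 13 pages on a
 * fragmented system might be satisfied as three chunks: 1 << __fls(13) = 8
 * pages, then 4 pages, then 1 page, each chunk itself contiguous (and
 * possibly smaller still, if the individual chunk allocs fail and get
 * halved).
 */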

static struct gk20a_page_alloc *__gk20a_alloc_pages(
        struct gk20a_page_allocator *a, u64 len)
{
        struct gk20a_page_alloc *alloc = NULL;
        struct page_alloc_chunk *c;
        u64 pages;
        int i = 0;

        pages = ALIGN(len, a->page_size) >> a->page_shift;

        alloc = __do_gk20a_alloc_pages(a, pages);
        if (IS_ERR(alloc)) {
                palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
                           pages << a->page_shift, pages);
                return NULL;
        }

        palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
                   pages << a->page_shift, pages, alloc->base);
        list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
                palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
                           i++, c->base, c->length);
        }

        return alloc;
}

/*
 * Allocate enough pages to satisfy @len. Page size is determined at
 * initialization of the allocator.
 *
 * The return is actually a pointer to a struct gk20a_page_alloc, cast to a
 * u64. This is because it doesn't make a lot of sense to return the address
 * of the first page in the list of pages (since they could be
 * discontiguous). This has precedent in the dma_alloc APIs, though it's
 * really just an annoying artifact of the fact that the gk20a_alloc() API
 * requires a u64 return type.
 */
static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
{
        struct gk20a_page_allocator *a = page_allocator(__a);
        struct gk20a_page_alloc *alloc = NULL;
        u64 real_len;

        /*
         * If we want contig pages we have to round up to a power of two. It's
         * easier to do that here than in the buddy allocator.
         */
        real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
                roundup_pow_of_two(len) : len;

        alloc_lock(__a);
        if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
            real_len <= (a->page_size / 2))
                alloc = __gk20a_alloc_slab(a, real_len);
        else
                alloc = __gk20a_alloc_pages(a, real_len);

        if (!alloc) {
                alloc_unlock(__a);
                return 0;
        }

        __insert_page_alloc(a, alloc);

        a->nr_allocs++;
        if (real_len > a->page_size / 2)
                a->pages_alloced += alloc->length >> a->page_shift;
        alloc_unlock(__a);

        if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
                return alloc->base;
        else
                return (u64) (uintptr_t) alloc;
}
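
/*
 * In other words, for gk20a_page_alloc() above (an illustrative sketch;
 * "handle" is just a local name here):
 *
 *	u64 handle = gk20a_alloc(&allocator, len);
 *
 * With GPU_ALLOC_NO_SCATTER_GATHER, handle is the base address of a
 * contiguous allocation. Otherwise it must be cast back to get at the
 * individual chunks:
 *
 *	struct gk20a_page_alloc *alloc =
 *		(struct gk20a_page_alloc *)(uintptr_t)handle;
 */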

/*
 * Note: this will remove the gk20a_page_alloc struct from the RB tree
 * if it's found.
 */
static void gk20a_page_free(struct gk20a_allocator *__a, u64 base)
{
        struct gk20a_page_allocator *a = page_allocator(__a);
        struct gk20a_page_alloc *alloc;

        alloc_lock(__a);

        if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
                alloc = __find_page_alloc(a, base);
        else
                alloc = __find_page_alloc(a,
                        ((struct gk20a_page_alloc *)(uintptr_t)base)->base);

        if (!alloc) {
                palloc_dbg(a, "Hrm, found no alloc?\n");
                goto done;
        }

        a->nr_frees++;

        palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
                   alloc->length, alloc->base);

        /*
         * Frees *alloc.
         */
        if (alloc->slab_page) {
                __gk20a_free_slab(a, alloc);
        } else {
                a->pages_freed += (alloc->length >> a->page_shift);
                __gk20a_free_pages(a, alloc, true);
        }

done:
        alloc_unlock(__a);
}

static struct gk20a_page_alloc *__gk20a_alloc_pages_fixed(
        struct gk20a_page_allocator *a, u64 base, u64 length)
{
        struct gk20a_page_alloc *alloc;
        struct page_alloc_chunk *c;

        alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
        c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
        if (!alloc || !c)
                goto fail;

        /* Zero the alloc so that slab_page, etc, start out NULL. */
        memset(alloc, 0, sizeof(*alloc));

        alloc->base = gk20a_alloc_fixed(&a->source_allocator, base, length);
        if (!alloc->base) {
                WARN(1, "gk20a: failed to fixed alloc pages @ 0x%010llx", base);
                goto fail;
        }

        alloc->nr_chunks = 1;
        alloc->length = length;
        INIT_LIST_HEAD(&alloc->alloc_chunks);

        c->base = alloc->base;
        c->length = length;
        list_add(&c->list_entry, &alloc->alloc_chunks);

        return alloc;

fail:
        if (c)
                kmem_cache_free(page_alloc_chunk_cache, c);
        if (alloc)
                kmem_cache_free(page_alloc_cache, alloc);
        return ERR_PTR(-ENOMEM);
}

static u64 gk20a_page_alloc_fixed(struct gk20a_allocator *__a,
                                  u64 base, u64 len)
{
        struct gk20a_page_allocator *a = page_allocator(__a);
        struct gk20a_page_alloc *alloc = NULL;
        struct page_alloc_chunk *c;
        u64 aligned_len, pages;
        int i = 0;

        aligned_len = ALIGN(len, a->page_size);
        pages = aligned_len >> a->page_shift;

        alloc_lock(__a);

        alloc = __gk20a_alloc_pages_fixed(a, base, aligned_len);
        if (IS_ERR(alloc)) {
                alloc_unlock(__a);
                return 0;
        }

        __insert_page_alloc(a, alloc);
        alloc_unlock(__a);

        palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
                   alloc->base, aligned_len, pages);
        list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
                palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
                           i++, c->base, c->length);
        }

        a->nr_fixed_allocs++;
        a->pages_alloced += pages;

        if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
                return alloc->base;
        else
                return (u64) (uintptr_t) alloc;
}

static void gk20a_page_free_fixed(struct gk20a_allocator *__a,
                                  u64 base, u64 len)
{
        struct gk20a_page_allocator *a = page_allocator(__a);
        struct gk20a_page_alloc *alloc;

        alloc_lock(__a);

        if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
                alloc = __find_page_alloc(a, base);
                if (!alloc)
                        goto done;
        } else {
                alloc = (struct gk20a_page_alloc *) (uintptr_t) base;
        }

        palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
                   alloc->base, alloc->length);

        a->nr_fixed_frees++;
        a->pages_freed += (alloc->length >> a->page_shift);

        /*
         * This works for the time being since the buddy allocator
         * uses the same free function for both fixed and regular
         * allocs. This would have to be updated if the underlying
         * allocator were to change.
         */
        __gk20a_free_pages(a, alloc, true);

done:
        alloc_unlock(__a);
}

static void gk20a_page_allocator_destroy(struct gk20a_allocator *__a)
{
        struct gk20a_page_allocator *a = page_allocator(__a);

        alloc_lock(__a);
        kfree(a->slabs);
        kfree(a);
        __a->priv = NULL;
        alloc_unlock(__a);
}

static void gk20a_page_print_stats(struct gk20a_allocator *__a,
                                   struct seq_file *s, int lock)
{
        struct gk20a_page_allocator *a = page_allocator(__a);
        int i;

        if (lock)
                alloc_lock(__a);

        __alloc_pstat(s, __a, "Page allocator:\n");
        __alloc_pstat(s, __a, "  allocs         %lld\n", a->nr_allocs);
        __alloc_pstat(s, __a, "  frees          %lld\n", a->nr_frees);
        __alloc_pstat(s, __a, "  fixed_allocs   %lld\n", a->nr_fixed_allocs);
        __alloc_pstat(s, __a, "  fixed_frees    %lld\n", a->nr_fixed_frees);
        __alloc_pstat(s, __a, "  slab_allocs    %lld\n", a->nr_slab_allocs);
        __alloc_pstat(s, __a, "  slab_frees     %lld\n", a->nr_slab_frees);
        __alloc_pstat(s, __a, "  pages alloced  %lld\n", a->pages_alloced);
        __alloc_pstat(s, __a, "  pages freed    %lld\n", a->pages_freed);
        __alloc_pstat(s, __a, "\n");

        /*
         * Slab info.
         */
        if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
                __alloc_pstat(s, __a, "Slabs:\n");
                __alloc_pstat(s, __a, "  size      empty     partial   full\n");
                __alloc_pstat(s, __a, "  ----      -----     -------   ----\n");

                for (i = 0; i < a->nr_slabs; i++) {
                        struct page_alloc_slab *slab = &a->slabs[i];

                        __alloc_pstat(s, __a, "  %-9u %-9d %-9u %u\n",
                                      slab->slab_size,
                                      slab->nr_empty, slab->nr_partial,
                                      slab->nr_full);
                }
                __alloc_pstat(s, __a, "\n");
        }

        __alloc_pstat(s, __a, "Source alloc: %s\n",
                      a->source_allocator.name);
        gk20a_alloc_print_stats(&a->source_allocator, s, lock);

        if (lock)
                alloc_unlock(__a);
}

static const struct gk20a_allocator_ops page_ops = {
        .alloc            = gk20a_page_alloc,
        .free             = gk20a_page_free,

        .alloc_fixed      = gk20a_page_alloc_fixed,
        .free_fixed       = gk20a_page_free_fixed,

        .reserve_carveout = gk20a_page_reserve_co,
        .release_carveout = gk20a_page_release_co,

        .base             = gk20a_page_alloc_base,
        .length           = gk20a_page_alloc_length,
        .end              = gk20a_page_alloc_end,
        .inited           = gk20a_page_alloc_inited,
        .space            = gk20a_page_alloc_space,

        .fini             = gk20a_page_allocator_destroy,

        .print_stats      = gk20a_page_print_stats,
};
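
/*
 * These ops are what the generic gk20a_alloc()/gk20a_free() entry points
 * declared in gk20a_allocator.h dispatch to once an allocator has been
 * initialized as a page allocator.
 */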

/*
 * nr_slabs is computed as follows: divide page_size by 4096 to get the
 * number of 4k pages in page_size. Then take the base 2 log of that to
 * get the number of slabs. For a 64k page_size that works out like this:
 *
 *   1024*64 / 1024*4 = 16
 *   ilog2(16) = 4
 *
 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
 */
static int gk20a_page_alloc_init_slabs(struct gk20a_page_allocator *a)
{
        size_t nr_slabs = ilog2(a->page_size >> 12);
        unsigned int i;

        a->slabs = kcalloc(nr_slabs,
                           sizeof(struct page_alloc_slab),
                           GFP_KERNEL);
        if (!a->slabs)
                return -ENOMEM;
        a->nr_slabs = nr_slabs;

        for (i = 0; i < nr_slabs; i++) {
                struct page_alloc_slab *slab = &a->slabs[i];

                slab->slab_size = SZ_4K * (1 << i);
                INIT_LIST_HEAD(&slab->empty);
                INIT_LIST_HEAD(&slab->partial);
                INIT_LIST_HEAD(&slab->full);
                slab->nr_empty = 0;
                slab->nr_partial = 0;
                slab->nr_full = 0;
        }

        return 0;
}

int gk20a_page_allocator_init(struct gk20a *g, struct gk20a_allocator *__a,
                              const char *name, u64 base, u64 length,
                              u64 blk_size, u64 flags)
{
        struct gk20a_page_allocator *a;
        char buddy_name[sizeof(__a->name)];
        int err;

        mutex_lock(&meta_data_cache_lock);
        if (!page_alloc_cache)
                page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0);
        if (!page_alloc_chunk_cache)
                page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
        if (!page_alloc_slab_page_cache)
                page_alloc_slab_page_cache =
                        KMEM_CACHE(page_alloc_slab_page, 0);
        mutex_unlock(&meta_data_cache_lock);

        if (!page_alloc_cache || !page_alloc_chunk_cache ||
            !page_alloc_slab_page_cache)
                return -ENOMEM;

        if (blk_size < SZ_4K)
                return -EINVAL;

        a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL);
        if (!a)
                return -ENOMEM;

        err = __gk20a_alloc_common_init(__a, name, a, false, &page_ops);
        if (err)
                goto fail;

        a->base = base;
        a->length = length;
        a->page_size = blk_size;
        a->page_shift = __ffs(blk_size);
        a->allocs = RB_ROOT;
        a->owner = __a;
        a->flags = flags;

        if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
                err = gk20a_page_alloc_init_slabs(a);
                if (err)
                        goto fail;
        }

        snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);

        err = gk20a_buddy_allocator_init(g, &a->source_allocator, buddy_name,
                                         base, length, blk_size, 0);
        if (err)
                goto fail;

        gk20a_init_alloc_debug(g, __a);
        palloc_dbg(a, "New allocator: type      page\n");
        palloc_dbg(a, "               base      0x%llx\n", a->base);
        palloc_dbg(a, "               size      0x%llx\n", a->length);
        palloc_dbg(a, "               page_size 0x%llx\n", a->page_size);
        palloc_dbg(a, "               flags     0x%llx\n", a->flags);
        palloc_dbg(a, "               slabs:    %d\n", a->nr_slabs);

        return 0;

fail:
        kfree(a->slabs);
        kfree(a);
        return err;
}
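
/*
 * Example usage (an illustrative sketch; the "vidmem" name and the
 * base/size values are assumptions, not taken from this file):
 *
 *	struct gk20a_allocator vidmem;
 *	int err = gk20a_page_allocator_init(g, &vidmem, "vidmem",
 *					    base, size, SZ_64K,
 *					    GPU_ALLOC_4K_VIDMEM_PAGES);
 *
 * A subsequent gk20a_alloc(&vidmem, SZ_8K) would then be satisfied from the
 * 8K slab, and gk20a_free(&vidmem, handle) would return it.
 */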