Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/buddy_allocator.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/buddy_allocator.c  1329
1 file changed, 1329 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
new file mode 100644
index 00000000..39a53801
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -0,0 +1,1329 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/slab.h>

#include <nvgpu/allocator.h>

#include "gk20a/mm_gk20a.h"
#include "gk20a/platform_gk20a.h"

#include "buddy_allocator_priv.h"

static struct kmem_cache *buddy_cache;	/* slab cache for meta data. */

/* Some other buddy allocator functions. */
static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a,
					     u64 addr);
static void balloc_coalesce(struct nvgpu_buddy_allocator *a,
			    struct nvgpu_buddy *b);
static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
				   struct nvgpu_fixed_alloc *falloc);

/*
 * This macro is not present in older kernels' list.h code.
 */
#ifndef list_last_entry
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)
#endif

/*
 * GPU buddy allocator for various address spaces.
 *
 * Current limitations:
 *   o  A fixed allocation could potentially be made that borders PDEs with
 *      different PTE sizes. This would require that fixed buffer to have
 *      different sized PTEs for different parts of the allocation. Probably
 *      best to just require PDE alignment for fixed address allocs.
 *
 *   o  It is currently possible to make an allocator that has a buddy
 *      alignment out of sync with the PDE block size alignment. A simple
 *      example is a 32GB address space starting at byte 1. Every buddy is
 *      shifted off by 1, which means each buddy corresponds to more than one
 *      actual GPU page. The best way to fix this is probably to just require
 *      PDE block size alignment for the start of the address space. At the
 *      moment all allocators are easily PDE aligned so this hasn't been a
 *      problem.
 */

/*
 * Pick a suitable maximum order for this allocator.
 *
 * Heuristic: assume the best max order is the largest single block that
 * will fit in the address space.
 */
static void balloc_compute_max_order(struct nvgpu_buddy_allocator *a)
{
	u64 true_max_order = ilog2(a->blks);

	if (a->max_order == 0) {
		a->max_order = true_max_order;
		return;
	}

	if (a->max_order > true_max_order)
		a->max_order = true_max_order;
	if (a->max_order > GPU_BALLOC_MAX_ORDER)
		a->max_order = GPU_BALLOC_MAX_ORDER;
}
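
/*
 * Illustrative sketch (not part of the driver): with a 1GB space and a 64K
 * blk_size there are 16384 blocks, so the heuristic above picks
 * ilog2(16384) = 14 as the max order, i.e. a single order-14 buddy can
 * span the whole space.
 */
#if 0
static u64 example_max_order(void)
{
	u64 blks = (1ULL << 30) >> 16;	/* 1GB of 64K blocks = 16384. */

	return ilog2(blks);		/* 14: one top-level buddy. */
}
#endif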

/*
 * Since we can only allocate in chunks of a->blk_size we need to trim off
 * any excess data that is not aligned to a->blk_size.
 */
static void balloc_allocator_align(struct nvgpu_buddy_allocator *a)
{
	a->start = ALIGN(a->base, a->blk_size);
	WARN_ON(a->start != a->base);
	a->end = (a->base + a->length) & ~(a->blk_size - 1);
	a->count = a->end - a->start;
	a->blks = a->count >> a->blk_shift;
}

/*
 * Pass NULL for parent if you want a top-level buddy.
 */
static struct nvgpu_buddy *balloc_new_buddy(struct nvgpu_buddy_allocator *a,
					    struct nvgpu_buddy *parent,
					    u64 start, u64 order)
{
	struct nvgpu_buddy *new_buddy;

	new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
	if (!new_buddy)
		return NULL;

	memset(new_buddy, 0, sizeof(struct nvgpu_buddy));

	new_buddy->parent = parent;
	new_buddy->start = start;
	new_buddy->order = order;
	new_buddy->end = start + (1ULL << order) * a->blk_size;
	new_buddy->pte_size = BALLOC_PTE_SIZE_ANY;

	return new_buddy;
}

static void __balloc_buddy_list_add(struct nvgpu_buddy_allocator *a,
				    struct nvgpu_buddy *b,
				    struct list_head *list)
{
	if (buddy_is_in_list(b)) {
		alloc_dbg(balloc_owner(a),
			  "Oops: adding added buddy (%llu:0x%llx)\n",
			  b->order, b->start);
		BUG();
	}

	/*
	 * Add big PTE blocks to the tail, small to the head for GVA spaces.
	 * This lets the code that checks if there are available blocks check
	 * without cycling through the entire list.
	 */
	if (a->flags & GPU_ALLOC_GVA_SPACE &&
	    b->pte_size == gmmu_page_size_big)
		list_add_tail(&b->buddy_entry, list);
	else
		list_add(&b->buddy_entry, list);

	buddy_set_in_list(b);
}

static void __balloc_buddy_list_rem(struct nvgpu_buddy_allocator *a,
				    struct nvgpu_buddy *b)
{
	if (!buddy_is_in_list(b)) {
		alloc_dbg(balloc_owner(a),
			  "Oops: removing removed buddy (%llu:0x%llx)\n",
			  b->order, b->start);
		BUG();
	}

	list_del_init(&b->buddy_entry);
	buddy_clr_in_list(b);
}

/*
 * Add a buddy to one of the buddy lists and deal with the necessary
 * book keeping. Adds the buddy to the list specified by the buddy's order.
 */
static void balloc_blist_add(struct nvgpu_buddy_allocator *a,
			     struct nvgpu_buddy *b)
{
	__balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
	a->buddy_list_len[b->order]++;
}

static void balloc_blist_rem(struct nvgpu_buddy_allocator *a,
			     struct nvgpu_buddy *b)
{
	__balloc_buddy_list_rem(a, b);
	a->buddy_list_len[b->order]--;
}

static u64 balloc_get_order(struct nvgpu_buddy_allocator *a, u64 len)
{
	if (len == 0)
		return 0;

	len--;
	len >>= a->blk_shift;

	return fls(len);
}
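
/*
 * Illustrative sketch (not part of the driver): balloc_get_order() rounds
 * a byte length up to the next power-of-two multiple of blk_size. Assuming
 * a 64K blk_size (blk_shift = 16), a 200K request gives
 * (200K - 1) >> 16 = 3 and fls(3) = 2, i.e. an order-2 (256K) buddy.
 */
#if 0
static u64 example_get_order(u64 len)
{
	u64 blk_shift = 16;			/* Assumed 64K blk_size. */

	if (len == 0)
		return 0;
	return fls((len - 1) >> blk_shift);	/* 200K -> order 2. */
}
#endif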

static u64 __balloc_max_order_in(struct nvgpu_buddy_allocator *a,
				 u64 start, u64 end)
{
	u64 size = (end - start) >> a->blk_shift;

	if (size > 0)
		return min_t(u64, ilog2(size), a->max_order);
	else
		return GPU_BALLOC_MAX_ORDER;
}

/*
 * Initialize the buddy lists.
 */
static int balloc_init_lists(struct nvgpu_buddy_allocator *a)
{
	int i;
	u64 bstart, bend, order;
	struct nvgpu_buddy *buddy;

	bstart = a->start;
	bend = a->end;

	/* First make sure the list heads are valid. */
	for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
		INIT_LIST_HEAD(balloc_get_order_list(a, i));

	while (bstart < bend) {
		order = __balloc_max_order_in(a, bstart, bend);

		buddy = balloc_new_buddy(a, NULL, bstart, order);
		if (!buddy)
			goto cleanup;

		balloc_blist_add(a, buddy);
		bstart += balloc_order_to_len(a, order);
	}

	return 0;

cleanup:
	for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
		if (!list_empty(balloc_get_order_list(a, i))) {
			buddy = list_first_entry(balloc_get_order_list(a, i),
					struct nvgpu_buddy, buddy_entry);
			balloc_blist_rem(a, buddy);
			kmem_cache_free(buddy_cache, buddy);
		}
	}

	return -ENOMEM;
}
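
/*
 * Illustrative trace (not part of the driver): a region that is not a
 * power-of-two number of blocks is seeded with a descending series of
 * top-level buddies. For a 7-block region starting at block 0:
 *
 *   __balloc_max_order_in() -> 2, add buddy [0, 4)  (order 2)
 *   __balloc_max_order_in() -> 1, add buddy [4, 6)  (order 1)
 *   __balloc_max_order_in() -> 0, add buddy [6, 7)  (order 0)
 *
 * so the free lists initially hold one buddy each at orders 2, 1 and 0.
 */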

/*
 * Clean up and destroy the passed allocator.
 */
static void nvgpu_buddy_allocator_destroy(struct nvgpu_allocator *__a)
{
	int i;
	struct rb_node *node;
	struct nvgpu_buddy *bud;
	struct nvgpu_fixed_alloc *falloc;
	struct nvgpu_buddy_allocator *a = __a->priv;

	alloc_lock(__a);

	nvgpu_fini_alloc_debug(__a);

	/*
	 * Free the fixed allocs first.
	 */
	while ((node = rb_first(&a->fixed_allocs)) != NULL) {
		falloc = container_of(node,
				      struct nvgpu_fixed_alloc, alloced_entry);

		rb_erase(node, &a->fixed_allocs);
		__balloc_do_free_fixed(a, falloc);
	}

	/*
	 * And now free all outstanding allocations.
	 */
	while ((node = rb_first(&a->alloced_buddies)) != NULL) {
		bud = container_of(node, struct nvgpu_buddy, alloced_entry);
		balloc_free_buddy(a, bud->start);
		balloc_blist_add(a, bud);
		balloc_coalesce(a, bud);
	}

	/*
	 * Now clean up the unallocated buddies.
	 */
	for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
		BUG_ON(a->buddy_list_alloced[i] != 0);

		while (!list_empty(balloc_get_order_list(a, i))) {
			bud = list_first_entry(balloc_get_order_list(a, i),
					struct nvgpu_buddy, buddy_entry);
			balloc_blist_rem(a, bud);
			kmem_cache_free(buddy_cache, bud);
		}

		if (a->buddy_list_len[i] != 0) {
			pr_info("Excess buddies!!! (%d: %llu)\n",
				i, a->buddy_list_len[i]);
			BUG();
		}
		if (a->buddy_list_split[i] != 0) {
			pr_info("Excess split nodes!!! (%d: %llu)\n",
				i, a->buddy_list_split[i]);
			BUG();
		}
		if (a->buddy_list_alloced[i] != 0) {
			pr_info("Excess alloced nodes!!! (%d: %llu)\n",
				i, a->buddy_list_alloced[i]);
			BUG();
		}
	}

	kfree(a);

	alloc_unlock(__a);
}

/*
 * Combine the passed buddy if possible. The pointer in @b may not be
 * valid after this as the buddy may be freed.
 *
 * @a must be locked.
 */
static void balloc_coalesce(struct nvgpu_buddy_allocator *a,
			    struct nvgpu_buddy *b)
{
	struct nvgpu_buddy *parent;

	if (buddy_is_alloced(b) || buddy_is_split(b))
		return;

	/*
	 * If neither this buddy nor its partner is allocated or split then
	 * the two can be coalesced back into their parent.
	 */
	if (!b->buddy)
		return;
	if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
		return;

	parent = b->parent;

	balloc_blist_rem(a, b);
	balloc_blist_rem(a, b->buddy);

	buddy_clr_split(parent);
	a->buddy_list_split[parent->order]--;
	balloc_blist_add(a, parent);

	/*
	 * Recursively coalesce as far as we can go.
	 */
	balloc_coalesce(a, parent);

	/* Clean up the remains. */
	kmem_cache_free(buddy_cache, b->buddy);
	kmem_cache_free(buddy_cache, b);
}
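
/*
 * Illustrative trace (not part of the driver): freeing the second of two
 * order-0 siblings lets balloc_coalesce() fold them back into their
 * order-1 parent, then recurse upward if that parent's partner is free as
 * well:
 *
 *   order 2:        [0, 4)            <- restored last, if [2, 4) is free
 *   order 1:   [0, 2)    [2, 4)       <- [0, 2) restored first
 *   order 0: [0, 1) [1, 2)            <- both free, removed from the lists
 */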

/*
 * Split a buddy into two new buddies, each half the size of the parent
 * buddy.
 *
 * @a must be locked.
 */
static int balloc_split_buddy(struct nvgpu_buddy_allocator *a,
			      struct nvgpu_buddy *b, int pte_size)
{
	struct nvgpu_buddy *left, *right;
	u64 half;

	left = balloc_new_buddy(a, b, b->start, b->order - 1);
	if (!left)
		return -ENOMEM;

	half = (b->end - b->start) / 2;

	right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
	if (!right) {
		kmem_cache_free(buddy_cache, left);
		return -ENOMEM;
	}

	buddy_set_split(b);
	a->buddy_list_split[b->order]++;

	b->left = left;
	b->right = right;
	left->buddy = right;
	right->buddy = left;
	left->parent = b;
	right->parent = b;

	/* PTE considerations. */
	if (a->flags & GPU_ALLOC_GVA_SPACE &&
	    left->order <= a->pte_blk_order) {
		left->pte_size = pte_size;
		right->pte_size = pte_size;
	}

	balloc_blist_rem(a, b);
	balloc_blist_add(a, left);
	balloc_blist_add(a, right);

	return 0;
}
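
/*
 * Illustrative sketch (not part of the driver): splitting an order-3 buddy
 * at 0x80000 with a 64K blk_size yields two order-2 children, the right
 * one offset by half of the parent's span:
 *
 *   half  = (0x100000 - 0x80000) / 2 = 0x40000
 *   left  = { start = 0x80000, order = 2 }
 *   right = { start = 0xc0000, order = 2 }
 */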

/*
 * Place the passed buddy into the RB tree for allocated buddies. Never
 * fails unless the passed entry is a duplicate, which is a bug.
 *
 * @a must be locked.
 */
static void balloc_alloc_buddy(struct nvgpu_buddy_allocator *a,
			       struct nvgpu_buddy *b)
{
	struct rb_node **new = &(a->alloced_buddies.rb_node);
	struct rb_node *parent = NULL;

	while (*new) {
		struct nvgpu_buddy *bud = container_of(*new,
					struct nvgpu_buddy, alloced_entry);

		parent = *new;
		if (b->start < bud->start)
			new = &((*new)->rb_left);
		else if (b->start > bud->start)
			new = &((*new)->rb_right);
		else
			BUG();	/* Duplicate entries in allocated list! */
	}

	rb_link_node(&b->alloced_entry, parent, new);
	rb_insert_color(&b->alloced_entry, &a->alloced_buddies);

	buddy_set_alloced(b);
	a->buddy_list_alloced[b->order]++;
}

/*
 * Remove the passed buddy from the allocated buddy RB tree. Returns the
 * deallocated buddy for further processing.
 *
 * @a must be locked.
 */
static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a,
					     u64 addr)
{
	struct rb_node *node = a->alloced_buddies.rb_node;
	struct nvgpu_buddy *bud;

	while (node) {
		bud = container_of(node, struct nvgpu_buddy, alloced_entry);

		if (addr < bud->start)
			node = node->rb_left;
		else if (addr > bud->start)
			node = node->rb_right;
		else
			break;
	}

	if (!node)
		return NULL;

	rb_erase(node, &a->alloced_buddies);
	buddy_clr_alloced(bud);
	a->buddy_list_alloced[bud->order]--;

	return bud;
}

/*
 * Find a suitable buddy for the given order and PTE type (big or little).
 */
static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a,
					       u64 order, int pte_size)
{
	struct nvgpu_buddy *bud;

	if (order > a->max_order ||
	    list_empty(balloc_get_order_list(a, order)))
		return NULL;

	if (a->flags & GPU_ALLOC_GVA_SPACE &&
	    pte_size == gmmu_page_size_big)
		bud = list_last_entry(balloc_get_order_list(a, order),
				      struct nvgpu_buddy, buddy_entry);
	else
		bud = list_first_entry(balloc_get_order_list(a, order),
				       struct nvgpu_buddy, buddy_entry);

	if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
	    bud->pte_size != pte_size)
		return NULL;

	return bud;
}

/*
 * Allocate a suitably sized buddy. If no suitable buddy exists, split
 * higher order buddies until we have a suitable buddy to allocate.
 *
 * For PDE grouping add an extra check to see if a buddy is suitable: the
 * buddy must exist in a PDE whose PTE size is compatible.
 *
 * @a must be locked.
 */
static u64 __balloc_do_alloc(struct nvgpu_buddy_allocator *a,
			     u64 order, int pte_size)
{
	u64 split_order;
	struct nvgpu_buddy *bud = NULL;

	split_order = order;
	while (split_order <= a->max_order &&
	       !(bud = __balloc_find_buddy(a, split_order, pte_size)))
		split_order++;

	/* Out of memory! */
	if (!bud)
		return 0;

	while (bud->order != order) {
		if (balloc_split_buddy(a, bud, pte_size))
			return 0; /* No mem... */
		bud = bud->left;
	}

	balloc_blist_rem(a, bud);
	balloc_alloc_buddy(a, bud);

	return bud->start;
}
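
/*
 * Illustrative trace (not part of the driver): requesting an order-0 buddy
 * when only an order-2 buddy [0, 4) is free:
 *
 *   __balloc_find_buddy(order 0) -> NULL
 *   __balloc_find_buddy(order 1) -> NULL
 *   __balloc_find_buddy(order 2) -> [0, 4)
 *   split [0, 4), take left [0, 2); split [0, 2), take left [0, 1)
 *
 * [0, 1) is allocated; [1, 2) and [2, 4) remain on the order-0 and order-1
 * free lists.
 */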

/*
 * See if the passed range is actually available for allocation. If so, then
 * return 1, otherwise return 0.
 *
 * TODO: Right now this uses the suboptimal approach of going through all
 * outstanding allocations and checking their base/ends. This could be better.
 */
static int balloc_is_range_free(struct nvgpu_buddy_allocator *a,
				u64 base, u64 end)
{
	struct rb_node *node;
	struct nvgpu_buddy *bud;

	node = rb_first(&a->alloced_buddies);
	if (!node)
		return 1; /* No allocs yet. */

	bud = container_of(node, struct nvgpu_buddy, alloced_entry);

	while (bud->start < end) {
		/* Half-open overlap test; also catches full containment. */
		if (bud->end > base)
			return 0;

		node = rb_next(node);
		if (!node)
			break;
		bud = container_of(node, struct nvgpu_buddy, alloced_entry);
	}

	return 1;
}
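
/*
 * Illustrative sketch (not part of the driver): the half-open interval
 * test used above. [base, end) and [s, e) overlap exactly when each range
 * starts before the other one finishes; ranges that merely touch do not
 * overlap.
 */
#if 0
static int example_ranges_overlap(u64 base, u64 end, u64 s, u64 e)
{
	return s < end && e > base;	/* [0, 10) vs [10, 20) -> 0. */
}
#endif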

static void balloc_alloc_fixed(struct nvgpu_buddy_allocator *a,
			       struct nvgpu_fixed_alloc *f)
{
	struct rb_node **new = &(a->fixed_allocs.rb_node);
	struct rb_node *parent = NULL;

	while (*new) {
		struct nvgpu_fixed_alloc *falloc =
			container_of(*new, struct nvgpu_fixed_alloc,
				     alloced_entry);

		BUG_ON(!virt_addr_valid(falloc));

		parent = *new;
		if (f->start < falloc->start)
			new = &((*new)->rb_left);
		else if (f->start > falloc->start)
			new = &((*new)->rb_right);
		else
			BUG();	/* Duplicate entries in allocated list! */
	}

	rb_link_node(&f->alloced_entry, parent, new);
	rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
}

/*
 * Remove the passed fixed allocation from the fixed-alloc RB tree. Returns
 * the removed fixed alloc for further processing.
 *
 * @a must be locked.
 */
static struct nvgpu_fixed_alloc *balloc_free_fixed(
	struct nvgpu_buddy_allocator *a, u64 addr)
{
	struct rb_node *node = a->fixed_allocs.rb_node;
	struct nvgpu_fixed_alloc *falloc;

	while (node) {
		falloc = container_of(node,
				      struct nvgpu_fixed_alloc, alloced_entry);

		if (addr < falloc->start)
			node = node->rb_left;
		else if (addr > falloc->start)
			node = node->rb_right;
		else
			break;
	}

	if (!node)
		return NULL;

	rb_erase(node, &a->fixed_allocs);

	return falloc;
}

/*
 * Find the parent range - the parent does not necessarily need to exist
 * as a buddy yet. Finding an existing parent comes later...
 */
static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a,
				      u64 base, u64 order,
				      u64 *pbase, u64 *porder)
{
	u64 base_mask;
	u64 shifted_base = balloc_base_shift(a, base);

	order++;
	base_mask = ~((a->blk_size << order) - 1);

	shifted_base &= base_mask;

	*pbase = balloc_base_unshift(a, shifted_base);
	*porder = order;
}
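
/*
 * Illustrative sketch (not part of the driver): with a 64K blk_size, the
 * order-2 buddy at shifted base 0xc0000 has an order-3 parent starting at
 *
 *   base_mask = ~((0x10000 << 3) - 1) = ~0x7ffff
 *   0xc0000 & ~0x7ffff = 0x80000
 *
 * i.e. the parent range is { base = 0x80000, order = 3 }.
 */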

/*
 * Makes a buddy at the passed address. This will make all parent buddies
 * necessary for this buddy to exist as well.
 */
static struct nvgpu_buddy *__balloc_make_fixed_buddy(
	struct nvgpu_buddy_allocator *a, u64 base, u64 order)
{
	struct nvgpu_buddy *bud = NULL;
	struct list_head *order_list;
	u64 cur_order = order, cur_base = base;

	/*
	 * Algo:
	 *  1. Keep jumping up a buddy order until we find the real buddy
	 *     that this buddy exists in.
	 *  2. Then work our way down through the buddy tree until we hit a
	 *     dead end.
	 *  3. Start splitting buddies until we split to the one we need to
	 *     make.
	 */
	while (cur_order <= a->max_order) {
		int found = 0;

		order_list = balloc_get_order_list(a, cur_order);
		list_for_each_entry(bud, order_list, buddy_entry) {
			if (bud->start == cur_base) {
				found = 1;
				break;
			}
		}

		if (found)
			break;

		__balloc_get_parent_range(a, cur_base, cur_order,
					  &cur_base, &cur_order);
	}

	if (cur_order > a->max_order) {
		alloc_dbg(balloc_owner(a), "No buddy for range ???\n");
		return NULL;
	}

	/* Split this buddy as necessary until we get the target buddy. */
	while (bud->start != base || bud->order != order) {
		if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
			balloc_coalesce(a, bud);
			return NULL;
		}

		if (base < bud->right->start)
			bud = bud->left;
		else
			bud = bud->right;
	}

	return bud;
}

static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
				   struct nvgpu_fixed_alloc *falloc,
				   u64 base, u64 len)
{
	u64 shifted_base, inc_base;
	u64 align_order;

	shifted_base = balloc_base_shift(a, base);
	if (shifted_base == 0)
		align_order = __fls(len >> a->blk_shift);
	else
		align_order = min_t(u64,
				    __ffs(shifted_base >> a->blk_shift),
				    __fls(len >> a->blk_shift));

	if (align_order > a->max_order) {
		alloc_dbg(balloc_owner(a),
			  "Align order too big: %llu > %llu\n",
			  align_order, a->max_order);
		return 0;
	}

	/*
	 * Generate a list of buddies that satisfy this allocation.
	 */
	inc_base = shifted_base;
	while (inc_base < (shifted_base + len)) {
		u64 order_len = balloc_order_to_len(a, align_order);
		u64 remaining;
		struct nvgpu_buddy *bud;

		bud = __balloc_make_fixed_buddy(a,
					balloc_base_unshift(a, inc_base),
					align_order);
		if (!bud) {
			alloc_dbg(balloc_owner(a),
				  "Fixed buddy failed: {0x%llx, %llu}!\n",
				  balloc_base_unshift(a, inc_base),
				  align_order);
			goto err_and_cleanup;
		}

		balloc_blist_rem(a, bud);
		balloc_alloc_buddy(a, bud);
		__balloc_buddy_list_add(a, bud, &falloc->buddies);

		/* Book keeping. */
		inc_base += order_len;
		remaining = (shifted_base + len) - inc_base;
		align_order = __ffs(inc_base >> a->blk_shift);

		/* If we don't have much left - trim down align_order. */
		if (balloc_order_to_len(a, align_order) > remaining)
			align_order = __balloc_max_order_in(a, inc_base,
							inc_base + remaining);
	}

	return base;

err_and_cleanup:
	while (!list_empty(&falloc->buddies)) {
		struct nvgpu_buddy *bud = list_first_entry(&falloc->buddies,
							   struct nvgpu_buddy,
							   buddy_entry);

		__balloc_buddy_list_rem(a, bud);
		balloc_free_buddy(a, bud->start);
		kmem_cache_free(buddy_cache, bud);
	}

	return 0;
}
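
/*
 * Illustrative trace (not part of the driver): a fixed alloc of 5 blocks
 * starting at block 3 decomposes into a chain of naturally aligned buddies:
 *
 *   align_order = min(__ffs(3), __fls(5)) = min(0, 2) = 0 -> buddy [3, 4)
 *   inc_base = 4, remaining = 4, align_order = __ffs(4) = 2 -> buddy [4, 8)
 *
 * Each piece is as large as both the alignment of the current offset and
 * the remaining length allow.
 */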

static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a,
				   struct nvgpu_fixed_alloc *falloc)
{
	struct nvgpu_buddy *bud;

	while (!list_empty(&falloc->buddies)) {
		bud = list_first_entry(&falloc->buddies,
				       struct nvgpu_buddy,
				       buddy_entry);
		__balloc_buddy_list_rem(a, bud);

		balloc_free_buddy(a, bud->start);
		balloc_blist_add(a, bud);
		a->bytes_freed += balloc_order_to_len(a, bud->order);

		/*
		 * Attempt to defragment the allocation.
		 */
		balloc_coalesce(a, bud);
	}

	kfree(falloc);
}

/*
 * Allocate memory from the passed allocator.
 */
static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
{
	u64 order, addr;
	int pte_size;
	struct nvgpu_buddy_allocator *a = __a->priv;

	nvgpu_alloc_trace_func();

	alloc_lock(__a);

	order = balloc_get_order(a, len);

	if (order > a->max_order) {
		alloc_unlock(__a);
		alloc_dbg(balloc_owner(a), "Alloc fail\n");
		nvgpu_alloc_trace_func_done();
		return 0;
	}

	/*
	 * For now pass the base address of the allocator's region to
	 * __get_pte_size(). This ensures we get the right page size for
	 * the alloc but we don't have to know what the real address is
	 * going to be quite yet.
	 *
	 * TODO: once userspace supports a unified address space pass 0 for
	 * the base. This will make only 'len' affect the PTE size.
	 */
	if (a->flags & GPU_ALLOC_GVA_SPACE)
		pte_size = __get_pte_size(a->vm, a->base, len);
	else
		pte_size = BALLOC_PTE_SIZE_ANY;

	addr = __balloc_do_alloc(a, order, pte_size);

	if (addr) {
		a->bytes_alloced += len;
		a->bytes_alloced_real += balloc_order_to_len(a, order);
		alloc_dbg(balloc_owner(a),
			  "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
			  addr, order, len,
			  pte_size == gmmu_page_size_big ? "big" :
			  pte_size == gmmu_page_size_small ? "small" :
			  "NA/any");
	} else {
		alloc_dbg(balloc_owner(a), "Alloc failed: no mem!\n");
	}

	a->alloc_made = 1;

	alloc_unlock(__a);

	nvgpu_alloc_trace_func_done();
	return addr;
}
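
/*
 * Illustrative usage sketch (not part of the driver), assuming the
 * nvgpu_alloc()/nvgpu_free() wrappers declared in <nvgpu/allocator.h>
 * dispatch through the buddy ops table defined further down:
 */
#if 0
static void example_balloc_usage(struct nvgpu_allocator *na)
{
	/* Length is rounded up to a power-of-two multiple of blk_size. */
	u64 addr = nvgpu_alloc(na, 128 * 1024);

	if (addr)			/* 0 means allocation failure. */
		nvgpu_free(na, addr);
}
#endif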

/*
 * Requires @__a to be locked.
 */
static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
				      u64 base, u64 len)
{
	u64 ret, real_bytes = 0;
	struct nvgpu_buddy *bud;
	struct nvgpu_fixed_alloc *falloc = NULL;
	struct nvgpu_buddy_allocator *a = __a->priv;

	nvgpu_alloc_trace_func();

	/* If base isn't aligned to an order 0 block, fail. */
	if (base & (a->blk_size - 1))
		goto fail;

	if (len == 0)
		goto fail;

	falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
	if (!falloc)
		goto fail;

	INIT_LIST_HEAD(&falloc->buddies);
	falloc->start = base;
	falloc->end = base + len;

	if (!balloc_is_range_free(a, base, base + len)) {
		alloc_dbg(balloc_owner(a),
			  "Range not free: 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail;
	}

	ret = __balloc_do_alloc_fixed(a, falloc, base, len);
	if (!ret) {
		alloc_dbg(balloc_owner(a),
			  "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
			  base, base + len);
		goto fail;
	}

	balloc_alloc_fixed(a, falloc);

	list_for_each_entry(bud, &falloc->buddies, buddy_entry)
		real_bytes += (bud->end - bud->start);

	a->bytes_alloced += len;
	a->bytes_alloced_real += real_bytes;

	alloc_dbg(balloc_owner(a), "Alloc (fixed) 0x%llx\n", base);

	nvgpu_alloc_trace_func_done();
	return base;

fail:
	/* The caller holds the lock, so do not unlock on failure here. */
	kfree(falloc);
	nvgpu_alloc_trace_func_done();
	return 0;
}

/*
 * Allocate a fixed address allocation. The address of the allocation is
 * @base and the length is @len. This is not a typical buddy allocator
 * operation and as such has a high possibility of failure if the address
 * space is heavily in use.
 *
 * Please do not use this function unless _absolutely_ necessary.
 */
static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
				    u64 base, u64 len)
{
	u64 alloc;
	struct nvgpu_buddy_allocator *a = __a->priv;

	alloc_lock(__a);
	alloc = __nvgpu_balloc_fixed_buddy(__a, base, len);
	a->alloc_made = 1;
	alloc_unlock(__a);

	return alloc;
}

/*
 * Free the passed allocation.
 */
static void nvgpu_buddy_bfree(struct nvgpu_allocator *__a, u64 addr)
{
	struct nvgpu_buddy *bud;
	struct nvgpu_fixed_alloc *falloc;
	struct nvgpu_buddy_allocator *a = __a->priv;

	nvgpu_alloc_trace_func();

	if (!addr) {
		nvgpu_alloc_trace_func_done();
		return;
	}

	alloc_lock(__a);

	/*
	 * First see if this is a fixed alloc. If not fall back to a regular
	 * buddy.
	 */
	falloc = balloc_free_fixed(a, addr);
	if (falloc) {
		__balloc_do_free_fixed(a, falloc);
		goto done;
	}

	bud = balloc_free_buddy(a, addr);
	if (!bud)
		goto done;

	balloc_blist_add(a, bud);
	a->bytes_freed += balloc_order_to_len(a, bud->order);

	/*
	 * Attempt to defragment the allocation.
	 */
	balloc_coalesce(a, bud);

done:
	alloc_unlock(__a);
	alloc_dbg(balloc_owner(a), "Free 0x%llx\n", addr);
	nvgpu_alloc_trace_func_done();
	return;
}

static bool nvgpu_buddy_reserve_is_possible(struct nvgpu_buddy_allocator *a,
					    struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_alloc_carveout *tmp;
	u64 co_base, co_end;

	co_base = co->base;
	co_end = co->base + co->length;

	/*
	 * Not the fastest approach but we should not have that many carveouts
	 * for any reasonable allocator.
	 */
	list_for_each_entry(tmp, &a->co_list, co_entry) {
		/* Half-open overlap test; also catches full containment. */
		if (co_base < (tmp->base + tmp->length) &&
		    co_end > tmp->base)
			return false;
	}

	return true;
}

/*
 * Carveouts can only be reserved before any regular allocations have been
 * made.
 */
static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a,
				  struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_buddy_allocator *a = __a->priv;
	u64 addr;
	int err = 0;

	if (co->base < a->start || (co->base + co->length) > a->end ||
	    a->alloc_made)
		return -EINVAL;

	alloc_lock(__a);

	if (!nvgpu_buddy_reserve_is_possible(a, co)) {
		err = -EBUSY;
		goto done;
	}

	/* Should not be possible to fail... */
	addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length);
	if (!addr) {
		err = -ENOMEM;
		pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
		goto done;
	}

	list_add(&co->co_entry, &a->co_list);

done:
	alloc_unlock(__a);
	return err;
}

/*
 * Carveouts can be released at any time.
 */
static void nvgpu_buddy_release_co(struct nvgpu_allocator *__a,
				   struct nvgpu_alloc_carveout *co)
{
	alloc_lock(__a);

	list_del_init(&co->co_entry);
	nvgpu_free(__a, co->base);

	alloc_unlock(__a);
}

static u64 nvgpu_buddy_alloc_length(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;

	return ba->length;
}

static u64 nvgpu_buddy_alloc_base(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;

	return ba->start;
}

static int nvgpu_buddy_alloc_inited(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;
	int inited = ba->initialized;

	rmb();
	return inited;
}

static u64 nvgpu_buddy_alloc_end(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;

	return ba->end;
}

static u64 nvgpu_buddy_alloc_space(struct nvgpu_allocator *a)
{
	struct nvgpu_buddy_allocator *ba = a->priv;
	u64 space;

	alloc_lock(a);
	space = ba->end - ba->start -
		(ba->bytes_alloced_real - ba->bytes_freed);
	alloc_unlock(a);

	return space;
}

/*
 * Print the buddy allocator top level stats. If you pass @s as NULL then the
 * stats are printed to the kernel log. This lets this code be used for
 * debugging purposes internal to the allocator.
 */
static void nvgpu_buddy_print_stats(struct nvgpu_allocator *__a,
				    struct seq_file *s, int lock)
{
	int i = 0;
	struct rb_node *node;
	struct nvgpu_fixed_alloc *falloc;
	struct nvgpu_alloc_carveout *tmp;
	struct nvgpu_buddy_allocator *a = __a->priv;

	__alloc_pstat(s, __a, "base = %llu, limit = %llu, blk_size = %llu\n",
		      a->base, a->length, a->blk_size);
	__alloc_pstat(s, __a, "Internal params:\n");
	__alloc_pstat(s, __a, " start = 0x%llx\n", a->start);
	__alloc_pstat(s, __a, " end = 0x%llx\n", a->end);
	__alloc_pstat(s, __a, " count = 0x%llx\n", a->count);
	__alloc_pstat(s, __a, " blks = 0x%llx\n", a->blks);
	__alloc_pstat(s, __a, " max_order = %llu\n", a->max_order);

	if (lock)
		alloc_lock(__a);

	if (!list_empty(&a->co_list)) {
		__alloc_pstat(s, __a, "\n");
		__alloc_pstat(s, __a, "Carveouts:\n");
		list_for_each_entry(tmp, &a->co_list, co_entry)
			__alloc_pstat(s, __a,
				      " CO %2d: %-20s 0x%010llx + 0x%llx\n",
				      i++, tmp->name, tmp->base, tmp->length);
	}

	__alloc_pstat(s, __a, "\n");
	__alloc_pstat(s, __a, "Buddy blocks:\n");
	__alloc_pstat(s, __a, " Order Free Alloced Split\n");
	__alloc_pstat(s, __a, " ----- ---- ------- -----\n");

	for (i = a->max_order; i >= 0; i--) {
		if (a->buddy_list_len[i] == 0 &&
		    a->buddy_list_alloced[i] == 0 &&
		    a->buddy_list_split[i] == 0)
			continue;

		__alloc_pstat(s, __a, " %3d %-7llu %-9llu %llu\n", i,
			      a->buddy_list_len[i],
			      a->buddy_list_alloced[i],
			      a->buddy_list_split[i]);
	}

	__alloc_pstat(s, __a, "\n");

	for (node = rb_first(&a->fixed_allocs), i = 1;
	     node != NULL;
	     node = rb_next(node)) {
		falloc = container_of(node,
				      struct nvgpu_fixed_alloc, alloced_entry);

		__alloc_pstat(s, __a, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
			      i, falloc->start, falloc->end);
	}

	__alloc_pstat(s, __a, "\n");
	__alloc_pstat(s, __a, "Bytes allocated: %llu\n",
		      a->bytes_alloced);
	__alloc_pstat(s, __a, "Bytes allocated (real): %llu\n",
		      a->bytes_alloced_real);
	__alloc_pstat(s, __a, "Bytes freed: %llu\n",
		      a->bytes_freed);

	if (lock)
		alloc_unlock(__a);
}

static const struct nvgpu_allocator_ops buddy_ops = {
	.alloc = nvgpu_buddy_balloc,
	.free = nvgpu_buddy_bfree,

	.alloc_fixed = nvgpu_balloc_fixed_buddy,
	/* .free_fixed not needed. */

	.reserve_carveout = nvgpu_buddy_reserve_co,
	.release_carveout = nvgpu_buddy_release_co,

	.base = nvgpu_buddy_alloc_base,
	.length = nvgpu_buddy_alloc_length,
	.end = nvgpu_buddy_alloc_end,
	.inited = nvgpu_buddy_alloc_inited,
	.space = nvgpu_buddy_alloc_space,

	.fini = nvgpu_buddy_allocator_destroy,

	.print_stats = nvgpu_buddy_print_stats,
};

/*
 * Initialize a buddy allocator. Returns 0 on success. This allocator does
 * not necessarily manage bytes. It manages distinct ranges of resources. This
 * allows the allocator to work for things like comp_tags, semaphores, etc.
 *
 * @allocator: Ptr to an allocator struct to init.
 * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
 *      get PTE size for GVA spaces.
 * @name: Name of the allocator. Doesn't have to be static storage.
 * @base: The base address of the resource pool being managed.
 * @size: Number of resources in the pool.
 * @blk_size: Minimum number of resources to allocate at once. For things like
 *            semaphores this is 1. For GVA this might be as much as 64k. This
 *            corresponds to order 0. Must be a power of 2.
 * @max_order: Pick a maximum order. If you leave this as 0 the buddy
 *             allocator will try to pick a reasonable max order.
 * @flags: Extra flags necessary. See GPU_BALLOC_*.
 */
int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
				 struct vm_gk20a *vm, const char *name,
				 u64 base, u64 size, u64 blk_size,
				 u64 max_order, u64 flags)
{
	int err;
	u64 pde_size;
	struct nvgpu_buddy_allocator *a;

	/* blk_size must be greater than 0 and a power of 2. */
	if (blk_size == 0)
		return -EINVAL;
	if (blk_size & (blk_size - 1))
		return -EINVAL;

	if (max_order > GPU_BALLOC_MAX_ORDER)
		return -EINVAL;

	/* If this is to manage a GVA space we need a VM. */
	if (flags & GPU_ALLOC_GVA_SPACE && !vm)
		return -EINVAL;

	a = kzalloc(sizeof(struct nvgpu_buddy_allocator), GFP_KERNEL);
	if (!a)
		return -ENOMEM;

	err = __nvgpu_alloc_common_init(__a, name, a, false, &buddy_ops);
	if (err)
		goto fail;

	a->base = base;
	a->length = size;
	a->blk_size = blk_size;
	a->blk_shift = __ffs(blk_size);
	a->owner = __a;

	/*
	 * If base is 0 then modify base to be the size of one block so that
	 * we can return errors by returning addr == 0.
	 */
	if (a->base == 0) {
		a->base = a->blk_size;
		a->length -= a->blk_size;
	}

	a->vm = vm;
	if (flags & GPU_ALLOC_GVA_SPACE) {
		pde_size = ((u64)vm->big_page_size) << 10;
		a->pte_blk_order = balloc_get_order(a, pde_size);
	}

	/*
	 * When we have a GVA space with big_pages enabled the size and base
	 * must be PDE aligned. If big_pages are not enabled then this
	 * requirement is not necessary.
	 */
	if (flags & GPU_ALLOC_GVA_SPACE && vm->big_pages &&
	    (base & ((vm->big_page_size << 10) - 1) ||
	     size & ((vm->big_page_size << 10) - 1))) {
		err = -EINVAL;
		goto fail;
	}

	a->flags = flags;
	a->max_order = max_order;

	balloc_allocator_align(a);
	balloc_compute_max_order(a);

	/* Shared buddy kmem_cache for all allocators. */
	if (!buddy_cache)
		buddy_cache = KMEM_CACHE(nvgpu_buddy, 0);
	if (!buddy_cache) {
		err = -ENOMEM;
		goto fail;
	}

	a->alloced_buddies = RB_ROOT;
	a->fixed_allocs = RB_ROOT;
	INIT_LIST_HEAD(&a->co_list);
	err = balloc_init_lists(a);
	if (err)
		goto fail;

	wmb();
	a->initialized = 1;

	nvgpu_init_alloc_debug(g, __a);
	alloc_dbg(__a, "New allocator: type buddy\n");
	alloc_dbg(__a, " base 0x%llx\n", a->base);
	alloc_dbg(__a, " size 0x%llx\n", a->length);
	alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
	alloc_dbg(__a, " max_order %llu\n", a->max_order);
	alloc_dbg(__a, " flags 0x%llx\n", a->flags);

	return 0;

fail:
	kfree(a);
	return err;
}
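
/*
 * Illustrative usage sketch (not part of the driver): initializing a buddy
 * allocator over a 4GB range of 4K blocks with no GVA flags, letting the
 * allocator pick its own max order. The names and sizes here are made up
 * for the example.
 */
#if 0
static int example_init(struct gk20a *g, struct nvgpu_allocator *na)
{
	/* base 0 is remapped internally so 0 can still signal failure. */
	return __nvgpu_buddy_allocator_init(g, na, NULL, "example",
					    0ULL, 4ULL << 30, 4096ULL,
					    0ULL, 0ULL);
}
#endif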

int nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *a,
			       const char *name, u64 base, u64 size,
			       u64 blk_size, u64 flags)
{
	return __nvgpu_buddy_allocator_init(g, a, NULL, name,
					    base, size, blk_size, 0, flags);
}